from __future__ import annotations
import contextlib
try:
from importlib.resources import as_file, files, open_text
except ImportError:
from importlib_resources import as_file, files, open_text
import pathlib
import shutil
from typing import Union
import appdirs
import attr
from ..__about__ import __version__ as version
from ..typing import PathLike
# Application directories for crowsetta, specific to the installed
# package version. ``APP_DIRS.user_data_dir`` is the default location
# used in this module for extracted example annotation files.
APP_DIRS = appdirs.AppDirs(appname="crowsetta", appauthor="vocalpy", version=version)
[docs]
@attr.define
class ExampleAnnotFile:
    """An example annotation file, with the citation for its source dataset.

    Instances of this class are returned by ``crowsetta.data.get``.

    Attributes
    ----------
    annot_path : pathlib.Path, contextlib._GeneratorContextManager
        Where to find the annotation file.
        When the example files have been extracted to the
        local file system with the function
        ``crowsetta.data.extract_data_files``,
        this is a path that can be used to load the file.
        When they have not been extracted,
        ``crowsetta.data.get`` instead returns ``annot_path``
        as a context manager; entering it in a ``with``
        statement provides a path to a temporary file.
    citation : str
        Citation for the dataset that the example is taken from.
    """

    annot_path: Union[PathLike, contextlib._GeneratorContextManager]
    citation: str
# Maps the shorthand name of each annotation format to the arguments
# (package and resource name) that locate its example annotation file.
# Each row of the table below is (format name, package, resource).
DATA = {
    format_name: FormatPathArgs(package=package, resource=resource)
    for format_name, package, resource in (
        ("aud-bbox", "crowsetta.data.audbbox", "spinetail.txt"),
        ("aud-seq", "crowsetta.data.audseq", "405_marron1_June_14_2016_69640887.audacity.txt"),
        ("birdsong-recognition-dataset", "crowsetta.data.birdsongrec", "Annotation.xml"),
        ("generic-seq", "crowsetta.data.generic", "example_custom_format.csv"),
        ("notmat", "crowsetta.data.notmat", "gy6or6_baseline_230312_0808.138.cbin.not.mat"),
        ("raven", "crowsetta.data.raven", "Recording_1_Segment_02.Table.1.selections.txt"),
        ("simple-seq", "crowsetta.data.simple", "bl26lb16_190412_0721.20144_annotations.csv"),
        ("textgrid", "crowsetta.data.textgrid", "AVO-maea-basic.TextGrid"),
        ("timit", "crowsetta.data.timit", "sa1.phn"),
    )
}
def _get_example_from_user_data_dir(format: str, user_data_dir: PathLike | None = None) -> ExampleAnnotFile:
    """Return an example annotation file from ``user_data_dir``.

    Assumes that example data has already been copied to
    ``user_data_dir`` by calling ``crowsetta.data.extract_data_files``.
    Helper function used by ``crowsetta.data.get``.

    Parameters
    ----------
    format : str
        Name of annotation format.
        Should be the shorthand string name,
        as listed by ``crowsetta.formats.as_list``.
    user_data_dir : str, pathlib.Path
        Location where example annotation files have been extracted to,
        by calling ``crowsetta.data.extract_data_files``.
        If none is given, defaults to the value of
        ``crowsetta.data.data.APP_DIRS.user_data_dir``

    Returns
    -------
    example : ExampleAnnotFile
        with ``annot_path`` and ``citation`` attributes.
        ``annot_path`` is a ``pathlib.Path``.

    Raises
    ------
    ValueError
        If ``format`` is not one of the keys of ``DATA``.
    """
    try:
        path_args = DATA[format]
    except KeyError as e:
        raise ValueError(f"format not recognized: {format}") from e

    if user_data_dir is None:
        user_data_dir = APP_DIRS.user_data_dir
    # The default from ``APP_DIRS`` is a string, and callers may pass a
    # string too, so convert before building paths with the ``/`` operator.
    user_data_dir = pathlib.Path(user_data_dir)

    # The files for a format are looked up in a sub-directory named after the
    # last component of its package name, e.g. "crowsetta.data.timit" -> "timit".
    format_dir = user_data_dir / path_args.package.split(".")[-1]
    annot_path = format_dir / path_args.resource
    citation_txt = format_dir / "citation.txt"

    # Read as UTF-8 rather than the locale's default encoding, so that the
    # citation text is decoded the same way on every platform.
    with citation_txt.open("r", encoding="utf-8") as fp:
        citation = fp.read().replace("\n", "")
    return ExampleAnnotFile(annot_path=annot_path, citation=citation)
def _get_example_as_context_manager(format: str) -> ExampleAnnotFile:
    """Get an example annotation file
    as a context manager, that can be used
    as shown in the example below.
    Helper function used by ``crowsetta.data.get``.

    Parameters
    ----------
    format : str
        Name of annotation format.
        Should be the shorthand string name,
        as listed by ``crowsetta.formats.as_list``.

    Returns
    -------
    example_annot_file : crowsetta.data.ExampleAnnotFile
        class instance with attributes ``annot_path``
        and ``citation``. The ``annot_path``
        attribute should be used as part of a ``with``
        statement to get a path to the file; see Examples below.

    Raises
    ------
    ValueError
        If ``format`` is not one of the keys of ``DATA``.

    Examples
    --------
    >>> example = _get_example_as_context_manager('textgrid')
    >>> with example.annot_path as annot_path:
    ...     textgrid = crowsetta.formats.seq.TextGrid.from_file(annot_path)
    """
    try:
        path_args = DATA[format]
    except KeyError as e:
        raise ValueError(f"format not recognized: {format}") from e

    # don't use full name `importlib.resources` here
    # because we need to use backport package, not stdlib, on Python 3.8
    package_files = files(path_args.package)
    annot_path = as_file(package_files.joinpath(path_args.resource))

    # Read the citation through ``files`` rather than ``open_text``:
    # the ``open_text(package=..., resource=...)`` form is deprecated since
    # Python 3.11 and its keyword names are not accepted by Python 3.13.
    # ``open_text`` decoded as UTF-8 by default, so request that explicitly.
    citation_text = package_files.joinpath("citation.txt").read_text(encoding="utf-8")
    citation = citation_text.replace("\n", "")
    return ExampleAnnotFile(annot_path=annot_path, citation=citation)
[docs]
def get(format: str, user_data_dir: PathLike | None = None) -> ExampleAnnotFile:
    """Get an example annotation file.

    If ``user_data_dir`` does not exist, the user is prompted
    on standard input to choose whether the example files
    should be extracted into it.

    Parameters
    ----------
    format : str
        Name of annotation format.
        Should be the shorthand string name,
        as listed by ``crowsetta.formats.as_list``.
    user_data_dir : str, pathlib.Path
        Location where example annotation files
        are stored.
        If none is given, defaults to the value of
        ``crowsetta.data.data.APP_DIRS.user_data_dir``
        This default can be changed, but will require
        passing the same path in every time
        this function is called to avoid
        being prompted about extracting the example files
        to the default location.

    Returns
    -------
    example_annot_file : ExampleAnnotFile
        class instance with attributes ``annot_path``
        and ``citation``.
        If the annotation files have been
        extracted to the local file system,
        then ``annot_path`` will be a path
        to a file.
        Otherwise, ``annot_path`` will be
        a context manager that should be
        used as part of a ``with``
        statement to get a path to the file;
        see Examples below.

    Raises
    ------
    ValueError
        If ``format`` is not one of the keys of ``DATA``.

    Examples
    --------
    >>> # example of a context manager
    >>> example = crowsetta.data.get('textgrid')
    >>> with example.annot_path as annot_path:
    ...     textgrid = crowsetta.formats.seq.TextGrid.from_file(annot_path)
    """
    if format not in DATA:
        raise ValueError(f"format not recognized: {format}")

    user_data_dir = pathlib.Path(APP_DIRS.user_data_dir if user_data_dir is None else user_data_dir)

    # The directory exists, so the example files are taken to be in it already.
    if user_data_dir.exists():
        return _get_example_from_user_data_dir(format, user_data_dir)

    answer = input(
        f"``user_data_dir`` does not exist at default location:\n{user_data_dir}\n"
        "(To choose a location besides the default, call this function with that location "
        "as the argument for ``user_data_dir``.)\n\n"
        "Do you want to create this ``user_data_dir`` and extract example annotation files into it?\n"
        "[yes]/no >>>"
    )

    # Just pressing enter accepts the default answer, which is "yes".
    wants_extraction = answer == "" or answer.lower().startswith("y")
    if wants_extraction:
        extract_data_files(user_data_dir)
        return _get_example_from_user_data_dir(format, user_data_dir)

    # The message text below starts at column 0 on purpose:
    # the string literal is kept exactly as it was.
    print(
        """Not extracting data. Will return a context manager.\n
Use the context manager to get a path to a temporary path
like in the following example:\n
>>> example = crowsetta.data.get('timit')
>>> with example.annot_path as annot_path:
... timit = crowsetta.formats.seq.Timit.from_file(annot_path=annot_path)
>>> annot = timit.to_annot()
"""
    )
    return _get_example_as_context_manager(format)