Source code for crowsetta.transcriber

import inspect
import warnings
from typing import Union


class Transcriber:
    """The :class:`crowsetta.Transcriber` class provides a way
    to work with all annotation formats in :mod:`crowsetta`,
    without needing to know the names of classes that represent formats
    (e.g., :class:`crowsetta.formats.seq.AudSeq`
    or :class:`crowsetta.formats.bbox.Raven`.)

    When you make a :class:`~crowsetta.Transcriber` instance,
    you specify its `format` as a string name,
    one of the names returned by :func:`crowsetta.formats.as_list`.

    You can then use this :class:`~crowsetta.Transcriber` instance
    to load multiple annotation files in that ``format``,
    by calling the :meth:`~crowsetta.Transcriber.from_file` method repeatedly,
    e.g., in a for loop or list comprehension.
    This will create multiple instances of the classes
    that represent annotation format,
    one instance for each annotation file.
    With method chaining you can convert each loaded file at the same time
    to :class:`crowsetta.Annotation`s
    (the data structure used to work with annotations
    and convert between formats),
    and save annotations to comma-separated values (csv) files
    or other file formats.
    See examples below.

    Attributes
    ----------
    format : str or class
        If a string, name of annotation format that the
        :class:`~crowsetta.Transcriber` will use.
        Must be one of the shorthand string names returned by
        :func:`crowsetta.formats.as_list`.
        If a class, must be one of the classes in :mod:`crowsetta.formats`
        that the shorthand strings refer to.
        You can register your own class using
        :func:`crowsetta.formats.register_format`.
        All format classes must be either sequence-like or bounding-box-like,
        i.e., registered as either :class:`crowsetta.interface.seq.SeqLike`
        or :class:`crowsetta.interface.bbox.BBoxLike`.

    Methods
    -------
    from_file : Loads annotations from a file

    Examples
    --------
    An example of loading a sequence-like format with the
    :meth:`~crowsetta.Transcriber.from_file` method.

    >>> import crowsetta
    >>> scribe = crowsetta.Transcriber(format='aud-seq')
    >>> example = crowsetta.data.get('aud-seq')
    >>> audseq = scribe.from_file(example.annot_path)
    >>> annot = audseq.to_annot()
    >>> annot
    Annotation(annot_path=PosixPath('/home/pimienta/.local/share/crowsetta/5.0.0rc1/audseq/405_marron1_June_14_2016_69640887.audacity.txt'), notated_path=None, seq=<Sequence with 61 segments>)  # noqa

    An example of loading a bounding box-like format with the
    :meth:`~crowsetta.Transcriber.from_file` method.
    Notice this format has a parameter ``annot_col``
    we need to specify for it to load correctly.
    We can pass this additional parameter into the ``from_file`` method
    as a keyword argument.

    >>> import crowsetta
    >>> scribe = crowsetta.Transcriber(format='raven')
    >>> example = crowsetta.data.get('raven')
    >>> raven = scribe.from_file(example.annot_path, annot_col='Species')
    >>> annot = raven.to_annot()
    >>> annot
    Annotation(annot_path=PosixPath('/home/pimienta/.local/share/crowsetta/5.0.0rc1/raven/Recording_1_Segment_02.Table.1.selections.txt'), notated_path=None, bboxes=[BBox(onset=154.387792767, offset=154.911598217, low_freq=2878.2, high_freq=4049.0, label='EATO'), BBox(onset=167.526598245, offset=168.17302044, low_freq=2731.9, high_freq=3902.7, label='EATO'), BBox(onset=183.609636834, offset=184.097751553, low_freq=2878.2, high_freq=3975.8, label='EATO'), BBox(onset=250.527480604, offset=251.160710509, low_freq=2756.2, high_freq=3951.4, label='EATO'), BBox(onset=277.88724277, offset=278.480895806, low_freq=2707.5, high_freq=3975.8, label='EATO'), BBox(onset=295.52970757, offset=296.110168316, low_freq=2951.4, high_freq=3975.8, label='EATO')])  # noqa

    An example of loading a set of annotations in the
    :class:`~crowsetta.formats.seq.NotMat` format,
    converting them to :class:`~crowsetta.Annotation` instances
    at the same time with method chaining,
    and then finally saving them as a csv file,
    using the :class:`~crowsetta.formats.seq.GenericSeq` format.

    >>> import pathlib
    >>> import crowsetta
    >>> notmat_paths = sorted(pathlib.Path('./data/bfsongrepo').glob('*.not.mat'))
    >>> scribe = crowsetta.Transcriber('notmat')
    >>> # next line, use method chaining to load NotMat and convert to crowsetta.Annotation all at once
    >>> annots = [scribe.from_file(notmat_path).to_annot() for notmat_path in notmat_paths]
    >>> generic_seq = crowsetta.formats.seq.GenericSeq(annots)
    >>> generic_seq.to_csv('./data/bfsongrepo/notmats.csv')
    """

    def __init__(self, format: "Union[str, crowsetta.interface.SeqLike, crowsetta.interface.BBoxLike]"):  # noqa: F821
        """Initialize a new :class:`crowsetta.Transcriber` instance.

        Parameters
        ----------
        format : str or class
            If a string, name of annotation format that the
            :class:`~crowsetta.Transcriber` will use.
            Must be one of the shorthand string names returned by
            :func:`crowsetta.formats.as_list`.
            If a class, must be one of the classes in :mod:`crowsetta.formats`
            that the shorthand strings refer to.
            You can register your own class using
            :func:`crowsetta.formats.register_format`.
            All format classes must be either sequence-like or bounding-box-like,
            i.e., registered as either :class:`crowsetta.interface.seq.SeqLike`
            or :class:`crowsetta.interface.bbox.BBoxLike`.

        Raises
        ------
        ValueError
            If ``format`` is a string that is not a key of
            ``formats.FORMATS``, or if ``format`` is neither a string
            nor a class.
        TypeError
            If ``format`` is a class that is not a subclass of
            ``crowsetta.interface.BaseFormat``.

        Warns
        -----
        FutureWarning
            If ``format`` is the string ``'csv'``.
        """
        # avoid circular imports
        from . import formats, interface

        if isinstance(format, str):
            if format not in formats.FORMATS:
                raise ValueError(
                    f"Format name '{format}' not recognized. "
                    f"Valid format names:\n{formats.as_list()}"
                )
            if format == "csv":
                # stacklevel=2 attributes the warning to the caller's line,
                # not to this constructor.
                warnings.warn(
                    "The format 'csv' has been renamed to 'generic-seq', "
                    "and the name 'csv' will stop working in the next version. "
                    "Please change any usages of the name 'csv' to 'generic-seq' now.",
                    FutureWarning,
                    stacklevel=2,
                )
            _format_class = formats.by_name(format)
        elif inspect.isclass(format):
            if not issubclass(format, interface.BaseFormat):
                raise TypeError(
                    "Format recognized as a class, but it is not a subclass of ``crowsetta.interface.BaseFormat``. "
                    "Please ``register`` the class as a subclass of either ``crowsetta.interface.SeqLike`` or "
                    f"``crowsetta.interface.BBoxLike``. Class was: {format}"
                )
            _format_class = format
        else:
            raise ValueError(f"Invalid value for ``format``: {format}")

        # Keep the user-supplied value (name or class) for display,
        # and the resolved class for the actual loading.
        self.format = format
        self._format_class = _format_class

    def __repr__(self):
        """Return a string representation showing the ``format`` argument.

        ``!r`` quotes string names exactly as before and shows
        class-valued formats without wrapping them in quotes.
        """
        return f"crowsetta.Transcriber(format={self.format!r})"

    def from_file(
        self, annot_path, *args, **kwargs
    ) -> "Union[crowsetta.interface.SeqLike,crowsetta.interface.BBoxLike]":  # noqa: F821
        """Load annotations from a file.

        Parameters
        ----------
        annot_path : str, pathlib.Path
            Path to file containing annotations.
        *args
            Additional positional arguments, passed through unchanged
            to the ``from_file`` method of the format class.
        **kwargs
            Additional keyword arguments, passed through unchanged
            to the ``from_file`` method of the format class
            (e.g., ``annot_col`` in the ``'raven'`` example below).

        Returns
        -------
        annotations : class-instance
            An instance of the class referred to by ``self.format``,
            with annotations loaded from ``annot_path``

        Examples
        --------
        An example of loading a sequence-like format with the
        :meth:`~crowsetta.Transcriber.from_file` method.

        >>> import crowsetta
        >>> scribe = crowsetta.Transcriber(format='aud-seq')
        >>> example = crowsetta.data.get('aud-seq')
        >>> audseq = scribe.from_file(example.annot_path)
        >>> annot = audseq.to_annot()
        >>> annot
        Annotation(annot_path=PosixPath('/home/pimienta/.local/share/crowsetta/5.0.0rc1/audseq/405_marron1_June_14_2016_69640887.audacity.txt'), notated_path=None, seq=<Sequence with 61 segments>)  # noqa

        An example of loading a bounding box-like format with the
        :meth:`~crowsetta.Transcriber.from_file` method.
        Notice this format has a parameter ``annot_col``
        we need to specify for it to load correctly.
        We can pass this additional parameter into the
        :meth:`~crowsetta.Transcriber.from_file` method
        as a keyword argument.

        >>> import crowsetta
        >>> scribe = crowsetta.Transcriber(format='raven')
        >>> example = crowsetta.data.get('raven')
        >>> raven = scribe.from_file(example.annot_path, annot_col='Species')
        >>> annot = raven.to_annot()
        >>> annot
        Annotation(annot_path=PosixPath('/home/pimienta/.local/share/crowsetta/5.0.0rc1/raven/Recording_1_Segment_02.Table.1.selections.txt'), notated_path=None, bboxes=[BBox(onset=154.387792767, offset=154.911598217, low_freq=2878.2, high_freq=4049.0, label='EATO'), BBox(onset=167.526598245, offset=168.17302044, low_freq=2731.9, high_freq=3902.7, label='EATO'), BBox(onset=183.609636834, offset=184.097751553, low_freq=2878.2, high_freq=3975.8, label='EATO'), BBox(onset=250.527480604, offset=251.160710509, low_freq=2756.2, high_freq=3951.4, label='EATO'), BBox(onset=277.88724277, offset=278.480895806, low_freq=2707.5, high_freq=3975.8, label='EATO'), BBox(onset=295.52970757, offset=296.110168316, low_freq=2951.4, high_freq=3975.8, label='EATO')])  # noqa

        An example of loading a set of annotations in the
        :class:`~crowsetta.formats.seq.NotMat` format,
        converting them to :class:`~crowsetta.Annotation` instances
        at the same time with method chaining,
        and then finally saving them as a csv file,
        using the :class:`~crowsetta.formats.seq.GenericSeq` format.

        >>> import pathlib
        >>> import crowsetta
        >>> notmat_paths = sorted(pathlib.Path('./data/bfsongrepo').glob('*.not.mat'))
        >>> scribe = crowsetta.Transcriber('notmat')
        >>> # next line, use method chaining to load NotMat and convert to crowsetta.Annotation all at once
        >>> annots = [scribe.from_file(notmat_path).to_annot() for notmat_path in notmat_paths]
        >>> generic_seq = crowsetta.formats.seq.GenericSeq(annots)
        >>> generic_seq.to_csv('./data/bfsongrepo/notmats.csv')
        """
        return self._format_class.from_file(annot_path, *args, **kwargs)