Source code for crowsetta.formats.bbox.raven

"""module with functions that handle .txt annotation files
from Raven (https://ravensoundsoftware.com/software/).

Adapted in part from ``opensoundscape``
https://github.com/kitzeslab/opensoundscape/blob/master/opensoundscape/annotations.py
under MIT license
"""
import pathlib
from typing import ClassVar, List, Optional

import attr
import pandas as pd
import pandera
from pandera.typing import Series

import crowsetta
from crowsetta.typing import PathLike


[docs] class RavenSchema(pandera.SchemaModel): """A :class:`pandera.SchemaModel` that validates :type:`pandas.DataFrame`s loaded from a txt file, created by exporting a Selection Table from Raven. """ begin_time_s: Series[float] = pandera.Field() end_time_s: Series[float] = pandera.Field() low_freq_hz: Series[float] = pandera.Field() high_freq_hz: Series[float] = pandera.Field() annotation: Series[pd.StringDtype] = pandera.Field(coerce=True)
[docs] class Config: # we set strict fo False # because we just ignore other columns, e.g. 'Selection', # and because there should be an annotation column # and we don't want to throw an error because of it strict = False
[docs] @crowsetta.interface.BBoxLike.register @attr.define class Raven: """Class that represents txt annotation files from Raven (https://ravensoundsoftware.com/software/), created by exporting a Selection Table. Attributes ---------- name: str Shorthand name for annotation format: 'raven'. ext: str Extension of files in annotation format: '.txt' df : pandas.DataFrame with annotations loaded into it annot_path : str, pathlib.Path Path to Raven txt file from which annotations were loaded. audio_path : str. pathlib.Path Path to audio file that the Raven txt file annotates. """ name: ClassVar[str] = "raven" ext: ClassVar[str] = (".txt",) COLUMNS_MAP: ClassVar[dict] = { "Begin Time (s)": "begin_time_s", "End Time (s)": "end_time_s", "Low Freq (Hz)": "low_freq_hz", "High Freq (Hz)": "high_freq_hz", } df: pd.DataFrame annot_path: pathlib.Path annot_col: str audio_path: Optional[pathlib.Path] = attr.field(default=None, converter=attr.converters.optional(pathlib.Path))
[docs] @classmethod def from_file( cls, annot_path: PathLike, annot_col: str = "Annotation", audio_path: Optional[PathLike] = None ) -> "Self": # noqa: F821 """Load annotations from a Raven annotation file, created by exporting a Selection Table. Parameters ---------- annot_path : str, pathlib.Path Path to a txt file exported from Raven. annot_col : str Name of column that contains annotations. audio_path : str, pathlib.Path Path to audio file that the Raven txt file annotates. Optional, defaults to None. Examples -------- >>> example = crowsetta.data.get('raven') >>> raven = crowsetta.formats.bbox.Raven.from_file(example.annot_path) """ annot_path = pathlib.Path(annot_path) crowsetta.validation.validate_ext(annot_path, extension=cls.ext) # assume file is space-separated with no header df = pd.read_csv(annot_path, sep="\t") if len(df) < 1: raise ValueError(f"Cannot load annotations, " f"there are no rows in Raven txt file:\n{df}") columns_map = dict(cls.COLUMNS_MAP) # copy columns_map.update({annot_col: "annotation"}) df.rename(columns=columns_map, inplace=True) df = RavenSchema.validate(df) return cls( df=df, annot_path=annot_path, annot_col=annot_col, audio_path=audio_path, )
[docs] def to_bbox(self) -> List[crowsetta.BBox]: """Convert this Raven annotation to a :class:`list` of :class:`crowsetta.Bbox` instances. Returns ------- bboxes : list A :class:`list` of :class:`crowsetta.BBox` instances. Examples -------- >>> example = crowsetta.data.get('raven') >>> raven = crowsetta.formats.bbox.Raven.from_file(example.annot_path) >>> bboxes = raven.to_bbox() """ bboxes = [] for begin_time, end_time, low_freq, high_freq, label in zip( self.df.begin_time_s.values, self.df.end_time_s.values, self.df.low_freq_hz.values, self.df.high_freq_hz.values, self.df["annotation"].values, ): bboxes.append( crowsetta.BBox(onset=begin_time, offset=end_time, low_freq=low_freq, high_freq=high_freq, label=label) ) return bboxes
[docs] def to_annot(self) -> crowsetta.Annotation: """Convert this Raven annotation to a :class:`crowsetta.Annotation`. Returns ------- annot : crowsetta.Annotation Examples -------- >>> example = crowsetta.data.get('raven') >>> raven = crowsetta.formats.bbox.Raven.from_file(example.annot_path) >>> annot = raven.to_annot() """ bboxes = self.to_bbox() return crowsetta.Annotation(annot_path=self.annot_path, notated_path=self.audio_path, bboxes=bboxes)
[docs] def to_file(self, annot_path: PathLike) -> None: """Make a txt file that can be read by Raven from this annotation Parameters ---------- annot_path : str, pahtlib.Path Path including filename where file should be saved. Must have extension '.txt' """ crowsetta.validation.validate_ext(annot_path, extension=self.ext) columns_map = {v: k for k, v in self.COLUMNS_MAP.items()} # copy columns_map.update({"annotation": self.annot_col}) df_out = self.df.rename(columns=columns_map) df_out.to_csv(annot_path, sep="\t", index=False)