Source code for crowsetta.formats.bbox.raven

"""module with functions that handle .txt annotation files
from Raven (https://ravensoundsoftware.com/software/).

Adapted in part from ``opensoundscape``
https://github.com/kitzeslab/opensoundscape/blob/master/opensoundscape/annotations.py
under MIT license
"""
import pathlib
from typing import ClassVar, List, Optional

import attr
import pandas as pd
import pandera
from pandera.typing import Series

import crowsetta
from crowsetta.typing import PathLike



[docs]
class RavenSchema(pandera.SchemaModel):
    """A :class:`pandera.SchemaModel` that validates
    :class:`pandas.DataFrame` instances loaded from a txt file
    that was created by exporting a Selection Table from Raven.

    The column names declared here are the *renamed* columns,
    i.e., the names a dataframe has after the original
    Raven headers (such as ``'Begin Time (s)'``) have been mapped
    to ``'begin_time_s'`` and so on.
    """

    # ``coerce=True`` on the numeric columns: a column whose values
    # are all whole numbers (e.g. a low frequency of ``0`` for every row)
    # is loaded by pandas as an integer dtype; coercing casts it to float
    # instead of failing validation.
    begin_time_s: Series[float] = pandera.Field(coerce=True)
    end_time_s: Series[float] = pandera.Field(coerce=True)
    low_freq_hz: Series[float] = pandera.Field(coerce=True)
    high_freq_hz: Series[float] = pandera.Field(coerce=True)
    # labels are coerced to the pandas string dtype
    annotation: Series[pd.StringDtype] = pandera.Field(coerce=True)

    class Config:
        # We set strict to False so that any other columns
        # in the table, e.g. 'Selection', are simply ignored
        # instead of causing a validation error.
        strict = False





[docs]
@crowsetta.interface.BBoxLike.register
@attr.define
class Raven:
    """Class that represents txt annotation files
    from Raven (https://ravensoundsoftware.com/software/),
    created by exporting a Selection Table.

    Attributes
    ----------
    name: str
        Shorthand name for annotation format: 'raven'.
    ext: tuple
        Extensions of files in annotation format: ``('.txt',)``.
    COLUMNS_MAP : dict
        Maps column headers used in the Raven txt file
        to the column names used in ``df``.
    df : pandas.DataFrame
        with annotations loaded into it
    annot_path : str, pathlib.Path
        Path to Raven txt file from which annotations were loaded.
    annot_col : str
        Name of the column in the Raven txt file that contains annotations.
        In ``df`` this column is named ``'annotation'``;
        the original name is restored by :meth:`to_file`.
    audio_path : str, pathlib.Path
        Path to audio file that the Raven txt file annotates.
        Optional, default is None.
    """

    name: ClassVar[str] = "raven"
    # NOTE: the value is a tuple of extensions, not a single string
    ext: ClassVar[tuple] = (".txt",)
    COLUMNS_MAP: ClassVar[dict] = {
        "Begin Time (s)": "begin_time_s",
        "End Time (s)": "end_time_s",
        "Low Freq (Hz)": "low_freq_hz",
        "High Freq (Hz)": "high_freq_hz",
    }

    df: pd.DataFrame
    annot_path: pathlib.Path
    annot_col: str
    audio_path: Optional[pathlib.Path] = attr.field(default=None, converter=attr.converters.optional(pathlib.Path))

    @classmethod
    def from_file(
        cls, annot_path: PathLike, annot_col: str = "Annotation", audio_path: Optional[PathLike] = None
    ) -> "Self":  # noqa: F821
        """Load annotations from a Raven annotation file,
        created by exporting a Selection Table.

        Parameters
        ----------
        annot_path : str, pathlib.Path
            Path to a txt file exported from Raven.
        annot_col : str
            Name of column that contains annotations.
            Default is 'Annotation'.
        audio_path : str, pathlib.Path
            Path to audio file that the Raven txt file annotates.
            Optional, defaults to None.

        Returns
        -------
        raven : Raven
            Instance with the validated table in its ``df`` attribute.

        Raises
        ------
        ValueError
            If the txt file contains no rows.

        Examples
        --------
        >>> example = crowsetta.data.get('raven')
        >>> raven = crowsetta.formats.bbox.Raven.from_file(example.annot_path)
        """
        annot_path = pathlib.Path(annot_path)
        crowsetta.validation.validate_ext(annot_path, extension=cls.ext)

        # the file is read as tab-separated, with the first row as the header
        df = pd.read_csv(annot_path, sep="\t")
        if len(df) < 1:
            raise ValueError(f"Cannot load annotations, there are no rows in Raven txt file:\n{df}")

        # build a new dict so the class-level COLUMNS_MAP is never mutated;
        # the user-specified annotation column is always renamed to 'annotation'
        columns_map = {**cls.COLUMNS_MAP, annot_col: "annotation"}
        df = df.rename(columns=columns_map)
        df = RavenSchema.validate(df)

        return cls(
            df=df,
            annot_path=annot_path,
            annot_col=annot_col,
            audio_path=audio_path,
        )

    def to_bbox(self) -> List[crowsetta.BBox]:
        """Convert this Raven annotation to a
        :class:`list` of :class:`crowsetta.BBox` instances.

        Returns
        -------
        bboxes : list
            A :class:`list` of :class:`crowsetta.BBox` instances,
            one per row of ``df``, in row order.

        Examples
        --------
        >>> example = crowsetta.data.get('raven')
        >>> raven = crowsetta.formats.bbox.Raven.from_file(example.annot_path)
        >>> bboxes = raven.to_bbox()
        """
        return [
            crowsetta.BBox(onset=begin_time, offset=end_time, low_freq=low_freq, high_freq=high_freq, label=label)
            for begin_time, end_time, low_freq, high_freq, label in zip(
                self.df.begin_time_s.values,
                self.df.end_time_s.values,
                self.df.low_freq_hz.values,
                self.df.high_freq_hz.values,
                self.df["annotation"].values,
            )
        ]

    def to_annot(self) -> crowsetta.Annotation:
        """Convert this Raven annotation to a
        :class:`crowsetta.Annotation`.

        Returns
        -------
        annot : crowsetta.Annotation
            With ``annot_path`` set to this instance's ``annot_path``,
            ``notated_path`` set to this instance's ``audio_path``,
            and ``bboxes`` set to the result of :meth:`to_bbox`.

        Examples
        --------
        >>> example = crowsetta.data.get('raven')
        >>> raven = crowsetta.formats.bbox.Raven.from_file(example.annot_path)
        >>> annot = raven.to_annot()
        """
        bboxes = self.to_bbox()
        return crowsetta.Annotation(annot_path=self.annot_path, notated_path=self.audio_path, bboxes=bboxes)

    def to_file(self, annot_path: PathLike) -> None:
        """Make a txt file that can be read by Raven
        from this annotation.

        Parameters
        ----------
        annot_path : str, pathlib.Path
             Path including filename where file should be saved.
             Must have extension '.txt'
        """
        # normalize to a Path, the same way ``from_file`` does
        annot_path = pathlib.Path(annot_path)
        crowsetta.validation.validate_ext(annot_path, extension=self.ext)

        # invert COLUMNS_MAP so that the columns get their Raven headers back,
        # and restore the original name of the annotation column
        columns_map = {df_name: raven_name for raven_name, df_name in self.COLUMNS_MAP.items()}
        columns_map["annotation"] = self.annot_col
        df_out = self.df.rename(columns=columns_map)
        df_out.to_csv(annot_path, sep="\t", index=False)