Skip to content

API Reference

Functional Helpers

gridio

High-level helpers for converting Praat TextGrid files.

These wrappers expose the Rust-backed parsing and serialization implemented in :mod:gridio.gridio and provide convenient Pythonic utilities for turning TextGrid content into tabular or nested data structures and writing them back to disk.

Functions:

Name Description
textgrid_to_df

Parse TextGrid files into a DataFrame-like structure.

textgrid_to_data

Parse TextGrid files into nested data resembling the Rust output.

df_to_textgrid

Serialize a tabular representation back into a TextGrid file.

data_to_textgrid

Write nested tier data to a TextGrid file using the Rust backend.

textgrid_to_df(file, strict=False, file_type='auto', file_name_column=None, file_name_func=None, backend='pandas')

Parse TextGrid files into a DataFrame-like structure.

Parameters:

Name Type Description Default
file Union[str, list[str], Path, list[Path]]

Path to a single TextGrid file or an iterable of paths.

required
strict bool

When True the parser raises on malformed structures instead of attempting a best-effort conversion.

False
file_type str

"short", "long", or "auto" to let the backend infer the dialect.

'auto'
file_name_column Optional[bool]

Force inclusion (True) or exclusion (False) of a filename column. Defaults to True for multi-file inputs and False otherwise.

None
file_name_func Optional[Any]

Optional callable used to transform each filename before it is written to the DataFrame. The callable receives the original path object and must return a string.

None
backend Literal['pandas', 'polars']

"pandas" yields a :class:pandas.DataFrame; "polars" yields a :class:polars.DataFrame.

'pandas'

Returns:

Type Description
pandas.DataFrame or polars.DataFrame

Tabular representation containing tmin, tmax, label, tier and interval flags; optionally includes filename.

Examples:

>>> df = textgrid_to_df("data/short_format.TextGrid")
>>> df.head().to_dict(orient="records")
[{'tmin': 0.0, 'tmax': 0.25, 'label': 'sil', 'tier': 'phones',
  'is_interval': True},
 {'tmin': 0.25, 'tmax': 0.53, 'label': 's', 'tier': 'phones',
  'is_interval': True}]

The primary columns are tmin, tmax, label, tier, is_interval, and (for multi-file inputs) filename.

See Also

df_to_textgrid : Persist the DataFrame back to a TextGrid file.

Source code in gridio/__init__.py
def textgrid_to_df(
    file: Union[str, list[str], Path, list[Path]],
    strict: bool = False,
    file_type: str = "auto",
    file_name_column: Optional[bool] = None,
    file_name_func: Optional[Any] = None,
    backend: Literal["pandas", "polars"] = "pandas",
):
    """Parse TextGrid files into a DataFrame-like structure.

    Parameters
    ----------
    file:
        Path to a single TextGrid file or an iterable of paths.
    strict:
        When ``True`` the parser raises on malformed structures instead of
        attempting a best-effort conversion.
    file_type:
        ``"short"``, ``"long"``, or ``"auto"`` to let the backend infer the
        dialect.
    file_name_column:
        Force inclusion (``True``) or exclusion (``False``) of a filename column.
        Defaults to ``True`` for multi-file inputs and ``False`` otherwise.
    file_name_func:
        Optional callable used to transform each filename before it is written
        to the DataFrame. The callable receives the original path object and
        must return a string.
    backend:
        ``"pandas"`` yields a :class:`pandas.DataFrame`; ``"polars"`` yields a
        :class:`polars.DataFrame`.

    Returns
    -------
    pandas.DataFrame or polars.DataFrame
        Tabular representation containing ``tmin``, ``tmax``, ``label``, ``tier``
        and interval flags; optionally includes ``filename``.

    Raises
    ------
    ValueError
        If ``backend`` is neither ``"pandas"`` nor ``"polars"``. The check runs
        before any file is parsed.

    Examples
    --------
    >>> df = textgrid_to_df("data/short_format.TextGrid")
    >>> df.head().to_dict(orient="records")  # doctest: +SKIP
    [{'tmin': 0.0, 'tmax': 0.25, 'label': 'sil', 'tier': 'phones',
      'is_interval': True},
     {'tmin': 0.25, 'tmax': 0.53, 'label': 's', 'tier': 'phones',
      'is_interval': True}]

    The primary columns are ``tmin``, ``tmax``, ``label``, ``tier``,
    ``is_interval``, and (for multi-file inputs) ``filename``.

    See Also
    --------
    df_to_textgrid : Persist the DataFrame back to a TextGrid file.
    """
    # Reject an unknown backend up front so a typo fails immediately instead
    # of after every input file has already been parsed.
    if backend not in ("pandas", "polars"):
        raise ValueError("backend must be 'pandas' or 'polars'")

    if file_name_column is None:
        # Only list inputs get a filename column by default.
        file_name_column = isinstance(file, list)

    vectors = _dispatch_files(
        file,
        func_single=lambda f: rc_tg2vecs(f, strict=strict, file_type=file_type),
        func_multiple=lambda fs: rc_tgs2vecs(fs, strict=strict, file_type=file_type),
    )

    # The first five vectors are the column data; a sixth one, when used
    # below, indexes into the list of file names.
    tmins, tmaxs, labels, tiers, is_intervals = vectors[:5]

    data = {
        "tmin": tmins,
        "tmax": tmaxs,
        "label": np.array(labels, dtype=np.str_),
        "tier": np.array(tiers, dtype=np.str_),
        "is_interval": is_intervals,
    }

    if file_name_column:
        file_names = _file_name(file, file_name_func=file_name_func)
        if isinstance(file, (str, Path)):
            # Single file: repeat its name once per row.
            file_names = np.repeat(file_names, len(tiers))
        else:
            # Multiple files: map each row to its file name by index.
            file_ids = vectors[5]
            file_names = np.array(file_names, dtype=np.str_)[file_ids]
        data["filename"] = file_names

    # The DataFrame libraries are imported lazily so only the requested one
    # has to be installed.
    if backend == "pandas":
        import pandas as pd

        return pd.DataFrame(data, copy=False)

    import polars as pl

    return pl.DataFrame(data)

textgrid_to_data(file, strict=False, file_name_func=None, file_type='auto')

Parse TextGrid files into nested data resembling the Rust output.

Parameters:

Name Type Description Default
file Union[str, list[str], Path, list[Path]]

Path to a single TextGrid file or an iterable of paths.

required
strict bool

When True enforce strict parsing.

False
file_name_func Optional[Any]

Optional callable used to transform each filename key in the returned dictionary for multi-file inputs.

None
file_type str

"short", "long" or "auto" to control dialect detection.

'auto'

Returns:

Type Description
tuple or dict[str, tuple]

The raw structured data from the Rust bindings. For multiple files a dictionary keyed by filename is returned.

Examples:

>>> data = textgrid_to_data("data/short_format.TextGrid")
>>> round(data[0], 2), round(data[1], 2)
(0.0, 2.43)
>>> data[2][0]
('phones', True, [(0.0, 0.25, 'sil'), (0.25, 0.53, 's')])

The tuple layout is (tmin, tmax, tiers). Each tier entry is (name, is_interval, items) where items is an ordered list of (start, end, label) tuples. Point tiers repeat their timestamp for start and end so the shape remains consistent with interval tiers.

With multiple input files the function returns a dictionary mapping the normalised filename (after applying file_name_func when provided) to the same tuple structure.

See Also

data_to_textgrid : Convert the tuple structure back into a TextGrid file.

Source code in gridio/__init__.py
def textgrid_to_data(
    file: Union[str, list[str], Path, list[Path]],
    strict: bool = False,
    file_name_func: Optional[Any] = None,
    file_type: str = "auto",
):
    """Parse TextGrid files into nested data resembling the Rust output.

    Parameters
    ----------
    file:
        Path to a single TextGrid file or an iterable of paths.
    strict:
        When ``True`` enforce strict parsing.
    file_name_func:
        Optional callable used to transform each filename key in the returned
        dictionary for multi-file inputs.
    file_type:
        ``"short"``, ``"long"`` or ``"auto"`` to control dialect detection.

    Returns
    -------
    tuple or dict[str, tuple]
        The raw structured data from the Rust bindings. For multiple files a
        dictionary keyed by filename is returned.

    Examples
    --------
    >>> data = textgrid_to_data("data/short_format.TextGrid")
    >>> round(data[0], 2), round(data[1], 2)
    (0.0, 2.43)
    >>> data[2][0]
    ('phones', True, [(0.0, 0.25, 'sil'), (0.25, 0.53, 's')])

    The tuple layout is ``(tmin, tmax, tiers)``. Each tier entry is
    ``(name, is_interval, items)`` where ``items`` is an ordered list of
    ``(start, end, label)`` tuples. Point tiers repeat their timestamp for
    ``start`` and ``end`` so the shape remains consistent with interval tiers.

    With multiple input files the function returns a dictionary mapping the
    normalised filename (after applying ``file_name_func`` when provided) to the
    same tuple structure.

    See Also
    --------
    data_to_textgrid : Convert the tuple structure back into a TextGrid file.
    """

    def parse_one(path):
        return rc_tg2data(path, strict=strict, file_type=file_type)

    def parse_many(paths):
        return rc_tgs2data(paths, strict=strict, file_type=file_type)

    parsed = _dispatch_files(file, func_single=parse_one, func_multiple=parse_many)

    # A lone path yields the backend tuple unchanged.
    if isinstance(file, (str, Path)):
        return parsed

    # Several paths: pair each parsed result with its (transformed) name.
    keys = _file_name(file, file_name_func=file_name_func)
    return dict(zip(keys, parsed))

df_to_textgrid(df, out_file, tmin=None, tmax=None, file_type='long')

Serialize a tabular representation back into a TextGrid file.

Parameters:

Name Type Description Default
df Any

DataFrame-like object exposing tmin, tmax, label, tier and is_interval columns.

required
out_file str

Destination path for the emitted TextGrid file.

required
tmin Optional[float]

Optional overrides for the global bounds written to the file.

None
tmax Optional[float]

Optional overrides for the global bounds written to the file.

None
file_type str

Dialect to emit ("short" or "long").

'long'

Examples:

>>> df = textgrid_to_df("data/short_format.TextGrid")
>>> df_to_textgrid(df, "out.TextGrid", file_type="short")

The converter expects the same column layout described in :func:textgrid_to_df.

See Also

textgrid_to_df : Parse TextGrid files into the expected DataFrame format.

Source code in gridio/__init__.py
def df_to_textgrid(
    df: Any,
    out_file: str,
    tmin: Optional[float] = None,
    tmax: Optional[float] = None,
    file_type: str = "long",
):
    """Serialize a tabular representation back into a TextGrid file.

    Parameters
    ----------
    df:
        DataFrame-like object exposing ``tmin``, ``tmax``, ``label``, ``tier``
        and ``is_interval`` columns. Columns may be pandas Series, polars
        Series, NumPy arrays, or any other iterable.
    out_file:
        Destination path for the emitted TextGrid file.
    tmin, tmax:
        Optional overrides for the global bounds written to the file.
    file_type:
        Dialect to emit (``"short"`` or ``"long"``).

    Examples
    --------
    >>> df = textgrid_to_df("data/short_format.TextGrid")
    >>> df_to_textgrid(df, "out.TextGrid", file_type="short")  # doctest: +SKIP

    The converter expects the same column layout described in
    :func:`textgrid_to_df`.

    See Also
    --------
    textgrid_to_df : Parse TextGrid files into the expected DataFrame format.
    """

    def _column_values(name):
        """Return column ``name`` of ``df`` as a plain Python list."""
        column = df[name]
        # pandas Series and NumPy arrays spell the conversion ``tolist``;
        # polars Series spell it ``to_list``. Try both so frames produced by
        # either ``textgrid_to_df`` backend can be written back.
        for method_name in ("tolist", "to_list"):
            converter = getattr(column, method_name, None)
            if callable(converter):
                return converter()
        # Last resort for plain iterables such as lists or tuples.
        return list(column)

    tmins = _column_values("tmin")
    tmaxs = _column_values("tmax")
    labels = _column_values("label")
    tiers = _column_values("tier")
    is_intervals = _column_values("is_interval")
    rc_vecs2tg(
        tmins,
        tmaxs,
        labels,
        tiers,
        is_intervals,
        tmin,
        tmax,
        out_file,
        file_type=file_type,
    )

data_to_textgrid(data, out_file, file_type='long')

Write nested tier data to a TextGrid file using the Rust backend.

Parameters:

Name Type Description Default
data Any

Tuple of (tmin, tmax, tiers) as returned by :func:textgrid_to_data.

required
out_file str

Destination path for the serialized TextGrid.

required
file_type str

Dialect to emit ("short" or "long").

'long'

Examples:

>>> data = textgrid_to_data("data/short_format.TextGrid")
>>> data_to_textgrid(data, "out.TextGrid", file_type="long")

Refer to :func:textgrid_to_data for details on the expected tuple layout.

See Also

textgrid_to_data : Produce the tuple structure consumed by this helper.

Source code in gridio/__init__.py
def data_to_textgrid(
    data: Any,
    out_file: str,
    file_type: str = "long",
):
    """Write nested tier data to a TextGrid file using the Rust backend.

    Parameters
    ----------
    data:
        Tuple of ``(tmin, tmax, tiers)`` as returned by :func:`textgrid_to_data`.
    out_file:
        Destination path for the serialized TextGrid.
    file_type:
        Dialect to emit (``"short"`` or ``"long"``).

    Examples
    --------
    >>> data = textgrid_to_data("data/short_format.TextGrid")
    >>> data_to_textgrid(data, "out.TextGrid", file_type="long")  # doctest: +SKIP

    Refer to :func:`textgrid_to_data` for details on the expected tuple layout.

    See Also
    --------
    textgrid_to_data : Produce the tuple structure consumed by this helper.
    """
    # The backend takes the tiers first, then the global bounds.
    global_start, global_end, tier_records = data
    rc_data2tg(
        tier_records,
        global_start,
        global_end,
        out_file,
        file_type=file_type,
    )

Object-Oriented API

gridio.textgrid

Classes:

Name Description
IntervalItem

Lightweight container for a single interval tier entry.

IntervalTier

Tier containing interval items spanning start and end times.

PointItem

Container for a point tier entry stored at a single timestamp.

PointTier

Tier containing point items at discrete timestamps.

TextGrid

In-memory representation of a Praat TextGrid document.

Tier

Mutable collection of TextGrid items with a shared tier name.

IntervalItem

Lightweight container for a single interval tier entry.

Parameters:

Name Type Description Default
tmin float

Start boundary of the interval, in seconds.

required
tmax float

End boundary of the interval, in seconds.

required
label str

Text label associated with the interval.

required

Attributes:

Name Type Description
data

Return the tuple representation consumed by the Rust backend.

Source code in gridio/textgrid.py
class IntervalItem:
    """Lightweight container for a single interval tier entry.

    Parameters
    ----------
    tmin, tmax : float
        Start and end boundaries in seconds.
    label : str
        Text label associated with the interval.
    """

    def __init__(self, tmin, tmax, label):
        self.tmin, self.tmax, self.label = tmin, tmax, label

    @property
    def data(self):
        """Return the tuple representation consumed by the Rust backend."""
        # Built on each access, so it reflects later attribute changes.
        as_tuple = (self.tmin, self.tmax, self.label)
        return as_tuple

data property

Return the tuple representation consumed by the Rust backend.

IntervalTier

Bases: Tier

Tier containing interval items spanning start and end times.

Methods:

Name Description
get_item

Return the :class:IntervalItem stored at index.

Source code in gridio/textgrid.py
class IntervalTier(Tier):
    """Tier containing interval items spanning start and end times."""

    def __init__(self, name):
        # The second base-class argument is the ``is_interval`` flag.
        super().__init__(name, True)

    def get_item(self, index: int) -> IntervalItem:
        """Return the :class:`IntervalItem` stored at ``index``."""
        record = self._items[index]
        start, end, text = record[0], record[1], record[2]
        return IntervalItem(start, end, text)

get_item(index)

Return the :class:IntervalItem stored at index.

Source code in gridio/textgrid.py
def get_item(self, index: int) -> IntervalItem:
    """Return the :class:`IntervalItem` stored at ``index``."""
    record = self._items[index]
    start, end, text = record[0], record[1], record[2]
    return IntervalItem(start, end, text)

PointItem

Container for a point tier entry stored at a single timestamp.

Parameters:

Name Type Description Default
time float

Absolute time position in seconds.

required
label str

Text label associated with the point.

required

Attributes:

Name Type Description
data

Represent the point as (time, time, label) for uniform storage.

Source code in gridio/textgrid.py
class PointItem:
    """Container for a point tier entry stored at a single timestamp.

    Parameters
    ----------
    time : float
        Absolute time position in seconds.
    label : str
        Text label associated with the point.
    """

    def __init__(self, time, label):
        self.time, self.label = time, label

    @property
    def data(self):
        """Represent the point as ``(time, time, label)`` for uniform storage."""
        # The timestamp is used for both the start and the end slot.
        moment = self.time
        return (moment, moment, self.label)

data property

Represent the point as (time, time, label) for uniform storage.

PointTier

Bases: Tier

Tier containing point items at discrete timestamps.

Methods:

Name Description
get_item

Return the :class:PointItem stored at index.

Source code in gridio/textgrid.py
class PointTier(Tier):
    """Tier containing point items at discrete timestamps."""

    def __init__(self, name):
        # The second base-class argument is the ``is_interval`` flag.
        super().__init__(name, False)

    def get_item(self, index: int) -> PointItem:
        """Return the :class:`PointItem` stored at ``index``."""
        record = self._items[index]
        # Slot 0 is taken as the timestamp and slot 2 as the label.
        moment, text = record[0], record[2]
        return PointItem(moment, text)

get_item(index)

Return the :class:PointItem stored at index.

Source code in gridio/textgrid.py
def get_item(self, index: int) -> PointItem:
    """Return the :class:`PointItem` stored at ``index``."""
    record = self._items[index]
    # Slot 0 is taken as the timestamp and slot 2 as the label.
    moment, text = record[0], record[2]
    return PointItem(moment, text)

TextGrid

In-memory representation of a Praat TextGrid document.

Parameters:

Name Type Description Default
tmin float

Global bounds for all tiers. They are preserved when saving if not overridden.

None
tmax float

Global bounds for all tiers. They are preserved when saving if not overridden.

None

Methods:

Name Description
add_tier

Insert tier at where (append when where is -1).

from_file

Read TextGrid files and wrap them in :class:TextGrid objects.

get_tier

Return a tier by numeric index or name, preserving subclass type.

remove_tier

Remove a tier, looking it up by name or numeric index.

save

Write the TextGrid to out_file using the Rust serializer.

Attributes:

Name Type Description
data

Return the backend-compatible tuple (tmin, tmax, tiers).

ntiers int

Total number of tiers in the TextGrid.

Source code in gridio/textgrid.py
class TextGrid:
    """In-memory representation of a Praat TextGrid document.

    Parameters
    ----------
    tmin, tmax : float, optional
        Global bounds for all tiers. They are preserved when saving if not
        overridden.
    """

    def __init__(self, tmin=None, tmax=None):
        self.tmin = tmin
        self.tmax = tmax
        # Tiers are kept as the ``data`` payload of each added tier, not as
        # Tier objects.
        self._tiers = []

    def _name2id(self, tier_name: str) -> Optional[int]:
        """Return the index of the first tier named ``tier_name``, else ``None``."""
        for i, tier in enumerate(self._tiers):
            if tier[0] == tier_name:
                return i
        return None

    def _tier_route(self, indexer=None, tier_name=None, tier_id=None) -> Optional[int]:
        """Resolve the selector arguments to a tier index.

        ``indexer`` is treated as an index when it is an ``int`` and as a name
        when it is a ``str``. A name takes precedence over ``tier_id``.

        Returns
        -------
        int or None
            The resolved index, or ``None`` when no selector was given or the
            name does not match any tier.

        Raises
        ------
        TypeError
            If ``indexer`` is neither ``int`` nor ``str``.
        """
        if indexer is not None:
            if isinstance(indexer, int):
                tier_id = indexer
            elif isinstance(indexer, str):
                tier_name = indexer
            else:
                raise TypeError("indexer must be int or str")
        if tier_name is not None:
            tier_id = self._name2id(tier_name)
        return tier_id

    def get_tier(self, indexer=None, tier_name=None, tier_id=None) -> Optional["Tier"]:
        """Return a tier by numeric index or name, preserving subclass type.

        Returns ``None`` when no selector is given or no tier has the requested
        name. A numeric index outside the valid range raises ``IndexError``.
        """
        tier_id = self._tier_route(
            indexer=indexer, tier_name=tier_name, tier_id=tier_id
        )
        if tier_id is None:
            # Indexing the list with ``None`` would raise a TypeError; report
            # "not found" instead, as the Optional return type promises.
            return None
        return Tier._from_data(self._tiers[tier_id])

    def add_tier(self, tier: "Tier", where: int = -1):
        """Insert ``tier`` at ``where`` (append when ``where`` is ``-1``)."""
        if where == -1:
            # ``list.insert(-1, x)`` would place x before the last element,
            # so -1 is mapped to the end explicitly.
            where = len(self._tiers)
        self._tiers.insert(where, tier.data)

    def remove_tier(self, tier_name=None, tier_id=None):
        """Remove a tier, looking it up by name or numeric index.

        Does nothing when the tier cannot be resolved.
        """
        tier_id = self._tier_route(tier_name=tier_name, tier_id=tier_id)
        if tier_id is None:
            return
        self._tiers.pop(tier_id)

    @property
    def ntiers(self) -> int:
        """Total number of tiers in the TextGrid."""
        return len(self._tiers)

    @property
    def data(self):
        """Return the backend-compatible tuple ``(tmin, tmax, tiers)``."""
        return (self.tmin, self.tmax, self._tiers)

    def save(
        self,
        out_file: str,
        file_type: str = "long",
    ):
        """Write the TextGrid to ``out_file`` using the Rust serializer."""
        data_to_textgrid(self.data, out_file, file_type=file_type)

    @staticmethod
    def _from_data(data):
        """Build a :class:`TextGrid` instance from ``textgrid_to_data`` output."""
        tmin, tmax, tiers = data
        tg = TextGrid(tmin, tmax)
        # The tier list is adopted as-is, not copied.
        tg._tiers = tiers
        return tg

    @staticmethod
    def from_file(
        file: Union[str, Path, list[str], list[Path]],
        strict: bool = False,
        file_type: str = "auto",
        file_name_func: Optional[Any] = None,
    ) -> Union["TextGrid", Dict[str, "TextGrid"]]:
        """Read TextGrid files and wrap them in :class:`TextGrid` objects.

        A single ``str`` or ``Path`` yields one :class:`TextGrid`; any other
        input yields a dictionary of :class:`TextGrid` objects keyed like the
        result of ``textgrid_to_data``.
        """
        tg_data = textgrid_to_data(
            file,
            strict=strict,
            file_type=file_type,
            file_name_func=file_name_func,
        )
        if isinstance(file, (str, Path)):
            return TextGrid._from_data(tg_data)
        else:
            return {fname: TextGrid._from_data(data) for fname, data in tg_data.items()}

data property

Return the backend-compatible tuple (tmin, tmax, tiers).

ntiers property

Total number of tiers in the TextGrid.

add_tier(tier, where=-1)

Insert tier at where (append when where is -1).

Source code in gridio/textgrid.py
def add_tier(self, tier: Tier, where: int = -1):
    """Insert ``tier`` at ``where`` (append when ``where`` is ``-1``)."""
    # -1 is the "append" sentinel, so it maps to the current length.
    position = len(self._tiers) if where == -1 else where
    self._tiers.insert(position, tier.data)

from_file(file, strict=False, file_type='auto', file_name_func=None) staticmethod

Read TextGrid files and wrap them in :class:TextGrid objects.

Source code in gridio/textgrid.py
@staticmethod
def from_file(
    file: Union[str, Path, list[str], list[Path]],
    strict: bool = False,
    file_type: str = "auto",
    file_name_func: Optional[Any] = None,
) -> Union["TextGrid", Dict[str, "TextGrid"]]:
    """Read TextGrid files and wrap them in :class:`TextGrid` objects."""
    parsed = textgrid_to_data(
        file,
        strict=strict,
        file_type=file_type,
        file_name_func=file_name_func,
    )
    # Anything other than a lone path comes back as a mapping.
    if not isinstance(file, (str, Path)):
        return {key: TextGrid._from_data(payload) for key, payload in parsed.items()}
    return TextGrid._from_data(parsed)

get_tier(indexer=None, tier_name=None, tier_id=None)

Return a tier by numeric index or name, preserving subclass type.

Source code in gridio/textgrid.py
def get_tier(self, indexer=None, tier_name=None, tier_id=None) -> Optional["Tier"]:
    """Return a tier by numeric index or name, preserving subclass type.

    Returns ``None`` when the selector arguments do not resolve to a tier
    index.
    """
    tier_id = self._tier_route(
        indexer=indexer, tier_name=tier_name, tier_id=tier_id
    )
    if tier_id is None:
        # Indexing the list with ``None`` would raise a TypeError; report
        # "not found" instead, as the Optional return type promises.
        return None
    return Tier._from_data(self._tiers[tier_id])

remove_tier(tier_name=None, tier_id=None)

Remove a tier, looking it up by name or numeric index.

Source code in gridio/textgrid.py
def remove_tier(self, tier_name=None, tier_id=None):
    """Remove a tier, looking it up by name or numeric index."""
    position = self._tier_route(tier_name=tier_name, tier_id=tier_id)
    # An unresolved lookup leaves the tier list untouched.
    if position is not None:
        self._tiers.pop(position)

save(out_file, file_type='long')

Write the TextGrid to out_file using the Rust serializer.

Source code in gridio/textgrid.py
def save(self, out_file: str, file_type: str = "long"):
    """Write the TextGrid to ``out_file`` using the Rust serializer."""
    payload = self.data
    data_to_textgrid(payload, out_file, file_type=file_type)

Tier

Mutable collection of TextGrid items with a shared tier name.

Parameters:

Name Type Description Default
name str

Tier identifier as stored in the TextGrid.

required
is_interval bool

True for interval tiers, False for point tiers.

required

Methods:

Name Description
insert_item

Insert a new item at index (append when index is -1).

remove_item

Remove the item at index from the tier.

Attributes:

Name Type Description
data

Return the tuple (name, is_interval, items) used by the bindings.

nitems int

Number of stored items.

Source code in gridio/textgrid.py
class Tier:
    """Mutable collection of TextGrid items with a shared tier name.

    Parameters
    ----------
    name : str
        Tier identifier as stored in the TextGrid.
    is_interval : bool
        ``True`` for interval tiers, ``False`` for point tiers.
    """

    def __init__(self, name, is_interval):
        self.name, self.is_interval = name, is_interval
        # Holds each inserted item's ``data`` payload, not the item object.
        self._items = []

    def insert_item(self, item, index: int = -1):
        """Insert a new item at ``index`` (append when ``index`` is ``-1``)."""
        # -1 is the "append" sentinel, so it maps to the current length.
        position = len(self._items) if index == -1 else index
        self._items.insert(position, item.data)

    def remove_item(self, index: int):
        """Remove the item at ``index`` from the tier."""
        stored = self._items
        stored.pop(index)

    @property
    def nitems(self) -> int:
        """Number of stored items."""
        count = len(self._items)
        return count

    @property
    def data(self):
        """Return the tuple ``(name, is_interval, items)`` used by the bindings."""
        return self.name, self.is_interval, self._items

    @staticmethod
    def _from_data(data):
        """Instantiate an appropriate tier subclass from backend data."""
        name, is_interval, items = data
        tier_cls = IntervalTier if is_interval else PointTier
        tier = tier_cls(name)
        # The item list is adopted as-is, not copied.
        tier._items = items
        return tier

data property

Return the tuple (name, is_interval, items) used by the bindings.

nitems property

Number of stored items.

insert_item(item, index=-1)

Insert a new item at index (append when index is -1).

Source code in gridio/textgrid.py
def insert_item(self, item, index: int = -1):
    """Insert a new item at ``index`` (append when ``index`` is ``-1``)."""
    if index == -1:
        index = len(self._items)
    self._items.insert(index, item.data)

remove_item(index)

Remove the item at index from the tier.

Source code in gridio/textgrid.py
def remove_item(self, index: int):
    """Remove the item at ``index`` from the tier."""
    stored = self._items
    stored.pop(index)