# Source code for spine.io.parse.data

"""Data structures used as canonical outputs of IO parsers."""

from __future__ import annotations

from dataclasses import dataclass
from typing import Any

import numpy as np

from spine.constants import VALUE_COL
from spine.data import Meta, ObjectList

# Public API of this module: the names exported by a star import.
__all__ = [
    "ParserTensor",
    "ParserIndex",
    "ParserIndexList",
    "ParserEdgeIndex",
    "ParserObjectList",
]



# [docs]
@dataclass
class ParserTensor:
    """Parsed tensor payload bundled with its collation instructions.

    Attributes
    ----------
    features : np.ndarray
        Feature matrix of the parsed tensor.
    coords : np.ndarray, optional
        Coordinates of the sparse tensor, typically of shape ``(N, 3)``.
    meta : Meta, optional
        Geometry metadata needed to map voxel indices onto detector
        coordinates.
    index_shifts : np.ndarray, optional
        Offsets to apply to the index-bearing feature columns when batching.
    index_cols : np.ndarray, optional
        Columns which hold indices.
    remove_duplicates : bool, default False
        Whether duplicate coordinates are dropped during collation.
    sum_cols : np.ndarray, optional
        Columns whose values are summed when duplicates are merged.
    avg_cols : np.ndarray, optional
        Columns whose values are averaged when duplicates are merged.
    prec_col : int, optional
        Column used to arbitrate between rows sharing the same coordinates.
    precedence : np.ndarray, optional
        Precedence ordering applied together with ``prec_col``.
    feats_only : bool, default False
        Whether the payload carries features only, i.e. has no coordinate
        tensor attached to it.
    overlay_reference : str, optional
        Key of the product whose duplicate-cleaning row selection is to be
        reused for this tensor during overlay.

    Notes
    -----
    The ``feat_*`` properties re-express the column attributes above after
    subtracting ``VALUE_COL`` from them.
    """

    features: np.ndarray
    coords: np.ndarray | None = None
    meta: Meta | None = None
    index_shifts: np.ndarray | None = None
    index_cols: np.ndarray | None = None
    remove_duplicates: bool = False
    sum_cols: np.ndarray | None = None
    avg_cols: np.ndarray | None = None
    prec_col: int | None = None
    precedence: np.ndarray | None = None
    feats_only: bool = False
    overlay_reference: str | None = None

    @property
    def feat_index_cols(self) -> np.ndarray | None:
        """Index-bearing columns, offset into the feature-only layout.

        Returns
        -------
        np.ndarray, optional
            :attr:`index_cols` shifted down by ``VALUE_COL``, or `None` when
            no index columns are set.
        """
        cols = self.index_cols
        return None if cols is None else cols - VALUE_COL

    @property
    def feat_sum_cols(self) -> np.ndarray | None:
        """Summed-on-merge columns, offset into the feature-only layout.

        Returns
        -------
        np.ndarray, optional
            :attr:`sum_cols` shifted down by ``VALUE_COL``, or `None` when
            no sum columns are set.
        """
        cols = self.sum_cols
        return None if cols is None else cols - VALUE_COL

    @property
    def feat_avg_cols(self) -> np.ndarray | None:
        """Averaged-on-merge columns, offset into the feature-only layout.

        Returns
        -------
        np.ndarray, optional
            :attr:`avg_cols` shifted down by ``VALUE_COL``, or `None` when
            no average columns are set.
        """
        cols = self.avg_cols
        return None if cols is None else cols - VALUE_COL

    @property
    def feat_prec_col(self) -> int | None:
        """Precedence column, offset into the feature-only layout.

        Returns
        -------
        int, optional
            :attr:`prec_col` shifted down by ``VALUE_COL``. A missing or
            negative :attr:`prec_col` is handed back untouched.
        """
        col = self.prec_col
        if col is None:
            return None

        # Negative values are passed through as-is, without the offset
        return col if col < 0 else col - VALUE_COL




# [docs]
@dataclass
class ParserIndex:
    """Single flat index payload produced by a parser.

    Attributes
    ----------
    features : np.ndarray
        Index values, stored as a one-dimensional array.
    span : int
        Span of the parent entry, used when entries are batched together.
    """

    features: np.ndarray
    span: int




# [docs]
@dataclass
class ParserIndexList:
    """Single jagged payload made of a list of index arrays.

    Attributes
    ----------
    features : list[np.ndarray]
        One one-dimensional index array per list element.
    span : int
        Span of the parent entry, used when entries are batched together.
    single_counts : np.ndarray, optional
        Size of each index, used to rebuild the jagged list structure once
        batching is done.
    """

    features: list[np.ndarray]
    span: int
    single_counts: np.ndarray | None = None




# [docs]
@dataclass
class ParserEdgeIndex:
    """Single edge-index payload produced by a parser.

    Attributes
    ----------
    features : np.ndarray
        Edge index, stored as a two-dimensional array of shape ``(2, E)``.
    span : int
        Node span of the parent entry, used when entries are batched
        together.
    """

    features: np.ndarray
    span: int




# [docs]
class ParserObjectList(ObjectList):
    """Object list which also carries its index shifting instructions.

    Attributes
    ----------
    index_shifts : int or dict[str, int]
        Shift(s) applied to the index attributes of the objects during
        collation.
    """

    def __init__(
        self,
        object_list: list[Any],
        default: Any,
        index_shifts: int | dict[str, int] | None = None,
    ) -> None:
        """Build the object list and record the index shifts.

        Parameters
        ----------
        object_list : list[Any]
            Objects parsed for a single event entry.
        default : Any
            Default object, used to type the list when it is empty.
        index_shifts : int or dict[str, int], optional
            Shift(s) applied to the index attributes of the objects during
            batching. When omitted, the length of ``object_list`` is used.
        """
        # Delegate the list construction to the parent class
        super().__init__(object_list, default)

        # Fall back on the number of objects if no shift is provided
        self.index_shifts = (
            len(object_list) if index_shifts is None else index_shifts
        )

    @property
    def to_object_list(self) -> ObjectList:
        """Plain ObjectList holding the same objects and default.

        Returns
        -------
        ObjectList
            New object list built from this one, stripped of the
            parser-specific ``index_shifts`` metadata.
        """
        plain_list = ObjectList(self, default=self.default)
        return plain_list