"""Data structures used as canonical outputs of IO parsers."""
from __future__ import annotations
from dataclasses import dataclass
from typing import Any
import numpy as np
from spine.constants import VALUE_COL
from spine.data import Meta, ObjectList
# Public API of this module: the parser output containers defined below.
__all__ = [
"ParserTensor",
"ParserIndex",
"ParserIndexList",
"ParserEdgeIndex",
"ParserObjectList",
]
@dataclass
class ParserTensor:
    """Container describing a parsed tensor-like payload.

    The ``*_cols`` / ``prec_col`` attributes are stored as given by the
    caller; the ``feat_*`` properties return the same column references
    with ``VALUE_COL`` subtracted (presumably the number of leading
    non-feature columns in the full tensor layout -- confirm against
    ``spine.constants``).

    Attributes
    ----------
    features : np.ndarray
        Feature matrix associated with the parsed tensor.
    coords : np.ndarray, optional
        Sparse tensor coordinates, typically with shape ``(N, 3)``.
    meta : Meta, optional
        Geometry metadata used to convert voxel indices into detector
        coordinates.
    index_shifts : np.ndarray, optional
        Shifts applied to index-bearing feature columns during batching.
    index_cols : np.ndarray, optional
        Feature columns that store indices.
    remove_duplicates : bool, default False
        If `True`, drop duplicate coordinates during collation.
    sum_cols : np.ndarray, optional
        Feature columns that should be summed when duplicates are merged.
    avg_cols : np.ndarray, optional
        Feature columns that should be averaged when duplicates are merged.
    prec_col : int, optional
        Feature column used to break duplicate-coordinate ties.
    precedence : np.ndarray, optional
        Precedence ordering used with ``prec_col``.
    feats_only : bool, default False
        If `True`, the payload is feature-only and has no associated
        coordinate tensor.
    overlay_reference : str, optional
        Product key whose duplicate-cleaning row selection should be applied
        to this tensor during overlay.
    """

    features: np.ndarray
    coords: np.ndarray | None = None
    meta: Meta | None = None
    index_shifts: np.ndarray | None = None
    index_cols: np.ndarray | None = None
    remove_duplicates: bool = False
    sum_cols: np.ndarray | None = None
    avg_cols: np.ndarray | None = None
    prec_col: int | None = None
    precedence: np.ndarray | None = None
    feats_only: bool = False
    overlay_reference: str | None = None

    @property
    def feat_index_cols(self) -> np.ndarray | None:
        """Return index-bearing columns expressed in feature-only coordinates.

        Returns
        -------
        np.ndarray, optional
            :attr:`index_cols` with ``VALUE_COL`` subtracted, or `None`
            when :attr:`index_cols` is not set.
        """
        if self.index_cols is None:
            return None

        return self.index_cols - VALUE_COL

    @property
    def feat_sum_cols(self) -> np.ndarray | None:
        """Return duplicate-summed columns in feature-only coordinates.

        Returns
        -------
        np.ndarray, optional
            :attr:`sum_cols` with ``VALUE_COL`` subtracted, or `None`
            when :attr:`sum_cols` is not set.
        """
        if self.sum_cols is None:
            return None

        return self.sum_cols - VALUE_COL

    @property
    def feat_avg_cols(self) -> np.ndarray | None:
        """Return duplicate-averaged columns in feature-only coordinates.

        Returns
        -------
        np.ndarray, optional
            :attr:`avg_cols` with ``VALUE_COL`` subtracted, or `None`
            when :attr:`avg_cols` is not set.
        """
        if self.avg_cols is None:
            return None

        return self.avg_cols - VALUE_COL

    @property
    def feat_prec_col(self) -> int | None:
        """Return the precedence column in feature-only coordinates.

        Returns
        -------
        int, optional
            :attr:`prec_col` with ``VALUE_COL`` subtracted. `None` and
            negative values are returned unchanged.
        """
        # Negative values are passed through untouched (presumably because
        # an index counted from the end does not depend on the number of
        # leading columns -- confirm with callers).
        if self.prec_col is None or self.prec_col < 0:
            return self.prec_col

        return self.prec_col - VALUE_COL
@dataclass
class ParserIndex:
    """Container describing one flat index payload.

    Attributes
    ----------
    features : np.ndarray
        One-dimensional index array.
    span : int
        Parent-entry span used when batching entries.
    """

    features: np.ndarray
    span: int
@dataclass
class ParserIndexList:
    """Container describing one jagged index-list payload.

    Attributes
    ----------
    features : list[np.ndarray]
        List of one-dimensional index arrays.
    span : int
        Parent-entry span used when batching entries.
    single_counts : np.ndarray, optional
        Per-index sizes used to restore jagged list structure after batching.
    """

    features: list[np.ndarray]
    span: int
    single_counts: np.ndarray | None = None
@dataclass
class ParserEdgeIndex:
    """Container describing one edge-index payload.

    Attributes
    ----------
    features : np.ndarray
        Two-dimensional edge-index array with shape ``(2, E)``.
    span : int
        Parent-entry node span used when batching entries.
    """

    features: np.ndarray
    span: int
class ParserObjectList(ObjectList):
    """Object list with index shifting instructions.

    Attributes
    ----------
    index_shifts : int or dict[str, int]
        Shift(s) to apply to object index attributes during collation.
        Set to the length of the input list when no explicit value is
        provided at construction.
    """

    def __init__(
        self,
        object_list: list[Any],
        default: Any,
        index_shifts: int | dict[str, int] | None = None,
    ) -> None:
        """Initialize the list and the default value.

        Parameters
        ----------
        object_list : list[Any]
            Parsed objects associated with one event entry.
        default : Any
            Default object used to type an empty list.
        index_shifts : int or dict[str, int], optional
            Shift(s) to apply to object index attributes during batching.
            If `None`, the number of objects in ``object_list`` is used.
        """
        # Initialize the underlying object list
        super().__init__(object_list, default)

        # Store the index shifts, falling back on the entry's object count
        if index_shifts is not None:
            self.index_shifts = index_shifts
        else:
            self.index_shifts = len(object_list)

    @property
    def to_object_list(self) -> ObjectList:
        """Drop parser-specific batching metadata and return a plain ObjectList.

        Returns
        -------
        ObjectList
            Underlying object list without ``index_shifts`` metadata.
        """
        # NOTE(review): relies on the base class exposing ``default``
        # (presumably set by ``ObjectList.__init__``) -- confirm.
        return ObjectList(self, default=self.default)