Source code for spine.data.out.particle

"""Module with a data class objects which represent output particles."""

from dataclasses import dataclass, field

import numpy as np
from scipy.spatial.distance import cdist

from spine.constants import (
    PID_LABELS,
    PID_MASSES,
    PID_TO_PDG,
    SHAPE_LABELS,
    SHOWR_SHP,
    TRACK_SHP,
    ParticlePID,
    ParticleShape,
)
from spine.data.decorator import stored_alias, stored_property
from spine.data.field import FieldMetadata
from spine.data.larcv.particle import Particle

from .base import OutBase, RecoBase, TruthBase
from .fragment import RecoFragment, TruthFragment

__all__ = ["RecoParticle", "TruthParticle"]


@dataclass(eq=False, repr=False)
class ParticleBase(OutBase):
    """Base particle-specific information.

    Attributes
    ----------
    fragment_ids : np.ndarray
        List of Fragment IDs that make up this particle
    num_fragments : int
        Number of fragments that make up this particle
    interaction_id : int
        Index of the interaction this particle belongs to
    shape : int
        Semantic type (shower (0), track (1), Michel (2), delta (3),
        low energy scatter (4)) of this particle
    pid : int
        Particle species (Photon (0), Electron (1), Muon (2), Charged Pion (3),
        Proton (4), Kaon (5)) of this particle
    chi2_pid : int
        Particle species as predicted by the chi2 template method (Muon (2),
        Charged Pion (3), Proton (4), Kaon (5)) of this particle
    chi2_per_pid : np.ndarray
        (P) Array of chi2 values associated with each particle class
    is_primary : bool
        Whether this particle was the first in the particle group
    length : float
        Length of the particle (only assigned to track objects)
    start_point : np.ndarray
        (3) Particle start point
    end_point : np.ndarray
        (3) Particle end point (only assigned to track objects)
    calo_ke : float
        Kinetic energy reconstructed from the energy depositions alone in MeV
    csda_ke : float
        Kinetic energy reconstructed from the particle range in MeV
    csda_ke_per_pid : np.ndarray
        (P) Same as `csda_ke` but for every available track PID hypothesis
    mcs_ke : float
        Kinetic energy reconstructed using the MCS method in MeV
    mcs_ke_per_pid : np.ndarray
        (P) Same as `mcs_ke` but for every available track PID hypothesis
    p : float
        Momentum magnitude of the particle at the production point in MeV/c
    is_crt_matched : bool
        True if the particle was matched to a CRT hit
    crt_ids : np.ndarray
        (C) Indices of the CRT hits the particle is matched to
    crt_times : np.ndarray
        (C) Times at which the CRT hits occurred in microseconds
    crt_scores : np.ndarray
        (C) Quality metric associated with the CRT matches
    is_valid : bool
        Whether this particle counts towards an interaction topology. This
        may be False if a particle is below some defined energy threshold.
    """

    # Scalar attributes
    interaction_id: int = -1
    chi2_pid: int = -1

    is_primary: bool = False
    is_crt_matched: bool = False
    is_valid: bool = True

    length: float = field(default=np.nan, metadata=FieldMetadata(units="instance"))
    calo_ke: float = field(default=np.nan, metadata=FieldMetadata(units="MeV"))
    csda_ke: float = field(default=np.nan, metadata=FieldMetadata(units="MeV"))
    mcs_ke: float = field(default=np.nan, metadata=FieldMetadata(units="MeV"))

    # Enumerated attributes
    shape: int = field(default=-1, metadata=FieldMetadata(enum=ParticleShape))
    pid: int = field(default=-1, metadata=FieldMetadata(enum=ParticlePID))

    # Vector attributes
    fragment_ids: np.ndarray = field(
        default_factory=lambda: np.empty(0, dtype=np.int32),
        metadata=FieldMetadata(dtype=np.int32, cat=True, units="instance"),
    )

    start_point: np.ndarray = field(
        default_factory=lambda: np.full(3, np.nan, dtype=np.float32),
        metadata=FieldMetadata(
            length=3,
            dtype=np.float32,
            position=True,
            units="instance",
        ),
    )
    end_point: np.ndarray = field(
        default_factory=lambda: np.full(3, np.nan, dtype=np.float32),
        metadata=FieldMetadata(
            length=3,
            dtype=np.float32,
            position=True,
            units="instance",
        ),
    )

    chi2_per_pid: np.ndarray = field(
        default_factory=lambda: np.full(len(PID_LABELS) - 1, np.nan, dtype=np.float32),
        metadata=FieldMetadata(length=len(PID_LABELS) - 1, dtype=np.float32),
    )
    csda_ke_per_pid: np.ndarray = field(
        default_factory=lambda: np.full(len(PID_LABELS) - 1, np.nan, dtype=np.float32),
        metadata=FieldMetadata(
            length=len(PID_LABELS) - 1, dtype=np.float32, units="MeV"
        ),
    )
    mcs_ke_per_pid: np.ndarray = field(
        default_factory=lambda: np.full(len(PID_LABELS) - 1, np.nan, dtype=np.float32),
        metadata=FieldMetadata(
            length=len(PID_LABELS) - 1, dtype=np.float32, units="MeV"
        ),
    )

    crt_ids: np.ndarray = field(
        default_factory=lambda: np.empty(0, dtype=np.int32),
        metadata=FieldMetadata(dtype=np.int32),
    )
    crt_times: np.ndarray = field(
        default_factory=lambda: np.empty(0, dtype=np.float32),
        metadata=FieldMetadata(dtype=np.float32, units="us"),
    )
    crt_scores: np.ndarray = field(
        default_factory=lambda: np.empty(0, dtype=np.float32),
        metadata=FieldMetadata(dtype=np.float32),
    )

    def __str__(self):
        """Human-readable string representation of the particle object.

        Results
        -------
        str
            Basic information about the particle properties
        """
        pid_label = PID_LABELS[self.pid]
        match = self.match_ids[0] if len(self.match_ids) > 0 else -1
        return (
            f"Particle(ID: {self.id:<3} | PID: {pid_label:<8} "
            f"| Primary: {self.is_primary:<2} "
            f"| Size: {self.size:<5} | Match: {match:<3})"
        )

    def reset_crt_match(self) -> None:
        """Reset all the CRT hit matching attributes."""
        self.is_crt_matched = False
        self.crt_ids = np.empty(0, dtype=np.int32)
        self.crt_times = np.empty(0, dtype=np.float32)
        self.crt_scores = np.empty(0, dtype=np.float32)

    @property
    @stored_property
    def num_fragments(self) -> int:
        """Number of fragments that make up this particle.

        Returns
        -------
        int
            Number of fragments that make up the particle instance
        """
        return len(self.fragment_ids)



[docs]
@dataclass(eq=False, repr=False)
class RecoParticle(ParticleBase, RecoBase):
    """Reconstructed particle information.

    Attributes
    ----------
    fragments : List[RecoFragment]
        List of fragments that make up this particle
    pdg_code : int
        PDG code corresponding to the PID number
    mass : float
        Rest mass of the particle in MeV/c^2
    ke : float
        Best guess kinetic energy of the particle in MeV
    momentum : np.ndarray
        3-momentum of the particle at the production point in MeV/c
    start_dir : np.ndarray
        (3) Particle direction w.r.t. the start point
    end_dir : np.ndarray
        (3) Particle direction w.r.t. the end point (only assigned
        to track objects)
    pid_scores : np.ndarray
        (P) Array of softmax scores associated with each particle class
    primary_scores : np.ndarray
        (2) Array of softmax scores associated with secondary and primary
    ppn_ids : np.ndarray
        (M) List of indexes of PPN points associated with this particle
    ppn_points : np.ndarray
        (M, 3) List of PPN points tagged to this particle
    start_dedx : float
        dE/dx around a user-defined neighborhood of the start point in MeV/cm
    end_dedx : float
        dE/dx around a user-defined neighborhood of the end point in MeV/cm
    vertex_distance : float
        Set-to-point distance between all particle points and the parent
        interaction vertex position in cm
    start_straightness : float
        Explained variance ratio of the beginning of the particle
    directional_spread : float
        Estimate of the angular spread of the particle (cosine spread)
    axial_spread : float
        Pearson correlation coefficient of the axial profile of the particle
        w.r.t. to the distance from its start point
    """

    # Scalar attributes
    start_dedx: float = field(default=np.nan, metadata=FieldMetadata(units="MeV/cm"))
    end_dedx: float = field(default=np.nan, metadata=FieldMetadata(units="MeV/cm"))
    vertex_distance: float = field(
        default=np.nan, metadata=FieldMetadata(units="instance")
    )
    start_straightness: float = np.nan
    directional_spread: float = np.nan
    axial_spread: float = np.nan

    # Object list attributes
    fragments: list[RecoFragment] = field(
        default_factory=lambda: [],
        metadata=FieldMetadata(skip=True, cat=True),
    )

    # Vector attributes
    start_dir: np.ndarray = field(
        default_factory=lambda: np.full(3, np.nan, dtype=np.float32),
        metadata=FieldMetadata(length=3, dtype=np.float32, vector=True),
    )
    end_dir: np.ndarray = field(
        default_factory=lambda: np.full(3, np.nan, dtype=np.float32),
        metadata=FieldMetadata(length=3, dtype=np.float32, vector=True),
    )

    pid_scores: np.ndarray = field(
        default_factory=lambda: np.full(len(PID_LABELS) - 1, np.nan, dtype=np.float32),
        metadata=FieldMetadata(length=len(PID_LABELS) - 1, dtype=np.float32),
    )
    primary_scores: np.ndarray = field(
        default_factory=lambda: np.full(2, np.nan, dtype=np.float32),
        metadata=FieldMetadata(length=2, dtype=np.float32),
    )

    ppn_ids: np.ndarray = field(
        default_factory=lambda: np.empty(0, dtype=np.int32),
        metadata=FieldMetadata(dtype=np.int32),
    )
    ppn_points: np.ndarray = field(
        default_factory=lambda: np.empty((0, 3), dtype=np.float32),
        metadata=FieldMetadata(
            dtype=np.float32,
            position=True,
            skip=True,
            units="instance",
        ),
    )

    def __str__(self):
        """Human-readable string representation of the particle object.

        Results
        -------
        str
            Basic information about the particle properties
        """
        return "Reco" + super().__str__()


[docs]
    def merge(self, other):
        """Merge another particle instance into this one.

        The merging strategy differs depending on the the particle shapes
        merged together:

        - Track plus track: the merged particle uses the pair of end points
          that are farthest apart, and the primary and PID scores are copied
          from the constituent with the highest primary score.
        - Shower plus track: the track is merged into the shower, the shower
          start point is updated to the track end point farthest from the
          current shower start point, the primary score follows the most
          primary-like constituent and the PID stays that of the shower.

        Parameters
        ----------
        other : RecoParticle
            Other reconstructed particle to merge into this one
        """
        # Check that the particles being merged fit one of two categories
        if self.shape not in (SHOWR_SHP, TRACK_SHP) or other.shape != TRACK_SHP:
            raise ValueError(
                "Can only merge two track particles or a track into a shower."
            )

        # Check that neither particle has yet been matched
        if self.is_matched or other.is_matched:
            raise ValueError("Cannot merge particles that already have matches.")

        # Concatenate the two particle long-form attributes together
        for attr in self._cat_attrs:
            self_val = getattr(self, attr)
            other_val = getattr(other, attr)
            # Handle lists separately from numpy arrays
            if isinstance(self_val, list):
                val = self_val + other_val
            else:
                val = np.concatenate([self_val, other_val])
            setattr(self, attr, val)

        # Select end points and end directions appropriately
        if self.shape == TRACK_SHP:
            # If two tracks, pick points furthest apart
            points_i = np.vstack([self.start_point, self.end_point])
            points_j = np.vstack([other.start_point, other.end_point])
            dirs_i = np.vstack([self.start_dir, self.end_dir])
            dirs_j = np.vstack([other.start_dir, other.end_dir])

            dists = cdist(points_i, points_j)
            max_index = np.argmax(dists)
            max_i, max_j = max_index // 2, max_index % 2

            self.start_point = points_i[max_i]
            self.end_point = points_j[max_j]
            self.start_dir = dirs_i[max_i]
            self.end_dir = dirs_j[max_j]

        else:
            # If a shower and a track, pick track point furthest from shower
            points_i = self.start_point.reshape(-1, 3)
            points_j = np.vstack([other.start_point, other.end_point])
            dirs_j = np.vstack([other.start_dir, other.end_dir])

            dists = cdist(points_i, points_j)
            max_j = np.argmax(dists)

            self.start_point = points_j[max_j]
            self.start_dir = dirs_j[max_j]

        # Match primary/PID to the most primary particle
        if other.primary_scores[-1] > self.primary_scores[-1]:
            self.primary_scores = other.primary_scores
            self.is_primary = other.is_primary
            if self.shape == TRACK_SHP:
                self.pid_scores = other.pid_scores
                self.pid = other.pid

        # If the calorimetric KEs have been computed, can safely sum
        if not np.isnan(self.calo_ke) and not np.isnan(other.calo_ke):
            self.calo_ke += other.calo_ke


    @property
    @stored_property
    def pdg_code(self) -> int:
        """Translates the enumerated particle type to a sign-less PDG code.

        Returns
        -------
        int
            Reconstructed sign-less PDG code
        """
        return PID_TO_PDG[self.pid]

    @property
    @stored_property(units="MeV/c^2")
    def mass(self) -> float:
        """Rest mass of the particle in MeV/c^2.

        The mass is inferred from the predicted mass.

        Returns
        -------
        float
            Rest mass of the particle
        """
        if self.pid in PID_MASSES:
            return PID_MASSES[self.pid]

        return np.nan

    @property
    @stored_property(units="MeV")
    def ke(self) -> float:
        """Best-guess kinetic energy in MeV.

        Uses calorimetry for EM activity and this order for track:
        - CSDA-based estimate if it is available
        - MCS-based estimate if it is available
        - Calorimetry if all else fails

        Returns
        -------
        float
            Best-guess kinetic energy
        """
        if self.shape != TRACK_SHP:
            # If a particle is not a track, can only use calorimetry
            return self.calo_ke

        else:
            # If a particle is a track, pick CSDA for contained tracks and
            # pick MCS for uncontained tracks, unless specified otherwise
            if self.is_contained and self.csda_ke > 0.0:
                return self.csda_ke
            elif not self.is_contained and self.mcs_ke > 0.0:
                return self.mcs_ke
            else:
                return self.calo_ke

    @property
    @stored_property(length=3, dtype=np.float32, units="MeV/c")
    def momentum(self) -> np.ndarray:
        """Best-guess momentum in MeV/c.

        Returns
        -------
        np.ndarray
            (3) Momentum vector
        """
        ke = self.ke
        if (
            not np.isnan(ke)
            and not np.any(np.isnan(self.start_dir))
            and self.pid in PID_MASSES
        ):
            mass = PID_MASSES[self.pid]
            mom = np.sqrt(ke**2 + 2 * ke * mass)
            return mom * self.start_dir

        else:
            return np.full(3, np.nan, dtype=np.float32)

    @property
    @stored_property(units="MeV/c")
    def p(self) -> float:
        """Computes the magnitude of the initial momentum.

        Returns
        -------
        float
            Norm of the initial momentum vector
        """
        return float(np.linalg.norm(self.momentum))

    @property
    @stored_alias("ke")
    def reco_ke(self) -> float:
        """Alias for `ke`, to match nomenclature in truth."""
        return self.ke

    @property
    @stored_alias("momentum")
    def reco_momentum(self) -> np.ndarray:
        """Alias for `momentum`, to match nomenclature in truth."""
        return self.momentum

    @property
    @stored_alias("length")
    def reco_length(self) -> float:
        """Alias for `length`, to match nomenclature in truth."""
        return self.length

    @property
    @stored_alias("start_dir")
    def reco_start_dir(self) -> np.ndarray:
        """Alias for `start_dir`, to match nomenclature in truth."""
        return self.start_dir

    @property
    @stored_alias("end_dir")
    def reco_end_dir(self) -> np.ndarray:
        """Alias for `end_dir`, to match nomenclature in truth."""
        return self.end_dir




[docs]
@dataclass(eq=False, repr=False)
class TruthParticle(Particle, ParticleBase, TruthBase):
    """Truth particle information.

    This inherits all of the attributes of :class:`Particle`, which contains
    the G4 truth information for the particle.

    Attributes
    ----------
    fragments : List[TruthFragment]
        List of fragments that make up this particle
    orig_interaction_id : int
        Unaltered index of the interaction in the original MC particle list
    orig_parent_id : int
        Unaltered index of the particle parent in the original MC particle list
    orig_group_id : int
        Unaltered index of the particle group in the original MC particle list
    orig_children_id : np.ndarray
        Unaltered list of the particle children in the original MC particle list
    children_counts : np.ndarray
        (P) Number of truth child particle of each shape
    pdg_code : int
        PDG code corresponding to the PID number
    mass : float
        Rest mass of the particle in MeV/c^2
    ke : float
        Kinetic energy of the particle in MeV
    momentum : np.ndarray
        3-momentum of the particle at the production point in MeV/c
    start_dir : np.ndarray
        (3) Particle direction w.r.t. the start point
    end_dir : np.ndarray
        (3) Particle direction w.r.t. the end point (only assigned
        to track objects)
    reco_length : float
        Reconstructed length of the particle (only assigned to track objects)
    reco_start_dir : np.ndarray
        (3) Particle direction estimate w.r.t. the start point
    reco_end_dir : np.ndarray
        (3) Particle direction estimate w.r.t. the end point (only assigned
        to track objects)
    reco_ke : float
        Best-guess reconstructed KE of the particle
    reco_momentum : np.ndarray
        Best-guess reconstructed momentum of the particle
    """

    # Scalar attributes
    orig_interaction_id: int = -1
    orig_parent_id: int = -1
    orig_group_id: int = -1

    reco_length: float = np.nan

    # Object list attributes
    fragments: list[TruthFragment] = field(
        default_factory=lambda: [],
        metadata=FieldMetadata(skip=True),
    )

    # Vector attributes
    orig_children_id: np.ndarray = field(
        default_factory=lambda: np.empty(0, dtype=np.int32),
        metadata=FieldMetadata(dtype=np.int32),
    )
    children_counts: np.ndarray = field(
        default_factory=lambda: np.empty(0, dtype=np.int32),
        metadata=FieldMetadata(dtype=np.int32),
    )

    reco_start_dir: np.ndarray = field(
        default_factory=lambda: np.full(3, np.nan, dtype=np.float32),
        metadata=FieldMetadata(length=3, dtype=np.float32, vector=True),
    )
    reco_end_dir: np.ndarray = field(
        default_factory=lambda: np.full(3, np.nan, dtype=np.float32),
        metadata=FieldMetadata(length=3, dtype=np.float32, vector=True),
    )

    def __str__(self):
        """Human-readable string representation of the particle object.

        Results
        -------
        str
            Basic information about the particle properties
        """
        return "Truth" + super().__str__()

    @property
    @stored_property(length=3, dtype=np.float32, vector=True)
    def start_dir(self) -> np.ndarray:
        """Converts the initial momentum to a direction vector.

        Returns
        -------
        np.ndarray
            (3) Start direction vector
        """
        norm = np.linalg.norm(self.momentum)
        if norm > 0.0:
            return self.momentum / norm

        return np.full(3, np.nan, dtype=np.float32)

    @property
    @stored_property(length=3, dtype=np.float32, vector=True)
    def end_dir(self) -> np.ndarray:
        """Converts the final momentum to a direction vector.

        Note that if a particle stops, this is unreliable as an estimate of the
        direction of the particle before it stops.

        Returns
        -------
        np.ndarray
            (3) End direction vector
        """
        if self.shape == TRACK_SHP:
            norm = np.linalg.norm(self.end_momentum)
            if norm > 0.0:
                return self.end_momentum / norm

        return np.full(3, np.nan, dtype=np.float32)

    @property
    @stored_property(units="MeV")
    def ke(self) -> float:
        """Converts the particle initial energy to a kinetic energy.

        This only works for particles with a known mass (as defined in
        `spine.constants`).

        Returns
        -------
        float
            Initial kinetic energy of the particle
        """
        if not np.isnan(self.mass):
            return self.energy_init - self.mass

        return np.nan

    @property
    @stored_property(units="MeV")
    def reco_ke(self) -> float:
        """Best-guess reconstructed kinetic energy in MeV.

        Uses calorimetry for EM activity and this order for track:
        - CSDA-based estimate if it is available
        - MCS-based estimate if it is available
        - Calorimetry if all else fails

        Returns
        -------
        float
            Best-guess kinetic energy
        """
        if self.shape != TRACK_SHP:
            # If a particle is not a track, can only use calorimetry
            return self.calo_ke

        else:
            # If a particle is a track, pick CSDA for contained tracks and
            # pick MCS for uncontained tracks, unless specified otherwise
            if self.is_contained and not np.isnan(self.csda_ke):
                return self.csda_ke
            elif not self.is_contained and not np.isnan(self.mcs_ke):
                return self.mcs_ke
            else:
                return self.calo_ke

    @property
    @stored_property(length=3, dtype=np.float32, units="MeV/c")
    def reco_momentum(self) -> np.ndarray:
        """Best-guess reconstructed momentum in MeV/c.

        Returns
        -------
        np.ndarray
            (3) Momentum vector
        """
        ke = self.reco_ke
        if not np.isnan(ke) and self.pid in PID_MASSES:
            mass = PID_MASSES[self.pid]
            mom = np.sqrt(ke**2 + 2 * ke * mass)
            return mom * self.reco_start_dir

        else:
            return np.full(3, np.nan, dtype=np.float32)