Source code for spine.data.out.particle

"""Module with a data class objects which represent output particles."""

from dataclasses import dataclass, field

import numpy as np
from scipy.spatial.distance import cdist

from spine.constants import (
    PID_LABELS,
    PID_MASSES,
    PID_TO_PDG,
    SHAPE_LABELS,
    SHOWR_SHP,
    TRACK_SHP,
    ParticlePID,
    ParticleShape,
)
from spine.data.decorator import stored_alias, stored_property
from spine.data.field import FieldMetadata
from spine.data.larcv.particle import Particle

from .base import OutBase, RecoBase, TruthBase
from .fragment import RecoFragment, TruthFragment

__all__ = ["RecoParticle", "TruthParticle"]


@dataclass(eq=False, repr=False)
class ParticleBase(OutBase):
    """Base particle-specific information.

    Attributes
    ----------
    fragment_ids : np.ndarray
        List of Fragment IDs that make up this particle
    num_fragments : int
        Number of fragments that make up this particle
    interaction_id : int
        Index of the interaction this particle belongs to
    shape : int
        Semantic type (shower (0), track (1), Michel (2), delta (3),
        low energy scatter (4)) of this particle
    pid : int
        Particle species (Photon (0), Electron (1), Muon (2), Charged Pion (3),
        Proton (4), Kaon (5)) of this particle
    chi2_pid : int
        Particle species as predicted by the chi2 template method (Muon (2),
        Charged Pion (3), Proton (4), Kaon (5)) of this particle
    chi2_per_pid : np.ndarray
        (P) Array of chi2 values associated with each particle class
    is_primary : bool
        Whether this particle was the first in the particle group
    length : float
        Length of the particle (only assigned to track objects)
    start_point : np.ndarray
        (3) Particle start point
    end_point : np.ndarray
        (3) Particle end point (only assigned to track objects)
    calo_ke : float
        Kinetic energy reconstructed from the energy depositions alone in MeV
    csda_ke : float
        Kinetic energy reconstructed from the particle range in MeV
    csda_ke_per_pid : np.ndarray
        (P) Same as `csda_ke` but for every available track PID hypothesis
    mcs_ke : float
        Kinetic energy reconstructed using the MCS method in MeV
    mcs_ke_per_pid : np.ndarray
        (P) Same as `mcs_ke` but for every available track PID hypothesis
    p : float
        Momentum magnitude of the particle at the production point in MeV/c
    is_crt_matched : bool
        True if the particle was matched to a CRT hit
    crt_ids : np.ndarray
        (C) Indices of the CRT hits the particle is matched to
    crt_times : np.ndarray
        (C) Times at which the CRT hits occurred in microseconds
    crt_scores : np.ndarray
        (C) Quality metric associated with the CRT matches
    is_valid : bool
        Whether this particle counts towards an interaction topology. This
        may be False if a particle is below some defined energy threshold.
    """

    # Scalar attributes
    interaction_id: int = -1
    chi2_pid: int = -1

    is_primary: bool = False
    is_crt_matched: bool = False
    is_valid: bool = True

    length: float = field(default=np.nan, metadata=FieldMetadata(units="instance"))
    calo_ke: float = field(default=np.nan, metadata=FieldMetadata(units="MeV"))
    csda_ke: float = field(default=np.nan, metadata=FieldMetadata(units="MeV"))
    mcs_ke: float = field(default=np.nan, metadata=FieldMetadata(units="MeV"))

    # Enumerated attributes
    shape: int = field(default=-1, metadata=FieldMetadata(enum=ParticleShape))
    pid: int = field(default=-1, metadata=FieldMetadata(enum=ParticlePID))

    # Vector attributes
    fragment_ids: np.ndarray = field(
        default_factory=lambda: np.empty(0, dtype=np.int32),
        metadata=FieldMetadata(dtype=np.int32, cat=True, units="instance"),
    )

    start_point: np.ndarray = field(
        default_factory=lambda: np.full(3, np.nan, dtype=np.float32),
        metadata=FieldMetadata(
            length=3,
            dtype=np.float32,
            position=True,
            units="instance",
        ),
    )
    end_point: np.ndarray = field(
        default_factory=lambda: np.full(3, np.nan, dtype=np.float32),
        metadata=FieldMetadata(
            length=3,
            dtype=np.float32,
            position=True,
            units="instance",
        ),
    )

    chi2_per_pid: np.ndarray = field(
        default_factory=lambda: np.full(len(PID_LABELS) - 1, np.nan, dtype=np.float32),
        metadata=FieldMetadata(length=len(PID_LABELS) - 1, dtype=np.float32),
    )
    csda_ke_per_pid: np.ndarray = field(
        default_factory=lambda: np.full(len(PID_LABELS) - 1, np.nan, dtype=np.float32),
        metadata=FieldMetadata(
            length=len(PID_LABELS) - 1, dtype=np.float32, units="MeV"
        ),
    )
    mcs_ke_per_pid: np.ndarray = field(
        default_factory=lambda: np.full(len(PID_LABELS) - 1, np.nan, dtype=np.float32),
        metadata=FieldMetadata(
            length=len(PID_LABELS) - 1, dtype=np.float32, units="MeV"
        ),
    )

    crt_ids: np.ndarray = field(
        default_factory=lambda: np.empty(0, dtype=np.int32),
        metadata=FieldMetadata(dtype=np.int32),
    )
    crt_times: np.ndarray = field(
        default_factory=lambda: np.empty(0, dtype=np.float32),
        metadata=FieldMetadata(dtype=np.float32, units="us"),
    )
    crt_scores: np.ndarray = field(
        default_factory=lambda: np.empty(0, dtype=np.float32),
        metadata=FieldMetadata(dtype=np.float32),
    )

    def __str__(self):
        """Human-readable string representation of the particle object.

        Results
        -------
        str
            Basic information about the particle properties
        """
        pid_label = PID_LABELS[self.pid]
        match = self.match_ids[0] if len(self.match_ids) > 0 else -1
        return (
            f"Particle(ID: {self.id:<3} | PID: {pid_label:<8} "
            f"| Primary: {self.is_primary:<2} "
            f"| Size: {self.size:<5} | Match: {match:<3})"
        )

    def reset_crt_match(self) -> None:
        """Reset all the CRT hit matching attributes."""
        self.is_crt_matched = False
        self.crt_ids = np.empty(0, dtype=np.int32)
        self.crt_times = np.empty(0, dtype=np.float32)
        self.crt_scores = np.empty(0, dtype=np.float32)

    @property
    @stored_property
    def num_fragments(self) -> int:
        """Number of fragments that make up this particle.

        Returns
        -------
        int
            Number of fragments that make up the particle instance
        """
        return len(self.fragment_ids)


[docs] @dataclass(eq=False, repr=False) class RecoParticle(ParticleBase, RecoBase): """Reconstructed particle information. Attributes ---------- fragments : List[RecoFragment] List of fragments that make up this particle pdg_code : int PDG code corresponding to the PID number mass : float Rest mass of the particle in MeV/c^2 ke : float Best guess kinetic energy of the particle in MeV momentum : np.ndarray 3-momentum of the particle at the production point in MeV/c start_dir : np.ndarray (3) Particle direction w.r.t. the start point end_dir : np.ndarray (3) Particle direction w.r.t. the end point (only assigned to track objects) pid_scores : np.ndarray (P) Array of softmax scores associated with each particle class primary_scores : np.ndarray (2) Array of softmax scores associated with secondary and primary ppn_ids : np.ndarray (M) List of indexes of PPN points associated with this particle ppn_points : np.ndarray (M, 3) List of PPN points tagged to this particle start_dedx : float dE/dx around a user-defined neighborhood of the start point in MeV/cm end_dedx : float dE/dx around a user-defined neighborhood of the end point in MeV/cm vertex_distance : float Set-to-point distance between all particle points and the parent interaction vertex position in cm start_straightness : float Explained variance ratio of the beginning of the particle directional_spread : float Estimate of the angular spread of the particle (cosine spread) axial_spread : float Pearson correlation coefficient of the axial profile of the particle w.r.t. to the distance from its start point """ # Scalar attributes start_dedx: float = field(default=np.nan, metadata=FieldMetadata(units="MeV/cm")) end_dedx: float = field(default=np.nan, metadata=FieldMetadata(units="MeV/cm")) vertex_distance: float = field( default=np.nan, metadata=FieldMetadata(units="instance") ) start_straightness: float = np.nan directional_spread: float = np.nan axial_spread: float = np.nan # Object list attributes fragments: list[RecoFragment] = field( default_factory=lambda: [], metadata=FieldMetadata(skip=True, cat=True), ) # Vector attributes start_dir: np.ndarray = field( default_factory=lambda: np.full(3, np.nan, dtype=np.float32), metadata=FieldMetadata(length=3, dtype=np.float32, vector=True), ) end_dir: np.ndarray = field( default_factory=lambda: np.full(3, np.nan, dtype=np.float32), metadata=FieldMetadata(length=3, dtype=np.float32, vector=True), ) pid_scores: np.ndarray = field( default_factory=lambda: np.full(len(PID_LABELS) - 1, np.nan, dtype=np.float32), metadata=FieldMetadata(length=len(PID_LABELS) - 1, dtype=np.float32), ) primary_scores: np.ndarray = field( default_factory=lambda: np.full(2, np.nan, dtype=np.float32), metadata=FieldMetadata(length=2, dtype=np.float32), ) ppn_ids: np.ndarray = field( default_factory=lambda: np.empty(0, dtype=np.int32), metadata=FieldMetadata(dtype=np.int32), ) ppn_points: np.ndarray = field( default_factory=lambda: np.empty((0, 3), dtype=np.float32), metadata=FieldMetadata( dtype=np.float32, position=True, skip=True, units="instance", ), ) def __str__(self): """Human-readable string representation of the particle object. Results ------- str Basic information about the particle properties """ return "Reco" + super().__str__()
[docs] def merge(self, other): """Merge another particle instance into this one. The merging strategy differs depending on the the particle shapes merged together: - Track plus track: the merged particle uses the pair of end points that are farthest apart, and the primary and PID scores are copied from the constituent with the highest primary score. - Shower plus track: the track is merged into the shower, the shower start point is updated to the track end point farthest from the current shower start point, the primary score follows the most primary-like constituent and the PID stays that of the shower. Parameters ---------- other : RecoParticle Other reconstructed particle to merge into this one """ # Check that the particles being merged fit one of two categories if self.shape not in (SHOWR_SHP, TRACK_SHP) or other.shape != TRACK_SHP: raise ValueError( "Can only merge two track particles or a track into a shower." ) # Check that neither particle has yet been matched if self.is_matched or other.is_matched: raise ValueError("Cannot merge particles that already have matches.") # Concatenate the two particle long-form attributes together for attr in self._cat_attrs: self_val = getattr(self, attr) other_val = getattr(other, attr) # Handle lists separately from numpy arrays if isinstance(self_val, list): val = self_val + other_val else: val = np.concatenate([self_val, other_val]) setattr(self, attr, val) # Select end points and end directions appropriately if self.shape == TRACK_SHP: # If two tracks, pick points furthest apart points_i = np.vstack([self.start_point, self.end_point]) points_j = np.vstack([other.start_point, other.end_point]) dirs_i = np.vstack([self.start_dir, self.end_dir]) dirs_j = np.vstack([other.start_dir, other.end_dir]) dists = cdist(points_i, points_j) max_index = np.argmax(dists) max_i, max_j = max_index // 2, max_index % 2 self.start_point = points_i[max_i] self.end_point = points_j[max_j] self.start_dir = dirs_i[max_i] self.end_dir = dirs_j[max_j] else: # If a shower and a track, pick track point furthest from shower points_i = self.start_point.reshape(-1, 3) points_j = np.vstack([other.start_point, other.end_point]) dirs_j = np.vstack([other.start_dir, other.end_dir]) dists = cdist(points_i, points_j) max_j = np.argmax(dists) self.start_point = points_j[max_j] self.start_dir = dirs_j[max_j] # Match primary/PID to the most primary particle if other.primary_scores[-1] > self.primary_scores[-1]: self.primary_scores = other.primary_scores self.is_primary = other.is_primary if self.shape == TRACK_SHP: self.pid_scores = other.pid_scores self.pid = other.pid # If the calorimetric KEs have been computed, can safely sum if not np.isnan(self.calo_ke) and not np.isnan(other.calo_ke): self.calo_ke += other.calo_ke
@property @stored_property def pdg_code(self) -> int: """Translates the enumerated particle type to a sign-less PDG code. Returns ------- int Reconstructed sign-less PDG code """ return PID_TO_PDG[self.pid] @property @stored_property(units="MeV/c^2") def mass(self) -> float: """Rest mass of the particle in MeV/c^2. The mass is inferred from the predicted mass. Returns ------- float Rest mass of the particle """ if self.pid in PID_MASSES: return PID_MASSES[self.pid] return np.nan @property @stored_property(units="MeV") def ke(self) -> float: """Best-guess kinetic energy in MeV. Uses calorimetry for EM activity and this order for track: - CSDA-based estimate if it is available - MCS-based estimate if it is available - Calorimetry if all else fails Returns ------- float Best-guess kinetic energy """ if self.shape != TRACK_SHP: # If a particle is not a track, can only use calorimetry return self.calo_ke else: # If a particle is a track, pick CSDA for contained tracks and # pick MCS for uncontained tracks, unless specified otherwise if self.is_contained and self.csda_ke > 0.0: return self.csda_ke elif not self.is_contained and self.mcs_ke > 0.0: return self.mcs_ke else: return self.calo_ke @property @stored_property(length=3, dtype=np.float32, units="MeV/c") def momentum(self) -> np.ndarray: """Best-guess momentum in MeV/c. Returns ------- np.ndarray (3) Momentum vector """ ke = self.ke if ( not np.isnan(ke) and not np.any(np.isnan(self.start_dir)) and self.pid in PID_MASSES ): mass = PID_MASSES[self.pid] mom = np.sqrt(ke**2 + 2 * ke * mass) return mom * self.start_dir else: return np.full(3, np.nan, dtype=np.float32) @property @stored_property(units="MeV/c") def p(self) -> float: """Computes the magnitude of the initial momentum. Returns ------- float Norm of the initial momentum vector """ return float(np.linalg.norm(self.momentum)) @property @stored_alias("ke") def reco_ke(self) -> float: """Alias for `ke`, to match nomenclature in truth.""" return self.ke @property @stored_alias("momentum") def reco_momentum(self) -> np.ndarray: """Alias for `momentum`, to match nomenclature in truth.""" return self.momentum @property @stored_alias("length") def reco_length(self) -> float: """Alias for `length`, to match nomenclature in truth.""" return self.length @property @stored_alias("start_dir") def reco_start_dir(self) -> np.ndarray: """Alias for `start_dir`, to match nomenclature in truth.""" return self.start_dir @property @stored_alias("end_dir") def reco_end_dir(self) -> np.ndarray: """Alias for `end_dir`, to match nomenclature in truth.""" return self.end_dir
[docs] @dataclass(eq=False, repr=False) class TruthParticle(Particle, ParticleBase, TruthBase): """Truth particle information. This inherits all of the attributes of :class:`Particle`, which contains the G4 truth information for the particle. Attributes ---------- fragments : List[TruthFragment] List of fragments that make up this particle orig_interaction_id : int Unaltered index of the interaction in the original MC particle list orig_parent_id : int Unaltered index of the particle parent in the original MC particle list orig_group_id : int Unaltered index of the particle group in the original MC particle list orig_children_id : np.ndarray Unaltered list of the particle children in the original MC particle list children_counts : np.ndarray (P) Number of truth child particle of each shape pdg_code : int PDG code corresponding to the PID number mass : float Rest mass of the particle in MeV/c^2 ke : float Kinetic energy of the particle in MeV momentum : np.ndarray 3-momentum of the particle at the production point in MeV/c start_dir : np.ndarray (3) Particle direction w.r.t. the start point end_dir : np.ndarray (3) Particle direction w.r.t. the end point (only assigned to track objects) reco_length : float Reconstructed length of the particle (only assigned to track objects) reco_start_dir : np.ndarray (3) Particle direction estimate w.r.t. the start point reco_end_dir : np.ndarray (3) Particle direction estimate w.r.t. the end point (only assigned to track objects) reco_ke : float Best-guess reconstructed KE of the particle reco_momentum : np.ndarray Best-guess reconstructed momentum of the particle """ # Scalar attributes orig_interaction_id: int = -1 orig_parent_id: int = -1 orig_group_id: int = -1 reco_length: float = np.nan # Object list attributes fragments: list[TruthFragment] = field( default_factory=lambda: [], metadata=FieldMetadata(skip=True), ) # Vector attributes orig_children_id: np.ndarray = field( default_factory=lambda: np.empty(0, dtype=np.int32), metadata=FieldMetadata(dtype=np.int32), ) children_counts: np.ndarray = field( default_factory=lambda: np.empty(0, dtype=np.int32), metadata=FieldMetadata(dtype=np.int32), ) reco_start_dir: np.ndarray = field( default_factory=lambda: np.full(3, np.nan, dtype=np.float32), metadata=FieldMetadata(length=3, dtype=np.float32, vector=True), ) reco_end_dir: np.ndarray = field( default_factory=lambda: np.full(3, np.nan, dtype=np.float32), metadata=FieldMetadata(length=3, dtype=np.float32, vector=True), ) def __str__(self): """Human-readable string representation of the particle object. Results ------- str Basic information about the particle properties """ return "Truth" + super().__str__() @property @stored_property(length=3, dtype=np.float32, vector=True) def start_dir(self) -> np.ndarray: """Converts the initial momentum to a direction vector. Returns ------- np.ndarray (3) Start direction vector """ norm = np.linalg.norm(self.momentum) if norm > 0.0: return self.momentum / norm return np.full(3, np.nan, dtype=np.float32) @property @stored_property(length=3, dtype=np.float32, vector=True) def end_dir(self) -> np.ndarray: """Converts the final momentum to a direction vector. Note that if a particle stops, this is unreliable as an estimate of the direction of the particle before it stops. Returns ------- np.ndarray (3) End direction vector """ if self.shape == TRACK_SHP: norm = np.linalg.norm(self.end_momentum) if norm > 0.0: return self.end_momentum / norm return np.full(3, np.nan, dtype=np.float32) @property @stored_property(units="MeV") def ke(self) -> float: """Converts the particle initial energy to a kinetic energy. This only works for particles with a known mass (as defined in `spine.constants`). Returns ------- float Initial kinetic energy of the particle """ if not np.isnan(self.mass): return self.energy_init - self.mass return np.nan @property @stored_property(units="MeV") def reco_ke(self) -> float: """Best-guess reconstructed kinetic energy in MeV. Uses calorimetry for EM activity and this order for track: - CSDA-based estimate if it is available - MCS-based estimate if it is available - Calorimetry if all else fails Returns ------- float Best-guess kinetic energy """ if self.shape != TRACK_SHP: # If a particle is not a track, can only use calorimetry return self.calo_ke else: # If a particle is a track, pick CSDA for contained tracks and # pick MCS for uncontained tracks, unless specified otherwise if self.is_contained and not np.isnan(self.csda_ke): return self.csda_ke elif not self.is_contained and not np.isnan(self.mcs_ke): return self.mcs_ke else: return self.calo_ke @property @stored_property(length=3, dtype=np.float32, units="MeV/c") def reco_momentum(self) -> np.ndarray: """Best-guess reconstructed momentum in MeV/c. Returns ------- np.ndarray (3) Momentum vector """ ke = self.reco_ke if not np.isnan(ke) and self.pid in PID_MASSES: mass = PID_MASSES[self.pid] mom = np.sqrt(ke**2 + 2 * ke * mass) return mom * self.reco_start_dir else: return np.full(3, np.nan, dtype=np.float32)