Source code for spine.utils.particles

"""Particle truth information post-processors.

They add/correct information stored in LArCV particles.
"""

from warnings import warn

import numpy as np

from spine.constants import (
    DELTA_SHP,
    INVAL_ID,
    INVAL_IDX,
    INVAL_TID,
    MICHL_SHP,
    PDG_TO_PID,
)


[docs] def process_particles( particles, particle_event, particle_mpv_event=None, neutrino_event=None ): """Process Particle object list to add/correct attributes in place. Does the following: - Checks that the group ID and parent ID are valid, assign -1 if not - Checks that the ancestor track ID is valid, assign -1 if not - Adds interaction ID information if it is not provided - Adds the true neutrino ID this particle came from - Adds a simplified enumerated particle species ID - Adds a flag as to whether a particle is a primary within its interaction - Adds a flag as to whether a particle is a primary within its group Notes ----- Generic (v04) files may have invalid parent IDs, interaction IDs (`INVAL_IDX`) or ancestor track IDs (`INVAL_TID`). ICARUS and SBND files may have invalid parent IDs, interaction IDs (`INVAL_ID`) or ancestor track IDs (`INVAL_TID`). DUNE-ND and 2x2 files may have invalid group IDs (`INVAL_ID`). Parameters ---------- particles : List[Particle] (P) List of true particle instances particle_event : larcv.EventParticle (P) List of true particle instances particle_mpv_event : larcv.EventParticle, optional (M) List of true MPV particle instances neutrino_event : larcv.EventNeutrino, optional (N) List of true neutrino instances """ # If the list is empty, there is nothing to do if len(particles) == 0: return # Get the additional attributes ( parent_ids, group_ids, ancestor_ids, interaction_ids, nu_ids, group_primary_ids, inter_primary_ids, pids, ) = process_particle_event(particle_event, particle_mpv_event, neutrino_event) # Update the particles objects in place for i, p in enumerate(particles): if p.id > -1: p.parent_id = parent_ids[i] p.group_id = group_ids[i] p.ancestor_track_id = ancestor_ids[i] p.interaction_id = interaction_ids[i] p.nu_id = nu_ids[i] p.group_primary = group_primary_ids[i] p.interaction_primary = inter_primary_ids[i] p.pid = pids[i]
[docs] def process_particle_event( particle_event, particle_mpv_event=None, neutrino_event=None ): """Corrects/fetches attributes for a larcv.EventParticle object. Does the following: - Checks that the group ID and parent ID are valid, assign -1 if not - Checks that the ancestor track ID is valid, assign -1 if not - Builds the interaction ID information if it is not provided - Gets the true neutrino ID this particle came from - Gets a simplified enumerated particle species ID - Gets a flag as to whether a particle is a primary within its interaction - Gets a flag as to whether a particle is a primary within its group Notes ----- Generic (v04) files may have invalid parent IDs, interaction IDs (`INVAL_IDX`) or ancestor track IDs (`INVAL_TID`). ICARUS and SBND files may have invalid parent IDs, interaction IDs (`INVAL_ID`) or ancestor track IDs (`INVAL_TID`). DUNE-ND and 2x2 files may have invalid group IDs (`INVAL_ID`). Parameters ---------- particle_event : larcv.EventParticle (P) List of true particle instances particle_mpv_event : larcv.EventParticle, optional (M) List of true MPV particle instances neutrino_event : larcv.EventNeutrino, optional (N) List of true neutrino instances Returns ------- group_ids : np.ndarray (P) List of group IDs, one per true particle instance parent_ids : np.ndarray (P) List of parent IDs, one per true particle instance ancestor_ids : np.ndarray (P) List of ancestor IDs, one per true particle instance interaction_ids : np.ndarray (P) List of interaction IDs, one per true particle instance nu_ids : np.ndarray (P) List of neutrino IDs, one per true particle instance group_primary_ids : np.ndarray (P) List of particle group primary IDs, one per true particle instance inter_primary_ids : np.ndarray (P) List of particle primary IDs, one per true particle instance pids : np.ndarray (P) List of particle IDs, one per true particle instance """ # Converts the input to simple python lists of objects particles = list(particle_event.as_vector()) particles_mpv, neutrinos = None, None if particle_mpv_event is not None: particles_mpv = list(particle_mpv_event.as_vector()) if neutrino_event is not None: neutrinos = list(neutrino_event.as_vector()) # Check on the parent ID of each particle parent_ids = np.array([p.parent_id() for p in particles], dtype=int) if np.any(parent_ids == INVAL_ID): # This takes care of ICARUS/SBND file issues parent_ids[parent_ids == INVAL_ID] = -1 elif len(particles) <= INVAL_IDX: # This takes care of generic file issues parent_ids[parent_ids == INVAL_IDX] = -1 # Check on the group ID of each particle # This takes care of DUNE-ND/2x2 file issues group_ids = np.array([p.group_id() for p in particles], dtype=int) group_ids[group_ids == INVAL_ID] = -1 # Check on the ancestor track ID of each particle # This takes care of generic/ICARUS/SBND file issues ancestor_ids = np.array([p.ancestor_track_id() for p in particles], dtype=int) ancestor_ids[ancestor_ids == INVAL_TID] = -1 # Get the mask of valid particle labels valid_mask = get_valid_mask(particles) # Get the interaction ID of each particle interaction_ids = get_interaction_ids(particles, valid_mask) # Get the neutrino ID of each particle nu_ids = get_nu_ids(particles, group_ids, interaction_ids, particles_mpv, neutrinos) # Get the group primary status of each particle group_primary_ids = get_group_primary_ids(particles, valid_mask) # Get the interaction primary status of each particle inter_primary_ids = get_inter_primary_ids(particles, valid_mask) # Get the particle species (PID) of each particle pids = get_particle_ids(particles, valid_mask) # Return return ( parent_ids, group_ids, ancestor_ids, interaction_ids, nu_ids, group_primary_ids, inter_primary_ids, pids, )
[docs] def get_valid_mask(particles): """Gets a mask corresponding to particles with valid labels. This function checks that the particle labels have been filled properly at the Supera level. It checks that the ancestor track ID of each particle is not an invalid number and that the ancestor creation process is filled. Parameters ---------- particles : List[larcv.Particle] (P) List of true particle instances Results ------- np.ndarray (P) Boolean list of validity, one per true particle instance """ # If there are no particles, nothing to do here if len(particles) == 0: return np.empty(0, dtype=bool) # If the interaction IDs are set in the particle tree, simply use that interaction_ids = np.array([p.interaction_id() for p in particles], dtype=int) if np.any((interaction_ids != INVAL_ID) & (interaction_ids != INVAL_IDX)): return interaction_ids != INVAL_ID # Otherwise, check that the ancestor track ID and creation process are valid mask = np.array([p.ancestor_track_id() != INVAL_TID for p in particles]) mask &= np.array([bool(p.ancestor_creation_process()) for p in particles]) return mask
[docs] def get_interaction_ids(particles, valid_mask=None): """Gets the interaction ID of each particle. If the `interaction_id` attribute of the Particle class is filled, it simply uses that quantity. Otherwise, it leverages shared ancestor position as a basis for interaction building and sets the interaction ID to -1 for particles with invalid ancestor track IDs. Parameters ---------- particles : List[larcv.Particle] (P) List of true particle instances valid_mask : np.ndarray, optional (P) Particle label validity mask Results ------- np.ndarray (P) List of interaction IDs, one per true particle instance """ # If there are no particles, nothing to do here if len(particles) == 0: return np.empty(0, dtype=int) # Compute the validity mask if it is not provided if valid_mask is None: valid_mask = get_valid_mask(particles) # If the interaction IDs are set in the particle tree, simply use that interaction_ids = np.array([p.interaction_id() for p in particles], dtype=int) if np.any((interaction_ids != INVAL_ID) & (interaction_ids != INVAL_IDX)): interaction_ids[~valid_mask] = -1 return interaction_ids # Otherwise, define interaction IDs on the basis of sharing # an ancestor vertex position anc_pos = np.vstack([get_coords(p.ancestor_position()) for p in particles]) interaction_ids = np.unique(anc_pos, axis=0, return_inverse=True)[-1] # Now set the interaction ID of particles with an undefined ancestor to -1 interaction_ids[~valid_mask] = -1 return interaction_ids
[docs] def get_nu_ids( particles, group_ids, interaction_ids, particles_mpv=None, neutrinos=None ): """Gets the neutrino-like ID of each partcile Convention: -1 for non-neutrinos, neutrino index for others If a list of multi-particle vertex (MPV) particles or neutrinos is provided, that information is leveraged to identify which interactions are neutrino-like and which are not. If `particles_mpv` and `neutrinos` are not specified, it assumes that only neutrino-like interactions have more than one true primary particle in a single interaction. Parameters ---------- particles : List[larcv.Particle] (P) List of true particle instances group_ids : np.ndarray (P) List of group IDs, one per true particle instance interaction_ids : np.ndarray (P) Array of interaction ID values, one per true particle instance particles_mpv : List[larcv.Particle], optional (M) List of true MPV particle instances neutrinos : list(larcv.Neutrino), optional (N) List of true neutrino instances Results ------- np.ndarray (P) List of neutrino IDs, one per true particle instance """ # If there are no particles, nothing to do here if len(particles) == 0: return np.empty(0, dtype=int) # Make sure there is only either MPV particles or neutrinos specified, not both assert particles_mpv is None or neutrinos is None, ( "Do not specify both particle_mpv_event and neutrino_event " "in `get_neutrino_ids`. Can only use one or the other." ) # Initialize neutrino IDs nu_ids = -np.ones(len(particles), dtype=int) if particles_mpv is None and neutrinos is None: # Warn that this is ad-hoc warn( "Neutrino IDs are being produced on the basis of interaction " "multiplicity (i.e. neutrino if >= 2 primaries). This is " "not an exact method and might lead to unexpected results." ) # Loop over the interactions primary_ids = get_inter_primary_ids(particles, interaction_ids > -1) nu_id = 0 for i in np.unique(interaction_ids): # If the interaction ID is invalid, skip if i < 0: continue # If there are at least two primary *groups*, the interaction is nu-like inter_index = np.where(interaction_ids == i)[0] if ( len(np.unique(group_ids[inter_index][primary_ids[inter_index] == 1])) > 1 ): nu_ids[inter_index] = nu_id nu_id += 1 else: # Find the reference positions to gauge if a particle comes from a # nu-like interaction ref_ids, ref_pos = None, None if particles_mpv and len(particles_mpv) > 0: ref_pos = np.vstack([get_coords(p.position()) for p in particles_mpv]) ref_pos = np.unique(ref_pos, axis=0) elif neutrinos and len(neutrinos) > 0: if ( hasattr(neutrinos[0], "interaction_id") and neutrinos[0].interaction_id() != INVAL_IDX ): ref_ids = np.array([n.interaction_id() for n in neutrinos]) else: ref_pos = np.vstack([get_coords(n.position()) for n in neutrinos]) # If an interaction ID is provided for neutrinos, the matching is trivial if ref_ids is not None: for i in np.unique(interaction_ids): # If the interaction is invalid, skip if i < 0: continue # Loop over positions in the interaction find a reference match inter_index = np.where(interaction_ids == i)[0] for nu_id, ref_id in enumerate(ref_ids): if i == ref_id: nu_ids[inter_index] = nu_id break # Otherwise, if a particle in an interaction shares its ancestor # position with an MPV particle or a neutrino, the whole interaction # is labeled as a nu-like interaction. if ref_pos is not None: warn( "Neutrino IDs are being produced on the basis of floating " "point agreement between particle ancestor positions and " "neutrino positions. For safety, should provide a matching " "interaction_id to the larcv.Neutrino object." ) anc_pos = np.vstack([get_coords(p.ancestor_position()) for p in particles]) for i in np.unique(interaction_ids): # If the interaction is invalid, skip if i < 0: continue # Loop over positions in the interaction find a reference match inter_index = np.where(interaction_ids == i)[0] for ref_id, pos in enumerate(ref_pos): if np.any((anc_pos[inter_index] == pos).all(axis=1)): nu_ids[inter_index] = ref_id break return nu_ids
[docs] def get_group_primary_ids(particles, valid_mask=None): """Gets the group primary status of particle fragments. This could be handled somewhere else (e.g. Supera). Parameters ---------- particles : List[larcv.Particle] (P) List of true particle instances valid_mask : np.ndarray, optional (P) Particle label validity mask Results ------- np.ndarray (P) List of particle group primary IDs, one per true particle instance """ # Compute the validity mask if it is not provided if valid_mask is None: valid_mask = get_valid_mask(particles) # Loop over the list of particle groups primary_ids = np.zeros(len(particles), dtype=int) group_ids = np.array([p.group_id() for p in particles], dtype=int) for group_id in np.unique(group_ids): # Check that the group ID is within the expected range group_index = np.where(group_ids == group_id)[0] if group_id != INVAL_ID and group_id > len(particles) - 1: warn( f"Bad group ID ({group_id}) not matching INVAL_ID " f"({INVAL_ID}). This may happen for old files." ) primary_ids[group_index] = -1 continue # If the particle group has invalid labeling, the concept of group # primary is ill-defined if group_id == INVAL_ID or not valid_mask[group_id]: primary_ids[group_index] = -1 continue # If a group originates from a Delta or a Michel, it has a primary group_p = particles[group_id] if group_p.shape() == MICHL_SHP or group_p.shape() == DELTA_SHP: primary_ids[group_id] = 1 continue # If a particle group's parent fragment is the first in time, # it is a valid primary. TODO: use first step time. clust_times = np.array([particles[i].t() for i in group_index]) min_id = np.argmin(clust_times) if group_index[min_id] == group_id: primary_ids[group_id] = 1 return primary_ids
[docs] def get_inter_primary_ids(particles, valid_mask=None): """Gets the interaction primary ID for each particle. Parameters ---------- particles : List[larcv.Particle] (P) List of true particle instances valid_mask : np.ndarray, optional (P) Particle label validity mask Results ------- np.ndarray (P) List of particle primary IDs, one per true particle instance """ # Compute the validity mask if it is not provided if valid_mask is None: valid_mask = get_valid_mask(particles) # Loop over the list of particles primary_ids = -np.ones(len(particles), dtype=int) for i, p in enumerate(particles): # If the particle has invalid labeling, it has invalid primary status group_id = p.group_id() if group_id == INVAL_ID or not valid_mask[i]: continue # Check that the group ID is within the expected range if group_id > len(particles) - 1: warn( f"Bad group ID ({group_id}) not matching INVAL_ID " f"({INVAL_ID}). This may happen for old files." ) continue # Tag particles originating from a very short-lived particle as primary group_p = particles[group_id] if ( abs(group_p.parent_pdg_code()) in [111, 113, 221, 331, 3212] and group_p.parent_track_id() == group_p.ancestor_track_id() ): primary_ids[i] = 1 continue # If the track ID of a particle *group* agrees with the track ID of # its ancestor, label as primary primary_ids[i] = group_p.track_id() == p.ancestor_track_id() return primary_ids
[docs] def get_particle_ids(particles, valid_mask=None): """Gets a particle species ID (PID) for each particle. All shower daughters are labeled the same as their primary so that an electron primary is not overruled by its photon daughters in a voxel-wise majority vote. That special case is handled downstream with the high-purity flag. Particles that are not in the target list are labeled `-1`. Parameters ---------- particles : List[Particle] (P) List of true particle instances valid_mask : np.ndarray, optional (P) Particle label validity mask Returns ------- np.ndarray (P) List of particle IDs, one per true particle instance """ # Compute the validity mask if it is not provided if valid_mask is None: valid_mask = get_valid_mask(particles) # Loop over the list of particles particle_ids = -np.ones(len(particles), dtype=int) for i, p in enumerate(particles): # If the primary ID is invalid, skip group_id = p.group_id() if group_id == INVAL_ID or not valid_mask[i]: continue # Check that the group ID is within the expected range if group_id > len(particles) - 1: warn( f"Bad group ID ({group_id}) not matching INVAL_ID " f"({INVAL_ID}). This may happen for old files." ) continue # If the particle type exists in the predefined list, assign t = particles[group_id].pdg_code() if t in PDG_TO_PID.keys(): particle_ids[i] = PDG_TO_PID[t] return particle_ids
[docs] def get_coords(position): """Gets the coordinates of a larcv.Vertex object. Parameters ---------- position : larcv.Vertex Encodes the position of a point with attributes x, y, z and t Returns ------- List[float] Coordinates of the point (x, y, z) """ return np.array([getattr(position, a)() for a in ["x", "y", "z"]])