Skip to content

Outlier Detection

Outlier Detection

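Points that fall outside the physical dimensions of the detector, [250,250,1000] in x, y, and z, respectively, can be removed from the data. The implementation shown below keeps a point only when -270 <= x <= 270, -270 <= y <= 270, and 0 <= z <= 1003.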

Bases: BaseEstimator, TransformerMixin

Parameters

None

Returns

event_data: (array) Data with outliers removed. event_lengths: (array) New event lengths after removal of outlier points.

Source code in scripts/ml_preprocessing_steps.py
class OutlierDetection(BaseEstimator,TransformerMixin):
    """Remove point-cloud points that lie outside fixed coordinate bounds.

    A point is kept only when ``-270 <= x <= 270``, ``-270 <= y <= 270`` and
    ``0 <= z <= 1003``, where x, y and z are columns 0, 1 and 2 of each point.

    Parameters
    ----------
    None

    Returns
    ----------
    event_data: (array)
        Data with outliers removed
    event_lengths: (array)
        New event lengths with removal of outlier points
    """
    def __init__(self):
        pass

    def fit(self,X,y=None):
        """Do nothing and return the transformer itself."""
        return self

    def transform(self,X,y=None):
        """Detecting outliers and removing them from the point cloud data

        Args:
            X (tuple): input ``(data, event_lengths)``; ``event_lengths[i]`` is
                the number of leading rows of ``data[i]`` that hold points.
            y (None): Defaults to None.

        Returns:
            (tuple): ``(event_data, new_event_lengths)``. ``event_data`` has
            the shape of ``data`` and is NaN everywhere except for the kept
            points (packed at the start of each event) and the last two rows
            of each event, which are copied from ``data``.
        """
        data, event_lengths = X
        event_data = np.full(data.shape, np.nan)
        # Zero-initialised rather than NaN-filled: event_lengths is used as a
        # slice bound, so it is integer-typed and cannot represent NaN.
        new_event_lengths = np.zeros_like(event_lengths)
        tot_count = 0

        for i in tqdm.tqdm(range(len(data)), desc="Removing outliers"):
            event_points = data[i, :event_lengths[i]]
            xs = event_points[:, 0]
            ys = event_points[:, 1]
            zs = event_points[:, 2]
            condition = (
                (-270 <= xs) & (xs <= 270)
                & (-270 <= ys) & (ys <= 270)
                & (0 <= zs) & (zs <= 1003)
            )
            allowed_points = event_points[condition]  # only points that are not outliers
            n_allowed = len(allowed_points)

            event_data[i, :n_allowed] = allowed_points  # valid points are packed at the front
            event_data[i, -2] = data[i, -2]  # need to include the labels
            event_data[i, -1] = data[i, -1]  # need to include the original index

            new_event_lengths[i] = n_allowed
            tot_count += event_lengths[i] - n_allowed  # outliers dropped from this event

        print(f"Number of outlier points removed: {tot_count}") 
        return (event_data, new_event_lengths)

transform(X, y=None)

Detecting outliers and removing them from the point cloud data

Parameters:

Name Type Description Default
X tuple

Input tuple (data, event_lengths): the point cloud data and the number of points in each event

required
y None

Defaults to None.

None

Returns:

Type Description
tuple

modified data and new event lengths

Source code in scripts/ml_preprocessing_steps.py
def transform(self,X,y=None):
    """Detecting outliers and removing them from the point cloud data

    A point is kept only when ``-270 <= x <= 270``, ``-270 <= y <= 270`` and
    ``0 <= z <= 1003``, where x, y and z are columns 0, 1 and 2 of each point.

    Args:
        X (tuple): input ``(data, event_lengths)``; ``event_lengths[i]`` is
            the number of leading rows of ``data[i]`` that hold points.
        y (None): Defaults to None.

    Returns:
        (tuple): ``(event_data, new_event_lengths)``. ``event_data`` has the
        shape of ``data`` and is NaN everywhere except for the kept points
        (packed at the start of each event) and the last two rows of each
        event, which are copied from ``data``.
    """
    data, event_lengths = X
    event_data = np.full(data.shape, np.nan)
    # Zero-initialised rather than NaN-filled: event_lengths is used as a
    # slice bound, so it is integer-typed and cannot represent NaN.
    new_event_lengths = np.zeros_like(event_lengths)
    tot_count = 0

    for i in tqdm.tqdm(range(len(data)), desc="Removing outliers"):
        event_points = data[i, :event_lengths[i]]
        xs = event_points[:, 0]
        ys = event_points[:, 1]
        zs = event_points[:, 2]
        condition = (
            (-270 <= xs) & (xs <= 270)
            & (-270 <= ys) & (ys <= 270)
            & (0 <= zs) & (zs <= 1003)
        )
        allowed_points = event_points[condition]  # only points that are not outliers
        n_allowed = len(allowed_points)

        event_data[i, :n_allowed] = allowed_points  # valid points are packed at the front
        event_data[i, -2] = data[i, -2]  # need to include the labels
        event_data[i, -1] = data[i, -1]  # need to include the original index

        new_event_lengths[i] = n_allowed
        tot_count += event_lengths[i] - n_allowed  # outliers dropped from this event

    print(f"Number of outlier points removed: {tot_count}") 
    return (event_data, new_event_lengths)