Outlier Detection
Outlier Detection
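Points that fall outside the physical dimensions of the detector, nominally [250,250,1000] in x, y, and z, respectively, are removed from the data. Note that the implementation below applies slightly wider limits: -270 <= x <= 270, -270 <= y <= 270, and 0 <= z <= 1003.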
Bases: BaseEstimator, TransformerMixin
Parameters
None
Returns
event_data: (array)
Data with outliers removed
event_lengths: (array)
New event lengths after removal of outlier points
Source code in scripts/ml_preprocessing_steps.py
class OutlierDetection(BaseEstimator, TransformerMixin):
    """Remove points lying outside fixed spatial bounds from every event.

    Parameters
    ----------
    None

    Returns
    ----------
    event_data: (array)
        Data with outliers removed
    event_lengths: (array)
        New event lengths with removal of outlier points
    """

    def __init__(self):
        pass

    def fit(self, X, y=None):
        """Do nothing and return the transformer itself (nothing is learned)."""
        return self

    def transform(self, X, y=None):
        """Detect outlier points and remove them from the point cloud data.

        A point is kept only when -270 <= x <= 270, -270 <= y <= 270 and
        0 <= z <= 1003, where x, y, z are columns 0, 1, 2 of the point.
        NOTE(review): these limits are hard-coded; confirm them against the
        detector geometry.

        Args:
            X (tuple): input data and its event lengths (data, event_lengths).
                ``event_lengths[i]`` is used as a slice bound, so it must be
                an integer.
            y (None): Unused. Defaults to None.

        Returns:
            (tuple): (event_data, new_event_lengths). ``event_data`` has the
            shape of ``data`` and is NaN everywhere except for the kept points
            (packed at the start of each event) and the last two entries of
            each event, which are copied from the input.
        """
        data, event_lengths = X

        # NaN-filled output: slots freed by removed points remain NaN.
        event_data = np.full(data.shape, np.nan)
        # Every entry is assigned in the loop, so zero-initialise. Filling an
        # integer array with NaN is an invalid cast (platform-dependent value
        # and a RuntimeWarning on recent NumPy).
        new_event_lengths = np.zeros_like(event_lengths)

        tot_count = 0
        for i in tqdm.tqdm(range(len(data)), desc="Removing outliers"):
            event_points = data[i, :event_lengths[i]]
            xs = event_points[:, 0]
            ys = event_points[:, 1]
            zs = event_points[:, 2]
            condition = (
                (-270 <= xs) & (xs <= 270)
                & (-270 <= ys) & (ys <= 270)
                & (0 <= zs) & (zs <= 1003)
            )
            # Only points inside the bounds survive.
            allowed_points = event_points[condition]
            n_allowed = len(allowed_points)

            # Pack the surviving points at the front of the event.
            event_data[i, :n_allowed] = allowed_points
            event_data[i, -2] = data[i, -2]  # need to include the labels
            event_data[i, -1] = data[i, -1]  # need to include the original index

            # Original event length minus the number of outliers.
            new_event_lengths[i] = n_allowed
            tot_count += event_lengths[i] - new_event_lengths[i]

        print(f"Number of outlier points removed: {tot_count}")
        return (event_data, new_event_lengths)
|
Detecting outliers and removing them from the point cloud data
Parameters:
| Name |
Type |
Description |
Default |
X
|
tuple
|
Input data together with its event lengths (data, event_lengths)
|
required
|
y
|
None
|
|
None
|
Returns:
| Type |
Description |
tuple
|
modified data and new event lengths
|
Source code in scripts/ml_preprocessing_steps.py
def transform(self, X, y=None):
    """Detect outlier points and remove them from the point cloud data.

    A point is kept only when -270 <= x <= 270, -270 <= y <= 270 and
    0 <= z <= 1003, where x, y, z are columns 0, 1, 2 of the point.
    NOTE(review): these limits are hard-coded; confirm them against the
    detector geometry.

    Args:
        X (tuple): input data and its event lengths (data, event_lengths).
            ``event_lengths[i]`` is used as a slice bound, so it must be
            an integer.
        y (None): Unused. Defaults to None.

    Returns:
        (tuple): (event_data, new_event_lengths). ``event_data`` has the
        shape of ``data`` and is NaN everywhere except for the kept points
        (packed at the start of each event) and the last two entries of
        each event, which are copied from the input.
    """
    data, event_lengths = X

    # NaN-filled output: slots freed by removed points remain NaN.
    event_data = np.full(data.shape, np.nan)
    # Every entry is assigned in the loop, so zero-initialise. Filling an
    # integer array with NaN is an invalid cast (platform-dependent value
    # and a RuntimeWarning on recent NumPy).
    new_event_lengths = np.zeros_like(event_lengths)

    tot_count = 0
    for i in tqdm.tqdm(range(len(data)), desc="Removing outliers"):
        event_points = data[i, :event_lengths[i]]
        xs = event_points[:, 0]
        ys = event_points[:, 1]
        zs = event_points[:, 2]
        condition = (
            (-270 <= xs) & (xs <= 270)
            & (-270 <= ys) & (ys <= 270)
            & (0 <= zs) & (zs <= 1003)
        )
        # Only points inside the bounds survive.
        allowed_points = event_points[condition]
        n_allowed = len(allowed_points)

        # Pack the surviving points at the front of the event.
        event_data[i, :n_allowed] = allowed_points
        event_data[i, -2] = data[i, -2]  # need to include the labels
        event_data[i, -1] = data[i, -1]  # need to include the original index

        # Original event length minus the number of outliers.
        new_event_lengths[i] = n_allowed
        tot_count += event_lengths[i] - new_event_lengths[i]

    print(f"Number of outlier points removed: {tot_count}")
    return (event_data, new_event_lengths)
|