Bases: BaseEstimator, TransformerMixin
Parameters
target_size: (int) rows of the input are expected to have shape (target_size + 2, 4)
Return
limited_data: (np.array)
All classes are now balanced through limitation
Source code in scripts/ml_preprocessing_steps.py
class DataLimitation(BaseEstimator, TransformerMixin):
    """Balance classes by truncating every class to the size of the smallest one.

    Parameters
    ----------
    target_size : int
        Rows of ``X`` are expected to have shape ``(target_size + 2, 4)``;
        the output buffer is allocated with that row shape.

    Return
    ----------
    limited_data: (np.array)
        All classes are now balanced through limitation
    """

    def __init__(self, target_size):
        self.target_size = target_size

    def fit(self, X, y=None):
        """Nothing is learned from the data; return the transformer itself."""
        return self

    def transform(self, X, y=None):
        """Balancing the classes by limiting to the lowest class to remove any training bias

        The class label of each event is read from ``X[:, -2, 0]``. Only labels
        in ``0..4`` are kept; for every class that is present, the first
        ``lowest_events`` events (in input order) are retained, where
        ``lowest_events`` is the size of the smallest present class.

        Args:
            X (np.array): data array with reclassified labels
            y (None): Defaults to None.

        Returns:
            (np.array): limited data with balanced classes

        Raises:
            ValueError: if ``X`` contains no event with a label in ``0..4``.
        """
        n_label_classes = 5  # labels outside 0..4 are discarded

        labels = X[:, -2, 0].astype(int)
        # The lower bound matters: a negative label would otherwise be
        # counted as one of the last classes instead of being rejected.
        valid_mask = (labels >= 0) & (labels < n_label_classes)
        X_valid = X[valid_mask]
        labels_valid = labels[valid_mask]
        if labels_valid.size == 0:
            raise ValueError(
                "DataLimitation.transform: no events with a class label in 0..4"
            )

        present_classes, counts_labels = np.unique(labels_valid, return_counts=True)
        print(f"Current class distribution is {counts_labels}")
        lowest_events = int(counts_labels.min())

        # Size the output by the classes actually present; sizing it for all
        # five classes leaves NaN-only rows whenever one of them is missing.
        limiting_data = np.full(
            (len(present_classes) * lowest_events, self.target_size + 2, 4),
            np.nan,
        )
        print(f"The new shape after limiting to lowest class{limiting_data.shape}")

        # First `lowest_events` occurrences of each class, restored to input order.
        keep = np.sort(
            np.concatenate(
                [
                    np.flatnonzero(labels_valid == cls)[:lowest_events]
                    for cls in present_classes
                ]
            )
        )
        limiting_data[:] = X_valid[keep]

        labels_ll = limiting_data[:, -2, 0].astype(int)
        _, counts_labels_ll = np.unique(labels_ll, return_counts=True)
        print(f"Updated class distribution is {counts_labels_ll}")
        return limiting_data
|
Balancing the classes by limiting to the lowest class to remove any training bias
Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| X | array | data array with reclassified labels | required |
| y | None | | None |
Returns:

| Type | Description |
|------|-------------|
| array | limited data with balanced classes |
Source code in scripts/ml_preprocessing_steps.py
def transform(self, X, y=None):
    """Balancing the classes by limiting to the lowest class to remove any training bias

    The class label of each event is read from ``X[:, -2, 0]``. Only labels
    in ``0..4`` are kept; for every class that is present, the first
    ``lowest_events`` events (in input order) are retained, where
    ``lowest_events`` is the size of the smallest present class.

    Args:
        X (np.array): data array with reclassified labels
        y (None): Defaults to None.

    Returns:
        (np.array): limited data with balanced classes

    Raises:
        ValueError: if ``X`` contains no event with a label in ``0..4``.
    """
    n_label_classes = 5  # labels outside 0..4 are discarded

    labels = X[:, -2, 0].astype(int)
    # The lower bound matters: a negative label would otherwise be
    # counted as one of the last classes instead of being rejected.
    valid_mask = (labels >= 0) & (labels < n_label_classes)
    X_valid = X[valid_mask]
    labels_valid = labels[valid_mask]
    if labels_valid.size == 0:
        raise ValueError(
            "DataLimitation.transform: no events with a class label in 0..4"
        )

    present_classes, counts_labels = np.unique(labels_valid, return_counts=True)
    print(f"Current class distribution is {counts_labels}")
    lowest_events = int(counts_labels.min())

    # Size the output by the classes actually present; sizing it for all
    # five classes leaves NaN-only rows whenever one of them is missing.
    limiting_data = np.full(
        (len(present_classes) * lowest_events, self.target_size + 2, 4),
        np.nan,
    )
    print(f"The new shape after limiting to lowest class{limiting_data.shape}")

    # First `lowest_events` occurrences of each class, restored to input order.
    keep = np.sort(
        np.concatenate(
            [
                np.flatnonzero(labels_valid == cls)[:lowest_events]
                for cls in present_classes
            ]
        )
    )
    limiting_data[:] = X_valid[keep]

    labels_ll = limiting_data[:, -2, 0].astype(int)
    _, counts_labels_ll = np.unique(labels_ll, return_counts=True)
    print(f"Updated class distribution is {counts_labels_ll}")
    return limiting_data
|