Skip to content

Scaling Data

Scaling Data

All features are scaled with MinMaxScaler(); its documentation in the scikit-learn library can be found here.

Bases: BaseEstimator, TransformerMixin

Parameters

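dimension: (int) number of feature channels along the last axis to scale, one MinMaxScaler() per channel. Defaults to 4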

Return

X: (np.array) data with MinMaxScaler() applied to every feature channel; the last two entries along axis 1 are left unscaled

Source code in scripts/ml_preprocessing_steps.py
class ScalingData(BaseEstimator,TransformerMixin):
    """Scale each feature channel of a 3-D array to the (-1, 1) range.

    One ``MinMaxScaler(feature_range=(-1, 1))`` is kept per index of the
    last axis of ``X``. Only the slice ``X[:, :-2, n]`` is fitted and
    scaled; the last two entries along axis 1 are left untouched.

    Parameters
    ----------
    dimension : int, default=4
        Number of channels (indices of the last axis of ``X``) to scale.

    Attributes
    ----------
    scalers : list of MinMaxScaler
        One scaler per channel, fitted by ``fit``.

    Return
    ----------
    X: (np.array)
        MinMaxScaler() applied data for every scaled channel

    """
    def __init__(self,dimension=4):
        self.dimension = dimension
        self.scalers = [MinMaxScaler(feature_range=(-1, 1)) for _ in range(dimension)]

    def fit(self,X,y=None):
        """Fit one scaler per channel on ``X[:, :-2, n]``.

        Args:
            X (np.array): data array, indexed as ``X[:, :-2, n]``
            y (None): Defaults to None

        Returns:
            ScalingData: the fitted transformer (``self``)
        """
        # ``dimension`` can be changed after construction (e.g. through
        # ``set_params``); rebuild the scaler list so it always matches
        # instead of failing with an IndexError below.
        if len(self.scalers) != self.dimension:
            self.scalers = [MinMaxScaler(feature_range=(-1, 1)) for _ in range(self.dimension)]
        for n, scaler in enumerate(self.scalers):
            # Flatten the channel into a single column so that one min/max
            # pair is learned for the whole channel.
            scaler.fit(X[:, :-2, n].reshape(-1, 1))
        return self

    def transform(self,X,y=None):
        """Scaling with MinMaxScaler to (-1,1) range

        ``X`` is modified in place and the same array object is returned.

        Args:
            X (np.array): data array
            y (None): Defaults to None

        Returns:
            (np.array): data with scaled training features
        """
        n_dict = {0:"x",1:"y",2:"z",3:"charge"}
        for n in range(self.dimension):
            scaler = self.scalers[n]
            channel = X[:, :-2, n]
            X[:, :-2, n] = scaler.transform(channel.reshape(-1, 1)).reshape(channel.shape)
            # Channels beyond the four named ones get a generic label rather
            # than raising KeyError after the data was already scaled.
            label = n_dict.get(n, f"feature {n}")
            print(f"Scaler min/max for {label}: {scaler.data_min_[0]}, {scaler.data_max_[0]}")

        return X

transform(X, y=None)

Scaling with MinMaxScaler to (-1,1) range

Parameters:

Name Type Description Default
X array

data array

required
y None

Defaults to None

None

Returns:

Type Description
array

data with scaled training features

Source code in scripts/ml_preprocessing_steps.py
def transform(self,X,y=None):
    """Scaling with MinMaxScaler to (-1,1) range

    Each channel ``n`` in ``range(self.dimension)`` is scaled with
    ``self.scalers[n]``. Only ``X[:, :-2, n]`` is scaled; the last two
    entries along axis 1 are left untouched. ``X`` is modified in place
    and the same array object is returned.

    Args:
        X (np.array): data array
        y (None): Defaults to None

    Returns:
        (np.array): data with scaled training features
    """
    n_dict = {0:"x",1:"y",2:"z",3:"charge"}
    for n in range(self.dimension):
        scaler = self.scalers[n]
        channel = X[:, :-2, n]
        # The scaler works on a single column, so flatten the channel and
        # restore its original 2-D shape afterwards.
        X[:, :-2, n] = scaler.transform(channel.reshape(-1, 1)).reshape(channel.shape)
        # Channels beyond the four named ones get a generic label rather
        # than raising KeyError after the data was already scaled.
        label = n_dict.get(n, f"feature {n}")
        print(f"Scaler min/max for {label}: {scaler.data_min_[0]}, {scaler.data_max_[0]}")

    return X