diff --git a/model_data/simulation_system/core/FeatureProcessor.py b/model_data/simulation_system/core/FeatureProcessor.py index aef9605f..cefcee9b 100644 --- a/model_data/simulation_system/core/FeatureProcessor.py +++ b/model_data/simulation_system/core/FeatureProcessor.py @@ -4,13 +4,14 @@ Create additional features from the dataset import pandas as pd from typing import List -from core.Logger import logger +from model_data.simulation_system.core.Logger import logger RDSAP_CHANGE_DROP_COLUMNS = ['UPRN', 'HEAT_DEMAND_CHANGE'] HEAT_DEMAND_CHANGE_DROP_COLUMNS = ['UPRN', 'RDSAP_CHANGE'] -RANDOM_SEED = 0 - +RANDOM_SEED = 0 + + class FeatureProcessor: """ Handle all feature manipulation before modelling @@ -38,11 +39,11 @@ class FeatureProcessor: if not set(features).issubset(df.columns): logger.error('Features defined is not contained in data') exit(1) - + df = df[features] return df - + @staticmethod def subsample_data(df: pd.DataFrame, subsample_amount: int = None) -> pd.DataFrame: """ @@ -53,14 +54,13 @@ class FeatureProcessor: df = df.sample(subsample_amount, random_state=RANDOM_SEED) return df - def process( - self, - df: pd.DataFrame, - target_column: str = "RDSAP_CHANGE", - features: List[str] = None, - subsample_amount: int = None - ) -> pd.DataFrame: + self, + df: pd.DataFrame, + target_column: str = "RDSAP_CHANGE", + features: List[str] = None, + subsample_amount: int = None + ) -> pd.DataFrame: """ Pipeline to get data ready for building a model """