Updated the logger import path in FeatureProcessor

This commit is contained in:
Khalim Conn-Kowlessar 2023-08-25 15:22:55 +01:00
parent 81d7e6afb7
commit 0e755626de

View file

@ -4,13 +4,14 @@ Create additional features from the dataset
import pandas as pd
from typing import List
from core.Logger import logger
from model_data.simulation_system.core.Logger import logger
RDSAP_CHANGE_DROP_COLUMNS = ['UPRN', 'HEAT_DEMAND_CHANGE']
HEAT_DEMAND_CHANGE_DROP_COLUMNS = ['UPRN', 'RDSAP_CHANGE']
RANDOM_SEED = 0
RANDOM_SEED = 0
class FeatureProcessor:
"""
Handle all feature manipulation before modelling
@ -38,11 +39,11 @@ class FeatureProcessor:
if not set(features).issubset(df.columns):
logger.error('Features defined is not contained in data')
exit(1)
df = df[features]
return df
@staticmethod
def subsample_data(df: pd.DataFrame, subsample_amount: int = None) -> pd.DataFrame:
"""
@ -53,14 +54,13 @@ class FeatureProcessor:
df = df.sample(subsample_amount, random_state=RANDOM_SEED)
return df
def process(
self,
df: pd.DataFrame,
target_column: str = "RDSAP_CHANGE",
features: List[str] = None,
subsample_amount: int = None
) -> pd.DataFrame:
self,
df: pd.DataFrame,
target_column: str = "RDSAP_CHANGE",
features: List[str] = None,
subsample_amount: int = None
) -> pd.DataFrame:
"""
Pipeline to get data ready for building a model
"""