mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
updated import for featureprocessor
This commit is contained in:
parent
81d7e6afb7
commit
0e755626de
1 changed file with 12 additions and 12 deletions
|
|
@@ -4,13 +4,14 @@ Create additional features from the dataset
|
|||
|
||||
import pandas as pd
|
||||
from typing import List
|
||||
from core.Logger import logger
|
||||
from model_data.simulation_system.core.Logger import logger
|
||||
|
||||
RDSAP_CHANGE_DROP_COLUMNS = ['UPRN', 'HEAT_DEMAND_CHANGE']
|
||||
HEAT_DEMAND_CHANGE_DROP_COLUMNS = ['UPRN', 'RDSAP_CHANGE']
|
||||
|
||||
RANDOM_SEED = 0
|
||||
|
||||
RANDOM_SEED = 0
|
||||
|
||||
|
||||
class FeatureProcessor:
|
||||
"""
|
||||
Handle all feature manipulation before modelling
|
||||
|
|
@@ -38,11 +39,11 @@ class FeatureProcessor:
|
|||
if not set(features).issubset(df.columns):
|
||||
logger.error('Features defined is not contained in data')
|
||||
exit(1)
|
||||
|
||||
|
||||
df = df[features]
|
||||
|
||||
return df
|
||||
|
||||
|
||||
@staticmethod
|
||||
def subsample_data(df: pd.DataFrame, subsample_amount: int = None) -> pd.DataFrame:
|
||||
"""
|
||||
|
|
@@ -53,14 +54,13 @@ class FeatureProcessor:
|
|||
df = df.sample(subsample_amount, random_state=RANDOM_SEED)
|
||||
return df
|
||||
|
||||
|
||||
def process(
|
||||
self,
|
||||
df: pd.DataFrame,
|
||||
target_column: str = "RDSAP_CHANGE",
|
||||
features: List[str] = None,
|
||||
subsample_amount: int = None
|
||||
) -> pd.DataFrame:
|
||||
self,
|
||||
df: pd.DataFrame,
|
||||
target_column: str = "RDSAP_CHANGE",
|
||||
features: List[str] = None,
|
||||
subsample_amount: int = None
|
||||
) -> pd.DataFrame:
|
||||
"""
|
||||
Pipeline to get data ready for building a model
|
||||
"""
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue