mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Added split training data
This commit is contained in:
parent
6d7d03b3f0
commit
d28aeddb8b
3 changed files with 15 additions and 3 deletions
Binary file not shown.
Binary file not shown.
|
|
@@ -99,18 +99,30 @@ def training(train_filepath: str, test_filepath: str) -> None:
|
|||
# logger.info('Split data into train and validation')
|
||||
|
||||
logger.info('Build Model')
|
||||
data = TabularDataset(data=train_df)
|
||||
|
||||
data = TabularDataset(data=train_filepath)
|
||||
data = data.drop(columns=['UPRN', 'HEAT_DEMAND_CHANGE'])
|
||||
TOP_FEATURES = ['MAINHEAT', 'ROOF', 'WALLS', 'MAINHEATCONT', 'PHOTO', 'HOTWATER', 'SECONDHEAT']
|
||||
# top_features = data.columns[data.columns.str.startswith(tuple(TOP_FEATURES))]
|
||||
|
||||
data = data[['RDSAP_CHANGE'] + top_features.to_list()]
|
||||
# data = TabularDataset(data=train_df)
|
||||
# data['RDSAP_CHANGE'] = data['RDSAP_CHANGE'].astype(float)
|
||||
subsample_size = round(len(data)/4)
|
||||
subsample_size = round(len(data)/20)
|
||||
data = data.sample(subsample_size, random_state=RANDOM_SEED)
|
||||
|
||||
# Add custom metric class MAPE
|
||||
# Have a look at temporal features
|
||||
|
||||
target_column = 'RDSAP_CHANGE'
|
||||
predictor_RDSAP = TabularPredictor(
|
||||
label=target_column,
|
||||
path="agModels-predictRDSAP",
|
||||
problem_type="regression",
|
||||
eval_metric='mean_absolute_error'
|
||||
).fit(data, time_limit=8000, presets='high_quality', excluded_model_types=['KNN'])
|
||||
).fit(data, time_limit=200, presets='best_quality', excluded_model_types=['KNN'])
|
||||
|
||||
|
||||
|
||||
logger.info('Evaluate matrics')
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue