deleted training file for redundant kwh model

This commit is contained in:
Khalim Conn-Kowlessar 2024-08-10 02:18:57 +01:00
parent d65ce731c0
commit 6ec7995ac3

View file

@ -1,57 +0,0 @@
from pprint import pprint
import msgpack
from utils.s3 import read_from_s3
from etl.bill_savings.EnergyConsumptionModel import EnergyConsumptionModel
def handler(
    *,
    dataset_version: str = "2024-07-08",
    targets: tuple = ("heating_kwh", "hot_water_kwh"),
) -> None:
    """
    Train, evaluate and save one energy-consumption model per target.

    Loads the cleaned EPC data from S3, builds an ``EnergyConsumptionModel``,
    reads the versioned training dataset, runs feature engineering and saves
    the dummy schema. Then, for every target in turn, it splits the dataset,
    fits, re-trains the final model, prints the train/test evaluation results
    and saves the model. According to the original author's note the final
    models are stored in S3 as pickles (the storage format is decided by
    ``EnergyConsumptionModel.save_model``, which is not visible here).

    :param dataset_version: Version label used both in the dataset path and
        when saving the dummy schema and models. Defaults to the previously
        hard-coded ``"2024-07-08"``.
    :param targets: Target columns to train a model for, processed in order.
        Defaults to the two targets the original script trained.
    :return: None
    """
    # The cleaned EPC payload is stored msgpack-encoded; decode it into
    # native Python objects (raw=False yields str rather than bytes).
    packed = read_from_s3(
        s3_file_name="cleaned_epc_data/cleaned.bson",
        bucket_name="retrofit-data-dev"
    )
    cleaned = msgpack.unpackb(packed, raw=False)

    model = EnergyConsumptionModel(cleaned=cleaned, n_jobs=2)
    model.read_dataset(f'energy_consumption/{dataset_version}/energy_consumption_dataset.parquet')
    model.feature_engineering()
    model.save_dummy_schema(dataset_version=dataset_version)

    # Same pipeline for every target; the per-target call order is kept
    # exactly as it was when the steps were written out by hand.
    for target in targets:
        model.split_dataset(target=target)
        model.fit_model(target=target)
        model.re_train_final_model(target=target)

        evaluation_results = model.evaluate_model(target=target)
        pprint(evaluation_results["train"])
        pprint(evaluation_results["test"])

        model.save_model(target=target, dataset_version=dataset_version)
        # NOTE(review): earlier ad-hoc debugging code (removed) inspected
        # model.testing_predictions[target] / model.training_predictions[target]
        # sorted by "residual" and merged with model.input_data on the index;
        # presumably those attributes are still the place to look - confirm.