From 0a1f728f37705a396f4d18879ae7d89881544ea9 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 2 Jul 2024 17:48:06 +0100 Subject: [PATCH] implemented xgboost which performs really well --- etl/bill_savings/EnergyConsumptionModel.py | 5 ++--- etl/bill_savings/data_collection.py | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/etl/bill_savings/EnergyConsumptionModel.py b/etl/bill_savings/EnergyConsumptionModel.py index ca221175..51972a36 100644 --- a/etl/bill_savings/EnergyConsumptionModel.py +++ b/etl/bill_savings/EnergyConsumptionModel.py @@ -2,7 +2,6 @@ import pandas as pd from xgboost import XGBRegressor from datetime import datetime from sklearn.model_selection import train_test_split -from sklearn.linear_model import LinearRegression from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error from sklearn.feature_selection import RFECV from utils.s3 import save_pickle_to_s3, read_pickle_from_s3, read_dataframe_from_s3_parquet @@ -16,8 +15,8 @@ class EnergyConsumptionModel: FEATURES = { "heating_kwh": [ "lodgement-year", "lodgement-month", "current-energy-efficiency", "energy-consumption-current", - "heating-cost-current", "main-fuel", "total-floor-area", "number-heated-rooms", "number-habitable-rooms", - "mainheat-energy-eff" + "heating-cost-current", "total-floor-area", "number-heated-rooms", "number-habitable-rooms", + # "mainheat-energy-eff", "mainheat-description", "main-fuel", ], "hot_water_kwh": [ "lodgement-year", "lodgement-month", "current-energy-efficiency", "energy-consumption-current", diff --git a/etl/bill_savings/data_collection.py b/etl/bill_savings/data_collection.py index 79afa936..24b10d7f 100644 --- a/etl/bill_savings/data_collection.py +++ b/etl/bill_savings/data_collection.py @@ -133,7 +133,7 @@ def app(): energy_consumption_data = [] for i, directory in tqdm(enumerate(epc_directories), total=len(epc_directories)): # Skip the first 50 - if i < 50: + if i < 90: continue data = pd.read_csv(directory / "certificates.csv", low_memory=False)