diff --git a/backend/Property.py b/backend/Property.py index 31612a49..188564e4 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -643,14 +643,24 @@ class Property: https://www.sciencedirect.com/science/article/pii/S0378778823002542 :return: """ - # We should adjust the costs first and then calculate the energy consumption - # These are the costs at the time that the EPC was created - we need a cost based on today's prices so - # we do the following: - # 1) Score the models to get kwh for heating and hot water - # 2) Adjust the kwh values with the UCL paper - # 3) Convert the kwh values to costs today using current prices - # - # For the moment, we'll just use the same cost for lighting, since the value is quite low + + # We get the following things: + # 1) Today's cost. This gives us a baseline figure for what the cost is today + # 2) Predicted kWh + + # Today's costs + todays_heating_cost = energy_consumption_client.convert_cost_to_today( + original_cost=float(self.data["heating-cost-current"]), + lodgement_date=pd.Timestamp(self.epc_record.prepared_epc["lodgement_date"]) + ) + todays_hot_water_cost = energy_consumption_client.convert_cost_to_today( + original_cost=float(self.data["hot-water-cost-current"]), + lodgement_date=pd.Timestamp(self.epc_record.prepared_epc["lodgement_date"]) + ) + todays_lighting_cost = energy_consumption_client.convert_cost_to_today( + original_cost=float(self.data["lighting-cost-current"]), + lodgement_date=pd.Timestamp(self.epc_record.prepared_epc["lodgement_date"]) + ) scoring_df = pd.DataFrame([self.epc_record.prepared_epc]) # Change columns from underscores to hyphens @@ -694,33 +704,34 @@ class Property: current_epc_rating=self.data["current-energy-rating"], ) - # Convert to cost - if self.heating_energy_source == "Electricity": - adjusted_heating_cost = adjusted_heating_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP - unadjusted_heating_cost = heating_prediction * AnnualBillSavings.ELECTRICITY_PRICE_CAP - elif
self.heating_energy_source == "Natural Gas": - adjusted_heating_cost = adjusted_heating_kwh * AnnualBillSavings.GAS_PRICE_CAP - unadjusted_heating_cost = heating_prediction * AnnualBillSavings.GAS_PRICE_CAP - else: - raise NotImplementedError("Not implemented cost for this fuel type") + # Adjust today's cost figures with the UCL model + adjusted_heating_cost = AnnualBillSavings.adjust_energy_cost_to_metered( + epc_energy_cost=todays_heating_cost, + current_epc_rating=self.data["current-energy-rating"], + ) - if self.hot_water_energy_source == "Electricity": - adjusted_hot_water_cost = adjusted_hot_water_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP - unadjusted_hot_water_cost = hot_water_prediction * AnnualBillSavings.ELECTRICITY_PRICE_CAP - elif self.hot_water_energy_source == "Natural Gas": - adjusted_hot_water_cost = adjusted_hot_water_kwh * AnnualBillSavings.GAS_PRICE_CAP - unadjusted_hot_water_cost = hot_water_prediction * AnnualBillSavings.GAS_PRICE_CAP - else: - raise NotImplementedError("Not implemented cost for this fuel type") + adjusted_hot_water_cost = AnnualBillSavings.adjust_energy_cost_to_metered( + epc_energy_cost=todays_hot_water_cost, + current_epc_rating=self.data["current-energy-rating"], + ) - adjusted_lighting_cost = adjusted_lighting_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP + adjusted_lighting_cost = AnnualBillSavings.adjust_energy_cost_to_metered( + epc_energy_cost=todays_lighting_cost, + current_epc_rating=self.data["current-energy-rating"], + ) - adjusted_appliances_cost = adjusted_applicances_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP + adjusted_appliances_cost = AnnualBillSavings.adjust_energy_cost_to_metered( + epc_energy_cost=appliances_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP, + current_epc_rating=self.data["current-energy-rating"], + ) # Sum up the adjusted kwh figures self.current_adjusted_energy = ( adjusted_heating_kwh + adjusted_hot_water_kwh + adjusted_lighting_kwh + adjusted_applicances_kwh ) + 
self.expected_energy_bill = ( + adjusted_heating_cost + adjusted_hot_water_cost + adjusted_lighting_cost + adjusted_appliances_cost + ) self.energy_cost_estimates = { "adjusted": { @@ -730,9 +741,9 @@ class Property: "appliances": adjusted_appliances_cost }, "unadjusted": { - "heating": unadjusted_heating_cost, - "hot_water": unadjusted_hot_water_cost, - "lighting": float(self.data["lighting-cost-current"]), + "heating": todays_heating_cost, + "hot_water": todays_hot_water_cost, + "lighting": todays_lighting_cost, "appliances": appliances_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP }, "epc": { @@ -757,10 +768,6 @@ class Property: } } - self.expected_energy_bill = ( - adjusted_heating_cost + adjusted_hot_water_cost + adjusted_lighting_cost + adjusted_appliances_cost - ) - def set_spatial(self, spatial: pd.DataFrame): """ Sets whether the property is in a conservation area given the output of the ConservationAreaClient diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 609d737f..7a0bba2a 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -359,7 +359,8 @@ async def trigger_plan(body: PlanTriggerRequest): }, dummy_schema_path=f"model_directory/energy_consumption_model/{dataset_version}_dummy_schema.pkl", consumption_average_path=f"energy_consumption/{dataset_version}/consumption_averages.parquet", - cleaned=cleaned + cleaned=cleaned, + environment=get_settings().ENVIRONMENT ) logger.info("Getting spatial data") diff --git a/etl/bill_savings/EnergyConsumptionModel.py b/etl/bill_savings/EnergyConsumptionModel.py index e7658de5..9a7d6523 100644 --- a/etl/bill_savings/EnergyConsumptionModel.py +++ b/etl/bill_savings/EnergyConsumptionModel.py @@ -1,11 +1,10 @@ import pandas as pd import numpy as np from xgboost import XGBRegressor -from datetime import datetime from sklearn.model_selection import train_test_split from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error from 
sklearn.feature_selection import RFECV -from utils.s3 import save_pickle_to_s3, read_pickle_from_s3, read_dataframe_from_s3_parquet +from utils.s3 import save_pickle_to_s3, read_pickle_from_s3, read_dataframe_from_s3_parquet, read_csv_from_s3 from utils.logger import setup_logger logger = setup_logger() @@ -46,11 +45,17 @@ class EnergyConsumptionModel: "low-energy-lighting", "environment-impact-current", "energy-tariff", "current-energy-rating" ] - def __init__(self, cleaned, model_paths=None, dummy_schema_path=None, consumption_average_path=None, n_jobs=1): + retail_price_comparison = None + + def __init__( + self, cleaned, model_paths=None, dummy_schema_path=None, consumption_average_path=None, n_jobs=1, + environment="dev" + ): self.cleaned = cleaned self.models = {} self.model_paths = model_paths or {} self.n_jobs = n_jobs + self.environment = environment self.data = None self.input_data = None @@ -76,26 +81,84 @@ class EnergyConsumptionModel: if model_paths: for target, path in model_paths.items(): # Read model - self.models[target] = read_pickle_from_s3(bucket_name="retrofit-model-directory-dev", s3_file_name=path) + self.models[target] = read_pickle_from_s3( + bucket_name=f"retrofit-model-directory-{environment}", s3_file_name=path + ) # Read dummy schema if dummy_schema_path: self.dummy_schema = read_pickle_from_s3( - bucket_name="retrofit-model-directory-dev", + bucket_name=f"retrofit-model-directory-{environment}", s3_file_name=dummy_schema_path ) self.consumption_averages = None if consumption_average_path: self.consumption_averages = read_dataframe_from_s3_parquet( - bucket_name="retrofit-data-dev", + bucket_name=f"retrofit-data-{environment}", file_key=consumption_average_path ) + # We also retrieve the newest retail price comparison data which comes from Ofgem: + # https://www.ofgem.gov.uk/energy-data-and-research/data-portal/retail-market-indicators + # We use the retail price comparison by company and tariff type data +
self.read_retail_price_comparison() + + def read_retail_price_comparison(self): + data = read_csv_from_s3( + bucket_name=f"retrofit-data-{self.environment}", + filepath="energy_consumption/retail-price-comparison.csv" + ) + header = ['Date', 'Average standard variable tariff (Large legacy suppliers)', + 'Average standard variable tariff (Other suppliers)', 'Average fixed tariff', + 'Cheapest tariff (Large legacy suppliers)', 'Cheapest tariff (All suppliers)', + 'Cheapest tariff (Basket)', 'Default tariff cap level'] + + # Extract data rows + data_rows = [] + for row in data[1:]: + date = row['\ufeff"'] + values = row[None] + data_rows.append([date] + values) + + self.retail_price_comparison = pd.DataFrame(data_rows, columns=header) + self.retail_price_comparison['Date'] = pd.to_datetime(self.retail_price_comparison['Date'], errors='coerce') + + def convert_cost_to_today(self, original_cost, lodgement_date): + """ + Given energy costs in an EPC, this function converts that energy cost to a figure based on today's energy costs + (or as close to today as possible) + :param original_cost: The original energy cost + :param lodgement_date: The date the EPC was lodged + :return: The original cost scaled by the ratio of the latest tariff to the tariff closest to the lodgement date + """ + closest_date = self.retail_price_comparison.iloc[ + (self.retail_price_comparison['Date'] - lodgement_date).abs().argsort()[:1] + ]['Date'].values[0] + closest_date = pd.Timestamp(closest_date) + + # Extract the tariff price on the closest date + tariff_2024 = self.retail_price_comparison[ + self.retail_price_comparison['Date'] == closest_date + ]['Average standard variable tariff (Large legacy suppliers)'].values[0] + + # Extract the latest available tariff price + latest_tariff = self.retail_price_comparison[ + 'Average standard variable tariff (Large legacy suppliers)' + ].iloc[-1] + + # Calculate the ratio + ratio = float(latest_tariff) / float(tariff_2024) + + # Calculate the updated cost + updated_cost = original_cost * ratio + + return updated_cost + def read_dataset(self,
file_path): """Reads the dataset from the specified file path.""" logger.info(f"Reading dataset from {file_path}") - self.data = read_dataframe_from_s3_parquet(bucket_name="retrofit-data-dev", file_key=file_path) + self.data = read_dataframe_from_s3_parquet(bucket_name=f"retrofit-data-{self.environment}", file_key=file_path) self.input_data = self.data.copy() def feature_engineering(self, drop_first=False): @@ -392,7 +455,7 @@ class EnergyConsumptionModel: logger.info(f"Saving model for target {target}") save_pickle_to_s3( self.models[target], - bucket_name="retrofit-model-directory-dev", + bucket_name=f"retrofit-model-directory-{self.environment}", s3_file_name=f"model_directory/energy_consumption_model/{target}_{dataset_version}.pkl" ) @@ -400,7 +463,7 @@ class EnergyConsumptionModel: logger.info("Saving dummy schema for target {target}") save_pickle_to_s3( self.dummy_schema, - bucket_name="retrofit-model-directory-dev", + bucket_name=f"retrofit-model-directory-{self.environment}", s3_file_name=f"model_directory/energy_consumption_model/{dataset_version}_dummy_schema.pkl" )