diff --git a/backend/Property.py b/backend/Property.py index 497d976a..5c065458 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -585,14 +585,14 @@ class Property: def get_components( self, cleaned, - energy_consumption_client, + kwh_client, kwh_predictions ): """ Given the cleaning that has been performed, we'll use this to identify the property components, from roof to walls to windows, heating and hot water :param cleaned: This is the dictionary of components found in cleaner.cleaned - :param energy_consumption_client: The client that will be used to convert the energy costs to today's costs + :param kwh_client: The client that will be used to convert the energy costs to today's costs :param kwh_predictions: Contains the kwh predictions for heating and hot water :return: """ @@ -658,7 +658,7 @@ class Property: self.set_windows_count() self.set_energy_source() self.find_energy_sources() - self.set_current_energy_bill(energy_consumption_client, kwh_predictions) + self.set_current_energy_bill(kwh_client, kwh_predictions) def set_solar_panel_configuration( self, solar_panel_configuration, roof_area @@ -671,7 +671,7 @@ class Property: # We also set the roof area self.roof_area = roof_area - def set_current_energy_bill(self, energy_consumption_client, kwh_predictions): + def set_current_energy_bill(self, kwh_client, kwh_predictions): """ Given what we know about the property now, estimates the current energy consumption using the UCL paper https://www.sciencedirect.com/science/article/pii/S0378778823002542 @@ -683,7 +683,7 @@ class Property: # 2) Predicted KwH # Today's costs - todays_lighting_cost = energy_consumption_client.convert_cost_to_today( + todays_lighting_cost = kwh_client.convert_cost_to_today( original_cost=float(self.data["lighting-cost-current"]), lodgement_date=pd.Timestamp(self.epc_record.prepared_epc["lodgement_date"]).tz_localize(None) ) diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 8a9cbd53..56b4909e 100644 
--- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -431,9 +431,11 @@ async def trigger_plan(body: PlanTriggerRequest): environment=get_settings().ENVIRONMENT ) + kwh_client = KwhData(bucket=get_settings().DATA_BUCKET, read_consumption_data=True) + model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at) - epcs_for_scoring = KwhData().transform(data=KwhData().prepare_epc(input_properties), cleaned=cleaned) + epcs_for_scoring = kwh_client.transform(data=kwh_client.prepare_epc(input_properties), cleaned=cleaned) kwh_predictions = model_api.predict_all( df=epcs_for_scoring, @@ -444,14 +446,13 @@ async def trigger_plan(body: PlanTriggerRequest): ) # Insert the spatial data + logger.info("Getting spatial data") input_properties = OpenUprnClient.set_spatial_data(input_properties, bucket_name=get_settings().DATA_BUCKET) - logger.info("Getting spatial data") + logger.info("Setting property components") for p in tqdm(input_properties): p.get_components( - cleaned=cleaned, - energy_consumption_client=energy_consumption_client, - kwh_predictions=kwh_predictions + cleaned=cleaned, kwh_client=kwh_client, kwh_predictions=kwh_predictions ) logger.info("Performing solar analysis") diff --git a/etl/bill_savings/EnergyConsumptionModel.py b/etl/bill_savings/EnergyConsumptionModel.py index 25bd04ed..4daf2b31 100644 --- a/etl/bill_savings/EnergyConsumptionModel.py +++ b/etl/bill_savings/EnergyConsumptionModel.py @@ -126,37 +126,6 @@ class EnergyConsumptionModel: self.retail_price_comparison = pd.DataFrame(data_rows, columns=header) self.retail_price_comparison['Date'] = pd.to_datetime(self.retail_price_comparison['Date'], errors='coerce') - def convert_cost_to_today(self, original_cost, lodgement_date): - """ - Given energy costs in an EPC, this function converts that energy cost to a figure based on today's energy costs - (or as close to today as possible) - :param original_cost: The original energy cost - :param lodgement_date: The date the EPC was 
lodged - :return: - """ - closest_date = self.retail_price_comparison.iloc[ - (self.retail_price_comparison['Date'] - lodgement_date).abs().argsort()[:1] - ]['Date'].values[0] - closest_date = pd.Timestamp(closest_date) - - # Extract the tariff price on the closest date - tariff_2024 = self.retail_price_comparison[ - self.retail_price_comparison['Date'] == closest_date - ]['Average standard variable tariff (Large legacy suppliers)'].values[0] - - # Extract the latest available tariff price - latest_tariff = self.retail_price_comparison[ - 'Average standard variable tariff (Large legacy suppliers)' - ].iloc[-1] - - # Calculate the ratio - ratio = float(latest_tariff) / float(tariff_2024) - - # Calculate the updated heating cost - updated_cost = original_cost * ratio - - return updated_cost - def read_dataset(self, file_path): """Reads the dataset from the specified file path.""" logger.info(f"Reading dataset from {file_path}") diff --git a/etl/bill_savings/KwhData.py b/etl/bill_savings/KwhData.py index 39461c81..5563014b 100644 --- a/etl/bill_savings/KwhData.py +++ b/etl/bill_savings/KwhData.py @@ -4,7 +4,10 @@ import numpy as np from datetime import datetime from tqdm import tqdm from utils.logger import setup_logger -from utils.s3 import list_files_in_s3_folder, read_pickle_from_s3, save_dataframe_to_s3_parquet +from utils.s3 import ( + list_files_in_s3_folder, read_pickle_from_s3, save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet, + read_csv_from_s3 +) from backend.Property import Property logger = setup_logger() @@ -30,7 +33,7 @@ class KwhData: 'heating-cost-potential', 'hot-water-cost-current', 'current-energy-efficiency' ] - def __init__(self, bucket=None): + def __init__(self, bucket=None, read_consumption_data=False): self.run_date = datetime.now().strftime("%Y-%m-%d") self.bucket = bucket self.data = None @@ -39,6 +42,50 @@ class KwhData: self.consumption_averages_filepath = None self.model_training_data_filepath = None + 
# Methods added to class KwhData (etl/bill_savings/KwhData.py) by this patch, laid out as regular Python.
# In the same patch, KwhData.__init__ sets self.consumption_averages and self.retail_price_comparison to None
# and, when read_consumption_data is True, calls get_consumption_data() followed by
# read_retail_price_comparison().


def get_consumption_data(self):
    """
    Loads the most recent consumption averages parquet file from S3 into self.consumption_averages.

    Keys under "energy_consumption/" whose path contains "consumption_averages.parquet" are considered. The
    second path segment of each key is parsed as the run date and the file with the latest run date is read.
    Keys whose second path segment is not a date are skipped with a warning rather than aborting the load.

    :raises ValueError: If no consumption averages file with a readable run date is found
    :return:
    """
    s3_keys = list_files_in_s3_folder(bucket_name=self.bucket, folder_name="energy_consumption/")

    latest = None
    for key in s3_keys:
        if "consumption_averages.parquet" not in key:
            continue

        segments = key.split("/")
        # presumably keys look like energy_consumption/<run_date>/consumption_averages.parquet - TODO confirm
        run_date = pd.to_datetime(segments[1], errors="coerce") if len(segments) > 1 else pd.NaT
        if pd.isna(run_date):
            logger.warning(f"Skipping {key}: no run date could be read from its path")
            continue

        # ">=" keeps the later-listed key when two files share a run date, as the stable sort + [-1] did
        if latest is None or run_date >= latest["run_date"]:
            latest = {"run_date": run_date, "filepath": key}

    if latest is None:
        raise ValueError("No consumption averages data found, something went wrong")

    self.consumption_averages = read_dataframe_from_s3_parquet(
        bucket_name=self.bucket,
        file_key=latest["filepath"]
    )


def read_retail_price_comparison(self):
    """
    Reads energy_consumption/retail-price-comparison.csv from S3 and stores it as a DataFrame in
    self.retail_price_comparison, with the Date column parsed to datetimes (unparseable dates become NaT).

    :return:
    """
    raw_rows = read_csv_from_s3(
        bucket_name=self.bucket,
        filepath="energy_consumption/retail-price-comparison.csv"
    )
    columns = [
        'Date', 'Average standard variable tariff (Large legacy suppliers)',
        'Average standard variable tariff (Other suppliers)', 'Average fixed tariff',
        'Cheapest tariff (Large legacy suppliers)', 'Cheapest tariff (All suppliers)',
        'Cheapest tariff (Basket)', 'Default tariff cap level'
    ]

    # NOTE(review): the first row is skipped and each remaining row is read through the keys '\ufeff"' (date)
    # and None (list of the other values). This looks like csv.DictReader output for a file whose first line
    # is not the real header - confirm against read_csv_from_s3 before changing.
    records = [[row['\ufeff"']] + row[None] for row in raw_rows[1:]]

    price_table = pd.DataFrame(records, columns=columns)
    price_table['Date'] = pd.to_datetime(price_table['Date'], errors='coerce')
    self.retail_price_comparison = price_table


def convert_cost_to_today(self, original_cost, lodgement_date):
    """
    Given energy costs in an EPC, this function converts that energy cost to a figure based on today's energy
    costs (or as close to today as possible).

    The cost is scaled by the ratio between the most recently dated tariff and the tariff whose date is closest
    to the lodgement date, using the 'Average standard variable tariff (Large legacy suppliers)' column.
    Rows without a parseable date or a numeric tariff are ignored.

    :param original_cost: The original energy cost
    :param lodgement_date: The date the EPC was lodged (timezone-naive, to match the parsed Date column)
    :raises ValueError: If the retail price comparison data has not been loaded or has no usable rows
    :raises ZeroDivisionError: If the tariff closest to the lodgement date is zero
    :return: The original cost scaled to the latest available tariff
    """
    tariff_column = 'Average standard variable tariff (Large legacy suppliers)'

    price_table = self.retail_price_comparison
    if price_table is None:
        raise ValueError(
            "Retail price comparison data has not been loaded; create KwhData with read_consumption_data=True "
            "or call read_retail_price_comparison() first"
        )

    dates = pd.to_datetime(price_table['Date'], errors='coerce')
    tariffs = pd.to_numeric(price_table[tariff_column], errors='coerce')

    # The Date column is parsed with errors='coerce', so it can hold NaT. Those rows are dropped up front:
    # ranking the date differences with missing values present does not give a reliable nearest row.
    usable = dates.notna() & tariffs.notna()
    if not usable.any():
        raise ValueError("Retail price comparison data has no rows with both a valid date and a tariff")
    dates = dates[usable]
    tariffs = tariffs[usable]

    # Tariff on the date closest to the lodgement date
    closest_position = (dates - lodgement_date).abs().to_numpy().argmin()
    tariff_at_lodgement = float(tariffs.iloc[closest_position])

    # Latest available tariff, chosen by date rather than by row order
    latest_tariff = float(tariffs.iloc[dates.to_numpy().argmax()])

    ratio = latest_tariff / tariff_at_lodgement

    return original_cost * ratio