Completed set up of bills costs

2026-07-27 23:35:01 +00:00 · 2024-07-09 14:03:21 +01:00 · 2024-07-09 14:03:21 +01:00 · 4c79abe512
commit 4c79abe512
parent 5274b9b713
3 changed files with 115 additions and 44 deletions
--- a/backend/Property.py
+++ b/backend/Property.py
@ -643,14 +643,24 @@ class Property:
        https://www.sciencedirect.com/science/article/pii/S0378778823002542
        :return:
        """
-        # We should adjust the costs first and then calculate the energy consumption
-        # These are the costs at the time that the EPC was created - we need a cost based on today's prices so
-        # we do the following:
-        # 1) Score the models to get kwh for heating and hot water
-        # 2) Adjust the kwh values with the UCL paper
-        # 3) Convert the kwh values to costs today using current prices
-        #
-        # For the moment, we'll just use the same cost for lighting, since the value is quite low
+
+        # We get the following things:
+        # 1) Today's cost. This gives us a baseline figure for what the cost is today
+        # 2) Predicted kWh
+
+        # Today's costs
+        todays_heating_cost = energy_consumption_client.convert_cost_to_today(
+            original_cost=float(self.data["heating-cost-current"]),
+            lodgement_date=pd.Timestamp(self.epc_record.prepared_epc["lodgement_date"])
+        )
+        todays_hot_water_cost = energy_consumption_client.convert_cost_to_today(
+            original_cost=float(self.data["hot-water-cost-current"]),
+            lodgement_date=pd.Timestamp(self.epc_record.prepared_epc["lodgement_date"])
+        )
+        todays_lighting_cost = energy_consumption_client.convert_cost_to_today(
+            original_cost=float(self.data["lighting-cost-current"]),
+            lodgement_date=pd.Timestamp(self.epc_record.prepared_epc["lodgement_date"])
+        )

        scoring_df = pd.DataFrame([self.epc_record.prepared_epc])
        # Change columns from underscores to hyphens
@ -694,33 +704,34 @@ class Property:
            current_epc_rating=self.data["current-energy-rating"],
        )

-        # Convert to cost
-        if self.heating_energy_source == "Electricity":
-            adjusted_heating_cost = adjusted_heating_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP
-            unadjusted_heating_cost = heating_prediction * AnnualBillSavings.ELECTRICITY_PRICE_CAP
-        elif self.heating_energy_source == "Natural Gas":
-            adjusted_heating_cost = adjusted_heating_kwh * AnnualBillSavings.GAS_PRICE_CAP
-            unadjusted_heating_cost = heating_prediction * AnnualBillSavings.GAS_PRICE_CAP
-        else:
-            raise NotImplementedError("Not implemented cost for this fuel type")
+        # Adjust today's cost figures with the UCL model
+        adjusted_heating_cost = AnnualBillSavings.adjust_energy_cost_to_metered(
+            epc_energy_cost=todays_heating_cost,
+            current_epc_rating=self.data["current-energy-rating"],
+        )

-        if self.hot_water_energy_source == "Electricity":
-            adjusted_hot_water_cost = adjusted_hot_water_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP
-            unadjusted_hot_water_cost = hot_water_prediction * AnnualBillSavings.ELECTRICITY_PRICE_CAP
-        elif self.hot_water_energy_source == "Natural Gas":
-            adjusted_hot_water_cost = adjusted_hot_water_kwh * AnnualBillSavings.GAS_PRICE_CAP
-            unadjusted_hot_water_cost = hot_water_prediction * AnnualBillSavings.GAS_PRICE_CAP
-        else:
-            raise NotImplementedError("Not implemented cost for this fuel type")
+        adjusted_hot_water_cost = AnnualBillSavings.adjust_energy_cost_to_metered(
+            epc_energy_cost=todays_hot_water_cost,
+            current_epc_rating=self.data["current-energy-rating"],
+        )

-        adjusted_lighting_cost = adjusted_lighting_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP
+        adjusted_lighting_cost = AnnualBillSavings.adjust_energy_cost_to_metered(
+            epc_energy_cost=todays_lighting_cost,
+            current_epc_rating=self.data["current-energy-rating"],
+        )

-        adjusted_appliances_cost = adjusted_applicances_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP
+        adjusted_appliances_cost = AnnualBillSavings.adjust_energy_cost_to_metered(
+            epc_energy_cost=appliances_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP,
+            current_epc_rating=self.data["current-energy-rating"],
+        )

        # Sum up the adjusted kwh figures
        self.current_adjusted_energy = (
            adjusted_heating_kwh + adjusted_hot_water_kwh + adjusted_lighting_kwh + adjusted_applicances_kwh
        )
+        self.expected_energy_bill = (
+            adjusted_heating_cost + adjusted_hot_water_cost + adjusted_lighting_cost + adjusted_appliances_cost
+        )

        self.energy_cost_estimates = {
            "adjusted": {
@ -730,9 +741,9 @@ class Property:
                "appliances": adjusted_appliances_cost
            },
            "unadjusted": {
-                "heating": unadjusted_heating_cost,
-                "hot_water": unadjusted_hot_water_cost,
-                "lighting": float(self.data["lighting-cost-current"]),
+                "heating": todays_heating_cost,
+                "hot_water": todays_hot_water_cost,
+                "lighting": todays_lighting_cost,
                "appliances": appliances_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP
            },
            "epc": {
@ -757,10 +768,6 @@ class Property:
            }
        }

-        self.expected_energy_bill = (
-            adjusted_heating_cost + adjusted_hot_water_cost + adjusted_lighting_cost + adjusted_appliances_cost
-        )
-
    def set_spatial(self, spatial: pd.DataFrame):
        """
        Sets whether the property is in a conservation area given the output of the ConservationAreaClient
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@ -359,7 +359,8 @@ async def trigger_plan(body: PlanTriggerRequest):
            },
            dummy_schema_path=f"model_directory/energy_consumption_model/{dataset_version}_dummy_schema.pkl",
            consumption_average_path=f"energy_consumption/{dataset_version}/consumption_averages.parquet",
-            cleaned=cleaned
+            cleaned=cleaned,
+            environment=get_settings().ENVIRONMENT
        )

        logger.info("Getting spatial data")
--- a/etl/bill_savings/EnergyConsumptionModel.py
+++ b/etl/bill_savings/EnergyConsumptionModel.py
@ -1,11 +1,10 @@
 import pandas as pd
 import numpy as np
 from xgboost import XGBRegressor
-from datetime import datetime
 from sklearn.model_selection import train_test_split
 from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
 from sklearn.feature_selection import RFECV
-from utils.s3 import save_pickle_to_s3, read_pickle_from_s3, read_dataframe_from_s3_parquet
+from utils.s3 import save_pickle_to_s3, read_pickle_from_s3, read_dataframe_from_s3_parquet, read_csv_from_s3
 from utils.logger import setup_logger

 logger = setup_logger()
@ -46,11 +45,17 @@ class EnergyConsumptionModel:
        "low-energy-lighting", "environment-impact-current", "energy-tariff", "current-energy-rating"
    ]

-    def __init__(self, cleaned, model_paths=None, dummy_schema_path=None, consumption_average_path=None, n_jobs=1):
+    retail_price_comparison = None
+
+    def __init__(
+        self, cleaned, model_paths=None, dummy_schema_path=None, consumption_average_path=None, n_jobs=1,
+        environment="dev"
+    ):
        self.cleaned = cleaned
        self.models = {}
        self.model_paths = model_paths or {}
        self.n_jobs = n_jobs
+        self.environment = environment

        self.data = None
        self.input_data = None
@ -76,26 +81,84 @@ class EnergyConsumptionModel:
        if model_paths:
            for target, path in model_paths.items():
                # Read model
-                self.models[target] = read_pickle_from_s3(bucket_name="retrofit-model-directory-dev", s3_file_name=path)
+                self.models[target] = read_pickle_from_s3(
+                    bucket_name=f"retrofit-model-directory-{environment}", s3_file_name=path
+                )
                # Read dummy schema

        if dummy_schema_path:
            self.dummy_schema = read_pickle_from_s3(
-                bucket_name="retrofit-model-directory-dev",
+                bucket_name=f"retrofit-model-directory-{environment}",
                s3_file_name=dummy_schema_path
            )

        self.consumption_averages = None
        if consumption_average_path:
            self.consumption_averages = read_dataframe_from_s3_parquet(
-                bucket_name="retrofit-data-dev",
+                bucket_name=f"retrofit-data-{environment}",
                file_key=consumption_average_path
            )

+            # We also retrieve the newest retail price comparison data, which comes from Ofgem:
+            # https://www.ofgem.gov.uk/energy-data-and-research/data-portal/retail-market-indicators
+            # We use the retail price comparison by company and tariff type data
+            self.read_retail_price_comparison()
+
+    def read_retail_price_comparison(self):
+        data = read_csv_from_s3(
+            bucket_name=f"retrofit-data-{self.environment}",
+            filepath="energy_consumption/retail-price-comparison.csv"
+        )
+        header = ['Date', 'Average standard variable tariff (Large legacy suppliers)',
+                  'Average standard variable tariff (Other suppliers)', 'Average fixed tariff',
+                  'Cheapest tariff (Large legacy suppliers)', 'Cheapest tariff (All suppliers)',
+                  'Cheapest tariff (Basket)', 'Default tariff cap level']
+
+        # Extract data rows
+        data_rows = []
+        for row in data[1:]:
+            date = row['\ufeff"']
+            values = row[None]
+            data_rows.append([date] + values)
+
+        self.retail_price_comparison = pd.DataFrame(data_rows, columns=header)
+        self.retail_price_comparison['Date'] = pd.to_datetime(self.retail_price_comparison['Date'], errors='coerce')
+
+    def convert_cost_to_today(self, original_cost, lodgement_date):
+        """
+        Given energy costs in an EPC, this function converts that energy cost to a figure based on today's energy costs
+        (or as close to today as possible)
+        :param original_cost: The original energy cost
+        :param lodgement_date: The date the EPC was lodged
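+        :return: original_cost scaled by (tariff in the last row of the price table / tariff closest to lodgement_date)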
+        """
+        closest_date = self.retail_price_comparison.iloc[
+            (self.retail_price_comparison['Date'] - lodgement_date).abs().argsort()[:1]
+        ]['Date'].values[0]
+        closest_date = pd.Timestamp(closest_date)
+
+        # Extract the tariff price on the closest date
+        tariff_2024 = self.retail_price_comparison[
+            self.retail_price_comparison['Date'] == closest_date
+            ]['Average standard variable tariff (Large legacy suppliers)'].values[0]
+
+        # Extract the latest available tariff price
+        latest_tariff = self.retail_price_comparison[
+            'Average standard variable tariff (Large legacy suppliers)'
+        ].iloc[-1]
+
+        # Calculate the ratio
+        ratio = float(latest_tariff) / float(tariff_2024)
+
+        # Scale the original cost (heating, hot water or lighting) by the tariff ratio
+        updated_cost = original_cost * ratio
+
+        return updated_cost
+
    def read_dataset(self, file_path):
        """Reads the dataset from the specified file path."""
        logger.info(f"Reading dataset from {file_path}")
-        self.data = read_dataframe_from_s3_parquet(bucket_name="retrofit-data-dev", file_key=file_path)
+        self.data = read_dataframe_from_s3_parquet(bucket_name=f"retrofit-data-{self.environment}", file_key=file_path)
        self.input_data = self.data.copy()

    def feature_engineering(self, drop_first=False):
@ -392,7 +455,7 @@ class EnergyConsumptionModel:
        logger.info(f"Saving model for target {target}")
        save_pickle_to_s3(
            self.models[target],
-            bucket_name="retrofit-model-directory-dev",
+            bucket_name=f"retrofit-model-directory-{self.environment}",
            s3_file_name=f"model_directory/energy_consumption_model/{target}_{dataset_version}.pkl"
        )

@ -400,7 +463,7 @@ class EnergyConsumptionModel:
        logger.info("Saving dummy schema for target {target}")
        save_pickle_to_s3(
            self.dummy_schema,
-            bucket_name="retrofit-model-directory-dev",
+            bucket_name=f"retrofit-model-directory-{self.environment}",
            s3_file_name=f"model_directory/energy_consumption_model/{dataset_version}_dummy_schema.pkl"
        )