From 523ca28b686da9ef292fcb2dd3f88cb268936d52 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 5 Jul 2024 12:49:23 +0100
Subject: [PATCH 01/36] Added new score_new_data function and setting up
 training script

---
 etl/bill_savings/EnergyConsumptionModel.py | 104 ++++-----------------
 etl/bill_savings/training.py               |   5 +
 2 files changed, 24 insertions(+), 85 deletions(-)
 create mode 100644 etl/bill_savings/training.py

diff --git a/etl/bill_savings/EnergyConsumptionModel.py b/etl/bill_savings/EnergyConsumptionModel.py
index b616be08..14ece803 100644
--- a/etl/bill_savings/EnergyConsumptionModel.py
+++ b/etl/bill_savings/EnergyConsumptionModel.py
@@ -26,8 +26,6 @@ class EnergyConsumptionModel:
             "flat-storey-count", "unheated-corridor-length", "solar-water-heating-flag", "mechanical-ventilation",
             "low-energy-lighting", "environment-impact-current", "energy-tariff",
             "county", "construction-age-band", "co2-emissions-current",
-            # TODO: Testing
-            "lighting-cost-current", "hot-water-cost-current", "current-energy-rating"
         ],
         "hot_water_kwh": [
             "lodgement-year", "lodgement-month",
@@ -144,9 +142,9 @@ class EnergyConsumptionModel:
             self.data = self.data.drop(columns=["original_description", "thermal_transmittance", "from", "to"])
 
         # Modify number of heated rooms and number of habitable rooms
-        self.data["number-heated-rooms"] = self.data["number-heated-rooms"].apply(
-            lambda x: "16_or_more" if x > 15 else str(x)
-        )
+        # self.data["number-heated-rooms"] = self.data["number-heated-rooms"].apply(
+        #     lambda x: "16_or_more" if x > 15 else str(x)
+        # )
         # self.data["number-habitable-rooms"] = self.data["number-habitable-rooms"].apply(
         #     lambda x: "10+" if x > 10 else str(x)
         # )
@@ -398,93 +396,29 @@ class EnergyConsumptionModel:
         if target not in self.models:
             raise ValueError(f"Model for target {target} not loaded or trained")
 
-        new_data_transformed = self.transform_new_data(new_data, target)
-        return self.models[target].predict(new_data_transformed)
+        # Verify that self.data is None
+        if self.data is not None:
+            raise ValueError("self.data is not None. Ensure that self.data is reset before scoring new data.")
 
-    def transform_new_data(self, new_data, target):
-        """Applies the same transformations to new data as were applied to the training data."""
+        # Temporarily set self.data to new data
+        self.data = new_data.copy()
 
-        # TODO THis should jsut use our other transformation function
-        new_data["lodgement-date"] = pd.to_datetime(new_data["lodgement-date"])
-        new_data["lodgement-year"] = new_data["lodgement-date"].dt.year
-        new_data["lodgement-month"] = new_data["lodgement-date"].dt.month
+        # Run feature engineering
+        self.feature_engineering()
 
-        # Convert categorical columns to dummies
-        new_data = pd.get_dummies(new_data, columns=self.CATEGORICAL_COLUMNS, drop_first=True)
+        # Select the transformed data
+        new_data_transformed = self.data[self.dummy_columns[target]]
 
-        # Align new data with the dummy columns from training data
-        new_data = new_data.reindex(columns=self.dummy_columns[target], fill_value=0)
+        # Ensure the columns match the selected features
+        new_data_transformed = new_data_transformed[self.selected_features[target]]
 
-        # Select the features used by the model
-        new_data = new_data[self.selected_features[target]]
+        # Generate predictions
+        predictions = self.models[target].predict(new_data_transformed)
 
-        return new_data
+        # Reset self.data to None
+        self.data = None
 
-    def error_analysis(self, target, top_n=10, unique_threshold=0.8):
-        """
-        Perform error analysis on the provided model and dataset.
-
-        Parameters:
-        - target: The target variable to analyze.
-        - top_n: Number of top residuals to consider for analysis.
-        - unique_threshold: Threshold to exclude columns with high unique values.
-
-        Returns:
-        - summary: Dictionary summarizing common features among poorly performing rows.
-        """
-
-        # Calculate predictions and residuals
-        y_train_pred = self.models[target].predict(self.x_train[target])
-        y_test_pred = self.models[target].predict(self.x_test[target])
-
-        train_residuals = self.y_train[target] - y_train_pred
-        test_residuals = self.y_test[target] - y_test_pred
-
-        # Identify top N poorly performing rows by absolute residuals
-        top_train_indices = train_residuals.abs().nlargest(top_n).index
-        top_test_indices = test_residuals.abs().nlargest(top_n).index
-
-        top_train_data = self.input_data.loc[top_train_indices]
-        top_test_data = self.input_data.loc[top_test_indices]
-
-        # Automatically detect and exclude columns
-        def exclude_columns(data, threshold):
-            exclude_cols = []
-            num_rows = data.shape[0]
-            for col in data.columns:
-                if data[col].dtype == 'object' and data[col].nunique() / num_rows >= threshold:
-                    exclude_cols.append(col)
-            return exclude_cols
-
-        exclude_cols = exclude_columns(top_train_data, unique_threshold)
-
-        top_train_data = top_train_data.drop(columns=exclude_cols)
-        top_test_data = top_test_data.drop(columns=exclude_cols)
-
-        # One-hot encode categorical variables
-        categorical_columns = top_train_data.select_dtypes(include=['object']).columns.tolist()
-        top_train_data_encoded = pd.get_dummies(top_train_data, columns=categorical_columns, drop_first=True)
-        top_test_data_encoded = pd.get_dummies(top_test_data, columns=categorical_columns, drop_first=True)
-
-        # Ensure all original columns are included in the encoded data
-        top_train_data_encoded = top_train_data_encoded.reindex(columns=self.input_data.columns, fill_value=0)
-        top_test_data_encoded = top_test_data_encoded.reindex(columns=self.input_data.columns, fill_value=0)
-
-        # Correlation analysis with residuals
-        train_corr = top_train_data_encoded.corrwith(train_residuals.loc[top_train_indices])
-        test_corr = top_test_data_encoded.corrwith(test_residuals.loc[top_test_indices])
-
-        # Return summaries
-        summary = {
-            "train_summary": top_train_data.describe(include='all').T,
-            "test_summary": top_test_data.describe(include='all').T,
-            "train_corr": train_corr,
-            "test_corr": test_corr,
-            "top_train_data": top_train_data,
-            "top_test_data": top_test_data
-        }
-
-        return summary
+        return predictions
 
 
 # Usage:
diff --git a/etl/bill_savings/training.py b/etl/bill_savings/training.py
new file mode 100644
index 00000000..2c29c317
--- /dev/null
+++ b/etl/bill_savings/training.py
@@ -0,0 +1,5 @@
+def hanlder():
+    """
+    This function is used to train the model and store the final models in s3 as pickles
+    :return:
+    """

From 96235ed3a9d4c60b9d83bf6a61888df1dac4d1d1 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 8 Jul 2024 10:48:00 +0100
Subject: [PATCH 02/36] adding storage of dummy schema

---
 backend/Property.py                           |  26 +++-
 backend/app/plan/router.py                    |  30 ++++-
 backend/requirements/base.txt                 |   5 +-
 etl/bill_savings/EnergyConsumptionModel.py    | 119 ++++++------------
 etl/bill_savings/data_collection.py           |   2 +-
 etl/bill_savings/training.py                  |  53 +++++++-
 .../places_for_people/demo_portfolio.py       | 118 +++++++++++++++++
 7 files changed, 269 insertions(+), 84 deletions(-)
 create mode 100644 etl/customers/places_for_people/demo_portfolio.py

diff --git a/backend/Property.py b/backend/Property.py
index a80c3057..76bea0a6 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -512,7 +512,11 @@ class Property:
         return output
 
     def get_components(
-        self, cleaned, photo_supply_lookup, floor_area_decile_thresholds
+        self,
+        cleaned,
+        photo_supply_lookup,
+        floor_area_decile_thresholds,
+        energy_consumption_client
     ):
         """
         Given the cleaning that has been performed, we'll use this to identify the property
@@ -522,6 +526,8 @@ class Property:
                                     of the roof that is suitable for solar panels
         :param floor_area_decile_thresholds: This is the decile thresholds for the floor area, used in estimating the
                                              solar pv roof area
+        :param energy_consumption_client: Contains the heating and hot water kwh models - used to predict current
+                                        energy annual consumption in kWh
         :return:
         """
 
@@ -592,12 +598,28 @@ class Property:
         self.find_energy_sources()
         self.set_current_energy_bill()
 
-    def set_current_energy_bill(self):
+    def set_current_energy_bill(self, energy_consumption_client):
         """
         Given what we know about the property now, estimates the current energy consumption using the UCL paper
         https://www.sciencedirect.com/science/article/pii/S0378778823002542
         :return:
         """
+        scoring_df = pd.DataFrame([self.epc_record.prepared_epc])
+        # Change columns from underscores to hyphens
+        scoring_df.columns = [
+            x.lower().replace("_", "-") for x in scoring_df.columns
+        ]
+        for col in ["heating_kwh", "hot_water_kwh"]:
+            scoring_df[col] = None
+        energy_consumption_client.data = None
+        heating_prediction = energy_consumption_client.score_new_data(
+            new_data=scoring_df, target="heating_kwh"
+        )
+
+        hot_water_prediction = energy_consumption_client.score_new_data(
+            new_data=scoring_df, target="hot_water_kwh"
+        )
+
         starting_heat_demand = (
             float(self.data["energy-consumption-current"]) * self.floor_area
         )
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 258449c2..7c2d156b 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -40,6 +40,7 @@ from recommendations.Mds import Mds
 from utils.logger import setup_logger
 from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3
 from backend.ml_models.Valuation import PropertyValuation
+from etl.bill_savings.EnergyConsumptionModel import EnergyConsumptionModel
 
 logger = setup_logger()
 
@@ -262,6 +263,7 @@ async def trigger_plan(body: PlanTriggerRequest):
             bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet",
         )
 
+        # TODO: insert building id
         input_properties = []
         for config in tqdm(plan_input):
             # We validate each record in the file. If the record is NOT valid, we need to handle this accordingly
@@ -337,6 +339,11 @@ async def trigger_plan(body: PlanTriggerRequest):
         if not input_properties:
             return Response(status_code=204)
 
+        # TOOD: TEMP - store locally as pickle
+        # import pickle
+        # with open("input_properties.pkl", "wb") as f:
+        #     pickle.dump(input_properties, f)
+
         # The materials data could be cached or local so we don't need to make
         # consistent requests to the backend for
         # the same data
@@ -350,9 +357,30 @@ async def trigger_plan(body: PlanTriggerRequest):
         photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket=get_settings().DATA_BUCKET)
         solar_api_client = GoogleSolarApi(api_key=get_settings().GOOGLE_SOLAR_API_KEY)
 
+        dataset_version = "2024-07-05"
+        energy_consumption_client = EnergyConsumptionModel(
+            model_paths={
+                "heating_kwh": f"model_directory/energy_consumption_model/heating_kwh_{dataset_version}.pkl",
+                "hot_water_kwh": f"model_directory/energy_consumption_model/hot_water_kwh_{dataset_version}.pkl"
+            },
+            cleaned=cleaned
+        )
+
+        # Store all of these locally
+        # with open("temp_inputs.pkl", "wb") as f:
+        #     pickle.dump({
+        #         "input_properties": input_properties,
+        #         "materials": materials,
+        #         "cleaned": cleaned,
+        #         "uprn_filenames": uprn_filenames,
+        #         "photo_supply_lookup": photo_supply_lookup,
+        #         "floor_area_decile_thresholds": floor_area_decile_thresholds,
+        #         "model_client": model_client
+        #     }, f)
+
         logger.info("Getting spatial data")
         for p in input_properties:
-            p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
+            p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds, energy_consumption_client)
             p.get_spatial_data(uprn_filenames)
             # Call Google Solar API
             # TODO: Complete me
diff --git a/backend/requirements/base.txt b/backend/requirements/base.txt
index 3173f7f8..c4e7367c 100644
--- a/backend/requirements/base.txt
+++ b/backend/requirements/base.txt
@@ -36,4 +36,7 @@ boto3==1.28.3
 pandas==1.5.3
 pyarrow==12.0.1
 textblob
-usaddress==0.5.10
\ No newline at end of file
+usaddress==0.5.10
+
+# Requirements we may not need
+xgboost==1.7.6
\ No newline at end of file
diff --git a/etl/bill_savings/EnergyConsumptionModel.py b/etl/bill_savings/EnergyConsumptionModel.py
index 14ece803..c77001b3 100644
--- a/etl/bill_savings/EnergyConsumptionModel.py
+++ b/etl/bill_savings/EnergyConsumptionModel.py
@@ -1,17 +1,14 @@
 import pandas as pd
 import numpy as np
-import msgpack
 from xgboost import XGBRegressor
 from datetime import datetime
 from sklearn.model_selection import train_test_split
 from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
 from sklearn.feature_selection import RFECV
-from utils.s3 import save_pickle_to_s3, read_pickle_from_s3, read_dataframe_from_s3_parquet, read_from_s3
-import logging
-from pprint import pprint
+from utils.s3 import save_pickle_to_s3, read_pickle_from_s3, read_dataframe_from_s3_parquet
+from utils.logger import setup_logger
 
-# Configure logging
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logger = setup_logger()
 
 
 class EnergyConsumptionModel:
@@ -61,6 +58,7 @@ class EnergyConsumptionModel:
         self.training_predictions = {}
         self.testing_predictions = {}
         self.best_iteration = {}
+        self.dummy_schema = None
 
         self.x_train = {}
         self.x_test = {}
@@ -81,13 +79,13 @@ class EnergyConsumptionModel:
 
     def read_dataset(self, file_path):
         """Reads the dataset from the specified file path."""
-        logging.info(f"Reading dataset from {file_path}")
+        logger.info(f"Reading dataset from {file_path}")
         self.data = read_dataframe_from_s3_parquet(bucket_name="retrofit-data-dev", file_key=file_path)
         self.input_data = self.data.copy()
 
-    def feature_engineering(self):
+    def feature_engineering(self, drop_first=False):
         """Performs feature engineering on the dataset."""
-        logging.info("Starting feature engineering")
+        logger.info("Starting feature engineering")
         self.data["lodgement-date"] = pd.to_datetime(self.data["lodgement-date"])
         self.data["lodgement-year"] = self.data["lodgement-date"].dt.year
         self.data["lodgement-month"] = self.data["lodgement-date"].dt.month
@@ -141,20 +139,13 @@ class EnergyConsumptionModel:
             )
             self.data = self.data.drop(columns=["original_description", "thermal_transmittance", "from", "to"])
 
-        # Modify number of heated rooms and number of habitable rooms
-        # self.data["number-heated-rooms"] = self.data["number-heated-rooms"].apply(
-        #     lambda x: "16_or_more" if x > 15 else str(x)
-        # )
-        # self.data["number-habitable-rooms"] = self.data["number-habitable-rooms"].apply(
-        #     lambda x: "10+" if x > 10 else str(x)
-        # )
-
         # Convert data types
         self.data[self.NUMERICAL_COLUMNS] = self.data[self.NUMERICAL_COLUMNS].apply(pd.to_numeric)
         self.data[self.CATEGORICAL_COLUMNS] = self.data[self.CATEGORICAL_COLUMNS].astype(str)
 
         # Convert categorical columns to dummies
-        self.data = pd.get_dummies(self.data, columns=self.CATEGORICAL_COLUMNS, drop_first=True)
+        self.data = pd.get_dummies(self.data, columns=self.CATEGORICAL_COLUMNS, drop_first=drop_first)
+        self.dummy_schema = self.data.columns.tolist()
 
         # Store the dummy columns
         self.dummy_columns = {}
@@ -168,14 +159,14 @@ class EnergyConsumptionModel:
                     dummy_feature_columns.append(feature)
             self.dummy_columns[target] = dummy_feature_columns
 
-        logging.info("Feature engineering completed")
+        logger.info("Feature engineering completed")
 
     def split_dataset(self, target, test_size=0.2, validation_size=0.2, random_state=42):
         """Splits the dataset into training, validation, and testing sets."""
         if target not in self.TARGETS:
             raise ValueError(f"Target {target} not in {self.TARGETS}")
 
-        logging.info(f"Splitting dataset for target {target}")
+        logger.info(f"Splitting dataset for target {target}")
 
         # Split into train + validation and test sets
         x_train_val, x_test, y_train_val, y_test = train_test_split(
@@ -209,7 +200,7 @@ class EnergyConsumptionModel:
         if target not in self.TARGETS:
             raise ValueError(f"Target {target} not in {self.TARGETS}")
 
-        logging.info(f"Starting feature selection for target {target}")
+        logger.info(f"Starting feature selection for target {target}")
 
         # Sample the data if specified
         if sample_fraction < 1.0:
@@ -236,7 +227,7 @@ class EnergyConsumptionModel:
         self.x_test[target] = self.x_test[target][self.selected_features[target]]
         self.x_val[target] = self.x_val[target][self.selected_features[target]]
 
-        logging.info(f"Feature selection completed for target {target}")
+        logger.info(f"Feature selection completed for target {target}")
 
     def init_model(self, feature_selection=False):
 
@@ -269,7 +260,7 @@ class EnergyConsumptionModel:
     def fit_model(self, target):
         """Fits the model to the training data and removes zero-importance features."""
 
-        logging.info(f"Fitting model for target {target}")
+        logger.info(f"Fitting model for target {target}")
 
         # Initialize and fit the model
         model = self.init_model()
@@ -291,7 +282,7 @@ class EnergyConsumptionModel:
         zero_importance_features = feature_importance[feature_importance['Importance'] == 0]['Feature'].tolist()
 
         if zero_importance_features:
-            logging.info(f"Removing zero-importance features for target {target}: {zero_importance_features}")
+            logger.info(f"Removing zero-importance features for target {target}: {zero_importance_features}")
 
             self.x_train[target] = self.x_train[target].drop(columns=zero_importance_features)
             self.x_val[target] = self.x_val[target].drop(columns=zero_importance_features)
@@ -312,22 +303,22 @@ class EnergyConsumptionModel:
         # Store the best iteration
         self.best_iteration[target] = self.models[target].best_iteration
 
-        logging.info(f"Model fitting completed for target {target}")
+        logger.info(f"Model fitting completed for target {target}")
 
     def re_train_final_model(self, target):
         """Re-trains the final model on the combined training and validation set."""
-        logging.info(f"Re-training final model for target {target}")
+        logger.info(f"Re-training final model for target {target}")
         x_train_val = pd.concat([self.x_train[target], self.x_val[target]])
         y_train_val = pd.concat([self.y_train[target], self.y_val[target]])
 
         self.models[target] = self.init_model()
 
         self.models[target].fit(x_train_val, y_train_val, verbose=False)
-        logging.info(f"Re-training final model completed for target {target}")
+        logger.info(f"Re-training final model completed for target {target}")
 
     def evaluate_model(self, target):
         """Evaluates the model on training and testing data."""
-        logging.info(f"Evaluating model for target {target}")
+        logger.info(f"Evaluating model for target {target}")
         y_train_pred = self.models[target].predict(self.x_train[target])
         train_mse = mean_squared_error(self.y_train[target], y_train_pred)
         train_r2 = r2_score(self.y_train[target], y_train_pred)
@@ -365,7 +356,7 @@ class EnergyConsumptionModel:
                 'Importance': self.models[target].feature_importances_
             }).sort_values(by='Importance', ascending=False)
 
-        logging.info(f"Evaluation completed for target {target}")
+        logger.info(f"Evaluation completed for target {target}")
 
         return {
             'train': {
@@ -381,14 +372,19 @@ class EnergyConsumptionModel:
             }
         }
 
-    def save_model(self, target):
+    def save_model(self, target, dataset_version):
         """Saves the model to S3."""
-        logging.info(f"Saving model for target {target}")
-        run_date = datetime.now().strftime("%Y-%m-%d")
+        logger.info(f"Saving model for target {target}")
         save_pickle_to_s3(
             self.models[target],
             bucket_name="retrofit-model-directory-dev",
-            s3_file_name=f"model_directory/energy_consumption_model/{target}_{run_date}.pkl"
+            s3_file_name=f"model_directory/energy_consumption_model/{target}_{dataset_version}.pkl"
+        )
+        logger.info("Saving dummy schema for target {target}")
+        save_pickle_to_s3(
+            self.dummy_schema,
+            bucket_name="retrofit-model-directory-dev",
+            s3_file_name=f"model_directory/energy_consumption_model/{target}_{dataset_version}_dummy_schema.pkl"
         )
 
     def score_new_data(self, new_data, target):
@@ -404,57 +400,24 @@ class EnergyConsumptionModel:
         self.data = new_data.copy()
 
         # Run feature engineering
-        self.feature_engineering()
+        # TODO: This needs to be dummied out according to the training data
+        self.feature_engineering(drop_first=False)
 
         # Select the transformed data
-        new_data_transformed = self.data[self.dummy_columns[target]]
+        new_data_transformed = self.data[self.dummy_columns[target]].copy()
 
-        # Ensure the columns match the selected features
-        new_data_transformed = new_data_transformed[self.selected_features[target]]
+        missed_dummies = [c for c in self.models[target].feature_names_in_ if c not in new_data_transformed.columns]
+        zero_df = pd.DataFrame([dict(zip(missed_dummies, [0, ] * len(missed_dummies)))])
+
+        new_data_transformed = pd.concat([new_data_transformed, zero_df], axis=1)
+        # When we dummy in this case, we run with drop_first = False so we may end up with some of those
+        # first columns, we we'll need to dorp them
+        new_data_transformed = new_data_transformed[self.models[target].feature_names_in_]
 
         # Generate predictions
-        predictions = self.models[target].predict(new_data_transformed)
+        prediction = self.models[target].predict(new_data_transformed)
 
         # Reset self.data to None
         self.data = None
 
-        return predictions
-
-
-# Usage:
-cleaned = read_from_s3(
-    s3_file_name="cleaned_epc_data/cleaned.bson",
-    bucket_name="retrofit-data-dev"
-)
-
-cleaned = msgpack.unpackb(cleaned, raw=False)
-
-model = EnergyConsumptionModel(cleaned=cleaned, n_jobs=2)
-model.read_dataset('energy_consumption/2024-07-05/energy_consumption_dataset.parquet')
-model.feature_engineering()
-
-# For heating_kwh
-model.split_dataset(target='heating_kwh')
-model.fit_model(target='heating_kwh')
-model.re_train_final_model(target='heating_kwh')
-evaluation_results = model.evaluate_model(target='heating_kwh')
-
-pprint(evaluation_results["train"])
-pprint(evaluation_results["test"])
-
-importance_df = evaluation_results["train"]["Feature Importance"]
-testing_predictions = model.testing_predictions["heating_kwh"]
-testing_predictions = testing_predictions.sort_values("residual", ascending=False)
-training_predictions = model.training_predictions["heating_kwh"]
-training_predictions = training_predictions.sort_values("residual", ascending=False)
-# Merge on model.input_data, by the index
-merged_data = testing_predictions.merge(model.input_data, left_index=True, right_index=True)
-merged_data_train = training_predictions.merge(model.input_data, left_index=True, right_index=True)
-
-# For hot_water_kwh
-model.split_dataset(target='hot_water_kwh')
-model.fit_model(target='hot_water_kwh')
-model.re_train_final_model(target='hot_water_kwh')
-evaluation_results = model.evaluate_model(target='hot_water_kwh')
-pprint(evaluation_results["train"])
-pprint(evaluation_results["test"])
+        return prediction
diff --git a/etl/bill_savings/data_collection.py b/etl/bill_savings/data_collection.py
index 4d913e8f..4fc03f99 100644
--- a/etl/bill_savings/data_collection.py
+++ b/etl/bill_savings/data_collection.py
@@ -133,7 +133,7 @@ def app():
     energy_consumption_data = []
     for i, directory in tqdm(enumerate(epc_directories), total=len(epc_directories)):
         # Skip the first 50
-        if i < 36:
+        if i < 260:
             continue
 
         data = pd.read_csv(directory / "certificates.csv", low_memory=False)
diff --git a/etl/bill_savings/training.py b/etl/bill_savings/training.py
index 2c29c317..b1a939a1 100644
--- a/etl/bill_savings/training.py
+++ b/etl/bill_savings/training.py
@@ -1,5 +1,56 @@
-def hanlder():
+from pprint import pprint
+import msgpack
+from utils.s3 import read_from_s3
+from etl.bill_savings.EnergyConsumptionModel import EnergyConsumptionModel
+
+
+def handler():
     """
     This function is used to train the model and store the final models in s3 as pickles
     :return:
     """
+
+    dataset_version = "2024-07-05"
+
+    # Usage:
+    cleaned = read_from_s3(
+        s3_file_name="cleaned_epc_data/cleaned.bson",
+        bucket_name="retrofit-data-dev"
+    )
+
+    cleaned = msgpack.unpackb(cleaned, raw=False)
+
+    model = EnergyConsumptionModel(cleaned=cleaned, n_jobs=2)
+    model.read_dataset(f'energy_consumption/{dataset_version}/energy_consumption_dataset.parquet')
+    model.feature_engineering()
+
+    # For heating_kwh
+    model.split_dataset(target='heating_kwh')
+    model.fit_model(target='heating_kwh')
+    model.re_train_final_model(target='heating_kwh')
+    evaluation_results = model.evaluate_model(target='heating_kwh')
+
+    pprint(evaluation_results["train"])
+    pprint(evaluation_results["test"])
+
+    model.save_model(target='heating_kwh', dataset_version=dataset_version)
+
+    # importance_df = evaluation_results["train"]["Feature Importance"]
+    # testing_predictions = model.testing_predictions["heating_kwh"]
+    # testing_predictions = testing_predictions.sort_values("residual", ascending=False)
+    # training_predictions = model.training_predictions["heating_kwh"]
+    # training_predictions = training_predictions.sort_values("residual", ascending=False)
+    # # Merge on model.input_data, by the index
+    # merged_data = testing_predictions.merge(model.input_data, left_index=True, right_index=True)
+    # merged_data_train = training_predictions.merge(model.input_data, left_index=True, right_index=True)
+
+    # For hot_water_kwh
+    model.split_dataset(target='hot_water_kwh')
+    model.fit_model(target='hot_water_kwh')
+    model.re_train_final_model(target='hot_water_kwh')
+    evaluation_results = model.evaluate_model(target='hot_water_kwh')
+
+    pprint(evaluation_results["train"])
+    pprint(evaluation_results["test"])
+
+    model.save_model(target='hot_water_kwh', dataset_version=dataset_version)
diff --git a/etl/customers/places_for_people/demo_portfolio.py b/etl/customers/places_for_people/demo_portfolio.py
new file mode 100644
index 00000000..5c290ad7
--- /dev/null
+++ b/etl/customers/places_for_people/demo_portfolio.py
@@ -0,0 +1,118 @@
+import pandas as pd
+
+from utils.s3 import save_csv_to_s3
+
+PORTFOLIO_ID = 83
+USER_ID = 8
+
+
+def app():
+    # TODO: We can insert a variable, indicating that they own all of the units in the building
+    asset_list = [
+        {
+            "address": "Flat 1, Fenton Court",
+            "postcode": "N2 8DS",
+            "uprn": 200140644,
+            "building_id": 1,
+        },
+        {
+            "address": "Flat 2, Fenton Court",
+            "postcode": "N2 8DS",
+            "uprn": 200140645,
+            "building_id": 1,
+        },
+        {
+            "address": "Flat 3, Fenton Court",
+            "postcode": "N2 8DS",
+            "uprn": 200140646,
+            "building_id": 1,
+        },
+        {
+            "address": "Flat 4, Fenton Court",
+            "postcode": "N2 8DS",
+            "uprn": 200140647,
+            "building_id": 1,
+        },
+        {
+            "address": "Flat 5, Fenton Court",
+            "postcode": "N2 8DS",
+            "uprn": 200140648,
+            "building_id": 1,
+        },
+        {
+            "address": "Flat 6, Fenton Court",
+            "postcode": "N2 8DS",
+            "uprn": 200140649,
+            "building_id": 1,
+        }
+    ]
+
+    asset_list = pd.DataFrame(asset_list)
+
+    # Store the asset list in s3
+    filename = f"{USER_ID}/{PORTFOLIO_ID}/non_intrusives.csv"
+    save_csv_to_s3(
+        dataframe=asset_list,
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename
+    )
+
+    body = {
+        "portfolio_id": str(PORTFOLIO_ID),
+        "housing_type": "Private",
+        "goal": "Increase EPC",
+        "goal_value": "B",
+        "trigger_file_path": filename,
+        "already_installed_file_path": "",
+        "patches_file_path": "",
+        "non_invasive_recommendations_file_path": "",
+        "budget": None,
+    }
+    print(body)
+
+    # Get an example of flats with solar panels from epc data
+
+    # import inspect
+    # import pandas as pd
+    # from tqdm import tqdm
+    # from pathlib import Path
+    #
+    # src_file_path = inspect.getfile(lambda: None)
+    #
+    # EPC_DIRECTORY = Path(src_file_path).parent / "local_data" / "all-domestic-certificates"
+    #
+    # epc_directories = [entry for entry in EPC_DIRECTORY.iterdir() if entry.is_dir()]
+    #
+    # directory = epc_directories[1]
+    # data = pd.read_csv(directory / "certificates.csv", low_memory=False)
+    # # Get flats
+    # data = data[data["PROPERTY_TYPE"].str.lower().str.contains("flat")]
+    # data = data[~pd.isnull(data["UPRN"])]
+    # data["UPRN"] = data["UPRN"].astype(int).astype(str)
+    # data = data[pd.to_datetime(data["LODGEMENT_DATE"]) > "2020-01-01"]
+    # flats_with_solar = data[data['PHOTO_SUPPLY'] > 0]
+    #
+    # print(flats_with_solar["UPRN"])
+    #
+    # flats_with_solar[["ADDRESS", "UPRN"]]
+    #
+    # # Good example:
+    # # UPRN: 10013160824, Flat 39, The Meadow, 30 Busk Meadow S5 7JH (care home with 39 flats, have solar panels)
+    # #
+    # # Mostly, For a mid-floor flat, the property doesn't show as having solar panels through the photo_supply variable
+    # # But actually for UPRN: 10013245713, Apartment 4, Orchard House, Gill Lane PR4 5QN, this has a dwelling above
+    # # but the photo_supply variable is 20
+    #
+    # # Small flat consisting of 2 units
+    # # UPRN: 42172953, FLAT 2, 276 CLAUGHTON ROAD, BIRKENHEAD CH41 4DX
+    #
+    # # Flat containing 5 units
+    # # UPRN: 10013247127 Flat 1, Old Church House PR4 5GE
+    # # UPRN: 10013247130 Flat 4, Old Church House PR4 5GE
+    #
+    # # Flat containing multiple units:
+    # # UPRNS: 10013245710, 10013245716, 10013245711, 10013245717, 10013245714, 10013245715, 10013245712, 10013245713
+    #
+    # # Look for flats with air source heat pumps!
+    # flats_with_asps = data[data["MAINHEAT_DESCRIPTION"].str.lower().str.contains("air source heat pump")]
+    # print(flats_with_asps[["UPRN", "ADDRESS"]])

From eb65ff538e1e1df8ef5d67fb10ac5475166fce65 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 8 Jul 2024 11:47:31 +0100
Subject: [PATCH 03/36] integrated scoring new data

---
 backend/Property.py                        |  3 ++-
 backend/app/plan/router.py                 | 18 +---------------
 etl/bill_savings/EnergyConsumptionModel.py | 25 ++++++++++++++++------
 etl/bill_savings/training.py               |  3 ++-
 4 files changed, 24 insertions(+), 25 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index 76bea0a6..35c19034 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -596,7 +596,7 @@ class Property:
         )
         self.set_energy_source()
         self.find_energy_sources()
-        self.set_current_energy_bill()
+        self.set_current_energy_bill(energy_consumption_client)
 
     def set_current_energy_bill(self, energy_consumption_client):
         """
@@ -611,6 +611,7 @@ class Property:
         ]
         for col in ["heating_kwh", "hot_water_kwh"]:
             scoring_df[col] = None
+
         energy_consumption_client.data = None
         heating_prediction = energy_consumption_client.score_new_data(
             new_data=scoring_df, target="heating_kwh"
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 7c2d156b..0cf670c2 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -339,11 +339,6 @@ async def trigger_plan(body: PlanTriggerRequest):
         if not input_properties:
             return Response(status_code=204)
 
-        # TOOD: TEMP - store locally as pickle
-        # import pickle
-        # with open("input_properties.pkl", "wb") as f:
-        #     pickle.dump(input_properties, f)
-
         # The materials data could be cached or local so we don't need to make
         # consistent requests to the backend for
         # the same data
@@ -363,21 +358,10 @@ async def trigger_plan(body: PlanTriggerRequest):
                 "heating_kwh": f"model_directory/energy_consumption_model/heating_kwh_{dataset_version}.pkl",
                 "hot_water_kwh": f"model_directory/energy_consumption_model/hot_water_kwh_{dataset_version}.pkl"
             },
+            dummy_schema_path=f"model_directory/energy_consumption_model/dummy_schema_{dataset_version}.pkl",
             cleaned=cleaned
         )
 
-        # Store all of these locally
-        # with open("temp_inputs.pkl", "wb") as f:
-        #     pickle.dump({
-        #         "input_properties": input_properties,
-        #         "materials": materials,
-        #         "cleaned": cleaned,
-        #         "uprn_filenames": uprn_filenames,
-        #         "photo_supply_lookup": photo_supply_lookup,
-        #         "floor_area_decile_thresholds": floor_area_decile_thresholds,
-        #         "model_client": model_client
-        #     }, f)
-
         logger.info("Getting spatial data")
         for p in input_properties:
             p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds, energy_consumption_client)
diff --git a/etl/bill_savings/EnergyConsumptionModel.py b/etl/bill_savings/EnergyConsumptionModel.py
index c77001b3..59a68a56 100644
--- a/etl/bill_savings/EnergyConsumptionModel.py
+++ b/etl/bill_savings/EnergyConsumptionModel.py
@@ -46,7 +46,7 @@ class EnergyConsumptionModel:
         "low-energy-lighting", "environment-impact-current", "energy-tariff", "current-energy-rating"
     ]
 
-    def __init__(self, cleaned, model_paths=None, n_jobs=1):
+    def __init__(self, cleaned, model_paths=None, dummy_schema_path=None, n_jobs=1):
         self.cleaned = cleaned
         self.models = {}
         self.model_paths = model_paths or {}
@@ -75,7 +75,15 @@ class EnergyConsumptionModel:
 
         if model_paths:
             for target, path in model_paths.items():
+                # Read model
                 self.models[target] = read_pickle_from_s3(bucket_name="retrofit-model-directory-dev", s3_file_name=path)
+                # Read dummy schema
+
+        if dummy_schema_path:
+            self.dummy_schema = read_pickle_from_s3(
+                bucket_name="retrofit-model-directory-dev",
+                s3_file_name=dummy_schema_path
+            )
 
     def read_dataset(self, file_path):
         """Reads the dataset from the specified file path."""
@@ -380,11 +388,13 @@ class EnergyConsumptionModel:
             bucket_name="retrofit-model-directory-dev",
             s3_file_name=f"model_directory/energy_consumption_model/{target}_{dataset_version}.pkl"
         )
+
+    def save_dummy_schema(self, dataset_version):
         logger.info("Saving dummy schema for target {target}")
         save_pickle_to_s3(
             self.dummy_schema,
             bucket_name="retrofit-model-directory-dev",
-            s3_file_name=f"model_directory/energy_consumption_model/{target}_{dataset_version}_dummy_schema.pkl"
+            s3_file_name=f"model_directory/energy_consumption_model/{dataset_version}_dummy_schema.pkl"
         )
 
     def score_new_data(self, new_data, target):
@@ -400,16 +410,19 @@ class EnergyConsumptionModel:
         self.data = new_data.copy()
 
         # Run feature engineering
-        # TODO: This needs to be dummied out according to the training data
         self.feature_engineering(drop_first=False)
 
-        # Select the transformed data
-        new_data_transformed = self.data[self.dummy_columns[target]].copy()
+        new_data_transformed = self.data.copy()
 
+        for col in self.dummy_schema:
+            if col not in new_data_transformed.columns:
+                new_data_transformed[col] = 0
+
+        new_data_transformed = new_data_transformed[self.dummy_schema]
         missed_dummies = [c for c in self.models[target].feature_names_in_ if c not in new_data_transformed.columns]
         zero_df = pd.DataFrame([dict(zip(missed_dummies, [0, ] * len(missed_dummies)))])
-
         new_data_transformed = pd.concat([new_data_transformed, zero_df], axis=1)
+
         # When we dummy in this case, we run with drop_first = False so we may end up with some of those
         # first columns, we we'll need to dorp them
         new_data_transformed = new_data_transformed[self.models[target].feature_names_in_]
diff --git a/etl/bill_savings/training.py b/etl/bill_savings/training.py
index b1a939a1..df60298b 100644
--- a/etl/bill_savings/training.py
+++ b/etl/bill_savings/training.py
@@ -10,7 +10,7 @@ def handler():
     :return:
     """
 
-    dataset_version = "2024-07-05"
+    dataset_version = "2024-07-08"
 
     # Usage:
     cleaned = read_from_s3(
@@ -23,6 +23,7 @@ def handler():
     model = EnergyConsumptionModel(cleaned=cleaned, n_jobs=2)
     model.read_dataset(f'energy_consumption/{dataset_version}/energy_consumption_dataset.parquet')
     model.feature_engineering()
+    model.save_dummy_schema(dataset_version=dataset_version)
 
     # For heating_kwh
     model.split_dataset(target='heating_kwh')

From 30ce7df6c161c94959789d760a577d074fa6a11c Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 8 Jul 2024 14:12:37 +0100
Subject: [PATCH 04/36] completed the current energy bill estimate

---
 backend/Property.py                    | 55 +++++++++++++++++---
 backend/ml_models/AnnualBillSavings.py | 71 ++++++++++++++++++++++----
 2 files changed, 107 insertions(+), 19 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index 35c19034..5a9d3fe8 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -112,6 +112,8 @@ class Property:
         self.wall_type = None
         self.floor_type = None
 
+        self.energy_cost_estimates = {}
+
         self.energy = {
             "primary_energy_consumption": epc_record.get("energy_consumption_current"),
             "co2_emissions": epc_record.get("co2_emissions_current"),
@@ -612,26 +614,63 @@ class Property:
         for col in ["heating_kwh", "hot_water_kwh"]:
             scoring_df[col] = None
 
+        # We should adjust the costs first and then calculate the energy consumption
+        lighting_cost = float(self.data["lighting-cost-current"])
+        heating_cost = float(self.data["heating-cost-current"])
+        hot_water_cost = float(self.data["hot-water-cost-current"])
+        total_cost = lighting_cost + heating_cost + hot_water_cost
+
+        adjusted_heating_cost = AnnualBillSavings.adjust_energy_cost_to_metered(
+            epc_energy_cost=heating_cost,
+            current_epc_rating=self.data["current-energy-rating"],
+        )
+
+        adjusted_hot_water_cost = AnnualBillSavings.adjust_energy_cost_to_metered(
+            epc_energy_cost=hot_water_cost,
+            current_epc_rating=self.data["current-energy-rating"],
+        )
+
+        adjusted_lighting_cost = AnnualBillSavings.adjust_energy_cost_to_metered(
+            epc_energy_cost=lighting_cost,
+            current_epc_rating=self.data["current-energy-rating"],
+        )
+
+        scoring_df["heating-cost-current"] = [adjusted_heating_cost]
+        scoring_df["hot-water-cost-current"] = [adjusted_hot_water_cost]
+        scoring_df["lighting-cost-current"] = [adjusted_lighting_cost]
+
         energy_consumption_client.data = None
         heating_prediction = energy_consumption_client.score_new_data(
             new_data=scoring_df, target="heating_kwh"
-        )
+        )[0]
 
         hot_water_prediction = energy_consumption_client.score_new_data(
             new_data=scoring_df, target="hot_water_kwh"
-        )
+        )[0]
 
-        starting_heat_demand = (
-            float(self.data["energy-consumption-current"]) * self.floor_area
-        )
+        # We convert the lighting cost into kwh, just using the price cap
+        lighting_kwh = float(adjusted_lighting_cost) / AnnualBillSavings.ELECTRICITY_PRICE_CAP
+
+        appliances_energy_use = AnnualBillSavings.estimate_appliances_energy_use(total_floor_area=self.floor_area)
+        appliances_energy_cost = appliances_energy_use * AnnualBillSavings.ELECTRICITY_PRICE_CAP
+
+        total_energy_consumption = heating_prediction + hot_water_prediction + lighting_kwh + appliances_energy_use
 
         self.current_adjusted_energy = AnnualBillSavings.adjust_energy_to_metered(
-            epc_energy_consumption=starting_heat_demand,
+            epc_energy_consumption=total_energy_consumption,
             current_epc_rating=self.data["current-energy-rating"],
-            total_floor_area=self.floor_area
         )
 
-        self.current_energy_bill = AnnualBillSavings.calculate_annual_bill(self.current_adjusted_energy)
+        self.energy_cost_estimates = {
+            "heating": adjusted_heating_cost,
+            "hot_water": adjusted_hot_water_cost,
+            "lighting": adjusted_lighting_cost,
+            "appliances": appliances_energy_cost
+        }
+
+        self.expected_energy_bill = (
+            adjusted_heating_cost + adjusted_hot_water_cost + adjusted_lighting_cost + appliances_energy_cost
+        )
 
     def set_spatial(self, spatial: pd.DataFrame):
         """
diff --git a/backend/ml_models/AnnualBillSavings.py b/backend/ml_models/AnnualBillSavings.py
index e6494bcd..4747e587 100644
--- a/backend/ml_models/AnnualBillSavings.py
+++ b/backend/ml_models/AnnualBillSavings.py
@@ -25,9 +25,9 @@ class AnnualBillSavings:
     AVERAGE_GAS_CONSUMPTION = 11500
 
     # Latest price cap figures from Ofgem are for April 2024
-    # https://www.ofgem.gov.uk/publications/new-energy-price-cap-level-april-june-2024-starts-today
-    ELECTRICITY_PRICE_CAP = 0.245
-    GAS_PRICE_CAP = 0.0604
+    # https://www.ofgem.gov.uk/energy-price-cap
+    ELECTRICITY_PRICE_CAP = 0.2236
+    GAS_PRICE_CAP = 0.0548
     # This is the most recent export payment figure, at 12p per kwh
     ELECTRICITY_EXPORT_PAYMENT = 0.12
 
@@ -125,7 +125,17 @@ class AnnualBillSavings:
         return eam
 
     @classmethod
-    def adjust_energy_to_metered(cls, epc_energy_consumption, current_epc_rating, total_floor_area):
+    def estimate_appliances_energy_use(cls, total_floor_area):
+        # The EPC energy consumption does not factor in cooking and applicance use, so this is estimated using the
+        # methodology outlined in SAP, and is discussed in the UCL paper in section 3.1.1
+        estimated_occupants = cls.calculate_occupants(total_floor_area=total_floor_area)
+        appliances_energy_use = cls.estimate_electrical_appliances(estimated_occupants, total_floor_area)
+        return appliances_energy_use
+
+    @classmethod
+    def adjust_energy_to_metered(
+        cls, epc_energy_consumption, current_epc_rating
+    ):
         """
         The over-prediction of energy use by EPCs in Great Britain: A comparison
         of EPC-modelled and metered primary energy use intensity
@@ -136,13 +146,6 @@ class AnnualBillSavings:
         :return:
         """
 
-        # The EPC energy consumption does not factor in cooking and applicance use, so this is estimated using the
-        # methodology outlined in SAP, and is discussed in the UCL paper in section 3.1.1
-        estimated_occupants = cls.calculate_occupants(total_floor_area=total_floor_area)
-        appliances_energy_use = cls.estimate_electrical_appliances(estimated_occupants, total_floor_area)
-
-        epc_energy_consumption += appliances_energy_use
-
         gradients = {
             "A": -0.1,
             "B": -0.1,
@@ -175,6 +178,52 @@ class AnnualBillSavings:
 
         return adjusted_consumption
 
+    @classmethod
+    def adjust_energy_cost_to_metered(cls, epc_energy_cost, current_epc_rating):
+        """
+        The over-prediction of energy use by EPCs in Great Britain: A comparison
+        of EPC-modelled and metered primary energy use intensity
+
+        Which can be found here: https://www.sciencedirect.com/science/article/pii/S0378778823002542
+        We implement the results on page 10
+
+        This is used to just re-map the cost from the EPC to the metered cost
+        :return:
+        """
+
+        gradients = {
+            "A": -0.1,
+            "B": -0.1,
+            "C": -0.43,
+            "D": -0.52,
+            "E": -0.7,
+            "F": -0.76,
+            "G": -0.76
+        }
+
+        intercepts = {
+            "A": 28,
+            "B": 28,
+            "C": 97,
+            "D": 119,
+            "E": 160,
+            "F": 157,
+            "G": 157
+        }
+
+        gradient = gradients[current_epc_rating]
+        intercept = intercepts[current_epc_rating]
+
+        # This should be negative
+        consumption_difference = gradient * epc_energy_cost + intercept
+        consumption_difference = 0 if consumption_difference > 0 else consumption_difference
+
+        adjusted_consumption = (epc_energy_cost + consumption_difference)
+        if adjusted_consumption < 0:
+            raise ValueError("consumption_difference should be negative")
+
+        return adjusted_consumption
+
     @classmethod
     def adjust_expected_band(cls, expected_epc_rating, current_epc_rating):
         """

From f5eab413c207319c0be12fc5e0fe9e555ee02461 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 8 Jul 2024 14:13:59 +0100
Subject: [PATCH 05/36] capture energy_consumption_estimates

---
 backend/Property.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/backend/Property.py b/backend/Property.py
index 5a9d3fe8..c54405b2 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -113,6 +113,7 @@ class Property:
         self.floor_type = None
 
         self.energy_cost_estimates = {}
+        self.energy_consumption_estimates = {}
 
         self.energy = {
             "primary_energy_consumption": epc_record.get("energy_consumption_current"),
@@ -618,7 +619,6 @@ class Property:
         lighting_cost = float(self.data["lighting-cost-current"])
         heating_cost = float(self.data["heating-cost-current"])
         hot_water_cost = float(self.data["hot-water-cost-current"])
-        total_cost = lighting_cost + heating_cost + hot_water_cost
 
         adjusted_heating_cost = AnnualBillSavings.adjust_energy_cost_to_metered(
             epc_energy_cost=heating_cost,
@@ -668,6 +668,13 @@ class Property:
             "appliances": appliances_energy_cost
         }
 
+        self.energy_consumption_estimates = {
+            "heating": heating_prediction,
+            "hot_water": hot_water_prediction,
+            "lighting": lighting_kwh,
+            "appliances": appliances_energy_use
+        }
+
         self.expected_energy_bill = (
             adjusted_heating_cost + adjusted_hot_water_cost + adjusted_lighting_cost + appliances_energy_cost
         )

From c4768c85346f3dfc1f7bfaf16bc0c575596afa49 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 8 Jul 2024 14:22:29 +0100
Subject: [PATCH 06/36] Extracting building level info

---
 backend/Property.py        |  3 +++
 backend/app/plan/router.py | 20 ++++++++++++++++----
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index c54405b2..72d1d169 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -60,6 +60,7 @@ class Property:
     # Surplus information, that can be provided as optional inputs, by a customer
     n_bathrooms = None
     n_bedrooms = None
+    building_id = None  # Used to group properties together into a single building
 
     def __init__(
         self,
@@ -193,12 +194,14 @@ class Property:
         return {
             "n_bathrooms": n_bathrooms,
             "n_bedrooms": n_bedrooms,
+            "building_id": kwargs.get("building_id", None),
         }
 
     def parse_kwargs(self, kwargs):
         # We extract the elements from kwargs that we recognise. Anything additional is ignored
         self.n_bathrooms = kwargs.get("n_bathrooms", None)
         self.n_bedrooms = kwargs.get("n_bedrooms", None)
+        self.building_id = kwargs.get("building_id", None)
 
     def create_base_difference_epc_record(self, cleaned_lookup: dict):
         """
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 0cf670c2..2832989e 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -263,7 +263,6 @@ async def trigger_plan(body: PlanTriggerRequest):
             bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet",
         )
 
-        # TODO: insert building id
         input_properties = []
         for config in tqdm(plan_input):
             # We validate each record in the file. If the record is NOT valid, we need to handle this accordingly
@@ -366,9 +365,22 @@ async def trigger_plan(body: PlanTriggerRequest):
         for p in input_properties:
             p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds, energy_consumption_client)
             p.get_spatial_data(uprn_filenames)
-            # Call Google Solar API
-            # TODO: Complete me
-            solar_performance = solar_api_client.get(longitude=p.spatial["longitude"], latitude=p.spatial["latitude"])
+
+        building_ids = [
+            {
+                "building_id": p.building_id, "longitude": p.spatial["longitude"], "latitude": p.spatial["latitude"]
+            } for p in input_properties if p.building_id is not None
+        ]
+        if building_ids:
+            # Model the solar potential at the building level
+            print("complete me")
+        else:
+            # Model the solar potential at the property level
+            for p in input_properties:
+                # TODO: Complete me!
+                solar_performance = solar_api_client.get(
+                    longitude=p.spatial["longitude"], latitude=p.spatial["latitude"]
+                )
 
         logger.info("Getting components and epc recommendations")
         recommendations = {}

From c3fd22825aec0fdb3213d7a51cbd7796965380db Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 8 Jul 2024 17:19:50 +0100
Subject: [PATCH 07/36] implementing solar optimisation

---
 backend/apis/GoogleSolarApi.py             | 166 ++++++++-------------
 backend/app/plan/router.py                 |  45 +++++-
 etl/bill_savings/EnergyConsumptionModel.py |  39 ++++-
 etl/bill_savings/data_collection.py        |   2 +-
 etl/bill_savings/data_combining.py         |  11 ++
 5 files changed, 153 insertions(+), 110 deletions(-)

diff --git a/backend/apis/GoogleSolarApi.py b/backend/apis/GoogleSolarApi.py
index d29e3da5..f7b34d19 100644
--- a/backend/apis/GoogleSolarApi.py
+++ b/backend/apis/GoogleSolarApi.py
@@ -98,13 +98,15 @@ class GoogleSolarApi:
                     raise
 
     @lru_cache(maxsize=128)
-    def get(self, longitude, latitude, required_quality="MEDIUM"):
+    def get(self, longitude, latitude, energy_consumption, required_quality="MEDIUM", is_building=False):
         """
         Wrapper function that calls get_building_insights and extracts roof segments, with caching.
 
         :param longitude: The longitude of the location.
         :param latitude: The latitude of the location.
+        :param energy_consumption: The energy consumption of the building/unit associated to the longitude and latitude.
         :param required_quality: The required quality of the data (default is "MEDIUM").
+        :param is_building: Whether the energy consumption is for a building or a unit.
         :return: The JSON response containing the building insights data.
         """
 
@@ -112,8 +114,8 @@ class GoogleSolarApi:
 
         # Extract key data from the insights response
         self.roof_segments = self.insights_data["solarPotential"].get('roofSegmentStats', [])
-        self.floor_area = self.insights_data["solarPotential"]["wholeRoofStats"]['groundAreaMeters2']
         self.roof_area = self.insights_data["solarPotential"]["wholeRoofStats"]['areaMeters2']
+        self.floor_area = self.insights_data["solarPotential"]["wholeRoofStats"]['groundAreaMeters2']
         self.panel_area = (
             self.insights_data["solarPotential"]["panelHeightMeters"] *
             self.insights_data["solarPotential"]["panelWidthMeters"]
@@ -133,7 +135,7 @@ class GoogleSolarApi:
         self.roof_segment_indexes = [segment['segmentIndex'] for segment in self.roof_segments]
 
         # We now start finding the solar panel configurations
-        self.optimise_solar_configuration()
+        self.optimise_solar_configuration(energy_consumption=energy_consumption, is_building=is_building)
 
     @staticmethod
     def lifetime_production_ac_kwh(
@@ -143,7 +145,7 @@ class GoogleSolarApi:
     ):
         """
         Mimics the function described in the Google Solar API documentation, presenting the lifetime production
-        AC KWH as a geometri sum
+        AC KWH as a geometric sum
         """
 
         return (
@@ -153,86 +155,7 @@ class GoogleSolarApi:
                 installation_life_span)) /
             (1 - efficiency_depreciation_factor))
 
-    @staticmethod
-    def annualUtilityBillEstimate(
-        yearlyKWhEnergyConsumption,
-        initialAcKwhPerYear,
-        efficiencyDepreciationFactor,
-        year,
-        costIncreaseFactor,
-        discountRate):
-        """
-        Implements the bill costing model for esimating annual bill
-        :param yearlyKWhEnergyConsumption:
-        :param initialAcKwhPerYear:
-        :param efficiencyDepreciationFactor:
-        :param year:
-        :param costIncreaseFactor:
-        :param discountRate:
-        :return:
-        """
-
-        return (
-            billCostModel(
-                yearlyKWhEnergyConsumption -
-                annualProduction(
-                    initialAcKwhPerYear,
-                    efficiencyDepreciationFactor,
-                    year)) *
-            pow(costIncreaseFactor, year) /
-            pow(discountRate, year))
-
-    def lifetimeUtilityBill(
-        yearlyKWhEnergyConsumption,
-        initialAcKwhPerYear,
-        efficiencyDepreciationFactor,
-        installationLifeSpan,
-        costIncreaseFactor,
-        discountRate):
-        bill = [0] * installationLifeSpan
-        for year in range(installationLifeSpan):
-            bill[year] = annualUtilityBillEstimate(
-                yearlyKWhEnergyConsumption,
-                initialAcKwhPerYear,
-                efficiencyDepreciationFactor,
-                year,
-                costIncreaseFactor,
-                discountRate)
-        return bill
-
-    def estimate_solar_costs(self, panel_performance):
-        """
-        This method implements the recommended costing approach, to estimate the ROI of a solar panel
-        configuration, as described in the Google Solar API documentation
-        :param panel_performance: dataframe containing the solar panel array configuration and energy generation data
-        :return:
-        """
-
-        # we now estiamte the financial benefits of solar panels for the household, using the framework described
-        # by the Google Solar API
-        # 1) Convert Solar Energy AD production from the DC production
-        panel_performance["initial_ac_kwh_per_year"] = panel_performance["yearly_dc_energy"] * self.dc_to_ac_rate
-
-        # This is just a benchmark figure, based on the national figure. This doesn't not respect the fact that a
-        # property could be 100% electric
-        average_electricity_consumption
-
-        # Remove anything where the total ac energy is less than half of the array wattage
-        panel_performance = panel_performance[
-            (panel_performance["initial_ac_kwh_per_year"] / panel_performance["array_warrage"]) >= 0.5
-            ]
-
-        # 2) Calculate the liftime solar energy production
-        panel_performance['lifetime_ac_kwh'] = panel_performance.apply(
-            self.lifetime_production_ac_kwh,
-            axis=1,
-            efficiency_depreciation_factor=self.efficiency_depreciation_factor,
-            installation_life_span=self.installation_life_span
-        )
-
-        # TODO: Complete the rest of the solar model
-
-    def optimise_solar_configuration(self):
+    def optimise_solar_configuration(self, energy_consumption, is_building=False):
         """
         Optimise the solar panel configuration for the building.
         :return:
@@ -287,30 +210,67 @@ class GoogleSolarApi:
         panel_performance = pd.DataFrame(panel_performance)
         # We can have duplicate configurations
         panel_performance = panel_performance.drop_duplicates()
-        # Ensure more than 4 panels
-        panel_performance = panel_performance[panel_performance["n_panels"] >= 4]
+        # If we look at the building level, we don't include any projects fewer than 10 panels, otherwise the
+        # minimum is 4
+        min_panels = 10 if is_building else 4
+        panel_performance = panel_performance[panel_performance["n_panels"] >= min_panels]
 
-        self.estimate_solar_costs()
+        panel_performance["initial_ac_kwh_per_year"] = panel_performance["yearly_dc_energy"] * self.dc_to_ac_rate
 
-        # This first bracket is the value of the energy bill savings
-        panel_performance["bill_savings"] = (
-            self.SOLAR_CONSUMPTION_PROPORTION *
-            panel_performance["total_energy"] *
-            AnnualBillSavings.ELECTRICITY_PRICE_CAP
+        # Remove anything where the total ac energy is less than half of the array wattage
+        panel_performance = panel_performance[
+            (panel_performance["initial_ac_kwh_per_year"] / panel_performance["array_warrage"]) >= 0.5
+            ]
+
+        # 2) Calculate the liftime solar energy production
+        panel_performance['lifetime_ac_kwh'] = panel_performance.apply(
+            self.lifetime_production_ac_kwh,
+            axis=1,
+            efficiency_depreciation_factor=self.efficiency_depreciation_factor,
+            installation_life_span=self.installation_life_span
         )
-        # This is the amount of energy exported
-        panel_performance["export_value"] = (
-            (1 - self.SOLAR_CONSUMPTION_PROPORTION) *
-            panel_performance["total_energy"] *
-            AnnualBillSavings.ELECTRICITY_EXPORT_PAYMENT
+
+        # Now that we know the lifetime cnsumption of ac kwh, we can estimate the roi
+        roi_results = []
+        for _, panel_config in panel_performance.iterrows():
+            lifetime_ac_kwh = panel_config["lifetime_ac_kwh"]
+            lifetime_energy_consumption = energy_consumption * self.installation_life_span
+
+            if lifetime_ac_kwh < lifetime_energy_consumption:
+                # We estimate the amount of electricity generated, based on the price cap
+                generation_value = lifetime_ac_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP
+                roi = generation_value / panel_config["total_cost"]
+                generation_deficit = lifetime_energy_consumption - lifetime_ac_kwh
+            else:
+                # We now have a surplus of energy, which we can sell back to the grid
+                surplus = lifetime_ac_kwh - lifetime_energy_consumption
+                surplus_value = surplus * AnnualBillSavings.ELECTRICITY_EXPORT_PAYMENT
+                generation_value = lifetime_energy_consumption * AnnualBillSavings.ELECTRICITY_PRICE_CAP
+                roi = (generation_value + surplus_value) / panel_config["total_cost"]
+                generation_deficit = surplus_value
+
+            # Generation deficit tells us how much more energy we need to meet the generation demand.
+            roi_results.append(
+                {
+                    "n_panels": panel_config["n_panels"],
+                    "roi": roi,
+                    "generation_value": generation_value,
+                    "generation_deficit": generation_deficit
+                }
+            )
+
+        roi_results = pd.DataFrame(roi_results)
+
+        panel_performance = panel_performance.merge(
+            roi_results, how="left", on="n_panels"
         )
-        panel_performance["energy_value"] = panel_performance["bill_savings"] + panel_performance["export_value"]
-        panel_performance["payback_years"] = panel_performance["total_cost"] / panel_performance["energy_value"]
 
-        panel_performance = panel_performance.sort_values("weighted_ratio", ascending=False)
-        # TODO: Finish this!!
-
-        panel_performance["roof_area_percentage"] = panel_performance["panneled_roof_area"] / self.roof_area
+        # We prioritise maximal roi, then minimal geneartion deficit, then maximal generation value (if there is still
+        # a tie). Ideally, we want the best roi over the lifetime of the solar panels, but we also want to ensure that
+        # we can meet the energy demands of the building.
+        panel_performance = panel_performance.sort_values(
+            ["roi", "generation_deficit", "generation_value"], ascending=[False, True, False]
+        )
 
         self.panel_performance = panel_performance
 
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 2832989e..c3823180 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -351,13 +351,14 @@ async def trigger_plan(body: PlanTriggerRequest):
         photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket=get_settings().DATA_BUCKET)
         solar_api_client = GoogleSolarApi(api_key=get_settings().GOOGLE_SOLAR_API_KEY)
 
-        dataset_version = "2024-07-05"
+        dataset_version = "2024-07-08"
         energy_consumption_client = EnergyConsumptionModel(
             model_paths={
                 "heating_kwh": f"model_directory/energy_consumption_model/heating_kwh_{dataset_version}.pkl",
                 "hot_water_kwh": f"model_directory/energy_consumption_model/hot_water_kwh_{dataset_version}.pkl"
             },
-            dummy_schema_path=f"model_directory/energy_consumption_model/dummy_schema_{dataset_version}.pkl",
+            dummy_schema_path=f"model_directory/energy_consumption_model/{dataset_version}_dummy_schema.pkl",
+            consumption_average_path=f"energy_consumption/{dataset_version}/consumption_averages.parquet",
             cleaned=cleaned
         )
 
@@ -366,14 +367,48 @@ async def trigger_plan(body: PlanTriggerRequest):
             p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds, energy_consumption_client)
             p.get_spatial_data(uprn_filenames)
 
+        # TODO: Handle the case of modelling some units as buildings and some as properties individually
+        # TODO: We should adjust the energy consumtpion to account for post-retrofit energy consumption
         building_ids = [
             {
-                "building_id": p.building_id, "longitude": p.spatial["longitude"], "latitude": p.spatial["latitude"]
+                "building_id": p.building_id,
+                "longitude": p.spatial["longitude"],
+                "latitude": p.spatial["latitude"],
+                "energy_consumption": energy_consumption_client.estimate_new_consumption(
+                    current_rating=p.data["current-energy-rating"],
+                    target_rating=body.goal_value,
+                    current_consumption=p.current_adjusted_energy
+                ),
             } for p in input_properties if p.building_id is not None
         ]
         if building_ids:
-            # Model the solar potential at the building level
-            print("complete me")
+            # Find the unique longitude and latitude pairs for each building id
+            unique_coordinates = {}
+            for entry in building_ids:
+                building_id = entry['building_id']
+                coordinate_pair = {'longitude': entry['longitude'], 'latitude': entry['latitude']}
+
+                if building_id not in unique_coordinates:
+                    unique_coordinates[building_id] = []
+
+                if coordinate_pair not in unique_coordinates[building_id]:
+                    unique_coordinates[building_id].append(coordinate_pair)
+
+            for building_id, coordinates in unique_coordinates.items():
+                if len(coordinates) > 1:
+                    raise NotImplementedError("more than one coordinate for a building - handle me")
+
+                coordinates = coordinates[0]
+                energy_consumption = sum(
+                    [entry['energy_consumption'] for entry in building_ids if entry['building_id'] == building_id]
+                )
+                solar_performance = solar_api_client.get(
+                    longitude=coordinates["longitude"],
+                    latitude=coordinates["latitude"],
+                    energy_consumption=energy_consumption,
+                    is_building=True,
+                )
+
         else:
             # Model the solar potential at the property level
             for p in input_properties:
diff --git a/etl/bill_savings/EnergyConsumptionModel.py b/etl/bill_savings/EnergyConsumptionModel.py
index 59a68a56..e7658de5 100644
--- a/etl/bill_savings/EnergyConsumptionModel.py
+++ b/etl/bill_savings/EnergyConsumptionModel.py
@@ -46,7 +46,7 @@ class EnergyConsumptionModel:
         "low-energy-lighting", "environment-impact-current", "energy-tariff", "current-energy-rating"
     ]
 
-    def __init__(self, cleaned, model_paths=None, dummy_schema_path=None, n_jobs=1):
+    def __init__(self, cleaned, model_paths=None, dummy_schema_path=None, consumption_average_path=None, n_jobs=1):
         self.cleaned = cleaned
         self.models = {}
         self.model_paths = model_paths or {}
@@ -85,6 +85,13 @@ class EnergyConsumptionModel:
                 s3_file_name=dummy_schema_path
             )
 
+        self.consumption_averages = None
+        if consumption_average_path:
+            self.consumption_averages = read_dataframe_from_s3_parquet(
+                bucket_name="retrofit-data-dev",
+                file_key=consumption_average_path
+            )
+
     def read_dataset(self, file_path):
         """Reads the dataset from the specified file path."""
         logger.info(f"Reading dataset from {file_path}")
@@ -434,3 +441,33 @@ class EnergyConsumptionModel:
         self.data = None
 
         return prediction
+
+    @staticmethod
+    def calculate_percentage_decrease(start_rating, end_rating, consumption_averages):
+
+        start_consumption = consumption_averages.loc[
+            consumption_averages["current-energy-rating"] == start_rating, "total_consumption"
+        ].values[0]
+        end_consumption = consumption_averages.loc[
+            consumption_averages["current-energy-rating"] == end_rating, "total_consumption"
+        ].values[0]
+
+        percentage_decrease = ((start_consumption - end_consumption) / start_consumption) * 100
+        return percentage_decrease
+
+    def estimate_new_consumption(self, current_rating, target_rating, current_consumption):
+        """
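+        Given the consumption_averages dataset, which is produced as a result of the data_combining.py script
+        for the energy kwh models, this function estimates the new consumption by scaling the current consumption
+        by the expected reduction in consumption from the current rating to the target rating.
+        Relies on self.consumption_averages, which is only loaded when consumption_average_path is provided.
+        :param current_rating: rating matched against the "current-energy-rating" column of consumption_averages
+        :param target_rating: rating matched against the "current-energy-rating" column of consumption_averages
+        :param current_consumption: consumption value to scale by the expected reduction
+        :return: current_consumption * (1 - percentage_decrease / 100)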
+        """
+        percentage_decrease = self.calculate_percentage_decrease(
+            current_rating, target_rating, self.consumption_averages
+        )
+        new_consumption = current_consumption * (1 - percentage_decrease / 100)
+        return new_consumption
diff --git a/etl/bill_savings/data_collection.py b/etl/bill_savings/data_collection.py
index 4fc03f99..c51bf59e 100644
--- a/etl/bill_savings/data_collection.py
+++ b/etl/bill_savings/data_collection.py
@@ -133,7 +133,7 @@ def app():
     energy_consumption_data = []
     for i, directory in tqdm(enumerate(epc_directories), total=len(epc_directories)):
         # Skip the first 50
-        if i < 260:
+        if i < 26:
             continue
 
         data = pd.read_csv(directory / "certificates.csv", low_memory=False)
diff --git a/etl/bill_savings/data_combining.py b/etl/bill_savings/data_combining.py
index a111ecf2..11366360 100644
--- a/etl/bill_savings/data_combining.py
+++ b/etl/bill_savings/data_combining.py
@@ -91,3 +91,14 @@ def app():
         file_key=f"energy_consumption/{run_date}/energy_consumption_dataset.parquet",
         df=df
     )
+
+    # We also estimate the energy consumption reduction from this data, by band
+    df["total_consumption"] = df["heating_kwh"] + df["hot_water_kwh"]
+    consumption_averages = df.groupby("current-energy-rating")["total_consumption"].mean().reset_index()
+
+    # Save the consumption averages back to s3
+    save_dataframe_to_s3_parquet(
+        bucket_name="retrofit-data-dev",
+        file_key=f"energy_consumption/{run_date}/consumption_averages.parquet",
+        df=consumption_averages
+    )

From c72bd8a5ed8e375e28dc2f9e16222f722664fa35 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 8 Jul 2024 19:01:55 +0100
Subject: [PATCH 08/36] Updated checking if solar pv valid

---
 backend/Property.py                       |  9 +++++++++
 backend/app/plan/router.py                | 17 ++++++++++++++---
 recommendations/SolarPvRecommendations.py |  5 +++++
 3 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index 72d1d169..752a0868 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -62,6 +62,9 @@ class Property:
     n_bedrooms = None
     building_id = None  # Used to group properties together into a single building
 
+    # Contains the solar panel optimisation results from the Google Solar API
+    solar_panel_configuration = None
+
     def __init__(
         self,
         id,
@@ -604,6 +607,12 @@ class Property:
         self.find_energy_sources()
         self.set_current_energy_bill(energy_consumption_client)
 
+    def set_solar_panel_configuration(self, solar_panel_configuration):
+        """
+        This function inserts the solar panel configuration into the property object
+        """
+        self.solar_panel_configuration = solar_panel_configuration
+
     def set_current_energy_bill(self, energy_consumption_client):
         """
         Given what we know about the property now, estimates the current energy consumption using the UCL paper
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index c3823180..e2adbed1 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -368,12 +368,12 @@ async def trigger_plan(body: PlanTriggerRequest):
             p.get_spatial_data(uprn_filenames)
 
         # TODO: Handle the case of modelling some units as buildings and some as properties individually
-        # TODO: We should adjust the energy consumtpion to account for post-retrofit energy consumption
         building_ids = [
             {
                 "building_id": p.building_id,
                 "longitude": p.spatial["longitude"],
                 "latitude": p.spatial["latitude"],
+                # Energy consumption is adjusted for the property's expected post retrofit state
                 "energy_consumption": energy_consumption_client.estimate_new_consumption(
                     current_rating=p.data["current-energy-rating"],
                     target_rating=body.goal_value,
@@ -394,6 +394,7 @@ async def trigger_plan(body: PlanTriggerRequest):
                 if coordinate_pair not in unique_coordinates[building_id]:
                     unique_coordinates[building_id].append(coordinate_pair)
 
+            solar_panel_configuration = {}
             for building_id, coordinates in unique_coordinates.items():
                 if len(coordinates) > 1:
                     raise NotImplementedError("more than one coordinate for a building - handle me")
@@ -402,17 +403,27 @@ async def trigger_plan(body: PlanTriggerRequest):
                 energy_consumption = sum(
                     [entry['energy_consumption'] for entry in building_ids if entry['building_id'] == building_id]
                 )
-                solar_performance = solar_api_client.get(
+                solar_api_client.get(
                     longitude=coordinates["longitude"],
                     latitude=coordinates["latitude"],
                     energy_consumption=energy_consumption,
                     is_building=True,
                 )
+                solar_panel_configuration[building_id] = {
+                    "insights_data": solar_api_client.insights_data,
+                    "panel_performance": solar_api_client.panel_performance,
+                    "n_units": len([entry for entry in building_ids if entry['building_id'] == building_id])
+                }
+
+                # Insert this into the properties that have this building id
+                for p in input_properties:
+                    if p.building_id == building_id:
+                        p.set_solar_panel_configuration(solar_panel_configuration[building_id])
 
         else:
             # Model the solar potential at the property level
             for p in input_properties:
-                # TODO: Complete me!
+                # TODO: Complete me! - we probably won't do this for individual flats
                 solar_performance = solar_api_client.get(
                     longitude=p.spatial["longitude"], latitude=p.spatial["latitude"]
                 )
diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py
index 458eae12..3387fc7a 100644
--- a/recommendations/SolarPvRecommendations.py
+++ b/recommendations/SolarPvRecommendations.py
@@ -79,6 +79,11 @@ class SolarPvRecommendations:
         ]
 
     def is_solar_pv_valid(self):
+
+        # If the property is a flat but we are looking at building solar potential, we can include this
+        if (self.property.building_id is not None) and (self.property.solar_panel_configuration is not None):
+            return True
+
         is_valid_property_type = self.property.data["property-type"] in ["House", "Bungalow", "Maisonette"]
         is_valid_roof_type = (
             self.property.roof["is_flat"] or self.property.roof["is_pitched"] or self.property.roof["is_roof_room"]

From 11a20bd9a7fc5aac7fb4ca758303c0d4782aa871 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 8 Jul 2024 19:19:44 +0100
Subject: [PATCH 09/36] Added new building level solar recommendation function

---
 recommendations/SolarPvRecommendations.py | 49 +++++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py
index 3387fc7a..2de8f873 100644
--- a/recommendations/SolarPvRecommendations.py
+++ b/recommendations/SolarPvRecommendations.py
@@ -95,6 +95,50 @@ class SolarPvRecommendations:
 
         return is_valid_property_type and is_valid_roof_type and has_no_existing_solar_pv
 
+    def recommend_building_analysis(self, phase):
+        """
+        This recommendation approach handles the case of producing solar PV recommendations at the building level,
+        across multiple flats. For these recommendations, we don't include the battery option since it's impractical
+        from a space perspective.
+        :return:
+        """
+
+        panel_performance = self.property.solar_panel_configuration["panel_performance"]
+        total_roof_area = (
+            self.property.solar_panel_configuration["insights_data"]["solarPotential"]["wholeRoofStats"]["areaMeters2"]
+        )
+        n_units = self.property.solar_panel_configuration["n_units"]
+
+        best_configurations = panel_performance.head(3)
+
+        for _, recommendation_config in best_configurations.iterrows():
+            roof_coverage_percent = round(recommendation_config["panneled_roof_area"] / total_roof_area * 100)
+            # Spread the cost to the individual units
+            total_cost = recommendation_config["total_cost"] / n_units
+            kw = np.floor(recommendation_config["array_warrage"] / 100) / 10
+
+            description = (f"Install a {kw} kilowatt-peak (kWp) solar photovoltaic (PV) panel system on the roof "
+                           "of the building")
+
+            self.recommendation.append(
+                {
+                    "phase": phase,
+                    "parts": [],
+                    "type": "solar_pv",
+                    "description": description,
+                    "starting_u_value": None,
+                    "new_u_value": None,
+                    "sap_points": None,
+                    "already_installed": False,
+                    "total_cost": total_cost,
+                    # This is required for simulating the SAP impact. solar_pv_percentage is between 0 & 1 so we scale
+                    # back up here
+                    "photo_supply": roof_coverage_percent,
+                    "has_battery": False,
+                    "description_simulation": {"photo-supply": roof_coverage_percent}
+                }
+            )
+
     def recommend(self, phase):
         """
         We check if a property is potentially suitable for solar PV based on the following criteria:
@@ -107,6 +151,11 @@ class SolarPvRecommendations:
         if not self.is_solar_pv_valid():
             return
 
+        # If we have a building level analysis, we implement separate logic
+        if self.property.building_id is not None:
+            self.recommend_building_analysis(phase)
+            return
+
         solar_pv_percentage = self.property.solar_pv_percentage
         # We round up to the neaest 10%
         solar_pv_percentage = np.ceil(solar_pv_percentage * 10) / 10

From 1350d8ec9e7f84823c319b0d05a0f3c6605db195 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 8 Jul 2024 19:20:45 +0100
Subject: [PATCH 10/36] changed cost key

---
 recommendations/SolarPvRecommendations.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py
index 2de8f873..42bf03d4 100644
--- a/recommendations/SolarPvRecommendations.py
+++ b/recommendations/SolarPvRecommendations.py
@@ -130,7 +130,7 @@ class SolarPvRecommendations:
                     "new_u_value": None,
                     "sap_points": None,
                     "already_installed": False,
-                    "total_cost": total_cost,
+                    "total": total_cost,
                     # This is required for simulating the SAP impact. solar_pv_percentage is between 0 & 1 so we scale
                     # back up here
                     "photo_supply": roof_coverage_percent,

From d303db25577c19cd645badf36446934a32a74713 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 8 Jul 2024 20:41:51 +0100
Subject: [PATCH 11/36] Adding the recommendation scoring mechanisms

---
 backend/Property.py                       | 24 +++++++++++++++++++++++
 recommendations/FloorRecommendations.py   |  5 +++++
 recommendations/Recommendations.py        |  5 +++++
 recommendations/SolarPvRecommendations.py |  7 ++++---
 4 files changed, 38 insertions(+), 3 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index 752a0868..106931d8 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -174,6 +174,7 @@ class Property:
         self.hot_water_energy_source = None
 
         self.recommendations_scoring_data = []
+        self.simulation_epcs = {}
 
         self.parse_kwargs(kwargs)
 
@@ -282,6 +283,7 @@ class Property:
         """
 
         self.recommendations_scoring_data = []
+        self.simulation_epcs = {}
         phases = sorted(
             [
                 r[0]["phase"]
@@ -336,6 +338,28 @@ class Property:
                 )
                 self.recommendations_scoring_data.append(scoring_dict)
 
+            # We also use the representative recommendations to produce transformed EPCs
+            represenative_recs_to_this_phase = [
+                r for r in property_representative_recommendations
+                if r["phase"] <= phase
+            ]
+            epc_transformations = [x["description_simulation"] for x in represenative_recs_to_this_phase]
+            # It is possible that we could have two simulations applied to the same descriptions
+            # We extract these out
+            phase_epc_transformation = {}
+            for config in epc_transformations:
+                for k, v in config.items():
+                    if k in phase_epc_transformation:
+                        raise NotImplementedError(
+                            "Already have this key in the phase_epc_transformation - implement me")
+                    phase_epc_transformation[k] = v
+
+            simulation_epc = self.epc_record.prepared_epc.copy()
+            # Replace the understores with hyphens
+            simulation_epc = {k.replace("_", "-"): v for k, v in simulation_epc.items()}
+            simulation_epc.update(simulation_epc)
+            self.simulation_epcs = simulation_epc
+
     @staticmethod
     def create_recommendation_scoring_data(
         property_id,
diff --git a/recommendations/FloorRecommendations.py b/recommendations/FloorRecommendations.py
index 3f764d83..9faedb89 100644
--- a/recommendations/FloorRecommendations.py
+++ b/recommendations/FloorRecommendations.py
@@ -227,6 +227,11 @@ class FloorRecommendations(Definitions):
                             "new_u_value": new_u_value,
                             "sap_points": None,
                             "already_installed": already_installed,
+                            "description_simulation": {
+                                "floor-description": "Solid, insulated" if
+                                material["type"] == "solid_floor_insulation"
+                                else "Suspended, insulated"
+                            },
                             **cost_result
                         }
                     )
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 827ca928..1541246a 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -221,6 +221,7 @@ class Recommendations:
 
             has_u_value = recommendations_by_type[0].get("new_u_value") is not None
             has_sap_points = recommendations_by_type[0].get("sap_points") is not None
+            has_rank = recommendations_by_type[0].get("rank") is not None
 
             # When check if these recommendations have two different types, such as solid wall insulation
             # If we have multiple types, we group by type and then select the best recommendation for each type
@@ -238,6 +239,10 @@ class Recommendations:
                     # Sort the options by the cost per SAP point improvement - the lower the better
                     for rec in recommendations:
                         rec["efficiency"] = rec["total"] / rec["sap_points"]
+                elif has_rank:
+                    # Sort the options by rank - the lower the better
+                    for rec in recommendations:
+                        rec["efficiency"] = rec["rank"]
                 else:
                     # Sort the options by cost - the lower the better
                     for rec in recommendations:
diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py
index 42bf03d4..18bfdced 100644
--- a/recommendations/SolarPvRecommendations.py
+++ b/recommendations/SolarPvRecommendations.py
@@ -109,9 +109,9 @@ class SolarPvRecommendations:
         )
         n_units = self.property.solar_panel_configuration["n_units"]
 
-        best_configurations = panel_performance.head(3)
+        best_configurations = panel_performance.head(3).reset_index(drop=True)
 
-        for _, recommendation_config in best_configurations.iterrows():
+        for rank, recommendation_config in best_configurations.iterrows():
             roof_coverage_percent = round(recommendation_config["panneled_roof_area"] / total_roof_area * 100)
             # Spread the cost to the individual units
             total_cost = recommendation_config["total_cost"] / n_units
@@ -135,7 +135,8 @@ class SolarPvRecommendations:
                     # back up here
                     "photo_supply": roof_coverage_percent,
                     "has_battery": False,
-                    "description_simulation": {"photo-supply": roof_coverage_percent}
+                    "description_simulation": {"photo-supply": roof_coverage_percent},
+                    "rank": rank  # Rank is used to get the representative recommendation - rank 0 will be chosen
                 }
             )
 

From 21a334d4f393f90812dafb9e81edbb9e29cdd6c4 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 8 Jul 2024 22:20:19 +0100
Subject: [PATCH 12/36] Added in the new cost apis

---
 backend/Property.py                 |  4 ++--
 backend/app/config.py               | 23 ++++++++++++++++++++---
 backend/app/plan/router.py          | 14 +++-----------
 backend/ml_models/api.py            | 21 +++++++++++++++++++--
 etl/bill_savings/data_collection.py |  2 +-
 5 files changed, 45 insertions(+), 19 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index 106931d8..c1568de0 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -357,8 +357,8 @@ class Property:
             simulation_epc = self.epc_record.prepared_epc.copy()
             # Replace the understores with hyphens
             simulation_epc = {k.replace("_", "-"): v for k, v in simulation_epc.items()}
-            simulation_epc.update(simulation_epc)
-            self.simulation_epcs = simulation_epc
+            simulation_epc.update(phase_epc_transformation)
+            self.simulation_epcs[phase] = simulation_epc
 
     @staticmethod
     def create_recommendation_scoring_data(
diff --git a/backend/app/config.py b/backend/app/config.py
index 6f2e405b..ef48c317 100644
--- a/backend/app/config.py
+++ b/backend/app/config.py
@@ -8,9 +8,6 @@ class Settings(BaseSettings):
     SECRET_KEY: str
     ENVIRONMENT: str
     DATA_BUCKET: str
-    SAP_PREDICTIONS_BUCKET: str
-    CARBON_PREDICTIONS_BUCKET: str
-    HEAT_PREDICTIONS_BUCKET: str
     PLAN_TRIGGER_BUCKET: str
     EPC_AUTH_TOKEN: str
     ORDNANCE_SURVEY_API_KEY: str
@@ -21,6 +18,14 @@ class Settings(BaseSettings):
     DB_PORT: str
     DB_NAME: str
 
+    # Prediction buckets
+    SAP_PREDICTIONS_BUCKET: str
+    CARBON_PREDICTIONS_BUCKET: str
+    HEAT_PREDICTIONS_BUCKET: str
+    LIGHTING_COST_PREDICTIONS_BUCKET: str
+    HEATING_COST_PREDICTIONS_BUCKET: str
+    HOT_WATER_COST_PREDICTIONS_BUCKET: str
+
     class Config:
         env_file = "backend/.env"
 
@@ -28,3 +33,15 @@ class Settings(BaseSettings):
 @lru_cache()
 def get_settings():
     return Settings()
+
+
+@lru_cache()
+def get_prediction_buckets():
+    return {
+        "sap_change_predictions": get_settings().SAP_PREDICTIONS_BUCKET,
+        "heat_demand_predictions": get_settings().HEAT_PREDICTIONS_BUCKET,
+        "carbon_change_predictions": get_settings().CARBON_PREDICTIONS_BUCKET,
+        "lighting_cost_predictions": get_settings().LIGHTING_COST_PREDICTIONS_BUCKET,
+        "heating_cost_predictions": get_settings().HEATING_COST_PREDICTIONS_BUCKET,
+        "hot_water_cost_predictions": get_settings().HOT_WATER_COST_PREDICTIONS_BUCKET
+    }
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index e2adbed1..609d737f 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -10,7 +10,7 @@ from sqlalchemy.exc import IntegrityError, OperationalError
 from sqlalchemy.orm import sessionmaker
 from starlette.responses import Response
 
-from backend.app.config import get_settings
+from backend.app.config import get_settings, get_prediction_buckets
 from backend.app.db.connection import db_engine
 from backend.app.db.functions.materials_functions import get_materials
 from backend.app.db.functions.portfolio_functions import aggregate_portfolio_recommendations
@@ -462,21 +462,13 @@ async def trigger_plan(body: PlanTriggerRequest):
 
         model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at)
 
-        all_predictions = {
-            "sap_change_predictions": pd.DataFrame(),
-            "heat_demand_predictions": pd.DataFrame(),
-            "carbon_change_predictions": pd.DataFrame()
-        }
+        all_predictions = model_api.predictions_template()
         to_loop_over = range(0, recommendations_scoring_data.shape[0], SCORING_BATCH_SIZE)
         for chunk in tqdm(to_loop_over, total=len(to_loop_over)):
             predictions_dict = model_api.predict_all(
                 df=recommendations_scoring_data.iloc[chunk:chunk + SCORING_BATCH_SIZE],
                 bucket=get_settings().DATA_BUCKET,
-                prediction_buckets={
-                    "sap_change_predictions": get_settings().SAP_PREDICTIONS_BUCKET,
-                    "heat_demand_predictions": get_settings().HEAT_PREDICTIONS_BUCKET,
-                    "carbon_change_predictions": get_settings().CARBON_PREDICTIONS_BUCKET
-                }
+                prediction_buckets=get_prediction_buckets()
             )
 
             # Append the predictions to the predictions dictionary
diff --git a/backend/ml_models/api.py b/backend/ml_models/api.py
index a2024dd7..4844d7fd 100644
--- a/backend/ml_models/api.py
+++ b/backend/ml_models/api.py
@@ -11,13 +11,19 @@ class ModelApi:
     MODEL_PREFIXES = [
         "sap_change_predictions",
         "heat_demand_predictions",
-        "carbon_change_predictions"
+        "carbon_change_predictions",
+        "lighting_cost_predictions",
+        "heating_cost_predictions",
+        "hot_water_cost_predictions",
     ]
 
     MODEL_URLS = {
         "sap_change_predictions": "sapmodel",
         "heat_demand_predictions": "heatmodel",
-        "carbon_change_predictions": "carbonmodel"
+        "carbon_change_predictions": "carbonmodel",
+        "lighting_cost_predictions": "lightingmodel",
+        "heating_cost_predictions": "heatingmodel",
+        "hot_water_cost_predictions": "hotwatermodel",
     }
 
     def __init__(
@@ -39,6 +45,17 @@ class ModelApi:
         self.portfolio_id = portfolio_id
         self.timestamp = timestamp
 
+    @staticmethod
+    def predictions_template():
+        return {
+            "sap_change_predictions": pd.DataFrame(),
+            "heat_demand_predictions": pd.DataFrame(),
+            "carbon_change_predictions": pd.DataFrame(),
+            "lighting_cost_predictions": pd.DataFrame(),
+            "heating_cost_predictions": pd.DataFrame(),
+            "hot_water_cost_predictions": pd.DataFrame(),
+        }
+
     def upload_scoring_data(self, df: pd.DataFrame, bucket: str, model_prefix: str) -> str:
         """
         The sap model api needs a scoring data that is sitting in s3 to use as a dataset to score on
diff --git a/etl/bill_savings/data_collection.py b/etl/bill_savings/data_collection.py
index c51bf59e..be141c01 100644
--- a/etl/bill_savings/data_collection.py
+++ b/etl/bill_savings/data_collection.py
@@ -133,7 +133,7 @@ def app():
     energy_consumption_data = []
     for i, directory in tqdm(enumerate(epc_directories), total=len(epc_directories)):
         # Skip the first 50
-        if i < 26:
+        if i < 110:
             continue
 
         data = pd.read_csv(directory / "certificates.csv", low_memory=False)

From 68e9b83c9f3cc9bb054b4d9b6e481a58b780a477 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 9 Jul 2024 11:04:18 +0100
Subject: [PATCH 13/36] adjusting costs so they are based on 2024 figures

---
 backend/Property.py | 115 +++++++++++++++++++++++++++++---------------
 1 file changed, 76 insertions(+), 39 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index c1568de0..98df4ceb 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -643,6 +643,15 @@ class Property:
         https://www.sciencedirect.com/science/article/pii/S0378778823002542
         :return:
         """
+        # We should adjust the costs first and then calculate the energy consumption
+        # These are the costs at the time that the EPC was created - we need a cost based on today's prices so
+        # we do the following:
+        # 1) Score the models to get kwh for heating and hot water
+        # 2) Adjust the kwh values with the UCL paper
+        # 3) Convert the kwh values to costs today using current prices
+        #
+        # For the moment, we'll just use the same cost for lighting, since the value is quite low
+
         scoring_df = pd.DataFrame([self.epc_record.prepared_epc])
         # Change columns from underscores to hyphens
         scoring_df.columns = [
@@ -651,30 +660,6 @@ class Property:
         for col in ["heating_kwh", "hot_water_kwh"]:
             scoring_df[col] = None
 
-        # We should adjust the costs first and then calculate the energy consumption
-        lighting_cost = float(self.data["lighting-cost-current"])
-        heating_cost = float(self.data["heating-cost-current"])
-        hot_water_cost = float(self.data["hot-water-cost-current"])
-
-        adjusted_heating_cost = AnnualBillSavings.adjust_energy_cost_to_metered(
-            epc_energy_cost=heating_cost,
-            current_epc_rating=self.data["current-energy-rating"],
-        )
-
-        adjusted_hot_water_cost = AnnualBillSavings.adjust_energy_cost_to_metered(
-            epc_energy_cost=hot_water_cost,
-            current_epc_rating=self.data["current-energy-rating"],
-        )
-
-        adjusted_lighting_cost = AnnualBillSavings.adjust_energy_cost_to_metered(
-            epc_energy_cost=lighting_cost,
-            current_epc_rating=self.data["current-energy-rating"],
-        )
-
-        scoring_df["heating-cost-current"] = [adjusted_heating_cost]
-        scoring_df["hot-water-cost-current"] = [adjusted_hot_water_cost]
-        scoring_df["lighting-cost-current"] = [adjusted_lighting_cost]
-
         energy_consumption_client.data = None
         heating_prediction = energy_consumption_client.score_new_data(
             new_data=scoring_df, target="heating_kwh"
@@ -685,30 +670,82 @@ class Property:
         )[0]
 
         # We convert the lighting cost into kwh, just using the price cap
-        lighting_kwh = float(adjusted_lighting_cost) / AnnualBillSavings.ELECTRICITY_PRICE_CAP
+        lighting_kwh = float(self.data["lighting-cost-current"]) / AnnualBillSavings.ELECTRICITY_PRICE_CAP
 
-        appliances_energy_use = AnnualBillSavings.estimate_appliances_energy_use(total_floor_area=self.floor_area)
-        appliances_energy_cost = appliances_energy_use * AnnualBillSavings.ELECTRICITY_PRICE_CAP
+        appliances_kwh = AnnualBillSavings.estimate_appliances_energy_use(total_floor_area=self.floor_area)
 
-        total_energy_consumption = heating_prediction + hot_water_prediction + lighting_kwh + appliances_energy_use
-
-        self.current_adjusted_energy = AnnualBillSavings.adjust_energy_to_metered(
-            epc_energy_consumption=total_energy_consumption,
+        adjusted_heating_kwh = AnnualBillSavings.adjust_energy_cost_to_metered(
+            epc_energy_cost=heating_prediction,
             current_epc_rating=self.data["current-energy-rating"],
         )
 
+        adjusted_hot_water_kwh = AnnualBillSavings.adjust_energy_cost_to_metered(
+            epc_energy_cost=hot_water_prediction,
+            current_epc_rating=self.data["current-energy-rating"],
+        )
+
+        adjusted_lighting_kwh = AnnualBillSavings.adjust_energy_cost_to_metered(
+            epc_energy_cost=lighting_kwh,
+            current_epc_rating=self.data["current-energy-rating"],
+        )
+
+        adjusted_applicances_kwh = AnnualBillSavings.adjust_energy_cost_to_metered(
+            epc_energy_cost=appliances_kwh,
+            current_epc_rating=self.data["current-energy-rating"],
+        )
+
+        # Convert to cost
+        if self.heating_energy_source == "Electricity":
+            adjusted_heating_cost = adjusted_heating_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP
+        elif self.heating_energy_source == "Natural Gas":
+            adjusted_heating_cost = adjusted_heating_kwh * AnnualBillSavings.GAS_PRICE_CAP
+        else:
+            raise NotImplementedError("Not implemented cost for this fuel type")
+
+        if self.hot_water_energy_source == "Electricity":
+            adjusted_hot_water_cost = adjusted_hot_water_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP
+        elif self.hot_water_energy_source == "Natural Gas":
+            adjusted_hot_water_cost = adjusted_hot_water_kwh * AnnualBillSavings.GAS_PRICE_CAP
+        else:
+            raise NotImplementedError("Not implemented cost for this fuel type")
+
+        adjusted_lighting_cost = adjusted_lighting_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP
+
+        adjusted_appliances_cost = adjusted_applicances_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP
+
+        # Sum up the adjusted kwh figures
+        self.current_adjusted_energy = (
+            adjusted_heating_kwh + adjusted_hot_water_kwh + adjusted_lighting_kwh + adjusted_applicances_kwh
+        )
+
         self.energy_cost_estimates = {
-            "heating": adjusted_heating_cost,
-            "hot_water": adjusted_hot_water_cost,
-            "lighting": adjusted_lighting_cost,
-            "appliances": appliances_energy_cost
+            "adjusted": {
+                "heating": adjusted_heating_cost,
+                "hot_water": adjusted_hot_water_cost,
+                "lighting": adjusted_lighting_cost,
+                "appliances": adjusted_appliances_cost
+            },
+            "original": {
+                "heating": float(self.data["heating-cost-current"]),
+                "hot_water": float(self.data["hot-water-cost-current"]),
+                "lighting": float(self.data["lighting-cost-current"]),
+                "appliances": appliances_kwh
+            }
         }
 
         self.energy_consumption_estimates = {
-            "heating": heating_prediction,
-            "hot_water": hot_water_prediction,
-            "lighting": lighting_kwh,
-            "appliances": appliances_energy_use
+            "adjusted": {
+                "heating": adjusted_heating_kwh,
+                "hot_water": adjusted_hot_water_kwh,
+                "lighting": adjusted_lighting_kwh,
+                "appliances": adjusted_applicances_kwh
+            },
+            "original": {
+                "heating": heating_prediction,
+                "hot_water": hot_water_prediction,
+                "lighting": lighting_kwh,
+                "appliances": appliances_kwh
+            }
         }
 
         self.expected_energy_bill = (

From 5274b9b7138ba4b3ee503fdda16cc004abfa4507 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 9 Jul 2024 12:33:38 +0100
Subject: [PATCH 14/36] updating bills costing methodology to transform costs
 to today

---
 backend/Property.py | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index 98df4ceb..31612a49 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -697,15 +697,19 @@ class Property:
         # Convert to cost
         if self.heating_energy_source == "Electricity":
             adjusted_heating_cost = adjusted_heating_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP
+            unadjusted_heating_cost = heating_prediction * AnnualBillSavings.ELECTRICITY_PRICE_CAP
         elif self.heating_energy_source == "Natural Gas":
             adjusted_heating_cost = adjusted_heating_kwh * AnnualBillSavings.GAS_PRICE_CAP
+            unadjusted_heating_cost = heating_prediction * AnnualBillSavings.GAS_PRICE_CAP
         else:
             raise NotImplementedError("Not implemented cost for this fuel type")
 
         if self.hot_water_energy_source == "Electricity":
             adjusted_hot_water_cost = adjusted_hot_water_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP
+            unadjusted_hot_water_cost = hot_water_prediction * AnnualBillSavings.ELECTRICITY_PRICE_CAP
         elif self.hot_water_energy_source == "Natural Gas":
             adjusted_hot_water_cost = adjusted_hot_water_kwh * AnnualBillSavings.GAS_PRICE_CAP
+            unadjusted_hot_water_cost = hot_water_prediction * AnnualBillSavings.GAS_PRICE_CAP
         else:
             raise NotImplementedError("Not implemented cost for this fuel type")
 
@@ -725,11 +729,16 @@ class Property:
                 "lighting": adjusted_lighting_cost,
                 "appliances": adjusted_appliances_cost
             },
-            "original": {
+            "unadjusted": {
+                "heating": unadjusted_heating_cost,
+                "hot_water": unadjusted_hot_water_cost,
+                "lighting": float(self.data["lighting-cost-current"]),
+                "appliances": appliances_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP
+            },
+            "epc": {
                 "heating": float(self.data["heating-cost-current"]),
                 "hot_water": float(self.data["hot-water-cost-current"]),
                 "lighting": float(self.data["lighting-cost-current"]),
-                "appliances": appliances_kwh
             }
         }
 
@@ -740,7 +749,7 @@ class Property:
                 "lighting": adjusted_lighting_kwh,
                 "appliances": adjusted_applicances_kwh
             },
-            "original": {
+            "unadjusted": {
                 "heating": heating_prediction,
                 "hot_water": hot_water_prediction,
                 "lighting": lighting_kwh,
@@ -749,7 +758,7 @@ class Property:
         }
 
         self.expected_energy_bill = (
-            adjusted_heating_cost + adjusted_hot_water_cost + adjusted_lighting_cost + appliances_energy_cost
+            adjusted_heating_cost + adjusted_hot_water_cost + adjusted_lighting_cost + adjusted_appliances_cost
         )
 
     def set_spatial(self, spatial: pd.DataFrame):

From 4c79abe51223816157653c0056931d001413cb23 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 9 Jul 2024 14:03:21 +0100
Subject: [PATCH 15/36] Completed set up of bills costs

---
 backend/Property.py                        | 75 +++++++++++---------
 backend/app/plan/router.py                 |  3 +-
 etl/bill_savings/EnergyConsumptionModel.py | 81 +++++++++++++++++++---
 3 files changed, 115 insertions(+), 44 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index 31612a49..188564e4 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -643,14 +643,24 @@ class Property:
         https://www.sciencedirect.com/science/article/pii/S0378778823002542
         :return:
         """
-        # We should adjust the costs first and then calculate the energy consumption
-        # These are the costs at the time that the EPC was created - we need a cost based on today's prices so
-        # we do the following:
-        # 1) Score the models to get kwh for heating and hot water
-        # 2) Adjust the kwh values with the UCL paper
-        # 3) Convert the kwh values to costs today using current prices
-        #
-        # For the moment, we'll just use the same cost for lighting, since the value is quite low
+
+        # We get the following things:
+        # 1) Today's cost. This gives us a baseline figure for what the cost is today
+        # 2) Predicted kWh
+
+        # Today's costs
+        todays_heating_cost = energy_consumption_client.convert_cost_to_today(
+            original_cost=float(self.data["heating-cost-current"]),
+            lodgement_date=pd.Timestamp(self.epc_record.prepared_epc["lodgement_date"])
+        )
+        todays_hot_water_cost = energy_consumption_client.convert_cost_to_today(
+            original_cost=float(self.data["hot-water-cost-current"]),
+            lodgement_date=pd.Timestamp(self.epc_record.prepared_epc["lodgement_date"])
+        )
+        todays_lighting_cost = energy_consumption_client.convert_cost_to_today(
+            original_cost=float(self.data["lighting-cost-current"]),
+            lodgement_date=pd.Timestamp(self.epc_record.prepared_epc["lodgement_date"])
+        )
 
         scoring_df = pd.DataFrame([self.epc_record.prepared_epc])
         # Change columns from underscores to hyphens
@@ -694,33 +704,34 @@ class Property:
             current_epc_rating=self.data["current-energy-rating"],
         )
 
-        # Convert to cost
-        if self.heating_energy_source == "Electricity":
-            adjusted_heating_cost = adjusted_heating_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP
-            unadjusted_heating_cost = heating_prediction * AnnualBillSavings.ELECTRICITY_PRICE_CAP
-        elif self.heating_energy_source == "Natural Gas":
-            adjusted_heating_cost = adjusted_heating_kwh * AnnualBillSavings.GAS_PRICE_CAP
-            unadjusted_heating_cost = heating_prediction * AnnualBillSavings.GAS_PRICE_CAP
-        else:
-            raise NotImplementedError("Not implemented cost for this fuel type")
+        # Adjust today's cost figures with the UCL model
+        adjusted_heating_cost = AnnualBillSavings.adjust_energy_cost_to_metered(
+            epc_energy_cost=todays_heating_cost,
+            current_epc_rating=self.data["current-energy-rating"],
+        )
 
-        if self.hot_water_energy_source == "Electricity":
-            adjusted_hot_water_cost = adjusted_hot_water_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP
-            unadjusted_hot_water_cost = hot_water_prediction * AnnualBillSavings.ELECTRICITY_PRICE_CAP
-        elif self.hot_water_energy_source == "Natural Gas":
-            adjusted_hot_water_cost = adjusted_hot_water_kwh * AnnualBillSavings.GAS_PRICE_CAP
-            unadjusted_hot_water_cost = hot_water_prediction * AnnualBillSavings.GAS_PRICE_CAP
-        else:
-            raise NotImplementedError("Not implemented cost for this fuel type")
+        adjusted_hot_water_cost = AnnualBillSavings.adjust_energy_cost_to_metered(
+            epc_energy_cost=todays_hot_water_cost,
+            current_epc_rating=self.data["current-energy-rating"],
+        )
 
-        adjusted_lighting_cost = adjusted_lighting_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP
+        adjusted_lighting_cost = AnnualBillSavings.adjust_energy_cost_to_metered(
+            epc_energy_cost=todays_lighting_cost,
+            current_epc_rating=self.data["current-energy-rating"],
+        )
 
-        adjusted_appliances_cost = adjusted_applicances_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP
+        adjusted_appliances_cost = AnnualBillSavings.adjust_energy_cost_to_metered(
+            epc_energy_cost=appliances_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP,
+            current_epc_rating=self.data["current-energy-rating"],
+        )
 
         # Sum up the adjusted kwh figures
         self.current_adjusted_energy = (
             adjusted_heating_kwh + adjusted_hot_water_kwh + adjusted_lighting_kwh + adjusted_applicances_kwh
         )
+        self.expected_energy_bill = (
+            adjusted_heating_cost + adjusted_hot_water_cost + adjusted_lighting_cost + adjusted_appliances_cost
+        )
 
         self.energy_cost_estimates = {
             "adjusted": {
@@ -730,9 +741,9 @@ class Property:
                 "appliances": adjusted_appliances_cost
             },
             "unadjusted": {
-                "heating": unadjusted_heating_cost,
-                "hot_water": unadjusted_hot_water_cost,
-                "lighting": float(self.data["lighting-cost-current"]),
+                "heating": todays_heating_cost,
+                "hot_water": todays_hot_water_cost,
+                "lighting": todays_lighting_cost,
                 "appliances": appliances_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP
             },
             "epc": {
@@ -757,10 +768,6 @@ class Property:
             }
         }
 
-        self.expected_energy_bill = (
-            adjusted_heating_cost + adjusted_hot_water_cost + adjusted_lighting_cost + adjusted_appliances_cost
-        )
-
     def set_spatial(self, spatial: pd.DataFrame):
         """
         Sets whether the property is in a conservation area given the output of the ConservationAreaClient
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 609d737f..7a0bba2a 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -359,7 +359,8 @@ async def trigger_plan(body: PlanTriggerRequest):
             },
             dummy_schema_path=f"model_directory/energy_consumption_model/{dataset_version}_dummy_schema.pkl",
             consumption_average_path=f"energy_consumption/{dataset_version}/consumption_averages.parquet",
-            cleaned=cleaned
+            cleaned=cleaned,
+            environment=get_settings().ENVIRONMENT
         )
 
         logger.info("Getting spatial data")
diff --git a/etl/bill_savings/EnergyConsumptionModel.py b/etl/bill_savings/EnergyConsumptionModel.py
index e7658de5..9a7d6523 100644
--- a/etl/bill_savings/EnergyConsumptionModel.py
+++ b/etl/bill_savings/EnergyConsumptionModel.py
@@ -1,11 +1,10 @@
 import pandas as pd
 import numpy as np
 from xgboost import XGBRegressor
-from datetime import datetime
 from sklearn.model_selection import train_test_split
 from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
 from sklearn.feature_selection import RFECV
-from utils.s3 import save_pickle_to_s3, read_pickle_from_s3, read_dataframe_from_s3_parquet
+from utils.s3 import save_pickle_to_s3, read_pickle_from_s3, read_dataframe_from_s3_parquet, read_csv_from_s3
 from utils.logger import setup_logger
 
 logger = setup_logger()
@@ -46,11 +45,17 @@ class EnergyConsumptionModel:
         "low-energy-lighting", "environment-impact-current", "energy-tariff", "current-energy-rating"
     ]
 
-    def __init__(self, cleaned, model_paths=None, dummy_schema_path=None, consumption_average_path=None, n_jobs=1):
+    retail_price_comparison = None
+
+    def __init__(
+        self, cleaned, model_paths=None, dummy_schema_path=None, consumption_average_path=None, n_jobs=1,
+        environment="dev"
+    ):
         self.cleaned = cleaned
         self.models = {}
         self.model_paths = model_paths or {}
         self.n_jobs = n_jobs
+        self.environment = environment
 
         self.data = None
         self.input_data = None
@@ -76,26 +81,84 @@ class EnergyConsumptionModel:
         if model_paths:
             for target, path in model_paths.items():
                 # Read model
-                self.models[target] = read_pickle_from_s3(bucket_name="retrofit-model-directory-dev", s3_file_name=path)
+                self.models[target] = read_pickle_from_s3(
+                    bucket_name=f"retrofit-model-directory-{environment}", s3_file_name=path
+                )
                 # Read dummy schema
 
         if dummy_schema_path:
             self.dummy_schema = read_pickle_from_s3(
-                bucket_name="retrofit-model-directory-dev",
+                bucket_name=f"retrofit-model-directory-{environment}",
                 s3_file_name=dummy_schema_path
             )
 
         self.consumption_averages = None
         if consumption_average_path:
             self.consumption_averages = read_dataframe_from_s3_parquet(
-                bucket_name="retrofit-data-dev",
+                bucket_name=f"retrofit-data-{environment}",
                 file_key=consumption_average_path
             )
 
+            # We also retrieve the newest retail price comparison data which comes from Ofgem:
+            # https://www.ofgem.gov.uk/energy-data-and-research/data-portal/retail-market-indicators
+            # We use the retail price comparison by company and tariff type data
+            self.read_retail_price_comparison()
+
+    def read_retail_price_comparison(self):
+        data = read_csv_from_s3(
+            bucket_name=f"retrofit-data-{self.environment}",
+            filepath="energy_consumption/retail-price-comparison.csv"
+        )
+        header = ['Date', 'Average standard variable tariff (Large legacy suppliers)',
+                  'Average standard variable tariff (Other suppliers)', 'Average fixed tariff',
+                  'Cheapest tariff (Large legacy suppliers)', 'Cheapest tariff (All suppliers)',
+                  'Cheapest tariff (Basket)', 'Default tariff cap level']
+
+        # Extract data rows
+        data_rows = []
+        for row in data[1:]:
+            date = row['\ufeff"']
+            values = row[None]
+            data_rows.append([date] + values)
+
+        self.retail_price_comparison = pd.DataFrame(data_rows, columns=header)
+        self.retail_price_comparison['Date'] = pd.to_datetime(self.retail_price_comparison['Date'], errors='coerce')
+
+    def convert_cost_to_today(self, original_cost, lodgement_date):
+        """
+        Given energy costs in an EPC, this function converts that energy cost to a figure based on today's energy costs
+        (or as close to today as possible)
+        :param original_cost: The original energy cost
+        :param lodgement_date: The date the EPC was lodged
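+        :return: The original cost scaled by the ratio of the latest tariff to the tariff closest to the lodgement date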
+        """
+        closest_date = self.retail_price_comparison.iloc[
+            (self.retail_price_comparison['Date'] - lodgement_date).abs().argsort()[:1]
+        ]['Date'].values[0]
+        closest_date = pd.Timestamp(closest_date)
+
+        # Extract the tariff price on the closest date
+        tariff_2024 = self.retail_price_comparison[
+            self.retail_price_comparison['Date'] == closest_date
+            ]['Average standard variable tariff (Large legacy suppliers)'].values[0]
+
+        # Extract the latest available tariff price
+        latest_tariff = self.retail_price_comparison[
+            'Average standard variable tariff (Large legacy suppliers)'
+        ].iloc[-1]
+
+        # Calculate the ratio
+        ratio = float(latest_tariff) / float(tariff_2024)
+
+        # Scale the original cost by the ratio to express it in today's prices
+        updated_cost = original_cost * ratio
+
+        return updated_cost
+
     def read_dataset(self, file_path):
         """Reads the dataset from the specified file path."""
         logger.info(f"Reading dataset from {file_path}")
-        self.data = read_dataframe_from_s3_parquet(bucket_name="retrofit-data-dev", file_key=file_path)
+        self.data = read_dataframe_from_s3_parquet(bucket_name=f"retrofit-data-{self.environment}", file_key=file_path)
         self.input_data = self.data.copy()
 
     def feature_engineering(self, drop_first=False):
@@ -392,7 +455,7 @@ class EnergyConsumptionModel:
         logger.info(f"Saving model for target {target}")
         save_pickle_to_s3(
             self.models[target],
-            bucket_name="retrofit-model-directory-dev",
+            bucket_name=f"retrofit-model-directory-{self.environment}",
             s3_file_name=f"model_directory/energy_consumption_model/{target}_{dataset_version}.pkl"
         )
 
@@ -400,7 +463,7 @@ class EnergyConsumptionModel:
         logger.info("Saving dummy schema for target {target}")
         save_pickle_to_s3(
             self.dummy_schema,
-            bucket_name="retrofit-model-directory-dev",
+            bucket_name=f"retrofit-model-directory-{self.environment}",
             s3_file_name=f"model_directory/energy_consumption_model/{dataset_version}_dummy_schema.pkl"
         )
 

From b9f64722cef0c9fe540f20e66eeefe6414e4be89 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 9 Jul 2024 15:52:16 +0100
Subject: [PATCH 16/36] integrating scoring heat and hot water models and
 adjusting days_to for the cost models

---
 backend/Property.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/backend/Property.py b/backend/Property.py
index 188564e4..433bebe7 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -2,8 +2,10 @@ import os
 import ast
 from itertools import groupby
 import pandas as pd
+from datetime import datetime, timedelta
 
 from etl.epc.Dataset import TrainingDataset
+from etl.epc.Record import EPCRecord
 from etl.epc.settings import LATEST_FIELD, MANDATORY_FIXED_FEATURES
 from etl.epc_clean.epc_attributes.all_cleaners import all_cleaner_map
 from etl.solar.SolarPhotoSupply import SolarPhotoSupply
@@ -291,6 +293,7 @@ class Property:
                 if r[0]["phase"] is not None
             ]
         )
+        simulation_lodgment_date = (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d")
 
         for phase in phases:
             property_recommendations_by_phase = [
@@ -323,6 +326,10 @@ class Property:
                 0
             ].copy()
 
+            recommendation_record["days_to_ending"] = EPCRecord._calculate_days_to(
+                lodgement_date=simulation_lodgment_date,
+            )
+
             for rec in property_recommendations_by_phase:
                 # We simulate the impact of the recommendation at this current phase, and all of the prior phases
 
@@ -355,6 +362,13 @@ class Property:
                     phase_epc_transformation[k] = v
 
             simulation_epc = self.epc_record.prepared_epc.copy()
+            # Insert static values
+            simulation_epc["lodgement_date"] = simulation_lodgment_date
+            # Insert today's costs, unadjusted (i.e. in line with what we expect the EPC would say today)
+            simulation_epc["heating-cost-current"] = round(self.energy_cost_estimates["unadjusted"]["heating"])
+            simulation_epc["lighting-cost-current"] = round(self.energy_cost_estimates["unadjusted"]["lighting"])
+            simulation_epc["hot-water-cost-current"] = round(self.energy_cost_estimates["unadjusted"]["hot_water"])
+
             # Replace the understores with hyphens
             simulation_epc = {k.replace("_", "-"): v for k, v in simulation_epc.items()}
             simulation_epc.update(phase_epc_transformation)

From 6c07a290e751461bfe59eb7f07fa00e0165b6813 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 9 Jul 2024 16:00:36 +0100
Subject: [PATCH 17/36] Adding back in potential columns

---
 etl/epc/Pipeline.py | 18 ++++++++++-----
 etl/epc/Record.py   | 56 ++++++++++++++++++++++-----------------------
 2 files changed, 40 insertions(+), 34 deletions(-)

diff --git a/etl/epc/Pipeline.py b/etl/epc/Pipeline.py
index bc3bfd91..0601d6ec 100644
--- a/etl/epc/Pipeline.py
+++ b/etl/epc/Pipeline.py
@@ -40,7 +40,7 @@ VARIABLE_DATA_FEATURES = (
     COMPONENT_FEATURES
     + ROOM_FEATURES
     + EFFICIENCY_FEATURES
-    # + POTENTIAL_COLUMNS
+    + POTENTIAL_COLUMNS
     + ["lodgement_date", RDSAP_RESPONSE, HEAT_DEMAND_RESPONSE, CARBON_RESPONSE]
 )
 COST_FEATURES = [x.lower() for x in COST_FEATURES]
@@ -288,9 +288,11 @@ class EPCPipeline:
             for x in variable_data.to_dict(orient="records")
         ]
 
-        # TODO: We want to be able to provide value for the u values in the main pipeline so this will need to be part of the EPCRecord
+        # TODO: We want to be able to provide value for the u values in the main pipeline so this will need to be
+        #  part of the EPCRecord
 
-        # We can use multiple types of comparison datasets - i.e. Compare consecutive records, or compare all permutations of records
+        # We can use multiple types of comparison datasets - i.e. Compare consecutive records, or compare all
+        # permutations of records
         property_difference_records = self._generate_property_difference_records(
             epc_records, uprn, directory, fixed_data
         )
@@ -311,7 +313,8 @@ class EPCPipeline:
 
         property_difference_records: list = []
 
-        # property_difference_records = self._compare_consecutive_epcs(epc_records, uprn, directory, fixed_data, property_difference_records)
+        # property_difference_records = self._compare_consecutive_epcs(epc_records, uprn, directory, fixed_data,
+        # property_difference_records)
 
         property_difference_records = self._compare_all_permutation_epcs(
             epc_records, uprn, directory, fixed_data, property_difference_records
@@ -353,7 +356,9 @@ class EPCPipeline:
                     if not difference_record.ensure_adequate_data():
                         # Rdsap hasn't changed but we have enough data to use this record
                         # i.e. all fields aside from mechnical ventilation are the same]
-                        # self.check_records.append({"uprn": uprn, "directory_name": directory.name, "difference_record": difference_record, "earliest_record": earliest_record, "latest_record": latest_record})
+                        # self.check_records.append({"uprn": uprn, "directory_name": directory.name,
+                        # "difference_record": difference_record, "earliest_record": earliest_record,
+                        # "latest_record": latest_record})
                         continue
 
                 all_equal = difference_record.compare_fields_in_records(
@@ -402,7 +407,8 @@ class EPCPipeline:
                 if not difference_record.ensure_adequate_data():
                     # Rdsap hasn't changed but we have enough data to use this record
                     # i.e. all fields aside from mechnical ventilation are the same]
-                    # self.check_records.append({"uprn": uprn, "directory_name": directory.name, "difference_record": difference_record, "earliest_record": earliest_record, "latest_record": latest_record})
+                    # self.check_records.append({"uprn": uprn, "directory_name": directory.name, "difference_record":
+                    # difference_record, "earliest_record": earliest_record, "latest_record": latest_record})
                     continue
 
             all_equal = difference_record.compare_fields_in_records(
diff --git a/etl/epc/Record.py b/etl/epc/Record.py
index b8471ccf..cc70d42b 100644
--- a/etl/epc/Record.py
+++ b/etl/epc/Record.py
@@ -79,10 +79,10 @@ class EPCRecord:
     lighting_cost_current: float = None
     heating_cost_current: float = None
     hot_water_cost_current: float = None
-    # potential_energy_efficiency: float = None
-    # environment_impact_potential: float = None
-    # energy_consumption_potential: float = None
-    # co2_emissions_potential: float = None
+    potential_energy_efficiency: float = None
+    environment_impact_potential: float = None
+    energy_consumption_potential: float = None
+    co2_emissions_potential: float = None
     lodgement_date: str = None
     current_energy_efficiency: int = None
     energy_consumption_current: int = None
@@ -255,18 +255,18 @@ class EPCRecord:
         self.lighting_cost_current: float = self.prepared_epc["lighting_cost_current"]
         self.heating_cost_current: float = self.prepared_epc["heating_cost_current"]
         self.hot_water_cost_current: float = self.prepared_epc["hot_water_cost_current"]
-        # self.potential_energy_efficiency: float = float(
-        #     self.prepared_epc["potential_energy_efficiency"]
-        # )
-        # self.environment_impact_potential: float = float(
-        #     self.prepared_epc["environment_impact_potential"]
-        # )
-        # self.energy_consumption_potential: float = float(
-        #     self.prepared_epc["energy_consumption_potential"]
-        # )
-        # self.co2_emissions_potential: float = float(
-        #     self.prepared_epc["co2_emissions_potential"]
-        # )
+        self.potential_energy_efficiency: float = float(
+            self.prepared_epc["potential_energy_efficiency"]
+        )
+        self.environment_impact_potential: float = float(
+            self.prepared_epc["environment_impact_potential"]
+        )
+        self.energy_consumption_potential: float = float(
+            self.prepared_epc["energy_consumption_potential"]
+        )
+        self.co2_emissions_potential: float = float(
+            self.prepared_epc["co2_emissions_potential"]
+        )
         self.lodgement_date: str = self.prepared_epc["lodgement_date"]
         self.current_energy_efficiency: int = int(
             self.prepared_epc["current_energy_efficiency"]
@@ -1056,18 +1056,18 @@ class EPCDifferenceRecord:
             "heating_cost_ending": self.record2.get("heating_cost_current"),
             "hot_water_cost_starting": self.record1.get("hot_water_cost_current"),
             "hot_water_cost_ending": self.record2.get("hot_water_cost_current"),
-            # "potential_energy_efficiency": self.earliest_record.get(
-            #     "potential_energy_efficiency"
-            # ),
-            # "environment_impact_potential": self.earliest_record.get(
-            #     "environment_impact_potential"
-            # ),
-            # "energy_consumption_potential": self.earliest_record.get(
-            #     "energy_consumption_potential"
-            # ),
-            # "co2_emissions_potential": self.earliest_record.get(
-            #     "co2_emissions_potential"
-            # ),
+            "potential_energy_efficiency": self.earliest_record.get(
+                "potential_energy_efficiency"
+            ),
+            "environment_impact_potential": self.earliest_record.get(
+                "environment_impact_potential"
+            ),
+            "energy_consumption_potential": self.earliest_record.get(
+                "energy_consumption_potential"
+            ),
+            "co2_emissions_potential": self.earliest_record.get(
+                "co2_emissions_potential"
+            ),
             **ending_record,
             **starting_record,
         }

From 4c57b74d9669d1c6ab45294e5f8b94da6aa45297 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 9 Jul 2024 17:07:05 +0100
Subject: [PATCH 18/36] halfway to adding the individual recommendation impact

---
 backend/Property.py                    |  36 ++--
 backend/ml_models/AnnualBillSavings.py |  53 +-----
 recommendations/Recommendations.py     | 226 ++++++++++++++++++++-----
 3 files changed, 204 insertions(+), 111 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index 433bebe7..9eb8ef99 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -364,10 +364,6 @@ class Property:
             simulation_epc = self.epc_record.prepared_epc.copy()
             # Insert static values
             simulation_epc["lodgement_date"] = simulation_lodgment_date
-            # Insert today's costs, unadjusted (i.e. in line with what we expect the EPC would say today)
-            simulation_epc["heating-cost-current"] = round(self.energy_cost_estimates["unadjusted"]["heating"])
-            simulation_epc["lighting-cost-current"] = round(self.energy_cost_estimates["unadjusted"]["lighting"])
-            simulation_epc["hot-water-cost-current"] = round(self.energy_cost_estimates["unadjusted"]["hot_water"])
 
             # Replace the understores with hyphens
             simulation_epc = {k.replace("_", "-"): v for k, v in simulation_epc.items()}
@@ -698,44 +694,44 @@ class Property:
 
         appliances_kwh = AnnualBillSavings.estimate_appliances_energy_use(total_floor_area=self.floor_area)
 
-        adjusted_heating_kwh = AnnualBillSavings.adjust_energy_cost_to_metered(
-            epc_energy_cost=heating_prediction,
+        adjusted_heating_kwh = AnnualBillSavings.adjust_energy_to_metered(
+            epc_energy=heating_prediction,
             current_epc_rating=self.data["current-energy-rating"],
         )
 
-        adjusted_hot_water_kwh = AnnualBillSavings.adjust_energy_cost_to_metered(
-            epc_energy_cost=hot_water_prediction,
+        adjusted_hot_water_kwh = AnnualBillSavings.adjust_energy_to_metered(
+            epc_energy=hot_water_prediction,
             current_epc_rating=self.data["current-energy-rating"],
         )
 
-        adjusted_lighting_kwh = AnnualBillSavings.adjust_energy_cost_to_metered(
-            epc_energy_cost=lighting_kwh,
+        adjusted_lighting_kwh = AnnualBillSavings.adjust_energy_to_metered(
+            epc_energy=lighting_kwh,
             current_epc_rating=self.data["current-energy-rating"],
         )
 
-        adjusted_applicances_kwh = AnnualBillSavings.adjust_energy_cost_to_metered(
-            epc_energy_cost=appliances_kwh,
+        adjusted_applicances_kwh = AnnualBillSavings.adjust_energy_to_metered(
+            epc_energy=appliances_kwh,
             current_epc_rating=self.data["current-energy-rating"],
         )
 
         # Adjust today's cost figures with the UCL model
-        adjusted_heating_cost = AnnualBillSavings.adjust_energy_cost_to_metered(
-            epc_energy_cost=todays_heating_cost,
+        adjusted_heating_cost = AnnualBillSavings.adjust_energy_to_metered(
+            epc_energy=todays_heating_cost,
             current_epc_rating=self.data["current-energy-rating"],
         )
 
-        adjusted_hot_water_cost = AnnualBillSavings.adjust_energy_cost_to_metered(
-            epc_energy_cost=todays_hot_water_cost,
+        adjusted_hot_water_cost = AnnualBillSavings.adjust_energy_to_metered(
+            epc_energy=todays_hot_water_cost,
             current_epc_rating=self.data["current-energy-rating"],
         )
 
-        adjusted_lighting_cost = AnnualBillSavings.adjust_energy_cost_to_metered(
-            epc_energy_cost=todays_lighting_cost,
+        adjusted_lighting_cost = AnnualBillSavings.adjust_energy_to_metered(
+            epc_energy=todays_lighting_cost,
             current_epc_rating=self.data["current-energy-rating"],
         )
 
-        adjusted_appliances_cost = AnnualBillSavings.adjust_energy_cost_to_metered(
-            epc_energy_cost=appliances_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP,
+        adjusted_appliances_cost = AnnualBillSavings.adjust_energy_to_metered(
+            epc_energy=appliances_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP,
             current_epc_rating=self.data["current-energy-rating"],
         )
 
diff --git a/backend/ml_models/AnnualBillSavings.py b/backend/ml_models/AnnualBillSavings.py
index 4747e587..a0c426bb 100644
--- a/backend/ml_models/AnnualBillSavings.py
+++ b/backend/ml_models/AnnualBillSavings.py
@@ -133,53 +133,7 @@ class AnnualBillSavings:
         return appliances_energy_use
 
     @classmethod
-    def adjust_energy_to_metered(
-        cls, epc_energy_consumption, current_epc_rating
-    ):
-        """
-        The over-prediction of energy use by EPCs in Great Britain: A comparison
-        of EPC-modelled and metered primary energy use intensity
-
-        Which can be found here: https://www.sciencedirect.com/science/article/pii/S0378778823002542
-        We implement the results on page 10
-
-        :return:
-        """
-
-        gradients = {
-            "A": -0.1,
-            "B": -0.1,
-            "C": -0.43,
-            "D": -0.52,
-            "E": -0.7,
-            "F": -0.76,
-            "G": -0.76
-        }
-
-        intercepts = {
-            "A": 28,
-            "B": 28,
-            "C": 97,
-            "D": 119,
-            "E": 160,
-            "F": 157,
-            "G": 157
-        }
-
-        gradient = gradients[current_epc_rating]
-        intercept = intercepts[current_epc_rating]
-
-        # This should be negative
-        consumption_difference = gradient * epc_energy_consumption + intercept
-
-        adjusted_consumption = (epc_energy_consumption + consumption_difference)
-        if adjusted_consumption < 0:
-            raise ValueError("consumption_difference should be negative")
-
-        return adjusted_consumption
-
-    @classmethod
-    def adjust_energy_cost_to_metered(cls, epc_energy_cost, current_epc_rating):
+    def adjust_energy_to_metered(cls, epc_energy, current_epc_rating):
         """
         The over-prediction of energy use by EPCs in Great Britain: A comparison
         of EPC-modelled and metered primary energy use intensity
@@ -188,6 +142,7 @@ class AnnualBillSavings:
         We implement the results on page 10
 
         This is used to just re-map the cost from the EPC to the metered cost
+        epc_energy could be cost or kwh
         :return:
         """
 
@@ -215,10 +170,10 @@ class AnnualBillSavings:
         intercept = intercepts[current_epc_rating]
 
         # This should be negative
-        consumption_difference = gradient * epc_energy_cost + intercept
+        consumption_difference = gradient * epc_energy + intercept
         consumption_difference = 0 if consumption_difference > 0 else consumption_difference
 
-        adjusted_consumption = (epc_energy_cost + consumption_difference)
+        adjusted_consumption = (epc_energy + consumption_difference)
         if adjusted_consumption < 0:
             raise ValueError("consumption_difference should be negative")
 
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 1541246a..470b0554 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -1,3 +1,4 @@
+import pandas as pd
 from backend.Property import Property
 from typing import List
 from itertools import groupby
@@ -276,7 +277,9 @@ class Recommendations:
         return property_recommendations
 
     @classmethod
-    def calculate_recommendation_impact(cls, property_instance, all_predictions, recommendations):
+    def calculate_recommendation_impact(
+        cls, property_instance, all_predictions, recommendations, energy_consumption_client
+    ):
 
         """
         Given predictions from the model apis, with method will update the recommendations with the predicted
@@ -285,6 +288,7 @@ class Recommendations:
         :param property_instance: Instance of the Property class, for the home associated to property_id
         :param all_predictions: dictionary of predictions from the model apis
         :param recommendations: dictionary of recommendations for the property
+        :param energy_consumption_client: Instance of the EnergyConsumptionClient class
         :return:
         """
 
@@ -297,6 +301,34 @@ class Recommendations:
         property_carbon_predictions = all_predictions["carbon_change_predictions"][
             all_predictions["carbon_change_predictions"]["property_id"] == str(property_instance.id)
             ].copy()
+        property_lighting_cost_predictions = all_predictions["lighting_cost_predictions"][
+            all_predictions["lighting_cost_predictions"]["property_id"] == str(property_instance.id)
+            ].copy()
+        property_heating_cost_predictions = all_predictions["heating_cost_predictions"][
+            all_predictions["heating_cost_predictions"]["property_id"] == str(property_instance.id)
+            ].copy()
+        property_hot_water_cost_predictions = all_predictions["hot_water_cost_predictions"][
+            all_predictions["hot_water_cost_predictions"]["property_id"] == str(property_instance.id)
+            ].copy()
+
+        # We apply the metered adjustment to each of the lighting, heating and hot water cost predictions
+        property_lighting_cost_predictions["adjusted_cost"] = property_lighting_cost_predictions["predictions"].apply(
+            lambda x: AnnualBillSavings.adjust_energy_to_metered(
+                x, current_epc_rating=property_instance.data["current-energy-rating"]
+            )
+        )
+
+        property_heating_cost_predictions["adjusted_cost"] = property_heating_cost_predictions["predictions"].apply(
+            lambda x: AnnualBillSavings.adjust_energy_to_metered(
+                x, current_epc_rating=property_instance.data["current-energy-rating"]
+            )
+        )
+
+        property_hot_water_cost_predictions["adjusted_cost"] = property_hot_water_cost_predictions["predictions"].apply(
+            lambda x: AnnualBillSavings.adjust_energy_to_metered(
+                x, current_epc_rating=property_instance.data["current-energy-rating"]
+            )
+        )
 
         property_recommendations = recommendations[property_instance.id].copy()
 
@@ -304,32 +336,43 @@ class Recommendations:
         sap_phase_impact = property_sap_predictions.groupby("phase")["predictions"].median().reset_index()
         heat_phase_impact = property_heat_predictions.groupby("phase")["predictions"].median().reset_index()
         carbon_phase_impact = property_carbon_predictions.groupby("phase")["predictions"].median().reset_index()
+        lighting_cost_phase_impact = (
+            property_lighting_cost_predictions.groupby("phase")[["adjusted_cost", "predictions"]].median().reset_index()
+        )
+        heating_cost_phase_impact = (
+            property_heating_cost_predictions.groupby("phase")[["adjusted_cost", "predictions"]].median().reset_index()
+        )
+        hot_water_cost_phase_impact = (
+            property_hot_water_cost_predictions.groupby("phase")[
+                ["adjusted_cost", "predictions"]
+            ].median().reset_index()
+        )
 
         # The heat demand change is the difference between the starting heat demand and the value at the final phase
-        expected_heat_demand = property_instance.floor_area * (
-            heat_phase_impact[heat_phase_impact["phase"] == max(heat_phase_impact["phase"])]["predictions"].values[0]
-        )
-        starting_heat_demand = (
-            float(property_instance.data["energy-consumption-current"]) * property_instance.floor_area
-        )
-
-        # This is the unadjusted resulting heat demand
-        predicted_heat_demand_change = starting_heat_demand - expected_heat_demand
-
-        # TODO: This isn't quite right as this is based on EVERY possible measure, not just the ones that are
-        #       actually implemented
-        expected_adjusted_energy = AnnualBillSavings.adjust_energy_to_metered(
-            epc_energy_consumption=expected_heat_demand,
-            current_epc_rating=property_instance.data["current-energy-rating"],
-            total_floor_area=property_instance.floor_area
-        )
-
-        adjusted_heat_demand_change = (
-            property_instance.current_adjusted_energy - expected_adjusted_energy
-        )
-
-        # TODO: We should determine if the home is gas & electricity or just electricity
-        expected_energy_bill = AnnualBillSavings.calculate_annual_bill(expected_adjusted_energy)
+        # expected_heat_demand = property_instance.floor_area * (
+        #     heat_phase_impact[heat_phase_impact["phase"] == max(heat_phase_impact["phase"])]["predictions"].values[0]
+        # )
+        # starting_heat_demand = (
+        #     float(property_instance.data["energy-consumption-current"]) * property_instance.floor_area
+        # )
+        #
+        # # This is the unadjusted resulting heat demand
+        # predicted_heat_demand_change = starting_heat_demand - expected_heat_demand
+        #
+        # # TODO: This isn't quite right as this is based on EVERY possible measure, not just the ones that are
+        # #       actually implemented
+        # expected_adjusted_energy = AnnualBillSavings.adjust_energy_to_metered(
+        #     epc_energy_consumption=expected_heat_demand,
+        #     current_epc_rating=property_instance.data["current-energy-rating"],
+        #     total_floor_area=property_instance.floor_area
+        # )
+        #
+        # adjusted_heat_demand_change = (
+        #     property_instance.current_adjusted_energy - expected_adjusted_energy
+        # )
+        #
+        # # TODO: We should determine if the home is gas & electricity or just electricity
+        # expected_energy_bill = AnnualBillSavings.calculate_annual_bill(expected_adjusted_energy)
 
         for recommendations_by_type in property_recommendations:
             for rec in recommendations_by_type:
@@ -350,12 +393,126 @@ class Recommendations:
                     rec["recommendation_id"]
                 )]["predictions"].values[0]
 
+                # Lighting costs won't change unless we have a lighting recommendation
+                new_lighting_cost_data = property_lighting_cost_predictions[
+                    property_lighting_cost_predictions["recommendation_id"] == str(rec["recommendation_id"])
+                    ]
+
+                new_lighting_cost = new_lighting_cost_data["adjusted_cost"].values[0]
+                new_lighting_cost_unadjusted = new_lighting_cost_data["predictions"].values[0]
+
+                new_heating_cost_data = property_heating_cost_predictions[
+                    property_heating_cost_predictions["recommendation_id"] == str(rec["recommendation_id"])
+                    ]
+
+                new_heating_cost = new_heating_cost_data["adjusted_cost"].values[0]
+                new_heating_cost_unadjusted = new_heating_cost_data["predictions"].values[0]
+
+                new_hot_water_cost_data = property_hot_water_cost_predictions[
+                    property_hot_water_cost_predictions["recommendation_id"] == str(rec["recommendation_id"])
+                    ]
+
+                new_hot_water_cost = new_hot_water_cost_data["adjusted_cost"].values[0]
+                new_hot_water_cost_unadjusted = new_hot_water_cost_data["predictions"].values[0]
+
                 if rec["phase"] == 0:
                     predicted_sap_points = new_sap - float(property_instance.data["current-energy-efficiency"])
                     predicted_co2_savings = float(property_instance.data["co2-emissions-current"]) - new_carbon
                     predicted_heat_demand = property_instance.floor_area * (
                         float(property_instance.data["energy-consumption-current"]) - new_heat_demand
                     )
+
+                    predicted_heating_cost_reduction = (
+                        float(property_instance.energy_cost_estimates["adjusted"]["heating"]) - new_heating_cost
+                    )
+                    predicted_hot_water_cost_reduction = (
+                        float(property_instance.energy_cost_estimates["adjusted"]["hot_water"]) - new_hot_water_cost
+                    )
+                    predicted_heating_cost_reduction = (
+                        0 if predicted_heating_cost_reduction < 0 else predicted_heating_cost_reduction
+                    )
+                    predicted_hot_water_cost_reduction = (
+                        0 if predicted_hot_water_cost_reduction < 0 else predicted_hot_water_cost_reduction
+                    )
+
+                    # Only lighting recommendations can have an impact here
+                    predicted_lighting_cost_reduction = 0 if rec["type"] != "lighting" else (
+                        float(property_instance.energy_cost_estimates["adjusted"]["lighting"]) - new_lighting_cost
+                    )
+
+                    # This is the total bill savings for the recommendation
+                    if rec["type"] == "solar_pv":
+                        # We need to calculate the predicted bill savings for the solar pv recommendation
+                        # where we will get some savings from the cost of appliances but it depends on the amount
+                        # of energy generated by the solar panels
+                        # We can assume that 50% of the energy generated will be used by the property without
+                        # a battery, to be conservative.
+                        # SIMILARLY: We need to handle kwh savings
+                        raise Exception("Handle me")
+                    else:
+                        predicted_bill_savings = (
+                            predicted_heating_cost_reduction + predicted_hot_water_cost_reduction +
+                            predicted_lighting_cost_reduction
+                        )
+
+                    # We now predict the kwh savings using the xgb model
+                    scoring_heating_cost = min(
+                        property_instance.energy_cost_estimates["unadjusted"]["heating"], new_heating_cost_unadjusted
+                    )
+                    scoring_hot_water_cost = min(
+                        property_instance.energy_cost_estimates["unadjusted"]["hot_water"],
+                        new_hot_water_cost_unadjusted
+                    )
+                    scoring_lighting_cost = min(
+                        property_instance.energy_cost_estimates["unadjusted"]["lighting"], new_lighting_cost_unadjusted
+                    ) if rec["type"] == "lighting" \
+                        else property_instance.energy_cost_estimates["unadjusted"]["lighting"]
+
+                    simulation_epc = property_instance.simulation_epcs[rec["phase"]].copy()
+                    # The current heating, hot water and energy kwh should be based on the new, unadjusted
+                    # costs for lighting, heating, hot water
+                    simulation_epc["heating-cost-current"] = int(scoring_heating_cost)
+                    simulation_epc["hot-water-cost-current"] = int(scoring_hot_water_cost)
+                    simulation_epc["lighting-cost-current"] = int(scoring_lighting_cost)
+                    # We predict with the energy consumption model
+                    scoring_df = pd.DataFrame([simulation_epc])
+                    # Change columns from underscores to hyphens
+                    scoring_df.columns = [
+                        x.lower().replace("_", "-") for x in scoring_df.columns
+                    ]
+                    for col in ["heating_kwh", "hot_water_kwh"]:
+                        scoring_df[col] = None
+
+                    energy_consumption_client.data = None
+                    new_heating_kwh = energy_consumption_client.score_new_data(
+                        new_data=scoring_df, target="heating_kwh"
+                    )[0]
+
+                    new_hot_water_kwh = energy_consumption_client.score_new_data(
+                        new_data=scoring_df, target="hot_water_kwh"
+                    )[0]
+
+                    # Adjust these figures
+                    new_heating_kwh_adjusted = AnnualBillSavings.adjust_energy_to_metered(
+                        new_heating_kwh, current_epc_rating=property_instance.data["current-energy-rating"]
+                    )
+                    new_hot_water_kwh_adjusted = AnnualBillSavings.adjust_energy_to_metered(
+                        new_hot_water_kwh, current_epc_rating=property_instance.data["current-energy-rating"]
+                    )
+
+                    heating_kwh_reduction = 0 if predicted_heating_cost_reduction == 0 else (
+                        property_instance.energy_consumption_estimates["adjusted"]["heating"] - new_heating_kwh_adjusted
+                    )
+
+                    hot_water_kwh_reduction = 0 if predicted_hot_water_cost_reduction == 0 else (
+                        property_instance.energy_consumption_estimates["adjusted"]["hot_water"] -
+                        new_hot_water_kwh_adjusted
+                    )
+
+                    lighting_kwh_reduction = predicted_lighting_cost_reduction / AnnualBillSavings.ELECTRICITY_PRICE_CAP
+
+                    kwh_reduction = heating_kwh_reduction + hot_water_kwh_reduction + lighting_kwh_reduction
+
                 else:
                     previous_phase = rec["phase"] - 1
                     predicted_sap_points = (
@@ -383,23 +540,8 @@ class Recommendations:
                 # Round to 2 decimal places
                 rec["sap_points"] = round(rec["sap_points"], 2)
 
-                # We now calculate the adjusted heat demand for this recommendation, which is simply the percentage
-                # of the total adjusted heat demand change. The percentage we use is this recommendation's percentage
-                # of the total heat demand per square meter change
-
-                rec["adjusted_heat_demand"] = adjusted_heat_demand_change * (
-                    rec["heat_demand"] / predicted_heat_demand_change
-                )
-                # We make sure this is NOT below 0
-                rec["adjusted_heat_demand"] = max(0, rec["adjusted_heat_demand"])
-
-                # Depending on the property's tarriff, we calculate the amount of energy savings this measure will bring
-                if property_instance.energy_source == "electricity":
-                    rec["energy_cost_savings"] = AnnualBillSavings.estimate_electric(rec["adjusted_heat_demand"])
-                elif property_instance.energy_source == "electricity_and_gas":
-                    rec["energy_cost_savings"] = AnnualBillSavings.estimate(rec["adjusted_heat_demand"])
-                else:
-                    raise ValueError("Invalid value for energy source")
+                rec["kwh_savings"] = kwh_reduction
+                rec["energy_cost_savings"] = predicted_bill_savings
 
                 if (rec["sap_points"] is None) and (rec["co2_equivalent_savings"] is None) or (
                     rec["heat_demand"] is None) or (rec["energy_cost_savings"] is None):

From af6674497995529ca82ea398ede6243d250ab7f5 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 9 Jul 2024 17:37:37 +0100
Subject: [PATCH 19/36] added adjust cost differences

---
 recommendations/Recommendations.py | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 470b0554..806e4d23 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -527,6 +527,32 @@ class Recommendations:
                         new_heat_demand
                     )
 
+                    # We now estimate the adjusted cost savings for the recommendation
+                    predicted_heating_cost_reduction = (
+                        heating_cost_phase_impact[heating_cost_phase_impact["phase"] == previous_phase][
+                            "adjusted_cost"
+                        ].values[0] - new_heating_cost
+                    )
+                    predicted_heating_cost_reduction = (
+                        0 if predicted_heating_cost_reduction < 0 else predicted_heating_cost_reduction
+                    )
+
+                    predicted_hot_water_cost_reduction = (
+                        hot_water_cost_phase_impact[hot_water_cost_phase_impact["phase"] == previous_phase][
+                            "adjusted_cost"
+                        ].values[0] - new_hot_water_cost
+                    )
+                    predicted_hot_water_cost_reduction = (
+                        0 if predicted_hot_water_cost_reduction < 0 else predicted_hot_water_cost_reduction
+                    )
+
+                    # Only lighting recommendations can have an impact here
+                    predicted_lighting_cost_reduction = 0 if rec["type"] != "lighting" else (
+                        lighting_cost_phase_impact[lighting_cost_phase_impact["phase"] == previous_phase][
+                            "adjusted_cost"
+                        ].values[0] - new_lighting_cost
+                    )
+
                 if rec["type"] == "low_energy_lighting":
                     # For the moment, we cap the number of SAP points that can be achieved by ventilation at 2
                     rec["sap_points"] = min(predicted_sap_points, LightingRecommendations.SAP_LIMIT)

From 3bdc4db92af99ac8ad4164fed4ab290fc8ee11aa Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 9 Jul 2024 19:27:21 +0100
Subject: [PATCH 20/36] Added in second part of recommendation impact
 calculations

---
 backend/app/plan/router.py                |   9 +-
 backend/ml_models/AnnualBillSavings.py    |   6 +-
 recommendations/Recommendations.py        | 241 +++++++++++++++++++---
 recommendations/SolarPvRecommendations.py |   5 +-
 4 files changed, 226 insertions(+), 35 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 7a0bba2a..e4fc508c 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -380,6 +380,7 @@ async def trigger_plan(body: PlanTriggerRequest):
                     target_rating=body.goal_value,
                     current_consumption=p.current_adjusted_energy
                 ),
+                "property_id": p.id
             } for p in input_properties if p.building_id is not None
         ]
         if building_ids:
@@ -419,7 +420,13 @@ async def trigger_plan(body: PlanTriggerRequest):
                 # Insert this into the properties that have this building id
                 for p in input_properties:
                     if p.building_id == building_id:
-                        p.set_solar_panel_configuration(solar_panel_configuration[building_id])
+                        unit_solar_panel_configuration = solar_panel_configuration[building_id].copy()
+
+                        unit_solar_panel_configuration["unit_share_of_energy"] = (
+                            [x for x in building_ids if x["property_id"] == p.id][0]["energy_consumption"] /
+                            energy_consumption
+                        )
+                        p.set_solar_panel_configuration(unit_solar_panel_configuration)
 
         else:
             # Model the solar potential at the property level
diff --git a/backend/ml_models/AnnualBillSavings.py b/backend/ml_models/AnnualBillSavings.py
index a0c426bb..e4d9d143 100644
--- a/backend/ml_models/AnnualBillSavings.py
+++ b/backend/ml_models/AnnualBillSavings.py
@@ -28,8 +28,10 @@ class AnnualBillSavings:
     # https://www.ofgem.gov.uk/energy-price-cap
     ELECTRICITY_PRICE_CAP = 0.2236
     GAS_PRICE_CAP = 0.0548
-    # This is the most recent export payment figure, at 12p per kwh
-    ELECTRICITY_EXPORT_PAYMENT = 0.12
+    # This is the most recent export payment figure, at 9.28p/kWh
+    # Smart export guarantee rates can be found here:
+    # https://www.sunsave.energy/solar-panels-advice/exporting-to-the-grid/best-seg-rates
+    ELECTRICITY_EXPORT_PAYMENT = 0.0928
 
     # This is a weighted mean of the price caps, using the consumption figures above as weights
     PRICE_FACTOR = 0.09549999999999999
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 806e4d23..6e17ef54 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -14,6 +14,7 @@ from recommendations.HeatingRecommender import HeatingRecommender
 from recommendations.HotwaterRecommendations import HotwaterRecommendations
 from recommendations.SecondaryHeating import SecondaryHeating
 from backend.ml_models.AnnualBillSavings import AnnualBillSavings
+from backend.apis.GoogleSolarApi import GoogleSolarApi
 
 
 class Recommendations:
@@ -374,6 +375,8 @@ class Recommendations:
         # # TODO: We should determine if the home is gas & electricity or just electricity
         # expected_energy_bill = AnnualBillSavings.calculate_annual_bill(expected_adjusted_energy)
 
+        phase_lighting_costs = {}
+        phase_kwh_figures = {}
         for recommendations_by_type in property_recommendations:
             for rec in recommendations_by_type:
 
@@ -422,23 +425,52 @@ class Recommendations:
                         float(property_instance.data["energy-consumption-current"]) - new_heat_demand
                     )
 
+                    if rec["type"] == "lighting":
+                        new_heating_cost = property_instance.energy_cost_estimates["adjusted"]["heating"]
+                        new_hot_water_cost = property_instance.energy_cost_estimates["adjusted"]["hot_water"]
+                        new_lighting_cost = min(
+                            new_lighting_cost, property_instance.energy_cost_estimates["adjusted"]["lighting"]
+                        )
+                        scoring_heating_cost = property_instance.energy_cost_estimates["unadjusted"]["heating"]
+                        scoring_hot_water_cost = property_instance.energy_cost_estimates["unadjusted"]["hot_water"]
+                        scoring_lighting_cost = min(
+                            property_instance.energy_cost_estimates["unadjusted"]["lighting"],
+                            new_lighting_cost_unadjusted
+                        )
+                    else:
+                        new_heating_cost = min(
+                            new_heating_cost, property_instance.energy_cost_estimates["adjusted"]["heating"]
+                        )
+                        new_hot_water_cost = min(
+                            new_hot_water_cost, property_instance.energy_cost_estimates["adjusted"]["hot_water"]
+                        )
+                        new_lighting_cost = property_instance.energy_cost_estimates["adjusted"]["lighting"]
+
+                        scoring_heating_cost = min(
+                            property_instance.energy_cost_estimates["unadjusted"]["heating"],
+                            new_heating_cost_unadjusted
+                        )
+                        scoring_hot_water_cost = min(
+                            property_instance.energy_cost_estimates["unadjusted"]["hot_water"],
+                            new_hot_water_cost_unadjusted
+                        )
+                        scoring_lighting_cost = property_instance.energy_cost_estimates["unadjusted"]["lighting"]
+
                     predicted_heating_cost_reduction = (
-                        float(property_instance.energy_cost_estimates["adjusted"]["heating"]) - new_heating_cost
+                        property_instance.energy_cost_estimates["adjusted"]["heating"] - new_heating_cost
                     )
                     predicted_hot_water_cost_reduction = (
-                        float(property_instance.energy_cost_estimates["adjusted"]["hot_water"]) - new_hot_water_cost
-                    )
-                    predicted_heating_cost_reduction = (
-                        0 if predicted_heating_cost_reduction < 0 else predicted_heating_cost_reduction
-                    )
-                    predicted_hot_water_cost_reduction = (
-                        0 if predicted_hot_water_cost_reduction < 0 else predicted_hot_water_cost_reduction
+                        property_instance.energy_cost_estimates["adjusted"]["hot_water"] - new_hot_water_cost
                     )
 
-                    # Only lighting recommendations can have an impact here
                     predicted_lighting_cost_reduction = 0 if rec["type"] != "lighting" else (
-                        float(property_instance.energy_cost_estimates["adjusted"]["lighting"]) - new_lighting_cost
+                        property_instance.energy_cost_estimates["adjusted"]["lighting"] - new_lighting_cost
                     )
+                    # We store this value for later
+                    phase_lighting_costs[rec["phase"]] = {
+                        "adjusted": new_lighting_cost,
+                        "unadjusted": scoring_lighting_cost
+                    }
 
                     # This is the total bill savings for the recommendation
                     if rec["type"] == "solar_pv":
@@ -456,17 +488,6 @@ class Recommendations:
                         )
 
                     # We now predict the kwh savings using the xgb model
-                    scoring_heating_cost = min(
-                        property_instance.energy_cost_estimates["unadjusted"]["heating"], new_heating_cost_unadjusted
-                    )
-                    scoring_hot_water_cost = min(
-                        property_instance.energy_cost_estimates["unadjusted"]["hot_water"],
-                        new_hot_water_cost_unadjusted
-                    )
-                    scoring_lighting_cost = min(
-                        property_instance.energy_cost_estimates["unadjusted"]["lighting"], new_lighting_cost_unadjusted
-                    ) if rec["type"] == "lighting" \
-                        else property_instance.energy_cost_estimates["unadjusted"]["lighting"]
 
                     simulation_epc = property_instance.simulation_epcs[rec["phase"]].copy()
                     # The current heating, hot water and energy kwh should be based on the new, unadjusted
@@ -513,6 +534,17 @@ class Recommendations:
 
                     kwh_reduction = heating_kwh_reduction + hot_water_kwh_reduction + lighting_kwh_reduction
 
+                    phase_kwh_figures[rec["phase"]] = {
+                        "adjusted": {
+                            "heating": new_heating_kwh_adjusted,
+                            "hot_water": new_hot_water_kwh_adjusted
+                        },
+                        "unadjusted": {
+                            "heating": new_heating_kwh,
+                            "hot_water": new_hot_water_kwh
+                        }
+                    }
+
                 else:
                     previous_phase = rec["phase"] - 1
                     predicted_sap_points = (
@@ -527,30 +559,177 @@ class Recommendations:
                         new_heat_demand
                     )
 
+                    if rec["type"] == "lighting":
+                        # If we have a lighting recommendation, the heating, hot water and lighting costs will
+                        # be from the previous phase - nothing will change
+                        new_heating_cost = heating_cost_phase_impact[
+                            heating_cost_phase_impact["phase"] == previous_phase
+                            ]["adjusted_cost"].values[0]
+                        new_hot_water_cost = hot_water_cost_phase_impact[
+                            hot_water_cost_phase_impact["phase"] == previous_phase
+                            ]["adjusted_cost"].values[0]
+
+                        new_lighting_cost = min(
+                            new_lighting_cost, phase_lighting_costs[previous_phase]["adjusted"]
+                        )
+                        # We also use the unadjusted costs for the scoring from the previous phase
+                        scoring_heating_cost = heating_cost_phase_impact[
+                            heating_cost_phase_impact["phase"] == previous_phase
+                            ]["predictions"].values[0]
+                        scoring_hot_water_cost = hot_water_cost_phase_impact[
+                            hot_water_cost_phase_impact["phase"] == previous_phase
+                            ]["predictions"].values[0]
+                        scoring_lighting_cost = min(
+                            new_lighting_cost_unadjusted,
+                            phase_lighting_costs[previous_phase]["unadjusted"]
+                        )
+                    else:
+                        # Whereas for other recommendations, we use the new costs
+                        new_heating_cost = min(
+                            new_heating_cost,
+                            heating_cost_phase_impact[
+                                heating_cost_phase_impact["phase"] == previous_phase
+                                ]["adjusted_cost"].values[0]
+                        )
+                        new_hot_water_cost = min(
+                            new_hot_water_cost,
+                            hot_water_cost_phase_impact[
+                                hot_water_cost_phase_impact["phase"] == previous_phase
+                                ]["adjusted_cost"].values[0]
+                        )
+                        new_lighting_cost = phase_lighting_costs[previous_phase]["adjusted"]
+
+                        scoring_heating_cost = min(
+                            new_heating_cost_unadjusted,
+                            heating_cost_phase_impact[
+                                heating_cost_phase_impact["phase"] == previous_phase
+                                ]["predictions"].values[0]
+                        )
+                        scoring_hot_water_cost = min(
+                            new_hot_water_cost_unadjusted,
+                            hot_water_cost_phase_impact[
+                                hot_water_cost_phase_impact["phase"] == previous_phase
+                                ]["predictions"].values[0]
+                        )
+                        scoring_lighting_cost = phase_lighting_costs[previous_phase]["unadjusted"]
+
                     # We now estimate the adjusted cost savings for the recommendation
                     predicted_heating_cost_reduction = (
                         heating_cost_phase_impact[heating_cost_phase_impact["phase"] == previous_phase][
                             "adjusted_cost"
                         ].values[0] - new_heating_cost
                     )
-                    predicted_heating_cost_reduction = (
-                        0 if predicted_heating_cost_reduction < 0 else predicted_heating_cost_reduction
-                    )
 
                     predicted_hot_water_cost_reduction = (
                         hot_water_cost_phase_impact[hot_water_cost_phase_impact["phase"] == previous_phase][
                             "adjusted_cost"
                         ].values[0] - new_hot_water_cost
                     )
-                    predicted_hot_water_cost_reduction = (
-                        0 if predicted_hot_water_cost_reduction < 0 else predicted_hot_water_cost_reduction
-                    )
 
                     # Only lighting recommendations can have an impact here
-                    predicted_lighting_cost_reduction = 0 if rec["type"] != "lighting" else (
-                        lighting_cost_phase_impact[lighting_cost_phase_impact["phase"] == previous_phase][
-                            "adjusted_cost"
-                        ].values[0] - new_lighting_cost
+                    predicted_lighting_cost_reduction = (
+                        phase_lighting_costs[previous_phase]["adjusted"] - new_lighting_cost
+                    )
+
+                    # We now predict the kwh savings using the xgb model - this is based on
+                    # the new costs at this phase
+
+                    simulation_epc = property_instance.simulation_epcs[rec["phase"]].copy()
+                    # The current heating, hot water and energy kwh should be based on the new, unadjusted
+                    # costs for lighting, heating, hot water
+                    simulation_epc["heating-cost-current"] = int(scoring_heating_cost)
+                    simulation_epc["hot-water-cost-current"] = int(scoring_hot_water_cost)
+                    simulation_epc["lighting-cost-current"] = int(scoring_lighting_cost)
+                    # We predict with the energy consumption model
+                    scoring_df = pd.DataFrame([simulation_epc])
+                    # Change columns from underscores to hyphens
+                    scoring_df.columns = [
+                        x.lower().replace("_", "-") for x in scoring_df.columns
+                    ]
+                    for col in ["heating_kwh", "hot_water_kwh"]:
+                        scoring_df[col] = None
+
+                    energy_consumption_client.data = None
+                    new_heating_kwh = energy_consumption_client.score_new_data(
+                        new_data=scoring_df, target="heating_kwh"
+                    )[0]
+
+                    new_hot_water_kwh = energy_consumption_client.score_new_data(
+                        new_data=scoring_df, target="hot_water_kwh"
+                    )[0]
+
+                    # Adjust these figures
+                    new_heating_kwh_adjusted = AnnualBillSavings.adjust_energy_to_metered(
+                        new_heating_kwh, current_epc_rating=property_instance.data["current-energy-rating"]
+                    )
+                    new_hot_water_kwh_adjusted = AnnualBillSavings.adjust_energy_to_metered(
+                        new_hot_water_kwh, current_epc_rating=property_instance.data["current-energy-rating"]
+                    )
+
+                    heating_kwh_reduction = 0 if predicted_heating_cost_reduction == 0 else (
+                        phase_kwh_figures[previous_phase]["adjusted"]["heating"] - new_heating_kwh_adjusted
+                    )
+
+                    hot_water_kwh_reduction = 0 if predicted_hot_water_cost_reduction == 0 else (
+                        phase_kwh_figures[previous_phase]["adjusted"]["hot_water"] - new_hot_water_kwh_adjusted
+                    )
+
+                    lighting_kwh_reduction = predicted_lighting_cost_reduction / AnnualBillSavings.ELECTRICITY_PRICE_CAP
+
+                    # This is the total bill savings for the recommendation
+
+                    predicted_appliances_cost_reduction = 0
+                    predicted_appliances_kwh_reduction = 0
+                    if rec["type"] == "solar_pv":
+                        # Calulate the amount of energy the solar panel array will generate for this unit
+                        unit_energy_consumption = (
+                            rec["initial_ac_kwh_per_year"] *
+                            property_instance.solar_panel_configuration["unit_share_of_energy"]
+                        )
+
+                        unit_energy_utilised = unit_energy_consumption * GoogleSolarApi.SOLAR_CONSUMPTION_PROPORTION
+                        unit_energy_exported = unit_energy_consumption - unit_energy_utilised
+                        unit_energy_exported_value = unit_energy_exported * AnnualBillSavings.ELECTRICITY_EXPORT_PAYMENT
+
+                        # We assume that 50% of the energy generated will be used by the property without a battery
+                        # to be conservative
+
+                        # of the energy utilised, some of it is used by heating, hot water and lighting so we
+                        # remove that from the total
+                        unit_energy_utilised -= (
+                            heating_kwh_reduction + hot_water_kwh_reduction + lighting_kwh_reduction
+                        )
+                        unit_energy_utilised = 0 if unit_energy_utilised < 0 else unit_energy_utilised
+
+                        # This is how much energy the appliances will use after install
+                        post_install_appliance_kwh = (
+                            property_instance.energy_consumption_estimates["adjusted"]["appliances"] -
+                            unit_energy_utilised
+                        )
+                        post_install_appliance_kwh = (
+                            0 if post_install_appliance_kwh < 0 else post_install_appliance_kwh
+                        )
+
+                        predicted_appliances_kwh_reduction = (
+                            property_instance.energy_consumption_estimates["adjusted"]["appliances"] -
+                            post_install_appliance_kwh
+                        )
+
+                        predicted_appliances_cost_reduction = unit_energy_exported_value + (
+                            predicted_appliances_kwh_reduction * AnnualBillSavings.ELECTRICITY_PRICE_CAP
+                        )
+
+                    # We now calculate the predicted_bill_savings
+                    predicted_bill_savings = (
+                        predicted_heating_cost_reduction + predicted_hot_water_cost_reduction +
+                        predicted_lighting_cost_reduction + predicted_appliances_cost_reduction
+                    )
+
+                    kwh_reduction = (
+                        heating_kwh_reduction +
+                        hot_water_kwh_reduction +
+                        lighting_kwh_reduction +
+                        predicted_appliances_kwh_reduction
                     )
 
                 if rec["type"] == "low_energy_lighting":
diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py
index 18bfdced..596b9290 100644
--- a/recommendations/SolarPvRecommendations.py
+++ b/recommendations/SolarPvRecommendations.py
@@ -113,13 +113,15 @@ class SolarPvRecommendations:
 
         for rank, recommendation_config in best_configurations.iterrows():
             roof_coverage_percent = round(recommendation_config["panneled_roof_area"] / total_roof_area * 100)
-            # Spread the cost to the individual units
+            # Spread the cost to the individual units - adding a 20% contingency
             total_cost = recommendation_config["total_cost"] / n_units
             kw = np.floor(recommendation_config["array_warrage"] / 100) / 10
 
             description = (f"Install a {kw} kilowatt-peak (kWp) solar photovoltaic (PV) panel system on the roof "
                            "of the building")
 
+            initial_ac_kwh_per_year = recommendation_config["initial_ac_kwh_per_year"]
+
             self.recommendation.append(
                 {
                     "phase": phase,
@@ -135,6 +137,7 @@ class SolarPvRecommendations:
                     # back up here
                     "photo_supply": roof_coverage_percent,
                     "has_battery": False,
+                    "initial_ac_kwh_per_year": initial_ac_kwh_per_year,
                     "description_simulation": {"photo-supply": roof_coverage_percent},
                     "rank": rank  # Rank is used to get the representative recommendation - rank 0 will be chosen
                 }

From a28c94c18b4d570f4b837ff8dd910bdf8fddc85d Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 9 Jul 2024 19:39:37 +0100
Subject: [PATCH 21/36] added description_simulation to hot water
 recommendation

---
 recommendations/HotwaterRecommendations.py |  5 ++-
 recommendations/Recommendations.py         | 41 +++++++---------------
 2 files changed, 16 insertions(+), 30 deletions(-)

diff --git a/recommendations/HotwaterRecommendations.py b/recommendations/HotwaterRecommendations.py
index 9c5c7045..95488d3f 100644
--- a/recommendations/HotwaterRecommendations.py
+++ b/recommendations/HotwaterRecommendations.py
@@ -62,7 +62,10 @@ class HotwaterRecommendations:
                 "sap_points": None,
                 "already_installed": already_installed,
                 **recommendation_cost,
-                "simulation_config": {"hot_water_energy_eff_ending": "Average"}
+                "simulation_config": {"hot_water_energy_eff_ending": "Average"},
+                "description_simulation": {
+                    "hot-water-energy-eff": "Average"
+                }
             }
         )
         return
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 6e17ef54..5cca056f 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -337,9 +337,10 @@ class Recommendations:
         sap_phase_impact = property_sap_predictions.groupby("phase")["predictions"].median().reset_index()
         heat_phase_impact = property_heat_predictions.groupby("phase")["predictions"].median().reset_index()
         carbon_phase_impact = property_carbon_predictions.groupby("phase")["predictions"].median().reset_index()
-        lighting_cost_phase_impact = (
-            property_lighting_cost_predictions.groupby("phase")[["adjusted_cost", "predictions"]].median().reset_index()
-        )
+        # lighting_cost_phase_impact = (
+        #     property_lighting_cost_predictions.groupby("phase")[["adjusted_cost", "predictions"]].median(
+        #     ).reset_index()
+        # )
         heating_cost_phase_impact = (
             property_heating_cost_predictions.groupby("phase")[["adjusted_cost", "predictions"]].median().reset_index()
         )
@@ -349,32 +350,6 @@ class Recommendations:
             ].median().reset_index()
         )
 
-        # The heat demand change is the difference between the starting heat demand and the value at the final phase
-        # expected_heat_demand = property_instance.floor_area * (
-        #     heat_phase_impact[heat_phase_impact["phase"] == max(heat_phase_impact["phase"])]["predictions"].values[0]
-        # )
-        # starting_heat_demand = (
-        #     float(property_instance.data["energy-consumption-current"]) * property_instance.floor_area
-        # )
-        #
-        # # This is the unadjusted resulting heat demand
-        # predicted_heat_demand_change = starting_heat_demand - expected_heat_demand
-        #
-        # # TODO: This isn't quite right as this is based on EVERY possible measure, not just the ones that are
-        # #       actually implemented
-        # expected_adjusted_energy = AnnualBillSavings.adjust_energy_to_metered(
-        #     epc_energy_consumption=expected_heat_demand,
-        #     current_epc_rating=property_instance.data["current-energy-rating"],
-        #     total_floor_area=property_instance.floor_area
-        # )
-        #
-        # adjusted_heat_demand_change = (
-        #     property_instance.current_adjusted_energy - expected_adjusted_energy
-        # )
-        #
-        # # TODO: We should determine if the home is gas & electricity or just electricity
-        # expected_energy_bill = AnnualBillSavings.calculate_annual_bill(expected_adjusted_energy)
-
         phase_lighting_costs = {}
         phase_kwh_figures = {}
         for recommendations_by_type in property_recommendations:
@@ -752,6 +727,14 @@ class Recommendations:
                     rec["heat_demand"] is None) or (rec["energy_cost_savings"] is None):
                     raise ValueError("sap points, co2 or heat demand is missing")
 
+        # We sum up the total savings for the property and that is our expected energy bill
+        # expected_energy_bill = sum(
+        #     [
+        #         rec["energy_cost_savings"] for rec in property_recommendations
+        #         if rec["type"] != "mechanical_ventilation"
+        #     ]
+        # )
+
         return (
             property_recommendations,
             expected_adjusted_energy,

From 14450f2b795e642e51fbccf6bea1b9208b62345f Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 9 Jul 2024 21:54:11 +0100
Subject: [PATCH 22/36] got the recommendation impact working for pfp

---
 backend/Property.py                |   2 +-
 backend/app/plan/router.py         |   4 +-
 recommendations/Recommendations.py | 176 ++++++++++++++++++-----------
 3 files changed, 115 insertions(+), 67 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index 9eb8ef99..b1cf2d16 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -739,7 +739,7 @@ class Property:
         self.current_adjusted_energy = (
             adjusted_heating_kwh + adjusted_hot_water_kwh + adjusted_lighting_kwh + adjusted_applicances_kwh
         )
-        self.expected_energy_bill = (
+        self.current_energy_bill = (
             adjusted_heating_cost + adjusted_hot_water_cost + adjusted_lighting_cost + adjusted_appliances_cost
         )
 
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index e4fc508c..0fe50538 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -501,7 +501,9 @@ async def trigger_plan(body: PlanTriggerRequest):
                 Recommendations.calculate_recommendation_impact(
                     property_instance=property_instance,
                     all_predictions=all_predictions,
-                    recommendations=recommendations
+                    recommendations=recommendations,
+                    representative_recommendations=representative_recommendations,
+                    energy_consumption_client=energy_consumption_client
                 )
             )
 
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 5cca056f..97e5a3b7 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -277,9 +277,71 @@ class Recommendations:
 
         return property_recommendations
 
+    @staticmethod
+    def _calculate_appliance_solar_savings(
+        rec, property_instance, heating_kwh_reduction, hot_water_kwh_reduction, lighting_kwh_reduction
+    ):
+        """
+        Calculates the impact on kwh and cost of installing solar panels on appliances
+        :param rec: The recommendation
+        :param property_instance: Instance of the Property class
+        :param heating_kwh_reduction: The kwh reduction from heating
+        :param hot_water_kwh_reduction: The kwh reduction from hot water
+        :param lighting_kwh_reduction: The kwh reduction from lighting
+        :return:
+        """
+
+        if rec["type"] != "solar_pv":
+            return 0, 0
+
+        # Calculate the amount of energy the solar panel array will generate for this unit
+        unit_energy_consumption = (
+            rec["initial_ac_kwh_per_year"] *
+            property_instance.solar_panel_configuration["unit_share_of_energy"]
+        )
+
+        unit_energy_utilised = unit_energy_consumption * GoogleSolarApi.SOLAR_CONSUMPTION_PROPORTION
+        unit_energy_exported = unit_energy_consumption - unit_energy_utilised
+        unit_energy_exported_value = unit_energy_exported * AnnualBillSavings.ELECTRICITY_EXPORT_PAYMENT
+
+        # We assume that 50% of the energy generated will be used by the property without a battery
+        # to be conservative
+
+        # of the energy utilised, some of it is used by heating, hot water and lighting so we
+        # remove that from the total
+        unit_energy_utilised -= (
+            heating_kwh_reduction + hot_water_kwh_reduction + lighting_kwh_reduction
+        )
+        unit_energy_utilised = 0 if unit_energy_utilised < 0 else unit_energy_utilised
+
+        # This is how much energy the appliances will use after install
+        post_install_appliance_kwh = (
+            property_instance.energy_consumption_estimates["adjusted"]["appliances"] -
+            unit_energy_utilised
+        )
+        post_install_appliance_kwh = (
+            0 if post_install_appliance_kwh < 0 else post_install_appliance_kwh
+        )
+
+        predicted_appliances_kwh_reduction = (
+            property_instance.energy_consumption_estimates["adjusted"]["appliances"] -
+            post_install_appliance_kwh
+        )
+
+        predicted_appliances_cost_reduction = unit_energy_exported_value + (
+            predicted_appliances_kwh_reduction * AnnualBillSavings.ELECTRICITY_PRICE_CAP
+        )
+
+        return predicted_appliances_cost_reduction, predicted_appliances_kwh_reduction
+
     @classmethod
     def calculate_recommendation_impact(
-        cls, property_instance, all_predictions, recommendations, energy_consumption_client
+        cls,
+        property_instance,
+        all_predictions,
+        recommendations,
+        representative_recommendations,
+        energy_consumption_client
     ):
 
         """
@@ -289,6 +351,7 @@ class Recommendations:
         :param property_instance: Instance of the Property class, for the home associated to property_id
         :param all_predictions: dictionary of predictions from the model apis
         :param recommendations: dictionary of recommendations for the property
+        :param representative_recommendations: dictionary of representative recommendations for the property
         :param energy_consumption_client: Instance of the EnergyConsumptionClient class
         :return:
         """
@@ -350,8 +413,14 @@ class Recommendations:
             ].median().reset_index()
         )
 
+        representative_rec_ids = [
+            rec["recommendation_id"] for rec in representative_recommendations[property_instance.id]
+        ]
+
         phase_lighting_costs = {}
         phase_kwh_figures = {}
+        bill_savings_list = []
+        kwh_savings_list = []
         for recommendations_by_type in property_recommendations:
             for rec in recommendations_by_type:
 
@@ -447,21 +516,6 @@ class Recommendations:
                         "unadjusted": scoring_lighting_cost
                     }
 
-                    # This is the total bill savings for the recommendation
-                    if rec["type"] == "solar_pv":
-                        # We need to calculate the predicted bill savings for the solar pv recommendation
-                        # where we will get some savings from the cost of appliances but it depends on the amount
-                        # of energy generated by the solar panels
-                        # We can assume that 50% of the energy generated will be used by the property without
-                        # a battery, to be conservative.
-                        # SIMILARLY: We need to handle kwh savings
-                        raise Exception("Handle me")
-                    else:
-                        predicted_bill_savings = (
-                            predicted_heating_cost_reduction + predicted_hot_water_cost_reduction +
-                            predicted_lighting_cost_reduction
-                        )
-
                     # We now predict the kwh savings using the xgb model
 
                     simulation_epc = property_instance.simulation_epcs[rec["phase"]].copy()
@@ -507,7 +561,30 @@ class Recommendations:
 
                     lighting_kwh_reduction = predicted_lighting_cost_reduction / AnnualBillSavings.ELECTRICITY_PRICE_CAP
 
-                    kwh_reduction = heating_kwh_reduction + hot_water_kwh_reduction + lighting_kwh_reduction
+                    (
+                        predicted_appliances_cost_reduction,
+                        predicted_appliances_kwh_reduction
+                    ) = cls._calculate_appliance_solar_savings(
+                        rec=rec,
+                        property_instance=property_instance,
+                        heating_kwh_reduction=heating_kwh_reduction,
+                        hot_water_kwh_reduction=hot_water_kwh_reduction,
+                        lighting_kwh_reduction=lighting_kwh_reduction
+                    )
+
+                    kwh_reduction = (
+                        heating_kwh_reduction +
+                        hot_water_kwh_reduction +
+                        lighting_kwh_reduction +
+                        predicted_appliances_kwh_reduction
+                    )
+
+                    predicted_bill_savings = (
+                        predicted_heating_cost_reduction +
+                        predicted_hot_water_cost_reduction +
+                        predicted_lighting_cost_reduction +
+                        predicted_appliances_cost_reduction
+                    )
 
                     phase_kwh_figures[rec["phase"]] = {
                         "adjusted": {
@@ -651,48 +728,16 @@ class Recommendations:
 
                     lighting_kwh_reduction = predicted_lighting_cost_reduction / AnnualBillSavings.ELECTRICITY_PRICE_CAP
 
-                    # This is the total bill savings for the recommendation
-
-                    predicted_appliances_cost_reduction = 0
-                    predicted_appliances_kwh_reduction = 0
-                    if rec["type"] == "solar_pv":
-                        # Calulate the amount of energy the solar panel array will generate for this unit
-                        unit_energy_consumption = (
-                            rec["initial_ac_kwh_per_year"] *
-                            property_instance.solar_panel_configuration["unit_share_of_energy"]
-                        )
-
-                        unit_energy_utilised = unit_energy_consumption * GoogleSolarApi.SOLAR_CONSUMPTION_PROPORTION
-                        unit_energy_exported = unit_energy_consumption - unit_energy_utilised
-                        unit_energy_exported_value = unit_energy_exported * AnnualBillSavings.ELECTRICITY_EXPORT_PAYMENT
-
-                        # We assume that 50% of the energy generated will be used by the property without a battery
-                        # to be conservative
-
-                        # of the energy utilised, some of it is used by heating, hot water and lighting so we
-                        # remove that from the total
-                        unit_energy_utilised -= (
-                            heating_kwh_reduction + hot_water_kwh_reduction + lighting_kwh_reduction
-                        )
-                        unit_energy_utilised = 0 if unit_energy_utilised < 0 else unit_energy_utilised
-
-                        # This is how much energy the appliances will use after install
-                        post_install_appliance_kwh = (
-                            property_instance.energy_consumption_estimates["adjusted"]["appliances"] -
-                            unit_energy_utilised
-                        )
-                        post_install_appliance_kwh = (
-                            0 if post_install_appliance_kwh < 0 else post_install_appliance_kwh
-                        )
-
-                        predicted_appliances_kwh_reduction = (
-                            property_instance.energy_consumption_estimates["adjusted"]["appliances"] -
-                            post_install_appliance_kwh
-                        )
-
-                        predicted_appliances_cost_reduction = unit_energy_exported_value + (
-                            predicted_appliances_kwh_reduction * AnnualBillSavings.ELECTRICITY_PRICE_CAP
-                        )
+                    (
+                        predicted_appliances_cost_reduction,
+                        predicted_appliances_kwh_reduction
+                    ) = cls._calculate_appliance_solar_savings(
+                        rec=rec,
+                        property_instance=property_instance,
+                        heating_kwh_reduction=heating_kwh_reduction,
+                        hot_water_kwh_reduction=hot_water_kwh_reduction,
+                        lighting_kwh_reduction=lighting_kwh_reduction
+                    )
 
                     # We now calculate the predicted_bill_savings
                     predicted_bill_savings = (
@@ -723,17 +768,18 @@ class Recommendations:
                 rec["kwh_savings"] = kwh_reduction
                 rec["energy_cost_savings"] = predicted_bill_savings
 
+                if rec["recommendation_id"] in representative_rec_ids:
+                    bill_savings_list.append(predicted_bill_savings)
+                    kwh_savings_list.append(kwh_reduction)
+
                 if (rec["sap_points"] is None) and (rec["co2_equivalent_savings"] is None) or (
                     rec["heat_demand"] is None) or (rec["energy_cost_savings"] is None):
                     raise ValueError("sap points, co2 or heat demand is missing")
 
         # We sum up the total savings for the property and that is our expected energy bill
-        # expected_energy_bill = sum(
-        #     [
-        #         rec["energy_cost_savings"] for rec in property_recommendations
-        #         if rec["type"] != "mechanical_ventilation"
-        #     ]
-        # )
+
+        expected_energy_bill = property_instance.current_energy_bill - sum(bill_savings_list)
+        expected_adjusted_energy = property_instance.current_adjusted_energy - sum(kwh_savings_list)
 
         return (
             property_recommendations,

From 53bb74ac4f3fe3203022dff3ce34e82b86ea879e Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 9 Jul 2024 23:03:49 +0100
Subject: [PATCH 23/36] Changing adjusted heat demand to kwh_savings in
 recommendations model

---
 backend/app/db/functions/recommendations_functions.py | 4 ++--
 backend/app/db/models/recommendations.py              | 2 +-
 backend/ml_models/Valuation.py                        | 7 +++++++
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py
index b22ce92f..365829e4 100644
--- a/backend/app/db/functions/recommendations_functions.py
+++ b/backend/app/db/functions/recommendations_functions.py
@@ -80,8 +80,8 @@ def upload_recommendations(session: Session, recommendations_to_upload, property
             "starting_u_value": rec.get("starting_u_value"),
             "new_u_value": rec.get("new_u_value"),
             "sap_points": rec["sap_points"],
-            "heat_demand": rec["heat_demand"],
-            "adjusted_heat_demand": rec["adjusted_heat_demand"],
+            "energy_savings": rec["heat_demand"],
+            "kwh_savings": rec["kwh_savings"],
             "co2_equivalent_savings": rec["co2_equivalent_savings"],
             "total_work_hours": rec["labour_hours"],
             "energy_cost_savings": rec["energy_cost_savings"],
diff --git a/backend/app/db/models/recommendations.py b/backend/app/db/models/recommendations.py
index 186f87a8..8ab7908f 100644
--- a/backend/app/db/models/recommendations.py
+++ b/backend/app/db/models/recommendations.py
@@ -22,7 +22,7 @@ class Recommendation(Base):
     new_u_value = Column(Float)
     sap_points = Column(Float)
     heat_demand = Column(Float)
-    adjusted_heat_demand = Column(Float)
+    kwh_savings = Column(Float)
     co2_equivalent_savings = Column(Float)
     energy_savings = Column(Float)
     energy_cost_savings = Column(Float)
diff --git a/backend/ml_models/Valuation.py b/backend/ml_models/Valuation.py
index 1af38194..b87f156b 100644
--- a/backend/ml_models/Valuation.py
+++ b/backend/ml_models/Valuation.py
@@ -93,6 +93,13 @@ class PropertyValuation:
         # Northern Group Pilot - search by going to https://www.zoopla.co.uk/property/uprn/{uprn}/
         10070868263: 194_000,  # Based on Zoopla
         10070868244: 195_000,  # Based on Zoopla
+        # Places For People Pilot
+        200140644: 385_000,
+        200140645: 481_000,
+        200140646: 372_000,
+        200140647: 481_000,
+        200140648: 373_000,
+        200140649: 373_000,
     }
 
     # We base our valuation uplifts on a number of sources

From e12ff81aaa350eaaa96f28814203f486f80a75ec Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 9 Jul 2024 23:24:40 +0100
Subject: [PATCH 24/36] added labour hours and days to building level costing
 for solar

---
 recommendations/Costs.py                  | 4 ++--
 recommendations/SolarPvRecommendations.py | 5 +++++
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/recommendations/Costs.py b/recommendations/Costs.py
index 68870841..9a12af42 100644
--- a/recommendations/Costs.py
+++ b/recommendations/Costs.py
@@ -1037,13 +1037,13 @@ class Costs:
         vat = total_cost - subtotal_before_vat
 
         # Labour hours are based on estimates from online research but an average team seems to consist of 3 people
-        # and most jobs take around 2 days. Assuming an 8 hour day for 3 people across 2 days, gives us 72 hours of
+        # and most jobs take around 2 days. Assuming an 8 hour day for 3 people across 2 days, gives us 48 hours of
         # labour
         return {
             "total": total_cost,
             "subtotal": subtotal_before_vat,
             "vat": vat,
-            "labour_hours": 72,
+            "labour_hours": 48,
             "labour_days": 2,
         }
 
diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py
index 596b9290..5219e323 100644
--- a/recommendations/SolarPvRecommendations.py
+++ b/recommendations/SolarPvRecommendations.py
@@ -116,6 +116,9 @@ class SolarPvRecommendations:
             # Spread the cost to the individual units - adding a 20% contingency
             total_cost = recommendation_config["total_cost"] / n_units
             kw = np.floor(recommendation_config["array_warrage"] / 100) / 10
+            # Default to a week's work (5 days) for a team of 3 people working 8-hour days
+            labour_days = 5
+            labour_hours = 3 * 8 * labour_days
 
             description = (f"Install a {kw} kilowatt-peak (kWp) solar photovoltaic (PV) panel system on the roof "
                            "of the building")
@@ -133,6 +136,8 @@ class SolarPvRecommendations:
                     "sap_points": None,
                     "already_installed": False,
                     "total": total_cost,
+                    "labour_days": labour_days,
+                    "labour_hours": labour_hours,
                     # This is required for simulating the SAP impact. solar_pv_percentage is between 0 & 1 so we scale
                     # back up here
                     "photo_supply": roof_coverage_percent,

From 36718ab3be906aacc2d0d206f91f5f27da66d8a9 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 9 Jul 2024 23:30:09 +0100
Subject: [PATCH 25/36] handling aggregations storage to db

---
 backend/app/db/functions/portfolio_functions.py | 2 +-
 backend/app/plan/router.py                      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/backend/app/db/functions/portfolio_functions.py b/backend/app/db/functions/portfolio_functions.py
index 69203368..402675e8 100644
--- a/backend/app/db/functions/portfolio_functions.py
+++ b/backend/app/db/functions/portfolio_functions.py
@@ -11,7 +11,7 @@ def aggregate_portfolio_recommendations(
         session.query(
             func.sum(Recommendation.estimated_cost).label("cost"),
             func.sum(Recommendation.total_work_hours).label("total_work_hours"),
-            func.sum(Recommendation.adjusted_heat_demand).label("energy_savings"),
+            func.sum(Recommendation.kwh_savings).label("energy_savings"),
             func.sum(Recommendation.co2_equivalent_savings).label("co2_equivalent_savings"),
             func.sum(Recommendation.energy_cost_savings).label("energy_cost_savings"),
         )
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 0fe50538..7d92f425 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -129,7 +129,7 @@ def extract_portfolio_aggregation_data(
 
         pre_retrofit_energy_consumption = p.current_adjusted_energy
         post_retrofit_energy_consumption = p.current_adjusted_energy - sum(
-            [r["adjusted_heat_demand"] for r in default_recommendations]
+            [r["kwh_savings"] for r in default_recommendations]
         )
 
         # Add up energy savings

From c112d23c6762c2759807e36574271502b7162c3a Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 10 Jul 2024 01:10:19 +0100
Subject: [PATCH 26/36] removed electric boiler rec

---
 recommendations/Costs.py                     | 15 ++--
 recommendations/HeatingControlRecommender.py |  4 ++
 recommendations/HeatingRecommender.py        | 76 ++++++++++++++++++++
 3 files changed, 91 insertions(+), 4 deletions(-)

diff --git a/recommendations/Costs.py b/recommendations/Costs.py
index 9a12af42..2dbce327 100644
--- a/recommendations/Costs.py
+++ b/recommendations/Costs.py
@@ -92,6 +92,12 @@ CONDENSING_BOILER_COSTS = {
     "40kw": 1625
 }
 
+# Electric boiler prices based on
+# https://www.greenmatch.co.uk/boilers/combi-boilers/electric-combi-boilers
+# https://www.tlc-direct.co.uk/Products/ERMAC15.html
+# The unit is a 15kw boiler, capable of outputting between 3kw and 15kw. Costs seem to be around £1800
+ELECTRIC_BOILER_COSTS = 1800
+
 # Assumes 3 hours to remove each heater (including re-decorating)
 ROOM_HEATER_REMOVAL_COST = 120
 ROOM_HEATER_REMOVAL_LABOUR_HOURS = 3
@@ -1285,7 +1291,7 @@ class Costs:
         estimated_radiators = max(total_radiators_based_on_power, base_radiators + additional_radiators)
         return round(estimated_radiators)
 
-    def boiler(self, size, exising_room_heaters, system_change, n_heated_rooms, n_rooms):
+    def boiler(self, size, exising_room_heaters, system_change, n_heated_rooms, n_rooms, is_electric=False):
         """
         Based on a basic estimate of median value £2600 to install a low carbon combi boiler
         First time central heating vosts can als be found here:
@@ -1293,7 +1299,10 @@ class Costs:
         :return:
         """
 
-        unit_cost = CONDENSING_BOILER_COSTS[size]
+        if not is_electric:
+            unit_cost = CONDENSING_BOILER_COSTS[size]
+        else:
+            unit_cost = ELECTRIC_BOILER_COSTS
         # The unit cost is the cost without VAT
         # We now need to estimate the cost of the works
         labour_days = 2
@@ -1307,8 +1316,6 @@ class Costs:
         # Add contingency and preliminaries
         labour_cost = labour_cost * (1 + self.CONTINGENCY + self.PRELIMINARIES)
 
-        # labour_days = labour_days + (removal_labour_hours / 8)
-
         vat = labour_cost * self.VAT_RATE
 
         subtotal_before_vat = unit_cost + labour_cost
diff --git a/recommendations/HeatingControlRecommender.py b/recommendations/HeatingControlRecommender.py
index 76da6c37..a94c2304 100644
--- a/recommendations/HeatingControlRecommender.py
+++ b/recommendations/HeatingControlRecommender.py
@@ -35,6 +35,10 @@ class HeatingControlRecommender:
 
             return
 
+        if heating_description in ["Boiler and radiators, electric"]:
+            self.recommend_roomstat_programmer_trvs()
+            return
+
         if heating_description in ["Air source heat pump, radiators, electric"]:
             self.recommend_time_temperature_zone_controls()
 
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index ac8c4973..4ad1d987 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -119,6 +119,69 @@ class HeatingRecommender:
 
         return
 
+    def recommend_electric_boiler_upgrade(self, phase):
+
+        # Small initial scope, just handles the case of properties that have electric boilers where the efficiency
+        # is poor or very poor
+        # We recommend upgrading to a new electric boiler
+
+        recommendation_phase = phase
+
+        if self.property.data["mainheat-energy-eff"] not in ["Poor", "Very Poor"]:
+            return
+
+        hotwater_from_mains = self.property.hotwater["clean_description"] in ["From main system"]
+        hotwater_from_cylinder = self.property.hotwater["clean_description"] in [
+            "From main system, no cylinder thermostat"
+        ]
+        # if the hotwater is from the mains, we probably have a combi boiler so we recommend a new electric boiler
+
+        if hotwater_from_mains:
+            description = f"Upgrade to a higher efficiency electric boiler"
+
+            simulation_config = {
+                "mainheat_energy_eff_ending": "Average",
+                "hot_water_energy_eff_ending": "Average"
+            }
+
+            boiler_costs = self.costs.boiler(
+                size=None,
+                exising_room_heaters=False,
+                system_change=False,
+                n_heated_rooms=self.property.data["number-heated-rooms"],
+                n_rooms=self.property.number_of_rooms,
+                is_electric=True
+            )
+
+            already_installed = "heating" in self.property.already_installed
+            if already_installed:
+                boiler_costs = override_costs(boiler_costs)
+                description = "Heating system has already been upgraded, no further action needed."
+
+            boiler_recommendation = {
+                "phase": recommendation_phase,
+                "parts": [],
+                "type": "heating",
+                "description": description,
+                "starting_u_value": None,
+                "new_u_value": None,
+                "sap_points": None,
+                "already_installed": already_installed,
+                "simulation_config": simulation_config,
+                **boiler_costs
+            }
+
+            controls_recommender = HeatingControlRecommender(self.property)
+            controls_recommender.recommend(heating_description="Boiler and radiators, electric")
+
+            self.heating_recommendations.extend([boiler_recommendation] + controls_recommender.recommendation)
+            return
+
+        if hotwater_from_cylinder:
+            # We recommend a change from a system boiler, with a cylinder to a combi boiler
+            description = ("Replace the existing boiler and cylinder without a thermostat with a new electric combi "
+                           "boiler")
+
     def is_ashp_valid(self):
         suitable_property_type = self.property.data["property-type"] in ["House", "Bungalow"]
         has_air_source_heat_pump = self.property.main_heating["has_air_source_heat_pump"]
@@ -458,6 +521,19 @@ class HeatingRecommender:
 
         return closest_size
 
+    @staticmethod
+    def estimate_electric_boiler_size(num_heated_rooms):
+        """
+        We use an approach similar to the one defined in
+        https://www.greenmatch.co.uk/boilers/combi-boilers/electric-combi-boilers
+        Instead of using radiators as a proxy, we use the number of heated rooms
+
+        :param num_heated_rooms: The number of heated rooms in the property
+        :return:
+        """
+
+        return max(num_heated_rooms * 1.5, 6)
+
     def recommend_boiler_upgrades(self, phase, system_change, exising_room_heaters):
         """
         This boiler recommendation will only recommend a like-for-like upgrade, since changing the system

From a43c9e82d34e952983ff62f12ed919a53dcc0951 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 10 Jul 2024 01:29:11 +0100
Subject: [PATCH 27/36] reverting electric boiler recs, adding cylinder
 thermostat rec

---
 recommendations/Costs.py                   | 19 +++++++
 recommendations/HotwaterRecommendations.py | 60 ++++++++++++++++++++--
 2 files changed, 75 insertions(+), 4 deletions(-)

diff --git a/recommendations/Costs.py b/recommendations/Costs.py
index 2dbce327..2159c0b0 100644
--- a/recommendations/Costs.py
+++ b/recommendations/Costs.py
@@ -1151,6 +1151,25 @@ class Costs:
             "labour_days": 1,
         }
 
+    def cylinder_thermostat(self):
+        """
+        Calculate the cost of installing a cylinder thermostat
+        """
+
+        # The £200 cost is a rough estimate based on internet research
+        total_cost = 200
+        subtotal_before_vat = total_cost / (1 + self.VAT_RATE)
+        vat = total_cost - subtotal_before_vat
+
+        # We estimate the labour hours to be 2
+        return {
+            "total": total_cost,
+            "subtotal": subtotal_before_vat,
+            "vat": vat,
+            "labour_hours": 2,
+            "labour_days": 1,
+        }
+
     def hot_water_tank_insulation(self):
         """
         Calculate the cost of installing hot water tank insulation
diff --git a/recommendations/HotwaterRecommendations.py b/recommendations/HotwaterRecommendations.py
index 95488d3f..86a031d6 100644
--- a/recommendations/HotwaterRecommendations.py
+++ b/recommendations/HotwaterRecommendations.py
@@ -1,6 +1,7 @@
 from backend.Property import Property
 from recommendations.Costs import Costs
-from recommendations.recommendation_utils import override_costs
+from recommendations.recommendation_utils import override_costs, check_simulation_difference
+from etl.epc_clean.epc_attributes.HotWaterAttributes import HotWaterAttributes
 
 
 class HotwaterRecommendations:
@@ -34,10 +35,15 @@ class HotwaterRecommendations:
             self.recommend_tank_insulation(phase=phase)
             return
 
+        if self.property.hotwater["clean_description"] == "From main system, no cylinder thermostat":
+            self.recommend_cylinder_thermostat(phase=phase)
+            return
+
     def recommend_tank_insulation(self, phase):
         """
         If the home has a very poor hot water system, this is often indicative of a lack of insulation on the hot water
-        tank. This is a very simple and cost effective improvement that can be made to the home.
+        tank. This is a very simple and cost effective improvement that can be made to the home. It will likely
+        take the efficiency from very poor to poor.
         """
 
         recommendation_cost = self.costs.hot_water_tank_insulation()
@@ -62,9 +68,55 @@ class HotwaterRecommendations:
                 "sap_points": None,
                 "already_installed": already_installed,
                 **recommendation_cost,
-                "simulation_config": {"hot_water_energy_eff_ending": "Average"},
+                "simulation_config": {"hot_water_energy_eff_ending": "Poor"},
                 "description_simulation": {
-                    "hot-water-energy-eff": "Average"
+                    "hot-water-energy-eff": "Poor"
+                }
+            }
+        )
+        return
+
+    def recommend_cylinder_thermostat(self, phase):
+        """
+        Recommend installing a cylinder thermostat when the hot water is "From main system, no cylinder thermostat".
+        The simulated hot water description becomes "From main system"; the efficiency rating is left unchanged.
+        """
+
+        recommendation_cost = self.costs.cylinder_thermostat()
+
+        already_installed = "cylinder_thermostat" in self.property.already_installed
+        if already_installed:
+            recommendation_cost = override_costs(recommendation_cost)
+            description = "Cylinder thermostat has already been installed, no further action required"
+        else:
+            description = "Install cylinder thermostat"
+
+        new_epc_description = "From main system"
+        hotwater_ending_config = HotWaterAttributes(new_epc_description).process()
+        hotwater_simulation_config = check_simulation_difference(
+            new_config=hotwater_ending_config, old_config=self.property.hotwater
+        )
+
+        simulation_config = {
+            "hot_water_energy_eff_ending": self.property.data["hot-water-energy-eff"],
+            **hotwater_simulation_config
+        }
+
+        self.recommendations.append(
+            {
+                "phase": phase,
+                "parts": [],
+                "type": "cylinder_thermostat",
+                "description": description,
+                "starting_u_value": None,
+                "new_u_value": None,
+                "sap_points": None,
+                "already_installed": already_installed,
+                **recommendation_cost,
+                "simulation_config": simulation_config,
+                "description_simulation": {
+                    "hot-water-energy-eff": self.property.data["hot-water-energy-eff"],
+                    "hotwater-description": new_epc_description,
                 }
             }
         )

From be105ac267ad484edf766ec58f265de782ef428c Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 10 Jul 2024 01:34:18 +0100
Subject: [PATCH 28/36] Added cylinder thermostat to
 create_recommendation_scoring_data

---
 backend/Property.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/backend/Property.py b/backend/Property.py
index b1cf2d16..eadefc48 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -521,6 +521,7 @@ class Property:
             if recommendation["type"] in [
                 "heating", "hot_water_tank_insulation", "heating_control", "secondary_heating",
                 "internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation",
+                "cylinder_thermostat"
             ]:
                 # We update the data, as defined in the recommendaton
                 if output["walls_insulation_thickness_ending"] is None:
@@ -544,7 +545,7 @@ class Property:
                 "loft_insulation", "room_roof_insulation", "flat_roof_insulation",
                 "solid_floor_insulation", "suspended_floor_insulation", "exposed_floor_insulation",
                 "windows_glazing", "solar_pv", "heating", "hot_water_tank_insulation",
-                "heating_control", "secondary_heating"
+                "heating_control", "secondary_heating", "cylinder_thermostat"
             ]:
                 raise NotImplementedError(
                     "Implement me, given type %s" % recommendation["type"]

From fdfac81d2315963dff69a093a909bb0ccf770c6b Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 10 Jul 2024 01:47:55 +0100
Subject: [PATCH 29/36] updated cylinder thermostat recommendations

---
 recommendations/HotwaterRecommendations.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/recommendations/HotwaterRecommendations.py b/recommendations/HotwaterRecommendations.py
index 86a031d6..0d34c894 100644
--- a/recommendations/HotwaterRecommendations.py
+++ b/recommendations/HotwaterRecommendations.py
@@ -89,7 +89,7 @@ class HotwaterRecommendations:
             recommendation_cost = override_costs(recommendation_cost)
             description = "Cylinder thermostat has already been installed, no further action required"
         else:
-            description = "Install cylinder thermostat"
+            description = "Install a smart cylinder thermostat on the hot water tank"
 
         new_epc_description = "From main system"
         hotwater_ending_config = HotWaterAttributes(new_epc_description).process()

From 389e9f51da2af199ce3a79110ac29e9c39360c8d Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 10 Jul 2024 11:15:01 +0100
Subject: [PATCH 30/36] setting up storage to db

---
 backend/apis/GoogleSolarApi.py              | 34 ++++++++++-
 backend/app/db/functions/solar_functions.py | 64 +++++++++++++++++++++
 backend/app/db/models/solar.py              | 21 +++++++
 backend/app/plan/router.py                  | 18 +++++-
 recommendations/SolarPvRecommendations.py   |  4 +-
 5 files changed, 137 insertions(+), 4 deletions(-)
 create mode 100644 backend/app/db/functions/solar_functions.py
 create mode 100644 backend/app/db/models/solar.py

diff --git a/backend/apis/GoogleSolarApi.py b/backend/apis/GoogleSolarApi.py
index f7b34d19..55041b74 100644
--- a/backend/apis/GoogleSolarApi.py
+++ b/backend/apis/GoogleSolarApi.py
@@ -5,6 +5,10 @@ from backend.ml_models.AnnualBillSavings import AnnualBillSavings
 import requests
 from functools import lru_cache
 import time
+from backend.app.db.functions.solar_functions import get_solar_data, store_batch_data
+from utils.logger import setup_logger
+
+logger = setup_logger()
 
 
 class GoogleSolarApi:
@@ -98,7 +102,10 @@ class GoogleSolarApi:
                     raise
 
     @lru_cache(maxsize=128)
-    def get(self, longitude, latitude, energy_consumption, required_quality="MEDIUM", is_building=False):
+    def get(
+        self, longitude, latitude, energy_consumption, required_quality="MEDIUM", is_building=False, session=None,
+        uprn=None
+    ):
         """
         Wrapper function that calls get_building_insights and extracts roof segments, with caching.
 
@@ -107,10 +114,21 @@ class GoogleSolarApi:
         :param energy_consumption: The energy consumption of the building/unit associated to the longitude and latitude.
         :param required_quality: The required quality of the data (default is "MEDIUM").
         :param is_building: Whether the energy consumption is for a building or a unit.
+        :param session: The database session to use for the query (default is None).
+        :param uprn: The unique property reference number (default is None).
         :return: The JSON response containing the building insights data.
         """
 
-        self.insights_data = self.get_building_insights(longitude, latitude, required_quality)
+        is_outdated = False
+        if session is not None:
+            # Check if the data is already in the database
+            self.insights_data, _, is_outdated = get_solar_data(
+                session, longitude=longitude, latitude=latitude, uprn=uprn
+            )
+
+        # If we have no data in the db, or the stored data is more than 6 months old, call the API
+        if self.insights_data is None or is_outdated:
+            self.insights_data = self.get_building_insights(longitude, latitude, required_quality)
 
         # Extract key data from the insights response
         self.roof_segments = self.insights_data["solarPotential"].get('roofSegmentStats', [])
@@ -137,6 +155,18 @@ class GoogleSolarApi:
         # We now start finding the solar panel configurations
         self.optimise_solar_configuration(energy_consumption=energy_consumption, is_building=is_building)
 
+    def save_to_db(self, session, uprns_to_location):
+        if self.insights_data is None:
+            raise ValueError("No api data to store")
+
+        logger.info("Storing to database")
+
+        store_batch_data(
+            session=session,
+            api_data=self.insights_data,
+            uprns_to_location=uprns_to_location
+        )
+
     @staticmethod
     def lifetime_production_ac_kwh(
         row,
diff --git a/backend/app/db/functions/solar_functions.py b/backend/app/db/functions/solar_functions.py
new file mode 100644
index 00000000..e8bba137
--- /dev/null
+++ b/backend/app/db/functions/solar_functions.py
@@ -0,0 +1,64 @@
+import datetime
+import pytz
+from sqlalchemy.orm import Session
+from sqlalchemy.orm.exc import NoResultFound
+from backend.app.db.models.solar import Solar
+
+
+def get_solar_data(session: Session, longitude: float = None, latitude: float = None, uprn: str = None):
+    """
+    This function will fetch data from the solar table based on longitude and latitude or UPRN.
+    :param session: The database session
+    :param longitude: The longitude to search for
+    :param latitude: The latitude to search for
+    :param uprn: The UPRN to search for (overrides longitude and latitude if provided)
+    :return: A tuple of (google_api_response, updated_at, is_outdated); (None, None, False) if no row is found
+    """
+    try:
+        if uprn:
+            # Search by UPRN
+            solar_data = session.query(Solar.google_api_response, Solar.updated_at).filter_by(uprn=uprn).one()
+        else:
+            # Search by longitude and latitude
+            solar_data = session.query(Solar.google_api_response, Solar.updated_at).filter(
+                Solar.longitude == longitude,
+                Solar.latitude == latitude
+            ).one()
+
+        # Check if updated_at is more than 6 months old
+        six_months_ago = datetime.datetime.now(pytz.utc) - datetime.timedelta(days=6 * 30)  # Approximate 6 months
+        is_outdated = solar_data.updated_at < six_months_ago
+
+        return solar_data.google_api_response, solar_data.updated_at, is_outdated
+
+    except NoResultFound:
+        return None, None, False
+
+
+def store_batch_data(session: Session, api_data: dict, uprns_to_location: list):
+    """
+    This function will store the API data to the solar table against all of the UPRNs with longitude and latitude.
+    :param session: The database session
+    :param api_data: The API data to store
+    :param uprns_to_location: A list of dictionaries containing uprn, longitude, and latitude
+    """
+    try:
+        # Convert uprns_to_location to a list of dicts for bulk insert
+        records_to_update = []
+        for data in uprns_to_location:
+            record = {
+                'uprn': data['uprn'],
+                'longitude': data['longitude'],
+                'latitude': data['latitude'],
+                'google_api_response': api_data,
+                'updated_at': datetime.datetime.now(pytz.utc)
+            }
+            records_to_update.append(record)
+
+        # Perform bulk insert (bulk_insert_mappings only issues INSERTs, so existing rows are not updated)
+        session.bulk_insert_mappings(Solar, records_to_update)
+        session.commit()
+
+    except Exception as e:
+        session.rollback()
+        raise e
diff --git a/backend/app/db/models/solar.py b/backend/app/db/models/solar.py
new file mode 100644
index 00000000..9cc51e51
--- /dev/null
+++ b/backend/app/db/models/solar.py
@@ -0,0 +1,21 @@
+import datetime
+import pytz
+from sqlalchemy import Column, Integer, Float, DateTime, JSON
+from sqlalchemy.ext.declarative import declarative_base
+
+Base = declarative_base()
+
+
+class Solar(Base):
+    __tablename__ = 'solar'
+    id = Column(Integer, primary_key=True, autoincrement=True)
+    longitude = Column(Float, nullable=False)
+    latitude = Column(Float, nullable=False)
+    uprn = Column(Integer, nullable=False)
+    created_at = Column(
+        DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)
+    )
+    updated_at = Column(
+        DateTime, nullable=False, default=datetime.datetime.now(pytz.utc), onupdate=datetime.datetime.now(pytz.utc)
+    )
+    google_api_response = Column(JSON, nullable=False)
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 7d92f425..c85382e7 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -380,12 +380,14 @@ async def trigger_plan(body: PlanTriggerRequest):
                     target_rating=body.goal_value,
                     current_consumption=p.current_adjusted_energy
                 ),
-                "property_id": p.id
+                "property_id": p.id,
+                "uprn": p.uprn
             } for p in input_properties if p.building_id is not None
         ]
         if building_ids:
             # Find the unique longitude and latitude pairs for each building id
             unique_coordinates = {}
+            building_uprns = {}
             for entry in building_ids:
                 building_id = entry['building_id']
                 coordinate_pair = {'longitude': entry['longitude'], 'latitude': entry['latitude']}
@@ -396,6 +398,16 @@ async def trigger_plan(body: PlanTriggerRequest):
                 if coordinate_pair not in unique_coordinates[building_id]:
                     unique_coordinates[building_id].append(coordinate_pair)
 
+                if building_id not in building_uprns:
+                    building_uprns[building_id] = []
+
+                if entry['uprn'] not in building_uprns[building_id]:
+                    building_uprns[building_id].append(
+                        {
+                            "uprn": entry['uprn'], "longitude": entry['longitude'], "latitude": entry['latitude']
+                        }
+                    )
+
             solar_panel_configuration = {}
             for building_id, coordinates in unique_coordinates.items():
                 if len(coordinates) > 1:
@@ -410,6 +422,7 @@ async def trigger_plan(body: PlanTriggerRequest):
                     latitude=coordinates["latitude"],
                     energy_consumption=energy_consumption,
                     is_building=True,
+                    session=session
                 )
                 solar_panel_configuration[building_id] = {
                     "insights_data": solar_api_client.insights_data,
@@ -417,6 +430,9 @@ async def trigger_plan(body: PlanTriggerRequest):
                     "n_units": len([entry for entry in building_ids if entry['building_id'] == building_id])
                 }
 
+                # Store the data in the database
+                solar_api_client.save_to_db(session=session, uprns_to_location=building_uprns[building_id])
+
                 # Insert this into the properties that have this building id
                 for p in input_properties:
                     if p.building_id == building_id:
diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py
index 5219e323..af1e7f27 100644
--- a/recommendations/SolarPvRecommendations.py
+++ b/recommendations/SolarPvRecommendations.py
@@ -109,7 +109,9 @@ class SolarPvRecommendations:
         )
         n_units = self.property.solar_panel_configuration["n_units"]
 
-        best_configurations = panel_performance.head(3).reset_index(drop=True)
+        # At a building level, we take a single configuration so that all properties are guaranteed to use
+        # the same configuration
+        best_configurations = panel_performance.head(1).reset_index(drop=True)
 
         for rank, recommendation_config in best_configurations.iterrows():
             roof_coverage_percent = round(recommendation_config["panneled_roof_area"] / total_roof_area * 100)

From 693698d5de8eeef263954f1eca7d4aa1d26eb3f0 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 10 Jul 2024 11:17:55 +0100
Subject: [PATCH 31/36] implemented storage of solar data to backend

---
 backend/apis/GoogleSolarApi.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/backend/apis/GoogleSolarApi.py b/backend/apis/GoogleSolarApi.py
index 55041b74..0faf8e72 100644
--- a/backend/apis/GoogleSolarApi.py
+++ b/backend/apis/GoogleSolarApi.py
@@ -65,6 +65,9 @@ class GoogleSolarApi:
         self.panel_wattage = None
         self.panel_performance = None
 
+        # Indicates if we need to store the data to the db
+        self.need_to_store = False
+
     def get_building_insights(self, longitude, latitude, required_quality="MEDIUM", max_retries=None):
         """
         Make an API request to retrieve building insights based on the given longitude and latitude, with retry
@@ -129,6 +132,7 @@ class GoogleSolarApi:
         # If we have no data in the db, or updated_at is more than 6 months
         if self.insights_data is None or is_outdated:
             self.insights_data = self.get_building_insights(longitude, latitude, required_quality)
+            self.need_to_store = True
 
         # Extract key data from the insights response
         self.roof_segments = self.insights_data["solarPotential"].get('roofSegmentStats', [])
@@ -159,6 +163,9 @@ class GoogleSolarApi:
         if self.insights_data is None:
             raise ValueError("No api data to store")
 
+        if not self.need_to_store:
+            return
+
         logger.info("Storing to database")
 
         store_batch_data(

From 28c70d0afbfda1236859a87aa2f9f2f6ac2b32fc Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 10 Jul 2024 17:52:22 +0100
Subject: [PATCH 32/36] Added expected payback years

---
 backend/apis/GoogleSolarApi.py              | 39 ++++++++++++--
 backend/app/db/functions/solar_functions.py | 58 +++++++++++++++------
 backend/app/db/models/solar.py              | 26 ++++++++-
 backend/app/plan/router.py                  |  4 +-
 4 files changed, 107 insertions(+), 20 deletions(-)

diff --git a/backend/apis/GoogleSolarApi.py b/backend/apis/GoogleSolarApi.py
index 0faf8e72..d6bc58df 100644
--- a/backend/apis/GoogleSolarApi.py
+++ b/backend/apis/GoogleSolarApi.py
@@ -159,19 +159,42 @@ class GoogleSolarApi:
         # We now start finding the solar panel configurations
         self.optimise_solar_configuration(energy_consumption=energy_consumption, is_building=is_building)
 
-    def save_to_db(self, session, uprns_to_location):
+    def save_to_db(self, session, uprns_to_location, scenario_type):
         if self.insights_data is None:
             raise ValueError("No api data to store")
 
+        if scenario_type not in ["unit", "building"]:
+            raise Exception("Invalid scenario type. Must be either 'unit' or 'building'")
+
         if not self.need_to_store:
             return
 
         logger.info("Storing to database")
 
+        scenarios_data = self.panel_performance.head(1)[
+            ["n_panels", "yearly_dc_energy", "total_cost", "panneled_roof_area", "array_warrage",
+             "initial_ac_kwh_per_year", "lifetime_ac_kwh", "roi"]
+        ].rename(
+            columns={
+                "n_panels": "number_panels",
+                "yearly_dc_energy": "yearly_dc_kwh",
+                "total_cost": "cost",
+                "panneled_roof_area": "panelled_roof_area",
+                "array_warrage": "array_kwhp",
+                "initial_ac_kwh_per_year": "yearly_ac_kwh",
+                "lifetime_ac_kwh": "lifetime_ac_kwh",
+            }
+        )
+
+        # Adding missing fields with default values
+        scenarios_data["is_default"] = True
+        scenarios_data["scenario_type"] = scenario_type
+
         store_batch_data(
             session=session,
             api_data=self.insights_data,
-            uprns_to_location=uprns_to_location
+            uprns_to_location=uprns_to_location,
+            scenarios_data=scenarios_data
         )
 
     @staticmethod
@@ -286,13 +309,21 @@ class GoogleSolarApi:
                 roi = (generation_value + surplus_value) / panel_config["total_cost"]
                 generation_deficit = surplus_value
 
+            # Calculate expected payback years
+            if generation_value > 0:
+                expected_payback_years = panel_config["total_cost"] / (
+                    generation_value / self.installation_life_span)
+            else:
+                expected_payback_years = None  # or some high value indicating no payback
+
             # Generation deficit tells us how much more energy we need to meet the generation demand.
             roi_results.append(
                 {
                     "n_panels": panel_config["n_panels"],
                     "roi": roi,
                     "generation_value": generation_value,
-                    "generation_deficit": generation_deficit
+                    "generation_deficit": generation_deficit,
+                    "expected_payback_years": expected_payback_years
                 }
             )
 
@@ -309,6 +340,8 @@ class GoogleSolarApi:
             ["roi", "generation_deficit", "generation_value"], ascending=[False, True, False]
         )
 
+        panel_performance["expected_payback_years"] = np.ceil(panel_performance["expected_payback_years"]).astype(int)
+
         self.panel_performance = panel_performance
 
     def exclude_north_facing_segments(self):
diff --git a/backend/app/db/functions/solar_functions.py b/backend/app/db/functions/solar_functions.py
index e8bba137..bf541bb4 100644
--- a/backend/app/db/functions/solar_functions.py
+++ b/backend/app/db/functions/solar_functions.py
@@ -2,7 +2,7 @@ import datetime
 import pytz
 from sqlalchemy.orm import Session
 from sqlalchemy.orm.exc import NoResultFound
-from backend.app.db.models.solar import Solar
+from backend.app.db.models.solar import Solar, SolarScenario
 
 
 def get_solar_data(session: Session, longitude: float = None, latitude: float = None, uprn: str = None):
@@ -35,28 +35,56 @@ def get_solar_data(session: Session, longitude: float = None, latitude: float =
         return None, None, False
 
 
-def store_batch_data(session: Session, api_data: dict, uprns_to_location: list):
+def store_batch_data(session: Session, api_data: dict, uprns_to_location: list, scenarios_data: list):
     """
     This function will store the API data to the solar table against all of the UPRNs with longitude and latitude.
     :param session: The database session
     :param api_data: The API data to store
-    :param data_list: A list of dictionaries containing uprn, longitude, and latitude
+    :param uprns_to_location: A list of dictionaries containing uprn, longitude, and latitude
+    :param scenarios_data: A list of dictionaries containing scenario data for each UPRN
     """
     try:
-        # Convert the data_list to a list of dicts for bulk insert
-        records_to_update = []
+        # Insert data into the Solar table and get the IDs
+        solar_records = []
         for data in uprns_to_location:
-            record = {
-                'uprn': data['uprn'],
-                'longitude': data['longitude'],
-                'latitude': data['latitude'],
-                'google_api_response': api_data,
-                'updated_at': datetime.datetime.now(pytz.utc)
-            }
-            records_to_update.append(record)
+            solar_record = Solar(
+                uprn=data['uprn'],
+                longitude=data['longitude'],
+                latitude=data['latitude'],
+                google_api_response=api_data,
+                updated_at=datetime.datetime.now(pytz.utc)
+            )
+            solar_records.append(solar_record)
 
-        # Perform bulk insert or update
-        session.bulk_insert_mappings(Solar, records_to_update)
+        session.bulk_save_objects(solar_records)
+        session.commit()
+
+        # Retrieve the IDs of the inserted records
+        inserted_ids = [record.id for record in solar_records]
+
+        # Prepare the data for SolarScenario
+        scenario_records = []
+        for index, solar_id in enumerate(inserted_ids):
+            scenarios = scenarios_data[index]  # Assuming scenarios_data has the same order as uprns_to_location
+            for scenario in scenarios:
+                scenario_record = SolarScenario(
+                    solar_id=solar_id,
+                    scenario_type=scenario['scenario_type'],
+                    number_panels=scenario['number_panels'],
+                    array_kwhp=scenario['array_kwhp'],
+                    lifetime_dc_kwh=scenario['lifetime_dc_kwh'],
+                    yearly_dc_kwh=scenario['yearly_dc_kwh'],
+                    lifetime_ac_kwh=scenario.get('lifetime_ac_kwh'),  # Optional field
+                    yearly_ac_kwh=scenario.get('yearly_ac_kwh'),  # Optional field
+                    cost=scenario['cost'],
+                    expected_payback_years=scenario.get('expected_payback_years'),  # Optional field
+                    panelled_roof_area=scenario['panelled_roof_area'],
+                    is_default=scenario['is_default']
+                )
+                scenario_records.append(scenario_record)
+
+        # Insert data into the SolarScenario table
+        session.bulk_save_objects(scenario_records)
         session.commit()
 
     except Exception as e:
diff --git a/backend/app/db/models/solar.py b/backend/app/db/models/solar.py
index 9cc51e51..88372bd3 100644
--- a/backend/app/db/models/solar.py
+++ b/backend/app/db/models/solar.py
@@ -1,6 +1,7 @@
 import datetime
 import pytz
-from sqlalchemy import Column, Integer, Float, DateTime, JSON
+from enum import Enum as PyEnum
+from sqlalchemy import Column, Integer, Float, DateTime, JSON, BigInteger, ForeignKey, Enum, Boolean
 from sqlalchemy.ext.declarative import declarative_base
 
 Base = declarative_base()
@@ -19,3 +20,26 @@ class Solar(Base):
         DateTime, nullable=False, default=datetime.datetime.now(pytz.utc), onupdate=datetime.datetime.now(pytz.utc)
     )
     google_api_response = Column(JSON, nullable=False)
+
+
+class ScenarioType(PyEnum):
+    unit = "unit"
+    building = "building"
+
+
+class SolarScenario(Base):
+    __tablename__ = 'solar_scenario'
+
+    id = Column(BigInteger, primary_key=True, autoincrement=True)
+    solar_id = Column(BigInteger, ForeignKey('solar.id'), nullable=False)
+    scenario_type = Column(Enum(ScenarioType), nullable=False)
+    number_panels = Column(Integer, nullable=False)
+    array_kwhp = Column(Integer, nullable=False)
+    lifetime_dc_kwh = Column(Float, nullable=False)
+    yearly_dc_kwh = Column(Float, nullable=False)
+    lifetime_ac_kwh = Column(Float)
+    yearly_ac_kwh = Column(Float)
+    cost = Column(Float, nullable=False)
+    expected_payback_years = Column(Float)
+    panelled_roof_area = Column(Float, nullable=False)
+    is_default = Column(Boolean, nullable=False)
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index c85382e7..0564fbba 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -431,7 +431,9 @@ async def trigger_plan(body: PlanTriggerRequest):
                 }
 
                 # Store the data in the database
-                solar_api_client.save_to_db(session=session, uprns_to_location=building_uprns[building_id])
+                solar_api_client.save_to_db(
+                    session=session, uprns_to_location=building_uprns[building_id], scenario_type="building"
+                )
 
                 # Insert this into the properties that have this building id
                 for p in input_properties:

From 09e5b01c7eae7ff7e4dd535dc110e90cd7106156 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 10 Jul 2024 17:56:40 +0100
Subject: [PATCH 33/36] added the lifetime kwh figures

---
 backend/apis/GoogleSolarApi.py | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/backend/apis/GoogleSolarApi.py b/backend/apis/GoogleSolarApi.py
index d6bc58df..74de9c20 100644
--- a/backend/apis/GoogleSolarApi.py
+++ b/backend/apis/GoogleSolarApi.py
@@ -198,10 +198,11 @@ class GoogleSolarApi:
         )
 
     @staticmethod
-    def lifetime_production_ac_kwh(
+    def lifetime_production_kwh(
         row,
         efficiency_depreciation_factor,
-        installation_life_span
+        installation_life_span,
+        column_name="initial_ac_kwh_per_year"
     ):
         """
         Mimics the function described in the Google Solar API documentation, presenting the lifetime production
@@ -209,7 +210,7 @@ class GoogleSolarApi:
         """
 
         return (
-            row["initial_ac_kwh_per_year"] *
+            row[column_name] *
             (1 - pow(
                 efficiency_depreciation_factor,
                 installation_life_span)) /
@@ -284,10 +285,19 @@ class GoogleSolarApi:
 
         # 2) Calculate the liftime solar energy production
         panel_performance['lifetime_ac_kwh'] = panel_performance.apply(
-            self.lifetime_production_ac_kwh,
+            self.lifetime_production_kwh,
             axis=1,
             efficiency_depreciation_factor=self.efficiency_depreciation_factor,
-            installation_life_span=self.installation_life_span
+            installation_life_span=self.installation_life_span,
+            column_name="initial_ac_kwh_per_year"
+        )
+
+        panel_performance['lifetime_dc_kwh'] = panel_performance.apply(
+            self.lifetime_production_kwh,
+            axis=1,
+            efficiency_depreciation_factor=self.efficiency_depreciation_factor,
+            installation_life_span=self.installation_life_span,
+            column_name="yearly_dc_energy",
         )
 
         # Now that we know the lifetime cnsumption of ac kwh, we can estimate the roi

From 050981ae8256158144b4dac5f27114fbc726c46b Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 10 Jul 2024 18:16:47 +0100
Subject: [PATCH 34/36] Scoring scenario data against solar data

---
 backend/apis/GoogleSolarApi.py              | 18 +++++++++++++-----
 backend/app/db/functions/solar_functions.py | 16 ++++++++++------
 2 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/backend/apis/GoogleSolarApi.py b/backend/apis/GoogleSolarApi.py
index 74de9c20..6eb58a23 100644
--- a/backend/apis/GoogleSolarApi.py
+++ b/backend/apis/GoogleSolarApi.py
@@ -172,8 +172,18 @@ class GoogleSolarApi:
         logger.info("Storing to database")
 
         scenarios_data = self.panel_performance.head(1)[
-            ["n_panels", "yearly_dc_energy", "total_cost", "panneled_roof_area", "array_warrage",
-             "initial_ac_kwh_per_year", "lifetime_ac_kwh", "roi"]
+            [
+                "n_panels",
+                "yearly_dc_energy",
+                "total_cost",
+                "panneled_roof_area",
+                "array_warrage",
+                "initial_ac_kwh_per_year",
+                "lifetime_ac_kwh",
+                "roi",
+                "expected_payback_years",
+                "lifetime_dc_kwh"
+            ]
         ].rename(
             columns={
                 "n_panels": "number_panels",
@@ -182,13 +192,11 @@ class GoogleSolarApi:
                 "panneled_roof_area": "panelled_roof_area",
                 "array_warrage": "array_kwhp",
                 "initial_ac_kwh_per_year": "yearly_ac_kwh",
-                "lifetime_ac_kwh": "lifetime_ac_kwh",
             }
         )
-
-        # Adding missing fields with default values
         scenarios_data["is_default"] = True
         scenarios_data["scenario_type"] = scenario_type
+        scenarios_data = scenarios_data.to_dict(orient="records")
 
         store_batch_data(
             session=session,
diff --git a/backend/app/db/functions/solar_functions.py b/backend/app/db/functions/solar_functions.py
index bf541bb4..59243f01 100644
--- a/backend/app/db/functions/solar_functions.py
+++ b/backend/app/db/functions/solar_functions.py
@@ -44,6 +44,7 @@ def store_batch_data(session: Session, api_data: dict, uprns_to_location: list,
     :param scenarios_data: A list of dictionaries containing scenario data for each UPRN
     """
     try:
+
         # Insert data into the Solar table and get the IDs
         solar_records = []
         for data in uprns_to_location:
@@ -55,18 +56,21 @@ def store_batch_data(session: Session, api_data: dict, uprns_to_location: list,
                 updated_at=datetime.datetime.now(pytz.utc)
             )
             solar_records.append(solar_record)
+            session.add(solar_record)
 
-        session.bulk_save_objects(solar_records)
-        session.commit()
+        session.flush()  # Flush to get the IDs generated
+
+        for record in solar_records:
+            session.refresh(record)  # Refresh to populate the ID fields
 
         # Retrieve the IDs of the inserted records
-        inserted_ids = [record.id for record in solar_records]
+        inserted_ids = {record.uprn: record.id for record in solar_records}
 
         # Prepare the data for SolarScenario
         scenario_records = []
-        for index, solar_id in enumerate(inserted_ids):
-            scenarios = scenarios_data[index]  # Assuming scenarios_data has the same order as uprns_to_location
-            for scenario in scenarios:
+        for data in uprns_to_location:
+            solar_id = inserted_ids.get(data['uprn'])
+            for scenario in scenarios_data:
                 scenario_record = SolarScenario(
                     solar_id=solar_id,
                     scenario_type=scenario['scenario_type'],

From b20e00a736b293ccbdd5b5c020fe349f365e6c65 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 11 Jul 2024 08:35:34 +0100
Subject: [PATCH 35/36] wip integrating additional recs with new code

---
 backend/Property.py                           |  21 +++
 backend/apis/GoogleSolarApi.py                |  37 +++-
 backend/app/plan/router.py                    |   1 +
 .../places_for_people/demo_portfolio.py       | 176 ++++++++++++++++++
 recommendations/Costs.py                      |  30 +--
 recommendations/HeatingRecommender.py         |  21 +++
 recommendations/Recommendations.py            |  17 ++
 recommendations/RoofRecommendations.py        |  52 +++++-
 recommendations/VentilationRecommendations.py |   2 +-
 recommendations/WallRecommendations.py        |   8 +-
 10 files changed, 336 insertions(+), 29 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index eadefc48..b8eb9936 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -357,6 +357,27 @@ class Property:
             for config in epc_transformations:
                 for k, v in config.items():
                     if k in phase_epc_transformation:
+                        if "-energy-eff" in k:
+                            # We take the highest value
+                            if phase_epc_transformation[k] == "Very Good":
+                                continue
+                            elif phase_epc_transformation[k] == "Good":
+                                if v == "Very Good":
+                                    phase_epc_transformation[k] = v
+                            elif phase_epc_transformation[k] == "Average":
+                                if v in ["Good", "Very Good"]:
+                                    phase_epc_transformation[k] = v
+                            elif phase_epc_transformation[k] == "Poor":
+                                if v in ["Average", "Good", "Very Good"]:
+                                    phase_epc_transformation[k] = v
+                            else:
+                                phase_epc_transformation[k] = v
+
+                            continue
+
+                        if phase_epc_transformation[k] == v:
+                            continue
+
                         raise NotImplementedError(
                             "Already have this key in the phase_epc_transformation - implement me")
                     phase_epc_transformation[k] = v
diff --git a/backend/apis/GoogleSolarApi.py b/backend/apis/GoogleSolarApi.py
index 6eb58a23..8d08b083 100644
--- a/backend/apis/GoogleSolarApi.py
+++ b/backend/apis/GoogleSolarApi.py
@@ -7,6 +7,7 @@ from functools import lru_cache
 import time
 from backend.app.db.functions.solar_functions import get_solar_data, store_batch_data
 from utils.logger import setup_logger
+from sklearn.preprocessing import MinMaxScaler
 
 logger = setup_logger()
 
@@ -198,6 +199,7 @@ class GoogleSolarApi:
         scenarios_data["scenario_type"] = scenario_type
         scenarios_data = scenarios_data.to_dict(orient="records")
 
+        # TODO: Rather than just doing a straight insert, we should overwrite what's already there if it exists
         store_batch_data(
             session=session,
             api_data=self.insights_data,
@@ -244,7 +246,7 @@ class GoogleSolarApi:
                 wattage = segment["panelsCount"] * self.insights_data["solarPotential"]["panelCapacityWatts"]
                 generated_dc_energy = segment["yearlyEnergyDcKwh"]
                 ratio = generated_dc_energy / wattage
-                cost = MCS_SOLAR_PV_COST_DATA["average_cost_per_kwh"] * (generated_dc_energy / 1000)
+                cost = MCS_SOLAR_PV_COST_DATA["average_cost_per_kwh"] * (wattage / 1000)
                 roi_summary.append(
                     {
                         "segmentIndex": segment["segmentIndex"],
@@ -309,17 +311,19 @@ class GoogleSolarApi:
         )
 
         # Now that we know the lifetime cnsumption of ac kwh, we can estimate the roi
+        lifetime_energy_consumption = energy_consumption * self.installation_life_span
         roi_results = []
         for _, panel_config in panel_performance.iterrows():
             lifetime_ac_kwh = panel_config["lifetime_ac_kwh"]
-            lifetime_energy_consumption = energy_consumption * self.installation_life_span
 
+            surplus = 0
             if lifetime_ac_kwh < lifetime_energy_consumption:
                 # We estimate the amount of electricity generated, based on the price cap
                 generation_value = lifetime_ac_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP
                 roi = generation_value / panel_config["total_cost"]
                 generation_deficit = lifetime_energy_consumption - lifetime_ac_kwh
             else:
+
                 # We now have a surplus of energy, which we can sell back to the grid
                 surplus = lifetime_ac_kwh - lifetime_energy_consumption
                 surplus_value = surplus * AnnualBillSavings.ELECTRICITY_EXPORT_PAYMENT
@@ -341,7 +345,8 @@ class GoogleSolarApi:
                     "roi": roi,
                     "generation_value": generation_value,
                     "generation_deficit": generation_deficit,
-                    "expected_payback_years": expected_payback_years
+                    "expected_payback_years": expected_payback_years,
+                    "surplus": surplus
                 }
             )
 
@@ -351,12 +356,28 @@ class GoogleSolarApi:
             roi_results, how="left", on="n_panels"
         )
 
-        # We prioritise maximal roi, then minimal geneartion deficit, then maximal generation value (if there is still
-        # a tie). Ideally, we want the best roi over the lifetime of the solar panels, but we also want to ensure that
-        # we can meet the energy demands of the building.
-        panel_performance = panel_performance.sort_values(
-            ["roi", "generation_deficit", "generation_value"], ascending=[False, True, False]
+        # We want max roi, minimal generation deficit, and max generation value - we create a ranking score
+        # Weight each metric, prioritising roi over generation value and generation deficit
+        weights = {'roi': 0.6, 'generation_value': 0.2, 'generation_deficit': 0.2}
+        metrics = panel_performance[['roi', 'generation_value', 'generation_deficit']]
+
+        # Normalize the columns (0 to 1 scale)
+        scaler = MinMaxScaler()
+        normalized_metrics = scaler.fit_transform(metrics)
+
+        # Convert normalized metrics back to a dataframe
+        normalized_metrics_df = pd.DataFrame(
+            normalized_metrics, columns=['roi', 'generation_value', 'generation_deficit']
         )
+        normalized_metrics_df['combined_score'] = (
+            normalized_metrics_df['roi'] * weights['roi'] +
+            normalized_metrics_df['generation_value'] * weights['generation_value'] +
+            (1 - normalized_metrics_df['generation_deficit']) * weights['generation_deficit']
+        )
+
+        panel_performance['combined_score'] = normalized_metrics_df['combined_score'].values
+        panel_performance['rank'] = panel_performance['combined_score'].rank(ascending=False)
+        panel_performance = panel_performance.sort_values(by='rank')
 
         panel_performance["expected_payback_years"] = np.ceil(panel_performance["expected_payback_years"]).astype(int)
 
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 0564fbba..5d75bada 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -431,6 +431,7 @@ async def trigger_plan(body: PlanTriggerRequest):
                 }
 
                 # Store the data in the database
+                # TODO: Rather than just doing a straight insert, we should overwrite what's already there if it exists
                 solar_api_client.save_to_db(
                     session=session, uprns_to_location=building_uprns[building_id], scenario_type="building"
                 )
diff --git a/etl/customers/places_for_people/demo_portfolio.py b/etl/customers/places_for_people/demo_portfolio.py
index 5c290ad7..2d48eff3 100644
--- a/etl/customers/places_for_people/demo_portfolio.py
+++ b/etl/customers/places_for_people/demo_portfolio.py
@@ -3,6 +3,7 @@ import pandas as pd
 from utils.s3 import save_csv_to_s3
 
 PORTFOLIO_ID = 83
+SECOND_PORTFOLIO_ID = 84
 USER_ID = 8
 
 
@@ -67,6 +68,181 @@ def app():
         "patches_file_path": "",
         "non_invasive_recommendations_file_path": "",
         "budget": None,
+        "exclusions": ["floor_insulation"]
+    }
+    print(body)
+
+    # Get an example of flats with solar panels from epc data
+
+    # import inspect
+    # import pandas as pd
+    # from tqdm import tqdm
+    # from pathlib import Path
+    #
+    # src_file_path = inspect.getfile(lambda: None)
+    #
+    # EPC_DIRECTORY = Path(src_file_path).parent / "local_data" / "all-domestic-certificates"
+    #
+    # epc_directories = [entry for entry in EPC_DIRECTORY.iterdir() if entry.is_dir()]
+    #
+    # directory = epc_directories[1]
+    # data = pd.read_csv(directory / "certificates.csv", low_memory=False)
+    # # Get flats
+    # data = data[data["PROPERTY_TYPE"].str.lower().str.contains("flat")]
+    # data = data[~pd.isnull(data["UPRN"])]
+    # data["UPRN"] = data["UPRN"].astype(int).astype(str)
+    # data = data[pd.to_datetime(data["LODGEMENT_DATE"]) > "2020-01-01"]
+    # flats_with_solar = data[data['PHOTO_SUPPLY'] > 0]
+    #
+    # print(flats_with_solar["UPRN"])
+    #
+    # flats_with_solar[["ADDRESS", "UPRN"]]
+    #
+    # # Good example:
+    # # UPRN: 10013160824, Flat 39, The Meadow, 30 Busk Meadow S5 7JH (care home with 39 flats, have solar panels)
+    # #
+    # # Mostly, For a mid-floor flat, the property doesn't show as having solar panels through the photo_supply variable
+    # # But actually for UPRN: 10013245713, Apartment 4, Orchard House, Gill Lane PR4 5QN, this has a dwelling above
+    # # but the photo_supply variable is 20
+    #
+    # # Small flat consisting of 2 units
+    # # UPRN: 42172953, FLAT 2, 276 CLAUGHTON ROAD, BIRKENHEAD CH41 4DX
+    #
+    # # Flat containing 5 units
+    # # UPRN: 10013247127 Flat 1, Old Church House PR4 5GE
+    # # UPRN: 10013247130 Flat 4, Old Church House PR4 5GE
+    #
+    # # Flat containing multiple units:
+    # # UPRNS: 10013245710, 10013245716, 10013245711, 10013245717, 10013245714, 10013245715, 10013245712, 10013245713
+    #
+    # # Look for flats with air source heat pumps!
+    # flats_with_asps = data[data["MAINHEAT_DESCRIPTION"].str.lower().str.contains("air source heat pump")]
+    # print(flats_with_asps[["UPRN", "ADDRESS"]])
+
+
+def app_epc_b():
+    # TODO: We can insert a variable indicating that they own all of the units in the building
+    asset_list = [
+        {
+            "address": "Flat 1, Fenton Court",
+            "postcode": "N2 8DS",
+            "uprn": 200140644,
+            "building_id": 1,
+        },
+        {
+            "address": "Flat 2, Fenton Court",
+            "postcode": "N2 8DS",
+            "uprn": 200140645,
+            "building_id": 1,
+        },
+        {
+            "address": "Flat 3, Fenton Court",
+            "postcode": "N2 8DS",
+            "uprn": 200140646,
+            "building_id": 1,
+        },
+        {
+            "address": "Flat 4, Fenton Court",
+            "postcode": "N2 8DS",
+            "uprn": 200140647,
+            "building_id": 1,
+        },
+        {
+            "address": "Flat 5, Fenton Court",
+            "postcode": "N2 8DS",
+            "uprn": 200140648,
+            "building_id": 1,
+        },
+        {
+            "address": "Flat 6, Fenton Court",
+            "postcode": "N2 8DS",
+            "uprn": 200140649,
+            "building_id": 1,
+        }
+    ]
+
+    non_invasive_recommendations = [
+        {
+            "address": "Flat 1, Fenton Court",
+            "postcode": "N2 8DS",
+            'recommendations': [
+                'cavity_extract_and_refill',
+                # 'air_source_heat_pump'
+            ]
+        },
+        {
+            "address": "Flat 2, Fenton Court",
+            "postcode": "N2 8DS",
+            'recommendations': [
+                'cavity_extract_and_refill',
+                # 'air_source_heat_pump'
+            ]
+        },
+        {
+            "address": "Flat 3, Fenton Court",
+            "postcode": "N2 8DS",
+            'recommendations': [
+                'cavity_extract_and_refill',
+                # 'air_source_heat_pump'
+            ]
+        },
+        {
+            "address": "Flat 4, Fenton Court",
+            "postcode": "N2 8DS",
+            'recommendations': [
+                'cavity_extract_and_refill',
+                # 'air_source_heat_pump'
+            ]
+        },
+        {
+            "address": "Flat 5, Fenton Court",
+            "postcode": "N2 8DS",
+            'recommendations': [
+                'cavity_extract_and_refill',
+                'loft_insulation',
+                # 'air_source_heat_pump'
+            ]
+        },
+        {
+            "address": "Flat 6, Fenton Court",
+            "postcode": "N2 8DS",
+            'recommendations': [
+                'cavity_extract_and_refill',
+                'loft_insulation',
+                # 'air_source_heat_pump'
+            ]
+        },
+    ]
+
+    asset_list = pd.DataFrame(asset_list)
+
+    # Store the asset list in s3
+    filename = f"{USER_ID}/{SECOND_PORTFOLIO_ID}/non_intrusives.csv"
+    save_csv_to_s3(
+        dataframe=asset_list,
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename
+    )
+
+    # Store non-invasive recommendations in S3
+    non_invasive_recommendations_filename = f"{USER_ID}/{SECOND_PORTFOLIO_ID}/non_invasive_recommendations.json"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(non_invasive_recommendations),
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=non_invasive_recommendations_filename
+    )
+
+    body = {
+        "portfolio_id": str(SECOND_PORTFOLIO_ID),
+        "housing_type": "Private",
+        "goal": "Increase EPC",
+        "goal_value": "B",
+        "trigger_file_path": filename,
+        "already_installed_file_path": "",
+        "patches_file_path": "",
+        "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
+        "budget": None,
+        "exclusions": ["floor_insulation"]
     }
     print(body)
 
diff --git a/recommendations/Costs.py b/recommendations/Costs.py
index 2159c0b0..ce459528 100644
--- a/recommendations/Costs.py
+++ b/recommendations/Costs.py
@@ -18,23 +18,23 @@ regional_labour_variations = [
     {"Region": "Northern Ireland", "Adjustment_Factor": 0.76}
 ]
 
-# This data is based on the MCS database
+# This data is based on the MCS database - using the figures for June 2024
 MCS_SOLAR_PV_COST_DATA = {
-    "last_updated": "2024-06-10",
-    "average_cost_per_kwh": 1750,
-    "average_cost_per_kwh-Outer London": 1776,
-    "average_cost_per_kwh-Inner London": 1776,
-    "average_cost_per_kwh-South East England": 1672,
-    "average_cost_per_kwh-South West England": 1732,
-    "average_cost_per_kwh-East of England": 1721,
+    "last_updated": "2024-07-10",
+    "average_cost_per_kwh": 1825,
+    "average_cost_per_kwh-Outer London": 1950,
+    "average_cost_per_kwh-Inner London": 1950,
+    "average_cost_per_kwh-South East England": 1966,
+    "average_cost_per_kwh-South West England": 1864,
+    "average_cost_per_kwh-East of England": 1719,
     "average_cost_per_kwh-East Midlands": 1730,
-    "average_cost_per_kwh-West Midlands": 1761,
-    "average_cost_per_kwh-North East England": 1669,
-    "average_cost_per_kwh-North West England": 1764,
-    "average_cost_per_kwh-Yorkshire and the Humber": 1705,
-    "average_cost_per_kwh-Wales": 1896,
-    "average_cost_per_kwh-Scotland": 1767,
-    "average_cost_per_kwh-Northern Ireland": 1767,
+    "average_cost_per_kwh-West Midlands": 1789,
+    "average_cost_per_kwh-North East England": 1872,
+    "average_cost_per_kwh-North West England": 1860,
+    "average_cost_per_kwh-Yorkshire and the Humber": 1789,
+    "average_cost_per_kwh-Wales": 1676,
+    "average_cost_per_kwh-Scotland": 1781,
+    "average_cost_per_kwh-Northern Ireland": 1347,
 }
 
 # This data is based on the MCS database, We use the larger figure between the 2023 and 2024 average,
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 4ad1d987..d908f4b9 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -183,6 +183,10 @@ class HeatingRecommender:
                            "boiler")
 
     def is_ashp_valid(self):
+
+        if "air_source_heat_pump" in self.property.non_invasive_recommendations:
+            return True
+
         suitable_property_type = self.property.data["property-type"] in ["House", "Bungalow"]
         has_air_source_heat_pump = self.property.main_heating["has_air_source_heat_pump"]
 
@@ -232,6 +236,12 @@ class HeatingRecommender:
             "mainheat_energy_eff_ending": "Good",
             "hot_water_energy_eff_ending": "Good"
         }
+        description_simulation = {
+            "mainheat-description": "Air source heat pump, radiators, electric",
+            "mainheat-energy-eff": "Good",
+            "hot-water-energy-eff": "Good",
+            "hotwater-description": "From main system",
+        }
         # Installation of a boiler improves the hot water system so we need to reflect this in
         # the outcome of the recommendation
         heating_ending_config = MainHeatAttributes("Air source heat pump, radiators, electric").process()
@@ -241,6 +251,10 @@ class HeatingRecommender:
         fuel_ending_config = {}
         if self.property.main_fuel["fuel_type"] != "electricity":
             fuel_ending_config = MainFuelAttributes("electricity (not community)").process()
+            description_simulation = {
+                **description_simulation,
+                "main-fuel": "electricity (not community)"
+            }
 
         # Check the simulation differences
         heating_simulation_config = check_simulation_difference(
@@ -270,6 +284,12 @@ class HeatingRecommender:
                 **controls_recommender.recommendation[0]["simulation_config"]
             }
 
+            description_simulation = {
+                **description_simulation,
+                "mainheatcont-description": "time and temperature zone control",
+                "mainheatc-energy-eff": "Very Good"
+            }
+
         ashp_recommendation = {
             "phase": phase,
             "parts": [
@@ -282,6 +302,7 @@ class HeatingRecommender:
             "sap_points": None,
             "already_installed": already_installed,
             "simulation_config": simulation_config,
+            "description_simulation": description_simulation,
             **ashp_costs
         }
 
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 97e5a3b7..03e6f284 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -752,6 +752,23 @@ class Recommendations:
                         predicted_appliances_kwh_reduction
                     )
 
+                    # We store this value for later
+                    phase_lighting_costs[rec["phase"]] = {
+                        "adjusted": new_lighting_cost,
+                        "unadjusted": scoring_lighting_cost
+                    }
+
+                    phase_kwh_figures[rec["phase"]] = {
+                        "adjusted": {
+                            "heating": new_heating_kwh_adjusted,
+                            "hot_water": new_hot_water_kwh_adjusted
+                        },
+                        "unadjusted": {
+                            "heating": new_heating_kwh,
+                            "hot_water": new_hot_water_kwh
+                        }
+                    }
+
                 if rec["type"] == "low_energy_lighting":
                     # For the moment, we cap the number of SAP points that can be achieved by ventilation at 2
                     rec["sap_points"] = min(predicted_sap_points, LightingRecommendations.SAP_LIMIT)
diff --git a/recommendations/RoofRecommendations.py b/recommendations/RoofRecommendations.py
index 81f514b1..a1f8c67c 100644
--- a/recommendations/RoofRecommendations.py
+++ b/recommendations/RoofRecommendations.py
@@ -23,6 +23,7 @@ class RoofRecommendations:
     # It is recommended that lofts should have at least 270mm of insulation. If the property has more than 200mm of
     # loft insulation in place already, we do not recommend anything for the moment
     MINIMUM_LOFT_ISULATION_MM = 200
+    MINIMUM_RECOMMENDED_LOFT_INSULATION = 280
     # Flat roof should have at least 100mm of insulation
     MINIMUM_FLAT_ROOF_ISULATION_MM = 100
 
@@ -79,6 +80,11 @@ class RoofRecommendations:
         """
         Check if the loft is already insulated
         """
+
+        # If we have a non-invasive recommendation for the loft insulation, we can assume that the loft is not insulated
+        if "loft_insulation" in self.property.non_invasive_recommendations:
+            return False
+
         return (self.insulation_thickness > self.MINIMUM_LOFT_ISULATION_MM) and self.property.roof["is_pitched"]
 
     def recommend(self, phase):
@@ -115,12 +121,17 @@ class RoofRecommendations:
         u_value = get_roof_u_value(**{**self.property.roof, "age_band": self.property.age_band})
 
         self.estimated_u_value = u_value
-        if u_value <= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE:
+        if (u_value <= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE) and (
+            "loft_insulation" not in self.property.non_invasive_recommendations
+        ):
             # The Roof is already compliant
             return
 
         if self.property.roof["is_pitched"] or self.property.roof["is_flat"]:
-            self.recommend_roof_insulation(u_value, self.insulation_thickness, self.property.roof, phase)
+            # Survey-recommended loft insulation => treat the loft as uninsulated; otherwise keep the existing depth
+            surveyed_uninsulated = "loft_insulation" in self.property.non_invasive_recommendations
+            insulation_thickness = 0 if surveyed_uninsulated else self.insulation_thickness
+            self.recommend_roof_insulation(u_value, insulation_thickness, self.property.roof, phase)
             return
 
         if self.property.roof["is_roof_room"]:
@@ -200,7 +211,9 @@ class RoofRecommendations:
                 # We make sure we hit a depth of 270mm. We should factor in any existing insulation if the
                 # loft is already partially insulated.
                 # Note: This requirement is only for loft insulation
-                if ((material["depth"] + insulation_thickness) < self.MINIMUM_LOFT_ISULATION_MM) and roof["is_pitched"]:
+                if (
+                    (material["depth"] + insulation_thickness) < self.MINIMUM_RECOMMENDED_LOFT_INSULATION
+                ) and roof["is_pitched"]:
                     continue
 
                 part_u_value = r_value_per_mm_to_u_value(material["depth"], material["r_value_per_mm"])
@@ -245,6 +258,35 @@ class RoofRecommendations:
                     else:
                         raise ValueError("Invalid material type")
 
+                    # This is based on the values we have in the training data
+                    valid_numeric_values = [
+                        12,
+                        25,
+                        50,
+                        75,
+                        100,
+                        150,
+                        200,
+                        250,
+                        270,
+                        300,
+                        350,
+                        400,
+                    ]
+
+                    proposed_depth = new_thickness
+                    if new_thickness not in valid_numeric_values:
+                        # Take the nearest value for scoring
+                        proposed_depth = min(
+                            valid_numeric_values, key=lambda x: abs(x - proposed_depth)
+                        )
+
+                    # NOTE(review): previously read "walls-energy-eff" and could leave new_efficiency unbound
+                    current_efficiency = self.property.data.get("roof-energy-eff")
+                    new_efficiency = current_efficiency if current_efficiency in ["Good", "Very Good"] else "Good"
+                    if proposed_depth >= 270:
+                        new_efficiency = "Very Good"
+
                     recommendations.append(
                         {
                             "phase": phase,
@@ -263,6 +305,10 @@ class RoofRecommendations:
                             "sap_points": None,
                             "already_installed": already_installed,
                             "new_thickness": new_thickness,
+                            "description_simulation": {
+                                "roof-description": f"Pitched, {int(proposed_depth)}mm loft insulation",
+                                "roof-energy-eff": new_efficiency
+                            },
                             **cost_result
                         }
                     )
diff --git a/recommendations/VentilationRecommendations.py b/recommendations/VentilationRecommendations.py
index 5b36bd9c..1120654a 100644
--- a/recommendations/VentilationRecommendations.py
+++ b/recommendations/VentilationRecommendations.py
@@ -72,7 +72,7 @@ class VentilationRecommendations(Definitions):
                 "already_installed": already_installed,
                 "sap_points": 0,
                 "heat_demand": 0,
-                "adjusted_heat_demand": 0,
+                "kwh_savings": 0,
                 "co2_equivalent_savings": 0,
                 "energy_cost_savings": 0,
                 "total": estimated_cost,
diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py
index fb228b49..a1a1491b 100644
--- a/recommendations/WallRecommendations.py
+++ b/recommendations/WallRecommendations.py
@@ -252,7 +252,7 @@ class WallRecommendations(Definitions):
 
         self.estimated_u_value = u_value
 
-        if is_cavity_wall:
+        if is_cavity_wall or "cavity_extract_and_refill" in self.property.non_invasive_recommendations:
             if u_value >= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE:
                 # Test filling cavity
                 self.find_cavity_insulation(u_value, insulation_thickness, phase)
@@ -357,7 +357,7 @@ class WallRecommendations(Definitions):
                 simulation_config = {
                     **simulation_config,
                     **walls_simulation_config,
-                    "walls_thermal_transmittance_ending": new_u_value
+                    "walls_thermal_transmittance_ending": new_u_value,
                 }
 
                 recommendations.append(
@@ -378,6 +378,10 @@ class WallRecommendations(Definitions):
                         "sap_points": None,
                         "already_installed": already_installed,
                         "simulation_config": simulation_config,
+                        "description_simulation": {
+                            "walls-description": "Cavity wall, filled cavity",
+                            "walls-energy-eff": "Good"
+                        },
                         **cost_result
                     }
                 )

From 1d28a7e335c9d6771dce2e1c57cdafa7d69469eb Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 11 Jul 2024 19:22:10 +0100
Subject: [PATCH 36/36] Implemented solar api wip

---
 backend/Property.py                           |  1 +
 backend/app/plan/router.py                    | 13 +++++++------
 backend/app/plan/schemas.py                   |  5 ++++-
 etl/customers/vander_elliot/non_intrusives.py |  3 ++-
 recommendations/HeatingControlRecommender.py  | 12 ++++++++++--
 recommendations/HeatingRecommender.py         | 12 +++++++++---
 recommendations/LightingRecommendations.py    |  6 +++++-
 recommendations/Recommendations.py            | 17 +++++++++++++++--
 recommendations/SolarPvRecommendations.py     |  3 ++-
 recommendations/WindowsRecommendations.py     |  7 +++++++
 10 files changed, 62 insertions(+), 17 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index b8eb9936..4ae65d7d 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -351,6 +351,7 @@ class Property:
                 if r["phase"] <= phase
             ]
             epc_transformations = [x["description_simulation"] for x in represenative_recs_to_this_phase]
+            
             # It is possible that we could have two simulations applied to the same descriptions
             # We extract these out
             phase_epc_transformation = {}
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 5d75bada..00e73b56 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -448,12 +448,13 @@ async def trigger_plan(body: PlanTriggerRequest):
                         p.set_solar_panel_configuration(unit_solar_panel_configuration)
 
         else:
-            # Model the solar potential at the property level
-            for p in input_properties:
-                # TODO: Complete me! - we probably won't do this for individual flats
-                solar_performance = solar_api_client.get(
-                    longitude=p.spatial["longitude"], latitude=p.spatial["latitude"]
-                )
+            # # Model the solar potential at the property level
+            # for p in input_properties:
+            #     # TODO: Complete me! - we probably won't do this for individual flats
+            #     solar_performance = solar_api_client.get(
+            #         longitude=p.spatial["longitude"], latitude=p.spatial["latitude"]
+            #     )
+            logger.warning("Property-level solar modelling is not implemented yet")
 
         logger.info("Getting components and epc recommendations")
         recommendations = {}
diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py
index fbc4d4f2..77ac4217 100644
--- a/backend/app/plan/schemas.py
+++ b/backend/app/plan/schemas.py
@@ -16,6 +16,7 @@ class PlanTriggerRequest(BaseModel):
 
     # Pre-defined list of possibilities for exclusions
     _allowed_exclusions = {
+        # Measure classes
         "wall_insulation",
         "ventilation",
         "roof_insulation",
@@ -25,7 +26,9 @@ class PlanTriggerRequest(BaseModel):
         "heating",
         "hot_water",
         "lighting",
-        "solar_pv"
+        "solar_pv",
+        # Specific measures
+        "air_source_heat_pump",
     }
 
     _allowed_goals = {"Increase EPC"}
diff --git a/etl/customers/vander_elliot/non_intrusives.py b/etl/customers/vander_elliot/non_intrusives.py
index bbc46754..280ba968 100644
--- a/etl/customers/vander_elliot/non_intrusives.py
+++ b/etl/customers/vander_elliot/non_intrusives.py
@@ -119,11 +119,12 @@ def app():
         "portfolio_id": str(PORTFOLIO_ID),
         "housing_type": "Private",
         "goal": "Increase EPC",
-        "goal_value": "A",
+        "goal_value": "C",
         "trigger_file_path": filename,
         "already_installed_file_path": already_installed_filename,
         "patches_file_path": "",
         "non_invasive_recommendations_file_path": "",
+        "exclusions": ["wall_insulation", "air_source_heat_pump"],
         "budget": None,
     }
     print(body)
diff --git a/recommendations/HeatingControlRecommender.py b/recommendations/HeatingControlRecommender.py
index a94c2304..1aae3973 100644
--- a/recommendations/HeatingControlRecommender.py
+++ b/recommendations/HeatingControlRecommender.py
@@ -190,7 +190,11 @@ class HeatingControlRecommender:
                 "new_u_value": None,
                 "sap_points": None,
                 "already_installed": already_installed,
-                "simulation_config": simulation_config
+                "simulation_config": simulation_config,
+                "description_simulation": {
+                    "mainheatcont-description": "Programmer, room thermostat and TRVS",
+                    "mainheatc-energy-eff": "Good"
+                }
             }
         )
 
@@ -250,6 +254,10 @@ class HeatingControlRecommender:
                 "new_u_value": None,
                 "sap_points": None,
                 "already_installed": already_installed,
-                "simulation_config": simulation_config
+                "simulation_config": simulation_config,
+                "description_simulation": {
+                    "mainheatcont-description": "Time and temperature zone control",
+                    "mainheatc-energy-eff": "Very Good"
+                }
             }
         )
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index d908f4b9..0afdc18f 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -42,18 +42,21 @@ class HeatingRecommender:
 
         return self.has_electric_heating_description or electric_heating_assumed
 
-    def recommend(self, has_cavity_or_loft_recommendations, phase=0):
+    def recommend(self, has_cavity_or_loft_recommendations, phase=0, exclusions=None):
         """
         Produces heating recommendations
         :param has_cavity_or_loft_recommendations: boolean indicating if we have produced a cavity or loft insulation
         recommendation. If there are cavity or loft recommendations, the property would need to complete those measures
         before being able to get the boiler upgrade scheme benefits. The messaging in the front end would be to
         :param phase: indicates the phase of the retrofit programme
+        :param exclusions: A list of exclusions for the recommendations
         """
 
         # TODO: We could have a system flush recommendation for an existing boiler, where there is no need to replace
         #       the boiler, but instead flushing the system will make it run more efficiently. There is a cost for this
         #       in the Costs class, stored as SYSTEM_FLUSH_COST
+        
+        exclusions = [] if exclusions is None else exclusions
 
         self.heating_recommendations = []
         self.heating_control_recommendations = []
@@ -112,7 +115,7 @@ class HeatingRecommender:
         # In the future, we'll allow overrides, so that non-intrusive surveys can contradict these conditions
         # and either allow or prevent the recommendation of an air source heat pump
 
-        if self.is_ashp_valid():
+        if self.is_ashp_valid(exclusions=exclusions):
             self.recommend_air_source_heat_pump(
                 phase=phase, has_cavity_or_loft_recommendations=has_cavity_or_loft_recommendations
             )
@@ -182,11 +185,14 @@ class HeatingRecommender:
             description = ("Replace the existing boiler and cylinder without a thermostat with a new electric combi "
                            "boiler")
 
-    def is_ashp_valid(self):
+    def is_ashp_valid(self, exclusions=()):
 
         if "air_source_heat_pump" in self.property.non_invasive_recommendations:
             return True
 
+        if "air_source_heat_pump" in exclusions:
+            return False
+
         suitable_property_type = self.property.data["property-type"] in ["House", "Bungalow"]
         has_air_source_heat_pump = self.property.main_heating["has_air_source_heat_pump"]
 
diff --git a/recommendations/LightingRecommendations.py b/recommendations/LightingRecommendations.py
index 31720579..1186b0a9 100644
--- a/recommendations/LightingRecommendations.py
+++ b/recommendations/LightingRecommendations.py
@@ -109,8 +109,12 @@ class LightingRecommendations:
                 # For SAP points, we use the fact that lighting is usually worth 2 points and we scale this to
                 # the proportion of lights that will be set to low energy
                 "sap_points": round(2 * (number_non_lel_outlets / number_lighting_outlets), 2),
-                "heat_demand": heat_demand_change,
+                "kwh_savings": heat_demand_change,
                 "co2_equivalent_savings": carbon_change,
+                "description_simulation": {
+                    "lighting-energy-eff": "Very Good",
+                    "lighting-description": "Low energy lighting in all fixed outlets",
+                },
                 **cost_result
             }
         ]
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 03e6f284..fcdd513f 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -119,7 +119,9 @@ class Recommendations:
             has_cavity_or_loft_recommendations = len(cavity_or_loft_recommendations) > 0
 
             self.heating_recommender.recommend(
-                phase=phase, has_cavity_or_loft_recommendations=has_cavity_or_loft_recommendations
+                phase=phase,
+                has_cavity_or_loft_recommendations=has_cavity_or_loft_recommendations,
+                exclusions=self.exclusions
             )
             if (
                 self.heating_recommender.heating_recommendations or
@@ -294,6 +296,13 @@ class Recommendations:
         if rec["type"] != "solar_pv":
             return 0, 0
 
+        if property_instance.solar_panel_configuration is None:
+            print("PLACEHOLDER ESTIMATES")
+            # 50% reduction average
+            kwh_reduction = property_instance.energy_consumption_estimates["adjusted"]["appliances"] * 0.5
+            predicted_appliances_cost_reduction = kwh_reduction * AnnualBillSavings.ELECTRICITY_PRICE_CAP
+            return predicted_appliances_cost_reduction, kwh_reduction
+
         # Calulate the amount of energy the solar panel array will generate for this unit
         unit_energy_consumption = (
             rec["initial_ac_kwh_per_year"] *
@@ -721,10 +730,14 @@ class Recommendations:
                     heating_kwh_reduction = 0 if predicted_heating_cost_reduction == 0 else (
                         phase_kwh_figures[previous_phase]["adjusted"]["heating"] - new_heating_kwh_adjusted
                     )
+                    if heating_kwh_reduction < 0:
+                        heating_kwh_reduction = 0
 
                     hot_water_kwh_reduction = 0 if predicted_hot_water_cost_reduction == 0 else (
                         phase_kwh_figures[previous_phase]["adjusted"]["hot_water"] - new_hot_water_kwh_adjusted
                     )
+                    if hot_water_kwh_reduction < 0:
+                        hot_water_kwh_reduction = 0
 
                     lighting_kwh_reduction = predicted_lighting_cost_reduction / AnnualBillSavings.ELECTRICITY_PRICE_CAP
 
@@ -773,7 +786,7 @@ class Recommendations:
                     # For the moment, we cap the number of SAP points that can be achieved by ventilation at 2
                     rec["sap_points"] = min(predicted_sap_points, LightingRecommendations.SAP_LIMIT)
                     rec["co2_equivalent_savings"] = min(predicted_co2_savings, rec["co2_equivalent_savings"])
-                    rec["heat_demand"] = min(predicted_heat_demand, rec["heat_demand"])
+                    rec["heat_demand"] = predicted_heat_demand
                 else:
                     rec["sap_points"] = predicted_sap_points
                     rec["co2_equivalent_savings"] = predicted_co2_savings
diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py
index af1e7f27..276573ec 100644
--- a/recommendations/SolarPvRecommendations.py
+++ b/recommendations/SolarPvRecommendations.py
@@ -244,6 +244,7 @@ class SolarPvRecommendations:
                     # This is required for simulating the SAP impact. solar_pv_percentage is between 0 & 1 so we scale
                     # back up here
                     "photo_supply": 100 * roof_coverage,
-                    "has_battery": has_battery
+                    "has_battery": has_battery,
+                    "description_simulation": {"photo-supply": 100 * roof_coverage},
                 }
             )
diff --git a/recommendations/WindowsRecommendations.py b/recommendations/WindowsRecommendations.py
index 8c0cc493..29c75989 100644
--- a/recommendations/WindowsRecommendations.py
+++ b/recommendations/WindowsRecommendations.py
@@ -115,5 +115,12 @@ class WindowsRecommendations:
                 "already_installed": already_installed,
                 **cost_result,
                 "is_secondary_glazing": is_secondary_glazing,
+                # TODO: Make this conditional on is_secondary_glazing
+                "description_simulation": {
+                    "multi-glaze-proportion": 100,
+                    "windows-energy-eff": "Average",
+                    "windows-description": "Fully double glazed",
+                    "glazed-type": "double glazing installed during or after 2002",
+                }
             }
         ]