From a3609ee055509341be47dbf09e2938e20c7c66e3 Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Tue, 28 May 2024 17:39:07 +0100
Subject: [PATCH 01/80] add new builds

---
 etl/epc/DataProcessor.py | 236 ++++++++++++++++++++++++++-------------
 1 file changed, 159 insertions(+), 77 deletions(-)

diff --git a/etl/epc/DataProcessor.py b/etl/epc/DataProcessor.py
index a77bcaa3..4ad854c1 100644
--- a/etl/epc/DataProcessor.py
+++ b/etl/epc/DataProcessor.py
@@ -5,7 +5,7 @@ from BaseUtility import Definitions
 from etl.epc.settings import (
     DATA_PROCESSOR_SETTINGS,
     EARLIEST_EPC_DATE,
-    IGNORED_TRANSACTION_TYPES,
+    # IGNORED_TRANSACTION_TYPES,
     IGNORED_FLOOR_LEVELS,
     IGNORED_PROPERTY_TYPES,
     IGNORED_TENURES,
@@ -56,8 +56,11 @@ construction_age_remap = {
 
 expanded_map = {
     i: [
-        label for label, bounds in construction_age_bounds_map.items() if (i <= bounds["u"]) and (i >= bounds['l'])
-    ][0] for i in range(0, 3001)
+        label
+        for label, bounds in construction_age_bounds_map.items()
+        if (i <= bounds["u"]) and (i >= bounds["l"])
+    ][0]
+    for i in range(0, 3001)
 }
 
 
@@ -74,8 +77,13 @@ class EPCDataProcessor:
     Handle data loading and data preprocessing
     """
 
-    def __init__(self, data: pd.DataFrame | None = None, cleaning_averages: pd.DataFrame | None = None,
-                 run_mode: str = "training", violation_mode: bool = False) -> None:
+    def __init__(
+        self,
+        data: pd.DataFrame | None = None,
+        cleaning_averages: pd.DataFrame | None = None,
+        run_mode: str = "training",
+        violation_mode: bool = False,
+    ) -> None:
         """
         :param filepath: If specified, is the physical location of the data
         :param is_newdata: Indicates if we are processing new, testing data.
@@ -86,7 +94,9 @@ class EPCDataProcessor:
         self.data: pd.DataFrame = data if is_data_a_dataframe else pd.DataFrame()
 
         is_cleaning_averages_a_dataframe = isinstance(cleaning_averages, pd.DataFrame)
-        self.cleaning_averages: pd.DataFrame = cleaning_averages if is_cleaning_averages_a_dataframe else pd.DataFrame()
+        self.cleaning_averages: pd.DataFrame = (
+            cleaning_averages if is_cleaning_averages_a_dataframe else pd.DataFrame()
+        )
 
         # FOR NOW IF VIOLATION MODE IS ON, WE USE RUN MODE AS NEWDATA
         self.violation_mode = violation_mode
@@ -103,7 +113,9 @@ class EPCDataProcessor:
         ignore_step = True if self.run_mode == "newdata" else False
 
         if filepath is not None:
-            self.load_data(filepath=filepath, low_memory=DATA_PROCESSOR_SETTINGS["low_memory"])
+            self.load_data(
+                filepath=filepath, low_memory=DATA_PROCESSOR_SETTINGS["low_memory"]
+            )
 
         if len(self.data) == 0:
             raise Exception("No data to process - check filepath/ data being passed in")
@@ -121,7 +133,8 @@ class EPCDataProcessor:
         self.clean_multi_glaze_proportion(ignore_step=ignore_step)
         self.clean_photo_supply()
         self.retain_multiple_epc_properties(
-            epc_minimum_count=DATA_PROCESSOR_SETTINGS["epc_minimum_count"], ignore_step=ignore_step
+            epc_minimum_count=DATA_PROCESSOR_SETTINGS["epc_minimum_count"],
+            ignore_step=ignore_step,
         )
 
         self.fill_na_fields()
@@ -188,7 +201,9 @@ class EPCDataProcessor:
         if ignore_step:
             return
 
-        self.cleaning_averages["LOCAL_AUTHORITY"] = self.data["LOCAL_AUTHORITY"].values[0]
+        self.cleaning_averages["LOCAL_AUTHORITY"] = self.data["LOCAL_AUTHORITY"].values[
+            0
+        ]
 
     def fill_invalid_constituency_fields(self, ignore_step: bool = False):
         """
@@ -201,7 +216,9 @@ class EPCDataProcessor:
         if ignore_step:
             return
 
-        self.data = self.data.fillna({"CONSTITUENCY": self.data["CONSTITUENCY"].mode().values[0]})
+        self.data = self.data.fillna(
+            {"CONSTITUENCY": self.data["CONSTITUENCY"].mode().values[0]}
+        )
 
     def sort_data_by_uprn_lodgement_date(self, ignore_step: bool = False):
         """
@@ -301,7 +318,7 @@ class EPCDataProcessor:
         """
 
         if self.violation_mode:
-            # TODO: to fill in 
+            # TODO: to fill in
             return
 
         if ignore_step:
@@ -311,9 +328,7 @@ class EPCDataProcessor:
             lambda x: self.clean_construction_age_band(x)
         )
 
-        self.data = self.data[
-            ~pd.isnull(self.data["CONSTRUCTION_AGE_BAND"])
-        ]
+        self.data = self.data[~pd.isnull(self.data["CONSTRUCTION_AGE_BAND"])]
 
     def clean_missing_rooms(self, ignore_step: bool = False):
         """
@@ -331,31 +346,45 @@ class EPCDataProcessor:
             return
 
         # TODO: DO we want to move this out of this function? (i.e. alter the data before we do any cleaning)
-        self.data["POSTAL_AREA"] = self.data["POSTCODE"].apply(lambda x: x.split(" ")[0])
+        self.data["POSTAL_AREA"] = self.data["POSTCODE"].apply(
+            lambda x: x.split(" ")[0]
+        )
 
         def apply_clean(data, matching_columns):
 
-            cleaning_data = data[~pd.isnull(data[col])].groupby(
-                matching_columns
-            )[col].median().reset_index()
-
-            data = data.merge(
-                cleaning_data, how="left", on=matching_columns, suffixes=("", "_CLEANING")
+            cleaning_data = (
+                data[~pd.isnull(data[col])]
+                .groupby(matching_columns)[col]
+                .median()
+                .reset_index()
             )
 
-            data[col] = np.where(pd.isnull(data[col]), data[f"{col}_CLEANING"], data[col])
+            data = data.merge(
+                cleaning_data,
+                how="left",
+                on=matching_columns,
+                suffixes=("", "_CLEANING"),
+            )
+
+            data[col] = np.where(
+                pd.isnull(data[col]), data[f"{col}_CLEANING"], data[col]
+            )
             data = data.drop(columns=f"{col}_CLEANING")
             return data
 
         for col in ["NUMBER_HEATED_ROOMS", "NUMBER_HABITABLE_ROOMS"]:
 
             to_index = 3
-            matching_columns = ["PROPERTY_TYPE", "BUILT_FORM", "CONSTRUCTION_AGE_BAND", "POSTAL_AREA"]
+            matching_columns = [
+                "PROPERTY_TYPE",
+                "BUILT_FORM",
+                "CONSTRUCTION_AGE_BAND",
+                "POSTAL_AREA",
+            ]
             has_missings = pd.isnull(self.data[col]).sum()
             while has_missings:
                 self.data = apply_clean(
-                    data=self.data,
-                    matching_columns=matching_columns[0:to_index + 1]
+                    data=self.data, matching_columns=matching_columns[0 : to_index + 1]
                 )
                 has_missings = pd.isnull(self.data[col]).sum()
 
@@ -363,7 +392,10 @@ class EPCDataProcessor:
                     # Check if we've gotten to index 0 and still have missings - something has gone wrong or
                     # we have a very unique property type
                     if has_missings:
-                        raise NotImplementedError("Handle this edge case, we still have missings for column %s" % col)
+                        raise NotImplementedError(
+                            "Handle this edge case, we still have missings for column %s"
+                            % col
+                        )
 
                     break
                 to_index -= 1
@@ -410,7 +442,7 @@ class EPCDataProcessor:
     #     coltypes = {k: v for k, v in COLUMNTYPES.items() if k in self.data.columns} if self.is_newdata else
     #     COLUMNTYPES
     #     for k, v in coltypes.items():
-    #         self.data[k] = self.data[k].astype(v) 
+    #         self.data[k] = self.data[k].astype(v)
     #     self.data = self.data.astype(coltypes)
 
     #     self.na_remapping()
@@ -437,9 +469,11 @@ class EPCDataProcessor:
 
     def na_remapping(self, auto_subset_columns: bool = False):
 
-        fill_na_map_apply = {
-            k: v for k, v in fill_na_map.items() if k in self.data.columns
-        } if auto_subset_columns else fill_na_map
+        fill_na_map_apply = (
+            {k: v for k, v in fill_na_map.items() if k in self.data.columns}
+            if auto_subset_columns
+            else fill_na_map
+        )
 
         for column, fill_value in fill_na_map_apply.items():
             self.data[column] = self.data[column].fillna(fill_value)
@@ -535,28 +569,34 @@ class EPCDataProcessor:
 
         for variable in AVERAGE_FIXED_FEATURES:
             # Replace any missing NAN values with averages for the same Property type and built form
-            cleaning_averages_filled[variable] = cleaning_averages_filled[variable].fillna(
-                cleaning_averages_filled[f"{variable}_AVERAGE"]
-            )
+            cleaning_averages_filled[variable] = cleaning_averages_filled[
+                variable
+            ].fillna(cleaning_averages_filled[f"{variable}_AVERAGE"])
 
-            cleaning_averages_filled = cleaning_averages_filled.drop(columns=f"{variable}_AVERAGE")
+            cleaning_averages_filled = cleaning_averages_filled.drop(
+                columns=f"{variable}_AVERAGE"
+            )
 
             #  If there are still NA values i.e. the averages do not have values for a speicifc group of property tyope
             #  and built form
             #  We can use just the property type average and replace
 
-            cleaning_averages_filled[variable] = cleaning_averages_filled[variable].fillna(
-                cleaning_averages_filled[f"{variable}_PROPERTY_AVERAGE"]
-            )
+            cleaning_averages_filled[variable] = cleaning_averages_filled[
+                variable
+            ].fillna(cleaning_averages_filled[f"{variable}_PROPERTY_AVERAGE"])
 
-            cleaning_averages_filled = cleaning_averages_filled.drop(columns=f"{variable}_PROPERTY_AVERAGE")
+            cleaning_averages_filled = cleaning_averages_filled.drop(
+                columns=f"{variable}_PROPERTY_AVERAGE"
+            )
 
             # If there are still NA values, use BUILT FORM averages
-            cleaning_averages_filled["variable"] = cleaning_averages_filled[variable].fillna(
-                cleaning_averages_filled[f"{variable}_BUILT_FORM_AVERAGE"]
-            )
+            cleaning_averages_filled[variable] = cleaning_averages_filled[
+                variable
+            ].fillna(cleaning_averages_filled[f"{variable}_BUILT_FORM_AVERAGE"])
 
-            cleaning_averages_filled = cleaning_averages_filled.drop(columns=f"{variable}_BUILT_FORM_AVERAGE")
+            cleaning_averages_filled = cleaning_averages_filled.drop(
+                columns=f"{variable}_BUILT_FORM_AVERAGE"
+            )
 
             # If there still is na values, use average across all epc in consituecy
             cleaning_averages_filled[variable] = cleaning_averages_filled[
@@ -573,7 +613,9 @@ class EPCDataProcessor:
 
         self.cleaning_averages = cleaning_averages_filled
 
-    def retain_multiple_epc_properties(self, epc_minimum_count: int = 1, ignore_step: bool = False) -> None:
+    def retain_multiple_epc_properties(
+        self, epc_minimum_count: int = 1, ignore_step: bool = False
+    ) -> None:
         """
         Reduce the data futher by keeping only datasets with multiple epcs
         """
@@ -592,12 +634,16 @@ class EPCDataProcessor:
         counts = counts[counts["count"] > epc_minimum_count]
         self.data = pd.merge(self.data, counts, on="UPRN")
 
-    def recast_df_columns(self, column_mappings: dict, auto_subset_columns: bool = False) -> None:
+    def recast_df_columns(
+        self, column_mappings: dict, auto_subset_columns: bool = False
+    ) -> None:
         """
         Recast columns from the dataframe to ensure the behaviour we want
         """
         if auto_subset_columns:
-            column_mappings = {k: v for k, v in column_mappings.items() if k in self.data.columns}
+            column_mappings = {
+                k: v for k, v in column_mappings.items() if k in self.data.columns
+            }
 
         for key, values in column_mappings.items():
             if key not in self.data.columns:
@@ -608,13 +654,17 @@ class EPCDataProcessor:
             else:
                 self.data[key] = self.data[key].astype(values)
 
-    def recast_all_data(self, column_mappings: dict, auto_subset_columns: bool = False) -> None:
+    def recast_all_data(
+        self, column_mappings: dict, auto_subset_columns: bool = False
+    ) -> None:
         """
         Using a dictionary to recast all columns at once
         """
 
         if auto_subset_columns:
-            column_mappings = {k: v for k, v in column_mappings.items() if k in self.data.columns}
+            column_mappings = {
+                k: v for k, v in column_mappings.items() if k in self.data.columns
+            }
 
         self.data = self.data.astype(column_mappings)
 
@@ -625,14 +675,26 @@ class EPCDataProcessor:
 
         if self.violation_mode:
             violation_uprn_missing = pd.isnull(self.data["UPRN"])
-            violation_old_lodgment_date = self.data["LODGEMENT_DATE"] < EARLIEST_EPC_DATE
-            violation_invalid_transaction_type = self.data["TRANSACTION_TYPE"] == IGNORED_TRANSACTION_TYPES
-            violation_ignored_floor_level = self.data["FLOOR_LEVEL"].isin(IGNORED_FLOOR_LEVELS)
+            violation_old_lodgment_date = (
+                self.data["LODGEMENT_DATE"] < EARLIEST_EPC_DATE
+            )
+            # violation_invalid_transaction_type = self.data["TRANSACTION_TYPE"] == IGNORED_TRANSACTION_TYPES
+            violation_ignored_floor_level = self.data["FLOOR_LEVEL"].isin(
+                IGNORED_FLOOR_LEVELS
+            )
             violation_rdsap_score_above_max = self.data[RDSAP_RESPONSE] > MAX_SAP_SCORE
-            violation_missing_windows_description = pd.isnull(self.data["WINDOWS_DESCRIPTION"])
-            violation_missing_hotwater_description = pd.isnull(self.data["HOTWATER_DESCRIPTION"])
-            violation_missing_roof_description = pd.isnull(self.data["ROOF_DESCRIPTION"])
-            violation_invalid_property_type = self.data["PROPERTY_TYPE"] == IGNORED_PROPERTY_TYPES
+            violation_missing_windows_description = pd.isnull(
+                self.data["WINDOWS_DESCRIPTION"]
+            )
+            violation_missing_hotwater_description = pd.isnull(
+                self.data["HOTWATER_DESCRIPTION"]
+            )
+            violation_missing_roof_description = pd.isnull(
+                self.data["ROOF_DESCRIPTION"]
+            )
+            violation_invalid_property_type = (
+                self.data["PROPERTY_TYPE"] == IGNORED_PROPERTY_TYPES
+            )
             violation_invalid_tenure = self.data["TENURE"].isin(IGNORED_TENURES)
 
             violation_df = pd.concat(
@@ -647,7 +709,8 @@ class EPCDataProcessor:
                     violation_missing_roof_description,
                     violation_invalid_property_type,
                     violation_invalid_tenure,
-                ], axis=1,
+                ],
+                axis=1,
                 keys=[
                     "violation_uprn_missing",
                     "violation_old_lodgment_date",
@@ -658,8 +721,8 @@ class EPCDataProcessor:
                     "violation_missing_hotwater_description",
                     "violation_missing_roof_description",
                     "violation_invalid_property_type",
-                    "violation_invalid_tenure"
-                ]
+                    "violation_invalid_tenure",
+                ],
             )
 
             self.data = pd.concat([self.data, violation_df], axis=1)
@@ -685,10 +748,8 @@ class EPCDataProcessor:
 
         self.data = self.data[~pd.isnull(self.data["UPRN"])]
         self.data = self.data[self.data["LODGEMENT_DATE"] >= EARLIEST_EPC_DATE]
-        self.data = self.data[self.data["TRANSACTION_TYPE"] != IGNORED_TRANSACTION_TYPES]
-        self.data = self.data[
-            ~self.data["FLOOR_LEVEL"].isin(IGNORED_FLOOR_LEVELS)
-        ]
+        # self.data = self.data[self.data["TRANSACTION_TYPE"] != IGNORED_TRANSACTION_TYPES]
+        self.data = self.data[~self.data["FLOOR_LEVEL"].isin(IGNORED_FLOOR_LEVELS)]
         self.data = self.data[self.data[RDSAP_RESPONSE] <= MAX_SAP_SCORE]
 
         # We observed 7 final records with missing windows and 2 records with missing hot water so we shall remove them
@@ -705,7 +766,7 @@ class EPCDataProcessor:
         self.data = self.data[~self.data["TENURE"].isin(IGNORED_TENURES)]
 
         # We remap zero values to None
-        self.data.loc[self.data['FLOOR_HEIGHT'] == 0, 'FLOOR_HEIGHT'] = None
+        self.data.loc[self.data["FLOOR_HEIGHT"] == 0, "FLOOR_HEIGHT"] = None
 
     def clean_multi_glaze_proportion(self, ignore_step: bool = False) -> None:
         """
@@ -734,7 +795,11 @@ class EPCDataProcessor:
 
     @staticmethod
     def apply_averages_cleaning(
-        data_to_clean, cleaning_data, cols_to_merge_on, colnames=None, ignore_step: bool = False
+        data_to_clean,
+        cleaning_data,
+        cols_to_merge_on,
+        colnames=None,
+        ignore_step: bool = False,
     ):
         """
         Clean the input DataFrame using averages from a cleaning DataFrame.
@@ -752,12 +817,13 @@ class EPCDataProcessor:
 
         # The desired colnames to clean - which may not be present
         if colnames is None:
-            colnames = ["TOTAL_FLOOR_AREA", "FLOOR_HEIGHT", "FIXED_LIGHTING_OUTLETS_COUNT"]
+            colnames = [
+                "TOTAL_FLOOR_AREA",
+                "FLOOR_HEIGHT",
+                "FIXED_LIGHTING_OUTLETS_COUNT",
+            ]
 
-        cols_to_clean = [
-            c for c in colnames if
-            c in data_to_clean.columns
-        ]
+        cols_to_clean = [c for c in colnames if c in data_to_clean.columns]
 
         # Enforce data types
         for col in ["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"]:
@@ -768,7 +834,15 @@ class EPCDataProcessor:
 
         # Calculate averages
         cleaning_averages_to_merge = cleaning_data.groupby(columns_to_merge_on).agg(
-            dict(zip(cols_to_clean, ["mean", ] * len(cols_to_clean)))
+            dict(
+                zip(
+                    cols_to_clean,
+                    [
+                        "mean",
+                    ]
+                    * len(cols_to_clean),
+                )
+            )
         )
 
         # Merge with the original data
@@ -777,7 +851,7 @@ class EPCDataProcessor:
             cleaning_averages_to_merge,
             on=columns_to_merge_on,
             suffixes=("", "_AVERAGE"),
-            how='left'
+            how="left",
         )
 
         global_averages = cleaning_data[cols_to_clean].mean()
@@ -806,14 +880,20 @@ class EPCDataProcessor:
             raise Exception("Suffix should be one of _starting or _ending")
 
         if suffix == "_STARTING":
-            starting_cols = self.data[STARTING_SUFFIX_COMPONENT_COLS + EFFICIENCY_FEATURES].copy().add_suffix(suffix)
+            starting_cols = (
+                self.data[STARTING_SUFFIX_COMPONENT_COLS + EFFICIENCY_FEATURES]
+                .copy()
+                .add_suffix(suffix)
+            )
             fixed_cols = self.data[NO_SUFFIX_COMPONENT_COLS + POTENTIAL_COLUMNS].copy()
 
             return pd.concat([starting_cols, fixed_cols], axis=1)
 
-        return self.data[
-            ENDING_SUFFIX_COMPONENT_COLS + EFFICIENCY_FEATURES
-            ].copy().add_suffix(suffix)
+        return (
+            self.data[ENDING_SUFFIX_COMPONENT_COLS + EFFICIENCY_FEATURES]
+            .copy()
+            .add_suffix(suffix)
+        )
 
     def get_fixed_features(self) -> pd.DataFrame:
         """
@@ -831,14 +911,17 @@ class EPCDataProcessor:
         :param cols_to_ignore: If specified, is a list of columns to ignore, e.g. uuids
         :return: DataFrame with coerced columns.
         """
-        object_columns = df.select_dtypes(include=['object']).columns
+        object_columns = df.select_dtypes(include=["object"]).columns
         if cols_to_ignore:
             object_columns = [c for c in object_columns if c not in cols_to_ignore]
 
         for column in object_columns:
             unique_values = df[column].dropna().unique()
             # If the unique values in the column are 'True' and 'False', convert the column to boolean
-            if set(unique_values) == {'True', 'False'} or set(unique_values) == {True, False}:
+            if set(unique_values) == {"True", "False"} or set(unique_values) == {
+                True,
+                False,
+            }:
                 df[column] = df[column].astype(bool)
 
         return df
@@ -877,7 +960,6 @@ class EPCDataProcessor:
 
     @staticmethod
     def clean_efficiency_variables(df):
-
         """
         These is scope to clean this by the model per corresponding description.
         E.g. for WALLS_ENG_EFF we could look at the mode efficiency rating by description and

From d51eeec58d77dff99c0c03033be1d0da534fd9e2 Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Tue, 28 May 2024 17:48:45 +0100
Subject: [PATCH 02/80] add thermal transmittance unit as boolean flag to
 signify walls from new builds - assuming only new builds have this
 description

---
 etl/epc/Dataset.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/etl/epc/Dataset.py b/etl/epc/Dataset.py
index 7d5c3ef8..36abd4ef 100644
--- a/etl/epc/Dataset.py
+++ b/etl/epc/Dataset.py
@@ -559,9 +559,9 @@ class TrainingDataset(BaseDataset):
             "walls": [
                 # We need to cleaned descriptions for pulling out u-values
                 "original_description",
-                "thermal_transmittance_unit",
+                # "thermal_transmittance_unit",
                 "original_description_ending",
-                "thermal_transmittance_unit_ending",
+                # "thermal_transmittance_unit_ending",
                 "is_cavity_wall_ending",
                 "is_solid_brick_ending",
                 "is_system_built_ending",

From c3e04d2d007f191f5b2e6c3b0fa7d1737e6749fd Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Tue, 28 May 2024 18:14:47 +0100
Subject: [PATCH 03/80] add temp fix for cleaned to allow for new builds to
 flag thermal unit

---
 etl/epc/Pipeline.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/etl/epc/Pipeline.py b/etl/epc/Pipeline.py
index 6abf05bd..3a078703 100644
--- a/etl/epc/Pipeline.py
+++ b/etl/epc/Pipeline.py
@@ -64,6 +64,21 @@ def get_cleaned_description_mapping():
 
 clean_lookup = get_cleaned_description_mapping()
 
+# TODO: TEMPORARY FIX - force every non-null walls thermal_transmittance_unit to "w/m-¦k"
+new_walls_description_mapping = pd.DataFrame(clean_lookup["walls-description"])
+
+import numpy as np
+
+new_walls_description_mapping["thermal_transmittance_unit"] = np.where(
+    ~pd.isnull(new_walls_description_mapping["thermal_transmittance_unit"]),
+    "w/m-¦k",
+    new_walls_description_mapping["thermal_transmittance_unit"],
+)
+
+clean_lookup["walls-description"] = new_walls_description_mapping.to_dict(
+    orient="records"
+)
+
 
 class EPCPipeline:
     """

From 57477907cbed9086f275f5761e1a240cbcabc726 Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Tue, 28 May 2024 19:02:19 +0100
Subject: [PATCH 04/80] add check for float nan in recommendations

---
 etl/epc/Dataset.py                        |   6 +-
 recommendations/WallRecommendations.py    | 148 +++++++++++++++-------
 recommendations/WindowsRecommendations.py |  36 ++++--
 3 files changed, 130 insertions(+), 60 deletions(-)

diff --git a/etl/epc/Dataset.py b/etl/epc/Dataset.py
index 36abd4ef..ee3e357c 100644
--- a/etl/epc/Dataset.py
+++ b/etl/epc/Dataset.py
@@ -229,7 +229,9 @@ class TrainingDataset(BaseDataset):
         """
         # TODO: move into EPCRecord record
         uvalue_columns = [
-            col for col in self.df.columns if "thermal_transmittance" in col
+            col
+            for col in self.df.columns
+            if "thermal_transmittance" in col and "_unit" not in col
         ]
         for uvalue_col in uvalue_columns:
             self.df[uvalue_col] = pd.to_numeric(self.df[uvalue_col])
@@ -703,6 +705,8 @@ class TrainingDataset(BaseDataset):
                     "insulation_thickness_ending": f"{component}_insulation_thickness_ending",
                     "thermal_transmittance": f"{component}_thermal_transmittance",
                     "thermal_transmittance_ending": f"{component}_thermal_transmittance_ending",
+                    "thermal_transmittance_unit": f"{component}_thermal_transmittance_unit",
+                    "thermal_transmittance_unit_ending": f"{component}_thermal_transmittance_unit_ending",
                     "tariff_type": f"{component}_tariff_type",
                     "tariff_type_ending": f"{component}_tariff_type_ending",
                     "clean_description": f"{component}_clean_description",
diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py
index 20fc453c..8ca34bc8 100644
--- a/recommendations/WallRecommendations.py
+++ b/recommendations/WallRecommendations.py
@@ -7,8 +7,13 @@ from datatypes.enums import QuantityUnits
 from backend.Property import Property
 from BaseUtility import Definitions
 from recommendations.recommendation_utils import (
-    r_value_per_mm_to_u_value, calculate_u_value_uplift, is_diminishing_returns, update_lowest_selected_u_value,
-    get_recommended_part, get_wall_u_value, override_costs
+    r_value_per_mm_to_u_value,
+    calculate_u_value_uplift,
+    is_diminishing_returns,
+    update_lowest_selected_u_value,
+    get_recommended_part,
+    get_wall_u_value,
+    override_costs,
 )
 from recommendations.config import PARTIALLY_FILLED_PERCENTAGE_ASSUMPTION
 from recommendations.Costs import Costs
@@ -22,7 +27,7 @@ class WallRecommendations(Definitions):
     # After 1930, Solid brick walls became less populate and instead, cavity walls became a
     # more popular choice
     YEARS_CAVITY_WALLS_BEGAN = 1930
-    U_VALUE_UNIT = 'w/m-¦k'
+    U_VALUE_UNIT = "w/m-¦k"
 
     # part L building regulations indicate that any rennovations on an existing property's walls should
     # achieve a U-value of no higher than 0.3
@@ -53,11 +58,7 @@ class WallRecommendations(Definitions):
     # threshold
     NEW_BUILD_INSULATED = 0.75
 
-    def __init__(
-        self,
-        property_instance: Property,
-        materials: List
-    ):
+    def __init__(self, property_instance: Property, materials: List):
         self.property = property_instance
         self.costs = Costs(self.property)
         # For audit purposes, when estimating u values we'll store it
@@ -75,9 +76,10 @@ class WallRecommendations(Definitions):
         ]
 
         self.internal_wall_non_insulation_materials = [
-            part for part in materials if part["type"] in [
-                "iwi_wall_demolition", "iwi_vapour_barrier", "iwi_redecoration"
-            ]
+            part
+            for part in materials
+            if part["type"]
+            in ["iwi_wall_demolition", "iwi_vapour_barrier", "iwi_redecoration"]
         ]
 
         self.external_wall_insulation_materials = [
@@ -85,9 +87,10 @@ class WallRecommendations(Definitions):
         ]
 
         self.external_wall_non_insulation_materials = [
-            part for part in materials if part["type"] in [
-                "ewi_wall_demolition", "ewi_wall_preparation", "ewi_wall_redecoration"
-            ]
+            part
+            for part in materials
+            if part["type"]
+            in ["ewi_wall_demolition", "ewi_wall_preparation", "ewi_wall_redecoration"]
         ]
 
     @property
@@ -98,7 +101,9 @@ class WallRecommendations(Definitions):
 
         # Current logic: If the property is in a conservation area/heritage building/listed building or a flat,
         # it is not suitable for EWI
-        if self.property.restricted_measures or (self.property.data["property-type"].lower() == "flat"):
+        if self.property.restricted_measures or (
+            self.property.data["property-type"].lower() == "flat"
+        ):
             return False
 
         return True
@@ -109,31 +114,43 @@ class WallRecommendations(Definitions):
         # recommend internal wall insulation as a possible measure
 
         u_value = self.property.walls["thermal_transmittance"]
+        u_value = None if u_value is None or math.isnan(u_value) else u_value
+
         is_cavity_wall = self.property.walls["is_cavity_wall"]
         insulation_thickness = self.property.walls["insulation_thickness"]
 
         # We check if the wall is already insulated and if so, we exit
-        if ((insulation_thickness in ["average", "above average"]) or self.property.walls["is_filled_cavity"]) and (
-            "cavity_extract_and_refill" not in self.property.non_invasive_recommendations
+        if (
+            (insulation_thickness in ["average", "above average"])
+            or self.property.walls["is_filled_cavity"]
+        ) and (
+            "cavity_extract_and_refill"
+            not in self.property.non_invasive_recommendations
         ):
             return
 
         if u_value:
 
             if self.property.walls["thermal_transmittance_unit"] != self.U_VALUE_UNIT:
-                raise NotImplementedError("Haven't handled the case of other u value units yet")
+                raise NotImplementedError(
+                    "Haven't handled the case of other u value units yet"
+                )
 
             # If the property is a new build and the U-value is below 0.75, we don't recommend insulation because it's
             # not practical
-            if (self.property.data["transaction-type"] == "new dwelling") and (u_value <= self.NEW_BUILD_INSULATED):
+            if (self.property.data["transaction-type"] == "new dwelling") and (
+                u_value <= self.NEW_BUILD_INSULATED
+            ):
                 # Recommend nothing
                 return
 
             # We can't detect it's a cavity wall, but it was built after 1990 so likely built with insulation already
             # + it already has a U-value WORSE than the building regulations, so we recommend either internal or
             # external wall insulation
-            if (not is_cavity_wall) and (self.property.year_built >= self.YEAR_WALLS_BUILT_WITH_INSULATION) and (
-                u_value >= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE
+            if (
+                (not is_cavity_wall)
+                and (self.property.year_built >= self.YEAR_WALLS_BUILT_WITH_INSULATION)
+                and (u_value >= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE)
             ):
                 # Recommend insulation
                 self.find_insulation(u_value, phase)
@@ -141,8 +158,10 @@ class WallRecommendations(Definitions):
 
             # We can't detect it's a cavity wall, but it was built after 1990 so likely built with insulation already
             # + it already has a U-value better than the building regulations, so we don't need to recommend anything
-            if (not is_cavity_wall) and (self.property.year_built >= self.YEAR_WALLS_BUILT_WITH_INSULATION) and (
-                u_value <= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE
+            if (
+                (not is_cavity_wall)
+                and (self.property.year_built >= self.YEAR_WALLS_BUILT_WITH_INSULATION)
+                and (u_value <= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE)
             ):
                 # Recommend nothing
                 return
@@ -205,28 +224,40 @@ class WallRecommendations(Definitions):
         recommendations = []
         for _, material in insulation_materials.iterrows():
 
-            part_u_value = r_value_per_mm_to_u_value(cavity_width, material["r_value_per_mm"])
+            part_u_value = r_value_per_mm_to_u_value(
+                cavity_width, material["r_value_per_mm"]
+            )
 
             _, new_u_value = calculate_u_value_uplift(u_value, part_u_value)
             new_u_value = math.ceil(new_u_value * 100.0) / 100.0
 
             if is_diminishing_returns(
-                recommendations, new_u_value, lowest_selected_u_value, self.DIMINISHING_RETURNS_U_VALUE
+                recommendations,
+                new_u_value,
+                lowest_selected_u_value,
+                self.DIMINISHING_RETURNS_U_VALUE,
             ):
                 continue
 
             if new_u_value <= self.BUILDING_REGULATIONS_PART_L_CAVITY_WALL_MAX_U_VALUE:
-                lowest_selected_u_value = update_lowest_selected_u_value(lowest_selected_u_value, new_u_value)
+                lowest_selected_u_value = update_lowest_selected_u_value(
+                    lowest_selected_u_value, new_u_value
+                )
 
-                is_extraction_and_refill = "cavity_extract_and_refill" in self.property.non_invasive_recommendations
+                is_extraction_and_refill = (
+                    "cavity_extract_and_refill"
+                    in self.property.non_invasive_recommendations
+                )
 
                 cost_result = self.costs.cavity_wall_insulation(
                     wall_area=self.property.insulation_wall_area,
                     material=material.to_dict(),
-                    is_extraction_and_refill=is_extraction_and_refill
+                    is_extraction_and_refill=is_extraction_and_refill,
                 )
 
-                already_installed = "cavity_wall_insulation" in self.property.already_installed
+                already_installed = (
+                    "cavity_wall_insulation" in self.property.already_installed
+                )
                 if already_installed:
                     cost_result = override_costs(cost_result)
 
@@ -246,7 +277,7 @@ class WallRecommendations(Definitions):
                                 part=material.to_dict(),
                                 quantity=self.property.insulation_wall_area,
                                 quantity_unit=QuantityUnits.m2.value,
-                                cost_result=cost_result
+                                cost_result=cost_result,
                             )
                         ],
                         "type": "cavity_wall_insulation",
@@ -255,13 +286,15 @@ class WallRecommendations(Definitions):
                         "new_u_value": new_u_value,
                         "sap_points": None,
                         "already_installed": already_installed,
-                        **cost_result
+                        **cost_result,
                     }
                 )
 
         self.recommendations = recommendations
 
-    def _find_insulation(self, u_value, insulation_materials, non_insulation_materials, phase):
+    def _find_insulation(
+        self, u_value, insulation_materials, non_insulation_materials, phase
+    ):
 
         lowest_selected_u_value = None
         recommendations = []
@@ -269,7 +302,9 @@ class WallRecommendations(Definitions):
 
             for _, material in insulation_material_group.iterrows():
 
-                part_u_value = r_value_per_mm_to_u_value(material["depth"], material["r_value_per_mm"])
+                part_u_value = r_value_per_mm_to_u_value(
+                    material["depth"], material["r_value_per_mm"]
+                )
                 _, new_u_value = calculate_u_value_uplift(u_value, part_u_value)
                 new_u_value = math.ceil(new_u_value * 100.0) / 100.0
 
@@ -280,22 +315,30 @@ class WallRecommendations(Definitions):
                 # further into the diminishing returns threshold and can shouldn't be
 
                 if is_diminishing_returns(
-                    recommendations, new_u_value, lowest_selected_u_value, self.DIMINISHING_RETURNS_U_VALUE
+                    recommendations,
+                    new_u_value,
+                    lowest_selected_u_value,
+                    self.DIMINISHING_RETURNS_U_VALUE,
                 ):
                     continue
 
                 # We allow a small tolerance for error so we don't discount the recommendation entirely
                 if new_u_value <= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE:
 
-                    lowest_selected_u_value = update_lowest_selected_u_value(lowest_selected_u_value, new_u_value)
+                    lowest_selected_u_value = update_lowest_selected_u_value(
+                        lowest_selected_u_value, new_u_value
+                    )
 
                     if material["type"] == "internal_wall_insulation":
                         cost_result = self.costs.internal_wall_insulation(
                             wall_area=self.property.insulation_wall_area,
                             material=material.to_dict(),
-                            non_insulation_materials=non_insulation_materials
+                            non_insulation_materials=non_insulation_materials,
+                        )
+                        already_installed = (
+                            "internal_wall_insulation"
+                            in self.property.already_installed
                         )
-                        already_installed = "internal_wall_insulation" in self.property.already_installed
                         if already_installed:
                             cost_result = override_costs(cost_result)
 
@@ -303,9 +346,12 @@ class WallRecommendations(Definitions):
                         cost_result = self.costs.external_wall_insulation(
                             wall_area=self.property.insulation_wall_area,
                             material=material.to_dict(),
-                            non_insulation_materials=non_insulation_materials
+                            non_insulation_materials=non_insulation_materials,
+                        )
+                        already_installed = (
+                            "external_wall_insulation"
+                            in self.property.already_installed
                         )
-                        already_installed = "external_wall_insulation" in self.property.already_installed
                         if already_installed:
                             cost_result = override_costs(cost_result)
                     else:
@@ -319,7 +365,7 @@ class WallRecommendations(Definitions):
                                     part=material.to_dict(),
                                     quantity=self.property.insulation_wall_area,
                                     quantity_unit=QuantityUnits.m2.value,
-                                    cost_result=cost_result
+                                    cost_result=cost_result,
                                 )
                             ],
                             "type": material["type"],
@@ -328,7 +374,7 @@ class WallRecommendations(Definitions):
                             "new_u_value": new_u_value,
                             "already_installed": already_installed,
                             "sap_points": None,
-                            **cost_result
+                            **cost_result,
                         }
                     )
 
@@ -350,16 +396,18 @@ class WallRecommendations(Definitions):
         if self.ewi_valid:
             ewi_recommendations = self._find_insulation(
                 u_value=u_value,
-                insulation_materials=pd.DataFrame(self.external_wall_insulation_materials),
+                insulation_materials=pd.DataFrame(
+                    self.external_wall_insulation_materials
+                ),
                 non_insulation_materials=self.external_wall_non_insulation_materials,
-                phase=phase
+                phase=phase,
             )
 
         iwi_recommendations = self._find_insulation(
             u_value=u_value,
             insulation_materials=pd.DataFrame(self.internal_wall_insulation_materials),
             non_insulation_materials=self.internal_wall_non_insulation_materials,
-            phase=phase
+            phase=phase,
         )
 
         self.recommendations += ewi_recommendations + iwi_recommendations
@@ -367,12 +415,16 @@ class WallRecommendations(Definitions):
     @staticmethod
     def _make_description(material):
         if material["type"] == "internal_wall_insulation":
-            return (f"Install {int(material['depth'])}{material['depth_unit']} {material['description']} on internal "
-                    f"walls")
+            return (
+                f"Install {int(material['depth'])}{material['depth_unit']} {material['description']} on internal "
+                f"walls"
+            )
 
         if material["type"] == "external_wall_insulation":
-            return (f"Install {int(material['depth'])}{material['depth_unit']} {material['description']} on external "
-                    f"walls")
+            return (
+                f"Install {int(material['depth'])}{material['depth_unit']} {material['description']} on external "
+                f"walls"
+            )
 
         if material["type"] == "cavity_wall_insulation":
             return f"Fill cavity with {material['description']}"
diff --git a/recommendations/WindowsRecommendations.py b/recommendations/WindowsRecommendations.py
index b7c2823a..8c0cc493 100644
--- a/recommendations/WindowsRecommendations.py
+++ b/recommendations/WindowsRecommendations.py
@@ -4,7 +4,7 @@ import numpy as np
 
 from backend.Property import Property
 from recommendations.Costs import Costs
-from recommendation_utils import override_costs
+from recommendations.recommendation_utils import override_costs
 
 
 class WindowsRecommendations:
@@ -14,7 +14,7 @@ class WindowsRecommendations:
         # glazed
         "most": 0.33,
         # If glazing is partial, we assume 50/50 split between glazed and unglazed
-        "partial": 0.5
+        "partial": 0.5,
     }
 
     def __init__(self, property_instance: Property, materials: List):
@@ -52,14 +52,20 @@ class WindowsRecommendations:
         if not number_of_windows:
             raise ValueError("Number of windows not specified")
 
-        if self.property.windows["has_glazing"] & (self.property.windows["glazing_coverage"] == "full"):
+        if self.property.windows["has_glazing"] & (
+            self.property.windows["glazing_coverage"] == "full"
+        ):
             return
 
         # We scale the number of windows based on the proportion of existing glazing
         if self.property.data["multi-glaze-proportion"] != "":
-            n_windows_scalar = 1 - (int(self.property.data["multi-glaze-proportion"]) / 100)
+            n_windows_scalar = 1 - (
+                int(self.property.data["multi-glaze-proportion"]) / 100
+            )
         else:
-            n_windows_scalar = self.COVERAGE_MAP.get(self.property.windows["glazing_coverage"], 1)
+            n_windows_scalar = self.COVERAGE_MAP.get(
+                self.property.windows["glazing_coverage"], 1
+            )
 
         number_of_windows *= n_windows_scalar
         number_of_windows = np.ceil(number_of_windows)
@@ -68,7 +74,7 @@ class WindowsRecommendations:
         cost_result = self.costs.window_glazing(
             number_of_windows=number_of_windows,
             material=self.glazing_material,
-            is_secondary_glazing=is_secondary_glazing
+            is_secondary_glazing=is_secondary_glazing,
         )
 
         already_installed = "windows_glazing" in self.property.already_installed
@@ -76,18 +82,26 @@ class WindowsRecommendations:
             cost_result = override_costs(cost_result)
             description = "The property already has double glazing installed. No further action is required."
         else:
-            glazing_type = "secondary glazing" if is_secondary_glazing else "double glazing"
+            glazing_type = (
+                "secondary glazing" if is_secondary_glazing else "double glazing"
+            )
             if self.property.windows["glazing_coverage"] in ["partial", "most"]:
                 description = f"Install {glazing_type} to the remaining windows"
             else:
                 description = f"Install {glazing_type} to all windows"
 
             if self.property.is_listed:
-                description += ". Secondary glazing recommended due to listed building status"
+                description += (
+                    ". Secondary glazing recommended due to listed building status"
+                )
             elif self.property.is_heritage:
-                description += ". Secondary glazing recommended due to herigate building status"
+                description += (
+                    ". Secondary glazing recommended due to herigate building status"
+                )
             elif self.property.in_conservation_area:
-                description += ". Secondary glazing recommended due to conservation area status"
+                description += (
+                    ". Secondary glazing recommended due to conservation area status"
+                )
 
         self.recommendation = [
             {
@@ -100,6 +114,6 @@ class WindowsRecommendations:
                 "sap_points": None,
                 "already_installed": already_installed,
                 **cost_result,
-                "is_secondary_glazing": is_secondary_glazing
+                "is_secondary_glazing": is_secondary_glazing,
             }
         ]

From 14452dde9937d242c30ff490b8c5039a80ea6fcc Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Tue, 28 May 2024 19:07:58 +0100
Subject: [PATCH 05/80] use pandas

---
 etl/epc/generate_scenarios_data.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/etl/epc/generate_scenarios_data.py b/etl/epc/generate_scenarios_data.py
index f9f66034..df1f9452 100644
--- a/etl/epc/generate_scenarios_data.py
+++ b/etl/epc/generate_scenarios_data.py
@@ -41,6 +41,15 @@ cleaning_data = read_dataframe_from_s3_parquet(
 materials = get_materials(session)
 cleaned = get_cleaned()
 
+# TODO: THIS IS A TEMPORARY FIX - force one unit string onto every walls-description entry that has a unit set
+new_walls_description_mapping = pd.DataFrame(cleaned["walls-description"])
+new_walls_description_mapping.loc[
+    ~new_walls_description_mapping["thermal_transmittance_unit"].isnull(),
+    "thermal_transmittance_unit",
+] = "w/m-¦k"
+
+cleaned["walls-description"] = new_walls_description_mapping.to_dict(orient="records")
+
 uprn_filenames = read_dataframe_from_s3_parquet(
     bucket_name=get_settings().DATA_BUCKET, file_key="spatial/filename_meta.parquet"
 )
@@ -167,7 +176,7 @@ for scenario_property in scenario_properties:
     p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
 
     recommender = Recommendations(property_instance=p, materials=materials)
-    property_recommendations = recommender.recommend("0")
+    property_recommendations = recommender.recommend()
 
     wall_recommendations = recommender.wall_recomender.recommendations
     loft_recommendations = recommender.roof_recommender.recommendations

From 54b6761803c9e83348f5d3d7644d21ed65070e3c Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 30 May 2024 11:55:52 +0100
Subject: [PATCH 06/80] updating how we set the simulation config for walls

---
 .idea/Model.iml                             |  2 +-
 .idea/misc.xml                              |  2 +-
 etl/customers/goldman/property_ownership.py | 37 +++++++++
 etl/property_valuation/requirements.txt     |  7 ++
 etl/property_valuation/scrape_valuations.py | 83 ++++++++++++++++++++
 recommendations/WallRecommendations.py      | 87 +++++++++++----------
 6 files changed, 173 insertions(+), 45 deletions(-)
 create mode 100644 etl/property_valuation/requirements.txt
 create mode 100644 etl/property_valuation/scrape_valuations.py

diff --git a/.idea/Model.iml b/.idea/Model.iml
index b0f9c00d..4413bb06 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 1122b380..6f308057 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/etl/customers/goldman/property_ownership.py b/etl/customers/goldman/property_ownership.py
index d30205ae..44fa7142 100644
--- a/etl/customers/goldman/property_ownership.py
+++ b/etl/customers/goldman/property_ownership.py
@@ -3,6 +3,7 @@ import pandas as pd
 from tqdm import tqdm
 import Levenshtein
 from backend.SearchEpc import SearchEpc
+from utils.s3 import read_dataframe_from_s3_parquet
 
 # Average value of a property in the midlands in 2024 was £238,000. Since these are EPC F & G properties, we assume
 # £207,000 since they trade at a discount. This is based on the rightmove study where moving from an EPC F/G -> C has a
@@ -248,6 +249,13 @@ def app():
     """
     This script is for scoping property ownership for EPC F & G rated properties in Birmingam, for Goldman Sachs
     """
+
+    # TODO: This property:
+    #       https://epc.opendatacommunities.org/domestic/search?address=&postcode=&local-authority=&constituency
+    #       =&uprn=100031179243&from-month=1&from-year=2008&to-month=12&to-year=2024
+    #       is actually listed in two local authorities, causing us to think it's an EPC F or G property, but
+    #       it's actually EPC E. Need to handle this, probably by reading in all of the EPC data, concatenating together
+    #       and performing a singular filter for most recent EPC by UPRN
     # paths = [
     #     "local_data/all-domestic-certificates/domestic-E08000025-Birmingham/certificates.csv",
     #     "local_data/all-domestic-certificates/domestic-E08000031-Wolverhampton/certificates.csv",
@@ -477,6 +485,35 @@ def app():
     portfolio_epc_data_50m.to_excel("portfolio_epc_data_50m 28th May.xlsx", index=False)
     portfolio_epc_data_20m.to_excel("portfolio_epc_data_20m 28th May.xlsx", index=False)
 
+    # Look up the conservation-area, listed-building and heritage status of each valued property
+    valuations = pd.read_excel("property value.xlsx")
+
+    uprn_filenames = read_dataframe_from_s3_parquet(
+        bucket_name="retrofit-data-dev", file_key="spatial/filename_meta.parquet"
+    )
+
+    geospatial_data = []
+    for _, row in tqdm(valuations.iterrows(), total=len(valuations)):
+        filtered_df = uprn_filenames[
+            (uprn_filenames["lower"] <= row["UPRN"])
+            & (uprn_filenames["upper"] >= row["UPRN"])
+            ]
+        if filtered_df.empty:
+            raise Exception("No match found")
+
+        filename = filtered_df.iloc[0]["filenames"]
+
+        spatial_data = read_dataframe_from_s3_parquet(
+            bucket_name="retrofit-data-dev", file_key=f"spatial/{filename}"
+        )
+        spatial = spatial_data[
+            spatial_data["UPRN"] == row["UPRN"]
+            ][["UPRN", "conservation_status", "is_listed_building", "is_heritage_building"]]
+        geospatial_data.append(spatial.to_dict("records")[0])
+
+    geospatial_data = pd.DataFrame(geospatial_data)
+    geospatial_data.to_excel("geospatial_data.xlsx", index=False)
+
 
 def company_aggregation():
     company_ownership = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/CCOD_FULL_2024_04.csv")
diff --git a/etl/property_valuation/requirements.txt b/etl/property_valuation/requirements.txt
new file mode 100644
index 00000000..8a4a1924
--- /dev/null
+++ b/etl/property_valuation/requirements.txt
@@ -0,0 +1,7 @@
+seleniumbase
+beautifulsoup4
+requests
+pandas
+tqdm
+openpyxl
+undetected_chromedriver
\ No newline at end of file
diff --git a/etl/property_valuation/scrape_valuations.py b/etl/property_valuation/scrape_valuations.py
new file mode 100644
index 00000000..67713a4e
--- /dev/null
+++ b/etl/property_valuation/scrape_valuations.py
@@ -0,0 +1,83 @@
+import requests
+import random
+import time
+import pandas as pd
+from bs4 import BeautifulSoup
+from tqdm import tqdm
+from seleniumbase import Driver
+from seleniumbase import page_actions
+
+import undetected_chromedriver as webdriver
+from selenium.webdriver.chrome.service import Service
+from selenium.webdriver.common.by import By
+from selenium.webdriver.common.keys import Keys
+import time
+import pandas as pd
+
+BASE_URL = "https://www.zoopla.co.uk/property/uprn/{uprn}/"
+
+
+def initialize_driver():
+    driver = Driver(headless=True, uc=True)  # Set headless to True if you want headless mode
+    return driver
+
+
+def app():
+    # Read in the starting asset list
+    asset_list = pd.read_excel("portfolio_epc_data_50m 28th May.xlsx")
+    asset_list = asset_list[["UPRN", "ADDRESS", "POSTCODE"]]
+
+    # asset_list.to_excel("property value.xlsx", index=False)
+
+    # Generate the list of urls
+    urls = [BASE_URL.format(uprn=uprn) for uprn in asset_list["UPRN"]]
+
+    driver = webdriver.Chrome()
+
+    driver = initialize_driver()
+    driver.set_page_load_timeout(30)  # Increase page load timeout
+
+    result = []
+    for i, (url, uprn) in tqdm(enumerate(zip(urls, asset_list["UPRN"].tolist())), total=len(urls)):
+
+        # Whenever the count of successful results so far is a non-zero multiple of 10, sleep an extra 7 seconds
+        if len(result) % 10 == 0 and len(result) != 0:
+            time.sleep(7)
+
+        try:
+
+            driver.get(url)
+            page_actions.wait_for_element_visible(driver, "p[data-testid='estimate-blurred']", timeout=30)
+
+            price_element = driver.find_element("css selector", "p[data-testid='estimate-blurred']")
+            price = price_element.get_text(strip=True)
+
+            low_price_element = driver.find_element("css selector", "span[data-testid='low-estimate-blurred']")
+            low_price = low_price_element.get_text(strip=True)
+
+            high_price_element = driver.find_element("css selector", "span[data-testid='high-estimate-blurred']")
+            high_price = high_price_element.get_text(strip=True)
+
+            result.append(
+                {
+                    "UPRN": uprn,
+                    "price": price,
+                    "lower_estimate": low_price,
+                    "upper_estimate": high_price
+                }
+            )
+
+            # Sleep a random amount of time between 5 and 20 seconds
+            sleep_time = 5 + (15 * random.random())
+            time.sleep(sleep_time)
+
+        except Exception as e:
+            print(f"Failed to retrieve data for UPRN {uprn} at iteration {i}: {e}")
+
+    # Save everything scraped so far; the filename records the last loop index reached
+    savepoint = pd.DataFrame(result)
+    savepoint.to_csv(f"savepoint_index_{i}.csv", index=False)
+
+
+if __name__ == "__main__":
+    app()
diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py
index 243a5edb..939bef80 100644
--- a/recommendations/WallRecommendations.py
+++ b/recommendations/WallRecommendations.py
@@ -55,23 +55,24 @@ class WallRecommendations(Definitions):
     NEW_BUILD_INSULATED = 0.75
 
     # These are the ending descriptions we consider for walls with external insulation
+    # This maps the clean descriptions to the ending descriptions
     EXTERNALLY_INSULATED_WALL_DESCRIPTIONS = {
-        "solid_brick": "Solid brick, with external insulation",
-        "cob": "Cob, with external insulation",
-        "system_built": "System built, with external insulation",
-        "granite_or_whinstone": 'Granite or whinstone, with external insulation',
-        "sandstone_or_limestone": 'Sandstone or limestone, with external insulation',
-        "timber_frame": "Timber frame, with external insulation"
+        "Cavity wall, as built, insulated": "Cavity wall, filled cavity and external insulation",
+        "Solid brick, as built, no insulation": "Solid brick, with external insulation",
+        "Cob, as built": "Cob, with external insulation",
+        "System built, as built, no insulation": "System built, with external insulation",
+        "Granite or whinstone, as built, no insulation": 'Granite or whinstone, with external insulation',
+        "Timber frame, as built, no insulation": "Timber frame, with external insulation",
     }
 
     # These are the ending descriptions we consider for walls with internal insulation
     INTERNALLY_INSULATED_WALL_DESCRIPTIONS = {
-        "solid_brick": "Solid brick, with internal insulation",
-        "cob": "Cob, with internal insulation",
-        "system_built": "System built, with internal insulation",
-        "granite_or_whinstone": 'Granite or whinstone, with internal insulation',
-        "sandstone_or_limestone": 'Sandstone or limestone, with internal insulation',
-        "timber_frame": "Timber frame, with internal insulation"
+        "Cavity wall, as built, insulated": "Cavity wall, filled cavity and internal insulation",
+        "Solid brick, as built, no insulation": "Solid brick, with internal insulation",
+        "Cob, as built": "Cob, with internal insulation",
+        "System built, as built, no insulation": "System built, with internal insulation",
+        "Granite or whinstone, as built, no insulation": 'Granite or whinstone, with internal insulation',
+        "Timber frame, as built, no insulation": "Timber frame, with internal insulation",
     }
 
     def __init__(
@@ -302,17 +303,14 @@ class WallRecommendations(Definitions):
 
                 wall_ending_config = WallAttributes("Cavity wall, filled cavity").process()
 
-                simulation_config = {}
-                if self.property.data["walls-energy-eff"] not in ["Good", "Very Good"]:
-                    simulation_config = {
-                        "walls_energy_eff_ending": "Good",
-                        "walls_thermal_transmittance_ending": new_u_value
-                    }
-
                 walls_simulation_config = check_simulation_difference(
                     new_config=wall_ending_config, old_config=self.property.walls, prefix="walls_"
                 )
 
+                simulation_config = self.set_starting_simulation_config(
+                    wall_ending_config=wall_ending_config
+                )
+
                 simulation_config = {**simulation_config, **walls_simulation_config}
 
                 recommendations.append(
@@ -340,30 +338,35 @@ class WallRecommendations(Definitions):
         self.recommendations = recommendations
 
     def get_internal_external_wall_description(self, description_map, new_u_value):
-        if self.property.walls["is_solid_brick"]:
-            return description_map["solid_brick"]
-
-        if self.property.walls["is_cob"]:
-            return description_map["cob"]
-
-        if self.property.walls["is_system_built"]:
-            return description_map["system_built"]
-
-        if self.property.walls["is_granite_or_whinstone"]:
-            return description_map["granite_or_whinstone"]
-
-        if self.property.walls["is_sandstone_or_limestone"]:
-            return description_map["sandstone_or_limestone"]
-
-        if self.property.walls["is_timber_frame"]:
-            return description_map["timber_frame"]
 
         if "Average thermal transmittance" in self.property.walls["clean_description"]:
             if new_u_value is None:
                 raise ValueError("New u value is None")
             return f'Average thermal transmittance {new_u_value} W/m-¦K'
 
-        raise NotImplementedError("Not implemented yet")
+        return description_map[self.property.walls["clean_description"]]
+
+    def set_starting_simulation_config(self, wall_ending_config):
+        """
+        Helper function to set the starting simulation config
+        """
+
+        simulation_config = {}
+        if self.property.data["walls-energy-eff"] not in ["Good", "Very Good"]:
+            simulation_config = {
+                "walls_energy_eff_ending": "Good"
+            }
+
+        # We check if we have double insulation in any instances
+        double_insulation = (
+            (wall_ending_config["is_filled_cavity"] and wall_ending_config["external_insulation"]) or
+            (wall_ending_config["is_filled_cavity"] and wall_ending_config["internal_insulation"]) or
+            (wall_ending_config["external_insulation"] and wall_ending_config["internal_insulation"])
+        )
+        if double_insulation:
+            simulation_config["walls_energy_eff_ending"] = "Very Good"
+
+        return simulation_config
 
     def _find_insulation(self, u_value, insulation_materials, non_insulation_materials, phase):
 
@@ -425,16 +428,14 @@ class WallRecommendations(Definitions):
 
                     wall_ending_config = WallAttributes(new_description).process()
 
-                    simulation_config = {}
-                    if self.property.data["walls-energy-eff"] not in ["Good", "Very Good"]:
-                        simulation_config = {
-                            "walls_energy_eff_ending": "Good"
-                        }
-
                     walls_simulation_config = check_simulation_difference(
                         new_config=wall_ending_config, old_config=self.property.walls, prefix="walls_"
                     )
 
+                    simulation_config = self.set_starting_simulation_config(
+                        wall_ending_config=wall_ending_config
+                    )
+
                     simulation_config = {
                         **walls_simulation_config,
                         **simulation_config,

From 1c77c0801dc9a091db9d70c9cf3794591db389ca Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 30 May 2024 12:06:12 +0100
Subject: [PATCH 07/80] Adding additional description to wall maps

---
 recommendations/WallRecommendations.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py
index 939bef80..682215ec 100644
--- a/recommendations/WallRecommendations.py
+++ b/recommendations/WallRecommendations.py
@@ -59,6 +59,7 @@ class WallRecommendations(Definitions):
     EXTERNALLY_INSULATED_WALL_DESCRIPTIONS = {
         "Cavity wall, as built, insulated": "Cavity wall, filled cavity and external insulation",
         "Solid brick, as built, no insulation": "Solid brick, with external insulation",
+        "Solid brick, as built, insulated": "Solid brick, with external insulation",
         "Cob, as built": "Cob, with external insulation",
         "System built, as built, no insulation": "System built, with external insulation",
         "Granite or whinstone, as built, no insulation": 'Granite or whinstone, with external insulation',
@@ -69,6 +70,7 @@ class WallRecommendations(Definitions):
     INTERNALLY_INSULATED_WALL_DESCRIPTIONS = {
         "Cavity wall, as built, insulated": "Cavity wall, filled cavity and internal insulation",
         "Solid brick, as built, no insulation": "Solid brick, with internal insulation",
+        "Solid brick, as built, insulated": "Solid brick, with internal insulation",
         "Cob, as built": "Cob, with internal insulation",
         "System built, as built, no insulation": "System built, with internal insulation",
         "Granite or whinstone, as built, no insulation": 'Granite or whinstone, with internal insulation',

From 47058984d4db121a0aea38839fdb5e9d97b2e76a Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 30 May 2024 13:38:26 +0100
Subject: [PATCH 08/80] fixed setting of walls insulation thickness

---
 backend/app/plan/router.py                  | 12 ++++++++++++
 etl/property_valuation/scrape_valuations.py |  5 +++++
 recommendations/WallRecommendations.py      |  4 ++--
 recommendations/recommendation_utils.py     | 10 ++++++++--
 4 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 1e2c1e6f..1b0dd267 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -741,6 +741,9 @@ async def build_mds(body: PlanTriggerRequest):
         for p in tqdm(input_properties):
 
             p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
+            # [{'external_wall_insulation': 'EWI (Trad Const)'}, {'loft_insulation': 'LI'}, {'air_source_heat_pump':
+            # 'ASHP Htg'}, {'solar_pv': 'Solar PV'}]
+            # p.measures = [{'external_wall_insulation': 'EWI (Trad Const)'}]
 
             mds = Mds(property_instance=p, materials=materials)
             property_representative_recommendations, errors = mds.build()
@@ -788,6 +791,15 @@ async def build_mds(body: PlanTriggerRequest):
                 all_predictions[key] = pd.concat([all_predictions[key], scored])
 
         # We now produce a table of results for the mds report
+        from utils.s3 import read_dataframe_from_s3_parquet
+        z = read_dataframe_from_s3_parquet(
+            bucket_name="retrofit-data-dev",
+            file_key="sap_change_model/2024-05-28-19-08-25/dataset_rooms.parquet"
+        )
+
+        # TODO: 1) walls_insulation_thickness_ending is not being set in the recommendations_scoring_data,
+        #           insulation_thickness_ending is being set instead
+        #       2)
 
         # TODO: TEMP
         for p in plan_input:
diff --git a/etl/property_valuation/scrape_valuations.py b/etl/property_valuation/scrape_valuations.py
index 67713a4e..434168ca 100644
--- a/etl/property_valuation/scrape_valuations.py
+++ b/etl/property_valuation/scrape_valuations.py
@@ -78,6 +78,11 @@ def app():
     savepoint = pd.DataFrame(result)
     savepoint.to_csv(f"savepoint_index_{i}.csv", index=False)
 
+    # TODO: Testing Jina AI - didn't work but maybe one of the alternatives might work:
+    #       https://www.youtube.com/watch?v=QxHE4af5BQE
+    response = requests.get("https://r.jina.ai/https://www.zoopla.co.uk/property/uprn/41222761/")
+    response.text
+
 
 if __name__ == "__main__":
     app()
diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py
index 5c890823..b2ad4e5d 100644
--- a/recommendations/WallRecommendations.py
+++ b/recommendations/WallRecommendations.py
@@ -102,7 +102,7 @@ class WallRecommendations(Definitions):
             part
             for part in materials
             if part["type"]
-            in ["iwi_wall_demolition", "iwi_vapour_barrier", "iwi_redecoration"]
+               in ["iwi_wall_demolition", "iwi_vapour_barrier", "iwi_redecoration"]
         ]
 
         self.external_wall_insulation_materials = [
@@ -113,7 +113,7 @@ class WallRecommendations(Definitions):
             part
             for part in materials
             if part["type"]
-            in ["ewi_wall_demolition", "ewi_wall_preparation", "ewi_wall_redecoration"]
+               in ["ewi_wall_demolition", "ewi_wall_preparation", "ewi_wall_redecoration"]
         ]
 
     @property
diff --git a/recommendations/recommendation_utils.py b/recommendations/recommendation_utils.py
index 996f5c9c..07a861dc 100644
--- a/recommendations/recommendation_utils.py
+++ b/recommendations/recommendation_utils.py
@@ -756,17 +756,23 @@ def calculate_cavity_age(newest_epc, older_epcs, cleaned):
     return cavity_age
 
 
-def check_simulation_difference(old_config, new_config, prefix=""):
+def check_simulation_difference(old_config, new_config, prefix="", keys_with_prefix=None):
     """
     Given two dictionaries, that describe the heating control configurations, this method will compare the two
     and pick out the differences. These differences will be things that have been added and things that have been
     removed. This will be used to determine how we should be updating the configuration in the simulation
     :return:
     """
+
+    keys_with_prefix = (
+        ["is_assumed", "thermal_transmittance", "insulation_thickness"] if keys_with_prefix is None
+        else keys_with_prefix
+    )
+
     differences = {}
     for key in new_config:
         if old_config[key] != new_config[key]:
-            new_key = prefix + key + "_ending" if key in ["is_assumed", "thermal_transmittance"] else key + "_ending"
+            new_key = prefix + key + "_ending" if key in keys_with_prefix else key + "_ending"
             differences[new_key] = new_config[key]
 
     return differences

From dc39d6690bb85d85deb2e315755f2644eb5b1848 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 30 May 2024 14:23:17 +0100
Subject: [PATCH 09/80] reviewing predictions

---
 backend/app/plan/router.py | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 1b0dd267..88e882ed 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -743,7 +743,6 @@ async def build_mds(body: PlanTriggerRequest):
             p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
             # [{'external_wall_insulation': 'EWI (Trad Const)'}, {'loft_insulation': 'LI'}, {'air_source_heat_pump':
             # 'ASHP Htg'}, {'solar_pv': 'Solar PV'}]
-            # p.measures = [{'external_wall_insulation': 'EWI (Trad Const)'}]
 
             mds = Mds(property_instance=p, materials=materials)
             property_representative_recommendations, errors = mds.build()
@@ -790,13 +789,6 @@ async def build_mds(body: PlanTriggerRequest):
             for key, scored in predictions_dict.items():
                 all_predictions[key] = pd.concat([all_predictions[key], scored])
 
-        # We now produce a table of results for the mds report
-        from utils.s3 import read_dataframe_from_s3_parquet
-        z = read_dataframe_from_s3_parquet(
-            bucket_name="retrofit-data-dev",
-            file_key="sap_change_model/2024-05-28-19-08-25/dataset_rooms.parquet"
-        )
-
         # TODO: 1) walls_insulation_thickness_ending is not being set in the recommendations_scoring_data,
         #           insulation_thickness_ending is being set instead
         #       2)

From 5a9bc153067138e2a48ca5fa031a28725c2cc622 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 1 Jun 2024 14:32:20 +0100
Subject: [PATCH 10/80] completing combination of available measures

---
 backend/Property.py                           |   5 +-
 backend/app/plan/router.py                    |  18 ++-
 backend/app/plan/schemas.py                   |   6 +
 etl/customers/eon/pilot_asset_list.py         |  25 ++-
 etl/customers/lhp/30_may_2024_data_pull.py    | 148 ++++++++++++++++++
 etl/customers/unitas/20_may_2024_data_pull.py | 148 ++++++++++++++++++
 recommendations/Mds.py                        | 126 ++++++++++++++-
 7 files changed, 462 insertions(+), 14 deletions(-)
 create mode 100644 etl/customers/lhp/30_may_2024_data_pull.py
 create mode 100644 etl/customers/unitas/20_may_2024_data_pull.py

diff --git a/backend/Property.py b/backend/Property.py
index b7753413..212c20d6 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -93,7 +93,10 @@ class Property:
             non_invasive_recommendations else []
         )
         # This is a list of measures that have been recommended for the property
-        self.measures = ast.literal_eval(measures) if measures else None
+        if isinstance(measures, list):
+            self.measures = measures
+        else:
+            self.measures = ast.literal_eval(measures) if measures else None
 
         self.uprn = epc_record.get("uprn")
         self.full_sap_epc = epc_record.get("full_sap_epc")
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 88e882ed..91fc6963 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -23,7 +23,7 @@ from backend.app.db.functions.recommendations_functions import (
 )
 from backend.app.db.models.portfolio import rating_lookup
 from backend.app.dependencies import validate_token
-from backend.app.plan.schemas import PlanTriggerRequest
+from backend.app.plan.schemas import PlanTriggerRequest, MdsRequest
 from backend.app.plan.utils import get_cleaned
 from backend.app.utils import epc_to_sap_lower_bound, sap_to_epc
 
@@ -622,7 +622,7 @@ async def trigger_plan(body: PlanTriggerRequest):
 
 
 @router.post("/mds")
-async def build_mds(body: PlanTriggerRequest):
+async def build_mds(body: MdsRequest):
     # TODO: This is a placeholder location for the MDS endpoint, which this is being assembled
 
     logger.info("Connecting to db")
@@ -633,6 +633,8 @@ async def build_mds(body: PlanTriggerRequest):
         session.begin()
         logger.info("Getting the inputs")
         plan_input = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.trigger_file_path)
+        measure_set = body.measures
+        optimise_measures = measure_set is not None
 
         cleaning_data = read_dataframe_from_s3_parquet(
             bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet",
@@ -706,7 +708,10 @@ async def build_mds(body: PlanTriggerRequest):
             #     (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
             # ), {})
 
-            measures = config["measures"] if "measures" in config else None
+            if measure_set is None:
+                measures = config["measures"] if "measures" in config else None
+            else:
+                measures = measure_set
 
             input_properties.append(
                 Property(
@@ -738,13 +743,11 @@ async def build_mds(body: PlanTriggerRequest):
         recommendations_scoring_data = []
         representative_recommendations = {}
 
+        # TODO: Action the optimise_measures flat
         for p in tqdm(input_properties):
-
             p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
-            # [{'external_wall_insulation': 'EWI (Trad Const)'}, {'loft_insulation': 'LI'}, {'air_source_heat_pump':
-            # 'ASHP Htg'}, {'solar_pv': 'Solar PV'}]
 
-            mds = Mds(property_instance=p, materials=materials)
+            mds = Mds(property_instance=p, materials=materials, optimise_measures=optimise_measures)
             property_representative_recommendations, errors = mds.build()
 
             if errors:
@@ -886,6 +889,7 @@ async def build_mds(body: PlanTriggerRequest):
         results = pd.DataFrame(results)
         results["sap_uplift"] = results["sap_after"] - results["sap_before"]
 
+        # results.to_excel("mds_results 30th May.xlsx")
 
     except IntegrityError:
         logger.error("Database integrity error occurred", exc_info=True)
diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py
index 59c0ebef..fbc4d4f2 100644
--- a/backend/app/plan/schemas.py
+++ b/backend/app/plan/schemas.py
@@ -52,3 +52,9 @@ class PlanTriggerRequest(BaseModel):
         if v not in cls._allowed_housing_types:
             raise ValueError(f"{v} is not a valid housing type")
         return v
+
+
+class MdsRequest(PlanTriggerRequest):
+    # When creating the mds report, we allow an optional list of measures to select from. If this is passed, it will
+    # cause the service to select the optimal package from the list of measures
+    measures: Optional[conlist(str, min_items=1)] = None
diff --git a/etl/customers/eon/pilot_asset_list.py b/etl/customers/eon/pilot_asset_list.py
index 4f79e05e..b7c529e3 100644
--- a/etl/customers/eon/pilot_asset_list.py
+++ b/etl/customers/eon/pilot_asset_list.py
@@ -64,7 +64,7 @@ def extract_mds_measures(config):
         measures.append({"district_heating_networks": "District heating networks"})
 
     if not pd.isnull(config["Elec Storage Htrs (Out of scope -Prov sum only)"]):
-        measures.append({"electric_storage_heaters": "Elec Storage Htrs (Out of scope -Prov sum only)"})
+        measures.append({"high_heat_retention_storage_heaters": "Elec Storage Htrs (Out of scope -Prov sum only)"})
 
     if not pd.isnull(config["Low Energy Bulbs"]):
         measures.append({"low_energy_lighting": "Low Energy Bulbs"})
@@ -269,3 +269,26 @@ def app():
         "budget": None,
     }
     print(body)
+
+    # Optimised version where we specify the measures
+    measures = [
+        "external_wall_insulation",
+        "cavity_wall_insulation",
+        "loft_insulation",
+        "air_source_heat_pump",
+        "high_heat_retention_storage_heaters",
+        "solar_pv"
+    ]
+
+    body = {
+        "portfolio_id": str(PORTFOLIO_ID),
+        "housing_type": "Social",
+        "goal": "Increase EPC",
+        "goal_value": "C",
+        "trigger_file_path": filename,
+        "already_installed_file_path": "",
+        "patches_file_path": "",
+        "non_invasive_recommendations_file_path": "",
+        "measures": measures,
+        "budget": None,
+    }
diff --git a/etl/customers/lhp/30_may_2024_data_pull.py b/etl/customers/lhp/30_may_2024_data_pull.py
new file mode 100644
index 00000000..4bf15caa
--- /dev/null
+++ b/etl/customers/lhp/30_may_2024_data_pull.py
@@ -0,0 +1,148 @@
+import os
+
+import pandas as pd
+from tqdm import tqdm
+
+from dotenv import load_dotenv
+from utils.s3 import read_excel_from_s3
+from backend.SearchEpc import SearchEpc
+from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
+
+from recommendations.recommendation_utils import (
+    estimate_perimeter,
+    estimate_external_wall_area,
+    estimate_number_of_floors
+)
+
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+
+def app():
+    """
+    This app pulls EPC data for some properties owned by LHP
+    :return:
+    """
+    # asset_list = read_excel_from_s3(
+    #     bucket_name="retrofit-datalake-dev",
+    #     file_key="customers/guiness/TGP CW Properties PV.xlsx",
+    #     header_row=0
+    # )
+    asset_list = pd.read_excel("/Users/khalimconn-kowlessar/Downloads/Echo4 3.4.24.xlsx", header=0)
+
+    epc_data = []
+    for _, home in tqdm(asset_list.iterrows(), total=len(asset_list)):
+
+        full_address = home["ADDRESS"]
+        address_split = full_address.split(",")
+        address1 = address_split[0].strip()
+        postcode = address_split[-1].strip()
+
+        searcher = SearchEpc(
+            address1=address1,
+            postcode=postcode,
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key="",
+            property_type=None,
+            fast=True,
+            full_address=full_address
+        )
+        # Force the skipping of estimating the EPC
+        searcher.ordnance_survey_client.property_type = None
+        searcher.ordnance_survey_client.built_form = None
+
+        searcher.find_property(skip_os=True)
+        if searcher.newest_epc is None:
+            continue
+
+        epc = {
+            "asset_list_address": full_address,
+            **searcher.newest_epc.copy()
+        }
+
+        epc_data.append(epc)
+
+    epc_df = pd.DataFrame(epc_data)
+
+    # Retrieve just the data we need
+    epc_df = epc_df[
+        [
+            "asset_list_address",
+            "uprn",
+            "property-type",
+            "built-form",
+            "inspection-date",
+            "current-energy-rating",
+            "current-energy-efficiency",
+            "roof-description",
+            "walls-description",
+            "transaction-type",
+            # New fields needed
+            "secondheat-description",
+            "total-floor-area",
+            "construction-age-band",
+            "floor-height",
+            "number-habitable-rooms",
+            "mainheat-description"
+        ]
+    ]
+
+    asset_list = asset_list.merge(
+        epc_df,
+        how="left",
+        left_on=["ADDRESS"],
+        right_on=["asset_list_address"]
+    )
+
+    asset_list = asset_list.drop(columns=["asset_list_address"])
+
+    # Rename the columns
+    asset_list = asset_list.rename(columns={
+        "inspection-date": "Date of last EPC",
+        "current-energy-efficiency": "SAP score on register",
+        "current-energy-rating": "EPC rating on register",
+        "property-type": "Property Type",
+        "built-form": "Archetype",
+        "total-floor-area": "Property Floor Area",
+        "construction-age-band": "Property Age Band",
+        "floor-height": "Property Floor Height",
+        "number-habitable-rooms": "Number of Habitable Rooms",
+        "walls-description": "Wall Construction",
+        "roof-description": "Roof Construction",
+        "mainheat-description": "Heating Type",
+        "secondheat-description": "Secondary Heating",
+        "transaction-type": "Reason for last EPC"
+    })
+
+    asset_list["Estimated Number of Floors"] = asset_list.apply(
+        lambda x: estimate_number_of_floors(property_type=x["Property Type"]), axis=1
+    )
+
+    asset_list["Property Floor Area"] = asset_list["Property Floor Area"].astype(float)
+    asset_list["Number of Habitable Rooms"] = asset_list["Number of Habitable Rooms"].astype(float)
+
+    asset_list["Estimated Perimeter (m)"] = asset_list.apply(
+        lambda x: estimate_perimeter(
+            floor_area=x["Property Floor Area"] / x["Estimated Number of Floors"],
+            num_rooms=x["Number of Habitable Rooms"] / x["Estimated Number of Floors"],
+        ), axis=1
+    )
+
+    asset_list["Estimated Heat Loss Perimeter (m)"] = asset_list.apply(
+        lambda x: estimate_external_wall_area(
+            num_floors=x["Estimated Number of Floors"],
+            floor_height=float(x["Property Floor Height"]) if x["Property Floor Height"] else 2.5,
+            perimeter=x["Estimated Perimeter (m)"],
+            built_form=x["Archetype"]
+        ),
+        axis=1
+    )
+
+    asset_list["Roof Insulation Thickness"] = asset_list.apply(
+        lambda x: RoofAttributes(description=x["Roof Construction"]).process()["insulation_thickness"],
+        axis=1
+    )
+
+    # Store as an excel
+    filename = "LHP EPC Data pull.xlsx"
+    asset_list.to_excel(filename, index=False)
diff --git a/etl/customers/unitas/20_may_2024_data_pull.py b/etl/customers/unitas/20_may_2024_data_pull.py
new file mode 100644
index 00000000..21686ef4
--- /dev/null
+++ b/etl/customers/unitas/20_may_2024_data_pull.py
@@ -0,0 +1,148 @@
+import os
+
+import pandas as pd
+from tqdm import tqdm
+
+from dotenv import load_dotenv
+from backend.SearchEpc import SearchEpc
+from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
+
+from recommendations.recommendation_utils import (
+    estimate_perimeter,
+    estimate_external_wall_area,
+    estimate_number_of_floors
+)
+
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+
+def app():
+    """
+    This app pulls EPC data for some properties owned by Unitas
+    :return:
+    """
+    # asset_list = read_excel_from_s3(
+    #     bucket_name="retrofit-datalake-dev",
+    #     file_key="customers/guiness/TGP CW Properties PV.xlsx",
+    #     header_row=0
+    # )
+    asset_list = pd.read_excel(
+        "/Users/khalimconn-kowlessar/Downloads/UNITAS BUNGALOWS - EPC DATA PULL.xlsx", header=0
+    )
+
+    epc_data = []
+    for _, home in tqdm(asset_list.iterrows(), total=len(asset_list)):
+
+        searcher = SearchEpc(
+            address1=str(home["Address Line 1"]),
+            postcode=home["Post Code"],
+            uprn=home["Property Reference"],
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key="",
+            property_type=None,
+            fast=True
+        )
+        # Force the skipping of estimating the EPC
+        searcher.ordnance_survey_client.property_type = None
+        searcher.ordnance_survey_client.built_form = None
+
+        searcher.find_property(skip_os=True)
+        if searcher.newest_epc is None:
+            continue
+
+        epc = {
+            "asset_list_address": home["Address Line 1"],
+            "asset_list_postcode": home["Post Code"],
+            **searcher.newest_epc.copy()
+        }
+
+        epc_data.append(epc)
+
+    epc_df = pd.DataFrame(epc_data)
+
+    # Retrieve just the data we need
+    epc_df = epc_df[
+        [
+            "asset_list_address",
+            "uprn",
+            "property-type",
+            "built-form",
+            "inspection-date",
+            "current-energy-rating",
+            "current-energy-efficiency",
+            "roof-description",
+            "walls-description",
+            "transaction-type",
+            # New fields needed
+            "secondheat-description",
+            "total-floor-area",
+            "construction-age-band",
+            "floor-height",
+            "number-habitable-rooms",
+            "mainheat-description"
+        ]
+    ]
+
+    asset_list = asset_list.merge(
+        epc_df,
+        how="left",
+        left_on=["Address Line 1"],
+        right_on=["asset_list_address"]
+    )
+
+    asset_list = asset_list.drop(columns=["asset_list_address"])
+
+    # Rename the columns
+    asset_list = asset_list.rename(columns={
+        "inspection-date": "Date of last EPC",
+        "current-energy-efficiency": "SAP score on register",
+        "current-energy-rating": "EPC rating on register",
+        "property-type": "EPC Property Type",
+        "built-form": "Archetype",
+        "total-floor-area": "Property Floor Area",
+        "construction-age-band": "Property Age Band",
+        "floor-height": "Property Floor Height",
+        "number-habitable-rooms": "Number of Habitable Rooms",
+        "walls-description": "Wall Construction",
+        "roof-description": "Roof Construction",
+        "mainheat-description": "Heating Type",
+        "secondheat-description": "Secondary Heating",
+        "transaction-type": "Reason for last EPC"
+    })
+
+    asset_list["Estimated Number of Floors"] = asset_list.apply(
+        lambda x: estimate_number_of_floors(property_type=x["EPC Property Type"]) if not pd.isnull(
+            x["EPC Property Type"]) else None,
+        axis=1
+    )
+
+    asset_list["Property Floor Area"] = asset_list["Property Floor Area"].astype(float)
+    asset_list["Number of Habitable Rooms"] = asset_list["Number of Habitable Rooms"].astype(float)
+
+    asset_list["Estimated Perimeter (m)"] = asset_list.apply(
+        lambda x: estimate_perimeter(
+            floor_area=x["Property Floor Area"] / x["Estimated Number of Floors"],
+            num_rooms=x["Number of Habitable Rooms"] / x["Estimated Number of Floors"],
+        ) if not pd.isnull(x["uprn"]) else None, axis=1
+    )
+
+    asset_list["Estimated Heat Loss Perimeter (m)"] = asset_list.apply(
+        lambda x: estimate_external_wall_area(
+            num_floors=x["Estimated Number of Floors"],
+            floor_height=float(x["Property Floor Height"]) if x["Property Floor Height"] else 2.5,
+            perimeter=x["Estimated Perimeter (m)"],
+            built_form=x["Archetype"]
+        ) if not pd.isnull(x["uprn"]) else None,
+        axis=1
+    )
+
+    asset_list["Roof Insulation Thickness"] = asset_list.apply(
+        lambda x: RoofAttributes(description=x["Roof Construction"]).process()["insulation_thickness"] if not pd.isnull(
+            x["uprn"]) else None,
+        axis=1
+    )
+
+    # Store as an excel
+    filename = "UNITAS BUNGALOWS - EPC DATA PULL - May 30tg 2024.xlsx"
+    asset_list.to_excel(filename, index=False)
diff --git a/recommendations/Mds.py b/recommendations/Mds.py
index 7453e5e9..b8455146 100644
--- a/recommendations/Mds.py
+++ b/recommendations/Mds.py
@@ -1,3 +1,4 @@
+import itertools
 from backend.Property import Property
 from recommendations.FloorRecommendations import FloorRecommendations
 from recommendations.WallRecommendations import WallRecommendations
@@ -18,7 +19,7 @@ class Mds:
     Handles the contruction of the MDS report
     """
 
-    def __init__(self, property_instance: Property, materials):
+    def __init__(self, property_instance: Property, materials, optimise_measures: bool = False):
         self.property_instance = property_instance
 
         self.floor_recommender = FloorRecommendations(property_instance=property_instance, materials=materials)
@@ -35,13 +36,128 @@ class Mds:
         self.hotwater_recommender = HotwaterRecommendations(property_instance=property_instance)
         self.secondary_heating_recommender = SecondaryHeating(property_instance=property_instance)
 
+        # This flag indicates that we wish to optimise the measures, to the property, depending on the set of measures
+        # we have been provided
+        self.optimise_measures = optimise_measures
+
+    def select_optimal_measure_set(self, measures):
+
+        # This is the set
+        all_considered_measures = [
+            'external_wall_insulation',
+            'cavity_wall_insulation',
+            'loft_insulation',
+            'air_source_heat_pump',
+            'high_heat_retention_storage_heaters',
+            'solar_pv'
+        ]
+
+        # Check if our measures are within the ones we've handled
+        new = [m for m in measures if m not in all_considered_measures]
+        if new:
+            raise NotImplementedError("New measures - handle me")
+
+        def prune_options(options, measures):
+            options_pruned = []
+            for _group in options:
+                group_pruned = [m for m in _group if m in measures]
+                if not group_pruned:
+                    continue
+                options_pruned.append(group_pruned)
+
+            return options_pruned
+
+        # For options in here, a property could only possibly have one of these
+        one_choice_options = [
+            ["external_wall_insulation", "cavity_wall_insulation", "internal_wall_insulation"],
+            ["loft_insulation", "flat_roof_insulation", "room_in_roof_insulation"],
+            ["solid_floor_insulation", "suspended_floor_insulation"],
+        ]
+        # prune one_choice_options based on the measure set considered for this property
+        one_choice_options_pruned = prune_options(one_choice_options, measures)
+
+        # For options in here, a property could have one or the other so all should be considered
+        multi_path_options = [
+            ["air_source_heat_pump", "high_heat_retention_storage_heaters", "gas_boiler"]
+        ]
+
+        multi_path_options_pruned = prune_options(multi_path_options, measures)
+
+        one_choice_combinations = [list(itertools.product(*one_choice_options_pruned))]
+        one_choice_combinations = [list(x) for sublist in one_choice_combinations for x in sublist]
+        multi_path_combinations = [list(itertools.product(*multi_path_options_pruned))]
+        multi_path_combinations = [list(x) for sublist in multi_path_combinations for x in sublist]
+
+        one_choice_flat = [item for sublist in one_choice_options_pruned for item in sublist]
+        multi_path_flat = [item for sublist in multi_path_options_pruned for item in sublist]
+
+        remaining_measures = [
+            measure for measure in measures
+            if measure not in one_choice_flat and measure not in multi_path_flat
+        ]
+
+        # Combine one_choice and multi_path combinations with remaining measures
+        final_combinations = []
+        for one_choice in one_choice_combinations:
+            for multi_path in multi_path_combinations:
+                final_combinations.append([m for m in one_choice + multi_path + remaining_measures])
+
+        pruned_combinations = []
+        for combination in final_combinations:
+            pruned_measures = []
+            for measure in combination:
+                if measure not in measures:
+                    continue
+                # There are certain measures where we need to
+                if measure == "external_wall_insulation":
+                    # Check if the wall is solid
+                    if self.property_instance.walls['is_solid_brick']:
+                        pruned_measures.append(measure)
+                    continue
+
+                if measure == "cavity_wall_insulation":
+                    # Check if the wall is cavity
+                    if self.property_instance.walls['is_cavity_wall']:
+                        pruned_measures.append(measure)
+                    continue
+
+                if measure == "loft_insulation":
+                    # Check if the roof is suitable for loft insulation
+                    if self.property_instance.roof["is_pitched"]:
+                        pruned_measures.append(measure)
+                    continue
+
+                if measure == "solid_floor_insulation":
+                    # Check if the floor is solid
+                    if self.property_instance.floor["is_solid"]:
+                        pruned_measures.append(measure)
+                    continue
+
+                if measure == "suspended_floor_insulation":
+                    # Check if the floor is suspended
+                    if self.property_instance.floor["is_suspended"]:
+                        pruned_measures.append(measure)
+                    continue
+
+                pruned_measures.append(measure)
+
+            if len(combination) != len(pruned_measures):
+                continue
+
+            pruned_combinations.append(pruned_measures)
+
+        # We're left with the subset of measures that are possible for this property
+        # These are the possible groups of measures that could be applied to this home
+        return pruned_combinations
+
     def build(self):
         if self.property_instance.measures is None:
             raise NotImplementedError("No measures in the property - implement me")
 
-        measures = self.property_instance.measures
-
-        measure_config_list = [list(m.keys())[0] for m in measures]
+        if self.optimise_measures:
+            measure_config_list = self.select_optimal_measure_set(self.property_instance.measures)
+        else:
+            measure_config_list = [list(m.keys())[0] for m in self.property_instance.measures]
 
         not_implemented_measures = [
             "party_wall_insulation",
@@ -105,7 +221,7 @@ class Mds:
             recs = self.insert_recommendation_id(recs, measures, "air_source_heat_pump")
             mds_recommendations.append(recs)
 
-        if "electric_storage_heaters" in measure_config_list:
+        if "high_heat_retention_storage_heaters" in measure_config_list:
             recs = self.heating_recommender.recommend_hhr_storage_heaters(
                 phase=0, system_change=True, heating_controls_only=False, _return=True
             )

From 1eca5af64c6f0f8f15ee10f7e3c56e4694153088 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 3 Jun 2024 18:17:31 +0100
Subject: [PATCH 11/80] adding the optimisation approach for mds

---
 backend/app/plan/router.py                    |  26 ++-
 .../places_for_people/route_march.py          |  43 +++++
 etl/customers/unitas/Audit_check.py           | 182 ++++++++++++++++++
 recommendations/Mds.py                        | 119 +++++++++---
 recommendations/Recommendations.py            |   2 +-
 5 files changed, 335 insertions(+), 37 deletions(-)
 create mode 100644 etl/customers/unitas/Audit_check.py

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 91fc6963..fc754f07 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -742,24 +742,42 @@ async def build_mds(body: MdsRequest):
         logger.info("Getting components and epc recommendations")
         recommendations_scoring_data = []
         representative_recommendations = {}
+        recommendations = {}
 
         # TODO: Action the optimise_measures flat
         for p in tqdm(input_properties):
             p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
 
             mds = Mds(property_instance=p, materials=materials, optimise_measures=optimise_measures)
-            property_representative_recommendations, errors = mds.build()
+            mds_recommendations, property_representative_recommendations, errors = mds.build()
 
             if errors:
                 logger.info("Errors occurred during MDS build")
 
+            recommendations[p.id] = mds_recommendations
             representative_recommendations[p.id] = property_representative_recommendations
 
             # Build the scoring data
             p.create_base_difference_epc_record(cleaned_lookup=cleaned)
-            recommendations_scoring_data.append(
-                p.simulate_all_representative_recommendations(property_representative_recommendations)
-            )
+            if optimise_measures:
+                for _id, mds_recs in mds_recommendations.items():
+                    representative_ids = [r["recommendation_id"] for r in property_representative_recommendations[_id]]
+                    simulation_mds_recs = []
+                    for recs in mds_recs:
+                        simulation_mds_recs.append(
+                            [r for r in recs if r["recommendation_id"] in representative_ids]
+                        )
+
+                    p.adjust_difference_record_with_recommendations(
+                        simulation_mds_recs, property_representative_recommendations[_id]
+                    )
+                    recommendations_scoring_data.extend(p.recommendations_scoring_data)
+
+            else:
+
+                recommendations_scoring_data.append(
+                    p.simulate_all_representative_recommendations(property_representative_recommendations)
+                )
 
         logger.info("Preparing data for scoring in sap change api")
         recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data)
diff --git a/etl/customers/places_for_people/route_march.py b/etl/customers/places_for_people/route_march.py
index 5da1c2f7..7b55702c 100644
--- a/etl/customers/places_for_people/route_march.py
+++ b/etl/customers/places_for_people/route_march.py
@@ -295,6 +295,49 @@ def main():
 
     addresses_df2.to_excel("Places For People EPC data with surveyor.xlsx", index=False)
 
+    # Read the surveyor-matched EPC data back in
+    df = pd.read_excel("Places For People EPC data with surveyor.xlsx")
+    df = df[
+        df["assessor_name"].isin(
+            [
+                "Arsalan Khalid", "Kieran Bradnock", "Wayne Davies", "Lindsay Sands", "Bruce Nethercot",
+                "Christopher Hearn", "Robert Sigerson", "Daniel Riddle", "Leroy Sands",
+            ]
+        )
+    ]
+
+    # Get the EPC
+    heights = []
+    for _, row in tqdm(df.iterrows(), total=len(df)):
+        searcher = SearchEpc(
+            address1=str(row["Matched EPC Address"]),
+            postcode=str(row["POSTCODE"]),
+            uprn=str(int(row["uprn"])),
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key="",
+            property_type=None,
+            fast=True,
+        )
+        # Force the skipping of estimating the EPC
+        searcher.ordnance_survey_client.property_type = None
+        searcher.ordnance_survey_client.built_form = None
+
+        searcher.find_property(skip_os=True)
+
+        height = {
+            "uprn": row["uprn"],
+            "floor_height": searcher.newest_epc["floor-height"]
+        }
+        heights.append(height)
+
+    df = df.merge(
+        pd.DataFrame(heights),
+        how="left",
+        on="uprn"
+    )
+
+    df.to_excel("WF surveyors with floor heights.xlsx", index=False)
+
 
 if __name__ == "__main__":
     main()
diff --git a/etl/customers/unitas/Audit_check.py b/etl/customers/unitas/Audit_check.py
new file mode 100644
index 00000000..ad5361d4
--- /dev/null
+++ b/etl/customers/unitas/Audit_check.py
@@ -0,0 +1,182 @@
+import pandas as pd
+import os
+
+from tqdm import tqdm
+
+from dotenv import load_dotenv
+from backend.SearchEpc import SearchEpc
+
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+
+def app():
+    # Read in rolling master
+    master = pd.read_csv(
+        "/Users/khalimconn-kowlessar/Downloads/UNITAS ( STOKE) MASTER ROLLING SHEET UPDATED 16.5.24 K - PASSWORD "
+        "PROTECTED/ECO 4 - PHASE 1-Table 1.csv"
+    )
+
+    master = master[master["INSTALLER"] == "SCIS"]
+
+    master = master[
+        [
+            'UPRN', 'NO.', 'Street / Block Name', 'Town/Area', 'Post Code', 'Surveyor', "SUBMISSION DATE"
+        ]
+    ]
+
+    master = master[~pd.isnull(master["UPRN"])]
+    master = master[master["UPRN"] != "NOT ON ASSET LIST"]
+
+    heights = []
+    eco_assessment_epcs = []
+    for _, row in tqdm(master.iterrows(), total=len(master)):
+        searcher = SearchEpc(
+            address1="",
+            postcode="",
+            uprn=str(int(row["UPRN"])),
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key="",
+            property_type=None,
+            fast=False,
+        )
+        # Force the skipping of estimating the EPC
+        searcher.ordnance_survey_client.property_type = None
+        searcher.ordnance_survey_client.built_form = None
+
+        searcher.find_property(skip_os=True)
+        if searcher.newest_epc is None:
+            continue
+
+        # Look for eco assessment epcs
+        eco_epc = [x for x in [searcher.newest_epc] + searcher.older_epcs if x['transaction-type'] == 'ECO assessment']
+        # Take the newest
+        eco_epc = sorted(eco_epc, key=lambda x: x['inspection-date'], reverse=True)
+        if eco_epc:
+            eco_assessment_epcs.append(eco_epc[0])
+
+        height = {
+            "uprn": row["UPRN"],
+            "floor_height": searcher.newest_epc["floor-height"]
+        }
+        heights.append(height)
+
+    heights_df = pd.DataFrame(heights)
+
+    eco_assessment_epcs_df = pd.DataFrame(eco_assessment_epcs)
+
+    merged_heights_df = master.merge(heights_df, left_on="UPRN", right_on="uprn", how="inner")
+    merged_heights_df = merged_heights_df[merged_heights_df["floor_height"] != ""]
+    merged_eco_assessment_epcs_df = master.merge(eco_assessment_epcs_df[["uprn", "floor-height"]], left_on="UPRN",
+                                                 right_on="uprn", how="inner")
+    merged_eco_assessment_epcs_df["floor-height"] = merged_eco_assessment_epcs_df["floor-height"].astype(float)
+
+    merged_eco_assessment_epcs_df.groupby("Surveyor")["floor-height"].mean()
+
+    # Store
+    merged_heights_df.to_csv("Unitas 2022 heights - based on newest EPC.csv", index=False)
+    merged_eco_assessment_epcs_df.to_csv("Unitas 2022 heights - based on ECO assessment EPC.csv", index=False)
+
+    # Read in a different sheet
+    master = pd.read_excel(
+        "/Users/khalimconn-kowlessar/Downloads/COMMUNITY HOUSING SURVEYS WITH A POST EPC.xlsx"
+    )
+
+    master["row_number"] = master.index
+
+    heights = []
+    eco_assessment_epcs = []
+    expected_pre = []
+    expected_post = []
+    biggest_floor_height = []
+    for _, row in tqdm(master.iterrows(), total=len(master)):
+
+        full_address = ", ".join([
+            str(row["NO."]), row["Street / Block Name"], row["Town/Area"], row["Post Code"]
+        ])
+        searcher = SearchEpc(
+            address1=str(row["NO."]),
+            postcode=str(row["Post Code"]),
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key="",
+            property_type=None,
+            fast=False,
+            full_address=full_address
+        )
+
+        # Force the skipping of estimating the EPC
+        searcher.ordnance_survey_client.property_type = None
+        searcher.ordnance_survey_client.built_form = None
+
+        searcher.find_property(skip_os=True)
+
+        if searcher.newest_epc is None:
+            continue
+
+        all_epcs = [searcher.newest_epc] + searcher.older_epcs
+        # Search for EPCs with a current energy efficiency of 54 and of 69
+        sap_54s = [x for x in all_epcs if x["current-energy-efficiency"] == "54"]
+        sap_69s = [x for x in all_epcs if x["current-energy-efficiency"] == "69"]
+        heights = [float(x["floor-height"]) for x in all_epcs if x["floor-height"] != ""]
+
+        # Look for eco assessment epcs
+        eco_epc = [x for x in [searcher.newest_epc] + searcher.older_epcs if x['transaction-type'] == 'ECO assessment']
+        # Take the newest
+        eco_epc = sorted(eco_epc, key=lambda x: x['inspection-date'], reverse=True)
+        if eco_epc:
+            eco_assessment_epcs.append(
+                {
+                    "row_number": row["row_number"],
+                    **eco_epc[0]
+                }
+            )
+
+        if heights:
+            floor_height_max = max(heights)
+            biggest_floor_height.append(
+                {
+                    "row_number": row["row_number"],
+                    "floor_height": floor_height_max
+                }
+            )
+
+        if sap_54s:
+            expected_pre.append(
+                {
+                    "row_number": row["row_number"],
+                    **sap_54s[0]
+                }
+            )
+
+        if sap_69s:
+            expected_post.append(
+                {
+                    "row_number": row["row_number"],
+                    **sap_69s[0]
+                }
+            )
+
+    expected_pre_df = pd.DataFrame(expected_pre)
+    expected_post_df = pd.DataFrame(expected_post)
+
+    heights_df = pd.DataFrame(biggest_floor_height)
+    eco_assessment_epcs_df = pd.DataFrame(eco_assessment_epcs)
+
+    merged_heights_df = master.merge(heights_df, on="row_number", how="inner")
+    merged_heights_df = merged_heights_df[merged_heights_df["floor_height"] != ""]
+
+    merged_eco_assessment_epcs_df = master.merge(
+        eco_assessment_epcs_df[["row_number", "floor-height"]], on="row_number", how="inner"
+    )
+    merged_eco_assessment_epcs_df["floor-height"] = merged_eco_assessment_epcs_df["floor-height"].astype(float)
+
+    merged_eco_assessment_epcs_df.groupby("Surveyor")["floor-height"].mean()
+
+    # Check average floor height for social housing properties with ECO assessment EPCs in Birmingham
+    sample = pd.read_csv("local_data/all-domestic-certificates/domestic-E08000025-Birmingham/certificates.csv")
+    sample = sample[sample["TRANSACTION_TYPE"] == "ECO assessment"]
+    sample = sample[sample["TENURE"].isin(["rental (social)", "Rented (social)"])]
+    sample["FLOOR_HEIGHT"] = sample["FLOOR_HEIGHT"].astype(float)
+    sample["FLOOR_HEIGHT"].mean()
+
+    sample[pd.to_datetime(sample["LODGEMENT_DATE"]) >= "2022-01-01"]["FLOOR_HEIGHT"].mean()
diff --git a/recommendations/Mds.py b/recommendations/Mds.py
index b8455146..ac80af1a 100644
--- a/recommendations/Mds.py
+++ b/recommendations/Mds.py
@@ -1,4 +1,5 @@
 import itertools
+from utils.logger import setup_logger
 from backend.Property import Property
 from recommendations.FloorRecommendations import FloorRecommendations
 from recommendations.WallRecommendations import WallRecommendations
@@ -13,6 +14,8 @@ from recommendations.HotwaterRecommendations import HotwaterRecommendations
 from recommendations.SecondaryHeating import SecondaryHeating
 from recommendations.Recommendations import Recommendations
 
+logger = setup_logger()
+
 
 class Mds:
     """
@@ -52,6 +55,16 @@ class Mds:
             'solar_pv'
         ]
 
+        format_map = {
+            "external_wall_insulation": "EWI (Trad Const)",
+            "internal_wall_insualtion": "IWI",
+            "cavity_wall_insulation": "CWI",
+            "loft_insulation": "LI",
+            "air_source_heat_pump": "ASHP Htg",
+            "high_heat_retention_storage_heaters": "High Heat Retention Storage Heaters",
+            "solar_pv": "Solar PV",
+        }
+
         # Check if our measures are within the ones we've handled
         new = [m for m in measures if m not in all_considered_measures]
         if new:
@@ -144,21 +157,18 @@ class Mds:
             if len(combination) != len(pruned_measures):
                 continue
 
-            pruned_combinations.append(pruned_measures)
+            pruned_measures_formatted = []
+            for pm in pruned_measures:
+                pruned_measures_formatted.append({pm: format_map[pm]})
+
+            pruned_combinations.append(pruned_measures_formatted)
 
         # We're left with the subset of measures that are possible for this property
         # These are the possible groups of measures that could be applied to this home
+
         return pruned_combinations
 
-    def build(self):
-        if self.property_instance.measures is None:
-            raise NotImplementedError("No measures in the property - implement me")
-
-        if self.optimise_measures:
-            measure_config_list = self.select_optimal_measure_set(self.property_instance.measures)
-        else:
-            measure_config_list = [list(m.keys())[0] for m in self.property_instance.measures]
-
+    def _build(self, measure_config_list, measures):
         not_implemented_measures = [
             "party_wall_insulation",
             "ground_source_heat_pump",
@@ -176,114 +186,159 @@ class Mds:
 
         mds_recommendations = []
         errors = []
+        phase = 0
 
         # TODO: Could use a decarator to reduce the boilerplate code - insert_recommendation_id and then the append
 
         if "external_wall_insulation" in measure_config_list:
-            recs = self.wall_recommender.mds_recommend_ewi(phase=0)
+            recs = self.wall_recommender.mds_recommend_ewi(phase=phase)
             if not recs:
                 raise Exception("No recommendations for external wall insulation")
             recs = self.insert_recommendation_id(recs, measures, "external_wall_insulation")
             mds_recommendations.append(recs)
+            if self.optimise_measures and len(recs):
+                phase += 1
 
         if "cavity_wall_insulation" in measure_config_list:
-            recs = self.wall_recommender.mds_recommend_cavity_wall_insulation(phase=0)
+            recs = self.wall_recommender.mds_recommend_cavity_wall_insulation(phase=phase)
             recs = self.insert_recommendation_id(recs, measures, "cavity_wall_insulation")
             mds_recommendations.append(recs)
+            if self.optimise_measures and len(recs):
+                phase += 1
 
         if "loft_insulation" in measure_config_list:
             # Check if the roof is suitable for loft insulation
             if self.property_instance.roof['is_roof_room']:
                 errors.append("Roof is a room")
             else:
-                recs = self.roof_recommender.mds_loft_insulation(phase=0)
+                recs = self.roof_recommender.mds_loft_insulation(phase=phase)
                 if not recs:
                     raise Exception("No recommendations for loft insulation")
                 recs = self.insert_recommendation_id(recs, measures, "loft_insulation")
                 mds_recommendations.append(recs)
+                if self.optimise_measures and len(recs):
+                    phase += 1
 
         if "internal_wall_insulation" in measure_config_list:
             raise Exception("check me out 4")
-            self.wall_recommender.recommend(phase=0)
+            self.wall_recommender.recommend(phase=phase)
 
         if "suspended_floor_insulation" in measure_config_list:
             raise Exception("check me out 5")
-            self.floor_recommender.recommend(phase=0)
+            self.floor_recommender.recommend(phase=phase)
 
         if "solid_floor_insulation" in measure_config_list:
             raise Exception("check me out 6")
-            self.floor_recommender.recommend(phase=0)
+            self.floor_recommender.recommend(phase=phase)
 
         if "air_source_heat_pump" in measure_config_list:
             recs = self.heating_recommender.recommend_air_source_heat_pump(
-                phase=0, has_cavity_or_loft_recommendations=False, _return=True
+                phase=phase, has_cavity_or_loft_recommendations=False, _return=True
             )
             recs = self.insert_recommendation_id(recs, measures, "air_source_heat_pump")
             mds_recommendations.append(recs)
+            if self.optimise_measures and len(recs):
+                phase += 1
 
         if "high_heat_retention_storage_heaters" in measure_config_list:
             recs = self.heating_recommender.recommend_hhr_storage_heaters(
-                phase=0, system_change=True, heating_controls_only=False, _return=True
+                phase=phase, system_change=True, heating_controls_only=False, _return=True
             )
-            recs = self.insert_recommendation_id(recs, measures, "electric_storage_heaters")
+            recs = self.insert_recommendation_id(recs, measures, "high_heat_retention_storage_heaters")
             mds_recommendations.append(recs)
+            if self.optimise_measures and len(recs):
+                phase += 1
 
         if "low_energy_lighting" in measure_config_list:
             raise Exception("check me out 9")
-            self.lighting_recommender.recommend(phase=0)
+            self.lighting_recommender.recommend(phase=phase)
 
         if "cylinder_insulation" in measure_config_list:
             raise Exception("check me out 10")
-            self.hotwater_recommender.recommend(phase=0)
+            self.hotwater_recommender.recommend(phase=phase)
 
         if "smart_controls" in measure_config_list:
             raise Exception("check me out 11")
-            self.heating_recommender.recommend(phase=0)
+            self.heating_recommender.recommend(phase=phase)
 
         if "zone_controls" in measure_config_list:
             raise Exception("check me out 12")
-            self.heating_recommender.recommend(phase=0)
+            self.heating_recommender.recommend(phase=phase)
 
         if "trvs" in measure_config_list:
             raise Exception("check me out 13")
-            self.heating_recommender.recommend(phase=0)
+            self.heating_recommender.recommend(phase=phase)
 
         if "solar_pv" in measure_config_list:
-            recs = self.solar_recommender.mds_recommend(phase=0, solar_pv_percentage=0.5)
+            recs = self.solar_recommender.mds_recommend(phase=phase, solar_pv_percentage=0.5)
             recs = self.insert_recommendation_id(recs, measures, "solar_pv")
             mds_recommendations.append(recs)
+            if self.optimise_measures and len(recs):
+                phase += 1
 
         if "double_glazing" in measure_config_list:
             raise Exception("check me out 15")
-            self.windows_recommender.recommend(phase=0)
+            self.windows_recommender.recommend(phase=phase)
 
         if "mechanical_ventilation" in measure_config_list:
             raise Exception("check me out 16")
-            self.ventilation_recomender.recommend(phase=0)
+            self.ventilation_recomender.recommend(phase=phase)
 
         if "gas_boiler" in measure_config_list:
             raise Exception("check me out 17")
-            self.heating_recommender.recommend(phase=0)
+            self.heating_recommender.recommend(phase=phase)
 
         if "flat_roof_insulation" in measure_config_list:
             raise Exception("check me out 18")
-            self.roof_recommender.recommend(phase=0)
+            self.roof_recommender.recommend(phase=phase)
 
         if "room_in_roof_insulation" in measure_config_list:
             raise Exception("check me out 19")
-            self.roof_recommender.recommend(phase=0)
+            self.roof_recommender.recommend(phase=phase)
 
         property_representative_recommendations = Recommendations.create_representative_recommendations(
             mds_recommendations, non_invasive_recommendations=[]
         )
 
-        return property_representative_recommendations, errors
+        return mds_recommendations, property_representative_recommendations, errors
+
+    def build(self):
+        if self.property_instance.measures is None:
+            raise NotImplementedError("No measures in the property - implement me")
+
+        if self.optimise_measures:
+            measures_set = self.select_optimal_measure_set(self.property_instance.measures)
+            logger.info(f"Building recommendations for {len(measures_set)} combinations of measures")
+            mds_recommendations_map = {}
+            representative_recommendations_map = {}
+            errors_map = {}
+            for measures in measures_set:
+                measure_config_list = [list(x.keys())[0] for x in measures]
+                mds_recommendations, rep_recommendations, errors = self._build(
+                    measure_config_list=measure_config_list,
+                    measures=measures
+                )
+                if errors:
+                    logger.info(f"Errors: {errors}")
+
+                mds_recommendations_map[str(measure_config_list)] = mds_recommendations
+                representative_recommendations_map[str(measure_config_list)] = rep_recommendations
+                errors_map[str(measure_config_list)] = errors
+
+            return mds_recommendations_map, representative_recommendations_map, errors_map
+
+        else:
+            measure_config_list = [list(m.keys())[0] for m in self.property_instance.measures]
+            return self._build(measure_config_list=measure_config_list, measures=self.property_instance.measures)
 
     @staticmethod
     def insert_recommendation_id(recommendations, measures, measure_name):
         # Insert the recommendation identifier into this recommendation
         measure_config = [m for m in measures if measure_name in m][0]
+
+        idx = 0
         for r in recommendations:
-            r["recommendation_id"] = list(measure_config.values())[0]
+            r["recommendation_id"] = list(measure_config.values())[0] + "-" + str(idx)
+            idx += 1
 
         return recommendations
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index c8113cdc..19fba581 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -227,7 +227,7 @@ class Recommendations:
 
             recommendations_by_type = sorted(recommendations_by_type, key=lambda x: x["type"])
             representative_recommendations = []
-            for type, recommendations in groupby(recommendations_by_type, key=lambda x: x["type"]):
+            for _type, recommendations in groupby(recommendations_by_type, key=lambda x: x["type"]):
                 recommendations = list(recommendations)
                 # We also create an efficiency key, which is used to sort the recommendations
                 if has_u_value:

From 5e8930f265698f43281a963403f5b597a14769bd Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 3 Jun 2024 18:30:10 +0100
Subject: [PATCH 12/80] pass u-value through to simulation for cwi

---
 backend/app/plan/router.py             | 8 ++++++--
 recommendations/WallRecommendations.py | 6 +++++-
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index fc754f07..b8437fa2 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -744,7 +744,6 @@ async def build_mds(body: MdsRequest):
         representative_recommendations = {}
         recommendations = {}
 
-        # TODO: Action the optimise_measures flat
         for p in tqdm(input_properties):
             p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
 
@@ -771,7 +770,12 @@ async def build_mds(body: MdsRequest):
                     p.adjust_difference_record_with_recommendations(
                         simulation_mds_recs, property_representative_recommendations[_id]
                     )
-                    recommendations_scoring_data.extend(p.recommendations_scoring_data)
+
+                    data = p.recommendations_scoring_data.copy()
+                    for d in data:
+                        d["id"] = d["id"] + _id
+
+                    recommendations_scoring_data.extend(data)
 
             else:
 
diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py
index b2ad4e5d..fcd8e2bd 100644
--- a/recommendations/WallRecommendations.py
+++ b/recommendations/WallRecommendations.py
@@ -343,7 +343,11 @@ class WallRecommendations(Definitions):
                     wall_ending_config=wall_ending_config
                 )
 
-                simulation_config = {**simulation_config, **walls_simulation_config}
+                simulation_config = {
+                    **simulation_config,
+                    **walls_simulation_config,
+                    "walls_thermal_transmittance_ending": new_u_value
+                }
 
                 recommendations.append(
                     {

From 9284d6cf764d0d6a4e9b123474cada3f197b829b Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 3 Jun 2024 22:43:51 +0100
Subject: [PATCH 13/80] prepared the optimised output

---
 backend/app/plan/router.py | 138 ++++++++++++++++++++++++++++++++++++-
 recommendations/Mds.py     |  22 +++---
 2 files changed, 148 insertions(+), 12 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index b8437fa2..8d7309a6 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -773,7 +773,7 @@ async def build_mds(body: MdsRequest):
 
                     data = p.recommendations_scoring_data.copy()
                     for d in data:
-                        d["id"] = d["id"] + _id
+                        d["id"] = d["id"] + "*" + _id
 
                     recommendations_scoring_data.extend(data)
 
@@ -823,6 +823,142 @@ async def build_mds(body: MdsRequest):
             if p["uprn"]:
                 p["uprn"] = str(int(float(p["uprn"])))
 
+        import re
+        from backend.ml_models.AnnualBillSavings import AnnualBillSavings
+
+        if optimise_measures:
+            results = []
+            for p in input_properties:
+
+                package_comparison = []
+                for _id in recommendations[p.id].keys():
+                    sap_prediction = all_predictions["sap_change_predictions"][
+                        (all_predictions["sap_change_predictions"]["property_id"] == str(p.id)) &
+                        (all_predictions["sap_change_predictions"]["recommendation_id"].str.contains(re.escape(_id)))
+                        ].copy().reset_index(drop=True)
+                    sap_prediction["row_id"] = sap_prediction.index
+
+                    heat_demand_prediction = all_predictions["heat_demand_predictions"][
+                        (all_predictions["heat_demand_predictions"]["property_id"] == str(p.id)) &
+                        (all_predictions["heat_demand_predictions"]["recommendation_id"].str.contains(re.escape(_id)))
+                        ].copy().reset_index(drop=True)
+                    heat_demand_prediction["row_id"] = heat_demand_prediction.index
+
+                    carbon_prediction = all_predictions["carbon_change_predictions"][
+                        (all_predictions["carbon_change_predictions"]["property_id"] == str(p.id)) &
+                        (all_predictions["carbon_change_predictions"]["recommendation_id"].str.contains(re.escape(_id)))
+                        ].copy().reset_index(drop=True)
+                    carbon_prediction["row_id"] = carbon_prediction.index
+
+                    epc_target = body.goal_value
+                    sap_target = epc_to_sap_lower_bound(epc_target)
+                    # Keep predictions up to the first one that reaches the SAP target
+                    sap_threshold_barrier = sap_prediction[sap_prediction["predictions"] >= sap_target]
+                    if sap_threshold_barrier.empty:
+                        raise NotImplementedError("FIX ME")
+                    sap_threshold_barrier = sap_threshold_barrier.head(1)
+
+                    sap_prediction = sap_prediction[
+                        sap_prediction["row_id"] <= sap_threshold_barrier["row_id"].values[0]
+                        ]
+                    heat_demand_prediction = heat_demand_prediction[
+                        heat_demand_prediction["row_id"] <= sap_threshold_barrier["row_id"].values[0]
+                        ]
+                    carbon_prediction = carbon_prediction[
+                        carbon_prediction["row_id"] <= sap_threshold_barrier["row_id"].values[0]
+                        ]
+
+                    reverse_map = {v: k for k, v in Mds.format_map.items()}
+
+                    selected_measures = [
+                        reverse_map[x.split("-")[0]] for x in sap_prediction["recommendation_id"].values
+                    ]
+                    selected_measure_ids = [x.split("*")[0] for x in sap_prediction["recommendation_id"].values]
+
+                    costs = [
+                        r["total"] for r in representative_recommendations[p.id][_id] if
+                        r["recommendation_id"] in selected_measure_ids
+                    ]
+                    costs = sum(costs)
+
+                    sap_before = int(p.data["current-energy-efficiency"])
+                    sap_after = sap_prediction["predictions"].values[-1]
+
+                    epc_before = p.data["current-energy-rating"]
+                    epc_after = sap_to_epc(sap_after)
+
+                    heat_demand_before = p.data["energy-consumption-current"]
+                    heat_demand_after = heat_demand_prediction["predictions"].values[-1]
+
+                    carbon_before = p.data["co2-emissions-current"]
+                    carbon_after = carbon_prediction["predictions"].values[-1]
+
+                    current_adjusted_energy = AnnualBillSavings.adjust_energy_to_metered(
+                        epc_energy_consumption=heat_demand_before * p.floor_area,
+                        current_epc_rating=epc_before,
+                    )
+
+                    expected_adjusted_energy = AnnualBillSavings.adjust_energy_to_metered(
+                        epc_energy_consumption=heat_demand_after * p.floor_area,
+                        current_epc_rating=epc_before,
+                    )
+
+                    current_energy_bill = AnnualBillSavings.calculate_annual_bill(current_adjusted_energy)
+                    expected_energy_bill = AnnualBillSavings.calculate_annual_bill(expected_adjusted_energy)
+
+                    bill_savings = current_energy_bill - expected_energy_bill
+                    energy_savings = current_adjusted_energy - expected_adjusted_energy
+
+                    package_comparison.append(
+                        {
+                            "id": _id,
+                            "cost": costs,
+                            "measures": selected_measures,
+                            "sap_before": sap_before,
+                            "sap_after": sap_after,
+                            "epc_before": epc_before,
+                            "epc_after": epc_after,
+                            "heat_demand_before": heat_demand_before,
+                            "heat_demand_after": heat_demand_after,
+                            "carbon_before": carbon_before,
+                            "carbon_after": carbon_after,
+                            "bill_savings": bill_savings,
+                            "energy_savings": energy_savings,
+                        }
+                    )
+
+                package_comparison = pd.DataFrame(package_comparison)
+                # Find the smallest cost package
+                package_comparison = package_comparison.sort_values("cost")
+                package_comparison = package_comparison.head(1).to_dict("records")[0]
+
+                config = [c for c in plan_input if c["uprn"] == str(p.uprn)]
+                if not config:
+                    config = {"address": None, "postcode": None}
+                else:
+                    config = config[0]
+
+                results.append({
+                    "config_address": config["address"],
+                    "config_postcode": config["postcode"],
+                    "address": p.address,
+                    "postcode": p.postcode,
+                    "measures": package_comparison["measures"],
+                    "year_of_epc": p.data['lodgement-date'],
+                    "sap_before": package_comparison["sap_before"],
+                    "sap_after": package_comparison["sap_after"],
+                    "epc_before": package_comparison["epc_before"],
+                    "epc_after": package_comparison["epc_after"],
+                    "heat_demand_before": package_comparison["heat_demand_before"],
+                    "heat_demand_after": package_comparison["heat_demand_after"],
+                    "carbon_before": package_comparison["carbon_before"],
+                    "carbon_after": package_comparison["carbon_after"],
+                    "bill_savings": package_comparison["bill_savings"],
+                    "energy_savings": package_comparison["energy_savings"],
+                })
+
+            results = pd.DataFrame(results)
+
         results = []
         for p in input_properties:
             measures = p.measures
diff --git a/recommendations/Mds.py b/recommendations/Mds.py
index ac80af1a..9fe3ff09 100644
--- a/recommendations/Mds.py
+++ b/recommendations/Mds.py
@@ -22,6 +22,16 @@ class Mds:
     Handles the contruction of the MDS report
     """
 
+    format_map = {
+        "external_wall_insulation": "EWI (Trad Const)",
+        "internal_wall_insualtion": "IWI",
+        "cavity_wall_insulation": "CWI",
+        "loft_insulation": "LI",
+        "air_source_heat_pump": "ASHP Htg",
+        "high_heat_retention_storage_heaters": "High Heat Retention Storage Heaters",
+        "solar_pv": "Solar PV",
+    }
+
     def __init__(self, property_instance: Property, materials, optimise_measures: bool = False):
         self.property_instance = property_instance
 
@@ -55,16 +65,6 @@ class Mds:
             'solar_pv'
         ]
 
-        format_map = {
-            "external_wall_insulation": "EWI (Trad Const)",
-            "internal_wall_insualtion": "IWI",
-            "cavity_wall_insulation": "CWI",
-            "loft_insulation": "LI",
-            "air_source_heat_pump": "ASHP Htg",
-            "high_heat_retention_storage_heaters": "High Heat Retention Storage Heaters",
-            "solar_pv": "Solar PV",
-        }
-
         # Check if our measures are within the ones we've handled
         new = [m for m in measures if m not in all_considered_measures]
         if new:
@@ -159,7 +159,7 @@ class Mds:
 
             pruned_measures_formatted = []
             for pm in pruned_measures:
-                pruned_measures_formatted.append({pm: format_map[pm]})
+                pruned_measures_formatted.append({pm: self.format_map[pm]})
 
             pruned_combinations.append(pruned_measures_formatted)
 

From 813d51e9f311ad7902466d46f4b5f98cf5d1cbae Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 4 Jun 2024 10:05:05 +0100
Subject: [PATCH 14/80] handle the case of no hhr heater recommendation

---
 recommendations/Mds.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/recommendations/Mds.py b/recommendations/Mds.py
index 9fe3ff09..b03a52d3 100644
--- a/recommendations/Mds.py
+++ b/recommendations/Mds.py
@@ -244,10 +244,16 @@ class Mds:
             recs = self.heating_recommender.recommend_hhr_storage_heaters(
                 phase=phase, system_change=True, heating_controls_only=False, _return=True
             )
-            recs = self.insert_recommendation_id(recs, measures, "high_heat_retention_storage_heaters")
-            mds_recommendations.append(recs)
-            if self.optimise_measures and len(recs):
-                phase += 1
+            if recs is None:
+                logger.info(
+                    f"No recommendations for high heat retention storage heaters, current heating "
+                    f"{self.property_instance.main_heating['clean_description']}"
+                )
+            else:
+                recs = self.insert_recommendation_id(recs, measures, "high_heat_retention_storage_heaters")
+                mds_recommendations.append(recs)
+                if self.optimise_measures and len(recs):
+                    phase += 1
 
         if "low_energy_lighting" in measure_config_list:
             raise Exception("check me out 9")

From bc5008bdd56b94fe6fd33113d0b5a570ef24e7f8 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 4 Jun 2024 10:08:07 +0100
Subject: [PATCH 15/80] handling error reporting

---
 backend/Property.py        | 8 ++++----
 backend/app/plan/router.py | 3 ++-
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index 212c20d6..cd2028cb 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -203,11 +203,11 @@ class Property:
         # difference_record = self.epc_record - self.epc_record
 
         # TODO: change these lower and replace in the settings file
-        print(
-            "CHANGE THE LATEST FIELD TO REMOVE NUMBER HABITABLE ROOMS IF WE WANT TO USE STARTING/ENDING"
-        )
+        # print(
+        #     "CHANGE THE LATEST FIELD TO REMOVE NUMBER HABITABLE ROOMS IF WE WANT TO USE STARTING/ENDING"
+        # )
         fixed_data_col_names = MANDATORY_FIXED_FEATURES + LATEST_FIELD
-        print("NEED TO CHANGE THE DASH TO LOWER CASE")
+        # print("NEED TO CHANGE THE DASH TO LOWER CASE")
         fixed_data_col_names = [
             x.lower().replace("_", "-") for x in fixed_data_col_names
         ]
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 8d7309a6..6ee9dfe2 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -750,7 +750,8 @@ async def build_mds(body: MdsRequest):
             mds = Mds(property_instance=p, materials=materials, optimise_measures=optimise_measures)
             mds_recommendations, property_representative_recommendations, errors = mds.build()
 
-            if errors:
+            if any([len(x) for x in errors.values()]):
+                blah
                 logger.info("Errors occurred during MDS build")
 
             recommendations[p.id] = mds_recommendations

From 1c6ffd6c0572ce3d9e0d30652b493b48b9b55820 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 4 Jun 2024 10:10:55 +0100
Subject: [PATCH 16/80] allow sub-combinations for combinations of measures

---
 recommendations/Mds.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/recommendations/Mds.py b/recommendations/Mds.py
index b03a52d3..af0a0be8 100644
--- a/recommendations/Mds.py
+++ b/recommendations/Mds.py
@@ -154,9 +154,6 @@ class Mds:
 
                 pruned_measures.append(measure)
 
-            if len(combination) != len(pruned_measures):
-                continue
-
             pruned_measures_formatted = []
             for pm in pruned_measures:
                 pruned_measures_formatted.append({pm: self.format_map[pm]})

From a5876d40db06fb13e1413df40246ee29ba71ffae Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 4 Jun 2024 11:14:43 +0100
Subject: [PATCH 17/80] Adding in condition checks for various measures

---
 recommendations/HeatingRecommender.py  | 46 +++++++++++++++++---------
 recommendations/Mds.py                 | 39 +++++++++++++++++-----
 recommendations/RoofRecommendations.py | 33 +++++++++---------
 recommendations/WallRecommendations.py | 21 +++++++++---
 4 files changed, 94 insertions(+), 45 deletions(-)

diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 2041f783..1b8c5035 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -8,6 +8,11 @@ from recommendations.HeatingControlRecommender import HeatingControlRecommender
 
 
 class HeatingRecommender:
+    ELECTRIC_HEATING_DESCRIPTIONS = [
+        "Room heaters, electric",
+        "Electric storage heaters",
+        "Electric storage heaters, radiators"
+    ]
 
     def __init__(self, property_instance: Property):
         self.property = property_instance
@@ -16,6 +21,23 @@ class HeatingRecommender:
         self.heating_recommendations = []
         self.heating_control_recommendations = []
 
+        self.has_electric_heating_description = (
+            self.property.main_heating["clean_description"] in self.ELECTRIC_HEATING_DESCRIPTIONS
+        )
+
+    def is_high_heat_retention_valid(self):
+        """
+        Check conditions if high heat retention storage is valid
+        :return:
+        """
+
+        no_heating_no_mains = (
+            self.property.main_heating["clean_description"] in ["No system present, electric heaters assumed"] and
+            not self.property.data["mains-gas-flag"]
+        )
+
+        return self.has_electric_heating_description or no_heating_no_mains
+
     def recommend(self, has_cavity_or_loft_recommendations, phase=0):
         """
         Produces heating recommendations
@@ -34,16 +56,7 @@ class HeatingRecommender:
         # This first iteration of the recommender will provide very basic recommendation
         # We recommend heating controls based on the main heating system
 
-        has_electric_heating_description = self.property.main_heating["clean_description"] in [
-            "Room heaters, electric", "Electric storage heaters", "Electric storage heaters, radiators"
-        ]
-
-        no_heating_no_mains = (
-            self.property.main_heating["clean_description"] in ["No system present, electric heaters assumed"] and
-            not self.property.data["mains-gas-flag"]
-        )
-
-        if has_electric_heating_description or no_heating_no_mains:
+        if self.is_high_heat_retention_valid():
             # Recommend high heat retention storage heaters
             self.recommend_hhr_storage_heaters(phase=phase, system_change=True, heating_controls_only=False)
 
@@ -61,7 +74,7 @@ class HeatingRecommender:
         )
 
         # We also check if the property has electric heating, but it has access to the mains gas
-        electic_heating_has_mains = has_electric_heating_description and self.property.data["mains-gas-flag"]
+        electic_heating_has_mains = self.has_electric_heating_description and self.property.data["mains-gas-flag"]
 
         portable_heaters_has_mains = (
             self.property.main_heating["clean_description"] in ["Portable electric heaters assumed for most rooms"] and
@@ -93,16 +106,19 @@ class HeatingRecommender:
         # In the future, we'll allow overrides, so that non-intrusive surveys can contradict these conditions
         # and either allow or prevent the recommendation of an air source heat pump
 
-        suitable_property_type = self.property.data["property-type"] in ["House", "Bungalow"]
-        has_air_source_heat_pump = self.property.main_heating["has_air_source_heat_pump"]
-
-        if suitable_property_type and not has_air_source_heat_pump:
+        if self.is_ashp_valid():
             self.recommend_air_source_heat_pump(
                 phase=phase, has_cavity_or_loft_recommendations=has_cavity_or_loft_recommendations
             )
 
         return
 
+    def is_ashp_valid(self):
+        suitable_property_type = self.property.data["property-type"] in ["House", "Bungalow"]
+        has_air_source_heat_pump = self.property.main_heating["has_air_source_heat_pump"]
+
+        return suitable_property_type and not has_air_source_heat_pump
+
     def recommend_air_source_heat_pump(self, phase, has_cavity_or_loft_recommendations, _return=False):
         """
         This method will implement the recommendation for an air source heat pump
diff --git a/recommendations/Mds.py b/recommendations/Mds.py
index af0a0be8..ad3c4d2e 100644
--- a/recommendations/Mds.py
+++ b/recommendations/Mds.py
@@ -123,36 +123,58 @@ class Mds:
                     continue
                 # There are certain measures where we need to
                 if measure == "external_wall_insulation":
-                    # Check if the wall is solid
-                    if self.property_instance.walls['is_solid_brick']:
+                    # Check if the wall is not cavity since the other wall types can take external wall insulation
+                    if self.wall_recommender.ewi_valid():
                         pruned_measures.append(measure)
                     continue
 
                 if measure == "cavity_wall_insulation":
                     # Check if the wall is cavity
-                    if self.property_instance.walls['is_cavity_wall']:
+                    if (
+                        self.property_instance.walls['is_cavity_wall'] and
+                        not self.property_instance.walls['is_filled_cavity']
+                    ):
                         pruned_measures.append(measure)
                     continue
 
                 if measure == "loft_insulation":
-                    # Check if the roof is suitable for loft insulation
-                    if self.property_instance.roof["is_pitched"]:
+                    # Check if the roof is suitable for loft insulation and the loft isn't already done
+                    if (
+                        self.property_instance.roof["is_pitched"] and
+                        not self.roof_recommender.is_loft_already_insulated()
+                    ):
                         pruned_measures.append(measure)
                     continue
 
                 if measure == "solid_floor_insulation":
                     # Check if the floor is solid
-                    if self.property_instance.floor["is_solid"]:
+                    if (
+                        self.property_instance.floor["is_solid"] and
+                        self.property_instance.floor["insulation_thickness"] not in ["average", "above average"]
+                    ):
                         pruned_measures.append(measure)
                     continue
 
                 if measure == "suspended_floor_insulation":
                     # Check if the floor is suspended
-                    if self.property_instance.floor["is_suspended"]:
+                    if (
+                        self.property_instance.floor["is_suspended"] and
+                        self.property_instance.floor["insulation_thickness"] not in ["average", "above average"]
+                    ):
                         pruned_measures.append(measure)
                     continue
 
-                pruned_measures.append(measure)
+                if measure == "high_heat_retention_storage_heaters":
+                    if self.heating_recommender.is_high_heat_retention_valid():
+                        pruned_measures.append(measure)
+                    continue
+
+                if measure == "air_source_heat_pump":
+                    if self.heating_recommender.is_ashp_valid():
+                        pruned_measures.append(measure)
+                    continue
+
+                raise NotImplementedError("Implement me")
 
             pruned_measures_formatted = []
             for pm in pruned_measures:
@@ -311,7 +333,6 @@ class Mds:
 
         if self.optimise_measures:
             measures_set = self.select_optimal_measure_set(self.property_instance.measures)
-            logger.info(f"Building recommendations for {len(measures_set)} combinations of measures")
             mds_recommendations_map = {}
             representative_recommendations_map = {}
             errors_map = {}
diff --git a/recommendations/RoofRecommendations.py b/recommendations/RoofRecommendations.py
index 538d90e4..81f514b1 100644
--- a/recommendations/RoofRecommendations.py
+++ b/recommendations/RoofRecommendations.py
@@ -54,6 +54,13 @@ class RoofRecommendations:
             ]
         ]
 
+        # Extract the insulation thickness from the roof, which is used throughout this method
+        self.insulation_thickness = convert_thickness_to_numeric(
+            self.property.roof["insulation_thickness"],
+            self.property.roof["is_pitched"],
+            self.property.roof["is_flat"]
+        )
+
     def mds_loft_insulation(self, phase):
         """
         For usages within the mds report
@@ -62,18 +69,18 @@ class RoofRecommendations:
         """
         self.recommendations = []
 
-        insulation_thickness = convert_thickness_to_numeric(
-            self.property.roof["insulation_thickness"],
-            self.property.roof["is_pitched"],
-            self.property.roof["is_flat"]
-        )
-
         u_value = get_roof_u_value(**{**self.property.roof, "age_band": self.property.age_band})
 
-        self.recommend_roof_insulation(u_value, insulation_thickness, self.property.roof, phase)
+        self.recommend_roof_insulation(u_value, self.insulation_thickness, self.property.roof, phase)
 
         return self.recommendations
 
+    def is_loft_already_insulated(self):
+        """
+        Check if the loft is already insulated
+        """
+        return (self.insulation_thickness > self.MINIMUM_LOFT_ISULATION_MM) and self.property.roof["is_pitched"]
+
     def recommend(self, phase):
 
         if self.property.roof["has_dwelling_above"]:
@@ -81,21 +88,15 @@ class RoofRecommendations:
 
         u_value = self.property.roof["thermal_transmittance"]
 
-        insulation_thickness = convert_thickness_to_numeric(
-            self.property.roof["insulation_thickness"],
-            self.property.roof["is_pitched"],
-            self.property.roof["is_flat"]
-        )
-
         # We check if the roof is already insulated and if so, we exit
 
         # Building regulations part L recommend installing at least 270mm of insulation, however generally we
         # experience diminishing returns in terms of SAP once we go beyond around 150mm of insulation
         # This only holds true for pitched roofs.
-        if (insulation_thickness > self.MINIMUM_LOFT_ISULATION_MM) and self.property.roof["is_pitched"]:
+        if self.is_loft_already_insulated():
             return
 
-        if (insulation_thickness >= self.MINIMUM_FLAT_ROOF_ISULATION_MM) and self.property.roof["is_flat"]:
+        if (self.insulation_thickness >= self.MINIMUM_FLAT_ROOF_ISULATION_MM) and self.property.roof["is_flat"]:
             return
 
         if self.property.roof["is_roof_room"]:
@@ -119,7 +120,7 @@ class RoofRecommendations:
             return
 
         if self.property.roof["is_pitched"] or self.property.roof["is_flat"]:
-            self.recommend_roof_insulation(u_value, insulation_thickness, self.property.roof, phase)
+            self.recommend_roof_insulation(u_value, self.insulation_thickness, self.property.roof, phase)
             return
 
         if self.property.roof["is_roof_room"]:
diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py
index fcd8e2bd..868c08c0 100644
--- a/recommendations/WallRecommendations.py
+++ b/recommendations/WallRecommendations.py
@@ -112,11 +112,9 @@ class WallRecommendations(Definitions):
         self.external_wall_non_insulation_materials = [
             part
             for part in materials
-            if part["type"]
-               in ["ewi_wall_demolition", "ewi_wall_preparation", "ewi_wall_redecoration"]
+            if part["type"] in ["ewi_wall_demolition", "ewi_wall_preparation", "ewi_wall_redecoration"]
         ]
 
-    @property
     def ewi_valid(self):
         """
         This method check available data, to determine if a property is suitable for external wall insulation
@@ -126,11 +124,24 @@ class WallRecommendations(Definitions):
         # it is not suitable for EWI
         if self.property.restricted_measures or (
             self.property.data["property-type"].lower() == "flat"
+        ) or (
+            self.property.walls['is_cob'] or
+            self.property.walls['is_sandstone_or_limestone'] or
+            self.property.walls["is_cavity_wall"]
         ):
             return False
 
         return True
 
+    def is_suitable_for_solid_insulation(self):
+        """
+        Checks if the wall is of a suitable type for internal/external wall insulation
+        """
+        if self.property.walls["is_cavity_wall"] or self.property.walls["is_cob"]:
+            return False
+
+        return True
+
     def mds_recommend_cavity_wall_insulation(self, phase=None):
         # Function specifically for cavity wall insulation, for usage in the mds report
         self.recommendations = []
@@ -249,7 +260,7 @@ class WallRecommendations(Definitions):
             return
 
         # Remaining wall types are treated with IWI or EWI
-        if u_value >= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE:
+        if (u_value >= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE) and self.is_suitable_for_solid_insulation():
             self.find_insulation(u_value, phase)
             return
 
@@ -528,7 +539,7 @@ class WallRecommendations(Definitions):
         # consider diminishing returns between the two as they are considered to be separate measures
 
         ewi_recommendations = []
-        if self.ewi_valid:
+        if self.ewi_valid():
             ewi_recommendations = self._find_insulation(
                 u_value=u_value,
                 insulation_materials=pd.DataFrame(

From bbe35400bdea1a751944b1cb842da21be1529a99 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 4 Jun 2024 11:23:54 +0100
Subject: [PATCH 18/80] added condition check for solar

---
 backend/app/plan/router.py                |  3 +--
 recommendations/Mds.py                    |  4 ++++
 recommendations/SolarPvRecommendations.py | 23 +++++++++++++----------
 3 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 6ee9dfe2..fdbee9b7 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -751,8 +751,7 @@ async def build_mds(body: MdsRequest):
             mds_recommendations, property_representative_recommendations, errors = mds.build()
 
             if any([len(x) for x in errors.values()]):
-                blah
-                logger.info("Errors occurred during MDS build")
+                raise Exception("Errors occurred during MDS build")
 
             recommendations[p.id] = mds_recommendations
             representative_recommendations[p.id] = property_representative_recommendations
diff --git a/recommendations/Mds.py b/recommendations/Mds.py
index ad3c4d2e..02ed3d06 100644
--- a/recommendations/Mds.py
+++ b/recommendations/Mds.py
@@ -174,6 +174,10 @@ class Mds:
                         pruned_measures.append(measure)
                     continue
 
+                if measure == "solar_pv":
+                    if self.solar_recommender.is_solar_pv_valid():
+                        pruned_measures.append(measure)
+
                 raise NotImplementedError("Implement me")
 
             pruned_measures_formatted = []
diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py
index 14161da3..a9255370 100644
--- a/recommendations/SolarPvRecommendations.py
+++ b/recommendations/SolarPvRecommendations.py
@@ -75,15 +75,7 @@ class SolarPvRecommendations:
             }
         ]
 
-    def recommend(self, phase):
-        """
-        We check if a property is potentially suitable for solar PV based on the following criteria:
-        - The property is a house or bungalow
-        - The property has a flat or pitched roof
-        - The property does not have existing solar pv
-        :return:
-        """
-
+    def is_solar_pv_valid(self):
         is_valid_property_type = self.property.data["property-type"] in ["House", "Bungalow", "Maisonette"]
         is_valid_roof_type = (
             self.property.roof["is_flat"] or self.property.roof["is_pitched"] or self.property.roof["is_roof_room"]
@@ -93,7 +85,18 @@ class SolarPvRecommendations:
             None, 0, self.property.DATA_ANOMALY_MATCHES
         ]
 
-        if not is_valid_property_type or not is_valid_roof_type or not has_no_existing_solar_pv:
+        return is_valid_property_type and is_valid_roof_type and has_no_existing_solar_pv
+
+    def recommend(self, phase):
+        """
+        We check if a property is potentially suitable for solar PV based on the following criteria:
+        - The property is a house or bungalow
+        - The property has a flat or pitched roof
+        - The property does not have existing solar pv
+        :return:
+        """
+
+        if not self.is_solar_pv_valid():
             return
 
         solar_pv_percentage = self.property.solar_pv_percentage

From 548672cc1039efede9d53886ce616a747dd25cff Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 4 Jun 2024 11:36:28 +0100
Subject: [PATCH 19/80] refine conditions for recommending ewi in mds

---
 etl/epc/Dataset.py     | 18 +++++++++---------
 recommendations/Mds.py |  9 ++++++++-
 2 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/etl/epc/Dataset.py b/etl/epc/Dataset.py
index ee3e357c..83a85b78 100644
--- a/etl/epc/Dataset.py
+++ b/etl/epc/Dataset.py
@@ -203,11 +203,11 @@ class TrainingDataset(BaseDataset):
         common_cols = [[col + "_starting", col + "_ending"] for col in common_cols]
 
         self.df = self.df.loc[
-            :,
-            no_suffix_cols
-            + only_ending_cols
-            + [col for cols in common_cols for col in cols],
-        ]
+                  :,
+                  no_suffix_cols
+                  + only_ending_cols
+                  + [col for cols in common_cols for col in cols],
+                  ]
 
     def _remove_abnormal_change_in_floor_area(self):
         """
@@ -511,7 +511,7 @@ class TrainingDataset(BaseDataset):
                     expanded_df["is_sandstone_or_limestone"]
                     == expanded_df["is_sandstone_or_limestone_ending"]
                 )
-            ]
+                ]
         elif component == "floor":
             expanded_df = expanded_df[
                 (expanded_df["is_suspended"] == expanded_df["is_suspended_ending"])
@@ -528,7 +528,7 @@ class TrainingDataset(BaseDataset):
                     expanded_df["is_to_external_air"]
                     == expanded_df["is_to_external_air_ending"]
                 )
-            ]
+                ]
         elif component == "roof":
             expanded_df = expanded_df[
                 (expanded_df["is_pitched"] == expanded_df["is_pitched_ending"])
@@ -541,7 +541,7 @@ class TrainingDataset(BaseDataset):
                     expanded_df["has_dwelling_above"]
                     == expanded_df["has_dwelling_above_ending"]
                 )
-            ]
+                ]
 
         return expanded_df
 
@@ -742,7 +742,7 @@ class TrainingDataset(BaseDataset):
                 self.df[col] = self.df[col].fillna("Unknown")
 
     def _null_validation(self, information: str):
-        print(f"Null validation after {information}")
+        # print(f"Null validation after {information}")
         if pd.isnull(self.df).sum().sum():
             raise ValueError(f"Null values found in dataset, after step {information}")
 
diff --git a/recommendations/Mds.py b/recommendations/Mds.py
index 02ed3d06..e9e5cad8 100644
--- a/recommendations/Mds.py
+++ b/recommendations/Mds.py
@@ -124,7 +124,10 @@ class Mds:
                 # There are certain measures where we need to
                 if measure == "external_wall_insulation":
                     # Check if the wall is not cavity since the other wall types can take external wall insulation
-                    if self.wall_recommender.ewi_valid():
+                    if (
+                        self.wall_recommender.ewi_valid() and
+                        not self.property_instance.walls["insulation_thickness"] in ["average", "above average"]
+                    ):
                         pruned_measures.append(measure)
                     continue
 
@@ -177,9 +180,13 @@ class Mds:
                 if measure == "solar_pv":
                     if self.solar_recommender.is_solar_pv_valid():
                         pruned_measures.append(measure)
+                    continue
 
                 raise NotImplementedError("Implement me")
 
+            if not pruned_measures:
+                continue
+
             pruned_measures_formatted = []
             for pm in pruned_measures:
                 pruned_measures_formatted.append({pm: self.format_map[pm]})

From b3f8a9dccc7d2d2b42432fdb319cb0a8883ec275 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 4 Jun 2024 13:41:59 +0100
Subject: [PATCH 20/80] fixed automated assignment

---
 backend/app/plan/router.py | 43 +++++++++++++++++++++++++++++---------
 1 file changed, 33 insertions(+), 10 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index fdbee9b7..0e7753e2 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -830,8 +830,14 @@ async def build_mds(body: MdsRequest):
             results = []
             for p in input_properties:
 
+                sap_before = int(p.data["current-energy-efficiency"])
+                epc_before = p.data["current-energy-rating"]
+                heat_demand_before = p.data["energy-consumption-current"]
+                carbon_before = p.data["co2-emissions-current"]
+
                 package_comparison = []
                 for _id in recommendations[p.id].keys():
+
                     sap_prediction = all_predictions["sap_change_predictions"][
                         (all_predictions["sap_change_predictions"]["property_id"] == str(p.id)) &
                         (all_predictions["sap_change_predictions"]["recommendation_id"].str.contains(re.escape(_id)))
@@ -854,8 +860,10 @@ async def build_mds(body: MdsRequest):
                     sap_target = epc_to_sap_lower_bound(epc_target)
                     # Define the measures
                     sap_threshold_barrier = sap_prediction[sap_prediction["predictions"] >= sap_target]
+                    meets_threshold = True
                     if sap_threshold_barrier.empty:
-                        raise NotImplementedError("FIX ME")
+                        sap_threshold_barrier = sap_prediction.tail(1)
+                        meets_threshold = False
                     sap_threshold_barrier = sap_threshold_barrier.head(1)
 
                     sap_prediction = sap_prediction[
@@ -881,16 +889,9 @@ async def build_mds(body: MdsRequest):
                     ]
                     costs = sum(costs)
 
-                    sap_before = int(p.data["current-energy-efficiency"])
                     sap_after = sap_prediction["predictions"].values[-1]
-
-                    epc_before = p.data["current-energy-rating"]
                     epc_after = sap_to_epc(sap_after)
-
-                    heat_demand_before = p.data["energy-consumption-current"]
                     heat_demand_after = heat_demand_prediction["predictions"].values[-1]
-
-                    carbon_before = p.data["co2-emissions-current"]
                     carbon_after = carbon_prediction["predictions"].values[-1]
 
                     current_adjusted_energy = AnnualBillSavings.adjust_energy_to_metered(
@@ -924,13 +925,35 @@ async def build_mds(body: MdsRequest):
                             "carbon_after": carbon_after,
                             "bill_savings": bill_savings,
                             "energy_savings": energy_savings,
+                            "meets_threshold": meets_threshold
                         }
                     )
 
                 package_comparison = pd.DataFrame(package_comparison)
                 # Find the smallest cost package
-                package_comparison = package_comparison.sort_values("cost")
-                package_comparison = package_comparison.head(1).to_dict("records")[0]
+                if not package_comparison.empty:
+
+                    # We check if any of the packages meet the threshold
+                    if package_comparison["meets_threshold"].any():
+                        package_comparison = package_comparison[package_comparison["meets_threshold"]]
+
+                    package_comparison = package_comparison.sort_values("cost")
+                    package_comparison = package_comparison.head(1).to_dict("records")[0]
+                else:
+                    package_comparison = {
+                        "measures": [],
+                        "sap_before": sap_before,
+                        "sap_after": sap_before,
+                        "epc_before": epc_before,
+                        "epc_after": epc_before,
+                        "heat_demand_before": heat_demand_before,
+                        "heat_demand_after": heat_demand_before,
+                        "carbon_before": carbon_before,
+                        "carbon_after": carbon_before,
+                        "bill_savings": 0,
+                        "energy_savings": 0,
+                        "meets_threshold": False
+                    }
 
                 config = [c for c in plan_input if c["uprn"] == str(p.uprn)]
                 if not config:

From 23b62b0c54c51f375cc842f30f96acfcf3f021f8 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 4 Jun 2024 13:54:15 +0100
Subject: [PATCH 21/80] handle the case of not requiring any measures

---
 backend/app/plan/router.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 0e7753e2..f86b1759 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -857,6 +857,9 @@ async def build_mds(body: MdsRequest):
                     carbon_prediction["row_id"] = carbon_prediction.index
 
                     epc_target = body.goal_value
+                    if epc_before == epc_target:
+                        continue
+
                     sap_target = epc_to_sap_lower_bound(epc_target)
                     # Define the measures
                     sap_threshold_barrier = sap_prediction[sap_prediction["predictions"] >= sap_target]
@@ -934,10 +937,13 @@ async def build_mds(body: MdsRequest):
                 if not package_comparison.empty:
 
                     # We check if any of the packages meet the threshold
+                    # If none of them do, take the one that gets closest to the target
                     if package_comparison["meets_threshold"].any():
                         package_comparison = package_comparison[package_comparison["meets_threshold"]]
+                        package_comparison = package_comparison.sort_values("cost")
+                    else:
+                        package_comparison = package_comparison.sort_values("sap_after", ascending=False)
 
-                    package_comparison = package_comparison.sort_values("cost")
                     package_comparison = package_comparison.head(1).to_dict("records")[0]
                 else:
                     package_comparison = {

From e139031e6b550886c826f8d42d6fdf78e22fc164 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 4 Jun 2024 14:29:24 +0100
Subject: [PATCH 22/80] change the floor level logic

---
 backend/Property.py    | 4 ++--
 recommendations/Mds.py | 2 ++
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index cd2028cb..6336e42d 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -847,8 +847,8 @@ class Property:
         # where a property is marked as being on the first floor
         if self.floor_level > 0:
 
-            # We check if there is another property below
-            if not self.floor["another_property_below"]:
+            # We check if there is another property below (for a non-sap assessment)
+            if not self.floor["another_property_below"] and self.floor["thermal_transmittance_unit"] is None:
                 self.floor_level = 0
             return
 
diff --git a/recommendations/Mds.py b/recommendations/Mds.py
index e9e5cad8..c30cb231 100644
--- a/recommendations/Mds.py
+++ b/recommendations/Mds.py
@@ -146,6 +146,8 @@ class Mds:
                         self.property_instance.roof["is_pitched"] and
                         not self.roof_recommender.is_loft_already_insulated()
                     ):
+                        self.property_instance.data
+
                         pruned_measures.append(measure)
                     continue
 

From a1175c04a85372427836087ddfdc654e49a0907c Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 4 Jun 2024 14:36:45 +0100
Subject: [PATCH 23/80] added a simple check to see if the property is a mid
 floor flat

---
 backend/Property.py    | 8 ++++++++
 recommendations/Mds.py | 8 +++-----
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index 6336e42d..ce21bd52 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -858,6 +858,14 @@ class Property:
                 self.floor_level = 1
             return
 
+    def is_mid_floor_flat(self):
+        """
+        Simple utility function to check if the property is a mid-floor flat
+        :return:
+        """
+
+        return self.data["property-type"] == "Flat" and self.epc_record.original_epc["floor-level"] == "mid floor"
+
     def set_wall_type(self):
         """
         This method sets the wall type of the property, using a simple approach based on the wall description
diff --git a/recommendations/Mds.py b/recommendations/Mds.py
index c30cb231..638b6ca8 100644
--- a/recommendations/Mds.py
+++ b/recommendations/Mds.py
@@ -145,9 +145,7 @@ class Mds:
                     if (
                         self.property_instance.roof["is_pitched"] and
                         not self.roof_recommender.is_loft_already_insulated()
-                    ):
-                        self.property_instance.data
-
+                    ) or self.property_instance.is_mid_floor_flat():
                         pruned_measures.append(measure)
                     continue
 
@@ -156,7 +154,7 @@ class Mds:
                     if (
                         self.property_instance.floor["is_solid"] and
                         self.property_instance.floor["insulation_thickness"] not in ["average", "above average"]
-                    ):
+                    ) or self.property_instance.is_mid_floor_flat():
                         pruned_measures.append(measure)
                     continue
 
@@ -165,7 +163,7 @@ class Mds:
                     if (
                         self.property_instance.floor["is_suspended"] and
                         self.property_instance.floor["insulation_thickness"] not in ["average", "above average"]
-                    ):
+                    ) or self.property_instance.is_mid_floor_flat():
                         pruned_measures.append(measure)
                     continue
 

From 1393a99b8bcba535bea7d3021ef3e5f7de47dbbf Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 4 Jun 2024 16:21:45 +0100
Subject: [PATCH 24/80] checking hhr recommendations

---
 backend/Property.py                   |  8 ----
 backend/app/plan/router.py            | 69 +++++++++++++++++++++++++++
 recommendations/HeatingRecommender.py | 45 +++++++++++------
 recommendations/Mds.py                | 22 ++++++---
 4 files changed, 115 insertions(+), 29 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index ce21bd52..6336e42d 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -858,14 +858,6 @@ class Property:
                 self.floor_level = 1
             return
 
-    def is_mid_floor_flat(self):
-        """
-        Simple utility function to check if the property is a mid-floor flat
-        :return:
-        """
-
-        return self.data["property-type"] == "Flat" and self.epc_record.original_epc["floor-level"] == "mid floor"
-
     def set_wall_type(self):
         """
         This method sets the wall type of the property, using a simple approach based on the wall description
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index f86b1759..ee36ea80 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -970,6 +970,7 @@ async def build_mds(body: MdsRequest):
                 results.append({
                     "config_address": config["address"],
                     "config_postcode": config["postcode"],
+                    "uprn": p.uprn,
                     "address": p.address,
                     "postcode": p.postcode,
                     "measures": package_comparison["measures"],
@@ -988,6 +989,74 @@ async def build_mds(body: MdsRequest):
 
             results = pd.DataFrame(results)
 
+            # For the different measures, we check the impact with a few debugging functions
+
+            def check_mds(results, input_properties, recommendations):
+                import ast
+                walls_check = []
+                hhr_check = []
+                for p in input_properties:
+                    res = results[results["uprn"] == p.uprn]
+                    wall = p.walls
+                    heating = p.main_heating
+                    wall_recommendation = [
+                        x for x in res["measures"].values[0] if
+                        x in ["internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation"]
+                    ]
+
+                    hhr_recommendation = [
+                        x for x in res["measures"].values[0] if
+                        x in ["high_heat_retention_storage_heaters"]
+                    ]
+
+                    possible_measures = [ast.literal_eval(x) for x in list(recommendations[p.id].keys())]
+                    # Unlist them
+                    possible_measures = [x for sublist in possible_measures for x in sublist]
+                    possible_measures = list(set(possible_measures))
+
+                    if wall_recommendation:
+                        if len(wall_recommendation) > 1:
+                            raise Exception("something went wrong")
+                        wall_recommendation = wall_recommendation[0]
+                    else:
+                        wall_recommendation = None
+
+                    hhr_recommendation = hhr_recommendation[0] if hhr_recommendation else None
+
+                    walls_check.append(
+                        {
+                            "uprn": p.uprn,
+                            "address": p.address,
+                            "postcode": p.postcode,
+                            "conservation_status": p.spatial["conservation_status"],
+                            "is_listed_building": p.spatial["is_listed_building"],
+                            "is_heritage_building": p.spatial["is_heritage_building"],
+                            "wall": wall["clean_description"],
+                            "recommendation": wall_recommendation,
+                            "possible_measures": possible_measures,
+                            "selected_measures": res["measures"].values[0],
+                        }
+                    )
+
+                    hhr_check.append(
+                        {
+                            "uprn": p.uprn,
+                            "address": p.address,
+                            "postcode": p.postcode,
+                            "heating": heating["clean_description"],
+                            "recommendation": hhr_recommendation,
+                            "possible_measures": possible_measures,
+                            "selected_measures": res["measures"].values[0],
+                        }
+                    )
+
+                walls_check = pd.DataFrame(walls_check)
+                hhr_check = pd.DataFrame(hhr_check)
+
+                return walls_check, hhr_check
+
+            walls_check, hhr_check = check_mds(results, input_properties, recommendations)
+
         results = []
         for p in input_properties:
             measures = p.measures
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 1b8c5035..11a7b663 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -11,9 +11,12 @@ class HeatingRecommender:
     ELECTRIC_HEATING_DESCRIPTIONS = [
         "Room heaters, electric",
         "Electric storage heaters",
-        "Electric storage heaters, radiators"
+        "Electric storage heaters, radiators",
+        "Portable electric heaters assumed for most rooms",
     ]
 
+    high_heat_retention_contols_desc = "Controls for high heat retention storage heaters"
+
     def __init__(self, property_instance: Property):
         self.property = property_instance
         self.costs = Costs(self.property)
@@ -31,12 +34,13 @@ class HeatingRecommender:
         :return:
         """
 
-        no_heating_no_mains = (
-            self.property.main_heating["clean_description"] in ["No system present, electric heaters assumed"] and
-            not self.property.data["mains-gas-flag"]
+        # If the property has assumed electric heating, regardless of whether or not it has a mains connection, we
+        # can consider hhr storage heaters
+        electric_heating_assumed = (
+            self.property.main_heating["clean_description"] in ["No system present, electric heaters assumed"]
         )
 
-        return self.has_electric_heating_description or no_heating_no_mains
+        return self.has_electric_heating_description or electric_heating_assumed
 
     def recommend(self, has_cavity_or_loft_recommendations, phase=0):
         """
@@ -330,6 +334,25 @@ class HeatingRecommender:
 
         return output
 
+    def is_hhr_already_installed(self):
+        """
+        Check if the property already has high heat retention storage heaters
+        :return:
+        """
+
+        already_has_hhr = "Electric storage heaters" in self.property.main_heating["clean_description"]
+        already_has_hhr_contols = (
+            self.property.main_heating_controls[
+                "clean_description"
+            ].lower() == self.high_heat_retention_contols_desc.lower()
+        )
+
+        # Conditions for not needing this recommendation
+        # Modern hhr storage heaters will have the specific controls so we can check for this
+        already_installed_hh_retention = already_has_hhr and already_has_hhr_contols
+
+        return already_installed_hh_retention
+
     def recommend_hhr_storage_heaters(self, phase, system_change, heating_controls_only, _return=False):
         """
         We will recommend upgrading to a high heat retention storage system, if the current system is not already
@@ -346,19 +369,13 @@ class HeatingRecommender:
 
         controls_recommender = HeatingControlRecommender(self.property)
         # The heating controls we're recommending for are based on the recommended heating system
-        high_heat_retention_contols_desc = "Controls for high heat retention storage heaters"
+
         # We only recommend Celect-type controls if the current heating system is not Celect-type controls
-        if self.property.main_heating_controls["clean_description"] != high_heat_retention_contols_desc:
+        if self.property.main_heating_controls["clean_description"] != self.high_heat_retention_contols_desc:
             controls_recommender.recommend(heating_description="Electric storage heaters, radiators")
 
-        # Conditions for not needing this recommendation
-        already_installed_hh_retention = (
-            "Electric storage heaters" in self.property.main_heating["clean_description"] and
-            self.property.main_heating_controls["clean_description"].lower() == high_heat_retention_contols_desc.lower()
-        )
-
         # Conditions for not recommending electric storage heaters
-        if already_installed_hh_retention:
+        if self.is_hhr_already_installed():
             # No recommendation needed
             return
 
diff --git a/recommendations/Mds.py b/recommendations/Mds.py
index 638b6ca8..27f6f871 100644
--- a/recommendations/Mds.py
+++ b/recommendations/Mds.py
@@ -116,6 +116,7 @@ class Mds:
                 final_combinations.append([m for m in one_choice + multi_path + remaining_measures])
 
         pruned_combinations = []
+        # TODO: We can do these checks once, outside of the loop and prune the combinations
         for combination in final_combinations:
             pruned_measures = []
             for measure in combination:
@@ -142,10 +143,12 @@ class Mds:
 
                 if measure == "loft_insulation":
                     # Check if the roof is suitable for loft insulation and the loft isn't already done
+                    # Or, if the home had a u-value for the roof, we don't recommend loft insulation
                     if (
                         self.property_instance.roof["is_pitched"] and
-                        not self.roof_recommender.is_loft_already_insulated()
-                    ) or self.property_instance.is_mid_floor_flat():
+                        not self.roof_recommender.is_loft_already_insulated() and
+                        self.property_instance.roof["thermal_transmittance_unit"] is None
+                    ):
                         pruned_measures.append(measure)
                     continue
 
@@ -153,8 +156,9 @@ class Mds:
                     # Check if the floor is solid
                     if (
                         self.property_instance.floor["is_solid"] and
-                        self.property_instance.floor["insulation_thickness"] not in ["average", "above average"]
-                    ) or self.property_instance.is_mid_floor_flat():
+                        self.property_instance.floor["insulation_thickness"] not in ["average", "above average"] and
+                        self.property_instance.floor["thermal_transmittance_unit"] is not None
+                    ):
                         pruned_measures.append(measure)
                     continue
 
@@ -162,13 +166,17 @@ class Mds:
                     # Check if the floor is suspended
                     if (
                         self.property_instance.floor["is_suspended"] and
-                        self.property_instance.floor["insulation_thickness"] not in ["average", "above average"]
-                    ) or self.property_instance.is_mid_floor_flat():
+                        self.property_instance.floor["insulation_thickness"] not in ["average", "above average"] and
+                        self.property_instance.floor["thermal_transmittance_unit"] is not None
+                    ):
                         pruned_measures.append(measure)
                     continue
 
                 if measure == "high_heat_retention_storage_heaters":
-                    if self.heating_recommender.is_high_heat_retention_valid():
+                    if (
+                        self.heating_recommender.is_high_heat_retention_valid() and
+                        not self.heating_recommender.is_hhr_already_installed()
+                    ):
                         pruned_measures.append(measure)
                     continue
 

From 7ed4002d0438f374cf2141364007a718065cf0e8 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 4 Jun 2024 18:11:36 +0100
Subject: [PATCH 25/80] finished optimisation and output

---
 backend/app/plan/router.py            |  2 ++
 recommendations/HeatingRecommender.py | 17 +++++++++++------
 recommendations/Mds.py                |  7 ++++++-
 3 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index ee36ea80..f6c01715 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -999,6 +999,7 @@ async def build_mds(body: MdsRequest):
                     res = results[results["uprn"] == p.uprn]
                     wall = p.walls
                     heating = p.main_heating
+                    heating_controls = p.main_heating_controls
                     wall_recommendation = [
                         x for x in res["measures"].values[0] if
                         x in ["internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation"]
@@ -1044,6 +1045,7 @@ async def build_mds(body: MdsRequest):
                             "address": p.address,
                             "postcode": p.postcode,
                             "heating": heating["clean_description"],
+                            "heating_controls": heating_controls["clean_description"],
                             "recommendation": hhr_recommendation,
                             "possible_measures": possible_measures,
                             "selected_measures": res["measures"].values[0],
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 11a7b663..ac8c4973 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -62,6 +62,8 @@ class HeatingRecommender:
 
         if self.is_high_heat_retention_valid():
             # Recommend high heat retention storage heaters
+            # TODO: We need to allow for the possibility that the property aleady has storage heaters, but just
+            #       needs the controls
             self.recommend_hhr_storage_heaters(phase=phase, system_change=True, heating_controls_only=False)
 
         # if the property has mains heating with boiler and radiators, we recommend optimal heating controls
@@ -341,17 +343,19 @@ class HeatingRecommender:
         """
 
         already_has_hhr = "Electric storage heaters" in self.property.main_heating["clean_description"]
+
+        # Some electric storage heaters will show that the controls are "Manual charge controls" which are indicative
+        # of the old model of electric storage heaters, originating from 1961.
+        # Newer HHR storage heaters will charge up over night but will retain the heat durin the day for when warmth
+        # is actually needed, unlike traditional storage heaters that charge up at night and release heat during the day
+        # which isn't always ideal for the occupants.
         already_has_hhr_contols = (
             self.property.main_heating_controls[
                 "clean_description"
             ].lower() == self.high_heat_retention_contols_desc.lower()
         )
 
-        # Conditions for not needing this recommendation
-        # Modern hhr storage heaters will have the specific controls so we can check for this
-        already_installed_hh_retention = already_has_hhr and already_has_hhr_contols
-
-        return already_installed_hh_retention
+        return already_has_hhr and already_has_hhr_contols
 
     def recommend_hhr_storage_heaters(self, phase, system_change, heating_controls_only, _return=False):
         """
@@ -374,8 +378,9 @@ class HeatingRecommender:
         if self.property.main_heating_controls["clean_description"] != self.high_heat_retention_contols_desc:
             controls_recommender.recommend(heating_description="Electric storage heaters, radiators")
 
+        has_hhr = self.is_hhr_already_installed()
         # Conditions for not recommending electric storage heaters
-        if self.is_hhr_already_installed():
+        if has_hhr:
             # No recommendation needed
             return
 
diff --git a/recommendations/Mds.py b/recommendations/Mds.py
index 27f6f871..4c417447 100644
--- a/recommendations/Mds.py
+++ b/recommendations/Mds.py
@@ -173,9 +173,14 @@ class Mds:
                     continue
 
                 if measure == "high_heat_retention_storage_heaters":
+
+                    # For the moment, we recommend storage heaters if the property doesn't already
+                    # and don't make it contngent on controls
+                    already_has_hhr = self.heating_recommender.is_hhr_already_installed()
+
                     if (
                         self.heating_recommender.is_high_heat_retention_valid() and
-                        not self.heating_recommender.is_hhr_already_installed()
+                        not already_has_hhr
                     ):
                         pruned_measures.append(measure)
                     continue

From 4cc534c333e3555d11270387a27b1b1dbfe54592 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 5 Jun 2024 11:06:15 +0100
Subject: [PATCH 26/80] added exclude old option to epc searcher

---
 backend/SearchEpc.py               |  9 ++++++++-
 etl/customers/eon/deck_examples.py | 30 ++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+), 1 deletion(-)
 create mode 100644 etl/customers/eon/deck_examples.py

diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py
index db9ec4ff..4c329448 100644
--- a/backend/SearchEpc.py
+++ b/backend/SearchEpc.py
@@ -11,6 +11,7 @@ from BaseUtility import Definitions
 from utils.logger import setup_logger
 from typing import List
 from fuzzywuzzy import process
+from backend.app.utils import sap_to_epc
 
 logger = setup_logger()
 
@@ -554,7 +555,7 @@ class SearchEpc:
         # If loop finishes without a valid response, raise an exception
         raise Exception("Unable to find postcode data after trimming - investigate me")
 
-    def estimate_epc(self, property_type, built_form, lmks_to_drop=None):
+    def estimate_epc(self, property_type, built_form, lmks_to_drop=None, exclude_old=False):
         """
         For a property that does not have an EPC, we retrieve the EPC data for the closest properties
         and estimate the EPC for the property in question.
@@ -567,6 +568,7 @@ class SearchEpc:
                                 the ordnance survey api
         :param lmks_to_drop:    This is a list of LMK keys that should be dropped from the estimation process. This
                                 is used as an override for testing, to drop EPCs for the property we are testing
+        :param exclude_old:     Used to drop any expired EPCs (more than 10 years old)
         :return:
         """
 
@@ -584,6 +586,9 @@ class SearchEpc:
         # If we still have missing dates, we set it to the mean of the non NA dates
         epc_data["lodgement-datetime"] = epc_data["lodgement-datetime"].fillna(epc_data["lodgement-datetime"].mean())
 
+        if exclude_old:
+            epc_data = epc_data[epc_data["lodgement-datetime"] > pd.Timestamp.now() - pd.DateOffset(years=10)]
+
         # For each attribute, we need to determine the datatype and use an appropriate method
         # to estimate.
         estimated_epc = {}
@@ -624,6 +629,8 @@ class SearchEpc:
         else:
             estimated_epc["lodgement-date"] = estimated_epc["lodgement-datetime"].strftime("%Y-%m-%d")
 
+        estimated_epc["current-energy-rating"] = sap_to_epc(estimated_epc["current-energy-efficiency"])
+
         estimated_epc["postcode"] = self.postcode
         estimated_epc["uprn"] = self.uprn
         estimated_epc["address"] = self.full_address
diff --git a/etl/customers/eon/deck_examples.py b/etl/customers/eon/deck_examples.py
new file mode 100644
index 00000000..8773ce09
--- /dev/null
+++ b/etl/customers/eon/deck_examples.py
@@ -0,0 +1,30 @@
+"""
+This script contains bits of codes for examples to be included in the Deck
+"""
+
+from backend.SearchEpc import SearchEpc
+from dotenv import load_dotenv
+import os
+
+load_dotenv(dotenv_path="backend/.env")
+
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+searcher = SearchEpc(
+    address1="Flat above 7 Malling Road",
+    postcode="ME6 5AA",
+    auth_token=EPC_AUTH_TOKEN,
+    os_api_key="",
+    property_type=None,
+    fast=False,
+)
+
+res = searcher.estimate_epc(
+    property_type="Flat",
+    built_form="Mid-Terrace",
+    lmks_to_drop=[
+        "4c3714a59744ab2c6e60441f0fa0eb903f283c6c62d0691e108cadbc7b5a8caa",
+        "363197839762013013017062127708717",
+        "363197811132009091518041845968302"
+    ]
+)

From acc45eae646745e8190a3a6f051061418dcfb944 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 5 Jun 2024 12:11:59 +0100
Subject: [PATCH 27/80] debugging mds without optimisation

---
 backend/app/plan/router.py            | 15 +++++++++++----
 etl/customers/eon/pilot_asset_list.py | 10 +++++++++-
 2 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index f6c01715..e0add281 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -661,10 +661,14 @@ async def build_mds(body: MdsRequest):
             epc_searcher.find_property(skip_os=True)
 
             if config["address"] == "35b High Street":
-                print("Performing temporary patch")
+                print("Performing temporary patch on 35b High Street")
                 epc_searcher.newest_epc["uprn"] = 10002911892
                 epc_searcher.full_sap_epc["uprn"] = 10002911892
 
+            if config["address"] == "Cobnut Barn":
+                print("Performing temporary patch on Cobnut Barn")
+                epc_searcher.newest_epc["uprn"] = 10013924689
+
             # Create a record in db
             # TODO: If we productionise the creation of this mds report, we will need to store this in the db
             # property_id, is_new = create_property(
@@ -750,8 +754,12 @@ async def build_mds(body: MdsRequest):
             mds = Mds(property_instance=p, materials=materials, optimise_measures=optimise_measures)
             mds_recommendations, property_representative_recommendations, errors = mds.build()
 
-            if any([len(x) for x in errors.values()]):
-                raise Exception("Errors occurred during MDS build")
+            if isinstance(errors, list):
+                if errors:
+                    raise Exception("Errors occurred during MDS build")
+            else:
+                if any([len(x) for x in errors.values()]):
+                    raise Exception("Errors occurred during MDS build")
 
             recommendations[p.id] = mds_recommendations
             representative_recommendations[p.id] = property_representative_recommendations
@@ -778,7 +786,6 @@ async def build_mds(body: MdsRequest):
                     recommendations_scoring_data.extend(data)
 
             else:
-
                 recommendations_scoring_data.append(
                     p.simulate_all_representative_recommendations(property_representative_recommendations)
                 )
diff --git a/etl/customers/eon/pilot_asset_list.py b/etl/customers/eon/pilot_asset_list.py
index b7c529e3..05e459cb 100644
--- a/etl/customers/eon/pilot_asset_list.py
+++ b/etl/customers/eon/pilot_asset_list.py
@@ -229,7 +229,8 @@ def app():
             "35a High Street",
             "35b High Street",
             "Flat Over 20 Holborough Road",
-            "Flat above 7 Malling Road"
+            "Flat above 7 Malling Road",
+            "Cobnut Barn",
         ]:
             print(config["Address"])
             uprn = None
@@ -292,3 +293,10 @@ def app():
         "measures": measures,
         "budget": None,
     }
+
+
+output = []
+for r in self.results:
+    output.append(r["DPA"])
+
+output = pd.DataFrame(output)

From 9217ef67f4ca47d273d80efe8a565efc9d346ec0 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 6 Jun 2024 10:34:36 +0100
Subject: [PATCH 28/80] minor change to mds api

---
 backend/app/plan/router.py         | 190 +++++++++++++++++------------
 etl/customers/eon/deck_examples.py |  15 +--
 2 files changed, 117 insertions(+), 88 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index e0add281..9caab324 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -841,6 +841,11 @@ async def build_mds(body: MdsRequest):
                 epc_before = p.data["current-energy-rating"]
                 heat_demand_before = p.data["energy-consumption-current"]
                 carbon_before = p.data["co2-emissions-current"]
+                current_adjusted_energy = AnnualBillSavings.adjust_energy_to_metered(
+                    epc_energy_consumption=heat_demand_before * p.floor_area,
+                    current_epc_rating=epc_before,
+                )
+                current_energy_bill = AnnualBillSavings.calculate_annual_bill(current_adjusted_energy)
 
                 package_comparison = []
                 for _id in recommendations[p.id].keys():
@@ -904,17 +909,11 @@ async def build_mds(body: MdsRequest):
                     heat_demand_after = heat_demand_prediction["predictions"].values[-1]
                     carbon_after = carbon_prediction["predictions"].values[-1]
 
-                    current_adjusted_energy = AnnualBillSavings.adjust_energy_to_metered(
-                        epc_energy_consumption=heat_demand_before * p.floor_area,
-                        current_epc_rating=epc_before,
-                    )
-
                     expected_adjusted_energy = AnnualBillSavings.adjust_energy_to_metered(
                         epc_energy_consumption=heat_demand_after * p.floor_area,
                         current_epc_rating=epc_before,
                     )
 
-                    current_energy_bill = AnnualBillSavings.calculate_annual_bill(current_adjusted_energy)
                     expected_energy_bill = AnnualBillSavings.calculate_annual_bill(expected_adjusted_energy)
 
                     bill_savings = current_energy_bill - expected_energy_bill
@@ -935,6 +934,7 @@ async def build_mds(body: MdsRequest):
                             "carbon_after": carbon_after,
                             "bill_savings": bill_savings,
                             "energy_savings": energy_savings,
+                            "current_energy_bill": current_energy_bill,
                             "meets_threshold": meets_threshold
                         }
                     )
@@ -965,6 +965,7 @@ async def build_mds(body: MdsRequest):
                         "carbon_after": carbon_before,
                         "bill_savings": 0,
                         "energy_savings": 0,
+                        "current_energy_bill": current_energy_bill,
                         "meets_threshold": False
                     }
 
@@ -990,81 +991,27 @@ async def build_mds(body: MdsRequest):
                     "heat_demand_after": package_comparison["heat_demand_after"],
                     "carbon_before": package_comparison["carbon_before"],
                     "carbon_after": package_comparison["carbon_after"],
-                    "bill_savings": package_comparison["bill_savings"],
-                    "energy_savings": package_comparison["energy_savings"],
+                    "bill_savings": round(package_comparison["bill_savings"], 2),
+                    "energy_savings": round(package_comparison["energy_savings"], 2),
+                    "current_energy_bill": round(package_comparison["current_energy_bill"], 2),
+                    "EWI": "EWI" if "external_wall_insulation" in package_comparison["measures"] else None,
+                    "CWI": "CWI" if "cavity_wall_insulation" in package_comparison["measures"] else None,
+                    "LI": "LI" if "loft_insulation" in package_comparison["measures"] else None,
+                    "ASHP Htg": "ASHP Htg" if "air_source_heat_pump" in package_comparison["measures"] else None,
+                    "Elec Storage": (
+                        "Elec Storage Htrs (Out of scope -Prov sum only)" if "high_heat_retention_storage_heaters" in
+                                                                             package_comparison["measures"] else None
+                    ),
+                    "Solar PV": "Solar PV" if "solar_pv" in package_comparison["measures"] else None,
                 })
 
             results = pd.DataFrame(results)
 
             # For the different measures, we check the impact with a few debugging functions
 
-            def check_mds(results, input_properties, recommendations):
-                import ast
-                walls_check = []
-                hhr_check = []
-                for p in input_properties:
-                    res = results[results["uprn"] == p.uprn]
-                    wall = p.walls
-                    heating = p.main_heating
-                    heating_controls = p.main_heating_controls
-                    wall_recommendation = [
-                        x for x in res["measures"].values[0] if
-                        x in ["internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation"]
-                    ]
+            walls_check, hhr_check = check_mds(results, input_properties, recommendations, optimise_measures)
 
-                    hhr_recommendation = [
-                        x for x in res["measures"].values[0] if
-                        x in ["high_heat_retention_storage_heaters"]
-                    ]
-
-                    possible_measures = [ast.literal_eval(x) for x in list(recommendations[p.id].keys())]
-                    # Unlist them
-                    possible_measures = [x for sublist in possible_measures for x in sublist]
-                    possible_measures = list(set(possible_measures))
-
-                    if wall_recommendation:
-                        if len(wall_recommendation) > 1:
-                            raise Exception("something went wrong")
-                        wall_recommendation = wall_recommendation[0]
-                    else:
-                        wall_recommendation = None
-
-                    hhr_recommendation = hhr_recommendation[0] if hhr_recommendation else None
-
-                    walls_check.append(
-                        {
-                            "uprn": p.uprn,
-                            "address": p.address,
-                            "postcode": p.postcode,
-                            "conservation_status": p.spatial["conservation_status"],
-                            "is_listed_building": p.spatial["is_listed_building"],
-                            "is_heritage_building": p.spatial["is_heritage_building"],
-                            "wall": wall["clean_description"],
-                            "recommendation": wall_recommendation,
-                            "possible_measures": possible_measures,
-                            "selected_measures": res["measures"].values[0],
-                        }
-                    )
-
-                    hhr_check.append(
-                        {
-                            "uprn": p.uprn,
-                            "address": p.address,
-                            "postcode": p.postcode,
-                            "heating": heating["clean_description"],
-                            "heating_controls": heating_controls["clean_description"],
-                            "recommendation": hhr_recommendation,
-                            "possible_measures": possible_measures,
-                            "selected_measures": res["measures"].values[0],
-                        }
-                    )
-
-                walls_check = pd.DataFrame(walls_check)
-                hhr_check = pd.DataFrame(hhr_check)
-
-                return walls_check, hhr_check
-
-            walls_check, hhr_check = check_mds(results, input_properties, recommendations)
+            results.to_excel("optimised mds_results 5th June.xlsx")
 
         results = []
         for p in input_properties:
@@ -1114,11 +1061,14 @@ async def build_mds(body: MdsRequest):
             )
 
             # TODO: We should determine if the home is gas & electricity or just electricity
+
+            # Determine if the heating and hotwater was previously electric only or both
+
             current_energy_bill = AnnualBillSavings.calculate_annual_bill(
-                current_adjusted_energy,
+                kwh=current_adjusted_energy,
             )
             expected_energy_bill = AnnualBillSavings.calculate_annual_bill(
-                expected_adjusted_energy,
+                kwh=expected_adjusted_energy,
             )
 
             bill_savings = current_energy_bill - expected_energy_bill
@@ -1133,6 +1083,7 @@ async def build_mds(body: MdsRequest):
             to_append = {
                 "config_address": config["address"],
                 "config_postcode": config["postcode"],
+                "uprn": p.uprn,
                 "address": p.address,
                 "postcode": p.postcode,
                 "measures": measures,
@@ -1146,15 +1097,19 @@ async def build_mds(body: MdsRequest):
                 "heat_demand_after": heat_demand_after,
                 "carbon_before": carbon_before,
                 "carbon_after": carbon_after,
-                "bill_savings": bill_savings,
-                "energy_savings": energy_savings,
+                "bill_savings": round(bill_savings, 2),
+                "energy_savings": round(energy_savings, 2),
+                "current_energy_bill": round(current_energy_bill, 2),
+                "fuel_type": p.main_fuel["fuel_type"],
             }
             results.append(to_append)
 
         results = pd.DataFrame(results)
         results["sap_uplift"] = results["sap_after"] - results["sap_before"]
 
-        # results.to_excel("mds_results 30th May.xlsx")
+        # results.to_excel("mds_results 5th June.xlsx")
+
+        walls_check, hhr_check = check_mds(results, input_properties, recommendations, optimise_measures)
 
     except IntegrityError:
         logger.error("Database integrity error occurred", exc_info=True)
@@ -1174,3 +1129,80 @@ async def build_mds(body: MdsRequest):
         return Response(status_code=500, content="An unexpected error occurred.")
     finally:
         session.close()
+
+
+def check_mds(results, input_properties, recommendations, optimise_measures):  # debug helper: wall / HHR check tables
+    import ast
+    walls_check = []
+    hhr_check = []
+    for p in input_properties:
+        res = results[results["uprn"] == p.uprn]  # assumes a matching row exists; .values[0] below fails otherwise
+        wall = p.walls
+        heating = p.main_heating
+        heating_controls = p.main_heating_controls
+        # Selected measures: used as-is when optimise_measures is set, otherwise the first key of each entry is taken
+        if optimise_measures:
+            measures = res["measures"].values[0]
+        else:
+            measures = [list(z.keys())[0] for z in res["measures"].values[0]]
+        # Wall-insulation measures among the selected ones
+        wall_recommendation = [
+            x for x in measures if
+            x in ["internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation"]
+        ]
+
+        hhr_recommendation = [
+            x for x in measures if
+            x in ["high_heat_retention_storage_heaters"]
+        ]
+        # Candidate measures: parsed from the keys of recommendations[p.id] when optimising, else taken from p.measures
+        if optimise_measures:
+            possible_measures = [ast.literal_eval(x) for x in list(recommendations[p.id].keys())]
+            # Flatten the parsed lists into one list, then de-duplicate (order is not preserved by set)
+            possible_measures = [x for sublist in possible_measures for x in sublist]
+            possible_measures = list(set(possible_measures))
+        else:
+            possible_measures = p.measures
+        # At most one wall measure is expected per property; more than one is treated as an error
+        if wall_recommendation:
+            if len(wall_recommendation) > 1:
+                raise Exception("something went wrong")
+            wall_recommendation = wall_recommendation[0]
+        else:
+            wall_recommendation = None
+
+        hhr_recommendation = hhr_recommendation[0] if hhr_recommendation else None
+
+        walls_check.append(
+            {
+                "uprn": p.uprn,
+                "address": p.address,
+                "postcode": p.postcode,
+                "property_type": p.data['property-type'],
+                "conservation_status": p.spatial["conservation_status"],
+                "is_listed_building": p.spatial["is_listed_building"],
+                "is_heritage_building": p.spatial["is_heritage_building"],
+                "wall": wall["clean_description"],
+                "recommendation": wall_recommendation,
+                "possible_measures": possible_measures,
+                "selected_measures": res["measures"].values[0],
+            }
+        )
+
+        hhr_check.append(
+            {
+                "uprn": p.uprn,
+                "address": p.address,
+                "postcode": p.postcode,
+                "heating": heating["clean_description"],
+                "heating_controls": heating_controls["clean_description"],
+                "recommendation": hhr_recommendation,
+                "possible_measures": possible_measures,
+                "selected_measures": res["measures"].values[0],
+            }
+        )
+    # One row per input property in each table; both are returned as DataFrames
+    walls_check = pd.DataFrame(walls_check)
+    hhr_check = pd.DataFrame(hhr_check)
+
+    return walls_check, hhr_check
diff --git a/etl/customers/eon/deck_examples.py b/etl/customers/eon/deck_examples.py
index 8773ce09..e0e3abe9 100644
--- a/etl/customers/eon/deck_examples.py
+++ b/etl/customers/eon/deck_examples.py
@@ -11,8 +11,8 @@ load_dotenv(dotenv_path="backend/.env")
 EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
 
 searcher = SearchEpc(
-    address1="Flat above 7 Malling Road",
-    postcode="ME6 5AA",
+    address1="108 Blacklands",
+    postcode="ME19 6DP",
     auth_token=EPC_AUTH_TOKEN,
     os_api_key="",
     property_type=None,
@@ -20,11 +20,8 @@ searcher = SearchEpc(
 )
 
 res = searcher.estimate_epc(
-    property_type="Flat",
-    built_form="Mid-Terrace",
-    lmks_to_drop=[
-        "4c3714a59744ab2c6e60441f0fa0eb903f283c6c62d0691e108cadbc7b5a8caa",
-        "363197839762013013017062127708717",
-        "363197811132009091518041845968302"
-    ]
+    property_type="Bungalow",
+    built_form="Detached",
+    lmks_to_drop=["849273656952012102323315196229804"],
+    exclude_old=True
 )

From 461cdd23674eb556c2e072ba55030068f8dbaacb Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 6 Jun 2024 11:18:35 +0100
Subject: [PATCH 29/80] set up basic data preparation process

---
 .idea/Model.iml                               |  2 +-
 .idea/misc.xml                                |  2 +-
 etl/customers/stonewater/shdf_3_clustering.py | 75 +++++++++++++++++++
 3 files changed, 77 insertions(+), 2 deletions(-)
 create mode 100644 etl/customers/stonewater/shdf_3_clustering.py

diff --git a/.idea/Model.iml b/.idea/Model.iml
index 4413bb06..b0f9c00d 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 6f308057..1122b380 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/etl/customers/stonewater/shdf_3_clustering.py b/etl/customers/stonewater/shdf_3_clustering.py
new file mode 100644
index 00000000..1a84f1d4
--- /dev/null
+++ b/etl/customers/stonewater/shdf_3_clustering.py
@@ -0,0 +1,75 @@
+import numpy as np
+import pandas as pd
+
+
+def app():
+    """
+    This script handles the preparation of the data from Stonewater, to archetype a collection
+    of 5.3k properties and reduce that down to a representative set of 450 properties.
+
+    Here, we prepare the input data for clustering
+    :return:
+    """
+
+    # TODO: Temp read from local machine - move to s3
+    asset_list = pd.read_excel(
+        "/Users/khalimconn-kowlessar/Downloads/Stonewater SHDF_3_0_Board Triage 22.05.24.xlsx", header=4
+    )
+
+    # Drop the bottom 4 rows, which are completely missing
+    asset_list = asset_list.head(-4)
+
+    # Keep just the columns we're interested in
+    asset_list = asset_list[
+        [
+            "Osm. ID",
+            "Org. ref.",
+            "Postcode",
+            "House no",
+            "Name",
+            "Address line 2",
+            "City/Town",
+            "County",
+            "Address ID",  # This is not uprn
+        ]
+    ].rename(
+        columns={
+            "Osm. ID": "internal_id",
+            "Org. ref.": "customer_asset_id",
+            "Postcode": "postcode",
+            "House no": "house_number",
+            "Name": "address1",
+            "Address line 2": "address2",
+            "City/Town": "city_town",
+            "County": "county",
+            "Address ID": "external_address_id",
+        }
+    )
+
+    # Create full address
+    # TODO: handle cases where one of these is null
+    asset_list["full_address"] = (
+        asset_list["address1"] + ", " +
+        asset_list["address2"] + ", " +
+        asset_list["city_town"] + ", " +
+        asset_list["county"] + ", " +
+        asset_list["postcode"]
+    )
+
+    asset_list["full_address"] = np.where(
+        ~pd.isnull(asset_list["address2"]),
+        (
+            asset_list["address1"] + ", " +
+            asset_list["address2"] + ", " +
+            asset_list["city_town"].str.title() + ", " +
+            asset_list["county"] + ", " +
+            asset_list["postcode"]
+        ),
+        asset_list["address1"] + ", " +
+        asset_list["city_town"].str.title() + ", " +
+        asset_list["county"] + ", " +
+        asset_list["postcode"]
+    )
+
+    if pd.isnull(asset_list["full_address"]).sum():
+        raise ValueError("Missing full addresses")

From 236aaa1f1c1cb402b7c07da8c19024de345a158b Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 6 Jun 2024 11:29:46 +0100
Subject: [PATCH 30/80] setting up data pull code

---
 etl/customers/stonewater/shdf_3_clustering.py | 39 +++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/etl/customers/stonewater/shdf_3_clustering.py b/etl/customers/stonewater/shdf_3_clustering.py
index 1a84f1d4..ad5d89dc 100644
--- a/etl/customers/stonewater/shdf_3_clustering.py
+++ b/etl/customers/stonewater/shdf_3_clustering.py
@@ -73,3 +73,42 @@ def app():
 
     if pd.isnull(asset_list["full_address"]).sum():
         raise ValueError("Missing full addresses")
+
+    # TODO: Store in S3
+
+    # TODO: Move ths
+    # Pull in the data
+
+    import os
+    from dotenv import load_dotenv
+    from backend.SearchEpc import SearchEpc
+
+    load_dotenv(dotenv_path="backend/.env")
+    EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+    # Perform an initial pull without ordnance survey data
+    epc_data = []
+    older_epc_data = {}
+    for row_number, asset in asset_list.iterrows():
+        searcher = SearchEpc(
+            address1=asset["address1"],
+            postcode=asset["postcode"],
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key="",
+            full_address=asset["full_address"],
+            uprn=asset.get("uprn", None),
+        )
+        searcher.find_property(skip_os=True)
+
+        if searcher.newest_epc is None:
+            continue
+
+        epc_data.append(
+            {
+                "internal_id": asset["internal_id"],
+                **searcher.newest_epc
+            }
+        )
+
+        if searcher.older_epcs is not None:
+            older_epc_data[asset["internal_id"]] = searcher.older_epcs

From 8e33e8bce42a6dc6c24056f6ec18935fe84d9d26 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 7 Jun 2024 11:46:08 +0100
Subject: [PATCH 31/80] set up ordnance survey pipeline for stonewater

---
 backend/SearchEpc.py                          |   9 +-
 backend/app/plan/router.py                    |  38 ++++
 etl/customers/stonewater/shdf_3_clustering.py | 202 +++++++++++++++++-
 3 files changed, 238 insertions(+), 11 deletions(-)

diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py
index 4c329448..62ae307f 100644
--- a/backend/SearchEpc.py
+++ b/backend/SearchEpc.py
@@ -191,15 +191,14 @@ class SearchEpc:
         self.property_type = property_type
         self.fast = fast
 
-    @classmethod
-    def get_house_number(cls, address: str) -> str | None:
+    @staticmethod
+    def get_house_number(address: str) -> str | None:
         """
         This method uses the usaddress library to parse an address and extract the primary house or flat number.
         """
         try:
-
-            # Custom regex to catch a broad range of cases
-            pattern = r'(?i)(?:flat|apartment)\s*(\d+)|^\s*(\d+)'
+            # Updated regex to catch house numbers including alphanumeric ones
+            pattern = r'(?i)(?:flat|apartment)\s*(\d+\w*)|^\s*(\d+\w*)'
             match = re.search(pattern, address)
             if match:
                 return next(g for g in match.groups() if g is not None)
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 9caab324..91a5ce0d 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -1206,3 +1206,41 @@ def check_mds(results, input_properties, recommendations, optimise_measures):
     hhr_check = pd.DataFrame(hhr_check)
 
     return walls_check, hhr_check
+
+
+from utils.s3 import read_dataframe_from_s3_parquet
+# NOTE(review): the statements below are at module level, so they run whenever this module is imported - confirm intended
+z = read_dataframe_from_s3_parquet(
+    bucket_name="retrofit-data-dev",
+    file_key="sap_change_model/2024-05-28-19-08-25/dataset_rooms.parquet"
+)
+# Keep rows where heat demand changed but the wall, roof and floor thermal-transmittance columns did not
+k = z[z["heat_demand_ending"] != z["heat_demand_starting"]]
+k = k[k["walls_thermal_transmittance"] == k["walls_thermal_transmittance_ending"]]
+k = k[k["roof_thermal_transmittance"] == k["roof_thermal_transmittance_ending"]]
+k = k[k["floor_thermal_transmittance"] == k["floor_thermal_transmittance_ending"]]
+ending_cols = [c for c in k.columns if "_ending" in c]
+eg = k.head(2).tail(1).squeeze()
+# For the single example row `eg`, record every *_ending column whose value differs from its starting counterpart
+diff = []
+for c in ending_cols:
+    split = c.split("_ending")[0]
+    if split + "_starting" in k.columns:
+        starting_col = split + "_starting"
+    else:
+        starting_col = split
+
+    b4 = eg[starting_col]
+    after = eg[c]
+    if b4 != after:
+        diff.append(
+            {
+                "measure": split,
+                "starting": b4,
+                "ending": after
+            }
+        )
+diff = pd.DataFrame(diff)
+eg["heat_demand_starting"]  # these bare expressions are evaluated and discarded (console leftovers? - confirm)
+eg["heat_demand_ending"]
+eg["uprn"]
diff --git a/etl/customers/stonewater/shdf_3_clustering.py b/etl/customers/stonewater/shdf_3_clustering.py
index ad5d89dc..e72c5000 100644
--- a/etl/customers/stonewater/shdf_3_clustering.py
+++ b/etl/customers/stonewater/shdf_3_clustering.py
@@ -1,5 +1,50 @@
+import json
+from tqdm import tqdm
+
+from fuzzywuzzy import fuzz
 import numpy as np
 import pandas as pd
+import time
+from utils.s3 import save_data_to_s3, read_excel_from_s3, read_from_s3
+
+
+def remove_commas_and_full_stops(input_string: str) -> str:
+    """
+    Removes commas and full stops from the input string.
+
+    Args:
+    input_string (str): The string from which to remove commas and full stops.
+
+    Returns:
+    str: The string with commas and full stops removed.
+    """
+    return input_string.replace(',', '').replace('.', '')
+
+
+def get_places_with_retry(searcher, max_retries=5, wait_time=2):
+    """
+    Tries to call the get_places_api method up to max_retries times,
+    with a wait_time interval between attempts in case of failure.
+
+    Args:
+    searcher (object): The searcher object with the ordnance_survey_client.
+    max_retries (int): Maximum number of retry attempts.
+    wait_time (int): Wait time in seconds between retries.
+
+    Returns:
+    result: The result from the get_places_api method or None if all attempts fail.
+    """
+    for attempt in range(max_retries):
+        try:
+            result = searcher.ordnance_survey_client.get_places_api()
+            return result  # Return the result if successful
+        except Exception as e:
+            print(f"Attempt {attempt + 1} failed with error: {e}")
+            if attempt < max_retries - 1:
+                print(f"Retrying in {wait_time} seconds...")
+                time.sleep(wait_time)
+    print(f"All {max_retries} attempts failed.")
+    return None
 
 
 def app():
@@ -16,6 +61,12 @@ def app():
         "/Users/khalimconn-kowlessar/Downloads/Stonewater SHDF_3_0_Board Triage 22.05.24.xlsx", header=4
     )
 
+    # asset_list = read_excel_from_s3(
+    #     file_key="customers/Stonewater/Stonewater SHDF_3_0_Board Triage 22.05.24.xlsx",
+    #     bucket_name="retrofit-data-dev",
+    #     header_row=4
+    # )
+
     # Drop the bottom 4 rows, which are completely missing
     asset_list = asset_list.head(-4)
 
@@ -62,12 +113,12 @@ def app():
             asset_list["address1"] + ", " +
             asset_list["address2"] + ", " +
             asset_list["city_town"].str.title() + ", " +
-            asset_list["county"] + ", " +
+            # asset_list["county"] + ", " +
             asset_list["postcode"]
         ),
         asset_list["address1"] + ", " +
         asset_list["city_town"].str.title() + ", " +
-        asset_list["county"] + ", " +
+        # asset_list["county"] + ", " +
         asset_list["postcode"]
     )
 
@@ -89,13 +140,14 @@ def app():
     # Perform an initial pull without ordnance survey data
     epc_data = []
     older_epc_data = {}
-    for row_number, asset in asset_list.iterrows():
+
+    for row_number, asset in tqdm(asset_list.iterrows(), total=len(asset_list)):
         searcher = SearchEpc(
-            address1=asset["address1"],
-            postcode=asset["postcode"],
+            address1=str(asset["address1"]),
+            postcode=str(asset["postcode"]),
             auth_token=EPC_AUTH_TOKEN,
             os_api_key="",
-            full_address=asset["full_address"],
+            full_address=str(asset["full_address"]),
             uprn=asset.get("uprn", None),
         )
         searcher.find_property(skip_os=True)
@@ -112,3 +164,141 @@ def app():
 
         if searcher.older_epcs is not None:
             older_epc_data[asset["internal_id"]] = searcher.older_epcs
+
+    # # Store to S3
+    # save_data_to_s3(
+    #     data=json.dumps(epc_data),
+    #     s3_file_name="customers/Stonewater/clustering/epc_data.json",
+    #     bucket_name="retrofit-data-dev"
+    # )
+    #
+    # save_data_to_s3(
+    #     data=json.dumps(older_epc_data),
+    #     s3_file_name="customers/Stonewater/clustering/old_epc_data.json",
+    #     bucket_name="retrofit-data-dev"
+    # )
+    # We read this directly from s3
+    epc_data = json.loads(
+        read_from_s3(
+            bucket_name="retrofit-data-dev",
+            s3_file_name="customers/Stonewater/clustering/epc_data.json"
+        )
+    )
+
+    older_epc_data = json.loads(
+        read_from_s3(
+            bucket_name="retrofit-data-dev",
+            s3_file_name="customers/Stonewater/clustering/old_epc_data.json"
+        )
+    )
+
+    # TODO: Perform a comparison between the EPC address and the asset list address, just to double check
+
+    epc_data_df = pd.DataFrame(epc_data)
+    address_comparison = (
+        asset_list[["internal_id", "full_address", "postcode", "house_number", "address1"]].merge(
+            epc_data_df[["internal_id", "address", "postcode", "address1"]].rename(
+                columns={
+                    "address": "epc_address",
+                    "postcode": "epc_postcode",
+                    "address1": "epc_address1"
+                }
+            ),
+            how="inner",
+            on="internal_id"
+        )
+    )
+
+    # Produce a metric, showing the matching confidence between the two
+    address_comparison["epc_extracted_house_number"] = address_comparison["epc_address1"].apply(
+        lambda x: SearchEpc.get_house_number(x)
+    )
+
+    address_comparison["house_numbers_match"] = (
+        address_comparison["house_number"].str.lower() == address_comparison["epc_extracted_house_number"].str.lower()
+    )
+
+    # We also produce a address similarity metric
+    # We convert the strings to lower and remove common punctuation
+
+    address_comparison["address_similarity_score"] = address_comparison.apply(
+        lambda x: fuzz.ratio(
+            remove_commas_and_full_stops(x["address1"].lower()),
+            remove_commas_and_full_stops(x["epc_address1"].lower())
+        ),
+        axis=1
+    )
+
+    address_comparison = address_comparison.sort_values("address_similarity_score", ascending=True)
+    address_comparison = address_comparison[
+        ["internal_id", "full_address", "epc_address", "address_similarity_score", "house_numbers_match"]
+    ]
+
+    # Anything with less than a 90 similarity score, let's do again
+    needs_ordnance_survey = address_comparison[
+        (address_comparison["address_similarity_score"] <= 90) |
+        (~address_comparison["house_numbers_match"])
+        ].copy()
+
+    is_ok = address_comparison[~address_comparison["internal_id"].isin(needs_ordnance_survey["internal_id"])]
+    is_ok = is_ok.sort_values("address_similarity_score", ascending=True)
+
+    os_data_pull_asset_list = asset_list[
+        ~asset_list["internal_id"].isin(is_ok["internal_id"].values)
+    ].copy()
+    os_data_pull_asset_list = os_data_pull_asset_list.reset_index(drop=True)
+
+    # For each of these records, we pull the OS data
+    ORDNANCE_SURVEY_API_KEY = ""  # This API key is a temp key which
+    os_most_relevant = []
+    os_all = {}
+    errors = []
+    for _, asset in tqdm(os_data_pull_asset_list.iterrows(), total=len(os_data_pull_asset_list)):
+        # Calls are throttled to 50 per minute in development mode, so lets just slow this down
+        time.sleep(1.3)
+
+        searcher = SearchEpc(
+            address1=str(asset["address1"]),
+            postcode=str(asset["postcode"]),
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key=ORDNANCE_SURVEY_API_KEY,
+            full_address=str(asset["full_address"]),
+            uprn=asset.get("uprn", None),
+        )
+        searcher.ordnance_survey_client.full_address = asset["full_address"]
+        # Attempt to get places data with retry logic
+        result = get_places_with_retry(searcher)
+
+        if result:
+            # Get the most relevant response
+            os_most_relevant.append(
+                {
+                    "internal_id": asset["internal_id"],
+                    **searcher.ordnance_survey_client.most_relevant_result
+                }
+            )
+
+            # Also keep the best 100 results
+            os_all[asset["internal_id"]] = searcher.ordnance_survey_client.results
+        else:
+            # Record the internal_id of the asset that failed
+            errors.append(asset["internal_id"])
+
+    # Store to S3
+    save_data_to_s3(
+        data=json.dumps(os_most_relevant),
+        s3_file_name="customers/Stonewater/clustering/os_most_relevant.json",
+        bucket_name="retrofit-data-dev"
+    )
+
+    save_data_to_s3(
+        data=json.dumps(os_all),
+        s3_file_name="customers/Stonewater/clustering/os_all.json",
+        bucket_name="retrofit-data-dev"
+    )
+
+    save_data_to_s3(
+        data=json.dumps(errors),
+        s3_file_name="customers/Stonewater/clustering/errors.json",
+        bucket_name="retrofit-data-dev"
+    )

From 9e32b8bf740f96b26f140218c136a08fa98c35df Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 7 Jun 2024 14:31:22 +0100
Subject: [PATCH 32/80] working on collating the data from OS for Stonewater

---
 etl/customers/stonewater/shdf_3_clustering.py | 251 ++++++++++--------
 1 file changed, 142 insertions(+), 109 deletions(-)

diff --git a/etl/customers/stonewater/shdf_3_clustering.py b/etl/customers/stonewater/shdf_3_clustering.py
index e72c5000..45b435ed 100644
--- a/etl/customers/stonewater/shdf_3_clustering.py
+++ b/etl/customers/stonewater/shdf_3_clustering.py
@@ -1,5 +1,8 @@
 import json
 from tqdm import tqdm
+import os
+from dotenv import load_dotenv
+from backend.SearchEpc import SearchEpc
 
 from fuzzywuzzy import fuzz
 import numpy as np
@@ -7,6 +10,9 @@ import pandas as pd
 import time
 from utils.s3 import save_data_to_s3, read_excel_from_s3, read_from_s3
 
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
 
 def remove_commas_and_full_stops(input_string: str) -> str:
     """
@@ -36,13 +42,19 @@ def get_places_with_retry(searcher, max_retries=5, wait_time=2):
     """
     for attempt in range(max_retries):
         try:
-            result = searcher.ordnance_survey_client.get_places_api()
-            return result  # Return the result if successful
+            response = searcher.ordnance_survey_client.get_places_api()
+            status = response.get("status")
+            if status == 200:
+                return response  # Return the result if successful
+            else:
+                print(f"Attempt {attempt + 1} failed with status code: {status}")
         except Exception as e:
             print(f"Attempt {attempt + 1} failed with error: {e}")
-            if attempt < max_retries - 1:
-                print(f"Retrying in {wait_time} seconds...")
-                time.sleep(wait_time)
+
+        if attempt < max_retries - 1:
+            print(f"Retrying in {wait_time} seconds...")
+            time.sleep(wait_time)
+
     print(f"All {max_retries} attempts failed.")
     return None
 
@@ -57,16 +69,16 @@ def app():
     """
 
     # TODO: Temp read from local machine - move to s3
-    asset_list = pd.read_excel(
-        "/Users/khalimconn-kowlessar/Downloads/Stonewater SHDF_3_0_Board Triage 22.05.24.xlsx", header=4
-    )
-
-    # asset_list = read_excel_from_s3(
-    #     file_key="customers/Stonewater/Stonewater SHDF_3_0_Board Triage 22.05.24.xlsx",
-    #     bucket_name="retrofit-data-dev",
-    #     header_row=4
+    # asset_list = pd.read_excel(
+    #     "/Users/khalimconn-kowlessar/Downloads/Stonewater SHDF_3_0_Board Triage 22.05.24.xlsx", header=4
     # )
 
+    asset_list = read_excel_from_s3(
+        file_key="customers/Stonewater/Stonewater SHDF_3_0_Board Triage 22.05.24.xlsx",
+        bucket_name="retrofit-data-dev",
+        header_row=4
+    )
+
     # Drop the bottom 4 rows, which are completely missing
     asset_list = asset_list.head(-4)
 
@@ -98,15 +110,6 @@ def app():
     )
 
     # Create full address
-    # TODO: handle cases where one of these is null
-    asset_list["full_address"] = (
-        asset_list["address1"] + ", " +
-        asset_list["address2"] + ", " +
-        asset_list["city_town"] + ", " +
-        asset_list["county"] + ", " +
-        asset_list["postcode"]
-    )
-
     asset_list["full_address"] = np.where(
         ~pd.isnull(asset_list["address2"]),
         (
@@ -125,46 +128,37 @@ def app():
     if pd.isnull(asset_list["full_address"]).sum():
         raise ValueError("Missing full addresses")
 
-    # TODO: Store in S3
-
-    # TODO: Move ths
     # Pull in the data
-
-    import os
-    from dotenv import load_dotenv
-    from backend.SearchEpc import SearchEpc
-
-    load_dotenv(dotenv_path="backend/.env")
-    EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+    # This data has already been pulled as much as it can be, so we retrieve the existing extraction from S3
 
     # Perform an initial pull without ordnance survey data
-    epc_data = []
-    older_epc_data = {}
-
-    for row_number, asset in tqdm(asset_list.iterrows(), total=len(asset_list)):
-        searcher = SearchEpc(
-            address1=str(asset["address1"]),
-            postcode=str(asset["postcode"]),
-            auth_token=EPC_AUTH_TOKEN,
-            os_api_key="",
-            full_address=str(asset["full_address"]),
-            uprn=asset.get("uprn", None),
-        )
-        searcher.find_property(skip_os=True)
-
-        if searcher.newest_epc is None:
-            continue
-
-        epc_data.append(
-            {
-                "internal_id": asset["internal_id"],
-                **searcher.newest_epc
-            }
-        )
-
-        if searcher.older_epcs is not None:
-            older_epc_data[asset["internal_id"]] = searcher.older_epcs
-
+    # epc_data = []
+    # older_epc_data = {}
+    #
+    # for row_number, asset in tqdm(asset_list.iterrows(), total=len(asset_list)):
+    #     searcher = SearchEpc(
+    #         address1=str(asset["address1"]),
+    #         postcode=str(asset["postcode"]),
+    #         auth_token=EPC_AUTH_TOKEN,
+    #         os_api_key="",
+    #         full_address=str(asset["full_address"]),
+    #         uprn=asset.get("uprn", None),
+    #     )
+    #     searcher.find_property(skip_os=True)
+    #
+    #     if searcher.newest_epc is None:
+    #         continue
+    #
+    #     epc_data.append(
+    #         {
+    #             "internal_id": asset["internal_id"],
+    #             **searcher.newest_epc
+    #         }
+    #     )
+    #
+    #     if searcher.older_epcs is not None:
+    #         older_epc_data[asset["internal_id"]] = searcher.older_epcs
+    #
     # # Store to S3
     # save_data_to_s3(
     #     data=json.dumps(epc_data),
@@ -192,7 +186,7 @@ def app():
         )
     )
 
-    # TODO: Perform a comparison between the EPC address and the asset list address, just to double check
+    # Perform a comparison between the EPC address and the asset list address, just to double check
 
     epc_data_df = pd.DataFrame(epc_data)
     address_comparison = (
@@ -246,59 +240,98 @@ def app():
     os_data_pull_asset_list = asset_list[
         ~asset_list["internal_id"].isin(is_ok["internal_id"].values)
     ].copy()
+
+    # We have already done a partial pull of the Ordnance survey data so we can skip some of the records
+    # os_most_relevant_1 = json.loads(
+    #     read_from_s3(
+    #         bucket_name="retrofit-data-dev",
+    #         s3_file_name="customers/Stonewater/clustering/os_most_relevant_1.json"
+    #     )
+    # )
+    #
+    # os_most_relevant_2 = json.loads(
+    #     read_from_s3(
+    #         bucket_name="retrofit-data-dev",
+    #         s3_file_name="customers/Stonewater/clustering/os_most_relevant_2.json"
+    #     )
+    # )
+    #
+    # fetched_internal_ids = (
+    #     [x["internal_id"] for x in os_most_relevant_1] + [x["internal_id"] for x in os_most_relevant_2]
+    # )
+    #
+    # # We remove any ids we've already fetched
+    # os_data_pull_asset_list = os_data_pull_asset_list[
+    #     ~os_data_pull_asset_list["internal_id"].isin(fetched_internal_ids)
+    # ]
+    #
+    # # Our OK EPC data (is_ok) + ordnance survey fetched data + the data we need to fetch should equal the total
+    # # number of assets
+    # assert len(is_ok) + len(fetched_internal_ids) + len(os_data_pull_asset_list) == len(asset_list)
+
     os_data_pull_asset_list = os_data_pull_asset_list.reset_index(drop=True)
 
     # For each of these records, we pull the OS data
-    ORDNANCE_SURVEY_API_KEY = ""  # This API key is a temp key which
-    os_most_relevant = []
-    os_all = {}
-    errors = []
-    for _, asset in tqdm(os_data_pull_asset_list.iterrows(), total=len(os_data_pull_asset_list)):
-        # Calls are throttled to 50 per minute in development mode, so lets just slow this down
-        time.sleep(1.3)
-
-        searcher = SearchEpc(
-            address1=str(asset["address1"]),
-            postcode=str(asset["postcode"]),
-            auth_token=EPC_AUTH_TOKEN,
-            os_api_key=ORDNANCE_SURVEY_API_KEY,
-            full_address=str(asset["full_address"]),
-            uprn=asset.get("uprn", None),
-        )
-        searcher.ordnance_survey_client.full_address = asset["full_address"]
-        # Attempt to get places data with retry logic
-        result = get_places_with_retry(searcher)
-
-        if result:
-            # Get the most relevant response
-            os_most_relevant.append(
-                {
-                    "internal_id": asset["internal_id"],
-                    **searcher.ordnance_survey_client.most_relevant_result
-                }
-            )
-
-            # Also keep the best 100 results
-            os_all[asset["internal_id"]] = searcher.ordnance_survey_client.results
-        else:
-            # Record the internal_id of the asset that failed
-            errors.append(asset["internal_id"])
+    # ORDNANCE_SURVEY_API_KEY = ""  # This API key is a temp key which I have copied locally
+    # os_most_relevant = []
+    # os_all = {}
+    # errors = []
+    # for _, asset in tqdm(os_data_pull_asset_list.iterrows(), total=len(os_data_pull_asset_list)):
+    #     # Calls are throttled to 50 per minute in development mode, so lets just slow this down
+    #     time.sleep(2)
+    #
+    #     searcher = SearchEpc(
+    #         address1=str(asset["address1"]),
+    #         postcode=str(asset["postcode"]),
+    #         auth_token=EPC_AUTH_TOKEN,
+    #         os_api_key=ORDNANCE_SURVEY_API_KEY,
+    #         full_address=str(asset["full_address"]),
+    #         uprn=asset.get("uprn", None),
+    #     )
+    #     searcher.ordnance_survey_client.full_address = asset["full_address"]
+    #     # Attempt to get places data with retry logic
+    #     result = get_places_with_retry(searcher)
+    #
+    #     if result:
+    #         # Get the most relevant response
+    #         os_most_relevant.append(
+    #             {
+    #                 "internal_id": asset["internal_id"],
+    #                 **searcher.ordnance_survey_client.most_relevant_result
+    #             }
+    #         )
+    #
+    #         # Also keep the best 100 results
+    #         os_all[asset["internal_id"]] = searcher.ordnance_survey_client.results
+    #     else:
+    #         # Record the internal_id of the asset that failed
+    #         print("Error for address: " + asset["full_address"])
+    #         errors.append(asset["internal_id"])
 
     # Store to S3
-    save_data_to_s3(
-        data=json.dumps(os_most_relevant),
-        s3_file_name="customers/Stonewater/clustering/os_most_relevant.json",
-        bucket_name="retrofit-data-dev"
-    )
+    # save_data_to_s3(
+    #     data=json.dumps(os_most_relevant),
+    #     s3_file_name="customers/Stonewater/clustering/os_most_relevant_3.json",
+    #     bucket_name="retrofit-data-dev"
+    # )
+    #
+    # save_data_to_s3(
+    #     data=json.dumps(os_all),
+    #     s3_file_name="customers/Stonewater/clustering/os_all_3.json",
+    #     bucket_name="retrofit-data-dev"
+    # )
+    #
+    # save_data_to_s3(
+    #     data=json.dumps(errors),
+    #     s3_file_name="customers/Stonewater/clustering/errors_3.json",
+    #     bucket_name="retrofit-data-dev"
+    # )
 
-    save_data_to_s3(
-        data=json.dumps(os_all),
-        s3_file_name="customers/Stonewater/clustering/os_all.json",
-        bucket_name="retrofit-data-dev"
-    )
+    # We now collate all of the data for the following steps:
+    # 1) Checking the retrieved ordnance survey data against the asset list addresses
+    # 2) A second round of querying the EPC api to find the EPC data, in case we retrieve something using uprn
+    # 3) Predicting the EPC data for the properties we have no data for
+    # 4) Retrieveing additional data against the internal_id
+    # 5) Creation of final dataset for clustering
 
-    save_data_to_s3(
-        data=json.dumps(errors),
-        s3_file_name="customers/Stonewater/clustering/errors.json",
-        bucket_name="retrofit-data-dev"
-    )
+    for i in ["1", "2", "3"]:

From 0c1ef69fba8a099386835960dbe3ab53351ef331 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 8 Jun 2024 07:51:17 +0100
Subject: [PATCH 33/80] Added postcode filter back to os api

---
 backend/OrdnanceSurvey.py                     |  38 +++-
 etl/customers/stonewater/shdf_3_clustering.py | 168 ++++++++++++++++++
 2 files changed, 200 insertions(+), 6 deletions(-)

diff --git a/backend/OrdnanceSurvey.py b/backend/OrdnanceSurvey.py
index 837e76bd..856dda7a 100644
--- a/backend/OrdnanceSurvey.py
+++ b/backend/OrdnanceSurvey.py
@@ -38,7 +38,11 @@ class OrdnanceSuveyClient:
             raise ValueError("No results found - run get_places_api first")
 
         self.address_os = self.most_relevant_result["ADDRESS"]
-        self.postcode_os = self.most_relevant_result["POSTCODE"]
+
+        if "POSTCODE" in self.most_relevant_result:
+            self.postcode_os = self.most_relevant_result["POSTCODE"]
+        else:
+            self.postcode_os = self.most_relevant_result["POSTCODE_LOCATOR"]
         # We strip out the postcode from the address as this is already stored separately
         self.address_os = self.address_os.replace(self.postcode_os, "").strip()
         # Remove trailing comma
@@ -49,7 +53,7 @@ class OrdnanceSuveyClient:
         self.postcode_os = self.postcode_os.upper()
 
     @lru_cache(maxsize=128)
-    def get_places_api(self):
+    def get_places_api(self, filter_by_postcode=False):
         """
         This method is tasked with getting the places api from the Ordnance Survey.
         """
@@ -58,16 +62,35 @@ class OrdnanceSuveyClient:
             raise ValueError("Ordnance Survey API key not specified")
 
         encoded_address_query = urllib.parse.quote(self.full_address)
-        url = (f"https://api.os.uk/search/places/v1/find?query={encoded_address_query}&key="
-               f"{self.api_key}")
+
+        url = (
+            f"https://api.os.uk/search/places/v1/find?query={encoded_address_query}&dataset=DPA,LPI&matchprecision=10"
+            f"&key={self.api_key}"
+        )
+
         response = requests.get(url)
         if response.status_code == 200:
             data = response.json()
-            results = data['results']
+            res = data["results"]
+
+            if filter_by_postcode:
+                results = []
+                for r in res:
+                    if "DPA" in r:
+                        if r["DPA"]["POSTCODE"] == self.postcode:
+                            results.append(r)
+                    elif "LPI" in r:
+                        if r["LPI"]["POSTCODE_LOCATOR"] == self.postcode:
+                            results.append(r)
+                    else:
+                        raise ValueError("Could not find postcode in either DPA or LPI")
+            else:
+                results = res
+
             self.results = results
 
             # Extract some details about the best match
-            self.most_relevant_result = self.results[0]["DPA"]
+            self.most_relevant_result = self.results[0]["DPA"] if "DPA" in self.results[0] else self.results[0]["LPI"]
 
             self.parse_classification_code(self.most_relevant_result["CLASSIFICATION_CODE"])
             self.set_places_address()
@@ -99,6 +122,9 @@ class OrdnanceSuveyClient:
             'RD04': {'property_type': 'House', 'built_form': 'Mid-Terrace'},
             'RD06': {'property_type': 'Flat'},
         }
+        # Other classifications can be found in here:
+        # https://osdatahub.os.uk/docs/places/technicalSpecification in the CLASSIFICATION_CODE description.
+        # A lookup table csv can be downloaded which contains all of the codes
 
         mapped = value_map.get(classification_code, {})
         self.property_type = mapped.get("property_type", "")
diff --git a/etl/customers/stonewater/shdf_3_clustering.py b/etl/customers/stonewater/shdf_3_clustering.py
index 45b435ed..8a3725b9 100644
--- a/etl/customers/stonewater/shdf_3_clustering.py
+++ b/etl/customers/stonewater/shdf_3_clustering.py
@@ -3,6 +3,8 @@ from tqdm import tqdm
 import os
 from dotenv import load_dotenv
 from backend.SearchEpc import SearchEpc
+import urllib.parse
+import requests
 
 from fuzzywuzzy import fuzz
 import numpy as np
@@ -334,4 +336,170 @@ def app():
     # 4) Retrieveing additional data against the internal_id
     # 5) Creation of final dataset for clustering
 
+    os_most_relevant = []
+    os_all = {}
     for i in ["1", "2", "3"]:
+        most_relevant_segment = read_from_s3(
+            bucket_name="retrofit-data-dev",
+            s3_file_name=f"customers/Stonewater/clustering/os_most_relevant_{i}.json"
+        )
+        os_most_relevant.extend(json.loads(most_relevant_segment))
+        os_all_segment = read_from_s3(
+            bucket_name="retrofit-data-dev",
+            s3_file_name=f"customers/Stonewater/clustering/os_all_{i}.json"
+        )
+        os_all = {**os_all, **json.loads(os_all_segment)}
+
+    os_most_relevant = pd.DataFrame(os_most_relevant)
+
+    os_address_comparison = os_data_pull_asset_list[
+        ["internal_id", "full_address", "postcode", "house_number", "address1"]
+    ].merge(
+        os_most_relevant[["internal_id", "ADDRESS", "POSTCODE", "UPRN"]],
+        how="inner",
+        on="internal_id"
+    )
+
+    # Compare house number
+    # Check for records where the postcode doesn't match
+    os_address_comparison["postcodes_match"] = (
+        os_address_comparison["postcode"].str.lower() == os_address_comparison["POSTCODE"].str.lower()
+    )
+
+    # extract it from ADDRESS
+    os_address_comparison["extracted_house_number"] = os_address_comparison["ADDRESS"].apply(
+        lambda x: SearchEpc.get_house_number(x)
+    )
+
+    # Compare house number
+    os_address_comparison["house_numbers_match"] = (
+        os_address_comparison["house_number"].str.lower() == os_address_comparison["extracted_house_number"].str.lower()
+    )
+
+    # String similarity
+    os_address_comparison["address_similarity_score"] = os_address_comparison.apply(
+        lambda x: fuzz.ratio(
+            remove_commas_and_full_stops(x["full_address"].lower()),
+            remove_commas_and_full_stops(x["ADDRESS"].lower())
+        ),
+        axis=1
+    )
+
+    os_address_comparison = os_address_comparison.sort_values("address_similarity_score", ascending=True)
+
+    problematic = os_address_comparison.copy()
+
+    problematic = problematic[
+        (problematic["address_similarity_score"] <= 80) |
+        (~problematic["house_numbers_match"]) |
+        (~problematic["postcodes_match"])
+        ]
+
+    # TODO: We'll label these problematic records as problematic, in the final output
+
+    # different_postcodes = problematic[~problematic["postcodes_match"]].copy().reset_index(drop=True)
+
+    ORDNANCE_SURVEY_API_KEY = ""  # This API key is a temp key which I have copied locally
+    problematic_os = []
+    problematic_os_all = {}
+    problematic_errors = []
+    for _, row in tqdm(problematic.iterrows(), total=len(problematic)):
+        # Let's just do a backup pull - we're now using LPI too
+        time.sleep(2)
+        backup_searher = SearchEpc(
+            address1=row["address1"],
+            postcode=row["postcode"],
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key=ORDNANCE_SURVEY_API_KEY,
+            uprn=None,
+        )
+        # Attempt to get places data with retry logic
+        result = get_places_with_retry(backup_searher)
+
+        if result:
+            # Get the most relevant response
+            problematic_os.append(
+                {
+                    "internal_id": row["internal_id"],
+                    **backup_searher.ordnance_survey_client.most_relevant_result
+                }
+            )
+
+            # Also keep the best 100 results
+            problematic_os_all[row["internal_id"]] = backup_searher.ordnance_survey_client.results
+        else:
+            # Record the internal_id of the asset that failed
+            print("Error for address: " + row["full_address"])
+            problematic_errors.append(row["internal_id"])
+
+    # Store to S3
+    save_data_to_s3(
+        data=json.dumps(problematic_os),
+        s3_file_name="customers/Stonewater/clustering/problematic_os.json",
+        bucket_name="retrofit-data-dev"
+    )
+
+    save_data_to_s3(
+        data=json.dumps(problematic_os_all),
+        s3_file_name="customers/Stonewater/clustering/problematic_os_all.json",
+        bucket_name="retrofit-data-dev"
+    )
+
+    save_data_to_s3(
+        data=json.dumps(problematic_errors),
+        s3_file_name="customers/Stonewater/clustering/problematic_errors.json",
+        bucket_name="retrofit-data-dev"
+    )
+
+    # Next steps: We should collate all of the data and produce 1 big dataset
+
+    problematic_os_df = pd.DataFrame(problematic_os)
+    problematic_address_comparison = problematic[["internal_id", "full_address", "postcode"]].merge(
+        problematic_os_df[["internal_id", "ADDRESS", "POSTCODE", "UPRN"]],
+        how="inner",
+        on="internal_id"
+    )
+
+    problematic_address_comparison["OS_POSTCODE"] = problematic_address_comparison["ADDRESS"].str.split(", ").str[-1]
+    problematic_address_comparison["postcodes_match"] = (
+        problematic_address_comparison["postcode"].str.lower() == problematic_address_comparison[
+        "OS_POSTCODE"].str.lower()
+    )
+
+    problematic_address_comparison["match_similarity_score"] = problematic_address_comparison.apply(
+        lambda x: fuzz.ratio(
+            remove_commas_and_full_stops(x["full_address"].lower()),
+            remove_commas_and_full_stops(x["ADDRESS"].lower())
+        ),
+        axis=1
+    )
+    problematic_address_comparison = problematic_address_comparison.sort_values("match_similarity_score",
+                                                                                ascending=True)
+
+    # We perform a final check
+    final_check = problematic_address_comparison[
+        (problematic_address_comparison["match_similarity_score"] <= 90) |
+        (~problematic_address_comparison["postcodes_match"])
+        ]
+
+    final_best_matches = []
+    for _, row in final_check.iterrows():
+        os_data = problematic_os_all[row["internal_id"]]
+        os_data = pd.DataFrame(
+            [x["DPA"] if "DPA" in x else x["LPI"] for x in os_data]
+        )
+        os_data["postcode"] = np.where(
+            ~pd.isnull(os_data["POSTCODE"]),
+            os_data["POSTCODE"],
+            os_data["POSTCODE_LOCATOR"]
+        )
+        os_data = os_data[os_data["postcode"].str.lower() == row["postcode"].lower()]
+        if os_data.shape[0] == 1:
+            final_best_matches.append(
+                {
+                    "internal_id": row["internal_id"],
+                    **os_data.iloc[0].to_dict()
+                }
+            )
+        else:
+            blah

From 09a3d01e9037ca59741ab5b6b2364810aa70aa38 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 8 Jun 2024 12:39:49 +0100
Subject: [PATCH 34/80] stonewater checking data

---
 backend/SearchEpc.py                          |  10 +-
 etl/customers/stonewater/no_matches.py        | 165 ++++++++++++++++++
 etl/customers/stonewater/shdf_3_clustering.py | 148 +++++++++++++---
 3 files changed, 292 insertions(+), 31 deletions(-)
 create mode 100644 etl/customers/stonewater/no_matches.py

diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py
index 62ae307f..9724ffd1 100644
--- a/backend/SearchEpc.py
+++ b/backend/SearchEpc.py
@@ -192,10 +192,11 @@ class SearchEpc:
         self.fast = fast
 
     @staticmethod
-    def get_house_number(address: str) -> str | None:
+    def get_house_number(address: str, postcode=None) -> str | None:
         """
         This method uses the usaddress library to parse an address and extract the primary house or flat number.
         """
+
         try:
             # Updated regex to catch house numbers including alphanumeric ones
             pattern = r'(?i)(?:flat|apartment)\s*(\d+\w*)|^\s*(\d+\w*)'
@@ -207,6 +208,11 @@ class SearchEpc:
             # First, try to get the 'OccupancyIdentifier' if 'OccupancyType' is detected
             for part, type_ in parsed:
                 if type_ == 'OccupancyIdentifier':
+                    if postcode is not None:
+                        if part == postcode.split(" ")[0]:
+                            continue
+                        if part == postcode.split(" ")[1]:
+                            continue
                     return part  # This assumes the first 'OccupancyIdentifier' after 'OccupancyType' is the primary
                     # number
 
@@ -216,7 +222,7 @@ class SearchEpc:
                 return address_number.replace(",", "")  # Remove any trailing commas
 
         except Exception as e:
-            print(f"Error parsing address: {e}")
+            raise Exception(f"Error parsing address: {e}")
 
         return None
 
diff --git a/etl/customers/stonewater/no_matches.py b/etl/customers/stonewater/no_matches.py
new file mode 100644
index 00000000..e7c122b1
--- /dev/null
+++ b/etl/customers/stonewater/no_matches.py
@@ -0,0 +1,165 @@
+no_matches = [
+    {
+        'internal_id': 4626, 'full_address': '1 Dean Lane, Sixpenny Handley, Salisbury, SP5 5AS', 'postcode': 'SP5 5AS',
+        'Note': 'No match found - all addresses in this postcode are for Mulberry Court, Sixpenny Handley, Salisbury, '
+                'SP5 5AS, addresses not recognised by Zoopla - possibly the postcode is incorrect and this could be '
+                'Handley Enterprises Ltd, Unit 1 Dean Lane, Sixpenny Handley, Salisbury, SP5 5PA. '
+                'Or this could be 1 Mulberry Court Sixpenny Handley, Salisbury SP5 5AS'
+    },
+    {
+        'internal_id': 4627, 'full_address': '3 Dean Lane, Sixpenny Handley, Salisbury, SP5 5AS', 'postcode': 'SP5 5AS',
+        'Note': 'No match found - all addresses in this postcode are for Mulberry Court, Sixpenny Handley, Salisbury, '
+                'SP5 5AS, addresses not recognised by Zoopla - possibly the postcode is incorrect and this could be '
+                '2 Town Farm House, Dean Lane, Sixpenny Handley, Salisbury, SP5 5PA. '
+                'Or this could be 3 Mulberry Court Sixpenny Handley, Salisbury SP5 5AS'
+    },
+    {
+        'internal_id': 4628, 'full_address': '5 Dean Lane, Sixpenny Handley, Salisbury, SP5 5AS', 'postcode': 'SP5 5AS',
+        'Note': 'No match found - all addresses in this postcode are for Mulberry Court, Sixpenny Handley, Salisbury, '
+                'SP5 5AS, addresses not recognised by Zoopla - possibly the postcode is incorrect and this could be '
+                '4 Town Farm House, Dean Lane, Sixpenny Handley, Salisbury, SP5 5PA. '
+                'Or this could be 5 Mulberry Court Sixpenny Handley, Salisbury SP5 5AS'
+    },
+    {
+        'internal_id': 544, 'full_address': 'Room 1, Sawr, PO Box 1354, Bedford, MK41 5AB', 'postcode': 'MK41 5AB',
+        "Note": "Postcode deleted in April 2024: https://checkmypostcode.uk/mk415ab"
+    },
+    {
+        'internal_id': 5116, 'full_address': '3 Huntspond Road, Titchfield, Fareham, PO14 4SS', 'postcode': 'PO14 4SS',
+        'Note': 'Is this 3 St Francis Court, 195 Hunts Pond Road, Fareham, PO14 4SS, uprn: 100061988896'
+    },
+    {
+        'internal_id': 5114, 'full_address': '4 Huntspond Road, Titchfield, Fareham, PO14 4SS', 'postcode': 'PO14 4SS',
+        'Note': 'Is this 4 St Francis Court, 195 Hunts Pond Road, Fareham, PO14 4SS, uprn: 100061988897'
+    },
+    {
+        'internal_id': 5115, 'full_address': '2 Huntspond Road, Titchfield, Fareham, PO14 4SS', 'postcode': 'PO14 4SS',
+        'Note': 'Is this 2 St Francis Court, 195 Hunts Pond Road, Fareham, PO14 4SS, uprn: 100061988895'
+    },
+    {
+        'internal_id': 5113, 'full_address': '6 Huntspond Road, Titchfield, Fareham, PO14 4SS', 'postcode': 'PO14 4SS',
+        'Note': 'Is this 6 St Francis Court, 195 Hunts Pond Road, Fareham, PO14 4SS, uprn: 100061988899'
+    },
+    {
+        'internal_id': 5112, 'full_address': '1 Huntspond Road, Titchfield, Fareham, PO14 4SS', 'postcode': 'PO14 4SS',
+        'Note': 'Is this 1 St Francis Court, 195 Hunts Pond Road, Fareham, PO14 4SS, uprn: 100061988894'
+    },
+    {
+        'internal_id': 3846, 'full_address': '2 Beaufort Road, Southbourne, Bournemouth, BH6 5BD',
+        'postcode': 'BH6 5BD',
+        'Note': "2 Beaufort Road, Southbourne, Bournemouth is listed under the postcode BH6 5AL - is there a typo in "
+                "the postcode?"
+    },
+    {
+        'internal_id': 4497, 'full_address': '11 Brokenford Lane, Totton, Southampton, SO40 9LZ',
+        'postcode': 'SO40 9LZ',
+        'Note': "This postcode doesn't appear to exist, closest is 10 brokenford lane, Totton, Southampton, SO40 9DW. "
+                "What should this be?"
+    },
+    {
+        'internal_id': 4181, 'full_address': '25a Eastcott Road, Old Town, Swindon, SN1 3PA', 'postcode': 'SN1 3PA',
+        'Note': 'All addresses at this postcode are for Bow Court. '
+                'Closest match is 25 Eastcott Road, Swindon, SN1 3LT, but there is no 25A'
+    },
+    {
+        'internal_id': 5447, 'full_address': '3 Send Road, Send Road, Reading, RG4 8EP', 'postcode': 'RG4 8EP',
+        "Note": "There is no 'Send Road' at this postcode. There are a few possible matches, e.g. Flat 3, "
+                "1 Send Road, RG4 8EH"
+    },
+    {
+        'internal_id': 5449, 'full_address': '5 Send Road, Send Road, Reading, RG4 8EP', 'postcode': 'RG4 8EP',
+        "Note": "Same as for 3 Send Road"
+    },
+    {
+        'internal_id': 5450, 'full_address': '6 Send Road, Send Road, Reading, RG4 8EP', 'postcode': 'RG4 8EP',
+        "Note": "Same as for 3 Send Road"
+    },
+    {
+        'internal_id': 5446, 'full_address': '1 Send Road, Send Road, Reading, RG4 8EP', 'postcode': 'RG4 8EP',
+        "Note": "Same as for 3 Send Road"
+    },
+    {
+        'internal_id': 5448, 'full_address': '4 Send Road, Send Road, Reading, RG4 8EP', 'postcode': 'RG4 8EP',
+        "Note": "Same as for 3 Send Road"
+    },
+    {
+        'internal_id': 5451, 'full_address': '7 Send Road, Send Road, Reading, RG4 8EP', 'postcode': 'RG4 8EP',
+        "Note": "Same as for 3 Send Road"
+    },
+    {
+        'internal_id': 4547, 'full_address': '2 Cecil Terrace, Bemerton, Salisbury, SP2 9NE', 'postcode': 'SP2 9NE',
+        "Note": "Addresses for this postcode are for The Croft, SP2 9NE. Should this be 2 Cecil Terrace SP2 9ND, with "
+                "uprn: 100121039798 ?"
+    },
+    {
+        'internal_id': 4549, 'full_address': '4 Cecil Terrace, Bemerton, Salisbury, SP2 9NE', 'postcode': 'SP2 9NE',
+        "Note": "Addresses for this postcode are for The Croft, SP2 9NE. Should this be 4 Cecil Terrace SP2 9ND?"
+    },
+    {
+        'internal_id': 3601, 'full_address': '20 Constitution Hill, Parkstone, Poole, BH14 0PX', 'postcode': 'BH14 0PX',
+        "Note": "Should this be 20 Constitution Hill Gardens, Poole, BH14 0PY? (i.e. postcode is wrong) "
+                "uprn: 10001086693"
+    },
+    {
+        'internal_id': 3592, 'full_address': '7 Constitution Hill, Parkstone, Poole, BH14 0PX', 'postcode': 'BH14 0PX',
+        "Note": "Should the postcode be BH14 0PY ?"
+    },
+    {
+        'internal_id': 3594, 'full_address': '9 Constitution Hill, Parkstone, Poole, BH14 0PX', 'postcode': 'BH14 0PX',
+        "Note": "Should the postcode be BH14 0PY ?"
+    },
+    {
+        'internal_id': 3591, 'full_address': '6 Constitution Hill, Parkstone, Poole, BH14 0PX', 'postcode': 'BH14 0PX',
+        "Note": "Should the postcode be BH14 0PY ?"
+    },
+    {
+        'internal_id': 3593, 'full_address': '8 Constitution Hill, Parkstone, Poole, BH14 0PX', 'postcode': 'BH14 0PX',
+        "Note": "Should the postcode be BH14 0PY ?"},
+    {
+        'internal_id': 3590, 'full_address': '5 Constitution Hill, Parkstone, Poole, BH14 0PX', 'postcode': 'BH14 0PX',
+        "Note": "Should the postcode be BH14 0PY ?"},
+    {
+        'internal_id': 3589, 'full_address': '3 Constitution Hill, Parkstone, Poole, BH14 0PX', 'postcode': 'BH14 0PX',
+        "Note": "Should the postcode be BH14 0PY ?"},
+    {
+        'internal_id': 3600, 'full_address': '18 Constitution Hill, Parkstone, Poole, BH14 0PX',
+        'postcode': 'BH14 0PX', "Note": "Should the postcode be BH14 0PY ?"},
+    {
+        'internal_id': 3599, 'full_address': '17 Constitution Hill, Parkstone, Poole, BH14 0PX',
+        'postcode': 'BH14 0PX', "Note": "Should the postcode be BH14 0PY ?"},
+    {'internal_id': 3598, 'full_address': '15 Constitution Hill, Parkstone, Poole, BH14 0PX', 'postcode': 'BH14 0PX',
+     "Note": "Should the postcode be BH14 0PY ?"},
+    {'internal_id': 3608, 'full_address': '26 Constitution Hill, Parkstone, Poole, BH14 0PX', 'postcode': 'BH14 0PX',
+     "Note": "Should the postcode be BH14 0PY ?"},
+    {'internal_id': 3610, 'full_address': '30 Constitution Hill, Parkstone, Poole, BH14 0PX', 'postcode': 'BH14 0PX',
+     "Note": "Should the postcode be BH14 0PY ?"},
+    {'internal_id': 3603, 'full_address': '22 Constitution Hill, Parkstone, Poole, BH14 0PX', 'postcode': 'BH14 0PX',
+     "Note": "Should the postcode be BH14 0PY ?"},
+    {'internal_id': 3612, 'full_address': '32 Constitution Hill, Parkstone, Poole, BH14 0PX', 'postcode': 'BH14 0PX',
+     "Note": "Should the postcode be BH14 0PY ?"},
+    {'internal_id': 3595, 'full_address': '10 Constitution Hill, Parkstone, Poole, BH14 0PX', 'postcode': 'BH14 0PX',
+     "Note": "Should the postcode be BH14 0PY ?"},
+    {'internal_id': 3613, 'full_address': '34 Constitution Hill, Parkstone, Poole, BH14 0PX', 'postcode': 'BH14 0PX',
+     "Note": "Should the postcode be BH14 0PY ?"},
+
+    {'internal_id': 3597, 'full_address': '12 Constitution Hill, Parkstone, Poole, BH14 0PX', 'postcode': 'BH14 0PX',
+     "Note": "Should the postcode be BH14 0QB ?"},
+    {'internal_id': 3602, 'full_address': '21 Constitution Hill, Parkstone, Poole, BH14 0PX', 'postcode': 'BH14 0PX',
+     "Note": "Should the postcode be BH14 0QB ?"},
+    {'internal_id': 3606, 'full_address': '19 Constitution Hill, Parkstone, Poole, BH14 0PX', 'postcode': 'BH14 0PX',
+     "Note": "Should the postcode be BH14 0QB ?"},
+    {'internal_id': 3604, 'full_address': '23 Constitution Hill, Parkstone, Poole, BH14 0PX', 'postcode': 'BH14 0PX',
+     "Note": "Should the postcode be BH14 0QB ?"},
+    {'internal_id': 3605, 'full_address': '25 Constitution Hill, Parkstone, Poole, BH14 0PX', 'postcode': 'BH14 0PX',
+     "Note": "Should the postcode be BH14 0QB ?"},
+    {'internal_id': 3609, 'full_address': '29 Constitution Hill, Parkstone, Poole, BH14 0PX', 'postcode': 'BH14 0PX',
+     "Note": "Should the postcode be BH14 0QB ?"},
+    {'internal_id': 3596, 'full_address': '11 Constitution Hill, Parkstone, Poole, BH14 0PX', 'postcode': 'BH14 0PX',
+     "Note": "Should the postcode be BH14 0QB ?"},
+    {'internal_id': 3607, 'full_address': '27 Constitution Hill, Parkstone, Poole, BH14 0PX', 'postcode': 'BH14 0PX',
+     "Note": "Should the postcode be BH14 0QB ?"},
+    {'internal_id': 3611, 'full_address': '31 Constitution Hill, Parkstone, Poole, BH14 0PX', 'postcode': 'BH14 0PX',
+     "Note": "Should the postcode be BH14 0QB ?"},
+    {'internal_id': 5622, 'full_address': '26 Roman Way, Andover, SP10 5HZ', 'postcode': 'SP10 5HZ',
+     'Note': 'Should this postcode be SP10 5JU ?'}
+]
diff --git a/etl/customers/stonewater/shdf_3_clustering.py b/etl/customers/stonewater/shdf_3_clustering.py
index 8a3725b9..f2ef9a8b 100644
--- a/etl/customers/stonewater/shdf_3_clustering.py
+++ b/etl/customers/stonewater/shdf_3_clustering.py
@@ -433,28 +433,28 @@ def app():
             problematic_errors.append(row["internal_id"])
 
     # Store to S3
-    save_data_to_s3(
-        data=json.dumps(problematic_os),
-        s3_file_name="customers/Stonewater/clustering/problematic_os.json",
-        bucket_name="retrofit-data-dev"
-    )
-
-    save_data_to_s3(
-        data=json.dumps(problematic_os_all),
-        s3_file_name="customers/Stonewater/clustering/problematic_os_all.json",
-        bucket_name="retrofit-data-dev"
-    )
-
-    save_data_to_s3(
-        data=json.dumps(problematic_errors),
-        s3_file_name="customers/Stonewater/clustering/problematic_errors.json",
-        bucket_name="retrofit-data-dev"
-    )
+    # save_data_to_s3(
+    #     data=json.dumps(problematic_os),
+    #     s3_file_name="customers/Stonewater/clustering/problematic_os.json",
+    #     bucket_name="retrofit-data-dev"
+    # )
+    #
+    # save_data_to_s3(
+    #     data=json.dumps(problematic_os_all),
+    #     s3_file_name="customers/Stonewater/clustering/problematic_os_all.json",
+    #     bucket_name="retrofit-data-dev"
+    # )
+    #
+    # save_data_to_s3(
+    #     data=json.dumps(problematic_errors),
+    #     s3_file_name="customers/Stonewater/clustering/problematic_errors.json",
+    #     bucket_name="retrofit-data-dev"
+    # )
 
     # Next steps: We should collate all of the data and produce 1 big dataset
 
     problematic_os_df = pd.DataFrame(problematic_os)
-    problematic_address_comparison = problematic[["internal_id", "full_address", "postcode"]].merge(
+    problematic_address_comparison = problematic[["internal_id", "full_address", "postcode", "house_number"]].merge(
         problematic_os_df[["internal_id", "ADDRESS", "POSTCODE", "UPRN"]],
         how="inner",
         on="internal_id"
@@ -473,28 +473,50 @@ def app():
         ),
         axis=1
     )
-    problematic_address_comparison = problematic_address_comparison.sort_values("match_similarity_score",
-                                                                                ascending=True)
+    problematic_address_comparison = problematic_address_comparison.sort_values(
+        "match_similarity_score", ascending=True
+    )
+
+    # let's do a house number extraction
+    problematic_address_comparison["extracted_house_number"] = problematic_address_comparison.apply(
+        lambda x: SearchEpc.get_house_number(x["ADDRESS"], x["OS_POSTCODE"]), axis=1
+    )
+
+    problematic_address_comparison["house_numbers_different"] = (
+        problematic_address_comparison["house_number"].str.lower().str.split(",").str[0].str.split(" ").str[0] !=
+        problematic_address_comparison[
+            "extracted_house_number"].str.lower()
+    )
 
     # We perform a final check
+    # Take anything where the postcodes don't match, where the house numbers are different and the match similarity
+    # is less than 90, or the match similarity is less than 80
     final_check = problematic_address_comparison[
-        (problematic_address_comparison["match_similarity_score"] <= 90) |
         (~problematic_address_comparison["postcodes_match"])
-        ]
+    ]
+    final_check = final_check.sort_values("match_similarity_score", ascending=False)
+    final_check = final_check.reset_index(drop=True)
 
     final_best_matches = []
+    no_matches = []
     for _, row in final_check.iterrows():
         os_data = problematic_os_all[row["internal_id"]]
         os_data = pd.DataFrame(
             [x["DPA"] if "DPA" in x else x["LPI"] for x in os_data]
         )
-        os_data["postcode"] = np.where(
-            ~pd.isnull(os_data["POSTCODE"]),
-            os_data["POSTCODE"],
-            os_data["POSTCODE_LOCATOR"]
-        )
+
+        if ("POSTCODE_LOCATOR" in os_data.columns) and ("POSTCODE" in os_data.columns):
+            os_data["postcode"] = np.where(
+                ~pd.isnull(os_data["POSTCODE"]),
+                os_data["POSTCODE"],
+                os_data["POSTCODE_LOCATOR"]
+            )
+        elif "POSTCODE" in os_data.columns:
+            os_data["postcode"] = os_data["POSTCODE"]
+        else:
+            os_data["postcode"] = os_data["POSTCODE_LOCATOR"]
         os_data = os_data[os_data["postcode"].str.lower() == row["postcode"].lower()]
-        if os_data.shape[0] == 1:
+        if os_data.shape[0] >= 1:
             final_best_matches.append(
                 {
                     "internal_id": row["internal_id"],
@@ -502,4 +524,72 @@ def app():
                 }
             )
         else:
-            blah
+            no_matches.append(
+                {
+                    "internal_id": row["internal_id"],
+                    "full_address": row["full_address"],
+                    "postcode": row["postcode"]
+                }
+            )
+
+    no_matches = pd.DataFrame(no_matches)
+
+    # Data to be confirmed
+    from etl.customers.stonewater.no_matches import no_matches
+    no_matches_to_export = pd.DataFrame(no_matches)
+    no_matches_to_export = asset_list.merge(
+        no_matches_to_export[["internal_id", "Note"]],
+        how="inner",
+        on="internal_id"
+    ).rename(
+        columns={
+            "internal_id": "Osm. ID",
+            "customer_asset_id": "Org. ref.",
+            "external_address_id": "Address ID",
+        }
+    )
+    no_matches_to_export.to_excel("Stonewater - addresses with no matches.xlsx", index=False)
+
+    # We also confirm final_best_matches
+    final_best_matches_df = pd.DataFrame(final_best_matches)[
+        ["internal_id", "ADDRESS", "UPRN"]
+    ].rename(
+        columns={
+            "ADDRESS": "Ordnance Survey Address - same postcode (best match)",
+            "UPRN": "UPRN - same postcode (best match)"
+        }
+    )
+    # We also get their original match
+    final_best_matches_df = final_best_matches_df.merge(
+        problematic[["internal_id", "ADDRESS", "UPRN"]].rename(
+            columns={
+                "ADDRESS": "Ordnance Survey Address - best possible match",
+                "UPRN": "UPRN - best possible match"
+            }
+        ),
+        how="inner",
+        on="internal_id"
+    )
+
+    # merge on the original data
+    final_best_matches_df = asset_list.merge(
+        final_best_matches_df,
+        how="inner",
+        on="internal_id"
+    ).rename(
+        columns={
+            "internal_id": "Osm. ID",
+            "customer_asset_id": "Org. ref.",
+            "external_address_id": "Address ID",
+        }
+    )
+
+    # "Osm. ID": "internal_id",
+    # "Org. ref.": "customer_asset_id",
+    # "Postcode": "postcode",
+    # "House no": "house_number",
+    # "Name": "address1",
+    # "Address line 2": "address2",
+    # "City/Town": "city_town",
+    # "County": "county",
+    # "Address ID": "external_address_id",

From 743422e8fec13381c552f177a1caad15cedd7471 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 11 Jun 2024 18:23:19 +0100
Subject: [PATCH 35/80] Parity comparison investigation, stonewater wip

---
 backend/Property.py                           |  67 ++++++
 backend/SearchEpc.py                          |  17 +-
 backend/apis/GoogleSolarApi.py                | 211 +++++++++++++++++-
 backend/app/plan/router.py                    |  38 ----
 backend/ml_models/Valuation.py                |   3 +
 etl/customers/goldman/property_ownership.py   |  76 +++++++
 .../northern_gorup/test_asset_list.py         |  43 ++++
 .../places_for_people/parity_comparison.py    | 164 ++++++++++++++
 etl/customers/stonewater/shdf_3_clustering.py |  71 ++++++
 recommendations/Costs.py                      |  30 +--
 recommendations/SolarPvRecommendations.py     |   7 +-
 recommendations/WallRecommendations.py        |   2 +-
 12 files changed, 666 insertions(+), 63 deletions(-)
 create mode 100644 etl/customers/northern_gorup/test_asset_list.py
 create mode 100644 etl/customers/places_for_people/parity_comparison.py

diff --git a/backend/Property.py b/backend/Property.py
index 6336e42d..3599f21b 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -162,6 +162,9 @@ class Property:
         self.current_energy_bill = None
         self.expected_energy_bill = None
 
+        self.heating_energy_source = None
+        self.hot_water_energy_source = None
+
         self.recommendations_scoring_data = []
 
         self.parse_kwargs(kwargs)
@@ -585,6 +588,7 @@ class Property:
             floor_area_decile_thresholds=floor_area_decile_thresholds,
         )
         self.set_energy_source()
+        self.find_energy_sources()
 
     def set_spatial(self, spatial: pd.DataFrame):
         """
@@ -993,3 +997,66 @@ class Property:
 
         # Set the energy source based on the conditions above
         self.energy_source = energy_source
+
+    def find_energy_sources(self):
+        """Set heating_energy_source / hot_water_energy_source from main_heating and hotwater; raise if unclear."""
+        heating_fuel_mapping = {
+            'has_mains_gas': 'Natural Gas',
+            'has_electric': 'Electricity',
+            'has_oil': 'Oil',
+            'has_wood_logs': 'Wood Logs',
+            'has_coal': 'Coal',
+            'has_anthracite': 'Anthracite',
+            'has_smokeless_fuel': 'Smokeless Fuel',
+            'has_lpg': 'LPG',
+            'has_b30k': 'B30K Biofuel',
+            'has_air_source_heat_pump': 'Electricity',
+            'has_ground_source_heat_pump': 'Electricity',
+            'has_water_source_heat_pump': 'Electricity',
+            'has_electric_heat_pump': 'Electricity',
+            'has_solar_assisted_heat_pump': 'Electricity',
+            'has_exhaust_source_heat_pump': 'Electricity',
+            'has_community_heat_pump': 'Electricity',
+            'has_wood_pellets': 'Wood Pellets',
+            'has_community_scheme': 'Varied (Community Scheme)'
+        }
+
+        # Hot water
+        heater_type_to_fuel = {
+            'gas instantaneous': 'Natural Gas',
+            'electric heat pump': 'Electricity',
+            'electric immersion': 'Electricity',
+            'gas boiler': 'Natural Gas',
+            'oil boiler': 'Oil',
+            'electric instantaneous': 'Electricity',
+            'gas multipoint': 'Natural Gas',
+            'heat pump': 'Electricity',
+            'solid fuel boiler': 'Solid Fuel',
+            'solid fuel range cooker': 'Solid Fuel',
+            'room heaters': 'Varied'  # Could be any fuel, further specifics needed based on context
+        }
+
+        # Define a mapping from system types to general categories or modifications of fuel types
+        system_type_modification = {
+            'from main system': 'Main System',
+            'from secondary system': 'Secondary System',
+            'from second main heating system': 'Secondary System',
+            'community scheme': 'Community Scheme'
+        }
+
+        # Several flags (e.g. every heat-pump variant) map to the same fuel, so de-duplicate before counting
+        fuels = list(dict.fromkeys(
+            fuel for key, fuel in heating_fuel_mapping.items() if self.main_heating.get(key, False)
+        ))
+        if len(fuels) != 1:
+            raise Exception(f"Expected exactly one heating fuel, found {fuels}")
+        self.heating_energy_source = fuels[0]
+
+        if self.hotwater["heater_type"] is not None:
+            self.hot_water_energy_source = heater_type_to_fuel[self.hotwater["heater_type"]]
+        else:
+            fuel = system_type_modification[self.hotwater["system_type"]]
+            if fuel == 'Main System':
+                self.hot_water_energy_source = self.heating_energy_source
+            else:
+                raise Exception(f"Cannot derive a hot water fuel for system type {self.hotwater['system_type']!r}")
diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py
index 9724ffd1..275669cc 100644
--- a/backend/SearchEpc.py
+++ b/backend/SearchEpc.py
@@ -434,7 +434,8 @@ class SearchEpc:
         self, initial_postcode: str,
         lmks_to_drop: list[str] | None = None,
         built_form: str = "",
-        property_type: str = ""
+        property_type: str = "",
+        exclude_old: bool = False
     ):
         """
         Fetches and processes EPC data for a given initial postcode, applying successive trimming
@@ -453,6 +454,7 @@ class SearchEpc:
         :param lmks_to_drop: List of 'lmk-key' values to be excluded from the EPC data.
         :param built_form: The 'built-form' value to be used for filtering the EPC data.
         :param property_type: The 'property-type' value to be used for filtering the EPC data.
+        :param exclude_old: Flag to exclude EPC data older than 10 years.
         :return:
         """
 
@@ -483,6 +485,13 @@ class SearchEpc:
                 if not epc_data.empty:
                     # Further processing of the EPC data
                     epc_data['lodgement-datetime'] = pd.to_datetime(epc_data['lodgement-datetime'], errors='coerce')
+
+                    if exclude_old:
+                        # Exclude EPC data older than 10 years
+                        epc_data = epc_data[
+                            epc_data["lodgement-datetime"] > (pd.Timestamp.now() - pd.DateOffset(years=10))
+                            ]
+
                     epc_data = epc_data.sort_values("lodgement-datetime", ascending=False).groupby("uprn").head(1)
                     epc_data["house_number"] = epc_data["address"].apply(lambda add1: self.get_house_number(add1))
                     epc_data["numeric_house_number"] = epc_data["house_number"].apply(
@@ -583,7 +592,8 @@ class SearchEpc:
             initial_postcode=self.postcode,
             lmks_to_drop=lmks_to_drop,
             built_form=built_form,
-            property_type=property_type
+            property_type=property_type,
+            exclude_old=exclude_old
         )
 
         # If we have missing lodgment date, we fill it with inspection-date
@@ -591,9 +601,6 @@ class SearchEpc:
         # If we still have missing dates, we set it to the mean of the non NA dates
         epc_data["lodgement-datetime"] = epc_data["lodgement-datetime"].fillna(epc_data["lodgement-datetime"].mean())
 
-        if exclude_old:
-            epc_data = epc_data[epc_data["lodgement-datetime"] > pd.Timestamp.now() - pd.DateOffset(years=10)]
-
         # For each attribute, we need to determine the datatype and use an appropriate method
         # to estimate.
         estimated_epc = {}
diff --git a/backend/apis/GoogleSolarApi.py b/backend/apis/GoogleSolarApi.py
index 205a3560..8ee7017e 100644
--- a/backend/apis/GoogleSolarApi.py
+++ b/backend/apis/GoogleSolarApi.py
@@ -1,10 +1,15 @@
+import pandas as pd
+
 from backend.Property import Property
 from backend.SearchEpc import SearchEpc
 from etl.epc.Record import EPCRecord
 from dotenv import load_dotenv
-from utils.s3 import read_dataframe_from_s3_parquet
+from utils.s3 import read_dataframe_from_s3_parquet, read_from_s3
 import os
 import requests
+import msgpack
+from functools import lru_cache
+import time
 
 load_dotenv(dotenv_path="backend/.env")
 EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
@@ -13,6 +18,8 @@ EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
 uprn = 100040099104
 # This is for 353A, Hermitage Lane, ME16 9NT (one of the e.on properties)
 uprn = 200000964454
+# This is for 14 Victoria Road, Cross Hills, KEIGHLEY, North Yorkshire, ENGLAND, BD20 8SY
+uprn = 100050346517
 
 cleaning_data = read_dataframe_from_s3_parquet(
     bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
@@ -49,6 +56,25 @@ p = Property(
 
 p.get_spatial_data(uprn_filenames)
 
+cleaned = read_from_s3(
+    s3_file_name="cleaned_epc_data/cleaned.bson",
+    bucket_name="retrofit-data-dev"
+)
+
+cleaned = msgpack.unpackb(cleaned, raw=False)
+
+from etl.solar.SolarPhotoSupply import SolarPhotoSupply
+
+photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")
+
+p.get_components(
+    cleaned=cleaned,
+    photo_supply_lookup=photo_supply_lookup,
+    floor_area_decile_thresholds=floor_area_decile_thresholds
+)
+p.hot_water_energy_source
+p.heating_energy_source
+
 longitude = p.spatial["longitude"]
 latitude = p.spatial["latitude"]
 
@@ -73,14 +99,29 @@ from pprint import pprint
 
 pprint(solar_potential)
 
+# This is the maximum number of panels that can be installed
+solar_potential["maxArrayPanelsCount"]
+
 # This is the size of the panels used in the calculation - 400 watt
 solar_potential["panelCapacityWatts"]
+
 # Height of the panels used
 solar_potential["panelHeightMeters"]
+
 # Width of the panels used
 solar_potential["panelWidthMeters"]
 
-solar_potential["wholeRoofStats"]
+# This is the maximum area that can be covered by the panels
+solar_potential["maxArrayAreaMeters2"]
+
+# This is the area of the roof
+solar_potential["wholeRoofStats"]["areaMeters2"]
+
+# This is the area of the floor
+solar_potential["wholeRoofStats"]["groundAreaMeters2"]
+
+solar_potential["solarPanelConfigs"][0]
+solar_potential["solarPanelConfigs"][1]
 
 # Copy of response for testing - 6 Laura Close, Tintagel, PL34 0EB
 # {'name': 'buildings/ChIJ2yC6t4KEa0gRh2TIssogI7k', 'center': {'latitude': 50.667375, 'longitude': -4.7416833},
@@ -334,3 +375,169 @@ solar_potential["wholeRoofStats"]
 # 'orientation': 'PORTRAIT', 'yearlyEnergyDcKwh': 278.3281, 'segmentIndex': 1}]}, 'boundingBox': {'sw': {'latitude':
 # 50.6672904, 'longitude': -4.741778}, 'ne': {'latitude': 50.667431199999996, 'longitude': -4.7415536}},
 # 'imageryQuality': 'MEDIUM', 'imageryProcessedDate': {'year': 2024, 'month': 4, 'day': 18}}
+
+
+self = GoogleSolarApi(api_key=api_key)  # NOTE(review): runs before the class below is defined - scratch line?
+import numpy as np
+from recommendations.Costs import MCS_SOLAR_PV_COST_DATA
+
+
+class GoogleSolarApi:
+    NORTH_FACING_AZIMUTH_RANGE = (-30, 30)  # degrees either side of due north (0)
+
+    def __init__(self, api_key, max_retries=5):
+        """
+        Initialize the GoogleSolarApi class with the provided API key and maximum retries.
+
+        :param api_key: The API key to authenticate requests to the Google Solar API.
+        :param max_retries: The maximum number of retries for the API request (default is 5).
+        """
+        self.api_key = api_key
+        self.max_retries = max_retries
+        self.base_url = "https://solar.googleapis.com/v1"
+        self._cached_insights = lru_cache(maxsize=128)(self.get_building_insights)  # per-instance response cache
+
+        self.insights_data = None
+        self.roof_segments = []
+        self.panel_performance = None  # ranked configurations, filled by optimise_solar_configuration
+
+        # property attributes:
+        self.floor_area = None
+        self.roof_area = None
+        self.roof_segment_indexes = None
+        self.panel_area = None
+
+    def get_building_insights(self, longitude, latitude, required_quality="MEDIUM", max_retries=None):
+        """
+        Make an API request to retrieve building insights based on the given longitude and latitude, with retry
+        mechanism.
+
+        :param longitude: The longitude of the location.
+        :param latitude: The latitude of the location.
+        :param required_quality: The required quality of the data (default is "MEDIUM").
+        :param max_retries: The maximum number of retries for the API request (default is None, which uses the
+        instance's max_retries).
+        :return: The JSON response containing the building insights data.
+        :raises requests.exceptions.RequestException: If every attempt fails; the last error is re-raised.
+        """
+        if max_retries is None:
+            max_retries = self.max_retries
+
+        insights_url = f"{self.base_url}/buildingInsights:findClosest"
+        params = {
+            'location.latitude': f'{latitude:.5f}',
+            'location.longitude': f'{longitude:.5f}',
+            'requiredQuality': required_quality,
+            'key': self.api_key
+        }
+
+        attempt = 0
+        while attempt < max_retries:
+            try:
+                response = requests.get(insights_url, params=params, timeout=30)  # never hang on a stalled socket
+                response.raise_for_status()  # Raise an error for bad status codes
+                return response.json()
+            except requests.exceptions.RequestException as e:
+                attempt += 1
+                print(f"Attempt {attempt} failed: {e}")
+                if attempt >= max_retries:
+                    raise  # Out of retries: surface the last error without sleeping first
+                time.sleep(2 ** attempt)  # Exponential backoff
+
+    def get(self, longitude, latitude, required_quality="MEDIUM"):
+        """
+        Wrapper function that calls get_building_insights and extracts roof segments, with caching.
+
+        Only the raw API response is cached; the derived attributes are rebuilt on every call so the instance
+        always describes the most recently requested location.
+
+        :param longitude: The longitude of the location.
+        :param latitude: The latitude of the location.
+        :param required_quality: The required quality of the data (default is "MEDIUM").
+        :return: The JSON response containing the building insights data.
+        """
+        # TODO - can we make a request which includes the 30cm buffer from the edge of the roof?
+        self.insights_data = self._cached_insights(longitude, latitude, required_quality)
+
+        # Extract key data from the insights response
+        solar_potential = self.insights_data["solarPotential"]
+        self.roof_segments = solar_potential.get('roofSegmentStats', [])
+        self.floor_area = solar_potential["wholeRoofStats"]['groundAreaMeters2']
+        self.roof_area = solar_potential["wholeRoofStats"]['areaMeters2']
+        self.panel_area = solar_potential["panelHeightMeters"] * solar_potential["panelWidthMeters"]
+
+        # Automatically exclude north-facing segments
+        self.exclude_north_facing_segments()
+
+        self.roof_segment_indexes = [segment['segmentIndex'] for segment in self.roof_segments]
+
+        # We now start finding the solar panel configurations
+        self.optimise_solar_configuration()
+
+        return self.insights_data
+
+    def optimise_solar_configuration(self):
+        """
+        Optimise the solar panel configuration for the building.
+
+        Summarises each configuration returned by the API over the retained (non north-facing) roof segments,
+        ranks them by energy-weighted yield per installed watt and stores the table in panel_performance.
+
+        :return: DataFrame with one row per usable configuration, highest weighted_ratio first.
+        """
+        solar_potential = self.insights_data["solarPotential"]
+        panel_performance = []
+        for config in solar_potential.get("solarPanelConfigs", []):
+            # Filter on just the segments in self.roof_segment_indexes (zero-panel rows cannot be rated)
+            roi_summary = pd.DataFrame(
+                [
+                    {
+                        "n_panels": segment["panelsCount"],
+                        "wattage": segment["panelsCount"] * solar_potential["panelCapacityWatts"],
+                        "generatedEnergy": segment["yearlyEnergyDcKwh"],
+                    }
+                    for segment in config["roofSegmentSummaries"]
+                    if segment["segmentIndex"] in self.roof_segment_indexes and segment["panelsCount"] > 0
+                ]
+            )
+            if roi_summary.empty or roi_summary["generatedEnergy"].sum() <= 0:
+                # All panels sit on excluded segments (or yield nothing), so no weighted average exists
+                continue
+
+            roi_summary["ratio"] = roi_summary["generatedEnergy"] / roi_summary["wattage"]
+            # NOTE(review): cost scales with yearly kWh rather than installed kW (wattage / 1000) - confirm
+            cost = MCS_SOLAR_PV_COST_DATA["average_cost_per_kwh"] * (roi_summary["generatedEnergy"] / 1000)
+            panel_performance.append(
+                {
+                    "n_panels": roi_summary["n_panels"].sum(),
+                    "total_energy": roi_summary["generatedEnergy"].sum(),
+                    "total_cost": cost.sum(),
+                    "weighted_ratio": np.average(roi_summary["ratio"], weights=roi_summary["generatedEnergy"]),
+                }
+            )
+
+        columns = ["n_panels", "total_energy", "total_cost", "weighted_ratio"]
+        ranked = pd.DataFrame(panel_performance, columns=columns).sort_values("weighted_ratio", ascending=False)
+        self.panel_performance = ranked
+        return ranked
+
+    def exclude_north_facing_segments(self):
+        """
+        Filter out any north-facing roof segments from the roof_segments attribute.
+
+        North-facing segments are defined as those with an azimuth between -30 and 30 degrees. The API reports
+        azimuth as a compass bearing (0 = North, 90 = East, 180 = South), so it is wrapped into [-180, 180) first;
+        otherwise a segment facing 350 degrees would never fall inside the range.
+        """
+        lower, upper = self.NORTH_FACING_AZIMUTH_RANGE
+        filtered_segments = []
+        for segment_index, segment in enumerate(self.roof_segments):
+            segment["segmentIndex"] = segment_index
+            # Check if the segment is north-facing
+            signed_azimuth = (segment['azimuthDegrees'] + 180) % 360 - 180
+            if lower <= signed_azimuth <= upper:
+                continue
+
+            filtered_segments.append(segment)
+
+        self.roof_segments = filtered_segments
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 91a5ce0d..9caab324 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -1206,41 +1206,3 @@ def check_mds(results, input_properties, recommendations, optimise_measures):
     hhr_check = pd.DataFrame(hhr_check)
 
     return walls_check, hhr_check
-
-
-from utils.s3 import read_dataframe_from_s3_parquet
-
-z = read_dataframe_from_s3_parquet(
-    bucket_name="retrofit-data-dev",
-    file_key="sap_change_model/2024-05-28-19-08-25/dataset_rooms.parquet"
-)
-
-k = z[z["heat_demand_ending"] != z["heat_demand_starting"]]
-k = k[k["walls_thermal_transmittance"] == k["walls_thermal_transmittance_ending"]]
-k = k[k["roof_thermal_transmittance"] == k["roof_thermal_transmittance_ending"]]
-k = k[k["floor_thermal_transmittance"] == k["floor_thermal_transmittance_ending"]]
-ending_cols = [c for c in k.columns if "_ending" in c]
-eg = k.head(2).tail(1).squeeze()
-
-diff = []
-for c in ending_cols:
-    split = c.split("_ending")[0]
-    if split + "_starting" in k.columns:
-        starting_col = split + "_starting"
-    else:
-        starting_col = split
-
-    b4 = eg[starting_col]
-    after = eg[c]
-    if b4 != after:
-        diff.append(
-            {
-                "measure": split,
-                "starting": b4,
-                "ending": after
-            }
-        )
-diff = pd.DataFrame(diff)
-eg["heat_demand_starting"]
-eg["heat_demand_ending"]
-eg["uprn"]
diff --git a/backend/ml_models/Valuation.py b/backend/ml_models/Valuation.py
index dd77fb4b..1af38194 100644
--- a/backend/ml_models/Valuation.py
+++ b/backend/ml_models/Valuation.py
@@ -90,6 +90,9 @@ class PropertyValuation:
         41222760: 46_000,  # Based on Zoopla
         41222761: 270_000,  # Based on Zoopla
         41212534: 38_000,  # Based on Zoopla
+        # Northern Group Pilot - search by going to https://www.zoopla.co.uk/property/uprn/{uprn}/
+        10070868263: 194_000,  # Based on Zoopla
+        10070868244: 195_000,  # Based on Zoopla
     }
 
     # We base our valuation uplifts on a number of sources
diff --git a/etl/customers/goldman/property_ownership.py b/etl/customers/goldman/property_ownership.py
index 44fa7142..500963a1 100644
--- a/etl/customers/goldman/property_ownership.py
+++ b/etl/customers/goldman/property_ownership.py
@@ -527,3 +527,79 @@ def company_aggregation():
     aggregation = aggregation.sort_values("Number of Properties", ascending=False)
 
     aggregation.to_excel("Company ownership aggregation.xlsx")
+
+
+def prepare_anonymised_data():
+    """
+    Build an anonymised extract of the investment property list and export it to Excel.
+
+    Reads the property, EPC and valuation workbooks from the working directory, joins them on UPRN,
+    replaces every owner with a sequential "Owner<n>" label and drops the identifying columns. Within each
+    owner the rows are sorted by EPC lodgement date (descending) and the rows after the leading half have
+    their EPC address and postcode replaced with "Redacted". Output: "Property List - 50% redacted.xlsx".
+    """
+    import numpy as np
+
+    investment_50m_properties = pd.read_excel("investment_50m_properties 28th May.xlsx", header=0)
+    investment_epc_data = pd.read_excel("portfolio_epc_data_50m 28th May.xlsx", header=0)
+    valuations = pd.read_excel("property value.xlsx", header=0)
+
+    # EPC attributes to carry across, keyed by source column with the display name as the value
+    epc_display_names = {
+        "PROPERTY_TYPE": "Property Type",
+        "BUILT_FORM": "Property Archetype",
+        "TOTAL_FLOOR_AREA": "Total Floor Area",
+        "LODGEMENT_DATE": "Date EPC Lodged",
+        "POSTCODE": "Postcode on EPC",
+    }
+
+    valuation_display_names = {
+        "Zoopla Valuation": "Expected Valuation",
+        "Zoopla Lower Bound": "Valuation - Lower Bound",
+        "Zoopla Upper Bound": "Valuation - Upper Bound",
+    }
+
+    property_display_names = {
+        "CURRENT_ENERGY_RATING": "Current EPC",
+        "CURRENT_ENERGY_EFFICIENCY": "Current SAP Score",
+        "epc_address": "Address on EPC",
+    }
+
+    # Merge these datasets
+    epc_subset = investment_epc_data[["UPRN", *epc_display_names]].rename(
+        columns=epc_display_names
+    )
+    valuation_subset = valuations.drop(columns=["ADDRESS", "POSTCODE"]).rename(
+        columns=valuation_display_names
+    )
+    df = investment_50m_properties.merge(epc_subset, how="inner", on="UPRN")
+    df = df.merge(valuation_subset, how="inner", on="UPRN")
+    df = df.rename(columns=property_display_names)
+    df = df.drop(columns=["Title Number", "match_type", "UPRN"])
+
+    # Swap each company registration number for an anonymous "Owner<n>" label
+    owner_key = "Company Registration No. (1)"
+    redacted_owner_names = df[[owner_key]].drop_duplicates()
+    redacted_owner_names["Owner"] = [
+        f"Owner{position}" for position in range(1, len(redacted_owner_names) + 1)
+    ]
+    df = df.merge(redacted_owner_names, how="left", on=owner_key)
+
+    df = df.drop(columns=[owner_key, "Proprietor Name (1)", "Property Address"])
+    df = df.sort_values(["Owner", "Date EPC Lodged"], ascending=False)
+
+    # Per owner, keep the leading half (rounded half-up) and mark the remaining rows for redaction
+    redacted_index = []
+    for _, owner_properties in df.groupby("Owner"):
+        n_properties = owner_properties.shape[0]
+        n_kept = round(n_properties / 2 + 0.00001)
+        redacted_index.extend(
+            owner_properties.tail(n_properties - n_kept).index.tolist()
+        )
+
+    # Redact addresses and postcodes
+    is_redacted = df.index.isin(redacted_index)
+    for column in ("Address on EPC", "Postcode on EPC"):
+        df[column] = np.where(is_redacted, "Redacted", df[column])
+
+    df.to_excel("Property List - 50% redacted.xlsx", index=False)
diff --git a/etl/customers/northern_gorup/test_asset_list.py b/etl/customers/northern_gorup/test_asset_list.py
new file mode 100644
index 00000000..46a4bb75
--- /dev/null
+++ b/etl/customers/northern_gorup/test_asset_list.py
@@ -0,0 +1,43 @@
+import pandas as pd
+from utils.s3 import save_csv_to_s3
+
+USER_ID = 8  # first segment of the S3 key the pilot CSV is written to
+PORTFOLIO_ID = 81  # second segment of the S3 key; also sent as "portfolio_id" in the request body
+
+
+def app():
+    """
+    Upload the pilot asset list to S3 and print the request body that points at the uploaded file.
+    """
+    # Store the asset list in s3
+    filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
+    pilot_assets = pd.DataFrame(
+        [
+            {
+                'uprn': 10070868263,
+                "address": "Apartment 307, Flint Glass Wharf",
+                "postcode": "M4 6AD",
+            },
+            {
+                'uprn': 10070868244,
+                "address": "Apartment 106, Flint Glass Wharf",
+                "postcode": "M4 6AD",
+            },
+        ]
+    )
+    save_csv_to_s3(dataframe=pilot_assets, bucket_name="retrofit-plan-inputs-dev", file_name=filename)
+
+    # The remaining file-path inputs are sent as empty strings
+    blank_file_inputs = dict.fromkeys(
+        ("already_installed_file_path", "patches_file_path", "non_invasive_recommendations_file_path"), ""
+    )
+    body = {
+        "portfolio_id": str(PORTFOLIO_ID),
+        "housing_type": "Private",
+        "goal": "Increase EPC",
+        "goal_value": "B",
+        "trigger_file_path": filename,
+        **blank_file_inputs,
+        "budget": None,
+    }
+    print(body)
diff --git a/etl/customers/places_for_people/parity_comparison.py b/etl/customers/places_for_people/parity_comparison.py
new file mode 100644
index 00000000..64ab8591
--- /dev/null
+++ b/etl/customers/places_for_people/parity_comparison.py
@@ -0,0 +1,164 @@
+"""
+This script is used to pull together some case studies for the Parity Projects comparison
+"""
+
+import pandas as pd
+from backend.SearchEpc import SearchEpc
+from dotenv import load_dotenv
+import os
+
+load_dotenv("backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+parity_measures = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Places For People/Parity Sample All Addresses and Measures.xlsx",
+    sheet_name="Total Measures"
+)
+
+solar_measures = parity_measures[parity_measures["Category"] == "SolarPV"]
+
+example_1 = parity_measures[
+    parity_measures["Address Id (used by website)"] == 6125299
+    ].copy()
+
+config = {
+    "address": "14 Victoria Road",
+    "postcode": "BD20 8SY",
+    "uprn": 100050346517
+}
+
+# Point 1:
+# Parity tends to re-score the EPCs, even if they're extremely recent.
+# For example for '14, Victoria Road, Cross Hills, KEIGHLEY, North Yorkshire, ENGLAND, BD20 8SY'
+# The most recent EPC was done 15 May 2023, and landed at a 66D; however, for some reason, Parity re-scored this
+# home to be a 63.91. It's unclear why this is done
+
+example_1_measures = example_1[["MeasureGroupName", "Individual SAP increase"]].copy()
+# - LEDS: 0.25 SAP points
+# - 300mm of loft insulation from 200mm: 0.43 SAP points - where is this deduced from? Since the latest survey
+# indicates 250mm insulation in place
+# - Check construction of unknown party wall and fill cavity if appropriate: 0.12 SAP points (highly speculative,
+# not based on any data)
+# - Block open chimneys: 1.61 SAP points - latest survey showed 0 open fireplaces
+# - ASHP (45 degree emitters) with enhanced existing radiator central heating and hot water, from E rated gas boiler
+# 6.38 SAP points
+# - 4kWp PV array south and 30 degree pitch with no shading: 30.24 SAP points
+
+# Notes on solar - 30.24 seems like a lot
+# 400 watt is the solar panel output
+# Let's do a test for this property
+# This would be 10 solar panels
+# Using typical solar panel dimensions, this would be 19.63555m2 of roof space
+# The area of the roof is between 60 - 64.5 m2 (we use a API to get the roof data), implying only
+# around 30% of the roof is covered by solar panels
+# Using our machine learning model to simulate the impact of this on SAP, this would more likely result in
+# a
+
+from utils.s3 import read_dataframe_from_s3_parquet
+
+training_data = read_dataframe_from_s3_parquet(
+    bucket_name="retrofit-data-dev",
+    file_key="sap_change_model/2024-06-09-10-36-53/dataset_rooms.parquet"
+)
+# Look for properties where the only difference is solar
+ending_cols = [
+    c for c in training_data.columns if "_ending" in c and "photo_supply" not in c
+]
+ending_cols = [
+    c for c in ending_cols if
+    c not in ["sap_ending", "heat_demand_ending", "carbon_ending", "transaction_type_ending", "days_to_ending"]
+]
+
+column_pairs = {}
+for col in ending_cols:
+    starting = col.split("_ending")[0]
+    if starting + "_starting" in training_data.columns:
+        starting_col = starting + "_starting"
+    else:
+        starting_col = starting
+
+    column_pairs[col] = starting_col
+
+filtered = training_data.copy()
+# Take rows that had solar installs
+filtered = filtered[filtered["photo_supply_ending"] != filtered["photo_supply_starting"]]
+for ending_col, starting_col in column_pairs.items():
+    filtered = filtered[filtered[ending_col] == filtered[starting_col]]
+    print(f"ending_col: {ending_col}, filtered shape: {filtered.shape}")
+
+avg_change = filtered.groupby("photo_supply_ending")["rdsap_change"].mean().reset_index()
+
+# I've taken every single case of there being two EPCs for a property, where the only difference between the first
+# and second is the solar installation. This is 2692 properties, across the UK. In only 4 instances has this resulted in
+# 30 or more SAP points
+
+
+# Some functions based on the SAP methodology:
+import numpy as np
+
+total_floor_area = 50
+occupants = calculate_occupants(total_floor_area)
+appliances_energy_use = estimate_electrical_appliances(occupants, total_floor_area)
+cooking_energy_use = estimate_cooking(occupants)
+
+
+def calculate_occupants(total_floor_area):
+    """
+    From Table 1b
+    :param total_floor_area:
+    :return:
+    """
+    return 1 + (1.76 * (1 - np.exp(-0.000349 * (total_floor_area - 13.9) * (total_floor_area - 13.9))) + 0.0013 * (
+        total_floor_area - 13.9))
+
+
+def estimate_electrical_appliances(occupants, total_floor_area):
+    """
+    From section L2 Electrical appliances
+    :param occupants:
+    :param total_floor_area:
+    :return:
+    """
+    e_a = 207.8 * np.power(total_floor_area * occupants, 0.4717)
+
+    days_in_month = {
+        1: 31,
+        2: 28,
+        3: 31,
+        4: 30,
+        5: 31,
+        6: 30,
+        7: 31,
+        8: 31,
+        9: 30,
+        10: 31,
+        11: 30,
+        12: 31
+    }
+
+    eam = 0
+    for m in range(1, 13):
+        nm = days_in_month[m]
+        eam += e_a * (1 + 0.157 * np.cos(2 * np.pi * (m - 1.78) / 12)) * nm / 365
+
+    return eam
+
+
+def estimate_cooking(occupants):
+    """
+    From section L3 Cooking
+    :param occupants:
+    :return:
+    """
+
+    return 35 + 7 * occupants
+
+
+primary_energy_per_m2 = 288  # kWh/m2 per year
+primary_energy_regulated = primary_energy_per_m2 * total_floor_area
+
+primary_energy_factor_electricity = 1.1  # Example factor
+primary_energy_appliances = appliances_energy_use * primary_energy_factor_electricity
+primary_energy_cooking = cooking_energy_use * primary_energy_factor_electricity * 365  # Annualize cooking energy
+
+total_primary_energy_use = primary_energy_regulated + primary_energy_appliances
diff --git a/etl/customers/stonewater/shdf_3_clustering.py b/etl/customers/stonewater/shdf_3_clustering.py
index f2ef9a8b..75917a55 100644
--- a/etl/customers/stonewater/shdf_3_clustering.py
+++ b/etl/customers/stonewater/shdf_3_clustering.py
@@ -593,3 +593,74 @@ def app():
     # "City/Town": "city_town",
     # "County": "county",
     # "Address ID": "external_address_id",
+
+
+def compile_data():
+    """
+    Various data sources have been produced to create the final data source for Stonewater.
+    This function combines them
+    :return:
+    """
+    ########################################################################
+    # Read in data
+    ########################################################################
+    asset_list = read_excel_from_s3(
+        file_key="customers/Stonewater/Stonewater SHDF_3_0_Board Triage 22.05.24.xlsx",
+        bucket_name="retrofit-data-dev",
+        header_row=4
+    )
+
+    # TODO: Read in UPRNs
+
+    ########################################################################
+    # Prepare asset list
+    ########################################################################
+    # TODO: Merge on UPRNs
+    # Drop the bottom 4 rows, which are completely missing
+    asset_list = asset_list.head(-4)
+
+    # Keep just the columns we're interested in
+    asset_list = asset_list[
+        [
+            "Osm. ID",
+            "Org. ref.",
+            "Postcode",
+            "House no",
+            "Name",
+            "Address line 2",
+            "City/Town",
+            "County",
+            "Address ID",  # This is not uprn
+        ]
+    ].rename(
+        columns={
+            "Osm. ID": "internal_id",
+            "Org. ref.": "customer_asset_id",
+            "Postcode": "postcode",
+            "House no": "house_number",
+            "Name": "address1",
+            "Address line 2": "address2",
+            "City/Town": "city_town",
+            "County": "county",
+            "Address ID": "external_address_id",
+        }
+    )
+
+    # Create full address
+    asset_list["full_address"] = np.where(
+        ~pd.isnull(asset_list["address2"]),
+        (
+            asset_list["address1"] + ", " +
+            asset_list["address2"] + ", " +
+            asset_list["city_town"].str.title() + ", " +
+            # asset_list["county"] + ", " +
+            asset_list["postcode"]
+        ),
+        asset_list["address1"] + ", " +
+        asset_list["city_town"].str.title() + ", " +
+        # asset_list["county"] + ", " +
+        asset_list["postcode"]
+    )
+
+    if pd.isnull(asset_list["full_address"]).sum():
+        raise ValueError("Missing full addresses")
diff --git a/recommendations/Costs.py b/recommendations/Costs.py
index 03190727..5f752730 100644
--- a/recommendations/Costs.py
+++ b/recommendations/Costs.py
@@ -20,21 +20,21 @@ regional_labour_variations = [
 
 # This data is based on the MCS database
 MCS_SOLAR_PV_COST_DATA = {
-    "last_updated": "2024-01-04",
-    "average_cost_per_kwh": 2013.94,
-    "average_cost_per_kwh-Outer London": 2618.75,
-    "average_cost_per_kwh-Inner London": 2618.75,
-    "average_cost_per_kwh-South East England": 2083.33,
-    "average_cost_per_kwh-South West England": 2113,
-    "average_cost_per_kwh-East of England": 1973.86,
-    "average_cost_per_kwh-East Midlands": 1981.86,
-    "average_cost_per_kwh-West Midlands": 1926.55,
-    "average_cost_per_kwh-North East England": 2028.49,
-    "average_cost_per_kwh-North West England": 1620.42,
-    "average_cost_per_kwh-Yorkshire and the Humber": 2060.9,
-    "average_cost_per_kwh-Wales": 1898.83,
-    "average_cost_per_kwh-Scotland": 1967.97,
-    "average_cost_per_kwh-Northern Ireland": 2126.09,
+    "last_updated": "2024-06-10",
+    "average_cost_per_kwh": 1750,
+    "average_cost_per_kwh-Outer London": 1776,
+    "average_cost_per_kwh-Inner London": 1776,
+    "average_cost_per_kwh-South East England": 1672,
+    "average_cost_per_kwh-South West England": 1732,
+    "average_cost_per_kwh-East of England": 1721,
+    "average_cost_per_kwh-East Midlands": 1730,
+    "average_cost_per_kwh-West Midlands": 1761,
+    "average_cost_per_kwh-North East England": 1669,
+    "average_cost_per_kwh-North West England": 1764,
+    "average_cost_per_kwh-Yorkshire and the Humber": 1705,
+    "average_cost_per_kwh-Wales": 1896,
+    "average_cost_per_kwh-Scotland": 1767,
+    "average_cost_per_kwh-Northern Ireland": 1767,
 }
 
 # This data is based on the MCS database, We use the larger figure between the 2023 and 2024 average,
diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py
index a9255370..458eae12 100644
--- a/recommendations/SolarPvRecommendations.py
+++ b/recommendations/SolarPvRecommendations.py
@@ -4,10 +4,13 @@ from recommendations.recommendation_utils import override_costs
 
 
 class SolarPvRecommendations:
+    # Solar panel specs based on Eurener 400s solar panels
+    # https://midsummerwholesale.co.uk/buy/eurener/eurener-400w-mepv-zebra-ab-half-cut-mono
     # Approximate area of the solar panels
-    SOLAR_PANEL_AREA = 1.6
+    SOLAR_PANEL_AREA = 1.79
     # Wattage per panel - this is based on the average wattage of a solar panel being between 250w and 420w
-    SOLAR_PANEL_WATTAGE = 250
+    # This was previously set to 250w, but has been upped to 400 based on the systems used by Cotswold Energy Group
+    SOLAR_PANEL_WATTAGE = 400
 
     MAX_SYSTEM_WATTAGE = 6000
     MIN_SYSTEM_WATTAGE = 1000
diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py
index 868c08c0..fb228b49 100644
--- a/recommendations/WallRecommendations.py
+++ b/recommendations/WallRecommendations.py
@@ -189,7 +189,7 @@ class WallRecommendations(Definitions):
         # recommend internal wall insulation as a possible measure
 
         u_value = self.property.walls["thermal_transmittance"]
-        u_value = None if math.isnan(u_value) else u_value
+        u_value = None if pd.isnull(u_value) else u_value
 
         is_cavity_wall = self.property.walls["is_cavity_wall"]
         insulation_thickness = self.property.walls["insulation_thickness"]

From ff954eeeda8f121cc5d3af711c9b71147097a11f Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Tue, 11 Jun 2024 21:31:42 +0100
Subject: [PATCH 36/80] remove potential columns

---
 etl/epc/Pipeline.py | 14 +++-----
 etl/epc/Record.py   | 87 +++++++++++++++++++++++++--------------------
 2 files changed, 54 insertions(+), 47 deletions(-)

diff --git a/etl/epc/Pipeline.py b/etl/epc/Pipeline.py
index 3a078703..47cddeb0 100644
--- a/etl/epc/Pipeline.py
+++ b/etl/epc/Pipeline.py
@@ -39,7 +39,7 @@ VARIABLE_DATA_FEATURES = (
     COMPONENT_FEATURES
     + ROOM_FEATURES
     + EFFICIENCY_FEATURES
-    + POTENTIAL_COLUMNS
+    # + POTENTIAL_COLUMNS
     + ["lodgement_date", RDSAP_RESPONSE, HEAT_DEMAND_RESPONSE, CARBON_RESPONSE]
 )
 
@@ -66,14 +66,10 @@ clean_lookup = get_cleaned_description_mapping()
 
 # TODO: THIS IS A TEMPORARY FIX
 new_walls_description_mapping = pd.DataFrame(clean_lookup["walls-description"])
-
-import numpy as np
-
-new_walls_description_mapping["thermal_transmittance_unit"] = np.where(
-    ~pd.isnull(new_walls_description_mapping["thermal_transmittance_unit"]),
-    "w/m-¦k",
-    new_walls_description_mapping["thermal_transmittance_unit"],
-)
+new_walls_description_mapping.loc[
+    ~new_walls_description_mapping["thermal_transmittance_unit"].isnull(),
+    "thermal_transmittance_unit",
+] = "w/m-¦k"
 
 clean_lookup["walls-description"] = new_walls_description_mapping.to_dict(
     orient="records"
diff --git a/etl/epc/Record.py b/etl/epc/Record.py
index 9a965c6a..9b69c33a 100644
--- a/etl/epc/Record.py
+++ b/etl/epc/Record.py
@@ -76,10 +76,10 @@ class EPCRecord:
     mainheat_energy_eff: str = None
     mainheatc_energy_eff: str = None
     lighting_energy_eff: str = None
-    potential_energy_efficiency: float = None
-    environment_impact_potential: float = None
-    energy_consumption_potential: float = None
-    co2_emissions_potential: float = None
+    # potential_energy_efficiency: float = None
+    # environment_impact_potential: float = None
+    # energy_consumption_potential: float = None
+    # co2_emissions_potential: float = None
     lodgement_date: str = None
     current_energy_efficiency: int = None
     energy_consumption_current: int = None
@@ -249,18 +249,18 @@ class EPCRecord:
         self.mainheat_energy_eff: str = self.prepared_epc["mainheat_energy_eff"]
         self.mainheatc_energy_eff: str = self.prepared_epc["mainheatc_energy_eff"]
         self.lighting_energy_eff: str = self.prepared_epc["lighting_energy_eff"]
-        self.potential_energy_efficiency: float = float(
-            self.prepared_epc["potential_energy_efficiency"]
-        )
-        self.environment_impact_potential: float = float(
-            self.prepared_epc["environment_impact_potential"]
-        )
-        self.energy_consumption_potential: float = float(
-            self.prepared_epc["energy_consumption_potential"]
-        )
-        self.co2_emissions_potential: float = float(
-            self.prepared_epc["co2_emissions_potential"]
-        )
+        # self.potential_energy_efficiency: float = float(
+        #     self.prepared_epc["potential_energy_efficiency"]
+        # )
+        # self.environment_impact_potential: float = float(
+        #     self.prepared_epc["environment_impact_potential"]
+        # )
+        # self.energy_consumption_potential: float = float(
+        #     self.prepared_epc["energy_consumption_potential"]
+        # )
+        # self.co2_emissions_potential: float = float(
+        #     self.prepared_epc["co2_emissions_potential"]
+        # )
         self.lodgement_date: str = self.prepared_epc["lodgement_date"]
         self.current_energy_efficiency: int = int(
             self.prepared_epc["current_energy_efficiency"]
@@ -466,9 +466,7 @@ class EPCRecord:
             (property_dimensions["PROPERTY_TYPE"] == self.prepared_epc["property-type"])
         ]
 
-        if (
-            self.construction_age_band not in DATA_ANOMALY_MATCHES
-        ):
+        if self.construction_age_band not in DATA_ANOMALY_MATCHES:
             result = result[
                 (result["CONSTRUCTION_AGE_BAND"] == self.construction_age_band)
             ]
@@ -480,7 +478,12 @@ class EPCRecord:
             result = result[(result["BUILT_FORM"] == self.prepared_epc["built-form"])]
 
         return result[
-            ["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS", "TOTAL_FLOOR_AREA", "FLOOR_HEIGHT"]
+            [
+                "NUMBER_HABITABLE_ROOMS",
+                "NUMBER_HEATED_ROOMS",
+                "TOTAL_FLOOR_AREA",
+                "FLOOR_HEIGHT",
+            ]
         ].mean()
 
     def _clean_property_dimensions(self):
@@ -491,9 +494,11 @@ class EPCRecord:
         if not self.prepared_epc:
             raise ValueError("EPC Record doesn not contain epc data")
 
-        if (self.prepared_epc["number-habitable-rooms"] in DATA_ANOMALY_MATCHES) or (
-            self.prepared_epc["floor-height"] in DATA_ANOMALY_MATCHES
-        ) or (self.prepared_epc["number-heated-rooms"] in DATA_ANOMALY_MATCHES):
+        if (
+            (self.prepared_epc["number-habitable-rooms"] in DATA_ANOMALY_MATCHES)
+            or (self.prepared_epc["floor-height"] in DATA_ANOMALY_MATCHES)
+            or (self.prepared_epc["number-heated-rooms"] in DATA_ANOMALY_MATCHES)
+        ):
             property_dimensions = read_dataframe_from_s3_parquet(
                 bucket_name=DATA_BUCKET,
                 file_key=f"property_dimensions/{self.prepared_epc['local-authority']}.parquet",
@@ -507,12 +512,18 @@ class EPCRecord:
                 self.property_dimensions["NUMBER_HABITABLE_ROOMS"].round()
             )
         else:
-            self.prepared_epc["number-habitable-rooms"] = float(self.prepared_epc["number-habitable-rooms"])
+            self.prepared_epc["number-habitable-rooms"] = float(
+                self.prepared_epc["number-habitable-rooms"]
+            )
 
         if self.prepared_epc["number-heated-rooms"] in DATA_ANOMALY_MATCHES:
-            self.prepared_epc["number-heated-rooms"] = float(self.property_dimensions["NUMBER_HEATED_ROOMS"].round())
+            self.prepared_epc["number-heated-rooms"] = float(
+                self.property_dimensions["NUMBER_HEATED_ROOMS"].round()
+            )
         else:
-            self.prepared_epc["number-heated-rooms"] = float(self.prepared_epc["number-heated-rooms"])
+            self.prepared_epc["number-heated-rooms"] = float(
+                self.prepared_epc["number-heated-rooms"]
+            )
 
         self.number_of_floors = estimate_number_of_floors(
             self.prepared_epc["property-type"]
@@ -1033,18 +1044,18 @@ class EPCDifferenceRecord:
             "heat_demand_ending": self.record2.get(HEAT_DEMAND_RESPONSE),
             "carbon_starting": self.record1.get(CARBON_RESPONSE),
             "carbon_ending": self.record2.get(CARBON_RESPONSE),
-            "potential_energy_efficiency": self.earliest_record.get(
-                "potential_energy_efficiency"
-            ),
-            "environment_impact_potential": self.earliest_record.get(
-                "environment_impact_potential"
-            ),
-            "energy_consumption_potential": self.earliest_record.get(
-                "energy_consumption_potential"
-            ),
-            "co2_emissions_potential": self.earliest_record.get(
-                "co2_emissions_potential"
-            ),
+            # "potential_energy_efficiency": self.earliest_record.get(
+            #     "potential_energy_efficiency"
+            # ),
+            # "environment_impact_potential": self.earliest_record.get(
+            #     "environment_impact_potential"
+            # ),
+            # "energy_consumption_potential": self.earliest_record.get(
+            #     "energy_consumption_potential"
+            # ),
+            # "co2_emissions_potential": self.earliest_record.get(
+            #     "co2_emissions_potential"
+            # ),
             **ending_record,
             **starting_record,
         }

From 667ed1b990172887d88ec60c6eff45b02e1f255d Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 12 Jun 2024 15:44:04 +0100
Subject: [PATCH 37/80] working on stonewater clustering pipeline

---
 .../EPC data pull - 12th June.py              | 156 +++++++++++
 etl/customers/stonewater/shdf_3_clustering.py | 264 +++++++++++++++++-
 .../epc_attributes/RoofAttributes.py          |   2 +-
 3 files changed, 419 insertions(+), 3 deletions(-)
 create mode 100644 etl/customers/places_for_people/EPC data pull - 12th June.py

diff --git a/etl/customers/places_for_people/EPC data pull - 12th June.py b/etl/customers/places_for_people/EPC data pull - 12th June.py
new file mode 100644
index 00000000..45a70ad4
--- /dev/null
+++ b/etl/customers/places_for_people/EPC data pull - 12th June.py	
@@ -0,0 +1,156 @@
+import os
+
+import pandas as pd
+from tqdm import tqdm
+import numpy as np
+
+from dotenv import load_dotenv
+from backend.SearchEpc import SearchEpc
+from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
+
+from recommendations.recommendation_utils import (
+    estimate_perimeter,
+    estimate_external_wall_area,
+    estimate_number_of_floors
+)
+
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+
+def app():
+    """
+    This app pulls EPC data for some properties owned by LHP
+    :return:
+    """
+
+    asset_list = pd.read_excel(
+        "/Users/khalimconn-kowlessar/Downloads/Places for People NORTH WEST - EPC DATA PULL REQUEST.xlsx", header=0
+    )
+
+    epc_data = []
+    for _, home in tqdm(asset_list.iterrows(), total=len(asset_list)):
+
+        full_address = home["Address"]
+
+        address1 = home["AddressLine1"]
+        postcode = home["Postcode"]
+
+        searcher = SearchEpc(
+            address1=address1,
+            postcode=postcode,
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key="",
+            property_type=None,
+            fast=True,
+            full_address=full_address
+        )
+        # Force the skipping of estimating the EPC
+        searcher.ordnance_survey_client.property_type = None
+        searcher.ordnance_survey_client.built_form = None
+
+        searcher.find_property(skip_os=True)
+        if searcher.newest_epc is None:
+            continue
+
+        epc = {
+            "asset_list_address": full_address,
+            **searcher.newest_epc.copy()
+        }
+
+        epc_data.append(epc)
+
+    epc_df = pd.DataFrame(epc_data)
+
+    # Retrieve just the data we need
+    epc_df = epc_df[
+        [
+            "asset_list_address",
+            "uprn",
+            "property-type",
+            "built-form",
+            "inspection-date",
+            "current-energy-rating",
+            "current-energy-efficiency",
+            "roof-description",
+            "walls-description",
+            "transaction-type",
+            # New fields needed
+            "secondheat-description",
+            "total-floor-area",
+            "construction-age-band",
+            "floor-height",
+            "number-habitable-rooms",
+            "mainheat-description"
+        ]
+    ]
+
+    # epc_df.to_csv("pfp sales data.csv", index=False)
+
+    asset_list = asset_list.merge(
+        epc_df,
+        how="left",
+        left_on=["Address"],
+        right_on=["asset_list_address"]
+    )
+
+    asset_list = asset_list.drop(columns=["asset_list_address"])
+
+    # Rename the columns
+    asset_list = asset_list.rename(columns={
+        "inspection-date": "Date of last EPC",
+        "current-energy-efficiency": "SAP score on register",
+        "current-energy-rating": "EPC rating on register",
+        "property-type": "EPC Property Type",
+        "built-form": "EPC Archetype",
+        "total-floor-area": "EPC Property Floor Area",
+        "construction-age-band": "EPC Property Age Band",
+        "floor-height": "EPC Property Floor Height",
+        "number-habitable-rooms": "EPC Number of Habitable Rooms",
+        "walls-description": "EPC Wall Construction",
+        "roof-description": "EPC Roof Construction",
+        "mainheat-description": "EPC Heating Type",
+        "secondheat-description": "EPC Secondary Heating",
+        "transaction-type": "Reason for last EPC"
+    })
+
+    asset_list["Estimated Number of Floors"] = asset_list.apply(
+        lambda x: estimate_number_of_floors(
+            property_type=x["EPC Property Type"]
+        ) if not pd.isnull(x["EPC Property Type"]) else None, axis=1
+    )
+
+    asset_list["EPC Property Floor Area"] = asset_list["EPC Property Floor Area"].astype(float)
+    asset_list["EPC Number of Habitable Rooms"] = np.where(
+        asset_list["EPC Number of Habitable Rooms"] == "",
+        None,
+        asset_list["EPC Number of Habitable Rooms"]
+    )
+    asset_list["EPC Number of Habitable Rooms"] = asset_list["EPC Number of Habitable Rooms"].astype(float)
+
+    asset_list["Estimated Perimeter (m)"] = asset_list.apply(
+        lambda x: estimate_perimeter(
+            floor_area=x["EPC Property Floor Area"] / x["Estimated Number of Floors"],
+            num_rooms=x["EPC Number of Habitable Rooms"] / x["Estimated Number of Floors"],
+        ), axis=1
+    )
+
+    asset_list["Estimated Heat Loss Perimeter (m)"] = asset_list.apply(
+        lambda x: estimate_external_wall_area(
+            num_floors=x["Estimated Number of Floors"],
+            floor_height=float(x["EPC Property Floor Height"]) if x["EPC Property Floor Height"] else 2.5,
+            perimeter=x["Estimated Perimeter (m)"],
+            built_form=x["EPC Archetype"]
+        ),
+        axis=1
+    )
+
+    asset_list["Roof Insulation Thickness"] = asset_list.apply(
+        lambda x: RoofAttributes(description=x["EPC Roof Construction"]).process()[
+            "insulation_thickness"] if not pd.isnull(x["EPC Roof Construction"]) else None,
+        axis=1
+    )
+
+    # Store as an excel
+    filename = "Places for People NORTH WEST - EPC DATA PULL.xlsx"
+    asset_list.to_excel(filename, index=False)
diff --git a/etl/customers/stonewater/shdf_3_clustering.py b/etl/customers/stonewater/shdf_3_clustering.py
index 75917a55..44043206 100644
--- a/etl/customers/stonewater/shdf_3_clustering.py
+++ b/etl/customers/stonewater/shdf_3_clustering.py
@@ -10,11 +10,47 @@ from fuzzywuzzy import fuzz
 import numpy as np
 import pandas as pd
 import time
-from utils.s3 import save_data_to_s3, read_excel_from_s3, read_from_s3
+from utils.s3 import save_data_to_s3, read_excel_from_s3, read_from_s3, read_dataframe_from_s3_parquet
 
 load_dotenv(dotenv_path="backend/.env")
 EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
 
+# We create a MAP of uprns, for EPCs that didn't give us the UPRN
+missing_uprn_map = [
+    # This is a map from internal_id to UPRN, for properties where we do have an EPC, but we don't have
+    # a uprn
+    # 1 Church Street, Alfreton, DE55 7AH
+    {"internal_id": 78, "mapped_uprn": None},  # Doesn't seem to exist any more
+    # 1 Granville Road, Luton, LU1 1PA
+    {"internal_id": 315, "mapped_uprn": 100080148856},
+    # 11 College Street, Birstall, Batley, WF17 9HF
+    # The EPC record is for 11 and 11a
+    {"internal_id": 1090, "mapped_uprn": 83190440},
+    # 11a College Street, Birstall, Batley, WF17 9HF
+    {"internal_id": 1092, "mapped_uprn": 83143766},
+    # Flat 5 Friars Street, Hereford, HR4 0AS
+    # TODO: Check this
+    {"internal_id": 1384, "mapped_uprn": 200002600892},
+    # This UPRN is for 5 Friars Court, which is a flat
+    # Flat 7 Friars Street, Hereford, HR4 0AS
+    # TODO: Check this
+    {"internal_id": 1385, "mapped_uprn": 200002600894},
+    # This UPRN is for 7 Friars Court, which is a flat
+    # 1 Waverley Street, Dudley, DY2 0YE
+    {"internal_id": 3349, "mapped_uprn": 90022438},
+    # 5 Brighton Road, Burgh Heath, Tadworth, KT20 6BQ
+    # TODO: Check this
+    # This UPRN is for 5 Copthorne, Brighton Road, Burgh Heath, KT20 6BQ, which is a flat
+    {"internal_id": 5027, "mapped_uprn": 100062145273},
+    # Room 1, 21 Coxford Road, Southampton, SO16 5FG
+    # This is for 21 Coxford Road
+    {"internal_id": 5554, "mapped_uprn": 100060692392},
+
+]
+missing_uprn_map = pd.DataFrame(missing_uprn_map)
+
+internal_id_epcs_to_drop = [315, 1384, 1385, 3349]
+
 
 def remove_commas_and_full_stops(input_string: str) -> str:
     """
@@ -610,7 +646,58 @@ def compile_data():
         header_row=4
     )
 
-    # TODO: Read in UPRNs
+    # TODO: Read in UPRNs or UDPRN
+
+    epc_data = json.loads(
+        read_from_s3(
+            bucket_name="retrofit-data-dev",
+            s3_file_name="customers/Stonewater/clustering/epc_data.json"
+        )
+    )
+    epc_data = pd.DataFrame(epc_data)
+
+    # We drop some EPCs
+    epc_data = epc_data[~epc_data["internal_id"].isin(internal_id_epcs_to_drop)]
+
+    # This we can use to produce additional variables such as number of old surveys
+    older_epc_data = json.loads(
+        read_from_s3(
+            bucket_name="retrofit-data-dev",
+            s3_file_name="customers/Stonewater/clustering/old_epc_data.json"
+        )
+    )
+    older_epc_data = {k: v for k, v in older_epc_data.items() if k not in internal_id_epcs_to_drop}
+
+    # This is the first ordnance survey data pull
+    os_most_relevant_1 = []
+    os_all_1 = {}
+    for i in tqdm(["1", "2", "3"]):
+        most_relevant_segment = read_from_s3(
+            bucket_name="retrofit-data-dev",
+            s3_file_name=f"customers/Stonewater/clustering/os_most_relevant_{i}.json"
+        )
+        os_most_relevant_1.extend(json.loads(most_relevant_segment))
+        os_all_segment = read_from_s3(
+            bucket_name="retrofit-data-dev",
+            s3_file_name=f"customers/Stonewater/clustering/os_all_{i}.json"
+        )
+        os_all_1 = {**os_all_1, **json.loads(os_all_segment)}
+
+    os_most_relevant_1 = pd.DataFrame(os_most_relevant_1)
+
+    # This is the second ordnance survey data pull
+    os_most_relevant_2 = read_from_s3(
+        bucket_name="retrofit-data-dev",
+        s3_file_name="customers/Stonewater/clustering/problematic_os.json"
+    )
+    os_most_relevant_2 = json.loads(os_most_relevant_2)
+    os_most_relevant_2 = pd.DataFrame(os_most_relevant_2)
+
+    os_all_2 = read_from_s3(
+        bucket_name="retrofit-data-dev",
+        s3_file_name="customers/Stonewater/clustering/problematic_os_all.json"
+    )
+    os_all_2 = json.loads(os_all_2)
 
     ########################################################################
     # Prepare asset list
@@ -664,3 +751,176 @@ def compile_data():
 
     if pd.isnull(asset_list["full_address"]).sum():
         raise ValueError("Missing full addresses")
+
+    # Quick check to see if we have os data for every property that doesn't have an EPC
+    without_epc = asset_list[~asset_list["internal_id"].isin(epc_data["internal_id"].values)]
+    os_most_relevant_1_internal_ids = os_most_relevant_1["internal_id"].tolist()
+    os_most_relevant_2_internal_ids = os_most_relevant_2["internal_id"].tolist()
+
+    missing_os_data = []
+    for _, x in without_epc.iterrows():
+        # We would prioritise the data pulled the second time around
+
+        internal_id = x["internal_id"]
+        if internal_id in os_most_relevant_2_internal_ids:
+            continue
+
+        if internal_id in os_most_relevant_1_internal_ids:
+            continue
+
+        missing_os_data.append(internal_id)
+
+    if len(missing_os_data):
+        raise Exception("We don't have SOME data for each internal_id")
+
+    # For the EPC data, some of them are missing UPRN
+    epc_data_to_address = asset_list[
+        asset_list["internal_id"].isin(epc_data["internal_id"].values)
+    ][
+        ["full_address", "internal_id"]].merge(
+        epc_data, how="left", on="internal_id"
+    )
+    missed_uprn = epc_data_to_address[epc_data_to_address["uprn"] == ""]
+
+    # Once we have UPRNs, we might want to pull in the EPC data again
+    # epc_data_with_uprn = []
+    # older_epc_data_with_uprn = {}
+    #
+    # for row_number, asset in tqdm(asset_list.iterrows(), total=len(asset_list)):
+    #     searcher = SearchEpc(
+    #         address1=str(asset["address1"]),
+    #         postcode=str(asset["postcode"]),
+    #         auth_token=EPC_AUTH_TOKEN,
+    #         os_api_key="",
+    #         full_address=str(asset["full_address"]),
+    #         uprn=asset["uprn"]
+    #     )
+    #     searcher.find_property(skip_os=True)
+    #
+    #     if searcher.newest_epc is None:
+    #         continue
+    #
+    #     epc_data_with_uprn.append(
+    #         {
+    #             "internal_id": asset["internal_id"],
+    #             **searcher.newest_epc
+    #         }
+    #     )
+    #
+    #     if searcher.older_epcs is not None:
+    #         older_epc_data_with_uprn[asset["internal_id"]] = searcher.older_epcs
+
+    # We now get the remaining properties
+    # TODO: We might want to use epc_data_with_uprn
+    remaining_properties = asset_list[~asset_list["internal_id"].isin(epc_data["internal_id"].values)]
+
+    # We estimate the data
+    final_epcs = []
+    for _, p in remaining_properties.iterrows():
+        internal_id = p["internal_id"]
+        uprn = p["UPRN"]
+
+        if internal_id in os_most_relevant_1_internal_ids:
+            p_os_data = os_most_relevant_1[os_most_relevant_1["internal_id"] == internal_id].to_dict("records")[0]
+            p_os_full = os_all_1[str(internal_id)]
+        else:
+            p_os_data = os_most_relevant_2[os_most_relevant_2["internal_id"] == internal_id].to_dict("records")[0]
+            p_os_full = os_all_2[str(internal_id)]
+        p_os_full = pd.DataFrame(
+            [x["DPA"] if "DPA" in x else x["LPI"] for x in p_os_full]
+        )
+
+        # TODO: Add this back in
+        # When we have this
+        if p["uprn"] != p_os_data["UPRN"]:
+            # Get it from the older data
+            filtered = p_os_full[p_os_full["UPRN"] == p["uprn"]]
+            p_os_data = filtered.to_dict("records")[0]
+
+        searcher = SearchEpc(
+            address1=str(p["address1"]),
+            postcode=str(p["postcode"]),
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key="",
+            uprn=uprn
+        )
+        searcher.ordnance_survey_client.parse_classification_code(p_os_data["CLASSIFICATION_CODE"])
+
+        searcher.find_property(skip_os=True)
+
+        final_epcs.append(
+            {
+                "internal_id": internal_id,
+                **searcher.newest_epc
+            }
+        )
+
+    final_epcs = pd.DataFrame(final_epcs)
+
+    complete_epcs = pd.concat(
+        [
+            epc_data,
+            final_epcs
+        ]
+    )
+
+    # We now pull additional data
+    uprns = complete_epcs["uprn"].tolist()
+    # We get the spatial file list and loop through each EPC and determine which file it needs.
+    # We then just read in the files that we need and get the data, for each uprn from that file
+
+    uprn_filenames = read_dataframe_from_s3_parquet(
+        bucket_name="retrofit-data-dev", file_key="spatial/filename_meta.parquet"
+    )
+
+    uprn_lookup = {}
+    for uprn in complete_epcs["uprn"]:
+        if not uprn:
+            # TODO: Do something about this!
+            continue
+        filtered_df = uprn_filenames[
+            (uprn_filenames["lower"] <= int(uprn))
+            & (uprn_filenames["upper"] >= int(uprn))
+            ]
+        if filtered_df["filenames"].values[0] in uprn_lookup:
+            uprn_lookup[filtered_df["filenames"].values[0]].append(int(uprn))
+        else:
+            uprn_lookup[filtered_df["filenames"].values[0]] = [int(uprn)]
+
+    spatial_data_to_uprn = []
+    for filename, associated_uprn in tqdm(uprn_lookup.items(), total=len(uprn_lookup)):
+        # Read in the file
+        spatial_data = read_dataframe_from_s3_parquet(
+            bucket_name="retrofit-data-dev", file_key=f"spatial/{filename}"
+        )
+
+        spatial_df = spatial_data[spatial_data["UPRN"].isin(associated_uprn)]
+        spatial_data_to_uprn.append(spatial_df)
+
+    spatial_data_to_uprn = pd.concat(spatial_data_to_uprn)
+
+    # TODO: Let's store this in s3
+    save_data_to_s3(
+        data=json.dumps(spatial_data_to_uprn.to_dict("records")),
+        s3_file_name="scustomers/Stonewater/clustering/spatial_data_to_uprn.json",
+        bucket_name="retrofit-data-dev"
+    )
+
+    # We merge this spatial data onto final EPCS
+    spatial_data_to_uprn = spatial_data_to_uprn.drop(
+        columns=["partition", "filename"]
+    ).rename(columns={"UPRN": "uprn"})
+    spatial_data_to_uprn["uprn"] = spatial_data_to_uprn["uprn"].astype(str)
+
+    property_attributes = complete_epcs.merge(
+        spatial_data_to_uprn,
+        how="left",
+        on="uprn"
+    )
+
+    # We drop the columns we don't care about for clustering
+    property_attributes = property_attributes.drop(
+        columns=[
+
+        ]
+    )
diff --git a/etl/epc_clean/epc_attributes/RoofAttributes.py b/etl/epc_clean/epc_attributes/RoofAttributes.py
index 76f99f09..84d1f3e9 100644
--- a/etl/epc_clean/epc_attributes/RoofAttributes.py
+++ b/etl/epc_clean/epc_attributes/RoofAttributes.py
@@ -45,7 +45,7 @@ class RoofAttributes(Definitions):
         """
 
         self.description: str = description.lower().strip()
-        self.nodata = not description or description in self.DATA_ANOMALY_MATCHES
+        self.nodata = not description or description in self.DATA_ANOMALY_MATCHES or self.description == "sap05:roof"
 
         self.welsh_translation_search()
 

From 5e84967ee02fa5aa740426350290ae300b5381df Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 13 Jun 2024 00:26:22 +0100
Subject: [PATCH 38/80] merging asset list with uprns for stonewater

---
 etl/customers/stonewater/shdf_3_clustering.py | 685 +++++++++++++++++-
 1 file changed, 660 insertions(+), 25 deletions(-)

diff --git a/etl/customers/stonewater/shdf_3_clustering.py b/etl/customers/stonewater/shdf_3_clustering.py
index 44043206..6723b86e 100644
--- a/etl/customers/stonewater/shdf_3_clustering.py
+++ b/etl/customers/stonewater/shdf_3_clustering.py
@@ -5,6 +5,7 @@ from dotenv import load_dotenv
 from backend.SearchEpc import SearchEpc
 import urllib.parse
 import requests
+from datetime import datetime
 
 from fuzzywuzzy import fuzz
 import numpy as np
@@ -631,6 +632,23 @@ def app():
     # "Address ID": "external_address_id",
 
 
+def filter_os_data(p_os_data, p_os_data_all, udprn, is_flat):
+    if udprn is None:
+        p_os_data_all = pd.DataFrame([z["DPA"] if "DPA" in z else z["LPI"] for z in p_os_data_all])
+        if is_flat:
+            p_os_data_all = p_os_data_all[p_os_data_all["CLASSIFICATION_CODE"] == "RD06"]
+            return p_os_data_all.head(1)
+
+        return p_os_data_all.head(1)
+
+    final_os_data = p_os_data[p_os_data["UDPRN"] == udprn]
+    if final_os_data.empty:
+        p_os_data_all = pd.DataFrame([z["DPA"] if "DPA" in z else z["LPI"] for z in p_os_data_all])
+        final_os_data = p_os_data_all[p_os_data_all["UDPRN"].astype(str) == udprn]
+
+    return final_os_data
+
+
 def compile_data():
     """
     Various data sources have been produced to create the final data source for Stonewater.
@@ -640,13 +658,53 @@ def compile_data():
     ########################################################################
     # Read in data
     ########################################################################
-    asset_list = read_excel_from_s3(
-        file_key="customers/Stonewater/Stonewater SHDF_3_0_Board Triage 22.05.24.xlsx",
-        bucket_name="retrofit-data-dev",
-        header_row=4
+    # asset_list = read_excel_from_s3(
+    #     file_key="customers/Stonewater/Stonewater SHDF_3_0_Board Triage 22.05.24.xlsx",
+    #     bucket_name="retrofit-data-dev",
+    #     header_row=4
+    # )
+    #
+    # udprn_data = read_excel_from_s3(
+    #     file_key="customers/Stonewater/UDPRN updated RA Sample for 5 year programme.xlsx",
+    #     bucket_name="retrofit-data-dev",
+    #     header_row=0
+    # )[["AddressId", "UDPRN"]].rename(columns={"AddressId": "external_address_id"})
+
+    asset_list = pd.read_excel(
+        "/Users/khalimconn-kowlessar/Downloads/Stonewater SHDF_3_0_Board Triage 22.05.24.xlsx", header=4
     )
 
+    udprn_data = pd.read_excel(
+        "/Users/khalimconn-kowlessar/Downloads/UDPRN updated RA Sample for 5 year programme.xlsx", header=0
+    )[["AddressId", "UDPRN"]].rename(columns={"AddressId": "Address ID"})
+    udprn_data["UDPRN"] = udprn_data["UDPRN"].astype("Int64").astype(str)
+    udprn_data["Address ID"] = udprn_data["Address ID"].astype(str)
+
+    asset_list = asset_list.merge(udprn_data, how="inner", on="Address ID")
+    asset_list = asset_list.rename(columns={"UDPRN": "udprn"})
+
+    # Read in the lookups
+    uprn_lookup_1 = pd.DataFrame(json.loads(read_from_s3(
+        bucket_name="retrofit-data-dev",
+        s3_file_name="scustomers/Stonewater/clustering/address_uprn_udprn_lookup.json"
+    )))
+
+    uprn_lookup_2 = pd.DataFrame(json.loads(read_from_s3(
+        bucket_name="retrofit-data-dev",
+        s3_file_name="scustomers/Stonewater/clustering/address_uprn_udprn_lookup_2.json"
+    )))
+    uprn_lookup_2 = uprn_lookup_2.rename(
+        columns={
+            "epc_address": "standardised_address",
+            "epc_postcode": "standardised_postcode"
+        }
+    )
+
+    # concat
+    uprn_lookup = pd.concat([uprn_lookup_1, uprn_lookup_2])
+
     # TODO: Read in UPRNs or UDPRN
+    #       UPRN LOOKUPS TO READ IN: address_uprn_udprn_lookup, address_uprn_udprn_lookup_2
 
     epc_data = json.loads(
         read_from_s3(
@@ -660,13 +718,13 @@ def compile_data():
     epc_data = epc_data[~epc_data["internal_id"].isin(internal_id_epcs_to_drop)]
 
     # This we can use to produce additional variables such as number of old surveys
-    older_epc_data = json.loads(
-        read_from_s3(
-            bucket_name="retrofit-data-dev",
-            s3_file_name="customers/Stonewater/clustering/old_epc_data.json"
-        )
-    )
-    older_epc_data = {k: v for k, v in older_epc_data.items() if k not in internal_id_epcs_to_drop}
+    # older_epc_data = json.loads(
+    #     read_from_s3(
+    #         bucket_name="retrofit-data-dev",
+    #         s3_file_name="customers/Stonewater/clustering/old_epc_data.json"
+    #     )
+    # )
+    # older_epc_data = {k: v for k, v in older_epc_data.items() if k not in internal_id_epcs_to_drop}
 
     # This is the first ordnance survey data pull
     os_most_relevant_1 = []
@@ -703,8 +761,6 @@ def compile_data():
     # Prepare asset list
     ########################################################################
     # TODO: Merge on UPRNs
-    # Drop the bottom 4 rows, which are completely missing
-    asset_list = asset_list.head(-4)
 
     # Keep just the columns we're interested in
     asset_list = asset_list[
@@ -718,6 +774,7 @@ def compile_data():
             "City/Town",
             "County",
             "Address ID",  # This is not uprn
+            "udprn"
         ]
     ].rename(
         columns={
@@ -752,8 +809,17 @@ def compile_data():
     if pd.isnull(asset_list["full_address"]).sum():
         raise ValueError("Missing full addresses")
 
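+    # Merge the UPRN lookup onto the asset list (joined on internal_id and external_address_id; the lookup's udprn column is dropped)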
+
+    asset_list = asset_list.merge(
+        uprn_lookup.drop(columns=["udprn"]), how="left", on=["internal_id", "external_address_id"]
+    )
+
+    # This is everything without a uprn
+
     # Quick check to see if we have os data for every property that doesn't have an EPC
     without_epc = asset_list[~asset_list["internal_id"].isin(epc_data["internal_id"].values)]
+
     os_most_relevant_1_internal_ids = os_most_relevant_1["internal_id"].tolist()
     os_most_relevant_2_internal_ids = os_most_relevant_2["internal_id"].tolist()
 
@@ -773,14 +839,124 @@ def compile_data():
     if len(missing_os_data):
         raise Exception("We don't have SOME data for each internal_id")
 
-    # For the EPC data, some of them are missing UPRN
-    epc_data_to_address = asset_list[
-        asset_list["internal_id"].isin(epc_data["internal_id"].values)
-    ][
-        ["full_address", "internal_id"]].merge(
-        epc_data, how="left", on="internal_id"
+    # Let's create a lookup table of internal_id, external_address_id, udprn, uprn, standardised_address
+    address_uprn_udprn_lookup = []
+    for _, x in without_epc.iterrows():
+        if pd.isnull(x["UDPRN"]):
+            continue
+        udprn = str(int(x["UDPRN"]))
+        internal_id = x["internal_id"]
+
+        is_flat = "flat" in x["address1"].lower()
+
+        # Get the OS data
+        final_os_data = pd.DataFrame()
+        if internal_id in os_most_relevant_1_internal_ids:
+            p_os_data = os_most_relevant_1[os_most_relevant_1["internal_id"] == internal_id]
+            p_os_data_all = os_all_1[str(internal_id)]
+            final_os_data = filter_os_data(p_os_data, p_os_data_all, udprn, is_flat)
+
+        if (internal_id in os_most_relevant_2_internal_ids) and final_os_data.empty:
+            p_os_data = os_most_relevant_2[os_most_relevant_2["internal_id"] == internal_id]
+            p_os_data_all = os_all_2[str(internal_id)]
+
+            final_os_data = filter_os_data(p_os_data, p_os_data_all, udprn, is_flat)
+
+        if final_os_data.empty:
+            continue
+
+        if final_os_data.shape[0] != 1:
+            if final_os_data["UPRN"].nunique() > 1:
+                raise Exception("Investigate me")
+
+        address_uprn_udprn_lookup.append(
+            {
+                "internal_id": internal_id,
+                "external_address_id": x["external_address_id"],
+                "udprn": udprn,
+                "uprn": final_os_data["UPRN"].values[0],
+                "standardised_address": final_os_data["ADDRESS"].values[0],
+                "standardised_postcode": final_os_data["POSTCODE"].values[0]
+            }
+        )
+
+    # Store this lookup
+    # save_data_to_s3(
+    #     data=json.dumps(address_uprn_udprn_lookup),
+    #     s3_file_name="scustomers/Stonewater/clustering/address_uprn_udprn_lookup.json",
+    #     bucket_name="retrofit-data-dev"
+    # )
+
+    address_uprn_udprn_lookup = pd.DataFrame(address_uprn_udprn_lookup)
+    missed = asset_list[~asset_list["internal_id"].isin(address_uprn_udprn_lookup["internal_id"].values)]
+
+    address_comparison = (
+        asset_list[
+            ["internal_id", "external_address_id", "UDPRN", "full_address", "postcode", "house_number", "address1"]
+        ].merge(
+            epc_data[["internal_id", "address", "postcode", "address1", "uprn"]].rename(
+                columns={
+                    "address": "epc_address",
+                    "postcode": "epc_postcode",
+                    "address1": "epc_address1"
+                }
+            ),
+            how="inner",
+            on="internal_id"
+        )
     )
-    missed_uprn = epc_data_to_address[epc_data_to_address["uprn"] == ""]
+
+    address_comparison["address_similarity_score"] = address_comparison.apply(
+        lambda x: fuzz.ratio(
+            remove_commas_and_full_stops(x["address1"].lower() + x["postcode"].lower()),
+            remove_commas_and_full_stops(x["epc_address1"].lower() + x["epc_postcode"].lower())
+        ),
+        axis=1
+    )
+    address_comparison = address_comparison.sort_values("address_similarity_score", ascending=False)
+    # Split on the similarity score: >= 95 is treated as confident, < 95 as low confidence
+    confident = address_comparison[address_comparison["address_similarity_score"] >= 95]
+    low_confidence = address_comparison[address_comparison["address_similarity_score"] < 95].copy()
+
+    lookup_2 = confident[
+        [
+            'internal_id', 'external_address_id', 'UDPRN', 'uprn',
+            'epc_address', 'epc_postcode']
+    ].rename(columns={"UDPRN": "udprn"})
+
+    # Store in S3
+    # save_data_to_s3(
+    #     data=json.dumps(lookup_2.to_dict("records")),
+    #     s3_file_name="scustomers/Stonewater/clustering/address_uprn_udprn_lookup_2.json",
+    #     bucket_name="retrofit-data-dev"
+    # )
+
+    # Need to deal with the low confidence records
+    low_confidence_asset_list = asset_list[asset_list["internal_id"].isin(low_confidence["internal_id"])]
+    for _, x in low_confidence_asset_list.iterrows():
+        udprn = str(int(x["UDPRN"]))
+        internal_id = x["internal_id"]
+        # Get the OS data
+        final_os_data = pd.DataFrame()
+        if internal_id in os_most_relevant_1_internal_ids:
+            p_os_data = os_most_relevant_1[os_most_relevant_1["internal_id"] == internal_id]
+            p_os_data_all = os_all_1[str(internal_id)]
+            final_os_data = filter_os_data(p_os_data, p_os_data_all, udprn)
+
+        if (internal_id in os_most_relevant_2_internal_ids) and final_os_data.empty:
+            p_os_data = os_most_relevant_2[os_most_relevant_2["internal_id"] == internal_id]
+            p_os_data_all = os_all_2[str(internal_id)]
+
+            final_os_data = filter_os_data(p_os_data, p_os_data_all, udprn)
+
+    # For the EPC data, some of them are missing UPRN
+    epc_data = epc_data.merge(missing_uprn_map, how="left", on="internal_id")
+    epc_data["uprn"] = np.where(
+        epc_data["uprn"] == "",
+        epc_data["mapped_uprn"],
+        epc_data["uprn"]
+    )
+    epc_data = epc_data.drop(columns=["mapped_uprn"])
 
     # Once we have UPRNs, we might want to pull in the EPC data again
     # epc_data_with_uprn = []
@@ -864,8 +1040,7 @@ def compile_data():
         ]
     )
 
-    # We now pull additional data
-    uprns = complete_epcs["uprn"].tolist()
+    # We now pull spatial data
     # We get the spatial file list and loop through each EPC and determine which file it needs.
     # We then just read in the files that we need and get the data, for each uprn from that file
 
@@ -875,7 +1050,7 @@ def compile_data():
 
     uprn_lookup = {}
     for uprn in complete_epcs["uprn"]:
-        if not uprn:
+        if pd.isnull(uprn):
             # TODO: Do something about this!
             continue
         filtered_df = uprn_filenames[
@@ -914,13 +1089,473 @@ def compile_data():
 
     property_attributes = complete_epcs.merge(
         spatial_data_to_uprn,
-        how="left",
+        how="inner",
         on="uprn"
     )
 
     # We drop the columns we don't care about for clustering
     property_attributes = property_attributes.drop(
         columns=[
-
+            "address",
+            "uprn-source",
+            "heating-cost-potential",
+            "hot-water-cost-potential",
+            "potential-energy-rating",
+            "environment-impact-potential",
+            "address3",
+            "local-authority-label",
+            "sheating-energy-eff",
+            "local-authority-label",
+            "county",
+            "postcode",
+            "constituency",
+            "co2-emissions-potential",
+            "energy-consumption-potential",
+            "local-authority",
+            "inspection-date",
+            "address1",
+            "constituency-label",
+            "building-reference-number",
+            "floor-energy-eff",
+            "address2",
+            "posttown",
+            "floor-env-eff",
+            "sheating-env-eff",
+            "lighting-cost-potential",
+            "main-heating-controls",
+            "transaction-type",
+            "uprn",
+            "lodgement-date",
+            "lmk-key",
+            "wind-turbine-count",
+            "tenure",
+            "potential-energy-efficiency",
         ]
     )
+
+    # Fields to transform: lodgement-datetime
+    property_attributes["days_since_last_epc"] = (
+        datetime.now() - pd.to_datetime(property_attributes["lodgement-datetime"])
+    ).dt.days
+
+    property_attributes = property_attributes.drop(columns=["lodgement-datetime"])
+
+    # Up to:
+    # Round averages to nearest integer
+    fill_with_average = [
+        "low-energy-fixed-light-count",
+        "floor-height",
+        "heating-cost-current",
+        "fixed-lighting-outlets-count",
+        "hot-water-cost-current",
+        "number-heated-rooms",
+        "co2-emiss-curr-per-floor-area",
+        "total-floor-area",
+        "environment-impact-current",
+        "co2-emissions-current",
+        "number-habitable-rooms",
+        "energy-consumption-current",
+        'lighting-cost-current',
+        "low_energy_lighting",
+    ]
+
+    fill_with_mode = [
+        "multi-glaze-proportion",
+        "extension-count",
+    ]
+
+    fill_with_zero = [
+        "unheated-corridor-length",
+        "number-open-fireplaces",
+        "glazed-area",
+        "photo-supply",
+    ]
+
+    fill_with_categorical = {
+        "construction-age-band": "unknown",
+        "mainheat-energy-eff": "N/A",
+        "windows-env-eff": "N/A",
+        "lighting-energy-eff": "N/A",
+        "energy-tariff": 'NO DATA!',
+        "mechanical-ventilation": 'NO DATA!',
+        "solar-water-heating-flag": "N",
+        "mains-gas-flag": "N",
+        "heat-loss-corridor": "unknown",
+        "flat-storey-count": "Not a flat",
+        "roof-energy-eff": "N/A",
+        "hot-water-env-eff": "N/A",
+        "mainheatc-energy-eff": "N/A",
+        "main-fuel": 'NO DATA!',
+        "lighting-env-eff": "N/A",
+        "windows-energy-eff": "N/A",
+        "roof-env-eff": "N/A",
+        "walls-env-eff": "N/A",
+        "mainheat-env-eff": "N/A",
+        "flat-top-storey": "N",
+        "mainheatc-env-eff": "N",
+        "floor-level": "NODATA!",
+        "hot-water-energy-eff": "N/A",
+    }
+
+    # Columns where several placeholder values ("from") are consolidated into a single value ("to")
+    consolidation_columns = {
+        "glazed-type": {"from": ['', 'NO DATA!', 'not defined', 'INVALID!'], "to": "unknown"},
+        "mechanical-ventilation": {"from": ['', 'NO DATA!', 'not defined', 'INVALID!'], "to": "unknown"},
+        "solar-water-heating-flag": {"from": [''], "to": "N"},
+        "mains-gas-flag": {"from": [''], "to": "N"},
+        "heat-loss-corridor": {"from": ['NO DATA!', ''], "to": "N"},
+        "flat-top-storey": {"from": [''], "to": "N"},
+        "floor-level": {"from": [""], "to": "NODATA!"}
+    }
+
+
+def concatenate_row(row):
+    return ', '.join(row.dropna().replace('', None).dropna().astype(str))
+
+
+def compile_data_final():
+    # Updated version:
+
+    """
+    Various data sources have been produced to create the final data source for Stonewater.
+    This function combines them
+    :return:
+    """
+    ########################################################################
+    # Read in data
+    ########################################################################
+
+    asset_list = pd.read_excel(
+        "/Users/khalimconn-kowlessar/Downloads/Stonewater SHDF_3_0_Board Triage 22.05.24.xlsx", header=4
+    )
+
+    udprn_data = pd.read_excel(
+        "/Users/khalimconn-kowlessar/Downloads/UDPRN updated RA Sample for 5 year programme.xlsx", header=0
+    )[["AddressId", "UDPRN"]].rename(columns={"AddressId": "Address ID"})
+    udprn_data["UDPRN"] = udprn_data["UDPRN"].astype("Int64").astype(str)
+    udprn_data["Address ID"] = udprn_data["Address ID"].astype(str)
+
+    asset_list = asset_list.merge(udprn_data, how="inner", on="Address ID")
+    asset_list = asset_list.rename(columns={"UDPRN": "udprn"})
+
+    # Read in the lookups
+    uprn_lookup_1 = pd.DataFrame(json.loads(read_from_s3(
+        bucket_name="retrofit-data-dev",
+        s3_file_name="scustomers/Stonewater/clustering/address_uprn_udprn_lookup.json"
+    )))
+    uprn_lookup_1["match_type"] = "Exact"
+
+    uprn_lookup_2 = pd.DataFrame(json.loads(read_from_s3(
+        bucket_name="retrofit-data-dev",
+        s3_file_name="scustomers/Stonewater/clustering/address_uprn_udprn_lookup_2.json"
+    )))
+    uprn_lookup_2 = uprn_lookup_2.rename(
+        columns={
+            "epc_address": "standardised_address",
+            "epc_postcode": "standardised_postcode"
+        }
+    )
+    uprn_lookup_2["match_type"] = "EPC"
+
+    uprn_lookup_3 = pd.DataFrame(json.loads(read_from_s3(
+        bucket_name="retrofit-data-dev",
+        s3_file_name="customers/Stonewater/clustering/ideal-postcodes_pull_2.json"
+    )))
+    uprn_lookup_3["standardised_address"] = uprn_lookup_3[["line_1", "line_2", "line_3", "district", "postcode"]].apply(
+        concatenate_row, axis=1
+    )
+    uprn_lookup_3 = uprn_lookup_3[
+        ["udprn", "uprn", "standardised_address", "postcode"]
+    ].rename(columns={"postcode": "standardised_postcode"})
+    uprn_lookup_3["match_type"] = "Exact"
+
+    uprn_lookup_4_basis = pd.read_csv("manual_fix_uprns-populated.csv", index_col=False)
+    uprn_lookup_4_basis["os_option_1_uprn"] = uprn_lookup_4_basis["os_option_1_uprn"].astype(str)
+    uprn_lookup_4_basis["os_option_2_uprn"] = uprn_lookup_4_basis["os_option_2_uprn"].astype("Int64").astype(str)
+    # prepare lookup 4
+    uprn_lookup_4 = []
+    for _, x in uprn_lookup_4_basis.iterrows():
+
+        property_type = None
+        built_form = None
+        if x["option"] == 1:
+            uprn = x["os_option_1_uprn"]
+            standardised_address = x["os_option_1_address"]
+            postcode = x["os_option_1_postcode"]
+        elif x["option"] == 2:
+            uprn = x["os_option_2_uprn"]
+            standardised_address = x["os_option_2_address"]
+            postcode = x["os_option_2_postcode"]
+        else:
+            uprn = x["manual_uprn"]
+            standardised_address = x["manual_address"]
+            postcode = x["manual_postcode"]
+
+        uprn_lookup_4.append(
+            {
+                "internal_id": x["internal_id"],
+                "external_address_id": x["external_address_id"],
+                "uprn": uprn,
+                "standardised_address": standardised_address,
+                "standardised_postcode": postcode,
+                "property_type": property_type,
+                "built_form": built_form
+            }
+        )
+    uprn_lookup_4 = pd.DataFrame(uprn_lookup_4)
+    uprn_lookup_4["match_type"] = "Fuzzy"
+
+    # concat
+    uprn_lookup = pd.concat([uprn_lookup_1, uprn_lookup_2])
+
+    # We now merge all of the UPRNs onto the asset list
+    assert len(uprn_lookup) + len(uprn_lookup_3) + len(uprn_lookup_4) == len(asset_list)
+
+    epc_data = json.loads(
+        read_from_s3(
+            bucket_name="retrofit-data-dev",
+            s3_file_name="customers/Stonewater/clustering/epc_data.json"
+        )
+    )
+    epc_data = pd.DataFrame(epc_data)
+
+    # We keep only the EPCs whose internal_id appears in uprn_lookup_2
+    epc_data = epc_data[epc_data["internal_id"].isin(uprn_lookup_2["internal_id"].values)]
+
+    # This we can use to produce additional variables such as number of old surveys
+    # older_epc_data = json.loads(
+    #     read_from_s3(
+    #         bucket_name="retrofit-data-dev",
+    #         s3_file_name="customers/Stonewater/clustering/old_epc_data.json"
+    #     )
+    # )
+    # older_epc_data = {k: v for k, v in older_epc_data.items() if k not in internal_id_epcs_to_drop}
+
+    ########################################################################
+    # Prepare asset list
+    ########################################################################
+
+    # Keep just the columns we're interested in
+    asset_list = asset_list[
+        [
+            "Osm. ID",
+            "Org. ref.",
+            "Postcode",
+            "House no",
+            "Name",
+            "Address line 2",
+            "City/Town",
+            "County",
+            "Address ID",  # This is not uprn
+            "udprn"
+        ]
+    ].rename(
+        columns={
+            "Osm. ID": "internal_id",
+            "Org. ref.": "customer_asset_id",
+            "Postcode": "postcode",
+            "House no": "house_number",
+            "Name": "address1",
+            "Address line 2": "address2",
+            "City/Town": "city_town",
+            "County": "county",
+            "Address ID": "external_address_id",
+        }
+    )
+
+    # Create full address
+    asset_list["full_address"] = np.where(
+        ~pd.isnull(asset_list["address2"]),
+        (
+            asset_list["address1"] + ", " +
+            asset_list["address2"] + ", " +
+            asset_list["city_town"].str.title() + ", " +
+            # asset_list["county"] + ", " +
+            asset_list["postcode"]
+        ),
+        asset_list["address1"] + ", " +
+        asset_list["city_town"].str.title() + ", " +
+        # asset_list["county"] + ", " +
+        asset_list["postcode"]
+    )
+
+    if pd.isnull(asset_list["full_address"]).sum():
+        raise ValueError("Missing full addresses")
+
+    # Final preps of lookups
+    uprn_lookup_3["udprn"] = uprn_lookup_3["udprn"].astype(str)
+    uprn_lookup_3 = uprn_lookup_3.merge(
+        asset_list[["udprn", "internal_id", "external_address_id"]], how="left", on="udprn"
+    )
+    uprn_lookup = pd.concat([
+        uprn_lookup,
+        uprn_lookup_3,
+        uprn_lookup_4
+    ])
+    uprn_lookup["external_address_id"] = uprn_lookup["external_address_id"].astype(str)
+
+    asset_list = asset_list.merge(
+        uprn_lookup.drop(columns=["udprn"]),
+        how="inner",
+        on=["internal_id", "external_address_id"]
+    )
+
+    # This is everything without a uprn
+    missing_uprn = asset_list[pd.isnull(asset_list["uprn"])]
+
+    missing_uprn_with_udprn = missing_uprn[
+        missing_uprn["udprn"] != "<NA>"
+        ].reset_index(drop=True)
+
+    missing_uprn_without_udprn = missing_uprn[
+        missing_uprn["udprn"] == "<NA>"
+        ].reset_index(drop=True)
+
+    missing_uprn_without_udprn = missing_uprn_without_udprn[["internal_id", "external_address_id", "full_address"]]
+    # Pull in the best ordnance survey data for each one and manually fix
+    manua_fix = []
+    for _, x in missing_uprn_without_udprn.iterrows():
+        internal_id = x["internal_id"]
+
+        os_option_1_address = ""
+        os_option_1_postcode = ""
+        os_option_1_uprn = ""
+        if internal_id in os_most_relevant_1_internal_ids:
+            p_os_data = os_most_relevant_1[os_most_relevant_1["internal_id"] == internal_id]
+            os_option_1_address = p_os_data["ADDRESS"].values[0]
+            os_option_1_postcode = p_os_data["POSTCODE"].values[0]
+            os_option_1_uprn = p_os_data["UPRN"].values[0]
+
+        os_option_2_address = ""
+        os_option_2_postcode = ""
+        os_option_2_uprn = ""
+        if internal_id in os_most_relevant_2_internal_ids:
+            p_os_data = os_most_relevant_2[os_most_relevant_2["internal_id"] == internal_id]
+            os_option_2_address = p_os_data["ADDRESS"].values[0]
+            os_option_2_postcode = p_os_data["POSTCODE"].values[0]
+            os_option_2_uprn = p_os_data["UPRN"].values[0]
+
+        manua_fix.append(
+            {
+                **x.to_dict(),
+                "os_option_1_address": os_option_1_address,
+                "os_option_1_postcode": os_option_1_postcode,
+                "os_option_1_uprn": os_option_1_uprn,
+
+                "os_option_2_address": os_option_2_address,
+                "os_option_2_postcode": os_option_2_postcode,
+                "os_option_2_uprn": os_option_2_uprn,
+            }
+        )
+
+    manua_fix = pd.DataFrame(manua_fix)
+    # manua_fix.to_csv("manual_fix_uprns.csv")
+
+    # Split into chunks of 200
+    api_key = "ak_lxcapii7HnEhGKxuVmPquzTYKu9vp"
+    import requests
+    import time
+    completed_id = 0
+
+    uprn_to_udprn = []
+    for row_index, data in tqdm(missing_uprn_with_udprn.iterrows(), total=len(missing_uprn_with_udprn)):
+        if row_index < completed_id:
+            continue
+        time.sleep(0.5)
+
+        # Call the API
+        udprn = data["udprn"]
+
+        url = f"https://api.ideal-postcodes.co.uk/v1/udprn/{udprn}?api_key={api_key}"
+
+        payload = {
+            "api_key": api_key
+        }
+        headers = {
+            'Accept': 'application/json'
+        }
+
+        response = requests.request("GET", url, headers=headers, data=payload)
+        if response.status_code != 200:
+            raise ValueError("API call dead")
+
+        result = response.json()
+        uprn_to_udprn.append(
+            result["result"]
+        )
+        completed_id += 1
+
+    # Store in S3
+    # save_data_to_s3(
+    #     data=json.dumps(uprn_to_udprn),
+    #     s3_file_name="customers/Stonewater/clustering/ideal-postcodes_pull_2.json",
+    #     bucket_name="retrofit-data-dev"
+    # )
+
+    test = read_from_s3(
+        s3_file_name="customers/Stonewater/clustering/ideal-postcodes_pull_2.json",
+        bucket_name="retrofit-data-dev"
+    )
+    test = pd.DataFrame(json.loads(test))
+
+    for _, x in missing_uprn.iterrows():
+        udprn = x["udprn"]
+        udprn = None if udprn == "<NA>" else udprn
+        internal_id = x["internal_id"]
+
+        is_flat = "flat" in x["address1"].lower()
+        # Get the OS data
+        final_os_data = pd.DataFrame()
+        if internal_id in os_most_relevant_1_internal_ids:
+            p_os_data = os_most_relevant_1[os_most_relevant_1["internal_id"] == internal_id]
+            p_os_data_all = os_all_1[str(internal_id)]
+            final_os_data = filter_os_data(p_os_data, p_os_data_all, udprn, is_flat)
+
+        if (internal_id in os_most_relevant_2_internal_ids) and final_os_data.empty:
+            p_os_data = os_most_relevant_2[os_most_relevant_2["internal_id"] == internal_id]
+            p_os_data_all = os_all_2[str(internal_id)]
+            final_os_data = filter_os_data(p_os_data, p_os_data_all, udprn, is_flat)
+
+        # Try signing up on a free trial with these guys!
+        # https://ideal-postcodes.co.uk/pricing
+        # API example: https://docs.ideal-postcodes.co.uk/docs/api/udprn
+
+        if final_os_data.empty:
+            boo
+            continue
+
+        if final_os_data.shape[0] != 1:
+            if final_os_data["UPRN"].nunique() > 1:
+                raise Exception("Investigate me")
+
+    # TODO: We should do a different variation of similarity, where we strip out "Flat" and "Room x" if they are there
+    # This is the first ordnance survey data pull
+    os_most_relevant_1 = []
+    os_all_1 = {}
+    for i in tqdm(["1", "2", "3"]):
+        most_relevant_segment = read_from_s3(
+            bucket_name="retrofit-data-dev",
+            s3_file_name=f"customers/Stonewater/clustering/os_most_relevant_{i}.json"
+        )
+        os_most_relevant_1.extend(json.loads(most_relevant_segment))
+        os_all_segment = read_from_s3(
+            bucket_name="retrofit-data-dev",
+            s3_file_name=f"customers/Stonewater/clustering/os_all_{i}.json"
+        )
+        os_all_1 = {**os_all_1, **json.loads(os_all_segment)}
+
+    os_most_relevant_1 = pd.DataFrame(os_most_relevant_1)
+
+    # This is the second ordnance survey data pull
+    os_most_relevant_2 = read_from_s3(
+        bucket_name="retrofit-data-dev",
+        s3_file_name="customers/Stonewater/clustering/problematic_os.json"
+    )
+    os_most_relevant_2 = json.loads(os_most_relevant_2)
+    os_most_relevant_2 = pd.DataFrame(os_most_relevant_2)
+
+    os_all_2 = read_from_s3(
+        bucket_name="retrofit-data-dev",
+        s3_file_name="customers/Stonewater/clustering/problematic_os_all.json"
+    )
+    os_all_2 = json.loads(os_all_2)

From 6f9a78cabc366b741a24c5f30bc72e5ddcdaf84c Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 13 Jun 2024 01:13:19 +0100
Subject: [PATCH 39/80] corrected spelling of built forms

---
 backend/OrdnanceSurvey.py                     |   4 +-
 etl/customers/stonewater/shdf_3_clustering.py | 223 ++++++++----------
 2 files changed, 106 insertions(+), 121 deletions(-)

diff --git a/backend/OrdnanceSurvey.py b/backend/OrdnanceSurvey.py
index 856dda7a..a4d716d0 100644
--- a/backend/OrdnanceSurvey.py
+++ b/backend/OrdnanceSurvey.py
@@ -117,8 +117,8 @@ class OrdnanceSuveyClient:
         value_map = {
             # In the OS api, "RD" is a "Dwelling" however this is not valid property type in the EPC database
             'RD': {},
-            'RD02': {'property_type': 'House', 'built_form': 'Detatched'},
-            'RD03': {'property_type': 'House', 'built_form': 'Semi-Detatched'},
+            'RD02': {'property_type': 'House', 'built_form': 'Detached'},
+            'RD03': {'property_type': 'House', 'built_form': 'Semi-Detached'},
             'RD04': {'property_type': 'House', 'built_form': 'Mid-Terrace'},
             'RD06': {'property_type': 'Flat'},
         }
diff --git a/etl/customers/stonewater/shdf_3_clustering.py b/etl/customers/stonewater/shdf_3_clustering.py
index 6723b86e..c7afa28d 100644
--- a/etl/customers/stonewater/shdf_3_clustering.py
+++ b/etl/customers/stonewater/shdf_3_clustering.py
@@ -1285,7 +1285,7 @@ def compile_data_final():
         elif x["option"] == 2:
             uprn = x["os_option_2_uprn"]
             standardised_address = x["os_option_2_address"]
-            postcode = x["os_option_2_postcode"]
+            postcode = x["os_option_2_address"].split(", ")[-1]
         else:
             uprn = x["manual_uprn"]
             standardised_address = x["manual_address"]
@@ -1347,7 +1347,8 @@ def compile_data_final():
             "City/Town",
             "County",
             "Address ID",  # This is not uprn
-            "udprn"
+            "udprn",
+            "Owning body"
         ]
     ].rename(
         columns={
@@ -1360,6 +1361,7 @@ def compile_data_final():
             "City/Town": "city_town",
             "County": "county",
             "Address ID": "external_address_id",
+            "Owning body": "owner"
         }
     )
 
@@ -1400,59 +1402,117 @@ def compile_data_final():
         on=["internal_id", "external_address_id"]
     )
 
-    # This is everything without a uprn
-    missing_uprn = asset_list[pd.isnull(asset_list["uprn"])]
+    # Store locally
+    # asset_list.to_excel("Stonewater asset list with uprn.xlsx")
 
-    missing_uprn_with_udprn = missing_uprn[
-        missing_uprn["udprn"] != "<NA>"
-        ].reset_index(drop=True)
+    # We take just domestic properties
 
-    missing_uprn_without_udprn = missing_uprn[
-        missing_uprn["udprn"] == "<NA>"
-        ].reset_index(drop=True)
+    # This is the first ordnance survey data pull
+    os_most_relevant_1 = []
+    os_all_1 = {}
+    for i in tqdm(["1", "2", "3"]):
+        most_relevant_segment = read_from_s3(
+            bucket_name="retrofit-data-dev",
+            s3_file_name=f"customers/Stonewater/clustering/os_most_relevant_{i}.json"
+        )
+        os_most_relevant_1.extend(json.loads(most_relevant_segment))
+        os_all_segment = read_from_s3(
+            bucket_name="retrofit-data-dev",
+            s3_file_name=f"customers/Stonewater/clustering/os_all_{i}.json"
+        )
+        os_all_1 = {**os_all_1, **json.loads(os_all_segment)}
 
-    missing_uprn_without_udprn = missing_uprn_without_udprn[["internal_id", "external_address_id", "full_address"]]
-    # Pull in the best ordnance survey data for each one and manually fix
-    manua_fix = []
-    for _, x in missing_uprn_without_udprn.iterrows():
-        internal_id = x["internal_id"]
+    os_most_relevant_1 = pd.DataFrame(os_most_relevant_1)
 
-        os_option_1_address = ""
-        os_option_1_postcode = ""
-        os_option_1_uprn = ""
-        if internal_id in os_most_relevant_1_internal_ids:
-            p_os_data = os_most_relevant_1[os_most_relevant_1["internal_id"] == internal_id]
-            os_option_1_address = p_os_data["ADDRESS"].values[0]
-            os_option_1_postcode = p_os_data["POSTCODE"].values[0]
-            os_option_1_uprn = p_os_data["UPRN"].values[0]
+    # This is the second ordnance survey data pull
+    os_most_relevant_2 = read_from_s3(
+        bucket_name="retrofit-data-dev",
+        s3_file_name="customers/Stonewater/clustering/problematic_os.json"
+    )
+    os_most_relevant_2 = json.loads(os_most_relevant_2)
+    os_most_relevant_2 = pd.DataFrame(os_most_relevant_2)
 
-        os_option_2_address = ""
-        os_option_2_postcode = ""
-        os_option_2_uprn = ""
-        if internal_id in os_most_relevant_2_internal_ids:
-            p_os_data = os_most_relevant_2[os_most_relevant_2["internal_id"] == internal_id]
-            os_option_2_address = p_os_data["ADDRESS"].values[0]
-            os_option_2_postcode = p_os_data["POSTCODE"].values[0]
-            os_option_2_uprn = p_os_data["UPRN"].values[0]
+    os_all_2 = read_from_s3(
+        bucket_name="retrofit-data-dev",
+        s3_file_name="customers/Stonewater/clustering/problematic_os_all.json"
+    )
+    os_all_2 = json.loads(os_all_2)
 
-        manua_fix.append(
+    needs_epc_data = asset_list[~asset_list["internal_id"].isin(epc_data["internal_id"])]
+
+    os_1_ids = os_most_relevant_1["internal_id"].values
+    os_2_ids = os_most_relevant_2["internal_id"].values
+
+    epc_data_batch_2 = []
+    older_epcs_batch_2 = {}
+    for _, property in tqdm(needs_epc_data.iterrows(), total=len(needs_epc_data)):
+        if pd.isnull(property["uprn"]):
+            continue
+        searcher = SearchEpc(
+            address1=", ".join(property["standardised_address"].split(", ")[:-1]),
+            postcode=property["standardised_postcode"],
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key="",
+            full_address=property["standardised_address"],
+            uprn=property["uprn"]
+        )
+        searcher.find_property(skip_os=True)
+
+        if searcher.newest_epc is None and property["match_type"] == "Exact":
+            # Estimate!
+            # Get the OS data
+            p_os_df = pd.DataFrame()
+            if property["internal_id"] in os_1_ids:
+                p_os_df = pd.DataFrame(
+                    [x["DPA"] if "DPA" in x else x["LPI"] for x in os_all_1[str(property["internal_id"])]]
+                )
+                p_os_df = p_os_df[p_os_df["UPRN"].astype(str) == property["uprn"]]
+
+            if p_os_df.empty:
+                p_os_df = pd.DataFrame(
+                    [x["DPA"] if "DPA" in x else x["LPI"] for x in os_all_2[str(property["internal_id"])]]
+                )
+                p_os_df = p_os_df[p_os_df["UPRN"] == property["uprn"]]
+
+            searcher.ordnance_survey_client.parse_classification_code(p_os_df["CLASSIFICATION_CODE"].values[0])
+            # Now we estimate
+            searcher.newest_epc = searcher.estimate_epc(
+                property_type=searcher.ordnance_survey_client.property_type,
+                built_form=searcher.ordnance_survey_client.built_form,
+                lmks_to_drop=None,
+                exclude_old=True
+            )
+
+        elif searcher.newest_epc is None and property["match_type"] == "Fuzzy":
+
+            if "flat" in property["standardised_address"].lower():
+                searcher.newest_epc = searcher.estimate_epc(
+                    property_type="Flat",
+                    built_form=None,
+                    lmks_to_drop=None,
+                    exclude_old=True
+                )
+            else:
+                searcher.newest_epc = searcher.estimate_epc(
+                    property_type="House",
+                    built_form=None,
+                    lmks_to_drop=None,
+                    exclude_old=True
+                )
+
+        epc_data_batch_2.append(
             {
-                **x.to_dict(),
-                "os_option_1_address": os_option_1_address,
-                "os_option_1_postcode": os_option_1_postcode,
-                "os_option_1_uprn": os_option_1_uprn,
-
-                "os_option_2_address": os_option_2_address,
-                "os_option_2_postcode": os_option_2_postcode,
-                "os_option_2_uprn": os_option_2_uprn,
+                "internal_id": property["internal_id"],
+                **searcher.newest_epc
             }
         )
 
-    manua_fix = pd.DataFrame(manua_fix)
-    # manua_fix.to_csv("manual_fix_uprns.csv")
+        if searcher.older_epcs is not None:
+            older_epcs_batch_2[property["internal_id"]] = searcher.older_epcs
 
-    # Split into chunks of 200
-    api_key = "ak_lxcapii7HnEhGKxuVmPquzTYKu9vp"
+
+def pull_ideal_postcodes(missing_uprn_with_udprn):
+    api_key = ""  # Log into the platform the get the API key: https://account.ideal-postcodes.co.uk/
     import requests
     import time
     completed_id = 0
@@ -1484,78 +1544,3 @@ def compile_data_final():
             result["result"]
         )
         completed_id += 1
-
-    # Store in S3
-    # save_data_to_s3(
-    #     data=json.dumps(uprn_to_udprn),
-    #     s3_file_name="customers/Stonewater/clustering/ideal-postcodes_pull_2.json",
-    #     bucket_name="retrofit-data-dev"
-    # )
-
-    test = read_from_s3(
-        s3_file_name="customers/Stonewater/clustering/ideal-postcodes_pull_2.json",
-        bucket_name="retrofit-data-dev"
-    )
-    test = pd.DataFrame(json.loads(test))
-
-    for _, x in missing_uprn.iterrows():
-        udprn = x["udprn"]
-        udprn = None if udprn == "<NA>" else udprn
-        internal_id = x["internal_id"]
-
-        is_flat = "flat" in x["address1"].lower()
-        # Get the OS data
-        final_os_data = pd.DataFrame()
-        if internal_id in os_most_relevant_1_internal_ids:
-            p_os_data = os_most_relevant_1[os_most_relevant_1["internal_id"] == internal_id]
-            p_os_data_all = os_all_1[str(internal_id)]
-            final_os_data = filter_os_data(p_os_data, p_os_data_all, udprn, is_flat)
-
-        if (internal_id in os_most_relevant_2_internal_ids) and final_os_data.empty:
-            p_os_data = os_most_relevant_2[os_most_relevant_2["internal_id"] == internal_id]
-            p_os_data_all = os_all_2[str(internal_id)]
-            final_os_data = filter_os_data(p_os_data, p_os_data_all, udprn, is_flat)
-
-        # Try signing up on a free trial with these guys!
-        # https://ideal-postcodes.co.uk/pricing
-        # API example: https://docs.ideal-postcodes.co.uk/docs/api/udprn
-
-        if final_os_data.empty:
-            boo
-            continue
-
-        if final_os_data.shape[0] != 1:
-            if final_os_data["UPRN"].nunique() > 1:
-                raise Exception("Investigate me")
-
-    # TODO: We should do a different variation of similarity, where we strip out "Flat" and "Room x" if they are there
-    # This is the first ordnance survey data pull
-    os_most_relevant_1 = []
-    os_all_1 = {}
-    for i in tqdm(["1", "2", "3"]):
-        most_relevant_segment = read_from_s3(
-            bucket_name="retrofit-data-dev",
-            s3_file_name=f"customers/Stonewater/clustering/os_most_relevant_{i}.json"
-        )
-        os_most_relevant_1.extend(json.loads(most_relevant_segment))
-        os_all_segment = read_from_s3(
-            bucket_name="retrofit-data-dev",
-            s3_file_name=f"customers/Stonewater/clustering/os_all_{i}.json"
-        )
-        os_all_1 = {**os_all_1, **json.loads(os_all_segment)}
-
-    os_most_relevant_1 = pd.DataFrame(os_most_relevant_1)
-
-    # This is the second ordnance survey data pull
-    os_most_relevant_2 = read_from_s3(
-        bucket_name="retrofit-data-dev",
-        s3_file_name="customers/Stonewater/clustering/problematic_os.json"
-    )
-    os_most_relevant_2 = json.loads(os_most_relevant_2)
-    os_most_relevant_2 = pd.DataFrame(os_most_relevant_2)
-
-    os_all_2 = read_from_s3(
-        bucket_name="retrofit-data-dev",
-        s3_file_name="customers/Stonewater/clustering/problematic_os_all.json"
-    )
-    os_all_2 = json.loads(os_all_2)

From 496ae8c969ea214981190b0b00536ccfc4827fc2 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 13 Jun 2024 02:29:41 +0100
Subject: [PATCH 40/80] Set up some different clustering approaches

---
 etl/customers/stonewater/shdf_3_clustering.py | 562 ++++++++++++++----
 1 file changed, 437 insertions(+), 125 deletions(-)

diff --git a/etl/customers/stonewater/shdf_3_clustering.py b/etl/customers/stonewater/shdf_3_clustering.py
index c7afa28d..c853fa94 100644
--- a/etl/customers/stonewater/shdf_3_clustering.py
+++ b/etl/customers/stonewater/shdf_3_clustering.py
@@ -1082,131 +1082,6 @@ def compile_data():
     )
 
     # We merge this spatial data onto final EPCS
-    spatial_data_to_uprn = spatial_data_to_uprn.drop(
-        columns=["partition", "filename"]
-    ).rename(columns={"UPRN": "uprn"})
-    spatial_data_to_uprn["uprn"] = spatial_data_to_uprn["uprn"].astype(str)
-
-    property_attributes = complete_epcs.merge(
-        spatial_data_to_uprn,
-        how="inner",
-        on="uprn"
-    )
-
-    # We drop the columns we don't care about for clustering
-    property_attributes = property_attributes.drop(
-        columns=[
-            "address",
-            "uprn-source",
-            "heating-cost-potential",
-            "hot-water-cost-potential",
-            "potential-energy-rating",
-            "environment-impact-potential",
-            "address3",
-            "local-authority-label",
-            "sheating-energy-eff",
-            "local-authority-label",
-            "county",
-            "postcode",
-            "constituency",
-            "co2-emissions-potential",
-            "energy-consumption-potential",
-            "local-authority",
-            "inspection-date",
-            "address1",
-            "constituency-label",
-            "building-reference-number",
-            "floor-energy-eff",
-            "address2",
-            "posttown",
-            "floor-env-eff",
-            "sheating-env-eff",
-            "lighting-cost-potential",
-            "main-heating-controls",
-            "transaction-type",
-            "uprn",
-            "lodgement-date",
-            "lmk-key",
-            "wind-turbine-count",
-            "tenure",
-            "potential-energy-efficiency",
-        ]
-    )
-
-    # Fields to transform: lodgement-datetime
-    property_attributes["days_since_last_epc"] = (
-        datetime.now() - pd.to_datetime(property_attributes["lodgement-datetime"])
-    ).dt.days
-
-    property_attributes = property_attributes.drop(columns=["lodgement-datetime"])
-
-    # Up to:
-    # Round averages to nearest integer
-    fill_with_average = [
-        "low-energy-fixed-light-count",
-        "floor-height",
-        "heating-cost-current",
-        "fixed-lighting-outlets-count",
-        "hot-water-cost-current",
-        "number-heated-rooms",
-        "co2-emiss-curr-per-floor-area",
-        "total-floor-area",
-        "environment-impact-current",
-        "co2-emissions-current",
-        "number-habitable-rooms",
-        "energy-consumption-current",
-        'lighting-cost-current',
-        "low_energy_lighting",
-    ]
-
-    fill_with_mode = [
-        "multi-glaze-proportion",
-        "extension-count",
-    ]
-
-    fill_with_zero = [
-        "unheated-corridor-length",
-        "number-open-fireplaces",
-        "glazed-area",
-        "photo-supply",
-    ]
-
-    fill_with_categorical = {
-        "construction-age-band": "unknown",
-        "mainheat-energy-eff": "N/A",
-        "windows-env-eff": "N/A",
-        "lighting-energy-eff": "N/A",
-        "energy-tariff": 'NO DATA!',
-        "mechanical-ventilation": 'NO DATA!',
-        "solar-water-heating-flag": "N",
-        "mains-gas-flag": "N",
-        "heat-loss-corridor": "unknown",
-        "flat-storey-count": "Not a flat",
-        "roof-energy-eff": "N/A",
-        "hot-water-env-eff": "N/A",
-        "mainheatc-energy-eff": "N/A",
-        "main-fuel": 'NO DATA!',
-        "lighting-env-eff": "N/A",
-        "windows-energy-eff": "N/A",
-        "roof-env-eff": "N/A",
-        "walls-env-eff": "N/A",
-        "mainheat-env-eff": "N/A",
-        "flat-top-storey": "N",
-        "mainheatc-env-eff": "N",
-        "floor-level": "NODATA!",
-        "hot-water-energy-eff": "N/A",
-    }
-
-    # Consolidation columns to single value
-    consolidation_columns = {
-        "glazed-type": {"from": ['', 'NO DATA!', 'not defined', 'INVALID!'], "to": "unknown"},
-        "mechanical-ventilation": {"from": ['', 'NO DATA!', 'not defined', 'INVALID!'], "to": "unknown"},
-        "solar-water-heating-flag": {"from": [''], "to": "N"},
-        "mains-gas-flag": {"from": [''], "to": "N"},
-        "heat-loss-corridor": {"from": ['NO DATA!', ''], "to": "N"},
-        "flat-top-storey": {"from": [''], "to": "N"},
-        "floor-level": {"from": [""], "to": "NODATA!"}
-    }
 
 
 def concatenate_row(row):
@@ -1256,6 +1131,11 @@ def compile_data_final():
         }
     )
     uprn_lookup_2["match_type"] = "EPC"
+    uprn_lookup_2["uprn"] = np.where(
+        uprn_lookup_2["internal_id"] == 1091,
+        83143766,
+        uprn_lookup_2["uprn"]
+    )
 
     uprn_lookup_3 = pd.DataFrame(json.loads(read_from_s3(
         bucket_name="retrofit-data-dev",
@@ -1319,6 +1199,12 @@ def compile_data_final():
     )
     epc_data = pd.DataFrame(epc_data)
 
+    epc_data["uprn"] = np.where(
+        epc_data["internal_id"] == 1091,
+        83143766,
+        epc_data["uprn"]
+    )
+
     # We drop come EPCS
     epc_data = epc_data[epc_data["internal_id"].isin(uprn_lookup_2["internal_id"].values)]
 
@@ -1510,6 +1396,432 @@ def compile_data_final():
         if searcher.older_epcs is not None:
             older_epcs_batch_2[property["internal_id"]] = searcher.older_epcs
 
+    # Store in S3
+    # TODO - read in instead of running
+    # save_data_to_s3(
+    #     data=json.dumps(epc_data_batch_2),
+    #     s3_file_name="customers/Stonewater/clustering/epc_data_batch_2.json",
+    #     bucket_name="retrofit-data-dev"
+    # )
+    # 
+    # save_data_to_s3(
+    #     data=json.dumps(older_epcs_batch_2),
+    #     s3_file_name="customers/Stonewater/clustering/older_epcs_batch_2.json",
+    #     bucket_name="retrofit-data-dev"
+    # )
+
+    epc_data_batch_2 = pd.DataFrame(epc_data_batch_2)
+    complete_epcs = pd.concat([epc_data, epc_data_batch_2])
+
+    # We now prepare the final data for clustering
+    uprn_filenames = read_dataframe_from_s3_parquet(
+        bucket_name="retrofit-data-dev", file_key="spatial/filename_meta.parquet"
+    )
+
+    uprn_map = {}
+    for uprn in complete_epcs["uprn"]:
+        filtered_df = uprn_filenames[
+            (uprn_filenames["lower"] <= int(uprn))
+            & (uprn_filenames["upper"] >= int(uprn))
+            ]
+        if filtered_df["filenames"].values[0] in uprn_map:
+            uprn_map[filtered_df["filenames"].values[0]].append(int(uprn))
+        else:
+            uprn_map[filtered_df["filenames"].values[0]] = [int(uprn)]
+
+    spatial_data_to_uprn = []
+    for filename, associated_uprn in tqdm(uprn_map.items(), total=len(uprn_map)):
+        # Read in the file
+        spatial_data = read_dataframe_from_s3_parquet(
+            bucket_name="retrofit-data-dev", file_key=f"spatial/{filename}"
+        )
+
+        spatial_df = spatial_data[spatial_data["UPRN"].isin(associated_uprn)]
+        spatial_data_to_uprn.append(spatial_df)
+
+    spatial_data_to_uprn = pd.concat(spatial_data_to_uprn)
+
+    # TODO: Let's store this in s3
+    # save_data_to_s3(
+    #     data=json.dumps(spatial_data_to_uprn.to_dict("records")),
+    #     s3_file_name="scustomers/Stonewater/clustering/spatial_data_to_uprn.json",
+    #     bucket_name="retrofit-data-dev"
+    # )
+
+    spatial_data_to_uprn = spatial_data_to_uprn.drop(
+        columns=["partition", "filename"]
+    ).rename(columns={"UPRN": "uprn"})
+    spatial_data_to_uprn["uprn"] = spatial_data_to_uprn["uprn"].astype(str)
+
+    property_attributes = complete_epcs.merge(
+        spatial_data_to_uprn,
+        how="left",
+        on="uprn"
+    )
+
+    # We drop the columns we don't care about for clustering
+    property_attributes = property_attributes.drop(
+        columns=[
+            "address",
+            "uprn-source",
+            "heating-cost-potential",
+            "hot-water-cost-potential",
+            "potential-energy-rating",
+            "environment-impact-potential",
+            "address3",
+            "local-authority-label",
+            "sheating-energy-eff",
+            "local-authority-label",
+            "county",
+            "postcode",
+            "constituency",
+            "co2-emissions-potential",
+            "energy-consumption-potential",
+            "local-authority",
+            "inspection-date",
+            "address1",
+            "constituency-label",
+            "building-reference-number",
+            "floor-energy-eff",
+            "address2",
+            "posttown",
+            "floor-env-eff",
+            "sheating-env-eff",
+            "lighting-cost-potential",
+            "main-heating-controls",
+            "transaction-type",
+            "uprn",
+            "lodgement-date",
+            "lmk-key",
+            "wind-turbine-count",
+            "tenure",
+            "potential-energy-efficiency",
+            "glazed-area"
+        ]
+    )
+
+    # Fields to transform: lodgement-datetime
+    property_attributes["days_since_last_epc"] = (
+        datetime.now() - pd.to_datetime(property_attributes["lodgement-datetime"])
+    ).dt.days
+
+    property_attributes = property_attributes.drop(columns=["lodgement-datetime"])
+
+    # Up to:
+    # Round averages to nearest integer
+    fill_with_average = [
+        "low-energy-fixed-light-count",
+        "floor-height",
+        "heating-cost-current",
+        "fixed-lighting-outlets-count",
+        "hot-water-cost-current",
+        "number-heated-rooms",
+        "co2-emiss-curr-per-floor-area",
+        "total-floor-area",
+        "environment-impact-current",
+        "co2-emissions-current",
+        "number-habitable-rooms",
+        "energy-consumption-current",
+        'lighting-cost-current',
+        "low-energy-lighting",
+    ]
+
+    fill_with_mode = [
+        "multi-glaze-proportion",
+        "extension-count",
+    ]
+
+    fill_with_zero = [
+        "unheated-corridor-length",
+        "number-open-fireplaces",
+        "photo-supply",
+    ]
+
+    fill_with_categorical = {
+        "construction-age-band": "unknown",
+        "mainheat-energy-eff": "N/A",
+        "windows-env-eff": "N/A",
+        "lighting-energy-eff": "N/A",
+        "energy-tariff": 'NO DATA!',
+        "mechanical-ventilation": 'NO DATA!',
+        "solar-water-heating-flag": "N",
+        "mains-gas-flag": "N",
+        "heat-loss-corridor": "unknown",
+        "flat-storey-count": "Not a flat",
+        "roof-energy-eff": "N/A",
+        "hot-water-env-eff": "N/A",
+        "mainheatc-energy-eff": "N/A",
+        "main-fuel": 'NO DATA!',
+        "lighting-env-eff": "N/A",
+        "windows-energy-eff": "N/A",
+        "roof-env-eff": "N/A",
+        "walls-env-eff": "N/A",
+        "mainheat-env-eff": "N/A",
+        "flat-top-storey": "N",
+        "mainheatc-env-eff": "N",
+        "floor-level": "NODATA!",
+        "hot-water-energy-eff": "N/A",
+    }
+
+    # Consolidation columns to single value
+    consolidation_columns = {
+        "glazed-type": {"from": ['', 'NO DATA!', 'not defined', 'INVALID!'], "to": "unknown"},
+        "mechanical-ventilation": {"from": ['', 'NO DATA!', 'not defined', 'INVALID!'], "to": "unknown"},
+        "solar-water-heating-flag": {"from": [''], "to": "N"},
+        "mains-gas-flag": {"from": [''], "to": "N"},
+        "heat-loss-corridor": {"from": ['NO DATA!', ''], "to": "N"},
+        "flat-top-storey": {"from": [''], "to": "N"},
+        "floor-level": {"from": [""], "to": "NODATA!"}
+    }
+
+    # Perform the cleaning
+    for col in fill_with_average:
+        property_attributes[col] = property_attributes[col].replace('', None)
+        avg_val = np.mean([float(x) for x in property_attributes[col].values if x not in [None, "", np.nan]])
+        if pd.isnull(avg_val):
+            raise Exception("something went wrong")
+        property_attributes[col] = property_attributes[col].fillna(round(avg_val))
+        property_attributes[col] = property_attributes[col].astype(float)
+
+    for c in fill_with_zero:
+        property_attributes[c] = property_attributes[c].replace('', 0)
+        property_attributes[c] = property_attributes[c].fillna(0)
+        property_attributes[c] = property_attributes[c].astype(float)
+
+    from scipy import stats
+    for col in fill_with_mode:
+        property_attributes[col] = property_attributes[col].replace('', None)
+        mode_val = stats.mode([float(x) for x in property_attributes[col].values if x not in [None, "", np.nan]])[0]
+        if pd.isnull(mode_val):
+            raise Exception("something went wrong")
+        property_attributes[col] = property_attributes[col].fillna(mode_val)
+        property_attributes[col] = property_attributes[col].astype(float)
+
+    for c, fill_val in fill_with_categorical.items():
+        property_attributes[c] = property_attributes[c].replace('', fill_val)
+        property_attributes[c] = property_attributes[c].fillna(fill_val)
+
+    # Finally, consolidate
+    for c, consolidate_config in consolidation_columns.items():
+        for v in consolidate_config["from"]:
+            property_attributes[c] = property_attributes[c].replace(v, consolidate_config["to"])
+
+    property_attributes["estimated"] = property_attributes["estimated"].fillna(False)
+    property_attributes["conservation_status"] = property_attributes["conservation_status"].fillna(False)
+
+    # CLUSTERING!!
+
+    # from sklearn.cluster import KMeans
+    # from sklearn.preprocessing import OneHotEncoder
+    # from scipy.spatial.distance import cdist
+    #
+    # property_attributes.set_index('internal_id', inplace=True)
+    #
+    # # Step 1: Prepare the data
+    # # Identify categorical columns (you might need to adjust this)
+    # categorical_cols = property_attributes.select_dtypes(include=['object', 'category']).columns.tolist()
+    # for col in categorical_cols:
+    #     property_attributes[col] = property_attributes[col].astype(str)
+    #
+    # # Applying OneHotEncoder
+    # encoder = OneHotEncoder(sparse=False)
+    # encoded_cats = encoder.fit_transform(property_attributes[categorical_cols])
+    #
+    # # Creating a new DataFrame with encoded categorical data and original numerical data
+    # numerical_data = property_attributes.select_dtypes(include=[np.number])
+    # data_for_clustering = pd.concat([numerical_data, pd.DataFrame(encoded_cats, index=numerical_data.index)], axis=1)
+    #
+    # # Convert all column names to strings to satisfy KMeans requirements
+    # data_for_clustering.columns = data_for_clustering.columns.astype(str)
+    #
+    # # Step 2: K-Means Clustering
+    # k = 450  # number of clusters
+    # kmeans = KMeans(n_clusters=k, random_state=0)
+    # property_attributes['cluster'] = kmeans.fit_predict(data_for_clustering)
+    #
+    # # Extracting centroids
+    # centroids = kmeans.cluster_centers_
+    #
+    # # Step 3: Assign clusters and rank rows
+    # # Calculating distances from each point to its cluster's centroid
+    # distances = cdist(data_for_clustering, centroids, 'euclidean')
+    # min_distances = distances.min(axis=1)
+    # property_attributes['distance_to_centroid'] = min_distances
+    #
+    # # Ranking rows by distance within each cluster
+    # property_attributes['rank'] = property_attributes.groupby('cluster')['distance_to_centroid'].rank(method='first')
+    #
+    # # Sorting to verify
+    # property_attributes.sort_values(by=['cluster', 'rank'], inplace=True)
+    #
+    # # Optional: Displaying the dataframe
+    # print(property_attributes.head())
+
+    from sklearn.cluster import KMeans
+    from sklearn.preprocessing import StandardScaler, OneHotEncoder
+    from sklearn.compose import ColumnTransformer
+    from sklearn.pipeline import Pipeline
+    from scipy.spatial.distance import cdist
+    id_column = 'internal_id'
+    property_attributes.set_index(id_column, inplace=True)
+
+    # Define the preprocessing for numerical and categorical features
+    numerical_features = property_attributes.select_dtypes(include=['int64', 'float64']).columns.tolist()
+    categorical_features = property_attributes.select_dtypes(include=['object', 'category']).columns.tolist()
+
+    for col in categorical_features:
+        property_attributes[col] = property_attributes[col].astype(str)
+
+    preprocessor = ColumnTransformer(
+        transformers=[
+            ('num', StandardScaler(), numerical_features),
+            ('cat', OneHotEncoder(), categorical_features)
+        ]
+    )
+
+    pipeline = Pipeline(steps=[('preprocessor', preprocessor),
+                               ('kmeans', KMeans(n_clusters=10, random_state=0))])
+
+    # Fit the pipeline to the data
+    pipeline.fit(property_attributes)
+
+    # Transform the data using the fitted pipeline
+    processed_data = pipeline.named_steps['preprocessor'].transform(property_attributes)
+
+    # Get cluster labels
+    property_attributes['cluster'] = pipeline.named_steps['kmeans'].labels_
+
+    # Get centroids (already in the same transformed space)
+    centroids = pipeline.named_steps['kmeans'].cluster_centers_
+
+    processed_data = processed_data.toarray()
+
+    # Calculate distances from each point to the centroid of its cluster
+    distances_to_centroids = [
+        cdist(processed_data[i].reshape(1, -1), centroids[label].reshape(1, -1)).flatten()[0]
+        for i, label in enumerate(property_attributes['cluster'])
+    ]
+
+    property_attributes['distance_to_centroid'] = distances_to_centroids
+
+    for cluster_id in property_attributes['cluster'].unique():
+        cluster_data = property_attributes[property_attributes['cluster'] == cluster_id]
+        min_distance = cluster_data['distance_to_centroid'].min()
+        print(f"Cluster {cluster_id} minimum distance to centroid: {min_distance}")
+        if min_distance != 0:
+            print(f"No point with zero distance found in cluster {cluster_id}")
+
+    # Ranking rows by distance within each cluster
+    property_attributes['rank'] = property_attributes.groupby('cluster')['distance_to_centroid'].rank(
+        method='first')
+
+    # Sorting to verify
+    property_attributes.sort_values(by=['cluster', 'rank'], inplace=True)
+
+    ################################################
+    # Agglomertive Clustering
+    ################################################
+
+    # from sklearn.cluster import KMeans, AgglomerativeClustering
+    # from sklearn.preprocessing import StandardScaler, OneHotEncoder
+    # from sklearn.compose import ColumnTransformer
+    # from sklearn.pipeline import Pipeline
+    # from scipy.spatial.distance import cdist
+    # import numpy as np
+    # from collections import Counter
+    #
+    # id_column = 'internal_id'
+    # property_attributes.set_index(id_column, inplace=True)
+    #
+    # # Define the preprocessing for numerical and categorical features
+    # numerical_features = property_attributes.select_dtypes(include=['int64', 'float64']).columns.tolist()
+    # categorical_features = property_attributes.select_dtypes(include=['object', 'category']).columns.tolist()
+    #
+    # for col in categorical_features:
+    #     property_attributes[col] = property_attributes[col].astype(str)
+    #
+    # preprocessor = ColumnTransformer(
+    #     transformers=[
+    #         ('num', StandardScaler(), numerical_features),
+    #         ('cat', OneHotEncoder(sparse_output=False), categorical_features)
+    #     ]
+    # )
+    #
+    # # Function to perform clustering and merge small clusters
+    # def cluster_with_min_size(data, preprocessor, n_clusters=10, min_size=5):
+    #     while True:
+    #         # Preprocess the data
+    #         processed_data = preprocessor.fit_transform(data)
+    #
+    #         # Initial clustering
+    #         clustering = AgglomerativeClustering(n_clusters=n_clusters)
+    #         labels = clustering.fit_predict(processed_data)
+    #
+    #         # Check cluster sizes
+    #         cluster_counts = Counter(labels)
+    #
+    #         # Find clusters smaller than min_size
+    #         small_clusters = {cluster for cluster, count in cluster_counts.items() if count < min_size}
+    #
+    #         if not small_clusters:
+    #             break
+    #
+    #         # Merge small clusters
+    #         for cluster in small_clusters:
+    #             # Find the nearest cluster to merge with
+    #             cluster_data = processed_data[labels == cluster]
+    #             other_clusters = [i for i in range(n_clusters) if i not in small_clusters]
+    #             other_cluster_data = [processed_data[labels == i] for i in other_clusters]
+    #             other_centroids = np.vstack([data.mean(axis=0) for data in other_cluster_data])
+    #
+    #             distances = cdist(cluster_data, other_centroids).mean(axis=0)
+    #             closest_cluster = other_clusters[np.argmin(distances)]
+    #
+    #             labels[labels == cluster] = closest_cluster
+    #
+    #         n_clusters -= len(small_clusters)
+    #
+    #     return labels
+    #
+    # # Perform clustering with minimum size constraint
+    # n_clusters = 10
+    # min_size = 5
+    # property_attributes['cluster'] = cluster_with_min_size(property_attributes, preprocessor, n_clusters, min_size)
+    #
+    # # Filter out empty clusters
+    # valid_clusters = property_attributes['cluster'].unique()
+    #
+    # # Get centroids for the resulting clusters
+    # processed_data = preprocessor.transform(property_attributes.drop(columns=["cluster"]))
+    # centroids = np.vstack([processed_data[property_attributes['cluster'] == i].mean(axis=0) for i in valid_clusters])
+    #
+    # # Calculate distances from each point to the centroid of its cluster
+    # distances_to_centroids = [
+    #     cdist(processed_data[i].reshape(1, -1),
+    #           centroids[valid_clusters.tolist().index(label)].reshape(1, -1)).flatten()[0]
+    #     for i, label in enumerate(property_attributes['cluster'])
+    # ]
+    #
+    # property_attributes['distance_to_centroid'] = distances_to_centroids
+    #
+    # # Verify that at least one point in each cluster has zero distance to the centroid
+    # for cluster_id in valid_clusters:
+    #     cluster_data = property_attributes[property_attributes['cluster'] == cluster_id]
+    #     min_distance = cluster_data['distance_to_centroid'].min()
+    #     print(f"Cluster {cluster_id} minimum distance to centroid: {min_distance}")
+    #     if min_distance != 0:
+    #         print(f"No point with zero distance found in cluster {cluster_id}")
+    #
+    # # Rank the distances within each cluster
+    # property_attributes['rank_within_cluster'] = property_attributes.groupby('cluster')['distance_to_centroid'] \
+    #     .rank(method='first')
+    #
+    # # Reset index to get 'internal_id' back
+    # property_attributes.reset_index(inplace=True)
+    #
+    # # Display the DataFrame
+    # print(property_attributes)
+
 
 def pull_ideal_postcodes(missing_uprn_with_udprn):
     api_key = ""  # Log into the platform the get the API key: https://account.ideal-postcodes.co.uk/

From 5b9a36d6d28981b030e7f63d4652318ae811b26c Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 13 Jun 2024 11:28:12 +0100
Subject: [PATCH 41/80] finished stonewater

---
 backend/SearchEpc.py                          | 21 ++--
 etl/customers/stonewater/shdf_3_clustering.py | 97 ++++++++++++++-----
 2 files changed, 89 insertions(+), 29 deletions(-)

diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py
index 275669cc..37c2b7f9 100644
--- a/backend/SearchEpc.py
+++ b/backend/SearchEpc.py
@@ -482,15 +482,22 @@ class SearchEpc:
                 if lmks_to_drop is not None:
                     epc_data = epc_data[~epc_data["lmk-key"].isin(lmks_to_drop)]
 
-                if not epc_data.empty:
-                    # Further processing of the EPC data
+                try:
+                    epc_data['lodgement-datetime'] = pd.to_datetime(
+                        epc_data['lodgement-datetime'], format='%Y-%m-%d %H:%M:%S', errors='coerce'
+                    )
+                except Exception as e:
+                    logger.error("Problem formatting lodgement-datime, appling fallback: " + str(e))
                     epc_data['lodgement-datetime'] = pd.to_datetime(epc_data['lodgement-datetime'], errors='coerce')
 
-                    if exclude_old:
-                        # Exclude EPC data older than 10 years
-                        epc_data = epc_data[
-                            epc_data["lodgement-datetime"] > (pd.Timestamp.now() - pd.DateOffset(years=10))
-                            ]
+                if exclude_old:
+                    # Exclude EPC data older than 10 years
+                    epc_data = epc_data[
+                        epc_data["lodgement-datetime"] > (pd.Timestamp.now() - pd.DateOffset(years=10))
+                        ]
+
+                if not epc_data.empty:
+                    # Further processing of the EPC data
 
                     epc_data = epc_data.sort_values("lodgement-datetime", ascending=False).groupby("uprn").head(1)
                     epc_data["house_number"] = epc_data["address"].apply(lambda add1: self.get_house_number(add1))
diff --git a/etl/customers/stonewater/shdf_3_clustering.py b/etl/customers/stonewater/shdf_3_clustering.py
index c853fa94..5129dfb1 100644
--- a/etl/customers/stonewater/shdf_3_clustering.py
+++ b/etl/customers/stonewater/shdf_3_clustering.py
@@ -11,7 +11,8 @@ from fuzzywuzzy import fuzz
 import numpy as np
 import pandas as pd
 import time
-from utils.s3 import save_data_to_s3, read_excel_from_s3, read_from_s3, read_dataframe_from_s3_parquet
+from utils.s3 import save_data_to_s3, read_excel_from_s3, read_from_s3, read_dataframe_from_s3_parquet, \
+    save_dataframe_to_s3_parquet, save_pickle_to_s3
 
 load_dotenv(dotenv_path="backend/.env")
 EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
@@ -1360,7 +1361,10 @@ def compile_data_final():
                 )
                 p_os_df = p_os_df[p_os_df["UPRN"] == property["uprn"]]
 
-            searcher.ordnance_survey_client.parse_classification_code(p_os_df["CLASSIFICATION_CODE"].values[0])
+            if not p_os_df.empty:
+                searcher.ordnance_survey_client.parse_classification_code(p_os_df["CLASSIFICATION_CODE"].values[0])
+            else:
+                searcher.ordnance_survey_client.property_type = ""
             # Now we estimate
             searcher.newest_epc = searcher.estimate_epc(
                 property_type=searcher.ordnance_survey_client.property_type,
@@ -1395,20 +1399,19 @@ def compile_data_final():
 
         if searcher.older_epcs is not None:
             older_epcs_batch_2[property["internal_id"]] = searcher.older_epcs
-
     # Store in S3
     # TODO - read in instead of running
-    # save_data_to_s3(
-    #     data=json.dumps(epc_data_batch_2),
-    #     s3_file_name="customers/Stonewater/clustering/epc_data_batch_2.json",
-    #     bucket_name="retrofit-data-dev"
-    # )
-    # 
-    # save_data_to_s3(
-    #     data=json.dumps(older_epcs_batch_2),
-    #     s3_file_name="customers/Stonewater/clustering/older_epcs_batch_2.json",
-    #     bucket_name="retrofit-data-dev"
-    # )
+    save_pickle_to_s3(
+        data=epc_data_batch_2,
+        s3_file_name="customers/Stonewater/clustering/epc_data_batch_2.pkl",
+        bucket_name="retrofit-data-dev"
+    )
+
+    save_pickle_to_s3(
+        data=older_epcs_batch_2,
+        s3_file_name="customers/Stonewater/clustering/older_epcs_batch_2.pkl",
+        bucket_name="retrofit-data-dev"
+    )
 
     epc_data_batch_2 = pd.DataFrame(epc_data_batch_2)
     complete_epcs = pd.concat([epc_data, epc_data_batch_2])
@@ -1439,15 +1442,15 @@ def compile_data_final():
         spatial_df = spatial_data[spatial_data["UPRN"].isin(associated_uprn)]
         spatial_data_to_uprn.append(spatial_df)
 
-    spatial_data_to_uprn = pd.concat(spatial_data_to_uprn)
-
     # TODO: Let's store this in s3
-    # save_data_to_s3(
-    #     data=json.dumps(spatial_data_to_uprn.to_dict("records")),
-    #     s3_file_name="scustomers/Stonewater/clustering/spatial_data_to_uprn.json",
+    # save_pickle_to_s3(
+    #     data=spatial_data_to_uprn,
+    #     s3_file_name="scustomers/Stonewater/clustering/spatial_data_to_uprn.pkl",
     #     bucket_name="retrofit-data-dev"
     # )
 
+    spatial_data_to_uprn = pd.concat(spatial_data_to_uprn)
+
     spatial_data_to_uprn = spatial_data_to_uprn.drop(
         columns=["partition", "filename"]
     ).rename(columns={"UPRN": "uprn"})
@@ -1455,10 +1458,16 @@ def compile_data_final():
 
     property_attributes = complete_epcs.merge(
         spatial_data_to_uprn,
-        how="left",
+        how="inner",
         on="uprn"
     )
 
+    property_attributes = property_attributes.merge(
+        asset_list[["internal_id", "owner", "match_type"]], how="left", on="internal_id"
+    )
+
+    # TODO: Add on data from the asset list such as ownership
+
     # We drop the columns we don't care about for clustering
     property_attributes = property_attributes.drop(
         columns=[
@@ -1502,7 +1511,7 @@ def compile_data_final():
 
     # Fields to transform: lodgement-datetime
     property_attributes["days_since_last_epc"] = (
-        datetime.now() - pd.to_datetime(property_attributes["lodgement-datetime"])
+        datetime.now() - pd.to_datetime(property_attributes["lodgement-datetime"], errors="coerce")
     ).dt.days
 
     property_attributes = property_attributes.drop(columns=["lodgement-datetime"])
@@ -1561,6 +1570,7 @@ def compile_data_final():
         "mainheatc-env-eff": "N",
         "floor-level": "NODATA!",
         "hot-water-energy-eff": "N/A",
+        "glazed-type": "unknown"
     }
 
     # Consolidation columns to single value
@@ -1608,6 +1618,19 @@ def compile_data_final():
 
     property_attributes["estimated"] = property_attributes["estimated"].fillna(False)
     property_attributes["conservation_status"] = property_attributes["conservation_status"].fillna(False)
+    property_attributes["days_since_last_epc"] = property_attributes["days_since_last_epc"].fillna(
+        property_attributes["days_since_last_epc"].mean()
+    )
+
+    missings = pd.isnull(property_attributes).sum()
+    missings = missings[missings > 0]
+
+    # Save this
+    # save_pickle_to_s3(
+    #     data=property_attributes,
+    #     bucket_name="retrofit-data-dev",
+    #     s3_file_name="customers/Stonewater/clustering/clustering_dataframe.pkl"
+    # )
 
     # CLUSTERING!!
 
@@ -1680,7 +1703,7 @@ def compile_data_final():
     )
 
     pipeline = Pipeline(steps=[('preprocessor', preprocessor),
-                               ('kmeans', KMeans(n_clusters=10, random_state=0))])
+                               ('kmeans', KMeans(n_clusters=450, random_state=0))])
 
     # Fit the pipeline to the data
     pipeline.fit(property_attributes)
@@ -1718,6 +1741,36 @@ def compile_data_final():
     # Sorting to verify
     property_attributes.sort_values(by=['cluster', 'rank'], inplace=True)
 
+    ################################################
+    # Prepare outputs!!!!
+    ################################################
+    property_attributes.reset_index(inplace=True)
+    property_attributes["archetype_representative"] = property_attributes["rank"] == 1
+
+    asset_list_with_archetypes = asset_list.merge(
+        property_attributes[["internal_id", "cluster", "archetype_representative", "rank"]], how="left",
+        on="internal_id"
+    )
+
+    asset_list_with_archetypes["cluster"] = asset_list_with_archetypes["cluster"].fillna(-999)
+    asset_list_with_archetypes["cluster"] = asset_list_with_archetypes["cluster"].astype(int).astype(str)
+    asset_list_with_archetypes["cluster"] = asset_list_with_archetypes["cluster"].replace("-999", "NO ARCHETYPE")
+
+    asset_list_with_archetypes["rank"] = asset_list_with_archetypes["rank"].fillna(-999)
+    asset_list_with_archetypes["rank"] = asset_list_with_archetypes["rank"].astype(int).astype(str)
+    asset_list_with_archetypes["rank"] = asset_list_with_archetypes["rank"].replace("-999", "NO ARCHETYPE")
+
+    asset_list_with_archetypes["archetype_representative"] = asset_list_with_archetypes[
+        "archetype_representative"].fillna(False)
+
+    asset_list_with_archetypes.to_csv("Stonewater asset list with archetypes.csv", index=False)
+
+    stonewater_uprn_lookup = asset_list_with_archetypes[
+        ["external_address_id", "udprn", "uprn", "match_type", "standardised_address", "standardised_postcode"]
+    ]
+
+    stonewater_uprn_lookup.to_excel("Stonewater uprn lookup table.xlsx")
+
     ################################################
     # Agglomertive Clustering
     ################################################

From 9781b08478d1d1a6f689616714fc6b201abd02e7 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 13 Jun 2024 17:06:14 +0100
Subject: [PATCH 42/80] done testing heights

---
 backend/apis/GoogleSolarApi.py | 258 +--------------------------------
 1 file changed, 2 insertions(+), 256 deletions(-)

diff --git a/backend/apis/GoogleSolarApi.py b/backend/apis/GoogleSolarApi.py
index 8ee7017e..0afa0b26 100644
--- a/backend/apis/GoogleSolarApi.py
+++ b/backend/apis/GoogleSolarApi.py
@@ -1,4 +1,6 @@
 import pandas as pd
+import numpy as np
+from recommendations.Costs import MCS_SOLAR_PV_COST_DATA
 
 from backend.Property import Property
 from backend.SearchEpc import SearchEpc
@@ -123,263 +125,7 @@ solar_potential["wholeRoofStats"]["groundAreaMeters2"]
 solar_potential["solarPanelConfigs"][0]
 solar_potential["solarPanelConfigs"][1]
 
-# Copy of response for testing - 6 Laura Close, Tintagel, PL34 0EB
-# {'name': 'buildings/ChIJ2yC6t4KEa0gRh2TIssogI7k', 'center': {'latitude': 50.667375, 'longitude': -4.7416833},
-# 'imageryDate': {'year': 2021, 'month': 7, 'day': 19}, 'regionCode': 'GB', 'solarPotential': {'maxArrayPanelsCount':
-# 39, 'maxArrayAreaMeters2': 76.578636, 'maxSunshineHoursPerYear': 1172.0627, 'carbonOffsetFactorKgPerMwh':
-# 478.99942, 'wholeRoofStats': {'areaMeters2': 129.65686, 'sunshineQuantiles': [537, 738.3836, 805.62445, 842.6802,
-# 909.8431, 972.15234, 1036.1013, 1092.051, 1135.8192, 1163.1444, 1193.6012], 'groundAreaMeters2': 112.33},
-# 'roofSegmentStats': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'stats': {'areaMeters2': 44.08321,
-# 'sunshineQuantiles': [614, 940.86975, 982.39124, 1057.0664, 1109.6869, 1137.5837, 1152.9211, 1163.1106, 1168.2212,
-# 1170.8883, 1193.6012], 'groundAreaMeters2': 37.61}, 'center': {'latitude': 50.6673664, 'longitude':
-# -4.741714099999999}, 'boundingBox': {'sw': {'latitude': 50.6673354, 'longitude': -4.741777}, 'ne': {'latitude':
-# 50.6674029, 'longitude': -4.7416472}}, 'planeHeightAtCenterMeters': 93.0221}, {'pitchDegrees': 34.39779,
-# 'azimuthDegrees': 31.74401, 'stats': {'areaMeters2': 44.622986, 'sunshineQuantiles': [537, 671.49774, 733.84985,
-# 780.82733, 801.4026, 814.0189, 824.0077, 847.77484, 895.08295, 950.1469, 1123.3503], 'groundAreaMeters2': 36.82},
-# 'center': {'latitude': 50.6673966, 'longitude': -4.7416813}, 'boundingBox': {'sw': {'latitude': 50.667361,
-# 'longitude': -4.7417497}, 'ne': {'latitude': 50.6674303, 'longitude': -4.741615599999999}},
-# 'planeHeightAtCenterMeters': 92.87593}, {'pitchDegrees': 3.0681775, 'azimuthDegrees': 301.1099, 'stats': {
-# 'areaMeters2': 17.074476, 'sunshineQuantiles': [644.71136, 731.0546, 782.89813, 842.7107, 908.55585, 966.6212,
-# 1010.6367, 1038.2543, 1053.2788, 1090.6831, 1128.0178], 'groundAreaMeters2': 17.050001}, 'center': {'latitude':
-# 50.66740850000001, 'longitude': -4.7416025}, 'boundingBox': {'sw': {'latitude': 50.6673895, 'longitude':
-# -4.7416436}, 'ne': {'latitude': 50.667431199999996, 'longitude': -4.7415572}}, 'planeHeightAtCenterMeters':
-# 90.630356}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162, 'stats': {'areaMeters2': 13.501617,
-# 'sunshineQuantiles': [749, 976.85345, 1059.0062, 1081.6173, 1097.4441, 1110.3171, 1128.2186, 1133.9421, 1142.068,
-# 1148.2168, 1157.632], 'groundAreaMeters2': 12.02}, 'center': {'latitude': 50.667315699999996, 'longitude':
-# -4.741675400000001}, 'boundingBox': {'sw': {'latitude': 50.667291399999996, 'longitude': -4.7417066},
-# 'ne': {'latitude': 50.6673372, 'longitude': -4.741648400000001}}, 'planeHeightAtCenterMeters': 92.36334},
-# {'pitchDegrees': 31.666294, 'azimuthDegrees': 308.42334, 'stats': {'areaMeters2': 10.374564, 'sunshineQuantiles': [
-# 617.9507, 752.2504, 847.66315, 872.0505, 881.26227, 900.9639, 933.3188, 967.4747, 1000.8129, 1038.3002, 1105.545],
-# 'groundAreaMeters2': 8.83}, 'center': {'latitude': 50.6673295, 'longitude': -4.7417128}, 'boundingBox': {'sw': {
-# 'latitude': 50.6673134, 'longitude': -4.7417422}, 'ne': {'latitude': 50.6673413, 'longitude': -4.7416775}},
-# 'planeHeightAtCenterMeters': 92.31146}], 'solarPanelConfigs': [{'panelsCount': 4, 'yearlyEnergyDcKwh': 1867.1516,
-# 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 4,
-# 'yearlyEnergyDcKwh': 1867.1515, 'segmentIndex': 0}]}, {'panelsCount': 5, 'yearlyEnergyDcKwh': 2335.0068,
-# 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 5,
-# 'yearlyEnergyDcKwh': 2335.0068, 'segmentIndex': 0}]}, {'panelsCount': 6, 'yearlyEnergyDcKwh': 2799.8508,
-# 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 6,
-# 'yearlyEnergyDcKwh': 2799.8508, 'segmentIndex': 0}]}, {'panelsCount': 7, 'yearlyEnergyDcKwh': 3264.6506,
-# 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 7,
-# 'yearlyEnergyDcKwh': 3264.6506, 'segmentIndex': 0}]}, {'panelsCount': 8, 'yearlyEnergyDcKwh': 3726.2405,
-# 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 8,
-# 'yearlyEnergyDcKwh': 3726.2405, 'segmentIndex': 0}]}, {'panelsCount': 9, 'yearlyEnergyDcKwh': 4187.721,
-# 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 9,
-# 'yearlyEnergyDcKwh': 4187.721, 'segmentIndex': 0}]}, {'panelsCount': 10, 'yearlyEnergyDcKwh': 4646.094,
-# 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 10,
-# 'yearlyEnergyDcKwh': 4646.094, 'segmentIndex': 0}]}, {'panelsCount': 11, 'yearlyEnergyDcKwh': 5103.777,
-# 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 10,
-# 'yearlyEnergyDcKwh': 4646.094, 'segmentIndex': 0}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162,
-# 'panelsCount': 1, 'yearlyEnergyDcKwh': 457.68268, 'segmentIndex': 3}]}, {'panelsCount': 12, 'yearlyEnergyDcKwh':
-# 5559.845, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 10,
-# 'yearlyEnergyDcKwh': 4646.094, 'segmentIndex': 0}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162,
-# 'panelsCount': 2, 'yearlyEnergyDcKwh': 913.7509, 'segmentIndex': 3}]}, {'panelsCount': 13, 'yearlyEnergyDcKwh':
-# 6013.053, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 11,
-# 'yearlyEnergyDcKwh': 5099.302, 'segmentIndex': 0}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162,
-# 'panelsCount': 2, 'yearlyEnergyDcKwh': 913.7509, 'segmentIndex': 3}]}, {'panelsCount': 14, 'yearlyEnergyDcKwh':
-# 6461.664, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 12,
-# 'yearlyEnergyDcKwh': 5547.9126, 'segmentIndex': 0}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162,
-# 'panelsCount': 2, 'yearlyEnergyDcKwh': 913.7509, 'segmentIndex': 3}]}, {'panelsCount': 15, 'yearlyEnergyDcKwh':
-# 6902.33, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 12,
-# 'yearlyEnergyDcKwh': 5547.9126, 'segmentIndex': 0}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162,
-# 'panelsCount': 3, 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}]}, {'panelsCount': 16, 'yearlyEnergyDcKwh':
-# 7321.6436, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 12,
-# 'yearlyEnergyDcKwh': 5547.9126, 'segmentIndex': 0}, {'pitchDegrees': 3.0681775, 'azimuthDegrees': 301.1099,
-# 'panelsCount': 1, 'yearlyEnergyDcKwh': 419.31348, 'segmentIndex': 2}, {'pitchDegrees': 27.093596, 'azimuthDegrees':
-# 132.60162, 'panelsCount': 3, 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}]}, {'panelsCount': 17,
-# 'yearlyEnergyDcKwh': 7740.388, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331,
-# 'panelsCount': 12, 'yearlyEnergyDcKwh': 5547.9126, 'segmentIndex': 0}, {'pitchDegrees': 3.0681775,
-# 'azimuthDegrees': 301.1099, 'panelsCount': 2, 'yearlyEnergyDcKwh': 838.0579, 'segmentIndex': 2}, {'pitchDegrees':
-# 27.093596, 'azimuthDegrees': 132.60162, 'panelsCount': 3, 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}]},
-# {'panelsCount': 18, 'yearlyEnergyDcKwh': 8154.265, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022,
-# 'azimuthDegrees': 218.25331, 'panelsCount': 13, 'yearlyEnergyDcKwh': 5961.7896, 'segmentIndex': 0},
-# {'pitchDegrees': 3.0681775, 'azimuthDegrees': 301.1099, 'panelsCount': 2, 'yearlyEnergyDcKwh': 838.0579,
-# 'segmentIndex': 2}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162, 'panelsCount': 3, 'yearlyEnergyDcKwh':
-# 1354.4171, 'segmentIndex': 3}]}, {'panelsCount': 19, 'yearlyEnergyDcKwh': 8566.032, 'roofSegmentSummaries': [{
-# 'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 14, 'yearlyEnergyDcKwh': 6373.556,
-# 'segmentIndex': 0}, {'pitchDegrees': 3.0681775, 'azimuthDegrees': 301.1099, 'panelsCount': 2, 'yearlyEnergyDcKwh':
-# 838.0579, 'segmentIndex': 2}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162, 'panelsCount': 3,
-# 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}]}, {'panelsCount': 20, 'yearlyEnergyDcKwh': 8976.624,
-# 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 15,
-# 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0}, {'pitchDegrees': 3.0681775, 'azimuthDegrees': 301.1099,
-# 'panelsCount': 2, 'yearlyEnergyDcKwh': 838.0579, 'segmentIndex': 2}, {'pitchDegrees': 27.093596, 'azimuthDegrees':
-# 132.60162, 'panelsCount': 3, 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}]}, {'panelsCount': 21,
-# 'yearlyEnergyDcKwh': 9380.78, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331,
-# 'panelsCount': 15, 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0}, {'pitchDegrees': 3.0681775,
-# 'azimuthDegrees': 301.1099, 'panelsCount': 3, 'yearlyEnergyDcKwh': 1242.214, 'segmentIndex': 2}, {'pitchDegrees':
-# 27.093596, 'azimuthDegrees': 132.60162, 'panelsCount': 3, 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}]},
-# {'panelsCount': 22, 'yearlyEnergyDcKwh': 9784.078, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022,
-# 'azimuthDegrees': 218.25331, 'panelsCount': 15, 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0},
-# {'pitchDegrees': 3.0681775, 'azimuthDegrees': 301.1099, 'panelsCount': 4, 'yearlyEnergyDcKwh': 1645.5122,
-# 'segmentIndex': 2}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162, 'panelsCount': 3, 'yearlyEnergyDcKwh':
-# 1354.4171, 'segmentIndex': 3}]}, {'panelsCount': 23, 'yearlyEnergyDcKwh': 10162.354, 'roofSegmentSummaries': [{
-# 'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 15, 'yearlyEnergyDcKwh': 6784.1484,
-# 'segmentIndex': 0}, {'pitchDegrees': 3.0681775, 'azimuthDegrees': 301.1099, 'panelsCount': 4, 'yearlyEnergyDcKwh':
-# 1645.5122, 'segmentIndex': 2}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162, 'panelsCount': 3,
-# 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}, {'pitchDegrees': 31.666294, 'azimuthDegrees': 308.42334,
-# 'panelsCount': 1, 'yearlyEnergyDcKwh': 378.2754, 'segmentIndex': 4}]}, {'panelsCount': 24, 'yearlyEnergyDcKwh':
-# 10535.894, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 15,
-# 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0}, {'pitchDegrees': 3.0681775, 'azimuthDegrees': 301.1099,
-# 'panelsCount': 5, 'yearlyEnergyDcKwh': 2019.0519, 'segmentIndex': 2}, {'pitchDegrees': 27.093596, 'azimuthDegrees':
-# 132.60162, 'panelsCount': 3, 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}, {'pitchDegrees': 31.666294,
-# 'azimuthDegrees': 308.42334, 'panelsCount': 1, 'yearlyEnergyDcKwh': 378.2754, 'segmentIndex': 4}]}, {'panelsCount':
-# 25, 'yearlyEnergyDcKwh': 10901.273, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees':
-# 218.25331, 'panelsCount': 15, 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0}, {'pitchDegrees': 3.0681775,
-# 'azimuthDegrees': 301.1099, 'panelsCount': 5, 'yearlyEnergyDcKwh': 2019.0519, 'segmentIndex': 2}, {'pitchDegrees':
-# 27.093596, 'azimuthDegrees': 132.60162, 'panelsCount': 3, 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3},
-# {'pitchDegrees': 31.666294, 'azimuthDegrees': 308.42334, 'panelsCount': 2, 'yearlyEnergyDcKwh': 743.65497,
-# 'segmentIndex': 4}]}, {'panelsCount': 26, 'yearlyEnergyDcKwh': 11242.756, 'roofSegmentSummaries': [{'pitchDegrees':
-# 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 15, 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0},
-# {'pitchDegrees': 34.39779, 'azimuthDegrees': 31.74401, 'panelsCount': 1, 'yearlyEnergyDcKwh': 341.4827,
-# 'segmentIndex': 1}, {'pitchDegrees': 3.0681775, 'azimuthDegrees': 301.1099, 'panelsCount': 5, 'yearlyEnergyDcKwh':
-# 2019.0519, 'segmentIndex': 2}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162, 'panelsCount': 3,
-# 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}, {'pitchDegrees': 31.666294, 'azimuthDegrees': 308.42334,
-# 'panelsCount': 2, 'yearlyEnergyDcKwh': 743.65497, 'segmentIndex': 4}]}, {'panelsCount': 27, 'yearlyEnergyDcKwh':
-# 11579.401, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 15,
-# 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0}, {'pitchDegrees': 34.39779, 'azimuthDegrees': 31.74401,
-# 'panelsCount': 2, 'yearlyEnergyDcKwh': 678.1277, 'segmentIndex': 1}, {'pitchDegrees': 3.0681775, 'azimuthDegrees':
-# 301.1099, 'panelsCount': 5, 'yearlyEnergyDcKwh': 2019.0519, 'segmentIndex': 2}, {'pitchDegrees': 27.093596,
-# 'azimuthDegrees': 132.60162, 'panelsCount': 3, 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}, {'pitchDegrees':
-# 31.666294, 'azimuthDegrees': 308.42334, 'panelsCount': 2, 'yearlyEnergyDcKwh': 743.65497, 'segmentIndex': 4}]},
-# {'panelsCount': 28, 'yearlyEnergyDcKwh': 11919.106, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022,
-# 'azimuthDegrees': 218.25331, 'panelsCount': 15, 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0},
-# {'pitchDegrees': 34.39779, 'azimuthDegrees': 31.74401, 'panelsCount': 3, 'yearlyEnergyDcKwh': 1017.83356,
-# 'segmentIndex': 1}, {'pitchDegrees': 3.0681775, 'azimuthDegrees': 301.1099, 'panelsCount': 5, 'yearlyEnergyDcKwh':
-# 2019.0519, 'segmentIndex': 2}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162, 'panelsCount': 3,
-# 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}, {'pitchDegrees': 31.666294, 'azimuthDegrees': 308.42334,
-# 'panelsCount': 2, 'yearlyEnergyDcKwh': 743.65497, 'segmentIndex': 4}]}, {'panelsCount': 29, 'yearlyEnergyDcKwh':
-# 12255.358, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 15,
-# 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0}, {'pitchDegrees': 34.39779, 'azimuthDegrees': 31.74401,
-# 'panelsCount': 4, 'yearlyEnergyDcKwh': 1354.0854, 'segmentIndex': 1}, {'pitchDegrees': 3.0681775, 'azimuthDegrees':
-# 301.1099, 'panelsCount': 5, 'yearlyEnergyDcKwh': 2019.0519, 'segmentIndex': 2}, {'pitchDegrees': 27.093596,
-# 'azimuthDegrees': 132.60162, 'panelsCount': 3, 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}, {'pitchDegrees':
-# 31.666294, 'azimuthDegrees': 308.42334, 'panelsCount': 2, 'yearlyEnergyDcKwh': 743.65497, 'segmentIndex': 4}]},
-# {'panelsCount': 30, 'yearlyEnergyDcKwh': 12586.448, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022,
-# 'azimuthDegrees': 218.25331, 'panelsCount': 15, 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0},
-# {'pitchDegrees': 34.39779, 'azimuthDegrees': 31.74401, 'panelsCount': 5, 'yearlyEnergyDcKwh': 1685.1748,
-# 'segmentIndex': 1}, {'pitchDegrees': 3.0681775, 'azimuthDegrees': 301.1099, 'panelsCount': 5, 'yearlyEnergyDcKwh':
-# 2019.0519, 'segmentIndex': 2}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162, 'panelsCount': 3,
-# 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}, {'pitchDegrees': 31.666294, 'azimuthDegrees': 308.42334,
-# 'panelsCount': 2, 'yearlyEnergyDcKwh': 743.65497, 'segmentIndex': 4}]}, {'panelsCount': 31, 'yearlyEnergyDcKwh':
-# 12911.502, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 15,
-# 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0}, {'pitchDegrees': 34.39779, 'azimuthDegrees': 31.74401,
-# 'panelsCount': 6, 'yearlyEnergyDcKwh': 2010.2289, 'segmentIndex': 1}, {'pitchDegrees': 3.0681775, 'azimuthDegrees':
-# 301.1099, 'panelsCount': 5, 'yearlyEnergyDcKwh': 2019.0519, 'segmentIndex': 2}, {'pitchDegrees': 27.093596,
-# 'azimuthDegrees': 132.60162, 'panelsCount': 3, 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}, {'pitchDegrees':
-# 31.666294, 'azimuthDegrees': 308.42334, 'panelsCount': 2, 'yearlyEnergyDcKwh': 743.65497, 'segmentIndex': 4}]},
-# {'panelsCount': 32, 'yearlyEnergyDcKwh': 13233.139, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022,
-# 'azimuthDegrees': 218.25331, 'panelsCount': 15, 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0},
-# {'pitchDegrees': 34.39779, 'azimuthDegrees': 31.74401, 'panelsCount': 7, 'yearlyEnergyDcKwh': 2331.8652,
-# 'segmentIndex': 1}, {'pitchDegrees': 3.0681775, 'azimuthDegrees': 301.1099, 'panelsCount': 5, 'yearlyEnergyDcKwh':
-# 2019.0519, 'segmentIndex': 2}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162, 'panelsCount': 3,
-# 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}, {'pitchDegrees': 31.666294, 'azimuthDegrees': 308.42334,
-# 'panelsCount': 2, 'yearlyEnergyDcKwh': 743.65497, 'segmentIndex': 4}]}, {'panelsCount': 33, 'yearlyEnergyDcKwh':
-# 13554.602, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 15,
-# 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0}, {'pitchDegrees': 34.39779, 'azimuthDegrees': 31.74401,
-# 'panelsCount': 8, 'yearlyEnergyDcKwh': 2653.3286, 'segmentIndex': 1}, {'pitchDegrees': 3.0681775, 'azimuthDegrees':
-# 301.1099, 'panelsCount': 5, 'yearlyEnergyDcKwh': 2019.0519, 'segmentIndex': 2}, {'pitchDegrees': 27.093596,
-# 'azimuthDegrees': 132.60162, 'panelsCount': 3, 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}, {'pitchDegrees':
-# 31.666294, 'azimuthDegrees': 308.42334, 'panelsCount': 2, 'yearlyEnergyDcKwh': 743.65497, 'segmentIndex': 4}]},
-# {'panelsCount': 34, 'yearlyEnergyDcKwh': 13893.903, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022,
-# 'azimuthDegrees': 218.25331, 'panelsCount': 15, 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0},
-# {'pitchDegrees': 34.39779, 'azimuthDegrees': 31.74401, 'panelsCount': 9, 'yearlyEnergyDcKwh': 2992.6301,
-# 'segmentIndex': 1}, {'pitchDegrees': 3.0681775, 'azimuthDegrees': 301.1099, 'panelsCount': 5, 'yearlyEnergyDcKwh':
-# 2019.0519, 'segmentIndex': 2}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162, 'panelsCount': 3,
-# 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}, {'pitchDegrees': 31.666294, 'azimuthDegrees': 308.42334,
-# 'panelsCount': 2, 'yearlyEnergyDcKwh': 743.65497, 'segmentIndex': 4}]}, {'panelsCount': 35, 'yearlyEnergyDcKwh':
-# 14221.166, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 15,
-# 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0}, {'pitchDegrees': 34.39779, 'azimuthDegrees': 31.74401,
-# 'panelsCount': 10, 'yearlyEnergyDcKwh': 3319.893, 'segmentIndex': 1}, {'pitchDegrees': 3.0681775, 'azimuthDegrees':
-# 301.1099, 'panelsCount': 5, 'yearlyEnergyDcKwh': 2019.0519, 'segmentIndex': 2}, {'pitchDegrees': 27.093596,
-# 'azimuthDegrees': 132.60162, 'panelsCount': 3, 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}, {'pitchDegrees':
-# 31.666294, 'azimuthDegrees': 308.42334, 'panelsCount': 2, 'yearlyEnergyDcKwh': 743.65497, 'segmentIndex': 4}]},
-# {'panelsCount': 36, 'yearlyEnergyDcKwh': 14536.154, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022,
-# 'azimuthDegrees': 218.25331, 'panelsCount': 15, 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0},
-# {'pitchDegrees': 34.39779, 'azimuthDegrees': 31.74401, 'panelsCount': 11, 'yearlyEnergyDcKwh': 3634.8809,
-# 'segmentIndex': 1}, {'pitchDegrees': 3.0681775, 'azimuthDegrees': 301.1099, 'panelsCount': 5, 'yearlyEnergyDcKwh':
-# 2019.0519, 'segmentIndex': 2}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162, 'panelsCount': 3,
-# 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}, {'pitchDegrees': 31.666294, 'azimuthDegrees': 308.42334,
-# 'panelsCount': 2, 'yearlyEnergyDcKwh': 743.65497, 'segmentIndex': 4}]}, {'panelsCount': 37, 'yearlyEnergyDcKwh':
-# 14850.317, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 15,
-# 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0}, {'pitchDegrees': 34.39779, 'azimuthDegrees': 31.74401,
-# 'panelsCount': 12, 'yearlyEnergyDcKwh': 3949.0444, 'segmentIndex': 1}, {'pitchDegrees': 3.0681775,
-# 'azimuthDegrees': 301.1099, 'panelsCount': 5, 'yearlyEnergyDcKwh': 2019.0519, 'segmentIndex': 2}, {'pitchDegrees':
-# 27.093596, 'azimuthDegrees': 132.60162, 'panelsCount': 3, 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3},
-# {'pitchDegrees': 31.666294, 'azimuthDegrees': 308.42334, 'panelsCount': 2, 'yearlyEnergyDcKwh': 743.65497,
-# 'segmentIndex': 4}]}, {'panelsCount': 38, 'yearlyEnergyDcKwh': 15160.658, 'roofSegmentSummaries': [{'pitchDegrees':
-# 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 15, 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0},
-# {'pitchDegrees': 34.39779, 'azimuthDegrees': 31.74401, 'panelsCount': 13, 'yearlyEnergyDcKwh': 4259.385,
-# 'segmentIndex': 1}, {'pitchDegrees': 3.0681775, 'azimuthDegrees': 301.1099, 'panelsCount': 5, 'yearlyEnergyDcKwh':
-# 2019.0519, 'segmentIndex': 2}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162, 'panelsCount': 3,
-# 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}, {'pitchDegrees': 31.666294, 'azimuthDegrees': 308.42334,
-# 'panelsCount': 2, 'yearlyEnergyDcKwh': 743.65497, 'segmentIndex': 4}]}, {'panelsCount': 39, 'yearlyEnergyDcKwh':
-# 15438.986, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 15,
-# 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0}, {'pitchDegrees': 34.39779, 'azimuthDegrees': 31.74401,
-# 'panelsCount': 14, 'yearlyEnergyDcKwh': 4537.713, 'segmentIndex': 1}, {'pitchDegrees': 3.0681775, 'azimuthDegrees':
-# 301.1099, 'panelsCount': 5, 'yearlyEnergyDcKwh': 2019.0519, 'segmentIndex': 2}, {'pitchDegrees': 27.093596,
-# 'azimuthDegrees': 132.60162, 'panelsCount': 3, 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}, {'pitchDegrees':
-# 31.666294, 'azimuthDegrees': 308.42334, 'panelsCount': 2, 'yearlyEnergyDcKwh': 743.65497, 'segmentIndex': 4}]}],
-# 'panelCapacityWatts': 400, 'panelHeightMeters': 1.879, 'panelWidthMeters': 1.045, 'panelLifetimeYears': 20,
-# 'buildingStats': {'areaMeters2': 138.38115, 'sunshineQuantiles': [537, 728.5604, 799.23975, 833.99713, 900.88086,
-# 959.65875, 1024.2743, 1086.1285, 1132.8774, 1162.1904, 1193.6012], 'groundAreaMeters2': 117.16}, 'solarPanels': [{
-# 'center': {'latitude': 50.667371499999994, 'longitude': -4.7417235}, 'orientation': 'LANDSCAPE',
-# 'yearlyEnergyDcKwh': 468.5037, 'segmentIndex': 0}, {'center': {'latitude': 50.6673614, 'longitude': -4.7417023},
-# 'orientation': 'LANDSCAPE', 'yearlyEnergyDcKwh': 467.61072, 'segmentIndex': 0}, {'center': {'latitude':
-# 50.667365100000005, 'longitude': -4.7417311}, 'orientation': 'LANDSCAPE', 'yearlyEnergyDcKwh': 465.55005,
-# 'segmentIndex': 0}, {'center': {'latitude': 50.6673512, 'longitude': -4.741681000000001}, 'orientation':
-# 'LANDSCAPE', 'yearlyEnergyDcKwh': 465.48712, 'segmentIndex': 0}, {'center': {'latitude': 50.667357599999995,
-# 'longitude': -4.7416734}, 'orientation': 'LANDSCAPE', 'yearlyEnergyDcKwh': 467.8553, 'segmentIndex': 0},
-# {'center': {'latitude': 50.6673779, 'longitude': -4.741715999999999}, 'orientation': 'LANDSCAPE',
-# 'yearlyEnergyDcKwh': 464.84396, 'segmentIndex': 0}, {'center': {'latitude': 50.6673678, 'longitude': -4.7416947},
-# 'orientation': 'LANDSCAPE', 'yearlyEnergyDcKwh': 464.79984, 'segmentIndex': 0}, {'center': {'latitude': 50.6673549,
-# 'longitude': -4.7417098}, 'orientation': 'LANDSCAPE', 'yearlyEnergyDcKwh': 461.58975, 'segmentIndex': 0},
-# {'center': {'latitude': 50.6673816, 'longitude': -4.7417448}, 'orientation': 'LANDSCAPE', 'yearlyEnergyDcKwh':
-# 461.48065, 'segmentIndex': 0}, {'center': {'latitude': 50.6673881, 'longitude': -4.7417372}, 'orientation':
-# 'LANDSCAPE', 'yearlyEnergyDcKwh': 458.3733, 'segmentIndex': 0}, {'center': {'latitude': 50.6673149, 'longitude':
-# -4.7416768}, 'orientation': 'LANDSCAPE', 'yearlyEnergyDcKwh': 457.68268, 'segmentIndex': 3}, {'center': {
-# 'latitude': 50.6673204, 'longitude': -4.7416867}, 'orientation': 'LANDSCAPE', 'yearlyEnergyDcKwh': 456.06827,
-# 'segmentIndex': 3}, {'center': {'latitude': 50.667375199999995, 'longitude': -4.7417524}, 'orientation':
-# 'LANDSCAPE', 'yearlyEnergyDcKwh': 453.20776, 'segmentIndex': 0}, {'center': {'latitude': 50.667364, 'longitude':
-# -4.7416659}, 'orientation': 'LANDSCAPE', 'yearlyEnergyDcKwh': 448.61087, 'segmentIndex': 0}, {'center': {
-# 'latitude': 50.6673094, 'longitude': -4.741666899999999}, 'orientation': 'LANDSCAPE', 'yearlyEnergyDcKwh':
-# 440.66626, 'segmentIndex': 3}, {'center': {'latitude': 50.667403799999995, 'longitude': -4.741588900000001},
-# 'orientation': 'LANDSCAPE', 'yearlyEnergyDcKwh': 419.31348, 'segmentIndex': 2}, {'center': {'latitude':
-# 50.66740850000001, 'longitude': -4.7416016999999995}, 'orientation': 'LANDSCAPE', 'yearlyEnergyDcKwh': 418.74448,
-# 'segmentIndex': 2}, {'center': {'latitude': 50.6673688, 'longitude': -4.7417599}, 'orientation': 'LANDSCAPE',
-# 'yearlyEnergyDcKwh': 413.877, 'segmentIndex': 0}, {'center': {'latitude': 50.667348499999996, 'longitude':
-# -4.7417174}, 'orientation': 'LANDSCAPE', 'yearlyEnergyDcKwh': 411.76657, 'segmentIndex': 0}, {'center': {
-# 'latitude': 50.6673587, 'longitude': -4.7417387}, 'orientation': 'LANDSCAPE', 'yearlyEnergyDcKwh': 410.5925,
-# 'segmentIndex': 0}, {'center': {'latitude': 50.6673992, 'longitude': -4.7415761}, 'orientation': 'LANDSCAPE',
-# 'yearlyEnergyDcKwh': 404.15607, 'segmentIndex': 2}, {'center': {'latitude': 50.6674132, 'longitude': -4.7416145},
-# 'orientation': 'LANDSCAPE', 'yearlyEnergyDcKwh': 403.29822, 'segmentIndex': 2}, {'center': {'latitude': 50.6673324,
-# 'longitude': -4.7417015}, 'orientation': 'PORTRAIT', 'yearlyEnergyDcKwh': 378.2754, 'segmentIndex': 4}, {'center':
-# {'latitude': 50.667417799999996, 'longitude': -4.7416273}, 'orientation': 'LANDSCAPE', 'yearlyEnergyDcKwh':
-# 373.53967, 'segmentIndex': 2}, {'center': {'latitude': 50.667324900000004, 'longitude': -4.7417104}, 'orientation':
-# 'PORTRAIT', 'yearlyEnergyDcKwh': 365.37958, 'segmentIndex': 4}, {'center': {'latitude': 50.6674043, 'longitude':
-# -4.741680800000001}, 'orientation': 'PORTRAIT', 'yearlyEnergyDcKwh': 341.4827, 'segmentIndex': 1}, {'center': {
-# 'latitude': 50.667392299999996, 'longitude': -4.7416919}, 'orientation': 'PORTRAIT', 'yearlyEnergyDcKwh':
-# 336.64502, 'segmentIndex': 1}, {'center': {'latitude': 50.667397, 'longitude': -4.741704599999999}, 'orientation':
-# 'PORTRAIT', 'yearlyEnergyDcKwh': 339.7059, 'segmentIndex': 1}, {'center': {'latitude': 50.6674018, 'longitude':
-# -4.7417174}, 'orientation': 'PORTRAIT', 'yearlyEnergyDcKwh': 336.25195, 'segmentIndex': 1}, {'center': {'latitude':
-# 50.6673875, 'longitude': -4.7416791}, 'orientation': 'PORTRAIT', 'yearlyEnergyDcKwh': 331.08936, 'segmentIndex':
-# 1}, {'center': {'latitude': 50.6674065, 'longitude': -4.7417301}, 'orientation': 'PORTRAIT', 'yearlyEnergyDcKwh':
-# 325.05405, 'segmentIndex': 1}, {'center': {'latitude': 50.6673828, 'longitude': -4.7416664}, 'orientation':
-# 'PORTRAIT', 'yearlyEnergyDcKwh': 321.63647, 'segmentIndex': 1}, {'center': {'latitude': 50.667378, 'longitude':
-# -4.741653599999999}, 'orientation': 'PORTRAIT', 'yearlyEnergyDcKwh': 321.46332, 'segmentIndex': 1}, {'center': {
-# 'latitude': 50.667373299999994, 'longitude': -4.7416409}, 'orientation': 'PORTRAIT', 'yearlyEnergyDcKwh': 339.3016,
-# 'segmentIndex': 1}, {'center': {'latitude': 50.6673853, 'longitude': -4.7416298}, 'orientation': 'PORTRAIT',
-# 'yearlyEnergyDcKwh': 327.26282, 'segmentIndex': 1}, {'center': {'latitude': 50.667399499999995, 'longitude':
-# -4.741668}, 'orientation': 'PORTRAIT', 'yearlyEnergyDcKwh': 314.9878, 'segmentIndex': 1}, {'center': {'latitude':
-# 50.6673948, 'longitude': -4.7416553}, 'orientation': 'PORTRAIT', 'yearlyEnergyDcKwh': 314.16364, 'segmentIndex':
-# 1}, {'center': {'latitude': 50.667390000000005, 'longitude': -4.7416425}, 'orientation': 'PORTRAIT',
-# 'yearlyEnergyDcKwh': 310.3404, 'segmentIndex': 1}, {'center': {'latitude': 50.6674186, 'longitude': -4.7417191},
-# 'orientation': 'PORTRAIT', 'yearlyEnergyDcKwh': 278.3281, 'segmentIndex': 1}]}, 'boundingBox': {'sw': {'latitude':
-# 50.6672904, 'longitude': -4.741778}, 'ne': {'latitude': 50.667431199999996, 'longitude': -4.7415536}},
-# 'imageryQuality': 'MEDIUM', 'imageryProcessedDate': {'year': 2024, 'month': 4, 'day': 18}}
-
-
 self = GoogleSolarApi(api_key=api_key)
-import numpy as np
-from recommendations.Costs import MCS_SOLAR_PV_COST_DATA
 
 
 class GoogleSolarApi:

From 01c50eb5cba3603696f93ab89c403ed7b58d139b Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 24 Jun 2024 14:57:01 +0100
Subject: [PATCH 43/80] integrating solar api to router

---
 .idea/Model.iml                        |   2 +-
 .idea/misc.xml                         |   2 +-
 backend/apis/GoogleSolarApi.py         | 167 +++++++------------------
 backend/app/config.py                  |   1 +
 backend/app/plan/router.py             |   4 +
 backend/ml_models/AnnualBillSavings.py |   2 +
 6 files changed, 51 insertions(+), 127 deletions(-)

diff --git a/.idea/Model.iml b/.idea/Model.iml
index b0f9c00d..4413bb06 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 1122b380..6f308057 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/backend/apis/GoogleSolarApi.py b/backend/apis/GoogleSolarApi.py
index 0afa0b26..cac82f4b 100644
--- a/backend/apis/GoogleSolarApi.py
+++ b/backend/apis/GoogleSolarApi.py
@@ -1,136 +1,19 @@
 import pandas as pd
 import numpy as np
 from recommendations.Costs import MCS_SOLAR_PV_COST_DATA
-
-from backend.Property import Property
-from backend.SearchEpc import SearchEpc
-from etl.epc.Record import EPCRecord
-from dotenv import load_dotenv
-from utils.s3 import read_dataframe_from_s3_parquet, read_from_s3
-import os
+from backend.ml_models.AnnualBillSavings import AnnualBillSavings
 import requests
-import msgpack
 from functools import lru_cache
 import time
 
-load_dotenv(dotenv_path="backend/.env")
-EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
-
-# This is for 6 Laura Close, Tintagel, PL34 0EB (same property that Cotswolrd energy used)
-uprn = 100040099104
-# This is for 353A, Hermitage Lane, ME16 9NT (one of the e.on properties)
-uprn = 200000964454
-# This is for 14 Victoria Road, Cross Hills, KEIGHLEY, North Yorkshire, ENGLAND, BD20 8SY
-uprn = 100050346517
-
-cleaning_data = read_dataframe_from_s3_parquet(
-    bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
-)
-
-searcher = SearchEpc(address1="", postcode="", uprn=uprn, auth_token=EPC_AUTH_TOKEN, os_api_key="")
-
-searcher.find_property(skip_os=True)
-
-epc_records = {
-    'original_epc': searcher.newest_epc.copy(),
-    'full_sap_epc': searcher.full_sap_epc.copy(),
-    'old_data': searcher.older_epcs.copy(),
-}
-
-epc = EPCRecord(
-    epc_records=epc_records,
-    run_mode="newdata",
-    cleaning_data=cleaning_data
-)
-
-uprn_filenames = read_dataframe_from_s3_parquet(
-    bucket_name="retrofit-data-dev", file_key="spatial/filename_meta.parquet"
-)
-
-p = Property(
-    id=0,
-    address=searcher.address_clean,
-    postcode=searcher.postcode_clean,
-    epc_record=epc,
-    already_installed={},
-    non_invasive_recommendations={},
-)
-
-p.get_spatial_data(uprn_filenames)
-
-cleaned = read_from_s3(
-    s3_file_name="cleaned_epc_data/cleaned.bson",
-    bucket_name="retrofit-data-dev"
-)
-
-cleaned = msgpack.unpackb(cleaned, raw=False)
-
-from etl.solar.SolarPhotoSupply import SolarPhotoSupply
-
-photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")
-
-p.get_components(
-    cleaned=cleaned,
-    photo_supply_lookup=photo_supply_lookup,
-    floor_area_decile_thresholds=floor_area_decile_thresholds
-)
-p.hot_water_energy_source
-p.heating_energy_source
-
-longitude = p.spatial["longitude"]
-latitude = p.spatial["latitude"]
-
-api_key = "AIzaSyCIz8Psu5h-1txuDX0rQpUTgkvdj8yohqU"
-url = 'https://solar.googleapis.com/v1/solarPotential'
-params = {
-    'location.latitude': f'{latitude:.5f}',
-    'location.longitude': f'{longitude:.5f}',
-    'requiredQuality': "MEDIUM",
-    'key': api_key
-}
-
-insights_url = 'https://solar.googleapis.com/v1/buildingInsights:findClosest'
-
-# Make the GET request to the Solar API
-insights_response = requests.get(insights_url, params=params)
-insights_data = insights_response.json()
-
-solar_potential = insights_data["solarPotential"]
-
-from pprint import pprint
-
-pprint(solar_potential)
-
-# This is the maximum number of panels that can be installed
-solar_potential["maxArrayPanelsCount"]
-
-# This is the size of the panels used in the calculation - 400 watt
-solar_potential["panelCapacityWatts"]
-
-# Height of the panels used
-solar_potential["panelHeightMeters"]
-
-# Width of the panels used
-solar_potential["panelWidthMeters"]
-
-# This is the maximum area that can be covered by the panels
-solar_potential["maxArrayAreaMeters2"]
-
-# This is the area of the roof
-solar_potential["wholeRoofStats"]["areaMeters2"]
-
-# This is the area of the floor
-solar_potential["wholeRoofStats"]["groundAreaMeters2"]
-
-solar_potential["solarPanelConfigs"][0]
-solar_potential["solarPanelConfigs"][1]
-
-self = GoogleSolarApi(api_key=api_key)
-
 
 class GoogleSolarApi:
     NORTH_FACING_AZIMUTH_RANGE = (-30, 30)
 
+    # Conservative estimate of the proportion of electricity that will be consumed, whereas the rest will
+    # be exported
+    SOLAR_CONSUMPTION_PROPORTION = 0.5
+
     def __init__(self, api_key, max_retries=5):
         """
         Initialize the GoogleSolarApi class with the provided API key and maximum retries.
@@ -150,6 +33,8 @@ class GoogleSolarApi:
         self.roof_area = None
         self.roof_segment_indexes = None
         self.panel_area = None
+        self.panel_wattage = None
+        self.panel_performance = None
 
     def get_building_insights(self, longitude, latitude, required_quality="MEDIUM", max_retries=None):
         """
@@ -198,7 +83,6 @@ class GoogleSolarApi:
         :return: The JSON response containing the building insights data.
         """
 
-        # TODO - can we make a request which includes the 30cm buffer from the edge of the roof?
         self.insights_data = self.get_building_insights(longitude, latitude, required_quality)
 
         # Extract key data from the insights response
@@ -209,6 +93,7 @@ class GoogleSolarApi:
             self.insights_data["solarPotential"]["panelHeightMeters"] *
             self.insights_data["solarPotential"]["panelWidthMeters"]
         )
+        self.panel_wattage = self.insights_data["solarPotential"]["panelCapacityWatts"]
 
         # Automatically exclude north-facing segments
         self.exclude_north_facing_segments()
@@ -246,7 +131,8 @@ class GoogleSolarApi:
                         "generatedEnergy": generated_energy,
                         "ratio": ratio,
                         "n_panels": segment["panelsCount"],
-                        "cost": cost
+                        "cost": cost,
+                        "panneled_roof_area": self.panel_area * int(segment["panelsCount"])
                     }
                 )
 
@@ -263,12 +149,43 @@ class GoogleSolarApi:
                     "n_panels": roi_summary["n_panels"].sum(),
                     "total_energy": total_energy,
                     "total_cost": total_cost,
-                    "weighted_ratio": weighted_ratio
+                    "weighted_ratio": weighted_ratio,
+                    "panneled_roof_area": roi_summary["panneled_roof_area"].sum(),
+                    "array_warrage": roi_summary["n_panels"].sum() * self.panel_wattage
                 }
             )
 
         panel_performance = pd.DataFrame(panel_performance)
+        # We can have duplicate configurations
+        panel_performance = panel_performance.drop_duplicates()
+        # Ensure more than 4 panels
+        panel_performance = panel_performance[panel_performance["n_panels"] >= 4]
+        # Remove anything where the total energy is less than half of the array wattage
+        panel_performance = panel_performance[
+            (panel_performance["total_energy"] / panel_performance["array_warrage"]) >= 0.5
+            ]
+
+        # This first bracket is the value of the energy bill savings
+        panel_performance["bill_savings"] = (
+            self.SOLAR_CONSUMPTION_PROPORTION *
+            panel_performance["total_energy"] *
+            AnnualBillSavings.ELECTRICITY_PRICE_CAP
+        )
+        # This is the amount of energy exported
+        panel_performance["export_value"] = (
+            (1 - self.SOLAR_CONSUMPTION_PROPORTION) *
+            panel_performance["total_energy"] *
+            AnnualBillSavings.ELECTRICITY_EXPORT_PAYMENT
+        )
+        panel_performance["energy_value"] = panel_performance["bill_savings"] + panel_performance["export_value"]
+        panel_performance["payback_years"] = panel_performance["total_cost"] / panel_performance["energy_value"]
+
         panel_performance = panel_performance.sort_values("weighted_ratio", ascending=False)
+        # TODO: Finish this!!
+
+        panel_performance["roof_area_percentage"] = panel_performance["panneled_roof_area"] / self.roof_area
+
+        self.panel_performance = panel_performance
 
     def exclude_north_facing_segments(self):
         """
diff --git a/backend/app/config.py b/backend/app/config.py
index 764bddf5..6f2e405b 100644
--- a/backend/app/config.py
+++ b/backend/app/config.py
@@ -14,6 +14,7 @@ class Settings(BaseSettings):
     PLAN_TRIGGER_BUCKET: str
     EPC_AUTH_TOKEN: str
     ORDNANCE_SURVEY_API_KEY: str
+    GOOGLE_SOLAR_API_KEY: str
     DB_HOST: str
     DB_PASSWORD: str
     DB_USERNAME: str
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 9caab324..54e02766 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -29,6 +29,7 @@ from backend.app.utils import epc_to_sap_lower_bound, sap_to_epc
 
 from backend.ml_models.api import ModelApi
 from backend.Property import Property
+from backend.apis.GoogleSolarApi import GoogleSolarApi
 from etl.solar.SolarPhotoSupply import SolarPhotoSupply
 
 from recommendations.optimiser.CostOptimiser import CostOptimiser
@@ -347,10 +348,13 @@ async def trigger_plan(body: PlanTriggerRequest):
             bucket_name=get_settings().DATA_BUCKET, file_key="spatial/filename_meta.parquet"
         )
         photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket=get_settings().DATA_BUCKET)
+        solar_api_client = GoogleSolarApi(api_key=get_settings().GOOGLE_SOLAR_API_KEY)
 
         logger.info("Getting spatial data")
         for p in input_properties:
             p.get_spatial_data(uprn_filenames)
+            # Call Google Solar API
+            solar_api_client.get(longitude=p.spatial["longitude"], latitude=p.spatial["latitude"])
 
         logger.info("Getting components and epc recommendations")
         recommendations = {}
diff --git a/backend/ml_models/AnnualBillSavings.py b/backend/ml_models/AnnualBillSavings.py
index b92077e4..d88fe677 100644
--- a/backend/ml_models/AnnualBillSavings.py
+++ b/backend/ml_models/AnnualBillSavings.py
@@ -14,6 +14,8 @@ class AnnualBillSavings:
     # https://www.ofgem.gov.uk/publications/new-energy-price-cap-level-april-june-2024-starts-today
     ELECTRICITY_PRICE_CAP = 0.245
     GAS_PRICE_CAP = 0.0604
+    # This is the most recent export payment figure, at 12p per kwh
+    ELECTRICITY_EXPORT_PAYMENT = 0.12
 
     # This is a weighted mean of the price caps, using the consumption figures above as weights
     PRICE_FACTOR = 0.09549999999999999

From 83339d2cbe84a3b8a4273e7ea468822f5305c6e5 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 25 Jun 2024 14:47:36 +0100
Subject: [PATCH 44/80] Added unit tests for annual bill savings appliance
 consumption

---
 .idea/Model.iml                               |  2 +-
 .idea/misc.xml                                |  2 +-
 backend/Property.py                           | 23 ++++++-
 backend/apis/GoogleSolarApi.py                | 32 ++++++++++
 backend/app/plan/router.py                    |  4 --
 backend/ml_models/AnnualBillSavings.py        | 62 ++++++++++++++++++-
 etl/customers/stonewater/shdf_3_clustering.py |  8 ++-
 recommendations/Recommendations.py            | 13 +---
 8 files changed, 123 insertions(+), 23 deletions(-)

diff --git a/.idea/Model.iml b/.idea/Model.iml
index 4413bb06..b0f9c00d 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 6f308057..1122b380 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/backend/Property.py b/backend/Property.py
index 3599f21b..fde0802d 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -18,6 +18,7 @@ from recommendations.recommendation_utils import (
     esimtate_pitched_roof_area,
     estimate_windows,
 )
+from backend.ml_models.AnnualBillSavings import AnnualBillSavings
 
 ENVIRONMENT = os.environ.get("ENVIRONMENT", "dev")
 DATA_BUCKET = os.environ.get(
@@ -590,6 +591,23 @@ class Property:
         self.set_energy_source()
         self.find_energy_sources()
 
+    def set_current_energy_bill(self):
+        """
+        Given what we know about the property now, estimates the current energy consumption using the UCL paper
+        https://www.sciencedirect.com/science/article/pii/S0378778823002542
+        :return:
+        """
+        starting_heat_demand = (
+            float(self.data["energy-consumption-current"]) * self.floor_area
+        )
+
+        self.current_adjusted_energy = AnnualBillSavings.adjust_energy_to_metered(
+            epc_energy_consumption=starting_heat_demand,
+            current_epc_rating=self.data["current-energy-rating"],
+        )
+
+        self.current_energy_bill = AnnualBillSavings.calculate_annual_bill(self.current_adjusted_energy)
+
     def set_spatial(self, spatial: pd.DataFrame):
         """
         Sets whether the property is in a conservation area given the output of the ConservationAreaClient
@@ -909,14 +927,13 @@ class Property:
         return component_data
 
     def set_adjusted_energy(
-        self, current_adjusted_energy, expected_adjusted_energy, current_energy_bill, expected_energy_bill
+        self, expected_adjusted_energy, expected_energy_bill
     ):
         """
         Stores these values for usage later
         """
-        self.current_adjusted_energy = current_adjusted_energy
+
         self.expected_adjusted_energy = expected_adjusted_energy
-        self.current_energy_bill = current_energy_bill
         self.expected_energy_bill = expected_energy_bill
 
     def set_windows_count(self):
diff --git a/backend/apis/GoogleSolarApi.py b/backend/apis/GoogleSolarApi.py
index cac82f4b..99c49b2f 100644
--- a/backend/apis/GoogleSolarApi.py
+++ b/backend/apis/GoogleSolarApi.py
@@ -14,6 +14,31 @@ class GoogleSolarApi:
     # be exported
     SOLAR_CONSUMPTION_PROPORTION = 0.5
 
+    # These are variables, described in the documentation for cost analysis for non-us locations, seen here
+    # https://developers.google.com/maps/documentation/solar/calculate-costs-non-us
+    # We use the default figures that the API uses for US locations
+
+    # The factor by which the cost of electricity increases annually. The Solar API uses 1.022 (2.2% annual increase)
+    # for US locations.
+    cost_increase_factor = 1.022
+
+    # The efficiency at which an inverter converts the DC electricity that is produced by the solar panels to the AC
+    # electricity that is used in a household. The Solar API uses 85% for US locations. We use 0.955, which is the
+    # middle value of the 93-98% range, cited by Sunsave:
+    # https://www.sunsave.energy/solar-panels-advice/system-size/inverters
+    dc_to_ac_rate = 0.955
+
+    # The Solar API uses 1.04 (4% annual increase) for US locations
+    discount_rate = 1.04
+
+    # How much the efficiency of the solar panels declines each year. The Solar API uses 0.995 (0.5% annual decrease)
+    # for US locations
+    efficiency_depreciation_factor = 0.995
+
+    # The expected lifespan of the solar installation. The Solar API uses 20 years. Adjust this value as needed for
+    # your area
+    installation_life_span = 20
+
     def __init__(self, api_key, max_retries=5):
         """
         Initialize the GoogleSolarApi class with the provided API key and maximum retries.
@@ -94,6 +119,13 @@ class GoogleSolarApi:
             self.insights_data["solarPotential"]["panelWidthMeters"]
         )
         self.panel_wattage = self.insights_data["solarPotential"]["panelCapacityWatts"]
+        if self.panel_wattage != 400:
+            # In the API documentation, it claims that the default output is 250W, however we've only seen 400W, so if
+            # we get anything other than 400W, we'll need to adjust the calculations in the output. For this, we should
+            # refer to https://developers.google.com/maps/documentation/solar/calculate-costs-non-us
+            # Where the documentation explains how to adjust the yearlyEnergyDcKwh figures.
+            # It should be straightforward, but I'd rather see an actual instance of this happening
+            raise NotImplementedError("Panel wattage is not 400W - implement me")
 
         # Automatically exclude north-facing segments
         self.exclude_north_facing_segments()
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 54e02766..0957b2d2 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -426,9 +426,7 @@ async def trigger_plan(body: PlanTriggerRequest):
 
             (
                 recommendations_with_impact,
-                current_adjusted_energy,
                 expected_adjusted_energy,
-                current_energy_bill,
                 expected_energy_bill
             ) = (
                 Recommendations.calculate_recommendation_impact(
@@ -440,9 +438,7 @@ async def trigger_plan(body: PlanTriggerRequest):
 
             # Store the resulting adjusted energy in the property instance
             property_instance.set_adjusted_energy(
-                current_adjusted_energy=current_adjusted_energy,
                 expected_adjusted_energy=expected_adjusted_energy,
-                current_energy_bill=current_energy_bill,
                 expected_energy_bill=expected_energy_bill
             )
 
diff --git a/backend/ml_models/AnnualBillSavings.py b/backend/ml_models/AnnualBillSavings.py
index d88fe677..7395ab6b 100644
--- a/backend/ml_models/AnnualBillSavings.py
+++ b/backend/ml_models/AnnualBillSavings.py
@@ -1,3 +1,6 @@
+import numpy as np
+
+
 class AnnualBillSavings:
     """
     This is a simple class which will estimate the annual bill savings, based on the kwh savings.
@@ -60,8 +63,58 @@ class AnnualBillSavings:
 
         return cls.ELECTRICITY_PRICE_CAP * kwh + (cls.DAILY_STANDARD_CHARGE_ELECTRICITY * 365)
 
+    @staticmethod
+    def calculate_occupants(total_floor_area):
+        """
+        From Table 1b of the SAP 2012 documentation https://bregroup.com/documents/d/bre-group/sap-2012_9-92
+        Provides a methodology to estimate occupancy, based on floor area. This is used to calculate the amount of
+        electricity used by appliances and during cooking.
+        :param total_floor_area:
+        :return:
+        """
+
+        if total_floor_area <= 13.9:
+            return 1
+
+        return 1 + (1.76 * (1 - np.exp(-0.000349 * (total_floor_area - 13.9) * (total_floor_area - 13.9))) + 0.0013 * (
+            total_floor_area - 13.9))
+
+    @staticmethod
+    def estimate_electrical_appliances(occupants, total_floor_area):
+        """
+        From section L2 of SAP2012 Electrical appliances
+        https://bregroup.com/documents/d/bre-group/sap-2012_9-92
+        Used to estimate the amount of energy used by electrical appliances
+        :param occupants:
+        :param total_floor_area:
+        :return:
+        """
+        e_a = 207.8 * np.power(total_floor_area * occupants, 0.4717)
+
+        days_in_month = {
+            1: 31,
+            2: 28,
+            3: 31,
+            4: 30,
+            5: 31,
+            6: 30,
+            7: 31,
+            8: 31,
+            9: 30,
+            10: 31,
+            11: 30,
+            12: 31
+        }
+
+        eam = 0
+        for m in range(1, 13):
+            nm = days_in_month[m]
+            eam += e_a * (1 + 0.157 * np.cos(2 * np.pi * (m - 1.78) / 12)) * nm / 365
+
+        return eam
+
     @classmethod
-    def adjust_energy_to_metered(cls, epc_energy_consumption, current_epc_rating):
+    def adjust_energy_to_metered(cls, epc_energy_consumption, current_epc_rating, total_floor_area):
         """
         The over-prediction of energy use by EPCs in Great Britain: A comparison
         of EPC-modelled and metered primary energy use intensity
@@ -72,6 +125,13 @@ class AnnualBillSavings:
         :return:
         """
 
+        # The EPC energy consumption does not factor in cooking and appliance use, so this is estimated using the
+        # methodology outlined in SAP, and is discussed in the UCL paper in section 3.1.1
+        estimated_occupants = cls.calculate_occupants(total_floor_area=total_floor_area)
+        appliances_energy_use = cls.estimate_electrical_appliances(estimated_occupants, total_floor_area)
+
+        epc_energy_consumption += appliances_energy_use
+
         gradients = {
             "A": -0.1,
             "B": -0.1,
diff --git a/etl/customers/stonewater/shdf_3_clustering.py b/etl/customers/stonewater/shdf_3_clustering.py
index 5129dfb1..6c7a0fc6 100644
--- a/etl/customers/stonewater/shdf_3_clustering.py
+++ b/etl/customers/stonewater/shdf_3_clustering.py
@@ -6,6 +6,7 @@ from backend.SearchEpc import SearchEpc
 import urllib.parse
 import requests
 from datetime import datetime
+from scipy import stats
 
 from fuzzywuzzy import fuzz
 import numpy as np
@@ -1598,7 +1599,6 @@ def compile_data_final():
         property_attributes[c] = property_attributes[c].fillna(0)
         property_attributes[c] = property_attributes[c].astype(float)
 
-    from scipy import stats
     for col in fill_with_mode:
         property_attributes[col] = property_attributes[col].replace('', None)
         mode_val = stats.mode([float(x) for x in property_attributes[col].values if x not in [None, "", np.nan]])[0]
@@ -1632,6 +1632,12 @@ def compile_data_final():
     #     s3_file_name="customers/Stonewater/clustering/clustering_dataframe.pkl"
     # )
 
+    # from utils.s3 import read_pickle_from_s3
+    # data = read_pickle_from_s3(
+    #     bucket_name="retrofit-data-dev",
+    #     s3_file_name="customers/Stonewater/clustering/clustering_dataframe.pkl"
+    # )
+
     # CLUSTERING!!
 
     # from sklearn.cluster import KMeans
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 19fba581..c9ac1072 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -311,14 +311,6 @@ class Recommendations:
         # This is the unadjusted resulting heat demand
         predicted_heat_demand_change = starting_heat_demand - expected_heat_demand
 
-        # We don't want to adjust the heat demand for mechanical ventilation so we add it back on
-
-        # We adjust the heat demand figures to align to the UCL paper
-        current_adjusted_energy = AnnualBillSavings.adjust_energy_to_metered(
-            epc_energy_consumption=starting_heat_demand,
-            current_epc_rating=property_instance.data["current-energy-rating"],
-        )
-
         # TODO: This isn't quite right as this is based on EVERY possible measure, not just the ones that are
         #       actually implemented
         expected_adjusted_energy = AnnualBillSavings.adjust_energy_to_metered(
@@ -327,11 +319,10 @@ class Recommendations:
         )
 
         adjusted_heat_demand_change = (
-            current_adjusted_energy - expected_adjusted_energy
+            property_instance.current_adjusted_energy - expected_adjusted_energy
         )
 
         # TODO: We should determine if the home is gas & electricity or just electricity
-        current_energy_bill = AnnualBillSavings.calculate_annual_bill(current_adjusted_energy)
         expected_energy_bill = AnnualBillSavings.calculate_annual_bill(expected_adjusted_energy)
 
         for recommendations_by_type in property_recommendations:
@@ -410,8 +401,6 @@ class Recommendations:
 
         return (
             property_recommendations,
-            current_adjusted_energy,
             expected_adjusted_energy,
-            current_energy_bill,
             expected_energy_bill
         )

From dd825c73a795404fe7145280c99522dc114caaaf Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 25 Jun 2024 15:53:17 +0100
Subject: [PATCH 45/80] implementing the solar costing model (incomplete)

---
 .idea/Model.iml                |   2 +-
 .idea/misc.xml                 |   2 +-
 backend/Property.py            |   2 +
 backend/apis/GoogleSolarApi.py | 113 +++++++++++++++++++++++++++++----
 backend/app/plan/router.py     |   6 +-
 5 files changed, 108 insertions(+), 17 deletions(-)

diff --git a/.idea/Model.iml b/.idea/Model.iml
index b0f9c00d..4413bb06 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 1122b380..6f308057 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/backend/Property.py b/backend/Property.py
index fde0802d..411a4db0 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -590,6 +590,7 @@ class Property:
         )
         self.set_energy_source()
         self.find_energy_sources()
+        self.set_current_energy_bill()
 
     def set_current_energy_bill(self):
         """
@@ -604,6 +605,7 @@ class Property:
         self.current_adjusted_energy = AnnualBillSavings.adjust_energy_to_metered(
             epc_energy_consumption=starting_heat_demand,
             current_epc_rating=self.data["current-energy-rating"],
+            total_floor_area=self.floor_area
         )
 
         self.current_energy_bill = AnnualBillSavings.calculate_annual_bill(self.current_adjusted_energy)
diff --git a/backend/apis/GoogleSolarApi.py b/backend/apis/GoogleSolarApi.py
index 99c49b2f..6d2ddf6c 100644
--- a/backend/apis/GoogleSolarApi.py
+++ b/backend/apis/GoogleSolarApi.py
@@ -135,6 +135,99 @@ class GoogleSolarApi:
         # We now start finding the solar panel configurations
         self.optimise_solar_configuration()
 
+    @staticmethod
+    def lifetime_production_ac_kwh(
+        row,
+        efficiency_depreciation_factor,
+        installation_life_span
+    ):
+        """
+        Mimics the function described in the Google Solar API documentation, presenting the lifetime production
+        AC kWh as a geometric sum
+        """
+
+        return (
+            row["initial_ac_kwh_per_year"] *
+            (1 - pow(
+                efficiency_depreciation_factor,
+                installation_life_span)) /
+            (1 - efficiency_depreciation_factor))
+
+    @staticmethod
+    def annualUtilityBillEstimate(
+        yearlyKWhEnergyConsumption,
+        initialAcKwhPerYear,
+        efficiencyDepreciationFactor,
+        year,
+        costIncreaseFactor,
+        discountRate):
+        """
+        Implements the bill costing model for estimating the annual bill
+        :param yearlyKWhEnergyConsumption:
+        :param initialAcKwhPerYear:
+        :param efficiencyDepreciationFactor:
+        :param year:
+        :param costIncreaseFactor:
+        :param discountRate:
+        :return:
+        """
+
+        return (
+            billCostModel(
+                yearlyKWhEnergyConsumption -
+                annualProduction(
+                    initialAcKwhPerYear,
+                    efficiencyDepreciationFactor,
+                    year)) *
+            pow(costIncreaseFactor, year) /
+            pow(discountRate, year))
+
+    def lifetimeUtilityBill(
+        yearlyKWhEnergyConsumption,
+        initialAcKwhPerYear,
+        efficiencyDepreciationFactor,
+        installationLifeSpan,
+        costIncreaseFactor,
+        discountRate):
+        bill = [0] * installationLifeSpan
+        for year in range(installationLifeSpan):
+            bill[year] = annualUtilityBillEstimate(
+                yearlyKWhEnergyConsumption,
+                initialAcKwhPerYear,
+                efficiencyDepreciationFactor,
+                year,
+                costIncreaseFactor,
+                discountRate)
+        return bill
+
+    def estimate_solar_costs(self, panel_performance):
+        """
+        This method implements the recommended costing approach, to estimate the ROI of a solar panel
+        configuration, as described in the Google Solar API documentation
+        :param panel_performance: dataframe containing the solar panel array configuration and energy generation data
+        :return:
+        """
+
+        # We now estimate the financial benefits of solar panels for the household, using the framework described
+        # by the Google Solar API
+        # 1) Convert the solar energy DC production into AC production
+        panel_performance["initial_ac_kwh_per_year"] = panel_performance["yearly_dc_energy"] * self.dc_to_ac_rate
+
+        # Remove anything where the total ac energy is less than half of the array wattage
+        panel_performance = panel_performance[
+            (panel_performance["initial_ac_kwh_per_year"] / panel_performance["array_warrage"]) >= 0.5
+            ]
+
+        # 2) Calculate the lifetime solar energy production
+        panel_performance['lifetime_ac_kwh'] = panel_performance.apply(
+            self.lifetime_production_ac_kwh,
+            axis=1,
+            efficiency_depreciation_factor=self.efficiency_depreciation_factor,
+            installation_life_span=self.installation_life_span
+        )
+
+        # TODO: Complete the rest of the solar model
+
     def optimise_solar_configuration(self):
         """
         Optimise the solar panel configuration for the building.
@@ -153,14 +246,14 @@ class GoogleSolarApi:
             roi_summary = []
             for segment in roof_segment_summaries:
                 wattage = segment["panelsCount"] * self.insights_data["solarPotential"]["panelCapacityWatts"]
-                generated_energy = segment["yearlyEnergyDcKwh"]
-                ratio = generated_energy / wattage
-                cost = MCS_SOLAR_PV_COST_DATA["average_cost_per_kwh"] * (generated_energy / 1000)
+                generated_dc_energy = segment["yearlyEnergyDcKwh"]
+                ratio = generated_dc_energy / wattage
+                cost = MCS_SOLAR_PV_COST_DATA["average_cost_per_kwh"] * (generated_dc_energy / 1000)
                 roi_summary.append(
                     {
                         "segmentIndex": segment["segmentIndex"],
                         "wattage": wattage,
-                        "generatedEnergy": generated_energy,
+                        "generated_dc_energy": generated_dc_energy,
                         "ratio": ratio,
                         "n_panels": segment["panelsCount"],
                         "cost": cost,
@@ -171,15 +264,15 @@ class GoogleSolarApi:
             roi_summary = pd.DataFrame(roi_summary)
 
             weighted_ratio = np.average(
-                roi_summary["ratio"].values, weights=roi_summary["generatedEnergy"].values
+                roi_summary["ratio"].values, weights=roi_summary["generated_dc_energy"].values
             )
             total_cost = roi_summary["cost"].sum()
-            total_energy = roi_summary["generatedEnergy"].sum()
+            yearly_dc_energy = roi_summary["generated_dc_energy"].sum()
 
             panel_performance.append(
                 {
                     "n_panels": roi_summary["n_panels"].sum(),
-                    "total_energy": total_energy,
+                    "yearly_dc_energy": yearly_dc_energy,
                     "total_cost": total_cost,
                     "weighted_ratio": weighted_ratio,
                     "panneled_roof_area": roi_summary["panneled_roof_area"].sum(),
@@ -192,10 +285,8 @@ class GoogleSolarApi:
         panel_performance = panel_performance.drop_duplicates()
         # Ensure more than 4 panels
         panel_performance = panel_performance[panel_performance["n_panels"] >= 4]
-        # Remove anything where the total energy is less than half of the array wattage
-        panel_performance = panel_performance[
-            (panel_performance["total_energy"] / panel_performance["array_warrage"]) >= 0.5
-            ]
+
+        self.estimate_solar_costs()
 
         # This first bracket is the value of the energy bill savings
         panel_performance["bill_savings"] = (
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 0957b2d2..bfe5a9e4 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -352,9 +352,10 @@ async def trigger_plan(body: PlanTriggerRequest):
 
         logger.info("Getting spatial data")
         for p in input_properties:
+            p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
             p.get_spatial_data(uprn_filenames)
             # Call Google Solar API
-            solar_api_client.get(longitude=p.spatial["longitude"], latitude=p.spatial["latitude"])
+            solar_performance = solar_api_client.get(longitude=p.spatial["longitude"], latitude=p.spatial["latitude"])
 
         logger.info("Getting components and epc recommendations")
         recommendations = {}
@@ -362,9 +363,6 @@ async def trigger_plan(body: PlanTriggerRequest):
         representative_recommendations = {}
         for p in tqdm(input_properties):
 
-            # Property recommendations
-            p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
-
             recommender = Recommendations(property_instance=p, materials=materials, exclusions=body.exclusions)
             property_recommendations, property_representative_recommendations = recommender.recommend()
 

From f321f46e5475aa31b37ea54d7a44de4c629dff45 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 25 Jun 2024 16:07:46 +0100
Subject: [PATCH 46/80] Added missing files

---
 .idea/Model.iml                               |  2 +-
 .idea/misc.xml                                |  2 +-
 backend/tests/test_annual_bill_savings.py     | 82 +++++++++++++++++++
 etl/customers/vander_elliot/non_intrusives.py | 17 ++++
 etl/non_intrusive_surveys/photos/README.md    |  2 +-
 .../upload/UploadNonIntrusives.py             | 18 ++++
 etl/non_intrusive_surveys/upload/__init__.py  |  0
 7 files changed, 120 insertions(+), 3 deletions(-)
 create mode 100644 backend/tests/test_annual_bill_savings.py
 create mode 100644 etl/customers/vander_elliot/non_intrusives.py
 create mode 100644 etl/non_intrusive_surveys/upload/UploadNonIntrusives.py
 create mode 100644 etl/non_intrusive_surveys/upload/__init__.py

diff --git a/.idea/Model.iml b/.idea/Model.iml
index 4413bb06..b0f9c00d 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 6f308057..1122b380 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/backend/tests/test_annual_bill_savings.py b/backend/tests/test_annual_bill_savings.py
new file mode 100644
index 00000000..81c2898c
--- /dev/null
+++ b/backend/tests/test_annual_bill_savings.py
@@ -0,0 +1,82 @@
+import numpy as np
+import pytest
+from backend.ml_models.AnnualBillSavings import AnnualBillSavings
+
+appliance_consumption_cases = [
+    {
+        "total_floor_area": 13.9,
+        "n_occupants": 1,
+        "consumption": 718.4795859263703
+    },
+    {
+        "total_floor_area": 20,
+        "n_occupants": 1.0306381042556767,
+        "consumption": 865.2316409517844
+    },
+    {
+        "total_floor_area": 30,
+        "n_occupants": 1.1731577598127325,
+        "consumption": 1113.5965321501362
+    },
+    {
+        "total_floor_area": 50,
+        "n_occupants": 1.6901008890848956,
+        "consumption": 1683.31305074609
+    },
+    {
+        "total_floor_area": 75,
+        "n_occupants": 2.361158387531988,
+        "consumption": 2386.2935599981865
+    },
+    {
+        "total_floor_area": 100,
+        "n_occupants": 2.739525875076067,
+        "consumption": 2931.6076153011486
+    },
+    {
+        "total_floor_area": 125,
+        "n_occupants": 2.8807344137165405,
+        "consumption": 3335.143110751552
+    },
+    {
+        "total_floor_area": 150,
+        "n_occupants": 2.934188599837662,
+        "consumption": 3666.3228057866513
+    },
+    {
+        "total_floor_area": 200,
+        "n_occupants": 3.001920087128373,
+        "consumption": 4244.625403339813
+    },
+    {
+        "total_floor_area": 300,
+        "n_occupants": 3.1319299999993095,
+        "consumption": 5243.086106676302
+    },
+    {
+        "total_floor_area": 500,
+        "n_occupants": 3.39193,
+        "consumption": 6927.400500420533
+    },
+    {
+        "total_floor_area": 1000,
+        "n_occupants": 4.04193,
+        "consumption": 10434.755635642652
+    }
+]
+
+
+class TestAnnualBillSavings:
+
+    @pytest.mark.parametrize(
+        "test_case",
+        appliance_consumption_cases
+    )
+    def test_appliance_estimation(self, test_case):
+        n_occupants = AnnualBillSavings.calculate_occupants(test_case["total_floor_area"])
+        assert np.isclose(n_occupants, test_case["n_occupants"])
+
+        appliance_consumption = AnnualBillSavings.estimate_electrical_appliances(
+            n_occupants, test_case["total_floor_area"]
+        )
+        assert np.isclose(appliance_consumption, test_case["consumption"])
diff --git a/etl/customers/vander_elliot/non_intrusives.py b/etl/customers/vander_elliot/non_intrusives.py
new file mode 100644
index 00000000..e11bc3f8
--- /dev/null
+++ b/etl/customers/vander_elliot/non_intrusives.py
@@ -0,0 +1,17 @@
+from etl.non_intrusive_surveys.upload.UploadNonIntrusives import UploadNonIntrusives
+
+
+def app():
+    """
+    This script handles the creation of the portfolio for the non-intrusive surveys
+    :return:
+    """
+
+    non_intrusive_s3_filename = (
+        "customers/Vander Elliot/Non-intrusive survey template V2 - Amazon Management Services.xlsx"
+    )
+
+    non_intrusive = UploadNonIntrusives(
+        s3_template_location=non_intrusive_s3_filename,
+        s3_bucket="retrofit-datalake-dev",
+    )
diff --git a/etl/non_intrusive_surveys/photos/README.md b/etl/non_intrusive_surveys/photos/README.md
index 9dbe951f..a58603b4 100644
--- a/etl/non_intrusive_surveys/photos/README.md
+++ b/etl/non_intrusive_surveys/photos/README.md
@@ -15,5 +15,5 @@ pip install -r requirements.txt
 The main application is found in the app.py file. To run the application, use the following command:
 
 ```bash
-python app.py
+python UploadNonIntrusives.py
 ```
\ No newline at end of file
diff --git a/etl/non_intrusive_surveys/upload/UploadNonIntrusives.py b/etl/non_intrusive_surveys/upload/UploadNonIntrusives.py
new file mode 100644
index 00000000..00f707e9
--- /dev/null
+++ b/etl/non_intrusive_surveys/upload/UploadNonIntrusives.py
@@ -0,0 +1,18 @@
+from utils.s3 import read_excel_from_s3
+
+
+class UploadNonIntrusives:
+    """
+    This class handles the upload of findings from the non-intrusive surveys, to the database
+    """
+
+    def __init__(self, s3_template_location, s3_bucket):
+        self.s3_template_location = s3_template_location
+        self.s3_bucket = s3_bucket
+        self.template = self.read_template()
+
+    def read_template(self):
+        """
+        This method reads the template from S3
+        """
+        return read_excel_from_s3(file_key=self.s3_template_location, bucket_name=self.s3_bucket, header_row=0)
diff --git a/etl/non_intrusive_surveys/upload/__init__.py b/etl/non_intrusive_surveys/upload/__init__.py
new file mode 100644
index 00000000..e69de29b

From de50ba13a5e06cc24e3c2a5d40d7e2458b3e06f7 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 25 Jun 2024 16:58:06 +0100
Subject: [PATCH 47/80] Set up upload non-intrusives client

---
 etl/customers/vander_elliot/non_intrusives.py |  57 ++++++++
 .../upload/UploadNonIntrusives.py             | 136 +++++++++++++++++-
 2 files changed, 191 insertions(+), 2 deletions(-)

diff --git a/etl/customers/vander_elliot/non_intrusives.py b/etl/customers/vander_elliot/non_intrusives.py
index e11bc3f8..57e0522b 100644
--- a/etl/customers/vander_elliot/non_intrusives.py
+++ b/etl/customers/vander_elliot/non_intrusives.py
@@ -1,3 +1,4 @@
+from datetime import datetime
 from etl.non_intrusive_surveys.upload.UploadNonIntrusives import UploadNonIntrusives
 
 
@@ -7,6 +8,60 @@ def app():
     :return:
     """
 
+    # In the future, we can just use the ordnance survey api
+    uprn_lookup = [
+        {'House Number': 79,
+         'Address Line 1': 'Clare Road',
+         'Address Line 2': 'Liverpool',
+         'Postcode': 'L20 9LZ',
+         'uprn': 41018850},
+        {'House Number': 'Flat 1',
+         'Address Line 1': '2 Linacre Lane',
+         'Address Line 2': 'Liverpool',
+         'Postcode': 'L20 5AH',
+         'uprn': 41052320},
+        {'House Number': 'Flat 2',
+         'Address Line 1': '2 Linacre Lane',
+         'Address Line 2': 'Liverpool',
+         'Postcode': 'L20 5AH',
+         'uprn': 41052321},
+        {'House Number': 'Flat 3',
+         'Address Line 1': '2 Linacre Lane',
+         'Address Line 2': 'Liverpool',
+         'Postcode': 'L20 5AH',
+         'uprn': 41052322},
+        {'House Number': 'Flat 4',
+         'Address Line 1': '2 Linacre Lane',
+         'Address Line 2': 'Liverpool',
+         'Postcode': 'L20 5AH',
+         'uprn': 41222759},
+        {'House Number': 'Flat 1',
+         'Address Line 1': '4 Linacre Lane',
+         'Address Line 2': 'Liverpool',
+         'Postcode': 'L20 5AH',
+         'uprn': 41222760},
+        {'House Number': 'Flat 2 (NO ACCESS)',
+         'Address Line 1': '4 Linacre Lane',
+         'Address Line 2': 'Liverpool',
+         'Postcode': 'L20 5AH',
+         'uprn': 41222761},
+        {'House Number': 'Flat 3',
+         'Address Line 1': '4 Linacre Lane',
+         'Address Line 2': 'Liverpool',
+         'Postcode': 'L20 5AH',
+         'uprn': 41212534},
+        {'House Number': 'Flat 1 (NO ACCESS)',
+         'Address Line 1': '29 Bedford Road',
+         'Address Line 2': 'Liverpool',
+         'Postcode': 'L4 5PS',
+         'uprn': 38237316},
+        {'House Number': 'Flat 2 (NO ACCESS)',
+         'Address Line 1': '29 Bedford Road',
+         'Address Line 2': 'Liverpool',
+         'Postcode': 'L4 5PS',
+         'uprn': 38237317}
+    ]
+
     non_intrusive_s3_filename = (
         "customers/Vander Elliot/Non-intrusive survey template V2 - Amazon Management Services.xlsx"
     )
@@ -14,4 +69,6 @@ def app():
     non_intrusive = UploadNonIntrusives(
         s3_template_location=non_intrusive_s3_filename,
         s3_bucket="retrofit-datalake-dev",
+        uprn_lookup=uprn_lookup,
+        survey_date=datetime.strptime('2024-06-21', '%Y-%m-%d')
     )
diff --git a/etl/non_intrusive_surveys/upload/UploadNonIntrusives.py b/etl/non_intrusive_surveys/upload/UploadNonIntrusives.py
index 00f707e9..824f41f9 100644
--- a/etl/non_intrusive_surveys/upload/UploadNonIntrusives.py
+++ b/etl/non_intrusive_surveys/upload/UploadNonIntrusives.py
@@ -1,4 +1,10 @@
 from utils.s3 import read_excel_from_s3
+from utils.logger import setup_logger
+from sqlalchemy.orm import sessionmaker
+from backend.app.db.connection import db_engine
+from backend.app.db.functions.non_intrusive_surveys import upload_non_intrusive_survey_notes
+
+logger = setup_logger()
 
 
 class UploadNonIntrusives:
@@ -6,13 +12,139 @@ class UploadNonIntrusives:
     This class handles the upload of findings from the non-intrusive surveys, to the database
     """
 
-    def __init__(self, s3_template_location, s3_bucket):
+    COLUMN_PREFIXES: dict = {
+        'Surveyor First Name': 'Surveyor',
+        'Surveyor Last Name': 'Surveyor',
+        'House Number': 'Property Details',
+        'Address Line 1': 'Property Details',
+        'Address Line 2': 'Property Details',
+        'Postcode': 'Property Details',
+        'Property Year Built': 'Property Details',
+        'Wall Construction': 'Walls',
+        'Wall Construction Notes': 'Walls',
+        'Existing insulation?': 'Walls',
+        'Retro Drilled?': 'Walls',
+        'Condition (cracks & damp)': 'Walls',
+        'Condition Notes': 'Walls',
+        'Alternative walls': 'Walls',
+        'Alternative walls percentage': 'Walls',
+        'Adequate Ventilation?': 'Walls',
+        'Ventilation notes': 'Walls',
+        'Party wall': 'Walls',
+        'Floor Type': 'Floor',
+        'Wall render': 'Wall Render',
+        'Wall Render Condition': 'Wall Render',
+        'Roof Type': 'Roof',
+        'Roof insulation ': 'Roof',
+        'Roof Condition': 'Roof',
+        'Obvious Roof Shading': 'Roof',
+        'Roof orientation - Primary': 'Roof',
+        'Roof orientation - Secondary': 'Roof',
+        'Obstructions on the roof': 'Roof',
+        'Flue type': 'Heating',
+        'Is there an extension?': 'Access',
+        'Are there any out-buildings?': 'Access',
+        'Is there a conservatory?': 'Access',
+        'Is the property straight onto a footpath?': 'Access',
+        'Is there a requirement for planning consent for works?': 'Access',
+        'Is there space for an external unit?': 'Air Source Heat Pump',
+        'Could a cylinder fit in the loft?': 'Air Source Heat Pump',
+        'Are there obvious areas of heat loss from the walls?': 'Thermography',
+        'Are there obvious areas of heat loss from the roof?': 'Thermography',
+        'Does the existing insulation exhibit signs of inconsistent performance or underperformance?': 'Thermography',
+        'Is there excessive levels of heat loss from windows?': 'Thermography',
+        'Is there excessive levels of heat loss from doors?': 'Thermography',
+        'Material inside the walls': 'Borescope Test',
+        'Cavity depth (mm)': 'Borescope Test',
+        'Is there rubble in the cavity?': 'Borescope Test',
+        'Wall tie type': 'Borescope Test',
+        'Wall tie integrity': 'Borescope Test',
+        'Inner block work': 'Borescope Test',
+        'Current glazing': 'Windows',
+        'Windows Age (pre/post 2002)': 'Windows',
+        'Glazing gap': 'Windows',
+        'Are there obvious trickle vents in the windows?': 'Windows',
+        'Is there sufficient space in the garden?': 'Ground Source Heat Pump',
+        'Does the property need a CIGA check?': 'Funding',
+        'Is the property eligible for GBIS?': 'Funding',
+        'Is the property eligible for ECO4?': 'Funding',
+        'Is the property eligible for the Local Authority Flex Scheme?': 'Funding',
+        'Is the property eligible for HUG?': 'Funding',
+        'Is the property eligible for LAD?': 'Funding',
+        'Other funding recommendations': 'Funding'
+    }
+
+    def __init__(self, s3_template_location, s3_bucket, uprn_lookup, survey_date):
         self.s3_template_location = s3_template_location
         self.s3_bucket = s3_bucket
         self.template = self.read_template()
 
+        self.uprn_lookup = uprn_lookup
+        self.survey_date = survey_date
+
     def read_template(self):
         """
         This method reads the template from S3
         """
-        return read_excel_from_s3(file_key=self.s3_template_location, bucket_name=self.s3_bucket, header_row=0)
+        return read_excel_from_s3(file_key=self.s3_template_location, bucket_name=self.s3_bucket, header_row=2)
+
+    def upload(self):
+        """
+        This method uploads the non-intrusive survey data to the database
+        """
+
+        if self.uprn_lookup is None:
+            raise Exception("Implement call to ordnance survey to get uprn lookup data")
+
+        logger.info("Preparing non-intrusive notes")
+        non_intrusives = self.template.to_dict(orient="records")
+
+        non_invasive_notes = []
+        for survey in non_intrusives:
+            # Remove any NAN entries
+            survey_clean = {self.COLUMN_PREFIXES[k] + ": " + k: v for k, v in survey.items() if v == v}
+
+            uprn_data = [
+                x for x in self.uprn_lookup if (
+                    str(x['House Number']).strip() == str(survey_clean['Property Details: House Number']).strip() and
+                    x['Address Line 1'] == survey_clean['Property Details: Address Line 1'].strip() and
+                    x['Address Line 2'] == survey_clean['Property Details: Address Line 2'].strip() and
+                    x['Postcode'] == survey_clean['Property Details: Postcode'].strip()
+                )
+            ]
+            if len(uprn_data) != 1:
+                address = (
+                    str(survey_clean['Property Details: House Number']) + ' ' +
+                    survey_clean['Property Details: Address Line 1'] + ' ' +
+                    survey_clean['Property Details: Address Line 2'] + ' ' +
+                    survey_clean['Property Details: Postcode']
+                )
+                raise Exception(f"Failed to find UPRN data for {address}")
+
+            surveyor = (
+                survey_clean.pop("Surveyor: Surveyor First Name") + " " +
+                survey_clean.pop("Surveyor: Surveyor Last Name")
+            )
+
+            # Include all of the information apart from data that includes the Property details prefix and the
+            # surveyor - we do however include Property Details: Property Year Built
+            notes_to_upload = {
+                k: v for k, v in survey_clean.items() if k == "Property Details: Property Year Built" or (
+                    not k.startswith("Property Details") and
+                    not k.startswith("Surveyor")
+                )
+            }
+
+            non_invasive_notes.append({
+                "uprn": uprn_data[0]['uprn'],
+                "surveyor": surveyor,
+                "survey_date": self.survey_date,
+                **notes_to_upload
+            })
+
+        # Upload the prepared non_invasive_notes to the database
+        logger.info("Uploading non-intrusive notes to the database")
+
+        session = sessionmaker(bind=db_engine)()
+
+        upload_non_intrusive_survey_notes(session=session, non_invasive_notes=non_invasive_notes, batch_size=500)

From 8fcae893c758d123fedd91aba2cc51b088f7dee2 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 25 Jun 2024 17:14:57 +0100
Subject: [PATCH 48/80] set up uploader class for non-intrusives and built
 portfolio for non-intrusive VE

---
 .idea/Model.iml                               |  2 +-
 .idea/misc.xml                                |  2 +-
 backend/app/plan/router.py                    |  3 +-
 etl/customers/vander_elliot/non_intrusives.py | 39 +++++++++++++++++++
 .../upload/UploadNonIntrusives.py             |  1 -
 recommendations/Recommendations.py            |  1 +
 6 files changed, 44 insertions(+), 4 deletions(-)

diff --git a/.idea/Model.iml b/.idea/Model.iml
index b0f9c00d..4413bb06 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 1122b380..6f308057 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index bfe5a9e4..5e10080e 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -355,7 +355,8 @@ async def trigger_plan(body: PlanTriggerRequest):
             p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
             p.get_spatial_data(uprn_filenames)
             # Call Google Solar API
-            solar_performance = solar_api_client.get(longitude=p.spatial["longitude"], latitude=p.spatial["latitude"])
+            # TODO: Complete the solar model integration, then re-enable the call below
+            # solar_performance = solar_api_client.get(longitude=p.spatial["longitude"], latitude=p.spatial["latitude"])
 
         logger.info("Getting components and epc recommendations")
         recommendations = {}
diff --git a/etl/customers/vander_elliot/non_intrusives.py b/etl/customers/vander_elliot/non_intrusives.py
index 57e0522b..7d092b5d 100644
--- a/etl/customers/vander_elliot/non_intrusives.py
+++ b/etl/customers/vander_elliot/non_intrusives.py
@@ -1,6 +1,11 @@
+import pandas as pd
+from utils.s3 import save_csv_to_s3
 from datetime import datetime
 from etl.non_intrusive_surveys.upload.UploadNonIntrusives import UploadNonIntrusives
 
+PORTFOLIO_ID = 82
+USER_ID = 8
+
 
 def app():
     """
@@ -72,3 +77,37 @@ def app():
         uprn_lookup=uprn_lookup,
         survey_date=datetime.strptime('2024-06-21', '%Y-%m-%d')
     )
+    non_intrusive.upload()
+
+    # We can create the asset list from the uprn lookup
+    asset_list = [
+        {
+            "uprn": x["uprn"],
+            "address": f"{x['House Number']} {x['Address Line 1']}",
+            "postcode": x["Postcode"]
+        }
+        for x in uprn_lookup
+    ]
+
+    asset_list = pd.DataFrame(asset_list)
+
+    # Store the asset list in s3
+    filename = f"{USER_ID}/{PORTFOLIO_ID}/non_intrusives.csv"
+    save_csv_to_s3(
+        dataframe=asset_list,
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename
+    )
+
+    body = {
+        "portfolio_id": str(PORTFOLIO_ID),
+        "housing_type": "Private",
+        "goal": "Increase EPC",
+        "goal_value": "A",
+        "trigger_file_path": filename,
+        "already_installed_file_path": "",
+        "patches_file_path": "",
+        "non_invasive_recommendations_file_path": "",
+        "budget": None,
+    }
+    print(body)
diff --git a/etl/non_intrusive_surveys/upload/UploadNonIntrusives.py b/etl/non_intrusive_surveys/upload/UploadNonIntrusives.py
index 824f41f9..b711de8c 100644
--- a/etl/non_intrusive_surveys/upload/UploadNonIntrusives.py
+++ b/etl/non_intrusive_surveys/upload/UploadNonIntrusives.py
@@ -146,5 +146,4 @@ class UploadNonIntrusives:
         logger.info("Uploading non-intrusive notes to the database")
 
         session = sessionmaker(bind=db_engine)()
-
         upload_non_intrusive_survey_notes(session=session, non_invasive_notes=non_invasive_notes, batch_size=500)
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index c9ac1072..827ca928 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -316,6 +316,7 @@ class Recommendations:
         expected_adjusted_energy = AnnualBillSavings.adjust_energy_to_metered(
             epc_energy_consumption=expected_heat_demand,
             current_epc_rating=property_instance.data["current-energy-rating"],
+            total_floor_area=property_instance.floor_area
         )
 
         adjusted_heat_demand_change = (

From 8a5e98d3ba322f6656a45c0aa9f6689a1e5105e3 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 26 Jun 2024 16:31:57 +0100
Subject: [PATCH 49/80] updating costing with installer quotes

---
 backend/app/db/models/materials.py            |  1 +
 backend/app/plan/router.py                    | 20 +++----
 etl/costs/app.py                              | 10 +++-
 etl/customers/vander_elliot/non_intrusives.py | 18 +++++-
 recommendations/Costs.py                      | 59 ++++++++++++++++---
 5 files changed, 86 insertions(+), 22 deletions(-)

diff --git a/backend/app/db/models/materials.py b/backend/app/db/models/materials.py
index 97085d7a..f0af3343 100644
--- a/backend/app/db/models/materials.py
+++ b/backend/app/db/models/materials.py
@@ -88,3 +88,4 @@ class Material(Base):
     plant_cost = Column(Float)
     total_cost = Column(Float)
     notes = Column(String)
+    is_installer_quote = Column(Boolean, nullable=False, default=False)
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 5e10080e..80392c88 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -284,16 +284,16 @@ async def trigger_plan(body: PlanTriggerRequest):
             property_id, is_new = create_property(
                 session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn
             )
-            if not is_new:
-                continue
-
-            create_property_targets(
-                session,
-                property_id=property_id,
-                portfolio_id=body.portfolio_id,
-                epc_target=body.goal_value,
-                heat_demand_target=None
-            )
+            # if not is_new:
+            #     continue
+            #
+            # create_property_targets(
+            #     session,
+            #     property_id=property_id,
+            #     portfolio_id=body.portfolio_id,
+            #     epc_target=body.goal_value,
+            #     heat_demand_target=None
+            # )
 
             epc_records = {
                 'original_epc': epc_searcher.newest_epc.copy(),
diff --git a/etl/costs/app.py b/etl/costs/app.py
index 30eff735..59852cc5 100644
--- a/etl/costs/app.py
+++ b/etl/costs/app.py
@@ -7,10 +7,13 @@ from sqlalchemy.orm import Session
 from sqlalchemy import create_engine
 from backend.app.db.models.materials import Material
 from recommendations.recommendation_utils import calculate_r_value_per_mm
+import inspect
 
-DATA_DIRECTORY = Path(__file__).parent / "local_data" / "Hestia Materials.xlsx"
+src_file_path = inspect.getfile(lambda: None)
+
+DATA_DIRECTORY = Path(src_file_path).parent / "local_data" / "20240626 Hestia Materials.xlsx"
 # Environment file is at the same level as this file
-ENV_FILE = Path(__file__).parent / "etl" / "costs" / ".env"
+ENV_FILE = Path(src_file_path).parent / "etl" / "costs" / ".env"
 dotenv.load_dotenv(ENV_FILE)
 
 DB_USERNAME = os.getenv('DB_USERNAME')
@@ -87,7 +90,8 @@ def app():
             solid_floor_costs,
             ewi_costs,
             lel_costs,
-            flat_roof_costs
+            flat_roof_costs,
+            window_costs
         ]
     )
 
diff --git a/etl/customers/vander_elliot/non_intrusives.py b/etl/customers/vander_elliot/non_intrusives.py
index 7d092b5d..bbc46754 100644
--- a/etl/customers/vander_elliot/non_intrusives.py
+++ b/etl/customers/vander_elliot/non_intrusives.py
@@ -6,6 +6,14 @@ from etl.non_intrusive_surveys.upload.UploadNonIntrusives import UploadNonIntrus
 PORTFOLIO_ID = 82
 USER_ID = 8
 
+already_installed = [
+    {
+        'address': 'Flat 3 2 Linacre Lane',
+        'postcode': 'L20 5AH',
+        "already_installed": ["windows_glazing"]
+    }
+]
+
 
 def app():
     """
@@ -91,6 +99,14 @@ def app():
 
     asset_list = pd.DataFrame(asset_list)
 
+    # Store overrides in s3
+    already_installed_filename = f"{USER_ID}/{PORTFOLIO_ID}/already_installed.json"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(already_installed),
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=already_installed_filename
+    )
+
     # Store the asset list in s3
     filename = f"{USER_ID}/{PORTFOLIO_ID}/non_intrusives.csv"
     save_csv_to_s3(
@@ -105,7 +121,7 @@ def app():
         "goal": "Increase EPC",
         "goal_value": "A",
         "trigger_file_path": filename,
-        "already_installed_file_path": "",
+        "already_installed_file_path": already_installed_filename,
         "patches_file_path": "",
         "non_invasive_recommendations_file_path": "",
         "budget": None,
diff --git a/recommendations/Costs.py b/recommendations/Costs.py
index 5f752730..b056274e 100644
--- a/recommendations/Costs.py
+++ b/recommendations/Costs.py
@@ -104,9 +104,9 @@ DOUBLE_RADIATOR_COST = 300
 FLUE_COST = 600
 PIPEWORK_COST = 750  # Min cost is £500
 
-# This is the cost per meter squared for cavity extraction
-# https://www.checkatrade.com/blog/cost-guides/cavity-wall-insulation-removal-cost/
-CAVITY_EXTRACTION_COST = 21.5
+# Based on SCIS figures
+# TODO: Add this to the database
+CAVITY_EXTRACTION_COST = 25
 
 
 class Costs:
@@ -203,6 +203,20 @@ class Costs:
         :return: A dictionary containing detailed cost breakdown.
         """
 
+        # CWI usually takes 1 day
+        labour_hours = 8
+        labour_days = 1
+
+        # if the material is based on an installer cost, we return the flat price
+        if material["is_installer_quote"]:
+            total_cost = material["total_cost"] * wall_area
+
+            return {
+                "total": total_cost,
+                "labour_hours": labour_hours,
+                "labour_days": labour_days,
+            }
+
         material_cost_per_m2 = material["material_cost"]
 
         base_material_cost = material_cost_per_m2 * wall_area
@@ -220,11 +234,6 @@ class Costs:
 
         total_cost = subtotal_before_vat + vat_cost
 
-        labour_hours = material["labour_hours_per_unit"] * wall_area
-
-        # Assume a team of 2
-        labour_days = (labour_hours / 8) / 2
-
         if is_extraction_and_refill:
             # bump up the cost of the work
             total_cost = total_cost + CAVITY_EXTRACTION_COST * wall_area
@@ -314,6 +323,22 @@ class Costs:
         :return:
         """
 
+        # if the material is based on an installer cost, we return the flat price
+        if material["is_installer_quote"]:
+            total_cost = material["total_cost"] * wall_area
+
+            labour_hours = material["labour_hours_per_unit"] * wall_area
+
+            # To install internal wall insulation, a small to medium size project might be conducted by a team of 3-5
+            # people
+            labour_days = (labour_hours / 8) / 4
+
+            return {
+                "total": total_cost,
+                "labour_hours": labour_hours,
+                "labour_days": labour_days,
+            }
+
         # Extract and check the different types of data we'll need
         demolition_data = [x for x in non_insulation_materials if x["type"] == "iwi_wall_demolition"]
         vapour_barrier_data = [x for x in non_insulation_materials if x["type"] == "iwi_vapour_barrier"]
@@ -619,6 +644,24 @@ class Costs:
         :return:
         """
 
+        if material["is_installer_quote"]:
+            total_cost = material["total_cost"] * wall_area
+            # Add on a buffer for scaffolding
+            if self.property.data["property-type"] == "House":
+                total_cost += self.EWI_SCAFFOLDING_PRELIMINARIES * total_cost
+
+            labour_hours = material["labour_hours_per_unit"] * wall_area
+
+            # To install external wall insulation, a small to medium size project might be conducted by a team of 3-5
+            # people
+            labour_days = (labour_hours / 8) / 4
+
+            return {
+                "total": total_cost,
+                "labour_hours": labour_hours,
+                "labour_days": labour_days,
+            }
+
         # For semi detatched and detatched houses, as well as maisonettes, we price for scaffolding
 
         if self.property.data["property-type"] == "House":

From 88812a55240154d7af1d49f579bd5789038abd2c Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 27 Jun 2024 10:53:55 +0100
Subject: [PATCH 50/80] done with vander elliot non-intrusives portfolio

---
 backend/Property.py                           |  1 -
 recommendations/Costs.py                      | 62 +++++++++++++++++--
 .../optimiser/optimiser_functions.py          |  7 +--
 recommendations/recommendation_utils.py       |  7 +--
 4 files changed, 63 insertions(+), 14 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index 411a4db0..a80c3057 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -950,7 +950,6 @@ class Property:
             construction_age_band=self.construction_age_band,
             floor_area=self.floor_area,
             number_habitable_rooms=self.number_of_rooms,
-            extension_count=float(self.data["extension-count"]),
         )
 
     def set_solar_panel_area(self, photo_supply_lookup, floor_area_decile_thresholds):
diff --git a/recommendations/Costs.py b/recommendations/Costs.py
index b056274e..68870841 100644
--- a/recommendations/Costs.py
+++ b/recommendations/Costs.py
@@ -261,6 +261,20 @@ class Costs:
 
         :return: A dictionary containing detailed cost breakdown.
         """
+
+        labour_hours = material["labour_hours_per_unit"] * floor_area
+        # Assume a team of 1 person
+        labour_days = labour_hours / 8
+
+        if material["is_installer_quote"]:
+            total_cost = material["total_cost"] * floor_area
+
+            return {
+                "total": total_cost,
+                "labour_hours": labour_hours,
+                "labour_days": labour_days,
+            }
+
         material_cost_per_m2 = material["material_cost"]
 
         # We inflate material costs due to recent price increases
@@ -282,11 +296,6 @@ class Costs:
 
         total_cost = subtotal_before_vat + vat_cost
 
-        labour_hours = material["labour_hours_per_unit"] * floor_area
-
-        # Assume a team of 1 person
-        labour_days = labour_hours / 8
-
         return {
             "total": total_cost,
             "subtotal": subtotal_before_vat,
@@ -423,6 +432,21 @@ class Costs:
         :return:
         """
 
+        # if the material is based on an installer cost, we return the flat price
+        if material["is_installer_quote"]:
+            total_cost = material["total_cost"] * insulation_floor_area
+
+            labour_hours = material["labour_hours_per_unit"] * insulation_floor_area
+            # To install suspended floor insulation, a small to medium size project might be conducted by a team of 3
+            # people
+            labour_days = (labour_hours / 8) / 3
+
+            return {
+                "total": total_cost,
+                "labour_hours": labour_hours,
+                "labour_days": labour_days,
+            }
+
         demolition_data = [x for x in non_insulation_materials if x["type"] == "suspended_floor_demolition"]
         vapour_barrier_data = [x for x in non_insulation_materials if x["type"] == "suspended_floor_vapour_barrier"]
         redecoration_data = [x for x in non_insulation_materials if x["type"] == "suspended_floor_redecoration"]
@@ -525,6 +549,21 @@ class Costs:
         :return:
         """
 
+        # if the material is based on an installer cost, we return the flat price
+        if material["is_installer_quote"]:
+            total_cost = material["total_cost"] * insulation_floor_area
+
+            labour_hours = material["labour_hours_per_unit"] * insulation_floor_area
+            # To install solid floor insulation, a small to medium size project might be conducted by a team of 3
+            # people
+            labour_days = (labour_hours / 8) / 3
+
+            return {
+                "total": total_cost,
+                "labour_hours": labour_hours,
+                "labour_days": labour_days,
+            }
+
         demolition_data = [x for x in non_insulation_materials if x["type"] == "solid_floor_demolition"]
         preparation_data = [x for x in non_insulation_materials if x["type"] == "solid_floor_preparation"]
         vapour_barrier_data = [x for x in non_insulation_materials if x["type"] == "solid_floor_vapour_barrier"]
@@ -915,6 +954,19 @@ class Costs:
 
         """
 
+        if material["is_installer_quote"]:
+            total_cost = material["total_cost"] * number_of_windows
+
+            labour_hours = material["labour_hours_per_unit"] * number_of_windows
+            # To install windows, a small to medium size project might be conducted by a team of 2-3 people
+            labour_days = (labour_hours / 8) / 2
+
+            return {
+                "total": total_cost,
+                "labour_hours": labour_hours,
+                "labour_days": labour_days,
+            }
+
         material_cost = material["material_cost"] * number_of_windows
 
         labour_cost = (
diff --git a/recommendations/optimiser/optimiser_functions.py b/recommendations/optimiser/optimiser_functions.py
index d6353eea..083a7c25 100644
--- a/recommendations/optimiser/optimiser_functions.py
+++ b/recommendations/optimiser/optimiser_functions.py
@@ -18,11 +18,10 @@ def prepare_input_measures(property_recommendations, goal):
 
     input_measures = []
     for recs in property_recommendations:
+
         if recs[0]["type"] == "solar_pv":
-            # if the recommendation is a solar recommendation without a battery, we exclude it from the optimisation.
-            # That will ensure that the optimiser only considers solar recommendations with batteries, so we don't
-            # under-report the potential cost
-            recs = [r for r in recs if r["has_battery"]]
+            # Exclude solar recommendations with a battery; use `not` because `~` on a plain bool is always truthy.
+            recs = [r for r in recs if not r["has_battery"]]
 
         input_measures.append(
             [
diff --git a/recommendations/recommendation_utils.py b/recommendations/recommendation_utils.py
index 07a861dc..9b5e22d1 100644
--- a/recommendations/recommendation_utils.py
+++ b/recommendations/recommendation_utils.py
@@ -673,8 +673,10 @@ def esimtate_pitched_roof_area(floor_area: float, floor_height: float) -> float:
 
 
 def estimate_windows(
-    property_type, built_form, construction_age_band, floor_area, number_habitable_rooms, extension_count
+    property_type, built_form, construction_age_band, floor_area, number_habitable_rooms
 ):
+    # Extensions are not counted separately: an extension already increases the number of habitable rooms
+
     # Base window count based on habitable rooms
     window_count = number_habitable_rooms
 
@@ -717,9 +719,6 @@ def estimate_windows(
         # Older houses with smaller, more numerous windows
         window_count += 1
 
-    # Adjust for extensions (each extension might add windows)
-    window_count += extension_count
-
     # Adjustments for specific property types
     if property_type in ["Flat", "Maisontte"]:
         # Flats might have fewer windows due to shared walls

From e9366c72e891b5405607714f064d7a0326772d08 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 27 Jun 2024 12:41:56 +0100
Subject: [PATCH 51/80] added handling of some additional cases in sap
 description cleaning

---
 .idea/Model.iml                               |   2 +-
 .idea/misc.xml                                |   2 +-
 .../stonewater/outputs 27th June 2024.py      |  48 +++++
 etl/customers/stonewater/shdf_3_clustering.py | 200 +++++-------------
 .../epc_attributes/FloorAttributes.py         |   2 +-
 .../epc_attributes/HotWaterAttributes.py      |   4 +-
 .../epc_attributes/LightingAttributes.py      |  11 +-
 .../epc_attributes/MainheatAttributes.py      |  15 +-
 .../MainheatControlAttributes.py              |   4 +-
 .../epc_attributes/WindowAttributes.py        |   2 +-
 10 files changed, 126 insertions(+), 164 deletions(-)
 create mode 100644 etl/customers/stonewater/outputs 27th June 2024.py

diff --git a/.idea/Model.iml b/.idea/Model.iml
index 4413bb06..b0f9c00d 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 6f308057..1122b380 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/etl/customers/stonewater/outputs 27th June 2024.py b/etl/customers/stonewater/outputs 27th June 2024.py
new file mode 100644
index 00000000..ebb6fc5b
--- /dev/null
+++ b/etl/customers/stonewater/outputs 27th June 2024.py	
@@ -0,0 +1,48 @@
+"""
+This script prepares some outputs for the stonewater project, 27th June 2024
+
+The work done so far has been data cleaning and clustering.
+In this script, we do the following things:
+
+1) Match the clustering data to the archetypes
+2) Do some basic analysis on the data
+3) Mapping of the archetypes
+"""
+import pandas as pd
+from utils.s3 import read_pickle_from_s3
+
+archetyped_asset_list = pd.read_csv("Stonewater asset list with archetypes.csv")
+archetyped_asset_list = archetyped_asset_list[
+    [
+        "internal_id", "customer_asset_id", "udprn", "uprn", "cluster", "archetype_representative", "rank"
+    ]
+]
+archetyped_asset_list = archetyped_asset_list[archetyped_asset_list["rank"] != "NO ARCHETYPE"]
+archetyped_asset_list["rank"] = archetyped_asset_list["rank"].astype(int)
+# Sort
+archetyped_asset_list = archetyped_asset_list.sort_values(by=["cluster", "rank"])
+
+# Read in and merge on clustering features
+clustering_features = read_pickle_from_s3(
+    bucket_name="retrofit-data-dev",
+    s3_file_name="customers/Stonewater/clustering/clustering_dataframe.pkl"
+)
+
+archetyped_asset_list = archetyped_asset_list.merge(
+    clustering_features,
+    on="internal_id",
+    how="inner"
+)
+
+property_type_archetypes = archetyped_asset_list[
+    ["cluster", "rank", "property-type", "built-form", "walls-description"]]
+
+# Key variables for separation:
+# - property-type
+# - built-form
+# - walls-description
+# - roof-description
+
+clustering_features[["property-type", "built-form", "walls-description"]].drop_duplicates().shape
+
+clustering_features["walls-description"].value_counts()
diff --git a/etl/customers/stonewater/shdf_3_clustering.py b/etl/customers/stonewater/shdf_3_clustering.py
index 6c7a0fc6..b8e71ae7 100644
--- a/etl/customers/stonewater/shdf_3_clustering.py
+++ b/etl/customers/stonewater/shdf_3_clustering.py
@@ -1633,58 +1633,60 @@ def compile_data_final():
     # )
 
     # from utils.s3 import read_pickle_from_s3
-    # data = read_pickle_from_s3(
+    # property_attributes = read_pickle_from_s3(
     #     bucket_name="retrofit-data-dev",
     #     s3_file_name="customers/Stonewater/clustering/clustering_dataframe.pkl"
     # )
 
-    # CLUSTERING!!
+    # We perform some additional cleaning on the data
+    import msgpack
+    cleaned = read_from_s3(
+        s3_file_name="cleaned_epc_data/cleaned.bson",
+        bucket_name="retrofit-data-dev"
+    )
 
-    # from sklearn.cluster import KMeans
-    # from sklearn.preprocessing import OneHotEncoder
-    # from scipy.spatial.distance import cdist
-    #
-    # property_attributes.set_index('internal_id', inplace=True)
-    #
-    # # Step 1: Prepare the data
-    # # Identify categorical columns (you might need to adjust this)
-    # categorical_cols = property_attributes.select_dtypes(include=['object', 'category']).columns.tolist()
-    # for col in categorical_cols:
-    #     property_attributes[col] = property_attributes[col].astype(str)
-    #
-    # # Applying OneHotEncoder
-    # encoder = OneHotEncoder(sparse=False)
-    # encoded_cats = encoder.fit_transform(property_attributes[categorical_cols])
-    #
-    # # Creating a new DataFrame with encoded categorical data and original numerical data
-    # numerical_data = property_attributes.select_dtypes(include=[np.number])
-    # data_for_clustering = pd.concat([numerical_data, pd.DataFrame(encoded_cats, index=numerical_data.index)], axis=1)
-    #
-    # # Convert all column names to strings to satisfy KMeans requirements
-    # data_for_clustering.columns = data_for_clustering.columns.astype(str)
-    #
-    # # Step 2: K-Means Clustering
-    # k = 450  # number of clusters
-    # kmeans = KMeans(n_clusters=k, random_state=0)
-    # property_attributes['cluster'] = kmeans.fit_predict(data_for_clustering)
-    #
-    # # Extracting centroids
-    # centroids = kmeans.cluster_centers_
-    #
-    # # Step 3: Assign clusters and rank rows
-    # # Calculating distances from each point to its cluster's centroid
-    # distances = cdist(data_for_clustering, centroids, 'euclidean')
-    # min_distances = distances.min(axis=1)
-    # property_attributes['distance_to_centroid'] = min_distances
-    #
-    # # Ranking rows by distance within each cluster
-    # property_attributes['rank'] = property_attributes.groupby('cluster')['distance_to_centroid'].rank(method='first')
-    #
-    # # Sorting to verify
-    # property_attributes.sort_values(by=['cluster', 'rank'], inplace=True)
-    #
-    # # Optional: Displaying the dataframe
-    # print(property_attributes.head())
+    cleaned = msgpack.unpackb(cleaned, raw=False)
+    from etl.epc_clean.epc_attributes.FloorAttributes import FloorAttributes
+    from etl.epc_clean.epc_attributes.HotWaterAttributes import HotWaterAttributes
+    from etl.epc_clean.epc_attributes.MainFuelAttributes import MainFuelAttributes
+    from etl.epc_clean.epc_attributes.MainheatAttributes import MainHeatAttributes
+    from etl.epc_clean.epc_attributes.MainheatControlAttributes import MainheatControlAttributes
+    from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
+    from etl.epc_clean.epc_attributes.WallAttributes import WallAttributes
+    from etl.epc_clean.epc_attributes.WindowAttributes import WindowAttributes
+    from etl.epc_clean.epc_attributes.LightingAttributes import LightingAttributes
+
+    cleaners = {
+        "floor-description": FloorAttributes,
+        'hotwater-description': HotWaterAttributes,
+        'main-fuel': MainFuelAttributes,
+        'mainheat-description': MainHeatAttributes,
+        'mainheatcont-description': MainheatControlAttributes,
+        'roof-description': RoofAttributes,
+        'walls-description': WallAttributes,
+        'windows-description': WindowAttributes,
+        'lighting-description': LightingAttributes
+    }
+    for variable_to_clean in cleaned.keys():
+        unique_descriptions = property_attributes[variable_to_clean].unique()
+        clean_df = pd.DataFrame(cleaned[variable_to_clean])
+        # Check if we have any
+        missed = [x for x in unique_descriptions if x not in clean_df["original_description"].values]
+        if missed:
+            descriptions_to_append = []
+            for description in missed:
+                if variable_to_clean == "lighting-description":
+                    cln = cleaners[variable_to_clean](description, **{"averages": pd.DataFrame()})
+                else:
+                    cln = cleaners[variable_to_clean](description)
+                to_append = {
+                    "original_description": description,
+                    "clean_description": cln.description.replace("(assumed)", "").rstrip().capitalize(),
+                    **cln.process()
+                }
+                descriptions_to_append.append(to_append)
+
+    # CLUSTERING!!
 
     from sklearn.cluster import KMeans
     from sklearn.preprocessing import StandardScaler, OneHotEncoder
@@ -1777,110 +1779,6 @@ def compile_data_final():
 
     stonewater_uprn_lookup.to_excel("Stonewater uprn lookup table.xlsx")
 
-    ################################################
-    # Agglomertive Clustering
-    ################################################
-
-    # from sklearn.cluster import KMeans, AgglomerativeClustering
-    # from sklearn.preprocessing import StandardScaler, OneHotEncoder
-    # from sklearn.compose import ColumnTransformer
-    # from sklearn.pipeline import Pipeline
-    # from scipy.spatial.distance import cdist
-    # import numpy as np
-    # from collections import Counter
-    #
-    # id_column = 'internal_id'
-    # property_attributes.set_index(id_column, inplace=True)
-    #
-    # # Define the preprocessing for numerical and categorical features
-    # numerical_features = property_attributes.select_dtypes(include=['int64', 'float64']).columns.tolist()
-    # categorical_features = property_attributes.select_dtypes(include=['object', 'category']).columns.tolist()
-    #
-    # for col in categorical_features:
-    #     property_attributes[col] = property_attributes[col].astype(str)
-    #
-    # preprocessor = ColumnTransformer(
-    #     transformers=[
-    #         ('num', StandardScaler(), numerical_features),
-    #         ('cat', OneHotEncoder(sparse_output=False), categorical_features)
-    #     ]
-    # )
-    #
-    # # Function to perform clustering and merge small clusters
-    # def cluster_with_min_size(data, preprocessor, n_clusters=10, min_size=5):
-    #     while True:
-    #         # Preprocess the data
-    #         processed_data = preprocessor.fit_transform(data)
-    #
-    #         # Initial clustering
-    #         clustering = AgglomerativeClustering(n_clusters=n_clusters)
-    #         labels = clustering.fit_predict(processed_data)
-    #
-    #         # Check cluster sizes
-    #         cluster_counts = Counter(labels)
-    #
-    #         # Find clusters smaller than min_size
-    #         small_clusters = {cluster for cluster, count in cluster_counts.items() if count < min_size}
-    #
-    #         if not small_clusters:
-    #             break
-    #
-    #         # Merge small clusters
-    #         for cluster in small_clusters:
-    #             # Find the nearest cluster to merge with
-    #             cluster_data = processed_data[labels == cluster]
-    #             other_clusters = [i for i in range(n_clusters) if i not in small_clusters]
-    #             other_cluster_data = [processed_data[labels == i] for i in other_clusters]
-    #             other_centroids = np.vstack([data.mean(axis=0) for data in other_cluster_data])
-    #
-    #             distances = cdist(cluster_data, other_centroids).mean(axis=0)
-    #             closest_cluster = other_clusters[np.argmin(distances)]
-    #
-    #             labels[labels == cluster] = closest_cluster
-    #
-    #         n_clusters -= len(small_clusters)
-    #
-    #     return labels
-    #
-    # # Perform clustering with minimum size constraint
-    # n_clusters = 10
-    # min_size = 5
-    # property_attributes['cluster'] = cluster_with_min_size(property_attributes, preprocessor, n_clusters, min_size)
-    #
-    # # Filter out empty clusters
-    # valid_clusters = property_attributes['cluster'].unique()
-    #
-    # # Get centroids for the resulting clusters
-    # processed_data = preprocessor.transform(property_attributes.drop(columns=["cluster"]))
-    # centroids = np.vstack([processed_data[property_attributes['cluster'] == i].mean(axis=0) for i in valid_clusters])
-    #
-    # # Calculate distances from each point to the centroid of its cluster
-    # distances_to_centroids = [
-    #     cdist(processed_data[i].reshape(1, -1),
-    #           centroids[valid_clusters.tolist().index(label)].reshape(1, -1)).flatten()[0]
-    #     for i, label in enumerate(property_attributes['cluster'])
-    # ]
-    #
-    # property_attributes['distance_to_centroid'] = distances_to_centroids
-    #
-    # # Verify that at least one point in each cluster has zero distance to the centroid
-    # for cluster_id in valid_clusters:
-    #     cluster_data = property_attributes[property_attributes['cluster'] == cluster_id]
-    #     min_distance = cluster_data['distance_to_centroid'].min()
-    #     print(f"Cluster {cluster_id} minimum distance to centroid: {min_distance}")
-    #     if min_distance != 0:
-    #         print(f"No point with zero distance found in cluster {cluster_id}")
-    #
-    # # Rank the distances within each cluster
-    # property_attributes['rank_within_cluster'] = property_attributes.groupby('cluster')['distance_to_centroid'] \
-    #     .rank(method='first')
-    #
-    # # Reset index to get 'internal_id' back
-    # property_attributes.reset_index(inplace=True)
-    #
-    # # Display the DataFrame
-    # print(property_attributes)
-
 
 def pull_ideal_postcodes(missing_uprn_with_udprn):
     api_key = ""  # Log into the platform the get the API key: https://account.ideal-postcodes.co.uk/
diff --git a/etl/epc_clean/epc_attributes/FloorAttributes.py b/etl/epc_clean/epc_attributes/FloorAttributes.py
index 245a91bc..817c2b43 100644
--- a/etl/epc_clean/epc_attributes/FloorAttributes.py
+++ b/etl/epc_clean/epc_attributes/FloorAttributes.py
@@ -38,7 +38,7 @@ class FloorAttributes(Definitions):
         self.description: str = description.lower()
 
         self.nodata = (not description) or (description in self.DATA_ANOMALY_MATCHES) or (
-            description in self.OBSERVED_ERRORS)
+            description in self.OBSERVED_ERRORS) or (self.description == "sap05:floor")
 
         # Try and perform a translation, incase it's in welsh
         self.translate_welsh_text()
diff --git a/etl/epc_clean/epc_attributes/HotWaterAttributes.py b/etl/epc_clean/epc_attributes/HotWaterAttributes.py
index 54deaa09..f9cec48b 100644
--- a/etl/epc_clean/epc_attributes/HotWaterAttributes.py
+++ b/etl/epc_clean/epc_attributes/HotWaterAttributes.py
@@ -129,7 +129,9 @@ class HotWaterAttributes(Definitions):
     def __init__(self, description: str):
         self.description: str = clean_description(description.lower()).strip()
 
-        self.nodata = not self.description or description in self.DATA_ANOMALY_MATCHES
+        self.nodata = not self.description or description in self.DATA_ANOMALY_MATCHES or (
+            self.description == "sap05 hot-water"
+        )
 
         translation = self.WELSH_TEXT.get(self.description)
 
diff --git a/etl/epc_clean/epc_attributes/LightingAttributes.py b/etl/epc_clean/epc_attributes/LightingAttributes.py
index 0fe3db16..18475b2d 100644
--- a/etl/epc_clean/epc_attributes/LightingAttributes.py
+++ b/etl/epc_clean/epc_attributes/LightingAttributes.py
@@ -1,15 +1,18 @@
 import re
+from BaseUtility import Definitions
 from etl.epc_clean.epc_attributes.attribute_utils import clean_description
 from etl.epc_clean.utils import correct_spelling
 
 
-class LightingAttributes:
+class LightingAttributes(Definitions):
     WELSH_TEXT = {
         "goleuadau ynni-isel ym mhob un ogçör mannau gosod": "low energy lighting in all fixed outlets",
         "dim goleuadau ynni-isel": "no low energy lighting",
         "goleuadau ynni-isel ym mhob un o'r mannau gosod": 'Low energy lighting in all fixed outlets'
     }
 
+    OBSERVED_ERRORS = []
+
     def __init__(self, description, averages):
         self.description: str = clean_description(description.lower())
 
@@ -18,6 +21,9 @@ class LightingAttributes:
         self.description = correct_spelling(self.description)
         self.averages = averages
 
+        self.nodata = (not description) or (description in self.DATA_ANOMALY_MATCHES) or (
+            description in self.OBSERVED_ERRORS) or (description == "SAP05:Lighting")
+
     def welsh_translation_search(self):
         """
         For welsh text describing the percentage of low energy lighting, we match the regular
@@ -40,6 +46,9 @@ class LightingAttributes:
 
         description = self.description
 
+        if self.nodata:
+            return {"low_energy_proportion": None}
+
         if 'no low energy lighting' in description:
             return {"low_energy_proportion": 0}
 
diff --git a/etl/epc_clean/epc_attributes/MainheatAttributes.py b/etl/epc_clean/epc_attributes/MainheatAttributes.py
index 9f0931a3..56115dca 100644
--- a/etl/epc_clean/epc_attributes/MainheatAttributes.py
+++ b/etl/epc_clean/epc_attributes/MainheatAttributes.py
@@ -77,7 +77,9 @@ class MainHeatAttributes(Definitions):
 
         self.description: str = clean_description(self.description).strip()
         # Remove special characters
-        self.nodata = not description or description in self.DATA_ANOMALY_MATCHES
+        self.nodata = not description or description in self.DATA_ANOMALY_MATCHES or (
+            description == "SAP05:Main-Heating"
+        )
 
         translation = self.WELSH_TEXT.get(self.description)
         if translation:
@@ -97,11 +99,12 @@ class MainHeatAttributes(Definitions):
 
         self.process_edge_cases()
 
-        if (not description or not any(
-            rt in self.description for rt in
-            self.HEAT_SYSTEMS + self.FUEL_TYPES + self.DISTRIBUTION_SYSTEMS + self.OTHERS
-        ) and not self.is_edge_case):
-            raise ValueError('Invalid description')
+        if not self.nodata:
+            if (not description or not any(
+                rt in self.description for rt in
+                self.HEAT_SYSTEMS + self.FUEL_TYPES + self.DISTRIBUTION_SYSTEMS + self.OTHERS
+            ) and not self.is_edge_case):
+                raise ValueError('Invalid description')
 
     def process_edge_cases(self) -> (dict, bool):
         """
diff --git a/etl/epc_clean/epc_attributes/MainheatControlAttributes.py b/etl/epc_clean/epc_attributes/MainheatControlAttributes.py
index 887bdda7..46fff6d8 100644
--- a/etl/epc_clean/epc_attributes/MainheatControlAttributes.py
+++ b/etl/epc_clean/epc_attributes/MainheatControlAttributes.py
@@ -117,7 +117,9 @@ class MainheatControlAttributes(Definitions):
 
     def __init__(self, description: str):
         self.description: str = clean_description(description.lower()).strip()
-        self.nodata = not self.description or description in self.DATA_ANOMALY_MATCHES
+        self.nodata = not self.description or description in self.DATA_ANOMALY_MATCHES or (
+            description == "SAP05:Main-Heating-Controls"
+        )
 
         translation = self.WELSH_TEXT.get(self.description)
         if translation:
diff --git a/etl/epc_clean/epc_attributes/WindowAttributes.py b/etl/epc_clean/epc_attributes/WindowAttributes.py
index 5286fc5a..e9139510 100644
--- a/etl/epc_clean/epc_attributes/WindowAttributes.py
+++ b/etl/epc_clean/epc_attributes/WindowAttributes.py
@@ -38,7 +38,7 @@ class WindowAttributes(Definitions):
 
         # In the case of an empty description, we want to return a dictionary with all values set to False
         # and indicate there was no data
-        self.nodata = not description or description in self.DATA_ANOMALY_MATCHES
+        self.nodata = not description or description in self.DATA_ANOMALY_MATCHES or description == "SAP05:Windows"
 
         translation = self.WELSH_TEXT.get(self.description)
         if translation:

From 4e85d1380edcee2b7a54dced036790d9c269cb03 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 27 Jun 2024 14:17:39 +0100
Subject: [PATCH 52/80] cleaning columns for stonewater clustering

---
 etl/customers/stonewater/shdf_3_clustering.py | 62 +++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/etl/customers/stonewater/shdf_3_clustering.py b/etl/customers/stonewater/shdf_3_clustering.py
index b8e71ae7..8b878f26 100644
--- a/etl/customers/stonewater/shdf_3_clustering.py
+++ b/etl/customers/stonewater/shdf_3_clustering.py
@@ -1668,6 +1668,7 @@ def compile_data_final():
         'lighting-description': LightingAttributes
     }
     for variable_to_clean in cleaned.keys():
+
         unique_descriptions = property_attributes[variable_to_clean].unique()
         clean_df = pd.DataFrame(cleaned[variable_to_clean])
         # Check if we have any
@@ -1686,6 +1687,67 @@ def compile_data_final():
                 }
                 descriptions_to_append.append(to_append)
 
+            descriptions_to_append = pd.DataFrame(descriptions_to_append)
+            clean_df = pd.concat([clean_df, descriptions_to_append])
+
+            starting_size = len(property_attributes)
+            property_attributes = property_attributes.merge(
+                clean_df, how="left", left_on=variable_to_clean, right_on="original_description"
+            )
+            if starting_size != property_attributes.shape[0]:
+                raise Exception("something went wrong")
+            property_attributes = property_attributes.drop(columns=["original_description", "clean_description"])
+            # Fill missings
+            for k in clean_df.columns:
+                if k in property_attributes.columns:
+                    property_attributes[k] = property_attributes[k].fillna("missing")
+
+    # We group some variables such as thermal transmittance for walls, roof, floors
+    ranges = {
+        "< 0.1": (0, 0.1),
+        "0.1 - 0.3": (0.1, 0.3),
+        "0.3 - 0.5": (0.3, 0.5),
+        "0.5 - 0.7": (0.5, 0.7),
+        "0.9 - 1": (0.9, 1),
+        "1 - 1.5": (1, 1.5),
+        "1.5 - 2": (1.5, 2),
+        "2+": (2, 2.5)
+    }
+
+    # Generate the lookup table
+    thermal_transmittance_lookup_table = []
+    for i in range(1, 251):
+        value = i / 100
+        for label, (low, high) in ranges.items():
+            if low < value <= high:
+                thermal_transmittance_lookup_table.append({"from": value, "to": label})
+                break
+
+    # Convert to DataFrame for display
+    thermal_transmittance_lookup_table = pd.DataFrame(thermal_transmittance_lookup_table)
+    thermal_transmittance_lookup_table["from"] = thermal_transmittance_lookup_table["from"].astype(str)
+
+    thermal_transmittance_cols = [
+        c for c in property_attributes.columns if "thermal_transmittance" in c and "unit" not in c
+    ]
+    for i, col in enumerate(thermal_transmittance_cols):
+        # Perform the mapping
+        to_col = f"to_{i}"
+        property_attributes[col] = property_attributes[col].astype(str)
+        property_attributes = property_attributes.merge(
+            thermal_transmittance_lookup_table.rename(columns={"to": to_col}),
+            how="left",
+            left_on=col,
+            right_on="from",
+            suffixes=("", f"_{i}")
+        )
+        property_attributes = property_attributes.drop(columns=["from", col])
+        property_attributes[to_col] = property_attributes[to_col].fillna("unknown")
+
+    # Drop the description columns that are the keys in cleaned
+    property_attributes = property_attributes.drop(columns=list(cleaned.keys()))
+    # Perform the mapping
+
     # CLUSTERING!!
 
     from sklearn.cluster import KMeans

From 07ddf8383b1b290c4855b6e12f6dc60b74be9456 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 27 Jun 2024 17:58:50 +0100
Subject: [PATCH 53/80] extended the capturing of u-values from thermal
 transmittance descriptions

---
 etl/customers/stonewater/shdf_3_clustering.py   | 1 +
 etl/epc_clean/epc_attributes/attribute_utils.py | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/etl/customers/stonewater/shdf_3_clustering.py b/etl/customers/stonewater/shdf_3_clustering.py
index 8b878f26..caaf84a6 100644
--- a/etl/customers/stonewater/shdf_3_clustering.py
+++ b/etl/customers/stonewater/shdf_3_clustering.py
@@ -1667,6 +1667,7 @@ def compile_data_final():
         'windows-description': WindowAttributes,
         'lighting-description': LightingAttributes
     }
+    
     for variable_to_clean in cleaned.keys():
 
         unique_descriptions = property_attributes[variable_to_clean].unique()
diff --git a/etl/epc_clean/epc_attributes/attribute_utils.py b/etl/epc_clean/epc_attributes/attribute_utils.py
index 60f4653e..a5326207 100644
--- a/etl/epc_clean/epc_attributes/attribute_utils.py
+++ b/etl/epc_clean/epc_attributes/attribute_utils.py
@@ -2,8 +2,8 @@ import re
 import string
 from typing import Tuple, Union, Dict, List
 
-THERMAL_TRANSMITTANCE_STR = r"average thermal transmittance (-?\d+(\.\d+)?)\s(w/m\S+k)"
-THERMAL_TRANSMITTANCE_REGEX = re.compile(THERMAL_TRANSMITTANCE_STR)
+THERMAL_TRANSMITTANCE_STR = r"average thermal transmittance\s*[=:-]?\s*(-?\d+(\.\d+)?)\s*(w/m\S*k)"  # group 3 = unit
+THERMAL_TRANSMITTANCE_REGEX = re.compile(THERMAL_TRANSMITTANCE_STR, re.IGNORECASE)
 
 DOUBLE_SPACE_PATTERN = re.compile(r"\s+")
 

From 6f32aa672bdeaa043a9cf3f81c5c35801bdd741c Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 27 Jun 2024 18:03:57 +0100
Subject: [PATCH 54/80] Added corrections to walls cleaning class

---
 etl/epc_clean/epc_attributes/WallAttributes.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/etl/epc_clean/epc_attributes/WallAttributes.py b/etl/epc_clean/epc_attributes/WallAttributes.py
index 09eac215..49252552 100644
--- a/etl/epc_clean/epc_attributes/WallAttributes.py
+++ b/etl/epc_clean/epc_attributes/WallAttributes.py
@@ -75,12 +75,19 @@ class WallAttributes(Definitions):
         'insulation_thickness', 'external_insulation', 'internal_insulation'
     ]
 
+    CORRECTIONS = {
+        "Granite or whin, as built, no insulation (assumed)": "Granite or whinstone, as built, no insulation (assumed)",
+    }
+
     def __init__(self, description: str):
         """
         :param description: Description of the walls.
         """
         self.description: str = description
 
+        if self.description in self.CORRECTIONS:
+            self.description = self.CORRECTIONS[self.description]
+
         self.welsh_translation_search()
 
         self.nodata = not description or description in self.DATA_ANOMALY_MATCHES

From 4456ab29eeac9a3407408d84b39ccc328dd8983a Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 28 Jun 2024 11:03:22 +0100
Subject: [PATCH 55/80] added the grouped clustering

---
 .../stonewater/outputs 27th June 2024.py      |  31 ++-
 etl/customers/stonewater/shdf_3_clustering.py | 206 ++++++++++++------
 2 files changed, 161 insertions(+), 76 deletions(-)

diff --git a/etl/customers/stonewater/outputs 27th June 2024.py b/etl/customers/stonewater/outputs 27th June 2024.py
index ebb6fc5b..d8bf43be 100644
--- a/etl/customers/stonewater/outputs 27th June 2024.py	
+++ b/etl/customers/stonewater/outputs 27th June 2024.py	
@@ -11,7 +11,7 @@ In this script, we do the following things:
 import pandas as pd
 from utils.s3 import read_pickle_from_s3
 
-archetyped_asset_list = pd.read_csv("Stonewater asset list with archetypes.csv")
+archetyped_asset_list = pd.read_csv("Stonewater asset list with archetypes V2.csv")
 archetyped_asset_list = archetyped_asset_list[
     [
         "internal_id", "customer_asset_id", "udprn", "uprn", "cluster", "archetype_representative", "rank"
@@ -34,15 +34,22 @@ archetyped_asset_list = archetyped_asset_list.merge(
     how="inner"
 )
 
+# Look at number of combinations
+# - If we look at the number of combinations of property type & built form, we have 25 unique combinations
+# - If we look at the number of combinations of property type, built form, and walls description, this jumps
+# massively to 237 unique combinations
+# - Adding roof description to the mix, we have 857 unique combinations
+# - Adding floor description, we have 1278 unique combinations
+# This doesn't even begin to consider the other variables that we have in the dataset, such as the property dimensions,
+# location, and other factors.
+# Ideally, we would perfectly separate these variables but this is not possible, given the constraint of needing ~450
+# archetypes. We will need to make some compromises here. This is where a clustering algorithm can help us.
+# We don't end up with perfect separation but we can get a good enough separation to make the archetypes useful, and can
+# base the archetypes on a number of energy performance metrics, as well as location and other factors.
+# archetyped_asset_list[
+#     ["property-type", "built-form", "walls-description", "roof-description",
+#      "floor-description"]].drop_duplicates().shape
+
 property_type_archetypes = archetyped_asset_list[
-    ["cluster", "rank", "property-type", "built-form", "walls-description"]]
-
-# Key variables for separation:
-# - property-type
-# - built-form
-# - walls-description
-# - roof-description
-
-clustering_features[["property-type", "built-form", "walls-description"]].drop_duplicates().shape
-
-clustering_features["walls-description"].value_counts()
+    ["cluster", "rank", "property-type", "built-form", "walls-description"]
+]
diff --git a/etl/customers/stonewater/shdf_3_clustering.py b/etl/customers/stonewater/shdf_3_clustering.py
index caaf84a6..fa6551b7 100644
--- a/etl/customers/stonewater/shdf_3_clustering.py
+++ b/etl/customers/stonewater/shdf_3_clustering.py
@@ -14,6 +14,11 @@ import pandas as pd
 import time
 from utils.s3 import save_data_to_s3, read_excel_from_s3, read_from_s3, read_dataframe_from_s3_parquet, \
     save_dataframe_to_s3_parquet, save_pickle_to_s3
+from sklearn.cluster import KMeans
+from sklearn.preprocessing import StandardScaler, OneHotEncoder
+from sklearn.compose import ColumnTransformer
+from sklearn.pipeline import Pipeline
+from scipy.spatial.distance import cdist
 
 load_dotenv(dotenv_path="backend/.env")
 EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
@@ -1090,6 +1095,26 @@ def concatenate_row(row):
     return ', '.join(row.dropna().replace('', None).dropna().astype(str))
 
 
+def adjust_clusters(cluster_allocation, total_clusters):
+    """
+    Adjust the per-group cluster counts, in place, until they sum to total_clusters.
+
+    Sweeps the groups largest-first, moving one cluster per group, for as many sweeps as are
+    needed. No group is reduced below one cluster. Returns the same (mutated) dictionary.
+    """
+    adjustment = total_clusters - sum(cluster_allocation.values())
+    step = 1 if adjustment > 0 else -1
+    while adjustment != 0:
+        # Any group can grow; only groups holding more than one cluster can shrink
+        movable = [g for g in cluster_allocation if step > 0 or cluster_allocation[g] > 1]
+        if not movable:
+            break
+        for group in sorted(movable, key=lambda g: -cluster_allocation[g])[:abs(adjustment)]:
+            cluster_allocation[group] += step
+            adjustment -= step
+    return cluster_allocation
+
+
 def compile_data_final():
     # Updated version:
 
@@ -1667,7 +1692,7 @@ def compile_data_final():
         'windows-description': WindowAttributes,
         'lighting-description': LightingAttributes
     }
-    
+
     for variable_to_clean in cleaned.keys():
 
         unique_descriptions = property_attributes[variable_to_clean].unique()
@@ -1691,28 +1716,45 @@ def compile_data_final():
             descriptions_to_append = pd.DataFrame(descriptions_to_append)
             clean_df = pd.concat([clean_df, descriptions_to_append])
 
-            starting_size = len(property_attributes)
-            property_attributes = property_attributes.merge(
-                clean_df, how="left", left_on=variable_to_clean, right_on="original_description"
-            )
-            if starting_size != property_attributes.shape[0]:
-                raise Exception("something went wrong")
-            property_attributes = property_attributes.drop(columns=["original_description", "clean_description"])
-            # Fill missings
-            for k in clean_df.columns:
-                if k in property_attributes.columns:
-                    property_attributes[k] = property_attributes[k].fillna("missing")
+        clean_df = clean_df.rename(
+            columns={
+                "thermal_transmittance": f"{variable_to_clean}_thermal_transmittance",
+                "is_assumed": f"{variable_to_clean}_is_assumed",
+            }
+        )
+
+        if 'thermal_transmittance_unit' in clean_df.columns:
+            clean_df = clean_df.drop(columns=['thermal_transmittance_unit'])
+
+        starting_size = len(property_attributes)
+        property_attributes = property_attributes.merge(
+            clean_df, how="left", left_on=variable_to_clean, right_on="original_description"
+        )
+        if starting_size != property_attributes.shape[0]:
+            raise Exception("something went wrong")
+        property_attributes = property_attributes.drop(columns=["original_description", "clean_description"])
+        # Fill missings
+        for k in clean_df.columns:
+            if k in property_attributes.columns:
+                property_attributes[k] = property_attributes[k].fillna("missing")
 
     # We group some variables such as thermal transmittance for walls, roof, floors
+    # ranges = {
+    #     "< 0.1": (0, 0.1),
+    #     "0.1 - 0.3": (0.1, 0.3),
+    #     "0.3 - 0.5": (0.3, 0.5),
+    #     "0.5 - 0.7": (0.5, 0.7),
+    #     "0.9 - 1": (0.9, 1),
+    #     "1 - 1.5": (1, 1.5),
+    #     "1.5 - 2": (1.5, 2),
+    #     "2+": (2, 2.5)
+    # }
+
     ranges = {
         "< 0.1": (0, 0.1),
         "0.1 - 0.3": (0.1, 0.3),
         "0.3 - 0.5": (0.3, 0.5),
-        "0.5 - 0.7": (0.5, 0.7),
-        "0.9 - 1": (0.9, 1),
-        "1 - 1.5": (1, 1.5),
-        "1.5 - 2": (1.5, 2),
-        "2+": (2, 2.5)
+        "0.5+": (0.5, 2.5),
     }
 
     # Generate the lookup table
@@ -1733,7 +1775,7 @@ def compile_data_final():
     ]
     for i, col in enumerate(thermal_transmittance_cols):
         # Perform the mapping
-        to_col = f"to_{i}"
+        to_col = f"to_{col}"
         property_attributes[col] = property_attributes[col].astype(str)
         property_attributes = property_attributes.merge(
             thermal_transmittance_lookup_table.rename(columns={"to": to_col}),
@@ -1750,72 +1792,108 @@ def compile_data_final():
     # Perform the mapping
 
     # CLUSTERING!!
-
-    from sklearn.cluster import KMeans
-    from sklearn.preprocessing import StandardScaler, OneHotEncoder
-    from sklearn.compose import ColumnTransformer
-    from sklearn.pipeline import Pipeline
-    from scipy.spatial.distance import cdist
-    id_column = 'internal_id'
-    property_attributes.set_index(id_column, inplace=True)
+    grouping_columns = [
+        'is_cavity_wall', 'is_solid_brick', 'built-form', 'property-type'
+    ]
 
     # Define the preprocessing for numerical and categorical features
     numerical_features = property_attributes.select_dtypes(include=['int64', 'float64']).columns.tolist()
     categorical_features = property_attributes.select_dtypes(include=['object', 'category']).columns.tolist()
+    categorical_features = [c for c in categorical_features if c not in ["internal_id", grouping_columns]]
 
     for col in categorical_features:
         property_attributes[col] = property_attributes[col].astype(str)
 
-    preprocessor = ColumnTransformer(
-        transformers=[
-            ('num', StandardScaler(), numerical_features),
-            ('cat', OneHotEncoder(), categorical_features)
+    id_column = 'internal_id'
+    n_clusters = 450
+    random_state = 0
+
+    training_data_grouped = property_attributes.groupby(grouping_columns)
+    group_sizes = {name: len(group) for name, group in training_data_grouped}
+    total_size = sum(group_sizes.values())
+    cluster_allocation = {
+        name: max(1, int(round(n_clusters * (size / total_size)))) for name, size in group_sizes.items()
+    }
+
+    # Adjust cluster allocation to ensure total clusters sum to 450
+    cluster_allocation = adjust_clusters(cluster_allocation, n_clusters)
+
+    # TODO: This code throws many warnings because of the highly fragmented dataframe. We should re-factor this to
+    #       collect the results of the clustering and then perform the transformations afterwards
+
+    final_clusters = []
+    for group_variables, group_data in tqdm(training_data_grouped, total=len(training_data_grouped)):
+
+        group_n_clusters = cluster_allocation[group_variables]
+        group_data.set_index(id_column, inplace=True)
+
+        preprocessor = ColumnTransformer(
+            transformers=[
+                ('num', StandardScaler(), numerical_features),
+                ('cat', OneHotEncoder(), categorical_features)
+            ]
+        )
+
+        pipeline = Pipeline(steps=[('preprocessor', preprocessor),
+                                   ('kmeans', KMeans(n_clusters=group_n_clusters, random_state=random_state))])
+
+        # Fit the pipeline to the data
+        pipeline.fit(group_data)
+
+        # Transform the data using the fitted pipeline
+        processed_data = pipeline.named_steps['preprocessor'].transform(group_data)
+
+        # Get cluster labels
+        group_data['cluster'] = pipeline.named_steps['kmeans'].labels_
+
+        # Get centroids (already in the same transformed space)
+        centroids = pipeline.named_steps['kmeans'].cluster_centers_
+
+        # if the data isn't an array, make it one
+        if not isinstance(processed_data, np.ndarray):
+            processed_data = processed_data.toarray()
+
+        # Calculate distances from each point to the centroid of its cluster
+        distances_to_centroids = [
+            cdist(processed_data[i].reshape(1, -1), centroids[label].reshape(1, -1)).flatten()[0]
+            for i, label in enumerate(group_data['cluster'])
         ]
-    )
 
-    pipeline = Pipeline(steps=[('preprocessor', preprocessor),
-                               ('kmeans', KMeans(n_clusters=450, random_state=0))])
+        group_data['distance_to_centroid'] = distances_to_centroids
 
-    # Fit the pipeline to the data
-    pipeline.fit(property_attributes)
+        # for cluster_id in group_data['cluster'].unique():
+        #     cluster_data = group_data[group_data['cluster'] == cluster_id]
+        #     min_distance = cluster_data['distance_to_centroid'].min()
+        #     print(f"Cluster {cluster_id} minimum distance to centroid: {min_distance}")
+        #     if min_distance != 0:
+        #         print(f"No point with zero distance found in cluster {cluster_id}")
 
-    # Transform the data using the fitted pipeline
-    processed_data = pipeline.named_steps['preprocessor'].transform(property_attributes)
+        # Ranking rows by distance within each cluster
+        group_data['rank'] = group_data.groupby('cluster')['distance_to_centroid'].rank(method='first')
 
-    # Get cluster labels
-    property_attributes['cluster'] = pipeline.named_steps['kmeans'].labels_
+        # Sorting to verify
+        group_data.sort_values(by=['cluster', 'rank'], inplace=True)
+        group_data.reset_index(inplace=True)
 
-    # Get centroids (already in the same transformed space)
-    centroids = pipeline.named_steps['kmeans'].cluster_centers_
+        to_append = group_data[["internal_id", "cluster", "rank"]].copy()
+        to_append["cluster"] = to_append["cluster"].astype(str) + str(group_variables)
+        final_clusters.append(to_append)
 
-    processed_data = processed_data.toarray()
+    final_clusters = pd.concat(final_clusters)
+    # remap the clusters from the current names to 1 -> n_clusters
 
-    # Calculate distances from each point to the centroid of its cluster
-    distances_to_centroids = [
-        cdist(processed_data[i].reshape(1, -1), centroids[label].reshape(1, -1)).flatten()[0]
-        for i, label in enumerate(property_attributes['cluster'])
-    ]
-
-    property_attributes['distance_to_centroid'] = distances_to_centroids
-
-    for cluster_id in property_attributes['cluster'].unique():
-        cluster_data = property_attributes[property_attributes['cluster'] == cluster_id]
-        min_distance = cluster_data['distance_to_centroid'].min()
-        print(f"Cluster {cluster_id} minimum distance to centroid: {min_distance}")
-        if min_distance != 0:
-            print(f"No point with zero distance found in cluster {cluster_id}")
-
-    # Ranking rows by distance within each cluster
-    property_attributes['rank'] = property_attributes.groupby('cluster')['distance_to_centroid'].rank(
-        method='first')
-
-    # Sorting to verify
-    property_attributes.sort_values(by=['cluster', 'rank'], inplace=True)
+    cluster_mapping = {cluster: i for i, cluster in enumerate(final_clusters["cluster"].unique())}
+    final_clusters["cluster"] = final_clusters["cluster"].map(cluster_mapping)
+    final_clusters["cluster"] = final_clusters["cluster"].astype(str)
 
     ################################################
     # Prepare outputs!!!!
     ################################################
+
     property_attributes.reset_index(inplace=True)
+    property_attributes = property_attributes.merge(
+        final_clusters, how="left", on="internal_id"
+    )
     property_attributes["archetype_representative"] = property_attributes["rank"] == 1
 
     asset_list_with_archetypes = asset_list.merge(
@@ -1834,7 +1912,7 @@ def compile_data_final():
     asset_list_with_archetypes["archetype_representative"] = asset_list_with_archetypes[
         "archetype_representative"].fillna(False)
 
-    asset_list_with_archetypes.to_csv("Stonewater asset list with archetypes.csv", index=False)
+    asset_list_with_archetypes.to_csv("Stonewater asset list with archetypes V2.csv", index=False)
 
     stonewater_uprn_lookup = asset_list_with_archetypes[
         ["external_address_id", "udprn", "uprn", "match_type", "standardised_address", "standardised_postcode"]

From 37780687eb4db19738091dd22e6b17e0e15a5c5a Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 28 Jun 2024 20:34:55 +0100
Subject: [PATCH 56/80] Basic setup of stonewater map app

---
 .../map_app/Stonewater Mapping Data.json      |  1 +
 etl/customers/stonewater/map_app/callbacks.py |  0
 etl/customers/stonewater/map_app/config.py    |  8 ++
 etl/customers/stonewater/map_app/map_page.py  | 94 +++++++++++++++++++
 .../stonewater/map_app/requirements.txt       | 12 +++
 etl/customers/stonewater/map_app/server.py    | 45 +++++++++
 etl/customers/stonewater/map_app/wsgi.py      |  8 ++
 7 files changed, 168 insertions(+)
 create mode 100644 etl/customers/stonewater/map_app/Stonewater Mapping Data.json
 create mode 100644 etl/customers/stonewater/map_app/callbacks.py
 create mode 100644 etl/customers/stonewater/map_app/config.py
 create mode 100644 etl/customers/stonewater/map_app/map_page.py
 create mode 100644 etl/customers/stonewater/map_app/requirements.txt
 create mode 100644 etl/customers/stonewater/map_app/server.py
 create mode 100644 etl/customers/stonewater/map_app/wsgi.py

diff --git a/etl/customers/stonewater/map_app/Stonewater Mapping Data.json b/etl/customers/stonewater/map_app/Stonewater Mapping Data.json
new file mode 100644
index 00000000..0d2978c4
--- /dev/null
+++ b/etl/customers/stonewater/map_app/Stonewater Mapping Data.json	
@@ -0,0 +1 @@
+[{"uprn": 100050346525.0, "standardised_address": "32 VICTORIA ROAD, CROSS HILLS, KEIGHLEY", "standardised_postcode": "BD20 8SY", "LONGITUDE": -1.995112, "LATITUDE": 53.8997423}, {"uprn": 100050344995.0, "standardised_address": "20, Lang Kirk Close, Farnhill", "standardised_postcode": "BD20 9AR", "LONGITUDE": -1.9846986, "LATITUDE": 53.9106527}, {"uprn": 100050344996.0, "standardised_address": "22 LANG KIRK CLOSE, FARNHILL, KEIGHLEY", "standardised_postcode": "BD20 9AR", "LONGITUDE": -1.984729, "LATITUDE": 53.9107785}, {"uprn": 100050354230.0, "standardised_address": "34, Neville Road, Gargrave", "standardised_postcode": "BD23 3RE", "LONGITUDE": -2.1094417, "LATITUDE": 53.9833594}, {"uprn": 100050354235.0, "standardised_address": "39, NEVILLE ROAD, GARGRAVE, SKIPTON, BD23 3RE", "standardised_postcode": "BD23 3RE", "LONGITUDE": -2.1090613, "LATITUDE": 53.9836833}, {"uprn": 100071236000.0, "standardised_address": "3, Sorrell Place", "standardised_postcode": "CV10 7AY", "LONGITUDE": -1.4620998, "LATITUDE": 52.5044784}, {"uprn": 100030002639.0, "standardised_address": "6 St. 
Martins Court, Church Street, Amber Valley, DE55 7AH", "standardised_postcode": "DE55 7AH", "LONGITUDE": -1.3916142, "LATITUDE": 53.0973039}, {"uprn": 83008532.0, "standardised_address": "66, Belmont Terrace, Linthwaite", "standardised_postcode": "HD7 5SF", "LONGITUDE": -1.8505805, "LATITUDE": 53.6249856}, {"uprn": 83146613.0, "standardised_address": "35, Thorpes Crescent, Skelmanthorpe", "standardised_postcode": "HD8 9DH", "LONGITUDE": -1.6465264, "LATITUDE": 53.5871495}, {"uprn": 10010168062.0, "standardised_address": "2, HARVEST COURT, HALIFAX, HX1 5DU", "standardised_postcode": "HX1 5DU", "LONGITUDE": -1.869757, "LATITUDE": 53.722287}, {"uprn": 10006738671.0, "standardised_address": "6, DEAN COURT, HALIFAX, HX3 0UX", "standardised_postcode": "HX3 0UX", "LONGITUDE": -1.8767188, "LATITUDE": 53.6993657}, {"uprn": 100051314061.0, "standardised_address": "21, CHAPEL CLOSE, HOLYWELL GREEN, HALIFAX, HX4 9BF", "standardised_postcode": "HX4 9BF", "LONGITUDE": -1.8679148, "LATITUDE": 53.6762656}, {"uprn": 100051314049.0, "standardised_address": "2, Chapel Close, Holywell Green", "standardised_postcode": "HX4 9BF", "LONGITUDE": -1.8673862, "LATITUDE": 53.6758786}, {"uprn": 200002737104.0, "standardised_address": "35, Bradley View, Holywell Green", "standardised_postcode": "HX4 9DN", "LONGITUDE": -1.8707939, "LATITUDE": 53.675307}, {"uprn": 200001826125.0, "standardised_address": "56, BRADLEY VIEW, HOLYWELL GREEN, HALIFAX, HX4 9DN", "standardised_postcode": "HX4 9DN", "LONGITUDE": -1.871579, "LATITUDE": 53.675982}, {"uprn": 100052043238.0, "standardised_address": "50 Marton Heights, Hollins Lane", "standardised_postcode": "HX6 2RZ", "LONGITUDE": -1.9191337, "LATITUDE": 53.7123626}, {"uprn": 100052043248.0, "standardised_address": "6 Marton Heights, Hollins Lane", "standardised_postcode": "HX6 2RZ", "LONGITUDE": -1.9174519, "LATITUDE": 53.7124064}, {"uprn": 100052043251.0, "standardised_address": "8 Marton Heights, Hollins Lane", "standardised_postcode": "HX6 2RZ", 
"LONGITUDE": -1.9177397, "LATITUDE": 53.7124695}, {"uprn": 100052043217.0, "standardised_address": "31 Marton Heights, Hollins Lane", "standardised_postcode": "HX6 2RZ", "LONGITUDE": -1.9200123, "LATITUDE": 53.712489}, {"uprn": 10004000472.0, "standardised_address": "26, BENTLEY CLOSE, LOUGHBOROUGH, LE11 1SY", "standardised_postcode": "LE11 1SY", "LONGITUDE": -1.1965799, "LATITUDE": 52.7702045}, {"uprn": 10004000480.0, "standardised_address": "37, BENTLEY CLOSE, LOUGHBOROUGH, LE11 1SY", "standardised_postcode": "LE11 1SY", "LONGITUDE": -1.1970509, "LATITUDE": 52.7698738}, {"uprn": 200000844913.0, "standardised_address": "17, SADDLERS CLOSE, LOUGHBOROUGH, LE11 5HD", "standardised_postcode": "LE11 5HD", "LONGITUDE": -1.2189761, "LATITUDE": 52.7809828}, {"uprn": 100030549024.0, "standardised_address": "7, YEW TREE CRESCENT, MELTON MOWBRAY, LE13 1LN", "standardised_postcode": "LE13 1LN", "LONGITUDE": -0.8849264, "LATITUDE": 52.7756315}, {"uprn": 100030549068.0, "standardised_address": "51, YEW TREE CRESCENT, MELTON MOWBRAY, LE13 1LN", "standardised_postcode": "LE13 1LN", "LONGITUDE": -0.8856134, "LATITUDE": 52.7748558}, {"uprn": 2465087590.0, "standardised_address": "69, LOUGHBOROUGH ROAD, LEICESTER, LE4 5LL", "standardised_postcode": "LE4 5LL", "LONGITUDE": -1.1223645, "LATITUDE": 52.6532303}, {"uprn": 10033732069.0, "standardised_address": "118, HIGH STREET, EARL SHILTON, LEICESTER, LE9 7DG", "standardised_postcode": "LE9 7DG", "LONGITUDE": -1.3115505, "LATITUDE": 52.5764223}, {"uprn": 100032081116.0, "standardised_address": "22 Peter Dyer Court, Seacroft Road", "standardised_postcode": "LN12 2DT", "LONGITUDE": 0.2614919, "LATITUDE": 53.3389736}, {"uprn": 72266102.0, "standardised_address": "24, Petersfield Avenue", "standardised_postcode": "LS10 3PF", "LONGITUDE": -1.5282803, "LATITUDE": 53.7589478}, {"uprn": 72266110.0, "standardised_address": "35, PETERSFIELD AVENUE, LEEDS, LS10 3PF", "standardised_postcode": "LS10 3PF", "LONGITUDE": -1.5279768, "LATITUDE": 
53.7589556}, {"uprn": 72304733.0, "standardised_address": "2, Warwick Court, Horsforth", "standardised_postcode": "LS18 4TB", "LONGITUDE": -1.6366988, "LATITUDE": 53.8328644}, {"uprn": 100080154916.0, "standardised_address": "16, IVY ROAD, LUTON, LU1 1DN", "standardised_postcode": "LU1 1DN", "LONGITUDE": -0.4296342, "LATITUDE": 51.8852366}, {"uprn": 100080163981.0, "standardised_address": "51, Newcombe Road", "standardised_postcode": "LU1 1LH", "LONGITUDE": -0.4315661, "LATITUDE": 51.880245}, {"uprn": 100080171727.0, "standardised_address": "85, Runley Road", "standardised_postcode": "LU1 1TX", "LONGITUDE": -0.4477962, "LATITUDE": 51.881881}, {"uprn": 100080141071.0, "standardised_address": "22, DELPHINE CLOSE, LUTON, LU1 5RE", "standardised_postcode": "LU1 5RE", "LONGITUDE": -0.4396275, "LATITUDE": 51.8756855}, {"uprn": 100080152967.0, "standardised_address": "187, HITCHIN ROAD, LUTON, LU2 0EP", "standardised_postcode": "LU2 0EP", "LONGITUDE": -0.4046343, "LATITUDE": 51.8875165}, {"uprn": 100080139727.0, "standardised_address": "89, CROMWELL ROAD, LUTON, LU3 1DP", "standardised_postcode": "LU3 1DP", "LONGITUDE": -0.4244653, "LATITUDE": 51.8859587}, {"uprn": 100080177405.0, "standardised_address": "81, SWAN MEAD, LUTON, LU4 0YP", "standardised_postcode": "LU4 0YP", "LONGITUDE": -0.4837081, "LATITUDE": 51.9040007}, {"uprn": 100080173090.0, "standardised_address": "28, Selbourne Road", "standardised_postcode": "LU4 8LP", "LONGITUDE": -0.4331933, "LATITUDE": 51.8894382}, {"uprn": 100080100758.0, "standardised_address": "15 MILL ROAD, HOUGHTON REGIS", "standardised_postcode": "LU5 5BD", "LONGITUDE": -0.5277522, "LATITUDE": 51.9026985}, {"uprn": 10001025868.0, "standardised_address": "43 KINGSLAND CLOSE, HOUGHTON REGIS", "standardised_postcode": "LU5 5UT", "LONGITUDE": -0.5022206, "LATITUDE": 51.9055144}, {"uprn": 10001025856.0, "standardised_address": "37 Kingsland Close, Houghton Regis", "standardised_postcode": "LU5 5UT", "LONGITUDE": -0.5027665, "LATITUDE": 
51.9057101}, {"uprn": 10001023485.0, "standardised_address": "16, WILLOUGHBY CLOSE, DUNSTABLE, LU6 3TF", "standardised_postcode": "LU6 3TF", "LONGITUDE": -0.5131682, "LATITUDE": 51.8799288}, {"uprn": 100080120241.0, "standardised_address": "58, VIMY ROAD, LEIGHTON BUZZARD, LU7 1FQ", "standardised_postcode": "LU7 1FQ", "LONGITUDE": -0.6684013, "LATITUDE": 51.9218008}, {"uprn": 10001024491.0, "standardised_address": "11 Stephenson Close", "standardised_postcode": "LU7 2NE", "LONGITUDE": -0.6776464, "LATITUDE": 51.9125089}, {"uprn": 25002516.0, "standardised_address": "19, COTTESLOE COURT, STONY STRATFORD, MILTON KEYNES, MK11 1NL", "standardised_postcode": "MK11 1NL", "LONGITUDE": -0.8374092, "LATITUDE": 52.0540273}, {"uprn": 100080016298.0, "standardised_address": "11, Grafton Road", "standardised_postcode": "MK40 1DH", "LONGITUDE": -0.4762835, "LATITUDE": 52.1356448}, {"uprn": 100081334233.0, "standardised_address": "24, CHAUCER ROAD, BEDFORD, MK40 2AJ", "standardised_postcode": "MK40 2AJ", "LONGITUDE": -0.4820862, "LATITUDE": 52.1402846}, {"uprn": 10002971391.0, "standardised_address": "ROOM 5, 13, FOSTER HILL ROAD, BEDFORD, MK40 2ES", "standardised_postcode": "MK40 2ES", "LONGITUDE": -0.4680169, "LATITUDE": 52.141748}, {"uprn": 100080995116.0, "standardised_address": "27C, KIMBOLTON ROAD, BEDFORD, MK40 2NY", "standardised_postcode": "MK40 2NY", "LONGITUDE": -0.4587805, "LATITUDE": 52.1433879}, {"uprn": 10033179536.0, "standardised_address": "95, CROWE ROAD, BEDFORD, MK40 4FY", "standardised_postcode": "MK40 4FY", "LONGITUDE": -0.4812176, "LATITUDE": 52.1366436}, {"uprn": 10033179410.0, "standardised_address": "95, Henley Road", "standardised_postcode": "MK40 4FZ", "LONGITUDE": -0.4803204, "LATITUDE": 52.1355261}, {"uprn": 10033179403.0, "standardised_address": "81, HENLEY ROAD, BEDFORD, MK40 4FZ", "standardised_postcode": "MK40 4FZ", "LONGITUDE": -0.480418, "LATITUDE": 52.1356622}, {"uprn": 100080010125.0, "standardised_address": "19, CROMWELL ROAD, BEDFORD, MK40 
4LR", "standardised_postcode": "MK40 4LR", "LONGITUDE": -0.482858, "LATITUDE": 52.1335177}, {"uprn": 100080025762.0, "standardised_address": "95, Mallard Hill", "standardised_postcode": "MK41 7QU", "LONGITUDE": -0.4682452, "LATITUDE": 52.1531728}, {"uprn": 10002966181.0, "standardised_address": "22, LEWES GARDENS, BEDFORD, MK41 8NW", "standardised_postcode": "MK41 8NW", "LONGITUDE": -0.4331692, "LATITUDE": 52.1566033}, {"uprn": 10002966183.0, "standardised_address": "26, Lewes Gardens", "standardised_postcode": "MK41 8NW", "LONGITUDE": -0.4333292, "LATITUDE": 52.1566506}, {"uprn": 10002966213.0, "standardised_address": "58, EXETER WALK, BEDFORD, MK41 8QN", "standardised_postcode": "MK41 8QN", "LONGITUDE": -0.4327389, "LATITUDE": 52.1573743}, {"uprn": 100080011160.0, "standardised_address": "12 Devizes Avenue, Bedford, MK41 8QT", "standardised_postcode": "MK41 8QT", "LONGITUDE": -0.4345664, "LATITUDE": 52.1569295}, {"uprn": 100080993516.0, "standardised_address": "11 RAGLAN COURT, DEVIZES AVENUE, BEDFORD, MK41 8QT", "standardised_postcode": "MK41 8QT", "LONGITUDE": -0.4341117, "LATITUDE": 52.1565548}, {"uprn": 10002966248.0, "standardised_address": "112, EXETER WALK, BEDFORD, MK41 8QW", "standardised_postcode": "MK41 8QW", "LONGITUDE": -0.4350041, "LATITUDE": 52.1573586}, {"uprn": 100080023488.0, "standardised_address": "1, Kirkman Close", "standardised_postcode": "MK42 0HY", "LONGITUDE": -0.4560605, "LATITUDE": 52.1251283}, {"uprn": 100080994421.0, "standardised_address": "16, BARTRAM COURT, 123, HIGH STREET, KEMPSTON, BEDFORD, MK42 7BP", "standardised_postcode": "MK42 7BP", "LONGITUDE": -0.505862, "LATITUDE": 52.1144562}, {"uprn": 100080020671.0, "standardised_address": "227, Hillgrounds Road, Kempston", "standardised_postcode": "MK42 8HW", "LONGITUDE": -0.4966793, "LATITUDE": 52.1269081}, {"uprn": 100080020676.0, "standardised_address": "237, Hillgrounds Road, Kempston", "standardised_postcode": "MK42 8HW", "LONGITUDE": -0.4962226, "LATITUDE": 52.1270414}, 
{"uprn": 100080037472.0, "standardised_address": "98, St. Johns Avenue, Kempston", "standardised_postcode": "MK42 8JR", "LONGITUDE": -0.4971843, "LATITUDE": 52.1133086}, {"uprn": 100081210880.0, "standardised_address": "64, AMPTHILL ROAD, BEDFORD, MK42 9HP", "standardised_postcode": "MK42 9HP", "LONGITUDE": -0.4711524, "LATITUDE": 52.1257766}, {"uprn": 10002970239.0, "standardised_address": "17, Davis Close", "standardised_postcode": "MK42 9LG", "LONGITUDE": -0.467977, "LATITUDE": 52.1232364}, {"uprn": 100080049331.0, "standardised_address": "26, Burridge Close, Marston Moretaine", "standardised_postcode": "MK43 0SG", "LONGITUDE": -0.5481185, "LATITUDE": 52.0673686}, {"uprn": 100080008976.0, "standardised_address": "5, Colley Close, Colmworth", "standardised_postcode": "MK44 2HE", "LONGITUDE": -0.3845779, "LATITUDE": 52.2116856}, {"uprn": 100080047408.0, "standardised_address": "86, Ailesbury Road, Ampthill", "standardised_postcode": "MK45 2XD", "LONGITUDE": -0.4882962, "LATITUDE": 52.0291749}, {"uprn": 10014614558.0, "standardised_address": "1 Pembroke Close, Houghton Conquest", "standardised_postcode": "MK45 3FH", "LONGITUDE": -0.4761796, "LATITUDE": 52.0615268}, {"uprn": 100080051765.0, "standardised_address": "2 FISHERS CLOSE, UPPER GRAVENHURST", "standardised_postcode": "MK45 4LJ", "LONGITUDE": -0.3766392, "LATITUDE": 52.0106445}, {"uprn": 25068208.0, "standardised_address": "14, Colne, Tinkers Bridge", "standardised_postcode": "MK6 3DJ", "LONGITUDE": -0.7241388, "LATITUDE": 52.0203873}, {"uprn": 25069053.0, "standardised_address": "185, BEADLEMEAD, NETHERFIELD, MILTON KEYNES, MK6 4HU", "standardised_postcode": "MK6 4HU", "LONGITUDE": -0.7286182, "LATITUDE": 52.0202019}, {"uprn": 25052926.0, "standardised_address": "56, CHERVIL, BEANHILL, MILTON KEYNES, MK6 4LG", "standardised_postcode": "MK6 4LG", "LONGITUDE": -0.7383444, "LATITUDE": 52.0184541}, {"uprn": 25054338.0, "standardised_address": "12, Osprey Close, Eaglestone", "standardised_postcode": "MK6 5BQ", 
"LONGITUDE": -0.7403678, "LATITUDE": 52.0299488}, {"uprn": 10093919581.0, "standardised_address": "9, ABIGAR CLOSE, WHITEHOUSE, MILTON KEYNES, MK8 1EN", "standardised_postcode": "MK8 1EN", "LONGITUDE": -0.8157866, "LATITUDE": 52.0314226}, {"uprn": 100031517329.0, "standardised_address": "37 Abbey Lodge, Baslow Drive, Beeston, Nottingham, NG9 2RZ", "standardised_postcode": "NG9 2RZ", "LONGITUDE": -1.207631, "LATITUDE": 52.9383028}, {"uprn": 100031531710.0, "standardised_address": "68 Abbey Lodge, Charles Avenue, Beeston, Nottingham, NG9 2SY", "standardised_postcode": "NG9 2SY", "LONGITUDE": -1.2088379, "LATITUDE": 52.938212}, {"uprn": 100031074426.0, "standardised_address": "13, ST. DUNSTANS CLOSE, KETTERING, NN15 5JE", "standardised_postcode": "NN15 5JE", "LONGITUDE": -0.6934448, "LATITUDE": 52.3923711}, {"uprn": 100031074430.0, "standardised_address": "17, St. Dunstans Close", "standardised_postcode": "NN15 5JE", "LONGITUDE": -0.6933767, "LATITUDE": 52.3921905}, {"uprn": 15007202.0, "standardised_address": "4, Kirton End", "standardised_postcode": "NN3 8FD", "LONGITUDE": -0.8198817, "LATITUDE": 52.267948}, {"uprn": 15007203.0, "standardised_address": "5, Kirton End", "standardised_postcode": "NN3 8FD", "LONGITUDE": -0.8198954, "LATITUDE": 52.2679841}, {"uprn": 10000861159.0, "standardised_address": "8, CLUNY WAY, ARLESEY, SG15 6ZB", "standardised_postcode": "SG15 6ZB", "LONGITUDE": -0.2630433, "LATITUDE": 52.0124857}, {"uprn": 100080067685.0, "standardised_address": "20, Reynolds Close", "standardised_postcode": "SG18 0QL", "LONGITUDE": -0.2602507, "LATITUDE": 52.0913964}, {"uprn": 100080067686.0, "standardised_address": "21 REYNOLDS CLOSE, BIGGLESWADE", "standardised_postcode": "SG18 0QL", "LONGITUDE": -0.2603335, "LATITUDE": 52.091385}, {"uprn": 100080080505.0, "standardised_address": "24, SKIPTON CLOSE, SANDY, SG19 1UB", "standardised_postcode": "SG19 1UB", "LONGITUDE": -0.2865906, "LATITUDE": 52.1338632}, {"uprn": 83104312.0, "standardised_address": "59, 
Johnson Street", "standardised_postcode": "WF14 8PQ", "LONGITUDE": -1.6989579, "LATITUDE": 53.670334}, {"uprn": 83177389.0, "standardised_address": "11, Primrose Gardens", "standardised_postcode": "WF17 0PZ", "LONGITUDE": -1.6201405, "LATITUDE": 53.7163994}, {"uprn": 83142904.0, "standardised_address": "64, Low Lane, Birstall", "standardised_postcode": "WF17 9HD", "LONGITUDE": -1.6661726, "LATITUDE": 53.7324005}, {"uprn": 83143336.0, "standardised_address": "7, MUSGRAVE STREET, BIRSTALL, BATLEY, WF17 9PF", "standardised_postcode": "WF17 9PF", "LONGITUDE": -1.6664882, "LATITUDE": 53.7322235}, {"uprn": 63013649.0, "standardised_address": "13, Broadacre Road", "standardised_postcode": "WF5 0QR", "LONGITUDE": -1.5691375, "LATITUDE": 53.6774393}, {"uprn": 10000832751.0, "standardised_address": "3, JUBILEE VILLAS, MAMBLE, KIDDERMINSTER, DY14 9JH", "standardised_postcode": "DY14 9JH", "LONGITUDE": -2.4549656, "LATITUDE": 52.3407598}, {"uprn": 10014089919.0, "standardised_address": "4, Spilsbury View, Mamble", "standardised_postcode": "DY14 9JJ", "LONGITUDE": -2.4544103, "LATITUDE": 52.3410046}, {"uprn": 10000830555.0, "standardised_address": "6, The Leasowes, Bayton", "standardised_postcode": "DY14 9NA", "LONGITUDE": -2.4474051, "LATITUDE": 52.3566115}, {"uprn": 100120589226.0, "standardised_address": "7 The Beeches, Mamble", "standardised_postcode": "DY14 9PD", "LONGITUDE": -2.4558902, "LATITUDE": 52.3407472}, {"uprn": 100121247799.0, "standardised_address": "14, Wesley Court, All Saints Road", "standardised_postcode": "GL1 4EF", "LONGITUDE": -2.2377897, "LATITUDE": 51.8605575}, {"uprn": 100120502788.0, "standardised_address": "28, BRAMBLE DRIVE, CAM, DURSLEY, GL11 5PX", "standardised_postcode": "GL11 5PX", "LONGITUDE": -2.362587, "LATITUDE": 51.6908247}, {"uprn": 100120502765.0, "standardised_address": "5, Bramble Drive, Cam", "standardised_postcode": "GL11 5PX", "LONGITUDE": -2.3621281, "LATITUDE": 51.6913296}, {"uprn": 10006832500.0, "standardised_address": "65, 
Midland Court", "standardised_postcode": "GL7 1JZ", "LONGITUDE": -1.9623299, "LATITUDE": 51.7089224}, {"uprn": 10009135124.0, "standardised_address": "45, Sapperton", "standardised_postcode": "GL7 6LQ", "LONGITUDE": -2.0804444, "LATITUDE": 51.7270088}, {"uprn": 200002589431.0, "standardised_address": "6, Caldervale, Bodenham", "standardised_postcode": "HR1 3LB", "LONGITUDE": -2.6677089, "LATITUDE": 52.1597419}, {"uprn": 200002599852.0, "standardised_address": "6, Cornewall Close, Moccas", "standardised_postcode": "HR2 9LG", "LONGITUDE": -2.9400679, "LATITUDE": 52.0787247}, {"uprn": 200002600043.0, "standardised_address": "12, Gosmore Road, Clehonger", "standardised_postcode": "HR2 9SN", "LONGITUDE": -2.8030098, "LATITUDE": 52.0335617}, {"uprn": 200002600713.0, "standardised_address": "6, THE COURTLANDS, WINFORTON, HEREFORD, HR3 6EF", "standardised_postcode": "HR3 6EF", "LONGITUDE": -3.0308025, "LATITUDE": 52.117301}, {"uprn": 200002600714.0, "standardised_address": "7, The Courtlands, Winforton", "standardised_postcode": "HR3 6EF", "LONGITUDE": -3.0309782, "LATITUDE": 52.1173174}, {"uprn": 200002600633.0, "standardised_address": "25 WEST VIEW, ALMELEY", "standardised_postcode": "HR3 6LE", "LONGITUDE": -2.9765856, "LATITUDE": 52.1599482}, {"uprn": 200002600640.0, "standardised_address": "9 WEST VIEW, ALMELEY", "standardised_postcode": "HR3 6LE", "LONGITUDE": -2.9776456, "LATITUDE": 52.1602901}, {"uprn": 10009580850.0, "standardised_address": "Questmore Cottage, Eardisley", "standardised_postcode": "HR3 6LW", "LONGITUDE": -3.0174005, "LATITUDE": 52.1638316}, {"uprn": 200002600667.0, "standardised_address": "2, Manor Close, Almeley", "standardised_postcode": "HR3 6NF", "LONGITUDE": -2.9777219, "LATITUDE": 52.159768}, {"uprn": 200002600745.0, "standardised_address": "18 ORCHARD CLOSE, EARDISLEY", "standardised_postcode": "HR3 6NP", "LONGITUDE": -3.0049396, "LATITUDE": 52.1369089}, {"uprn": 200002630372.0, "standardised_address": "7 NEW BARNFIELDS, STRETTON SUGWAS", 
"standardised_postcode": "HR4 7AZ", "LONGITUDE": -2.7937164, "LATITUDE": 52.0784081}, {"uprn": 200002630153.0, "standardised_address": "15 Brookside, Canon Pyon", "standardised_postcode": "HR4 8NY", "LONGITUDE": -2.7860426, "LATITUDE": 52.1366665}, {"uprn": 10007371180.0, "standardised_address": "1 CUCKOO PEN, KINGS PYON, HEREFORD, HR4 8PT", "standardised_postcode": "HR4 8PT", "LONGITUDE": -2.8223066, "LATITUDE": 52.1517035}, {"uprn": 10007360456.0, "standardised_address": "Flat 3, Whitehill House, Kington Road, Weobley, Herefordshire, County of, HR4 8QT", "standardised_postcode": "HR4 8QT", "LONGITUDE": -2.8871351, "LATITUDE": 52.1637664}, {"uprn": 10007360557.0, "standardised_address": "4 The Close, Burton Gardens, Weobley, Herefordshire, County of, HR4 8RQ", "standardised_postcode": "HR4 8RQ", "LONGITUDE": -2.8713531, "LATITUDE": 52.158175}, {"uprn": 200002606366.0, "standardised_address": "26, Burton Gardens, Weobley", "standardised_postcode": "HR4 8SR", "LONGITUDE": -2.870072, "LATITUDE": 52.1584452}, {"uprn": 200002606471.0, "standardised_address": "19 BURTON CRESCENT, WEOBLEY", "standardised_postcode": "HR4 8TB", "LONGITUDE": -2.8686872, "LATITUDE": 52.157898}, {"uprn": 200002610232.0, "standardised_address": "Flat 18, Caldwell Court, Walmer Street, Herefordshire, County of, HR4 9JD", "standardised_postcode": "HR4 9JD", "LONGITUDE": -2.7209647, "LATITUDE": 52.0587273}, {"uprn": 200002610239.0, "standardised_address": "Flat 25, Caldwell Court, Walmer Street, Herefordshire, County of, HR4 9JD", "standardised_postcode": "HR4 9JD", "LONGITUDE": -2.720922, "LATITUDE": 52.0587905}, {"uprn": 10007369692.0, "standardised_address": "29, GREENFIELDS, KINGTON, HR5 3AA", "standardised_postcode": "HR5 3AA", "LONGITUDE": -3.0276747, "LATITUDE": 52.2055563}, {"uprn": 200002611192.0, "standardised_address": "2, THE CRESCENT, KINGTON, HR5 3AS", "standardised_postcode": "HR5 3AS", "LONGITUDE": -3.03446, "LATITUDE": 52.2028267}, {"uprn": 200002610772.0, "standardised_address": 
"14, PARK ROAD, KINGTON, HR5 3AW", "standardised_postcode": "HR5 3AW", "LONGITUDE": -3.0366914, "LATITUDE": 52.2025104}, {"uprn": 200002611451.0, "standardised_address": "Flat 6 The Old Mill, 1, The Square", "standardised_postcode": "HR5 3BA", "LONGITUDE": -3.0321263, "LATITUDE": 52.2040909}, {"uprn": 200002610986.0, "standardised_address": "41, HATTON GARDENS, KINGTON, HR5 3DD", "standardised_postcode": "HR5 3DD", "LONGITUDE": -3.0211028, "LATITUDE": 52.2054965}, {"uprn": 200002611663.0, "standardised_address": "10, PASSEY COURT, THE SQUARE, KINGTON, HR5 3EE", "standardised_postcode": "HR5 3EE", "LONGITUDE": -3.0313311, "LATITUDE": 52.2042027}, {"uprn": 200002611138.0, "standardised_address": "6 Markwick Close", "standardised_postcode": "HR5 3UE", "LONGITUDE": -3.0305986, "LATITUDE": 52.2022671}, {"uprn": 200002631485.0, "standardised_address": "15 Westland View, Luston", "standardised_postcode": "HR6 0EA", "LONGITUDE": -2.7559424, "LATITUDE": 52.2641453}, {"uprn": 200002611869.0, "standardised_address": "6, Stockton Rock, Kimbolton", "standardised_postcode": "HR6 0JE", "LONGITUDE": -2.705673, "LATITUDE": 52.2478961}, {"uprn": 200002611690.0, "standardised_address": "6, Hengrave Green, Ivington", "standardised_postcode": "HR6 0JL", "LONGITUDE": -2.771841, "LATITUDE": 52.2051174}, {"uprn": 200002611760.0, "standardised_address": "9, HENGRAVE GREEN, IVINGTON, LEOMINSTER, HR6 0JL", "standardised_postcode": "HR6 0JL", "LONGITUDE": -2.7726156, "LATITUDE": 52.2050583}, {"uprn": 200002631301.0, "standardised_address": "102, Humber Close, Steensbridge", "standardised_postcode": "HR6 0LT", "LONGITUDE": -2.6696194, "LATITUDE": 52.2115774}, {"uprn": 200002631307.0, "standardised_address": "108, HUMBER CLOSE, STEENSBRIDGE, LEOMINSTER, HR6 0LT", "standardised_postcode": "HR6 0LT", "LONGITUDE": -2.6688721, "LATITUDE": 52.2115187}, {"uprn": 200002631317.0, "standardised_address": "119, HUMBER CLOSE, STEENSBRIDGE, LEOMINSTER, HR6 0LT", "standardised_postcode": "HR6 0LT", 
"LONGITUDE": -2.6702553, "LATITUDE": 52.2110433}, {"uprn": 200002611946.0, "standardised_address": "19, Cherrybrook Close, Hope-under-Dinmore", "standardised_postcode": "HR6 0PW", "LONGITUDE": -2.7212073, "LATITUDE": 52.1716237}, {"uprn": 200002611963.0, "standardised_address": "34, Cherrybrook Close, Hope-under-Dinmore", "standardised_postcode": "HR6 0PW", "LONGITUDE": -2.7207064, "LATITUDE": 52.171393}, {"uprn": 200002611958.0, "standardised_address": "3, Cherrybrook Close, Hope-under-Dinmore", "standardised_postcode": "HR6 0PW", "LONGITUDE": -2.7198309, "LATITUDE": 52.1715062}, {"uprn": 200002611938.0, "standardised_address": "11, CHERRYBROOK CLOSE, HOPE-UNDER-DINMORE, LEOMINSTER, HR6 0PW", "standardised_postcode": "HR6 0PW", "LONGITUDE": -2.7205116, "LATITUDE": 52.1720055}, {"uprn": 10007361923.0, "standardised_address": "1, JOHN ABEL CLOSE, LEOMINSTER, HR6 8AG", "standardised_postcode": "HR6 8AG", "LONGITUDE": -2.7349774, "LATITUDE": 52.2227768}, {"uprn": 200002612332.0, "standardised_address": "1 FALCONER PLACE", "standardised_postcode": "HR6 8AP", "LONGITUDE": -2.7342632, "LATITUDE": 52.2265124}, {"uprn": 200002612389.0, "standardised_address": "31, Worcester Road", "standardised_postcode": "HR6 8AU", "LONGITUDE": -2.731483, "LATITUDE": 52.2248304}, {"uprn": 200002612795.0, "standardised_address": "7, Kenwater Close", "standardised_postcode": "HR6 8DL", "LONGITUDE": -2.7422995, "LATITUDE": 52.2299145}, {"uprn": 200002612771.0, "standardised_address": "12, Kenwater Close", "standardised_postcode": "HR6 8DL", "LONGITUDE": -2.7418443, "LATITUDE": 52.2298365}, {"uprn": 200002612893.0, "standardised_address": "22, PARADISE COURT, LEOMINSTER, HR6 8DY", "standardised_postcode": "HR6 8DY", "LONGITUDE": -2.7391676, "LATITUDE": 52.2308872}, {"uprn": 200002631238.0, "standardised_address": "97A, BRIDGE STREET, LEOMINSTER, HR6 8EA", "standardised_postcode": "HR6 8EA", "LONGITUDE": -2.74386, "LATITUDE": 52.2339146}, {"uprn": 200002612984.0, "standardised_address": "35, 
Ridgemoor Road", "standardised_postcode": "HR6 8EJ", "LONGITUDE": -2.7411285, "LATITUDE": 52.2331731}, {"uprn": 200002612993.0, "standardised_address": "51, RIDGEMOOR ROAD, LEOMINSTER, HR6 8EJ", "standardised_postcode": "HR6 8EJ", "LONGITUDE": -2.7409404, "LATITUDE": 52.2335823}, {"uprn": 200002613041.0, "standardised_address": "64, Ridgemoor Road", "standardised_postcode": "HR6 8EL", "LONGITUDE": -2.7395478, "LATITUDE": 52.2334921}, {"uprn": 200002613014.0, "standardised_address": "16, Ridgemoor Road", "standardised_postcode": "HR6 8EL", "LONGITUDE": -2.740019, "LATITUDE": 52.2327699}, {"uprn": 200002613032.0, "standardised_address": "48, RIDGEMOOR ROAD, LEOMINSTER, HR6 8EL", "standardised_postcode": "HR6 8EL", "LONGITUDE": -2.7390232, "LATITUDE": 52.2336483}, {"uprn": 200002613051.0, "standardised_address": "15, CHEATON CLOSE, LEOMINSTER, HR6 8EN", "standardised_postcode": "HR6 8EN", "LONGITUDE": -2.7391605, "LATITUDE": 52.233099}, {"uprn": 200002613143.0, "standardised_address": "66, CHEATON CLOSE, LEOMINSTER, HR6 8EW", "standardised_postcode": "HR6 8EW", "LONGITUDE": -2.7384096, "LATITUDE": 52.2328519}, {"uprn": 200002614594.0, "standardised_address": "73, BARGATES, LEOMINSTER, HR6 8HB", "standardised_postcode": "HR6 8HB", "LONGITUDE": -2.7446336, "LATITUDE": 52.22678}, {"uprn": 200002613305.0, "standardised_address": "5, PUMP PIECE, LEOMINSTER, HR6 8HR", "standardised_postcode": "HR6 8HR", "LONGITUDE": -2.7462416, "LATITUDE": 52.2248908}, {"uprn": 200002614704.0, "standardised_address": "34, SANDPITS, LEOMINSTER, HR6 8HT", "standardised_postcode": "HR6 8HT", "LONGITUDE": -2.7462558, "LATITUDE": 52.2239917}, {"uprn": 200002613457.0, "standardised_address": "2, GEORGE STREET, LEOMINSTER, HR6 8JZ", "standardised_postcode": "HR6 8JZ", "LONGITUDE": -2.741903, "LATITUDE": 52.2219602}, {"uprn": 200002613508.0, "standardised_address": "42, CROFT STREET, LEOMINSTER, HR6 8LA", "standardised_postcode": "HR6 8LA", "LONGITUDE": -2.7433093, "LATITUDE": 52.2228864}, {"uprn": 
200002613514.0, "standardised_address": "50, Croft Street", "standardised_postcode": "HR6 8LA", "LONGITUDE": -2.7438936, "LATITUDE": 52.2228108}, {"uprn": 200002613493.0, "standardised_address": "29, Croft Street", "standardised_postcode": "HR6 8LA", "LONGITUDE": -2.7422535, "LATITUDE": 52.2227852}, {"uprn": 200002613536.0, "standardised_address": "3, CONINGSBY ROAD, LEOMINSTER, HR6 8LL", "standardised_postcode": "HR6 8LL", "LONGITUDE": -2.7388428, "LATITUDE": 52.2241091}, {"uprn": 200002613531.0, "standardised_address": "11, CONINGSBY ROAD, LEOMINSTER, HR6 8LL", "standardised_postcode": "HR6 8LL", "LONGITUDE": -2.7383178, "LATITUDE": 52.2239957}, {"uprn": 200002613553.0, "standardised_address": "1, Eaton Close", "standardised_postcode": "HR6 8LQ", "LONGITUDE": -2.7378856, "LATITUDE": 52.2242331}, {"uprn": 200002631267.0, "standardised_address": "14, EATON CLOSE, LEOMINSTER, HR6 8LQ", "standardised_postcode": "HR6 8LQ", "LONGITUDE": -2.7374908, "LATITUDE": 52.2251436}, {"uprn": 200002613566.0, "standardised_address": "22, EATON CLOSE, LEOMINSTER, HR6 8LQ", "standardised_postcode": "HR6 8LQ", "LONGITUDE": -2.7370288, "LATITUDE": 52.224652}, {"uprn": 200002616236.0, "standardised_address": "FLAT 7, NEWMAN HOUSE, RYELANDS ROAD, LEOMINSTER, HR6 8PD", "standardised_postcode": "HR6 8PD", "LONGITUDE": -2.7448232, "LATITUDE": 52.2235533}, {"uprn": 200002613680.0, "standardised_address": "29, HOLLAND ROAD, LEOMINSTER, HR6 8PF", "standardised_postcode": "HR6 8PF", "LONGITUDE": -2.7437325, "LATITUDE": 52.224556}, {"uprn": 200002613705.0, "standardised_address": "31, Mortimer Street", "standardised_postcode": "HR6 8PG", "LONGITUDE": -2.7425265, "LATITUDE": 52.2233499}, {"uprn": 200002613737.0, "standardised_address": "21, WIGMORE STREET, LEOMINSTER, HR6 8PJ", "standardised_postcode": "HR6 8PJ", "LONGITUDE": -2.7446163, "LATITUDE": 52.2223539}, {"uprn": 200002613761.0, "standardised_address": "15, Wigmore Street", "standardised_postcode": "HR6 8PL", "LONGITUDE": -2.7443457, 
"LATITUDE": 52.2227091}, {"uprn": 10007370335.0, "standardised_address": "8, MORTIMER CLOSE, LEOMINSTER, HR6 8PQ", "standardised_postcode": "HR6 8PQ", "LONGITUDE": -2.7423152, "LATITUDE": 52.2238457}, {"uprn": 10023974314.0, "standardised_address": "ROOM 2, LENDOR, 9, BUCKFIELD ROAD, LEOMINSTER, HEREFORDSHIRE, HR6 8SF", "standardised_postcode": "HR6 8SF", "LONGITUDE": -2.7549361, "LATITUDE": 52.2258208}, {"uprn": 200002616762.0, "standardised_address": "50, Portna Way", "standardised_postcode": "HR6 9AD", "LONGITUDE": -2.7679625, "LATITUDE": 52.2220362}, {"uprn": 200002616765.0, "standardised_address": "56, PORTNA WAY, LEOMINSTER, HR6 9AD", "standardised_postcode": "HR6 9AD", "LONGITUDE": -2.7681663, "LATITUDE": 52.221972}, {"uprn": 200002616784.0, "standardised_address": "6, Portna Way", "standardised_postcode": "HR6 9AE", "LONGITUDE": -2.7661266, "LATITUDE": 52.2216886}, {"uprn": 200002616770.0, "standardised_address": "13, Portna Way", "standardised_postcode": "HR6 9AE", "LONGITUDE": -2.7667581, "LATITUDE": 52.2218013}, {"uprn": 200002616815.0, "standardised_address": "28, FOOTWAY CROFT, LEOMINSTER, HR6 9AG", "standardised_postcode": "HR6 9AG", "LONGITUDE": -2.7674867, "LATITUDE": 52.2216078}, {"uprn": 200002616876.0, "standardised_address": "2, Curl View, Pembridge", "standardised_postcode": "HR6 9ET", "LONGITUDE": -2.8940396, "LATITUDE": 52.2194034}, {"uprn": 200002616880.0, "standardised_address": "6, Curl View, Pembridge", "standardised_postcode": "HR6 9ET", "LONGITUDE": -2.894712, "LATITUDE": 52.2193533}, {"uprn": 200002617122.0, "standardised_address": "11, Curl View Crescent, Pembridge", "standardised_postcode": "HR6 9HQ", "LONGITUDE": -2.8945024, "LATITUDE": 52.2198494}, {"uprn": 200002631408.0, "standardised_address": "39 THE GROVE, SHOBDON", "standardised_postcode": "HR6 9NF", "LONGITUDE": -2.8869065, "LATITUDE": 52.2511987}, {"uprn": 200002616906.0, "standardised_address": "18 MOOR MEADOW, SHOBDON", "standardised_postcode": "HR6 9NT", "LONGITUDE": 
-2.8840648, "LATITUDE": 52.2515397}, {"uprn": 10009574826.0, "standardised_address": "2 The Village, Yatton", "standardised_postcode": "HR6 9TL", "LONGITUDE": -2.8369525, "LATITUDE": 52.2958029}, {"uprn": 200002622624.0, "standardised_address": "10, MILLER CRADDOCK WAY, LEDBURY, HR8 2XT", "standardised_postcode": "HR8 2XT", "LONGITUDE": -2.4284905, "LATITUDE": 52.0307915}, {"uprn": 200002622657.0, "standardised_address": "10, ASTON CLOSE, LEDBURY, HR8 2XU", "standardised_postcode": "HR8 2XU", "LONGITUDE": -2.4271925, "LATITUDE": 52.0307153}, {"uprn": 200002622662.0, "standardised_address": "16, Aston Close", "standardised_postcode": "HR8 2XU", "LONGITUDE": -2.4278182, "LATITUDE": 52.0306051}, {"uprn": 100071536782.0, "standardised_address": "2, Findon Way", "standardised_postcode": "SY3 5NA", "LONGITUDE": -2.8027488, "LATITUDE": 52.7101477}, {"uprn": 100071536880.0, "standardised_address": "113, Lambourn Drive", "standardised_postcode": "SY3 5NF", "LONGITUDE": -2.8030195, "LATITUDE": 52.7106552}, {"uprn": 10009574764.0, "standardised_address": "2 THE BROOK, LINGEN, BUCKNELL, SY7 0DY", "standardised_postcode": "SY7 0DY", "LONGITUDE": -2.9317576, "LATITUDE": 52.2975198}, {"uprn": 200002628293.0, "standardised_address": "21 ROSEMARY, LEINTWARDINE", "standardised_postcode": "SY7 0LR", "LONGITUDE": -2.8731711, "LATITUDE": 52.3614144}, {"uprn": 200002628296.0, "standardised_address": "24 ROSEMARY, LEINTWARDINE", "standardised_postcode": "SY7 0LR", "LONGITUDE": -2.8730416, "LATITUDE": 52.360804}, {"uprn": 200002628400.0, "standardised_address": "10 Lowe Croft, Leintwardine", "standardised_postcode": "SY7 0NP", "LONGITUDE": -2.8736761, "LATITUDE": 52.3609612}, {"uprn": 200002628392.0, "standardised_address": "2, Lowe Croft, Leintwardine", "standardised_postcode": "SY7 0NP", "LONGITUDE": -2.8736537, "LATITUDE": 52.3605657}, {"uprn": 200002628536.0, "standardised_address": "4, HALLETS WELL, ORLETON, LUDLOW, SY8 4HH", "standardised_postcode": "SY8 4HH", "LONGITUDE": 
-2.7409679, "LATITUDE": 52.3020458}, {"uprn": 200002628506.0, "standardised_address": "10 ST GEORGES CRESCENT, ORLETON", "standardised_postcode": "SY8 4HL", "LONGITUDE": -2.7420943, "LATITUDE": 52.3018769}, {"uprn": 200002628470.0, "standardised_address": "8 GREEN LANE, ORLETON", "standardised_postcode": "SY8 4JE", "LONGITUDE": -2.7581608, "LATITUDE": 52.3023411}, {"uprn": 10007370967.0, "standardised_address": "15 The Avenue, Wyson, Brimfield, Herefordshire, County of, SY8 4NJ", "standardised_postcode": "SY8 4NJ", "LONGITUDE": -2.7028498, "LATITUDE": 52.3093186}, {"uprn": 100120590397.0, "standardised_address": "6, Spring Gardens", "standardised_postcode": "WR15 8BE", "LONGITUDE": -2.5918475, "LATITUDE": 52.3107187}, {"uprn": 100120590377.0, "standardised_address": "8, SCOTLAND PLACE, TENBURY WELLS, WR15 8BT", "standardised_postcode": "WR15 8BT", "LONGITUDE": -2.5949436, "LATITUDE": 52.3118989}, {"uprn": 100120590206.0, "standardised_address": "24, KYRESIDE, TENBURY WELLS, WR15 8BU", "standardised_postcode": "WR15 8BU", "LONGITUDE": -2.5948807, "LATITUDE": 52.3050216}, {"uprn": 100120590212.0, "standardised_address": "30, Kyreside", "standardised_postcode": "WR15 8BU", "LONGITUDE": -2.5946937, "LATITUDE": 52.306389}, {"uprn": 100120590223.0, "standardised_address": "41, KYRESIDE, TENBURY WELLS, WR15 8BU", "standardised_postcode": "WR15 8BU", "LONGITUDE": -2.5946878, "LATITUDE": 52.3059485}, {"uprn": 100120590253.0, "standardised_address": "71, KYRESIDE, TENBURY WELLS, WR15 8BX", "standardised_postcode": "WR15 8BX", "LONGITUDE": -2.5932704, "LATITUDE": 52.3041486}, {"uprn": 100120589910.0, "standardised_address": "1, Bromyard Road", "standardised_postcode": "WR15 8BZ", "LONGITUDE": -2.5950627, "LATITUDE": 52.3043733}, {"uprn": 100120590015.0, "standardised_address": "19, CRESCENT PLACE, TENBURY WELLS, WR15 8DF", "standardised_postcode": "WR15 8DF", "LONGITUDE": -2.5947732, "LATITUDE": 52.3024688}, {"uprn": 100120590467.0, "standardised_address": "13, THE CRESCENT, 
TENBURY WELLS, WR15 8DG", "standardised_postcode": "WR15 8DG", "LONGITUDE": -2.5943843, "LATITUDE": 52.3030012}, {"uprn": 100120590458.0, "standardised_address": "4, THE CRESCENT, TENBURY WELLS, WR15 8DG", "standardised_postcode": "WR15 8DG", "LONGITUDE": -2.5934407, "LATITUDE": 52.3026284}, {"uprn": 100120590555.0, "standardised_address": "61, WHEELER ORCHARD, TENBURY WELLS, WR15 8DQ", "standardised_postcode": "WR15 8DQ", "LONGITUDE": -2.5955391, "LATITUDE": 52.3016289}, {"uprn": 100120590504.0, "standardised_address": "10, Wheeler Orchard", "standardised_postcode": "WR15 8DQ", "LONGITUDE": -2.5972002, "LATITUDE": 52.303005}, {"uprn": 100120590505.0, "standardised_address": "11, Wheeler Orchard", "standardised_postcode": "WR15 8DQ", "LONGITUDE": -2.5973032, "LATITUDE": 52.3030315}, {"uprn": 100120590513.0, "standardised_address": "19 Wheeler Orchard", "standardised_postcode": "WR15 8DQ", "LONGITUDE": -2.5962211, "LATITUDE": 52.3032707}, {"uprn": 100120590539.0, "standardised_address": "45 Wheeler Orchard", "standardised_postcode": "WR15 8DQ", "LONGITUDE": -2.5953731, "LATITUDE": 52.3023669}, {"uprn": 100120590126.0, "standardised_address": "4, GRASSY BANK, TENBURY WELLS, WR15 8DR", "standardised_postcode": "WR15 8DR", "LONGITUDE": -2.5936679, "LATITUDE": 52.3020698}, {"uprn": 100120590355.0, "standardised_address": "22, PEMBROKE AVENUE, TENBURY WELLS, WR15 8EH", "standardised_postcode": "WR15 8EH", "LONGITUDE": -2.5950524, "LATITUDE": 52.3090753}, {"uprn": 100120594666.0, "standardised_address": "32 Pembroke Gardens, Pembroke Avenue", "standardised_postcode": "WR15 8EH", "LONGITUDE": -2.5954208, "LATITUDE": 52.3091994}, {"uprn": 10000832235.0, "standardised_address": "2 Cutmill Bridge, Eardiston", "standardised_postcode": "WR15 8JN", "LONGITUDE": -2.4455624, "LATITUDE": 52.3121523}, {"uprn": 10000832237.0, "standardised_address": "4 Cutmill Bridge, Eardiston", "standardised_postcode": "WR15 8JN", "LONGITUDE": -2.4453422, "LATITUDE": 52.3121352}, {"uprn": 
10000832267.0, "standardised_address": "25, CUTMILL BRIDGE, EARDISTON, TENBURY WELLS, WR15 8JN", "standardised_postcode": "WR15 8JN", "LONGITUDE": -2.4442501, "LATITUDE": 52.311945}, {"uprn": 100120606489.0, "standardised_address": "3, Astley Orchard, Eastham", "standardised_postcode": "WR15 8NR", "LONGITUDE": -2.4883154, "LATITUDE": 52.3102573}, {"uprn": 100120606492.0, "standardised_address": "6 Astley Orchard, Eastham", "standardised_postcode": "WR15 8NR", "LONGITUDE": -2.4887428, "LATITUDE": 52.3104443}, {"uprn": 10000830371.0, "standardised_address": "1 Church Close, Stoke Bliss", "standardised_postcode": "WR15 8QJ", "LONGITUDE": -2.5145315, "LATITUDE": 52.2613192}, {"uprn": 100120590485.0, "standardised_address": "5 The Oaks, Stoke Bliss", "standardised_postcode": "WR15 8RR", "LONGITUDE": -2.5435427, "LATITUDE": 52.2649832}, {"uprn": 10014092735.0, "standardised_address": "5, Malt House Mews", "standardised_postcode": "WR15 8TZ", "LONGITUDE": -2.5954096, "LATITUDE": 52.312769}, {"uprn": 90120326.0, "standardised_address": "Flat 7, Beverley Court, Clarendon Place, Dudley, B62 9BE", "standardised_postcode": "B62 9BE", "LONGITUDE": -2.0159488, "LATITUDE": 52.4625896}, {"uprn": 90118998.0, "standardised_address": "FLAT 78, VICTORIA COURT, BINSWOOD ROAD, HALESOWEN, B62 9BQ", "standardised_postcode": "B62 9BQ", "LONGITUDE": -2.0145066, "LATITUDE": 52.4631112}, {"uprn": 32015926.0, "standardised_address": "62 Petford Street", "standardised_postcode": "B64 6DY", "LONGITUDE": -2.0714993, "LATITUDE": 52.4736718}, {"uprn": 32004728.0, "standardised_address": "16, RED BRICK CLOSE, CRADLEY HEATH, B64 7DR", "standardised_postcode": "B64 7DR", "LONGITUDE": -2.0758424, "LATITUDE": 52.4652633}, {"uprn": 32148048.0, "standardised_address": "2, Brailsford Drive", "standardised_postcode": "B66 3NH", "LONGITUDE": -1.9679894, "LATITUDE": 52.4929906}, {"uprn": 10008537133.0, "standardised_address": "FLAT 8, 45, CORBETT STREET, SMETHWICK, B66 3PU", "standardised_postcode": "B66 
3PU", "LONGITUDE": -1.9616889, "LATITUDE": 52.4893028}, {"uprn": 32144722.0, "standardised_address": "67, Talbot Road", "standardised_postcode": "B66 4DX", "LONGITUDE": -1.9667612, "LATITUDE": 52.4810605}, {"uprn": 32146056.0, "standardised_address": "Flat 6, 179 Bearwood Road, Sandwell, B66 4LN", "standardised_postcode": "B66 4LN", "LONGITUDE": -1.9670378, "LATITUDE": 52.4852949}, {"uprn": 32146371.0, "standardised_address": "10, Clent View, Gilbert Road", "standardised_postcode": "B66 4PU", "LONGITUDE": -1.9602366, "LATITUDE": 52.4829554}, {"uprn": 32129152.0, "standardised_address": "120, VICARAGE ROAD, SMETHWICK, B67 7AP", "standardised_postcode": "B67 7AP", "LONGITUDE": -1.9765168, "LATITUDE": 52.4936489}, {"uprn": 32129140.0, "standardised_address": "105, Vicarage Road", "standardised_postcode": "B67 7AP", "LONGITUDE": -1.9763696, "LATITUDE": 52.4934421}, {"uprn": 32131232.0, "standardised_address": "6 The Oaks, South Road, Sandwell, B67 7BY", "standardised_postcode": "B67 7BY", "LONGITUDE": -1.9747203, "LATITUDE": 52.4930012}, {"uprn": 32131213.0, "standardised_address": "FLAT 1, 41, SOUTH ROAD, SMETHWICK, B67 7BZ", "standardised_postcode": "B67 7BZ", "LONGITUDE": -1.9742786, "LATITUDE": 52.4926236}, {"uprn": 200001483798.0, "standardised_address": "Flat 1, 5 North Street, Sandwell, B67 7DA", "standardised_postcode": "B67 7DA", "LONGITUDE": -1.975265, "LATITUDE": 52.4933789}, {"uprn": 32131139.0, "standardised_address": "16 St. 
Albans Close", "standardised_postcode": "B67 7PD", "LONGITUDE": -1.9806101, "LATITUDE": 52.4966882}, {"uprn": 32130313.0, "standardised_address": "31, White Road", "standardised_postcode": "B67 7PG", "LONGITUDE": -1.978032, "LATITUDE": 52.4974969}, {"uprn": 32067588.0, "standardised_address": "12, COTSWOLD CLOSE, OLDBURY, B69 1FB", "standardised_postcode": "B69 1FB", "LONGITUDE": -2.0284188, "LATITUDE": 52.490672}, {"uprn": 32067603.0, "standardised_address": "40, Wolverley Crescent", "standardised_postcode": "B69 1FD", "LONGITUDE": -2.0298473, "LATITUDE": 52.4906267}, {"uprn": 32067644.0, "standardised_address": "22, WITLEY CRESCENT, OLDBURY, B69 1FF", "standardised_postcode": "B69 1FF", "LONGITUDE": -2.0285806, "LATITUDE": 52.4904562}, {"uprn": 32086384.0, "standardised_address": "45 COYNE ROAD, WEST BROMWICH", "standardised_postcode": "B70 7HJ", "LONGITUDE": -2.0035014, "LATITUDE": 52.5127458}, {"uprn": 100120639219.0, "standardised_address": "21 MUSKETTS COURT, BIRCHFIELD ROAD, REDDITCH", "standardised_postcode": "B97 4NA", "LONGITUDE": -1.958538, "LATITUDE": 52.2974523}, {"uprn": 100120639206.0, "standardised_address": "8 MUSKETTS COURT, BIRCHFIELD ROAD, REDDITCH", "standardised_postcode": "B97 4NA", "LONGITUDE": -1.9586844, "LATITUDE": 52.2972294}, {"uprn": 100070622454.0, "standardised_address": "24, Boyd Close", "standardised_postcode": "CV2 2NF", "LONGITUDE": -1.4423782, "LATITUDE": 52.431865}, {"uprn": 100070665874.0, "standardised_address": "4 JONATHAN ROAD, COVENTRY", "standardised_postcode": "CV2 2NQ", "LONGITUDE": -1.4423682, "LATITUDE": 52.4326561}, {"uprn": 100070665876.0, "standardised_address": "6 JONATHAN ROAD", "standardised_postcode": "CV2 2NQ", "LONGITUDE": -1.4423971, "LATITUDE": 52.4326922}, {"uprn": 100071505169.0, "standardised_address": "192, Leam Terrace", "standardised_postcode": "CV31 1DW", "LONGITUDE": -1.5185861, "LATITUDE": 52.2852891}, {"uprn": 10013181859.0, "standardised_address": "121 Radford Road", "standardised_postcode": 
"CV31 1JZ", "LONGITUDE": -1.5185861, "LATITUDE": 52.2852891}, {"uprn": 100071252300.0, "standardised_address": "21 EPPERSTONE COURT, AVENUE ROAD, LEAMINGTON SPA, CV31 3NH", "standardised_postcode": "CV31 3NH", "LONGITUDE": -1.5363462, "LATITUDE": 52.2854703}, {"uprn": 100070246806.0, "standardised_address": "38 Whittle Court, Upper Holly Walk", "standardised_postcode": "CV32 4LB", "LONGITUDE": -1.5250688, "LATITUDE": 52.2917237}, {"uprn": 100070246778.0, "standardised_address": "9 Whittle Court, Upper Holly Walk", "standardised_postcode": "CV32 4LB", "LONGITUDE": -1.5250688, "LATITUDE": 52.2917237}, {"uprn": 100070246783.0, "standardised_address": "15 Whittle Court, Upper Holly Walk", "standardised_postcode": "CV32 4LB", "LONGITUDE": -1.5250688, "LATITUDE": 52.2917237}, {"uprn": 100071254640.0, "standardised_address": "8 Leicester Court, Leicester Street", "standardised_postcode": "CV32 4UD", "LONGITUDE": -1.523885, "LATITUDE": 52.2947491}, {"uprn": 100071256971.0, "standardised_address": "20 STUART COURT, WARWICK TERRACE, LEAMINGTON SPA, CV32 5NU", "standardised_postcode": "CV32 5NU", "LONGITUDE": -1.5418657, "LATITUDE": 52.2922215}, {"uprn": 10003791898.0, "standardised_address": "17 Goode Close", "standardised_postcode": "CV34 5LP", "LONGITUDE": -1.6014186, "LATITUDE": 52.2866675}, {"uprn": 100070262028.0, "standardised_address": "9, Grange Close", "standardised_postcode": "CV34 5PE", "LONGITUDE": -1.5601131, "LATITUDE": 52.2884158}, {"uprn": 100070645529.0, "standardised_address": "28, Ensign Close", "standardised_postcode": "CV4 9TU", "LONGITUDE": -1.5962218, "LATITUDE": 52.4031891}, {"uprn": 10023037940.0, "standardised_address": "57 Dunster Place", "standardised_postcode": "CV6 4JD", "LONGITUDE": -1.4997454, "LATITUDE": 52.4441734}, {"uprn": 100071341257.0, "standardised_address": "45, THE FIRS, MAXSTOKE LANE, MERIDEN, COVENTRY, CV7 7NT", "standardised_postcode": "CV7 7NT", "LONGITUDE": -1.6515841, "LATITUDE": 52.4397024}, {"uprn": 100071341250.0, 
"standardised_address": "38, The Firs, Maxstoke Lane", "standardised_postcode": "CV7 7NT", "LONGITUDE": -1.651866, "LATITUDE": 52.4394065}, {"uprn": 90114588.0, "standardised_address": "Flat 2, 20 Wellington Road, Dudley, DY1 1RB", "standardised_postcode": "DY1 1RB", "LONGITUDE": -2.0956881, "LATITUDE": 52.5076633}, {"uprn": 90000273.0, "standardised_address": "36, Edenbridge View", "standardised_postcode": "DY1 2JJ", "LONGITUDE": -2.1174047, "LATITUDE": 52.5187103}, {"uprn": 100120753988.0, "standardised_address": "Flat 1, Hargreaves Court, Parry Road, Wyre Forest, DY11 6LZ", "standardised_postcode": "DY11 6LZ", "LONGITUDE": -2.2655377, "LATITUDE": 52.3760041}, {"uprn": 90094063.0, "standardised_address": "11, IVANHOE STREET, DUDLEY, DY2 0YB", "standardised_postcode": "DY2 0YB", "LONGITUDE": -2.1002305, "LATITUDE": 52.5032005}, {"uprn": 90094168.0, "standardised_address": "129, Ivanhoe Street", "standardised_postcode": "DY2 0YD", "LONGITUDE": -2.0989799, "LATITUDE": 52.5038938}, {"uprn": 90126107.0, "standardised_address": "FLAT 1, 26, NORTH STREET, DUDLEY, DY2 7DU", "standardised_postcode": "DY2 7DU", "LONGITUDE": -2.0756107, "LATITUDE": 52.5100603}, {"uprn": 90009856.0, "standardised_address": "30, Lauder Close", "standardised_postcode": "DY3 3XN", "LONGITUDE": -2.1305593, "LATITUDE": 52.5466641}, {"uprn": 10013825938.0, "standardised_address": "21, SHEPHERDS DROVE, WEST ASHTON, TROWBRIDGE, BA14 6DG", "standardised_postcode": "BA14 6DG", "LONGITUDE": -2.1703744, "LATITUDE": 51.2959889}, {"uprn": 100121084534.0, "standardised_address": "68, BARN GLEBE, TROWBRIDGE, BA14 7JZ", "standardised_postcode": "BA14 7JZ", "LONGITUDE": -2.194204, "LATITUDE": 51.3212819}, {"uprn": 250045056.0, "standardised_address": "116, WOOKEY HOLE ROAD, WELLS, BA5 2NQ", "standardised_postcode": "BA5 2NQ", "LONGITUDE": -2.6618111, "LATITUDE": 51.2154525}, {"uprn": 100040783250.0, "standardised_address": "210, WINDHAM ROAD, BOURNEMOUTH, BH1 4QX", "standardised_postcode": "BH1 4QX", 
"LONGITUDE": -1.8485583, "LATITUDE": 50.7304686}, {"uprn": 100040728126.0, "standardised_address": "34, Copper Beech Gardens", "standardised_postcode": "BH10 5DB", "LONGITUDE": -1.8917906, "LATITUDE": 50.7540873}, {"uprn": 100040753089.0, "standardised_address": "Flat 2, Portiere House, 10 Moore Avenue, Bournemouth, Christchurch and Poole, BH11 8AY", "standardised_postcode": "BH11 8AY", "LONGITUDE": -1.9112837, "LATITUDE": 50.7610641}, {"uprn": 100040761568.0, "standardised_address": "11 RAGLAN GARDENS, BOURNEMOUTH", "standardised_postcode": "BH11 8QU", "LONGITUDE": -1.9130825, "LATITUDE": 50.7542938}, {"uprn": 100040820003.0, "standardised_address": "41, Merrow Avenue", "standardised_postcode": "BH12 1PY", "LONGITUDE": -1.9063404, "LATITUDE": 50.7368871}, {"uprn": 100040827450.0, "standardised_address": "335b, Ringwood Road", "standardised_postcode": "BH12 3JN", "LONGITUDE": -1.9462039, "LATITUDE": 50.7421399}, {"uprn": 100040813791.0, "standardised_address": "18 Jellicoe Close, Bournemouth, Christchurch and Poole, BH14 0PX", "standardised_postcode": "BH14 0PX", "LONGITUDE": -1.960793, "LATITUDE": 50.72879}, {"uprn": 100040821208.0, "standardised_address": "Flat 6, Ashley Mount, 7 Mount Road, Bournemouth, Christchurch and Poole, BH14 0QW", "standardised_postcode": "BH14 0QW", "LONGITUDE": -1.9495564, "LATITUDE": 50.7293428}, {"uprn": 10001086391.0, "standardised_address": "10 MANTON CLOSE, POOLE", "standardised_postcode": "BH15 4QA", "LONGITUDE": -2.0098437, "LATITUDE": 50.7207753}, {"uprn": 100040827073.0, "standardised_address": "1, RICE TERRACE, POOLE, BH16 5DH", "standardised_postcode": "BH16 5DH", "LONGITUDE": -2.0197173, "LATITUDE": 50.7266588}, {"uprn": 100040827063.0, "standardised_address": "3 Rice Gardens", "standardised_postcode": "BH16 5DJ", "LONGITUDE": -2.01989, "LATITUDE": 50.7264753}, {"uprn": 100040607845.0, "standardised_address": "16 WARBLER CLOSE, UPTON", "standardised_postcode": "BH16 5RL", "LONGITUDE": -2.0316176, "LATITUDE": 50.742003}, 
{"uprn": 100040824517.0, "standardised_address": "Flat 14, Tanglewood Lodge, 89a Petersham Road, Bournemouth, Christchurch and Poole, BH17 7DW", "standardised_postcode": "BH17 7DW", "LONGITUDE": -2.0037117, "LATITUDE": 50.742439}, {"uprn": 10001087039.0, "standardised_address": "59, DEWLISH CLOSE, POOLE, BH17 8AQ", "standardised_postcode": "BH17 8AQ", "LONGITUDE": -1.9568173, "LATITUDE": 50.7539593}, {"uprn": 10001087048.0, "standardised_address": "77, Dewlish Close", "standardised_postcode": "BH17 8AQ", "LONGITUDE": -1.9563924, "LATITUDE": 50.7535454}, {"uprn": 100040799312.0, "standardised_address": "60, Chaldon Road", "standardised_postcode": "BH17 8DB", "LONGITUDE": -1.9598665, "LATITUDE": 50.7524496}, {"uprn": 100040799318.0, "standardised_address": "66, Chaldon Road", "standardised_postcode": "BH17 8DB", "LONGITUDE": -1.9600812, "LATITUDE": 50.7526369}, {"uprn": 100040799326.0, "standardised_address": "74, Chaldon Road", "standardised_postcode": "BH17 8DB", "LONGITUDE": -1.9604614, "LATITUDE": 50.7528353}, {"uprn": 100040799344.0, "standardised_address": "92, CHALDON ROAD, POOLE, BH17 8DB", "standardised_postcode": "BH17 8DB", "LONGITUDE": -1.9604477, "LATITUDE": 50.7524138}, {"uprn": 200004823359.0, "standardised_address": "12, Marsh Way", "standardised_postcode": "BH19 2TE", "LONGITUDE": -1.9770611, "LATITUDE": 50.6078225}, {"uprn": 200004825497.0, "standardised_address": "67 MARSH WAY, SWANAGE", "standardised_postcode": "BH19 2TE", "LONGITUDE": -1.9787943, "LATITUDE": 50.6087743}, {"uprn": 200004825492.0, "standardised_address": "57 MARSH WAY, SWANAGE", "standardised_postcode": "BH19 2TE", "LONGITUDE": -1.978921, "LATITUDE": 50.6084065}, {"uprn": 100040612353.0, "standardised_address": "22 Steppes, Langton Matravers, Dorset, BH19 3EY", "standardised_postcode": "BH19 3EY", "LONGITUDE": -1.9982872, "LATITUDE": 50.6114398}, {"uprn": 100040615235.0, "standardised_address": "18, FOLLY LANE, WAREHAM, BH20 4HH", "standardised_postcode": "BH20 4HH", "LONGITUDE": 
-2.1091065, "LATITUDE": 50.6892606}, {"uprn": 100040613496.0, "standardised_address": "11, BELLS ORCHARD LANE, WAREHAM, BH20 4HP", "standardised_postcode": "BH20 4HP", "LONGITUDE": -2.1076971, "LATITUDE": 50.6876586}, {"uprn": 200004818272.0, "standardised_address": "8 CHRISTMAS CLOSE, WAREHAM", "standardised_postcode": "BH20 4RG", "LONGITUDE": -2.1182555, "LATITUDE": 50.6868481}, {"uprn": 10011953366.0, "standardised_address": "3 Long Ground Cottages, Church Knowle", "standardised_postcode": "BH20 5NH", "LONGITUDE": -2.0886006, "LATITUDE": 50.6358618}, {"uprn": 10011953315.0, "standardised_address": "2 Tollgate Cottages, Kimmeridge", "standardised_postcode": "BH20 5PE", "LONGITUDE": -2.1203866, "LATITUDE": 50.6155815}, {"uprn": 200004827674.0, "standardised_address": "5 Hardy Cottages, School Lane, West Lulworth, Dorset, BH20 5SA", "standardised_postcode": "BH20 5SA", "LONGITUDE": -2.2402216, "LATITUDE": 50.6254701}, {"uprn": 100040619240.0, "standardised_address": "43B WEST STREET, BERE REGIS", "standardised_postcode": "BH20 7HS", "LONGITUDE": -2.2223413, "LATITUDE": 50.7545276}, {"uprn": 100041099402.0, "standardised_address": "3 LYS COTTAGES, SOUTHBROOK, BERE REGIS, WAREHAM, BH20 7LH", "standardised_postcode": "BH20 7LH", "LONGITUDE": -2.2180587, "LATITUDE": 50.7499967}, {"uprn": 100040715693.0, "standardised_address": "219, BEAUFORT ROAD, BOURNEMOUTH, BH6 5AF", "standardised_postcode": "BH6 5AF", "LONGITUDE": -1.8032153, "LATITUDE": 50.7350516}, {"uprn": 24102341.0, "standardised_address": "32 MARCONI CLOSE, WESTON-SUPER-MARE", "standardised_postcode": "BS23 3HH", "LONGITUDE": -2.9532424, "LATITUDE": 51.346858}, {"uprn": 539294.0, "standardised_address": "18, CARMARTHEN GROVE, WILLSBRIDGE, BRISTOL, BS30 6UY", "standardised_postcode": "BS30 6UY", "LONGITUDE": -2.477798, "LATITUDE": 51.4318312}, {"uprn": 100041033195.0, "standardised_address": "DORCHESTER YOUTH & COMMUNITY CENTRE, KINGS ROAD, DORCHESTER, DT1 1NJ", "standardised_postcode": "DT1 1NJ", "LONGITUDE": 
-2.4256275, "LATITUDE": 50.712939}, {"uprn": 100040630187.0, "standardised_address": "35 ALFRED PLACE, DORCHESTER", "standardised_postcode": "DT1 1NW", "LONGITUDE": -2.4289537, "LATITUDE": 50.7124378}, {"uprn": 100040603020.0, "standardised_address": "9 Thrift Close, Stalbridge", "standardised_postcode": "DT10 2LE", "LONGITUDE": -2.371142, "LATITUDE": 50.9560518}, {"uprn": 100040603027.0, "standardised_address": "16, THRIFT CLOSE, STALBRIDGE, STURMINSTER NEWTON, DT10 2LE", "standardised_postcode": "DT10 2LE", "LONGITUDE": -2.3710023, "LATITUDE": 50.9563849}, {"uprn": 10013220368.0, "standardised_address": "16 PORTMAN MEWS, BRYANSTON, BLANDFORD FORUM, DT11 0PR", "standardised_postcode": "DT11 0PR", "LONGITUDE": -2.1854767, "LATITUDE": 50.8609862}, {"uprn": 100040589446.0, "standardised_address": "1 YARDE FARM, PIMPERNE", "standardised_postcode": "DT11 8XF", "LONGITUDE": -2.1359685, "LATITUDE": 50.878473}, {"uprn": 100040589449.0, "standardised_address": "4 YARDE FARM, PIMPERNE", "standardised_postcode": "DT11 8XF", "LONGITUDE": -2.1359404, "LATITUDE": 50.8786079}, {"uprn": 100040589455.0, "standardised_address": "10 YARDE FARM, PIMPERNE", "standardised_postcode": "DT11 8XF", "LONGITUDE": -2.1360556, "LATITUDE": 50.8790934}, {"uprn": 100040588938.0, "standardised_address": "3 PLUMBLEY MEADOWS, WINTERBORNE KINGSTON", "standardised_postcode": "DT11 9BY", "LONGITUDE": -2.1991201, "LATITUDE": 50.7784828}, {"uprn": 100040603895.0, "standardised_address": "5, Woodsford Lane, Moreton", "standardised_postcode": "DT2 8AY", "LONGITUDE": -2.3122792, "LATITUDE": 50.7027633}, {"uprn": 100040640575.0, "standardised_address": "3 WOODBURY DROVE, CROSSWAYS", "standardised_postcode": "DT2 8XT", "LONGITUDE": -2.3343597, "LATITUDE": 50.6988929}, {"uprn": 100040659720.0, "standardised_address": "44, Buddleia Close", "standardised_postcode": "DT3 6SG", "LONGITUDE": -2.4455732, "LATITUDE": 50.6450829}, {"uprn": 100040662263.0, "standardised_address": "138, Corporation Road", 
"standardised_postcode": "DT4 0LQ", "LONGITUDE": -2.4683463, "LATITUDE": 50.6151519}, {"uprn": 100040662275.0, "standardised_address": "150, Corporation Road", "standardised_postcode": "DT4 0LQ", "LONGITUDE": -2.4681304, "LATITUDE": 50.6149443}, {"uprn": 100040626274.0, "standardised_address": "2 Pitchers, Salwayash, Dorset, DT6 5QS", "standardised_postcode": "DT6 5QS", "LONGITUDE": -2.7730072, "LATITUDE": 50.7662791}, {"uprn": 100040626501.0, "standardised_address": "12, RIVERVALE, BRIDPORT, DT6 5RN", "standardised_postcode": "DT6 5RN", "LONGITUDE": -2.7586317, "LATITUDE": 50.740939}, {"uprn": 100040621177.0, "standardised_address": "54 REDLANDS LANE, BROADWINDSOR", "standardised_postcode": "DT8 3ST", "LONGITUDE": -2.7955262, "LATITUDE": 50.8187693}, {"uprn": 10094240246.0, "standardised_address": "43, Willow Rise, Witheridge", "standardised_postcode": "EX16 8FD", "LONGITUDE": -3.6994066, "LATITUDE": 50.9120479}, {"uprn": 10004844899.0, "standardised_address": "WILTSHIRE HOUSE HOSTEL, 64, EXMOUTH STREET, KINGSHILL, SWINDON, SN1 3PU", "standardised_postcode": "SN1 3PU", "LONGITUDE": -1.7943904, "LATITUDE": 51.5551385}, {"uprn": 10094328457.0, "standardised_address": "ROOM 10, 2, SWINDON FOYER, 17-21, BATH ROAD, OLD TOWN, SWINDON, SN1 4AS", "standardised_postcode": "SN1 4AS", "LONGITUDE": -1.7778769, "LATITUDE": 51.5523398}, {"uprn": 100120980894.0, "standardised_address": "10, WINE STREET, DEVIZES, SN10 1AP", "standardised_postcode": "SN10 1AP", "LONGITUDE": -1.9947743, "LATITUDE": 51.3518889}, {"uprn": 100120977834.0, "standardised_address": "19C, MONDAY MARKET STREET, DEVIZES, SN10 1DN", "standardised_postcode": "SN10 1DN", "LONGITUDE": -1.9921895, "LATITUDE": 51.3520506}, {"uprn": 200001300706.0, "standardised_address": "39, HURRICANE ROAD, BOWERHILL, MELKSHAM, SN12 6SZ", "standardised_postcode": "SN12 6SZ", "LONGITUDE": -2.1236125, "LATITUDE": 51.3578666}, {"uprn": 200001300707.0, "standardised_address": "4, HURRICANE ROAD, BOWERHILL, MELKSHAM, SN12 6SZ", 
"standardised_postcode": "SN12 6SZ", "LONGITUDE": -2.1248482, "LATITUDE": 51.3580721}, {"uprn": 100121013791.0, "standardised_address": "4, SOUTHMEAD, CHIPPENHAM, SN14 0RU", "standardised_postcode": "SN14 0RU", "LONGITUDE": -2.1380925, "LATITUDE": 51.4540985}, {"uprn": 100121013888.0, "standardised_address": "134, SOUTHMEAD, CHIPPENHAM, SN14 0SB", "standardised_postcode": "SN14 0SB", "LONGITUDE": -2.1339064, "LATITUDE": 51.4547597}, {"uprn": 10010426772.0, "standardised_address": "21, CLIVE PARADE, CRICKLADE ROAD, SWINDON, SN2 1AJ", "standardised_postcode": "SN2 1AJ", "LONGITUDE": -1.7755952, "LATITUDE": 51.5844431}, {"uprn": 100121161218.0, "standardised_address": "44 Tulip Tree Close", "standardised_postcode": "SN2 1RR", "LONGITUDE": -1.782982, "LATITUDE": 51.5790352}, {"uprn": 100121344047.0, "standardised_address": "46, Harber Court, May Close", "standardised_postcode": "SN2 1XD", "LONGITUDE": -1.7793342, "LATITUDE": 51.5772052}, {"uprn": 10004840648.0, "standardised_address": "FLAT 3, 6, CAMDALE PARADE, CRICKLADE ROAD, SWINDON, SN2 8AH", "standardised_postcode": "SN2 8AH", "LONGITUDE": -1.7769189, "LATITUDE": 51.573548}, {"uprn": 10004842231.0, "standardised_address": "4, Huddleston Close", "standardised_postcode": "SN2 8BG", "LONGITUDE": -1.7727817, "LATITUDE": 51.5707465}, {"uprn": 100121141804.0, "standardised_address": "96, LENNOX DRIVE, SWINDON, SN3 3BD", "standardised_postcode": "SN3 3BD", "LONGITUDE": -1.7635782, "LATITUDE": 51.5611407}, {"uprn": 100121345610.0, "standardised_address": "14 BRAIN COURT, BUNCE ROAD, SWINDON, SN3 4QT", "standardised_postcode": "SN3 4QT", "LONGITUDE": -1.7510497, "LATITUDE": 51.5782255}, {"uprn": 100121345614.0, "standardised_address": "18 BRAIN COURT, BUNCE ROAD, SWINDON, SN3 4QT", "standardised_postcode": "SN3 4QT", "LONGITUDE": -1.7510497, "LATITUDE": 51.5782255}, {"uprn": 100121345636.0, "standardised_address": "38 BRAIN COURT, BUNCE ROAD, SWINDON, SN3 4QT", "standardised_postcode": "SN3 4QT", "LONGITUDE": -1.7510497, 
"LATITUDE": 51.5782255}, {"uprn": 100121345678.0, "standardised_address": "74 BRAIN COURT, BUNCE ROAD, SWINDON, SN3 4QU", "standardised_postcode": "SN3 4QU", "LONGITUDE": -1.7510497, "LATITUDE": 51.5782255}, {"uprn": 100121134536.0, "standardised_address": "6, Goulding Close", "standardised_postcode": "SN3 4QY", "LONGITUDE": -1.7486145, "LATITUDE": 51.5801714}, {"uprn": 10008541709.0, "standardised_address": "44, THORNEY PARK, WROUGHTON, SWINDON, SN4 0QS", "standardised_postcode": "SN4 0QS", "LONGITUDE": -1.7872903, "LATITUDE": 51.5116279}, {"uprn": 10008541885.0, "standardised_address": "100, Thorney Park, Wroughton", "standardised_postcode": "SN4 0QT", "LONGITUDE": -1.789516, "LATITUDE": 51.5108628}, {"uprn": 10008541880.0, "standardised_address": "105, Thorney Park, Wroughton", "standardised_postcode": "SN4 0QT", "LONGITUDE": -1.7888605, "LATITUDE": 51.5108736}, {"uprn": 10008541889.0, "standardised_address": "112, Thorney Park, Wroughton", "standardised_postcode": "SN4 0QT", "LONGITUDE": -1.7878319, "LATITUDE": 51.5108692}, {"uprn": 100121169562.0, "standardised_address": "Flat 15, Windmill Court, Uxbridge Road, Freshbrook, Swindon, SN5 8RT", "standardised_postcode": "SN5 8RT", "LONGITUDE": -1.8418052, "LATITUDE": 51.550096}, {"uprn": 100121169564.0, "standardised_address": "Flat 17, Windmill Court, Uxbridge Road, Freshbrook, Swindon, SN5 8RT", "standardised_postcode": "SN5 8RT", "LONGITUDE": -1.8418052, "LATITUDE": 51.550096}, {"uprn": 100121055967.0, "standardised_address": "93-95, ST. 
EDMUNDS CHURCH STREET, SALISBURY, SP1 1EQ", "standardised_postcode": "SP1 1EQ", "LONGITUDE": -1.7924578, "LATITUDE": 51.0713354}, {"uprn": 100121044735.0, "standardised_address": "44 Glyndebourne Close", "standardised_postcode": "SP2 9EY", "LONGITUDE": -1.8269895, "LATITUDE": 51.0812824}, {"uprn": 200001120569.0, "standardised_address": "10 COOKS CLOSE, SALISBURY", "standardised_postcode": "SP2 9PS", "LONGITUDE": -1.8317301, "LATITUDE": 51.0888336}, {"uprn": 100121047096.0, "standardised_address": "26, Hops Close, Chilmark", "standardised_postcode": "SP3 5BE", "LONGITUDE": -2.0471076, "LATITUDE": 51.0932517}, {"uprn": 100121046662.0, "standardised_address": "12, Hill Close, Tisbury", "standardised_postcode": "SP3 6TB", "LONGITUDE": -2.0788121, "LATITUDE": 51.0669769}, {"uprn": 100121046667.0, "standardised_address": "23, Hill Close, Tisbury", "standardised_postcode": "SP3 6TB", "LONGITUDE": -2.0786986, "LATITUDE": 51.0671501}, {"uprn": 100121046668.0, "standardised_address": "25, Hill Close, Tisbury", "standardised_postcode": "SP3 6TB", "LONGITUDE": -2.0786213, "LATITUDE": 51.0671395}, {"uprn": 100120987378.0, "standardised_address": "113, HIGH STREET, NETHERAVON, SALISBURY, SP4 9PJ", "standardised_postcode": "SP4 9PJ", "LONGITUDE": -1.7907184, "LATITUDE": 51.2385674}, {"uprn": 100121055592.0, "standardised_address": "40, SPIDERS ISLAND, ALDERBURY, WILTSHIRE, SP5 3BG", "standardised_postcode": "SP5 3BG", "LONGITUDE": -1.7204383, "LATITUDE": 51.0379501}, {"uprn": 100121045369.0, "standardised_address": "11, GRIMSTEAD ROAD, WHADDON, SALISBURY, SP5 3EE", "standardised_postcode": "SP5 3EE", "LONGITUDE": -1.7206544, "LATITUDE": 51.0375909}, {"uprn": 100121045372.0, "standardised_address": "19, Grimstead Road, Whaddon", "standardised_postcode": "SP5 3EE", "LONGITUDE": -1.7204116, "LATITUDE": 51.0376623}, {"uprn": 200002927526.0, "standardised_address": "6, Stanley Close, Bishopstone", "standardised_postcode": "SP5 4BH", "LONGITUDE": -1.9035983, "LATITUDE": 51.0312746}, 
{"uprn": 10010447328.0, "standardised_address": "17 HILLVIEW, EBBESBOURNE WAKE", "standardised_postcode": "SP5 5JJ", "LONGITUDE": -2.0155382, "LATITUDE": 51.0160085}, {"uprn": 100040690924.0, "standardised_address": "2, Lime Tree Close, Alderholt", "standardised_postcode": "SP6 3RQ", "LONGITUDE": -1.8334067, "LATITUDE": 50.9123707}, {"uprn": 100040900005.0, "standardised_address": "26 TURNER CLOSE, BRIDGWATER", "standardised_postcode": "TA6 3PA", "LONGITUDE": -2.9996053, "LATITUDE": 51.1249647}, {"uprn": 10002989537.0, "standardised_address": "CARTREF, WINNINGS WAY, TORQUAY, TQ1 3GZ", "standardised_postcode": "TQ1 3GZ", "LONGITUDE": -3.513879, "LATITUDE": 50.4785659}, {"uprn": 100061879598.0, "standardised_address": "11 Blackdown Road", "standardised_postcode": "BN13 2EZ", "LONGITUDE": -0.4017112, "LATITUDE": 50.8371451}, {"uprn": 100061879600.0, "standardised_address": "14, BLACKDOWN ROAD, WORTHING, BN13 2EZ", "standardised_postcode": "BN13 2EZ", "LONGITUDE": -0.401944, "LATITUDE": 50.8371204}, {"uprn": 100061899941.0, "standardised_address": "25, QUANTOCK ROAD, WORTHING, BN13 2HQ", "standardised_postcode": "BN13 2HQ", "LONGITUDE": -0.4024977, "LATITUDE": 50.8375507}, {"uprn": 100062191147.0, "standardised_address": "40 Hurst Cottages, East Street, Amberley, Horsham, BN18 9NP", "standardised_postcode": "BN18 9NP", "LONGITUDE": -0.5285661, "LATITUDE": 50.9086612}, {"uprn": 100062191157.0, "standardised_address": "50 Hurst Cottages, East Street, Amberley, Horsham, BN18 9NP", "standardised_postcode": "BN18 9NP", "LONGITUDE": -0.5284547, "LATITUDE": 50.9096733}, {"uprn": 200002880107.0, "standardised_address": "9a Hurst Cottages, East Street, Amberley, Horsham, BN18 9NP", "standardised_postcode": "BN18 9NP", "LONGITUDE": -0.5298463, "LATITUDE": 50.9097002}, {"uprn": 100061911692.0, "standardised_address": "Flat 4, Highland Lodge, 17 Carew Road, Eastbourne, BN21 2JQ", "standardised_postcode": "BN21 2JQ", "LONGITUDE": 0.2759205, "LATITUDE": 50.7765092}, {"uprn": 
10010653970.0, "standardised_address": "3 GLADSTONE CLOSE, EASTBOURNE", "standardised_postcode": "BN22 9BP", "LONGITUDE": 0.2792262, "LATITUDE": 50.7975267}, {"uprn": 100061914793.0, "standardised_address": "Flat 2, Lakeside Court, 6 Lakelands Close, Eastbourne, BN22 9EQ", "standardised_postcode": "BN22 9EQ", "LONGITUDE": 0.2878669, "LATITUDE": 50.7989404}, {"uprn": 10010655892.0, "standardised_address": "5, Britten Close", "standardised_postcode": "BN23 7TR", "LONGITUDE": 0.3111695, "LATITUDE": 50.8061167}, {"uprn": 10010655608.0, "standardised_address": "5, Laughton Close", "standardised_postcode": "BN23 8JU", "LONGITUDE": 0.287275, "LATITUDE": 50.8121013}, {"uprn": 100060019188.0, "standardised_address": "23 Rotherfield Avenue", "standardised_postcode": "BN23 8JZ", "LONGITUDE": 0.2887367, "LATITUDE": 50.8109304}, {"uprn": 100061916123.0, "standardised_address": "12 The Rookery, Eastbourne, BN23 8LD", "standardised_postcode": "BN23 8LD", "LONGITUDE": 0.3010123, "LATITUDE": 50.8114353}, {"uprn": 10004614538.0, "standardised_address": "10 BUTTS FIELD, HAILSHAM", "standardised_postcode": "BN27 2BZ", "LONGITUDE": 0.2641057, "LATITUDE": 50.8558489}, {"uprn": 10004614505.0, "standardised_address": "1 BUTTS FIELD, HAILSHAM", "standardised_postcode": "BN27 2BZ", "LONGITUDE": 0.2630997, "LATITUDE": 50.8564551}, {"uprn": 22136962.0, "standardised_address": "16, BLUEBIRD COURT 12-14, HOVE STREET, HOVE, BN3 2TU", "standardised_postcode": "BN3 2TU", "LONGITUDE": -0.1806456, "LATITUDE": 50.826389}, {"uprn": 100062006747.0, "standardised_address": "19 Nelson House, Short Street, Rushmoor, GU11 1HX", "standardised_postcode": "GU11 1HX", "LONGITUDE": -0.7667619, "LATITUDE": 51.2496261}, {"uprn": 100062006748.0, "standardised_address": "20 Nelson House, Short Street, Rushmoor, GU11 1HX", "standardised_postcode": "GU11 1HX", "LONGITUDE": -0.7667619, "LATITUDE": 51.2496261}, {"uprn": 100060533152.0, "standardised_address": "5, Raglan Close", "standardised_postcode": "GU12 4PG", 
"LONGITUDE": -0.7516103, "LATITUDE": 51.2440431}, {"uprn": 100061765042.0, "standardised_address": "1 RANVILLE CLOSE, PETWORTH", "standardised_postcode": "GU28 0EN", "LONGITUDE": -0.6124007, "LATITUDE": 50.9814381}, {"uprn": 100061762685.0, "standardised_address": "21 JUNE MEADOWS, MIDHURST", "standardised_postcode": "GU29 9ER", "LONGITUDE": -0.7509855, "LATITUDE": 50.9904527}, {"uprn": 100061762706.0, "standardised_address": "42 JUNE MEADOWS, MIDHURST", "standardised_postcode": "GU29 9ER", "LONGITUDE": -0.7517059, "LATITUDE": 50.9906852}, {"uprn": 200001064783.0, "standardised_address": "17 Chestnut Close", "standardised_postcode": "GU29 9TT", "LONGITUDE": -0.7473381, "LATITUDE": 50.9766817}, {"uprn": 200001064789.0, "standardised_address": "2, HORNBEAM WAY, MIDHURST, GU29 9TU", "standardised_postcode": "GU29 9TU", "LONGITUDE": -0.7483841, "LATITUDE": 50.9769806}, {"uprn": 100062161165.0, "standardised_address": "Flat 7, Brook House, Park Drive, Waverley, GU6 7EH", "standardised_postcode": "GU6 7EH", "LONGITUDE": -0.4741177, "LATITUDE": 51.1453322}, {"uprn": 100062367433.0, "standardised_address": "COPTHORNE COTTAGE, BRIGHTON ROAD, KINGSWOOD, TADWORTH, KT20 6BQ", "standardised_postcode": "KT20 6BQ", "LONGITUDE": -0.2205069, "LATITUDE": 51.3005374}, {"uprn": 100062145269.0, "standardised_address": "Flat 1, Copthorne House, Brighton Road, Kingswood, Reigate and Banstead, KT20 6BQ", "standardised_postcode": "KT20 6BQ", "LONGITUDE": -0.2203005, "LATITUDE": 51.3003094}, {"uprn": 100062145274.0, "standardised_address": "Flat 6, Copthorne House, Brighton Road, Kingswood, Reigate and Banstead, KT20 6BQ", "standardised_postcode": "KT20 6BQ", "LONGITUDE": -0.2203005, "LATITUDE": 51.3003094}, {"uprn": 100062145280.0, "standardised_address": "Flat 11, Copthorne House, Brighton Road, Kingswood, Reigate and Banstead, KT20 6BQ", "standardised_postcode": "KT20 6BQ", "LONGITUDE": -0.22026, "LATITUDE": 51.3006146}, {"uprn": 10007059816.0, "standardised_address": "2, OLD ST. 
MARYS, WEST HORSLEY, LEATHERHEAD, KT24 6JG", "standardised_postcode": "KT24 6JG", "LONGITUDE": -0.4571645, "LATITUDE": 51.2643896}, {"uprn": 100120914390.0, "standardised_address": "42, Wordsworth Road", "standardised_postcode": "OX14 5NX", "LONGITUDE": -1.2998371, "LATITUDE": 51.6633308}, {"uprn": 10011922279.0, "standardised_address": "60, Goldings Road, Hook Norton", "standardised_postcode": "OX15 5FG", "LONGITUDE": -1.4854983, "LATITUDE": 52.0000434}, {"uprn": 200001511761.0, "standardised_address": "78G, PARK STREET, THAME, OX9 3HX", "standardised_postcode": "OX9 3HX", "LONGITUDE": -0.9726332, "LATITUDE": 51.744209}, {"uprn": 100061693227.0, "standardised_address": "65, ESSEX ROAD, BOGNOR REGIS, PO21 2BY", "standardised_postcode": "PO21 2BY", "LONGITUDE": -0.6808471, "LATITUDE": 50.7923111}, {"uprn": 100061699117.0, "standardised_address": "64A, LINDEN ROAD, BOGNOR REGIS, PO21 2DT", "standardised_postcode": "PO21 2DT", "LONGITUDE": -0.6799074, "LATITUDE": 50.7883884}, {"uprn": 100061694176.0, "standardised_address": "22, FLETCHER WAY, BOGNOR REGIS, PO21 2NU", "standardised_postcode": "PO21 2NU", "LONGITUDE": -0.6808653, "LATITUDE": 50.7916638}, {"uprn": 100061694158.0, "standardised_address": "3, FLETCHER WAY, BOGNOR REGIS, PO21 2NU", "standardised_postcode": "PO21 2NU", "LONGITUDE": -0.6814028, "LATITUDE": 50.7922275}, {"uprn": 100061708497.0, "standardised_address": "66A, VICTORIA DRIVE, BOGNOR REGIS, PO21 2TG", "standardised_postcode": "PO21 2TG", "LONGITUDE": -0.6821079, "LATITUDE": 50.7868215}, {"uprn": 100061692981.0, "standardised_address": "7, ELM TREE CLOSE, BOGNOR REGIS, PO21 5BF", "standardised_postcode": "PO21 5BF", "LONGITUDE": -0.6893863, "LATITUDE": 50.7974796}, {"uprn": 100061688879.0, "standardised_address": "32, Birdham Close", "standardised_postcode": "PO21 5TD", "LONGITUDE": -0.6958816, "LATITUDE": 50.7960865}, {"uprn": 100061707577.0, "standardised_address": "32, The Croft", "standardised_postcode": "PO21 5TH", "LONGITUDE": -0.6968881, 
"LATITUDE": 50.7950995}, {"uprn": 1775024104.0, "standardised_address": "15, EDENBRIDGE ROAD, SOUTHSEA, PO4 8PE", "standardised_postcode": "PO4 8PE", "LONGITUDE": -1.0477756, "LATITUDE": 50.7983904}, {"uprn": 310063074.0, "standardised_address": "2, OPAL COURT, LOWER FIELD ROAD, READING, RG1 6BW", "standardised_postcode": "RG1 6BW", "LONGITUDE": -0.97969, "LATITUDE": 51.4489452}, {"uprn": 310006357.0, "standardised_address": "Flat 1, Galloway House, Rembrandt Way, Reading, RG1 6QU", "standardised_postcode": "RG1 6QU", "LONGITUDE": -0.9915054, "LATITUDE": 51.4449921}, {"uprn": 310056378.0, "standardised_address": "112, ADMIRALS COURT, ROSE KILN LANE, READING, RG1 6SS", "standardised_postcode": "RG1 6SS", "LONGITUDE": -0.9773714, "LATITUDE": 51.4470907}, {"uprn": 200004733000.0, "standardised_address": "Flat 6, Lynton Court, Pelican Lane, West Berkshire, RG14 1NN", "standardised_postcode": "RG14 1NN", "LONGITUDE": -1.3238052, "LATITUDE": 51.4073996}, {"uprn": 200004733014.0, "standardised_address": "Flat 20, Lynton Court, Pelican Lane, West Berkshire, RG14 1NN", "standardised_postcode": "RG14 1NN", "LONGITUDE": -1.3242952, "LATITUDE": 51.4073215}, {"uprn": 10007903996.0, "standardised_address": "11 Donnington Lodge, Oxford Road, Donnington, West Berkshire, RG14 3AA", "standardised_postcode": "RG14 3AA", "LONGITUDE": -1.3286281, "LATITUDE": 51.4177589}, {"uprn": 100081226752.0, "standardised_address": "23 DONNINGTON LODGE, OXFORD ROAD, DONNINGTON, NEWBURY, RG14 3AA", "standardised_postcode": "RG14 3AA", "LONGITUDE": -1.3287324, "LATITUDE": 51.4175077}, {"uprn": 310001598.0, "standardised_address": "14, Hagley Road", "standardised_postcode": "RG2 0DN", "LONGITUDE": -0.9682675, "LATITUDE": 51.4411482}, {"uprn": 310032425.0, "standardised_address": "26, Hagley Road", "standardised_postcode": "RG2 0DN", "LONGITUDE": -0.9686028, "LATITUDE": 51.4409533}, {"uprn": 100080226372.0, "standardised_address": "3, Butts Furlong, Brightwalton", "standardised_postcode": "RG20 7DH", 
"LONGITUDE": -1.3881378, "LATITUDE": 51.5113557}, {"uprn": 100062458850.0, "standardised_address": "3 DAYS MEADOW, WOOLTON HILL", "standardised_postcode": "RG20 9US", "LONGITUDE": -1.3912259, "LATITUDE": 51.35076}, {"uprn": 100060223249.0, "standardised_address": "9, DANKWORTH ROAD, BASINGSTOKE, RG22 4LJ", "standardised_postcode": "RG22 4LJ", "LONGITUDE": -1.120265, "LATITUDE": 51.2424042}, {"uprn": 100060223353.0, "standardised_address": "133, Dankworth Road", "standardised_postcode": "RG22 4LJ", "LONGITUDE": -1.120787, "LATITUDE": 51.2420754}, {"uprn": 100060225935.0, "standardised_address": "36, Foxs Furlong, Chineham", "standardised_postcode": "RG24 8WN", "LONGITUDE": -1.0443702, "LATITUDE": 51.2967794}, {"uprn": 10001320962.0, "standardised_address": "17, Longs Court", "standardised_postcode": "RG28 7BU", "LONGITUDE": -1.3400448, "LATITUDE": 51.2300347}, {"uprn": 310009363.0, "standardised_address": "38, STRATHY CLOSE, READING, RG30 2PP", "standardised_postcode": "RG30 2PP", "LONGITUDE": -1.0145665, "LATITUDE": 51.4587161}, {"uprn": 310048525.0, "standardised_address": "21, COLLIERS WAY, READING, RG30 2QS", "standardised_postcode": "RG30 2QS", "LONGITUDE": -1.0103715, "LATITUDE": 51.4549271}, {"uprn": 310059914.0, "standardised_address": "42, COLLIERS WAY, READING, RG30 2QT", "standardised_postcode": "RG30 2QT", "LONGITUDE": -1.0103342, "LATITUDE": 51.4553225}, {"uprn": 310008254.0, "standardised_address": "3 Brook Lea, Caversham, Reading, RG4 8EP", "standardised_postcode": "RG4 8EP", "LONGITUDE": -0.9594228, "LATITUDE": 51.464404}, {"uprn": 310040251.0, "standardised_address": "7 Brook Lea, Caversham, Reading, RG4 8EP", "standardised_postcode": "RG4 8EP", "LONGITUDE": -0.9588334, "LATITUDE": 51.4643718}, {"uprn": 100121306362.0, "standardised_address": "4, Smith Close, Sonning Common", "standardised_postcode": "RG4 9TL", "LONGITUDE": -0.9851249, "LATITUDE": 51.5186171}, {"uprn": 14049181.0, "standardised_address": "3, Kendrick Close", "standardised_postcode": 
"RG40 2LZ", "LONGITUDE": -0.8363485, "LATITUDE": 51.4070013}, {"uprn": 100080247057.0, "standardised_address": "12, The Glebe, Aldworth", "standardised_postcode": "RG8 9SH", "LONGITUDE": -1.1981048, "LATITUDE": 51.5116775}, {"uprn": 100080247061.0, "standardised_address": "17, The Glebe, Aldworth", "standardised_postcode": "RG8 9SH", "LONGITUDE": -1.1985385, "LATITUDE": 51.5115996}, {"uprn": 100061824929.0, "standardised_address": "16, VINALL GARDENS, OLD GUILDFORD ROAD, BROADBRIDGE HEATH, HORSHAM, RH12 3HX", "standardised_postcode": "RH12 3HX", "LONGITUDE": -0.3640146, "LATITUDE": 51.0719231}, {"uprn": 10003085392.0, "standardised_address": "18 St. Marks Lane, Horsham, RH12 5PU", "standardised_postcode": "RH12 5PU", "LONGITUDE": -0.3145139, "LATITUDE": 51.0829412}, {"uprn": 100062482574.0, "standardised_address": "Flat 1, Wigmore House, Keymer Road, Mid Sussex, RH15 0AH", "standardised_postcode": "RH15 0AH", "LONGITUDE": -0.1263008, "LATITUDE": 50.9487666}, {"uprn": 100062483148.0, "standardised_address": "12 OAKENFIELD, BURGESS HILL", "standardised_postcode": "RH15 8SJ", "LONGITUDE": -0.1355338, "LATITUDE": 50.9656152}, {"uprn": 100062483149.0, "standardised_address": "14 OAKENFIELD, BURGESS HILL", "standardised_postcode": "RH15 8SJ", "LONGITUDE": -0.1360491, "LATITUDE": 50.9655873}, {"uprn": 100062483150.0, "standardised_address": "15, Oakenfield", "standardised_postcode": "RH15 8SJ", "LONGITUDE": -0.1361846, "LATITUDE": 50.9655929}, {"uprn": 100061832309.0, "standardised_address": "9B STANE STREET CLOSE, CODMORE HILL, PULBOROUGH", "standardised_postcode": "RH20 1BD", "LONGITUDE": -0.5026947, "LATITUDE": 50.9661759}, {"uprn": 100061831182.0, "standardised_address": "3, Piers Secomb Close, Coldwaltham", "standardised_postcode": "RH20 1QA", "LONGITUDE": -0.5421865, "LATITUDE": 50.9370885}, {"uprn": 100061826991.0, "standardised_address": "57, Beech Grove, Storrington", "standardised_postcode": "RH20 3NP", "LONGITUDE": -0.4416205, "LATITUDE": 50.9245994}, {"uprn": 
100062512568.0, "standardised_address": "FLAT 41, KERRIGAN COURT, 16, WESTWOOD ROAD, SOUTHAMPTON, SO17 1JT", "standardised_postcode": "SO17 1JT", "LONGITUDE": -1.4022734, "LATITUDE": 50.9223801}, {"uprn": 100062512088.0, "standardised_address": "FLAT 6, RAGLAN COURT, 11, WINN ROAD, SOUTHAMPTON, SO17 1WU", "standardised_postcode": "SO17 1WU", "LONGITUDE": -1.4025758, "LATITUDE": 50.9243151}, {"uprn": 10034867337.0, "standardised_address": "6a, Epping Close", "standardised_postcode": "SO18 5SE", "LONGITUDE": -1.3526289, "LATITUDE": 50.9247678}, {"uprn": 100060305293.0, "standardised_address": "21, MOUNTBATTEN ROAD, EASTLEIGH, SO50 4RQ", "standardised_postcode": "SO50 4RQ", "LONGITUDE": -1.3550759, "LATITUDE": 50.9819271}, {"uprn": 100061985990.0, "standardised_address": "Flat 10, Raglan Court, Mountbatten Road, Eastleigh, SO50 4RR", "standardised_postcode": "SO50 4RR", "LONGITUDE": -1.3541953, "LATITUDE": 50.9822382}, {"uprn": 200000713806.0, "standardised_address": "2 NUTBANE CLOSE, ANDOVER", "standardised_postcode": "SP10 3WA", "LONGITUDE": -1.5003561, "LATITUDE": 51.2059018}, {"uprn": 100060566549.0, "standardised_address": "9, Vespasian Road", "standardised_postcode": "SP10 5JP", "LONGITUDE": -1.476821, "LATITUDE": 51.2249974}, {"uprn": 100060566550.0, "standardised_address": "10, VESPASIAN ROAD, ANDOVER, SP10 5JP", "standardised_postcode": "SP10 5JP", "LONGITUDE": -1.4767158, "LATITUDE": 51.2254375}, {"uprn": 100060566565.0, "standardised_address": "25, VESPASIAN ROAD, ANDOVER, SP10 5JP", "standardised_postcode": "SP10 5JP", "LONGITUDE": -1.4776377, "LATITUDE": 51.224965}, {"uprn": 100060563379.0, "standardised_address": "52, ROMAN WAY, ANDOVER, SP10 5JU", "standardised_postcode": "SP10 5JU", "LONGITUDE": -1.4764479, "LATITUDE": 51.2250676}, {"uprn": 100060049602.0, "standardised_address": "5, Salisbury Road", "standardised_postcode": "TN37 6RX", "LONGITUDE": 0.5616196, "LATITUDE": 50.862757}]
\ No newline at end of file
diff --git a/etl/customers/stonewater/map_app/callbacks.py b/etl/customers/stonewater/map_app/callbacks.py
new file mode 100644
index 00000000..e69de29b
diff --git a/etl/customers/stonewater/map_app/config.py b/etl/customers/stonewater/map_app/config.py
new file mode 100644
index 00000000..1dbd5d04
--- /dev/null
+++ b/etl/customers/stonewater/map_app/config.py
@@ -0,0 +1,8 @@
+import os
+import json
+import dotenv
+
+# When running locally, we'll need to load the .env file
+dotenv.load_dotenv()
+
+MAPBOX_ACCESS_TOKEN = os.getenv("MAPBOX_ACCESS_TOKEN")
diff --git a/etl/customers/stonewater/map_app/map_page.py b/etl/customers/stonewater/map_app/map_page.py
new file mode 100644
index 00000000..c39a53af
--- /dev/null
+++ b/etl/customers/stonewater/map_app/map_page.py
@@ -0,0 +1,94 @@
+import dash_bootstrap_components as dbc
+from dash import html, dcc
+import json
+import plotly.graph_objects as go
+import pandas as pd
+
+from config import MAPBOX_ACCESS_TOKEN
+
+
+def make_map(locations):
+    if not locations:
+        return None
+
+    df = pd.DataFrame(locations)
+
+    # Create custom hover text
+    df['hover_text'] = df.apply(
+        lambda row: f"UPRN: {int(row['uprn'])}<br>Address: {row['standardised_address']}<br>Postcode: "
+                    f"{row['standardised_postcode']}<br>Latitude: {row['LATITUDE']}<br>Longitude: {row['LONGITUDE']}",
+        axis=1)
+
+    data = [
+        go.Scattermapbox(
+            lat=df["LATITUDE"].tolist(),
+            lon=df["LONGITUDE"].tolist(),
+            mode="markers",
+            marker=go.scattermapbox.Marker(size=10, color="#027fa6"),
+            text=df["hover_text"],  # Use the custom hover text
+            hoverinfo='text'
+        )
+    ]
+
+    layout = go.Layout(
+        autosize=True,
+        hovermode="closest",
+        mapbox=go.layout.Mapbox(
+            accesstoken=MAPBOX_ACCESS_TOKEN,
+            bearing=0,
+            center=go.layout.mapbox.Center(lat=53, lon=-1.5),
+            pitch=0,
+            zoom=4,
+        ),
+        margin={"t": 0},
+    )
+
+    fig = go.Figure(data=data, layout=layout)
+
+    plot = dcc.Graph(figure=fig, config={"displayModeBar": False})
+
+    return plot
+
+
+def layout():
+    # Get the data
+    with open("Stonewater Mapping Data.json", "r") as file:
+        locations = json.load(file)
+
+    page = dbc.Container(
+        [
+            dbc.Row(
+                dbc.Col(
+                    html.Div(
+                        [
+                            html.H1(
+                                "Stonewater Survey Map",
+                                style={"font-size": "2.5rem", "font-weight": "bold", "margin-bottom": "20px"}
+                            ),
+                            html.P(
+                                "This map shows the location of the properties that are to be surveyed by Osmosis.",
+                                style={"font-size": "1.25rem", "margin-bottom": "40px"}
+                            ),
+                        ],
+                        className="text-center"
+                    ),
+                    width=12
+                ),
+                className="mt-5"
+            ),
+            dbc.Row(
+                dbc.Col(
+                    make_map(locations=locations),
+                    width=12,
+                    align="center",
+                    className="text-center"
+                ),
+                className="metric-row",
+                justify="center"
+            )
+        ],
+        fluid=True,
+        className="p-5"
+    )
+
+    return page
diff --git a/etl/customers/stonewater/map_app/requirements.txt b/etl/customers/stonewater/map_app/requirements.txt
new file mode 100644
index 00000000..81943dd1
--- /dev/null
+++ b/etl/customers/stonewater/map_app/requirements.txt
@@ -0,0 +1,12 @@
+dash==2.8.1
+gunicorn
+pandas
+dash-bootstrap-components==1.3.1
+boto3
+dropbox
+Flask-Caching
+dash-extensions
+mysql-connector-python
+sqlalchemy
+werkzeug==2.3.7
+python-dotenv
\ No newline at end of file
diff --git a/etl/customers/stonewater/map_app/server.py b/etl/customers/stonewater/map_app/server.py
new file mode 100644
index 00000000..87f10e21
--- /dev/null
+++ b/etl/customers/stonewater/map_app/server.py
@@ -0,0 +1,45 @@
+import logging
+import secrets
+
+import dash_bootstrap_components as dbc
+from dash import html
+from dash_extensions.enrich import DashProxy, MultiplexerTransform
+import flask
+from map_page import layout
+
+logger = logging.getLogger(__name__)
+
+# We just use a simple secret key for the moment
+
+SECRET_KEY = secrets.token_hex(24)
+
+
+def init_app():
+    app = DashProxy(
+        __name__,
+        server=flask.Flask(__name__),
+        suppress_callback_exceptions=True,
+        external_stylesheets=[
+            dbc.themes.BOOTSTRAP,
+            dbc.icons.FONT_AWESOME,
+            "https://fonts.googleapis.com/css?family=Comfortaa",
+        ],
+        transforms=[MultiplexerTransform()]
+    )
+
+    server = app.server
+
+    # Set app config
+    server.config.update(
+        SECRET_KEY=SECRET_KEY,
+    )
+
+    app.title = "Hesta X Stonewater"
+
+    # Define the layout
+    app.layout = layout()
+
+    return app
+
+
+app = init_app()
diff --git a/etl/customers/stonewater/map_app/wsgi.py b/etl/customers/stonewater/map_app/wsgi.py
new file mode 100644
index 00000000..3390e6ff
--- /dev/null
+++ b/etl/customers/stonewater/map_app/wsgi.py
@@ -0,0 +1,8 @@
+# Callbacks must be imported to run the app
+import callbacks  # NOQA
+from server import app
+
+application = app.server
+
+if __name__ == "__main__":
+    app.run_server(port=8080, debug=True, host="0.0.0.0")

From f6adb3619bdb19da43d856ee71fcaa70d09cbacb Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 1 Jul 2024 11:26:18 +0100
Subject: [PATCH 57/80] Added some logo and styling to app

---
 .../stonewater/map_app/assets/hestia-logo.png | Bin 0 -> 17967 bytes
 .../map_app/assets/osmosis-Logo.svg           |   1 +
 .../map_app/assets/stonewater-logo.png        | Bin 0 -> 19000 bytes
 etl/customers/stonewater/map_app/map_page.py  |  39 +++++++++++++++++-
 4 files changed, 39 insertions(+), 1 deletion(-)
 create mode 100644 etl/customers/stonewater/map_app/assets/hestia-logo.png
 create mode 100644 etl/customers/stonewater/map_app/assets/osmosis-Logo.svg
 create mode 100644 etl/customers/stonewater/map_app/assets/stonewater-logo.png

diff --git a/etl/customers/stonewater/map_app/assets/hestia-logo.png b/etl/customers/stonewater/map_app/assets/hestia-logo.png
new file mode 100644
index 0000000000000000000000000000000000000000..8a49c95b9671ffce2f6ceff29c7bcec766939bd1
GIT binary patch
literal 17967
zcmeIaXH-*L*EWm=uz>{;X~K~sRX}=?A}SE6(xo3kdXpNdQ8=Q2G?CsELJ3v6v>-~6
z8X%z)qS9-S7Dym@7nW1*=N;o4<NN+R;~V!6MUcJM+H=i$%{i}m?S$RcQl&e=a)OGA
zitY~Nwk{PF%{3~jLobdU20tMc1n_|`#~(tBJ*lWpG93IlM3s_uk&23w>dtK?{YPml
zSlXoD9n5#WHBKF#aR1aCyAd19+pKsQqJQVGn(on8ub-d$J>qcIW1h>9yEkqqDSf_s
z=kl?ytIoYLb7@n{y}jL9JNuc?I;d?;X<@ycPi|fA(uVKix6$#1yI2DWr$FDOV&+3&
z4^%QKv2&>h|9Z?x4gS7Sa`a$pR329#2S1DvP&#;o$7aXD6Hrl2tHm7bpVKJ#{K2=i
z=imolKE?m{(*K(2f0s+V-mGrqvF+jQ9Du&w9M7hj#fiB@G-gIRtn3sKNh4`X*0Qoy
z`Q-M6I`8`7Q&f+`&cGiJFu?PFgD}PbZ|*Zb%J+KnD4*joUFzRKBzRp=I>9Ti6wVT?
zf9N0-(v@Qlt7*q%g>o9)01tk{?(F|xFBfUN8W?cZ?_PS2zs-TA!f*cb$<bZQ<84LE
z-U)FV<Bhgdch3i(zv~x#Ug9X?@Hu#r1;7$}m4E$y1Tb#TG2I&r$8|Ah;3pt4R(@yU
zRIi!;X<iLsU{Y}|$e|n_h|aATeom$Eg`BraixXked+8$^h>`s;>CMRb@!{F6L$6!@
zc~Bya=dVH1vCK=B%oQc?>;r8rE(l~YV%5j14XIuwaZ<m^;8dnlDtLU9FE=rZ8ey#%
zCNn&2nNw-T?5{YU!Fc3<+>IZSZo+0h{9rhLqYH_ux{sye75Vp8TaWABk{6*|>T%LH
zDGARI^ew$ab%p!idzV&nB($3p>Tq=tjb<DD{>N6n9!2CtbEw9o1dd-^?((NQ#0y~e
zZ+FuWRz9x#(c#1TPza`o+1=J0>2&Od@9}>bkq;8XA}xbtk#JH;oA2}nxA>2JQiEHB
zh%6aSoz%q&c$(j$Qq%a?!Kf<*>)*Z_@cvOSCf?yLBh~c3&sp6)7?V&F#RVyTWHMPu
zdn4>Wme-1rtrx3v#X82wZcjBBoql|d;U5vvx~ZGS$#}M&Qk}z6MK*A`C-py;e*>0x
z4z!D7sUrG{j0|UZA9-E$A4e+)QEq82ZEL6>HqIHXiA?8+y689_e(0{@zn$*HMfhc_
zd>;BBdC5K3q5#99ot%4w@Dal9yy8T!!QuaL#$u2|H)8)^xFmpp$F~38;xS#Sz8C*;
zGbWH2QvL}Jt(%v@dSZWXd9i_0B0$JbvbCS1B}2PyR;qPzu5I1&MNV|&Q$n;aG+Ji3
zJ?(l+FspTc2->oQ7(cR8H}b7ndM#dBMmW@OtZ^HJN>%gS&DHQJ#mK<S=wkc}3E^-?
zZFZU5OXoN6@;(}ZL65EmniU1u7WpeXQ*N;BIi{JN*;gGa`#@#G^Ovi>?mY%vdBNG2
zL~GTG&RUY(Qq)oJVl3M^TlNKNjbqA!y45Q(ZXMIzy?E&b+DY$;3E^duf4jPLu5C+0
zeGbzoVRtiF#JPs%)04kVkn0WnlAV?;VJLsy*4+CJPq%ksa$@Vt%!n1(1+2jMzD(_7
zPKlgQny>S)b|Nlnv?g`W+HvTGc$VYR^(_CVfh>Qh{q1V@CNzjI(!}h01XB^x7@v$C
za;ALKy$tUu@~{BQ!KUQc=~eo?$1Tbi+Od#iviWvo_s#scTP97?iNEcgVyooHOCKnf
zASavWIh$g$UA6F{(ao~H=}!-_n~nqNNeqRViJ?cUzr3yEA8HcSB+|TxpZm*~`@9a~
zsEnfSaBij=o}c1oJ7$Auu5b^qT_)di_PbR?t`c9_T+e%<p|JmQWPMwFr_f)S7@9||
z<amyq<bD<AL5fBg8^L}1P|iu+(KL5m|FN?RN(KJLR|9X>qmrGFnd5L73@s_^Qz&t&
zPii2L#x&X5sdvSCzfkWD?@lE;7z3ZjriOA??MoMQ=%(V>j$q?94jt;${5!0vPwUQU
z!96j?HI4*?M_c*YAqUCA!z>C+{tE0Hc!}84RXefn^SJD{M-g{Mq{u?Hmc!(ad8?xM
zRrk*VaTSIC0;+H}cvOJ!?4-jvE%*A8uv%yowN%?n)z*RbT$}!n1F+uO{pqxuZjW3}
zZQXWHOJj=Q2tsf;&WL?o?&UM<Kn9k54q`Aa&y`Jq62cnB-neP?wi`d95KAp9yZW?B
z!&()uGq+;~Mds)iI8FA}Hrg>EX!gxnGY&D~%s+=M_nrD-mAmbu>+KHguC>cV`%9MG
zH=g|M8Tm;{j`p0@NYCJkt57Vrp`1i@qZECBj8=CfG9+@+{`GaO(nv0xw$z>piM8yr
z0)rgPV)Lz60WRoc*#_6bK%Tnr;f2e{rKwNQJV#A`sAjL4#n*ED$+!*o^6kOZcpWwp
zg|BrqEI`Ky?qpljAh^A6%|>w;^AUNqD-;swZW~z8QLmUGblCji)KRIPJ^%KOT}_4l
zIwK5hwVI$oOr*Wm_&3JnKUOM$YF~v7ywkR1k{9<0xW<_OhOF*K$Pg@>(<@svGEIRl
zWVtO%mQf=5RX)yHVY_&)k52G!2l=QC8OC&??`k{Vn$o&S(D0n(AZ1>Z4!nb(tOt-M
zF@(Iz&NXHFOu(9elT5(?jQ_;>cZ7qS7kAQXm2OJfCf$F|t9PO?V0{=)d6?p8*OA=@
zJ<+&-r2j%lVA-NfoOyPyus`Mgy3xIuPEGvey}jq|&Z_`*=}N<<D8u#_F)gRO@^;@C
zDtHEja11y8zOi~J`1}o;zw;I&KZKoxbMLNd_^iNpapsyYI!vaF=sP71o7|+0gK^_p
z?#eo3X0gO4n6`ewYpnC1)ZLxOCd}42N+@H6u(w|0-@hsyTo~jc>^SX?4KQj)KGE~G
zQ2qA<fy^0Vsctl1>ER=bUoE3#q8S~1WotPT_HP!>p`0vz5JvP`t-|BUiR}aJ?RB{D
z9=AK5&Mtn|MS<>xRaU*I+o+)qGdm@ITC6(y%!dSBjEnW0WzJa*-^Ee3rJ#yIYsU_h
zGUd&f(HS_E=#K+r0s$nS`gNyBnuv4XnH7&)MWXT)*~lpA?KrPy9RcY1lvPmw@Izw8
zNXbL4z23^<eBSM9)Ayz6_td-VP1Wa60x+38Q#||r{f$!BZ>Zcqhhgu?0-Mg7B3TRZ
zFS|0ulewCXA`m_SwuMu4!$@h(4UAAbj3wgIz<{4nz#l8=9MpV|PaOcqmD9RM@^t;j
zw(}f(rp0OAJauBQwM<<dtlG|5NEigy9O1d{!f_6MBLA4u;`>C?6g}IH>}-GKO6>a%
z@lW~jme&h|ygS?P(4|G~5*#b(J>4!cYf6rMQ1@;hf{KrxAvK9RI0->C5qjz-1v(pe
zMpf8(7b#4ptLJDZRfqCvWPafZT`J~-EZE5m*DBSjU=jA!Nb#Q0H6j_7?J=Q0Jw+GF
zKCnC3!HUx2CxS_5RqEuKkVYLg+NK2V35W7Aqn5A|A99NM-kNlMV1V$WwWUbcr`4Yo
z;(T(ZKaYwF4<1b@F&(z7F^9>=8ZBWZWH@*ProBsF>}@%R`9d9nG>QXMHrZc%m9CP$
zFj2beC>%I3Voo{w(?B^VbQ|lfNhGT;A-RtG2eG!VS*CS~`$i1^>unkk4Fm7rw_T?!
zFMa*oq}-pDl%9)qsGO})>vvS!;&7>?@5+#F=6gbqo3KyQAFoj!QdN*?N_R3y;~cD)
z^8Uz8$u&EBUGb%ml3)rsTM&Cy)co}Im`lQtLk|x62?6C87Oml#Vscf-#nXeG9yUe(
zzAS%AXdc(4#<X#I+gf+m2F3*jO=82PZ?5fVXD_92^Xxry%UB;A`IeY0@b)v(+xD?5
zb_i;NTraQ;+dc<pxhS0;+A5)tTUQd6t@!*ai`qtu#QyNZE{?qRUk4#82cgSh=xvR*
z|7e%&Yh2{3lV%~dzkLnVy<?1U8h>=!-AL#bH9?(;gHiKrn~K5JfIntWn0<X<8WJc%
znPe6YGSG^v0v!#@JxT$0J0@+sJxoR?px)I{p>%=_y8x#uKL`ZxhcU97#a_b&lCXS%
z0m|)(>hVa;=b#^2IibsHG5+AR<$BU(i)@paGM%#vLutlKZ@*N<tr1C0NxQ^$veXzP
zrW4?+W)#8~nyE!mZ$rYIMK|9qrNl0<EbZHm`-8%U%H^Pt9*+H<7GY{6#onhp7oQ?`
zzFL7ctm+DceXoZ5`t}Ff3uP*dcFy{@m&uRXWX<Aj&(?X}IwFEE*}_?&^0Fe2BI>kC
z9bT%<O^WmuMJ(jBlH4Xee$-@-2bSJ)N<5U4L<-qjOp;N$72OS2u|<L9u7MIcAc#8^
zAzQ9aqOrSs{+eTrC|M6@{njz2Vav#PR9a=t$YvYSt!!qcUnLkLNVnaZ^-O$B#?CU;
zmGc_=3UU56+4%hT!U3!T=k(qgckr00hK18~m0Lb1b1CEIek>q?j4)(rFH;E5wU^+(
z?0LBU4ja;IFAr>f3a`w1T(_ojN|<Fe&8V%PqiwTlw`Yn>tMfg_A66?B3&zx1mu=gu
zSW*lVlut)hjGt^A_x6cVJ3F_yP}5z<KMdAYWLfzrCLEou7@oz4iTBI4wKRg+aJzGM
zuhquNPk$ZGgeHzqa_qU33fe}~H60f~6<R4NcM%{%0EOkm_cj9@_l+1?k?ZF2nC*DC
zz+`C6<;nOWIi-TQ1jIqrSoRqiIq5|rW!r}vXNS^?4bLq{bp&MRL>D8MySvb>kDs|k
zy&>BRgC!Q{B*zqo_xxHOYC!i@t(ybezAdT$d>%IwTEv&;k#BQbuHQTjiUL$*T@=<z
z-IsY5Hk}DPRx9f0QU7s8Ya8Q-)|)HB^SwNfcsYoRzgsrc?AxZ-1f$xaQcNKTky^DQ
zc|04Dk`0nJt1fs=F&N{YoZfG^aBl0U=Jy@*r`qizt?5<2=c*IDjSH-BnyBI08-H|U
zqcy8&>y{LAC=ln}sVog&<W8)}!`$bjOiqK@13$6F32m&{;#@se>5M{I;<?LKd&~;S
zx8inH0!afa+b5J`>iP!S?N*Nid&g~L$2kY87B(S&GzqUaa~)!2D}=Bf<PUQ~e)ou$
zZ%o}cpY?mEZo@{5#jN%VBi>Pt>)O!kURM~IU-5xmK9Q4Tv}kTW#JFX&ytz2<9E{1g
zR&=P0a199bN@|`IpDm2HwER&&-t|L^K-l;DmG!7W`Qrl%2sm<dfckt*LtiO-*OSuo
zu1x3QYT!A0TPCjCAcw!I!gnhovUeo-*)^Hb<LqQDFG(dwOn9EnYL@5fHM-S$*+Z(x
z@qtp|Uq*6Zf^Spw>g2cQ^%4|*>vc~1WH_;xyA7MP%7{6pn?t8tuh>7|sMQ_zm3KYf
zuejxTWD`faW=w%}?c08fIy^_VUNJF#_8+z|iW&RHB(hOn)_zbkZfibdp!aH@U>&2x
zo_y2+c$G2Es+}*%Zk6lII&5ew5O)bRgO(q63C}Nv({n=%oFBQ&TF6o?R+^Z<Viqj?
z`W1(~L9+$AW^keo4De9l6n^j7nX<V!_VrOai;UbB{-|Dh-3m9RyabJ=nfHl#au9D}
zx5~szBpmN^2mS2J13A9vc@ONJ-yu1puZUK?D`L>cjhSJ+LtZhjzBe?~uPl_ZG~zfV
z*it7S^&g*mBCP!(;d*`JK1Xf2bAb61pBAuyXG!$JJSL0pw65u7MlDPz@aFlJ%>tcx
zhgw@pQ<%XVj*AARl6MZS8?_hR&F(#EF6+Bw#FjZa&7ttu?$5wOt<kJPk4EC%=TqaV
zM=!|3_cQBWGIA3n*Aj2W@LR{Qs0IG+M3LJW<PBaBb<>aZZc48GkUDuWT`ea3c~w(v
z?LtkHuq^K3RlkSXM+|?V#g5WqmPCUnJ<MOUW^`83k&;(()t~Z)mz2e6@PX6Y!(ui=
zhBPMLowt(f5a}peyfxVr9rZheE<^K~-H=b_NTYOaI+nw!a^vJrP+*jUtn7Un(YCWt
z*kzhprdW{_y={zli?Ws@g7uGwV0us6tR^p6oOO{}DZw~>v+#GQy}S+Mla1ya{2W}B
z6CLq#ems->rI&c=SBQWvW1IYOB-++O@=BfyPu&;=QP>s#^y0J2ka#hOL{iYs{OPR5
zT>MDB4pv}5O>*bhIQjQb+U&SvAZn6oYoV~qoz|la6;KkP(tn0(?11Db1>DhRBVnSY
zAtAs|P0|2AxeZ(FpP3H6xg4bei5G;>(X}iuiTZRE0@`5AzAie_(WZB&v-~T<LHbVn
z8MvVWm!~PgIedM@TW^-|qntjJW7N&*y_e_E(l?L8*Etkaqu(ck+h%EUT{*aAx=C8C
zwh*P&K06Ab;}A6G85tU`pCD5c4I1>`yXyxn|Hf+g#wttBK%=<lGp3(;Lf{X;qj1em
z#tcX1hqCrOjNK^TN`E@SPK_w1Mg(S}6s!Z>D%E)g^TQ|2BVBEz>p*q~#m_>?w9?|{
zV)sh@c8(6e1C?!C?_b-j<do=R2hCVB1y3&bx<%w0oQ6xa+e|H;EW3U}*XE?IDK5TN
z`_ew%K7u|;s-OhzWWCCsO5q2sz4VToNHIWIcCycG8+HT1o64wNN-_9R2H#j<6o9Nm
zNC;mfp+IrojO4@J^bE?~=V&M|^1t(&kE&9Evyx*^geak7(mb}QmzCA~4QOxHo(S&W
zytkDm-hmZ2Sm&%-%1%z*+My_q0&Zfz^atq#j_j_I<431<a>LG9k%ywuJ$h-JRkO;!
z4AC0>>Cv~s@rO$_y(Kz~fwWc&QzpdpYjfLWq$!7+Sq^#AN{&-_J6H51R!8z@SJHTs
z8qse<Zd_+&;e-sHfp;>(L&fYYEjD958RuCbpz0xwWn0){tv_nko}lr1iE<Ddtd%hP
zTfKm@eg~BA_Zk;aOLea4uB3QYr2+s^1)LI?IN#q(f?TH&auSZ1GLH6!`YK|Cdn<Zs
zvmi-G?Gn6Pg<Z(c{uSVqW0VeD@v)L4g~=1kAFK~}NPVRO%H7%6cjWlkR8cd8krD2R
z*Lam7Oy1%w01p<cZ@~6~?=1JFOvux`zbT#I3;{S~(I1S-Rs7@+JRZ=jy3~kCmlef}
z>ZaK$?E6Yr-?<ew{mwKY97X0m0i42dHGQe6zrarQ_+O8}2v68=l5;IIx)oFK8zdeu
zoEGW6a;|}@(>EduE-r=@<HL=Os)*wr`-NE)`}2k!A^JnDc}|vWR{#<IqpZ~jY3vQ%
z_+K|Livc&F+|Z~x*ULJ14>fy_@X`==zqiqoZU6pxJwyoR;Vf%0%=>RE+*K;T-(anN
z#%TcPU85M2k!nsGXZ=|f<CxHh>h<fMvo>YBJt6@@;kg(<)__F5r}fvx+A-(gfR>-B
zd*J3`yF5ZHNs@KcXSf9@8Id_+LOG_KM+Lvi`%0IVFLBxY@Ba7rPis|N7Zqy<B-?qN
zY!eLh+EBym9Bv7`Yk#9$c=ohXCw!I(47GlCdVd`f#C9qvt1F_LRl))kGBQy6Nw_Vp
z`i7p`|LuS>IIkQym>Q_HQMNQu@u(Mf?3^(zfS-#;Io{957`RXS>l_80TLgy=CGvv2
zxEkcIruNq^X*P($0e3ZoBo<$uYDc2lmzPq+5hdd-VKV(b;7Wna<iPjz%A<g9{O*@W
z`M&#OHD}dk8K~bi!#U$x<Ne~kE^p6DeNOd!w)u9O-#2D#hm>rZXOBkB-T3!{09&er
zyX1UVM@5g9x;bN}Sy-MI<6Q^f2?`3K#Oz4m$DnPR&6QCQ>jZTEPe$SUUcsd<uoU0*
z0FbEGh$WcR6SPe%=0;ME09M#t^w$r{-sEu`R9?2Tp<E-`sBA@k#X{_{<PUcEC;|*r
zMuRbASg)Kfa#KvjXi+nLN{p3Eth=mL8t%jA8zbAu?O9(G=<uqu??yKgq_X@U#o~7-
z0O*xxWS1c)3>&!8SYD>1w=%saK4>&FYx=FyKJg_jLjS&KZe}KQm^WT8BJVBqh=%9)
za_QA~$1{xy{{m3%MO>SyGNu1GvVbK%nhV#ne6_+BUfg#9*m*(4AJsX*E1#Mr=M!`f
zJUQTQ!BFD+lMhfMo=XUSl5>#Tj0|gd%@UtijPINXD9m7Y`3<tN)72eWPGZoxq@D?o
z_`#2E<NM=+w7A-o-p%LM%^F|vhJ0@`hM4L$Yvb;tI=RFJ$>AQ4_tg*#G<$U$KdJJO
zU{+2Gh872diyuw}04UKWL)-@|+sn?{u8z=gai0dK`fym&<sJtY=<ZlG@u0DM)m)M)
z+H^?i#QS4>R7z?w1t2M11N8X^TAK{1)qRqoWlOxDix`{wC3Dz@AZb^!C<^fNPL>J2
z8ZIzPFXWedk6=nCULWEDc}eVM=dpuxk_o|V6YMf%g@PW*);M>L<Zq9WPBWXXg<qWS
z^|v{S5O-ejPj1IF)VF*dop)|0=jlX>-aG+Z?F{@V!sOt3eN=)<UKFS+rx)5(s>~_<
zUPdi-srV7fbfajnKFe=jLLl?`YOL&fc~vuNGs3A@#BPF$jtl<cJ5-55a!B{(B{z<G
zEHJRh>qPQDzY^VH&jCv$q#V;_Og*@?r^zTK5~-J-HVju`{*4-#{+MpCzMxV;rII6#
zi6veB5*bBAdE{8QI}<~Q<F06q*+%YQePu$pN=@=Op>|$hAm~oaVf)TwfYEpnBYN`y
z=En}Q^(^kX|Bd!W>-@ZJzBU!c&eiB)oKGQ45HE2Su2SshGb<u7N8b;VmPIv}N|oxU
z9IQ+g3>-~8hTkPdO?iqTQi~HBu$D#3$|_-g^L=>U99S8X&)~_zuE%P@z;655Ym4J7
zFgi0AyzI{#VZXEhe;+_{2<EPKccb2H@`K^Q{>E}5#5y|#mb|}U3gUHwZI^sImn!u{
zFy?83qRd88imx%hVfuq9e3gFoi-SX;IzWkpgGL5SVq6`)=xcYrh>SQn>sP;-w@BeW
zYe{&yh)?k3xh(apNe@;i=x;Tx0os`|ii?+q;5)ZdY|*_Br%92s-2fnkZ2tu8lM00S
zw*v(CjbT}gRzB_E#ZpVG?f2IFsMjO#b*)E-A28)}ab>gAB6HhSH~`CghEt!7mfT_i
zCZ>TP8bVfn5!rG&`%whDwHgV?oT`5x+O$%^!s96JF0Xu@k8|7<>I!5GgD01iarXF7
z1Fr`+WN>~QH;qfdazr?|yp=L%mMy_Cj!&Xj<7w>0{^_}gX4t?;2ym$S*{+PVO&6%8
z9`YybnSrS$n1-Zb6&!@ey_bl%4l*o!0`wL8-XrTnpOQHTiSlPu)mLNjtzHsrOECY5
zCZS(t>QBJEu}h0U1}<c5rb|gWzLheRb8$`wy$0Enl4$jLItRJ_fiz4pkwhwZRLsBG
z8orOWyKHwr70|)YjeS10Sf`mn4U-)smV|9O{Ve|F4}uj8q3@EF_<OYXET@t(ADO&}
zWVxi_897YhE{z&QsVlkU!-9p8#N91R-{yUfluP+gx7lr31+fE@3|8cHG)aRt2is-s
zH@a)YfY1p(|5$zd*XiGIIj-yQ`Qvq2kdyAhiUN(}PM5DeVQAu;1i_75?sys-g5~JC
z_oUEj%^943r^kG%rq2goXxOTai23qfA@`_XFRG<HlL=0>2*UfX=w!TGAB<V6y|;a`
z=b-LaZKg}pf~Gj_@!49k<_&A?32Jrs2Fz@gMj-&T5OK+jf=yK9TjzmDyj_Uw@o61z
zE2D&Eu|^f+o!6CYEiS1dIhHJbgB-#D9m+4nJ?v-{qsA1x0>(Q|LF9nxdYyaw&p_>$
zVpnww_YMwaR!Edz8j1%<ssUJG;h~lBqX-%rlGP~1sa(p(+<ac=*{%KL<b@A5?X>8S
zUli&=i~=8iElGoTK5sF)cP-i17t9yV_jUM+P%L^-X*Er-bCXURsbZF}o(QYOQ(G;;
zBwN?S$;MbkrGheYcCuJsv`c-L)p#Zx(HD^mHQIPB>7)WumiM;!&zkKgdY{N|RN^;}
z9*UnV+~u2hyG@DZ4gV7B5MSbq97`oX^q5@R98=j?VB7Q4h}Cra6o4K?MOL4Irz~Pw
zPp+Z^=FG^iwtsKEs!A4^oh;jREPZ6ca?B7I>3dbi08sWoo=_zZaQ+J(Zn_{Le>NRD
zEKLltudleS7vWG>U18f&hkJ1`rIOH77hzfV_FJ8Fg#tJB3Cv-00krO5EfX-wP<OXm
zO&`#ocaiF2PsI%kq)R2ranoo;$^+WzpSk}q|IHXR2FAeRoI|Q&!Vz1gj^l3u%i}dy
zIyzjfpFM;W@7Wg*>n+kIuyy;@?-JBJJv6hd6}6=LyV!EpH|?2}3Y4+xdTx$3mgc)f
z(iI|OGksE4lUP#|)RkzhifZ%!sEZCL^>9dbW?hNHLPI5}h(tio#b)HnOHWbVK=`U8
zYPzL$m+53zBkTK9h2FVsI;W&b$*wnsj(-i8ZsZ?Rj&Z~$M+%zPCngjQM6M9&RWh`M
zp2wr?w~|c0VJ=D=lfanp=Lq9x6uJlN+t|!b_AMS?oVcEILu@8HCv-rt<AQwAU{#6)
zSMlI^W;xcA9GL0rGTU(-Ntc$wLpj#i5BR`$n=8?+(azq(D|ND9jObV0qJLe4$57FY
ztsRrX1W%_V{GzCTIUs0II+w9vU>hYiSW;;H5#RDHvu*A3gVK)0P4%L9(tM<#QQIV{
z?bV{i=mWOV?k>G+EYk9^Op5jLhVu0tc4owpGQOPgn=ve<p6yn<!clHg78@Th_m?DJ
ztM}LQfC}!aFXoY<txAfq;?g{zasOH6J`TSC=O+}qxeu+KOss{5M>Zw8Vk900e07r_
zVQsf-jZy?pbCGbOyU7U5_GIkkh97y`tOD7DSk&vj8bzC;l5uh0QJG;?d^KSC91n-0
z0AEuh?zHn1C`5Aj#_9K#u4rn5(8qRJ5!%rbP%&YSxf5orw8)A1*8dE}!!(Eo(mq(s
z^oOz{n^~KM6ZLeZk@7GmRV09t2fdOY9nZvDQyBayzg0H~f@~LAlk)z0Z%Qo9g)NQ_
z{L<*RM8V=gpz)bn8c2*`T<Bee@az?4;_S)mC!t{s=KVgwm_ONfUB|9)S3eVeXIYox
zHunr|zFU`QYlnN|#kdky0tnHO?MeLvo<02MtY^93UPk_EpngF=HJvV(R`(qSZ|!C0
zYM)vIS;kBl>fS@^lv$bdgqnwXO@;|2h3H~}l{8-1u8SkuO&)MRj5P+RFmK_o_$D@W
z4#i$ccV#d);ytXisGEj83kE-T+A*Kb6!=1!WpZ6#$Uq@Y;@mSfb`}>{gKBc>FZ@Tj
ze?ysGH{PM42KL9-BZ;|w#@{NXE08XhS}eB{y4siPWU&w4avnvfOR&)*UkbUqkiO_a
zjdxy4Z%-L2hX3qGe>PLp`fHp9{S{XX_ti++@xo@aSo!q98tf!WW9)8ex3*tm+H9L+
zET;q?O%&*zqWyLA_{{4{?riy*>CK+}cMoy{eHmiFw(EM8w7Z=tiK=&6CN3SHY;)yL
zGm-OUlAN?MCcl~=XL@=WqM5atQ#Px-g3>h~ZkertcjUE!(*9onlcJ(j@U<oF-hRV&
znxybM?G)a|Ez{wd*@-goFw!IZo1sU^f#W0lWfxhVXPt#}o1+rEo^^&{Z++H<RxAzN
zC`Rrxn0fvO2HTHOBf|C6tu|7vR?|16?N&j2He90*6u`!pe$wiVW^58?=Gg5VS-TE`
zCj3rJ%4IufOO&Fvp7hK^jh=?9nq^Id3XzA4FNci(v0HX#I5E2AOX`v!E^NX6^P{$Q
zzV6?L=uXkK^mF9a_oHe*o==msWE9A_1O_01&qz0AMWlw1kAex;Q~htTlz8~4o*(_<
zHjJ}Iu{+I~uoDgUt5~F&(Y=V)vYBEqmC@^aBNTll`cB+8!fHyIfqd;-@^Fyf)^vly
z`^sM_66}FD>kUA<?EwC&BwuLq&T-FS3BH>vI~&qum~w-7srM0{srQL`1x_i9Ajd{<
zB!{K~(xCs@<JgLdakNDx(0P10BJ}2#`B`-i4%5km*~d<9;zxNIz85FoAu%?Zob50V
zWH_x+r*nBLW|hm<lO1kuwno1IYJjX~2Wo&O<NdvRgjB$$l;KYzZfvbaU0i*yiGfK2
zusuRaaqXyC%Jt%^FqdjYqFUd;qE{KIQ_|Bfvr4mL1<(o=OSvOEM8F_;9TkM>Ufrz)
z#b>}>*<3Ku2NIHf`z}*!R33W1HKT{0=Z5JID(aw52Qt;%9}2JUjS>~rAK^dL0ss9^
zptbvdFhbtJBotKa-U+vG;mE)hi`pp35mwL!P`VV?Ek)!;R=2VtH2zcXjlO;&Tn<f@
z^RVXx5*qqIuKg$I!bJ`%z=~uPBQHI6I3T$WKt)y*l5PyN8|obJ)$(bb6^o{*>J@F#
zrd=@T09rV|*e}iVisBiY*U~O22CSYNO5LMtU(eHrzBniVN4$kg6+BiVvLk8yvsVD-
z6cvh|E~}NgeX7cz@sPdN-)pdE4(5jy5eI!rEmg<Y#=6~tKqtNCq?w`cP*$?>H!uso
zq&P<pXhF^1l4nG{!-Z_0DoP_(@Y=ACl@ER9Mv(p9OBbc%cRx+KScmz}&WJqZ9{bwY
z)LvvN;bx~-3^mQhxkYeH&JG@VE%B2%7#vkv1bUo~@uyv*>_WR8T#%k7yA>&1y&#cZ
z97bH68_M{@4Q83$?iLWZG9}eeP3EGlvtFwIIv;JtpN74ZiaVD#tkn?#Ef8B09(xF|
zdG}K~N9}y<zG{|Wb%T5(X@KXpQZQy!8za+s)cCQ(4^9-A^g1LxX?)dy&nd>_5hYBH
zu(`MxI!<Z5D91`7i_k%mJxSf|1P-;+NfKN>LA*4ifupkKHFscQedcq`v`D#ftJBXB
zZ+CmRjN%%d)hrsOLiC3vxNb3J#_fK(63y&8G8l|e22A;|=Y$EBhUnih-w$Thp9Q$=
z#*CtR0)sYb-Pa0}qZPNO7L#}9M+zA{RXiLUnafe^>K_sG`~DhF#g>3b=_}?XpYOM*
z)4EQUCfL<|)g`$iJH4&@P8Ph7E1=3!{h_x#C|S?DoYM8DAZ5MvBj{oECcNR-2E`fZ
zb!nSS(j}a6Op9{P?UpZ=8Hd|5v`^}WaZ>l4{n3Rf6{vKCU^2AZpTIPyoT4b=)kv<P
z#@~VHZ<%DqVur3G-zvjzlzk(h-$nPhsSyg?Z0KhJ%ze98YePzobQnl0#z)1SOpz<}
zw*h@1;GbFK%4doJpE6jc4Aee8ft8at-n{Y3tp7;Snad&4=Z>cMODdC<9Cysd9<&GP
zHm<pBuMA`gbbZUrb?9}X%Za_)9(3-y!aZ!CDuuMCN0!2d6qUTm*xZ>COD#o>vOD->
z0Qrn}Q@mtnSjnvR0yb{e&~0XX$s+ayP=@9dSFbcKgj9kMJJ6skptMSvN53`!F>5k8
zxBO$JeLj!*;Je+PLHUjMiK)xUc}6+uZ&wMvYlUS5F%e^>#Wh&2wad_pd%NATr%m_P
zHnSs3)t?zectUcPq?+QQo_h;xKU>St&Y^o1)$_4zMH{Vy-dn&SKQCmyn8Z${-dgOg
ztMndfl3lrJic9yIs>|N~p%)N-0#q|h$*zwy$6kRkCyu}lAw8Hv^l1s+LD#wLHG0{z
zk?zdS0iEACrHJhI7F{^t*}kRD_?qDt0!NC{B39!EuQvlyVMT7V!p#V_tP=U9`TR+K
zhOcJty6cx9bB^}=g=ntf#><)w=E+P5kMVa!{@q&~m+FH-!`6Rmz{WM+^kuq<8L24l
zWTEG`iE>frAoQmb4RY(Jb+gS8tL5aus%RYHsY<lvZV?e*PKj8^16`B7W~iSPnxmyi
zz|qok!5Y11NC&s;c`ng3IWN-azoap$;G}Yu^z@Z2OX0FUP<!}P)Gm4`JNufdZ`|Y9
zt4V^^jMZd#l49AfXY&CCrC)coV)2F&fBpPBk9$*`@veVJaWy*Qnj<(W-Vl?(bXEew
zz8f^wS%up$D4U7a^GKKAGTa;3$qcgTlu9MP^^I9neEHR6#Wp22w|UWmMfsTSGfFnm
zpZp<bX>l{vZo`yi@|jnDU8bd%z1$m?3P*j(p)A_r-Trk;4aG5U^ligSuS0%iJv&+i
zJBP-tn?hzF?q(Y&wS?a799^Fq2%G7&)XFO#^fw7?=uf3T-Il`iB7I-TSEDE>=&GN6
zoqC`EG{TfrzIfbsX8PfRwcfqa-7mz`tK&>3SEaTsx-!|EEYmV{eM|eH4KsZiPxQFZ
zL8#~ZmIda!zR)|crj1)7d<UZir>5PHQhDwg*>Shb=jTsbeK}&?94SHwT}*uYxoxq0
zjqKpK!IF0&XSEu9E1TEt)l`=EAn?ylYD-W2tJ>KkdDaV4sJ!=1dQrw39~RfrDs^1h
zL32yeqqu$)+kKuPD1u*|6YJY51CzL67Qx#UrB3Ixg-@}a#Ag~h_eAJw73}rYC5kgY
zuGqLWsM_0bbtTye#-*IxFY`ePSrSB;AV$UFx7)^;aQiibNfIMWaR>Sry=~2=0HI5d
zs`mG}Ojkn1WsBlDs|Xd=Bnp^{-+s5w%q0FbcFJjCsv?toT2M~@Iwa@FM%#wzuAQ#0
z@m8VrR^HMOnA?klDLHPWx_u+9m^uVl9;sIpsTLOW)h9j{$gtQ?zeYTisF>5wSha8|
zg%D+cx<rDTPMXX$$8XC!sdx!DJ(__U7`<-pKYEw`@4Qm5TD}vj8H|}=6{?F_+#JKi
z^DpEzOBj!nD~X#D!fHI`*>#CIVk3)Tgr1&Y%%{=MPqj;}r4tkv0>XBRy`Fx~7h@fG
znA)#xLNKWSzprSr+r_j$Q$|53<k+cesKs=e_`r(#Jq7F<Jn_<G8Mp~Amu3j2F3~8i
zC!xmND-#Lo;%_KJ=wmTf?sM=^RAl3al4y&!)nK}kS!nLCN*5`sE^&Fn5&nBdwCcWU
zTc{cPp2@6dbU+hvGbg%f-m%EDy-ji`s;OwBWi@9obLdo6;e+~DWmW^h`b09!=>Y|{
zt{jbE^8;ipc|siIGgEeK_56J%&1;Ge3;ZPPz3-xc*%fpjT%ar~XD1J!abEpNuZf|B
zAp(F`Q&EdDr<X}9882=@j}EtvrUTR~SQRCNcHfV0r<A6QC&=-e%gTZm1vcrTkv9!v
zz}##xQI9=VDvdZCIt*7|TQu2uytz}1N6rgFvEsT*t9=7+6#&KflVrMbO1Cg5=T6Mn
z24ajO-8fT43LaMo9fY~!kc&r#8s7GDzJ3xn_(XC8Zw`CjXAcDX<||j!YrJ+8dx!IJ
zMPfp>`8)}7bvi&9)#Et!ZY`2u-e#0xYCz(cu9&csS62g&oasdr4%(732X`HC&9N0r
zg`&a}PGIOASW=haT{@vrzhscirOD5~lu*6XO4AEJq(^u8=Bd2?2M8{?hDm3$Ibq2O
zGMR|7*m)hx=uaBKZ8{g1XrwSX=!Rfo@rKZs{ha=O23v>Xlbcd#5Ni464{^lP`KNXq
z&%pUbq~seXd&v9@5U`<VZVHr>({bKGV~s_!?j)j7>nj>Wt758*nJa+FBNr5aCe?Io
zPxZjjhU+)Wic99I35Ku*^LGmIIIUFj>p`YN^JafL@8ajCoVc148~fLHVhUK<7wNCE
z7&KCbi$!Ur*moy)^_GkygY}1&EH^(fMK@p!k{1?7D3T|1t%p_i1Gn5liRi~q>#n$d
zP;w;8F9vLQ&!vD@8phU#HqpM&t^mXnvB9u1o-^<S#hg2MQ%Yy7cfF^rTE6(}z|7K(
zY;%~oX0mr=l!q)4$BdU}lTQ`HK}jT)>RI`A8tn2;*CJDVyde)8T!ruv3-j;K|L6-X
zMjB0Vdi%`nE)s;daIX1zewy}J@B`Is`$zp~F+lui)hM6t*b@~H6YoK8>$;tgVS)Ql
zT+*|zF14bZixignY8Dm$cwH}hPgnrrLb<WU2*#AN#Ycq1#~8GGOU<8Mm|LfvxBuHh
zj%INw(KLv<VT?Yg8y!!uT8McR_|}W3YGZABN;5g+zb&XJIj*>gTcQ@9Cs}p}ykGmH
zlff8JhOArR5n9(z!}5BQ5OO>c!Z%|~zNv5V+;!8L;*=m&b0NOoPp)sDS57x(NHYl5
z$KEMA9n?sisN-jN55>w>e3X=w&D*h8%73vnzFr;HU;+{svJF?8zXPOyL{yJk0oe}R
zs7?&SPWJp_knA0T`9g6fZ+0rBFv4eX{tr;F8?%Nb8nXbq>S!&=aVbXIaX?c0d9=G-
zKgGl_MlG&LzJxgBS)vU+?BYl1>x?f*#d+uq6flcX|9m~;JImU6LO1@bxUrIBrIds@
z{XQv&2GPm;QbO2yGl`nuavliGoBSNd>Y>Y5<(JgjULHkUiLQh94sZg62Bvn-1Z&f~
zg7#9LqSJkb?B|875K|X9vyYqZeTvOpQ(ZvifdRUcuIoi;7pM2;v%#i?(U&Feg&m!d
zAz;9GX?KA!a<WX6Q6I}$MaeXIMC6b-p({vs8t_h?ALjr>eq$zk*ldl<W>RFS+j%(u
z#}>dN`uTdOsT0Dj6<s%juam!Kee;TEZmw>$V=JWVtNm4mhH^@jaT+9_dqkE;_bjkp
z#de9p7Ah~n`SlJ9?$&nGp4v7<U931@$Il1y6X@6$5m7jqwRm377_}Nb=M_@s`(udC
zn#R?dl;unD3evSescTe_p0j#Xw|HbXc99?{em9-b{>u?7-3d`(3AmIR)PE0hf<waS
zq$ItrD(vpQLx_NYk)p5|s69$AWRNESZ6ccCw6&4yJkthkPOamVkR_Of)NU9lRDj@|
zA6I<2UBc$uT*zV$8?LA#=C<4Aj;|P$hV>>LWq<wPU(e;e287gAqTG$H1zVg|-@Lh|
zQiZx2*6h4&%7ZcU5rF0q?=hTtS@I-KXO`1|xl@Ex-4E2vM}XEj<tDGdA#r+LCySV%
zy7@QBHHuD-Ail*dmPt3;j%>GN(dLD9l&4{h0}0lOh)F4z_RRM4K&7E18^OVR(v^+5
zV!Pd1(UA?_%U(EX<u+$I2&+f#J9z0Df+ET!kO)Sc8A$vdI<wE7BMC}4ACQM>3D(Cw
zSrpvf-ZO9OS}o8j!HGi*(oE8XCQ?BiCfC^ikzWkq=69=(>-H8e73be$S#Txh2GsjO
z`%Bdv#o_tu7lE3Hdo%qNP(|IBI0K$i*7IE&Hn{}l$O7kQ$V4?1`PIGK&Ji&_(Dc<e
z`bi>ZpL*w-$~H>YmDJixrnv($cYkWYoCRk-m&aq97dr4LYw0*GLR@|bt)j3)w;=nD
z1?c))^-wUuQ8W=O5PN20WMdfZ5`{Rao2|EQl%N;U7KGUp@?WyhEe^*DIKspOgbd~L
zI?T8EYfdHwntPQN5jO2IVSXw=nVCm?99derfd&bD?%Cp=$0tEZ&RGkTHY&xFBrE9u
zR@hJ>n^-PLgAnJzkW?+!=p!%b)!KWlrdu@!s(5^P)?U*~mHun84m!;hQoHzQk8;;~
zw@je$6-cD!eKFCapFa@_J#$ZO<;AUZ*3=P)xp^RwDw*Oj(`Vf`RR;+ya;Ylcy6d|}
zIs!sC8E-l8w$F2&hSlc~TmrGP*p&;oy#ek!C13=SZcZ5;Iqq90JOX*mJg@ca*37E4
zr`sns#X)I^@{7S3K#*o=4mK?-%8K@Ee0rZ2VwqmRIoOybTX`^;(71C;o(p!Jt(_ta
zlq?$#*<b4M<d%_=mj<5P0uZ{>Q7a!$u0nOVomk+0)rK{g<?+@@R9a??leAWE#EiBh
z`Ro81mfMTg?vAu+de{u09y7mkcpzgft*rUwoK{Oow94=xS9Y8NbKFI^&z?<sc9GHs
z5WM)+hk#eHkf`*ga8KU-tc&S}O-eB)qycdU(1aSEu4zVAew1qbvjPg-$S~(|CN{`{
z<I-!-e-!I=>O%`dS7;FW;*(O_LgvHp<W^NX%1WJgT{89oHKG~0=p|{5_s(2ocDph!
z4qo2jc~p{?UVYBXM4Y8-QHa!ZZvxo0j2v%WC`<AIFCALX4FJrg=21s;dat(J+rLxj
zK6n18s%5~J5K(+7bU~@W<7BN`T!98rjACK!Z_84%6{UhIHd~flG90AElW%J{#Hfjt
zH<pnfm*)b^c<;Eq(hI8;K=ZJZP_fNQk*Ox?+9-M;@U@<L!SC-ujPxTHrou`wFS$h^
zgXR||M(p2?_j4G|y<jCh9v%#OUxbTMzZ6LBdnZIk&Au;Rv6QW8_-J!GxGU4kA(QQO
z_Ag_o(jYQ4t$Ci*n3ebP_)F%Y$t8tLI6~muvjNZHhSwdY9AqDF*Qt|<HcfqdB@a@`
z*W^9W9F;`oXSC>7)1Gecb{b1w(3LXRGmeFgMp}HCZcs70sCjROKcfg}O<3g#Yduq`
zHnS}gj{J@`uDNROjQ1+&IK<Nm+UF&r2k)~d=5@SfJ&hkD_HKRxbE{3DrXAVN1MifL
zX;yzDEa5$v3e3*<?d5p~0VVd}hrxS<^?Tpct5U7Mgna!xoW&TwL{`CfOY;%w4SCuW
zWbj!{id7GOe{Y@~KKPU##v>1O?A(b`8KcrZzSwzDiiN|esgl%zOe4jbFK8)@KT#h2
zxjY>vvA#Jc;YaMdQO93AYnhOom^U;E<K=FB|80>P?wz<xu=kKoQ%!z!n~I9M<KS}v
zEKJiI_Of1hXNs%2wrA!k+G+9*)z|wynH%wF{q(zKPZY8-6QK}<w-fqeeZ-69Cxx^4
ziPJ!aucxldAS8~$V7~C+T}d-idb6inpjt@u6*qE}L|AC>!p%8p7gT_kdcFxyrCZtV
zn6Xq>5b%0w2O|ID_h$aA@E}6{G_&V&WnS1lu#Gy-dP(EAqE#uFm1}kxE0?xb|9!J;
zNEG>6;AFz79J7&5##^_JESO?9I!3mJ`K+tT6>iBpY)GuAIo!&;qo=k)eiY9245@>C
zpqV}zvwAskX8!cR5tIH389LqZ-PlbSx~JbH<3ZrQTb{YijF^>2y2+~&nZ}&Tnu+yE
zmybX~<<0%5U}-r@dTqw*)avd77E$A5Cg*(EG_QhqZyu_sSI=VqO5X=GEh_gwh=aJ#
zzUQds7PqwA+l_OJd^Q%jh?RUfqN4CQY;+KB_qJwN!Pn2A?!4dJGiyrX06xZ0U+rs=
zfC*{;_}vS?N;rg4K{r1W(%anJu5;t0L8w1GH8@g3iP|{SuYk#Ch4NB3%-xhUP`Od$
z7gb9C`R32-z<4?30x=r?<kYQw?;!EM^xR-ArVH{-9T8W99{G(V)M)q14#?046{kp>
zG9RX4Vv#mTZ(4reM6{w30-C#D<8OyIqDP3GcIi!LM|_X87gl`iZfNKzGQf5f3@=Hf
zPn_WQ<S*X!#vPrirl=1lHx)>29eE8tui-l#3j}lM4@i`eTfHQ{1W-Q%4U&MeEylB?
zP6Z5GO8Gu@fr*L+?u2-n+|Mob<QZ621RAJ2oqt$Kcw)N(J*g!AQ4^RiQwEd?P8{ZS
zu5q(5j{I2Es#@IB{t4j5b)NkpsT1S$i(_Que4TeA-{PUpUu^Bc79jY^5wcy`biW9I
zFVfh(y-(s0@FHBHcIkxCNs|$y?f_KFuECMJ(f|EGiT{3}#Q&}jlsNdHlmG7@igL!6
aNXdL&t~`F~7VW_u?<i~CF1h(f@c#j44w1kB

literal 0
HcmV?d00001

diff --git a/etl/customers/stonewater/map_app/assets/osmosis-Logo.svg b/etl/customers/stonewater/map_app/assets/osmosis-Logo.svg
new file mode 100644
index 00000000..d1937f9b
--- /dev/null
+++ b/etl/customers/stonewater/map_app/assets/osmosis-Logo.svg
@@ -0,0 +1 @@
+<?xml version="1.0" encoding="UTF-8"?> <!-- Generator: Adobe Illustrator 26.0.3, SVG Export Plug-In . SVG Version: 6.00 Build 0) --> <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" id="Layer_1" x="0px" y="0px" viewBox="0 0 2999.4 908.9" style="enable-background:new 0 0 2999.4 908.9;" xml:space="preserve"> <style type="text/css"> .st0{clip-path:url(#SVGID_00000137816637724558900390000008976326031054167740_);} .st1{fill:#03525C;} .st2{fill:#EFC203;} </style> <g> <defs> <rect id="SVGID_1_" width="2999.4" height="908.9"></rect> </defs> <clipPath id="SVGID_00000173152017417053997170000015988764747463501718_"> <use xlink:href="#SVGID_1_" style="overflow:visible;"></use> </clipPath> <g style="clip-path:url(#SVGID_00000173152017417053997170000015988764747463501718_);"> <path class="st1" d="M125,188.4c0,21.7-17.6,39.3-39.2,39.3s-39.3-17.6-39.3-39.2s17.6-39.3,39.3-39.3c0,0,0,0,0,0 C107.4,149.2,125,166.8,125,188.4L125,188.4 M172,188.6c0-47.5-38.5-86-86-86s-86,38.5-86,86s38.5,86,86,86l0,0 C133.5,274.6,172,236.1,172,188.6"></path> <path class="st1" d="M369.9,113.7c0,37.5-30.4,67.8-67.8,67.8s-67.8-30.4-67.8-67.8s30.4-67.8,67.8-67.8 C339.6,45.9,369.9,76.2,369.9,113.7C369.9,113.7,369.9,113.7,369.9,113.7 M416.8,114.3C416.8,51.2,365.6,0,302.5,0 S188.1,51.2,188.1,114.3s51.2,114.4,114.3,114.4c0,0,0,0,0,0C365.6,228.7,416.8,177.5,416.8,114.3"></path> <path class="st1" d="M824.2,640.8c0-148-120-268-268-268c-41-0.1-81.4,9.3-118.2,27.4c5.1,15.9,8.2,32.3,9.3,49l0,0 c33.2-18.9,70.7-28.8,108.9-28.8c121.7,0,220.4,98.7,220.4,220.4s-98.7,220.4-220.4,220.4c-121.1,0-219.3-97.6-220.4-218.4 c-14.9,7.4-30.7,13-47,16.5c9.5,139.4,125.5,249.6,267.4,249.6C704.2,908.9,824.2,788.9,824.2,640.8 M376,616.3l0.1-0.1L376,616.3 M386.3,607L386.3,607L386.3,607 M391.1,602.2L391.1,602.2L391.1,602.2 M400.1,592.2l0.3-0.4L400.1,592.2 M408.6,581.4 c0.1-0.2,0.3-0.4,0.4-0.6C408.9,581,408.8,581.2,408.6,581.4 M412.8,575.6L412.8,575.6L412.8,575.6 M416.5,569.9l0.3-0.5 
L416.5,569.9 M419.9,564.3c0.1-0.2,0.3-0.5,0.4-0.7C420.2,563.8,420,564.1,419.9,564.3 M423.6,557.8L423.6,557.8L423.6,557.8 M426.5,552.2l0.5-1.1L426.5,552.2 M432.3,539.5l0.5-1.2L432.3,539.5 M434.8,533.1c0.1-0.3,0.2-0.6,0.4-0.9 C435.1,532.5,434.9,532.8,434.8,533.1 M437.3,526.1c0.1-0.3,0.2-0.7,0.3-1C437.5,525.5,437.4,525.8,437.3,526.1 M439.2,519.9 l0.4-1.4L439.2,519.9 M441.4,512.3c0-0.1,0-0.2,0.1-0.3C441.4,512.1,441.4,512.2,441.4,512.3 M442.8,506.1 c0.1-0.5,0.2-1.1,0.4-1.6C443.1,505.1,443,505.6,442.8,506.1 M444.3,499c0-0.3,0.1-0.6,0.2-0.8C444.4,498.4,444.3,498.7,444.3,499 M445.5,491.8c0.1-0.5,0.1-1,0.2-1.6C445.6,490.8,445.6,491.3,445.5,491.8 M446.4,485.1c0.1-0.5,0.1-1,0.2-1.5 C446.5,484.1,446.4,484.6,446.4,485.1 M447.1,477.2c0-0.4,0-0.8,0.1-1.2C447.2,476.3,447.2,476.8,447.1,477.2 M447.5,470.5 c0-0.6,0-1.2,0.1-1.8C447.5,469.3,447.5,469.9,447.5,470.5 M447.5,455.8c0-0.2,0-0.4,0-0.6C447.5,455.4,447.5,455.6,447.5,455.8"></path> <path class="st2" d="M682.5,357.8c-0.1-90.6-73.6-163.9-164.1-163.9c-68,0.1-128.9,42.1-153.1,105.6c13.2,9.7,25.2,21,35.7,33.5 l0,0c16.3-65,82.2-104.4,147.1-88.1c62.3,15.7,101.6,77.2,89.6,140.4c13.4,4.3,26.5,9.6,39.1,16 C680.6,387.2,682.5,372.6,682.5,357.8 M424.9,368.9L424.9,368.9 M422.6,364.7L422.6,364.7L422.6,364.7 M420.2,360.5L420.2,360.5 L420.2,360.5 M417.7,356.4l-0.1-0.1L417.7,356.4 M415.2,352.4l-0.1-0.1L415.2,352.4 M412.5,348.4l-0.1-0.1L412.5,348.4 M409.7,344.5c0,0-0.1-0.1-0.1-0.1C409.7,344.4,409.7,344.4,409.7,344.5 M406.9,340.6L406.9,340.6L406.9,340.6 M404,336.8 L404,336.8L404,336.8"></path> <path class="st1" d="M936.7,510.2c0-76.4,61.9-138.3,138.2-138.3s138.3,61.9,138.3,138.2s-61.9,138.3-138.2,138.3 C998.7,648.4,936.8,586.5,936.7,510.2 M1169,510.2c0-51.9-42.1-94-94-94s-94,42.1-94,94s42.1,94,94,94l0,0c51.9,0,94-42,94-93.9 C1169,510.3,1169,510.2,1169,510.2"></path> <path class="st1" d="M1290.7,648.4c-12.2,0-22.1-9.9-22.1-22.1c0-12.2,9.9-22.1,22.1-22.1h118.9c19.9-0.3,35.7-16.7,35.3-36.5 
c-0.3-19.4-16-35-35.3-35.3h-60.8c-44.3,0-80.2-35.9-80.2-80.1s35.9-80.2,80.1-80.2c0,0,0,0,0.1,0h86.3c12.2,0,22.1,9.9,22.1,22.1 c0,12.2-9.9,22.1-22.1,22.1h-86.3c-19.9-0.3-36.2,15.5-36.5,35.3c-0.3,19.9,15.5,36.2,35.3,36.5c0.4,0,0.8,0,1.2,0h60.8 c44.3,0,80.2,35.9,80.2,80.1c0,44.3-35.9,80.2-80.1,80.2c0,0,0,0-0.1,0H1290.7z"></path> <path class="st1" d="M1948.7,493.6c0-42.8-34.7-77.4-77.5-77.4c-42.7,0-77.3,34.7-77.4,77.4v132.7c0,12.2-9.9,22.1-22.1,22.1 c-12.2,0-22.1-9.9-22.1-22.1V493.6c0-42.8-34.7-77.4-77.5-77.4c-42.7,0-77.3,34.7-77.4,77.4v132.7c0,12.2-9.9,22.1-22.1,22.1 c-12.2,0-22.1-9.9-22.1-22.1c0,0,0,0,0,0V394.1c0-12.2,9.9-22.1,22.1-22.1c12.2,0,22.1,9.9,22.1,22.1c0,0,0,0,0,0v5.5 c51.9-42.7,128.6-35.2,171.3,16.7c2,2.5,3.9,5,5.7,7.6c22.7-32.5,59.9-52,99.5-52c66.9,0,121.7,54.7,121.7,121.7v132.7 c0.3,12.2-9.4,22.3-21.6,22.6c-12.2,0.3-22.3-9.4-22.6-21.6c0-0.3,0-0.6,0-0.9L1948.7,493.6z"></path> <path class="st1" d="M2053.8,510.2c0-76.4,61.9-138.3,138.2-138.3s138.3,61.9,138.3,138.2s-61.9,138.3-138.2,138.3c0,0,0,0,0,0 C2115.7,648.4,2053.8,586.5,2053.8,510.2 M2286,510.2c0-51.9-42.1-94-94-94s-94,42.1-94,94s42.1,94,94,94l0,0 c51.9,0,94-42,94-93.9C2286,510.3,2286,510.2,2286,510.2"></path> <path class="st1" d="M2407.7,648.4c-12.2,0-22.1-9.9-22.1-22.1s9.9-22.1,22.1-22.1c0,0,0,0,0,0h118.9 c19.9,0.3,36.2-15.5,36.5-35.3c0.3-19.9-15.5-36.2-35.3-36.5c-0.4,0-0.8,0-1.2,0h-60.8c-44.3,0-80.2-35.9-80.1-80.2 c0-44.3,35.9-80.1,80.1-80.1h86.2c12.2,0,22.1,9.9,22.1,22.1c0,12.2-9.9,22.1-22.1,22.1h-86.3c-19.9,0.3-35.7,16.7-35.3,36.5 c0.3,19.4,16,35,35.3,35.3h60.8c44.3,0,80.2,35.9,80.2,80.1c0,44.3-35.9,80.2-80.1,80.2c0,0,0,0-0.1,0H2407.7z"></path> <path class="st1" d="M2692.5,311.1c-13.7,0-24.9-11.1-24.9-24.9s11.1-24.9,24.9-24.9s24.9,11.1,24.9,24.9c0,0,0,0,0,0 c0.1,13.7-11,24.8-24.7,24.9C2692.6,311.1,2692.6,311.1,2692.5,311.1 M2670.4,394.1c0-12.2,9.9-22.1,22.1-22.1 c12.2,0,22.1,9.9,22.1,22.1v232.2c0,12.2-9.9,22.1-22.1,22.1c-12.2,0-22.1-9.9-22.1-22.1V394.1z"></path> <path class="st1" 
d="M2800.3,648.4c-12.2,0-22.1-9.9-22.1-22.1c0-12.2,9.9-22.1,22.1-22.1h118.9c19.9-0.3,35.7-16.7,35.3-36.5 c-0.3-19.4-16-35-35.3-35.3h-60.8c-44.3,0-80.2-35.9-80.2-80.1c0-44.3,35.9-80.2,80.1-80.2c0,0,0,0,0.1,0h86.3 c12.2,0,22.1,9.9,22.1,22.1c0,12.2-9.9,22.1-22.1,22.1h-86.3c-19.9-0.3-36.2,15.5-36.5,35.3c-0.3,19.9,15.5,36.2,35.3,36.5 c0.4,0,0.8,0,1.2,0h60.8c44.3,0,80.2,35.9,80.2,80.1c0,44.3-35.9,80.2-80.1,80.2c0,0,0,0-0.1,0H2800.3z"></path> <path class="st2" d="M401.2,462.2c0,85.7-69.4,155.1-155.1,155.1S91,547.9,91,462.2c0-85.7,69.4-155.1,155.1-155.1c0,0,0,0,0,0 C331.8,307.1,401.2,376.5,401.2,462.2 M447.7,462.2c0-111.3-90.3-201.6-201.6-201.6S44.5,350.9,44.5,462.2s90.2,201.6,201.6,201.6 S447.7,573.5,447.7,462.2"></path> </g> </g> <g> <path class="st2" d="M2734.4,908.9h-26.1V747.1c0-22.3,10.4-33.4,31.3-33.4h19.6c20.8,0,31.3,11.1,31.3,33.4v161.8h-26.1v-71.5 h-29.9V908.9z M2764.3,813.7v-65c0-7.6-3.8-11.4-11.4-11.4h-7.1c-7.6,0-11.4,3.8-11.4,11.4v65H2764.3z"></path> <path class="st2" d="M2891.6,863.2v12.5c0,22.1-10.4,33.2-31.3,33.2h-16.9c-20.8,0-31.3-11.1-31.3-33.2V746.8 c0-22.1,10.4-33.2,31.3-33.2h16.9c20.8,0,31.3,11.1,31.3,33.2v22h-26.1v-20.1c0-7.6-3.9-11.4-11.7-11.4h-5.2 c-6.9,0-10.3,3.8-10.3,11.4v125.1c0,7.6,3.4,11.4,10.3,11.4h6.5c6.9,0,10.3-3.8,10.3-11.4v-10.6H2891.6z"></path> <path class="st2" d="M2911.9,908.9V713.7h45.4c24.5,0,36.7,12.5,36.7,37.5v129.4c0,18.9-10.4,28.3-31.3,28.3H2911.9z M2938,885.2 h19.6c6.9,0,10.3-2.9,10.3-8.7V751.5c0-9.4-3.9-14.1-11.7-14.1H2938V885.2z"></path> </g> </svg> 
\ No newline at end of file
diff --git a/etl/customers/stonewater/map_app/assets/stonewater-logo.png b/etl/customers/stonewater/map_app/assets/stonewater-logo.png
new file mode 100644
index 0000000000000000000000000000000000000000..0d05f58ff1486c7a0a9e1b8d09fd4b187de0c133
GIT binary patch
literal 19000
zcmeFYbyU=E+b@b@3nC>UC5?0p9V!C|2sm^INDK`l-Jl>ipfn89I)K1X(hU+a($W$$
zqaa-(14!@N=Y4+9de1rg?6vm(@2qtcapL~oaotyZu8MlBtww&0@fryU3Ay@1s4fY~
zIU;y{c;yoKZB$&N4gS0O?4gM#35nqG-+$-II@jNlkX&SP)Hn7r)_NpsgK!nHvPD?K
zh5THffzc!+@=AWstZba&UToHIdq+0~_N}Hib~Z;_1$HBdmWb9fWw?Xm!vGJsUVyf~
zO@OnFj4iv8BAdLQESSI*?q$X1=j!6-DeI@e{;zpu!Rx;t3$wHRYlxS#0{i{H3$ht&
zJ!Vrzc)-~tg+v5xL`1~dq-2CdB_$wYQUYvZB4Q%KqTmN2C@Li@AuTH|#`f<&>|iwy
zTRT}@sOrDh0`C;q9lX4r$qEbm`uYm_iVGn;?1e>TWMqUz#DvAf1i=VFPk%QrD?dRu
zPmX_PfWkd(JRF~SIU?NH{?2G+jqvtTU<W(>uOqlV`_Hs)p8qBjAegY9)iYsHA(6jF
z`qw~PoBxb^=I!C~uf=U`gyAl5SGb#(Cm1XGpRvyz5MBsR2gLu&)c^VV|FQret(Mk*
z*7%?9#ntscOL%&z_<&>ln;`$w(VqJL&)~wka8HD{hYeiC2W*q$?`fXNDto}KybvDx
z2!zW&7p3ax0ry3CID4`w>p|E=!O6j2326y7BS$w|gs<m+dk_w_@`5X{i;0Lz35tk;
zAAM1AS%`$Jn3U;%jnYEcI@<aFGfGTEP*hA%OhjKyR8|B6uKAx)fMIN{ysZ9L<7{nY
z?GPTWR^X(Lu2%MN;b(64>}>y`pR6*%1>pgf1KSb*@1LtHD?j!?*g3j@FFbYC9<Zsa
zC`*dUNJ<Kd35ot|lUiD`>TaH1R&F+Mb*KV6po);Aqphr{4P3<5PR2&i%ElHVC}9f`
z6SRU$NC;ZPZ6sx6L`6g;WyJn@9*VH>{(JZTJa7BI`Mi#YBal5Sm;d8={z~LurjdQ<
z=n1aV|KBpB2Y3JXD;Gz$fANW|mCaxN2eh#Af!nhG`$Nb7<MaR5q`nStFy&t%{(oNZ
ze~s`&*m?O{dBE@612X-?X^;OA4mL4x8Em{-RyK}qf5nCGpN-i#Sh?B5fie<i7X~L0
z{wucsbtvKg`+5KM-G3ADe})6S@%QC_6c%{%AC(7p15<kdg;w)yF`k43+My1;ukV++
zf%6U0n><kZwRA(v4F8-#GO%2d^Z^xBI!x?p{5ggTR4T61UIng=8Hgk2(RDjWc>Rkv
zcFKFt$;7@{y|wx@QwcY#g}C%3mTexM{Ge2?y<f{Y^F(oLvhR*;<9;TG*@)**gS`j_
zLk0A2=IpojvcHPBi{czJ3CTUun_th7kVugH|8M?3NQ0!;=GB{3ZLSKoQa34spBPFV
z<ddd^GOuf#BYD2Z_Kv3q?VS_T^Jg;BH%_89XlvH;*oci?c6zMC5ONf9@^z0RlyUtK
zjI9M@+mo^QHaDFtd&}2s7TkANVY^99QAl1D>;m82{KE$$ge`(0admI?(&+aI8WNJ~
z^XC*^WaIVh7`H!}#TogZl-EZ5WQw)=f?isa%h|v1paJk3CG1{u`Hb%Pp9Rg!ce%+-
zBhHcB6TkVD`sW*a$t?fL#SUhn(B&KI22pUNK6|1;P00>D>5!Iw=O86~;SFZ7*7a=4
zyxHLsKN$&0df4-9lKImUyzt+n;MkF#NF==z#iA_gy+n9faRi=QgClo{7ViC$hyL(q
z0<)ScsgVr{$yde;L}>y4gEg4UzH89T@M<V8Z%+Wqiapy%UzbeYek-WNCe7VRpM^}B
zlke^%V`8@dv!jH|m(h3<l6MMMmQN&hD=1JdMP8eG7rMBjHM*b|Jq-+cK2@Enrxj^s
zIfj+|+)JiYhE`&9%^n_(2b;adhwQ)Q5NxQqOhQt6|K65d$j@+ulJWTLUac@^jP={S
z=S(qH^x&+gyK93!)RY@Vo@UaKJ^9Pr0ZZG=XP2P<JwH^pCV1NTvQe3<q$JO8-Fdg)
z9%iqUmUK`~ZgzX-rDasK#<2PN4}AkJ>LCba0u)-^(kQB@n_M!V`2f0M{wu7c|M!Il
zGug)<{!~IISixOgKc}DyXt%j>ITODz5GEa0xMb)r9f`D6DP0fBL@!}Qx|RbR-}%ZJ
z>5e^0RU)2p$i<#E6H+dbJlDH){_yH-v%CG^uOmW0el6sW?Y+bStaHjM8~RAy-Nwd2
ztWcZC6WBPl#g&&y1}UZ&RtOX>a=)ds{xF^=A<?$FvW(rv6)3j;*~pE>357aDl(4kD
zZs9?rgXAt}&2#V71VtigbbEv~d}r%wmgjwA$8Gl^6eJ|l5Vm*w*2M6w<F;I_(hO7O
zxqAGi)?HOD_G~zBC%x$9V0{7H#&F}MKK0+9RbmF@CN>36w3RA>Hj7cd7w2)>IP}2|
z-?V+*-t1T09g`K$;>_il2)G}rboJ4wyGxzSa`1p&e6o6sYe#C_x_qNRYza_@;Wxi_
zJ_;d*j|V#8n$pAP{Gl5hgGb6RgOREd>0WD>^+(q=et+>pgo%?S@baS!?@#{%V&wZd
z4d_ybE9Ye?Pl(|yj-6Yf&RH!Du347)Bj2Ps5ZDKz+G~!~nNTQ1(DhQbm1@zp#IW3s
z-Sp+Yg9jsREVCg$8`5tn(S-ud{H${Ra?|;6*S%0hQwPL0U3ElW&t1yTxBH`}n`X3R
z2WzbDLgJCj^Oc7ZFj&>m%v{_=T8uDfssGB;g|3Q~#T0(~m}0F*n_wm}*?hC*T-Kl^
zoqQGoU1^wf8n63K2fg*f<dibV>@JgBI-Hm5967Pj0~was{Bd@)c4I|>Ylk{h0oVzs
zcF*&W_QN489M|OTsciOZ>yEnBq4=+jdV2KO$KbEGr^7mJRkXbLvQ!cZd2KR|J5X33
z=B7JWRY*uaseeztzD&&;9J;V+x}UxHbeTQnQ0do(A3eJ+O-=PqMZ$bHPiyj*-<&EC
z$;+QpS!R8HZrY+Ql8`8m@=<?1JN-;KiUjP2`7m|nVfL^$8qX}4ARYPME4LL$IwRE)
z<Z_F!tgp+C=7{Cij4APV`=xTx9+mSXpKf|+kVPMD>ah+9@7F5K+)A{op$O94yo+%<
z(t$78C}6c|Y`IpP>%^dW80dX<i-Y^_8(roQtw~dz41Mno>T@LD^9l}b1>ifkjvK#?
zA9@$F4YF&ThAm?-iP<(Ouv<3D_NI>$)wmnJxrHSL*@atv+wLlx+qu-}LPA3CmrlMs
z*;Wy(KNx(wt;Cre&zel5I92)^A<D^*vETdkCV-ti0=Zz~^9XVDj;Hh%8SJ5<M@#!x
zp14hUpP5gn8zdy8T2<%FlWC)n$E3Hga)o(`>evS1Z0V9Tq=Rw-PM}v~@9BC<_sxVn
zOrzcXCr5z!_Q`|DNl2c)bC7)3Rxy_u`GFwRWUG%^*$;^2;-c)U{`}ku300{PPD?MK
zqEwF-i?-smxp!@=NO3IW_9wc;5YMv-bPR%KO7C*95D7`VJUBxZCd7ZCtvfI6R>AT?
z<hiS>ca?7`OnRTq4l{m9Fi2s&mXUUu)88?w$$RFo<2PTpIUNbfLxF)yL|n}#7lO68
z0fj95X`hNP6_Gvaf0T6E7fZR^(|I$ha=M11F>MJe9zIOKUxyjm$(bI{oK`jYZ9km^
zmrtbz&L00W$d`>cef{#?5H)D^%A$(TBqG&ZWY)zm)1E;$Y0MOn1N8k`&@AJ%7^cF_
zQ{$-0^o^^LiUCh&667qUA6wC{9|A!>mbm@ZALXd`F#kCxq3ZPB=VCeM3;`(T<jEl>
zCstj&T=Z9F{F{5li5iJ$l&|81`)t=sIx6AFhlcuzGz(`m2A>&!k&rOHn|fXrsU<_9
z@!MM^Mo><Ysr2*)C2VR)$WqD1cv|LG_@)DU)A#Yc;IEF*)jH3wq-crno?in;4j4@$
z?~r0cvfRE-aod`9mu1WS=(}G^PeQ`^(vCC$=-vmFVx81if=}9%Q;Lzu)`Rx_{aG>J
z*)W%+Q!SMGV><cH_!3MjCEq1A`@~rI&gCksdH&vHxs6GI305h=Mza45*C0-JdR*c-
zWFaL^fcQaxv4F~QotK48X09ho!a`0X!f7d}DNl|@Mw`joOacawG@YzeqBU9u24cUP
z_t%@5geiwp4afuT8+u#{5ekw+S$+9ZVQEa(2&sBipmVZ2H5%%1#E|4m?&1m<j4hMO
zxxv>O`4DDs4f$Jmf)+Kz>?S!IH1%^B0%<e#7|P^PQhdC*O?v_A4-F+1Q6^K5*4=6H
zEVvGZZax<Yvw3S(iQx|InTicHiA36elV!#>qtNq{XMg(njuM|pkdTm_D|mFhMbG{z
zc~9^D>decma^|u|pzA)x-D*3-&AzD;d(`L(-N?u!s$sIX8bd9{*N0FB<8S+);!U)&
z!R9@4KCF66<v_UYrxAnM+t)960t1HS8-EbTeYdv|72kO9Ih?kw!Ex`;<+jG%4h?qM
z&@NJh!rWD)5{f1tAy-vXo`Z*U;J#GNn?`g^$~8mEY~C_5V_VW4$6B5$xawNXX#y%I
zwevB5mD{^%f1G&y>5z1xK&Mapto?X+N^z+?6)$?4u)il0+>Q-TbU{K2)rRgPZbUak
zWin36e2TH5SHV`~r1`ty)@O-zbTI?FTqQSwvE$1cIhR$Kd)<PIigV$xGiIP_b>_U3
z1q9yT9-U*ky*|sMBveAIZTWdUDk(~4I{?A#`L)R~O8nV+-aAgLL0j2`H08E{8T|W{
z(Fob`>)KIn)@c5}_$OI!ljU$587<b-BkU4-@NAqM3XM=QZ9eLC%w2+~ks&Jjx3`Lm
z?<T~;`LHg_jj!<By$vU&n)WcJ&O+7f6On;;X4@xG4WnC6L{5CKERvAi+(<^ee`SMw
zs5?eu`7mGwkhB0N5qMl~`9zYS3GMsM8nQ1uSo{4fs#3#6Sj$_*Kv_1n;;)GHOT*s3
zRjYrcBQ+}es3F$IvPWXoqB2{uh&MJBh<^de1tM?OOw6Im!Jv1<0)ZjH`ib{S9?W~s
z(&C_wFE03F-5{Ou5=GC{aNW3pZqJt;LAu!6KUVtha`JWB@aY-M)R`|ZsLq)zawNrC
z*4eJU-N(Z7E>It>0)0^Ybs>Z)tmKLD()IO_nlB;eC}G<=!hr~@(~thQNZYb_J$j5I
z>8WUI_Of3uR^Oy_6)L~oUD1!k|K5D!fx5%4fJNmmAB6rieN?q2>${kul%ce%1^A0(
z`Fi!&xbQpdFqm51wK*PP=91=}?x}f})9>9oz09RAc7}rNcIJiJevzufkXFC|@~L1U
zKG}gKMO=Ei`kG$8fyU1AdGp`m9?9<$xJFe&r6m<067Ph8Es(f)?&Lz_huWwS<uwZZ
zOvymMZVH{g0pcXS;peLZingYL9%1=g$+p32zqZoD-M-gXkVR#MFGm&H;H9ysWtqKv
zukg+M8SM;x*(<_V!F*TFDO}pvK(OCWMBMcXuuJtfMKt>aoLU_Tmj~GaqsdwbSnTVq
zMDF}G@7=7W`tuzk4K~K|Lt<gEa01}99VI2_=~qzB8-x4+W|ej*5EZZW^v<H*)o+ZW
zF|>fQ>V)y`Q+cx&8B}rhXpGlJr6l)&{QaQ{TE2i&o%s<xyW_n~hRI`~wYF}C$pCG5
zE-Bts`PI7(h*sS#2!!s7#++r2hWSx6!FMNf+t>Sp<;M`sL;GSn2sNc^sT)(0vuGo?
zo$>jr<-mo#M{|J~vdZ+{?fW+^4!s!XRwZ|RT+|@g+QVC34f}da%ii}5TP;T!zipes
z#{I)h?*K{5w|e~KJTf|+z?fLuY-p==)W~okcd7832~^@0vYRp?0m;dZ;YM>nz2cG6
zb6+xe&;rq!PB{(&ln1=60ehSo$PZM*e(b$mXbC6Qy01{^4feZSXey_(U@kM+nlsnr
zSgZhL1C_C)+D2%>pnHY@Wert=Qmnkz*r}YJw%1cu5$I1Yv~hu*o!(L4;}vP6_j_%Y
z?7x@~UY%)C9VkA^-6L4p;~3bT-gO0|0x+TClw2OTYYLQbU26jAura&S9ds)`vHE60
z8kMD#6y>h{JbC2w8H>Q~2BQD*5uqfX6~1@voWi+jqf`!6*tk5qhTT0W>S5RXe6BWL
zK0X#pGqc+!6p`<bkAu?X{i6fdzq#K=E;F!C<0c<uYA-!dPsD3JN(uaIEKn4epPx_g
zo>SK0%|W81B&3a|H})DJka5-2zVVHB$;mW~az0E8OL3*l3IW8+P$D;_E1EOd-N0x@
z>Z5?Kp)plbLPCWS7uUipG}*Ck#@>VJ2twKEy_l0|-O9tkv9onDFuGE5R>axks~0A`
zC~&JUy1KghEnaVYe0;w`ATZdTSvuCh!&5C=4SF<Js-~wms&w)GpQ{lqib*|QZ2bJr
z_nZBx6N<{p_TtfnQ(0FrnB+KyB&k#?;*nRA`5Nhq@0A~q-zL-SNHH*^U0u85)r9Li
z|Kd|9b&91hefa&e$;?cv{rv^b)V^I~5&Gs>sz0Z;^z2DJ&1~Z0JNJ~db$n&lC>|ze
z;wPyM3}~feT^VIO#!{igbCfMQP<Pk*nCIEXJxuK(FgX14p$#@R)<gF8Lz*TjAAN&-
z4CXGTxVVo|vShiIPwxXx(yHU-E|h5Igy8Dxaw|{w`}u{^^ZXh<cT`6yxT8NU_>}2o
z1{K+*v!``4Mur9k^y%s8y!2#=Xd)B;%=C0&;j&0dN=i&`Z*QZZu<%x``+PTRm9YQ?
z73>_B^vA8~nl)l=Z7q^uqpiIc_W8B&2?y5w<VIuGU4DLUeM3Xjj?vbgtgT1AU%ssS
zW^-|I5x^``pUvtVc9w=%Q&6Z|qZNVsn^qYaEguGNn+f!dT%>#AcT@<=4WUOU++Y!?
zHZ+=qHn&o8sAksqZF5ma6_%uqTI(7ZGz6_pl<(wyWWMF;;gKwA^P6h7wHH_8ywn>R
z*^M;I?~Eij^IOSJ9pFJ{Me+VUy4UwlFY-c93v_Z6#d++Fa|ZbY1=B*-zg4_!Rg#I#
zm+@S@G1R|4xfJ<#mS=I;zN|lA-TIlZcBTT3NuHjbrSsUhau!LKKWDHbUsxO4yPOLT
z-hW+novAJ3=YQ?Woc@>>dgE0%(~Zltw9KZ#ds9nK(o<4e5|N5=3qdBRsca#7DlJ&D
z@qTGkcNE&s7dHH(DLVSb+VN@YuE_NV@vEy&fTvJ<n>car-sGa8{xtFZ*yQBooSVaw
zCnb7PiJTt8ZP>;}+3B`Wf~Ahq$s2O0mvr_qt;))tZz8;*t!;F0C+T<HRv)3Up~1hD
zH`=*llqohHo~s(um^J5+Ha;<tz4%PJ5^ZVeS{lB(w&vD9z<f}5x~K*H=_QMxxWFD5
z?=6el+5X8;mY4TO=f<ntnfiih)5`|>$!EdGf36aApfU<A<%#c=whT;AY0yb6l!-1)
zK4HEG3R|yuK|9j`ZGRfic>DJ9_jXc$Y98$XAAGZaYQ_NHBuvR|!0PerO}Z-=gpP@$
zI@UK9hcmT#T6QP8f4rh#m93&E*j;j}QX-y6+h^k32xps3h_jUpgwMXrTeT$4C_t#u
z?wH$)x9C`p5|JUSMv;^Or>%DdW`|_528{v~?qyhq=tNmNyy~&ti!2x)8zbXDkIom}
zQx4xz3;WulrU#?(kj<L>oFqlbz{r?MnIjtB&L)>NSmVAp6IEPR*4j|V)7RT8#D*S@
z;(cf^{^IbMBI*)cEzW`|=W|!^o4#I+m6m<H`JzXI;_}de@*Lf@7P0l&hG1pa(p)Uh
zZa{Dm1~a1q6JTe*j&&E0T4pjqd3MBUf(%Eq4R+7oZZMNp@4dIT_hoNcb1kx0ccam7
zQ1FJe>(A$<txX2sW1>4mh66`bJfxY*2eK7-KFE4Yw|sndW={yJI;sz@W5NcSpo-Yh
zVg?2(?AG*=47t3k7fetAv$+;G!-(TXmSWoWCrU#y_CGjL@Y3KSW6MH}#!PLnf^$z|
ze%q4tW=_vt9!mPiJ6YP3g&3#!xj&1TvNF6(@Zp=cu{EZyz1=a05SlB0jC?*c;Dj(`
zwS?@{)KIg~weLPnLTeihhlhtno=YPl>`v<&Z|92&dD|K*Du!=nqY@HeL7!<gsl=86
z^SZ&kYj7hoJLBU%gFwX&ASoN#kZmQ?t`$M21_lQD2Fx6N9!8z<^L{DtG{)#3EpuKI
zh}bGjSEYbElab~&PACa5Y-NLI8#ThaQH0F1Bb(E}An=&ErnYv3C&#7n&xodQo1CEF
z#%62s>*tCufux-Vo0yo?eTR#sy(T?(=2~o6dV2S;AuOzho87cy=U5?;Ge$>C%e%U^
zHruLZ+M4BJlgrfa@+e+mlno(hXIG<qQm%H#-O#YM+P24Fo^N~p>Y@vw)zuj`Y^AKD
z^IM+KE7HG%<~`Lx<##}wU2v*`bTlOkQ4ZXc<{dlDT8Vj4!&n+Rx}>2=pS1`&3)_jc
z-ASH55HI&8Ty^MKYg<;xA;ayWV7rd^yva4k_OL49G->w{{0o7}t$igGD1YbZ>gsA)
zZ(m;>OsNsqVxbdaV>AB+pAC$Fe;l?&LR`GqQgF4qyPH)l4kyB>b#=|HysY-jBr#y?
zg#k?jco4xX%D(SQvxMB|FEg%;vOYdW%+|Lr_%v0@vf9JphbRG)bXJ{%;DmIigtC^s
z#+SrLoEjDucsy8mL6j+dsB)c_tP?NPr`xsPJhYVC61actK8)T{P=^j0_o?1v?O?j0
zRyXm`&Ti2v4l9mzs@e5ZajKa<4<$O?Z=d!>E(_%h?R!~<`CE#Tb*d_IqxXvBaIez#
z3BQ=-YC~~)BK9VRbN;9J$GpVn<4!v)!qmm%+?238l(e^KN?PKOZgbM77A7e6)@5gB
zX5vagPD%66L`{DB$OGl`mTE6MI!<Qaiu#wop%=AwtL%MvA*m;scusdXto{2WcTy48
z%`0BP!$9PJ_Gd_)d9fdUC_e!2W*2u30$d@{=!Zb+r1r(%-3B;=;m6g07K^JF&JPhe
z%H1y9kuJqxJT&}FH8i5t3=G=0_I-DyQ&Lk+>;}?<!#`WA?_JpyBBP$7Qi7E~q3Y#f
z*T~;1dZipT>_Am&Xl6k6%8+u{=;B4^Eh+jpYZvd(4~GMr*Q@hZ<cHM3flVm0^HlvS
zMQAW__|?1ui!XHl<FG@r3QV@H?kUTlQ_aN^!x@}mvEUT{H<h60Nfp1ozaWjsVO0z&
zH1o}!aw&UE10Tc~6rXvapRx=%)f5>PZ(KA%eeh)3cOvf8)ZCb^-B=?~4=<dQA8Z$4
zG|cKk6x3kJ1n%ND(0ESAPi;IUh8Zpr>Fk%>1LWN*XEu~Jbwb?gj$ZD6$r?NqFAR2a
z$VpC3HJ=%y!Qrw^d=^qPXM+cdc>w%6b8vKw`jZKV!xg&SpL8V;Jr#LKOS=%N0ZpsW
z9se16s`FU!>VszwV^mhwNaTvgC%i_riiLofZAdPF2z>BRyz!lP6fiV>qXa#fDw#F|
z#A&R8mftYoa*;+j8iV;g6u1{Oy0KXN*?MYe|ER#Mw6?yn7?5-)9a2}uiFO<P63e2X
z0l=g1<0I=^?tKFT-f53M%>9yx!)hJvZJb=D)LL1n)<dyfiBl^8xJ~GNYbe45SRS4T
zp0@*w&!=2smsJ1|5<d)Ty?$sql-)R^r`s>Q56Q83Ta3Z53*Gh4Hj-!A>MSiSEno3!
zM4Fj_#pY>MWL|&Io8LJ={g!8~sMi9}@5*YY3itWLn_O2q@|K08h(xj@HUs;Qd91Qt
zzlMg}UIFM>IWaLI4mSVfwaCjVQ`gWR(wpy>iCJ>KwudLKzC7qR$;tZMH?Ii9XbS^-
zH90xihC+%}cWoh#IvgZa-$Xo<cB%v`=hk@!co3HpGMX)Px<0#WYY*PY9VXiPnX=t{
zH$6SwdLvRoQbL+Tm9XX|`*~?cVQS`TFboFUPC7hnL{5a-*-d2sB{A5D2n6D-_s5eZ
z?iJ7urwPbPtEVQVlJ<6XhFxCVZEZ2W(oWVaNzb1vLVGG_UFVsAO*o@f)_xEXal~3$
zUOpL7z(A(@K0hDcI_|VKBo>3Mu^TA$Wh9Gq{ox1<{0(MVugjDRL=V&c2X|i7nYyYG
z15P-8tY9!BdmCm;y_G@s&2WdPaVP2lyB*ZuJ5wGX-(%h7Vy&BLk(C{Ak!)M_X==nd
zyE!})LMuM1;OFNz$B&KCh@OaXaO64xi?thEWP@;W!h}+uD135%VpJF4?Ce}8v{zYK
zsREVYrd;U!q@h^_C4P@Wclov8e^g}0fzw<KdzK8JLh5+1-F&OHL3&U7Vj}>qZu@bO
z<b4M*<4&4CpEq3zV|&u)9@KT05*y150k_xHqG3`1G}G-C3szCO3Y)Pb1*pUw*n-0g
z-e{a@BTybjy6Xh<AU(Z#-wtZBOX2KS-W!z|nng^xxW)80Nh+iAN3BaO0jp5t2FCX<
zCch-s)<4m@zOjcvp37KSS<y<h3{f#E4%%Hc+cOhDMgOv4ie;>=o!D-Ij-9)NYJZAc
zj)56OfGf7n7JUhh5rwXgS~@_&3&8F5qj?oaW@JotjqA@)%P&-ct&4e0XL)EW)(5P1
z&86@kQNOAI@H8+;ImteoRc5f}r7>kuFsk|ZtgWoI^@jPDm8qGTk|<*gW%1?R{F#>O
z>YpB|eY6Tn&8uP8g<QDV#nSpI`AKW5tK)GN!Om~SM<J4u{c#V!uIQVfns7&3aTp9H
z^g?(7wZ5*N;8~Pn?0o%E2Wv4#11JRN!eARV>8O_vth=o5$Pe+iKmCZVo6mKWdABcZ
zd0-3d?Ong@GckD}WihRXt9@os%U4&i4zo#7GaIXW`RPgG>jY9*ErGBnoV~=seKIjN
z_9lOid6uffsb(X(^_hd^GrO6UN`Vsr*h1zyP-jmi3!t-?wgXy3<9vJ@p44uVPUuSd
zoVIaB;Xa@s64K>wxV8X~xV`SlE3h9Eun1BYcrBLoFcxRw?c*2L{xliDiz74zF`#9Q
z?2>ZV=TH?DmFUSe^Q7kipFX7#zI_w9E8qT9<btL~v<?21^54L4N-&vdDXIKx_V+=G
zmUSzSz_E#^M_VWL6&2eelqN;NgLR951=D`S4J^{g@bD}Rzu=v5s)6Kv_@KJJKK||7
zmwH^8N|XQ(JsG}ea*d<5-<fr{bt^>U`)B25u%!ndEO%2m)pb<<>ap&zjZL4XLGsXk
z_vT&^7o(7Yfq{&_JN@1Z5Eu+P<~sb|Atr|07iuX0TxCS|a<efLaD#P(Qu*gBLV$%X
zFm&F(m<0G!1bAEMC--j_qS5k>&b(xIFzruW6t=CDE%w^OFXP?Z++rU}FlxQBPWv#c
zUUj`sfKe{!l^k2<3=@sz6~5lgI}n(tDlj?U8@<m?as~P*a}C$l)|PR;1Qw}~<3IL1
zTIn8yF9{rkAO5F;?1y2~`&o)b!3PU;xxtE-z@+;47~+U4;+I^uuVgPt-Gog}&!Xws
zQ~SIv3=Q`@qwaP()kIvoHesO!o#`B<VB#H%V-R0VnVn7836$}^2mJe%2&DDq-r9>C
zq($DhedV~XBO}MH?N6^<3;Q}AvHon#wx`ld7Q5ZzM&VL7Gvwmr6#Z>b80e3`&?Uu7
zvDVb}H0HJx&9yL*a`Ax;AJH3X12MU3aWZ&2!NbVNNMy|numLJC4XCf2o}ON?rr-I#
z$~=ryxfyvAZnN#9K%mVZZfRf@{Kf4DWp&CYL#Jl^dE`)VY4*mXxf!481Y~2W%CIE{
zv*)ds@>)|<Gv{bA1IK~Rc}&xwqp9pH`Hq@WFbC!Pw63$}U18C}RJ?#?s95d9WJ$wr
z&X9spJcd`J>%$!kM%Uk827nIx{Jgvp261kLmW4&O@1I|kIq8o+7)32BuwNr)uEk5q
z(Ogq`5}yCTR3ku6nt@aOFwBbn8mra%&cO%nMyBHN+WGuc3%|R7&G2aximSmh&0Lx&
ze7pNBHC9e(3JS}kQ1sg}5|}v~Y<~0RkvE`{>${LkUd7|LX6v^HvclLh3276vvyT89
zDltj91(ly`si^p&_WrSLf}<l!2OVc)Vy8gqeUTVDU~0%D{4N!RdNS_;Q6XfMTQ>hm
ztgn5_#SVOm)`=g5QOo{)U%r$y&PDTI50nFd(V%&|+xb;SMn=<SJ5V)`{rwM=K4@js
zir{6*4S;r7H$%Akv1QAPiHip><(##=yy%tGBk=}VR<OsCw_KYyy`99~H!=oXdq8JR
zI~qFHHI$AMCf{XcWC5|C?=o<KxU`o#B(g0~nh7aGBA1(&6f?viGrDy?J!z%(v4gEY
zLSNl&+kX8!+=Dox%Jag^t>X0kyQdo#*-JTzoP2VJ2eq}5vOep)%yK>woemNfS-}0-
z{n?YWrW(uS%}S}DaTUKLHw6`ykdz&;6*IVD%iL^%kil+a1T@VNsrT68lO4m`pZ;0`
z8u{-1n~kaxO*psdI>eT;eWv?j6I8-sje<A&0ZSZDnY^tDiZ9kxCkm-=5FCjdq|hlY
zFK=@Km(X&<5X6d7Un<$rNBUVzx;{1mR;5^eNa?P+t`S|`=t`!>h8}H3KyOn~QPGUG
z82BB0_<KW3OKaCMchXfVV*t0ArKV@Fi-6LI7FISl6W1L_KP-&0wZ2MJN5MTB<}!$A
zOjOJ66LZ&`vFYjGZObc>S@e<bA3x;GDNhM%&6Z;}K^X+KPQ2~*fM!lkQb$r*^5%tF
z`)(WTy9ov~;pwE8$)h`EbNHQHLJoJ)tFCmVXF=yFhsmqD6>LR+(XANQ?%BE&0X!$g
zsh$pnhA#NGP6nm}&;StjN;n0cwZVS}OVNZY3x-Mres^M2g3*YhM;~#I-XmaXO;9m^
zx;HmS8HE8;QeChz@j3*E7%09*qeR_hk~x3H%`JvF>`tvkd7Oir91}z)8cnRv$BeG@
z1F#TnEGwg+(6ZaSGe}?zE*W?71hT?zIzdjZT~<+1F+<1;zth;1H7}iF^(_?p+cdVI
zwA9twyCyimFMTSLs<5ai<Pf?9xU<<&GK0gdV`6-~!l&tEXnR7l&rh+U>2vK+@<%;@
z2jY}`nzBInB=&Yctx5x0srYGj^0|`(kdmZulk?cOY0|jSK-yt;A7=63E_2Jtn;y%X
zl;*p=lN{&K^5rHs?Khs)`#uW#+0utZ8urs4Il5uCAAzKuWPSO&P+;RGooY^iTN%V|
z`hIkUuqa(NdDIIQ#0T$Z$-v?Ha>(Vp6`v${{!FtbYT=UCY0H$$BC^pH@wVFeKF|WA
zwqh!8>Nl4IEZO=zIu<`sTxwHdy9s-8{hUc@ZSC0Dn8FXoja*#u=JLuPKv~v5C$5yC
zZA2n*LxEWnnm4*)K@lpTPBlI`>1`Gti7c$I_iN}IcNI<ha9ybWV_=&jE6`OZRW&sb
zG(Ihj;kLEO!ycy^KJFyPV6Olbl_~PPZ4=S$D;Lh6G}0ymxHMeG+k3uve9tO$P|2}z
zmRa!Je1_V3>P0U9nnK>x-e`>8_#V!OKJwvEg}Gqbi8r3>jtaz+OgRL`XgGsu%{lwk
z6k2iiySL3!(otWK(wx}wBKqi}NyCoqde|2I${{0Vi{ayslHo|9kriScun8~Fyp;UK
z>WNUZ+8{SWnuXk+q=J4<bnlCDcLHy;E^`+5mG*PYyIDC(KE~?m?p<ceuw*xmH`IJL
zuN;p(6%nAk@V*#6HN1QJg~2}afQ3>f)-yo&m%Prc*9#$WSUo&J{`j<3Riod{c<APE
z*sqt3jg9=+n6=Z1Uw*TbssK87+Xwd5H%ys*s$6I7xSRq4RHODo+|(*DD(+3@b-t{)
z>^-*<;W=qCpQd1Szw<*o(Vah#{&j*N9!wECzZJiLK(GQoGTpx4q1pA@M=Me>O<T6K
zN8BbfpDi^K$F?@o&CJZ4b%OMNi;(56?ncf%e&}a>GwdRB(}TUfc#376ExxSP>k)<#
zoVx+)20XmXxNNeT%N{96zew0f48g*x?#lZ$T9rGIR!BHKm2KK;P<z|LY~8k;)DW|C
zu*0?^-oUf_>@cO%;ArN-h+ZP*L#E697Co$x4dUkTZQ$<i7^6-oDNi(D$;fkZmchIE
zGJ=993m`@-u*{e9J`Wt71`xU9xtm3Y`|MPufLgO4z%Y&oEYAaM;eI}prebXb2zKOf
zF1A_B<L8%iTks#!*(l+*#f$8+(qG|fo=rH!^6sJigZGaoRf;=dNgRD63`|&G*P^>u
z?rQ>T-Bn#RHm3i+{?E6J>vY+ZHh2*BDf2toHCi(>HErw`mCoAj242~gi2#bC<Ho{m
zM#ja6oW4qdZFyGTj{;z~6{9As74(lREJAkwMx13SR{~`O!^WMGBoTP8ez)?~>8}qH
zIRW%NF1<zC*#7jfTm6p@_KsJ`Nwl?U>Q!LJ&#fM+ZM@eoFi4CVE52|a`L-n~HPy(>
z+&p+fOR98r0=W`*hvDn~WWyWF>5Q@~8Y6;tOw3CP?-uB)-Q4=&7YpB;^_kAgg+d8t
zX9vdj4F}(UboB?>dliEoYMRtDU(1CO9ezdxZ*%Z=c8WN*_0M9$YR9enK_>diX$rQ%
zx)r9b<51!&5a$|N**zqG|G3b(3wXA~0jZ{;#^c}og9UpluFAkcP5}XrD-jRO>9~OL
zi9<$G!}?zfbScw(<{EaC&Dvbcj@9A>YGY?1)zWlr|Hw7V5%_Vj%rk>6zk7>I<cH`u
zn{FiNSI!iR?d&wg=rKJrEFP${>bxECcgd9c#YH<nrAC{Mmr>@U%^I4Th`-3PUNk)+
z!T;&!=(RXG=`jb1kZ&U+BQ@_}BJ`ZEau+X;e$d*wwhXXUpLy8_<hzW7gekAngVzDf
zFIf1&_k+vsjDms#Hd2VwKME>Q8*UVRVq+5%EgVH9C0@k-v??Kyv`0J(8{K%`=-w+2
z;-%;#2hn~Y%q?JA|LfRbctyB{*bxD2U|+}l_8ZfRxx=-n<(J_sHllf(@5^y!vTN-d
zYNxwaCMUyocZdP^bcH|(8ON$jJatVQ3F+BJ$OPPtpbIl34r6^S$f~c*B~ux#8G*v0
zGedTCalsOErl`P)3+YhHgXFG^Jkl>tH7PG2y!XEpe#iQqPH-^u)W)7AxnYYnyk>g(
zjjCQ(;gf=@?45(4Niu!r^o$lt4or+{u9FD8)Sgce`@zYn9N8tCu0<AR=N2VxC`X{o
zM)If1Cv|Q+!1D3)t@EQ(2aGMGVfYvK-p!nsBqt|-&AT303EU4JobUZdGf;O-dAZqR
zb8`h*S=@q00?w@<J$?0U$huEpTyATyBCC(pZMMo1V!g&nj8xg71P2Gl+{7CY=i8X6
zJ4vFdjvsq>ndl0M;fvdlul;RlNf$i!{rauih`c6HEMjO7h)D3&jY7g1L_wnd5WO3b
zNBi4dm7|JGIV|t3Q~Jq84%VO}clNo(xIs4)1^eS&JLWU9SgUHPF;d`a)3sRmxt7i|
z$)1*bKYVJkSo#Ya#ukrrI~%x{Z2lz3Ma9O(rc3^Ne}8`?gSZ{pmljD78oc)4nd84q
ze|KYJQs`Kq{FAZ|lU}msT?(qNa=Fb=`Gdoo&3y~$)Thlowuw;=xTxQ)n~f<=db+yo
zs!<@olK<Me(6j26xsA>KrcdnrDGr-D3{cX5(#1P|bENZ+VKBuUWH(pMO=+R>$v}YH
z-q7n<*BQC1#nQB-AdfE#zEf(d^V%exiJfiA@9_V2o2*6P%WrFS0~53&^mYm;BiSfk
zd?U=m-uUcfDJeWrK2GNT4N%KjbQe;4)z|jexf69ghaiSRE`OMs-_#LbZY{S-la0Bs
z+rr1kM@d>F&!Qts>7CK1I0igNo?PDyxqxa1Cd$%BK2tYd%@Y*J0%WbO3nzg1pmx<1
zQ$>rt+T@XcB4;VXmqs!!Ab&~ePBJvv6C)!JKyps$4!djs@k<IzoLoTgZgj-;q1TT7
zkE>)Bihp4NwAMF(pMkOiclT|`@a>kof`Y&yrL({mAvfw=A_xHI#l=07&3dGwa)9d*
zx*T9%GaVGrvWZ$If+%o5h+!+mNS&P{UzTIiDmufv6C(rLgr=WtUEkb)J%UhCVGe9Q
zn#XpEKm)QjR^5bsYAiT<uVK65VM}*P!~8bgUO7+p^C&haBbPzGJ45ZQ)($z7T;7&A
zw%0{F%GgSf=2`_bd8F@$vMP20MQND~LRL3e<bO#}a=BXm85v0oHPla5>$jB5(1Vsw
z9)<SYt&$39J5#$9E^6I>4-h33$;&}cb|dY@J<_Q8Z*F>3vW>W&4Ije9iFN;P)e`@{
z%Gpsn!BlEW2n2Fk80Z_QT9>kPh$&qDm4n;-;yput$tI)ysXWb4oJBbzN~jo(w!WBk
zAtzL@3Gfrb*47r<l?q~?NBdiW&ygO$L~xUe@ox4YG!}t;7~C<j;NxF)h-f7*+$NrQ
z2ypy-OOEhXbi1lP**jKOOcQDKg)T8NH6V!fr_%A})<#?NZ_8921_Lc9UGuYdb3fh*
zoC&C}MUh>}ij8~QhQVBeWqO8PjviV%Vo-_sgRipJF?HMg;u}eR8};j!mYLa1!{r+^
zr$PZwM}Gb2oG|zM@tKrGUQV?|m;;Uc{P+eFuLN*-H)XPy&ItD=9eEv`oC30!4lZ{X
z`4pFW+foh-#9?u+a(b~gO&dk{+jTQFJx@h0UA*%R(u;Zj-inh_%hAc{jd53`6}=ce
znR^@)h!d6!JAzDVQ^59|reZ7)VRtP@E(z%qQ82Y(pjuFM`ievKry`9G{w8brWRrgR
z<m4-g<I+OjNme<@fX=a^(LqIg12(Dp*5OfpEK$vu#oTji0xjs)_K^T8Z=<7S8>=8J
z!T^|{PJ+fWw`-tKd-*Cy6k88rMF%g?8P8d)zjPfhetcSI(YBF=bA;xVq>G;}@yUij
zup*~PkPrOPR28SveU)OA<g1$|6|2iWAxiRED6Z+sg(^<~1*mHGJYF?`$oNSVc0S5f
zS#jLR#AIrIj*GP2sm4`PT|MgC%?~YW!PpUyEI*Bp%9qoy0T6t~x&kUN2TaxjwKx_N
z!pI2Ka9~ne+KT*AW`wA4Ajo}Oc>TFjNq%5$R|Jgk$Iov=1qKtdvxhx48?JLSE*}A#
z=+V&m;|mhwsn^_|iF5$bKc9m9d=;04yM1B?`ubh+<;;m72I4=knCwR22T5>~ud}nW
zt}la1s93&EZt^M2x4Wvb2$t-yxZa2!y2y19#OJc<hyDn){ocCa=39NKEQzLuV2L18
z&MUmzaQ?kBo)?2xL~JaMB9U%yA)5y-Ri{sw`&by>yxZIjC?k(}5IpT(kc6{<5H@_`
zuIh->zj1RSoUG=3-#;>110nGHoPvUa)xbTQY`|>}K$B?^3@`3Iohrz2!<(DCZb`~Q
z0B#BZ!Tv23eyaDSr5irEqLpj7m6C$aR}@pyLKS_6*#dsqTeWMr*5E*$>le;b5*%-7
zn&ueITATQo|FHoHOpwt$UfvxS7Za-`Lbe=$vfJFt-)sYg0C?AygVT{Ua@+@HkZz76
z9t6>GVaS$qipyI+uqh~<Wc-q7Bk24U#Pq|17F7K6)4_`L5;Y&D8vN1j58+JLcAP7k
zs0V|290ekdq53IE8wSgefZjl6^*5Hu<*>Q++Y}kolJn1$cP6P>{I~4b(fN8ids<&a
za^Q0fQuK`VMXoQNhaRE?bJ~u#v8*R}ke<^CHfweFQ&;a%3uA*G9}3>moYix=ln7kQ
zDD*eir1RyInyevzPE{5b7F3|s%{fy`z`ts)b{ti^4GJK<hjkVgV}lk!&N8sPfci3c
z?)t^>cDM41+KKza_ZRhtzkRFEp8tcqJsr}fWZe>sf&+mh%1DDSzvcLf7bQ$G0OuNM
z^{{*rJB|aFh&41a8oH(<p7zMvzyRJR^gVE7Z0ygQVTU*M=0Eq3ikh3(v!N;HP9|q(
zgCevGUOR{+qfl>w=Y2Fz%3yRa^e`jQe;*+jF!wlo|KV#k#oNg#JIS2Y-Az`1CD8VO
z*v!q27SEaTie&2<r5-=|B1U$RbLyu9dC5@l_YE~9(p*BQZedYoz8tbruSE8BtaiJ}
zXJh(@M?-z&C7zbE)28C}J&es*t02MMD2D37kJa+l!^--~N_Gx;+*1)=`Jpq>C~xg+
z0B%Oi*Rw(kE{ESgJl@e!3)-71Kuk<dZg8X5ki4aVH*ozxc?Ef4V>b*kq(Xy~_4M}1
zYdkzW6dM4vjqGP!H7s^@*3jr}Z0F5bIyj_eg?t1dPaD(e?Yc27ogoK{kKfl3vA&Ol
z{R6?JZEx>rBh2c5HR~Lu`_}jNMh6D90_9&-<*8U{>bkwY!TkQ@)`My1;T2k1+5)|>
zMMVnk$UEatMY<)zt$4u@?bn}2Tyu_qNqq8ir^aG@^7v4mcXJ~bVzOXC@VK97q!}Gh
zhQ=h9p!^WVk%gXj=-JWJ6W_fP6Td;G0H{}o^(@eZpgalP4Fz%irn}pionl0~!fCeA
z4Wo_S*S~krXfmA%=TpL&#Vq5M3yooVfl@mkL~Xi%o*+i@)OUfF4arPTZ;U`XAs@1#
ztytogp1jJno2sx;!}_=2-TeRZSqQ%Q;HruBvv;K?o@>)f+5BuOu*R4NWre&2(b2!f
zRbv=WoI*lEG<|#q@kC-Zjyvh#iudea-@pcd(t@TZWmQhj&R9@Km4-L1o0^(ped0Y{
z*_U(_eOLyh{grC&_V1@6((7GnJgR}OQ~k#lJU-Ko`DdR=C@3hr*E{o)Jvl=bPEK8B
zU|=}0n9?biYSxB^u528D*n*!C`54H_3Xto3m)3@xEY^Xtix}+uDX11Iwl&#w`ULE=
z81M%|b0i#pYefkm*!ZG58lIG;B6-VZtOfd-)6>$%%gJGLH7--*6m1}wSi88mSZ2N#
zq{zBKXgiDW95R<qL|WsO**`pQ{ee^%9bU~5df2-fUt%H1ui=AM#q1kkr#Ci_C3e)H
z&=A;S&>hDHbnWFs=<2!!=o?x>l)6nrmU`mc6o2JvN|tBJ?9K0MFzuutckF;R4uAJ$
zX&|~-FIgO`3hd2W>yZj}{W9$WOV^6LGHop_F3_E`v;zZ4s&ey>z{2FynR~g{9mMe@
zfk0!KAg{4m=gzTnBB%FEa48V}nwnS%c2iriv|9{h1Uwa;N(GinG#74fPiXU6`Ta7q
zh#R<~*A@1kv;!Y+;a&>EwTR{1I7MS7gyLwy(A$cq?Ne9Dm&wToYnZSn5c$Uu;u!-x
zvmVt_EOAZG;ZsVl5EE2`4iXA@LUR-{mRT;KY19O9O{cG?hp+X|+-J4<o;sdlBf{Lb
zdiPG2x*6Jv1!p*X0g}Y6b$Vy_&QmhT-;nGK0?~X2AM`;xE8S;M=K5tW5YL84Nm+L6
z1o{8zj{${erXV)D+tC$8ZPS`hua_ElPuC4W`?J&)eM}02RZ>HATiUz?njEKy%|P4w
z54k9s#V=g#VOsC2-kL_4t?s$@1}@_WRFvkT{$Ath4b-jna}7Ko92qM|eswLXZFgd3
zlTu}yh?l%T`S~U=N@c;m3M@M1?ed_C4>`l!5HQix>r@lWt3DzDD)VMH2k)`nvna1y
zTY|RBLm)NXPen4N^uzrNz#W97q^D22viOJ<VHo%01p2PB8avcXuL|7KgzF4}jQ2$f
zR>iTcZ3-!zSCxXGU@11`+~~;23CIx!w-qc|8yFc)x{fOYP+h+L(_bEgIlUuW+Bof2
zSn7SuX#&5I+|{g<azCrrxn_EOWrYFPt-U@|XP-vcit~?ldvxvul;&<$EFWlk3(TC@
z&6C(LuqVZa7VWr)E7gk)<2Dph`{LR7^tZ0h%FO|+b9z#6S+^+IX+}raYgUj+vl5ND
zimL)iC1&aiR*l*PM@rdCM?tg$?#-E5Stnzl`bQif2`)z#2(@T`x)ptPVo(bJ@=Q{*
z&qh_CUQs=^(av-tNj+o09QaH|(@rkoyhaDyweClEizMU-M_M(KH?L8E8V=Ij;cxTy
z3Gk`6J`1bleU<7a8DaC^UN-MJTnc}WZC6f9OFIJj<tKFCRC9~4@3PX;8pN}kLD2@C
zq0!7v$EcW?Sh{fi_YSDe=k>~<gYg_=0TeQgDjNORz+jveS%|H{+O&&|jV>uU&IO0E
z@(@n7>8t|&Fca5WTD?{(+w$`B4I&Bvla0PtzD!_8U#<8_x$~|*XE5h*cv)gTh!pF_
z!I-J_NS}R<zFS6Ad{>+ueYP$Hxd7Tih!5C$Iz*t?C?N;R{AX27Uf2~KM7k!_tFy#?
z&7(i^`Y3s$4bK<!5Wsg{I|I&}C{8}hg3&X1E-rYEMftv(@at*yg3%rjRNn<zR-K+o
z2`i9YP}1<q>Hul-FFVa+2ONF6%r%5j9a#|1+$NWr1+}}2AZsHJ0>*vbvROA<LV~5U
zm*7=0`Fwyy6>^8`Id5KhQ@=M&dv$7sHywl7{6)~><uuItQ$h_Y1@6F-Q6Q^+E4=-t
z2tC<pY|cFqdU>xorT{v!OU@l@nT0v`*o=MVMvev-JQT_%*G^yyQJ2EE6a02=z5E^u
z9AaL1M3y!Wn4<m`oR6Qs#S}#=BToSOHcyynEtzq~aN-L>Cg1t*_8zs2<_Yu-vM8RZ
zgB-R-()K9`%|dvDTGYZoxRmfBj7@)Ue>dX6`>oq#s+N8spFd-Dfob<g2rTWdI63Xk
zMlC?SPQz#7<TokQmB`xS6dx0r_VM{iY6nqB&`aY5dWuwsNrcMUy^aeEZrZarI!(#0
z4DTa^Po@@xh0?^hi!?u*mP$&2ObRp(l>KBWLsLDNd~Z=|85uw#;8TnS`Qd2ZR>A1G
z4=>W?a3do@*CPZ!uX;43M>*V6%Q7vk#X+kxkcA*;GX?cxz`7F*47>qFIp`y|KMI(E
zAoG3oM8!nTmDg+KZFg3@0f3y;{U(;NyX$>*=Foc}-K-X@st|*GMBLbgcrJf!A34|=
zTJ>%92^>Fx61C4`=-;E!&B@ChP<u_F8+FYdf81k_&KxNM$)=f+8T?X(Y3p%%M{IMa
zS;SmN#L{O-<oo*iKX=#z#N|O1k&~vpe|N`<m!dY5k}e4BL+NOJRHGR3;nGtni*jEs
zO1PJzPs5XJzg0hWP*>wi1?%TO`wZNK%C;EMoXIs%C)yZbUtGLFds2!8ahkhAyv9xO
z9eXqJoa&*!zMpdg)*UNT#XL%w*yx;zn_W!($-`+fPN6RkXFv-i{g`9Tky)T-N6%iy
zpCW9`exce;1vNnfp}PP)gN!1yW)W?v(=W;D+Xg2gd2YM1FHf`-dVA_p+w<}9P*pzZ
z@Q+dql&NzfGev4$lO3JY+{QCbmng*A|HB{nVZ8i5i5Elt_d=zDy?2Yvl4>2VfOfm>
zu9}bi8}^Uz5K(^b)~F@C`rb7sGUYDM{_ar^ab+kpqc4jtx>77igmLL^P2C^^yV!~}
zqSoZ>0p`9h;$EUo4-VARKmGMtfqlD?!3GEQ8V3wE%X~Axt=Gw8x?iC4F=)6k-TuN%
zhGXMfy@yxa%1l?4jlDxX6q;l=rl90(e7Lqn@WaQOsb2?8pMoc?ukcBr9nfs#?!z-$
zS}wT+cSA0Ag+A12$dCRaXx@{f>Do6G8mo815|b0wkCyIwmoe+uewU6j!MgT~txPhX
z^uVuX&0PT9CUGwZ8ID()i?&K;EL%TK-SWF1pG-~B^O{l_DjSQtw^7yYfXF)1g~1XT
z`^RI)J-q}pRkLvkm?g_65^jHP3kxarrBy!wJpqS&KVFlR91OlpJVbr!pu1rpPnk?(
zw$!rd_mNB)^ee1o#bsPv36Qyp+i=2$8lylvkr4Gzi;V0*b;#Mq>pHGlPH<se8V;#}
z)xd#nYEik#nqXi+AbcyPN_b&Iake>|Ru%Qo{GhwmYKwN#-~yD73nu8L0@_lPZ5c*(
zX7>b=P7e-ptMGmc=~b75;qqWQV@~h1)|`Qy!?C2M*Vi*HZhww1Av!Odj|6@|aJVev
zS2?FAMv=BMn@{l}h$mdx@SZQ8<hMxA?A)T=Pg-Z_w%+{h@_Bwge(bq6l$a}C;1W8I
z3=V0SsT1PuN!|Chp+i}XNp#lXHcG>)ALt&oRi&eAbzY2CkfnJymV#3^9vsLZTASBP
z^2(k8a>qYX-|&dr``a46^ZIwrg5s>vr004zAxmzDj2*qz_UrT8(!K7=cO}H4>y`#l
z`npl}3?rUTf-%{lHny1UgJ94Ag4;I&<Q5@vwnVUO8eRWzwoqY?-K)DcQiU$D?H+M1
zTdFT>SOP7(MiGZM`{mRyYci-e@^zoBzfYECrkqU#(+#<968J%b2Yvb_34nD@yzpb>
zL+gAB%U7?QQQI!O!N=-FA?@k^HY`ndz0*oI?8RuTyO&*Pb&aXYTbi1tQ8){*Xk0l!
zxDW^#-;6*XvWOQMxVL(bxS{$V4|nI!!7BHvoij9`pu_2~%+pSYlfCiJfnZXtp%gQv
zZNwH|CO`8F3v7k$&nca%N0HvCkC+YysZ(<%64xjH&i3t6a|GV;ft}N@Nqe2P4bX1B
ze*_wNxeOLMNFmD5fY`vA9ruc2?=*T^z?4z*Onsn_Q%2Xj(a2M<dujjV*HHv1Q4E|g
z^5y;O%UK~I6V=$9S@Y(h+ccIpY2iWkvRIJ^wNt+gm*)S+fV(^|S~|tPonwA(<0@eG
z@G}0O58OYang9RMnw9$;D+Ph3H0NB{b!YM3xD@%ghacAhcPKAi<ppy3JK#>Gqri!c
z+N#Sv%DrdXor`{Zlq#POn_2ot`>|0_+?~SPuI+5QfqR7+nsYBiznHsj?!@~JF4Ju{
zygf0WC32tl`9l3NyXU-ZVTMO4jC%g4N2>ijz274H)N%Wn<~Qv-t^#*$Nd&d|IK2Pz
zdh_!C`_sQo`5@Mu=kG35esIS_O>5bASKI4qPVR1w0&ewbP|P|}vXdFOZRh{S=2&0~
uVd#)&vI2^;DJ%erW-yF$hFlo@ssC@zxn_ZiCUA=#1B0ilpUXO@geCyF?KbWJ

literal 0
HcmV?d00001

diff --git a/etl/customers/stonewater/map_app/map_page.py b/etl/customers/stonewater/map_app/map_page.py
index c39a53af..4fa2406c 100644
--- a/etl/customers/stonewater/map_app/map_page.py
+++ b/etl/customers/stonewater/map_app/map_page.py
@@ -61,6 +61,43 @@ def layout():
                 dbc.Col(
                     html.Div(
                         [
+                            # Banner with logos
+                            dbc.Row(
+                                [
+                                    dbc.Col(
+                                        html.Img(src="assets/stonewater-logo.png", height="50px"),
+                                        width="auto"
+                                    ),
+                                    dbc.Col(
+                                        html.Img(src="assets/osmosis-Logo.svg", height="50px"),
+                                        width="auto"
+                                    ),
+                                    dbc.Col(
+                                        html.Div(
+                                            style={"color": "white", "font-size": "1.5rem", "font-weight": "bold"}
+                                        ),
+                                        width=True,
+                                        className="text-center"
+                                    )
+                                ],
+                                className="align-items-center",
+                                style={"background-color": "#027fa6", "padding": "10px"}
+                            ),
+                            dbc.Row(
+                                [
+                                    dbc.Col("Powered by", style={"color": "#027fa6", "fontSize": "1rem", 'zIndex': 10}, width="auto"),
+                                    dbc.Col(
+                                        html.A(
+                                            html.Img(src="assets/hestia-logo.png", height="50px"),
+                                            href="https://hestia.homes",
+                                        ),
+                                        width="auto",
+                                        style={"margin-left": "-60px"}
+                                    ),
+                                ],
+                                justify='left',
+                                align="center"
+                            ),
                             html.H1(
                                 "Stonewater Survey Map",
                                 style={"font-size": "2.5rem", "font-weight": "bold", "margin-bottom": "20px"}
@@ -79,7 +116,7 @@ def layout():
             dbc.Row(
                 dbc.Col(
                     make_map(locations=locations),
-                    width=12,
+                    width=10,
                     align="center",
                     className="text-center"
                 ),

From c5693289c31e3185d5c65182ce599119c928c627 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 1 Jul 2024 13:25:51 +0100
Subject: [PATCH 58/80] preparing mapping page

---
 etl/customers/stonewater/map_app/map_page.py | 105 ++++++++++++++++++-
 1 file changed, 102 insertions(+), 3 deletions(-)

diff --git a/etl/customers/stonewater/map_app/map_page.py b/etl/customers/stonewater/map_app/map_page.py
index 4fa2406c..bb85961e 100644
--- a/etl/customers/stonewater/map_app/map_page.py
+++ b/etl/customers/stonewater/map_app/map_page.py
@@ -7,6 +7,43 @@ import pandas as pd
 from config import MAPBOX_ACCESS_TOKEN
 
 
+def make_real_epc_piechart(real_epc_breakdown):
+    labels = [x["is_real_epc"] for x in real_epc_breakdown]
+    values = [x["count"] for x in real_epc_breakdown]
+
+    marker_colors = ["#027fa6", "rgb(225 225 225)"]
+
+    fig = go.Figure(
+        data=[go.Pie(labels=labels, values=values, marker_colors=marker_colors)],
+    )
+
+    fig.update_layout(margin={"t": 0})
+
+    plot = dcc.Graph(figure=fig, config={"displayModeBar": False})
+
+    return plot
+
+
+def make_epc_rating_piechart(epc_rating_breakdown):
+    # Re-order from A to G so the labels line up with marker_colors below
+    epc_rating_breakdown = sorted(epc_rating_breakdown, key=lambda x: x["EPC"])
+
+    labels = [x["EPC"] for x in epc_rating_breakdown]
+    values = [x["count"] for x in epc_rating_breakdown]
+
+    marker_colors = ["#117d58", "#2da55c", "#8dbd40", "#f7cd14", "#f3a96a", "#ef8026", "#e41e3b"]
+
+    fig = go.Figure(
+        data=[go.Pie(labels=labels, values=values, marker_colors=marker_colors, sort=False)],
+    )
+
+    fig.update_layout(margin={"t": 0})
+
+    plot = dcc.Graph(figure=fig, config={"displayModeBar": False})
+
+    return plot
+
+
 def make_map(locations):
     if not locations:
         return None
@@ -38,7 +75,7 @@ def make_map(locations):
             bearing=0,
             center=go.layout.mapbox.Center(lat=53, lon=-1.5),
             pitch=0,
-            zoom=4,
+            zoom=5,
         ),
         margin={"t": 0},
     )
@@ -55,6 +92,14 @@ def layout():
     with open("Stonewater Mapping Data.json", "r") as file:
         locations = json.load(file)
 
+    # Get the EPC breakdown data
+    with open("Stonewater real EPC breakdown.json") as file:
+        real_epc_breakdown = json.load(file)
+
+    # Get the EPC ratings data
+    with open("Stonewater EPC rating breakdown.json") as file:
+        epc_rating_breakdown = json.load(file)
+
     page = dbc.Container(
         [
             dbc.Row(
@@ -85,7 +130,8 @@ def layout():
                             ),
                             dbc.Row(
                                 [
-                                    dbc.Col("Powered by", style={"color": "#027fa6", "fontSize": "1rem", 'zIndex': 10}, width="auto"),
+                                    dbc.Col("Powered by", style={"color": "#027fa6", "fontSize": "1rem", 'zIndex': 10},
+                                            width="auto"),
                                     dbc.Col(
                                         html.A(
                                             html.Img(src="assets/hestia-logo.png", height="50px"),
@@ -120,7 +166,60 @@ def layout():
                     align="center",
                     className="text-center"
                 ),
-                className="metric-row",
+                justify="center"
+            ),
+            dbc.Row(
+                [
+                    dbc.Col(
+                        [
+                            html.Div(
+                                "Breakdown of real EPCs",
+                                style={"fontSize": "1.5rem", "fontWeight": "bold", "marginBottom": "1em"},
+                                className='text-center'
+                            ),
+                            html.Div(
+                                "This pie chart shows the proportion of real EPCs in the asset list. Currently, "
+                                "there are EPCs for 3736 of the 5245 properties that have a UPRN in the asset list",
+                                style={"marginBottom": "1em"}
+                            ),
+                            make_real_epc_piechart(real_epc_breakdown),
+                        ],
+                        width={"size": 5},
+                    ),
+                    dbc.Col(
+                        [
+                            html.Div(
+                                "EPC Ratings for properties with an EPC",
+                                style={"fontSize": "1.5rem", "fontWeight": "bold", "marginBottom": "1em"},
+                                className='text-center'
+                            ),
+                            html.Div(
+                                [
+                                    "This pie chart shows the breakdown of EPC ratings, for properties that currently "
+                                    "have an EPC. "
+                                    "The ratings range from A to G, where surprisingly, there are two EPC properties "
+                                    "that were initially "
+                                    "expected by Parity's modelled SAP, to be EPC D or below. These properties can be"
+                                    " seen ",
+                                    html.A("here",
+                                           href="https://find-energy-certificate.service.gov.uk/energy-certificate"
+                                                "/2708-5001-7327-6090-7284",
+                                           target="_blank"),
+                                    " and ",
+                                    html.A("here",
+                                           href="https://find-energy-certificate.service.gov.uk/energy-certificate"
+                                                "/1037-4032-1009-0361-7292",
+                                           target="_blank"),
+                                    "."
+                                ],
+                                style={"marginBottom": "1em"}
+                            ),
+                            make_epc_rating_piechart(epc_rating_breakdown),
+                        ],
+
+                        width={"size": 5},
+                    ),
+                ],
                 justify="center"
             )
         ],

From 51333ff31a5b89d1766342723f8a9408f324de7b Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 1 Jul 2024 13:35:00 +0100
Subject: [PATCH 59/80] minor

---
 .idea/misc.xml                                |  3 +
 .../stonewater/outputs 27th June 2024.py      | 91 +++++++++++++++++--
 etl/customers/stonewater/shdf_3_clustering.py |  9 +-
 3 files changed, 93 insertions(+), 10 deletions(-)

diff --git a/.idea/misc.xml b/.idea/misc.xml
index 1122b380..78660f34 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -4,6 +4,9 @@
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
   <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
+  <component name="PyCharmProfessionalAdvertiser">
+    <option name="shown" value="true" />
+  </component>
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/etl/customers/stonewater/outputs 27th June 2024.py b/etl/customers/stonewater/outputs 27th June 2024.py
index d8bf43be..7a78469c 100644
--- a/etl/customers/stonewater/outputs 27th June 2024.py	
+++ b/etl/customers/stonewater/outputs 27th June 2024.py	
@@ -9,14 +9,16 @@ In this script, we do the following things:
 3) Mapping of the archetypes
 """
 import pandas as pd
+import json
 from utils.s3 import read_pickle_from_s3
 
-archetyped_asset_list = pd.read_csv("Stonewater asset list with archetypes V2.csv")
-archetyped_asset_list = archetyped_asset_list[
+stonewater_asset_list = pd.read_csv("Stonewater asset list with archetypes V2.csv")
+archetyped_asset_list = stonewater_asset_list[
     [
-        "internal_id", "customer_asset_id", "udprn", "uprn", "cluster", "archetype_representative", "rank"
+        "internal_id", "customer_asset_id", "external_address_id", "udprn", "uprn", "cluster",
+        "archetype_representative", "rank"
     ]
-]
+].copy()
 archetyped_asset_list = archetyped_asset_list[archetyped_asset_list["rank"] != "NO ARCHETYPE"]
 archetyped_asset_list["rank"] = archetyped_asset_list["rank"].astype(int)
 # Sort
@@ -28,12 +30,38 @@ clustering_features = read_pickle_from_s3(
     s3_file_name="customers/Stonewater/clustering/clustering_dataframe.pkl"
 )
 
+# Move property-type and built-form to the first two columns
+columns_to_move = ['property-type', 'built-form']
+
+# Get the remaining columns
+remaining_columns = [col for col in clustering_features.columns if col not in columns_to_move]
+
+# Create the new column order
+new_column_order = columns_to_move + remaining_columns
+
+# Reorder the DataFrame
+clustering_features = clustering_features[new_column_order]
+
 archetyped_asset_list = archetyped_asset_list.merge(
     clustering_features,
     on="internal_id",
     how="inner"
 )
 
+archetyped_asset_list = archetyped_asset_list.rename(
+    columns={
+        "internal_id": "Osm. ID",
+        "customer_asset_id": "Org. ref.",
+        "external_address_id": "Address ID",
+        "cluster": "Archetype ID",
+        "archetype_representative": "Archetype Representative",
+        "rank": "Archetype Group Rank",
+    }
+)
+archetyped_asset_list["uprn"] = archetyped_asset_list["uprn"].astype('Int64')
+# Create an extract of the features
+
+
 # Look at number of combinations
 # - If we look at the number of combinations of property type & built form, we have 25 unique combinations
 # - If we look at the number of combinations of property type, built form, and walls description, this jumps
@@ -50,6 +78,55 @@ archetyped_asset_list = archetyped_asset_list.merge(
 #     ["property-type", "built-form", "walls-description", "roof-description",
 #      "floor-description"]].drop_duplicates().shape
 
-property_type_archetypes = archetyped_asset_list[
-    ["cluster", "rank", "property-type", "built-form", "walls-description"]
-]
+# Save this as an excel
+# archetyped_asset_list.to_excel("Stonewater Archetyping Features.xlsx", index=False)
+
+# We store the location data, which will be used for the mapping. We just need the longitude and latitude
+mapping_data = stonewater_asset_list[
+    stonewater_asset_list["archetype_representative"]
+][["internal_id", "uprn", "standardised_address", "standardised_postcode"]]
+
+mapping_data = mapping_data.merge(
+    clustering_features[["internal_id", "LONGITUDE", "LATITUDE"]],
+)
+mapping_data = mapping_data.drop(columns=["internal_id"])
+
+with open("etl/customers/stonewater/map_app/Stonewater Mapping Data.json", "w") as f:
+    f.write(json.dumps(mapping_data.to_dict(orient="records")))
+
+# We also include some data for visualising the breakdown of EPCS
+proportion_of_real_epcs = clustering_features["estimated"].value_counts().to_frame().reset_index()
+# Invert the true and false
+proportion_of_real_epcs["estimated"] = ~proportion_of_real_epcs["estimated"]
+proportion_of_real_epcs = proportion_of_real_epcs.rename(
+    columns={"estimated": "is_real_epc"}
+)
+
+with open("etl/customers/stonewater/map_app/Stonewater real EPC breakdown.json", "w") as f:
+    f.write(json.dumps(proportion_of_real_epcs.to_dict(orient="records")))
+
+# Produce the breakdown of EPC ratings
+epc_rating_breakdown = (
+    clustering_features[~clustering_features["estimated"]]["current-energy-rating"]
+    .value_counts()
+    .to_frame()
+    .reset_index()
+)
+
+epc_rating_breakdown = epc_rating_breakdown.rename(
+    columns={"current-energy-rating": "EPC"}
+)
+
+with open("etl/customers/stonewater/map_app/Stonewater EPC rating breakdown.json", "w") as f:
+    f.write(json.dumps(epc_rating_breakdown.to_dict(orient="records")))
+
+epc_a_properties = clustering_features[
+    (clustering_features["current-energy-rating"] == "A")
+    & (~clustering_features["estimated"])
+    ]
+
+epc_a_properties = epc_a_properties.merge(
+    stonewater_asset_list,
+    on="internal_id",
+    how="inner"
+)
diff --git a/etl/customers/stonewater/shdf_3_clustering.py b/etl/customers/stonewater/shdf_3_clustering.py
index fa6551b7..bdac5ec2 100644
--- a/etl/customers/stonewater/shdf_3_clustering.py
+++ b/etl/customers/stonewater/shdf_3_clustering.py
@@ -678,7 +678,8 @@ def compile_data():
     # )[["AddressId", "UDPRN"]].rename(columns={"AddressId": "external_address_id"})
 
     asset_list = pd.read_excel(
-        "/Users/khalimconn-kowlessar/Downloads/Stonewater SHDF_3_0_Board Triage 22.05.24.xlsx", header=4
+        "/Users/khalimconn-kowlessar/Documents/hestia/Stonewater/Stonewater SHDF_3_0_Board Triage 22.05.24.xlsx",
+        header=4
     )
 
     udprn_data = pd.read_excel(
@@ -1128,7 +1129,8 @@ def compile_data_final():
     ########################################################################
 
     asset_list = pd.read_excel(
-        "/Users/khalimconn-kowlessar/Downloads/Stonewater SHDF_3_0_Board Triage 22.05.24.xlsx", header=4
+        "/Users/khalimconn-kowlessar/Documents/hestia/Stonewater/Stonewater SHDF_3_0_Board Triage 22.05.24.xlsx",
+        header=4
     )
 
     udprn_data = pd.read_excel(
@@ -1788,12 +1790,13 @@ def compile_data_final():
         property_attributes[to_col] = property_attributes[to_col].fillna("unknown")
 
     # Drop the description columns that are the keys in cleaned
+    print("PUT ME BACK!!??")
     property_attributes = property_attributes.drop(columns=list(cleaned.keys()))
     # Perform the mapping
 
     # CLUSTERING!!
     grouping_columns = [
-        'is_cavity_wall', 'is_solid_brick', 'built-form', 'property-type'
+        'is_cavity_wall', 'is_solid_brick', 'property-type', 'is_pitched', 'is_flat', 'has_dwelling_above'
     ]
 
     # Define the preprocessing for numerical and categorical features

From c6a1c5e11d51857994080416c2b8eaed7028367f Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 1 Jul 2024 15:38:40 +0100
Subject: [PATCH 60/80] adding missing data files

---
 .../stonewater/map_app/Stonewater EPC rating breakdown.json      | 1 +
 .../stonewater/map_app/Stonewater real EPC breakdown.json        | 1 +
 2 files changed, 2 insertions(+)
 create mode 100644 etl/customers/stonewater/map_app/Stonewater EPC rating breakdown.json
 create mode 100644 etl/customers/stonewater/map_app/Stonewater real EPC breakdown.json

diff --git a/etl/customers/stonewater/map_app/Stonewater EPC rating breakdown.json b/etl/customers/stonewater/map_app/Stonewater EPC rating breakdown.json
new file mode 100644
index 00000000..bd111a27
--- /dev/null
+++ b/etl/customers/stonewater/map_app/Stonewater EPC rating breakdown.json	
@@ -0,0 +1 @@
+[{"EPC": "D", "count": 1718}, {"EPC": "C", "count": 1343}, {"EPC": "E", "count": 538}, {"EPC": "F", "count": 80}, {"EPC": "B", "count": 52}, {"EPC": "G", "count": 3}, {"EPC": "A", "count": 2}]
\ No newline at end of file
diff --git a/etl/customers/stonewater/map_app/Stonewater real EPC breakdown.json b/etl/customers/stonewater/map_app/Stonewater real EPC breakdown.json
new file mode 100644
index 00000000..4ae2209d
--- /dev/null
+++ b/etl/customers/stonewater/map_app/Stonewater real EPC breakdown.json	
@@ -0,0 +1 @@
+[{"is_real_epc": true, "count": 3736}, {"is_real_epc": false, "count": 1509}]
\ No newline at end of file

From 8e47e430db8a3f0da262dc0344581c9214d0dd7c Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 1 Jul 2024 15:51:50 +0100
Subject: [PATCH 61/80] fixed typo and added favicon

---
 .../stonewater/map_app/assets/favicon.ico       | Bin 0 -> 24618 bytes
 etl/customers/stonewater/map_app/server.py      |   3 ++-
 2 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 etl/customers/stonewater/map_app/assets/favicon.ico

diff --git a/etl/customers/stonewater/map_app/assets/favicon.ico b/etl/customers/stonewater/map_app/assets/favicon.ico
new file mode 100644
index 0000000000000000000000000000000000000000..30ce09624020d14b97a4662747d30901f7ccf0f7
GIT binary patch
literal 24618
zcmeHPX;@R&)&^q(3P_z8M2HnzY7v8_7A=HesYNfJ##V2wWr#B^2#KHsNJvCM9ICmF
zv?4^S7VAJ%1j3YqAR;Qn37G-`B18l-5a1*vIp^Ca$k5xL-}8LW9r7daJmj1`ti9H|
z-u3RyKD=&ipv}}dQ_alGY*w%O>RU52+#Bp;ISKrw{9ZsV_%(&TYE!J4nbl0}V{Uft
zyn~t9JhRnbt=JHMzORL;B*ukGw5QzD{jEz*#hyI%dclGZmKRDCZWUM7rOyAjeA;{8
zT-Z9hH~MeW|JeBO2Cn>0&F`r;mp#8Zy>knFQsYzqbJn?`FLQQmY>_W6x}F*F_=f%I
z)N8BE8F882cV9i$8fq0RDi7Adza${f$e+#4QiycAq+vM`%FP~X#p4||?=ye*<7!?-
zJVj42oA|en+vU?e%gk`rBR|#|6qSqRTF0?>Ht$QGA$Kt!{T@77hyL)kBa3G~q2a$Y
z!oOl_<ev>|vWbJqPfW36!jp9d6BL-Bz?4Q!)WXvQ1%R9Wi?IUwfVHpT;B~52f52g9
zThrs5+@&Mr_+?`K-|b<&_0#$?63ti2B^O#$SeGC>WK}%75i)!gecj_PVA@$*!Yp@U
zDjnWYjWO$WLHLt&k;p8`Mj)ti4Hi@tm>FBl`@=S%U7ojOo>+aeWY%K2@PH#m)+4I@
zX8;O*fp2aP$8WcSHFicee&O@MEgy?ZKgXvqg=f;sD_a<z7%TVpa9mYRBI5>@wsp90
zceT4nCK;*x%isO1Q;X{#?4yec`o9$jbklZX&0PHuw^|?z-zUU}h@~ey#L{46Igs4+
zGVLNAZgO~EuKg}-sy#z;Z)Awu=S?wAdEZf{@10a8%gR_QtjieooFxx)!j+RGkAm7e
z<UYS(MaG4*)(hG}X)rv8zMC7EPv_D`s)UI3goEWRk=y(QvfzY;A>t$pFRYoDCzBV5
z)n5Ko++w-vp`BdiVXQ;^quy4m6ELIRS@CGGVl!5;B>(W~KT?=%{F}(KN|H-sIZ61Q
zv9!6mCdpwcgMZAtJ&V?Pg}xhhequ5@=0+x?n>^hFFD7^~VY(@iVszxcFOf#h0d&Sh
zq)bH0#9mD$gQ;XN)oG^U*i;-Fr5aOl{QoKg-Me$XNbxk)UK}qyQ_rrr=#CD1hxE4)
zi<CA*GFrp;gRxIlSZrHuv#3Ztlq*k=I-<&B;reP+2mUntO!0+`f><E6K}1cheb~TB
zJ~U8R5W4$KQDlzb^>FMo@m!QP8OU3qkY#y`>*mcmAd>;Xs+fOUofY#UH@ficY)QP*
zf;xDb0c}C6T9Mb(BKWdI`$px1e9lHMZsQCJtB~FnHwk)ny~J?Pz$h6OD_HF-MZ(TU
zR<*^|voHy&Y1O(Z49YvHNqOm0caAxaSz9Bazj@S5jk(t%DMn8*GW#2Cc@7*;jvmMO
zC2jO9Ks5=1%Tzp}YzhH+_M#Obg+u*X3!0$6>BW%qk>W`}4il^#r>mY*VAXjc`YOC7
zrp_shjU+L84QFh9<&oSj>ce)W&emDi&CF8HB;Qr&gncF9l9*1RSM;%r$vEo`=x96b
zE+W@<zmvFdNk7j}7o<pR`IU@jxv{(3w{=iD9%?HnbJe*F9;?2mmZhZ@YueuPeqeLJ
z9DhwV-*QCtqjTYZ&<PceI-21@b|{ix@1iLzrXc-?zDKPzPjm!S?%j*_Na)qcxwOL>
z7NB$9c;~kB+FLWNhyqm}QK-B`yGiuhvx_y5A$x2~P<tu9(0cHfD+io%j*BJ%jr1{b
z|A<Cv3CF{h@bBV1b$K-QU?G_SS5trL_v&$Kqb4brFgOZCMmEHtTO;*J`;&hdbQ~Qi
z;jrYB*n#i59tGua)YnA@s)Dr8uk(yHlk0+>B9|yKvJ$j^u$QhG4sfx4xm5(TNK6Gy
z(XC3+9j86g%3206e+0|-ypvJBs63p^>L`}nLVxuRn~mPUf#sQV6bQALoKT*$pgbwt
zZo8s8Eo}IFkl)ad8(JLLzb&c;62O-s-4Wn&IZ)|R^tLVjlVR(WVz-B7LpGN12}{@&
zjVMHp^ssxETCk-7+{2XE863^wZF0`zKSay-q{eIoK6Z0({)B#-oo>Se0esDa1L!~|
zRTjAQ_}iI=0TLy?<|xrO34&fOL_N&#5xHPO%-B)kS@M8UT__Xl8Pwgae4pDi<wc~+
z;HGbgoTkw_wm<am`~BqfKUYTLn3|<9Qq~PY?i?%KR2FhiinR8Hhw3+QoFduwW&X~F
z@Ok1=FMK}DU%<UzY4DVwUeP7PNWuBH*;_k^LqHCfab5hnS_#WSg;s(1h{U?lmEpoi
z=-d*$tTnDOKoK9q`c|_ZkMsppwI=8`$w-~QYr<WDc6_{cz9oZpG`Y{|eYxx7>Pmx%
zqedkVbNHx2UGL^mBP00AA$?A4?H4+`&iiJ#rHf-m<Y0e6mD5TAtv8ROrWW?vx4Jhr
zD;s%!s$%M%Eq$a}=y?zmFKa#a)8Q;k3VHbbRC|~_NK%QLb2Kk~D91(bHG3+tU>Z!z
zIlTtB;`)fdQHeY-|1=$iiTdSf3z$sd@WU(^eJWIo)CUw=K6o-v4^}t)KBlo(erpLg
zF`Lh%hj1j$Ov8g;mgq5)qvYjM9M2pd9a(BQD%1O&40Vyqc#IdB1-!v4Y23rMlYFAv
zW-8*JgSQEsZ5YtVfid`LBgS|UQGZZ}h2RNDby`G@4`EGSmWUT>p5Er@uGrrvI05VB
z=K<V%u?53D-NUk`%yy(kRMP9E;IKmcKHt`M!iQ4$`b$n@1Y1$&AEk1%WN;6TH0~8y
zkgD4OfJkw?ba!9Ftz=0Nnv(7(JE>7)PXC?V3ITJ8^)AUiI>9lx*i8)c5_*CGP}};k
zcZrwJ#|wwvWMPont)m?yuk<V{+X9O2)~UE=?|I_Dp;wEiz%+K(4k%8tO`1R5TS+d}
z9Zq*H`2d-B&kVQM0fZE3)`(ASX9;-l=?u9c`Gw(Hl_n@S4MS&1|Bc5T#F@a>oJPzf
zDI=r))mhF_2QMD(<DP#%{pUJsR_8Mkh88M#3P=JFeL4ub@{yqX9~;<xaUsGJYcC7J
zZtQBq(VBV!Tcy{h;@^rm`0^H=F<IzVeSJ{p&f_Rymt}r(yB;SD=Y+~h(85ol+|3t|
z8<_W7zZ>cI0&3zj8(w@`022va+JWl-s0w9h_o}k3K<DUo(0Ta1cb!j#H~OJ%vbjw!
zh}T@9z33DQ)c#h3!voWDO2Cq3yz6~uiVlOI69rjw(hm#q;GNw75LW6x1aSP8{Qcy1
z`3lclNf`9t$_?*{1bPWeb6>D}kGC-u%!fYuw6qos^8gqo$#P_vm-h;IAcLS?L%G7?
zRbj3>9W>7?s!Dnb)9({L2QHIR99UL19jumXx0BpEbZ;L~sJJZfxAsy3pG;8Y5|5E-
zo@M)vg2#Y^hu@3%%m!8@wx?2AZyE%opFdMt5XeJdD89(Ud;WdH;cbCCd?$gf%PNx=
z^pzWK<kGa&gjU9eOl)y@2IXOBk76}1QC7_>={Zel-TO#X((@Yat0JH@KUT*^mm03M
zlc%yW?f9VL+JkiGyq*t%m+muf&tc9<)2-PUs)Nw`Qbx&hb|;Rj3#2va0{DrKtay7H
zuh_THn}h+JRzll=p}?g`eh#yFaTuoy!()E8rqF5-W%UYz`K2ErU0|#oz)J7-om_vK
zW%Q(`djzgljH^Ola(i`vVeIK%>Ag$G-J{&^B+>NA3`7O<OV;sabMP2!FFz^6w$faK
z!LryaF)qt?r#wrzG@FoE-y;&AK!rSJn}(x#;3d@ln%+k;h)CCI2eLysgMpGC*ielx
zTb1PIyi9jiGZn=8nF-10l`Ra|MmI=9F)8R$2p~?N!ngD9#AWdhkYH{rVeskRdf!CZ
z3FMpG=aMNU($zjXPFFJU5hWI=?jwO(-O<hl%n0dB;*w6H`>BbVUv%;YCg5Jty(0Z1
z89{$jM>#~X^ZBtYr#{nB@}B3qV#RnO-Snk<ny!GX=y26u9FiopX$nfZ%GQgZpVXZG
zqr5~Zwr(2w?e)gk-^h~Q(|iqPv15D|53PUj5>r|jZ=m7eaGH*h&uZURRoa(lQA?6_
zwQ*9D!o|Z`ie>_j826~m-Y-tPW+P3GO7PaFG54K>KMoL;(CBY<P`4Z;F|Gu8-DjxR
zUP|L?E%-iOF*{JNLTR1sLEz7spDG`d2bZD0MX<8h6|dfZ)VMzg5|s6Yx^hF&W4`W*
zu0Wg7L{6w*=uOQP6I+p18cD)u5-KQ&Qm8pP36~d{l{`v3PAr-&C$fjG02mNN=?gS@
zIu{*anx7#uRIRyPKlhOB9G@L+%~_1y`jyGaP0kZ$Jg~`4Ox1N*L3-tqV1pX)fuRze
zjV>KEKNvtLk>1tZxb2E(E0U!|s6p~*mSmwg-)X^Uf+RGOB2#o13rgdByt}C^$!d{6
z^>%=O+-l%--Gr2_r#hq$jyuE;)UJ=<OM0`Y?l1RD8A&DpW51^Ois8)NJr=EZ8P_?z
zeBUBjpCmSp5826nzs@%t;L^~7;H?XB{3KPX%Kklk#IV;pnhB`kE&%~}A?8QH1}}6t
z%|%_39XefdC`y?mi3?+OG)%`~o_sVbaT_+}IR@*|oXcI5wOK%oX+H*OgdXU?Lkq>S
zzVmO0KaxM4X2>n(IS{^xyI+<>Ysz4+-Q1SnIKfon)v3Cg`Ve&wA@Rvs<SOQ!^`*W0
zHS_0wAS%*c<NKZD`5YIQ)w{H)*N94dk>ASvYm)F|^UYtoKnx*I&p_&D(t-h*zMk-f
zir<>JgJl($!nr%h>#jP7_fsQP@UXo{nSWm5_?vgMbWWYsH1<zNp&=EP57eUIPGSHl
zW-+8WNNyS?3h$r8BkFYDrC6Uf6Z*W<PDhDh{V#7K7ao?9>BA+Lx+sQ+Ld_d&QPs%I
z+T~ASYLOYP;3gmhV?LO<=Ah0AGL-O1MXBWGbb?<O4Z3Tv%|m|`?-|TvKPMp?#t{Ou
zCK>a?r64K4<J^Sm2jzz28+pZ7$PF0lc~A6<o`x8t1?1&^Nxg@XiN5jT1j?`KOCey#
zJz;!)-_`CcH`D~*7?PBrDcPYCDgUdM&xvTHeN{FWM2cM?Jn-ekkN!5c`gdZEvjv1i
znPhov5a%*y@Lp92r*f~#f^)Z#IeZb-Au~H{&3v}DF$l|ADMeL`sY}<6ar6_1V^3EY
zo-&0r^ep1bOF*SWpZEZM9_`>wg!kF^jb#}^sn|HLo(c1MW(9+4r-*hGx}|>s`)+y8
zPtpe^ysFt)NiYLYfN^fDiXngEx$eM(umpXTCrKO&*spJ9M|V^D$q+oHSRqmR3;>?D
z1i{~0j}IH%f1n$9cXY<U_zLu9T$(P|)d~jhc_5EY^NMw^TePm_WaL10l?3ZAyEP%k
z=+=LF$JrJG@*>nDxHu1eQoPoWKIEaYcWJ*ODqH~iI)I-R%2=cSCOP?B|HQ5`ZQ%8=
z^KhGUc2u{mk|NRW4OhI1;FlI;#<q3wcXbl4ly~5<x>gkkg7DFVAau5Y0|57a#a2q!
zE5xM*LmUyY1iq~CDqHKQ0Su>&`pq%LtcW9%N6uD&ge~1ED+k7h(4RdFfv7D9?Wt`V
ztP5k(n%~l%-0#4L>3f5ug!;`FzV^k9z0~*z9qQQ|tUL1>?UlO~<)Md*Ak`WRsmKiE
z0ik!_F;2*V<T<MGIsKgBJO%clkt2i9i&)(#ua$hD4v(qntgqym$2~P7_pjl7Lj*G%
zE^j7?aOUcFMA19YMNVV(C3_g$kmsdteb=BJ-JZ(^Qn}9doGA?}aO@vGDqSy#IygR~
z$6b~cBR-EVt3$#g^q|B!1WoR}kPOHrM?cdEl(nN-yF6)o<J{E(33AdZe}Fd#GJxQK
zz1V?MZPgD7Jt8s#tP#Su_5B>VTIN3pIG*u%41mhBHTe~WU>>vjEZe*=0=3$Xv|Z1d
z0Y<b5S0Bm_)dzrH)DwHzMHKUx&9ga5)izs{+(~VP8-j>MA0P+QJB?Z3!_DC~CGs8i
z_a(C(u69kpxI2ws3nZYVf^=o7G?btMia`9a9x3(ez$S0%Y-5iEWzYvNB%7`xGsX~O
zQO3=K7#G(Zvn6#YQ7r~>w8E})nx4?}#u;KGh5D1LD*xKww*o4?bq%CPs+<wB-pbYm
z-WXl<39`qB{$mh1F=fz()%T1<0lANKjAj=d>9Jz8{#|Sf=hg~4WfJxM3e*j~SH(?g
zeUWnrYF?~wO5z?P<9lzILp3DfcUQZ{MylwdlcOXTQ6y@w25lgcSx<t5$aBr5s1$0a
zAD6(YTi>CWYP~@=ZcFbp9T#ouPD26zxWo(thpSZf%|`*Z`4Dlqm5XP0qzxG-RXc9P
z92=O>^oZt&HeF&ya}HBe3OWtL-2~FokvE3qr=+~*%@@8PkGXiUmzX*v4gz(W9g<A$
zt-|H!Q56QntAlIq6WJDPJC}fp@whte7$Qkh+mX9He54~FSQF*OLHC9~AVjRk3g1cs
zE>2vw+#>oo*~4>P6mN_pwAjhhL*k_%107C7xx)&1Lom~}61>$q)Pz%2?Q`~~8)kz5
zv2(bZKVdFYVJ}fpCr^@_N<}m(ba*wBvazpO!Y3GVVzJ>xm(jjD#)-Z7MbIcb3o}57
zr*Q`ER-kEmH8U%IsIlNa@5jj20cY6*<LIu3wbi?Krd!p0HwMcsGv&d<2|cLPz$K%;
zoPgIS<n=JrQKH#acHAkx;&Fk}d#16X2m{o0jpsaZT{U%vTw?gm$`%U3sBK|yn~iW6
zNWU<<M_ZJTmFx`-hcts)Ew9D)>vs7dcttJg3{qv68-A*%8gyxf$nAbW*zyidx~aJA
zyhJ&iJ_SxL8C+_y$P#CLnhsk!G?bW=Kk@Pt^AeFY5OT3<ZWr1CCB10?n$?vx;)PgE
z!)Rrnb>8ysXm~jZIpe!XOlfAk0cGp*C+JPS-yK<}sJLN{{DL_kE$9#v4xah$;NU#5
zlv-U;ZU96!L4yV(XNsMv{@@{IYy3f)`^1M%l^Z}Knn<pT9DPJq)?ac-C9Iq&cbzU5
z-f+C?s@Xh1%ue*j5RA8*P~Ag~L$IPF)q?Bu1l4j&24Aj+wMWmUFp=(jf5GGL@P899
zCtH67B$pFnRi+Gi3&{2pw4EYRr{wBa1_g#gA>zRK;v}lqI!uuSDh0%?QdU+Vom?nU
zvW=H0gnsP)F^Weg9HzrbHU3kK#f}T=*BZr=CrfF9=losdBiG&j(g<TVCK+YLzM0Bo
zY%t1744%`7so$5Y6w;Ooj5;|>_MEul(_|F5in-H9Mr#RcV3oYxnbWh&slePQWVK{l
z;6B7icM4Oli%(a!HS>DY$<aGoFlDS*5jo+-=r*7y=m90))dC(~IJ8orGuvV;Blpp{
z90FGcB8=0wqrNfkm?M2RHZ8~U@o8n%U<?S<i{js+o&$alzFzXDuY1Dcl0BMlffb(7
zE<cqr@_$Fb8IpW|fmsr^dw#;mfYYW-81N@pLCK_$w^w3As`VG>JSPt6bpbi19Mp%K
zq50szBeon_p5x0QQ`G^Q{$^A9rkv9w-MXeu=$2c+;X)rQ)i%!m%NnGq$ikkUwav1?
z=2RxKa9V&sC)_!13Q9+T9KO`2rOdX+R6os55Z4Ve(V6>_g(DPzwxU;%jJb6x`!1<8
zq&(?u1UTr4JvJKtekU2F$P>+r{dC?DdO?fz%jv9#Rt(!i;Fu%2n0llh>SamX-5r1k
z4IEfi^X1;N<2CJcInY(WWThFYZx@XLJrFv9tUEfKv3_Q>3oS84lO)`9wRQ~QxnBu@
zAPW|WOS8L{JD#UaO`8mYoxue)w#h^OyF(@qKoNG<8`L8vR4}1}2^Idc5^0rp55*YT
z6Ul{f>vK9r4VkRmb(tTINmhsH)X5lebCE3C5I!zW&KKLSz%oUZw^iMyu_^^{5U@-9
zPrT(nyy@zY_e0Fg2=}pj0kHqpF%q<YW#e!I%bxk0NT7)oV+W|Q&^DC`f2z++2KlFg
z)nt%=1B1K*I**B|nXv57T9YYs{|!RdB;}c;29qLdlBi8&+_Z^d+SU0FR-H}4x@mXm
z|Fj{S#jdF1vyc_mUUaY>56+@QD5C1a;zIRHHdonTmm862;P`SMyVeT)j{$aZkMV+`
zeaX$QQ0QZ<J{-R@bmW-0@m`*h%XtQ&vqj+O(8NnbjRzsdFA?2<M*e{v#5FZyvWdx0
z#_!ezXAw=YW5N>?Jc4XvN*N|7FhPL{3jW=o0J*bB&oSTbK2ZJY?26am@_)0{E7yLN
J^X2CK{|gm_2`B&n

literal 0
HcmV?d00001

diff --git a/etl/customers/stonewater/map_app/server.py b/etl/customers/stonewater/map_app/server.py
index 87f10e21..040959f6 100644
--- a/etl/customers/stonewater/map_app/server.py
+++ b/etl/customers/stonewater/map_app/server.py
@@ -34,10 +34,11 @@ def init_app():
         SECRET_KEY=SECRET_KEY,
     )
 
-    app.title = "Hesta X Stonewater"
+    app.title = "Hestia X Stonewater"
 
     # Define the layout
     app.layout = layout()
+    app._favicon = "favico.ico"
 
     return app
 

From eac20467657fdc181659402de0d92ea4e473e64c Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 2 Jul 2024 10:31:52 +0100
Subject: [PATCH 62/80] removed rubbish code from epc clean

---
 backend/apis/GoogleSolarApi.py         |  4 ++
 backend/app/plan/router.py             | 22 +++++-----
 backend/ml_models/AnnualBillSavings.py | 11 +++++
 etl/bill_savings/data_collection.py    | 56 ++++++++++++++++++++++++++
 etl/epc_clean/app.py                   |  3 --
 5 files changed, 82 insertions(+), 14 deletions(-)
 create mode 100644 etl/bill_savings/data_collection.py

diff --git a/backend/apis/GoogleSolarApi.py b/backend/apis/GoogleSolarApi.py
index 6d2ddf6c..d29e3da5 100644
--- a/backend/apis/GoogleSolarApi.py
+++ b/backend/apis/GoogleSolarApi.py
@@ -213,6 +213,10 @@ class GoogleSolarApi:
         # 1) Convert Solar Energy AD production from the DC production
         panel_performance["initial_ac_kwh_per_year"] = panel_performance["yearly_dc_energy"] * self.dc_to_ac_rate
 
+        # This is just a benchmark figure, based on the national figure. This doesn't not respect the fact that a
+        # property could be 100% electric
+        average_electricity_consumption
+
         # Remove anything where the total ac energy is less than half of the array wattage
         panel_performance = panel_performance[
             (panel_performance["initial_ac_kwh_per_year"] / panel_performance["array_warrage"]) >= 0.5
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 80392c88..258449c2 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -284,16 +284,16 @@ async def trigger_plan(body: PlanTriggerRequest):
             property_id, is_new = create_property(
                 session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn
             )
-            # if not is_new:
-            #     continue
-            #
-            # create_property_targets(
-            #     session,
-            #     property_id=property_id,
-            #     portfolio_id=body.portfolio_id,
-            #     epc_target=body.goal_value,
-            #     heat_demand_target=None
-            # )
+            if not is_new:
+                continue
+
+            create_property_targets(
+                session,
+                property_id=property_id,
+                portfolio_id=body.portfolio_id,
+                epc_target=body.goal_value,
+                heat_demand_target=None
+            )
 
             epc_records = {
                 'original_epc': epc_searcher.newest_epc.copy(),
@@ -356,7 +356,7 @@ async def trigger_plan(body: PlanTriggerRequest):
             p.get_spatial_data(uprn_filenames)
             # Call Google Solar API
             # TODO: Complete me
-            # solar_performance = solar_api_client.get(longitude=p.spatial["longitude"], latitude=p.spatial["latitude"])
+            solar_performance = solar_api_client.get(longitude=p.spatial["longitude"], latitude=p.spatial["latitude"])
 
         logger.info("Getting components and epc recommendations")
         recommendations = {}
diff --git a/backend/ml_models/AnnualBillSavings.py b/backend/ml_models/AnnualBillSavings.py
index 7395ab6b..e6494bcd 100644
--- a/backend/ml_models/AnnualBillSavings.py
+++ b/backend/ml_models/AnnualBillSavings.py
@@ -1,5 +1,16 @@
 import numpy as np
 
+QUARTERLY_ENERGY_PRICES = [
+    # 2024 Q1
+    {"start": "2024-01-01", "end": "2024-03-31", "electricity": 0.2, "gas": 0.042},
+    # 2023 Q4
+    {"start": "2023-10-01", "end": "2023-12-31", "electricity": 0.202, "gas": 0.51},
+    # 2023 Q3
+    {"start": "2023-07-01", "end": "2023-09-30", "electricity": 0.188, "gas": 0.46},
+    # 2023 Q2
+    {"start": "2023-04-01", "end": "2023-06-30", "electricity": 0.177, "gas": 0.456},
+]
+
 
 class AnnualBillSavings:
     """
diff --git a/etl/bill_savings/data_collection.py b/etl/bill_savings/data_collection.py
new file mode 100644
index 00000000..25023894
--- /dev/null
+++ b/etl/bill_savings/data_collection.py
@@ -0,0 +1,56 @@
+import inspect
+import pandas as pd
+from tqdm import tqdm
+from etl.epc_clean.EpcClean import EpcClean
+from etl.epc.settings import EARLIEST_EPC_DATE
+from pathlib import Path
+
+src_file_path = inspect.getfile(lambda: None)
+
+EPC_DIRECTORY = Path(src_file_path).parent / "local_data" / "all-domestic-certificates"
+
+
+def app():
+    """
+    This application is tasked with pulling a large quantity of data from the find my epc website, containing the
+    estimated energy consumption for properties
+    :return:
+    """
+
+    cleaned_data = {}
+    epc_directories = [entry for entry in EPC_DIRECTORY.iterdir() if entry.is_dir()]
+
+    data = []
+    for directory in tqdm(epc_directories):
+        data = pd.read_csv(directory / "certificates.csv", low_memory=False)
+        # Rename the columns to the same format as the api returns
+        data.columns = [c.replace("_", "-").lower() for c in data.columns]
+        # Take just date before the date threshold
+        data = data[data["lodgement-date"] >= EARLIEST_EPC_DATE]
+
+        data = data[~pd.isnull(data["uprn"])]
+        data = data[data["mains-gas-flag"] == "N"]
+        data = data[data["main-fuel"] == "electricity (not community)"]
+        data[data["current-energy-efficiency"].astype(float) > 80]["uprn"].astype(int)
+
+        # Convert to list of dictioaries as returned by the api
+        data = data.to_dict("records")
+
+        # Incorporate input data into cleaning
+        cleaner = EpcClean(data)
+
+        cleaner.clean()
+        # Extended cleaned_data
+        for k, data in cleaner.cleaned.items():
+            if k not in cleaned_data:
+                cleaned_data[k] = data
+            else:
+                existing_descriptions = [x["original_description"] for x in cleaned_data[k]]
+                new_data = [x for x in data if x["original_description"] not in existing_descriptions]
+                cleaned_data[k].extend(new_data)
+
+    # Basic check to make sure all descriptions are unique
+    for _, cleaned in cleaned_data.items():
+        descriptions = [x["original_description"] for x in cleaned]
+        if len(descriptions) != len(set(descriptions)):
+            raise ValueError("Duplicated descriptions found, check me")
diff --git a/etl/epc_clean/app.py b/etl/epc_clean/app.py
index 59561b3c..1d833b72 100644
--- a/etl/epc_clean/app.py
+++ b/etl/epc_clean/app.py
@@ -39,11 +39,8 @@ def app():
     cleaned_data = {}
     epc_directories = [entry for entry in EPC_DIRECTORY.iterdir() if entry.is_dir()]
 
-    WALLS = []
     for directory in tqdm(epc_directories):
         data = pd.read_csv(directory / "certificates.csv", low_memory=False)
-        z = data["WALLS_DESCRIPTION"].unique().tolist()
-        WALLS.extend(z)
         # Rename the columns to the same format as the api returns
         data.columns = [c.replace("_", "-").lower() for c in data.columns]
         # Take just date before the date threshold

From 1db6dfebdfa29854315ea2896e33bf779a0a9ddb Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 2 Jul 2024 11:06:11 +0100
Subject: [PATCH 63/80] created basic data collection process

---
 etl/bill_savings/data_collection.py | 110 ++++++++++++++++++++++------
 1 file changed, 86 insertions(+), 24 deletions(-)

diff --git a/etl/bill_savings/data_collection.py b/etl/bill_savings/data_collection.py
index 25023894..22b12c6e 100644
--- a/etl/bill_savings/data_collection.py
+++ b/etl/bill_savings/data_collection.py
@@ -1,13 +1,79 @@
+import time
+
+import requests
 import inspect
 import pandas as pd
 from tqdm import tqdm
-from etl.epc_clean.EpcClean import EpcClean
+from bs4 import BeautifulSoup
 from etl.epc.settings import EARLIEST_EPC_DATE
 from pathlib import Path
+import numpy as np
 
 src_file_path = inspect.getfile(lambda: None)
 
 EPC_DIRECTORY = Path(src_file_path).parent / "local_data" / "all-domestic-certificates"
+SEARCH_POSTCODE_URL = (
+    "https://find-energy-certificate.service.gov.uk/find-a-certificate/search-by-postcode?postcode={postcode_input}"
+)
+BASE_ENERGY_URL = "https://find-energy-certificate.service.gov.uk"
+
+
+def retrieve_find_my_epc_data(uprn: int, postcode: str, address: str):
+    """
+    For a post code and address, we pull out all the required data from the find my epc website
+    """
+
+    headers = {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
+                      'Chrome/111.0.0.0 Safari/537.36'
+    }
+    postcode_input = postcode.replace(" ", "+")
+    postcode_search = SEARCH_POSTCODE_URL.format(postcode_input=postcode_input)
+    postcode_response = requests.get(postcode_search, headers=headers)
+
+    postcode_res = BeautifulSoup(postcode_response.text, features="html.parser")
+    address_links_full = postcode_res.findAll('a', {'class': 'govuk-link', 'rel': 'nofollow'})
+    address_links = {element.text.lstrip().rstrip(): BASE_ENERGY_URL + element['href'] for element in
+                     address_links_full}
+
+    address_cleaned = address.replace(",", "").replace(" ", "").lower()
+    address_links_cleaned = [
+        x.replace(",", "").replace(" ", "").lower() for x in list(address_links.keys())
+    ]
+
+    index_of_address = [key.startswith(address_cleaned) for key in address_links_cleaned]
+    if sum(index_of_address) > 1:
+        # If we have two or more addresses, we can't be sure which one is the correct one so we exit for simplicity
+        return None
+    chosen_epc = address_links[list(address_links.keys())[np.where(index_of_address)[0][0]]]
+
+    epc_certificate = chosen_epc.split('/')[-1]
+
+    address_response = requests.get(chosen_epc, headers=headers)
+    address_res = BeautifulSoup(address_response.text, features="html.parser")
+
+    ratings = address_res.find('desc', {'id': 'svg-desc'}).text
+    current_rating = ratings.split(".")[0]
+    potential_rating = ratings.split(".")[1]
+
+    # Retrieve the energy consumption
+    bills = address_res.find('div', {'id': 'bills-affected'})
+    heating_text = bills.find_all('li')[0].text
+    hot_water_text = bills.find_all('li')[1].text
+
+    resulting_data = {
+        'uprn': uprn,
+        'address': address,
+        'epc_certificate': epc_certificate,
+        'current_epc_rating': current_rating.split(' ')[-6],
+        'current_epc_efficiency': int(current_rating.split(' ')[-1]),
+        'potential_epc_rating': potential_rating.split(' ')[-6],
+        "potential_epc_efficiency": int(potential_rating.split(' ')[-1]),
+        "heating_text": heating_text,
+        "hot_water_text": hot_water_text,
+    }
+
+    return resulting_data
 
 
 def app():
@@ -20,7 +86,9 @@ def app():
     cleaned_data = {}
     epc_directories = [entry for entry in EPC_DIRECTORY.iterdir() if entry.is_dir()]
 
-    data = []
+    sample_size = 100
+
+    energy_consumption_data = []
     for directory in tqdm(epc_directories):
         data = pd.read_csv(directory / "certificates.csv", low_memory=False)
         # Rename the columns to the same format as the api returns
@@ -28,29 +96,23 @@ def app():
         # Take just date before the date threshold
         data = data[data["lodgement-date"] >= EARLIEST_EPC_DATE]
 
-        data = data[~pd.isnull(data["uprn"])]
-        data = data[data["mains-gas-flag"] == "N"]
-        data = data[data["main-fuel"] == "electricity (not community)"]
-        data[data["current-energy-efficiency"].astype(float) > 80]["uprn"].astype(int)
+        data = data.sample(sample_size)
+        # We use the addreess data to find the related information
 
-        # Convert to list of dictioaries as returned by the api
-        data = data.to_dict("records")
+        collected_data = []
+        for _, property_data in data.iterrows():
+            # Sleep for a random time between 0.1 and 1.5 seconds
+            time.sleep(np.random.uniform(0.1, 1.5))
 
-        # Incorporate input data into cleaning
-        cleaner = EpcClean(data)
+            uprn = int(property_data["uprn"])
+            address = property_data["address1"]
+            postcode = property_data["postcode"]
 
-        cleaner.clean()
-        # Extended cleaned_data
-        for k, data in cleaner.cleaned.items():
-            if k not in cleaned_data:
-                cleaned_data[k] = data
-            else:
-                existing_descriptions = [x["original_description"] for x in cleaned_data[k]]
-                new_data = [x for x in data if x["original_description"] not in existing_descriptions]
-                cleaned_data[k].extend(new_data)
+            response = retrieve_find_my_epc_data(
+                uprn=uprn,
+                postcode=postcode,
+                address=address
+            )
+            collected_data.append(response)
 
-    # Basic check to make sure all descriptions are unique
-    for _, cleaned in cleaned_data.items():
-        descriptions = [x["original_description"] for x in cleaned]
-        if len(descriptions) != len(set(descriptions)):
-            raise ValueError("Duplicated descriptions found, check me")
+        energy_consumption_data.extend(energy_consumption_data)

From 3b3c6c3cc4bd8e028efef268ac1ef797e72134ff Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 2 Jul 2024 11:55:23 +0100
Subject: [PATCH 64/80] Added more robust address selection

---
 etl/bill_savings/data_collection.py | 85 +++++++++++++++++++++++------
 1 file changed, 69 insertions(+), 16 deletions(-)

diff --git a/etl/bill_savings/data_collection.py b/etl/bill_savings/data_collection.py
index 22b12c6e..793c13c4 100644
--- a/etl/bill_savings/data_collection.py
+++ b/etl/bill_savings/data_collection.py
@@ -1,4 +1,6 @@
 import time
+from datetime import datetime, timedelta
+from dateutil.relativedelta import relativedelta
 
 import requests
 import inspect
@@ -8,6 +10,7 @@ from bs4 import BeautifulSoup
 from etl.epc.settings import EARLIEST_EPC_DATE
 from pathlib import Path
 import numpy as np
+from utils.s3 import save_pickle_to_s3
 
 src_file_path = inspect.getfile(lambda: None)
 
@@ -18,7 +21,13 @@ SEARCH_POSTCODE_URL = (
 BASE_ENERGY_URL = "https://find-energy-certificate.service.gov.uk"
 
 
-def retrieve_find_my_epc_data(uprn: int, postcode: str, address: str):
+def calculate_expiry_date(lodgement_date):
+    lodgement_date_dt = datetime.strptime(lodgement_date, '%Y-%m-%d')
+    expiry_date_dt = lodgement_date_dt + relativedelta(years=10) - timedelta(days=1)
+    return expiry_date_dt.strftime('%d %B %Y')
+
+
+def retrieve_find_my_epc_data(uprn: int, postcode: str, address: str, expected_expiry_date: str):
     """
     For a post code and address, we pull out all the required data from the find my epc website
     """
@@ -31,22 +40,52 @@ def retrieve_find_my_epc_data(uprn: int, postcode: str, address: str):
     postcode_search = SEARCH_POSTCODE_URL.format(postcode_input=postcode_input)
     postcode_response = requests.get(postcode_search, headers=headers)
 
-    postcode_res = BeautifulSoup(postcode_response.text, features="html.parser")
-    address_links_full = postcode_res.findAll('a', {'class': 'govuk-link', 'rel': 'nofollow'})
-    address_links = {element.text.lstrip().rstrip(): BASE_ENERGY_URL + element['href'] for element in
-                     address_links_full}
-
     address_cleaned = address.replace(",", "").replace(" ", "").lower()
-    address_links_cleaned = [
-        x.replace(",", "").replace(" ", "").lower() for x in list(address_links.keys())
-    ]
+    postcode_res = BeautifulSoup(postcode_response.text, features="html.parser")
+    rows = postcode_res.find_all('tr', class_='govuk-table__row')
 
-    index_of_address = [key.startswith(address_cleaned) for key in address_links_cleaned]
-    if sum(index_of_address) > 1:
-        # If we have two or more addresses, we can't be sure which one is the correct one so we exit for simplicity
+    extracted_table = []
+    for row in rows:
+        # Extract the address and URL
+        address_tag = row.find('a', class_='govuk-link')
+        if address_tag is None:
+            continue
+        extracted_address = None
+        extracted_address_url = None
+        if address_tag:
+            extracted_address = address_tag.text.strip()
+            extracted_address_url = address_tag['href']
+
+            extracted_address_cleaned = extracted_address.replace(",", "").replace(" ", "").lower()
+            if not extracted_address_cleaned.startswith(address_cleaned):
+                continue
+
+            # If the address is a match, we can extract the data
+
+        # Extract the expiry date
+        expiry_date_tag = row.find('td', class_='govuk-table__cell date')
+        expiry_date = None
+        if expiry_date_tag is not None:
+            expiry_date = expiry_date_tag.parent.find('span').text.strip()
+
+        extracted_table.append(
+            {
+                "extracted_address": extracted_address,
+                "extracted_address_url": extracted_address_url,
+                "expiry_date": expiry_date
+            }
+        )
+
+    extracted_table = [entry for entry in extracted_table if entry['expiry_date'] == expected_expiry_date]
+
+    if len(extracted_table) > 1:
+        print("Multiple candidates found, skipping for now")
         return None
-    chosen_epc = address_links[list(address_links.keys())[np.where(index_of_address)[0][0]]]
 
+    if not extracted_table:
+        raise Exception("Fix me")
+
+    chosen_epc = BASE_ENERGY_URL + extracted_table[0]['extracted_address_url']
     epc_certificate = chosen_epc.split('/')[-1]
 
     address_response = requests.get(chosen_epc, headers=headers)
@@ -83,7 +122,6 @@ def app():
     :return:
     """
 
-    cleaned_data = {}
     epc_directories = [entry for entry in EPC_DIRECTORY.iterdir() if entry.is_dir()]
 
     sample_size = 100
@@ -96,6 +134,10 @@ def app():
         # Take just date before the date threshold
         data = data[data["lodgement-date"] >= EARLIEST_EPC_DATE]
 
+        data = data[~pd.isnull(data["uprn"])]
+        # Take just the newest EPC per uprn, based on lodgement-date
+        data = data.sort_values("lodgement-date", ascending=False).drop_duplicates("uprn")
+
         data = data.sample(sample_size)
         # We use the addreess data to find the related information
 
@@ -107,12 +149,23 @@ def app():
             uprn = int(property_data["uprn"])
             address = property_data["address1"]
             postcode = property_data["postcode"]
+            expected_expiry_date = calculate_expiry_date(property_data["lodgement-date"])
 
             response = retrieve_find_my_epc_data(
                 uprn=uprn,
                 postcode=postcode,
-                address=address
+                address=address,
+                expected_expiry_date=expected_expiry_date
             )
+            if response is None:
+                continue
             collected_data.append(response)
 
-        energy_consumption_data.extend(energy_consumption_data)
+        energy_consumption_data.extend(collected_data)
+
+    # Store the pickle in s3
+    save_time = datetime.now()
+    save_pickle_to_s3(
+        energy_consumption_data, bucket_name="retrofit-datalake-dev",
+        s3_file_name=f"energy_consumption_data/{save_time}.pkl"
+    )

From 298bb5a148db4a43ef752d65ed5fba99671c0e6c Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 2 Jul 2024 12:00:24 +0100
Subject: [PATCH 65/80] strip leading zero from day in expiry date

---
 etl/bill_savings/data_collection.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/etl/bill_savings/data_collection.py b/etl/bill_savings/data_collection.py
index 793c13c4..873bf957 100644
--- a/etl/bill_savings/data_collection.py
+++ b/etl/bill_savings/data_collection.py
@@ -24,7 +24,7 @@ BASE_ENERGY_URL = "https://find-energy-certificate.service.gov.uk"
 def calculate_expiry_date(lodgement_date):
     lodgement_date_dt = datetime.strptime(lodgement_date, '%Y-%m-%d')
     expiry_date_dt = lodgement_date_dt + relativedelta(years=10) - timedelta(days=1)
-    return expiry_date_dt.strftime('%d %B %Y')
+    return expiry_date_dt.strftime('%-d %B %Y')
 
 
 def retrieve_find_my_epc_data(uprn: int, postcode: str, address: str, expected_expiry_date: str):

From b8e769347936fd5df8c299484d5e61f942f45dfc Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 2 Jul 2024 12:06:18 +0100
Subject: [PATCH 66/80] Adding epc directory to output

---
 etl/bill_savings/data_collection.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/etl/bill_savings/data_collection.py b/etl/bill_savings/data_collection.py
index 873bf957..26ed156e 100644
--- a/etl/bill_savings/data_collection.py
+++ b/etl/bill_savings/data_collection.py
@@ -159,7 +159,12 @@ def app():
             )
             if response is None:
                 continue
-            collected_data.append(response)
+            collected_data.append(
+                {
+                    **response,
+                    "epc_directory": directory
+                }
+            )
 
         energy_consumption_data.extend(collected_data)
 

From d562324dd906c8c8ab49ac27845c3c4faac4da12 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 2 Jul 2024 12:22:35 +0100
Subject: [PATCH 67/80] skip cases with no candidates

---
 etl/bill_savings/data_collection.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/etl/bill_savings/data_collection.py b/etl/bill_savings/data_collection.py
index 26ed156e..521a3783 100644
--- a/etl/bill_savings/data_collection.py
+++ b/etl/bill_savings/data_collection.py
@@ -83,7 +83,8 @@ def retrieve_find_my_epc_data(uprn: int, postcode: str, address: str, expected_e
         return None
 
     if not extracted_table:
-        raise Exception("Fix me")
+        print("No candidates found, skipping for now")
+        return None
 
     chosen_epc = BASE_ENERGY_URL + extracted_table[0]['extracted_address_url']
     epc_certificate = chosen_epc.split('/')[-1]

From 654251c084b0c8f2729834235725187636c3c433 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 2 Jul 2024 12:25:16 +0100
Subject: [PATCH 68/80] handle case of no response

---
 etl/bill_savings/data_collection.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/etl/bill_savings/data_collection.py b/etl/bill_savings/data_collection.py
index 521a3783..2632c296 100644
--- a/etl/bill_savings/data_collection.py
+++ b/etl/bill_savings/data_collection.py
@@ -102,8 +102,8 @@ def retrieve_find_my_epc_data(uprn: int, postcode: str, address: str, expected_e
     hot_water_text = bills.find_all('li')[1].text
 
     resulting_data = {
-        'uprn': uprn,
-        'address': address,
+        'extracted_uprn': uprn,
+        'extracted_address': address,
         'epc_certificate': epc_certificate,
         'current_epc_rating': current_rating.split(' ')[-6],
         'current_epc_efficiency': int(current_rating.split(' ')[-1]),
@@ -163,6 +163,7 @@ def app():
             collected_data.append(
                 {
                     **response,
+                    "epc": property_data.to_dict(),
                     "epc_directory": directory
                 }
             )

From 4bcd17596e9931508c1df3cfc13b16d440b36980 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 2 Jul 2024 13:11:23 +0100
Subject: [PATCH 69/80] handle missing bills data

---
 etl/bill_savings/data_collection.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/etl/bill_savings/data_collection.py b/etl/bill_savings/data_collection.py
index 2632c296..1f787d48 100644
--- a/etl/bill_savings/data_collection.py
+++ b/etl/bill_savings/data_collection.py
@@ -98,8 +98,11 @@ def retrieve_find_my_epc_data(uprn: int, postcode: str, address: str, expected_e
 
     # Retrieve the energy consumption
     bills = address_res.find('div', {'id': 'bills-affected'})
-    heating_text = bills.find_all('li')[0].text
-    hot_water_text = bills.find_all('li')[1].text
+    bills_list = bills.find_all('li')
+    if not bills_list:
+        return None
+    heating_text = bills_list[0].text
+    hot_water_text = bills_list[1].text
 
     resulting_data = {
         'extracted_uprn': uprn,

From dd0deab0ee3274d8503093483e242dbfee10c4ff Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 2 Jul 2024 14:59:16 +0100
Subject: [PATCH 70/80] setting up energy consumption model class

---
 etl/bill_savings/EnergyConsumptionModel.py | 89 ++++++++++++++++++++
 etl/bill_savings/data_collation.py         | 94 ++++++++++++++++++++++
 etl/bill_savings/data_collection.py        |  9 ++-
 utils/s3.py                                | 30 +++++++
 4 files changed, 218 insertions(+), 4 deletions(-)
 create mode 100644 etl/bill_savings/EnergyConsumptionModel.py
 create mode 100644 etl/bill_savings/data_collation.py

diff --git a/etl/bill_savings/EnergyConsumptionModel.py b/etl/bill_savings/EnergyConsumptionModel.py
new file mode 100644
index 00000000..2ca88da5
--- /dev/null
+++ b/etl/bill_savings/EnergyConsumptionModel.py
@@ -0,0 +1,89 @@
+import pandas as pd
+from datetime import datetime
+from sklearn.model_selection import train_test_split
+from sklearn.linear_model import LinearRegression
+from sklearn.metrics import mean_squared_error, r2_score
+from utils.s3 import save_pickle_to_s3, read_pickle_from_s3
+
+
+class EnergyConsumptionModel:
+    FEATURES = ['feature_1', 'feature_2']
+    TARGETS = ['heating_kwh', 'hot_water_kwh']
+
+    def __init__(self, model_paths=None):
+        self.models = {}
+        self.model_paths = model_paths or {}
+        self.data = None
+
+        self.X_train = None
+        self.X_test = None
+        self.y_train = None
+        self.y_test = None
+
+        if model_paths:
+            for target, path in model_paths.items():
+                self.models[target] = read_pickle_from_s3(bucket_name="retrofit-model-directory-dev", s3_file_name=path)
+
+    def read_dataset(self, file_path):
+        self.data = pd.read_csv(file_path)
+
+    def feature_engineering(self):
+        # Example feature engineering steps
+        self.data['feature_1'] = self.data['original_feature_1'] ** 2
+        self.data['feature_2'] = self.data['original_feature_2'] ** 0.5
+        # Add more feature engineering steps as required
+
+    def split_dataset(self, target, test_size=0.2, random_state=42):
+        X = self.data[self.FEATURES]
+        y = self.data[target]
+        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
+            X, y, test_size=test_size, random_state=random_state
+        )
+
+    def fit_model(self, target):
+        self.models[target] = LinearRegression()
+        self.models[target].fit(self.X_train, self.y_train)
+
+    def evaluate_model(self, target):
+        y_pred = self.models[target].predict(self.X_test)
+        mse = mean_squared_error(self.y_test, y_pred)
+        r2 = r2_score(self.y_test, y_pred)
+        return {'MSE': mse, 'R2': r2}
+
+    def save_model(self, target):
+        run_date = datetime.now().strftime("%Y-%m-%d")
+        save_pickle_to_s3(
+            self.models[target],
+            bucket_name="retrofit-model-directory-dev",
+            s3_file_name=f"model_directory/energy_consumption_model/{target}_{run_date}.pkl"
+        )
+
+    def score_new_data(self, new_data, target):
+        if target not in self.models:
+            raise ValueError(f"Model for target {target} not loaded or trained")
+
+        new_data_transformed = self.transform_new_data(new_data)
+        return self.models[target].predict(new_data_transformed)
+
+    def transform_new_data(self, new_data):
+        # Apply the same transformations as in feature_engineering
+        new_data['feature_1'] = new_data['original_feature_1'] ** 2
+        new_data['feature_2'] = new_data['original_feature_2'] ** 0.5
+        return new_data[self.FEATURES]
+
+# Example usage:
+# model = EnergyConsumptionModel()
+# model.read_dataset('/mnt/data/energy_consumption_dataset.csv')
+# model.feature_engineering()
+
+# For heating_kwh
+# model.split_dataset(target='heating_kwh')
+# model.fit_model(target='heating_kwh')
+# print(model.evaluate_model(target='heating_kwh'))
+# model.save_model(target='heating_kwh')
+
+# For hot_water_kwh
+# model.split_dataset(target='hot_water_kwh')
+# model.fit_model(target='hot_water_kwh')
+# print(model.evaluate_model(target='hot_water_kwh'))
+# model.save_model(target='hot_water_kwh')
diff --git a/etl/bill_savings/data_collation.py b/etl/bill_savings/data_collation.py
new file mode 100644
index 00000000..ef2b286b
--- /dev/null
+++ b/etl/bill_savings/data_collation.py
@@ -0,0 +1,94 @@
+import re
+from datetime import datetime
+
+import pandas as pd
+
+from utils.s3 import list_files_in_s3_folder, read_pickle_from_s3, save_dataframe_to_s3_parquet
+
+# These columns we co-erce to strings before saving
+PROBLEMATIC_COLUMNS = ["main-heating-controls"]
+
+
+def extract_kwh_value(text):
+    """
+    Extract the numerical kWh value from a given string.
+
+    :param text: The input string containing the kWh value.
+    :return: The extracted numerical kWh value as an integer.
+    """
+    # Use regular expression to find the numerical value followed by "kWh per year"
+    match = re.search(r'([\d,]+) kWh per year', text)
+
+    if match:
+        # Remove commas from the extracted value and convert to integer
+        kwh_value = int(match.group(1).replace(',', ''))
+        return kwh_value
+    else:
+        # If no match is found, return None or raise an exception
+        return None
+
+
+def app():
+    """
+    Given the files written in our datalake in s3, this application will collate the data into a single file
+    and store it back in s3 for analysis
+    :return:
+    """
+
+    # Firstly, list all of the saved files in s3
+    data_files = list_files_in_s3_folder(bucket_name="retrofit-datalake-dev", folder_name="energy_consumption_data")
+
+    run_date = datetime.now().strftime("%Y-%m-%d")
+
+    complete_data = []
+    for files in data_files:
+        dataset_run_date = files.split("/")[-1].split(".")[0]
+        # Extract the date from the file name
+        dataset_run_date = pd.Timestamp(dataset_run_date)
+
+        # Load the data from the file
+        data = read_pickle_from_s3(bucket_name="retrofit-datalake-dev", s3_file_name=files)
+
+        # We check that the retrieved energy consumption sufficiently matches the EPC data
+        internal_dataset = []
+        for x in data:
+            epc_data = x["epc"]
+            epc_sap = epc_data["current-energy-efficiency"]
+            epc_potential_sap = epc_data["potential-energy-efficiency"]
+            # Make sure this matches the extracted sap
+            if int(epc_sap) != int(x["current_epc_efficiency"]) or int(epc_potential_sap) != int(
+                x["potential_epc_efficiency"]
+            ):
+                continue
+
+            heating_kwh = extract_kwh_value(x["heating_text"])
+            hot_water_kwh = extract_kwh_value(x["hot_water_text"])
+            internal_dataset.append(
+                {
+                    **epc_data,
+                    "heating_kwh": heating_kwh,
+                    "hot_water_kwh": hot_water_kwh,
+                    "dataset_run_date": dataset_run_date
+                }
+            )
+
+        complete_data.extend(internal_dataset)
+
+    df = pd.DataFrame(complete_data)
+    # Because we collate multiple runs into a single data source, it's possible that we have duplicated data at
+    # the uprn level, so we dedupe based on the newest dataset_run_date
+
+    df = df.sort_values("dataset_run_date", ascending=False).drop_duplicates(subset="uprn", keep="first")
+    df = df.drop(columns=["dataset_run_date"])
+
+    for col in PROBLEMATIC_COLUMNS:
+        df[col] = df[col].astype(str)
+
+    # Save the data back to s3, but this time as a parquet file
+    save_dataframe_to_s3_parquet(
+        bucket_name="retrofit-data-dev",
+        file_key=f"energy_consumption/{run_date}/energy_consumption_dataset.parquet",
+        df=df
+    )
+
+    df.to_csv("energy_consumption_dataset.csv", index=False)
diff --git a/etl/bill_savings/data_collection.py b/etl/bill_savings/data_collection.py
index 1f787d48..3b503122 100644
--- a/etl/bill_savings/data_collection.py
+++ b/etl/bill_savings/data_collection.py
@@ -131,7 +131,8 @@ def app():
     sample_size = 100
 
     energy_consumption_data = []
-    for directory in tqdm(epc_directories):
+    for i, directory in tqdm(enumerate(epc_directories), total=len(epc_directories)):
+
         data = pd.read_csv(directory / "certificates.csv", low_memory=False)
         # Rename the columns to the same format as the api returns
         data.columns = [c.replace("_", "-").lower() for c in data.columns]
@@ -147,8 +148,8 @@ def app():
 
         collected_data = []
         for _, property_data in data.iterrows():
-            # Sleep for a random time between 0.1 and 1.5 seconds
-            time.sleep(np.random.uniform(0.1, 1.5))
+            # Sleep for a random time between 0.1 and 1.4 seconds
+            time.sleep(np.random.uniform(0.1, 1.4))
 
             uprn = int(property_data["uprn"])
             address = property_data["address1"]
@@ -167,7 +168,7 @@ def app():
                 {
                     **response,
                     "epc": property_data.to_dict(),
-                    "epc_directory": directory
+                    "epc_directory": str(directory)
                 }
             )
 
diff --git a/utils/s3.py b/utils/s3.py
index 05482271..1b14ca97 100644
--- a/utils/s3.py
+++ b/utils/s3.py
@@ -246,3 +246,33 @@ def read_csv_from_s3(bucket_name, filepath):
     data = list(reader)
 
     return data
+
+
+def list_files_in_s3_folder(bucket_name, folder_name):
+    """
+    List all files in a given folder in an S3 bucket.
+
+    :param bucket_name: The name of the S3 bucket.
+    :param folder_name: The folder name within the S3 bucket.
+    :return: Keys under the prefix, first page only (list_objects_v2 returns at most 1,000 per call); [] on any error.
+    """
+    try:
+        s3 = boto3.client('s3')
+        response = s3.list_objects_v2(Bucket=bucket_name, Prefix=folder_name)
+
+        if 'Contents' not in response:
+            logger.info(f"No files found in folder {folder_name} in bucket {bucket_name}.")
+            return []
+
+        file_keys = [content['Key'] for content in response['Contents']]
+        return file_keys
+
+    except NoCredentialsError:
+        logger.error("Credentials not available.")
+        return []
+    except PartialCredentialsError:
+        logger.error("Incomplete credentials provided.")
+        return []
+    except Exception as e:
+        logger.error(f'Failed to list files in folder {folder_name} in bucket {bucket_name}: {str(e)}')
+        return []

From 7790822e76cd968e0af10b76ff451e73d2362b56 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 2 Jul 2024 16:06:01 +0100
Subject: [PATCH 71/80] making the data objects dictionaries for different
 targets

---
 etl/bill_savings/EnergyConsumptionModel.py | 109 +++++++++++++++------
 etl/bill_savings/data_collection.py        |   6 +-
 2 files changed, 81 insertions(+), 34 deletions(-)

diff --git a/etl/bill_savings/EnergyConsumptionModel.py b/etl/bill_savings/EnergyConsumptionModel.py
index 2ca88da5..d2c77e48 100644
--- a/etl/bill_savings/EnergyConsumptionModel.py
+++ b/etl/bill_savings/EnergyConsumptionModel.py
@@ -3,51 +3,87 @@ from datetime import datetime
 from sklearn.model_selection import train_test_split
 from sklearn.linear_model import LinearRegression
 from sklearn.metrics import mean_squared_error, r2_score
-from utils.s3 import save_pickle_to_s3, read_pickle_from_s3
+from utils.s3 import save_pickle_to_s3, read_pickle_from_s3, read_dataframe_from_s3_parquet
 
 
 class EnergyConsumptionModel:
-    FEATURES = ['feature_1', 'feature_2']
+    FEATURES = {
+        "heating_kwh": [
+            "lodgement-year", "lodgement-month", "current-energy-efficiency", "energy-consumption-current",
+            "heating-cost-current",
+        ],
+        "hot_water_kwh": [
+            "lodgement-year", "lodgement-month", "current-energy-efficiency", "energy-consumption-current",
+            "hot-water-cost-current"
+        ]
+    }
     TARGETS = ['heating_kwh', 'hot_water_kwh']
+    CATEGORICAL_COLUMNS = ["lodgement-year", "lodgement-month"]
+    NUMERICAL_COLUMNS = ["current-energy-efficiency", "energy-consumption-current", "heating-cost-current",
+                         "hot-water-cost-current"]
 
     def __init__(self, model_paths=None):
         self.models = {}
         self.model_paths = model_paths or {}
         self.data = None
+        self.dummy_columns = None
 
-        self.X_train = None
-        self.X_test = None
-        self.y_train = None
-        self.y_test = None
+        self.x_train = {}
+        self.x_test = {}
+        self.y_train = {}
+        self.y_test = {}
 
         if model_paths:
             for target, path in model_paths.items():
                 self.models[target] = read_pickle_from_s3(bucket_name="retrofit-model-directory-dev", s3_file_name=path)
 
     def read_dataset(self, file_path):
-        self.data = pd.read_csv(file_path)
+        self.data = read_dataframe_from_s3_parquet(bucket_name="retrofit-data-dev", file_key=file_path)
 
     def feature_engineering(self):
-        # Example feature engineering steps
-        self.data['feature_1'] = self.data['original_feature_1'] ** 2
-        self.data['feature_2'] = self.data['original_feature_2'] ** 0.5
-        # Add more feature engineering steps as required
+        # Extract date features
+        self.data["lodgement-date"] = pd.to_datetime(self.data["lodgement-date"])
+        self.data["lodgement-year"] = self.data["lodgement-date"].dt.year
+        self.data["lodgement-month"] = self.data["lodgement-date"].dt.month
+
+        # Convert data types
+        self.data[self.NUMERICAL_COLUMNS] = self.data[self.NUMERICAL_COLUMNS].apply(pd.to_numeric)
+        self.data[self.CATEGORICAL_COLUMNS] = self.data[self.CATEGORICAL_COLUMNS].astype(str)
+
+        # Convert categorical columns to dummies
+        self.data = pd.get_dummies(self.data, columns=self.CATEGORICAL_COLUMNS, drop_first=True)
+
+        # Store the dummy columns
+        self.dummy_columns = {}
+        for target in self.TARGETS:
+            target_features = self.FEATURES[target]
+            dummy_feature_columns = []
+            for feature in target_features:
+                if feature in self.CATEGORICAL_COLUMNS:
+                    dummy_feature_columns.extend([col for col in self.data.columns if col.startswith(feature + '_')])
+                else:
+                    dummy_feature_columns.append(feature)
+            self.dummy_columns[target] = dummy_feature_columns
 
     def split_dataset(self, target, test_size=0.2, random_state=42):
-        X = self.data[self.FEATURES]
+
+        if target not in self.TARGETS:
+            raise ValueError(f"Target {target} not in {self.TARGETS}")
+
+        x = self.data[self.dummy_columns[target]]
         y = self.data[target]
-        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
-            X, y, test_size=test_size, random_state=random_state
+        self.x_train[target], self.x_test[target], self.y_train[target], self.y_test[target] = train_test_split(
+            x, y, test_size=test_size, random_state=random_state
         )
 
     def fit_model(self, target):
         self.models[target] = LinearRegression()
-        self.models[target].fit(self.X_train, self.y_train)
+        self.models[target].fit(self.x_train[target], self.y_train[target])
 
     def evaluate_model(self, target):
-        y_pred = self.models[target].predict(self.X_test)
-        mse = mean_squared_error(self.y_test, y_pred)
-        r2 = r2_score(self.y_test, y_pred)
+        y_pred = self.models[target].predict(self.x_test[target])
+        mse = mean_squared_error(self.y_test[target], y_pred)
+        r2 = r2_score(self.y_test[target], y_pred)
         return {'MSE': mse, 'R2': r2}
 
     def save_model(self, target):
@@ -67,23 +103,32 @@ class EnergyConsumptionModel:
 
     def transform_new_data(self, new_data):
         # Apply the same transformations as in feature_engineering
-        new_data['feature_1'] = new_data['original_feature_1'] ** 2
-        new_data['feature_2'] = new_data['original_feature_2'] ** 0.5
-        return new_data[self.FEATURES]
+        new_data["lodgement-date"] = pd.to_datetime(new_data["lodgement-date"])
+        new_data["lodgement-year"] = new_data["lodgement-date"].dt.year
+        new_data["lodgement-month"] = new_data["lodgement-date"].dt.month
+
+        # Convert categorical columns to dummies
+        new_data = pd.get_dummies(new_data, columns=self.CATEGORICAL_COLUMNS, drop_first=True)
+
+        # Align new data with the dummy columns from training data
+        new_data = new_data.reindex(columns=self.dummy_columns, fill_value=0)
+
+        return new_data.drop(columns=[target for target in self.TARGETS if target in new_data.columns])
+
 
 # Example usage:
-# model = EnergyConsumptionModel()
-# model.read_dataset('/mnt/data/energy_consumption_dataset.csv')
-# model.feature_engineering()
+model = EnergyConsumptionModel()
+model.read_dataset('energy_consumption/2024-07-02/energy_consumption_dataset.parquet')
+model.feature_engineering()
 
 # For heating_kwh
-# model.split_dataset(target='heating_kwh')
-# model.fit_model(target='heating_kwh')
-# print(model.evaluate_model(target='heating_kwh'))
-# model.save_model(target='heating_kwh')
+model.split_dataset(target='heating_kwh')
+model.fit_model(target='heating_kwh')
+print(model.evaluate_model(target='heating_kwh'))
+model.save_model(target='heating_kwh')
 
 # For hot_water_kwh
-# model.split_dataset(target='hot_water_kwh')
-# model.fit_model(target='hot_water_kwh')
-# print(model.evaluate_model(target='hot_water_kwh'))
-# model.save_model(target='hot_water_kwh')
+model.split_dataset(target='hot_water_kwh')
+model.fit_model(target='hot_water_kwh')
+print(model.evaluate_model(target='hot_water_kwh'))
+model.save_model(target='hot_water_kwh')
diff --git a/etl/bill_savings/data_collection.py b/etl/bill_savings/data_collection.py
index 3b503122..79afa936 100644
--- a/etl/bill_savings/data_collection.py
+++ b/etl/bill_savings/data_collection.py
@@ -132,6 +132,9 @@ def app():
 
     energy_consumption_data = []
     for i, directory in tqdm(enumerate(epc_directories), total=len(epc_directories)):
+        # Skip the first 50
+        if i < 50:
+            continue
 
         data = pd.read_csv(directory / "certificates.csv", low_memory=False)
         # Rename the columns to the same format as the api returns
@@ -148,8 +151,7 @@ def app():
 
         collected_data = []
         for _, property_data in data.iterrows():
-            # Sleep for a random time between 0.1 and 1.4 seconds
-            time.sleep(np.random.uniform(0.1, 1.4))
+            time.sleep(np.random.uniform(0.3, 2))
 
             uprn = int(property_data["uprn"])
             address = property_data["address1"]

From 39a4c2e975d1cb72ab09da36b101ba8e23f9c777 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 2 Jul 2024 17:29:34 +0100
Subject: [PATCH 72/80] updated to use xgboost - much better performance

---
 etl/bill_savings/EnergyConsumptionModel.py | 146 +++++++++++++++++----
 1 file changed, 123 insertions(+), 23 deletions(-)

diff --git a/etl/bill_savings/EnergyConsumptionModel.py b/etl/bill_savings/EnergyConsumptionModel.py
index d2c77e48..ca221175 100644
--- a/etl/bill_savings/EnergyConsumptionModel.py
+++ b/etl/bill_savings/EnergyConsumptionModel.py
@@ -1,16 +1,23 @@
 import pandas as pd
+from xgboost import XGBRegressor
 from datetime import datetime
 from sklearn.model_selection import train_test_split
 from sklearn.linear_model import LinearRegression
-from sklearn.metrics import mean_squared_error, r2_score
+from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
+from sklearn.feature_selection import RFECV
 from utils.s3 import save_pickle_to_s3, read_pickle_from_s3, read_dataframe_from_s3_parquet
+import logging
+
+# Configure logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
 
 class EnergyConsumptionModel:
     FEATURES = {
         "heating_kwh": [
             "lodgement-year", "lodgement-month", "current-energy-efficiency", "energy-consumption-current",
-            "heating-cost-current",
+            "heating-cost-current", "main-fuel", "total-floor-area", "number-heated-rooms", "number-habitable-rooms",
+            "mainheat-energy-eff"
         ],
         "hot_water_kwh": [
             "lodgement-year", "lodgement-month", "current-energy-efficiency", "energy-consumption-current",
@@ -18,34 +25,52 @@ class EnergyConsumptionModel:
         ]
     }
     TARGETS = ['heating_kwh', 'hot_water_kwh']
-    CATEGORICAL_COLUMNS = ["lodgement-year", "lodgement-month"]
+    CATEGORICAL_COLUMNS = [
+        "lodgement-year", "lodgement-month", "main-fuel", "mainheat-description", "number-heated-rooms",
+        "number-habitable-rooms", "mainheat-energy-eff"
+    ]
     NUMERICAL_COLUMNS = ["current-energy-efficiency", "energy-consumption-current", "heating-cost-current",
-                         "hot-water-cost-current"]
+                         "hot-water-cost-current", "total-floor-area"]
 
     def __init__(self, model_paths=None):
         self.models = {}
         self.model_paths = model_paths or {}
         self.data = None
+        self.input_data = None
         self.dummy_columns = None
+        self.training_predictions = {}
+        self.testing_predictions = {}
 
         self.x_train = {}
         self.x_test = {}
         self.y_train = {}
         self.y_test = {}
+        self.selected_features = {}
 
         if model_paths:
             for target, path in model_paths.items():
                 self.models[target] = read_pickle_from_s3(bucket_name="retrofit-model-directory-dev", s3_file_name=path)
 
     def read_dataset(self, file_path):
+        """Reads the dataset from the specified file path."""
+        logging.info(f"Reading dataset from {file_path}")
         self.data = read_dataframe_from_s3_parquet(bucket_name="retrofit-data-dev", file_key=file_path)
+        self.input_data = self.data.copy()
 
     def feature_engineering(self):
-        # Extract date features
+        """Performs feature engineering on the dataset."""
+        logging.info("Starting feature engineering")
         self.data["lodgement-date"] = pd.to_datetime(self.data["lodgement-date"])
         self.data["lodgement-year"] = self.data["lodgement-date"].dt.year
         self.data["lodgement-month"] = self.data["lodgement-date"].dt.month
 
+        # Modify number of heated rooms and number of habitable rooms
+        # self.data["number-heated-rooms"] = self.data["number-heated-rooms"].apply(lambda x: "10+" if x > 10 else
+        # str(x))
+        # self.data["number-habitable-rooms"] = self.data["number-habitable-rooms"].apply(
+        #     lambda x: "10+" if x > 10 else str(x)
+        # )
+
         # Convert data types
         self.data[self.NUMERICAL_COLUMNS] = self.data[self.NUMERICAL_COLUMNS].apply(pd.to_numeric)
         self.data[self.CATEGORICAL_COLUMNS] = self.data[self.CATEGORICAL_COLUMNS].astype(str)
@@ -65,28 +90,97 @@ class EnergyConsumptionModel:
                     dummy_feature_columns.append(feature)
             self.dummy_columns[target] = dummy_feature_columns
 
-    def split_dataset(self, target, test_size=0.2, random_state=42):
+        logging.info("Feature engineering completed")
 
+    def split_dataset(self, target, test_size=0.2, random_state=42):
+        """Splits the dataset into training and testing sets."""
         if target not in self.TARGETS:
             raise ValueError(f"Target {target} not in {self.TARGETS}")
 
+        logging.info(f"Splitting dataset for target {target}")
         x = self.data[self.dummy_columns[target]]
         y = self.data[target]
         self.x_train[target], self.x_test[target], self.y_train[target], self.y_test[target] = train_test_split(
             x, y, test_size=test_size, random_state=random_state
         )
 
+    def feature_selection(self, target):
+        """Performs feature selection using RFECV."""
+        if target not in self.TARGETS:
+            raise ValueError(f"Target {target} not in {self.TARGETS}")
+
+        logging.info(f"Starting feature selection for target {target}")
+        x = self.x_train[target]
+        y = self.y_train[target]
+
+        # Initialize the XGBoost model and RFECV
+        model = XGBRegressor(objective='reg:squarederror')
+        selector = RFECV(model, step=1, cv=5, scoring='neg_mean_absolute_percentage_error')
+        selector = selector.fit(x, y)
+
+        # Get the selected features
+        self.selected_features[target] = x.columns[selector.support_]
+
+        # Update x_train and x_test with selected features
+        self.x_train[target] = x[self.selected_features[target]]
+        self.x_test[target] = self.x_test[target][self.selected_features[target]]
+
+        logging.info(f"Feature selection completed for target {target}")
+
     def fit_model(self, target):
-        self.models[target] = LinearRegression()
+        """Fits the XGBoost regression model to the training data."""
+        logging.info(f"Fitting model for target {target}")
+        self.models[target] = XGBRegressor(objective='reg:squarederror')
         self.models[target].fit(self.x_train[target], self.y_train[target])
+        logging.info(f"Model fitting completed for target {target}")
 
     def evaluate_model(self, target):
-        y_pred = self.models[target].predict(self.x_test[target])
-        mse = mean_squared_error(self.y_test[target], y_pred)
-        r2 = r2_score(self.y_test[target], y_pred)
-        return {'MSE': mse, 'R2': r2}
+        """Evaluates the model on training and testing data."""
+        logging.info(f"Evaluating model for target {target}")
+        y_train_pred = self.models[target].predict(self.x_train[target])
+        train_mse = mean_squared_error(self.y_train[target], y_train_pred)
+        train_r2 = r2_score(self.y_train[target], y_train_pred)
+        train_mape = mean_absolute_percentage_error(self.y_train[target], y_train_pred)
+
+        self.training_predictions[target] = pd.DataFrame({
+            'Actual': self.y_train[target],
+            'Predicted': y_train_pred
+        })
+
+        y_test_pred = self.models[target].predict(self.x_test[target])
+        test_mse = mean_squared_error(self.y_test[target], y_test_pred)
+        test_r2 = r2_score(self.y_test[target], y_test_pred)
+        test_mape = mean_absolute_percentage_error(self.y_test[target], y_test_pred)
+
+        self.testing_predictions[target] = pd.DataFrame({
+            'Actual': self.y_test[target],
+            'Predicted': y_test_pred
+        })
+
+        feature_importance = pd.DataFrame({
+            'Feature': self.selected_features[target],
+            'Importance': self.models[target].feature_importances_
+        }).sort_values(by='Importance', ascending=False)
+
+        logging.info(f"Evaluation completed for target {target}")
+
+        return {
+            'train': {
+                'MSE': train_mse,
+                'R2': train_r2,
+                'MAPE': train_mape,
+                'Feature Importance': feature_importance
+            },
+            'test': {
+                'MSE': test_mse,
+                'R2': test_r2,
+                'MAPE': test_mape
+            }
+        }
 
     def save_model(self, target):
+        """Saves the model to S3."""
+        logging.info(f"Saving model for target {target}")
         run_date = datetime.now().strftime("%Y-%m-%d")
         save_pickle_to_s3(
             self.models[target],
@@ -95,14 +189,17 @@ class EnergyConsumptionModel:
         )
 
     def score_new_data(self, new_data, target):
+        """Scores new data using the trained model."""
         if target not in self.models:
             raise ValueError(f"Model for target {target} not loaded or trained")
 
-        new_data_transformed = self.transform_new_data(new_data)
+        new_data_transformed = self.transform_new_data(new_data, target)
         return self.models[target].predict(new_data_transformed)
 
-    def transform_new_data(self, new_data):
-        # Apply the same transformations as in feature_engineering
+    def transform_new_data(self, new_data, target):
+        """Applies the same transformations to new data as were applied to the training data."""
+
+        # TODO: This should just use our other transformation function
         new_data["lodgement-date"] = pd.to_datetime(new_data["lodgement-date"])
         new_data["lodgement-year"] = new_data["lodgement-date"].dt.year
         new_data["lodgement-month"] = new_data["lodgement-date"].dt.month
@@ -111,9 +208,12 @@ class EnergyConsumptionModel:
         new_data = pd.get_dummies(new_data, columns=self.CATEGORICAL_COLUMNS, drop_first=True)
 
         # Align new data with the dummy columns from training data
-        new_data = new_data.reindex(columns=self.dummy_columns, fill_value=0)
+        new_data = new_data.reindex(columns=self.dummy_columns[target], fill_value=0)
 
-        return new_data.drop(columns=[target for target in self.TARGETS if target in new_data.columns])
+        # Select the features used by the model
+        new_data = new_data[self.selected_features[target]]
+
+        return new_data
 
 
 # Example usage:
@@ -123,12 +223,12 @@ model.feature_engineering()
 
 # For heating_kwh
 model.split_dataset(target='heating_kwh')
+model.feature_selection(target='heating_kwh')
 model.fit_model(target='heating_kwh')
-print(model.evaluate_model(target='heating_kwh'))
-model.save_model(target='heating_kwh')
+evaluation_results = model.evaluate_model(target='heating_kwh')
+from pprint import pprint
 
-# For hot_water_kwh
-model.split_dataset(target='hot_water_kwh')
-model.fit_model(target='hot_water_kwh')
-print(model.evaluate_model(target='hot_water_kwh'))
-model.save_model(target='hot_water_kwh')
+pprint(evaluation_results["train"])
+pprint(evaluation_results["test"])
+
+importance_df = evaluation_results["train"]["Feature Importance"]

From 0a1f728f37705a396f4d18879ae7d89881544ea9 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 2 Jul 2024 17:48:06 +0100
Subject: [PATCH 73/80] implemented xgboost which performs really well

---
 etl/bill_savings/EnergyConsumptionModel.py | 5 ++---
 etl/bill_savings/data_collection.py        | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/etl/bill_savings/EnergyConsumptionModel.py b/etl/bill_savings/EnergyConsumptionModel.py
index ca221175..51972a36 100644
--- a/etl/bill_savings/EnergyConsumptionModel.py
+++ b/etl/bill_savings/EnergyConsumptionModel.py
@@ -2,7 +2,6 @@ import pandas as pd
 from xgboost import XGBRegressor
 from datetime import datetime
 from sklearn.model_selection import train_test_split
-from sklearn.linear_model import LinearRegression
 from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
 from sklearn.feature_selection import RFECV
 from utils.s3 import save_pickle_to_s3, read_pickle_from_s3, read_dataframe_from_s3_parquet
@@ -16,8 +15,8 @@ class EnergyConsumptionModel:
     FEATURES = {
         "heating_kwh": [
             "lodgement-year", "lodgement-month", "current-energy-efficiency", "energy-consumption-current",
-            "heating-cost-current", "main-fuel", "total-floor-area", "number-heated-rooms", "number-habitable-rooms",
-            "mainheat-energy-eff"
+            "heating-cost-current", "total-floor-area", "number-heated-rooms", "number-habitable-rooms",
+            # "mainheat-energy-eff", "mainheat-description", "main-fuel",
         ],
         "hot_water_kwh": [
             "lodgement-year", "lodgement-month", "current-energy-efficiency", "energy-consumption-current",
diff --git a/etl/bill_savings/data_collection.py b/etl/bill_savings/data_collection.py
index 79afa936..24b10d7f 100644
--- a/etl/bill_savings/data_collection.py
+++ b/etl/bill_savings/data_collection.py
@@ -133,7 +133,7 @@ def app():
     energy_consumption_data = []
     for i, directory in tqdm(enumerate(epc_directories), total=len(epc_directories)):
         # Skip the first 50
-        if i < 50:
+        if i < 90:
             continue
 
         data = pd.read_csv(directory / "certificates.csv", low_memory=False)

From 14417c37dfe9dcbe5ba717d84e83199c2d58181f Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 2 Jul 2024 18:13:23 +0100
Subject: [PATCH 74/80] error analysis - not working though

---
 etl/bill_savings/EnergyConsumptionModel.py | 60 +++++++++++++++++++++-
 1 file changed, 59 insertions(+), 1 deletion(-)

diff --git a/etl/bill_savings/EnergyConsumptionModel.py b/etl/bill_savings/EnergyConsumptionModel.py
index 51972a36..27fcc518 100644
--- a/etl/bill_savings/EnergyConsumptionModel.py
+++ b/etl/bill_savings/EnergyConsumptionModel.py
@@ -15,7 +15,8 @@ class EnergyConsumptionModel:
     FEATURES = {
         "heating_kwh": [
             "lodgement-year", "lodgement-month", "current-energy-efficiency", "energy-consumption-current",
-            "heating-cost-current", "total-floor-area", "number-heated-rooms", "number-habitable-rooms",
+            "heating-cost-current", "total-floor-area", "number-heated-rooms",
+            # "number-habitable-rooms",
             # "mainheat-energy-eff", "mainheat-description", "main-fuel",
         ],
         "hot_water_kwh": [
@@ -214,6 +215,63 @@ class EnergyConsumptionModel:
 
         return new_data
 
+    def error_analysis(self, target, top_n=10, unique_threshold=0.8):
+        """
+        Perform error analysis on the provided model and dataset.
+        """
+
+        # Calculate predictions and residuals
+        y_train_pred = self.models[target].predict(self.x_train[target])
+        y_test_pred = self.models[target].predict(self.x_test[target])
+
+        train_residuals = self.y_train[target] - y_train_pred
+        test_residuals = self.y_test[target] - y_test_pred
+
+        # Identify top N poorly performing rows by absolute residuals
+        top_train_indices = train_residuals.abs().nlargest(top_n).index
+        top_test_indices = test_residuals.abs().nlargest(top_n).index
+
+        top_train_data = self.input_data.loc[top_train_indices]
+        top_test_data = self.input_data.loc[top_test_indices]
+
+        def exclude_columns(data, threshold):
+            exclude_cols = []
+            num_rows = data.shape[0]
+            for col in data.columns:
+                if data[col].dtype == 'object' and data[col].nunique() / num_rows >= threshold:
+                    exclude_cols.append(col)
+            return exclude_cols
+
+        exclude_cols = exclude_columns(top_train_data, unique_threshold)
+
+        top_train_data = top_train_data.drop(columns=exclude_cols)
+        top_test_data = top_test_data.drop(columns=exclude_cols)
+
+        # TODO: Not working
+
+        # One-hot encode categorical variables
+        categorical_columns = top_train_data.select_dtypes(include=['object']).columns.tolist()
+        top_train_data_encoded = pd.get_dummies(top_train_data, columns=categorical_columns, drop_first=True)
+        top_test_data_encoded = pd.get_dummies(top_test_data, columns=categorical_columns, drop_first=True)
+
+        # Align the encoded data with the training data
+        top_train_data_encoded = top_train_data_encoded.reindex(columns=self.x_train[target].columns, fill_value=0)
+        top_test_data_encoded = top_test_data_encoded.reindex(columns=self.x_test[target].columns, fill_value=0)
+
+        # Correlation analysis with residuals
+        train_corr = top_train_data_encoded.corrwith(train_residuals.loc[top_train_indices])
+        test_corr = top_test_data_encoded.corrwith(test_residuals.loc[top_test_indices])
+
+        # Return summaries
+        summary = {
+            "train_corr": train_corr,
+            "test_corr": test_corr,
+            "top_train_data": top_train_data,
+            "top_test_data": top_test_data
+        }
+
+        return summary
+
 
 # Example usage:
 model = EnergyConsumptionModel()

From b0449b9e90560505d4d45a63c41cd5cb5213e345 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 2 Jul 2024 18:28:27 +0100
Subject: [PATCH 75/80] decent performing model

---
 etl/bill_savings/EnergyConsumptionModel.py | 58 +++++++++++++++-------
 1 file changed, 41 insertions(+), 17 deletions(-)

diff --git a/etl/bill_savings/EnergyConsumptionModel.py b/etl/bill_savings/EnergyConsumptionModel.py
index 27fcc518..6492c7a6 100644
--- a/etl/bill_savings/EnergyConsumptionModel.py
+++ b/etl/bill_savings/EnergyConsumptionModel.py
@@ -16,8 +16,7 @@ class EnergyConsumptionModel:
         "heating_kwh": [
             "lodgement-year", "lodgement-month", "current-energy-efficiency", "energy-consumption-current",
             "heating-cost-current", "total-floor-area", "number-heated-rooms",
-            # "number-habitable-rooms",
-            # "mainheat-energy-eff", "mainheat-description", "main-fuel",
+            "mainheat-description", "main-fuel", "mainheat-energy-eff", "number-habitable-rooms",
         ],
         "hot_water_kwh": [
             "lodgement-year", "lodgement-month", "current-energy-efficiency", "energy-consumption-current",
@@ -104,25 +103,41 @@ class EnergyConsumptionModel:
             x, y, test_size=test_size, random_state=random_state
         )
 
-    def feature_selection(self, target):
-        """Performs feature selection using RFECV."""
+    def feature_selection(self, target, cv_folds=3, sample_fraction=0.1, random_state=42):
+        """
+        Performs feature selection using RFECV with XGBoost.
+
+        Parameters:
+        - target: The target variable for feature selection.
+        - cv_folds: Number of cross-validation folds.
+        - sample_fraction: Fraction of the data to use for feature selection.
+        - random_state: Random state for reproducibility.
+        """
         if target not in self.TARGETS:
             raise ValueError(f"Target {target} not in {self.TARGETS}")
 
         logging.info(f"Starting feature selection for target {target}")
-        x = self.x_train[target]
-        y = self.y_train[target]
+
+        # Sample the data if specified
+        if sample_fraction < 1.0:
+            x_sample, _, y_sample, _ = train_test_split(
+                self.x_train[target], self.y_train[target],
+                train_size=sample_fraction, random_state=random_state
+            )
+        else:
+            x_sample = self.x_train[target]
+            y_sample = self.y_train[target]
 
         # Initialize the XGBoost model and RFECV
-        model = XGBRegressor(objective='reg:squarederror')
-        selector = RFECV(model, step=1, cv=5, scoring='neg_mean_absolute_percentage_error')
-        selector = selector.fit(x, y)
+        model = XGBRegressor(objective='reg:squarederror', n_jobs=-1)
+        selector = RFECV(model, step=1, cv=cv_folds, scoring='neg_mean_absolute_percentage_error')
+        selector = selector.fit(x_sample, y_sample)
 
         # Get the selected features
-        self.selected_features[target] = x.columns[selector.support_]
+        self.selected_features[target] = x_sample.columns[selector.support_]
 
         # Update x_train and x_test with selected features
-        self.x_train[target] = x[self.selected_features[target]]
+        self.x_train[target] = self.x_train[target][self.selected_features[target]]
         self.x_test[target] = self.x_test[target][self.selected_features[target]]
 
         logging.info(f"Feature selection completed for target {target}")
@@ -218,6 +233,14 @@ class EnergyConsumptionModel:
     def error_analysis(self, target, top_n=10, unique_threshold=0.8):
         """
         Perform error analysis on the provided model and dataset.
+
+        Parameters:
+        - target: The target variable to analyze.
+        - top_n: Number of top residuals to consider for analysis.
+        - unique_threshold: Threshold to exclude columns with high unique values.
+
+        Returns:
+        - summary: Dictionary summarizing common features among poorly performing rows.
         """
 
         # Calculate predictions and residuals
@@ -234,6 +257,7 @@ class EnergyConsumptionModel:
         top_train_data = self.input_data.loc[top_train_indices]
         top_test_data = self.input_data.loc[top_test_indices]
 
+        # Automatically detect and exclude columns
         def exclude_columns(data, threshold):
             exclude_cols = []
             num_rows = data.shape[0]
@@ -247,16 +271,14 @@ class EnergyConsumptionModel:
         top_train_data = top_train_data.drop(columns=exclude_cols)
         top_test_data = top_test_data.drop(columns=exclude_cols)
 
-        # TODO: Not working
-
         # One-hot encode categorical variables
         categorical_columns = top_train_data.select_dtypes(include=['object']).columns.tolist()
         top_train_data_encoded = pd.get_dummies(top_train_data, columns=categorical_columns, drop_first=True)
         top_test_data_encoded = pd.get_dummies(top_test_data, columns=categorical_columns, drop_first=True)
 
-        # Align the encoded data with the training data
-        top_train_data_encoded = top_train_data_encoded.reindex(columns=self.x_train[target].columns, fill_value=0)
-        top_test_data_encoded = top_test_data_encoded.reindex(columns=self.x_test[target].columns, fill_value=0)
+        # Ensure all original columns are included in the encoded data
+        top_train_data_encoded = top_train_data_encoded.reindex(columns=self.input_data.columns, fill_value=0)
+        top_test_data_encoded = top_test_data_encoded.reindex(columns=self.input_data.columns, fill_value=0)
 
         # Correlation analysis with residuals
         train_corr = top_train_data_encoded.corrwith(train_residuals.loc[top_train_indices])
@@ -264,6 +286,8 @@ class EnergyConsumptionModel:
 
         # Return summaries
         summary = {
+            "train_summary": top_train_data.describe(include='all').T,
+            "test_summary": top_test_data.describe(include='all').T,
             "train_corr": train_corr,
             "test_corr": test_corr,
             "top_train_data": top_train_data,
@@ -280,7 +304,7 @@ model.feature_engineering()
 
 # For heating_kwh
 model.split_dataset(target='heating_kwh')
-model.feature_selection(target='heating_kwh')
+model.feature_selection(target='heating_kwh', cv_folds=3, sample_fraction=0.1)
 model.fit_model(target='heating_kwh')
 evaluation_results = model.evaluate_model(target='heating_kwh')
 from pprint import pprint

From 77aaecf04fa45ad285ce5cb3262bf7f549996545 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 2 Jul 2024 18:38:55 +0100
Subject: [PATCH 76/80] Added some additional features

---
 etl/bill_savings/EnergyConsumptionModel.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/etl/bill_savings/EnergyConsumptionModel.py b/etl/bill_savings/EnergyConsumptionModel.py
index 6492c7a6..e0a52e19 100644
--- a/etl/bill_savings/EnergyConsumptionModel.py
+++ b/etl/bill_savings/EnergyConsumptionModel.py
@@ -17,6 +17,10 @@ class EnergyConsumptionModel:
             "lodgement-year", "lodgement-month", "current-energy-efficiency", "energy-consumption-current",
             "heating-cost-current", "total-floor-area", "number-heated-rooms",
             "mainheat-description", "main-fuel", "mainheat-energy-eff", "number-habitable-rooms",
+            "mainheatcont-description", "property-type", "built-form",
+            # To test
+            # "hotwater-description" - make a days since lodgment variable?
+            # A geographic variable
         ],
         "hot_water_kwh": [
             "lodgement-year", "lodgement-month", "current-energy-efficiency", "energy-consumption-current",
@@ -26,7 +30,7 @@ class EnergyConsumptionModel:
     TARGETS = ['heating_kwh', 'hot_water_kwh']
     CATEGORICAL_COLUMNS = [
         "lodgement-year", "lodgement-month", "main-fuel", "mainheat-description", "number-heated-rooms",
-        "number-habitable-rooms", "mainheat-energy-eff"
+        "number-habitable-rooms", "mainheat-energy-eff", "mainheatcont-description", "property-type", "built-form",
     ]
     NUMERICAL_COLUMNS = ["current-energy-efficiency", "energy-consumption-current", "heating-cost-current",
                          "hot-water-cost-current", "total-floor-area"]

From 58e60ae3765844580a80b4651a870f0e6d8bea85 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 2 Jul 2024 18:41:10 +0100
Subject: [PATCH 77/80] Added age band

---
 etl/bill_savings/EnergyConsumptionModel.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/etl/bill_savings/EnergyConsumptionModel.py b/etl/bill_savings/EnergyConsumptionModel.py
index e0a52e19..c02d4c8c 100644
--- a/etl/bill_savings/EnergyConsumptionModel.py
+++ b/etl/bill_savings/EnergyConsumptionModel.py
@@ -17,7 +17,7 @@ class EnergyConsumptionModel:
             "lodgement-year", "lodgement-month", "current-energy-efficiency", "energy-consumption-current",
             "heating-cost-current", "total-floor-area", "number-heated-rooms",
             "mainheat-description", "main-fuel", "mainheat-energy-eff", "number-habitable-rooms",
-            "mainheatcont-description", "property-type", "built-form",
+            "mainheatcont-description", "property-type", "built-form", "construction-age-band"
             # To test
             # "hotwater-description" - make a days since lodgment variable?
             # A geographic variable
@@ -31,6 +31,7 @@ class EnergyConsumptionModel:
     CATEGORICAL_COLUMNS = [
         "lodgement-year", "lodgement-month", "main-fuel", "mainheat-description", "number-heated-rooms",
         "number-habitable-rooms", "mainheat-energy-eff", "mainheatcont-description", "property-type", "built-form",
+        "construction-age-band"
     ]
     NUMERICAL_COLUMNS = ["current-energy-efficiency", "energy-consumption-current", "heating-cost-current",
                          "hot-water-cost-current", "total-floor-area"]

From b63de79043b2b7a1e9498754621944315aaa76f7 Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Wed, 3 Jul 2024 23:35:02 +0100
Subject: [PATCH 78/80] add cost to EPCRecord, Difference record and pipeline

---
 etl/epc/Pipeline.py |  4 +++-
 etl/epc/Record.py   | 12 ++++++++++++
 etl/epc/settings.py |  6 ++++++
 3 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/etl/epc/Pipeline.py b/etl/epc/Pipeline.py
index 47cddeb0..bc3bfd91 100644
--- a/etl/epc/Pipeline.py
+++ b/etl/epc/Pipeline.py
@@ -22,6 +22,7 @@ from etl.epc.settings import (
     EFFICIENCY_FEATURES,
     POTENTIAL_COLUMNS,
     ROOM_FEATURES,
+    COST_FEATURES,
 )
 
 # TODO: change in setting file
@@ -42,6 +43,7 @@ VARIABLE_DATA_FEATURES = (
     # + POTENTIAL_COLUMNS
     + ["lodgement_date", RDSAP_RESPONSE, HEAT_DEMAND_RESPONSE, CARBON_RESPONSE]
 )
+COST_FEATURES = [x.lower() for x in COST_FEATURES]
 
 
 def get_cleaned_description_mapping():
@@ -278,7 +280,7 @@ class EPCPipeline:
 
         # We include the lodgement date here as we probably need to factor time into the
         # model, since EPC standards and rigour have changed over time
-        variable_data = property_data[VARIABLE_DATA_FEATURES]
+        variable_data = property_data[VARIABLE_DATA_FEATURES + COST_FEATURES]
 
         uprn = str(uprn)
         epc_records = [
diff --git a/etl/epc/Record.py b/etl/epc/Record.py
index 9b69c33a..b8471ccf 100644
--- a/etl/epc/Record.py
+++ b/etl/epc/Record.py
@@ -76,6 +76,9 @@ class EPCRecord:
     mainheat_energy_eff: str = None
     mainheatc_energy_eff: str = None
     lighting_energy_eff: str = None
+    lighting_cost_current: float = None
+    heating_cost_current: float = None
+    hot_water_cost_current: float = None
     # potential_energy_efficiency: float = None
     # environment_impact_potential: float = None
     # energy_consumption_potential: float = None
@@ -249,6 +252,9 @@ class EPCRecord:
         self.mainheat_energy_eff: str = self.prepared_epc["mainheat_energy_eff"]
         self.mainheatc_energy_eff: str = self.prepared_epc["mainheatc_energy_eff"]
         self.lighting_energy_eff: str = self.prepared_epc["lighting_energy_eff"]
+        self.lighting_cost_current: float = self.prepared_epc["lighting_cost_current"]
+        self.heating_cost_current: float = self.prepared_epc["heating_cost_current"]
+        self.hot_water_cost_current: float = self.prepared_epc["hot_water_cost_current"]
         # self.potential_energy_efficiency: float = float(
         #     self.prepared_epc["potential_energy_efficiency"]
         # )
@@ -1044,6 +1050,12 @@ class EPCDifferenceRecord:
             "heat_demand_ending": self.record2.get(HEAT_DEMAND_RESPONSE),
             "carbon_starting": self.record1.get(CARBON_RESPONSE),
             "carbon_ending": self.record2.get(CARBON_RESPONSE),
+            "lighting_cost_starting": self.record1.get("lighting_cost_current"),
+            "lighting_cost_ending": self.record2.get("lighting_cost_current"),
+            "heating_cost_starting": self.record1.get("heating_cost_current"),
+            "heating_cost_ending": self.record2.get("heating_cost_current"),
+            "hot_water_cost_starting": self.record1.get("hot_water_cost_current"),
+            "hot_water_cost_ending": self.record2.get("hot_water_cost_current"),
             # "potential_energy_efficiency": self.earliest_record.get(
             #     "potential_energy_efficiency"
             # ),
diff --git a/etl/epc/settings.py b/etl/epc/settings.py
index 18dbaa7c..a814750f 100644
--- a/etl/epc/settings.py
+++ b/etl/epc/settings.py
@@ -110,6 +110,12 @@ DEPLOYMENT_FOLDER = "deployment"
 TOTAL_FLOOR_AREA_NATIONAL_AVERAGE = 70
 FLOOR_HEIGHT_NATIONAL_AVERAGE = 2.45
 
+COST_FEATURES = [
+    "LIGHTING_COST_CURRENT",
+    "HEATING_COST_CURRENT",
+    "HOT_WATER_COST_CURRENT",
+]
+
 AVERAGE_FIXED_FEATURES = [
     "TOTAL_FLOOR_AREA",
     "FLOOR_HEIGHT",

From fa6e61f0b9628ab45c7f1d45c4934b46b67dad8c Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 4 Jul 2024 19:52:19 +0100
Subject: [PATCH 79/80] hot water model working nicely

---
 etl/bill_savings/EnergyConsumptionModel.py    | 164 ++++++++++++++----
 etl/bill_savings/data_collection.py           |   2 +-
 .../{data_collation.py => data_combining.py}  |   7 +-
 3 files changed, 137 insertions(+), 36 deletions(-)
 rename etl/bill_savings/{data_collation.py => data_combining.py} (95%)

diff --git a/etl/bill_savings/EnergyConsumptionModel.py b/etl/bill_savings/EnergyConsumptionModel.py
index c02d4c8c..89847ca1 100644
--- a/etl/bill_savings/EnergyConsumptionModel.py
+++ b/etl/bill_savings/EnergyConsumptionModel.py
@@ -6,6 +6,7 @@ from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percenta
 from sklearn.feature_selection import RFECV
 from utils.s3 import save_pickle_to_s3, read_pickle_from_s3, read_dataframe_from_s3_parquet
 import logging
+from pprint import pprint
 
 # Configure logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
@@ -15,42 +16,58 @@ class EnergyConsumptionModel:
     FEATURES = {
         "heating_kwh": [
             "lodgement-year", "lodgement-month", "current-energy-efficiency", "energy-consumption-current",
-            "heating-cost-current", "total-floor-area", "number-heated-rooms",
-            "mainheat-description", "main-fuel", "mainheat-energy-eff", "number-habitable-rooms",
-            "mainheatcont-description", "property-type", "built-form", "construction-age-band"
-            # To test
-            # "hotwater-description" - make a days since lodgment variable?
-            # A geographic variable
+            "heating-cost-current",
+            "total-floor-area", "number-heated-rooms",
+            "mainheat-description", "mainheat-energy-eff", "main-fuel",
+            # TESTING
+            "secondheat-description",
+            # , , "number-habitable-rooms",
+            # "mainheatcont-description",
+            # "co2-emissions-current",
+            # "property-type", "built-form",
         ],
         "hot_water_kwh": [
-            "lodgement-year", "lodgement-month", "current-energy-efficiency", "energy-consumption-current",
-            "hot-water-cost-current"
+            "lodgement-year", "lodgement-month",
+            "current-energy-efficiency",
+            "energy-consumption-current",
+            "hot-water-cost-current",
+            "total-floor-area", "number-heated-rooms",
+            "hotwater-description", "hot-water-energy-eff", "main-fuel", "property-type", "built-form",
+            "co2-emissions-current",
         ]
     }
     TARGETS = ['heating_kwh', 'hot_water_kwh']
     CATEGORICAL_COLUMNS = [
         "lodgement-year", "lodgement-month", "main-fuel", "mainheat-description", "number-heated-rooms",
         "number-habitable-rooms", "mainheat-energy-eff", "mainheatcont-description", "property-type", "built-form",
-        "construction-age-band"
+        "construction-age-band", "secondheat-description", "hotwater-description", "hot-water-energy-eff",
     ]
-    NUMERICAL_COLUMNS = ["current-energy-efficiency", "energy-consumption-current", "heating-cost-current",
-                         "hot-water-cost-current", "total-floor-area"]
 
-    def __init__(self, model_paths=None):
+    def __init__(self, model_paths=None, n_jobs=1):
         self.models = {}
         self.model_paths = model_paths or {}
+        self.n_jobs = n_jobs
+
         self.data = None
         self.input_data = None
         self.dummy_columns = None
         self.training_predictions = {}
         self.testing_predictions = {}
+        self.best_iteration = {}
 
         self.x_train = {}
         self.x_test = {}
+        self.x_val = {}
+        self.y_val = {}
         self.y_train = {}
         self.y_test = {}
         self.selected_features = {}
 
+        self.NUMERICAL_COLUMNS = list({
+            x for x in self.FEATURES["heating_kwh"] + self.FEATURES["hot_water_kwh"]
+            if x not in self.CATEGORICAL_COLUMNS
+        })
+
         if model_paths:
             for target, path in model_paths.items():
                 self.models[target] = read_pickle_from_s3(bucket_name="retrofit-model-directory-dev", s3_file_name=path)
@@ -96,18 +113,32 @@ class EnergyConsumptionModel:
 
         logging.info("Feature engineering completed")
 
-    def split_dataset(self, target, test_size=0.2, random_state=42):
-        """Splits the dataset into training and testing sets."""
+    def split_dataset(self, target, test_size=0.2, validation_size=0.2, random_state=42):
+        """Splits the dataset into training, validation, and testing sets."""
         if target not in self.TARGETS:
             raise ValueError(f"Target {target} not in {self.TARGETS}")
 
         logging.info(f"Splitting dataset for target {target}")
-        x = self.data[self.dummy_columns[target]]
-        y = self.data[target]
-        self.x_train[target], self.x_test[target], self.y_train[target], self.y_test[target] = train_test_split(
-            x, y, test_size=test_size, random_state=random_state
+
+        # Split into train + validation and test sets
+        x_train_val, x_test, y_train_val, y_test = train_test_split(
+            self.data[self.dummy_columns[target]],
+            self.data[target],
+            test_size=test_size,
+            random_state=random_state
         )
 
+        # Split train + validation into train and validation sets
+        x_train, x_val, y_train, y_val = train_test_split(
+            x_train_val,
+            y_train_val,
+            test_size=validation_size / (1 - test_size),
+            random_state=random_state
+        )
+
+        self.x_train[target], self.x_val[target], self.x_test[target] = x_train, x_val, x_test
+        self.y_train[target], self.y_val[target], self.y_test[target] = y_train, y_val, y_test
+
     def feature_selection(self, target, cv_folds=3, sample_fraction=0.1, random_state=42):
         """
         Performs feature selection using RFECV with XGBoost.
@@ -134,26 +165,72 @@ class EnergyConsumptionModel:
             y_sample = self.y_train[target]
 
         # Initialize the XGBoost model and RFECV
-        model = XGBRegressor(objective='reg:squarederror', n_jobs=-1)
-        selector = RFECV(model, step=1, cv=cv_folds, scoring='neg_mean_absolute_percentage_error')
+        model = self.init_model(feature_selection=True)
+        selector = RFECV(
+            model, step=1, cv=cv_folds, scoring='neg_mean_absolute_percentage_error', verbose=1, n_jobs=self.n_jobs
+        )
         selector = selector.fit(x_sample, y_sample)
 
         # Get the selected features
         self.selected_features[target] = x_sample.columns[selector.support_]
 
-        # Update x_train and x_test with selected features
+        # Update x_train, x_test and x_val with selected features
         self.x_train[target] = self.x_train[target][self.selected_features[target]]
         self.x_test[target] = self.x_test[target][self.selected_features[target]]
+        self.x_val[target] = self.x_val[target][self.selected_features[target]]
 
         logging.info(f"Feature selection completed for target {target}")
 
+    def init_model(self, feature_selection=False):
+
+        if feature_selection:
+            # Set up a smaller model to work with during feature selection
+            return XGBRegressor(
+                objective='reg:squarederror',
+                n_estimators=50,
+                learning_rate=0.05,
+                max_depth=6,
+                subsample=0.8,
+                colsample_bytree=0.8,
+                # n_jobs=self.n_jobs
+            )
+
+        return XGBRegressor(
+            objective='reg:squarederror',
+            n_estimators=1000,
+            learning_rate=0.05,
+            max_depth=6,
+            subsample=0.8,
+            colsample_bytree=0.8,
+            # n_jobs=self.n_jobs
+        )
+
     def fit_model(self, target):
         """Fits the linear regression model to the training data."""
         logging.info(f"Fitting model for target {target}")
-        self.models[target] = XGBRegressor(objective='reg:squarederror')
-        self.models[target].fit(self.x_train[target], self.y_train[target])
+        self.models[target] = self.init_model()
+        self.models[target].fit(
+            self.x_train[target],
+            self.y_train[target],
+            eval_set=[(self.x_val[target], self.y_val[target])],
+            early_stopping_rounds=50
+        )
         logging.info(f"Model fitting completed for target {target}")
 
+        # Store the best iteration
+        self.best_iteration[target] = self.models[target].best_iteration
+
+    def re_train_final_model(self, target):
+        """Re-trains the final model on the combined training and validation set."""
+        logging.info(f"Re-training final model for target {target}")
+        x_train_val = pd.concat([self.x_train[target], self.x_val[target]])
+        y_train_val = pd.concat([self.y_train[target], self.y_val[target]])
+
+        self.models[target] = self.init_model()
+
+        self.models[target].fit(x_train_val, y_train_val, verbose=False)
+        logging.info(f"Re-training final model completed for target {target}")
+
     def evaluate_model(self, target):
         """Evaluates the model on training and testing data."""
         logging.info(f"Evaluating model for target {target}")
@@ -166,6 +243,9 @@ class EnergyConsumptionModel:
             'Actual': self.y_train[target],
             'Predicted': y_train_pred
         })
+        self.training_predictions[target]["residual"] = abs(
+            self.training_predictions[target]["Actual"] - self.training_predictions[target]["Predicted"]
+        )
 
         y_test_pred = self.models[target].predict(self.x_test[target])
         test_mse = mean_squared_error(self.y_test[target], y_test_pred)
@@ -176,11 +256,20 @@ class EnergyConsumptionModel:
             'Actual': self.y_test[target],
             'Predicted': y_test_pred
         })
+        self.testing_predictions[target]["residual"] = abs(
+            self.testing_predictions[target]["Actual"] - self.testing_predictions[target]["Predicted"]
+        )
 
-        feature_importance = pd.DataFrame({
-            'Feature': self.selected_features[target],
-            'Importance': self.models[target].feature_importances_
-        }).sort_values(by='Importance', ascending=False)
+        if target in self.selected_features:
+            feature_importance = pd.DataFrame({
+                'Feature': self.selected_features[target],
+                'Importance': self.models[target].feature_importances_
+            }).sort_values(by='Importance', ascending=False)
+        else:
+            feature_importance = pd.DataFrame({
+                'Feature': self.x_train[target].columns,
+                'Importance': self.models[target].feature_importances_
+            }).sort_values(by='Importance', ascending=False)
 
         logging.info(f"Evaluation completed for target {target}")
 
@@ -303,18 +392,31 @@ class EnergyConsumptionModel:
 
 
 # Example usage:
-model = EnergyConsumptionModel()
-model.read_dataset('energy_consumption/2024-07-02/energy_consumption_dataset.parquet')
+model = EnergyConsumptionModel(n_jobs=2)
+model.read_dataset('energy_consumption/2024-07-04/energy_consumption_dataset.parquet')
 model.feature_engineering()
 
 # For heating_kwh
 model.split_dataset(target='heating_kwh')
-model.feature_selection(target='heating_kwh', cv_folds=3, sample_fraction=0.1)
+# model.feature_selection(target='heating_kwh', cv_folds=3, sample_fraction=0.1)
 model.fit_model(target='heating_kwh')
+
+model.re_train_final_model(target='heating_kwh')
 evaluation_results = model.evaluate_model(target='heating_kwh')
-from pprint import pprint
 
 pprint(evaluation_results["train"])
 pprint(evaluation_results["test"])
 
 importance_df = evaluation_results["train"]["Feature Importance"]
+testing_predictions = model.testing_predictions["heating_kwh"]
+testing_predictions = testing_predictions.sort_values("residual", ascending=False)
+# Merge on model.input_data, by the index
+merged_data = testing_predictions.merge(model.input_data, left_index=True, right_index=True)
+
+# For hot_water_kwh
+model.split_dataset(target='hot_water_kwh')
+model.fit_model(target='hot_water_kwh')
+model.re_train_final_model(target='hot_water_kwh')
+evaluation_results = model.evaluate_model(target='hot_water_kwh')
+pprint(evaluation_results["train"])
+pprint(evaluation_results["test"])
diff --git a/etl/bill_savings/data_collection.py b/etl/bill_savings/data_collection.py
index 24b10d7f..ecc62015 100644
--- a/etl/bill_savings/data_collection.py
+++ b/etl/bill_savings/data_collection.py
@@ -133,7 +133,7 @@ def app():
     energy_consumption_data = []
     for i, directory in tqdm(enumerate(epc_directories), total=len(epc_directories)):
         # Skip the first 50
-        if i < 90:
+        if i < 305:
             continue
 
         data = pd.read_csv(directory / "certificates.csv", low_memory=False)
diff --git a/etl/bill_savings/data_collation.py b/etl/bill_savings/data_combining.py
similarity index 95%
rename from etl/bill_savings/data_collation.py
rename to etl/bill_savings/data_combining.py
index ef2b286b..a111ecf2 100644
--- a/etl/bill_savings/data_collation.py
+++ b/etl/bill_savings/data_combining.py
@@ -1,12 +1,13 @@
 import re
 from datetime import datetime
+from tqdm import tqdm
 
 import pandas as pd
 
 from utils.s3 import list_files_in_s3_folder, read_pickle_from_s3, save_dataframe_to_s3_parquet
 
 # These columns we co-erce to strings before saving
-PROBLEMATIC_COLUMNS = ["main-heating-controls"]
+PROBLEMATIC_COLUMNS = ["main-heating-controls", "floor-level"]
 
 
 def extract_kwh_value(text):
@@ -41,7 +42,7 @@ def app():
     run_date = datetime.now().strftime("%Y-%m-%d")
 
     complete_data = []
-    for files in data_files:
+    for files in tqdm(data_files):
         dataset_run_date = files.split("/")[-1].split(".")[0]
         # Extract the date from the file name
         dataset_run_date = pd.Timestamp(dataset_run_date)
@@ -90,5 +91,3 @@ def app():
         file_key=f"energy_consumption/{run_date}/energy_consumption_dataset.parquet",
         df=df
     )
-
-    df.to_csv("energy_consumption_dataset.csv", index=False)

From 1320416dc355af0170306bc921064744d436f54b Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 5 Jul 2024 12:15:01 +0100
Subject: [PATCH 80/80] Added new ecr instances

---
 etl/bill_savings/EnergyConsumptionModel.py | 163 ++++++++++++++++++---
 etl/bill_savings/data_collection.py        |   2 +-
 infrastructure/terraform/main.tf           |  46 ++++--
 3 files changed, 174 insertions(+), 37 deletions(-)

diff --git a/etl/bill_savings/EnergyConsumptionModel.py b/etl/bill_savings/EnergyConsumptionModel.py
index 89847ca1..534b8d60 100644
--- a/etl/bill_savings/EnergyConsumptionModel.py
+++ b/etl/bill_savings/EnergyConsumptionModel.py
@@ -1,10 +1,12 @@
 import pandas as pd
+import numpy as np
+import msgpack
 from xgboost import XGBRegressor
 from datetime import datetime
 from sklearn.model_selection import train_test_split
 from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
 from sklearn.feature_selection import RFECV
-from utils.s3 import save_pickle_to_s3, read_pickle_from_s3, read_dataframe_from_s3_parquet
+from utils.s3 import save_pickle_to_s3, read_pickle_from_s3, read_dataframe_from_s3_parquet, read_from_s3
 import logging
 from pprint import pprint
 
@@ -14,17 +16,36 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(
 
 class EnergyConsumptionModel:
     FEATURES = {
+        # "heating_kwh": [
+        #     "lodgement-year", "lodgement-month", "current-energy-efficiency", "energy-consumption-current",
+        #     "heating-cost-current",
+        #     "total-floor-area", "number-heated-rooms",
+        #     "mainheat-description", "mainheat-energy-eff", "main-fuel", "secondheat-description",
+        #     "property-type", "built-form", "mainheatcont-description", 'hotwater-description', 'hot-water-energy-eff',
+        #     # TESTING
+        #     # "walls-description",
+        #     "walls-energy-eff",
+        #     # "roof-description",
+        #     "roof-energy-eff",
+        #     # "floor-description",
+        #     # "county"
+        #     # "co2-emissions-current", - Made it worse
+        #     # TODO: Should hot water features go in here?
+        #     # , , "number-habitable-rooms",
+        #     #
+        #     #
+        #     #
+        # ],
         "heating_kwh": [
             "lodgement-year", "lodgement-month", "current-energy-efficiency", "energy-consumption-current",
-            "heating-cost-current",
-            "total-floor-area", "number-heated-rooms",
-            "mainheat-description", "mainheat-energy-eff", "main-fuel",
-            # TESTING
-            "secondheat-description",
-            # , , "number-habitable-rooms",
-            # "mainheatcont-description",
-            # "co2-emissions-current",
-            # "property-type", "built-form",
+            "heating-cost-current", "heating-cost-potential", "total-floor-area", "number-heated-rooms",
+            "mainheat-description", "mainheat-energy-eff", "main-fuel", "secondheat-description", "property-type",
+            "built-form", "mainheatcont-description", "hotwater-description", "hot-water-energy-eff",
+            "walls-energy-eff",
+            "roof-energy-eff", "windows-description", "windows-energy-eff", "floor-description", "flat-top-storey",
+            "flat-storey-count", "unheated-corridor-length", "solar-water-heating-flag", "mechanical-ventilation",
+            "low-energy-lighting", "environment-impact-current", "energy-tariff",
+            "county", "construction-age-band", "co2-emissions-current"
         ],
         "hot_water_kwh": [
             "lodgement-year", "lodgement-month",
@@ -41,9 +62,15 @@ class EnergyConsumptionModel:
         "lodgement-year", "lodgement-month", "main-fuel", "mainheat-description", "number-heated-rooms",
         "number-habitable-rooms", "mainheat-energy-eff", "mainheatcont-description", "property-type", "built-form",
         "construction-age-band", "secondheat-description", "hotwater-description", "hot-water-energy-eff",
+        "walls-description", "walls-energy-eff", "roof-description", "roof-energy-eff", "floor-description",
+        "county",
+        "windows-description", "windows-energy-eff", "flat-top-storey",
+        "flat-storey-count", "unheated-corridor-length", "solar-water-heating-flag", "mechanical-ventilation",
+        "low-energy-lighting", "environment-impact-current", "energy-tariff"
     ]
 
-    def __init__(self, model_paths=None, n_jobs=1):
+    def __init__(self, cleaned, model_paths=None, n_jobs=1):
+        self.cleaned = cleaned
         self.models = {}
         self.model_paths = model_paths or {}
         self.n_jobs = n_jobs
@@ -85,6 +112,55 @@ class EnergyConsumptionModel:
         self.data["lodgement-year"] = self.data["lodgement-date"].dt.year
         self.data["lodgement-month"] = self.data["lodgement-date"].dt.month
 
+        # For wall, roof and floor descriptions where we have an average thermal transmittance, we group the
+        # values into bands to avoid having too many categories
+        ranges = {
+            "lessthan 0.1": (0, 0.1),
+            "0.1 - 0.3": (0.1, 0.3),
+            "0.3 - 0.5": (0.3, 0.5),
+            "morethan 0.5": (0.5, 2.5),
+        }
+
+        # Generate the lookup table
+        thermal_transmittance_lookup_table = []
+        for i in range(1, 251):
+            value = i / 100
+            for label, (low, high) in ranges.items():
+                if low < value <= high:
+                    thermal_transmittance_lookup_table.append({"from": value, "to": label})
+                    break
+
+        # Convert to a DataFrame so the lookup can be merged onto the data
+        thermal_transmittance_lookup_table = pd.DataFrame(thermal_transmittance_lookup_table)
+        thermal_transmittance_lookup_table["from"] = thermal_transmittance_lookup_table["from"].astype(str)
+
+        # Apply the lookup table to the data
+        for feature in ["walls-description", "roof-description", "floor-description"]:
+            cleaned_df = pd.DataFrame(self.cleaned[feature])[["original_description", "thermal_transmittance"]]
+            # Round to 2 decimal places and convert to string
+            cleaned_df["thermal_transmittance"] = cleaned_df["thermal_transmittance"].round(2).astype(str)
+
+            self.data = self.data.merge(
+                cleaned_df,
+                how="left",
+                left_on=feature,
+                right_on="original_description",
+            )
+            # We now have the thermal transmittance in the data, which we can use to group with the lookup table
+            self.data = self.data.merge(
+                thermal_transmittance_lookup_table,
+                how="left",
+                left_on="thermal_transmittance",
+                right_on="from",
+            )
+            # Where "to" is populated, replace the feature value with the "to" band label
+            self.data[feature] = np.where(
+                ~pd.isnull(self.data["to"]),
+                self.data["to"],
+                self.data[feature]
+            )
+            self.data = self.data.drop(columns=["original_description", "thermal_transmittance", "from", "to"])
+
         # Modify number of heated rooms and number of habitable rooms
         # self.data["number-heated-rooms"] = self.data["number-heated-rooms"].apply(lambda x: "10+" if x > 10 else
         # str(x))
@@ -192,7 +268,8 @@ class EnergyConsumptionModel:
                 max_depth=6,
                 subsample=0.8,
                 colsample_bytree=0.8,
-                # n_jobs=self.n_jobs
+                reg_alpha=0.1,
+                reg_lambda=0.1
             )
 
         return XGBRegressor(
@@ -200,26 +277,62 @@ class EnergyConsumptionModel:
             n_estimators=1000,
             learning_rate=0.05,
             max_depth=6,
+            min_child_weight=3,
             subsample=0.8,
             colsample_bytree=0.8,
+            reg_alpha=0.1,
+            reg_lambda=0.1
             # n_jobs=self.n_jobs
         )
 
     def fit_model(self, target):
-        """Fits the linear regression model to the training data."""
+        """Fits the model for the target, drops zero-importance features from the splits and refits if any were found."""
+
         logging.info(f"Fitting model for target {target}")
-        self.models[target] = self.init_model()
-        self.models[target].fit(
+
+        # First pass: fit on the full feature set to obtain feature importances
+        model = self.init_model()
+        model.fit(
             self.x_train[target],
             self.y_train[target],
             eval_set=[(self.x_val[target], self.y_val[target])],
             early_stopping_rounds=50
         )
-        logging.info(f"Model fitting completed for target {target}")
+
+        # Store the first-pass model (replaced further down if a refit happens)
+        self.models[target] = model
+
+        # Identify and remove zero-importance features
+        feature_importance = pd.DataFrame({
+            'Feature': self.x_train[target].columns,
+            'Importance': model.feature_importances_
+        })
+        zero_importance_features = feature_importance[feature_importance['Importance'] == 0]['Feature'].tolist()
+
+        if zero_importance_features:
+            logging.info(f"Removing zero-importance features for target {target}: {zero_importance_features}")
+
+            self.x_train[target] = self.x_train[target].drop(columns=zero_importance_features)
+            self.x_val[target] = self.x_val[target].drop(columns=zero_importance_features)
+            self.x_test[target] = self.x_test[target].drop(columns=zero_importance_features)
+
+            # Re-fit the model with the reduced feature set
+            model = self.init_model()
+            model.fit(
+                self.x_train[target],
+                self.y_train[target],
+                eval_set=[(self.x_val[target], self.y_val[target])],
+                early_stopping_rounds=50
+            )
+
+            # Update the model
+            self.models[target] = model
 
         # Store the best iteration
         self.best_iteration[target] = self.models[target].best_iteration
 
+        logging.info(f"Model fitting completed for target {target}")
+
     def re_train_final_model(self, target):
         """Re-trains the final model on the combined training and validation set."""
         logging.info(f"Re-training final model for target {target}")
@@ -391,16 +504,21 @@ class EnergyConsumptionModel:
         return summary
 
 
-# Example usage:
-model = EnergyConsumptionModel(n_jobs=2)
-model.read_dataset('energy_consumption/2024-07-04/energy_consumption_dataset.parquet')
+# Usage:
+cleaned = read_from_s3(
+    s3_file_name="cleaned_epc_data/cleaned.bson",
+    bucket_name="retrofit-data-dev"
+)
+
+cleaned = msgpack.unpackb(cleaned, raw=False)
+
+model = EnergyConsumptionModel(cleaned=cleaned, n_jobs=2)
+model.read_dataset('energy_consumption/2024-07-05/energy_consumption_dataset.parquet')
 model.feature_engineering()
 
 # For heating_kwh
 model.split_dataset(target='heating_kwh')
-# model.feature_selection(target='heating_kwh', cv_folds=3, sample_fraction=0.1)
 model.fit_model(target='heating_kwh')
-
 model.re_train_final_model(target='heating_kwh')
 evaluation_results = model.evaluate_model(target='heating_kwh')
 
@@ -410,8 +528,11 @@ pprint(evaluation_results["test"])
 importance_df = evaluation_results["train"]["Feature Importance"]
 testing_predictions = model.testing_predictions["heating_kwh"]
 testing_predictions = testing_predictions.sort_values("residual", ascending=False)
+training_predictions = model.training_predictions["heating_kwh"]
+training_predictions = training_predictions.sort_values("residual", ascending=False)
 # Merge on model.input_data, by the index
 merged_data = testing_predictions.merge(model.input_data, left_index=True, right_index=True)
+merged_data_train = training_predictions.merge(model.input_data, left_index=True, right_index=True)
 
 # For hot_water_kwh
 model.split_dataset(target='hot_water_kwh')
diff --git a/etl/bill_savings/data_collection.py b/etl/bill_savings/data_collection.py
index ecc62015..4d913e8f 100644
--- a/etl/bill_savings/data_collection.py
+++ b/etl/bill_savings/data_collection.py
@@ -133,7 +133,7 @@ def app():
     energy_consumption_data = []
     for i, directory in tqdm(enumerate(epc_directories), total=len(epc_directories)):
         # Skip the first 50
-        if i < 305:
+        if i < 36:
             continue
 
         data = pd.read_csv(directory / "certificates.csv", low_memory=False)
diff --git a/infrastructure/terraform/main.tf b/infrastructure/terraform/main.tf
index 0da850c5..f968aba8 100644
--- a/infrastructure/terraform/main.tf
+++ b/infrastructure/terraform/main.tf
@@ -49,30 +49,30 @@ resource "aws_security_group" "allow_db" {
 
   ingress {
     # TLS (change to whatever ports you need)
-    from_port   = 5432
-    to_port     = 5432
-    protocol    = "tcp"
+    from_port = 5432
+    to_port   = 5432
+    protocol  = "tcp"
     cidr_blocks = ["0.0.0.0/0"]
   }
 
   egress {
-    from_port   = 0
-    to_port     = 0
-    protocol    = "-1"
+    from_port = 0
+    to_port   = 0
+    protocol  = "-1"
     cidr_blocks = ["0.0.0.0/0"]
   }
 }
 
 resource "aws_db_instance" "default" {
-  allocated_storage      = var.allocated_storage
-  engine                 = "postgres"
-  engine_version         = "14.10"
-  instance_class         = var.instance_class
-  db_name                = var.database_name
-  username               = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string)["db_assessment_model_username"]
-  password               = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string)["db_assessment_model_password"]
-  parameter_group_name   = "default.postgres14"
-  skip_final_snapshot    = true
+  allocated_storage    = var.allocated_storage
+  engine               = "postgres"
+  engine_version       = "14.10"
+  instance_class       = var.instance_class
+  db_name              = var.database_name
+  username             = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string)["db_assessment_model_username"]
+  password             = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string)["db_assessment_model_password"]
+  parameter_group_name = "default.postgres14"
+  skip_final_snapshot  = true
   vpc_security_group_ids = [aws_security_group.allow_db.id]
   lifecycle {
     prevent_destroy = true
@@ -187,6 +187,22 @@ module "lambda_heat_prediction_ecr" {
   source   = "./modules/ecr"
 }
 
+# ECR repos for lighting cost, heating cost and hot water cost models
+module "lambda_lighting_cost_prediction_ecr" {
+  ecr_name = "lighting-cost-prediction-${var.stage}"
+  source   = "./modules/ecr"
+}
+
+module "lambda_heating_cost_prediction_ecr" {
+  ecr_name = "heating-cost-prediction-${var.stage}"
+  source   = "./modules/ecr"
+}
+
+module "lambda_hot_water_cost_prediction_ecr" {
+  ecr_name = "hot-water-cost-prediction-${var.stage}"
+  source   = "./modules/ecr"
+}
+
 ##############################################
 # CDN - Cloudfront
 ##############################################