From 615f2289e758c136e73dfaac88d0ff906785f03a Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 22 Feb 2024 12:39:46 +0000
Subject: [PATCH 001/262] Debugging list loading

---
 .idea/Model.iml                               |  2 +-
 .idea/misc.xml                                |  2 +-
 .../ha_15_32/ha_analysis_batch_3.py           | 81 +++++++------------
 3 files changed, 29 insertions(+), 56 deletions(-)
diff --git a/.idea/Model.iml b/.idea/Model.iml
index 4413bb06..b0f9c00d 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 6f308057..1122b380 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 92956337..7bb8b40c 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -131,9 +131,17 @@ class DataLoader:
 
         return ciga_list
 
+    @staticmethod
+    def get_sheetname(workbook):
+        if "Asset List" in workbook.sheetnames:
+            return "Asset List"
+        else:
+            return "Assets"
+
     def load_asset_list(self, filepath, ha_name):
         workbook = openpyxl.load_workbook(filepath)
-        asset_sheet = workbook["Assets"]
+        sheetname = self.get_sheetname(workbook)
+        asset_sheet = workbook[sheetname]
         asset_sheet_colnames = [cell.value for cell in asset_sheet[1]]
 
         rows_data = []
@@ -170,8 +178,10 @@ class DataLoader:
             # Remove columns that are None
             survey_list = survey_list.loc[:, survey_list.columns.notnull()]
             survey_list["survey_list_row_id"] = [ha_name + "_survey_" + str(i) for i in range(0, len(survey_list))]
+
             # Perform survey list merge
-            survey_list = self.merge_surveys_to_assets(asset_list, survey_list, ha_name)
+            if not survey_list.empty:
+                survey_list = self.merge_surveys_to_assets(asset_list, survey_list, ha_name)
 
         # We check if there are CIGA checks
         ciga_list = pd.DataFrame()
@@ -185,9 +195,10 @@ class DataLoader:
             ciga_list = pd.DataFrame(ciga_rows, columns=[cell.value for cell in ciga_sheet[1]])
             # Remove columns that are None
             ciga_list = ciga_list.loc[:, ciga_list.columns.notnull()]
-            ciga_list = self.create_ciga_list_house_no(ha_name, ciga_list)
             # Perform ciga list merge
-            ciga_list = self.merge_ciga_to_assets(asset_list, ciga_list, ha_name)
+            if not ciga_list.empty:
+                ciga_list = self.create_ciga_list_house_no(ha_name, ciga_list)
+                ciga_list = self.merge_ciga_to_assets(asset_list, ciga_list, ha_name)
 
         return asset_list, survey_list, ciga_list
 
@@ -208,6 +219,10 @@ class DataLoader:
 
         return asset_list
 
+    @staticmethod
+    def correct_ha39_asset_list(asset_list):
+        return asset_list
+
     @staticmethod
     def correct_ha6_survey_list(survey_list):
 
@@ -337,6 +352,10 @@ class DataLoader:
 
         return survey_list
 
+    @staticmethod
+    def correct_ha39_survey_list(survey_list):
+        return survey_list
+
     def merge_surveys_to_assets(self, asset_list, survey_list, ha_name):
 
         # Correct the asset list
@@ -491,23 +510,10 @@ class DataLoader:
                 ha_name=ha_name,
             )
 
-            if file_config.get("survey_list"):
-                # TODO: Delete this
-                logger.info("Loading survey list for {}".format(ha_name))
-                survey_list, matched_lookup = self.load_survey_list(
-                    asset_list=asset_list,
-                    file_path=file_config["survey_list"]["filepath"],
-                    ha_name=ha_name,
-                    sheet_name=file_config["survey_list"]["sheetname"]
-                )
-            else:
-                survey_list = None
-                matched_lookup = None
-
             data[ha_name] = {
                 "asset_list": asset_list,
                 "survey_list": survey_list,
-                "matched_lookup": matched_lookup
+                "ciga_list": ciga_list
             }
 
         self.data = data
@@ -1288,42 +1294,9 @@ def app():
     # List all of the data in the folder
     directories = [str(list(entry.iterdir())[0]) for entry in DATA_FOLDER.iterdir() if entry.is_dir()]
 
-    files = {
-        "ha_1": {
-            "asset_list": {
-                "filepath": "local_data/ha_data/HA1/ACCENT GROUP.xlsx",
-                "sheetname": "Energy data"
-            }
-        },
-        "ha_6": {
-            "asset_list": {
-                "filepath": "etl/eligibility/ha_15_32/HA 6 - ASSET LIST.xlsx",
-                "sheetname": "HA 6"
-            },
-            "survey_list": {
-                "filepath": "etl/eligibility/ha_15_32/HA 6 - SURVEY LIST.xlsx",
-                "sheetname": "HA 6"
-            }
-        },
-        "ha_14": {
-            "asset_list": {
-                "filepath": "etl/eligibility/ha_15_32/HA 14 - ASSET LIST.xlsx",
-                "sheetname": "HA 14"
-            }
-        },
-        "ha_39": {
-            "asset_list": {
-                "filepath": "etl/eligibility/ha_15_32/HA 39 - ASSET LIST.xlsx",
-                "sheetname": "Sheet1"
-            }
-        },
-        "ha_107": {
-            "asset_list": {
-                "filepath": "etl/eligibility/ha_15_32/HA 107 - ASSET LIST.xlsx",
-                "sheetname": "HA 107"
-            }
-        }
-    }
+    priority_has = ["HA1", "HA6", "HA14", "HA39", "HA107"]
+    # Filter down the directories to only the priority HAs
+    directories = [d for d in directories if d.split("/")[2] in priority_has]
 
     loader = DataLoader(directories, use_cache)
     loader.load()

From a1b2f9bf5bdd2d059c6327612fe2cb83c5be1687 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 22 Feb 2024 12:42:04 +0000
Subject: [PATCH 002/262] Added ciga list id

---
 .../ha_15_32/ha_analysis_batch_3.py           | 24 ++++++++++++-------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 7bb8b40c..fffc9daf 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -195,6 +195,7 @@ class DataLoader:
             ciga_list = pd.DataFrame(ciga_rows, columns=[cell.value for cell in ciga_sheet[1]])
             # Remove columns that are None
             ciga_list = ciga_list.loc[:, ciga_list.columns.notnull()]
+            survey_list["survey_list_row_id"] = [ha_name + "_ciga_" + str(i) for i in range(0, len(survey_list))]
             # Perform ciga list merge
             if not ciga_list.empty:
                 ciga_list = self.create_ciga_list_house_no(ha_name, ciga_list)
@@ -440,14 +441,14 @@ class DataLoader:
                     df = df[df["matching_postcode"].str.lower().str.contains(row["Post Code"].lower())]
                     if df.shape[0] != 1:
                         postcode_lower = row["Post Code"].lower()
-                        if postcode_lower in missed_postcodes:
-                            matching_lookup.append(
-                                {
-                                    "survey_list_row_id": row["survey_list_row_id"],
-                                    "asset_list_row_id": None,
-                                }
-                            )
-                            continue
+                        # if postcode_lower in missed_postcodes:
+                        #     matching_lookup.append(
+                        #         {
+                        #             "survey_list_row_id": row["survey_list_row_id"],
+                        #             "asset_list_row_id": None,
+                        #         }
+                        #     )
+                        #     continue
 
                         print(row["Street / Block Name"])
                         print(house_number)
@@ -456,13 +457,18 @@ class DataLoader:
 
             matching_lookup.append(
                 {
-                    "survey_list_row_id": row["survey_list_row_id"],
+                    "ciga_list_row_id": row["ciga_list_row_id"],
                     "asset_list_row_id": df["asset_list_row_id"].values[0],
                 }
             )
 
         matching_lookup = pd.DataFrame(matching_lookup)
 
+        # Merge onto the ciga list
+        ciga_list = ciga_list.merge(matching_lookup, how='left', on="ciga_list_row_id")
+
+        return ciga_list
+
     @staticmethod
     def identify_built_form_ha6(property_string):
         """

From d3bff08df8a4ce0d786acc10f9ab605abc938131 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 22 Feb 2024 12:53:01 +0000
Subject: [PATCH 003/262] debugging survey matching for ha14

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index fffc9daf..d27bf8e8 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -221,7 +221,7 @@ class DataLoader:
         return asset_list
 
     @staticmethod
-    def correct_ha39_asset_list(asset_list):
+    def correct_ha14_asset_list(asset_list):
         return asset_list
 
     @staticmethod
@@ -354,7 +354,15 @@ class DataLoader:
         return survey_list
 
     @staticmethod
-    def correct_ha39_survey_list(survey_list):
+    def correct_ha14_survey_list(survey_list):
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Godfrey Road", "Godfrey Drive"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Oiliver Road", "Oliver Road"
+        )
+
         return survey_list
 
     def merge_surveys_to_assets(self, asset_list, survey_list, ha_name):
@@ -389,7 +397,7 @@ class DataLoader:
             if df.shape[0] != 1:
                 df = df[df["HouseNo"] == str(house_number)]
                 if df.shape[0] != 1:
-                    df = df[df["matching_postcode"].str.lower().str.contains(row["Post Code"].lower())]
+                    df = df[df["matching_postcode"].str.lower().str.contains(row["Post Code"].lower().strip())]
                     if df.shape[0] != 1:
                         postcode_lower = row["Post Code"].lower()
                         if postcode_lower in missed_postcodes:

From c6daf520467b0c994a67f7746b51450f36b6bea7 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 22 Feb 2024 16:00:23 +0000
Subject: [PATCH 004/262] Trying to handle streetname extraction and edge case
 in ciga matching

---
 .../ha_15_32/ha_analysis_batch_3.py           | 192 +++++++++++++-----
 1 file changed, 143 insertions(+), 49 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index d27bf8e8..cb4b9885 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1,4 +1,5 @@
 import os
+import re
 import openpyxl
 from pathlib import Path
 import msgpack
@@ -36,6 +37,10 @@ class DataLoader:
         }
     }
 
+    UNMATCHED_CIGA = {
+        "HA14": 6
+    }
+
     def __init__(self, directories, use_cache):
         self.directories = directories
         self.use_cache = use_cache
@@ -101,6 +106,9 @@ class DataLoader:
         else:
             split_addresses = asset_list['matching_address'].str.split(',', expand=True)
             house_numbers = split_addresses[0].str.split(' ', expand=True)
+            # If we have "flat" or "valley" as the house number, then the house number is actually in the second column
+            house_numbers[0] = np.where(house_numbers[0].isin(["flat", "valley"]), house_numbers[1], house_numbers[0])
+
             # THe first column should be HouseNo - we aren't interested in the other columns, but we don't know how
             # many columns there might be
             house_numbers = house_numbers.iloc[:, 0:1]
@@ -117,7 +125,7 @@ class DataLoader:
         :return:
         """
 
-        if ha_name in ["HA6"]:
+        if ha_name in ["HA6", "HA14"]:
             split_addresses = ciga_list['Matched Address'].str.split(',', expand=True)
             house_numbers = split_addresses[0].str.split(' ', expand=True)
             # THe first column should be HouseNo - we aren't interested in the other columns, but we don't know how
@@ -132,16 +140,23 @@ class DataLoader:
         return ciga_list
 
     @staticmethod
-    def get_sheetname(workbook):
+    def get_asset_sheetname(workbook):
         if "Asset List" in workbook.sheetnames:
             return "Asset List"
         else:
             return "Assets"
 
+    @staticmethod
+    def get_ciga_sheetname(workbook):
+        if "CIGA Checks" in workbook.sheetnames:
+            return "CIGA Checks"
+        else:
+            return "CIGA"
+
     def load_asset_list(self, filepath, ha_name):
         workbook = openpyxl.load_workbook(filepath)
-        sheetname = self.get_sheetname(workbook)
-        asset_sheet = workbook[sheetname]
+        asset_sheetname = self.get_asset_sheetname(workbook)
+        asset_sheet = workbook[asset_sheetname]
         asset_sheet_colnames = [cell.value for cell in asset_sheet[1]]
 
         rows_data = []
@@ -165,41 +180,46 @@ class DataLoader:
 
         asset_list = self.append_asset_list_built_form(ha_name=ha_name, asset_list=asset_list)
 
+        # We correct the asset list if it needs it
+        # Correct the asset list
+        correction_function_name = f"correct_{ha_name.lower()}_asset_list"
+        if hasattr(self, correction_function_name):
+            asset_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_asset_list")
+            asset_list = asset_list_correction_function(asset_list)
+
         # We check if there is a survey list
-        survey_list = pd.DataFrame()
-        if "ECO Surveys" in workbook.sheetnames:
-            survey_sheet = workbook["ECO Surveys"]
-            survey_rows = []
-            for row in survey_sheet.iter_rows(min_row=2, values_only=False):  # Assuming the first row is headers
-                row_data = [cell.value for cell in row]  # This will get you the cell values
-                survey_rows.append(row_data)
+        survey_sheetname = "ECO Surveys"
+        survey_sheet = workbook[survey_sheetname]
+        survey_rows = []
+        for row in survey_sheet.iter_rows(min_row=2, values_only=False):  # Assuming the first row is headers
+            row_data = [cell.value for cell in row]  # This will get you the cell values
+            survey_rows.append(row_data)
 
-            survey_list = pd.DataFrame(survey_rows, columns=[cell.value for cell in survey_sheet[1]])
-            # Remove columns that are None
-            survey_list = survey_list.loc[:, survey_list.columns.notnull()]
-            survey_list["survey_list_row_id"] = [ha_name + "_survey_" + str(i) for i in range(0, len(survey_list))]
+        survey_list = pd.DataFrame(survey_rows, columns=[cell.value for cell in survey_sheet[1]])
+        # Remove columns that are None
+        survey_list = survey_list.loc[:, survey_list.columns.notnull()]
+        survey_list["survey_list_row_id"] = [ha_name + "_survey_" + str(i) for i in range(0, len(survey_list))]
 
-            # Perform survey list merge
-            if not survey_list.empty:
-                survey_list = self.merge_surveys_to_assets(asset_list, survey_list, ha_name)
+        # Perform survey list merge
+        if not survey_list.empty:
+            survey_list = self.merge_surveys_to_assets(asset_list, survey_list, ha_name)
 
         # We check if there are CIGA checks
-        ciga_list = pd.DataFrame()
-        if "CIGA Checks" in workbook.sheetnames:
-            ciga_sheet = workbook["CIGA Checks"]
-            ciga_rows = []
-            for row in ciga_sheet.iter_rows(min_row=2, values_only=False):
-                row_data = [cell.value for cell in row]  # This will get you the cell values
-                ciga_rows.append(row_data)
+        ciga_sheetname = self.get_ciga_sheetname(workbook)
+        ciga_sheet = workbook[ciga_sheetname]
+        ciga_rows = []
+        for row in ciga_sheet.iter_rows(min_row=2, values_only=False):
+            row_data = [cell.value for cell in row]  # This will get you the cell values
+            ciga_rows.append(row_data)
 
-            ciga_list = pd.DataFrame(ciga_rows, columns=[cell.value for cell in ciga_sheet[1]])
-            # Remove columns that are None
-            ciga_list = ciga_list.loc[:, ciga_list.columns.notnull()]
-            survey_list["survey_list_row_id"] = [ha_name + "_ciga_" + str(i) for i in range(0, len(survey_list))]
-            # Perform ciga list merge
-            if not ciga_list.empty:
-                ciga_list = self.create_ciga_list_house_no(ha_name, ciga_list)
-                ciga_list = self.merge_ciga_to_assets(asset_list, ciga_list, ha_name)
+        ciga_list = pd.DataFrame(ciga_rows, columns=[cell.value for cell in ciga_sheet[1]])
+        # Remove columns that are None
+        ciga_list = ciga_list.loc[:, ciga_list.columns.notnull()]
+        ciga_list["ciga_list_row_id"] = [ha_name + "_ciga_" + str(i) for i in range(0, len(ciga_list))]
+        # Perform ciga list merge
+        if not ciga_list.empty:
+            ciga_list = self.create_ciga_list_house_no(ha_name, ciga_list)
+            ciga_list = self.merge_ciga_to_assets(asset_list, ciga_list, ha_name)
 
         return asset_list, survey_list, ciga_list
 
@@ -222,6 +242,21 @@ class DataLoader:
 
     @staticmethod
     def correct_ha14_asset_list(asset_list):
+
+        # For 5 Queens Court, DE72 3NP, the postcode is actually DE72 3QZ
+        asset_list.loc[
+            (asset_list["Address 1"] == "5 Queens Court") &
+            (asset_list["Postcode"].str.strip() == "DE72 3NP"),
+            "matching_postcode"
+        ] = "DE72 3QZ"
+
+        # We then correct the matching_address
+        asset_list.loc[
+            (asset_list["Address 1"] == "5 Queens Court") &
+            (asset_list["Postcode"].str.strip() == "DE72 3NP"),
+            "matching_address"
+        ] = "5 queens court, garfield avenue, draycott, derby, de72 3qz"
+
         return asset_list
 
     @staticmethod
@@ -363,13 +398,22 @@ class DataLoader:
             "Oiliver Road", "Oliver Road"
         )
 
+        # For postcodes DE7 4FB, DE7 4EZ, it's actually spelled WINDERMERE AVENUE, not WINDEREMERE AVENUE (without the
+        # extra e)
+        survey_list.loc[
+            (survey_list["Street / Block Name"] == "WINDEREMERE AVENUE") &
+            (survey_list["Post Code"].isin(["DE7 4FB", "DE7 4EZ"])),
+            "Street / Block Name"
+        ] = "WINDERMERE AVENUE"
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "MACDONALD SQAURE", "MACDONALD SQUARE"
+        )
+
         return survey_list
 
     def merge_surveys_to_assets(self, asset_list, survey_list, ha_name):
 
-        # Correct the asset list
-        asset_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_asset_list")
-        asset_list = asset_list_correction_function(asset_list)
         # Correct the survey list
         survey_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_survey_list")
         survey_list = survey_list_correction_function(survey_list)
@@ -411,7 +455,7 @@ class DataLoader:
 
                         print(row["Street / Block Name"])
                         print(house_number)
-                        print(row["Post Code"].lower())
+                        print(row["Post Code"])
                         raise ValueError("Investigate")
 
             matching_lookup.append(
@@ -428,8 +472,38 @@ class DataLoader:
 
         return survey_list
 
+    @staticmethod
+    def extract_streetname(address, house_number=None, postcode=None):
+        """
+        Cleans an address by removing the house number and postcode, and converts everything to lower case.
+
+        :param address: The full address as a string.
+        :param house_number: The house number to remove, as a string or integer.
+        :param postcode: The postcode to remove, as a string.
+        :return: The cleaned address.
+        """
+        # Convert everything to lower case
+        address = address.lower()
+
+        if house_number is not None:
+            # Remove the house number
+            address = re.sub(r'\b{}\b'.format(house_number), '', address, flags=re.IGNORECASE).strip()
+
+        if postcode is not None:
+            # Remove the postcode
+            address = re.sub(r'\b{}\b'.format(re.escape(postcode)), '', address, flags=re.IGNORECASE).strip()
+
+        # Get first section before a comma
+        address = address.split(",")[0]
+        # Additional cleaning to remove extra spaces and commas left over
+        address = re.sub(r'\s+', ' ', address)  # Replace multiple spaces with a single space
+        address = re.sub(r'\s*,\s*', ', ', address)  # Clean up space around commas
+
+        return address
+
     def merge_ciga_to_assets(self, asset_list, ciga_list, ha_name):
         matching_lookup = []
+        unmatched_addresses = []
         for _, row in tqdm(ciga_list.iterrows(), total=len(ciga_list)):
 
             house_number = row["HouseNo"]
@@ -442,22 +516,35 @@ class DataLoader:
             ].copy()
 
             df = df[df["HouseNo"] == str(house_number)]
+            # For ciga, we skip
+            if df.empty:
+                if row["Matched Postcode"] == "LE3 3EE":
+                    dew
+                unmatched_addresses.append(
+                    {
+                        "ciga_list_row_id": row["ciga_list_row_id"],
+                        "HouseNo": house_number,
+                        "Matched Postcode": row["Matched Postcode"]
+                    }
+                )
+                continue
             # TODO: Might need to consider street name at some point
             if df.shape[0] != 1:
 
-                if df.shape[0] != 1:
-                    df = df[df["matching_postcode"].str.lower().str.contains(row["Post Code"].lower())]
-                    if df.shape[0] != 1:
-                        postcode_lower = row["Post Code"].lower()
-                        # if postcode_lower in missed_postcodes:
-                        #     matching_lookup.append(
-                        #         {
-                        #             "survey_list_row_id": row["survey_list_row_id"],
-                        #             "asset_list_row_id": None,
-                        #         }
-                        #     )
-                        #     continue
+                # We split house number and postcode out of the matched address for ciga
+                street_name = self.extract_streetname(
+                    address=row["Matched Address"], house_number=house_number, postcode=row["Matched Postcode"]
+                )
+                df = df[df["matching_address"].str.contains(street_name)]
 
+                if df.shape[0] != 1:
+                    # The final check we do here is to check for the presence of flat in the address
+                    if "flat" in row["Matched Address"]:
+                        df = df[df["matching_address"].str.contains("flat")]
+                    else:
+                        df = df[df["matching_address"].str.contains("flat") == False]
+
+                    if df.shape[0] != 1:
                         print(row["Street / Block Name"])
                         print(house_number)
                         print(row["Post Code"].lower())
@@ -470,6 +557,13 @@ class DataLoader:
                 }
             )
 
+        # We have an acceptable number of ciga failures for each HA
+        if len(unmatched_addresses) != self.UNMATCHED_CIGA[ha_name]:
+            raise ValueError(f"Unmatched addresses for {ha_name} is not as expected")
+
+        # In ciga: 35 Valley Drive, Leicester, LE3 3EE
+        #
+
         matching_lookup = pd.DataFrame(matching_lookup)
 
         # Merge onto the ciga list

From 75102704cdfeacaac68194c9646e23f208e48baf Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 22 Feb 2024 16:05:31 +0000
Subject: [PATCH 005/262] ciga matching for ha14

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index cb4b9885..1a28500b 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -38,7 +38,9 @@ class DataLoader:
     }
 
     UNMATCHED_CIGA = {
-        "HA14": 6
+        # We expect 4 unmatched addresses, which have been validated manually as being in the ciga file but not
+        # the asset list
+        "HA14": 4
     }
 
     def __init__(self, directories, use_cache):
@@ -518,8 +520,6 @@ class DataLoader:
             df = df[df["HouseNo"] == str(house_number)]
             # For ciga, we skip
             if df.empty:
-                if row["Matched Postcode"] == "LE3 3EE":
-                    dew
                 unmatched_addresses.append(
                     {
                         "ciga_list_row_id": row["ciga_list_row_id"],
@@ -528,18 +528,18 @@ class DataLoader:
                     }
                 )
                 continue
-            # TODO: Might need to consider street name at some point
+            
             if df.shape[0] != 1:
 
                 # We split house number and postcode out of the matched address for ciga
                 street_name = self.extract_streetname(
                     address=row["Matched Address"], house_number=house_number, postcode=row["Matched Postcode"]
                 )
-                df = df[df["matching_address"].str.contains(street_name)]
+                df = df[df["matching_address"].str.replace(",", "").str.contains(street_name)]
 
                 if df.shape[0] != 1:
                     # The final check we do here is to check for the presence of flat in the address
-                    if "flat" in row["Matched Address"]:
+                    if "flat" in row["Matched Address"].lower():
                         df = df[df["matching_address"].str.contains("flat")]
                     else:
                         df = df[df["matching_address"].str.contains("flat") == False]

From 32352bbde145c6a0c76f503c766e7fca80c2af99 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 22 Feb 2024 17:46:11 +0000
Subject: [PATCH 006/262] working on survey match for ha107

---
 .../ha_15_32/ha_analysis_batch_3.py           | 45 +++++++++++++------
 1 file changed, 32 insertions(+), 13 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 1a28500b..9e850c0e 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -40,7 +40,9 @@ class DataLoader:
     UNMATCHED_CIGA = {
         # We expect 4 unmatched addresses, which have been validated manually as being in the ciga file but not
         # the asset list
-        "HA14": 4
+        "HA14": 4,
+        # There's just too many unmatched here - if we identify some homes that
+        "HA6": 117
     }
 
     def __init__(self, directories, use_cache):
@@ -78,11 +80,11 @@ class DataLoader:
         elif ha_name == "HA107":
             # Create matching_address by concatenating House No, Street, Town, District, Postcode
             asset_list["matching_address"] = asset_list["House No"].astype(str).str.lower().str.strip() + ", " + \
-                                             asset_list["Street"].str.lower().str.strip() + ", " + \
-                                             asset_list["Town"].str.lower().str.strip() + ", " + \
-                                             asset_list["District"].str.lower().str.strip() + ", " + \
-                                             asset_list["Postcode"].str.lower().str.strip()
-            asset_list["matching_postcode"] = asset_list["Postcode"].str.lower().str.strip()
+                                             asset_list["Street"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Town"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["District"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Postcode"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
         else:
             raise NotImplementedError("implement me")
 
@@ -155,6 +157,13 @@ class DataLoader:
         else:
             return "CIGA"
 
+    @staticmethod
+    def get_survey_sheetname(workbook):
+        if "ECO Surveys" in workbook.sheetnames:
+            return "ECO Surveys"
+        else:
+            return "ECO surveys"
+
     def load_asset_list(self, filepath, ha_name):
         workbook = openpyxl.load_workbook(filepath)
         asset_sheetname = self.get_asset_sheetname(workbook)
@@ -189,8 +198,13 @@ class DataLoader:
             asset_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_asset_list")
             asset_list = asset_list_correction_function(asset_list)
 
+        # For HA1, there is an exception in the structure of the data. We don't have any survey or ciga lists, and so
+        # we can return the asset list now
+        if ha_name == "HA1":
+            return asset_list, pd.DataFrame(), pd.DataFrame()
+
         # We check if there is a survey list
-        survey_sheetname = "ECO Surveys"
+        survey_sheetname = self.get_survey_sheetname(workbook)
         survey_sheet = workbook[survey_sheetname]
         survey_rows = []
         for row in survey_sheet.iter_rows(min_row=2, values_only=False):  # Assuming the first row is headers
@@ -217,6 +231,9 @@ class DataLoader:
         ciga_list = pd.DataFrame(ciga_rows, columns=[cell.value for cell in ciga_sheet[1]])
         # Remove columns that are None
         ciga_list = ciga_list.loc[:, ciga_list.columns.notnull()]
+        # Remove rows with missing postcode which happens in a small number of cases
+        ciga_list = ciga_list[~pd.isnull(ciga_list["Matched Postcode"])]
+
         ciga_list["ciga_list_row_id"] = [ha_name + "_ciga_" + str(i) for i in range(0, len(ciga_list))]
         # Perform ciga list merge
         if not ciga_list.empty:
@@ -414,6 +431,10 @@ class DataLoader:
 
         return survey_list
 
+    @staticmethod
+    def correct_ha107_survey_list(survey_list):
+        return survey_list
+
     def merge_surveys_to_assets(self, asset_list, survey_list, ha_name):
 
         # Correct the survey list
@@ -441,7 +462,7 @@ class DataLoader:
 
             df = df[df["matching_address"].str.contains(str(house_number))]
             if df.shape[0] != 1:
-                df = df[df["HouseNo"] == str(house_number)]
+                df = df[df["HouseNo"].astype(str) == str(house_number)]
                 if df.shape[0] != 1:
                     df = df[df["matching_postcode"].str.lower().str.contains(row["Post Code"].lower().strip())]
                     if df.shape[0] != 1:
@@ -506,6 +527,7 @@ class DataLoader:
     def merge_ciga_to_assets(self, asset_list, ciga_list, ha_name):
         matching_lookup = []
         unmatched_addresses = []
+
         for _, row in tqdm(ciga_list.iterrows(), total=len(ciga_list)):
 
             house_number = row["HouseNo"]
@@ -528,7 +550,7 @@ class DataLoader:
                     }
                 )
                 continue
-            
+
             if df.shape[0] != 1:
 
                 # We split house number and postcode out of the matched address for ciga
@@ -561,9 +583,6 @@ class DataLoader:
         if len(unmatched_addresses) != self.UNMATCHED_CIGA[ha_name]:
             raise ValueError(f"Unmatched addresses for {ha_name} is not as expected")
 
-        # In ciga: 35 Valley Drive, Leicester, LE3 3EE
-        #
-
         matching_lookup = pd.DataFrame(matching_lookup)
 
         # Merge onto the ciga list
@@ -612,7 +631,7 @@ class DataLoader:
         for filepath in self.directories:
             ha_name = filepath.split("/")[2]
             # Load asset list
-            logger.info("Loading asset list for {}".format(ha_name))
+            logger.info("Loading data for {}".format(ha_name))
             asset_list, survey_list, ciga_list = self.load_asset_list(
                 filepath=filepath,
                 ha_name=ha_name,

From d038d668b8fa8360577ef0f83403e3d4cb6e854e Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 22 Feb 2024 17:52:20 +0000
Subject: [PATCH 007/262] ha107 matching 73% complete

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 9e850c0e..46581eca 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -433,6 +433,16 @@ class DataLoader:
 
     @staticmethod
     def correct_ha107_survey_list(survey_list):
+        # Replace Front Street, East Stockham with Front Street, East Stockwith
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Front Street, East Stockham", "Front Street, East Stockwith"
+        )
+
+        # Replace "HONEYHOLE L;ANE" with "HONEYHOLES LANE"
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "HONEYHOLE L;ANE", "HONEYHOLES LANE"
+        )
+
         return survey_list
 
     def merge_surveys_to_assets(self, asset_list, survey_list, ha_name):

From ccb764d4a968efeaef67a068f1cc21f92dfe7000 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 22 Feb 2024 18:01:24 +0000
Subject: [PATCH 008/262] ha107 matching 74% done

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 46581eca..60ef485a 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -443,6 +443,16 @@ class DataLoader:
             "HONEYHOLE L;ANE", "HONEYHOLES LANE"
         )
 
+        # Replace "Croft Lane Cherry Willingham, Lincoln" with "Croft Lane, Cherry Willingham, Lincoln"
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Croft Lane Cherry Willingham, Lincoln", "Croft Lane, Cherry Willingham, Lincoln"
+        )
+
+        # Replace "Snelland Road Wickenby, Lincoln" with "Snelland Road, Wickenby, Lincoln"
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Snelland Road Wickenby, Lincoln", "Snelland Road, Wickenby, Lincoln"
+        )
+
         return survey_list
 
     def merge_surveys_to_assets(self, asset_list, survey_list, ha_name):

From cef20c6e2cf97275146f36f97349f4d0a46d2410 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 23 Feb 2024 12:08:44 +0000
Subject: [PATCH 009/262] completed matching for ha107, added Levenshtein method

---
 .../ha_15_32/ha_analysis_batch_3.py           | 64 +++++++++++++++++++
 1 file changed, 64 insertions(+)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 60ef485a..bf3e6d31 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1,6 +1,7 @@
 import os
 import re
 import openpyxl
+import Levenshtein
 from pathlib import Path
 import msgpack
 from datetime import datetime
@@ -453,6 +454,41 @@ class DataLoader:
             "Snelland Road Wickenby, Lincoln", "Snelland Road, Wickenby, Lincoln"
         )
 
+        # Replace Reasby Road Snelland, Lincoln with Reasby Road, Snelland, Lincoln
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Reasby Road Snelland, Lincoln", "Reasby Road, Snelland, Lincoln"
+        )
+
+        # Replace Silver Street Bardney, Lincoln with Silver Street, Bardney, Lincoln
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Silver Street Bardney, Lincoln", "Silver Street, Bardney, Lincoln"
+        )
+
+        # Replace Manor Close Bardney, Lincoln with Manor Close, Bardney, Lincoln
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Manor Close Bardney, Lincoln", "Manor Close, Bardney, Lincoln"
+        )
+
+        # Replace Ferry Road Southrey, Lincoln with Ferry Road, Southrey, Lincoln
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Ferry Road Southrey, Lincoln", "Ferry Road, Southrey, Lincoln"
+        )
+
+        # Replace Harvey Kent Gardens Bardney, Lincoln with Harvey Kent Gardens, Bardney, Lincoln
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Harvey Kent Gardens Bardney, Lincoln", "Harvey Kent Gardens, Bardney, Lincoln"
+        )
+
+        # Replace Wragby Road Bardney, Lincoln with Wragby Road, Bardney, Lincoln
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Wragby Road Bardney, Lincoln", "Wragby Road, Bardney, Lincoln"
+        )
+
+        # Replace SPRINKHILL ROAD with SPINKHILL ROAD
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "SPRINKHILL ROAD", "SPINKHILL ROAD"
+        )
+
         return survey_list
 
     def merge_surveys_to_assets(self, asset_list, survey_list, ha_name):
@@ -481,10 +517,35 @@ class DataLoader:
             ].copy()
 
             df = df[df["matching_address"].str.contains(str(house_number))]
+
+            if df.empty:
+                print(row["Street / Block Name"])
+                print(house_number)
+                print(row["Post Code"])
+                raise ValueError("Investigate")
+
             if df.shape[0] != 1:
                 df = df[df["HouseNo"].astype(str) == str(house_number)]
                 if df.shape[0] != 1:
                     df = df[df["matching_postcode"].str.lower().str.contains(row["Post Code"].lower().strip())]
+
+                    full_key = str(row["NO."]).lower().strip() + row["Street / Block Name"].lower().strip() + row[
+                        "Town/Area"].lower().strip() + row["Post Code"].lower().strip()
+                    # Remove any spaces from the full key
+                    full_key = full_key.replace(" ", "")
+
+                    match_to = df["matching_address"].tolist()
+                    # Strip out punctuation and spaces
+                    match_to = [re.sub(r'[^\w\s]', '', x) for x in match_to]
+                    match_to = [x.replace(" ", "") for x in match_to]
+
+                    # Perform matching between full key and match_to
+                    distances = [Levenshtein.distance(full_key, s) for s in match_to]
+                    best_match_index = distances.index(min(distances))
+                    # We might want to consider a threshold for the distance, however for the momeny,
+                    # we don't consider this for the moment
+                    df = df.iloc[best_match_index:best_match_index + 1]
+
                     if df.shape[0] != 1:
                         postcode_lower = row["Post Code"].lower()
                         if postcode_lower in missed_postcodes:
@@ -510,6 +571,9 @@ class DataLoader:
 
         matching_lookup = pd.DataFrame(matching_lookup)
 
+        if matching_lookup.shape[0] != survey_list.shape[0]:
+            raise ValueError("Mismatch in the number of survey rows and matching lookup rows")
+
         # Merge onto the survey list
         survey_list = survey_list.merge(matching_lookup, how='left', on="survey_list_row_id")
 

From bc0a2b8e37eab7dcfc4130b18b5c3ebe1c0953cc Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 23 Feb 2024 12:11:00 +0000
Subject: [PATCH 010/262] Debugging location of dropping nulls from ciga list

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index bf3e6d31..f1709d6e 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -232,12 +232,11 @@ class DataLoader:
         ciga_list = pd.DataFrame(ciga_rows, columns=[cell.value for cell in ciga_sheet[1]])
         # Remove columns that are None
         ciga_list = ciga_list.loc[:, ciga_list.columns.notnull()]
-        # Remove rows with missing postcode which happens in a small number of cases
-        ciga_list = ciga_list[~pd.isnull(ciga_list["Matched Postcode"])]
-
-        ciga_list["ciga_list_row_id"] = [ha_name + "_ciga_" + str(i) for i in range(0, len(ciga_list))]
         # Perform ciga list merge
         if not ciga_list.empty:
+            # Remove rows with missing postcode which happens in a small number of cases
+            ciga_list = ciga_list[~pd.isnull(ciga_list["Matched Postcode"])]
+            ciga_list["ciga_list_row_id"] = [ha_name + "_ciga_" + str(i) for i in range(0, len(ciga_list))]
             ciga_list = self.create_ciga_list_house_no(ha_name, ciga_list)
             ciga_list = self.merge_ciga_to_assets(asset_list, ciga_list, ha_name)
 

From 5a451f2f8239aaac05237c93b99c435de83a8652 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 23 Feb 2024 12:20:46 +0000
Subject: [PATCH 011/262] fixed logic for missed postcodes for ha6

---
 .../ha_15_32/ha_analysis_batch_3.py           | 21 ++++++++++---------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index f1709d6e..95ca3901 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -518,6 +518,17 @@ class DataLoader:
             df = df[df["matching_address"].str.contains(str(house_number))]
 
             if df.empty:
+
+                postcode_lower = row["Post Code"].lower()
+                if postcode_lower in missed_postcodes:
+                    matching_lookup.append(
+                        {
+                            "survey_list_row_id": row["survey_list_row_id"],
+                            "asset_list_row_id": None,
+                        }
+                    )
+                    continue
+
                 print(row["Street / Block Name"])
                 print(house_number)
                 print(row["Post Code"])
@@ -546,16 +557,6 @@ class DataLoader:
                     df = df.iloc[best_match_index:best_match_index + 1]
 
                     if df.shape[0] != 1:
-                        postcode_lower = row["Post Code"].lower()
-                        if postcode_lower in missed_postcodes:
-                            matching_lookup.append(
-                                {
-                                    "survey_list_row_id": row["survey_list_row_id"],
-                                    "asset_list_row_id": None,
-                                }
-                            )
-                            continue
-
                         print(row["Street / Block Name"])
                         print(house_number)
                         print(row["Post Code"])

From 75183902c193a8c5634b8cbc9c7bf045dd5a0898 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 23 Feb 2024 15:54:28 +0000
Subject: [PATCH 012/262] completed creation of matching tables

---
 .../ha_15_32/ha_analysis_batch_3.py           | 63 ++++++++++++++-----
 1 file changed, 48 insertions(+), 15 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 95ca3901..2d95a946 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -43,7 +43,8 @@ class DataLoader:
         # the asset list
         "HA14": 4,
         # There's just too many unmatched here - if we identify some homes that
-        "HA6": 117
+        "HA6": 117,
+        "HA107": 52
     }
 
     def __init__(self, directories, use_cache):
@@ -130,7 +131,7 @@ class DataLoader:
         :return:
         """
 
-        if ha_name in ["HA6", "HA14"]:
+        if ha_name in ["HA6", "HA14", "HA107"]:
             split_addresses = ciga_list['Matched Address'].str.split(',', expand=True)
             house_numbers = split_addresses[0].str.split(' ', expand=True)
             # THe first column should be HouseNo - we aren't interested in the other columns, but we don't know how
@@ -153,8 +154,11 @@ class DataLoader:
 
     @staticmethod
     def get_ciga_sheetname(workbook):
+
         if "CIGA Checks" in workbook.sheetnames:
             return "CIGA Checks"
+        elif "CIGA checks" in workbook.sheetnames:
+            return "CIGA checks"
         else:
             return "CIGA"
 
@@ -490,6 +494,22 @@ class DataLoader:
 
         return survey_list
 
+    @staticmethod
+    def levenstein_match(matching_string, df):
+        match_to = df["matching_address"].tolist()
+        # Strip out punctuation and spaces
+        match_to = [re.sub(r'[^\w\s]', '', x) for x in match_to]
+        match_to = [x.replace(" ", "") for x in match_to]
+
+        # Perform matching between full key and match_to
+        distances = [Levenshtein.distance(matching_string, s) for s in match_to]
+        best_match_index = distances.index(min(distances))
+        # We might want to consider a threshold for the distance, however for the momeny,
+        # we don't consider this for the moment
+        df = df.iloc[best_match_index:best_match_index + 1]
+
+        return df
+
     def merge_surveys_to_assets(self, asset_list, survey_list, ha_name):
 
         # Correct the survey list
@@ -544,17 +564,7 @@ class DataLoader:
                     # Remove any spaces from the full key
                     full_key = full_key.replace(" ", "")
 
-                    match_to = df["matching_address"].tolist()
-                    # Strip out punctuation and spaces
-                    match_to = [re.sub(r'[^\w\s]', '', x) for x in match_to]
-                    match_to = [x.replace(" ", "") for x in match_to]
-
-                    # Perform matching between full key and match_to
-                    distances = [Levenshtein.distance(full_key, s) for s in match_to]
-                    best_match_index = distances.index(min(distances))
-                    # We might want to consider a threshold for the distance, however for the momeny,
-                    # we don't consider this for the moment
-                    df = df.iloc[best_match_index:best_match_index + 1]
+                    df = self.levenstein_match(full_key, df)
 
                     if df.shape[0] != 1:
                         print(row["Street / Block Name"])
@@ -623,7 +633,7 @@ class DataLoader:
                 asset_list["matching_address"].str.contains(row["Matched Postcode"].lower().strip())
             ].copy()
 
-            df = df[df["HouseNo"] == str(house_number)]
+            df = df[df["HouseNo"].astype(str) == str(house_number)]
             # For ciga, we skip
             if df.empty:
                 unmatched_addresses.append(
@@ -641,7 +651,9 @@ class DataLoader:
                 street_name = self.extract_streetname(
                     address=row["Matched Address"], house_number=house_number, postcode=row["Matched Postcode"]
                 )
-                df = df[df["matching_address"].str.replace(",", "").str.contains(street_name)]
+                # We check if any of the rows contains the street name and if they do, filter
+                if any(df["matching_address"].str.replace(",", "").str.contains(street_name)):
+                    df = df[df["matching_address"].str.replace(",", "").str.contains(street_name)]
 
                 if df.shape[0] != 1:
                     # The final check we do here is to check for the presence of flat in the address
@@ -650,6 +662,13 @@ class DataLoader:
                     else:
                         df = df[df["matching_address"].str.contains("flat") == False]
 
+                    if df.shape[0] != 1:
+                        full_key = str(row["HouseNo"]).lower().strip() + row["Matched Address"].lower().strip() + row[
+                            "Matched Postcode"].lower().strip()
+                        # Remove any spaces from the full key
+                        full_key = full_key.replace(" ", "")
+                        df = self.levenstein_match(full_key, df)
+
                     if df.shape[0] != 1:
                         print(row["Street / Block Name"])
                         print(house_number)
@@ -737,6 +756,19 @@ class DataLoader:
             s3_file_name="ha-analysis/batch3-inputs.pickle",
         )
 
+    def ha_facts_and_figures(self):
+        """
+        This function will return a dictionary of facts and figures for each HA
+        :return:
+        """
+        ha_facts_and_figures = []
+        for ha_name, data_assets in self.data.items():
+            asset_list = data_assets["asset_list"]
+            survey_list = data_assets["survey_list"]
+            ciga_list = data_assets["ciga_list"]
+
+        return ha_facts_and_figures
+
 
 def get_epc_data(
     loader, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds, pull_data=True
@@ -1511,6 +1543,7 @@ def app():
 
     loader = DataLoader(directories, use_cache)
     loader.load()
+    loader.ha_facts_and_figures()
 
     # TODO: We probably need to make sure that we have all of the columns that we need
 

From 6693ab4ca6e12a6b9da112e8c8a3d48b1fe6ad87 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 23 Feb 2024 17:13:18 +0000
Subject: [PATCH 013/262] Added in read of december figures

---
 .../ha_15_32/ha_analysis_batch_3.py           | 55 +++++++++++++++++--
 1 file changed, 49 insertions(+), 6 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 2d95a946..dbe12e92 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -47,11 +47,13 @@ class DataLoader:
         "HA107": 52
     }
 
-    def __init__(self, directories, use_cache):
+    def __init__(self, directories, december_figures_filepath, use_cache):
         self.directories = directories
         self.use_cache = use_cache
+        self.december_figures_filepath = december_figures_filepath
 
         self.data = {}
+        self.december_figures = None
 
     def create_asset_list_matching_address(self, ha_name, asset_list):
 
@@ -730,6 +732,11 @@ class DataLoader:
             )
             return
 
+        # Get the december figures, which is just a csv
+        self.december_figures = pd.read_csv(self.december_figures_filepath)
+        # Remove the spaces in HA Name
+        self.december_figures["HA Name"] = december_figures["HA Name"].str.replace(" ", "")
+
         data = {}
         for filepath in self.directories:
             ha_name = filepath.split("/")[2]
@@ -763,9 +770,43 @@ class DataLoader:
         """
         ha_facts_and_figures = []
         for ha_name, data_assets in self.data.items():
-            asset_list = data_assets["asset_list"]
-            survey_list = data_assets["survey_list"]
-            ciga_list = data_assets["ciga_list"]
+            asset_list = data_assets["asset_list"].copy()
+            survey_list = data_assets["survey_list"].copy()
+            ciga_list = data_assets["ciga_list"].copy()
+
+            asset_list["ECO Eligibility"].value_counts()
+
+            # We merge on ciga and update the status to reflect if it has failed ciga or not
+            # If Guarantee is Yes, this means that there is a guarantee in place, and the property failed the CIGA
+            # check
+            asset_list = asset_list.merge(
+                ciga_list[["asset_list_row_id", "Guarantee"]],
+                how='left',
+                on="asset_list_row_id"
+            )
+
+            asset_list["ECO Eligibility"].value_counts()
+
+            asset_list["ECO Eligibility"] = np.where(
+                (
+                    asset_list["ECO Eligibility"].str.contains("(Subject to CIGA)", regex=False) &
+                    (asset_list["Guarantee"] == "Yes")
+                ),
+                "Failed CIGA",
+                asset_list["ECO Eligibility"]
+            )
+
+            # We replace any remaining "Subject to CIGA" with pass Ciga
+            asset_list["ECO Eligibility"] = np.where(
+                asset_list["ECO Eligibility"].str.contains("Subject to CIGA", regex=False),
+                "Pass CIGA",
+                asset_list["ECO Eligibility"]
+            )
+
+            asset_list = asset_list.drop(columns=["Guarantee"])
+
+            # Update the asset list with the categorisations
+            self.data[ha_name]["asset_list"] = asset_list
 
         return ha_facts_and_figures
 
@@ -1532,16 +1573,18 @@ def app():
     :return:
     """
 
-    use_cache = False
+    use_cache = True
 
     # List all of the data in the folder
     directories = [str(list(entry.iterdir())[0]) for entry in DATA_FOLDER.iterdir() if entry.is_dir()]
+    # Grab the December HA figures filepath
+    december_figures_filepath = "local_data/ha_data/HA_December_figures.csv"
 
     priority_has = ["HA1", "HA6", "HA14", "HA39", "HA107"]
     # Filter down the directories to only the priority HAs
     directories = [d for d in directories if d.split("/")[2] in priority_has]
 
-    loader = DataLoader(directories, use_cache)
+    loader = DataLoader(directories, december_figures_filepath, use_cache)
     loader.load()
     loader.ha_facts_and_figures()
 

From 8b48dbac9e5e9f25e3c738c1322b1f3a9fbb11db Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 26 Feb 2024 13:37:50 +0000
Subject: [PATCH 014/262] working on eco eligibility code

---
 .../ha_15_32/ha_analysis_batch_3.py           | 153 ++++++++++++++----
 1 file changed, 122 insertions(+), 31 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index dbe12e92..fdc00876 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -725,6 +725,13 @@ class DataLoader:
 
     def load(self):
 
+        # Get the december figures, which is just a csv
+        self.december_figures = pd.read_csv(self.december_figures_filepath)
+        # Remove the spaces in HA Name
+        self.december_figures["HA Name"] = self.december_figures["HA Name"].str.replace(" ", "")
+        self.december_figures["ECO4"] = self.december_figures["ECO4"].astype("Int64")
+        self.december_figures["GBIS"] = self.december_figures["GBIS"].astype("Int64")
+
         if self.use_cache:
             self.data = read_pickle_from_s3(
                 bucket_name="retrofit-datalake-dev",
@@ -732,11 +739,6 @@ class DataLoader:
             )
             return
 
-        # Get the december figures, which is just a csv
-        self.december_figures = pd.read_csv(self.december_figures_filepath)
-        # Remove the spaces in HA Name
-        self.december_figures["HA Name"] = december_figures["HA Name"].str.replace(" ", "")
-
         data = {}
         for filepath in self.directories:
             ha_name = filepath.split("/")[2]
@@ -768,46 +770,135 @@ class DataLoader:
         This function will return a dictionary of facts and figures for each HA
         :return:
         """
+
+        scheme_map = {
+            "ECO4": "ECO4",
+            "AFFORDABLE WARMTH": "ECO4",
+        }
+
+        eco_eligibility_map = {
+            "not eligble": "not eligible"
+        }
+
         ha_facts_and_figures = []
         for ha_name, data_assets in self.data.items():
             asset_list = data_assets["asset_list"].copy()
             survey_list = data_assets["survey_list"].copy()
             ciga_list = data_assets["ciga_list"].copy()
 
-            asset_list["ECO Eligibility"].value_counts()
+            # Change the column name if it's ECO eligibility
+            asset_list = asset_list.rename(columns={"ECO eligibility": "ECO Eligibility"})
+            # Remove surplus whitespace from the ECO Eligibility column
+            asset_list["ECO Eligibility"] = asset_list["ECO Eligibility"].str.strip()
+            # Push to lower case
+            asset_list["ECO Eligibility"] = asset_list["ECO Eligibility"].str.lower()
+            # Remap
+            asset_list["ECO Eligibility"] = asset_list["ECO Eligibility"].map(eco_eligibility_map)
 
-            # We merge on ciga and update the status to reflect if it has failed ciga or not
-            # If Guarantee is Yes, this means that there is a guarantee in place, and the property failed the CIGA
-            # check
-            asset_list = asset_list.merge(
-                ciga_list[["asset_list_row_id", "Guarantee"]],
-                how='left',
-                on="asset_list_row_id"
-            )
+            if not ciga_list.empty:
+                # We merge on ciga and update the status to reflect if it has failed ciga or not
+                # If Guarantee is Yes, this means that there is a guarantee in place, and the property failed the CIGA
+                # check
+                asset_list = asset_list.merge(
+                    ciga_list[["asset_list_row_id", "Guarantee"]],
+                    how='left',
+                    on="asset_list_row_id"
+                )
 
-            asset_list["ECO Eligibility"].value_counts()
+                asset_list["ECO Eligibility"].value_counts()
 
-            asset_list["ECO Eligibility"] = np.where(
-                (
-                    asset_list["ECO Eligibility"].str.contains("(Subject to CIGA)", regex=False) &
-                    (asset_list["Guarantee"] == "Yes")
-                ),
-                "Failed CIGA",
-                asset_list["ECO Eligibility"]
-            )
+                asset_list["ECO Eligibility"] = np.where(
+                    (
+                        asset_list["ECO Eligibility"].str.contains("(subject to ciga)", regex=False) &
+                        (asset_list["Guarantee"] == "Yes")
+                    ),
+                    "failed ciga",
+                    asset_list["ECO Eligibility"]
+                )
 
-            # We replace any remaining "Subject to CIGA" with pass Ciga
-            asset_list["ECO Eligibility"] = np.where(
-                asset_list["ECO Eligibility"].str.contains("Subject to CIGA", regex=False),
-                "Pass CIGA",
-                asset_list["ECO Eligibility"]
-            )
+                # We replace any remaining "Subject to CIGA" with pass Ciga
+                asset_list["ECO Eligibility"] = np.where(
+                    asset_list["ECO Eligibility"].str.contains("Subject to CIGA", regex=False),
+                    "eco4 - passed ciga",
+                    asset_list["ECO Eligibility"]
+                )
 
-            asset_list = asset_list.drop(columns=["Guarantee"])
+                asset_list = asset_list.drop(columns=["Guarantee"])
 
-            # Update the asset list with the categorisations
+            # Update the asset list with the categorisations and rename changes
             self.data[ha_name]["asset_list"] = asset_list
 
+            # Report on sales
+            sales_report = {}
+            if not survey_list.empty:
+                scheme_column = survey_list.columns[0]
+                # We clean up the survey list installation or cancelled
+                survey_list["installed_or_cancelled_clean"] = survey_list["INSTALLED OR CANCELLED"].str.lower()
+                # Remove all punctuation
+                survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.replace(
+                    r'[^\w\s]', '', regex=True
+                )
+                # Remove double spaces
+                survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.replace(
+                    r'\s+', ' ', regex=True
+                )
+                # Remove trailing spaces
+                survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.strip()
+
+                # Remap the values in the scheme column
+                survey_list[scheme_column] = survey_list[scheme_column].map(scheme_map)
+
+                survey_list["installation_status"] = None
+                survey_list["installation_status"] = np.where(
+                    survey_list["installed_or_cancelled_clean"].isin(["installed", "installed see notes"]),
+                    "installed",
+                    survey_list["installation_status"]
+                )
+                survey_list["installation_status"] = np.where(
+                    survey_list["installed_or_cancelled_clean"].isin(["cancelled"]),
+                    "cancelled",
+                    survey_list["installation_status"]
+                )
+                # Find partial installations
+                survey_list["installation_status"] = np.where(
+                    survey_list["installed_or_cancelled_clean"].str.contains("still to be installed"),
+                    "partially installed",
+                    survey_list["installation_status"]
+                )
+                # Find partial cancellations
+                # TODO: We might have more indications of partial cancellations
+                survey_list["installation_status"] = np.where(
+                    survey_list["installed_or_cancelled_clean"].isin(["loft cancelled"]),
+                    "partially cancelled",
+                    survey_list["installation_status"]
+                )
+
+                # Finally, for other cases, we set the status to "in progress"
+                survey_list["installation_status"] = survey_list["installation_status"].fillna("in progress")
+
+                # We concatenate the scheme name with the installation status
+                survey_list["installation_status"] = (
+                    survey_list[scheme_column] + " - " + survey_list["installation_status"]
+                )
+
+                # We get the sales
+                sales_report = survey_list["installation_status"].value_counts().to_dict()
+
+            ha_facts_and_figures.append(
+                {
+                    "HA Name": ha_name,
+                    **asset_list["ECO Eligibility"].value_counts().to_dict(),
+                    **sales_report
+                }
+            )
+
+        ha_facts_and_figures = pd.DataFrame(ha_facts_and_figures)
+        ha_facts_and_figures = ha_facts_and_figures.drop(
+            columns=["not eligible"]
+        )
+
+        ha_facts_and_figures = self.december_figures.merge(ha_facts_and_figures, how="inner", on="HA Name")
+
         return ha_facts_and_figures
 
 

From ae2cc3fab57687bdc83d4aef4d60c23bd3a3b5e8 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 26 Feb 2024 14:14:19 +0000
Subject: [PATCH 015/262] working on ha facts and figures

---
 .../ha_15_32/ha_analysis_batch_3.py           | 25 +++++++++++--------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index fdc00876..d75a9f34 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -42,7 +42,7 @@ class DataLoader:
         # We expect 4 unmatched addresses, which have been validated manually as being in the ciga file but not
         # the asset list
         "HA14": 4,
-        # There's just too many unmatched here - if we identify some homes that
+        # There's just too many unmatched here
         "HA6": 117,
         "HA107": 52
     }
@@ -786,6 +786,8 @@ class DataLoader:
             survey_list = data_assets["survey_list"].copy()
             ciga_list = data_assets["ciga_list"].copy()
 
+            asset_list_starting_size = asset_list.shape[0]
+
             # Change the column name if it's ECO eligibility
             asset_list = asset_list.rename(columns={"ECO eligibility": "ECO Eligibility"})
             # Remove surplus whitespace from the ECO Eligibility column
@@ -793,19 +795,17 @@ class DataLoader:
             # Push to lower case
             asset_list["ECO Eligibility"] = asset_list["ECO Eligibility"].str.lower()
             # Remap
-            asset_list["ECO Eligibility"] = asset_list["ECO Eligibility"].map(eco_eligibility_map)
+            asset_list["ECO Eligibility"] = asset_list["ECO Eligibility"].replace(eco_eligibility_map)
 
             if not ciga_list.empty:
                 # We merge on ciga and update the status to reflect if it has failed ciga or not
                 # If Guarantee is Yes, this means that there is a guarantee in place, and the property failed the CIGA
                 # check
-                asset_list = asset_list.merge(
-                    ciga_list[["asset_list_row_id", "Guarantee"]],
-                    how='left',
-                    on="asset_list_row_id"
-                )
 
-                asset_list["ECO Eligibility"].value_counts()
+                ciga_list_to_merge = ciga_list[["asset_list_row_id", "Guarantee"]].copy()
+                ciga_list_to_merge = ciga_list_to_merge[~pd.isnull(ciga_list_to_merge["asset_list_row_id"])]
+
+                asset_list = asset_list.merge(ciga_list_to_merge, how='left', on="asset_list_row_id")
 
                 asset_list["ECO Eligibility"] = np.where(
                     (
@@ -818,7 +818,10 @@ class DataLoader:
 
                 # We replace any remaining "Subject to CIGA" with pass Ciga
                 asset_list["ECO Eligibility"] = np.where(
-                    asset_list["ECO Eligibility"].str.contains("Subject to CIGA", regex=False),
+                    (
+                        asset_list["ECO Eligibility"].str.contains("(subject to ciga)", regex=False) &
+                        (asset_list["Guarantee"] == "No")
+                    ),
                     "eco4 - passed ciga",
                     asset_list["ECO Eligibility"]
                 )
@@ -826,6 +829,8 @@ class DataLoader:
                 asset_list = asset_list.drop(columns=["Guarantee"])
 
             # Update the asset list with the categorisations and rename changes
+            if asset_list.shape[0] != asset_list_starting_size:
+                raise ValueError("The asset list has changed in size")
             self.data[ha_name]["asset_list"] = asset_list
 
             # Report on sales
@@ -846,7 +851,7 @@ class DataLoader:
                 survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.strip()
 
                 # Remap the values in the scheme column
-                survey_list[scheme_column] = survey_list[scheme_column].map(scheme_map)
+                survey_list[scheme_column] = survey_list[scheme_column].replace(scheme_map)
 
                 survey_list["installation_status"] = None
                 survey_list["installation_status"] = np.where(

From 8ef0198606486cf3eee9abf84723181ef221ea6b Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 26 Feb 2024 16:22:50 +0000
Subject: [PATCH 016/262] handling deduping ciga match

---
 .../ha_15_32/ha_analysis_batch_3.py           | 21 +++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index d75a9f34..6ffe50e3 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -41,7 +41,7 @@ class DataLoader:
     UNMATCHED_CIGA = {
         # We expect 4 unmatched addresses, which have been validated manually as being in the ciga file but not
         # the asset list
-        "HA14": 4,
+        "HA14": 3,
         # There's just too many unmatched here
         "HA6": 117,
         "HA107": 52
@@ -147,6 +147,17 @@ class DataLoader:
 
         return ciga_list
 
+    @staticmethod
+    def dedupe_ciga_list(ciga_list):
+        """Drop CIGA rows whose matched address + postcode repeat, ignoring spaces and punctuation."""
+        ciga_list["unique_key"] = ciga_list["Matched Address"] + ciga_list["Matched Postcode"]
+        # Literal replacement: remove spaces from the unique key
+        ciga_list["unique_key"] = ciga_list["unique_key"].str.replace(" ", "", regex=False)
+        # regex=True is required: pandas >= 2.0 treats the pattern literally by default, stripping nothing
+        ciga_list["unique_key"] = ciga_list["unique_key"].str.replace(r'[^\w\s]', '', regex=True)
+        # Keep the first row for each key (the "unique_key" column is also added to the caller's frame)
+        return ciga_list[~ciga_list["unique_key"].duplicated()]
+
     @staticmethod
     def get_asset_sheetname(workbook):
         if "Asset List" in workbook.sheetnames:
@@ -244,6 +255,7 @@ class DataLoader:
             ciga_list = ciga_list[~pd.isnull(ciga_list["Matched Postcode"])]
             ciga_list["ciga_list_row_id"] = [ha_name + "_ciga_" + str(i) for i in range(0, len(ciga_list))]
             ciga_list = self.create_ciga_list_house_no(ha_name, ciga_list)
+            ciga_list = self.dedupe_ciga_list(ciga_list)
             ciga_list = self.merge_ciga_to_assets(asset_list, ciga_list, ha_name)
 
         return asset_list, survey_list, ciga_list
@@ -686,10 +698,15 @@ class DataLoader:
 
         # We have an acceptable number of ciga failures for each HA
         if len(unmatched_addresses) != self.UNMATCHED_CIGA[ha_name]:
-            raise ValueError(f"Unmatched addresses for {ha_name} is not as expected")
+            raise ValueError(
+                f"Unmatched addresses for {ha_name} is not as expected, got {len(unmatched_addresses)} unmatched")
 
         matching_lookup = pd.DataFrame(matching_lookup)
 
+        # Check dupes as this will cause problems later on
+        if matching_lookup["asset_list_row_id"].duplicated().any():
+            raise ValueError("Duplicated asset list row ids")
+
         # Merge onto the ciga list
         ciga_list = ciga_list.merge(matching_lookup, how='left', on="ciga_list_row_id")
 

From 78f5226ad7a5ec81e4da1ca6f9e78565146e0457 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 26 Feb 2024 16:38:14 +0000
Subject: [PATCH 017/262] put together ha facts and figures

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 6ffe50e3..bd4d5128 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -44,7 +44,7 @@ class DataLoader:
         "HA14": 3,
         # There's just too many unmatched here
         "HA6": 117,
-        "HA107": 52
+        "HA107": 51
     }
 
     def __init__(self, directories, december_figures_filepath, use_cache):
@@ -54,6 +54,7 @@ class DataLoader:
 
         self.data = {}
         self.december_figures = None
+        self.ha_facts_and_figures = None
 
     def create_asset_list_matching_address(self, ha_name, asset_list):
 
@@ -794,7 +795,8 @@ class DataLoader:
         }
 
         eco_eligibility_map = {
-            "not eligble": "not eligible"
+            "not eligble": "not eligible",
+            "eco 4(subject to ciga)": "eco4 (subject to ciga)",
         }
 
         ha_facts_and_figures = []
@@ -919,9 +921,15 @@ class DataLoader:
             columns=["not eligible"]
         )
 
-        ha_facts_and_figures = self.december_figures.merge(ha_facts_and_figures, how="inner", on="HA Name")
+        ha_facts_and_figures = ha_facts_and_figures.fillna(0)
+        # Make all columns apart from HA NAme integers
+        for col in ha_facts_and_figures.columns[1:]:
+            ha_facts_and_figures[col] = ha_facts_and_figures[col].astype(int)
 
-        return ha_facts_and_figures
+        ha_facts_and_figures = self.december_figures.merge(ha_facts_and_figures, how="inner", on="HA Name")
+        ha_facts_and_figures = ha_facts_and_figures.fillna(0)
+
+        self.ha_facts_and_figures = ha_facts_and_figures
 
 
 def get_epc_data(

From c18740eebda1a2b307a91e215f78fdeafcad8402 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 26 Feb 2024 18:44:11 +0000
Subject: [PATCH 018/262] updating eligibility detection

---
 etl/eligibility/Eligibility.py                |  57 +--
 .../ha_15_32/ha_analysis_batch_3.py           | 402 ++++++++++--------
 2 files changed, 249 insertions(+), 210 deletions(-)

diff --git a/etl/eligibility/Eligibility.py b/etl/eligibility/Eligibility.py
index 906ff594..b09d2df5 100644
--- a/etl/eligibility/Eligibility.py
+++ b/etl/eligibility/Eligibility.py
@@ -340,7 +340,6 @@ class Eligibility:
 
         # Check if the property is suitable for cavity wall
         self.cavity_insulation()
-        self.loft_insulation()
 
         self.gbis_warmfront = (self.cavity["suitability"]) and (
             int(self.epc["current-energy-efficiency"]) <= 68
@@ -384,43 +383,49 @@ class Eligibility:
         if current_sap >= 69:
             self.eco4_warmfront = {
                 "eligible": False,
-                "message": "sap too high",
+                "message": "SAP too high",
                 "cavity_type": self.cavity["type"],
                 "loft_type": self.loft["thickness_classification"]
             }
             return
 
-        if post_retrofit_sap is None:
-
-            if current_sap >= 55:
-                message = "Possibly eligible but property currently EPC D"
-            else:
-                message = "subject to post retrofit sap" if is_eligible else "not eligible"
-
-            # Update the message to flag properties that failed just because of a full cavity.
-            # We need to double check that the wall is a cavity, that the loft is suitable and that the
-            # sap is within reason
-            # We can then estimate the age of the cavity fill
-            if not is_eligible and (current_sap < 69) and self.loft["suitability"] and self.walls["is_cavity_wall"]:
-                message = "Failed due to full cavity - check cavity age"
-
+        if not is_eligible and current_sap >= 55:
             self.eco4_warmfront = {
-                "eligible": is_eligible,
-                "message": message,
+                "eligible": False,
+                "message": "failed fabric and SAP check",
                 "cavity_type": self.cavity["type"],
                 "loft_type": self.loft["thickness_classification"]
             }
             return
 
-        is_eligible = is_eligible & (post_retrofit_sap >= 69)
+        if not is_eligible and current_sap < 55:
+            self.eco4_warmfront = {
+                "eligible": False,
+                "message": "failed fabric check",
+                "cavity_type": self.cavity["type"],
+                "loft_type": self.loft["thickness_classification"]
+            }
+            return
 
-        self.eco4_warmfront = {
-            "eligible": is_eligible,
-            "message": None,
-            "cavity_type": self.cavity["type"],
-            "loft_type": self.loft["thickness_classification"]
-        }
-        return
+        if is_eligible and current_sap >= 55:
+            self.eco4_warmfront = {
+                "eligible": True,
+                "message": "Meets fabric, fails SAP check",
+                "cavity_type": self.cavity["type"],
+                "loft_type": self.loft["thickness_classification"]
+            }
+            return
+
+        if is_eligible and current_sap < 55:
+            self.eco4_warmfront = {
+                "eligible": True,
+                "message": "Meets fabric and SAP check",
+                "cavity_type": self.cavity["type"],
+                "loft_type": self.loft["thickness_classification"]
+            }
+            return
+
+        raise ValueError("Implement me")
 
     def check_gbis(self):
 
diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index bd4d5128..5dd9b6e1 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -25,6 +25,84 @@ DATA_FOLDER = Path(__file__).parent / "local_data" / "ha_data"
 logger = setup_logger()
 load_dotenv(ENV_FILE)
 
+PROPERTY_TYPE_LOOKUP = {  # Per-HA lookups from raw asset-list values to normalised property type / built form
+    "HA1": {  # built_form is looked up from the "Property Type" column
+        "built_form": {
+            'Mid Terrace': 'Mid-Terrace',
+            'Semi-Detached': 'Semi-Detached',
+            'End Terrace': 'End-Terrace',
+            'Detached': 'Detached',
+            'Enclosed Mid': 'Mid-Terrace',
+            'Detached Local Connect': 'Detached',
+        }
+    },
+    "HA6": {  # property_type is looked up from the "Dwelling type" column
+        "property_type": {
+            'HOUSE': "House",
+            'GROUND FLOOR FLAT': "Flat",
+            'UPPER FLOOR FLAT': "Flat",
+            'MAISONETTE': "Maisonette",
+            'BUNGALOW': "Bungalow",
+            'WARDEN BUNGALOW': "Bungalow",
+            'WARDEN FLAT': "Flat",
+            'EXTRACARE SCHEME': "Flat",
+        }
+    },
+    "HA14": {  # property_type is looked up from the "Asset Type Description" column
+        "property_type": {
+            "House": "House",
+            "Flat": "Flat",
+            "Bungalow": "Bungalow",
+            "Maisonette": "Maisonette",
+        }
+    },
+    "HA39": {  # Different shape: keyed by the raw "ConstructionStyle" value, each entry holds both fields
+        "Semi house": {"property_type": "House", "built_form": "Semi-Detached"},
+        "1st floor flat": {"property_type": "Flat", "built_form": None},
+        "Mid terrace house": {"property_type": "House", "built_form": "Mid-Terrace"},
+        "Ground floor flat": {"property_type": "Flat", "built_form": None},
+        "End terrace house": {"property_type": "House", "built_form": "End-Terrace"},
+        "Semi bungalow": {"property_type": "Bungalow", "built_form": "Semi-Detached"},
+        "End terrace bungalow": {"property_type": "Bungalow", "built_form": "End-Terrace"},
+        "2nd floor flat": {"property_type": "Flat", "built_form": None},
+        "Mid terrace bungalow": {"property_type": "Bungalow", "built_form": "Mid-Terrace"},
+        "3rd floor flat": {"property_type": "Flat", "built_form": None},
+        "Detached bungalow": {"property_type": "Bungalow", "built_form": "Detached"},
+        "Maisonette": {"property_type": "Maisonette", "built_form": None},
+        "Detached house": {"property_type": "House", "built_form": "Detached"},
+        "Lower ground floor flat": {"property_type": "Flat", "built_form": None},
+        "Dormer bungalow": {"property_type": "Bungalow", "built_form": None},
+        "Basement flat": {"property_type": "Flat", "built_form": None},
+        "Cluster House": {"property_type": "House", "built_form": "Detached"},
+        "2nd/3rd floor duplex flat": {"property_type": "Flat", "built_form": None},
+        "Ground floor flat with study": {"property_type": "Flat", "built_form": None},
+        "4th floor flat": {"property_type": "Flat", "built_form": None},
+        "1st floor flat with study room": {"property_type": "Flat", "built_form": None},
+        "2nd floor flat with study": {"property_type": "Flat", "built_form": None},
+    },
+    "HA107": {  # property_type from "DwellingType", built_form from the stripped "Dwelling Style"
+        "property_type": {
+            "HOUSE": "House",
+            "BUNGALOW": "Bungalow",
+            "GRD FLOOR FLAT": "Flat",
+            "FIRST FLOOR FLAT": "Flat",
+            "SHELTERED BUNGALOW": "Bungalow",
+            "MAISONETTE": "Maisonette",
+            "SECOND FLOOR FLAT": "Flat",
+            "SHELTERED FIRST FLR": "Flat",
+            "SHELTERED GROUND FLR": "Flat",
+            "GRD FLOOR BED SIT": "House"  # NOTE(review): HA1 remaps bedsits to "Flat" - confirm "House" is intended
+        },
+        "built_form": {
+            "Semi Detached": "Semi-Detached",
+            "Mid Terrace": "Mid-Terrace",
+            "End Terrace": "End-Terrace",
+            "Detached": "Detached",
+            "Detatched": "Detached",  # Misspelt key, presumably as it appears in the source data - TODO confirm
+        }
+    }
+}
+
 
 class DataLoader:
     COLUMN_CONFIG = {
@@ -54,7 +132,7 @@ class DataLoader:
 
         self.data = {}
         self.december_figures = None
-        self.ha_facts_and_figures = None
+        self.facts_and_figures = None
 
     def create_asset_list_matching_address(self, ha_name, asset_list):
 
@@ -929,7 +1007,77 @@ class DataLoader:
         ha_facts_and_figures = self.december_figures.merge(ha_facts_and_figures, how="inner", on="HA Name")
         ha_facts_and_figures = ha_facts_and_figures.fillna(0)
 
-        self.ha_facts_and_figures = ha_facts_and_figures
+        self.facts_and_figures = ha_facts_and_figures
+
+
+def get_property_type_and_built_form(property_meta, ha_name):
+    """Return (property_type, built_form) for one asset-list row, using the rules specific to ha_name.
+
+    Unmapped HA6/HA14 values raise KeyError (direct indexing); an unsupported ha_name raises NotImplementedError.
+    """
+    if ha_name == "HA1":
+        property_type = property_meta["Asset Type"]
+        # We correct a small error
+        if property_type == "a":
+            property_type = "House"
+
+        # Remap bedsits to flats
+        if property_type in ["Bedsit", "Room"]:
+            property_type = "Flat"
+
+        built_form = PROPERTY_TYPE_LOOKUP[ha_name]["built_form"].get(property_meta["Property Type"], None)
+    elif ha_name == "HA6":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name]["property_type"][property_meta["Dwelling type"]]
+        built_form = property_meta["built_form"]
+    elif ha_name == "HA14":
+        if property_meta["Asset Type Description"] == "Block - Repair":
+            # We try and deduce if it's a flat or house: "room" in the address means house, anything else flat
+            if "room" in property_meta["Address 1"].lower():
+                property_type = "House"
+            else:
+                property_type = "Flat"
+        else:
+            property_type = PROPERTY_TYPE_LOOKUP[ha_name]["property_type"][
+                property_meta["Asset Type Description"]
+            ]
+
+        built_form = None
+    elif ha_name == "HA39":
+        property_type_config = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["ConstructionStyle"], {})
+        property_type = property_type_config.get("property_type", None)
+        built_form = property_type_config.get("built_form", None)
+
+        if property_type is None:
+            # Only "flat" is checked (case-sensitive; assumes matching_address is lower-cased - TODO confirm)
+            if "flat" in property_meta["matching_address"]:
+                property_type = "Flat"
+            else:
+                property_type = "House"
+    elif ha_name == "HA107":
+        dwelling_style = property_meta["Dwelling Style"]
+        if isinstance(dwelling_style, str):
+            dwelling_style = dwelling_style.strip()
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name]["property_type"].get(property_meta["DwellingType"])
+        built_form = PROPERTY_TYPE_LOOKUP[ha_name]["built_form"].get(dwelling_style, None)
+        # Fallbacks run in order, so a later match overrides an earlier one; None is kept if none match
+        if property_type is None:
+            if built_form in ["Semi-Detached", "Mid-Terrace", "End-Terrace", "Detached"]:
+                property_type = "House"
+
+            if "flat" in property_meta["Wall Construction"].lower():
+                property_type = "Flat"
+
+            if (property_meta["DwellingType"] == "UNKNOWN") & (property_meta["Dwelling Style"] == 0):
+                # Handle a few specific cases
+                property_type = "Bungalow"
+
+            if property_meta["Street"] == "School View":
+                property_type = "Bungalow"
+
+    else:
+        raise NotImplementedError("Implement me")
+
+    return property_type, built_form
 
 
 def get_epc_data(
@@ -938,84 +1086,6 @@ def get_epc_data(
     if not loader.data:
         raise ValueError("Data not found - please run loader.load() first")
 
-    property_type_lookup = {
-        "ha_1": {
-            "built_form": {
-                'Mid Terrace': 'Mid-Terrace',
-                'Semi-Detached': 'Semi-Detached',
-                'End Terrace': 'End-Terrace',
-                'Detached': 'Detached',
-                'Enclosed Mid': 'Mid-Terrace',
-                'Detached Local Connect': 'Detached',
-            }
-        },
-        "ha_6": {
-            "property_type": {
-                'HOUSE': "House",
-                'GROUND FLOOR FLAT': "Flat",
-                'UPPER FLOOR FLAT': "Flat",
-                'MAISONETTE': "Maisonette",
-                'BUNGALOW': "Bungalow",
-                'WARDEN BUNGALOW': "Bungalow",
-                'WARDEN FLAT': "Flat",
-                'EXTRACARE SCHEME': "Flat",
-            }
-        },
-        "ha_14": {
-            "property_type": {
-                "House": "House",
-                "Flat": "Flat",
-                "Bungalow": "Bungalow",
-                "Maisonette": "Maisonette",
-            }
-        },
-        "ha_39": {
-            "Semi house": {"property_type": "House", "built_form": "Semi-Detached"},
-            "1st floor flat": {"property_type": "Flat", "built_form": None},
-            "Mid terrace house": {"property_type": "House", "built_form": "Mid-Terrace"},
-            "Ground floor flat": {"property_type": "Flat", "built_form": None},
-            "End terrace house": {"property_type": "House", "built_form": "End-Terrace"},
-            "Semi bungalow": {"property_type": "Bungalow", "built_form": "Semi-Detached"},
-            "End terrace bungalow": {"property_type": "Bungalow", "built_form": "End-Terrace"},
-            "2nd floor flat": {"property_type": "Flat", "built_form": None},
-            "Mid terrace bungalow": {"property_type": "Bungalow", "built_form": "Mid-Terrace"},
-            "3rd floor flat": {"property_type": "Flat", "built_form": None},
-            "Detached bungalow": {"property_type": "Bungalow", "built_form": "Detached"},
-            "Maisonette": {"property_type": "Maisonette", "built_form": None},
-            "Detached house": {"property_type": "House", "built_form": "Detached"},
-            "Lower ground floor flat": {"property_type": "Flat", "built_form": None},
-            "Dormer bungalow": {"property_type": "Bungalow", "built_form": None},
-            "Basement flat": {"property_type": "Flat", "built_form": None},
-            "Cluster House": {"property_type": "House", "built_form": "Detached"},
-            "2nd/3rd floor duplex flat": {"property_type": "Flat", "built_form": None},
-            "Ground floor flat with study": {"property_type": "Flat", "built_form": None},
-            "4th floor flat": {"property_type": "Flat", "built_form": None},
-            "1st floor flat with study room": {"property_type": "Flat", "built_form": None},
-            "2nd floor flat with study": {"property_type": "Flat", "built_form": None},
-        },
-        "ha_107": {
-            "property_type": {
-                "HOUSE": "House",
-                "BUNGALOW": "Bungalow",
-                "GRD FLOOR FLAT": "Flat",
-                "FIRST FLOOR FLAT": "Flat",
-                "SHELTERED BUNGALOW": "Bungalow",
-                "MAISONETTE": "Maisonette",
-                "SECOND FLOOR FLAT": "Flat",
-                "SHELTERED FIRST FLR": "Flat",
-                "SHELTERED GROUND FLR": "Flat",
-                "GRD FLOOR BED SIT": "House"
-            },
-            "built_form": {
-                "Semi Detached": "Semi-Detached",
-                "Mid Terrace": "Mid-Terrace",
-                "End Terrace": "End-Terrace",
-                "Detached": "Detached",
-                "Detatched": "Detached",
-            }
-        }
-    }
-
     outputs = {}
     for ha_name, data_assets in loader.data.items():
 
@@ -1049,77 +1119,15 @@ def get_epc_data(
             if property_meta["matching_postcode"] is None:
                 continue
 
-            if ha_name == "ha_1":
-                property_type = property_meta["Asset Type"]
-                # We correct a small error
-                if property_type == "a":
-                    property_type = "House"
-
-                # Remap bedsits to flats
-                if property_type in ["Bedsit", "Room"]:
-                    property_type = "Flat"
-
-                built_form = property_type_lookup[ha_name]["built_form"].get(property_meta["Property Type"], None)
-            elif ha_name == "ha_6":
-                property_type = property_type_lookup[ha_name]["property_type"][property_meta["Dwelling type"]]
-                built_form = property_meta["built_form"]
-            elif ha_name == "ha_14":
-                if property_meta["Asset Type Description"] == "Block - Repair":
-                    # We try and deduce if it's a flat or house, depending on if it has "room" or "flats" in the address
-                    if "room" in property_meta["Address 1"].lower():
-                        property_type = "House"
-                    else:
-                        property_type = "Flat"
-
-                else:
-                    property_type = property_type_lookup[ha_name]["property_type"][
-                        property_meta["Asset Type Description"]
-                    ]
-
-                built_form = None
-            elif ha_name == "ha_39":
-
-                property_type_config = property_type_lookup[ha_name].get(property_meta["ConstructionStyle"], {})
-                property_type = property_type_config.get("property_type", None)
-                built_form = property_type_config.get("built_form", None)
-
-                if property_type is None:
-                    # We check for the presence of room or flat
-                    if "flat" in property_meta["matching_address"]:
-                        property_type = "Flat"
-                    else:
-                        property_type = "House"
-            elif ha_name == "ha_107":
-
-                dwelling_style = property_meta["Dwelling Style"]
-                if isinstance(dwelling_style, str):
-                    dwelling_style = dwelling_style.strip()
-
-                property_type = property_type_lookup[ha_name]["property_type"].get(property_meta["DwellingType"])
-                built_form = property_type_lookup[ha_name]["built_form"].get(dwelling_style, None)
-
-                if property_type is None:
-                    if built_form in ["Semi-Detached", "Mid-Terrace", "End-Terrace", "Detached"]:
-                        property_type = "House"
-
-                    if "flat" in property_meta["Wall Construction"].lower():
-                        property_type = "Flat"
-
-                    if (property_meta["DwellingType"] == "UNKNOWN") & (property_meta["Dwelling Style"] == 0):
-                        # Hand a few specific cases
-                        property_type = "Bungalow"
-
-                    if property_meta["Street"] == "School View":
-                        property_type = "Bungalow"
-
-            else:
-                raise NotImplementedError("Implement me")
+            property_type, built_form = get_property_type_and_built_form(
+                property_meta=property_meta, ha_name=ha_name
+            )
 
             searcher = SearchEpc(
                 address1=str(property_meta["HouseNo"]),
                 postcode=property_meta["matching_postcode"],
                 auth_token=EPC_AUTH_TOKEN,
-                os_api_key=None,
+                os_api_key="",
                 full_address=property_meta["matching_address"]
             )
             searcher.ordnance_survey_client.property_type = property_type
@@ -1150,9 +1158,21 @@ def get_epc_data(
             eligibility.check_gbis_warmfront()
             eligibility.check_eco4_warmfront()
 
-            if (not eligibility.eco4_warmfront["eligible"]) and (
-                not eligibility.gbis_warmfront
-            ) and consider_penultimate_epc:
+            # We check the conditions for checking the penultimate epc
+            identified_for_gbis = property_meta["ECO Eligibility"] == "gbis"
+            identified_for_eco4 = property_meta["ECO Eligibility"] in ["eco4"]
+
+            # condition 1 - identified for gbis and not eligible
+            condition_1 = (
+                identified_for_gbis and not eligibility.gbis_warmfront and not eligibility.eco4_warmfront["eligible"]
+            ) & consider_penultimate_epc
+
+            # condition 2 - identified for eco4 and not eligible
+            condition_2 = (
+                identified_for_eco4 and not eligibility.eco4_warmfront["eligible"]
+            ) & consider_penultimate_epc
+
+            if identified_for_gbis and not eligibility.gbis_warmfront and not eligibility.eco4_warmfront["eligible"]:
                 # We check the penultimate epc
                 eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned)
                 eligibility.check_gbis_warmfront()
@@ -1161,6 +1181,10 @@ def get_epc_data(
                 # We don't update just to make data cleaning easier
                 if penultimate_epc.get("estimated") is None:
                     older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]]
+            elif identified_for_eco4 and not eligibility.eco4_warmfront["eligible"]:
+
+            else:
+                blah
 
             # If the property is a cavity wall and it's filled, we produce an estimate for the age of the cavity
             # Loft MUST be suitable
@@ -1199,6 +1223,7 @@ def get_epc_data(
                 {
                     "row_id": property_meta["asset_list_row_id"],
                     "uprn": eligibility.epc["uprn"],
+                    "is_estimated": searcher.newest_epc.get("estimated") is not None,
                     "property_type": eligibility.epc["property-type"],
                     "gbis_eligible": eligibility.gbis_warmfront,
                     "eco4_eligible": eligibility.eco4_warmfront["eligible"],
@@ -1219,7 +1244,6 @@ def get_epc_data(
                     "cavity_age": cavity_age,
                     **eligibility.walls,
                     **eligibility.roof,
-                    "is_estimated": searcher.newest_epc.get("estimated") is not None,
                     "eligibility_cavity_type": eligibility.eco4_warmfront["cavity_type"],
                     "eligibility_loft_type": eligibility.eco4_warmfront["loft_type"]
                 }
@@ -1687,38 +1711,7 @@ def analyse_ha_data(outputs, loader):
                 writer.sheets[sheet].set_column(i, i, width)
 
 
-def app():
-    """
-    This app contains the housin association analysis for HAs 1, 6, 14, 39 and 107.
-    Only HA 6 has surveys
-    :return:
-    """
-
-    use_cache = True
-
-    # List all of the data in the folder
-    directories = [str(list(entry.iterdir())[0]) for entry in DATA_FOLDER.iterdir() if entry.is_dir()]
-    # Grab the December HA figures filepath
-    december_figures_filepath = "local_data/ha_data/HA_December_figures.csv"
-
-    priority_has = ["HA1", "HA6", "HA14", "HA39", "HA107"]
-    # Filter down the directories to only the priority HAs
-    directories = [d for d in directories if d.split("/")[2] in priority_has]
-
-    loader = DataLoader(directories, december_figures_filepath, use_cache)
-    loader.load()
-    loader.ha_facts_and_figures()
-
-    # TODO: We probably need to make sure that we have all of the columns that we need
-
-    # We load in the additional data required to perform the analysis
-
-    cleaned = read_from_s3(
-        s3_file_name="cleaned_epc_data/cleaned.bson",
-        bucket_name="retrofit-data-dev"
-    )
-    cleaned = msgpack.unpackb(cleaned, raw=False)
-
+def patch_cleaned(cleaned):
     # Patch to handle the a missing description
     cleaned["floor-description"].extend(
         [
@@ -1762,16 +1755,57 @@ def app():
             x["another_property_below"] = True
             x["thermal_transmittance"] = 0
 
+    return cleaned
+
+
+def app():
+    """
+    This app contains the housin association analysis for HAs 1, 6, 14, 39 and 107.
+    Only HA 6 has surveys
+    :return:
+    """
+
+    # Determines if we want to use the cached data in s3
+    use_cache = True
+    # Determines if we want to perform the data pull
+    pull_data = True
+
+    # List all of the data in the folder
+    directories = [str(list(entry.iterdir())[0]) for entry in DATA_FOLDER.iterdir() if entry.is_dir()]
+    # Grab the December HA figures filepath
+    december_figures_filepath = "local_data/ha_data/HA_December_figures.csv"
+
+    priority_has = ["HA1", "HA6", "HA14", "HA39", "HA107"]
+    # Filter down the directories to only the priority HAs
+    directories = [d for d in directories if d.split("/")[2] in priority_has]
+
+    loader = DataLoader(directories, december_figures_filepath, use_cache)
+    loader.load()
+    loader.ha_facts_and_figures()
+
+    # We load in the additional data required to perform the analysis
+    cleaned = read_from_s3(
+        s3_file_name="cleaned_epc_data/cleaned.bson",
+        bucket_name="retrofit-data-dev"
+    )
+    cleaned = msgpack.unpackb(cleaned, raw=False)
+    cleaned = patch_cleaned(cleaned)
+
     cleaning_data = read_dataframe_from_s3_parquet(
         bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
     )
-
     created_at = datetime.now().isoformat()
 
     photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")
 
     outputs = get_epc_data(
-        loader, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds, pull_data=False
+        loader=loader,
+        cleaned=cleaned,
+        cleaning_data=cleaning_data,
+        created_at=created_at,
+        photo_supply_lookup=photo_supply_lookup,
+        floor_area_decile_thresholds=floor_area_decile_thresholds,
+        pull_data=pull_data
     )
 
     # for ha_name, datasets in outputs.items():

From 807ce14790600dce8a810847f47bc216bcddf6b3 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 26 Feb 2024 19:09:19 +0000
Subject: [PATCH 019/262] updating the code to do eligibility

---
 .../ha_15_32/ha_analysis_batch_3.py           | 42 +++++++++++++------
 1 file changed, 29 insertions(+), 13 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 5dd9b6e1..3d0964c6 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1164,15 +1164,33 @@ def get_epc_data(
 
             # condition 1 - identified for gbis and not eligible
             condition_1 = (
-                identified_for_gbis and not eligibility.gbis_warmfront and not eligibility.eco4_warmfront["eligible"]
-            ) & consider_penultimate_epc
+                              identified_for_gbis and not eligibility.gbis_warmfront and not eligibility.eco4_warmfront[
+                              "eligible"]
+                          ) & consider_penultimate_epc
 
             # condition 2 - identified for eco4 and not eligible
-            condition_2 = (
-                identified_for_eco4 and not eligibility.eco4_warmfront["eligible"]
-            ) & consider_penultimate_epc
+            condition_2 = (identified_for_eco4 and not eligibility.eco4_warmfront[
+                "eligible"]) & consider_penultimate_epc
 
-            if identified_for_gbis and not eligibility.gbis_warmfront and not eligibility.eco4_warmfront["eligible"]:
+            # successfully identigied gbis
+            condition_3 = (
+                identified_for_gbis and (eligibility.gbis_warmfront or eligibility.eco4_warmfront["eligible"])
+            )
+
+            # Nothing identified
+            condition_4 = (
+                not identified_for_gbis and not identified_for_eco4 and not eligibility.gbis_warmfront and not
+            eligibility.eco4_warmfront["eligible"]
+            )
+
+            # Not identified but seemingly eligible for eco4 or gbis
+            condition_5 = (
+                not identified_for_gbis and not identified_for_eco4 and (
+                eligibility.eco4_warmfront["eligible"] or eligibility.gbis_warmfront
+            )
+            )
+
+            if condition_1 or condition_2:
                 # We check the penultimate epc
                 eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned)
                 eligibility.check_gbis_warmfront()
@@ -1181,10 +1199,11 @@ def get_epc_data(
                 # We don't update just to make data cleaning easier
                 if penultimate_epc.get("estimated") is None:
                     older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]]
-            elif identified_for_eco4 and not eligibility.eco4_warmfront["eligible"]:
-
+            elif condition_3 or condition_4 or condition_5:
+                # If we have successfully identified for gbis, we don't need to check the penultimate epc
+                pass
             else:
-                blah
+                NotImplementedError("Implement me")
 
             # If the property is a cavity wall and it's filled, we produce an estimate for the age of the cavity
             # Loft MUST be suitable
@@ -1229,10 +1248,7 @@ def get_epc_data(
                     "eco4_eligible": eligibility.eco4_warmfront["eligible"],
                     "eco4_message": eligibility.eco4_warmfront["message"],
                     "sap": float(eligibility.epc["current-energy-efficiency"]),
-                    "gbis_eligible_future": eligibility.gbis["eligible"],
-                    "gbis_eligible_future_message": eligibility.gbis["message"],
-                    "eco4_eligible_future": eligibility.eco4["eligible"],
-                    "eco4_eligible_future_message": eligibility.eco4["message"],
+
                     # Property components
                     "roof": eligibility.roof["clean_description"],
                     "walls": eligibility.walls["clean_description"],

From 69dcc73363c43d12076b887707db802384046e07 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 26 Feb 2024 19:18:58 +0000
Subject: [PATCH 020/262] debugging null lodgement-date

---
 backend/SearchEpc.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py
index 4f6fd33d..4a3f371a 100644
--- a/backend/SearchEpc.py
+++ b/backend/SearchEpc.py
@@ -609,7 +609,11 @@ class SearchEpc:
         # Insert an estimated lodgement datetime, with a weighted average
         estimated_epc["lodgement-datetime"] = self.calculate_weighted_lodgement_datetime(epc_data=epc_data)
         # Extract logement date
-        estimated_epc["lodgement-date"] = estimated_epc["lodgement-datetime"].strftime("%Y-%m-%d")
+        # It is possible that there is still no lodgement date, so we need to handle this
+        if pd.isnull(estimated_epc["lodgement-datetime"]):
+            estimated_epc["lodgement-date"] = None
+        else:
+            estimated_epc["lodgement-date"] = estimated_epc["lodgement-datetime"].strftime("%Y-%m-%d")
 
         estimated_epc["postcode"] = self.postcode
         estimated_epc["uprn"] = self.uprn

From b80ffda392e0601f08dd376cfaacba73e733fc9c Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 26 Feb 2024 19:29:46 +0000
Subject: [PATCH 021/262] updating eligibility pipeline to factor in ciga

---
 .../ha_15_32/ha_analysis_batch_3.py           | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 3d0964c6..ecbb4e0a 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1159,8 +1159,11 @@ def get_epc_data(
             eligibility.check_eco4_warmfront()
 
             # We check the conditions for checking the penultimate epc
-            identified_for_gbis = property_meta["ECO Eligibility"] == "gbis"
+            identified_for_gbis = property_meta["ECO Eligibility"] in ["gbis"]
             identified_for_eco4 = property_meta["ECO Eligibility"] in ["eco4"]
+            subject_to_ciga = property_meta["ECO Eligibility"] in [
+                "eco4 (subject to ciga)", "eco4 - passed ciga", "failed ciga"
+            ]
 
             # condition 1 - identified for gbis and not eligible
             condition_1 = (
@@ -1179,8 +1182,11 @@ def get_epc_data(
 
             # Nothing identified
             condition_4 = (
-                not identified_for_gbis and not identified_for_eco4 and not eligibility.gbis_warmfront and not
-            eligibility.eco4_warmfront["eligible"]
+                not identified_for_gbis
+                and not identified_for_eco4
+                and not eligibility.gbis_warmfront
+                and not subject_to_ciga
+                and not eligibility.eco4_warmfront["eligible"]
             )
 
             # Not identified but seemingly eligible for eco4 or gbis
@@ -1190,6 +1196,10 @@ def get_epc_data(
             )
             )
 
+            condition_6 = (
+                subject_to_ciga and not eligibility.eco4_warmfront["eligible"]
+            )
+
             if condition_1 or condition_2:
                 # We check the penultimate epc
                 eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned)
@@ -1199,8 +1209,7 @@ def get_epc_data(
                 # We don't update just to make data cleaning easier
                 if penultimate_epc.get("estimated") is None:
                     older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]]
-            elif condition_3 or condition_4 or condition_5:
-                # If we have successfully identified for gbis, we don't need to check the penultimate epc
+            elif condition_3 or condition_4 or condition_5 or condition_6:
                 pass
             else:
                 NotImplementedError("Implement me")

From 281c6f626c833a482a199ba120e1b0e8b1869cf1 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 26 Feb 2024 23:23:29 +0000
Subject: [PATCH 022/262] working on eligibility

---
 backend/Property.py                           |   3 +-
 etl/eligibility/Eligibility.py                |  90 ++++++++--
 etl/eligibility/ha_15_32/app.py               |  18 +-
 .../ha_15_32/ha_analysis_batch_3.py           | 156 +++++++++---------
 4 files changed, 167 insertions(+), 100 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index 4a55e504..f86e33dc 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -147,7 +147,8 @@ class Property:
         # self.base_difference_record.df
 
     def adjust_difference_record_with_recommendations(
-        self, property_recommendations,
+        self,
+        property_recommendations,
         property_representative_recommendations
     ):
         """
diff --git a/etl/eligibility/Eligibility.py b/etl/eligibility/Eligibility.py
index b09d2df5..bda34923 100644
--- a/etl/eligibility/Eligibility.py
+++ b/etl/eligibility/Eligibility.py
@@ -145,6 +145,7 @@ class Eligibility:
                 "reason": None,
                 "thickness_classification": thickness_classification
             }
+            return
 
         # Insulation is already thick enough
         self.loft = {
@@ -164,8 +165,10 @@ class Eligibility:
         """
 
         is_cavity = self.walls["is_cavity_wall"]
-        is_empty = (not self.walls["is_filled_cavity"]) or (
+        is_empty = (not self.walls["is_filled_cavity"])
+        is_as_built = (
             self.walls["is_as_built"] and self.walls["insulation_thickness"] not in ["average", "above average"]
+            and self.walls["is_assumed"]
         )
         is_partial_filled = "partial" in self.walls["clean_description"].lower()
         # We look for potentially under performing cavities - anything that is assumed, as built and insulated
@@ -175,6 +178,7 @@ class Eligibility:
 
         is_unfilled_cavity = is_cavity and (is_empty and not is_partial_filled)
         is_partial_filled_cavity = is_cavity and is_partial_filled
+        is_assumed_filled_cavity = is_cavity and is_as_built
         is_underperforming_cavity = is_cavity and is_underperforming
 
         # Check if it has internal or external wall insulation
@@ -195,6 +199,13 @@ class Eligibility:
             }
             return
 
+        if is_assumed_filled_cavity:
+            self.cavity = {
+                "suitability": True,
+                "type": "as built assumed",
+            }
+            return
+
         if is_partial_filled_cavity:
             self.cavity = {
                 "suitability": True,
@@ -345,7 +356,7 @@ class Eligibility:
             int(self.epc["current-energy-efficiency"]) <= 68
         )
 
-    def check_eco4_warmfront(self, post_retrofit_sap=None):
+    def check_eco4_warmfront(self):
         """
         This funciton will check if the property is eligible for funding under the ECO4 scheme
 
@@ -377,49 +388,100 @@ class Eligibility:
         self.cavity_insulation()
         self.loft_insulation()
 
-        # make sure conditions 2 and 3 are true
-        is_eligible = self.cavity["suitability"] & self.loft["suitability"]
-
-        if current_sap >= 69:
+        # Case 1: No conditions meet
+        if not self.cavity["suitability"] and (self.loft["thickness"] > 100) and current_sap >= 55:
             self.eco4_warmfront = {
                 "eligible": False,
-                "message": "SAP too high",
+                "strict": False,
+                "message": "All conditions fail",
                 "cavity_type": self.cavity["type"],
                 "loft_type": self.loft["thickness_classification"]
             }
             return
 
-        if not is_eligible and current_sap >= 55:
+        # Case 2 - perfect match
+        if (self.cavity["type"] == "empty") and (self.loft["thickness"] <= 100) and (current_sap < 55):
             self.eco4_warmfront = {
-                "eligible": False,
-                "message": "failed fabric and SAP check",
+                "eligible": True,
+                "strict": True,
+                "message": "Perfect suitability",
                 "cavity_type": self.cavity["type"],
                 "loft_type": self.loft["thickness_classification"]
             }
             return
 
-        if not is_eligible and current_sap < 55:
+        # Case 2.5 - near perfect match - but we would not recommend this using the model
+        if self.cavity["suitability"] and (self.loft["thickness"] <= 100) and (current_sap < 55):
+            self.eco4_warmfront = {
+                "eligible": True,
+                "strict": True,
+                "message": "Perfect suitability",
+                "cavity_type": self.cavity["type"],
+                "loft_type": self.loft["thickness_classification"]
+            }
+            return
+
+        # Case 3 - cavity is suitable, loft is not, sap is good
+        if self.cavity["suitability"] and (self.loft["thickness"] > 100) and (current_sap < 55):
+            self.eco4_warmfront = {
+                "eligible": True,
+                "strict": False,
+                "message": "Meets cavity and sap",
+                "cavity_type": self.cavity["type"],
+                "loft_type": self.loft["thickness_classification"]
+            }
+            return
+
+        # Case 4 - cavity is not suitable, loft is, sap is not - we say this is not elifible
+        if not self.cavity["suitability"] and (self.loft["thickness"] <= 100) and (current_sap < 55):
             self.eco4_warmfront = {
                 "eligible": False,
+                "strict": False,
                 "message": "failed fabric check",
                 "cavity_type": self.cavity["type"],
                 "loft_type": self.loft["thickness_classification"]
             }
             return
 
-        if is_eligible and current_sap >= 55:
+        # Case 5 - cavity and loft suitable, sap too high
+        if self.cavity["suitability"] and (self.loft["thickness"] <= 100) and (current_sap >= 55):
             self.eco4_warmfront = {
                 "eligible": True,
+                "strict": False,
                 "message": "Meets fabric, fails SAP check",
                 "cavity_type": self.cavity["type"],
                 "loft_type": self.loft["thickness_classification"]
             }
             return
 
-        if is_eligible and current_sap < 55:
+        # Case 6 - meets just cavity
+        if self.cavity["suitability"] and (self.loft["thickness"] > 100) and (current_sap >= 55):
             self.eco4_warmfront = {
                 "eligible": True,
-                "message": "Meets fabric and SAP check",
+                "strict": False,
+                "message": "Meets just cavity",
+                "cavity_type": self.cavity["type"],
+                "loft_type": self.loft["thickness_classification"]
+            }
+            return
+
+        # Case 7 - fails cavity, loft but meets sap
+        if not self.cavity["suitability"] and (self.loft["thickness"] > 100) and (current_sap < 55):
+            self.eco4_warmfront = {
+                "eligible": False,
+                "strict": False,
+                "message": "Fails cavity nd lodt, meets SAP",
+                "cavity_type": self.cavity["type"],
+                "loft_type": self.loft["thickness_classification"]
+            }
+            return
+
+        # Case 8 - fails cavity, meets loft, fails sap
+        if not self.cavity["suitability"] and (self.loft["thickness"] <= 100) and (current_sap >= 55):
+            self.eco4_warmfront = {
+                "eligible": False,
+                "strict": False,
+                "message": "Fails cavity, meets loft, fails SAP",
                 "cavity_type": self.cavity["type"],
                 "loft_type": self.loft["thickness_classification"]
             }
diff --git a/etl/eligibility/ha_15_32/app.py b/etl/eligibility/ha_15_32/app.py
index a68bf272..378a0e83 100644
--- a/etl/eligibility/ha_15_32/app.py
+++ b/etl/eligibility/ha_15_32/app.py
@@ -387,17 +387,19 @@ def prepare_model_data_row(
     }
 
     simulations = [
-        [cavity_simulation],
-        [loft_simulation]
+        cavity_simulation,
+        loft_simulation
     ]
 
-    p.adjust_difference_record_with_recommendations(simulations)
+    recommendation_record = p.base_difference_record.df.to_dict("records")[0].copy()
+    scoring_dict = p.create_recommendation_scoring_data(
+        property_id=p.id,
+        recommendation_record=recommendation_record,
+        recommendations=simulations,
+        primary_recommendation_id=cavity_simulation["recommendation_id"]
+    )
 
-    # Make sure we definitely have the correct data
-    cavity_scoring = [x for x in p.recommendations_scoring_data if "cavity" in x["id"]][0]
-    loft_scoring = [x for x in p.recommendations_scoring_data if "loft" in x["id"]][0]
-
-    return [cavity_scoring, loft_scoring]
+    return [scoring_dict]
 
 
 def get_ha_32data(ha_data, cleaned, cleaning_data, created_at):
diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index ecbb4e0a..239fce65 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1114,7 +1114,7 @@ def get_epc_data(
         results = []
         scoring_data = []
         nodata = []
-        for index, property_meta in tqdm(asset_list.iterrows(), total=len(asset_list)):
+        for index, property_meta in tqdm(eco4.iterrows(), total=len(eco4)):
 
             if property_meta["matching_postcode"] is None:
                 continue
@@ -1226,10 +1226,6 @@ def get_epc_data(
                 # We check the age of the cavity and if it's particularly old, we flag it
                 cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned)
 
-            # Full checks
-            eligibility.check_gbis()
-            eligibility.check_eco4()
-
             if eligibility.eco4_warmfront["eligible"]:
                 if eligibility.epc["uprn"] == "":
                     eligibility.epc["uprn"] = int(property_meta["asset_list_row_id"].split(ha_name)[1])
@@ -1256,8 +1252,8 @@ def get_epc_data(
                     "gbis_eligible": eligibility.gbis_warmfront,
                     "eco4_eligible": eligibility.eco4_warmfront["eligible"],
                     "eco4_message": eligibility.eco4_warmfront["message"],
+                    "eco4_strict": eligibility.eco4_warmfront["strict"],
                     "sap": float(eligibility.epc["current-energy-efficiency"]),
-
                     # Property components
                     "roof": eligibility.roof["clean_description"],
                     "walls": eligibility.walls["clean_description"],
@@ -1267,91 +1263,97 @@ def get_epc_data(
                     "date_epc": eligibility.epc["lodgement-date"],
                     "loft_thickness": eligibility.roof["insulation_thickness"],
                     "cavity_age": cavity_age,
-                    **eligibility.walls,
-                    **eligibility.roof,
                     "eligibility_cavity_type": eligibility.eco4_warmfront["cavity_type"],
                     "eligibility_loft_type": eligibility.eco4_warmfront["loft_type"]
                 }
             )
 
-        scoring_df = pd.DataFrame(scoring_data)
-        scoring_df = scoring_df.drop(
-            columns=[
-                "rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
-                "carbon_ending"
-            ]
-        )
-
-        model_api = ModelApi(portfolio_id="-".join([ha_name, "eligibility"]), timestamp=created_at)
-
-        # scoring_df["is_community"].value_counts()
-        # scoring_df[scoring_df["is_community"] == "Unknown"]
-        # property_meta = asset_list[asset_list["asset_list_row_id"] == "ha_67238"].squeeze()
-
-        all_predictions = model_api.predict_all(
-            df=scoring_df,
-            bucket="retrofit-data-dev",
-            prediction_buckets={
-                "sap_change_predictions": "retrofit-sap-predictions-dev",
-                "heat_demand_predictions": "retrofit-heat-predictions-dev",
-                "carbon_change_predictions": "retrofit-carbon-predictions-dev"
-            }
-        )
-
         results_df = pd.DataFrame(results)
+        scoring_df = pd.DataFrame(scoring_data)
+        results_df["post_install_sap"] = None
+        results_df["eligibility_classification"] = None
 
-        predictions = all_predictions["sap_change_predictions"].copy()
+        eco4 = asset_list[asset_list["ECO Eligibility"] == "eco4"]
+        z = results_df[results_df["row_id"].isin(eco4["asset_list_row_id"])]
+        z["walls"].value_counts()
+        z1 = z[z["walls"] == "Cavity wall, as built, no insulation"]
+        k = z1[z1["roof"] == "Pitched, 100 mm loft insulation"]
+        property_meta = asset_list[asset_list["asset_list_row_id"] == k["row_id"].values[0]].squeeze()
+        z[z["walls"] == "Cavity wall, as built, insulated"]["roof"].value_counts()
+        z[z["walls"] == "Cavity wall, as built, insulated"]["roof"].value_counts()
 
-        predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
-            results_df[["row_id", "sap"]], how="left", on="row_id"
-        )
-        predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
-        predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()
+        if not scoring_df.empty:
+            scoring_df = scoring_df.drop(
+                columns=[
+                    "rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
+                    "carbon_ending"
+                ]
+            )
 
-        results_df = results_df.merge(
-            predictions[["sap_uplift", "row_id"]],
-            how="left",
-            on="row_id"
-        )
-        results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]
+            model_api = ModelApi(portfolio_id="-".join([ha_name, "eligibility"]), timestamp=created_at)
 
-        eligibility_assessment = []
-        for _, row in results_df[results_df["eco4_eligible"] == True].iterrows():
-            # The upgrade requirements are dependent on the current SAP
-
-            # If the property is an F or G, it only needs to upgrade to an %
-            if row["sap"] <= 38:
-                if row["post_install_sap"] >= 57:
-                    eligibility_classification = "highest confidence"
-                elif row["post_install_sap"] >= 55:
-                    eligibility_classification = "high confidence"
-                elif row["post_install_sap"] >= 53:
-                    eligibility_classification = "medium confidence"
-                else:
-                    eligibility_classification = "unlikely"
-            else:
-
-                if row["post_install_sap"] >= 71:
-                    eligibility_classification = "highest confidence"
-                elif row["post_install_sap"] >= 69:
-                    eligibility_classification = "high confidence"
-                elif row["post_install_sap"] >= 67:
-                    eligibility_classification = "medium confidence"
-                else:
-                    eligibility_classification = "unlikely"
-
-            eligibility_assessment.append(
-                {
-                    "row_id": row["row_id"],
-                    "eligibility_classification": eligibility_classification
+            all_predictions = model_api.predict_all(
+                df=scoring_df,
+                bucket="retrofit-data-dev",
+                prediction_buckets={
+                    "sap_change_predictions": "retrofit-sap-predictions-dev",
+                    "heat_demand_predictions": "retrofit-heat-predictions-dev",
+                    "carbon_change_predictions": "retrofit-carbon-predictions-dev"
                 }
             )
 
-        eligibility_assessment = pd.DataFrame(eligibility_assessment)
+            predictions = all_predictions["sap_change_predictions"].copy()
 
-        results_df = results_df.merge(
-            eligibility_assessment, how="left", on="row_id"
-        )
+            predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
+                results_df[["row_id", "sap"]], how="left", on="row_id"
+            )
+            predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
+            predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()
+
+            results_df = results_df.merge(
+                predictions[["sap_uplift", "row_id"]],
+                how="left",
+                on="row_id"
+            )
+            results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]
+
+            eligibility_assessment = []
+            for _, row in results_df[results_df["eco4_eligible"] == True].iterrows():
+                # The upgrade requirements are dependent on the current SAP
+
+                # If the property is an F or G, it only needs to upgrade to an %
+                if row["sap"] <= 38:
+                    if row["post_install_sap"] >= 57:
+                        eligibility_classification = "highest confidence"
+                    elif row["post_install_sap"] >= 55:
+                        eligibility_classification = "high confidence"
+                    elif row["post_install_sap"] >= 53:
+                        eligibility_classification = "medium confidence"
+                    else:
+                        eligibility_classification = "unlikely"
+                else:
+
+                    if row["post_install_sap"] >= 71:
+                        eligibility_classification = "highest confidence"
+                    elif row["post_install_sap"] >= 69:
+                        eligibility_classification = "high confidence"
+                    elif row["post_install_sap"] >= 67:
+                        eligibility_classification = "medium confidence"
+                    else:
+                        eligibility_classification = "unlikely"
+
+                eligibility_assessment.append(
+                    {
+                        "row_id": row["row_id"],
+                        "eligibility_classification": eligibility_classification
+                    }
+                )
+
+            eligibility_assessment = pd.DataFrame(eligibility_assessment)
+
+            results_df = results_df.merge(
+                eligibility_assessment, how="left", on="row_id"
+            )
 
         # We store the results in S3 as a pickle
         save_pickle_to_s3(

From f4d27aa68dea5595037d55e7ad8c54cc9d7967ad Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 26 Feb 2024 23:30:06 +0000
Subject: [PATCH 023/262] fixing eligibility

---
 etl/eligibility/Eligibility.py | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/etl/eligibility/Eligibility.py b/etl/eligibility/Eligibility.py
index bda34923..15e3158f 100644
--- a/etl/eligibility/Eligibility.py
+++ b/etl/eligibility/Eligibility.py
@@ -421,8 +421,19 @@ class Eligibility:
             }
             return
 
+        # Case 3 - cavity is suitable, loft is within 150mm, sap is good
+        if self.cavity["suitability"] and (self.loft["thickness"] <= 150) and (current_sap < 55):
+            self.eco4_warmfront = {
+                "eligible": True,
+                "strict": False,
+                "message": "Meets cavity, loft borderline, meets sap",
+                "cavity_type": self.cavity["type"],
+                "loft_type": self.loft["thickness_classification"]
+            }
+            return
+
         # Case 3 - cavity is suitable, loft is not, sap is good
-        if self.cavity["suitability"] and (self.loft["thickness"] > 100) and (current_sap < 55):
+        if self.cavity["suitability"] and (self.loft["thickness"] > 150) and (current_sap < 55):
             self.eco4_warmfront = {
                 "eligible": True,
                 "strict": False,
@@ -444,7 +455,7 @@ class Eligibility:
             return
 
         # Case 5 - cavity and loft suitable, sap too high
-        if self.cavity["suitability"] and (self.loft["thickness"] <= 100) and (current_sap >= 55):
+        if self.cavity["suitability"] and (self.loft["thickness"] <= 150) and (current_sap >= 55):
             self.eco4_warmfront = {
                 "eligible": True,
                 "strict": False,
@@ -470,7 +481,7 @@ class Eligibility:
             self.eco4_warmfront = {
                 "eligible": False,
                 "strict": False,
-                "message": "Fails cavity nd lodt, meets SAP",
+                "message": "Fails cavity and loft, meets SAP",
                 "cavity_type": self.cavity["type"],
                 "loft_type": self.loft["thickness_classification"]
             }

From 97ce8dc32ea0edd3d24ecefe942a0eb4e8df418e Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 26 Feb 2024 23:36:45 +0000
Subject: [PATCH 024/262] fixing eligibility

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 239fce65..1ba75e2b 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1114,7 +1114,7 @@ def get_epc_data(
         results = []
         scoring_data = []
         nodata = []
-        for index, property_meta in tqdm(eco4.iterrows(), total=len(eco4)):
+        for index, property_meta in tqdm(asset_list.iterrows(), total=len(asset_list)):
 
             if property_meta["matching_postcode"] is None:
                 continue
@@ -1218,10 +1218,7 @@ def get_epc_data(
             # Loft MUST be suitable
             cavity_age = None
             if (
-                eligibility.walls["is_cavity_wall"] and
-                eligibility.walls["is_filled_cavity"] and
-                eligibility.loft["suitability"] and
-                eligibility.eco4_warmfront["message"] == "Failed due to full cavity - check cavity age"
+                identified_for_eco4 and not eligibility.eco4_warmfront["eligible"]
             ):
                 # We check the age of the cavity and if it's particularly old, we flag it
                 cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned)

From 0fbf00451291a09349c0bdeeb67bbc80bd4dc9bc Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 27 Feb 2024 10:20:55 +0000
Subject: [PATCH 025/262] Expanding gbis eligibility checks

---
 etl/eligibility/Eligibility.py                | 44 +++++++++++++++++--
 .../ha_15_32/ha_analysis_batch_3.py           | 20 +++++----
 etl/epc/Dataset.py                            | 16 +++----
 3 files changed, 59 insertions(+), 21 deletions(-)

diff --git a/etl/eligibility/Eligibility.py b/etl/eligibility/Eligibility.py
index 15e3158f..f7a5ed98 100644
--- a/etl/eligibility/Eligibility.py
+++ b/etl/eligibility/Eligibility.py
@@ -352,9 +352,41 @@ class Eligibility:
         # Check if the property is suitable for cavity wall
         self.cavity_insulation()
 
-        self.gbis_warmfront = (self.cavity["suitability"]) and (
-            int(self.epc["current-energy-efficiency"]) <= 68
-        )
+        current_sap = int(self.epc["current-energy-efficiency"])
+        # We have a strict suitability check and a non-strict check
+
+        # Perfect strictness
+        if (self.cavity["type"] == "empty") and (current_sap < 69):
+            self.gbis_warmfront = {
+                "eligible": True,
+                "strict": True,
+                "message": "Perfect suitability",
+            }
+            return
+
+        # Near perfect
+        if self.cavity["suitability"] and (current_sap < 55):
+            self.gbis_warmfront = {
+                "eligible": True,
+                "strict": True,
+                "message": "Near perfect suitability",
+            }
+            return
+
+        # Suitable cavity, but high sap
+        if self.cavity["suitability"] and (current_sap >= 55):
+            self.gbis_warmfront = {
+                "eligible": True,
+                "strict": False,
+                "message": "Meets cavity, fails SAP check",
+            }
+            return
+
+        self.gbis_warmfront = {
+            "eligible": False,
+            "strict": False,
+            "message": "All conditions fail",
+        }
 
     def check_eco4_warmfront(self):
         """
@@ -388,6 +420,10 @@ class Eligibility:
         self.cavity_insulation()
         self.loft_insulation()
 
+        # We put in a placeholder when the roof is not a loft
+        if self.loft["reason"] == "roof not loft":
+            self.loft["thickness"] = 999
+
         # Case 1: No conditions meet
         if not self.cavity["suitability"] and (self.loft["thickness"] > 100) and current_sap >= 55:
             self.eco4_warmfront = {
@@ -415,7 +451,7 @@ class Eligibility:
             self.eco4_warmfront = {
                 "eligible": True,
                 "strict": True,
-                "message": "Perfect suitability",
+                "message": "Near perfect suitability",
                 "cavity_type": self.cavity["type"],
                 "loft_type": self.loft["thickness_classification"]
             }
diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 1ba75e2b..28efadd0 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1270,15 +1270,6 @@ def get_epc_data(
         results_df["post_install_sap"] = None
         results_df["eligibility_classification"] = None
 
-        eco4 = asset_list[asset_list["ECO Eligibility"] == "eco4"]
-        z = results_df[results_df["row_id"].isin(eco4["asset_list_row_id"])]
-        z["walls"].value_counts()
-        z1 = z[z["walls"] == "Cavity wall, as built, no insulation"]
-        k = z1[z1["roof"] == "Pitched, 100 mm loft insulation"]
-        property_meta = asset_list[asset_list["asset_list_row_id"] == k["row_id"].values[0]].squeeze()
-        z[z["walls"] == "Cavity wall, as built, insulated"]["roof"].value_counts()
-        z[z["walls"] == "Cavity wall, as built, insulated"]["roof"].value_counts()
-
         if not scoring_df.empty:
             scoring_df = scoring_df.drop(
                 columns=[
@@ -1763,6 +1754,17 @@ def patch_cleaned(cleaned):
         ]
     )
 
+    cleaned["roof-description"].extend(
+        [
+            {'original_description': 'Pitched, 300+mm loft insulation',
+             'clean_description': 'Pitched, 300+ mm loft insulation', 'thermal_transmittance': None,
+             'thermal_transmittance_unit': None, 'is_pitched': True, 'is_roof_room': False, 'is_loft': True,
+             'is_flat': False, 'is_thatched': False, 'is_at_rafters': False, 'is_assumed': False,
+             'has_dwelling_above': False, 'is_valid': True, 'insulation_thickness': '300+'
+             }
+        ]
+    )
+
     # Patch mainheatcont-description
     cleaned["mainheatcont-description"].extend(
         [
diff --git a/etl/epc/Dataset.py b/etl/epc/Dataset.py
index dac829e2..7040d66c 100644
--- a/etl/epc/Dataset.py
+++ b/etl/epc/Dataset.py
@@ -203,11 +203,11 @@ class TrainingDataset(BaseDataset):
         common_cols = [[col + "_starting", col + "_ending"] for col in common_cols]
 
         self.df = self.df.loc[
-            :,
-            no_suffix_cols
-            + only_ending_cols
-            + [col for cols in common_cols for col in cols],
-        ]
+                  :,
+                  no_suffix_cols
+                  + only_ending_cols
+                  + [col for cols in common_cols for col in cols],
+                  ]
 
     def _remove_abnormal_change_in_floor_area(self):
         """
@@ -509,7 +509,7 @@ class TrainingDataset(BaseDataset):
                     expanded_df["is_sandstone_or_limestone"]
                     == expanded_df["is_sandstone_or_limestone_ending"]
                 )
-            ]
+                ]
         elif component == "floor":
             expanded_df = expanded_df[
                 (expanded_df["is_suspended"] == expanded_df["is_suspended_ending"])
@@ -526,7 +526,7 @@ class TrainingDataset(BaseDataset):
                     expanded_df["is_to_external_air"]
                     == expanded_df["is_to_external_air_ending"]
                 )
-            ]
+                ]
         elif component == "roof":
             expanded_df = expanded_df[
                 (expanded_df["is_pitched"] == expanded_df["is_pitched_ending"])
@@ -539,7 +539,7 @@ class TrainingDataset(BaseDataset):
                     expanded_df["has_dwelling_above"]
                     == expanded_df["has_dwelling_above_ending"]
                 )
-            ]
+                ]
 
         return expanded_df
 

From 7b080094fdf08daf720ac01c10bfad380a917062 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 27 Feb 2024 11:02:12 +0000
Subject: [PATCH 026/262] created distributed scoring for prediction

---
 .../ha_15_32/ha_analysis_batch_3.py           | 46 ++++++++++++-------
 1 file changed, 30 insertions(+), 16 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 28efadd0..3dc4d45f 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1166,10 +1166,9 @@ def get_epc_data(
             ]
 
             # condition 1 - identified for gbis and not eligible
-            condition_1 = (
-                              identified_for_gbis and not eligibility.gbis_warmfront and not eligibility.eco4_warmfront[
-                              "eligible"]
-                          ) & consider_penultimate_epc
+            condition_1 = (identified_for_gbis and not eligibility.gbis_warmfront
+                           and not eligibility.eco4_warmfront["eligible"]
+                           ) & consider_penultimate_epc
 
             # condition 2 - identified for eco4 and not eligible
             condition_2 = (identified_for_eco4 and not eligibility.eco4_warmfront[
@@ -1246,10 +1245,12 @@ def get_epc_data(
                     "uprn": eligibility.epc["uprn"],
                     "is_estimated": searcher.newest_epc.get("estimated") is not None,
                     "property_type": eligibility.epc["property-type"],
-                    "gbis_eligible": eligibility.gbis_warmfront,
                     "eco4_eligible": eligibility.eco4_warmfront["eligible"],
                     "eco4_message": eligibility.eco4_warmfront["message"],
                     "eco4_strict": eligibility.eco4_warmfront["strict"],
+                    "gbis_eligible": eligibility.gbis_warmfront["eligible"],
+                    "gbis_message": eligibility.gbis_warmfront["message"],
+                    "gbis_strict": eligibility.gbis_warmfront["strict"],
                     "sap": float(eligibility.epc["current-energy-efficiency"]),
                     # Property components
                     "roof": eligibility.roof["clean_description"],
@@ -1279,24 +1280,32 @@ def get_epc_data(
             )
 
             model_api = ModelApi(portfolio_id="-".join([ha_name, "eligibility"]), timestamp=created_at)
+            model_api.MODEL_PREFIXES = ["sap_change_predictions"]
 
-            all_predictions = model_api.predict_all(
-                df=scoring_df,
-                bucket="retrofit-data-dev",
-                prediction_buckets={
-                    "sap_change_predictions": "retrofit-sap-predictions-dev",
-                    "heat_demand_predictions": "retrofit-heat-predictions-dev",
-                    "carbon_change_predictions": "retrofit-carbon-predictions-dev"
-                }
-            )
+            scoring_df["id"] = scoring_df["id"] + "phase=0"
+            # We split up the scoring_df and score
+            predictions = []
+            to_loop_over = range(0, scoring_df.shape[0], 400)
+            for chunk in tqdm(to_loop_over, total=len(to_loop_over)):
+                predictions_dict = model_api.predict_all(
+                    df=scoring_df.iloc[chunk:chunk + 400],
+                    bucket="retrofit-data-dev",
+                    prediction_buckets={
+                        "sap_change_predictions": "retrofit-sap-predictions-dev",
+                    }
+                )
 
-            predictions = all_predictions["sap_change_predictions"].copy()
+                predictions.append(predictions_dict["sap_change_predictions"])
+
+            predictions = pd.concat(predictions)
+            predictions_size = predictions.shape[0]
 
             predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
                 results_df[["row_id", "sap"]], how="left", on="row_id"
             )
+            if predictions.shape[0] != predictions_size:
+                raise ValueError("Predictions size has changed")
             predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
-            predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()
 
             results_df = results_df.merge(
                 predictions[["sap_uplift", "row_id"]],
@@ -1339,9 +1348,12 @@ def get_epc_data(
 
             eligibility_assessment = pd.DataFrame(eligibility_assessment)
 
+            # Make sure the results haven't changed in size
             results_df = results_df.merge(
                 eligibility_assessment, how="left", on="row_id"
             )
+            if results_df.shape[0] != len(results):
+                raise ValueError("results has changed size")
 
         # We store the results in S3 as a pickle
         save_pickle_to_s3(
@@ -1809,6 +1821,8 @@ def app():
     loader.load()
     loader.ha_facts_and_figures()
 
+    loader.facts_and_figures.to_csv("facts_and_figures.csv", index=False)
+
     # We load in the additional data required to perform the analysis
     cleaned = read_from_s3(
         s3_file_name="cleaned_epc_data/cleaned.bson",

From 3ef346b248ed89e04a08d07a0231db987809521b Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 27 Feb 2024 13:12:54 +0000
Subject: [PATCH 027/262] patching roof description in cleaned further

---
 .../ha_15_32/ha_analysis_batch_3.py           | 60 ++++++++++++++++++-
 etl/epc/Dataset.py                            | 28 +++++++++
 2 files changed, 87 insertions(+), 1 deletion(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 3dc4d45f..e261710e 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1755,7 +1755,16 @@ def patch_cleaned(cleaned):
         ]
     )
 
-    # We treat unknown loft insulation as no insulation
+    cleaned["roof-description"].extend(
+        [
+            {'original_description': 'Pitched, Unknown loft insulation', 'clean_description': 'Pitched, no insulation',
+             'thermal_transmittance': None, 'thermal_transmittance_unit': None, 'is_pitched': True,
+             'is_roof_room': False,
+             'is_loft': False, 'is_flat': False, 'is_thatched': False, 'is_at_rafters': False, 'is_assumed': True,
+             'has_dwelling_above': False, 'is_valid': True, 'insulation_thickness': 'none'}
+        ]
+    )
+
     cleaned["roof-description"].extend(
         [
             {'original_description': 'Pitched, Unknown loft insulation', 'clean_description': 'Pitched, no insulation',
@@ -1777,6 +1786,55 @@ def patch_cleaned(cleaned):
         ]
     )
 
+    thermal_transmittance_values = list(np.arange(0, 2, 0.01))
+    for ttv in thermal_transmittance_values:
+        ttv_roundeded = round(ttv, 2)
+        # We look for an instance of that thermal transmittance value
+        rec = [
+            x for x in cleaned["roof-description"] if
+            (x["thermal_transmittance"] == ttv_roundeded) and "Average thermal transmittance" in x["clean_description"]
+        ]
+
+        if rec:
+            continue
+        else:
+            # We patch the record
+            cleaned["roof-description"].extend(
+                [{'original_description': f'Average thermal transmittance {ttv_roundeded} W/m-¦K',
+                  'clean_description': f'Average thermal transmittance {ttv_roundeded} w/m-¦k',
+                  'thermal_transmittance': ttv_roundeded,
+                  'thermal_transmittance_unit': 'w/m-¦k', 'is_pitched': False, 'is_roof_room': False, 'is_loft': False,
+                  'is_flat': False, 'is_thatched': False, 'is_at_rafters': False, 'is_assumed': False,
+                  'has_dwelling_above': False, 'is_valid': True, 'insulation_thickness': None}]
+            )
+
+    # We also patch a funny unit value we found
+    for ttv in thermal_transmittance_values:
+        ttv_rounded = round(ttv, 2)
+        # We look for an instance of that thermal transmittance value
+        rec = [
+            x for x in cleaned["roof-description"] if
+            (x["thermal_transmittance"] == ttv_rounded) and "Average thermal transmittance" in x["clean_description"]
+            and x["thermal_transmittance_unit"] == "w/m?K"
+        ]
+
+        if rec:
+            continue
+        else:
+            # We patch the record
+            ttv_string = str(ttv_rounded)
+            if len(ttv_string) == 3:
+                ttv_string = f"{ttv_string}0"
+
+            cleaned["roof-description"].extend(
+                [{'original_description': f'Average thermal transmittance {ttv_string} W/m?K',
+                  'clean_description': f'Average thermal transmittance {ttv_string} w/m-¦k',
+                  'thermal_transmittance': ttv_rounded,
+                  'thermal_transmittance_unit': 'w/m-¦k', 'is_pitched': False, 'is_roof_room': False, 'is_loft': False,
+                  'is_flat': False, 'is_thatched': False, 'is_at_rafters': False, 'is_assumed': False,
+                  'has_dwelling_above': False, 'is_valid': True, 'insulation_thickness': None}]
+            )
+
     # Patch mainheatcont-description
     cleaned["mainheatcont-description"].extend(
         [
diff --git a/etl/epc/Dataset.py b/etl/epc/Dataset.py
index 7040d66c..cf241747 100644
--- a/etl/epc/Dataset.py
+++ b/etl/epc/Dataset.py
@@ -658,6 +658,34 @@ class TrainingDataset(BaseDataset):
 
         components_to_expand = cols_to_drop.keys()
 
+        for comp in list(components_to_expand):
+            if comp == "main-fuel":
+                cleaned_key = "main-fuel"
+                left_on_starting = "main_fuel_starting"
+                left_on_ending = "main_fuel_ending"
+                original_cols = ["main_fuel_starting", "main_fuel_ending"]
+            else:
+                cleaned_key = f"{comp}-description"
+                left_on_starting = f"{comp}_description_starting"
+                left_on_ending = f"{comp}_description_ending"
+                original_cols = [
+                    f"{comp}_description_starting",
+                    f"{comp}_description_ending",
+                ]
+            df = pd.DataFrame(cleaned_lookup[cleaned_key])
+            # Check for the existence
+            filtered_1 = df[df["original_description"] == self.df[left_on_starting].values[0]]
+            filtered_2 = df[df["original_description"] == self.df[left_on_ending].values[0]]
+            if filtered_1.empty:
+                print(comp)
+                print(self.df[left_on_starting].values[0])
+
+            if filtered_2.empty:
+                print(f"Original description {self.df[left_on_ending].values[0]} not found in lookup")
+
+        z = pd.DataFrame(cleaned_lookup["roof-description"])
+        z[z["original_description"] == "Average thermal transmittance 0.20 W/m?K"]
+
         for component in components_to_expand:
             # TODO: change cleaned dataframe to have underscores instead of dashes
             if component == "main-fuel":

From 730ad0fd7144b2b5e86d98b8c3ef4e5d71ccd0cb Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 27 Feb 2024 13:13:28 +0000
Subject: [PATCH 028/262] removing temp code

---
 etl/epc/Dataset.py | 28 ----------------------------
 1 file changed, 28 deletions(-)

diff --git a/etl/epc/Dataset.py b/etl/epc/Dataset.py
index cf241747..7040d66c 100644
--- a/etl/epc/Dataset.py
+++ b/etl/epc/Dataset.py
@@ -658,34 +658,6 @@ class TrainingDataset(BaseDataset):
 
         components_to_expand = cols_to_drop.keys()
 
-        for comp in list(components_to_expand):
-            if comp == "main-fuel":
-                cleaned_key = "main-fuel"
-                left_on_starting = "main_fuel_starting"
-                left_on_ending = "main_fuel_ending"
-                original_cols = ["main_fuel_starting", "main_fuel_ending"]
-            else:
-                cleaned_key = f"{comp}-description"
-                left_on_starting = f"{comp}_description_starting"
-                left_on_ending = f"{comp}_description_ending"
-                original_cols = [
-                    f"{comp}_description_starting",
-                    f"{comp}_description_ending",
-                ]
-            df = pd.DataFrame(cleaned_lookup[cleaned_key])
-            # Check for the existence
-            filtered_1 = df[df["original_description"] == self.df[left_on_starting].values[0]]
-            filtered_2 = df[df["original_description"] == self.df[left_on_ending].values[0]]
-            if filtered_1.empty:
-                print(comp)
-                print(self.df[left_on_starting].values[0])
-
-            if filtered_2.empty:
-                print(f"Original description {self.df[left_on_ending].values[0]} not found in lookup")
-
-        z = pd.DataFrame(cleaned_lookup["roof-description"])
-        z[z["original_description"] == "Average thermal transmittance 0.20 W/m?K"]
-
         for component in components_to_expand:
             # TODO: change cleaned dataframe to have underscores instead of dashes
             if component == "main-fuel":

From d573c4d8a0ae911edd0e2f181eceb4087e3e78e4 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 27 Feb 2024 15:15:05 +0000
Subject: [PATCH 029/262] added try except mechanism

---
 .../ha_15_32/ha_analysis_batch_3.py           | 35 ++++++++++++-------
 etl/epc/Record.py                             | 32 ++++++++---------
 2 files changed, 38 insertions(+), 29 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index e261710e..da484daa 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1089,6 +1089,9 @@ def get_epc_data(
     outputs = {}
     for ha_name, data_assets in loader.data.items():
 
+        if ha_name == "HA39":
+            continue
+
         if not pull_data:
             # Then we retrieve the data from S3
             processed_ha_results = read_pickle_from_s3(
@@ -1114,6 +1117,7 @@ def get_epc_data(
         results = []
         scoring_data = []
         nodata = []
+        failed_model_rows = []
         for index, property_meta in tqdm(asset_list.iterrows(), total=len(asset_list)):
 
             if property_meta["matching_postcode"] is None:
@@ -1225,19 +1229,24 @@ def get_epc_data(
             if eligibility.eco4_warmfront["eligible"]:
                 if eligibility.epc["uprn"] == "":
                     eligibility.epc["uprn"] = int(property_meta["asset_list_row_id"].split(ha_name)[1])
-
-                scoring_dictionary = prepare_model_data_row(
-                    property_id=property_meta["asset_list_row_id"],
-                    modelling_epc=eligibility.epc,
-                    cleaned=cleaned,
-                    cleaning_data=cleaning_data,
-                    created_at=created_at,
-                    old_data=older_epcs,
-                    full_sap_epc=full_sap_epc,
-                    photo_supply_lookup=photo_supply_lookup,
-                    floor_area_decile_thresholds=floor_area_decile_thresholds
-                )
-                scoring_data.extend(scoring_dictionary)
+                try:
+                    scoring_dictionary = prepare_model_data_row(
+                        property_id=property_meta["asset_list_row_id"],
+                        modelling_epc=eligibility.epc,
+                        cleaned=cleaned,
+                        cleaning_data=cleaning_data,
+                        created_at=created_at,
+                        old_data=older_epcs,
+                        full_sap_epc=full_sap_epc,
+                        photo_supply_lookup=photo_supply_lookup,
+                        floor_area_decile_thresholds=floor_area_decile_thresholds
+                    )
+                    scoring_data.extend(scoring_dictionary)
+                except Exception as e:
+                    # If we fail, we just keep a record of it
+                    failed_model_rows.append(
+                        property_meta["asset_list_row_id"]
+                    )
 
             results.append(
                 {
diff --git a/etl/epc/Record.py b/etl/epc/Record.py
index c793716f..e74330a2 100644
--- a/etl/epc/Record.py
+++ b/etl/epc/Record.py
@@ -725,26 +725,26 @@ class EPCRecord:
         if self.prepared_epc["construction-age-band"] in DATA_ANOMALY_MATCHES:
             if self.old_data:
                 # Take the most recent
-                max_datetime = max(
-                    [
-                        old_record["lodgement-datetime"]
-                        for old_record in self.old_data
-                        if old_record["construction-age-band"]
-                           not in DATA_ANOMALY_MATCHES
-                    ]
-                )
-
-                most_recent = [
-                    old_record
+                old_age_bands = [
+                    old_record["lodgement-datetime"]
                     for old_record in self.old_data
-                    if old_record["lodgement-datetime"] == max_datetime
+                    if old_record["construction-age-band"] not in DATA_ANOMALY_MATCHES
                 ]
 
-                self.prepared_epc["construction-age-band"] = (
-                    EPCDataProcessor.clean_construction_age_band(
-                        most_recent[0]["construction-age-band"]
+                if old_age_bands:
+                    max_datetime = max(old_age_bands)
+
+                    most_recent = [
+                        old_record
+                        for old_record in self.old_data
+                        if old_record["lodgement-datetime"] == max_datetime
+                    ]
+
+                    self.prepared_epc["construction-age-band"] = (
+                        EPCDataProcessor.clean_construction_age_band(
+                            most_recent[0]["construction-age-band"]
+                        )
                     )
-                )
 
         self.construction_age_band = self.prepared_epc["construction-age-band"]
         self.age_band = england_wales_age_band_lookup.get(self.construction_age_band)

From b26e44b465e5c832a65b5bd09767f1015c2dfc1a Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 27 Feb 2024 15:45:33 +0000
Subject: [PATCH 030/262] Extending to HA 7

---
 .../ha_15_32/ha_analysis_batch_3.py           | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index da484daa..2fb26e73 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -48,6 +48,10 @@ PROPERTY_TYPE_LOOKUP = {
             'EXTRACARE SCHEME': "Flat",
         }
     },
+    "HA7": {
+        "property_type": {},
+        "built_form": {}
+    },
     "HA14": {
         "property_type": {
             "House": "House",
@@ -143,6 +147,13 @@ class DataLoader:
             asset_list["matching_postcode"] = asset_list[
                 self.COLUMN_CONFIG[ha_name]["postcode"]
             ].str.lower().str.strip()
+        elif ha_name == "HA7":
+            # Create matching_address by concatenating Address 1, Address 2, Address 3, Address 4, Postcode
+            asset_list["matching_address"] = asset_list["Address"].str.lower().str.strip() + ", " + \
+                                             asset_list["Address2"].str.lower().str.strip() + ", " + \
+                                             asset_list["Address3"].str.lower().str.strip() + ", " + \
+                                             asset_list["Postcode"].str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["Postcode"].str.lower().str.strip()
         elif ha_name == "HA14":
             # Create matching_address by concatenating Address 1, Address 2, Address 3, Address 4, Postcode
             asset_list["matching_address"] = asset_list["Address 1"].str.lower().str.strip() + ", " + \
@@ -241,6 +252,8 @@ class DataLoader:
     def get_asset_sheetname(workbook):
         if "Asset List" in workbook.sheetnames:
             return "Asset List"
+        elif "Asset" in workbook.sheetnames and "Assets" not in workbook.sheetnames:
+            return "Asset"
         else:
             return "Assets"
 
@@ -311,6 +324,8 @@ class DataLoader:
         survey_list = pd.DataFrame(survey_rows, columns=[cell.value for cell in survey_sheet[1]])
         # Remove columns that are None
         survey_list = survey_list.loc[:, survey_list.columns.notnull()]
+        # Remove rows that are completely empty
+        survey_list = survey_list.loc[survey_list.loc[:, survey_list.columns].notnull().any(axis=1)]
         survey_list["survey_list_row_id"] = [ha_name + "_survey_" + str(i) for i in range(0, len(survey_list))]
 
         # Perform survey list merge
@@ -328,6 +343,8 @@ class DataLoader:
         ciga_list = pd.DataFrame(ciga_rows, columns=[cell.value for cell in ciga_sheet[1]])
         # Remove columns that are None
         ciga_list = ciga_list.loc[:, ciga_list.columns.notnull()]
+        # Remove rows that are completely None
+        ciga_list = ciga_list.loc[ciga_list.loc[:, ciga_list.columns].notnull().any(axis=1)]
         # Perform ciga list merge
         if not ciga_list.empty:
             # Remove rows with missing postcode which happens in a small number of cases
@@ -1880,7 +1897,7 @@ def app():
     # Grab the December HA figures filepath
     december_figures_filepath = "local_data/ha_data/HA_December_figures.csv"
 
-    priority_has = ["HA1", "HA6", "HA14", "HA39", "HA107"]
+    priority_has = ["HA1", "HA6", "HA7", "HA14", "HA39", "HA107"]
     # Filter down the directories to only the priority HAs
     directories = [d for d in directories if d.split("/")[2] in priority_has]
 

From eb216e55d39817a6d7bdd6c582c6da6826050ac9 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 27 Feb 2024 16:45:37 +0000
Subject: [PATCH 031/262] Handling missing dates in SearchEpc class

---
 backend/SearchEpc.py                            | 15 ++++++++++-----
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py |  1 +
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py
index 4a3f371a..3d2df9fb 100644
--- a/backend/SearchEpc.py
+++ b/backend/SearchEpc.py
@@ -30,7 +30,7 @@ vartypes = {
     'environment-impact-potential': "Int64",
     'glazed-type': 'str',
     'heating-cost-current': 'float',
-    'address3': 'str',
+    # 'address3': 'str',
     'mainheatcont-description': 'str',
     'sheating-energy-eff': 'str',
     'property-type': 'str',
@@ -40,7 +40,7 @@ vartypes = {
     'mechanical-ventilation': 'str',
     'hot-water-cost-current': 'str',
     'county': 'str',
-    'postcode': 'str',
+    # 'postcode': 'str',
     'solar-water-heating-flag': 'str',
     'constituency': 'str',
     'co2-emissions-potential': 'float',
@@ -55,7 +55,7 @@ vartypes = {
     # 'inspection-date': str,
     'mains-gas-flag': 'str',
     'co2-emiss-curr-per-floor-area': 'float',
-    'address1': 'str',
+    # 'address1': 'str',
     'heat-loss-corridor': 'str',
     'flat-storey-count': "Int64",
     'constituency-label': 'str',
@@ -67,7 +67,7 @@ vartypes = {
     'roof-description': 'str',
     'floor-energy-eff': 'str',
     'number-habitable-rooms': 'float',
-    'address2': 'str',
+    # 'address2': 'str',
     'hot-water-env-eff': 'str',
     'posttown': 'str',
     'mainheatc-energy-eff': 'str',
@@ -98,7 +98,7 @@ vartypes = {
     # 'lodgement-date',
     'extension-count': "Int64",
     'mainheatc-env-eff': 'str',
-    'lmk-key': 'str',
+    # 'lmk-key': 'str',
     'wind-turbine-count': "Int64",
     'tenure': 'str',
     'floor-level': 'str',
@@ -575,6 +575,11 @@ class SearchEpc:
             property_type=property_type
         )
 
+        # If we have missing lodgment date, we fill it with inspection-date
+        epc_data["lodgement-datetime"] = epc_data["lodgement-datetime"].fillna(epc_data["inspection-date"])
+        # If we still have missing dates, we set it to the mean of the non NA dates
+        epc_data["lodgement-datetime"] = epc_data["lodgement-datetime"].fillna(epc_data["lodgement-datetime"].mean())
+
         # For each attribute, we need to determine the datatype and use an appropriate method
         # to estimate.
         estimated_epc = {}
diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 2fb26e73..a8f0bfa9 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1135,6 +1135,7 @@ def get_epc_data(
         scoring_data = []
         nodata = []
         failed_model_rows = []
+        # Failed at index 13691
         for index, property_meta in tqdm(asset_list.iterrows(), total=len(asset_list)):
 
             if property_meta["matching_postcode"] is None:

From 2a4d16162abc8bcda788950d44a0762148e8904d Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 27 Feb 2024 18:01:29 +0000
Subject: [PATCH 032/262] Added ha7

---
 .../ha_15_32/ha_analysis_batch_3.py           | 24 ++++++++++++-------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index a8f0bfa9..889ae776 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -49,8 +49,19 @@ PROPERTY_TYPE_LOOKUP = {
         }
     },
     "HA7": {
-        "property_type": {},
-        "built_form": {}
+        "property_type": {
+            "House": "House",
+            "Flat": "Flat",
+            "Bungalow": "Bungalow",
+            "Maisonette": "Maisonette",
+        },
+        "built_form": {
+            "Semi Detached": "Semi-Detached",
+            "Mid Terrace": "Mid-Terrace",
+            "End Terrace": "End-Terrace",
+            "Detached": "Detached",
+            "End Terraced": "End-Terrace",
+        }
     },
     "HA14": {
         "property_type": {
@@ -1042,6 +1053,9 @@ def get_property_type_and_built_form(property_meta, ha_name):
     elif ha_name == "HA6":
         property_type = PROPERTY_TYPE_LOOKUP[ha_name]["property_type"][property_meta["Dwelling type"]]
         built_form = property_meta["built_form"]
+    elif ha_name == "HA7":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name]["property_type"][property_meta["Archetype"]]
+        built_form = PROPERTY_TYPE_LOOKUP[ha_name]["built_form"][property_meta["Property Type"]]
     elif ha_name == "HA14":
         if property_meta["Asset Type Description"] == "Block - Repair":
             # We try and deduce if it's a flat or house, depending on if it has "room" or "flats" in the address
@@ -1106,9 +1120,6 @@ def get_epc_data(
     outputs = {}
     for ha_name, data_assets in loader.data.items():
 
-        if ha_name == "HA39":
-            continue
-
         if not pull_data:
             # Then we retrieve the data from S3
             processed_ha_results = read_pickle_from_s3(
@@ -1135,7 +1146,6 @@ def get_epc_data(
         scoring_data = []
         nodata = []
         failed_model_rows = []
-        # Failed at index 13691
         for index, property_meta in tqdm(asset_list.iterrows(), total=len(asset_list)):
 
             if property_meta["matching_postcode"] is None:
@@ -1906,8 +1916,6 @@ def app():
     loader.load()
     loader.ha_facts_and_figures()
 
-    loader.facts_and_figures.to_csv("facts_and_figures.csv", index=False)
-
     # We load in the additional data required to perform the analysis
     cleaned = read_from_s3(
         s3_file_name="cleaned_epc_data/cleaned.bson",

From 9ca6c179bca70cfffd34da4e278e144ff8263e24 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 27 Feb 2024 18:34:49 +0000
Subject: [PATCH 033/262] Adding HA16

---
 .../ha_15_32/ha_analysis_batch_3.py           | 139 +++++++++++++++++-
 1 file changed, 135 insertions(+), 4 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 889ae776..a707cfa5 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -128,6 +128,10 @@ class DataLoader:
         "HA6": {
             "address": "propertyaddress",
             "postcode": "address"  # The 'address' column actually contains postcode
+        },
+        "HA16": {
+            "address": "Address",
+            "postcode": "Postcode"
         }
     }
 
@@ -135,9 +139,10 @@ class DataLoader:
         # We expect 4 unmatched addresses, which have been validated manually as being in the ciga file but not
         # the asset list
         "HA14": 3,
+        "HA16": 7,
         # There's just too many unmatched here
         "HA6": 117,
-        "HA107": 51
+        "HA107": 51,
     }
 
     def __init__(self, directories, december_figures_filepath, use_cache):
@@ -151,7 +156,7 @@ class DataLoader:
 
     def create_asset_list_matching_address(self, ha_name, asset_list):
 
-        if ha_name in ["HA1", "HA6"]:
+        if ha_name in ["HA1", "HA6", "HA16"]:
             asset_list["matching_address"] = asset_list[
                 self.COLUMN_CONFIG[ha_name]["address"]
             ].str.lower().str.strip()
@@ -173,6 +178,7 @@ class DataLoader:
                                              asset_list["Address 4"].str.lower().str.strip() + ", " + \
                                              asset_list["Postcode"].str.lower().str.strip()
             asset_list["matching_postcode"] = asset_list["Postcode"].str.lower().str.strip()
+
         elif ha_name == "HA39":
             # Create matching_address by concatenating add_1, add_2, add_3, add_4, add_5, post_code
             asset_list["matching_address"] = asset_list["add_1"].astype(str).str.lower().str.strip() + ", " + \
@@ -234,7 +240,7 @@ class DataLoader:
         :return:
         """
 
-        if ha_name in ["HA6", "HA14", "HA107"]:
+        if ha_name in ["HA6", "HA14", "HA107", "HA16"]:
             split_addresses = ciga_list['Matched Address'].str.split(',', expand=True)
             house_numbers = split_addresses[0].str.split(' ', expand=True)
             # THe first column should be HouseNo - we aren't interested in the other columns, but we don't know how
@@ -556,6 +562,129 @@ class DataLoader:
 
         return survey_list
 
+    @staticmethod
+    def correct_ha16_survey_list(survey_list):
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("/", ", ")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.lower()
+        survey_list["Street / Block Name"] = np.where(
+            survey_list["Street / Block Name"] == "REEDS RD",
+            "Reeds ROAD",
+            survey_list["Street / Block Name"]
+        )
+        # Replace " rd " with "road"
+        survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.replace(r'\brd\b', 'road',
+                                                                                            regex=True)
+
+        # Replace " , " with ", "
+        survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.replace(
+            " , ", ', ',
+        )
+        # Fix "{place} ,{place}" with "{place}, {place}"
+        survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.replace(r'\s*,\s*', ', ',
+                                                                                            regex=True)
+        # Strip whitespace
+        survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.strip()
+
+        # Correct errors
+        survey_list["Post Code"] = np.where(
+            survey_list["Post Code"] == "M38 0SA",
+            "M38 9SA",
+            survey_list["Post Code"]
+        )
+
+        survey_list["Post Code"] = np.where(
+            (survey_list["Street / Block Name"] == "nelson drive") & (survey_list["Post Code"] == "M44 5JE"),
+            "M44 5JF",
+            survey_list["Post Code"]
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("eccels", "eccles")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("chatley, road",
+                                                                                            "chatley road")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("vaughen", "Vaughan")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("cresent", "crescent")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("plantation road",
+                                                                                            "plantation avenue")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("how clough drive",
+                                                                                            "howclough drive")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("brockhurst lane",
+                                                                                            "brookhurst lane")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("biirch road",
+                                                                                            "birch road")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("hadson road",
+                                                                                            "hodson road")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("harbonne avennue",
+                                                                                            "narbonne avenue")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "cumberland road, cadishead",
+            "cumberland avenue, cadishead")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("aston field drive",
+                                                                                            "ashton field drive")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("wedgewood road",
+                                                                                            "wedgwood road")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("hamilton close",
+                                                                                            "hamilton avenue")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "lichens crescent, fitton hill",
+            "lichens crescent")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("south croft, fitton hill",
+                                                                                            "south croft")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(", fitton hill", "")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("firtree dr",
+                                                                                            "fir tree avenue")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("hawthorne road",
+                                                                                            "hawthorn crescent")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("rein lee avenue",
+                                                                                            "reins lee avenue")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("westerhill road",
+                                                                                            "wester hill road")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("st martins road",
+                                                                                            "saint martins road")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("timperley avenue",
+                                                                                            "timperley close")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("eastwood road",
+                                                                                            "eastwood avenue")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("new road", "new street")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("grassmere road",
+                                                                                            "grasmere road")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("hulton road",
+                                                                                            "hulton avenue")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("beechfield avenue",
+                                                                                            "beechfield road")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("princess avenue",
+                                                                                            "princes avenue")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("edge ford crecent",
+                                                                                            "edge fold crescent")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("conniston avenue",
+                                                                                            "coniston avenue")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("blackthorne crescent",
+                                                                                            "blackthorn crescent")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("wellstock road",
+                                                                                            "wellstock lane")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("brackley avenue",
+                                                                                            "brackley street")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("brook avenue swinton",
+                                                                                            "brook avenue, swinton")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("green avenue swinton",
+                                                                                            "green avenue, swinton")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("grasmere avenue wardley",
+                                                                                            "grasmere avenue, wardley")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("mardale avenue wardle",
+                                                                                            "mardale avenue, wardle")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("carleach grove",
+                                                                                            "cartleach Grove")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("arbour grove",
+                                                                                            "arbor Grove")
+
+        # Replacement for clively avenue 66-68
+        survey_list["NO."] = np.where(
+            survey_list["NO."] == "66-68",
+            "66",
+            survey_list["NO."]
+        )
+
+        return survey_list
+
     @staticmethod
     def correct_ha107_survey_list(survey_list):
         # Replace Front Street, East Stockham with Front Street, East Stockwith
@@ -898,6 +1027,8 @@ class DataLoader:
         scheme_map = {
             "ECO4": "ECO4",
             "AFFORDABLE WARMTH": "ECO4",
+            "ECO4 A/W": "ECO4",
+            "ECO4 GBIS (ECO+)": "GBIS"
         }
 
         eco_eligibility_map = {
@@ -1908,7 +2039,7 @@ def app():
     # Grab the December HA figures filepath
     december_figures_filepath = "local_data/ha_data/HA_December_figures.csv"
 
-    priority_has = ["HA1", "HA6", "HA7", "HA14", "HA39", "HA107"]
+    priority_has = ["HA1", "HA6", "HA7", "HA14", "HA16", "HA39", "HA107"]
     # Filter down the directories to only the priority HAs
     directories = [d for d in directories if d.split("/")[2] in priority_has]
 

From 102600b19651964c4b6c7945307a8defd454f9d1 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 27 Feb 2024 18:40:17 +0000
Subject: [PATCH 034/262] Added HA16

---
 .../ha_15_32/ha_analysis_batch_3.py           | 22 +++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index a707cfa5..ee23f238 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -71,6 +71,24 @@ PROPERTY_TYPE_LOOKUP = {
             "Maisonette": "Maisonette",
         }
     },
+    "HA16": {
+        'Semi Detached Bungalow': {"property-type": "Bungalow", "built-form": "Semi-Detached"},
+        'Mid Terraced House': {"property-type": "House", "built-form": "Mid-Terrace"},
+        'End Terraced House': {"property-type": "House", "built-form": "End-Terrace"},
+        'Low Rise Flat': {"property-type": "Flat", "built-form": "Mid-Terrace"},
+        'Semi-Detached House': {"property-type": "House", "built-form": "Semi-Detached"},
+        'Detached Bungalow': {"property-type": "Bungalow", "built-form": "Detached"},
+        'End Terraced Bungalow': {"property-type": "Bungalow", "built-form": "End-Terrace"},
+        'Mid Terraced Bungalow': {"property-type": "Bungalow", "built-form": "Mid-Terrace"},
+        'Medium Rise Flat': {"property-type": "Flat", "built-form": "Mid-Terrace"},
+        'Detached House': {"property-type": "House", "built-form": "Detached"},
+        'Cottage Flat': {"property-type": "Flat", "built-form": "Semi-Detached"},
+        'Maisonette Medium Rise': {"property-type": "Flat", "built-form": "Mid-Terrace"},
+        'Maisonette Over Shop': {"property-type": "Flat", "built-form": "Mid-Terrace"},
+        'End Terraced Town House': {"property-type": "House", "built-form": "End-Terrace"},
+        'Flat Over Shop': {"property-type": "Flat", "built-form": "Mid-Terrace"},
+        'Mid Terraced Town House': {"property-type": "House", "built-form": "Mid-Terrace"},
+    },
     "HA39": {
         "Semi house": {"property_type": "House", "built_form": "Semi-Detached"},
         "1st floor flat": {"property_type": "Flat", "built_form": None},
@@ -1201,6 +1219,10 @@ def get_property_type_and_built_form(property_meta, ha_name):
             ]
 
         built_form = None
+    elif ha_name == "HA16":
+        config = PROPERTY_TYPE_LOOKUP[ha_name][property_meta["Type"]]
+        property_type = config.get("property-type")
+        built_form = config.get("built-form")
     elif ha_name == "HA39":
 
         property_type_config = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["ConstructionStyle"], {})

From a1c19b5b8883ead263880c2d589bd76da76d6403 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 27 Feb 2024 19:01:32 +0000
Subject: [PATCH 035/262] Adding ha24 wip

---
 .../ha_15_32/ha_analysis_batch_3.py           | 47 ++++++++++++++++++-
 1 file changed, 45 insertions(+), 2 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index ee23f238..94df8ceb 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -150,6 +150,10 @@ class DataLoader:
         "HA16": {
             "address": "Address",
             "postcode": "Postcode"
+        },
+        "HA24": {
+            "address": "Address",
+            "postcode": "Postcode"
         }
     }
 
@@ -174,7 +178,7 @@ class DataLoader:
 
     def create_asset_list_matching_address(self, ha_name, asset_list):
 
-        if ha_name in ["HA1", "HA6", "HA16"]:
+        if ha_name in ["HA1", "HA6", "HA16", "HA24"]:
             asset_list["matching_address"] = asset_list[
                 self.COLUMN_CONFIG[ha_name]["address"]
             ].str.lower().str.strip()
@@ -289,6 +293,8 @@ class DataLoader:
             return "Asset List"
         elif "Asset" in workbook.sheetnames and "Assets" not in workbook.sheetnames:
             return "Asset"
+        elif "Decent Homes Stock" in workbook.sheetnames:
+            return "Decent Homes Stock"
         else:
             return "Assets"
 
@@ -703,6 +709,43 @@ class DataLoader:
 
         return survey_list
 
+    @staticmethod
+    def correct_ha24_survey_list(survey_list):
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("/", ", ")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.lower()
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.strip()
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "council house, nidds lane", "nidds lane"
+        )
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "wirral avenue", "wirrall avenue"
+        )
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "st ives road", "st. ives crescent"
+        )
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "sundringham road", "sandringham road"
+        )
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "milton avenue", "milton road"
+        )
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "st ives crescent", "st. ives crescent"
+        )
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "council house, waterbelly lane", "waterbelly lane"
+        )
+        # Generally remove "council house, " from the start of the street name
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "council house, ", ""
+        )
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "st. leodegars close", "st leodegars close"
+        )
+
+        return survey_list
+
     @staticmethod
     def correct_ha107_survey_list(survey_list):
         # Replace Front Street, East Stockham with Front Street, East Stockwith
@@ -2061,7 +2104,7 @@ def app():
     # Grab the December HA figures filepath
     december_figures_filepath = "local_data/ha_data/HA_December_figures.csv"
 
-    priority_has = ["HA1", "HA6", "HA7", "HA14", "HA16", "HA39", "HA107"]
+    priority_has = ["HA1", "HA6", "HA7", "HA14", "HA16", "HA24", "HA39", "HA107"]
     # Filter down the directories to only the priority HAs
     directories = [d for d in directories if d.split("/")[2] in priority_has]
 

From e9bfd63c3588206cd9e7c79b25c6067b617bf436 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 27 Feb 2024 21:00:23 +0000
Subject: [PATCH 036/262] Fixed getting property type and built form for ha107

---
 .../ha_15_32/ha_analysis_batch_3.py           | 77 ++++++++++++++-----
 1 file changed, 57 insertions(+), 20 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 94df8ceb..5cbfb90c 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -223,12 +223,67 @@ class DataLoader:
 
         return asset_list
 
+    @staticmethod
+    def extract_property_info_ha107(properties):
+        property_types = {
+            "House": "House",
+            "Flat": "Flat",
+            "Bungalow": "Bungalow",
+            "Maisonette": "Maisonette",
+            "Bedsit": None
+        }
+
+        built_forms = {
+            "Detached": "Detached",
+            "Semi Detached": "Semi-Detached",
+            "End Terrace": "End-Terrace",
+            "Mid Terrace": "Mid-Terrace"
+        }
+
+        # Function to extract property type and built form from a description
+        def extract_from_description(description):
+            property_type = None
+            built_form = None
+
+            for key in property_types:
+                if key in description:
+                    property_type = property_types[key]
+                    break
+
+            for key in built_forms:
+                if key in description:
+                    built_form = built_forms[key]
+                    break
+
+            return property_type, built_form
+
+        # Process each property in the list
+        results = []
+        for property_description in properties:
+            property_type, built_form = extract_from_description(property_description)
+            results.append(
+                {
+                    "Property type": property_description,
+                    "property_type": property_type,
+                    "built_form": built_form
+                }
+            )
+        results = pd.DataFrame(results)
+
+        return results
+
     def append_asset_list_built_form(self, ha_name, asset_list):
 
         # Finally, we process property_type or built form, where needed
         if ha_name == "HA6":
             asset_list["built_form"] = asset_list["Property Type"].apply(self.identify_built_form_ha6)
 
+        if ha_name == "HA107":
+            mapped_df = self.extract_property_info_ha107(asset_list["Property type"].unique())
+            asset_list = asset_list.merge(
+                mapped_df, how="left", on="Property type"
+            )
+
         return asset_list
 
     @staticmethod
@@ -1280,26 +1335,8 @@ def get_property_type_and_built_form(property_meta, ha_name):
                 property_type = "House"
     elif ha_name == "HA107":
 
-        dwelling_style = property_meta["Dwelling Style"]
-        if isinstance(dwelling_style, str):
-            dwelling_style = dwelling_style.strip()
-
-        property_type = PROPERTY_TYPE_LOOKUP[ha_name]["property_type"].get(property_meta["DwellingType"])
-        built_form = PROPERTY_TYPE_LOOKUP[ha_name]["built_form"].get(dwelling_style, None)
-
-        if property_type is None:
-            if built_form in ["Semi-Detached", "Mid-Terrace", "End-Terrace", "Detached"]:
-                property_type = "House"
-
-            if "flat" in property_meta["Wall Construction"].lower():
-                property_type = "Flat"
-
-            if (property_meta["DwellingType"] == "UNKNOWN") & (property_meta["Dwelling Style"] == 0):
-                # Hand a few specific cases
-                property_type = "Bungalow"
-
-            if property_meta["Street"] == "School View":
-                property_type = "Bungalow"
+        property_type = property_meta.get("property_type", None)
+        built_form = property_meta.get("built_form", None)
 
     else:
         raise NotImplementedError("Implement me")

From 6ae21bbcb023139961eb69749ac1380a7d3ac001 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 28 Feb 2024 12:31:48 +0000
Subject: [PATCH 037/262] Creating the output structure

---
 etl/eligibility/Eligibility.py                |  11 +-
 .../ha_15_32/ha_analysis_batch_3.py           | 548 +++++++-----------
 2 files changed, 220 insertions(+), 339 deletions(-)

diff --git a/etl/eligibility/Eligibility.py b/etl/eligibility/Eligibility.py
index f7a5ed98..b594579f 100644
--- a/etl/eligibility/Eligibility.py
+++ b/etl/eligibility/Eligibility.py
@@ -365,7 +365,7 @@ class Eligibility:
             return
 
         # Near perfect
-        if self.cavity["suitability"] and (current_sap < 55):
+        if self.cavity["suitability"] and (current_sap < 69):
             self.gbis_warmfront = {
                 "eligible": True,
                 "strict": True,
@@ -373,15 +373,6 @@ class Eligibility:
             }
             return
 
-        # Suitable cavity, but high sap
-        if self.cavity["suitability"] and (current_sap >= 55):
-            self.gbis_warmfront = {
-                "eligible": True,
-                "strict": False,
-                "message": "Meets cavity, fails SAP check",
-            }
-            return
-
         self.gbis_warmfront = {
             "eligible": False,
             "strict": False,
diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 5cbfb90c..61c4a243 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1646,10 +1646,26 @@ def get_epc_data(
 
 
 def get_col_widths(dataframe):
-    # First we find the maximum length of the index column
-    idx_max = max([len(str(s)) for s in dataframe.index.values] + [len(str(dataframe.index.name))])
-    # Then, we concatenate this to the max of the lengths of column name and its max value for each column, row-wise
-    return [idx_max] + [max(dataframe[col].astype(str).map(len).max(), len(col)) for col in dataframe.columns]
+    # Define a maximum width for any column to prevent excessively wide columns
+    max_allowed_width = 25
+
+    # Calculate widths for columns
+    widths = []
+
+    if isinstance(dataframe.columns, pd.MultiIndex):
+        # For MultiIndex, calculate max width considering the header and data
+        header_widths = [max(len(str(item)) for item in col) + 2 for col in dataframe.columns.values]  # +2 for padding
+        for i, column in enumerate(dataframe.columns):
+            max_data_width = max(dataframe[column].astype(str).apply(len).max(), header_widths[i])
+            widths.append(min(max_data_width, max_allowed_width))
+    else:
+        # For non-MultiIndex, calculate width normally
+        for col in dataframe.columns:
+            # Calculate the max length of data or column name and limit it
+            max_length = max(dataframe[col].astype(str).apply(len).max(), len(str(col)) + 2)  # +2 for padding
+            widths.append(min(max_length, max_allowed_width))
+
+    return widths
 
 
 def analyse_ha_data(outputs, loader):
@@ -1671,42 +1687,13 @@ def analyse_ha_data(outputs, loader):
     :return:
     """
 
-    eco4_rate = 1710
-    gbis_rate = 600
-
     ha_analysis_results = []
-    ha_revenue_results = []
     for ha_name, datasets in outputs.items():
-
         inputs = [x for k, x in loader.data.items() if k == ha_name][0]
-        # TODO: This is placeholder because we don't have the schemes that the properties have been qualified for
-        #       yet
-        #
-        import random
-        randomly_allocated_schemes = random.choices(["ECO4", "GBIS"], k=inputs["asset_list"].shape[0])
-        inputs["asset_list"]["randomly_allocated_schemes"] = randomly_allocated_schemes
-        inputs["asset_list"]["funding_scheme"] = None
-        inputs["asset_list"]["funding_scheme"] = np.where(
-            inputs["asset_list"]["row_meaning"] == "identified potential eco works (CWI)",
-            inputs["asset_list"]["randomly_allocated_schemes"],
-            inputs["asset_list"]["funding_scheme"]
-        )
-
-        # TODO: Also temp, just for HA 6
-        if ha_name == "ha_6":
-            inputs["survey_list"]["funding_scheme"] = None
-            inputs["survey_list"]["funding_scheme"] = np.where(
-                inputs["survey_list"][
-                    'AFFORDABLE WARMTH                 OR EPC FOR HOUSING ASSOCIATION '] == "AFFORDABLE WARMTH",
-                "ECO4",
-                "GBIS"
-            )
-
-        # End placholder
 
         results_df = datasets["results_df"].copy()
 
-        analysis_data = inputs["asset_list"][['asset_list_row_id', "row_meaning", "funding_scheme"]].rename(
+        analysis_data = inputs["asset_list"][['asset_list_row_id', "ECO Eligibility"]].rename(
             columns={"row_meaning": "asset_identification_status"}
         ).merge(
             results_df,
@@ -1715,293 +1702,236 @@ def analyse_ha_data(outputs, loader):
             left_on="asset_list_row_id"
         )
 
-        # We now merge the survey list onto the analysis data and remove anything that is sold, to give us just what is
-        # remaining
+        ################################################################################################
+        # We take the properties that strictly qualified under eco
+        ################################################################################################
 
-        if inputs["matched_lookup"] is not None:
-            analysis_data = analysis_data.merge(
-                inputs["matched_lookup"], how="left", on="asset_list_row_id"
+        eco4_identified = analysis_data[analysis_data["ECO Eligibility"] == "eco4"].copy()
+        eco4_identified["identification_type"] = None
+        eco4_identified["identification_type"] = np.where(
+            (eco4_identified["eco4_eligible"] == True) & (eco4_identified["eco4_strict"] == True),
+            "strict",
+            eco4_identified["identification_type"]
+        )
+
+        eco4_identified["identification_type"] = np.where(
+            (eco4_identified["eco4_eligible"] == True) & (eco4_identified["eco4_strict"] == False),
+            "expansive",
+            eco4_identified["identification_type"]
+        )
+        ################################################################################################
+        # We take the properties dependent on CIGA
+        ################################################################################################
+
+        ciga_dependent_identified = analysis_data[
+            analysis_data["ECO Eligibility"].isin(
+                [
+                    "eco4 (subject to ciga)",
+                    "eco4 - passed ciga"
+                ]
             )
-            # Drop any rows that have a survey_list_row_id
-            analysis_data = analysis_data[pd.isnull(analysis_data["survey_list_row_id"])]
+        ].copy()
 
-        # If we have a survey list, we merge this onto the results
-        n_properties_in_asset_list = analysis_data["asset_list_row_id"].nunique()
-
-        properties_sold = (
-            inputs["survey_list"].groupby("funding_scheme")["survey_list_row_id"].nunique().reset_index() if
-            inputs["survey_list"] is not None else pd.DataFrame(columns=["funding_scheme"])
-        )
-        properties_sold_eco4 = (
-            properties_sold[properties_sold["funding_scheme"] == "ECO4"]["survey_list_row_id"].values[0] if
-            (not properties_sold.empty) and ("ECO4" in properties_sold["funding_scheme"].values) else 0
-        )
-        properties_sold_gbis = (
-            properties_sold[properties_sold["funding_scheme"] == "GBIS"]["survey_list_row_id"].values[0] if
-            (not properties_sold.empty) and ("GBIS" in properties_sold["funding_scheme"].values) else 0
+        # These are properties that show filled cavity
+        ciga_dependent_identified["identification_type"] = None
+        ciga_dependent_identified["identification_type"] = np.where(
+            ciga_dependent_identified["eco4_message"].isin(
+                [
+                    "Perfect suitability",
+                    "Meets cavity and sap",
+                    "Fails cavity, meets loft, fails SAP",
+                    "Meets fabric, fails SAP check",
+                    "Meets cavity, loft borderline, meets sap",
+                ]
+            ),
+            "strict",
+            ciga_dependent_identified["identification_type"]
         )
 
-        # We now calculate the number of remaining properties, by scheme
-        remaining_properties = analysis_data[
-            analysis_data["asset_identification_status"] == "identified potential eco works (CWI)"
-            ].copy()
-        remaining_properties["prospect_type"] = None
-
-        remaining_properties_by_scheme = (
-            remaining_properties.groupby("funding_scheme")["asset_list_row_id"].nunique().reset_index()
+        ciga_dependent_identified["identification_type"] = np.where(
+            (ciga_dependent_identified["eco4_message"].isin(["All conditions fail", "failed fabric check"])) &
+            (ciga_dependent_identified["walls"].isin(["Cavity wall, filled cavity"])),
+            "expansive",
+            ciga_dependent_identified["identification_type"]
         )
 
-        n_remaining_properties_eco4 = remaining_properties_by_scheme[
-            remaining_properties_by_scheme["funding_scheme"] == "ECO4"
-            ]["asset_list_row_id"].values[0]
+        ciga_dependent_identified["identification_type"] = np.where(
+            (ciga_dependent_identified["eco4_message"].isin(["Meets just cavity"])) | (
+                ciga_dependent_identified["walls"].isin(["Cavity wall, filled cavity"])
+            ),
+            "expansive",
+            ciga_dependent_identified["identification_type"]
+        )
 
-        n_remaining_properties_gbis = remaining_properties_by_scheme[
-            remaining_properties_by_scheme["funding_scheme"] == "GBIS"
-            ]["asset_list_row_id"].values[0]
+        ################################################################################################
+        # We take the properties that qualified for gbis
+        ################################################################################################
+        gbis_identified = analysis_data[analysis_data["ECO Eligibility"] == "gbis"].copy()
+        gbis_identified["identification_type"] = None
+        gbis_identified["identification_type"] = np.where(
+            (gbis_identified["gbis_eligible"] == True) & (gbis_identified["sap"] < 69),
+            "strict",
+            gbis_identified["identification_type"]
+        )
 
-        # For the remaining properties, we use the results of the eligibility process to classify the property into
-        # one of multiple categories
-        #
-        # For properties that have been identified as ECO4
-        # 1) Strict ECO4 candidate - Has required fabric and EPC is D or below. We consider D or below here, because
-        #    Warmfront regularly re-surveys properties which then fall within the SAP requirement
-        #    - This is not the very strictest definition of ECO4 eligible, but we aim to characterise the properties
-        #      here and re-surveying is a common practicce by Warmfront. Additionally, many of the social homes have
-        #      very old EPCs which may score lower when re-done
-        # 2) Meets Fabric requirements, not SAP
-        #    Warmfront has identified the property as eligible, but the EPC is not D or below. We consider this but
-        #    label is separately as not a strict
-        # 3) Subject to CIGA check - Meets loft conditions but shows a filled cavity.
-        #    - we don't have a SAP constraint here because the EPC is (currently) showing what the property might
-        #      actually look like after retrofit and so the EPC currently being a C or above means little, because
-        #      the updated EPC, showing an empty cavity, could bring the property within
-        # 4) Loft insulation too thick - Meets empty cavity but shows a loft with between 101 and 270mm insulation.
-        #   - No SAP constraint, for the same reason as in category 2)
-        # 5) Looks like GBIS instead
-        # 6) Does not look like ECO4 candidate
-        #
-        # For properties that have been identified as GBIS
-        # 1) Strict GBIS candidates
-        # 2) Properties that actually look like strict GBIS candidates
-        # 3) Subject to CIGA check - Filled cavity
-        # 4) Does not look like a GBIS candidate
+        gbis_identified["identification_type"] = np.where(
+            (gbis_identified["gbis_eligible"] == True) & (gbis_identified["sap"] >= 69),
+            "expansive",
+            gbis_identified["identification_type"]
+        )
 
-        remaining_eco4_df = remaining_properties[
-            remaining_properties["funding_scheme"] == "ECO4"
-            ].copy()
+        # Finally, we look at the properties that have not been identified by Warmfront
+        not_identified = analysis_data[
+            analysis_data["ECO Eligibility"].isin(
+                [
+                    "not eligible"
+                ]
+            )
+        ].copy()
 
-        ####################################
+        surplus_eco4 = not_identified[
+            (not_identified["eco4_eligible"] == True) & (not_identified["eco4_message"].isin(
+                ["Perfect suitability", "Meets cavity, loft borderline, meets sap", "Near perfect suitability"]
+            ))
+            ]
+
+        surplus_gbis = not_identified[
+            (not_identified["gbis_eligible"] == True) & (
+                ~not_identified["asset_list_row_id"].isin(surplus_eco4["asset_list_row_id"].values)
+            ) & (not_identified["sap"] < 69) & (
+                (not_identified["cavity_type"].isin(["empty", "partial insulation"])) | (
+                not_identified["walls"].str.contains("partial", case=False, na=False)
+            )
+            )
+            ]
+        surplus_gbis = surplus_gbis[surplus_gbis["is_estimated"] == False]
+
+        # Output variables
         # ECO4
-        ####################################
-
-        # 1) We identify this if:
-        #   - remaining_properties["eco4_eligible"] == True
-
-        remaining_eco4_df["prospect_type"] = np.where(
-            (remaining_eco4_df["eco4_eligible"] == True),
-            "strict ECO4",
-            remaining_eco4_df["prospect_type"]
+        n_properties_in_asset_list = inputs["asset_list"].shape[0]
+        n_warmfront_identified_eco4 = eco4_identified.shape[0] + ciga_dependent_identified.shape[0]
+        eco4_of_which_identified_strict = (
+            eco4_identified[eco4_identified["identification_type"] == "strict"].shape[0] +
+            ciga_dependent_identified[ciga_dependent_identified["identification_type"] == "strict"].shape[0]
         )
-
-        # 2) Meets fabric requirements
-        remaining_eco4_df["prospect_type"] = np.where(
-            (
-                (remaining_eco4_df["eco4_message"] == "sap too high") &
-                remaining_eco4_df["eligibility_cavity_type"].isin(["partial", "empty"]) &
-                remaining_eco4_df["eligibility_loft_type"].isin(["0-100mm"]) &
-                pd.isnull(remaining_eco4_df["prospect_type"])
-            ),
-            "ECO4 if SAP downgrade",
-            remaining_eco4_df["prospect_type"]
+        eco4_of_which_identified_expansive = (
+            eco4_identified[eco4_identified["identification_type"] == "expansive"].shape[0] +
+            ciga_dependent_identified[ciga_dependent_identified["identification_type"] == "expansive"].shape[0]
         )
-
-        # 3) We identify this if it has a filled cavity but meets the loft conditions
-        # TODO: Consider if we should also allow 100-270mm or if we should add some slight tolerance (e.g. 150mm)
-        #       to account for measurement error
-        remaining_eco4_df["prospect_type"] = np.where(
-            (
-                remaining_eco4_df["eligibility_cavity_type"].isin(["full"]) &
-                remaining_eco4_df["eligibility_loft_type"].isin(["0-100mm"])
-            ),
-            "ECO4 - Filled cavity - subject to CIGA check",
-            remaining_eco4_df["prospect_type"]
-        )
-
-        # 4) We identify this by ensuring the cavity if empty or partial, and the loft has between 101 and 270mm
-        remaining_eco4_df["prospect_type"] = np.where(
-            (
-                remaining_eco4_df["eligibility_cavity_type"].isin(["empty", "partial"]) &
-                remaining_eco4_df["eligibility_loft_type"].isin(["100-270mm"])
-            ),
-            "ECO4 prospect - empty cavity, loft insulation below regulation",
-            remaining_eco4_df["prospect_type"]
-        )
-
-        # 5) Looks like GBIS instead
-        remaining_eco4_df["prospect_type"] = np.where(
-            (remaining_eco4_df["gbis_eligible"] == True) & pd.isnull(remaining_eco4_df["prospect_type"]),
-            "Looks like GBIS",
-            remaining_eco4_df["prospect_type"]
-        )
-
-        # 6) This is everything else (i.e. both the cavity is full and the loft insulation is above 100mm)
-        remaining_eco4_df["prospect_type"] = remaining_eco4_df["prospect_type"].fillna(
-            "Does not look like ECO4 candidate"
-        )
-
-        ####################################
         # GBIS
-        ####################################
-
-        remaining_gbis = remaining_properties[
-            remaining_properties["funding_scheme"] == "GBIS"
-            ].copy()
-
-        # 1) Strict GBIS candidates
-        remaining_gbis["prospect_type"] = np.where(
-            (
-                (remaining_gbis["gbis_eligible"] == True) & (remaining_gbis["eco4_eligible"] == False)
-            ),
-            "strict GBIS",
-            remaining_gbis["prospect_type"]
-        )
-
-        # 2) GBIS candidates that look like strict ECO4 candidates
-        remaining_gbis["prospect_type"] = np.where(
-            (remaining_gbis["eco4_eligible"] == True),
-            "GBIS - Upgradable to ECO4",
-            remaining_gbis["prospect_type"]
-        )
-
-        # 3) Subject to CIGA check - Filled cavity
-        remaining_gbis["prospect_type"] = np.where(
-            (
-                remaining_gbis["eligibility_cavity_type"].isin(["full"]) &
-                pd.isnull(remaining_gbis["prospect_type"])
-            ),
-            "GBIS - Filled cavity - subject to CIGA check",
-            remaining_gbis["prospect_type"]
-        )
-
-        # 4) Everything else
-        remaining_gbis["prospect_type"] = remaining_gbis["prospect_type"].fillna(
-            "Does not look like GBIS candidate"
-        )
-
-        ####################################
-        # Surplus properties
-        ####################################
-
-        # Take properties that were not identified by Warmfront and identify those that look like they would qualify
-        # under the strictest criteria
-        surplus_df = analysis_data[
-            analysis_data["asset_identification_status"] != "identified potential eco works (CWI)"
-            ].copy()
-
-        eco4_surplus = surplus_df[
-            (
-                (surplus_df["eco4_eligible"] == True) & (surplus_df["eco4_message"] == "subject to post retrofit sap") &
-                (
-                    surplus_df["eligibility_classification"].isin(
-                        ["high confidence", "highest confidence", "medium confidence"]
-                    )
-                )
-            )
-        ].copy()
-
-        gbis_surplus = surplus_df[
-            (
-                (surplus_df["gbis_eligible"] == True) & (surplus_df["eco4_eligible"] == False) & (
-                surplus_df["eligibility_cavity_type"].isin(["empty", "partial"])
-            )
-            )
-        ].copy()
-
-        # Perform some checks to make sure we have all of the values
-        remaining_eco4_dict = remaining_eco4_df["prospect_type"].value_counts().to_dict()
-        if n_remaining_properties_eco4 != sum([v for k, v in remaining_eco4_dict.items()]):
-            raise ValueError(
-                "Number of remaining properties does not match the number of properties in remaining ECO4 dict"
-            )
-
-        remaining_gbis_dict = remaining_gbis["prospect_type"].value_counts().to_dict()
-        if n_remaining_properties_gbis != sum([v for k, v in remaining_gbis_dict.items()]):
-            raise ValueError(
-                "Number of remaining properties does not match the number of properties in remaining GBIS dict"
-            )
+        n_warmfront_identified_gbis = gbis_identified.shape[0]
+        gbis_of_which_identified_strict = gbis_identified[gbis_identified["identification_type"] == "strict"].shape[0]
+        gbis_of_which_identified_expansive = \
+            gbis_identified[gbis_identified["identification_type"] == "expansive"].shape[0]
 
         to_append = {
-            "ha_name": ha_name,
-            "n_properties_in_asset_list": n_properties_in_asset_list,
+            ("", "HA Name"): ha_name,
+            ("", "# Properties in asset list"): n_properties_in_asset_list,
             ############
             # ECO4
             ############
-            "properties_sold_eco4": properties_sold_eco4,
-            "n_remaining_properties_eco4": n_remaining_properties_eco4,
-            **remaining_eco4_dict,
+            ("ECO4", "# Properties identieid by Warmfront"): n_warmfront_identified_eco4,
+            ("ECO4", "Of which identified by model - strict"): eco4_of_which_identified_strict,
+            ("ECO4", "Of which identified by model - expansive"): eco4_of_which_identified_expansive,
+            ("ECO4", "Of which identified by model - total"): (
+                eco4_of_which_identified_strict + eco4_of_which_identified_expansive),
+            ("ECO4", "Additional properties"): surplus_eco4.shape[0],
             ############
             # GBIS
             ############
-            "properties_sold_gbis": properties_sold_gbis,
-            "n_remaining_properties_gbis": n_remaining_properties_gbis,
-            **remaining_gbis_dict,
-            ############
-            # GBIS
-            ############
-            "n_eco4_surplus": eco4_surplus.shape[0],
-            "n_gbis_surplus": gbis_surplus.shape[0],
+            ("GBIS", "# Properties identieid by Warmfront"): n_warmfront_identified_gbis,
+            ("GBIS", "Of which identified by model - strict"): gbis_of_which_identified_strict,
+            ("GBIS", "Of which identified by model - expansive"): gbis_of_which_identified_expansive,
+            ("GBIS", "Of which identified by model - total"): (
+                gbis_of_which_identified_strict + gbis_of_which_identified_expansive
+            ),
+            ("GBIS", "Additional properties"): surplus_gbis.shape[0]
         }
 
         ha_analysis_results.append(to_append)
 
-        revenue_to_append = {
-            "ha_name": ha_name,
-            "£ Remaining from asset list": (
-                n_remaining_properties_eco4 * eco4_rate + n_remaining_properties_gbis * gbis_rate
-            ),
-            "Of which: Strict": (
-                to_append.get('strict ECO4', 0) * eco4_rate + to_append.get('strict GBIS', 0) * gbis_rate +
-                to_append.get('GBIS - Upgradable to ECO4', 0) * gbis_rate
-            ),
-            "Of which: Subject to CIGA": (
-                to_append.get("ECO4 - Filled cavity - subject to CIGA check", 0) * eco4_rate +
-                to_append.get("GBIS - Filled cavity - subject to CIGA check", 0) * gbis_rate
-            ),
-            "Of which: Prospect, not perfect strict prospect": (
-                to_append.get("ECO4 prospect - empty cavity, loft insulation below regulation", 0) * eco4_rate +
-                to_append.get("ECO4 if SAP downgrade", 0) * eco4_rate
-            ),
-            "Of which: Potential downgrade to GBIS": to_append["Looks like GBIS"] * eco4_rate,
-            "Of which: Does not look like prospect": (
-                to_append.get("Does not look like ECO4 candidate", 0) * eco4_rate +
-                to_append.get("Does not look like GBIS candidate", 0) * gbis_rate
-            ),
-            "Surplus: Unidentified properties": eco4_surplus.shape[0] * eco4_rate + gbis_surplus.shape[0] * gbis_rate,
-            "Surplus: GBIS Updates to ECO4": to_append.get("GBIS - Upgradable to ECO4", 0) * (eco4_rate - gbis_rate)
-        }
-
-        # Perform a quick check:
-        if revenue_to_append["£ Remaining from asset list"] - (
-            revenue_to_append["Of which: Strict"] + revenue_to_append["Of which: Subject to CIGA"] +
-            revenue_to_append["Of which: Prospect, not perfect strict prospect"] +
-            revenue_to_append["Of which: Potential downgrade to GBIS"] +
-            revenue_to_append["Of which: Does not look like prospect"]
-        ) > 1:
-            raise ValueError("Error between top level revenue figures and breakdown - investigate me")
-
-        ha_revenue_results.append(revenue_to_append)
-
     ha_analysis_results = pd.DataFrame(ha_analysis_results)
-    ha_revenue_results = pd.DataFrame(ha_revenue_results)
+    ha_analysis_results.columns = pd.MultiIndex.from_tuples(ha_analysis_results.columns)
 
+    facts_and_figures = loader.facts_and_figures.copy()
+    facts_and_figures["ha_number"] = facts_and_figures["HA Name"].str.extract(r'(\d+)').astype(int)
+    facts_and_figures = facts_and_figures.sort_values("ha_number")
+    facts_and_figures = facts_and_figures.drop(columns=["ha_number"])
+
+    # Rename some of the cols
+    facts_and_figures = facts_and_figures.rename(
+        columns={
+            # ECO4 cols
+            "ECO4": "ECO4 - December",
+            "GBIS": "GBIS - December",
+            "eco4 (subject to ciga)": "ECO4 - subject to ciga",
+            "eco4": "ECO4 - doesn't need CIGA",
+            "eco4 - passed ciga": "ECO4 - passed CIGA",
+            "failed ciga": "ECO4 - failed CIGA",
+            "ECO4 - partially cancelled": "ECO4 - Install downgrade to GBIS",
+            "ECO4 - in progress": "ECO4 - Install in progress",
+            "ECO4 - cancelled": "ECO4 - Install cancelled",
+            # GBIS cols
+            "gbis": "GBIS total (asset list)"
+        }
+    )
+    # We calculate the eco4 total from the asset list
+    # 1) If ciga checks have been completed (i.e. ECO4 - passed ciga > 0) this sum is
+    # ECO4 - doesn't need CIGA + ECO4 - passed CIGA
+    # 2) if ciga checks haven't been completed (i.e. ECO4 - passed ciga is missing), this sum is
+    # ECO4 - doesn't need CIGA + ECO4 - subject to ciga
+    facts_and_figures["ECO4 total (asset list)"] = np.where(
+        facts_and_figures["ECO4 - passed CIGA"] > 0,
+        facts_and_figures["ECO4 - doesn't need CIGA"] + facts_and_figures["ECO4 - passed CIGA"],
+        facts_and_figures["ECO4 - doesn't need CIGA"] + facts_and_figures["ECO4 - subject to ciga"]
+    )
+
+    # Re-arrange the columns
+    facts_and_figures = facts_and_figures[
+        [
+            'HA Name',
+            'ECO4 - December',
+            'GBIS - December',
+            'ECO4 total (asset list)',
+            'GBIS total (asset list)',
+            'ECO4 - subject to ciga',
+            "ECO4 - doesn't need CIGA",
+            'ECO4 - passed CIGA',
+            'ECO4 - failed CIGA',
+            'ECO4 - installed',
+            'ECO4 - Install in progress',
+            'ECO4 - Install cancelled',
+            'ECO4 - partially installed',
+            'ECO4 - Install downgrade to GBIS',
+        ]
+    ]
+    # Add a note to flag any rows where both
+    # (ECO4 - subject to ciga is greater than 0) and (ECO4 - passed CIGA is greater than 0),
+    # because those rows still have ECO4 properties needing a CIGA check
+    facts_and_figures["Missed CIGA checks opportunity"] = None
+    facts_and_figures["Missed CIGA checks opportunity"] = np.where(
+        (facts_and_figures["ECO4 - subject to ciga"] > 0) & (facts_and_figures["ECO4 - passed CIGA"] > 0),
+        "potential opportunity of " + facts_and_figures["ECO4 - subject to ciga"].astype(
+            str) + " ECO4 properties needing a CIGA check",
+        facts_and_figures["Missed CIGA checks opportunity"]
+    )
+
+    # Re-arrange the columns
+
+    # Also sort ha_analysis_results by ha number
+    ha_analysis_results["ha_number"] = ha_analysis_results[("", "HA Name")].str.extract(r'(\d+)').astype(int)
+    ha_analysis_results = ha_analysis_results.sort_values("ha_number")
+    ha_analysis_results = ha_analysis_results.drop(columns=["ha_number"])
+
+    # We save 2 sheets
     # Automate creation of the excel
     # Create a Pandas Excel writer using XlsxWriter as the engine
-    with pd.ExcelWriter('HA Analysis - batch3.xlsx', engine='xlsxwriter') as writer:
+    with pd.ExcelWriter('HA Analysis Results.xlsx', engine='xlsxwriter') as writer:
         # Write each dataframe to a different worksheet without the index
-        for df, sheet in [(ha_revenue_results, 'Total Revenue'),
-                          (ha_analysis_results, 'By ECO4 and GBIS')]:
+        for df, sheet in [(facts_and_figures, 'HA Facts and Figures'),
+                          (ha_analysis_results, 'Asset Identification')]:
 
-            df.to_excel(writer, sheet_name=sheet, index=False)
+            df.to_excel(writer, sheet_name=sheet)
 
             # Auto-adjust columns' width
             for i, width in enumerate(get_col_widths(df)):
@@ -2134,7 +2064,7 @@ def app():
     # Determines if we want to use the cached data in s3
     use_cache = True
     # Determines if we want to perform the data pull
-    pull_data = True
+    pull_data = False
 
     # List all of the data in the folder
     directories = [str(list(entry.iterdir())[0]) for entry in DATA_FOLDER.iterdir() if entry.is_dir()]
@@ -2173,43 +2103,3 @@ def app():
         floor_area_decile_thresholds=floor_area_decile_thresholds,
         pull_data=pull_data
     )
-
-    # for ha_name, datasets in outputs.items():
-    #     datasets["results_df"] = datasets["results_df"].drop(
-    #         columns=["eligibility_cavity_type", "eligibility_loft_type"]
-    #     )
-    #
-    #     # Re-do
-    #     res = []
-    #     for _, row in tqdm(datasets["results_df"].iterrows(), total=datasets["results_df"].shape[0]):
-    #         epc = {
-    #             "walls-description": row["walls"],
-    #             "roof-description": row["roof"],
-    #             "floor-description": "",
-    #             "tenure": "",
-    #             "current-energy-efficiency": row["sap"],
-    #         }
-    #         eligibility = Eligibility(epc=epc, cleaned=cleaned)
-    #         eligibility.check_eco4_warmfront()
-    #         res.append(
-    #             {
-    #                 "row_id": row["row_id"],
-    #                 "eligibility_cavity_type": eligibility.eco4_warmfront["cavity_type"],
-    #                 "eligibility_loft_type": eligibility.eco4_warmfront["loft_type"]
-    #             }
-    #         )
-    #
-    #     # Merge back on
-    #     res = pd.DataFrame(res)
-    #     datasets["results_df"] = datasets["results_df"].merge(res, how="left", on="row_id")
-    #
-    #     # Re-save in s3
-    #     save_pickle_to_s3(
-    #         data={
-    #             "results_df": datasets["results_df"],
-    #             "scoring_df": datasets["scoring_df"],
-    #             "nodata": datasets["nodata"]
-    #         },
-    #         bucket_name="retrofit-datalake-dev",
-    #         s3_file_name=f"ha-analysis/{ha_name}/processed_results.pickle"
-    #     )

From 8b8e2bf902f8cc6c588eab8b64253580f3364694 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 1 Mar 2024 16:29:19 +0000
Subject: [PATCH 038/262] working on new forecast approach for warmfront
 remaining sales

---
 .../ha_15_32/ha_analysis_batch_3.py           | 811 +++++++++++++++++-
 utils/s3.py                                   |   2 +-
 2 files changed, 768 insertions(+), 45 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 61c4a243..bb27029e 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -17,6 +17,7 @@ from etl.eligibility.ha_15_32.app import prepare_model_data_row
 from backend.ml_models.api import ModelApi
 from etl.solar.SolarPhotoSupply import SolarPhotoSupply
 from recommendations.recommendation_utils import calculate_cavity_age
+from etl.epc.Record import EPCRecord
 
 EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
 ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env"
@@ -181,25 +182,25 @@ class DataLoader:
         if ha_name in ["HA1", "HA6", "HA16", "HA24"]:
             asset_list["matching_address"] = asset_list[
                 self.COLUMN_CONFIG[ha_name]["address"]
-            ].str.lower().str.strip()
+            ].astype(str).str.lower().str.strip()
             asset_list["matching_postcode"] = asset_list[
                 self.COLUMN_CONFIG[ha_name]["postcode"]
-            ].str.lower().str.strip()
+            ].astype(str).str.lower().str.strip()
         elif ha_name == "HA7":
             # Create matching_address by concatenating Address 1, Address 2, Address 3, Address 4, Postcode
-            asset_list["matching_address"] = asset_list["Address"].str.lower().str.strip() + ", " + \
-                                             asset_list["Address2"].str.lower().str.strip() + ", " + \
-                                             asset_list["Address3"].str.lower().str.strip() + ", " + \
-                                             asset_list["Postcode"].str.lower().str.strip()
-            asset_list["matching_postcode"] = asset_list["Postcode"].str.lower().str.strip()
+            asset_list["matching_address"] = asset_list["Address"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address2"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address3"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Postcode"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
         elif ha_name == "HA14":
             # Create matching_address by concatenating Address 1, Address 2, Address 3, Address 4, Postcode
-            asset_list["matching_address"] = asset_list["Address 1"].str.lower().str.strip() + ", " + \
-                                             asset_list["Address 2"].str.lower().str.strip() + ", " + \
-                                             asset_list["Address 3"].str.lower().str.strip() + ", " + \
-                                             asset_list["Address 4"].str.lower().str.strip() + ", " + \
-                                             asset_list["Postcode"].str.lower().str.strip()
-            asset_list["matching_postcode"] = asset_list["Postcode"].str.lower().str.strip()
+            asset_list["matching_address"] = asset_list["Address 1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address 2"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address 3"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address 4"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Postcode"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
 
         elif ha_name == "HA39":
             # Create matching_address by concatenating add_1, add_2, add_3, add_4, add_5, post_code
@@ -209,7 +210,7 @@ class DataLoader:
                                              asset_list["add_4"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["add_5"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["post_code"].astype(str).str.lower().str.strip()
-            asset_list["matching_postcode"] = asset_list["post_code"].str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["post_code"].astype(str).str.lower().str.strip()
         elif ha_name == "HA107":
             # Create matching_address by concatenating House No, Street, Town, District, Postcode
             asset_list["matching_address"] = asset_list["House No"].astype(str).str.lower().str.strip() + ", " + \
@@ -1098,8 +1099,8 @@ class DataLoader:
         self.december_figures = pd.read_csv(self.december_figures_filepath)
         # Remove the spaces in HA Name
         self.december_figures["HA Name"] = self.december_figures["HA Name"].str.replace(" ", "")
-        self.december_figures["ECO4"] = self.december_figures["ECO4"].astype("Int64")
-        self.december_figures["GBIS"] = self.december_figures["GBIS"].astype("Int64")
+        for col in ["ECO4", "GBIS", "ECO4 remaining", "GBIS remaining"]:
+            self.december_figures[col] = self.december_figures[col].astype("Int64")
 
         if self.use_cache:
             self.data = read_pickle_from_s3(
@@ -1203,7 +1204,6 @@ class DataLoader:
             # Update the asset list with the categorisations and rename changes
             if asset_list.shape[0] != asset_list_starting_size:
                 raise ValueError("The asset list has changed in size")
-            self.data[ha_name]["asset_list"] = asset_list
 
             # Report on sales
             sales_report = {}
@@ -1259,7 +1259,31 @@ class DataLoader:
                 )
 
                 # We get the sales
-                sales_report = survey_list["installation_status"].value_counts().to_dict()
+                sales_report = {
+                    "ECO4 - surveys sold": survey_list.shape[0],
+                    **survey_list["installation_status"].value_counts().to_dict()
+                }
+
+                # We find some cases where properties have sold but are missing CIGA checks
+                survey_list_to_merge = survey_list[["asset_list_row_id"]].copy()
+                survey_list_to_merge["has_a_survey_record"] = True
+                survey_list_to_merge = survey_list_to_merge[~pd.isnull(survey_list_to_merge["asset_list_row_id"])]
+
+                asset_list = asset_list.merge(survey_list_to_merge, how='left', on="asset_list_row_id")
+                asset_list["ECO Eligibility"] = np.where(
+                    (asset_list["ECO Eligibility"] == "eco4 (subject to ciga)") & (
+                        asset_list["has_a_survey_record"] == True
+                    ),
+                    "eco4 - passed ciga",
+                    asset_list["ECO Eligibility"]
+                )
+                asset_list = asset_list.drop(columns=["has_a_survey_record"])
+
+                # Update the survey list with installation status
+                self.data[ha_name]["survey_list"] = survey_list
+
+            # Insert updated asset list
+            self.data[ha_name]["asset_list"] = asset_list
 
             ha_facts_and_figures.append(
                 {
@@ -1687,7 +1711,21 @@ def analyse_ha_data(outputs, loader):
     :return:
     """
 
+    eco4_rate = 1710
+    gbis_rate = 600
+    old_eco4_rate = 1456
+    old_gbis_rate = 432
+
+    epc_c_threshold = 80
+    scheme_map = {
+        "ECO4": "ECO4",
+        "AFFORDABLE WARMTH": "ECO4",
+        "ECO4 A/W": "ECO4",
+        "ECO4 GBIS (ECO+)": "GBIS"
+    }
+
     ha_analysis_results = []
+    total_revenue_results = []
     for ha_name, datasets in outputs.items():
         inputs = [x for k, x in loader.data.items() if k == ha_name][0]
 
@@ -1702,6 +1740,88 @@ def analyse_ha_data(outputs, loader):
             left_on="asset_list_row_id"
         )
 
+        analysis_data["is_remaining"] = True
+
+        n_sold_eco4 = 0
+        n_sold_gbis = 0
+        if not inputs["survey_list"].empty:
+            # Merge on the survey list and signal everything that is remaining or not (i.e. anything that hasn't had
+            # a survey)
+            survey_list = inputs["survey_list"].copy()
+
+            # TODO: TEMP
+            scheme_column = survey_list.columns[0]
+            # We clean up the survey list's "INSTALLED OR CANCELLED" column
+            survey_list["installed_or_cancelled_clean"] = survey_list["INSTALLED OR CANCELLED"].str.lower()
+            # Remove all punctuation
+            survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.replace(
+                r'[^\w\s]', '', regex=True
+            )
+            # Collapse runs of whitespace into a single space
+            survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.replace(
+                r'\s+', ' ', regex=True
+            )
+            # Remove leading and trailing whitespace
+            survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.strip()
+
+            # Remap the values in the scheme column
+            survey_list[scheme_column] = survey_list[scheme_column].replace(scheme_map)
+
+            survey_list["installation_status"] = None
+            survey_list["installation_status"] = np.where(
+                survey_list["installed_or_cancelled_clean"].isin(["installed", "installed see notes"]),
+                "installed",
+                survey_list["installation_status"]
+            )
+            survey_list["installation_status"] = np.where(
+                survey_list["installed_or_cancelled_clean"].isin(["cancelled"]),
+                "cancelled",
+                survey_list["installation_status"]
+            )
+            # Find partial installations
+            survey_list["installation_status"] = np.where(
+                survey_list["installed_or_cancelled_clean"].str.contains("still to be installed"),
+                "partially installed",
+                survey_list["installation_status"]
+            )
+            # Find partial cancellations
+            # TODO: We might have more indications of partial cancellations
+            survey_list["installation_status"] = np.where(
+                survey_list["installed_or_cancelled_clean"].isin(["loft cancelled"]),
+                "partially cancelled",
+                survey_list["installation_status"]
+            )
+
+            # Finally, for other cases, we set the status to "in progress"
+            survey_list["installation_status"] = survey_list["installation_status"].fillna("in progress")
+
+            # We concatenate the scheme name with the installation status
+            survey_list["installation_status"] = (
+                survey_list[scheme_column] + " - " + survey_list["installation_status"]
+            )
+
+            # TODO: END TEMP
+
+            survey_list_to_merge = survey_list[["asset_list_row_id", scheme_column]].copy()
+            survey_list_to_merge["is_remaining"] = False
+            analysis_data = analysis_data.drop(columns="is_remaining").merge(
+                survey_list_to_merge,
+                how="left", on="asset_list_row_id"
+            )
+            analysis_data["is_remaining"] = analysis_data["is_remaining"].fillna(True)
+
+            n_sold_eco4 = survey_list_to_merge[survey_list_to_merge[scheme_column] == "ECO4"].shape[0]
+            n_sold_gbis = survey_list_to_merge[survey_list_to_merge[scheme_column] == "GBIS"].shape[0]
+
+        # Take just remaining
+        analysis_data = analysis_data[analysis_data["is_remaining"]]
+
+        # Also, if the HA has started selling, we remove any that are still subject to ciga
+        n_eco4_missed_subject_to_ciga = 0
+        if not inputs["survey_list"].empty:
+            n_eco4_missed_subject_to_ciga = (analysis_data["ECO Eligibility"] == "eco4 (subject to ciga)").sum()
+            analysis_data = analysis_data[analysis_data["ECO Eligibility"] != "eco4 (subject to ciga)"]
+
         ################################################################################################
         # We take the properties that strictly qualified under eco
         ################################################################################################
@@ -1714,8 +1834,11 @@ def analyse_ha_data(outputs, loader):
             eco4_identified["identification_type"]
         )
 
+        # For expansive, the property can be no higher than an EPC C
         eco4_identified["identification_type"] = np.where(
-            (eco4_identified["eco4_eligible"] == True) & (eco4_identified["eco4_strict"] == False),
+            (eco4_identified["eco4_eligible"] == True) & (eco4_identified["eco4_strict"] == False) & (
+                eco4_identified["sap"] <= epc_c_threshold
+            ),
             "expansive",
             eco4_identified["identification_type"]
         )
@@ -1743,21 +1866,17 @@ def analyse_ha_data(outputs, loader):
                     "Meets fabric, fails SAP check",
                     "Meets cavity, loft borderline, meets sap",
                 ]
-            ),
+            ) & (ciga_dependent_identified["sap"] <= epc_c_threshold),
             "strict",
             ciga_dependent_identified["identification_type"]
         )
 
         ciga_dependent_identified["identification_type"] = np.where(
-            (ciga_dependent_identified["eco4_message"].isin(["All conditions fail", "failed fabric check"])) &
-            (ciga_dependent_identified["walls"].isin(["Cavity wall, filled cavity"])),
-            "expansive",
-            ciga_dependent_identified["identification_type"]
-        )
-
-        ciga_dependent_identified["identification_type"] = np.where(
-            (ciga_dependent_identified["eco4_message"].isin(["Meets just cavity"])) | (
+            ((ciga_dependent_identified["eco4_message"].isin(["Meets just cavity"])) | (
                 ciga_dependent_identified["walls"].isin(["Cavity wall, filled cavity"])
+            )) & (
+                (ciga_dependent_identified["sap"] <= epc_c_threshold) &
+                pd.isnull(ciga_dependent_identified["identification_type"])
             ),
             "expansive",
             ciga_dependent_identified["identification_type"]
@@ -1775,7 +1894,9 @@ def analyse_ha_data(outputs, loader):
         )
 
         gbis_identified["identification_type"] = np.where(
-            (gbis_identified["gbis_eligible"] == True) & (gbis_identified["sap"] >= 69),
+            (gbis_identified["gbis_eligible"] == True) & (gbis_identified["sap"] <= epc_c_threshold) & (
+                pd.isnull(gbis_identified["identification_type"])
+            ),
             "expansive",
             gbis_identified["identification_type"]
         )
@@ -1806,9 +1927,16 @@ def analyse_ha_data(outputs, loader):
             ]
         surplus_gbis = surplus_gbis[surplus_gbis["is_estimated"] == False]
 
-        # Output variables
+        # Output variables - the data was sent to us in December, but the remaining figures are
+        # what was in November
+        november_remaining = loader.december_figures[loader.december_figures["HA Name"] == ha_name]
+
         # ECO4
-        n_properties_in_asset_list = inputs["asset_list"].shape[0]
+        n_properties_remaining_in_asset_list = inputs["asset_list"].shape[0]
+        november_eco4_remaining = max(november_remaining["ECO4 remaining"].values[0], 0)
+        november_eco4_sold = november_remaining["No. of Tech surveys complete - Eco 4"].values[0]
+        eco4_sales_since_november = n_sold_eco4 - november_eco4_sold
+
         n_warmfront_identified_eco4 = eco4_identified.shape[0] + ciga_dependent_identified.shape[0]
         eco4_of_which_identified_strict = (
             eco4_identified[eco4_identified["identification_type"] == "strict"].shape[0] +
@@ -1820,26 +1948,37 @@ def analyse_ha_data(outputs, loader):
         )
         # GBIS
         n_warmfront_identified_gbis = gbis_identified.shape[0]
+        november_gbis_remaining = max(november_remaining["GBIS remaining"].values[0], 0)
+        november_gbis_sold = november_remaining["No. of Tech surveys complete - GBIS"].values[0]
+        gbis_sales_since_november = n_sold_gbis - november_gbis_sold
         gbis_of_which_identified_strict = gbis_identified[gbis_identified["identification_type"] == "strict"].shape[0]
         gbis_of_which_identified_expansive = \
             gbis_identified[gbis_identified["identification_type"] == "expansive"].shape[0]
 
         to_append = {
             ("", "HA Name"): ha_name,
-            ("", "# Properties in asset list"): n_properties_in_asset_list,
+            ("", "# properties in asset list"): n_properties_remaining_in_asset_list,
             ############
             # ECO4
             ############
-            ("ECO4", "# Properties identieid by Warmfront"): n_warmfront_identified_eco4,
+            ("ECO4", "# remaining November file"): november_eco4_remaining,
+            ("ECO4", "# sold in November file"): november_eco4_sold,
+            ("ECO4", "# sold (survey list)"): n_sold_eco4,
+            ("ECO4", "# that missed CIGA check"): n_eco4_missed_subject_to_ciga,
+            ("ECO4", "# Remaining properties (asset list)"): n_warmfront_identified_eco4,
             ("ECO4", "Of which identified by model - strict"): eco4_of_which_identified_strict,
             ("ECO4", "Of which identified by model - expansive"): eco4_of_which_identified_expansive,
             ("ECO4", "Of which identified by model - total"): (
-                eco4_of_which_identified_strict + eco4_of_which_identified_expansive),
+                eco4_of_which_identified_strict + eco4_of_which_identified_expansive
+            ),
             ("ECO4", "Additional properties"): surplus_eco4.shape[0],
             ############
             # GBIS
             ############
-            ("GBIS", "# Properties identieid by Warmfront"): n_warmfront_identified_gbis,
+            ("GBIS", "# remaining November file"): november_gbis_remaining,
+            ("GBIS", "# sold in November file"): november_gbis_sold,
+            ("GBIS", "# sold (survey list)"): n_sold_gbis,
+            ("GBIS", "# Remaining properties (asset list)"): n_warmfront_identified_gbis,
             ("GBIS", "Of which identified by model - strict"): gbis_of_which_identified_strict,
             ("GBIS", "Of which identified by model - expansive"): gbis_of_which_identified_expansive,
             ("GBIS", "Of which identified by model - total"): (
@@ -1850,6 +1989,24 @@ def analyse_ha_data(outputs, loader):
 
         ha_analysis_results.append(to_append)
 
+        # Calculate the revenue results
+        to_append_revenue = {
+            ("", "HA Name"): ha_name,
+            # Eco4 revenue
+            ("ECO4", "£ remaining November file"): november_eco4_remaining * eco4_rate,
+            ("ECO4", "£ sold November file"): november_eco4_sold * old_eco4_rate,
+            ("ECO4", "£ sold since November"): eco4_sales_since_november * eco4_rate,
+            ("ECO4", "£ stuck at ciga check"): n_eco4_missed_subject_to_ciga * eco4_rate,
+            ("ECO4", "£ remaining (asset list)"): n_warmfront_identified_eco4 * eco4_rate,
+            ("ECO4", "Of which identified by model - strict"): eco4_of_which_identified_strict * eco4_rate,
+            ("ECO4", "Of which identified by model - expansive"): eco4_of_which_identified_expansive * eco4_rate,
+            ("ECO4", "Of which identified by model - total"): eco4_rate * (
+                eco4_of_which_identified_strict + eco4_of_which_identified_expansive
+            ),
+            ("ECO4", "Additional properties"): eco4_rate * surplus_eco4.shape[0],
+        }
+        total_revenue_results.append(to_append_revenue)
+
     ha_analysis_results = pd.DataFrame(ha_analysis_results)
     ha_analysis_results.columns = pd.MultiIndex.from_tuples(ha_analysis_results.columns)
 
@@ -1862,8 +2019,8 @@ def analyse_ha_data(outputs, loader):
     facts_and_figures = facts_and_figures.rename(
         columns={
             # ECO4 cols
-            "ECO4": "ECO4 - December",
-            "GBIS": "GBIS - December",
+            "ECO4": "ECO4 - November",
+            "GBIS": "GBIS - November",
             "eco4 (subject to ciga)": "ECO4 - subject to ciga",
             "eco4": "ECO4 - doesn't need CIGA",
             "eco4 - passed ciga": "ECO4 - passed CIGA",
@@ -1880,19 +2037,27 @@ def analyse_ha_data(outputs, loader):
     # ECO4 - doesn't need CIGA + ECO4 - passed CIGA
     # 2) if ciga checks haven't been completed (i.e. ECO4 - passed ciga is missing), this sum is
     # ECO4 - doesn't need CIGA + ECO4 - subject to ciga
-    facts_and_figures["ECO4 total (asset list)"] = np.where(
+    facts_and_figures["ECO4 total (asset list - pre ciga)"] = (
+        facts_and_figures["ECO4 - doesn't need CIGA"] +
+        facts_and_figures["ECO4 - subject to ciga"] +
+        facts_and_figures["ECO4 - passed CIGA"]
+    )
+
+    facts_and_figures["ECO4 total (asset list - post ciga)"] = None
+    facts_and_figures["ECO4 total (asset list - post ciga)"] = np.where(
         facts_and_figures["ECO4 - passed CIGA"] > 0,
         facts_and_figures["ECO4 - doesn't need CIGA"] + facts_and_figures["ECO4 - passed CIGA"],
-        facts_and_figures["ECO4 - doesn't need CIGA"] + facts_and_figures["ECO4 - subject to ciga"]
+        facts_and_figures["ECO4 total (asset list - post ciga)"]
     )
 
     # Re-arrange the columns
     facts_and_figures = facts_and_figures[
         [
             'HA Name',
-            'ECO4 - December',
-            'GBIS - December',
-            'ECO4 total (asset list)',
+            'ECO4 - November',
+            'GBIS - November',
+            'ECO4 total (asset list - pre ciga)',
+            'ECO4 total (asset list - post ciga)',
             'GBIS total (asset list)',
             'ECO4 - subject to ciga',
             "ECO4 - doesn't need CIGA",
@@ -1916,6 +2081,8 @@ def analyse_ha_data(outputs, loader):
         facts_and_figures["Missed CIGA checks opportunity"]
     )
 
+    facts_and_figures.to_csv("Facts and figures sample.csv")
+
     # Re arrage the columns
 
     # Also sort ha_analysis_results by ha number
@@ -1937,6 +2104,333 @@ def analyse_ha_data(outputs, loader):
             for i, width in enumerate(get_col_widths(df)):
                 writer.sheets[sheet].set_column(i, i, width)
 
+    # Inspection: - Looking into the proportion of homes with "cavity, as built, insulated (assumed)" as their
+    #               description, and what proportion of time they get identified via non-invasive surveys
+
+    # true_eco4_assets = []
+    # ciga_dependent_assets = []
+    # not_eligible = []
+    # as_built_insulated = []
+    # date_cols = {
+    #     "HA39": "date_built",
+    #     "HA14": "Built In Year",
+    #     "HA6": "Construction Year",
+    #     "HA1": "Build Date",
+    #     "HA107": "YEAR BUILT"
+    # }
+    # for ha_name, data_objects in outputs.items():
+    #     inputs = [x for k, x in loader.data.items() if k == ha_name][0]
+    #
+    #     date_col = date_cols[ha_name]
+    #     results_df = data_objects["results_df"].copy()
+    #     df = inputs["asset_list"][['asset_list_row_id', "ECO Eligibility", date_col]].rename(
+    #         columns={"row_meaning": "asset_identification_status", date_col: "date_built"}
+    #     ).merge(
+    #         results_df,
+    #         how="left",
+    #         right_on="row_id",
+    #         left_on="asset_list_row_id"
+    #     )
+    #
+    #     # take the true ECO4
+    #     true_eco4 = df[df["ECO Eligibility"] == "eco4"].copy()
+    #     ciga_dependent = df[
+    #         df["ECO Eligibility"].isin(
+    #             [
+    #                 "eco4 (subject to ciga)",
+    #                 "failed ciga",
+    #                 "eco4 - passed ciga"
+    #             ]
+    #         )
+    #     ]
+    #     insulated_assumed = df[df["walls"] == "Cavity wall, as built, insulated"].copy()
+    #     # We convert date built to datetime
+    #     try:
+    #         insulated_assumed = insulated_assumed[~pd.isnull(insulated_assumed["date_built"])]
+    #         insulated_assumed["year_built"] = pd.to_datetime(insulated_assumed["date_built"].astype(str)).dt.year
+    #         as_built_insulated.append(insulated_assumed)
+    #     except Exception as e:
+    #         print("oh well")
+    #
+    #     true_eco4_assets.append(true_eco4)
+    #     ciga_dependent_assets.append(ciga_dependent)
+    #
+    # true_eco4_assets = pd.concat(true_eco4_assets)
+    # ciga_dependent_assets = pd.concat(ciga_dependent_assets)
+    # as_built_insulated = pd.concat(as_built_insulated)
+    #
+    # true_eco4_assets["walls"].value_counts(normalize=True)
+    # ciga_dependent_assets["walls"].value_counts(normalize=True)
+    #
+    # from recommendations.recommendation_utils import extract_insulation_thickness
+    #
+    # true_eco4_assets["roof_insulation_thickness"] = true_eco4_assets["roof"].apply(
+    #     lambda x: extract_insulation_thickness(x)
+    # )
+    #
+    # true_eco4_assets["e"] = true_eco4_assets.merge(
+    #     pd.DataFrame(cleaned["roof-description"])[["original_description", "insulation_thickness"]],
+    #     how="left",
+    #     left_on="roof",
+    #     right_on="original_description"
+    # )
+    #
+    # true_eco4_assets["sap"].mean()
+    #
+    # true_eco4_assets["insulation_thickness"].isin(
+    #     ["250", "150", "200", "100", "75", "50"]
+    # ).sum() / true_eco4_assets.shape[0]
+    #
+    # true_eco4_assets["insulation_thickness"].isin(
+    #     ["100"]
+    # ).sum() / true_eco4_assets.shape[0]
+    #
+    # as_built_insulated.groupby("property_type")["ECO Eligibility"].value_counts(normalize=True)
+
+
+def get_propensity_model_data(
+    loader, cleaned, cleaning_data, created_at, photo_supply_lookup,
+    floor_area_decile_thresholds, pull_data=True
+):
+    # TODO: Set a seed!
+    model_data = []
+    for ha_name, data_assets in loader.data.items():
+
+        logger.info("Processing HA: %s", ha_name)
+        if data_assets["survey_list"].empty:
+            continue
+
+        number_sold = data_assets["survey_list"].shape[0]
+
+        # For each HA, we pull in the data required and store it in S3
+        asset_list = data_assets["asset_list"].copy()
+        # We determine the number of properties that we should select that are eligible
+        asset_list_size = asset_list.shape[0]
+        # Number eligible
+        n_eligibile = asset_list[asset_list["ECO Eligibility"] != "not eligible"].shape[0]
+        success_rate = n_eligibile / asset_list_size
+        needed_sample_size = np.ceil(number_sold / success_rate)
+        number_negative_samples = int(needed_sample_size - number_sold)
+
+        sold_asset_list_ids = data_assets["survey_list"]["asset_list_row_id"].tolist()
+        negative_sample_asset_list_ids = asset_list["asset_list_row_id"].sample(number_negative_samples).tolist()
+        sample_ids = sold_asset_list_ids + negative_sample_asset_list_ids
+
+        sample_asset_list = asset_list[asset_list["asset_list_row_id"].isin(sample_ids)]
+
+        # In order to have the most confidence, we should take just properties that have 1 EPC. We might need to
+        # cut down the number of properties that we include because of this
+        # Note: This is an imbalanced problem, so we will need to build a model that accommodates that
+
+        data = []
+        errors = []
+        for index, property_meta in tqdm(sample_asset_list.iterrows(), total=len(sample_asset_list)):
+
+            if property_meta["matching_postcode"] is None:
+                continue
+
+            property_type, built_form = get_property_type_and_built_form(
+                property_meta=property_meta, ha_name=ha_name
+            )
+
+            searcher = SearchEpc(
+                address1=str(property_meta["HouseNo"]),
+                postcode=property_meta["matching_postcode"],
+                auth_token=EPC_AUTH_TOKEN,
+                os_api_key="",
+                full_address=property_meta["matching_address"]
+            )
+            searcher.ordnance_survey_client.property_type = property_type
+            searcher.ordnance_survey_client.built_form = built_form
+            searcher.find_property(skip_os=True)
+
+            if searcher.newest_epc is None:
+                continue
+
+            if searcher.newest_epc.get("estimated"):
+                # We insert the row ID as our proxy for UPRN
+                searcher.newest_epc["uprn"] = int(property_meta["asset_list_row_id"].split(ha_name)[1])
+
+            newest_epc = searcher.newest_epc
+            older_epcs = searcher.older_epcs
+            full_sap_epc = searcher.full_sap_epc
+
+            # If we have more than 1 EPC, for the moment we just skip the property
+            if older_epcs or full_sap_epc:
+                continue
+            try:
+
+                # We clean up the data
+                epc_records = {
+                    'original_epc': newest_epc.copy(),
+                    'full_sap_epc': full_sap_epc.copy(),
+                    'old_data': older_epcs.copy(),
+                }
+
+                epc_record = EPCRecord(
+                    epc_records=epc_records,
+                    run_mode="newdata",
+                    cleaning_data=cleaning_data
+                )
+
+                # If we have some data, continue
+                data.append(
+                    {
+                        "ECO Eligibility": property_meta["ECO Eligibility"],
+                        "asset_list_row_id": property_meta["asset_list_row_id"],
+                        **epc_record.get("prepared_epc")
+                    }
+                )
+            except Exception as e:
+                errors.append(
+                    {
+                        "error": str(e),
+                        "asset_list_row_id": property_meta["asset_list_row_id"],
+                        "matching_postcode": property_meta["matching_postcode"],
+                        "matching_address": property_meta["matching_address"]
+                    }
+                )
+
+        data = pd.DataFrame(data)
+        # We store the results in S3 as a pickle
+        save_pickle_to_s3(
+            data=data,
+            bucket_name="retrofit-datalake-dev",
+            s3_file_name=f"propensity_model_data/{ha_name}/train.pickle"
+        )
+
+        # Store the errors
+        if errors:
+            save_pickle_to_s3(
+                data=errors,
+                bucket_name="retrofit-datalake-dev",
+                s3_file_name=f"propensity_model_data/{ha_name}/errors.pickle"
+            )
+
+        model_data.append(data)
+
+    return model_data
+
+
+def conversion_model(loader):
+    # Read in the model data
+
+    model_data = []
+    for ha_name in loader.data.keys():
+        try:
+            picked = read_pickle_from_s3(
+                bucket_name="retrofit-datalake-dev",
+                s3_file_name=f"propensity_model_data/{ha_name}/train.pickle"
+            )
+            data = pd.DataFrame(picked)
+
+            # We merge on the sales data
+            sales_data = loader.data[ha_name]["survey_list"].copy()
+            data = data.merge(
+                sales_data[["asset_list_row_id", "installation_status"]],
+                how="left",
+                on="asset_list_row_id"
+            )
+            data["ha_name"] = ha_name
+
+        except Exception as e:
+            logger.error("Error reading in the data for %s", ha_name)
+            continue
+
+        model_data.append(data)
+
+    model_data = pd.concat(model_data)
+
+    model_data["response"] = model_data["installation_status"].isin(
+        [
+            "ECO4 - in progress",
+            "ECO4 - installed"
+        ]
+    ).astype(int)
+
+    # Because of how we pulled the data, we need to re-balance the sample
+    ha_names = model_data["ha_name"].unique()
+
+    balanced_sample = []
+    for ha_name in ha_names:
+        df = model_data[model_data["ha_name"] == ha_name]
+        positive_samples = df[df["response"] == 1]
+        negative_samples = df[df["response"] != 1]
+
+        inputs = [x for k, x in loader.data.items() if k == ha_name][0]
+        asset_list = inputs["asset_list"].copy()
+        asset_list_size = asset_list.shape[0]
+        n_eligibile = asset_list[asset_list["ECO Eligibility"] != "not eligible"].shape[0]
+        success_rate = n_eligibile / asset_list_size
+        needed_sample_size = np.ceil(positive_samples.shape[0] / success_rate)
+        number_negative_samples = int(needed_sample_size - positive_samples.shape[0])
+        negative_samples_subset = negative_samples.sample(number_negative_samples)
+
+        output = pd.concat([positive_samples, negative_samples_subset])
+
+        balanced_sample.append(output)
+
+    balanced_sample = pd.concat(balanced_sample)
+
+    # We work with a small sample
+    # Drop the ECO Eligibility column and installation_status column
+    # The identifier columns (asset_list_row_id, uprn, lmk_key) are dropped as well
+    balanced_sample = balanced_sample.drop(
+        columns=['ECO Eligibility', 'asset_list_row_id', 'address', 'uprn_source', 'address3', 'local_authority_label',
+                 'county', 'postcode', 'constituency', 'local_authority', 'inspection_date', 'address1',
+                 'constituency_label', 'building_reference_number', 'address2', 'posttown', 'lodgement_datetime',
+                 'uprn', 'lodgement_date', 'lmk_key', 'installation_status', 'ha_name']
+    )
+
+    # POC model
+    df = balanced_sample.copy()
+    # Fill missing numeric values with the column means, if any exist
+    numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns
+    df[numeric_cols] = df[numeric_cols].fillna(df[numeric_cols].mean())
+
+    categorical_cols = df.select_dtypes(include=['object', 'category']).columns
+    df[categorical_cols] = df[categorical_cols].fillna("other")
+
+    # Reduce the number of categories to a specific number and the rest to other
+    max_n_categories = 10
+    for col in categorical_cols:
+        top_categories = df[col].value_counts().nlargest(max_n_categories).index
+        df[col] = df[col].where(df[col].isin(top_categories), other="other")
+
+    # Use a model based approach to feature selection
+    import xgboost as xgb
+    from sklearn.model_selection import train_test_split
+
+    # The outcome column is 'response'. FIXME: the truncated `.va` expression below will fail at runtime
+    X = df.drop(columns=['response'])
+    y = df['response']
+    df["low_energy_fixed_light_count"].va
+
+    # Encoding categorical variables if not already done
+    X = pd.get_dummies(X, drop_first=True)
+
+    # Splitting the data into train and test sets
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+    # Initialize an XGBoost classifier
+    model = xgb.XGBClassifier()
+
+    # Fit the model
+    model.fit(X_train, y_train)
+
+    # Get feature importances
+    feature_importances = model.feature_importances_
+
+    # Map feature importances to their corresponding column names
+    feature_importance_dict = {feature: importance for feature, importance in zip(X.columns, feature_importances)}
+
+    # Sort features by importance
+    sorted_features = sorted(feature_importance_dict.items(), key=lambda item: item[1], reverse=True)
+
+    # Display sorted features
+    for feature, importance in sorted_features:
+        print(f"{feature}: {importance}")
+
 
 def patch_cleaned(cleaned):
     # Patch to handle the a missing description
@@ -2054,6 +2548,218 @@ def patch_cleaned(cleaned):
     return cleaned
 
 
+def forecast_remaining_sales(loader):
+    # Assumptions:
+    # We cap the ciga conversion rate at 75% because I expect future HAs to have a lower CIGA conversion rate
+    # and I don't want the numbers to change too much, depending on the CIGA conversion rate
+    maximum_ciga_conversion = 0.75
+
+    gbis_rate = 600
+    eco4_rate = 1710
+    old_gbis_rate = 432
+    old_eco4_rate = 1456
+
+    # 1) Calculate the conversion rate from passed CIGA to actual sale
+    converted_ciga_jobs = []
+    for ha_name, input_data in loader.data.items():
+        asset_list = input_data["asset_list"].copy()
+        survey_list = input_data["survey_list"].copy()
+
+        if survey_list.empty:
+            continue
+
+        ciga_dependent_assets = asset_list[
+            asset_list["ECO Eligibility"] == "eco4 - passed ciga"
+            ]
+
+        # These are now the ciga dependent assets at installation
+        ciga_dependent_assets_at_installation = ciga_dependent_assets.merge(
+            survey_list[["asset_list_row_id", "installation_status"]],
+            how="inner",
+            on="asset_list_row_id"
+        )
+
+        # We then calculate how many get cancelled
+        ciga_dependent_assets_sold = ciga_dependent_assets_at_installation[
+            ciga_dependent_assets_at_installation["installation_status"].isin(
+                [
+                    "ECO4 - installed", "ECO4 - in progress"
+                ]
+            )
+        ]
+
+        ciga_dependent_assets_failed = ciga_dependent_assets_at_installation[
+            ~ciga_dependent_assets_at_installation["installation_status"].isin(
+                [
+                    "ECO4 - installed", "ECO4 - in progress"
+                ]
+            )
+        ]
+
+        converted_ciga_jobs.append(
+            {
+                "HA Name": ha_name,
+                "# Ciga dependent at installation": ciga_dependent_assets_at_installation.shape[0],
+                "# Ciga dependent successfully installed": ciga_dependent_assets_sold.shape[0],
+                "# Ciga dependent failed install": ciga_dependent_assets_failed.shape[0]
+            }
+        )
+
+    converted_ciga_jobs = pd.DataFrame(converted_ciga_jobs)
+
+    # We calculate a ciga pass to install conversion rate
+    median_ciga_pass_to_install = (
+        converted_ciga_jobs["# Ciga dependent successfully installed"].sum() /
+        converted_ciga_jobs["# Ciga dependent at installation"].sum()
+    )
+
+    # 2) Calculate the conversion rate from CIGA dependent ciga passed
+    ciga_passrates = []
+    for ha_name, input_data in loader.data.items():
+
+        # If we don't have a ciga list, we can't do anything
+        if input_data["ciga_list"].empty:
+            continue
+
+        # 1) Calculate the conversion rate for CIGA to actual sale
+        asset_list = input_data["asset_list"].copy()
+
+        ciga_completed_assets = asset_list[
+            asset_list["ECO Eligibility"].isin(
+                [
+                    "eco4 - passed ciga",
+                    "failed ciga"
+                ]
+            )
+        ]
+
+        ciga_passed = ciga_completed_assets[
+            ciga_completed_assets["ECO Eligibility"].isin(
+                [
+                    "eco4 - passed ciga"
+                ]
+            )
+        ]
+
+        ciga_passrates.append(
+            {
+                "Ha Name": ha_name,
+                "# CIGA dependent": ciga_completed_assets.shape[0],
+                "# CIGA passed": ciga_passed.shape[0],
+            }
+        )
+
+    ciga_passrates = pd.DataFrame(ciga_passrates)
+
+    median_ciga_pass_to_install = ciga_passrates["# CIGA passed"].sum() / ciga_passrates["# CIGA dependent"].sum()
+
+    # 3) Calculate the conversion rate of an ECO4 and a GBISjob, that doesn't need ciga, to install
+    eco4_ciga_independent_passrates = []
+    gbis_ciga_independent_passrates = []
+    for ha_name, input_data in loader.data.items():
+        asset_list = input_data["asset_list"].copy()
+        survey_list = input_data["survey_list"].copy()
+
+        if survey_list.empty:
+            continue
+
+        # For properties that were identified as a typical ECO4 job, we calculate the number of properties that
+        # installed
+        # vs cancelled
+
+        typical_eco4 = asset_list[asset_list["ECO Eligibility"] == "eco4"]
+        typical_gbis = asset_list[asset_list["ECO Eligibility"] == "gbis"]
+
+        # Merge on the surveys
+        typical_eco4_installed = typical_eco4.merge(
+            survey_list[["asset_list_row_id", "installation_status"]], how="inner", on="asset_list_row_id"
+        )
+
+        if not typical_eco4_installed.empty:
+            typical_eco4_sold = typical_eco4_installed[
+                typical_eco4_installed["installation_status"].isin(
+                    [
+                        "ECO4 - installed", "ECO4 - in progress"
+                    ]
+                )
+            ]
+
+            eco4_ciga_independent_passrates.append(
+                {
+                    "Ha Name": ha_name,
+                    "# ECO4 at install stage": typical_eco4_installed.shape[0],
+                    "# ECO4 successfully installed": typical_eco4_sold.shape[0]
+                }
+            )
+
+        typical_gbis_installed = typical_gbis.merge(
+            survey_list[["asset_list_row_id", "installation_status"]], how="inner", on="asset_list_row_id"
+        )
+        if not typical_gbis_installed.empty:
+            typical_gbis_sold = typical_gbis_installed[
+                typical_gbis_installed["installation_status"].isin(
+                    [
+                        "GBIS - in progress", "GBIS - installed"
+                    ]
+                )
+            ]
+
+            gbis_ciga_independent_passrates.append(
+                {
+                    "Ha Name": ha_name,
+                    "# GBIS at install stage": typical_gbis_installed.shape[0],
+                    "# GBIS successfully installed": typical_gbis_sold.shape[0]
+                }
+            )
+
+    eco4_ciga_independent_passrates = pd.DataFrame(eco4_ciga_independent_passrates)
+    gbis_ciga_independent_passrates = pd.DataFrame(gbis_ciga_independent_passrates)
+
+    median_eco4_to_install = (
+        eco4_ciga_independent_passrates["# ECO4 successfully installed"].sum() /
+        eco4_ciga_independent_passrates["# ECO4 at install stage"].sum()
+    )
+
+    median_gbis_to_install = (
+        gbis_ciga_independent_passrates["# GBIS successfully installed"].sum() /
+        gbis_ciga_independent_passrates["# GBIS at install stage"].sum()
+    )
+
+    # Produce the final output
+    december_figures = loader.december_figures.copy()
+    december_figures = december_figures.fillna(0)
+    results = []
+    for ha_name, input_data in loader.data.items():
+        # Original warmfront figures
+        original_warmfront_estimates = december_figures[december_figures["HA Name"] == ha_name]
+
+        original_warmfront_eco4 = original_warmfront_estimates["ECO4"].values[0]
+        original_warmfront_remaining_eco4 = original_warmfront_estimates["ECO4 remaining"].values[0]
+        original_warmfront_gbis = original_warmfront_estimates["GBIS"].values[0]
+        original_warmfront_remaining_gbis = original_warmfront_estimates["GBIS remaining"].values[0]
+
+        original_warmfront_eco4_revenue = (
+            original_warmfront_remaining_eco4 * eco4_rate +
+            (original_warmfront_eco4 - original_warmfront_remaining_eco4) * old_eco4_rate
+        )
+        original_warmfront_remaining_eco4_revenue = original_warmfront_remaining_eco4 * eco4_rate
+
+        original_warmfront_gbis_revenue = (
+            original_warmfront_remaining_gbis * gbis_rate +
+            (original_warmfront_gbis - original_warmfront_remaining_gbis) * old_gbis_rate
+        )
+
+        results.append(
+            {
+                ("", "", "HA Name"): ha_name,
+                ("Original Warmfront estimate", "Total - #", "ECO4 - November"): original_warmfront_eco4,
+                ("", "Remaining - #", ""): original_warmfront_remaining_eco4,
+                ("", "Total - £", ""): original_warmfront_eco4_revenue,
+                ("", "Remaining - £", ""): original_warmfront_remaining_eco4_revenue,
+            }
+        )
+
+
 def app():
     """
     This app contains the housin association analysis for HAs 1, 6, 14, 39 and 107.
@@ -2067,11 +2773,14 @@ def app():
     pull_data = False
 
     # List all of the data in the folder
-    directories = [str(list(entry.iterdir())[0]) for entry in DATA_FOLDER.iterdir() if entry.is_dir()]
+
+    directories = [str(file) for entry in DATA_FOLDER.iterdir() if entry.is_dir()
+                   for file in entry.iterdir() if file.suffix == '.xlsx']
     # Grab the December HA figures filepath
     december_figures_filepath = "local_data/ha_data/HA_December_figures.csv"
 
-    priority_has = ["HA1", "HA6", "HA7", "HA14", "HA16", "HA24", "HA39", "HA107"]
+    # priority_has = ["HA1", "HA6", "HA7", "HA14", "HA16", "HA24", "HA39", "HA107"]
+    priority_has = ["HA1", "HA6", "HA7", "HA14", "HA16", "HA39", "HA107"]
     # Filter down the directories to only the priority HAs
     directories = [d for d in directories if d.split("/")[2] in priority_has]
 
@@ -2103,3 +2812,17 @@ def app():
         floor_area_decile_thresholds=floor_area_decile_thresholds,
         pull_data=pull_data
     )
+
+    analyse_ha_data(outputs, loader)
+
+    # import pickle
+    # with open("ha_analysis.pickle", "wb") as f:
+    #     pickle.dump({"outputs": outputs, "loader": loader}, f)
+
+    # To read:
+    # import pickle
+    # with open("ha_analysis.pickle", "rb") as f:
+    #     outputs = pickle.load(f)["outputs"]
+    #
+    # with open("loader.pickle", "rb") as f:
+    #     loader = pickle.load(f)
diff --git a/utils/s3.py b/utils/s3.py
index cb55094a..8d36bdb3 100644
--- a/utils/s3.py
+++ b/utils/s3.py
@@ -184,7 +184,7 @@ def read_pickle_from_s3(bucket_name, s3_file_name):
         logger.errpr("Incomplete credentials provided.")
         return None
     except Exception as e:
-        logger.errpr(f'Failed to download data from {bucket_name}/{s3_file_name}: {str(e)}')
+        logger.error(f'Failed to download data from {bucket_name}/{s3_file_name}: {str(e)}')
         return None
 
     # Deserialize data from pickle format

From 9e679bd3fdb6e38a263f804ffdb07dda3892e7b1 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 1 Mar 2024 16:59:22 +0000
Subject: [PATCH 039/262] working on new forecast methodology

---
 .../ha_15_32/ha_analysis_batch_3.py           | 81 +++++++++++++++++--
 1 file changed, 73 insertions(+), 8 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index bb27029e..21af73ff 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -2728,15 +2728,22 @@ def forecast_remaining_sales(loader):
     # Produce the final output
     december_figures = loader.december_figures.copy()
     december_figures = december_figures.fillna(0)
+    # If we have negative remaining, it means that they actually sold more ECO4/GBIS jobs than they initially
+    # thought, so we set remaining to 0
+    december_figures["ECO4 remaining"] = np.where(
+        december_figures["ECO4 remaining"] < 0, 0, december_figures["ECO4 remaining"]
+    )
+    december_figures["GBIS remaining"] = np.where(
+        december_figures["GBIS remaining"] < 0, 0, december_figures["GBIS remaining"]
+    )
+
     results = []
     for ha_name, input_data in loader.data.items():
-        # Original warmfront figures
+        # Original warmfront figures - ECO4
         original_warmfront_estimates = december_figures[december_figures["HA Name"] == ha_name]
 
         original_warmfront_eco4 = original_warmfront_estimates["ECO4"].values[0]
         original_warmfront_remaining_eco4 = original_warmfront_estimates["ECO4 remaining"].values[0]
-        original_warmfront_gbis = original_warmfront_estimates["GBIS"].values[0]
-        original_warmfront_remaining_gbis = original_warmfront_estimates["GBIS remaining"].values[0]
 
         original_warmfront_eco4_revenue = (
             original_warmfront_remaining_eco4 * eco4_rate +
@@ -2744,21 +2751,79 @@ def forecast_remaining_sales(loader):
         )
         original_warmfront_remaining_eco4_revenue = original_warmfront_remaining_eco4 * eco4_rate
 
+        # Original warmfront figures - GBIS
+
+        original_warmfront_gbis = original_warmfront_estimates["GBIS"].values[0]
+        original_warmfront_remaining_gbis = original_warmfront_estimates["GBIS remaining"].values[0]
+
         original_warmfront_gbis_revenue = (
             original_warmfront_remaining_gbis * gbis_rate +
             (original_warmfront_gbis - original_warmfront_remaining_gbis) * old_gbis_rate
         )
+        original_warmfront_remaining_gbis_revenue = original_warmfront_remaining_gbis * gbis_rate
+
+        # Asset list
+        asset_list = input_data["asset_list"].copy()
+        survey_list = input_data["survey_list"].copy()
+
+        asset_list_remaining = asset_list.merge(
+            survey_list[["asset_list_row_id", "installation_status"]],
+            how="left",
+            on="asset_list_row_id"
+        )
+        asset_list_remaining = asset_list_remaining[pd.isnull(asset_list_remaining["installation_status"])]
+
+        eligiblity_counts = pd.DataFrame(asset_list["ECO Eligibility"].value_counts()).reset_index()
+        eligiblity_counts_remaining = pd.DataFrame(asset_list_remaining["ECO Eligibility"].value_counts()).reset_index()
+
+        eco4_pre_ciga = eligiblity_counts[
+            eligiblity_counts["ECO Eligibility"].isin(
+                ["eco4", "eco4 (subject to ciga)", "eco4 - passed ciga", "failed ciga"]
+            )
+        ]["count"].sum()
+
+        eco4_pre_ciga_remaining = eligiblity_counts_remaining[
+            eligiblity_counts["ECO Eligibility"].isin(
+                ["eco4", "eco4 (subject to ciga)", "eco4 - passed ciga", "failed ciga"]
+            )
+        ]["count"].sum()
+
+        eco4_pre_ciga_revenue = eco4_pre_ciga * eco4_rate
+        eco4_pre_ciga_remaining_revenue = eco4_pre_ciga_remaining * eco4_rate
+
+        # We check if the property has done a CIGA check
+        has_ciga_check = not input_data["ciga_list"].empty
+
+        if has_ciga_check:
+            eco4_post_ciga = eligiblity_counts[
+                eligiblity_counts["ECO Eligibility"].isin(
+                    ["eco4", "eco4 (subject to ciga)", "eco4 - passed ciga", "failed ciga"]
+                )
+            ]["count"].sum()
 
         results.append(
             {
-                ("", "", "HA Name"): ha_name,
-                ("Original Warmfront estimate", "Total - #", "ECO4 - November"): original_warmfront_eco4,
-                ("", "Remaining - #", ""): original_warmfront_remaining_eco4,
-                ("", "Total - £", ""): original_warmfront_eco4_revenue,
-                ("", "Remaining - £", ""): original_warmfront_remaining_eco4_revenue,
+                ("", "", "", "HA Name"): ha_name,
+                # ECO4 - original warmfront figures
+                ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): original_warmfront_eco4,
+                ("ECO4", "", "Remaining - #", ""): original_warmfront_remaining_eco4,
+                ("ECO4", "", "Total - £", ""): original_warmfront_eco4_revenue,
+                ("ECO4", "", "Remaining - £", ""): original_warmfront_remaining_eco4_revenue,
+                # GBIS - original warmfront figures
+                ("", "Original Warmfront estimate", "Total - #", "GBIS - November"): original_warmfront_gbis,
+                ("GBIS", "", "Remaining - #", ""): original_warmfront_gbis,
+                ("GBIS", "", "Total - £", ""): original_warmfront_gbis_revenue,
+                ("GBIS", "", "Remaining - £", ""): original_warmfront_remaining_gbis_revenue,
+                # ECO4 - asset list
+                ("", "Warmfront post code list", "Total #", "ECO4 total (pre-ciga)"): eco4_pre_ciga,
+                ("ECO4", "", "Remaining - #", ""): eco4_pre_ciga_remaining,
+                ("ECO4", "", "Total - £", ""): eco4_pre_ciga_revenue,
+                ("ECO4", "", "Remaining - £", ""): eco4_pre_ciga_remaining_revenue,
             }
         )
 
+    results = pd.DataFrame(results)
+
 
 def app():
     """

From a81f1f2520479e706479bada1761aaa92bb01a44 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 1 Mar 2024 17:37:57 +0000
Subject: [PATCH 040/262] Adding in eligible properties left estimation

---
 .../ha_15_32/ha_analysis_batch_3.py           | 101 ++++++++++++------
 1 file changed, 69 insertions(+), 32 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 21af73ff..cf9dfa53 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -2613,7 +2613,7 @@ def forecast_remaining_sales(loader):
         converted_ciga_jobs["# Ciga dependent at installation"].sum()
     )
 
-    # 2) Calculate the conversion rate from CIGA dependent ciga passed
+    # 2) Calculate the conversion rate from CIGA dependent to ciga passed
     ciga_passrates = []
     for ha_name, input_data in loader.data.items():
 
@@ -2651,7 +2651,7 @@ def forecast_remaining_sales(loader):
 
     ciga_passrates = pd.DataFrame(ciga_passrates)
 
-    median_ciga_pass_to_install = ciga_passrates["# CIGA passed"].sum() / ciga_passrates["# CIGA dependent"].sum()
+    median_ciga_success_rate = ciga_passrates["# CIGA passed"].sum() / ciga_passrates["# CIGA dependent"].sum()
 
     # 3) Calculate the conversion rate of an ECO4 and a GBISjob, that doesn't need ciga, to install
     eco4_ciga_independent_passrates = []
@@ -2762,16 +2762,20 @@ def forecast_remaining_sales(loader):
         )
         original_warmfront_remaining_gbis_revenue = original_warmfront_remaining_gbis * gbis_rate
 
-        # Asset list
+        # Asset list - ECO4
         asset_list = input_data["asset_list"].copy()
         survey_list = input_data["survey_list"].copy()
 
-        asset_list_remaining = asset_list.merge(
-            survey_list[["asset_list_row_id", "installation_status"]],
-            how="left",
-            on="asset_list_row_id"
-        )
-        asset_list_remaining = asset_list_remaining[pd.isnull(asset_list_remaining["installation_status"])]
+        if survey_list.empty:
+            asset_list_remaining = asset_list.copy()
+        else:
+            asset_list_remaining = asset_list.merge(
+                survey_list[["asset_list_row_id", "installation_status"]],
+                how="left",
+                on="asset_list_row_id"
+            )
+            asset_list_remaining = asset_list_remaining[pd.isnull(asset_list_remaining["installation_status"])]
+            asset_list_remaining = asset_list_remaining.drop(columns=["installation_status"])
 
         eligiblity_counts = pd.DataFrame(asset_list["ECO Eligibility"].value_counts()).reset_index()
         eligiblity_counts_remaining = pd.DataFrame(asset_list_remaining["ECO Eligibility"].value_counts()).reset_index()
@@ -2791,36 +2795,69 @@ def forecast_remaining_sales(loader):
         eco4_pre_ciga_revenue = eco4_pre_ciga * eco4_rate
         eco4_pre_ciga_remaining_revenue = eco4_pre_ciga_remaining * eco4_rate
 
-        # We check if the property has done a CIGA check
-        has_ciga_check = not input_data["ciga_list"].empty
+        # Total Eligible - this is what passed ciga checks + strict. If we don't have what passed CIGA, we estimate
+        # We check if the HA has done a CIGA check. Also, if we have assets dormant at CIGA, we estimate what will
+        # convert
+        # We estimate a conversion for anything left post CIGA
+        ha_ciga_conversion = ciga_passrates[ciga_passrates["Ha Name"] == ha_name]
+        if not ha_ciga_conversion.empty:
+            ha_ciga_conversion_rate = (
+                ha_ciga_conversion["# CIGA passed"].values[0] / ha_ciga_conversion["# CIGA dependent"].values[0]
+            )
+        else:
+            ha_ciga_conversion_rate = (
+                median_ciga_success_rate if median_ciga_success_rate <= median_ciga_success_rate else
+                median_ciga_success_rate
+            )
 
+        remaining_needing_ciga_check = eligiblity_counts[
+            eligiblity_counts["ECO Eligibility"] == "eco4 (subject to ciga)"
+            ]["count"].sum()
+
+        has_ciga_check = not input_data["ciga_list"].empty
         if has_ciga_check:
             eco4_post_ciga = eligiblity_counts[
                 eligiblity_counts["ECO Eligibility"].isin(
-                    ["eco4", "eco4 (subject to ciga)", "eco4 - passed ciga", "failed ciga"]
+                    ["eco4", "eco4 - passed ciga", "failed ciga"]
                 )
             ]["count"].sum()
 
-        results.append(
-            {
-                ("", "", "", "HA Name"): ha_name,
-                # ECO4 - original warmfront figures
-                ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): original_warmfront_eco4,
-                ("ECO4", "", "Remaining - #", ""): original_warmfront_remaining_eco4,
-                ("ECO4", "", "Total - £", ""): original_warmfront_eco4_revenue,
-                ("ECO4", "", "Remaining - £", ""): original_warmfront_remaining_eco4_revenue,
-                # GBIS - original warmfront figures
-                ("", "Original Warmfront estimate", "Total - #", "GBIS - November"): original_warmfront_gbis,
-                ("GBIS", "", "Remaining - #", ""): original_warmfront_gbis,
-                ("GBIS", "", "Total - £", ""): original_warmfront_gbis_revenue,
-                ("GBIS", "", "Remaining - £", ""): original_warmfront_remaining_gbis_revenue,
-                # ECO4 - asset list
-                ("", "Warmfront post code list", "Total #", "ECO4 total (pre-ciga)"): eco4_pre_ciga,
-                ("ECO4", "", "Remaining - #", ""): eco4_pre_ciga_remaining,
-                ("ECO4", "", "Total - £", ""): eco4_pre_ciga_revenue,
-                ("ECO4", "", "Remaining - £", ""): eco4_pre_ciga_remaining_revenue,
-            }
-        )
+            if remaining_needing_ciga_check > 0:
+                # We update the eco4 post ciga with the converted remaining
+                eco4_post_ciga += np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate)
+        else:
+            eco4_post_ciga = eligiblity_counts[
+                                 eligiblity_counts["ECO Eligibility"] == "eco4"
+                                 ]["count"].sum() + np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate)
+
+        eco4_post_ciga = int(eco4_post_ciga)
+
+        to_append = {
+            ("", "", "", "HA Name"): ha_name,
+            # ECO4 - original warmfront figures
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): original_warmfront_eco4,
+            ("ECO4 original", "", "Remaining - #", ""): original_warmfront_remaining_eco4,
+            ("ECO4 original", "", "Total - £", ""): original_warmfront_eco4_revenue,
+            ("ECO4 original", "", "Remaining - £", ""): original_warmfront_remaining_eco4_revenue,
+            # GBIS - original warmfront figures
+            ("", "Original Warmfront estimate", "Total - #", "GBIS - November"): original_warmfront_gbis,
+            ("GBIS original", "", "Remaining - #", ""): original_warmfront_gbis,
+            ("GBIS original", "", "Total - £", ""): original_warmfront_gbis_revenue,
+            ("GBIS original", "", "Remaining - £", ""): original_warmfront_remaining_gbis_revenue,
+            # ECO4 - asset list, pre-ciga
+            ("", "Warmfront post code list", "Total #", "ECO4 total (pre-ciga)"): eco4_pre_ciga,
+            ("ECO4 pre-ciga", "", "Remaining - #", ""): eco4_pre_ciga_remaining,
+            ("ECO4 pre-ciga", "", "Total - £", ""): eco4_pre_ciga_revenue,
+            ("ECO4 pre-ciga", "", "Remaining - £", ""): eco4_pre_ciga_remaining_revenue,
+            # ECO4 - asset list, post ciga
+            ("ECO4 post-ciga", "", "Estimated total eligible - #", ""): eco4_post_ciga,
+        }
+
+        # Make sure nothing is forgotten due to duplicate multi-index keys
+        if len(to_append) != 14:
+            raise ValueError("Something went wrong")
+
+        results.append(to_append)
 
     results = pd.DataFrame(results)
 

From 6544adc6c3c9d811f789a0372a33a19bd32beb78 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 1 Mar 2024 17:47:51 +0000
Subject: [PATCH 041/262] Added eligibility calculations

---
 .../ha_15_32/ha_analysis_batch_3.py           | 55 ++++++++++++-------
 1 file changed, 35 insertions(+), 20 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index cf9dfa53..8a46703e 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -2548,6 +2548,33 @@ def patch_cleaned(cleaned):
     return cleaned
 
 
+def calculate_eco4_post_ciga(eligiblity_counts, input_data, ha_ciga_conversion_rate, eco4_rate):
+    remaining_needing_ciga_check = eligiblity_counts[
+        eligiblity_counts["ECO Eligibility"] == "eco4 (subject to ciga)"
+        ]["count"].sum()
+
+    has_ciga_check = not input_data["ciga_list"].empty
+    if has_ciga_check:
+        eco4_post_ciga = eligiblity_counts[
+            eligiblity_counts["ECO Eligibility"].isin(
+                ["eco4", "eco4 - passed ciga", "failed ciga"]
+            )
+        ]["count"].sum()
+
+        if remaining_needing_ciga_check > 0:
+            # We update the eco4 post ciga with the converted remaining
+            eco4_post_ciga += np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate)
+    else:
+        eco4_post_ciga = (
+            eligiblity_counts[eligiblity_counts["ECO Eligibility"] == "eco4"]["count"].sum() +
+            np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate)
+        )
+    eco4_post_ciga = int(eco4_post_ciga)
+    eco4_post_ciga_revenue = eco4_post_ciga * eco4_rate
+
+    return eco4_post_ciga, eco4_post_ciga_revenue
+
+
 def forecast_remaining_sales(loader):
     # Assumptions:
     # We cap the ciga conversion rate at 75% because I expect future HAs to have a lower CIGA conversion rate
@@ -2810,27 +2837,13 @@ def forecast_remaining_sales(loader):
                 median_ciga_success_rate
             )
 
-        remaining_needing_ciga_check = eligiblity_counts[
-            eligiblity_counts["ECO Eligibility"] == "eco4 (subject to ciga)"
-            ]["count"].sum()
+        eco4_post_ciga, eco4_post_ciga_revenue = calculate_eco4_post_ciga(
+            eligiblity_counts, input_data, ha_ciga_conversion_rate, eco4_rate
+        )
 
-        has_ciga_check = not input_data["ciga_list"].empty
-        if has_ciga_check:
-            eco4_post_ciga = eligiblity_counts[
-                eligiblity_counts["ECO Eligibility"].isin(
-                    ["eco4", "eco4 - passed ciga", "failed ciga"]
-                )
-            ]["count"].sum()
-
-            if remaining_needing_ciga_check > 0:
-                # We update the eco4 post ciga with the converted remaining
-                eco4_post_ciga += np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate)
-        else:
-            eco4_post_ciga = eligiblity_counts[
-                                 eligiblity_counts["ECO Eligibility"] == "eco4"
-                                 ]["count"].sum() + np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate)
-
-        eco4_post_ciga = int(eco4_post_ciga)
+        eco4_post_ciga_remaining, eco4_post_ciga_remaining_revenue = calculate_eco4_post_ciga(
+            eligiblity_counts_remaining, input_data, ha_ciga_conversion_rate, eco4_rate
+        )
 
         to_append = {
             ("", "", "", "HA Name"): ha_name,
@@ -2851,6 +2864,8 @@ def forecast_remaining_sales(loader):
             ("ECO4 pre-ciga", "", "Remaining - £", ""): eco4_pre_ciga_remaining_revenue,
             # ECO4 - asset list, post ciga
             ("ECO4 post-ciga", "", "Estimated total eligible - #", ""): eco4_post_ciga,
+            ("ECO4 post-ciga", "", "Estimated remaining eligible - #", ""): eco4_post_ciga_remaining,
+            ("ECO4 post-ciga", "", "Estimated total eligible - £", ""): eco4_post_ciga_revenue,
         }
 
         # Make sure nothing is forgotten due to duplicate multi-index keys

From 5c686f5ec471b3c5c84b307e0851e2a0462934c0 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 1 Mar 2024 17:56:45 +0000
Subject: [PATCH 042/262] working on forecast

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 8a46703e..0bf34e70 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -2841,6 +2841,9 @@ def forecast_remaining_sales(loader):
             eligiblity_counts, input_data, ha_ciga_conversion_rate, eco4_rate
         )
 
+        # Calculate the delta compared to Warmfront's original estimate
+        eco4_delta_vs_original_estimate = 200 * (eco4_post_ciga - original_warmfront_eco4) / original_warmfront_eco4
+
         eco4_post_ciga_remaining, eco4_post_ciga_remaining_revenue = calculate_eco4_post_ciga(
             eligiblity_counts_remaining, input_data, ha_ciga_conversion_rate, eco4_rate
         )
@@ -2862,14 +2865,17 @@ def forecast_remaining_sales(loader):
             ("ECO4 pre-ciga", "", "Remaining - #", ""): eco4_pre_ciga_remaining,
             ("ECO4 pre-ciga", "", "Total - £", ""): eco4_pre_ciga_revenue,
             ("ECO4 pre-ciga", "", "Remaining - £", ""): eco4_pre_ciga_remaining_revenue,
-            # ECO4 - asset list, post ciga
+            # ECO4 - asset list, post ciga, total
             ("ECO4 post-ciga", "", "Estimated total eligible - #", ""): eco4_post_ciga,
-            ("ECO4 post-ciga", "", "Estimated remaining eligible - #", ""): eco4_post_ciga_remaining,
             ("ECO4 post-ciga", "", "Estimated total eligible - £", ""): eco4_post_ciga_revenue,
+            ("ECO4 post-ciga", "", "Delta vs original estimate", ""): eco4_delta_vs_original_estimate,
+            # ECO4 - asset list, post ciga, remaining
+            ("ECO4 post-ciga", "", "Estimated remaining eligible - #", ""): eco4_post_ciga_remaining,
+            ("ECO4 post-ciga", "", "Estimated remaining total eligible - £", ""): eco4_post_ciga_remaining_revenue,
         }
 
         # Make sure nothing is forgotten due to duplicate multi-index keys
-        if len(to_append) != 14:
+        if len(to_append) != 18:
             raise ValueError("Something went wrong")
 
         results.append(to_append)

From c47af474b92282a1159c2866e8810e8e883db7bd Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 1 Mar 2024 18:13:11 +0000
Subject: [PATCH 043/262] Added in remaining breakdowns into forecast and
 confirmed

---
 .../ha_15_32/ha_analysis_batch_3.py           | 59 ++++++++++++++-----
 1 file changed, 45 insertions(+), 14 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 0bf34e70..77c18e80 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -2555,24 +2555,40 @@ def calculate_eco4_post_ciga(eligiblity_counts, input_data, ha_ciga_conversion_r
 
     has_ciga_check = not input_data["ciga_list"].empty
     if has_ciga_check:
-        eco4_post_ciga = eligiblity_counts[
+        eco4_confirmed = eligiblity_counts[
             eligiblity_counts["ECO Eligibility"].isin(
-                ["eco4", "eco4 - passed ciga", "failed ciga"]
+                ["eco4", "eco4 - passed ciga"]
             )
         ]["count"].sum()
 
         if remaining_needing_ciga_check > 0:
             # We update the eco4 post ciga with the converted remaining
-            eco4_post_ciga += np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate)
+            eco4_remaining_forecast = np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate)
+            eco4_post_ciga = eco4_confirmed + eco4_remaining_forecast
+        else:
+            eco4_remaining_forecast = 0
+            eco4_post_ciga = eco4_confirmed
     else:
+        eco4_confirmed = 0
+        eco4_remaining_forecast = np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate)
         eco4_post_ciga = (
-            eligiblity_counts[eligiblity_counts["ECO Eligibility"] == "eco4"]["count"].sum() +
-            np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate)
+            eligiblity_counts[eligiblity_counts["ECO Eligibility"] == "eco4"]["count"].sum() + eco4_remaining_forecast
         )
     eco4_post_ciga = int(eco4_post_ciga)
-    eco4_post_ciga_revenue = eco4_post_ciga * eco4_rate
+    eco4_remaining_forecast = int(eco4_remaining_forecast)
 
-    return eco4_post_ciga, eco4_post_ciga_revenue
+    results = {
+        # Counts
+        "ECO4 - post CIGA - #": eco4_post_ciga,
+        "Of which confirmed - #": eco4_confirmed,
+        "Of which forecast - #": eco4_remaining_forecast,
+        # Revenue
+        "ECO4 - post CIGA - £": eco4_post_ciga * eco4_rate,
+        "Of which confirmed - £": eco4_confirmed * eco4_rate,
+        "Of which forecast - £": eco4_remaining_forecast * eco4_rate,
+    }
+
+    return results
 
 
 def forecast_remaining_sales(loader):
@@ -2837,14 +2853,16 @@ def forecast_remaining_sales(loader):
                 median_ciga_success_rate
             )
 
-        eco4_post_ciga, eco4_post_ciga_revenue = calculate_eco4_post_ciga(
+        eco4_post_ciga_total_results = calculate_eco4_post_ciga(
             eligiblity_counts, input_data, ha_ciga_conversion_rate, eco4_rate
         )
 
         # Calculate the delta compared to Warmfront's original estimate
-        eco4_delta_vs_original_estimate = 200 * (eco4_post_ciga - original_warmfront_eco4) / original_warmfront_eco4
+        eco4_delta_vs_original_estimate = 100 * (
+            eco4_post_ciga_total_results["ECO4 - post CIGA - #"] - original_warmfront_eco4
+        ) / original_warmfront_eco4
 
-        eco4_post_ciga_remaining, eco4_post_ciga_remaining_revenue = calculate_eco4_post_ciga(
+        eco4_post_ciga_remaining_results = calculate_eco4_post_ciga(
             eligiblity_counts_remaining, input_data, ha_ciga_conversion_rate, eco4_rate
         )
 
@@ -2866,12 +2884,25 @@ def forecast_remaining_sales(loader):
             ("ECO4 pre-ciga", "", "Total - £", ""): eco4_pre_ciga_revenue,
             ("ECO4 pre-ciga", "", "Remaining - £", ""): eco4_pre_ciga_remaining_revenue,
             # ECO4 - asset list, post ciga, total
-            ("ECO4 post-ciga", "", "Estimated total eligible - #", ""): eco4_post_ciga,
-            ("ECO4 post-ciga", "", "Estimated total eligible - £", ""): eco4_post_ciga_revenue,
+            ("ECO4 post-ciga", "", "Estimated total eligible - #", ""): eco4_post_ciga_total_results[
+                "ECO4 - post CIGA - #"],
+            ("ECO4 post-ciga", "", "Estimated total eligible - £", ""): eco4_post_ciga_total_results[
+                "ECO4 - post CIGA - £"],
             ("ECO4 post-ciga", "", "Delta vs original estimate", ""): eco4_delta_vs_original_estimate,
             # ECO4 - asset list, post ciga, remaining
-            ("ECO4 post-ciga", "", "Estimated remaining eligible - #", ""): eco4_post_ciga_remaining,
-            ("ECO4 post-ciga", "", "Estimated remaining total eligible - £", ""): eco4_post_ciga_remaining_revenue,
+            ("ECO4 post-ciga", "", "Estimated remaining eligible - #", ""): eco4_post_ciga_remaining_results[
+                "ECO4 - post CIGA - #"],
+            ("ECO4 post-ciga", "", "Estimated remaining total eligible - £", ""): eco4_post_ciga_remaining_results[
+                "ECO4 - post CIGA - £"],
+            ("ECO4 post-ciga", "", "Of which - confirmed (post CIGA or no CIGA required) - #", ""):
+                eco4_post_ciga_remaining_results["Of which confirmed - #"],
+            ("ECO4 post-ciga", "", "Of which - confirmed (post CIGA or no CIGA required) - £", ""):
+                eco4_post_ciga_remaining_results["Of which confirmed - £"],
+            ("ECO4 post-ciga", "", "Of which forecast - #", ""):
+                eco4_post_ciga_remaining_results["Of which forecast - #"],
+            ("ECO4 post-ciga", "", "Of which forecast - £", ""):
+                eco4_post_ciga_remaining_results["Of which forecast - £"],
+            # CIGA failures
         }
 
         # Make sure nothing is forgotten due to duplicate multi-index keys

From 752f0b0f8384a1082161abf31c18638864c45f1e Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 1 Mar 2024 18:37:47 +0000
Subject: [PATCH 044/262] splitting out post ciga figures

---
 .../ha_15_32/ha_analysis_batch_3.py           | 71 +++++++++++++++----
 1 file changed, 59 insertions(+), 12 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 77c18e80..4f33bf34 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -2548,34 +2548,52 @@ def patch_cleaned(cleaned):
     return cleaned
 
 
-def calculate_eco4_post_ciga(eligiblity_counts, input_data, ha_ciga_conversion_rate, eco4_rate):
+def calculate_eco4_post_ciga(
+    eligiblity_counts, input_data, ha_ciga_conversion_rate, ha_ciga_pass_to_sale_rate, ha_eco4_to_sale_rate,
+    eco4_rate
+):
     remaining_needing_ciga_check = eligiblity_counts[
         eligiblity_counts["ECO Eligibility"] == "eco4 (subject to ciga)"
         ]["count"].sum()
 
     has_ciga_check = not input_data["ciga_list"].empty
     if has_ciga_check:
-        eco4_confirmed = eligiblity_counts[
-            eligiblity_counts["ECO Eligibility"].isin(
-                ["eco4", "eco4 - passed ciga"]
-            )
-        ]["count"].sum()
+
+        eco4_no_ciga_needed = eligiblity_counts[
+            eligiblity_counts["ECO Eligibility"] == "eco4"
+            ]["count"].sum()
+
+        eco4_ciga_passed = eligiblity_counts[
+            eligiblity_counts["ECO Eligibility"] == "eco4 - passed ciga"
+            ]["count"].sum()
+
+        eco4_confirmed = (eco4_no_ciga_needed * ha_eco4_to_sale_rate) + (eco4_ciga_passed * ha_ciga_pass_to_sale_rate)
+        eco4_confirmed = np.round(eco4_confirmed)
 
         if remaining_needing_ciga_check > 0:
             # We update the eco4 post ciga with the converted remaining
-            eco4_remaining_forecast = np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate)
+            eco4_remaining_forecast = np.round(
+                remaining_needing_ciga_check * ha_ciga_conversion_rate * ha_ciga_pass_to_sale_rate
+            )
             eco4_post_ciga = eco4_confirmed + eco4_remaining_forecast
         else:
             eco4_remaining_forecast = 0
             eco4_post_ciga = eco4_confirmed
     else:
-        eco4_confirmed = 0
-        eco4_remaining_forecast = np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate)
+        eco4_no_ciga_needed = eligiblity_counts[
+            eligiblity_counts["ECO Eligibility"] == "eco4"
+            ]["count"].sum()
+        eco4_confirmed = np.round(eco4_no_ciga_needed * ha_eco4_to_sale_rate)
+        eco4_remaining_forecast = np.round(
+            remaining_needing_ciga_check * ha_ciga_conversion_rate * ha_ciga_pass_to_sale_rate
+        )
         eco4_post_ciga = (
             eligiblity_counts[eligiblity_counts["ECO Eligibility"] == "eco4"]["count"].sum() + eco4_remaining_forecast
         )
+
     eco4_post_ciga = int(eco4_post_ciga)
     eco4_remaining_forecast = int(eco4_remaining_forecast)
+    eco4_confirmed = int(eco4_confirmed)
 
     results = {
         # Counts
@@ -2853,8 +2871,32 @@ def forecast_remaining_sales(loader):
                 median_ciga_success_rate
             )
 
+        # We also need the ha ciga passed to install success rate
+        ha_ciga_pass_to_sale = converted_ciga_jobs[converted_ciga_jobs["HA Name"] == ha_name]
+        if not ha_ciga_pass_to_sale.empty:
+            ha_ciga_pass_to_sale_rate = (
+                ha_ciga_pass_to_sale["# Ciga dependent successfully installed"].values[0] /
+                ha_ciga_pass_to_sale["# Ciga dependent at installation"].values[0]
+            )
+        else:
+            ha_ciga_pass_to_sale_rate = median_ciga_pass_to_install
+
+        ha_eco4_to_sale = eco4_ciga_independent_passrates[eco4_ciga_independent_passrates["Ha Name"] == ha_name]
+        if not ha_eco4_to_sale.empty:
+            ha_eco4_to_sale_rate = (
+                ha_eco4_to_sale['# ECO4 successfully installed'].values[0] /
+                ha_eco4_to_sale['# ECO4 at install stage'].values[0]
+            )
+        else:
+            ha_eco4_to_sale_rate = median_eco4_to_install
+
         eco4_post_ciga_total_results = calculate_eco4_post_ciga(
-            eligiblity_counts, input_data, ha_ciga_conversion_rate, eco4_rate
+            eligiblity_counts=eligiblity_counts,
+            input_data=input_data,
+            ha_ciga_conversion_rate=ha_ciga_conversion_rate,
+            ha_ciga_pass_to_sale_rate=ha_ciga_pass_to_sale_rate,
+            ha_eco4_to_sale_rate=ha_eco4_to_sale_rate,
+            eco4_rate=eco4_rate
         )
 
         # Calculate the delta compared to Warmfront's original estimate
@@ -2863,7 +2905,12 @@ def forecast_remaining_sales(loader):
         ) / original_warmfront_eco4
 
         eco4_post_ciga_remaining_results = calculate_eco4_post_ciga(
-            eligiblity_counts_remaining, input_data, ha_ciga_conversion_rate, eco4_rate
+            eligiblity_counts=eligiblity_counts_remaining,
+            input_data=input_data,
+            ha_ciga_conversion_rate=ha_ciga_conversion_rate,
+            ha_ciga_pass_to_sale_rate=ha_ciga_pass_to_sale_rate,
+            ha_eco4_to_sale_rate=ha_eco4_to_sale_rate,
+            eco4_rate=eco4_rate
         )
 
         to_append = {
@@ -2906,7 +2953,7 @@ def forecast_remaining_sales(loader):
         }
 
         # Make sure nothing is forgotten due to duplicate multi-index keys
-        if len(to_append) != 18:
+        if len(to_append) != 22:
             raise ValueError("Something went wrong")
 
         results.append(to_append)

From 56ee7224f58e7363a1732ed46aaebd29a71f7acd Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 1 Mar 2024 19:53:28 +0000
Subject: [PATCH 045/262] Added gbis remaining columns

---
 .../ha_15_32/ha_analysis_batch_3.py           | 1100 +++++++++--------
 1 file changed, 592 insertions(+), 508 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 4f33bf34..191ca74c 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1692,500 +1692,500 @@ def get_col_widths(dataframe):
     return widths
 
 
-def analyse_ha_data(outputs, loader):
-    """
-    The approach we take within this function is the following:
-    For properties that have been identified by warmfront as eligible properties, characterise them by scheme. The
-    characterisation can be broken down as the following:
-    1) The property has been identified by Warmfront and is eligible for ECO4/GBIS work, under the strictest criteria
-    2) The property has been identified by Warmfront, however it has a full cavity, and therefore would be subject to
-    a CIGA check
-    3) The property has been identified by Warmfront, but the EPC shows that the property has more than 100mm loft
-    insulation
-    4) The property has been identified by Warmfront, but doesn't look like a property that would likely qualify under
-    any cirsumstances, given the available data
-
-    Then, for any property that has NOT been identifid by Warmfront, we identify properties that look like they would
-    qualify under the strictest criteria, and mark these as potential additional opportunities.
-
-    :return:
-    """
-
-    eco4_rate = 1710
-    gbis_rate = 600
-    old_eco4_rate = 1456
-    old_gbis_rate = 432
-
-    epc_c_threshold = 80
-    scheme_map = {
-        "ECO4": "ECO4",
-        "AFFORDABLE WARMTH": "ECO4",
-        "ECO4 A/W": "ECO4",
-        "ECO4 GBIS (ECO+)": "GBIS"
-    }
-
-    ha_analysis_results = []
-    total_revenue_results = []
-    for ha_name, datasets in outputs.items():
-        inputs = [x for k, x in loader.data.items() if k == ha_name][0]
-
-        results_df = datasets["results_df"].copy()
-
-        analysis_data = inputs["asset_list"][['asset_list_row_id', "ECO Eligibility"]].rename(
-            columns={"row_meaning": "asset_identification_status"}
-        ).merge(
-            results_df,
-            how="left",
-            right_on="row_id",
-            left_on="asset_list_row_id"
-        )
-
-        analysis_data["is_remaining"] = True
-
-        n_sold_eco4 = 0
-        n_sold_gbis = 0
-        if not inputs["survey_list"].empty:
-            # Merge on the survey list and signal everything that is remaining or not (i.e. anything that hasn't had
-            # a survey)
-            survey_list = inputs["survey_list"].copy()
-
-            # TODO: TEMP
-            scheme_column = survey_list.columns[0]
-            # We clean up the survey list installation or cancelled
-            survey_list["installed_or_cancelled_clean"] = survey_list["INSTALLED OR CANCELLED"].str.lower()
-            # Remove all punctuation
-            survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.replace(
-                r'[^\w\s]', '', regex=True
-            )
-            # Remove double spaces
-            survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.replace(
-                r'\s+', ' ', regex=True
-            )
-            # Remove trailing spaces
-            survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.strip()
-
-            # Remap the values in the scheme column
-            survey_list[scheme_column] = survey_list[scheme_column].replace(scheme_map)
-
-            survey_list["installation_status"] = None
-            survey_list["installation_status"] = np.where(
-                survey_list["installed_or_cancelled_clean"].isin(["installed", "installed see notes"]),
-                "installed",
-                survey_list["installation_status"]
-            )
-            survey_list["installation_status"] = np.where(
-                survey_list["installed_or_cancelled_clean"].isin(["cancelled"]),
-                "cancelled",
-                survey_list["installation_status"]
-            )
-            # Find partial installations
-            survey_list["installation_status"] = np.where(
-                survey_list["installed_or_cancelled_clean"].str.contains("still to be installed"),
-                "partially installed",
-                survey_list["installation_status"]
-            )
-            # Find partial cancellations
-            # TODO: We might have more indications of partial cancellations
-            survey_list["installation_status"] = np.where(
-                survey_list["installed_or_cancelled_clean"].isin(["loft cancelled"]),
-                "partially cancelled",
-                survey_list["installation_status"]
-            )
-
-            # Finally, for other cases, we set the status to "in progress"
-            survey_list["installation_status"] = survey_list["installation_status"].fillna("in progress")
-
-            # We concatenate the scheme name with the installation status
-            survey_list["installation_status"] = (
-                survey_list[scheme_column] + " - " + survey_list["installation_status"]
-            )
-
-            # TODO: END TEMP
-
-            survey_list_to_merge = survey_list[["asset_list_row_id", scheme_column]].copy()
-            survey_list_to_merge["is_remaining"] = False
-            analysis_data = analysis_data.drop(columns="is_remaining").merge(
-                survey_list_to_merge,
-                how="left", on="asset_list_row_id"
-            )
-            analysis_data["is_remaining"] = analysis_data["is_remaining"].fillna(True)
-
-            n_sold_eco4 = survey_list_to_merge[survey_list_to_merge[scheme_column] == "ECO4"].shape[0]
-            n_sold_gbis = survey_list_to_merge[survey_list_to_merge[scheme_column] == "GBIS"].shape[0]
-
-        # Take just remaining
-        analysis_data = analysis_data[analysis_data["is_remaining"]]
-
-        # Also, if the HA has started selling, we remove any that are still subject to ciga
-        n_eco4_missed_subject_to_ciga = 0
-        if not inputs["survey_list"].empty:
-            n_eco4_missed_subject_to_ciga = (analysis_data["ECO Eligibility"] == "eco4 (subject to ciga)").sum()
-            analysis_data = analysis_data[analysis_data["ECO Eligibility"] != "eco4 (subject to ciga)"]
-
-        ################################################################################################
-        # We take the properties that strictly qualified under eco
-        ################################################################################################
-
-        eco4_identified = analysis_data[analysis_data["ECO Eligibility"] == "eco4"].copy()
-        eco4_identified["identification_type"] = None
-        eco4_identified["identification_type"] = np.where(
-            (eco4_identified["eco4_eligible"] == True) & (eco4_identified["eco4_strict"] == True),
-            "strict",
-            eco4_identified["identification_type"]
-        )
-
-        # For expansive, the property can be no higher than an EPC C
-        eco4_identified["identification_type"] = np.where(
-            (eco4_identified["eco4_eligible"] == True) & (eco4_identified["eco4_strict"] == False) & (
-                eco4_identified["sap"] <= epc_c_threshold
-            ),
-            "expansive",
-            eco4_identified["identification_type"]
-        )
-        ################################################################################################
-        # We take the properties dependent on CIGA
-        ################################################################################################
-
-        ciga_dependent_identified = analysis_data[
-            analysis_data["ECO Eligibility"].isin(
-                [
-                    "eco4 (subject to ciga)",
-                    "eco4 - passed ciga"
-                ]
-            )
-        ].copy()
-
-        # These are properties that show filled cavity
-        ciga_dependent_identified["identification_type"] = None
-        ciga_dependent_identified["identification_type"] = np.where(
-            ciga_dependent_identified["eco4_message"].isin(
-                [
-                    "Perfect suitability",
-                    "Meets cavity and sap",
-                    "Fails cavity, meets loft, fails SAP",
-                    "Meets fabric, fails SAP check",
-                    "Meets cavity, loft borderline, meets sap",
-                ]
-            ) & (ciga_dependent_identified["sap"] <= epc_c_threshold),
-            "strict",
-            ciga_dependent_identified["identification_type"]
-        )
-
-        ciga_dependent_identified["identification_type"] = np.where(
-            ((ciga_dependent_identified["eco4_message"].isin(["Meets just cavity"])) | (
-                ciga_dependent_identified["walls"].isin(["Cavity wall, filled cavity"])
-            )) & (
-                (ciga_dependent_identified["sap"] <= epc_c_threshold) &
-                pd.isnull(ciga_dependent_identified["identification_type"])
-            ),
-            "expansive",
-            ciga_dependent_identified["identification_type"]
-        )
-
-        ################################################################################################
-        # We properties that qualified for gbis
-        ################################################################################################
-        gbis_identified = analysis_data[analysis_data["ECO Eligibility"] == "gbis"].copy()
-        gbis_identified["identification_type"] = None
-        gbis_identified["identification_type"] = np.where(
-            (gbis_identified["gbis_eligible"] == True) & (gbis_identified["sap"] < 69),
-            "strict",
-            gbis_identified["identification_type"]
-        )
-
-        gbis_identified["identification_type"] = np.where(
-            (gbis_identified["gbis_eligible"] == True) & (gbis_identified["sap"] <= epc_c_threshold) & (
-                pd.isnull(gbis_identified["identification_type"])
-            ),
-            "expansive",
-            gbis_identified["identification_type"]
-        )
-
-        # Finally, we look at the properties that have not been identified by Warmfront
-        not_identified = analysis_data[
-            analysis_data["ECO Eligibility"].isin(
-                [
-                    "not eligible"
-                ]
-            )
-        ].copy()
-
-        surplus_eco4 = not_identified[
-            (not_identified["eco4_eligible"] == True) & (not_identified["eco4_message"].isin(
-                ["Perfect suitability", "Meets cavity, loft borderline, meets sap", "Near perfect suitability"]
-            ))
-            ]
-
-        surplus_gbis = not_identified[
-            (not_identified["gbis_eligible"] == True) & (
-                ~not_identified["asset_list_row_id"].isin(surplus_eco4["asset_list_row_id"].values)
-            ) & (not_identified["sap"] < 69) & (
-                (not_identified["cavity_type"].isin(["empty", "partial insulation"])) | (
-                not_identified["walls"].str.contains("partial", case=False, na=False)
-            )
-            )
-            ]
-        surplus_gbis = surplus_gbis[surplus_gbis["is_estimated"] == False]
-
-        # Output variables - the data was sent to us in December, but the remaining figures are
-        # what was in November
-        november_remaining = loader.december_figures[loader.december_figures["HA Name"] == ha_name]
-
-        # ECO4
-        n_properties_remaining_in_asset_list = inputs["asset_list"].shape[0]
-        november_eco4_remaining = max(november_remaining["ECO4 remaining"].values[0], 0)
-        november_eco4_sold = november_remaining["No. of Tech surveys complete - Eco 4"].values[0]
-        eco4_sales_since_november = n_sold_eco4 - november_eco4_sold
-
-        n_warmfront_identified_eco4 = eco4_identified.shape[0] + ciga_dependent_identified.shape[0]
-        eco4_of_which_identified_strict = (
-            eco4_identified[eco4_identified["identification_type"] == "strict"].shape[0] +
-            ciga_dependent_identified[ciga_dependent_identified["identification_type"] == "strict"].shape[0]
-        )
-        eco4_of_which_identified_expansive = (
-            eco4_identified[eco4_identified["identification_type"] == "expansive"].shape[0] +
-            ciga_dependent_identified[ciga_dependent_identified["identification_type"] == "expansive"].shape[0]
-        )
-        # GBIS
-        n_warmfront_identified_gbis = gbis_identified.shape[0]
-        november_gbis_remaining = max(november_remaining["GBIS remaining"].values[0], 0)
-        november_gbis_sold = november_remaining["No. of Tech surveys complete - GBIS"].values[0]
-        gbis_sales_since_november = n_sold_gbis - november_gbis_sold
-        gbis_of_which_identified_strict = gbis_identified[gbis_identified["identification_type"] == "strict"].shape[0]
-        gbis_of_which_identified_expansive = \
-            gbis_identified[gbis_identified["identification_type"] == "expansive"].shape[0]
-
-        to_append = {
-            ("", "HA Name"): ha_name,
-            ("", "# properties in asset list"): n_properties_remaining_in_asset_list,
-            ############
-            # ECO4
-            ############
-            ("ECO4", "# remaining November file"): november_eco4_remaining,
-            ("ECO4", "# sold in November file"): november_eco4_sold,
-            ("ECO4", "# sold (survey list)"): n_sold_eco4,
-            ("ECO4", "# that missed CIGA check"): n_eco4_missed_subject_to_ciga,
-            ("ECO4", "# Remaining properties (asset list)"): n_warmfront_identified_eco4,
-            ("ECO4", "Of which identified by model - strict"): eco4_of_which_identified_strict,
-            ("ECO4", "Of which identified by model - expansive"): eco4_of_which_identified_expansive,
-            ("ECO4", "Of which identified by model - total"): (
-                eco4_of_which_identified_strict + eco4_of_which_identified_expansive
-            ),
-            ("ECO4", "Additional properties"): surplus_eco4.shape[0],
-            ############
-            # GBIS
-            ############
-            ("GBIS", "# remaining November file"): november_gbis_remaining,
-            ("GBIS", "# sold in November file"): november_gbis_sold,
-            ("GBIS", "# sold (survey list)"): n_sold_gbis,
-            ("GBIS", "# Remaining properties (asset list)"): n_warmfront_identified_gbis,
-            ("GBIS", "Of which identified by model - strict"): gbis_of_which_identified_strict,
-            ("GBIS", "Of which identified by model - expansive"): gbis_of_which_identified_expansive,
-            ("GBIS", "Of which identified by model - total"): (
-                gbis_of_which_identified_strict + gbis_of_which_identified_expansive
-            ),
-            ("GBIS", "Additional properties"): surplus_gbis.shape[0]
-        }
-
-        ha_analysis_results.append(to_append)
-
-        # Calculate the revenue results
-        to_append_revenue = {
-            ("", "HA Name"): ha_name,
-            # Eco4 revenue
-            ("ECO4", "£ remaining November file"): november_eco4_remaining * eco4_rate,
-            ("ECO4", "£ sold November file"): november_eco4_sold * old_eco4_rate,
-            ("ECO4", "£ sold since November"): eco4_sales_since_november * eco4_rate,
-            ("ECO4", "£ stuck at ciga check"): n_eco4_missed_subject_to_ciga * eco4_rate,
-            ("ECO4", "£ remaining (asset list)"): n_warmfront_identified_eco4 * eco4_rate,
-            ("ECO4", "Of which identified by model - strict"): eco4_of_which_identified_strict * eco4_rate,
-            ("ECO4", "Of which identified by model - expansive"): eco4_of_which_identified_expansive * eco4_rate,
-            ("ECO4", "Of which identified by model - total"): eco4_rate * (
-                eco4_of_which_identified_strict + eco4_of_which_identified_expansive
-            ),
-            ("ECO4", "Additional properties"): eco4_rate * surplus_eco4.shape[0],
-        }
-        total_revenue_results.append(to_append_revenue)
-
-    ha_analysis_results = pd.DataFrame(ha_analysis_results)
-    ha_analysis_results.columns = pd.MultiIndex.from_tuples(ha_analysis_results.columns)
-
-    facts_and_figures = loader.facts_and_figures.copy()
-    facts_and_figures["ha_number"] = facts_and_figures["HA Name"].str.extract(r'(\d+)').astype(int)
-    facts_and_figures = facts_and_figures.sort_values("ha_number")
-    facts_and_figures = facts_and_figures.drop(columns=["ha_number"])
-
-    # Rename some of the cols
-    facts_and_figures = facts_and_figures.rename(
-        columns={
-            # ECO4 cols
-            "ECO4": "ECO4 - November",
-            "GBIS": "GBIS - November",
-            "eco4 (subject to ciga)": "ECO4 - subject to ciga",
-            "eco4": "ECO4 - doesn't need CIGA",
-            "eco4 - passed ciga": "ECO4 - passed CIGA",
-            "failed ciga": "ECO4 - failed CIGA",
-            "ECO4 - partially cancelled": "ECO4 - Install downgrade to GBIS",
-            "ECO4 - in progress": "ECO4 - Install in progress",
-            "ECO4 - cancelled": "ECO4 - Install cancelled",
-            # GBIS cols
-            "gbis": "GBIS total (asset list)"
-        }
-    )
-    # We calculate the eco4 total from the asset list
-    # 1) If ciga checks have been completed (i.e. ECO4 - passed ciga > 0) this sum is
-    # ECO4 - doesn't need CIGA + ECO4 - passed CIGA
-    # 2) if ciga checks haven't been completed (i.e. ECO4 - passed ciga is missing), this sum is
-    # ECO4 - doesn't need CIGA + ECO4 - subject to ciga
-    facts_and_figures["ECO4 total (asset list - pre ciga)"] = (
-        facts_and_figures["ECO4 - doesn't need CIGA"] +
-        facts_and_figures["ECO4 - subject to ciga"] +
-        facts_and_figures["ECO4 - passed CIGA"]
-    )
-
-    facts_and_figures["ECO4 total (asset list - post ciga)"] = None
-    facts_and_figures["ECO4 total (asset list - post ciga)"] = np.where(
-        facts_and_figures["ECO4 - passed CIGA"] > 0,
-        facts_and_figures["ECO4 - doesn't need CIGA"] + facts_and_figures["ECO4 - passed CIGA"],
-        facts_and_figures["ECO4 total (asset list - post ciga)"]
-    )
-
-    # Re-arrange the columns
-    facts_and_figures = facts_and_figures[
-        [
-            'HA Name',
-            'ECO4 - November',
-            'GBIS - November',
-            'ECO4 total (asset list - pre ciga)',
-            'ECO4 total (asset list - post ciga)',
-            'GBIS total (asset list)',
-            'ECO4 - subject to ciga',
-            "ECO4 - doesn't need CIGA",
-            'ECO4 - passed CIGA',
-            'ECO4 - failed CIGA',
-            'ECO4 - installed',
-            'ECO4 - Install in progress',
-            'ECO4 - Install cancelled',
-            'ECO4 - partially installed',
-            'ECO4 - Install downgrade to GBIS',
-        ]
-    ]
-    # Addd a note to flag any rows where ECO4 (
-    # subject to ciga is greater than 0) and (ECO4 - passed ciga is greater than 0
-    # )
-    facts_and_figures["Missed CIGA checks opportunity"] = None
-    facts_and_figures["Missed CIGA checks opportunity"] = np.where(
-        (facts_and_figures["ECO4 - subject to ciga"] > 0) & (facts_and_figures["ECO4 - passed CIGA"] > 0),
-        "potential opportunity of " + facts_and_figures["ECO4 - subject to ciga"].astype(
-            str) + " ECO4 properties needing a CIGA check",
-        facts_and_figures["Missed CIGA checks opportunity"]
-    )
-
-    facts_and_figures.to_csv("Facts and figures sample.csv")
-
-    # Re arrage the columns
-
-    # Also sort ha_analysis_results by ha number
-    ha_analysis_results["ha_number"] = ha_analysis_results[("", "HA Name")].str.extract(r'(\d+)').astype(int)
-    ha_analysis_results = ha_analysis_results.sort_values("ha_number")
-    ha_analysis_results = ha_analysis_results.drop(columns=["ha_number"])
-
-    # We save 2 sheets
-    # Automate creation of the excel
-    # Create a Pandas Excel writer using XlsxWriter as the engine
-    with pd.ExcelWriter('HA Analysis Results.xlsx', engine='xlsxwriter') as writer:
-        # Write each dataframe to a different worksheet without the index
-        for df, sheet in [(facts_and_figures, 'HA Facts and Figures'),
-                          (ha_analysis_results, 'Asset Identification')]:
-
-            df.to_excel(writer, sheet_name=sheet)
-
-            # Auto-adjust columns' width
-            for i, width in enumerate(get_col_widths(df)):
-                writer.sheets[sheet].set_column(i, i, width)
-
-    # Inspection: - Looking into the proportion of homes with "cavity, as built, insulated (assumed)" as their
-    #               description, and what proportion of time they get identified via non-invasive surveys
-
-    # true_eco4_assets = []
-    # ciga_dependent_assets = []
-    # not_eligible = []
-    # as_built_insulated = []
-    # date_cols = {
-    #     "HA39": "date_built",
-    #     "HA14": "Built In Year",
-    #     "HA6": "Construction Year",
-    #     "HA1": "Build Date",
-    #     "HA107": "YEAR BUILT"
-    # }
-    # for ha_name, data_objects in outputs.items():
-    #     inputs = [x for k, x in loader.data.items() if k == ha_name][0]
-    #
-    #     date_col = date_cols[ha_name]
-    #     results_df = data_objects["results_df"].copy()
-    #     df = inputs["asset_list"][['asset_list_row_id', "ECO Eligibility", date_col]].rename(
-    #         columns={"row_meaning": "asset_identification_status", date_col: "date_built"}
-    #     ).merge(
-    #         results_df,
-    #         how="left",
-    #         right_on="row_id",
-    #         left_on="asset_list_row_id"
-    #     )
-    #
-    #     # take the true ECO4
-    #     true_eco4 = df[df["ECO Eligibility"] == "eco4"].copy()
-    #     ciga_dependent = df[
-    #         df["ECO Eligibility"].isin(
-    #             [
-    #                 "eco4 (subject to ciga)",
-    #                 "failed ciga",
-    #                 "eco4 - passed ciga"
-    #             ]
-    #         )
-    #     ]
-    #     insulated_assumed = df[df["walls"] == "Cavity wall, as built, insulated"].copy()
-    #     # We convert date built to datetime
-    #     try:
-    #         insulated_assumed = insulated_assumed[~pd.isnull(insulated_assumed["date_built"])]
-    #         insulated_assumed["year_built"] = pd.to_datetime(insulated_assumed["date_built"].astype(str)).dt.year
-    #         as_built_insulated.append(insulated_assumed)
-    #     except Exception as e:
-    #         print("oh well")
-    #
-    #     true_eco4_assets.append(true_eco4)
-    #     ciga_dependent_assets.append(ciga_dependent)
-    #
-    # true_eco4_assets = pd.concat(true_eco4_assets)
-    # ciga_dependent_assets = pd.concat(ciga_dependent_assets)
-    # as_built_insulated = pd.concat(as_built_insulated)
-    #
-    # true_eco4_assets["walls"].value_counts(normalize=True)
-    # ciga_dependent_assets["walls"].value_counts(normalize=True)
-    #
-    # from recommendations.recommendation_utils import extract_insulation_thickness
-    #
-    # true_eco4_assets["roof_insulation_thickness"] = true_eco4_assets["roof"].apply(
-    #     lambda x: extract_insulation_thickness(x)
-    # )
-    #
-    # true_eco4_assets["e"] = true_eco4_assets.merge(
-    #     pd.DataFrame(cleaned["roof-description"])[["original_description", "insulation_thickness"]],
-    #     how="left",
-    #     left_on="roof",
-    #     right_on="original_description"
-    # )
-    #
-    # true_eco4_assets["sap"].mean()
-    #
-    # true_eco4_assets["insulation_thickness"].isin(
-    #     ["250", "150", "200", "100", "75", "50"]
-    # ).sum() / true_eco4_assets.shape[0]
-    #
-    # true_eco4_assets["insulation_thickness"].isin(
-    #     ["100"]
-    # ).sum() / true_eco4_assets.shape[0]
-    #
-    # as_built_insulated.groupby("property_type")["ECO Eligibility"].value_counts(normalize=True)
+# def analyse_ha_data(outputs, loader):
+#     """
+#     The approach we take within this function is the following:
+#     For properties that have been identified by warmfront as eligible properties, characterise them by scheme. The
+#     characterisation can be broken down as the following:
+#     1) The property has been identified by Warmfront and is eligible for ECO4/GBIS work, under the strictest criteria
+#     2) The property has been identified by Warmfront, however it has a full cavity, and therefore would be subject to
+#     a CIGA check
+#     3) The property has been identified by Warmfront, but the EPC shows that the property has more than 100mm loft
+#     insulation
+#     4) The property has been identified by Warmfront, but doesn't look like a property that would likely qualify under
+#     any circumstances, given the available data
+#
+#     Then, for any property that has NOT been identified by Warmfront, we identify properties that look like they would
+#     qualify under the strictest criteria, and mark these as potential additional opportunities.
+#
+#     :return:
+#     """
+#
+#     eco4_rate = 1710
+#     gbis_rate = 600
+#     # old_eco4_rate = 1456
+#     old_gbis_rate = 432
+#
+#     epc_c_threshold = 80
+#     scheme_map = {
+#         "ECO4": "ECO4",
+#         "AFFORDABLE WARMTH": "ECO4",
+#         "ECO4 A/W": "ECO4",
+#         "ECO4 GBIS (ECO+)": "GBIS"
+#     }
+#
+#     ha_analysis_results = []
+#     total_revenue_results = []
+#     for ha_name, datasets in outputs.items():
+#         inputs = [x for k, x in loader.data.items() if k == ha_name][0]
+#
+#         results_df = datasets["results_df"].copy()
+#
+#         analysis_data = inputs["asset_list"][['asset_list_row_id', "ECO Eligibility"]].rename(
+#             columns={"row_meaning": "asset_identification_status"}
+#         ).merge(
+#             results_df,
+#             how="left",
+#             right_on="row_id",
+#             left_on="asset_list_row_id"
+#         )
+#
+#         analysis_data["is_remaining"] = True
+#
+#         n_sold_eco4 = 0
+#         n_sold_gbis = 0
+#         if not inputs["survey_list"].empty:
+#             # Merge on the survey list and signal everything that is remaining or not (i.e. anything that hasn't had
+#             # a survey)
+#             survey_list = inputs["survey_list"].copy()
+#
+#             # TODO: TEMP
+#             scheme_column = survey_list.columns[0]
+#             # We clean up the survey list installation or cancelled
+#             survey_list["installed_or_cancelled_clean"] = survey_list["INSTALLED OR CANCELLED"].str.lower()
+#             # Remove all punctuation
+#             survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.replace(
+#                 r'[^\w\s]', '', regex=True
+#             )
+#             # Remove double spaces
+#             survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.replace(
+#                 r'\s+', ' ', regex=True
+#             )
+#             # Remove trailing spaces
+#             survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.strip()
+#
+#             # Remap the values in the scheme column
+#             survey_list[scheme_column] = survey_list[scheme_column].replace(scheme_map)
+#
+#             survey_list["installation_status"] = None
+#             survey_list["installation_status"] = np.where(
+#                 survey_list["installed_or_cancelled_clean"].isin(["installed", "installed see notes"]),
+#                 "installed",
+#                 survey_list["installation_status"]
+#             )
+#             survey_list["installation_status"] = np.where(
+#                 survey_list["installed_or_cancelled_clean"].isin(["cancelled"]),
+#                 "cancelled",
+#                 survey_list["installation_status"]
+#             )
+#             # Find partial installations
+#             survey_list["installation_status"] = np.where(
+#                 survey_list["installed_or_cancelled_clean"].str.contains("still to be installed"),
+#                 "partially installed",
+#                 survey_list["installation_status"]
+#             )
+#             # Find partial cancellations
+#             # TODO: We might have more indications of partial cancellations
+#             survey_list["installation_status"] = np.where(
+#                 survey_list["installed_or_cancelled_clean"].isin(["loft cancelled"]),
+#                 "partially cancelled",
+#                 survey_list["installation_status"]
+#             )
+#
+#             # Finally, for other cases, we set the status to "in progress"
+#             survey_list["installation_status"] = survey_list["installation_status"].fillna("in progress")
+#
+#             # We concatenate the scheme name with the installation status
+#             survey_list["installation_status"] = (
+#                 survey_list[scheme_column] + " - " + survey_list["installation_status"]
+#             )
+#
+#             # TODO: END TEMP
+#
+#             survey_list_to_merge = survey_list[["asset_list_row_id", scheme_column]].copy()
+#             survey_list_to_merge["is_remaining"] = False
+#             analysis_data = analysis_data.drop(columns="is_remaining").merge(
+#                 survey_list_to_merge,
+#                 how="left", on="asset_list_row_id"
+#             )
+#             analysis_data["is_remaining"] = analysis_data["is_remaining"].fillna(True)
+#
+#             n_sold_eco4 = survey_list_to_merge[survey_list_to_merge[scheme_column] == "ECO4"].shape[0]
+#             n_sold_gbis = survey_list_to_merge[survey_list_to_merge[scheme_column] == "GBIS"].shape[0]
+#
+#         # Take just remaining
+#         analysis_data = analysis_data[analysis_data["is_remaining"]]
+#
+#         # Also, if the HA has started selling, we remove any that are still subject to ciga
+#         n_eco4_missed_subject_to_ciga = 0
+#         if not inputs["survey_list"].empty:
+#             n_eco4_missed_subject_to_ciga = (analysis_data["ECO Eligibility"] == "eco4 (subject to ciga)").sum()
+#             analysis_data = analysis_data[analysis_data["ECO Eligibility"] != "eco4 (subject to ciga)"]
+#
+#         ################################################################################################
+#         # We take the properties that strictly qualified under eco
+#         ################################################################################################
+#
+#         eco4_identified = analysis_data[analysis_data["ECO Eligibility"] == "eco4"].copy()
+#         eco4_identified["identification_type"] = None
+#         eco4_identified["identification_type"] = np.where(
+#             (eco4_identified["eco4_eligible"] == True) & (eco4_identified["eco4_strict"] == True),
+#             "strict",
+#             eco4_identified["identification_type"]
+#         )
+#
+#         # For expansive, the property can be no higher than an EPC C
+#         eco4_identified["identification_type"] = np.where(
+#             (eco4_identified["eco4_eligible"] == True) & (eco4_identified["eco4_strict"] == False) & (
+#                 eco4_identified["sap"] <= epc_c_threshold
+#             ),
+#             "expansive",
+#             eco4_identified["identification_type"]
+#         )
+#         ################################################################################################
+#         # We take the properties dependent on CIGA
+#         ################################################################################################
+#
+#         ciga_dependent_identified = analysis_data[
+#             analysis_data["ECO Eligibility"].isin(
+#                 [
+#                     "eco4 (subject to ciga)",
+#                     "eco4 - passed ciga"
+#                 ]
+#             )
+#         ].copy()
+#
+#         # These are properties that show filled cavity
+#         ciga_dependent_identified["identification_type"] = None
+#         ciga_dependent_identified["identification_type"] = np.where(
+#             ciga_dependent_identified["eco4_message"].isin(
+#                 [
+#                     "Perfect suitability",
+#                     "Meets cavity and sap",
+#                     "Fails cavity, meets loft, fails SAP",
+#                     "Meets fabric, fails SAP check",
+#                     "Meets cavity, loft borderline, meets sap",
+#                 ]
+#             ) & (ciga_dependent_identified["sap"] <= epc_c_threshold),
+#             "strict",
+#             ciga_dependent_identified["identification_type"]
+#         )
+#
+#         ciga_dependent_identified["identification_type"] = np.where(
+#             ((ciga_dependent_identified["eco4_message"].isin(["Meets just cavity"])) | (
+#                 ciga_dependent_identified["walls"].isin(["Cavity wall, filled cavity"])
+#             )) & (
+#                 (ciga_dependent_identified["sap"] <= epc_c_threshold) &
+#                 pd.isnull(ciga_dependent_identified["identification_type"])
+#             ),
+#             "expansive",
+#             ciga_dependent_identified["identification_type"]
+#         )
+#
+#         ################################################################################################
+#         # We take the properties that qualified for gbis
+#         ################################################################################################
+#         gbis_identified = analysis_data[analysis_data["ECO Eligibility"] == "gbis"].copy()
+#         gbis_identified["identification_type"] = None
+#         gbis_identified["identification_type"] = np.where(
+#             (gbis_identified["gbis_eligible"] == True) & (gbis_identified["sap"] < 69),
+#             "strict",
+#             gbis_identified["identification_type"]
+#         )
+#
+#         gbis_identified["identification_type"] = np.where(
+#             (gbis_identified["gbis_eligible"] == True) & (gbis_identified["sap"] <= epc_c_threshold) & (
+#                 pd.isnull(gbis_identified["identification_type"])
+#             ),
+#             "expansive",
+#             gbis_identified["identification_type"]
+#         )
+#
+#         # Finally, we look at the properties that have not been identified by Warmfront
+#         not_identified = analysis_data[
+#             analysis_data["ECO Eligibility"].isin(
+#                 [
+#                     "not eligible"
+#                 ]
+#             )
+#         ].copy()
+#
+#         surplus_eco4 = not_identified[
+#             (not_identified["eco4_eligible"] == True) & (not_identified["eco4_message"].isin(
+#                 ["Perfect suitability", "Meets cavity, loft borderline, meets sap", "Near perfect suitability"]
+#             ))
+#             ]
+#
+#         surplus_gbis = not_identified[
+#             (not_identified["gbis_eligible"] == True) & (
+#                 ~not_identified["asset_list_row_id"].isin(surplus_eco4["asset_list_row_id"].values)
+#             ) & (not_identified["sap"] < 69) & (
+#                 (not_identified["cavity_type"].isin(["empty", "partial insulation"])) | (
+#                 not_identified["walls"].str.contains("partial", case=False, na=False)
+#             )
+#             )
+#             ]
+#         surplus_gbis = surplus_gbis[surplus_gbis["is_estimated"] == False]
+#
+#         # Output variables - the data was sent to us in December, but the remaining figures are
+#         # what was in November
+#         november_remaining = loader.december_figures[loader.december_figures["HA Name"] == ha_name]
+#
+#         # ECO4
+#         n_properties_remaining_in_asset_list = inputs["asset_list"].shape[0]
+#         november_eco4_remaining = max(november_remaining["ECO4 remaining"].values[0], 0)
+#         november_eco4_sold = november_remaining["No. of Tech surveys complete - Eco 4"].values[0]
+#         eco4_sales_since_november = n_sold_eco4 - november_eco4_sold
+#
+#         n_warmfront_identified_eco4 = eco4_identified.shape[0] + ciga_dependent_identified.shape[0]
+#         eco4_of_which_identified_strict = (
+#             eco4_identified[eco4_identified["identification_type"] == "strict"].shape[0] +
+#             ciga_dependent_identified[ciga_dependent_identified["identification_type"] == "strict"].shape[0]
+#         )
+#         eco4_of_which_identified_expansive = (
+#             eco4_identified[eco4_identified["identification_type"] == "expansive"].shape[0] +
+#             ciga_dependent_identified[ciga_dependent_identified["identification_type"] == "expansive"].shape[0]
+#         )
+#         # GBIS
+#         n_warmfront_identified_gbis = gbis_identified.shape[0]
+#         november_gbis_remaining = max(november_remaining["GBIS remaining"].values[0], 0)
+#         november_gbis_sold = november_remaining["No. of Tech surveys complete - GBIS"].values[0]
+#         gbis_sales_since_november = n_sold_gbis - november_gbis_sold
+#         gbis_of_which_identified_strict = gbis_identified[gbis_identified["identification_type"] == "strict"].shape[0]
+#         gbis_of_which_identified_expansive = \
+#             gbis_identified[gbis_identified["identification_type"] == "expansive"].shape[0]
+#
+#         to_append = {
+#             ("", "HA Name"): ha_name,
+#             ("", "# properties in asset list"): n_properties_remaining_in_asset_list,
+#             ############
+#             # ECO4
+#             ############
+#             ("ECO4", "# remaining November file"): november_eco4_remaining,
+#             ("ECO4", "# sold in November file"): november_eco4_sold,
+#             ("ECO4", "# sold (survey list)"): n_sold_eco4,
+#             ("ECO4", "# that missed CIGA check"): n_eco4_missed_subject_to_ciga,
+#             ("ECO4", "# Remaining properties (asset list)"): n_warmfront_identified_eco4,
+#             ("ECO4", "Of which identified by model - strict"): eco4_of_which_identified_strict,
+#             ("ECO4", "Of which identified by model - expansive"): eco4_of_which_identified_expansive,
+#             ("ECO4", "Of which identified by model - total"): (
+#                 eco4_of_which_identified_strict + eco4_of_which_identified_expansive
+#             ),
+#             ("ECO4", "Additional properties"): surplus_eco4.shape[0],
+#             ############
+#             # GBIS
+#             ############
+#             ("GBIS", "# remaining November file"): november_gbis_remaining,
+#             ("GBIS", "# sold in November file"): november_gbis_sold,
+#             ("GBIS", "# sold (survey list)"): n_sold_gbis,
+#             ("GBIS", "# Remaining properties (asset list)"): n_warmfront_identified_gbis,
+#             ("GBIS", "Of which identified by model - strict"): gbis_of_which_identified_strict,
+#             ("GBIS", "Of which identified by model - expansive"): gbis_of_which_identified_expansive,
+#             ("GBIS", "Of which identified by model - total"): (
+#                 gbis_of_which_identified_strict + gbis_of_which_identified_expansive
+#             ),
+#             ("GBIS", "Additional properties"): surplus_gbis.shape[0]
+#         }
+#
+#         ha_analysis_results.append(to_append)
+#
+#         # Calculate the revenue results
+#         to_append_revenue = {
+#             ("", "HA Name"): ha_name,
+#             # Eco4 revenue
+#             ("ECO4", "£ remaining November file"): november_eco4_remaining * eco4_rate,
+#             ("ECO4", "£ sold November file"): november_eco4_sold * old_eco4_rate,
+#             ("ECO4", "£ sold since November"): eco4_sales_since_november * eco4_rate,
+#             ("ECO4", "£ stuck at ciga check"): n_eco4_missed_subject_to_ciga * eco4_rate,
+#             ("ECO4", "£ remaining (asset list)"): n_warmfront_identified_eco4 * eco4_rate,
+#             ("ECO4", "Of which identified by model - strict"): eco4_of_which_identified_strict * eco4_rate,
+#             ("ECO4", "Of which identified by model - expansive"): eco4_of_which_identified_expansive * eco4_rate,
+#             ("ECO4", "Of which identified by model - total"): eco4_rate * (
+#                 eco4_of_which_identified_strict + eco4_of_which_identified_expansive
+#             ),
+#             ("ECO4", "Additional properties"): eco4_rate * surplus_eco4.shape[0],
+#         }
+#         total_revenue_results.append(to_append_revenue)
+#
+#     ha_analysis_results = pd.DataFrame(ha_analysis_results)
+#     ha_analysis_results.columns = pd.MultiIndex.from_tuples(ha_analysis_results.columns)
+#
+#     facts_and_figures = loader.facts_and_figures.copy()
+#     facts_and_figures["ha_number"] = facts_and_figures["HA Name"].str.extract(r'(\d+)').astype(int)
+#     facts_and_figures = facts_and_figures.sort_values("ha_number")
+#     facts_and_figures = facts_and_figures.drop(columns=["ha_number"])
+#
+#     # Rename some of the cols
+#     facts_and_figures = facts_and_figures.rename(
+#         columns={
+#             # ECO4 cols
+#             "ECO4": "ECO4 - November",
+#             "GBIS": "GBIS - November",
+#             "eco4 (subject to ciga)": "ECO4 - subject to ciga",
+#             "eco4": "ECO4 - doesn't need CIGA",
+#             "eco4 - passed ciga": "ECO4 - passed CIGA",
+#             "failed ciga": "ECO4 - failed CIGA",
+#             "ECO4 - partially cancelled": "ECO4 - Install downgrade to GBIS",
+#             "ECO4 - in progress": "ECO4 - Install in progress",
+#             "ECO4 - cancelled": "ECO4 - Install cancelled",
+#             # GBIS cols
+#             "gbis": "GBIS total (asset list)"
+#         }
+#     )
+#     # We calculate the eco4 total from the asset list
+#     # 1) If ciga checks have been completed (i.e. ECO4 - passed ciga > 0) this sum is
+#     # ECO4 - doesn't need CIGA + ECO4 - passed CIGA
+#     # 2) if ciga checks haven't been completed (i.e. ECO4 - passed ciga is missing), this sum is
+#     # ECO4 - doesn't need CIGA + ECO4 - subject to ciga
+#     facts_and_figures["ECO4 total (asset list - pre ciga)"] = (
+#         facts_and_figures["ECO4 - doesn't need CIGA"] +
+#         facts_and_figures["ECO4 - subject to ciga"] +
+#         facts_and_figures["ECO4 - passed CIGA"]
+#     )
+#
+#     facts_and_figures["ECO4 total (asset list - post ciga)"] = None
+#     facts_and_figures["ECO4 total (asset list - post ciga)"] = np.where(
+#         facts_and_figures["ECO4 - passed CIGA"] > 0,
+#         facts_and_figures["ECO4 - doesn't need CIGA"] + facts_and_figures["ECO4 - passed CIGA"],
+#         facts_and_figures["ECO4 total (asset list - post ciga)"]
+#     )
+#
+#     # Re-arrange the columns
+#     facts_and_figures = facts_and_figures[
+#         [
+#             'HA Name',
+#             'ECO4 - November',
+#             'GBIS - November',
+#             'ECO4 total (asset list - pre ciga)',
+#             'ECO4 total (asset list - post ciga)',
+#             'GBIS total (asset list)',
+#             'ECO4 - subject to ciga',
+#             "ECO4 - doesn't need CIGA",
+#             'ECO4 - passed CIGA',
+#             'ECO4 - failed CIGA',
+#             'ECO4 - installed',
+#             'ECO4 - Install in progress',
+#             'ECO4 - Install cancelled',
+#             'ECO4 - partially installed',
+#             'ECO4 - Install downgrade to GBIS',
+#         ]
+#     ]
+#     # Add a note to flag any rows where
+#     # (ECO4 - subject to ciga is greater than 0) and
+#     # (ECO4 - passed ciga is greater than 0)
+#     facts_and_figures["Missed CIGA checks opportunity"] = None
+#     facts_and_figures["Missed CIGA checks opportunity"] = np.where(
+#         (facts_and_figures["ECO4 - subject to ciga"] > 0) & (facts_and_figures["ECO4 - passed CIGA"] > 0),
+#         "potential opportunity of " + facts_and_figures["ECO4 - subject to ciga"].astype(
+#             str) + " ECO4 properties needing a CIGA check",
+#         facts_and_figures["Missed CIGA checks opportunity"]
+#     )
+#
+#     facts_and_figures.to_csv("Facts and figures sample.csv")
+#
+#     # Re-arrange the columns
+#
+#     # Also sort ha_analysis_results by ha number
+#     ha_analysis_results["ha_number"] = ha_analysis_results[("", "HA Name")].str.extract(r'(\d+)').astype(int)
+#     ha_analysis_results = ha_analysis_results.sort_values("ha_number")
+#     ha_analysis_results = ha_analysis_results.drop(columns=["ha_number"])
+#
+#     # We save 2 sheets
+#     # Automate creation of the excel
+#     # Create a Pandas Excel writer using XlsxWriter as the engine
+#     with pd.ExcelWriter('HA Analysis Results.xlsx', engine='xlsxwriter') as writer:
+#         # Write each dataframe to a different worksheet without the index
+#         for df, sheet in [(facts_and_figures, 'HA Facts and Figures'),
+#                           (ha_analysis_results, 'Asset Identification')]:
+#
+#             df.to_excel(writer, sheet_name=sheet)
+#
+#             # Auto-adjust columns' width
+#             for i, width in enumerate(get_col_widths(df)):
+#                 writer.sheets[sheet].set_column(i, i, width)
+#
+#     # Inspection: - Looking into the proportion of homes with "cavity, as built, insulated (assumed)" as their
+#     #               description, and what proportion of time they get identified via non-invasive surveys
+#
+#     # true_eco4_assets = []
+#     # ciga_dependent_assets = []
+#     # not_eligible = []
+#     # as_built_insulated = []
+#     # date_cols = {
+#     #     "HA39": "date_built",
+#     #     "HA14": "Built In Year",
+#     #     "HA6": "Construction Year",
+#     #     "HA1": "Build Date",
+#     #     "HA107": "YEAR BUILT"
+#     # }
+#     # for ha_name, data_objects in outputs.items():
+#     #     inputs = [x for k, x in loader.data.items() if k == ha_name][0]
+#     #
+#     #     date_col = date_cols[ha_name]
+#     #     results_df = data_objects["results_df"].copy()
+#     #     df = inputs["asset_list"][['asset_list_row_id', "ECO Eligibility", date_col]].rename(
+#     #         columns={"row_meaning": "asset_identification_status", date_col: "date_built"}
+#     #     ).merge(
+#     #         results_df,
+#     #         how="left",
+#     #         right_on="row_id",
+#     #         left_on="asset_list_row_id"
+#     #     )
+#     #
+#     #     # take the true ECO4
+#     #     true_eco4 = df[df["ECO Eligibility"] == "eco4"].copy()
+#     #     ciga_dependent = df[
+#     #         df["ECO Eligibility"].isin(
+#     #             [
+#     #                 "eco4 (subject to ciga)",
+#     #                 "failed ciga",
+#     #                 "eco4 - passed ciga"
+#     #             ]
+#     #         )
+#     #     ]
+#     #     insulated_assumed = df[df["walls"] == "Cavity wall, as built, insulated"].copy()
+#     #     # We convert date built to datetime
+#     #     try:
+#     #         insulated_assumed = insulated_assumed[~pd.isnull(insulated_assumed["date_built"])]
+#     #         insulated_assumed["year_built"] = pd.to_datetime(insulated_assumed["date_built"].astype(str)).dt.year
+#     #         as_built_insulated.append(insulated_assumed)
+#     #     except Exception as e:
+#     #         print("oh well")
+#     #
+#     #     true_eco4_assets.append(true_eco4)
+#     #     ciga_dependent_assets.append(ciga_dependent)
+#     #
+#     # true_eco4_assets = pd.concat(true_eco4_assets)
+#     # ciga_dependent_assets = pd.concat(ciga_dependent_assets)
+#     # as_built_insulated = pd.concat(as_built_insulated)
+#     #
+#     # true_eco4_assets["walls"].value_counts(normalize=True)
+#     # ciga_dependent_assets["walls"].value_counts(normalize=True)
+#     #
+#     # from recommendations.recommendation_utils import extract_insulation_thickness
+#     #
+#     # true_eco4_assets["roof_insulation_thickness"] = true_eco4_assets["roof"].apply(
+#     #     lambda x: extract_insulation_thickness(x)
+#     # )
+#     #
+#     # true_eco4_assets["e"] = true_eco4_assets.merge(
+#     #     pd.DataFrame(cleaned["roof-description"])[["original_description", "insulation_thickness"]],
+#     #     how="left",
+#     #     left_on="roof",
+#     #     right_on="original_description"
+#     # )
+#     #
+#     # true_eco4_assets["sap"].mean()
+#     #
+#     # true_eco4_assets["insulation_thickness"].isin(
+#     #     ["250", "150", "200", "100", "75", "50"]
+#     # ).sum() / true_eco4_assets.shape[0]
+#     #
+#     # true_eco4_assets["insulation_thickness"].isin(
+#     #     ["100"]
+#     # ).sum() / true_eco4_assets.shape[0]
+#     #
+#     # as_built_insulated.groupby("property_type")["ECO Eligibility"].value_counts(normalize=True)
 
 
 def get_propensity_model_data(
@@ -2567,29 +2567,39 @@ def calculate_eco4_post_ciga(
             eligiblity_counts["ECO Eligibility"] == "eco4 - passed ciga"
             ]["count"].sum()
 
+        eco4_confirmed_ciga_failures = eligiblity_counts[
+            eligiblity_counts["ECO Eligibility"] == "failed ciga"
+            ]["count"].sum()
+
         eco4_confirmed = (eco4_no_ciga_needed * ha_eco4_to_sale_rate) + (eco4_ciga_passed * ha_ciga_pass_to_sale_rate)
         eco4_confirmed = np.round(eco4_confirmed)
 
         if remaining_needing_ciga_check > 0:
             # We update the eco4 post ciga with the converted remaining
+            eco4_ciga_expected_remaining_to_pass = np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate)
             eco4_remaining_forecast = np.round(
-                remaining_needing_ciga_check * ha_ciga_conversion_rate * ha_ciga_pass_to_sale_rate
+                eco4_ciga_expected_remaining_to_pass * ha_ciga_pass_to_sale_rate
             )
+            eco4_estimated_ciga_failures = remaining_needing_ciga_check - eco4_ciga_expected_remaining_to_pass
             eco4_post_ciga = eco4_confirmed + eco4_remaining_forecast
         else:
             eco4_remaining_forecast = 0
+            eco4_estimated_ciga_failures = 0
             eco4_post_ciga = eco4_confirmed
     else:
         eco4_no_ciga_needed = eligiblity_counts[
             eligiblity_counts["ECO Eligibility"] == "eco4"
             ]["count"].sum()
+        eco4_confirmed_ciga_failures = 0
+        # Multiply by sale conversion
         eco4_confirmed = np.round(eco4_no_ciga_needed * ha_eco4_to_sale_rate)
+        eco4_ciga_expected_remaining_to_pass = np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate)
+        eco4_estimated_ciga_failures = remaining_needing_ciga_check - eco4_ciga_expected_remaining_to_pass
+
         eco4_remaining_forecast = np.round(
-            remaining_needing_ciga_check * ha_ciga_conversion_rate * ha_ciga_pass_to_sale_rate
-        )
-        eco4_post_ciga = (
-            eligiblity_counts[eligiblity_counts["ECO Eligibility"] == "eco4"]["count"].sum() + eco4_remaining_forecast
+            eco4_ciga_expected_remaining_to_pass * ha_ciga_pass_to_sale_rate
         )
+        eco4_post_ciga = eco4_confirmed + eco4_remaining_forecast
 
     eco4_post_ciga = int(eco4_post_ciga)
     eco4_remaining_forecast = int(eco4_remaining_forecast)
@@ -2604,6 +2614,16 @@ def calculate_eco4_post_ciga(
         "ECO4 - post CIGA - £": eco4_post_ciga * eco4_rate,
         "Of which confirmed - £": eco4_confirmed * eco4_rate,
         "Of which forecast - £": eco4_remaining_forecast * eco4_rate,
+        # Ciga failures
+        "Estimated total - failed CIGA": int(eco4_confirmed_ciga_failures + eco4_estimated_ciga_failures),
+        "Confirmed CIGA failures": eco4_confirmed_ciga_failures,
+        "Estimated CIGA failures": int(eco4_estimated_ciga_failures),
+        # Ciga failures cost
+        "Estimated total - failed CIGA - £": int(
+            (eco4_confirmed_ciga_failures + eco4_estimated_ciga_failures) * eco4_rate
+        ),
+        "Confirmed CIGA failures - £": int(eco4_confirmed_ciga_failures * eco4_rate),
+        "Estimated CIGA failures - £": int(eco4_estimated_ciga_failures * eco4_rate),
     }
 
     return results
@@ -2617,8 +2637,8 @@ def forecast_remaining_sales(loader):
 
     gbis_rate = 600
     eco4_rate = 1710
-    old_gbis_rate = 432
-    old_eco4_rate = 1456
+    # old_gbis_rate = 432
+    # old_eco4_rate = 1456
 
     # 1) Calculate the conversion rate from passed CIGA to actual sale
     converted_ciga_jobs = []
@@ -2800,16 +2820,18 @@ def forecast_remaining_sales(loader):
 
     results = []
     for ha_name, input_data in loader.data.items():
+
         # Original warmfront figures - ECO4
         original_warmfront_estimates = december_figures[december_figures["HA Name"] == ha_name]
 
         original_warmfront_eco4 = original_warmfront_estimates["ECO4"].values[0]
         original_warmfront_remaining_eco4 = original_warmfront_estimates["ECO4 remaining"].values[0]
 
-        original_warmfront_eco4_revenue = (
-            original_warmfront_remaining_eco4 * eco4_rate +
-            (original_warmfront_eco4 - original_warmfront_remaining_eco4) * old_eco4_rate
-        )
+        # original_warmfront_eco4_revenue = (
+        #     original_warmfront_remaining_eco4 * eco4_rate +
+        #     (original_warmfront_eco4 - original_warmfront_remaining_eco4) * old_eco4_rate
+        # )
+        original_warmfront_eco4_revenue = original_warmfront_eco4 * eco4_rate
         original_warmfront_remaining_eco4_revenue = original_warmfront_remaining_eco4 * eco4_rate
 
         # Original warmfront figures - GBIS
@@ -2817,9 +2839,12 @@ def forecast_remaining_sales(loader):
         original_warmfront_gbis = original_warmfront_estimates["GBIS"].values[0]
         original_warmfront_remaining_gbis = original_warmfront_estimates["GBIS remaining"].values[0]
 
+        # original_warmfront_gbis_revenue = (
+        #     original_warmfront_remaining_gbis * gbis_rate +
+        #     (original_warmfront_gbis - original_warmfront_remaining_gbis) * old_gbis_rate
+        # )
         original_warmfront_gbis_revenue = (
-            original_warmfront_remaining_gbis * gbis_rate +
-            (original_warmfront_gbis - original_warmfront_remaining_gbis) * old_gbis_rate
+            original_warmfront_gbis * gbis_rate
         )
         original_warmfront_remaining_gbis_revenue = original_warmfront_remaining_gbis * gbis_rate
 
@@ -2835,6 +2860,7 @@ def forecast_remaining_sales(loader):
                 how="left",
                 on="asset_list_row_id"
             )
+            # Anything that has an installation has gone to installation, and therefore is not remaining
             asset_list_remaining = asset_list_remaining[pd.isnull(asset_list_remaining["installation_status"])]
             asset_list_remaining = asset_list_remaining.drop(columns=["installation_status"])
 
@@ -2913,6 +2939,32 @@ def forecast_remaining_sales(loader):
             eco4_rate=eco4_rate
         )
 
+        # GBIS Figures
+        # Estimate the GBIS conversion rate
+        ha_gbis_sale_conversion = gbis_ciga_independent_passrates[
+            gbis_ciga_independent_passrates["Ha Name"] == ha_name
+            ]
+
+        if not ha_gbis_sale_conversion.empty:
+            ha_gbis_sale_conversion = (
+                ha_gbis_sale_conversion["# GBIS successfully installed"].values[0] /
+                ha_gbis_sale_conversion["# GBIS at install stage"].values[0]
+            )
+        else:
+            ha_gbis_sale_conversion = median_gbis_to_install
+
+        gbis_total = eligiblity_counts[
+            eligiblity_counts["ECO Eligibility"] == "gbis"
+            ]["count"].sum()
+        gbis_total = np.round(gbis_total * ha_gbis_sale_conversion)
+        gbis_total_revenue = gbis_total * gbis_rate
+
+        gbis_remaining = eligiblity_counts_remaining[
+            eligiblity_counts["ECO Eligibility"] == "gbis"
+            ]["count"].sum()
+        gbis_remaining = np.round(gbis_remaining * ha_gbis_sale_conversion)
+        gbis_remaining_revenue = gbis_remaining * gbis_rate
+
         to_append = {
             ("", "", "", "HA Name"): ha_name,
             # ECO4 - original warmfront figures
@@ -2950,16 +3002,48 @@ def forecast_remaining_sales(loader):
             ("ECO4 post-ciga", "", "Of which forecast - £", ""):
                 eco4_post_ciga_remaining_results["Of which forecast - £"],
             # CIGA failures
+            ("ECO4 CIGA failures", "", "Estimated total - failed CIGA - #", ""): eco4_post_ciga_remaining_results[
+                'Estimated total - failed CIGA'
+            ],
+            ("ECO4 CIGA failures", "", "Estimated total - failed CIGA - £", ""): eco4_post_ciga_remaining_results[
+                'Estimated total - failed CIGA - £'
+            ],
+            ("ECO4 CIGA failures", "", "Confirmed failures - #", ""): eco4_post_ciga_remaining_results[
+                "Confirmed CIGA failures"
+            ],
+            ("ECO4 CIGA failures", "", "Confirmed failures - £", ""): eco4_post_ciga_remaining_results[
+                "Confirmed CIGA failures - £"
+            ],
+            ("ECO4 CIGA failures", "", "Estimated failures - #", ""): eco4_post_ciga_remaining_results[
+                "Estimated CIGA failures"
+            ],
+            ("ECO4 CIGA failures", "", "Estimated failures - £", ""): eco4_post_ciga_remaining_results[
+                "Estimated CIGA failures - £"
+            ],
+            # GBIS postcode list
+            ("", "Warmfront post code list", "Total - #", "GBIS total"): gbis_total,
+            ("", "Warmfront post code list", "Remaining - #", "GBIS total"): gbis_remaining,
+            ("", "Warmfront post code list", "Total - £", "GBIS total"): gbis_total_revenue,
+            ("", "Warmfront post code list", "Remaining - £", "GBIS total"): gbis_remaining_revenue,
         }
 
         # Make sure nothing is forgotten due to duplicate multi-index keys
-        if len(to_append) != 22:
+        if len(to_append) != 32:
             raise ValueError("Something went wrong")
 
         results.append(to_append)
 
     results = pd.DataFrame(results)
 
+    # TODO: Add a blank row and then a total row
+
+    assumptions = {
+        "ECO4 new rate": eco4_rate,
+        "GBIS new rate": gbis_rate,
+        # "ECO4 old rate": old_eco4_rate,
+        # "GBIS old rate": old_gbis_rate,
+    }
+
 
 def app():
     """

From 2ba37d55e65a746fdb58588aa2768851a83a3887 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 1 Mar 2024 20:06:57 +0000
Subject: [PATCH 046/262] Added assumptions table

---
 .../ha_15_32/ha_analysis_batch_3.py           | 45 ++++++++++++++-----
 1 file changed, 35 insertions(+), 10 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 191ca74c..ac4d3a0c 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -2956,14 +2956,14 @@ def forecast_remaining_sales(loader):
         gbis_total = eligiblity_counts[
             eligiblity_counts["ECO Eligibility"] == "gbis"
             ]["count"].sum()
-        gbis_total = np.round(gbis_total * ha_gbis_sale_conversion)
-        gbis_total_revenue = gbis_total * gbis_rate
+        gbis_total = int(np.round(gbis_total * ha_gbis_sale_conversion))
+        gbis_total_revenue = int(gbis_total * gbis_rate)
 
         gbis_remaining = eligiblity_counts_remaining[
             eligiblity_counts["ECO Eligibility"] == "gbis"
             ]["count"].sum()
-        gbis_remaining = np.round(gbis_remaining * ha_gbis_sale_conversion)
-        gbis_remaining_revenue = gbis_remaining * gbis_rate
+        gbis_remaining = int(np.round(gbis_remaining * ha_gbis_sale_conversion))
+        gbis_remaining_revenue = int(gbis_remaining * gbis_rate)
 
         to_append = {
             ("", "", "", "HA Name"): ha_name,
@@ -3037,12 +3037,37 @@ def forecast_remaining_sales(loader):
 
     # TODO: Add a blank row and then a total row
 
-    assumptions = {
-        "ECO4 new rate": eco4_rate,
-        "GBIS new rate": gbis_rate,
-        # "ECO4 old rate": old_eco4_rate,
-        # "GBIS old rate": old_gbis_rate,
-    }
+    assumptions = [
+        {
+            ("", "", "", "HA Name"): "ECO4 rate",
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): "£" + str(eco4_rate)
+        },
+        {
+            ("", "", "", "HA Name"): "GBIS rate",
+            ("ECO4 original", "", "Remaining - #", ""): "£" + str(gbis_rate)
+        },
+        {
+            ("", "", "", "HA Name"): "Median CIGA pass rate",
+            ("ECO4 original", "", "Total - £", ""): str(round(median_ciga_success_rate * 100, 1)) + "%",
+        },
+        {
+            ("", "", "", "HA Name"): "Maximum allowed CIGA pass rate",
+            ("ECO4 original", "", "Total - £", ""): str(round(maximum_ciga_conversion * 100, 1)) + "%",
+            ("ECO4 original", "", "Remaining - £", ""): "- Maximum allowed CIGA conversion for HAs without CIGA checks"
+        },
+        {
+            ("", "", "", "HA Name"): "Median ECO4 (no CIGA) sales conversion rate",
+            ("ECO4 original", "", "Total - £", ""): str(round(median_eco4_to_install * 100, 1)) + "%",
+            ("ECO4 original", "", "Remaining - £",
+             ""): " - Sales conversion rate for a ECO4 property that didn't need a CIGA check"
+        },
+        {
+            ("", "", "", "HA Name"): "Median ECO4 (subect to CIGA) sales conversion rate",
+            ("ECO4 original", "", "Total - £", ""): str(round(median_ciga_pass_to_install * 100, 1)) + "%",
+            ("ECO4 original", "", "Remaining - £",
+             ""): " - Sales conversion rate for a ECO4 property that passed a CIGA check"
+        }
+    ]
 
 
 def app():

From 57a7edf62511207f7d7af176414b5b269f3b1aa1 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 1 Mar 2024 20:18:44 +0000
Subject: [PATCH 047/262] collating results

---
 .../ha_15_32/ha_analysis_batch_3.py           | 22 ++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index ac4d3a0c..7da6bb3a 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -3035,9 +3035,21 @@ def forecast_remaining_sales(loader):
 
     results = pd.DataFrame(results)
 
-    # TODO: Add a blank row and then a total row
+    totals_row = {}
+    for col in results.columns:
+        if col == ('', '', '', 'HA Name'):
+            totals_row[col] = "Total"
+        elif col == ("ECO4 post-ciga", "", "Delta vs original estimate", ""):
+            totals_row[col] = results[col].mean()
+        else:
+            totals_row[col] = results[col].sum()
+
+    blank_row = pd.DataFrame([{col: "" for col in results.columns}])
 
     assumptions = [
+        {
+            ("", "", "", "HA Name"): "Assumptions",
+        },
         {
             ("", "", "", "HA Name"): "ECO4 rate",
             ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): "£" + str(eco4_rate)
@@ -3059,16 +3071,20 @@ def forecast_remaining_sales(loader):
             ("", "", "", "HA Name"): "Median ECO4 (no CIGA) sales conversion rate",
             ("ECO4 original", "", "Total - £", ""): str(round(median_eco4_to_install * 100, 1)) + "%",
             ("ECO4 original", "", "Remaining - £",
-             ""): " - Sales conversion rate for a ECO4 property that didn't need a CIGA check"
+             ""): " - Sales conversion rate for a ECO4 property that didn't need a CIGA check. Job must not cancel"
         },
         {
             ("", "", "", "HA Name"): "Median ECO4 (subect to CIGA) sales conversion rate",
             ("ECO4 original", "", "Total - £", ""): str(round(median_ciga_pass_to_install * 100, 1)) + "%",
             ("ECO4 original", "", "Remaining - £",
-             ""): " - Sales conversion rate for a ECO4 property that passed a CIGA check"
+             ""): " - Sales conversion rate for a ECO4 property that passed a CIGA check. Job must not cancel"
         }
     ]
 
+    results = pd.concat(
+        [results, pd.DataFrame([totals_row]), blank_row, blank_row, pd.DataFrame(assumptions)]
+    )
+
 
 def app():
     """

From 028c2edce7ab951987379a7c653324e5863426ae Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 1 Mar 2024 20:48:43 +0000
Subject: [PATCH 048/262] Added headlines

---
 .../ha_15_32/ha_analysis_batch_3.py           | 129 +++++++++++++++++-
 1 file changed, 126 insertions(+), 3 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 7da6bb3a..1c320f9c 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -2991,7 +2991,7 @@ def forecast_remaining_sales(loader):
             # ECO4 - asset list, post ciga, remaining
             ("ECO4 post-ciga", "", "Estimated remaining eligible - #", ""): eco4_post_ciga_remaining_results[
                 "ECO4 - post CIGA - #"],
-            ("ECO4 post-ciga", "", "Estimated remaining total eligible - £", ""): eco4_post_ciga_remaining_results[
+            ("ECO4 post-ciga", "", "Estimated remaining eligible - £", ""): eco4_post_ciga_remaining_results[
                 "ECO4 - post CIGA - £"],
             ("ECO4 post-ciga", "", "Of which - confirmed (post CIGA or no CIGA required) - #", ""):
                 eco4_post_ciga_remaining_results["Of which confirmed - #"],
@@ -3046,6 +3046,126 @@ def forecast_remaining_sales(loader):
 
     blank_row = pd.DataFrame([{col: "" for col in results.columns}])
 
+    # Put together a Warmfront original remaining ECO4 vs asset list remaining ECO4 and same for GBIS, as well as totals
+
+    # ECO4 Headlines
+    headline_eco4_original_remaining = totals_row[("ECO4 original", "", "Remaining - #", "")]
+    headline_eco4_original_remaining_revenue = totals_row[("ECO4 original", "", "Remaining - £", "")]
+    headline_eco4_postcode_list_remaining = totals_row[("ECO4 post-ciga", "", "Estimated remaining eligible - #", "")]
+    headline_eco4_postcode_list_remaining_revenue = totals_row[
+        ("ECO4 post-ciga", "", "Estimated remaining eligible - £", "")
+    ]
+    headline_eco4_delta = 100 * (
+        (headline_eco4_postcode_list_remaining - headline_eco4_original_remaining) /
+        headline_eco4_original_remaining
+    )
+    headline_eco4_delta = round(headline_eco4_delta, 1)
+
+    # GBIS Headlines
+    headline_gbis_original_remaining = totals_row[("GBIS original", "", "Remaining - #", "")]
+    headline_gbis_original_remaining_revenue = totals_row[("GBIS original", "", "Remaining - £", "")]
+    headline_gbis_postcode_list_remaining = totals_row[("", "Warmfront post code list", "Remaining - #", "GBIS total")]
+    headline_gbis_postcode_list_remaining_revenue = totals_row[
+        ("", "Warmfront post code list", "Remaining - £", "GBIS total")
+    ]
+    headline_gbis_delta = 100 * (
+        (headline_gbis_postcode_list_remaining - headline_gbis_original_remaining) /
+        headline_gbis_original_remaining
+    )
+    headline_gbis_delta = round(headline_gbis_delta, 1)
+
+    headline_original_total_revenue_remaining = (
+        headline_eco4_original_remaining_revenue + headline_gbis_original_remaining_revenue
+    )
+
+    headline_postcode_list_total_revenue_remaining = (
+        headline_eco4_postcode_list_remaining_revenue + headline_gbis_postcode_list_remaining_revenue
+    )
+    headline_total_delta = 100 * (
+        (headline_postcode_list_total_revenue_remaining - headline_original_total_revenue_remaining) /
+        headline_original_total_revenue_remaining
+    )
+    headline_total_delta = round(headline_total_delta, 1)
+
+    headlines = [
+        {
+            ("", "", "", "HA Name"): "Headlines",
+        },
+        {
+            ("", "", "", "HA Name"): "ECO4 Remaining - November - #",
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(
+                headline_eco4_original_remaining
+            )
+        },
+        {
+            ("", "", "", "HA Name"): "ECO4 Remaining - November - £",
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): "£" + str(
+                headline_eco4_original_remaining_revenue
+            )
+        },
+        {
+            ("", "", "", "HA Name"): "ECO4 Remaining - postcode list - #",
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(
+                headline_eco4_postcode_list_remaining
+            )
+        },
+        {
+            ("", "", "", "HA Name"): "ECO4 Remaining - postcode list - £",
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): "£" + str(
+                headline_eco4_postcode_list_remaining_revenue
+            )
+        },
+        {
+            ("", "", "", "HA Name"): "ECO4 delta %",
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(headline_eco4_delta) + "%"
+        },
+        {
+            ("", "", "", "HA Name"): "GBIS Remaining - November - #",
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(
+                headline_gbis_original_remaining
+            )
+        },
+        {
+            ("", "", "", "HA Name"): "GBIS Remaining - November - £",
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): "£" + str(
+                headline_gbis_original_remaining_revenue
+            )
+        },
+        {
+            ("", "", "", "HA Name"): "GBIS Remaining - post code list - #",
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(
+                headline_gbis_postcode_list_remaining
+            )
+        },
+        {
+            ("", "", "", "HA Name"): "GBIS Remaining - post code list - £",
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): "£" + str(
+                headline_gbis_postcode_list_remaining_revenue
+            )
+        },
+        {
+            ("", "", "", "HA Name"): "GBIS delta %",
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(headline_gbis_delta) + "%"
+        },
+        # Total revenue
+        {
+            ("", "", "", "HA Name"): "Total Remaining - November - £",
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): "£" + str(
+                headline_original_total_revenue_remaining
+            )
+        },
+        {
+            ("", "", "", "HA Name"): "Total Remaining - post code list - £",
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): "£" + str(
+                headline_postcode_list_total_revenue_remaining
+            )
+        },
+        {
+            ("", "", "", "HA Name"): "Total Remaining delta %",
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(headline_total_delta) + "%"
+        },
+    ]
+
     assumptions = [
         {
             ("", "", "", "HA Name"): "Assumptions",
@@ -3065,7 +3185,9 @@ def forecast_remaining_sales(loader):
         {
             ("", "", "", "HA Name"): "Maximum allowed CIGA pass rate",
             ("ECO4 original", "", "Total - £", ""): str(round(maximum_ciga_conversion * 100, 1)) + "%",
-            ("ECO4 original", "", "Remaining - £", ""): "- Maximum allowed CIGA conversion for HAs without CIGA checks"
+            ("ECO4 original", "", "Remaining - £",
+             ""): "- Maximum allowed CIGA conversion for HAs without CIGA checks We do not allow above this to be "
+                  "conservative"
         },
         {
             ("", "", "", "HA Name"): "Median ECO4 (no CIGA) sales conversion rate",
@@ -3082,7 +3204,8 @@ def forecast_remaining_sales(loader):
     ]
 
     results = pd.concat(
-        [results, pd.DataFrame([totals_row]), blank_row, blank_row, pd.DataFrame(assumptions)]
+        [results, pd.DataFrame([headlines]), pd.DataFrame([totals_row]), blank_row, blank_row,
+         pd.DataFrame(assumptions)]
     )
 
 

From 721bfb19fcc3bd70fe02081e14e4abde22f9a13e Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 1 Mar 2024 21:33:45 +0000
Subject: [PATCH 049/262] Added totals percentages aggregations

---
 .../ha_15_32/ha_analysis_batch_3.py           | 74 ++++++++++++++++---
 1 file changed, 64 insertions(+), 10 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 1c320f9c..3341e34c 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -2965,6 +2965,14 @@ def forecast_remaining_sales(loader):
         gbis_remaining = int(np.round(gbis_remaining * ha_gbis_sale_conversion))
         gbis_remaining_revenue = int(gbis_remaining * gbis_rate)
 
+        # GBIS delta
+        if original_warmfront_gbis == 0:
+            gbis_delta_vs_original_estimate = 100 * gbis_total
+        else:
+            gbis_delta_vs_original_estimate = 100 * (
+                gbis_total - original_warmfront_gbis
+            ) / original_warmfront_gbis
+
         to_append = {
             ("", "", "", "HA Name"): ha_name,
             # ECO4 - original warmfront figures
@@ -2987,7 +2995,7 @@ def forecast_remaining_sales(loader):
                 "ECO4 - post CIGA - #"],
             ("ECO4 post-ciga", "", "Estimated total eligible - £", ""): eco4_post_ciga_total_results[
                 "ECO4 - post CIGA - £"],
-            ("ECO4 post-ciga", "", "Delta vs original estimate", ""): eco4_delta_vs_original_estimate,
+            ("ECO4 post-ciga", "", "Delta vs original estimate - %", ""): eco4_delta_vs_original_estimate,
             # ECO4 - asset list, post ciga, remaining
             ("ECO4 post-ciga", "", "Estimated remaining eligible - #", ""): eco4_post_ciga_remaining_results[
                 "ECO4 - post CIGA - #"],
@@ -3021,14 +3029,15 @@ def forecast_remaining_sales(loader):
                 "Estimated CIGA failures - £"
             ],
             # GBIS postcode list
-            ("", "Warmfront post code list", "Total - #", "GBIS total"): gbis_total,
-            ("", "Warmfront post code list", "Remaining - #", "GBIS total"): gbis_remaining,
-            ("", "Warmfront post code list", "Total - £", "GBIS total"): gbis_total_revenue,
-            ("", "Warmfront post code list", "Remaining - £", "GBIS total"): gbis_remaining_revenue,
+            ("GBIS Postcode list", "Warmfront post code list", "Total - #", "GBIS total"): gbis_total,
+            ("GBIS Postcode list", "Warmfront post code list", "Total - £", "GBIS total"): gbis_total_revenue,
+            ("GBIS Postcode list", "", "Delta vs original estimate - %", ""): gbis_delta_vs_original_estimate,
+            ("GBIS Postcode list", "Warmfront post code list", "Remaining - #", "GBIS total"): gbis_remaining,
+            ("GBIS Postcode list", "Warmfront post code list", "Remaining - £", "GBIS total"): gbis_remaining_revenue,
         }
 
         # Make sure nothing is forgotten due to duplicate multi-index keys
-        if len(to_append) != 32:
+        if len(to_append) != 33:
             raise ValueError("Something went wrong")
 
         results.append(to_append)
@@ -3039,11 +3048,31 @@ def forecast_remaining_sales(loader):
     for col in results.columns:
         if col == ('', '', '', 'HA Name'):
             totals_row[col] = "Total"
-        elif col == ("ECO4 post-ciga", "", "Delta vs original estimate", ""):
-            totals_row[col] = results[col].mean()
+        elif col in [
+            ("ECO4 post-ciga", "", "Delta vs original estimate - %", ""),
+            ("GBIS Postcode list", "", "Delta vs original estimate - %", "")
+        ]:
+            totals_row[col] = None
         else:
             totals_row[col] = results[col].sum()
 
+    # For the delta columns, we calculate the delta on the totals
+    totals_row[("ECO4 post-ciga", "", "Delta vs original estimate - %", "")] = round(
+        100 * (
+            totals_row[("ECO4 post-ciga", "", "Estimated total eligible - #", "")] -
+            totals_row[("", "Original Warmfront estimate", "Total - #", "ECO4 - November")]
+        ) / totals_row[("", "Original Warmfront estimate", "Total - #", "ECO4 - November")],
+        1
+    )
+
+    totals_row[("GBIS Postcode list", "", "Delta vs original estimate - %", "")] = round(
+        100 * (
+            totals_row[("GBIS Postcode list", "Warmfront post code list", "Total - #", "GBIS total")] -
+            totals_row[("", "Original Warmfront estimate", "Total - #", "GBIS - November")]
+        ) / totals_row[("", "Original Warmfront estimate", "Total - #", "GBIS - November")],
+        1
+    )
+
     blank_row = pd.DataFrame([{col: "" for col in results.columns}])
 
     # Put together a Warmfront original remaining ECO4 vs asset list remaining ECO4 and same for GBIS, as well as totals
@@ -3204,10 +3233,35 @@ def forecast_remaining_sales(loader):
     ]
 
     results = pd.concat(
-        [results, pd.DataFrame([headlines]), pd.DataFrame([totals_row]), blank_row, blank_row,
-         pd.DataFrame(assumptions)]
+        [
+            results,
+            pd.DataFrame([totals_row]),
+            pd.DataFrame(headlines),
+            blank_row,
+            blank_row,
+            pd.DataFrame(assumptions)
+        ]
     )
 
+    # header_rows = [
+    #     [name[0] for name in results.columns.values],
+    #     [name[1] for name in results.columns.values],
+    #     [name[2] for name in results.columns.values],
+    #     [name[3] for name in results.columns.values]
+    # ]
+
+    # Step 2: Write the transformed header and DataFrame data to CSV.
+    # Open the file in write mode.
+    import csv
+    with open("HA Remaining Analysis.csv", "w", newline="") as file:
+        # writer = csv.writer(file)
+
+        # Write the header rows.
+        # writer.writerows(header_rows)
+
+        # Write the DataFrame data without the index (adjust if you want the index).
+        results.to_csv(file, header=True, index=False)
+
 
 def app():
     """

From f9d1a90689ef742fd32217b606c6a919b766d974 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 1 Mar 2024 22:17:11 +0000
Subject: [PATCH 050/262] Fixing some formatting bugs

---
 .../ha_15_32/ha_analysis_batch_3.py           | 86 +++++++++++--------
 1 file changed, 48 insertions(+), 38 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 3341e34c..6309d2e2 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -2820,6 +2820,8 @@ def forecast_remaining_sales(loader):
 
     results = []
     for ha_name, input_data in loader.data.items():
+        if ha_name == "HA16":
+            dew
 
         # Original warmfront figures - ECO4
         original_warmfront_estimates = december_figures[december_figures["HA Name"] == ha_name]
@@ -2991,8 +2993,9 @@ def forecast_remaining_sales(loader):
             ("ECO4 pre-ciga", "", "Total - £", ""): eco4_pre_ciga_revenue,
             ("ECO4 pre-ciga", "", "Remaining - £", ""): eco4_pre_ciga_remaining_revenue,
             # ECO4 - asset list, post ciga, total
-            ("ECO4 post-ciga", "", "Estimated total eligible - #", ""): eco4_post_ciga_total_results[
-                "ECO4 - post CIGA - #"],
+            ("ECO4 post-ciga", "", "Estimated total eligible - #", "ECO4 total (post-ciga)"):
+                eco4_post_ciga_total_results[
+                    "ECO4 - post CIGA - #"],
             ("ECO4 post-ciga", "", "Estimated total eligible - £", ""): eco4_post_ciga_total_results[
                 "ECO4 - post CIGA - £"],
             ("ECO4 post-ciga", "", "Delta vs original estimate - %", ""): eco4_delta_vs_original_estimate,
@@ -3059,7 +3062,7 @@ def forecast_remaining_sales(loader):
     # For the delta columns, we calculate the delta on the totals
     totals_row[("ECO4 post-ciga", "", "Delta vs original estimate - %", "")] = round(
         100 * (
-            totals_row[("ECO4 post-ciga", "", "Estimated total eligible - #", "")] -
+            totals_row[("ECO4 post-ciga", "", "Estimated total eligible - #", "ECO4 total (post-ciga)")] -
             totals_row[("", "Original Warmfront estimate", "Total - #", "ECO4 - November")]
         ) / totals_row[("", "Original Warmfront estimate", "Total - #", "ECO4 - November")],
         1
@@ -3093,9 +3096,11 @@ def forecast_remaining_sales(loader):
     # GBIS Headlines
     headline_gbis_original_remaining = totals_row[("GBIS original", "", "Remaining - #", "")]
     headline_gbis_original_remaining_revenue = totals_row[("GBIS original", "", "Remaining - £", "")]
-    headline_gbis_postcode_list_remaining = totals_row[("", "Warmfront post code list", "Remaining - #", "GBIS total")]
+    headline_gbis_postcode_list_remaining = totals_row[
+        ("GBIS Postcode list", "Warmfront post code list", "Remaining - #", "GBIS total")
+    ]
     headline_gbis_postcode_list_remaining_revenue = totals_row[
-        ("", "Warmfront post code list", "Remaining - £", "GBIS total")
+        ("GBIS Postcode list", "Warmfront post code list", "Remaining - £", "GBIS total")
     ]
     headline_gbis_delta = 100 * (
         (headline_gbis_postcode_list_remaining - headline_gbis_original_remaining) /
@@ -3205,29 +3210,33 @@ def forecast_remaining_sales(loader):
         },
         {
             ("", "", "", "HA Name"): "GBIS rate",
-            ("ECO4 original", "", "Remaining - #", ""): "£" + str(gbis_rate)
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): "£" + str(gbis_rate)
         },
         {
             ("", "", "", "HA Name"): "Median CIGA pass rate",
-            ("ECO4 original", "", "Total - £", ""): str(round(median_ciga_success_rate * 100, 1)) + "%",
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(
+                round(median_ciga_success_rate * 100, 1)) + "%",
         },
         {
             ("", "", "", "HA Name"): "Maximum allowed CIGA pass rate",
-            ("ECO4 original", "", "Total - £", ""): str(round(maximum_ciga_conversion * 100, 1)) + "%",
-            ("ECO4 original", "", "Remaining - £",
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(
+                round(maximum_ciga_conversion * 100, 1)) + "%",
+            ("ECO4 original", "", "Remaining - #",
              ""): "- Maximum allowed CIGA conversion for HAs without CIGA checks We do not allow above this to be "
                   "conservative"
         },
         {
             ("", "", "", "HA Name"): "Median ECO4 (no CIGA) sales conversion rate",
-            ("ECO4 original", "", "Total - £", ""): str(round(median_eco4_to_install * 100, 1)) + "%",
-            ("ECO4 original", "", "Remaining - £",
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(
+                round(median_eco4_to_install * 100, 1)) + "%",
+            ("ECO4 original", "", "Remaining - #",
              ""): " - Sales conversion rate for a ECO4 property that didn't need a CIGA check. Job must not cancel"
         },
         {
             ("", "", "", "HA Name"): "Median ECO4 (subect to CIGA) sales conversion rate",
-            ("ECO4 original", "", "Total - £", ""): str(round(median_ciga_pass_to_install * 100, 1)) + "%",
-            ("ECO4 original", "", "Remaining - £",
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(
+                round(median_ciga_pass_to_install * 100, 1)) + "%",
+            ("ECO4 original", "", "Remaining - #",
              ""): " - Sales conversion rate for a ECO4 property that passed a CIGA check. Job must not cancel"
         }
     ]
@@ -3236,6 +3245,7 @@ def forecast_remaining_sales(loader):
         [
             results,
             pd.DataFrame([totals_row]),
+            blank_row,
             pd.DataFrame(headlines),
             blank_row,
             blank_row,
@@ -3291,32 +3301,32 @@ def app():
     loader.load()
     loader.ha_facts_and_figures()
 
+    forecast_remaining_sales(loader)
+
     # We load in the additional data required to perform the analysis
-    cleaned = read_from_s3(
-        s3_file_name="cleaned_epc_data/cleaned.bson",
-        bucket_name="retrofit-data-dev"
-    )
-    cleaned = msgpack.unpackb(cleaned, raw=False)
-    cleaned = patch_cleaned(cleaned)
-
-    cleaning_data = read_dataframe_from_s3_parquet(
-        bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
-    )
-    created_at = datetime.now().isoformat()
-
-    photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")
-
-    outputs = get_epc_data(
-        loader=loader,
-        cleaned=cleaned,
-        cleaning_data=cleaning_data,
-        created_at=created_at,
-        photo_supply_lookup=photo_supply_lookup,
-        floor_area_decile_thresholds=floor_area_decile_thresholds,
-        pull_data=pull_data
-    )
-
-    analyse_ha_data(outputs, loader)
+    # cleaned = read_from_s3(
+    #     s3_file_name="cleaned_epc_data/cleaned.bson",
+    #     bucket_name="retrofit-data-dev"
+    # )
+    # cleaned = msgpack.unpackb(cleaned, raw=False)
+    # cleaned = patch_cleaned(cleaned)
+    #
+    # cleaning_data = read_dataframe_from_s3_parquet(
+    #     bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
+    # )
+    # created_at = datetime.now().isoformat()
+    #
+    # photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")
+    #
+    # outputs = get_epc_data(
+    #     loader=loader,
+    #     cleaned=cleaned,
+    #     cleaning_data=cleaning_data,
+    #     created_at=created_at,
+    #     photo_supply_lookup=photo_supply_lookup,
+    #     floor_area_decile_thresholds=floor_area_decile_thresholds,
+    #     pull_data=pull_data
+    # )
 
     # import pickle
     # with open("ha_analysis.pickle", "wb") as f:

From 0497290b7cac36b4519b3db4c0f9d1d1be4932b5 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 1 Mar 2024 22:17:51 +0000
Subject: [PATCH 051/262] Remove leftover HA16 debug stub from forecast_remaining_sales

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 6309d2e2..ec9469dc 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -2820,8 +2820,6 @@ def forecast_remaining_sales(loader):
 
     results = []
     for ha_name, input_data in loader.data.items():
-        if ha_name == "HA16":
-            dew
 
         # Original warmfront figures - ECO4
         original_warmfront_estimates = december_figures[december_figures["HA Name"] == ha_name]

From fbd808a54d3314d9821d5fad5456e951558959c9 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 1 Mar 2024 22:27:50 +0000
Subject: [PATCH 052/262] Keep deltas as fractions and headline figures as raw numbers

---
 .../ha_15_32/ha_analysis_batch_3.py           | 64 ++++++++-----------
 1 file changed, 27 insertions(+), 37 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index ec9469dc..0daf239b 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -2926,9 +2926,10 @@ def forecast_remaining_sales(loader):
         )
 
         # Calculate the delta compared to Warmfront's original estimate
-        eco4_delta_vs_original_estimate = 100 * (
-            eco4_post_ciga_total_results["ECO4 - post CIGA - #"] - original_warmfront_eco4
-        ) / original_warmfront_eco4
+        eco4_delta_vs_original_estimate = (
+                                              eco4_post_ciga_total_results[
+                                                  "ECO4 - post CIGA - #"] - original_warmfront_eco4
+                                          ) / original_warmfront_eco4
 
         eco4_post_ciga_remaining_results = calculate_eco4_post_ciga(
             eligiblity_counts=eligiblity_counts_remaining,
@@ -2967,11 +2968,11 @@ def forecast_remaining_sales(loader):
 
         # GBIS delta
         if original_warmfront_gbis == 0:
-            gbis_delta_vs_original_estimate = 100 * gbis_total
+            gbis_delta_vs_original_estimate = gbis_total
         else:
-            gbis_delta_vs_original_estimate = 100 * (
-                gbis_total - original_warmfront_gbis
-            ) / original_warmfront_gbis
+            gbis_delta_vs_original_estimate = (
+                                                  gbis_total - original_warmfront_gbis
+                                              ) / original_warmfront_gbis
 
         to_append = {
             ("", "", "", "HA Name"): ha_name,
@@ -3125,27 +3126,23 @@ def forecast_remaining_sales(loader):
         },
         {
             ("", "", "", "HA Name"): "ECO4 Remaining - November - #",
-            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(
-                headline_eco4_original_remaining
-            )
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): headline_eco4_original_remaining
+
         },
         {
             ("", "", "", "HA Name"): "ECO4 Remaining - November - £",
-            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): "£" + str(
-                headline_eco4_original_remaining_revenue
-            )
+            (
+                "", "Original Warmfront estimate", "Total - #",
+                "ECO4 - November"): headline_eco4_original_remaining_revenue
         },
         {
             ("", "", "", "HA Name"): "ECO4 Remaining - postcode list - #",
-            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(
-                headline_eco4_postcode_list_remaining
-            )
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): headline_eco4_postcode_list_remaining
         },
         {
             ("", "", "", "HA Name"): "ECO4 Remaining - postcode list - £",
-            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): "£" + str(
-                headline_eco4_postcode_list_remaining_revenue
-            )
+            ("", "Original Warmfront estimate", "Total - #",
+             "ECO4 - November"): headline_eco4_postcode_list_remaining_revenue
         },
         {
             ("", "", "", "HA Name"): "ECO4 delta %",
@@ -3153,27 +3150,22 @@ def forecast_remaining_sales(loader):
         },
         {
             ("", "", "", "HA Name"): "GBIS Remaining - November - #",
-            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(
-                headline_gbis_original_remaining
-            )
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): headline_gbis_original_remaining
         },
         {
             ("", "", "", "HA Name"): "GBIS Remaining - November - £",
-            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): "£" + str(
-                headline_gbis_original_remaining_revenue
-            )
+            (
+                "", "Original Warmfront estimate", "Total - #",
+                "ECO4 - November"): headline_gbis_original_remaining_revenue
         },
         {
             ("", "", "", "HA Name"): "GBIS Remaining - post code list - #",
-            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(
-                headline_gbis_postcode_list_remaining
-            )
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): headline_gbis_postcode_list_remaining
         },
         {
             ("", "", "", "HA Name"): "GBIS Remaining - post code list - £",
-            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): "£" + str(
-                headline_gbis_postcode_list_remaining_revenue
-            )
+            ("", "Original Warmfront estimate", "Total - #",
+             "ECO4 - November"): headline_gbis_postcode_list_remaining_revenue
         },
         {
             ("", "", "", "HA Name"): "GBIS delta %",
@@ -3182,15 +3174,13 @@ def forecast_remaining_sales(loader):
         # Total revenue
         {
             ("", "", "", "HA Name"): "Total Remaining - November - £",
-            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): "£" + str(
-                headline_original_total_revenue_remaining
-            )
+            ("", "Original Warmfront estimate", "Total - #",
+             "ECO4 - November"): headline_original_total_revenue_remaining
         },
         {
             ("", "", "", "HA Name"): "Total Remaining - post code list - £",
-            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): "£" + str(
-                headline_postcode_list_total_revenue_remaining
-            )
+            ("", "Original Warmfront estimate", "Total - #",
+             "ECO4 - November"): headline_postcode_list_total_revenue_remaining
         },
         {
             ("", "", "", "HA Name"): "Total Remaining delta %",

From 46f5ee8ea43e719dc4f0c8c472de68b62d974270 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 1 Mar 2024 22:34:02 +0000
Subject: [PATCH 053/262] Keep totals-row deltas as unrounded fractions

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 0daf239b..b5c6835b 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -3059,20 +3059,18 @@ def forecast_remaining_sales(loader):
             totals_row[col] = results[col].sum()
 
     # For the delta columns, we calculate the delta on the totals
-    totals_row[("ECO4 post-ciga", "", "Delta vs original estimate - %", "")] = round(
-        100 * (
+    totals_row[("ECO4 post-ciga", "", "Delta vs original estimate - %", "")] = (
+        (
             totals_row[("ECO4 post-ciga", "", "Estimated total eligible - #", "ECO4 total (post-ciga)")] -
             totals_row[("", "Original Warmfront estimate", "Total - #", "ECO4 - November")]
-        ) / totals_row[("", "Original Warmfront estimate", "Total - #", "ECO4 - November")],
-        1
+        ) / totals_row[("", "Original Warmfront estimate", "Total - #", "ECO4 - November")]
     )
 
-    totals_row[("GBIS Postcode list", "", "Delta vs original estimate - %", "")] = round(
-        100 * (
+    totals_row[("GBIS Postcode list", "", "Delta vs original estimate - %", "")] = (
+        (
             totals_row[("GBIS Postcode list", "Warmfront post code list", "Total - #", "GBIS total")] -
             totals_row[("", "Original Warmfront estimate", "Total - #", "GBIS - November")]
-        ) / totals_row[("", "Original Warmfront estimate", "Total - #", "GBIS - November")],
-        1
+        ) / totals_row[("", "Original Warmfront estimate", "Total - #", "GBIS - November")]
     )
 
     blank_row = pd.DataFrame([{col: "" for col in results.columns}])

From d9e9be4389d371176a8f83ec5f83f0fcbabbeb8b Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 1 Mar 2024 23:48:27 +0000
Subject: [PATCH 054/262] Add HA24/HA25; add input rebuild flag and sales-conversion floor

---
 .../ha_15_32/ha_analysis_batch_3.py           | 79 ++++++++++++-------
 1 file changed, 51 insertions(+), 28 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index b5c6835b..baaa4050 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -159,19 +159,18 @@ class DataLoader:
     }
 
     UNMATCHED_CIGA = {
-        # We expect 4 unmatched addresses, which have been validated manually as being in the ciga file but not
-        # the asset list
+        "HA6": 117,
         "HA14": 3,
         "HA16": 7,
-        # There's just too many unmatched here
-        "HA6": 117,
+        "HA24": 12,
         "HA107": 51,
     }
 
-    def __init__(self, directories, december_figures_filepath, use_cache):
+    def __init__(self, directories, december_figures_filepath, use_cache, rebuild):
         self.directories = directories
         self.use_cache = use_cache
         self.december_figures_filepath = december_figures_filepath
+        self.rebuild = rebuild
 
         self.data = {}
         self.december_figures = None
@@ -312,23 +311,20 @@ class DataLoader:
         return asset_list
 
     @staticmethod
-    def create_ciga_list_house_no(ha_name, ciga_list):
+    def create_ciga_list_house_no(ciga_list):
         """
         This function will append the House number onto the asset list
         :return:
         """
 
-        if ha_name in ["HA6", "HA14", "HA107", "HA16"]:
-            split_addresses = ciga_list['Matched Address'].str.split(',', expand=True)
-            house_numbers = split_addresses[0].str.split(' ', expand=True)
-            # THe first column should be HouseNo - we aren't interested in the other columns, but we don't know how
-            # many columns there might be
-            house_numbers = house_numbers.iloc[:, 0:1]
-            house_numbers.columns = ['HouseNo']
+        split_addresses = ciga_list['Matched Address'].str.split(',', expand=True)
+        house_numbers = split_addresses[0].str.split(' ', expand=True)
+        # THe first column should be HouseNo - we aren't interested in the other columns, but we don't know how
+        # many columns there might be
+        house_numbers = house_numbers.iloc[:, 0:1]
+        house_numbers.columns = ['HouseNo']
 
-            ciga_list = pd.concat([ciga_list, house_numbers[["HouseNo"]]], axis=1)
-        else:
-            raise NotImplementedError("Implement me")
+        ciga_list = pd.concat([ciga_list, house_numbers[["HouseNo"]]], axis=1)
 
         return ciga_list
 
@@ -447,7 +443,7 @@ class DataLoader:
             # Remove rows with missing postcode which happens in a small number of cases
             ciga_list = ciga_list[~pd.isnull(ciga_list["Matched Postcode"])]
             ciga_list["ciga_list_row_id"] = [ha_name + "_ciga_" + str(i) for i in range(0, len(ciga_list))]
-            ciga_list = self.create_ciga_list_house_no(ha_name, ciga_list)
+            ciga_list = self.create_ciga_list_house_no(ciga_list)
             ciga_list = self.dedupe_ciga_list(ciga_list)
             ciga_list = self.merge_ciga_to_assets(asset_list, ciga_list, ha_name)
 
@@ -800,6 +796,10 @@ class DataLoader:
             "st. leodegars close", "st leodegars close"
         )
 
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "montgomery crescent", "montgomery road"
+        )
+
         return survey_list
 
     @staticmethod
@@ -1102,16 +1102,18 @@ class DataLoader:
         for col in ["ECO4", "GBIS", "ECO4 remaining", "GBIS remaining"]:
             self.december_figures[col] = self.december_figures[col].astype("Int64")
 
-        if self.use_cache:
-            self.data = read_pickle_from_s3(
+        if self.use_cache and not self.rebuild:
+            data = read_pickle_from_s3(
                 bucket_name="retrofit-datalake-dev",
                 s3_file_name="ha-analysis/batch3-inputs.pickle",
             )
-            return
+        else:
+            data = {}
 
-        data = {}
         for filepath in self.directories:
             ha_name = filepath.split("/")[2]
+            if ha_name in data:
+                continue
             # Load asset list
             logger.info("Loading data for {}".format(ha_name))
             asset_list, survey_list, ciga_list = self.load_asset_list(
@@ -2635,6 +2637,10 @@ def forecast_remaining_sales(loader):
     # and I don't want the numbers to change too much, depenent on the CIGA conversation rate
     maximum_ciga_conversion = 0.75
 
+    # This is a hard limit to the allowed conversion rates to final sale. These are typically very
+    # high but there are some anomalies, amongst surveys that are early on
+    sales_conversion_lower_bound = 0.8
+
     gbis_rate = 600
     eco4_rate = 1710
     # old_gbis_rate = 432
@@ -2796,14 +2802,30 @@ def forecast_remaining_sales(loader):
     eco4_ciga_independent_passrates = pd.DataFrame(eco4_ciga_independent_passrates)
     gbis_ciga_independent_passrates = pd.DataFrame(gbis_ciga_independent_passrates)
 
+    eco4_ciga_independent_passrates["conversion"] = (
+        eco4_ciga_independent_passrates["# ECO4 successfully installed"] /
+        eco4_ciga_independent_passrates["# ECO4 at install stage"]
+    )
+    eco4_ciga_independent_passrates_clipped = eco4_ciga_independent_passrates[
+        eco4_ciga_independent_passrates["conversion"] >= sales_conversion_lower_bound
+        ]
+
+    gbis_ciga_independent_passrates["conversion"] = (
+        gbis_ciga_independent_passrates["# GBIS successfully installed"] /
+        gbis_ciga_independent_passrates["# GBIS at install stage"]
+    )
+    gbis_ciga_independent_passrates_clipped = gbis_ciga_independent_passrates[
+        gbis_ciga_independent_passrates["conversion"] >= sales_conversion_lower_bound
+        ]
+
     median_eco4_to_install = (
-        eco4_ciga_independent_passrates["# ECO4 successfully installed"].sum() /
-        eco4_ciga_independent_passrates["# ECO4 at install stage"].sum()
+        eco4_ciga_independent_passrates_clipped["# ECO4 successfully installed"].sum() /
+        eco4_ciga_independent_passrates_clipped["# ECO4 at install stage"].sum()
     )
 
     median_gbis_to_install = (
-        gbis_ciga_independent_passrates["# GBIS successfully installed"].sum() /
-        gbis_ciga_independent_passrates["# GBIS at install stage"].sum()
+        gbis_ciga_independent_passrates_clipped["# GBIS successfully installed"].sum() /
+        gbis_ciga_independent_passrates_clipped["# GBIS at install stage"].sum()
     )
 
     # Produce the final output
@@ -3270,6 +3292,8 @@ def app():
     use_cache = True
     # Determines if we want to perform the data pull
     pull_data = False
+    # Override to re-build all inputs
+    rebuild_inputs = False
 
     # List all of the data in the folder
 
@@ -3278,12 +3302,11 @@ def app():
     # Grab the December HA figures filepath
     december_figures_filepath = "local_data/ha_data/HA_December_figures.csv"
 
-    # priority_has = ["HA1", "HA6", "HA7", "HA14", "HA16", "HA24", "HA39", "HA107"]
-    priority_has = ["HA1", "HA6", "HA7", "HA14", "HA16", "HA39", "HA107"]
+    priority_has = ["HA1", "HA6", "HA7", "HA14", "HA16", "HA24", "HA25", "HA39", "HA107"]
     # Filter down the directories to only the priority HAs
     directories = [d for d in directories if d.split("/")[2] in priority_has]
 
-    loader = DataLoader(directories, december_figures_filepath, use_cache)
+    loader = DataLoader(directories, december_figures_filepath, use_cache, rebuild_inputs)
     loader.load()
     loader.ha_facts_and_figures()
 

From cbd4a0052ef005e00ce143c16306b5f0b782c4ed Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 1 Mar 2024 23:52:19 +0000
Subject: [PATCH 055/262] Starting HA25

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index baaa4050..0c9f685f 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -347,6 +347,8 @@ class DataLoader:
             return "Asset"
         elif "Decent Homes Stock" in workbook.sheetnames:
             return "Decent Homes Stock"
+        elif "Report" in workbook.sheetnames:
+            return "Report"
         else:
             return "Assets"
 

From fc022b8a22d571651ba21fff9fd4c5901b18e20f Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 2 Mar 2024 12:34:22 +0000
Subject: [PATCH 056/262] Added data load for HA25

---
 .../ha_15_32/ha_analysis_batch_3.py           | 32 +++++++++++++++----
 1 file changed, 25 insertions(+), 7 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 0c9f685f..4ae881d2 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -155,6 +155,10 @@ class DataLoader:
         "HA24": {
             "address": "Address",
             "postcode": "Postcode"
+        },
+        "HA25": {
+            "address": "T1_Address",
+            "postcode": "matching_postcode"
         }
     }
 
@@ -178,7 +182,7 @@ class DataLoader:
 
     def create_asset_list_matching_address(self, ha_name, asset_list):
 
-        if ha_name in ["HA1", "HA6", "HA16", "HA24"]:
+        if ha_name in ["HA1", "HA6", "HA16", "HA24", "HA25"]:
             asset_list["matching_address"] = asset_list[
                 self.COLUMN_CONFIG[ha_name]["address"]
             ].astype(str).str.lower().str.strip()
@@ -374,13 +378,23 @@ class DataLoader:
         asset_sheetname = self.get_asset_sheetname(workbook)
         asset_sheet = workbook[asset_sheetname]
         asset_sheet_colnames = [cell.value for cell in asset_sheet[1]]
+        if ha_name == "HA25":
+            asset_sheet_colnames[11] = "matching_postcode"
+
+        values_only = not ha_name != "HA25"
 
         rows_data = []
-        for row in asset_sheet.iter_rows(min_row=2, values_only=False):
-            row_data = [cell.value for cell in row]  # This will get you the cell values
-            rows_data.append(row_data)
+        if not values_only:
+            for row in asset_sheet.iter_rows(min_row=2, values_only=values_only):
+                row_data = [cell.value for cell in row]  # This will get you the cell values
+                rows_data.append(row_data)
+        else:
+            for row in asset_sheet.iter_rows(min_row=2, values_only=values_only):  # use values_only=True to get values
+                row_data = list(row)  # No need for comprehension, values_only=True returns a tuple of values
+                rows_data.append(row_data)
 
         asset_list = pd.DataFrame(rows_data, columns=asset_sheet_colnames)
+
         asset_list = asset_list.loc[:, asset_list.columns.notnull()]
 
         # Remove entirely empty rows - consider all rows apart from row_color
@@ -403,9 +417,10 @@ class DataLoader:
             asset_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_asset_list")
             asset_list = asset_list_correction_function(asset_list)
 
-        # For HA1, there is an exception in the structure of the data. We don't have any survey or ciga lists, and so
+        # For HA1 and HA25, there is an exception in the structure of the data. We don't have any survey or ciga
+        # lists, and so
         # we can return the asset list now
-        if ha_name == "HA1":
+        if ha_name in ["HA1", "HA25"]:
             return asset_list, pd.DataFrame(), pd.DataFrame()
 
         # We check if there is a survey list
@@ -1149,7 +1164,8 @@ class DataLoader:
             "ECO4": "ECO4",
             "AFFORDABLE WARMTH": "ECO4",
             "ECO4 A/W": "ECO4",
-            "ECO4 GBIS (ECO+)": "GBIS"
+            "ECO4 GBIS (ECO+)": "GBIS",
+            "ECO4 GBIS (ECO+) JJC UNDER 73m²": "GBIS"
         }
 
         eco_eligibility_map = {
@@ -3305,6 +3321,8 @@ def app():
     december_figures_filepath = "local_data/ha_data/HA_December_figures.csv"
 
     priority_has = ["HA1", "HA6", "HA7", "HA14", "HA16", "HA24", "HA25", "HA39", "HA107"]
+    # Next HAs to do: 15, 32, 33,
+    # Then: 28, 41, 38, 10, 14, 20, 48
     # Filter down the directories to only the priority HAs
     directories = [d for d in directories if d.split("/")[2] in priority_has]
 

From 9a69f8741ece9fdb740cb1b9855f53e639637f44 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 2 Mar 2024 12:54:19 +0000
Subject: [PATCH 057/262] adding HA15

---
 .../ha_15_32/ha_analysis_batch_3.py           | 32 +++++++++++++++++--
 1 file changed, 30 insertions(+), 2 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 4ae881d2..81ed2301 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -165,6 +165,7 @@ class DataLoader:
     UNMATCHED_CIGA = {
         "HA6": 117,
         "HA14": 3,
+        "HA15": 3,
         "HA16": 7,
         "HA24": 12,
         "HA107": 51,
@@ -204,7 +205,15 @@ class DataLoader:
                                              asset_list["Address 4"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["Postcode"].astype(str).str.lower().str.strip()
             asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
-
+        elif ha_name == "HA15":
+            asset_list["matching_address"] = (
+                asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Address Line 2"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Address Line 3"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Address Line 4"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Postcode"].astype(str).str.lower().str.strip()
+            )
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
         elif ha_name == "HA39":
             # Create matching_address by concatenating add_1, add_2, add_3, add_4, add_5, post_code
             asset_list["matching_address"] = asset_list["add_1"].astype(str).str.lower().str.strip() + ", " + \
@@ -502,6 +511,15 @@ class DataLoader:
 
         return asset_list
 
+    @staticmethod
+    def correct_ha15_asset_list(asset_list):
+        asset_list["matching_postcode"] = np.where(
+            asset_list["Address Line 1"] == "103 Priory Crescent",
+            "hp19 9ny",
+            asset_list["matching_postcode"]
+        )
+        return asset_list
+
     @staticmethod
     def correct_ha6_survey_list(survey_list):
 
@@ -655,6 +673,14 @@ class DataLoader:
 
         return survey_list
 
+    @staticmethod
+    def correct_ha15_survey_list(survey_list):
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Mary Mac Manus Drive, Milton Keynes", "Mary Mac Manus Drive"
+        )
+
+        return survey_list
+
     @staticmethod
     def correct_ha16_survey_list(survey_list):
         survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("/", ", ")
@@ -3320,7 +3346,9 @@ def app():
     # Grab the December HA figures filepath
     december_figures_filepath = "local_data/ha_data/HA_December_figures.csv"
 
-    priority_has = ["HA1", "HA6", "HA7", "HA14", "HA16", "HA24", "HA25", "HA39", "HA107"]
+    priority_has = [
+        "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA39", "HA107"
+    ]
     # Next HAs to do: 15, 32, 33,
     # Then: 28, 41, 38, 10, 14, 20, 48
     # Filter down the directories to only the priority HAs

From dad2fc74c889112cbed0a67578fb013e21b276f9 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 2 Mar 2024 13:10:28 +0000
Subject: [PATCH 058/262] HA15 checked and added

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 81ed2301..1ae05d16 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1191,12 +1191,15 @@ class DataLoader:
             "AFFORDABLE WARMTH": "ECO4",
             "ECO4 A/W": "ECO4",
             "ECO4 GBIS (ECO+)": "GBIS",
-            "ECO4 GBIS (ECO+) JJC UNDER 73m²": "GBIS"
+            "ECO4 GBIS (ECO+) JJC UNDER 73m²": "GBIS",
+            "ECO4 AFFORDABLE WARMTH": "ECO4"
         }
 
         eco_eligibility_map = {
             "not eligble": "not eligible",
             "eco 4(subject to ciga)": "eco4 (subject to ciga)",
+            "eco4 (subject to ciga/archetype check": "eco4 (subject to ciga)",
+            "eco4 (subject to archetype check)": "eco4"
         }
 
         ha_facts_and_figures = []

From 9eccfca70dda75ac1c49084bcd63ec3734e3dd23 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 2 Mar 2024 13:26:54 +0000
Subject: [PATCH 059/262] Fix merge for HA32: add address matching and list corrections

---
 .../ha_15_32/ha_analysis_batch_3.py           | 67 ++++++++++++++++++-
 1 file changed, 65 insertions(+), 2 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 1ae05d16..1f99d23c 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -214,6 +214,13 @@ class DataLoader:
                 asset_list["Postcode"].astype(str).str.lower().str.strip()
             )
             asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA32":
+            asset_list["matching_address"] = (
+                asset_list["Dwelling num"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Street"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Postcode"].astype(str).str.lower().str.strip()
+            )
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
         elif ha_name == "HA39":
             # Create matching_address by concatenating add_1, add_2, add_3, add_4, add_5, post_code
             asset_list["matching_address"] = asset_list["add_1"].astype(str).str.lower().str.strip() + ", " + \
@@ -308,6 +315,8 @@ class DataLoader:
 
         if ha_name in ["HA107"]:
             asset_list["HouseNo"] = asset_list["House No"].copy()
+        elif ha_name == "HA32":
+            asset_list["HouseNo"] = asset_list["Dwelling num"].copy()
         else:
             split_addresses = asset_list['matching_address'].str.split(',', expand=True)
             house_numbers = split_addresses[0].str.split(' ', expand=True)
@@ -520,6 +529,16 @@ class DataLoader:
         )
         return asset_list
 
+    @staticmethod
+    def correct_ha32_asset_list(asset_list):
+        asset_list["Postcode"] = np.where(
+            (asset_list["Street"] == "Norton Grove") & (asset_list["Postcode"] == "HU4 6HQ") & (
+                asset_list["Dwelling num"] == "7"),
+            "hu4 6hg",
+            asset_list["Postcode"]
+        )
+        return asset_list
+
     @staticmethod
     def correct_ha6_survey_list(survey_list):
 
@@ -845,6 +864,50 @@ class DataLoader:
 
         return survey_list
 
+    @staticmethod
+    def correct_ha32_survey_list(survey_list):
+        survey_list["Street / Block Name"] = np.where(
+            survey_list["Street / Block Name"] == "Coxwold",
+            "Coxwold Grove",
+            survey_list["Street / Block Name"]
+        )
+
+        # Update the Barringhton Avenue with their correct spelling: Barrington Avenue
+        survey_list["Street / Block Name"] = np.where(
+            survey_list["Street / Block Name"] == "Barringhton Avenue",
+            "Barrington Avenue",
+            survey_list["Street / Block Name"]
+        )
+
+        # Update how the Rustenburg addresses are listed in the identified addresses
+        survey_list["Street / Block Name"] = np.where(
+            survey_list["Street / Block Name"] == "Rustenburg",
+            "Rustenburg Street",
+            survey_list["Street / Block Name"]
+        )
+
+        # Update how the MALIN LODGE, RONALDSWAY CLOSE addresses are listed in the identified addresses
+        survey_list["Street / Block Name"] = np.where(
+            survey_list["Street / Block Name"] == "MALIN LODGE, RONALDSWAY CLOSE",
+            "Malin Lodge",
+            survey_list["Street / Block Name"]
+        )
+
+        # Correct the misspelt "Feroes Close" to "Faroes Close" in the identified addresses
+        survey_list["Street / Block Name"] = np.where(
+            survey_list["Street / Block Name"] == "Feroes Close",
+            "Faroes Close",
+            survey_list["Street / Block Name"]
+        )
+
+        survey_list["Street / Block Name"] = np.where(
+            survey_list["Street / Block Name"] == 'FORESTER  WAY',
+            'FORESTER WAY',
+            survey_list["Street / Block Name"]
+        )
+
+        return survey_list
+
     @staticmethod
     def correct_ha107_survey_list(survey_list):
         # Replace Front Street, East Stockham with Front Street, East Stockwith
@@ -3350,9 +3413,9 @@ def app():
     december_figures_filepath = "local_data/ha_data/HA_December_figures.csv"
 
     priority_has = [
-        "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA39", "HA107"
+        "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA32", "HA39", "HA107"
     ]
-    # Next HAs to do: 15, 32, 33,
+    # Next HAs to do: 15[DONE], 32, 33,
     # Then: 28, 41, 38, 10, 14, 20, 48
     # Filter down the directories to only the priority HAs
     directories = [d for d in directories if d.split("/")[2] in priority_has]

From 2828b005cbb3676216827fcb5dc70630f8ecb393 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 3 Mar 2024 15:06:31 +0000
Subject: [PATCH 060/262] fixing HA32 merge

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 1f99d23c..c84a2c5c 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -906,6 +906,19 @@ class DataLoader:
             survey_list["Street / Block Name"]
         )
 
+        survey_list["Street / Block Name"] = np.where(
+            survey_list["Street / Block Name"] == '6 Zeigfeld',
+            'Ziegfeld Court',
+            survey_list["Street / Block Name"]
+        )
+
+        # Malin Lodge, Ronaldsway Close
+        survey_list["Street / Block Name"] = np.where(
+            survey_list["Street / Block Name"] == 'Malin Lodge, Ronaldsway Close',
+            'Malin Lodge',
+            survey_list["Street / Block Name"]
+        )
+
         return survey_list
 
     @staticmethod

From 811f141c45b1fcfa52c9f1d685690389df55f531 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 3 Mar 2024 15:35:49 +0000
Subject: [PATCH 061/262] started working on ha33 but paused

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index c84a2c5c..9bd04884 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -221,6 +221,12 @@ class DataLoader:
                 asset_list["Postcode"].astype(str).str.lower().str.strip()
             )
             asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA33":
+            asset_list["matching_address"] = (
+                asset_list["ADDRESS"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["POST CODE"].astype(str).str.lower().str.strip()
+            )
+            asset_list["matching_postcode"] = asset_list["POST CODE"].astype(str).str.lower().str.strip()
         elif ha_name == "HA39":
             # Create matching_address by concatenating add_1, add_2, add_3, add_4, add_5, post_code
             asset_list["matching_address"] = asset_list["add_1"].astype(str).str.lower().str.strip() + ", " + \
@@ -3426,9 +3432,9 @@ def app():
     december_figures_filepath = "local_data/ha_data/HA_December_figures.csv"
 
     priority_has = [
-        "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA32", "HA39", "HA107"
+        "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39", "HA107",
     ]
-    # Next HAs to do: 15[DONE], 32, 33,
+    # Next HAs to do: 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come back on this],
     # Then: 28, 41, 38, 10, 14, 20, 48
     # Filter down the directories to only the priority HAs
     directories = [d for d in directories if d.split("/")[2] in priority_has]

From cb39590f618e7c6ff382e76cc461792101a9741a Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 3 Mar 2024 15:48:05 +0000
Subject: [PATCH 062/262] debugging matching for HA28

---
 .../ha_15_32/ha_analysis_batch_3.py           | 23 +++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 9bd04884..7481724b 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -214,6 +214,13 @@ class DataLoader:
                 asset_list["Postcode"].astype(str).str.lower().str.strip()
             )
             asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA28":
+            asset_list["matching_address"] = (
+                asset_list["House Number"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Street 1"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Postcode"].astype(str).str.lower().str.strip()
+            )
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
         elif ha_name == "HA32":
             asset_list["matching_address"] = (
                 asset_list["Dwelling num"].astype(str).str.lower().str.strip() + ", " +
@@ -323,6 +330,8 @@ class DataLoader:
             asset_list["HouseNo"] = asset_list["House No"].copy()
         elif ha_name == "HA32":
             asset_list["HouseNo"] = asset_list["Dwelling num"].copy()
+        elif ha_name == "HA28":
+            asset_list["HouseNo"] = asset_list["House Number"].copy()
         else:
             split_addresses = asset_list['matching_address'].str.split(',', expand=True)
             house_numbers = split_addresses[0].str.split(' ', expand=True)
@@ -371,6 +380,8 @@ class DataLoader:
     def get_asset_sheetname(workbook):
         if "Asset List" in workbook.sheetnames:
             return "Asset List"
+        elif "Asset list" in workbook.sheetnames:
+            return "Asset list"
         elif "Asset" in workbook.sheetnames and "Assets" not in workbook.sheetnames:
             return "Asset"
         elif "Decent Homes Stock" in workbook.sheetnames:
@@ -394,6 +405,8 @@ class DataLoader:
     def get_survey_sheetname(workbook):
         if "ECO Surveys" in workbook.sheetnames:
             return "ECO Surveys"
+        elif "ECO Survey" in workbook.sheetnames:
+            return "ECO Survey"
         else:
             return "ECO surveys"
 
@@ -870,6 +883,12 @@ class DataLoader:
 
         return survey_list
 
+    @staticmethod
+    def correct_ha28_survey_list(survey_list):
+        # Rename the "No" column to "No." to align with the other survey sheets
+        survey_list = survey_list.rename(columns={"NO ": "NO."})
+        return survey_list
+
     @staticmethod
     def correct_ha32_survey_list(survey_list):
         survey_list["Street / Block Name"] = np.where(
@@ -1027,6 +1046,10 @@ class DataLoader:
                 asset_list["matching_address"].str.contains(row["Street / Block Name"].lower().strip())
             ].copy()
 
+            if str(house_number) not in df["matching_address"].values:
+                if "flat" in str(house_number):
+                    house_number = house_number.split("flat")[1].strip()
+
             df = df[df["matching_address"].str.contains(str(house_number))]
 
             if df.empty:

From 0909b811ee7aea834784f0deb947308593ce7cdd Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 3 Mar 2024 15:57:49 +0000
Subject: [PATCH 063/262] fixed matching for ha28

---
 .../ha_15_32/ha_analysis_batch_3.py           | 23 ++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 7481724b..b954a651 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -887,6 +887,27 @@ class DataLoader:
     def correct_ha28_survey_list(survey_list):
         # Rename the "No" column to "No." to align with the other survey sheets
         survey_list = survey_list.rename(columns={"NO ": "NO."})
+
+        survey_list["Post Code"] = np.where(
+            survey_list["Post Code"] == "ME75HA",
+            "ME7 5HA",
+            survey_list["Post Code"]
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "ANDREW MANOR/BRITTON ST", "ANDREW MANOR"
+        )
+
+        survey_list["Post Code"] = np.where(
+            survey_list["Post Code"] == "ME75TW",
+            "ME7 5TW",
+            survey_list["Post Code"]
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "ST MARKS HOUSE/SAXON ST", "ST MARKS HOUSE"
+        )
+
         return survey_list
 
     @staticmethod
@@ -1046,7 +1067,7 @@ class DataLoader:
                 asset_list["matching_address"].str.contains(row["Street / Block Name"].lower().strip())
             ].copy()
 
-            if str(house_number) not in df["matching_address"].values:
+            if not any(df["matching_address"].str.contains(str(house_number))):
                 if "flat" in str(house_number):
                     house_number = house_number.split("flat")[1].strip()
 

From 87c77e53c03ec83286718d6ef6bb5593466a48b1 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 3 Mar 2024 16:22:42 +0000
Subject: [PATCH 064/262] handling facts and figures for ha28

---
 .../ha_15_32/ha_analysis_batch_3.py           | 92 +++++++++++--------
 1 file changed, 53 insertions(+), 39 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index b954a651..3ded09ba 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -398,6 +398,8 @@ class DataLoader:
             return "CIGA Checks"
         elif "CIGA checks" in workbook.sheetnames:
             return "CIGA checks"
+        elif "CIGA check" in workbook.sheetnames:
+            return "CIGA check"
         else:
             return "CIGA"
 
@@ -1318,14 +1320,16 @@ class DataLoader:
             "ECO4 A/W": "ECO4",
             "ECO4 GBIS (ECO+)": "GBIS",
             "ECO4 GBIS (ECO+) JJC UNDER 73m²": "GBIS",
-            "ECO4 AFFORDABLE WARMTH": "ECO4"
+            "ECO4 AFFORDABLE WARMTH": "ECO4",
+            "Affordable Warmth": "ECO4"
         }
 
         eco_eligibility_map = {
             "not eligble": "not eligible",
             "eco 4(subject to ciga)": "eco4 (subject to ciga)",
             "eco4 (subject to ciga/archetype check": "eco4 (subject to ciga)",
-            "eco4 (subject to archetype check)": "eco4"
+            "eco4 (subject to archetype check)": "eco4",
+            "eco4 (subject to ciga/archetype)": "eco4 (subject to ciga)",
         }
 
         ha_facts_and_figures = []
@@ -1384,46 +1388,56 @@ class DataLoader:
             sales_report = {}
             if not survey_list.empty:
                 scheme_column = survey_list.columns[0]
-                # We clean up the survey list installation or cancelled
-                survey_list["installed_or_cancelled_clean"] = survey_list["INSTALLED OR CANCELLED"].str.lower()
-                # Remove all punctuation
-                survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.replace(
-                    r'[^\w\s]', '', regex=True
-                )
-                # Remove double spaces
-                survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.replace(
-                    r'\s+', ' ', regex=True
-                )
-                # Remove trailing spaces
-                survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.strip()
-
                 # Remap the values in the scheme column
                 survey_list[scheme_column] = survey_list[scheme_column].replace(scheme_map)
+                # We clean up the survey list installation or cancelled
+                if "INSTALLED OR CANCELLED" in survey_list.columns:
+                    survey_list["installed_or_cancelled_clean"] = survey_list["INSTALLED OR CANCELLED"].str.lower()
+                    # Remove all punctuation
+                    survey_list["installed_or_cancelled_clean"] = survey_list[
+                        "installed_or_cancelled_clean"].str.replace(
+                        r'[^\w\s]', '', regex=True
+                    )
+                    # Remove double spaces
+                    survey_list["installed_or_cancelled_clean"] = survey_list[
+                        "installed_or_cancelled_clean"].str.replace(
+                        r'\s+', ' ', regex=True
+                    )
+                    # Remove trailing spaces
+                    survey_list["installed_or_cancelled_clean"] = survey_list[
+                        "installed_or_cancelled_clean"].str.strip()
 
-                survey_list["installation_status"] = None
-                survey_list["installation_status"] = np.where(
-                    survey_list["installed_or_cancelled_clean"].isin(["installed", "installed see notes"]),
-                    "installed",
-                    survey_list["installation_status"]
-                )
-                survey_list["installation_status"] = np.where(
-                    survey_list["installed_or_cancelled_clean"].isin(["cancelled"]),
-                    "cancelled",
-                    survey_list["installation_status"]
-                )
-                # Find partial installations
-                survey_list["installation_status"] = np.where(
-                    survey_list["installed_or_cancelled_clean"].str.contains("still to be installed"),
-                    "partially installed",
-                    survey_list["installation_status"]
-                )
-                # Find partial cancellations
-                # TODO: We might have more indications of partial cancellations
-                survey_list["installation_status"] = np.where(
-                    survey_list["installed_or_cancelled_clean"].isin(["loft cancelled"]),
-                    "partially cancelled",
-                    survey_list["installation_status"]
-                )
+                    survey_list["installation_status"] = None
+                    survey_list["installation_status"] = np.where(
+                        survey_list["installed_or_cancelled_clean"].isin(["installed", "installed see notes"]),
+                        "installed",
+                        survey_list["installation_status"]
+                    )
+                    survey_list["installation_status"] = np.where(
+                        survey_list["installed_or_cancelled_clean"].isin(["cancelled"]),
+                        "cancelled",
+                        survey_list["installation_status"]
+                    )
+                    # Find partial installations
+                    survey_list["installation_status"] = np.where(
+                        survey_list["installed_or_cancelled_clean"].str.contains("still to be installed"),
+                        "partially installed",
+                        survey_list["installation_status"]
+                    )
+                    # Find partial cancellations
+                    # TODO: We might have more indications of partial cancellations
+                    survey_list["installation_status"] = np.where(
+                        survey_list["installed_or_cancelled_clean"].isin(["loft cancelled"]),
+                        "partially cancelled",
+                        survey_list["installation_status"]
+                    )
+                else:
+                    # We have some examples, e.g. HA28, where we do not have the installed or cancelled column
+                    survey_list["installation_status"] = np.where(
+                        survey_list['INSTALL/ CANCELLATION DATE'].str.lower().str.contains("cancelled"),
+                        "cancelled",
+                        "installed",
+                    )
 
                 # Finally, for other cases, we set the status to "in progress"
                 survey_list["installation_status"] = survey_list["installation_status"].fillna("in progress")

From f8948ff60f9e00d9501bd2f71f4269152cf3ab51 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 3 Mar 2024 16:47:10 +0000
Subject: [PATCH 065/262] ha38 wip:

---
 .../ha_15_32/ha_analysis_batch_3.py           | 20 +++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 3ded09ba..4af7d9b9 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -234,6 +234,13 @@ class DataLoader:
                 asset_list["POST CODE"].astype(str).str.lower().str.strip()
             )
             asset_list["matching_postcode"] = asset_list["POST CODE"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA38":
+            asset_list["matching_address"] = asset_list["House_Number"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address_Line_1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address_Line_2"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address_Line_3"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Postcode"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
         elif ha_name == "HA39":
             # Create matching_address by concatenating add_1, add_2, add_3, add_4, add_5, post_code
             asset_list["matching_address"] = asset_list["add_1"].astype(str).str.lower().str.strip() + ", " + \
@@ -332,6 +339,8 @@ class DataLoader:
             asset_list["HouseNo"] = asset_list["Dwelling num"].copy()
         elif ha_name == "HA28":
             asset_list["HouseNo"] = asset_list["House Number"].copy()
+        elif ha_name == "HA38":
+            asset_list["HouseNo"] = asset_list["House_Number"].copy()
         else:
             split_addresses = asset_list['matching_address'].str.split(',', expand=True)
             house_numbers = split_addresses[0].str.split(' ', expand=True)
@@ -912,6 +921,12 @@ class DataLoader:
 
         return survey_list
 
+    @staticmethod
+    def correct_ha38_survey_list(survey_list):
+        # Rename the "No" column to "No." to align with the other survey sheets
+        survey_list = survey_list.rename(columns={"NO ": "NO."})
+        return survey_list
+
     @staticmethod
     def correct_ha32_survey_list(survey_list):
         survey_list["Street / Block Name"] = np.where(
@@ -3490,10 +3505,11 @@ def app():
     december_figures_filepath = "local_data/ha_data/HA_December_figures.csv"
 
     priority_has = [
-        "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39", "HA107",
+        "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA38", "HA39", "HA107",
     ]
     # Next HAs to do: 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come back on this],
-    # Then: 28, 41, 38, 10, 14, 20, 48
+    # Then: 28 [DONE],
+    # 38, 41, 10, 14, 20, 48
     # Filter down the directories to only the priority HAs
     directories = [d for d in directories if d.split("/")[2] in priority_has]
 

From abe0e627dbe1c89209de2f867c2abe4eef419d2e Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 6 Mar 2024 10:24:42 +0000
Subject: [PATCH 066/262] Fixing bug with gbis remaining counts

---
 .../ha_15_32/ha_analysis_batch_3.py           | 266 ++++++++++++------
 1 file changed, 184 insertions(+), 82 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 4af7d9b9..6d1a3b45 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -424,6 +424,12 @@ class DataLoader:
     def load_asset_list(self, filepath, ha_name):
         workbook = openpyxl.load_workbook(filepath)
         asset_sheetname = self.get_asset_sheetname(workbook)
+
+        # TODO: TEMP
+        sheetnames_lower = [x.lower() for x in workbook.sheetnames]
+        if any("eco3" in x for x in sheetnames_lower):
+            raise Exception("REMOVE ME")
+
         asset_sheet = workbook[asset_sheetname]
         asset_sheet_colnames = [cell.value for cell in asset_sheet[1]]
         if ha_name == "HA25":
@@ -569,6 +575,34 @@ class DataLoader:
         )
         return asset_list
 
+    @staticmethod
+    def correct_ha38_asset_list(asset_list):
+        # For Kingsford court, the house number is at the end of the address
+        def rearrange_address_if_flat(address):
+            if '/flat' in address.lower():
+                parts = address.split('/flat', 1)
+                return f"FLAT{parts[1]}, {parts[0]}"
+            return address
+
+        def extract_house_no_if_flat(address):
+            if '/flat' in address.lower():
+                # Attempt to extract the house number following "/flat"
+                try:
+                    house_no = address.split('/flat ')[1].split(' ')[0]
+                    # Remove trailing comma
+                    house_no = house_no.replace(",", "")
+                except IndexError:
+                    house_no = None
+                return house_no
+            return None
+
+        asset_list['ExtractedHouseNo'] = asset_list['matching_address'].apply(extract_house_no_if_flat)
+        asset_list.loc[asset_list['ExtractedHouseNo'].notnull(), 'HouseNo'] = asset_list['ExtractedHouseNo']
+        asset_list['matching_address'] = asset_list['matching_address'].apply(rearrange_address_if_flat)
+        # TODO: "We then need to ..." - this follow-up step was left unwritten; confirm whether more clean-up is needed
+
+        return asset_list
+
     @staticmethod
     def correct_ha6_survey_list(survey_list):
 
@@ -925,6 +959,11 @@ class DataLoader:
     def correct_ha38_survey_list(survey_list):
         # Rename the "No" column to "No." to align with the other survey sheets
         survey_list = survey_list.rename(columns={"NO ": "NO."})
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            'Kingsford Court, Coombe Valley Road', 'Kingsford Court'
+        )
+
         return survey_list
 
     @staticmethod
@@ -1345,6 +1384,7 @@ class DataLoader:
             "eco4 (subject to ciga/archetype check": "eco4 (subject to ciga)",
             "eco4 (subject to archetype check)": "eco4",
             "eco4 (subject to ciga/archetype)": "eco4 (subject to ciga)",
+            "eco4  (subject to ciga)": "eco4 (subject to ciga)"
         }
 
         ha_facts_and_figures = []
@@ -2943,8 +2983,8 @@ def forecast_remaining_sales(loader):
     median_ciga_success_rate = ciga_passrates["# CIGA passed"].sum() / ciga_passrates["# CIGA dependent"].sum()
 
     # 3) Calculate the conversion rate of an ECO4 and a GBISjob, that doesn't need ciga, to install
-    eco4_ciga_independent_passrates = []
-    gbis_ciga_independent_passrates = []
+    eco4_ciga_independent_to_install = []
+    gbis_to_install = []
     for ha_name, input_data in loader.data.items():
         asset_list = input_data["asset_list"].copy()
         survey_list = input_data["survey_list"].copy()
@@ -2973,7 +3013,7 @@ def forecast_remaining_sales(loader):
                 )
             ]
 
-            eco4_ciga_independent_passrates.append(
+            eco4_ciga_independent_to_install.append(
                 {
                     "Ha Name": ha_name,
                     "# ECO4 at install stage": typical_eco4_installed.shape[0],
@@ -2993,7 +3033,7 @@ def forecast_remaining_sales(loader):
                 )
             ]
 
-            gbis_ciga_independent_passrates.append(
+            gbis_to_install.append(
                 {
                     "Ha Name": ha_name,
                     "# GBIS at install stage": typical_gbis_installed.shape[0],
@@ -3001,33 +3041,33 @@ def forecast_remaining_sales(loader):
                 }
             )
 
-    eco4_ciga_independent_passrates = pd.DataFrame(eco4_ciga_independent_passrates)
-    gbis_ciga_independent_passrates = pd.DataFrame(gbis_ciga_independent_passrates)
+    eco4_ciga_independent_to_install = pd.DataFrame(eco4_ciga_independent_to_install)
+    gbis_to_install = pd.DataFrame(gbis_to_install)
 
-    eco4_ciga_independent_passrates["conversion"] = (
-        eco4_ciga_independent_passrates["# ECO4 successfully installed"] /
-        eco4_ciga_independent_passrates["# ECO4 at install stage"]
+    eco4_ciga_independent_to_install["conversion"] = (
+        eco4_ciga_independent_to_install["# ECO4 successfully installed"] /
+        eco4_ciga_independent_to_install["# ECO4 at install stage"]
     )
-    eco4_ciga_independent_passrates_clipped = eco4_ciga_independent_passrates[
-        eco4_ciga_independent_passrates["conversion"] >= sales_conversion_lower_bound
+    eco4_ciga_independent_to_install_clipped = eco4_ciga_independent_to_install[
+        eco4_ciga_independent_to_install["conversion"] >= sales_conversion_lower_bound
         ]
 
-    gbis_ciga_independent_passrates["conversion"] = (
-        gbis_ciga_independent_passrates["# GBIS successfully installed"] /
-        gbis_ciga_independent_passrates["# GBIS at install stage"]
+    gbis_to_install["conversion"] = (
+        gbis_to_install["# GBIS successfully installed"] /
+        gbis_to_install["# GBIS at install stage"]
     )
-    gbis_ciga_independent_passrates_clipped = gbis_ciga_independent_passrates[
-        gbis_ciga_independent_passrates["conversion"] >= sales_conversion_lower_bound
+    gbis_to_install_clipped = gbis_to_install[
+        gbis_to_install["conversion"] >= sales_conversion_lower_bound
         ]
 
     median_eco4_to_install = (
-        eco4_ciga_independent_passrates_clipped["# ECO4 successfully installed"].sum() /
-        eco4_ciga_independent_passrates_clipped["# ECO4 at install stage"].sum()
+        eco4_ciga_independent_to_install_clipped["# ECO4 successfully installed"].sum() /
+        eco4_ciga_independent_to_install_clipped["# ECO4 at install stage"].sum()
     )
 
     median_gbis_to_install = (
-        gbis_ciga_independent_passrates_clipped["# GBIS successfully installed"].sum() /
-        gbis_ciga_independent_passrates_clipped["# GBIS at install stage"].sum()
+        gbis_to_install_clipped["# GBIS successfully installed"].sum() /
+        gbis_to_install_clipped["# GBIS at install stage"].sum()
     )
 
     # Produce the final output
@@ -3044,29 +3084,26 @@ def forecast_remaining_sales(loader):
 
     results = []
     for ha_name, input_data in loader.data.items():
-
         # Original warmfront figures - ECO4
         original_warmfront_estimates = december_figures[december_figures["HA Name"] == ha_name]
 
         original_warmfront_eco4 = original_warmfront_estimates["ECO4"].values[0]
         original_warmfront_remaining_eco4 = original_warmfront_estimates["ECO4 remaining"].values[0]
+        original_warmfront_sold_eco4 = (
+            original_warmfront_estimates["No. of Tech surveys complete - Eco 4"].values[0] * eco4_rate
+        )
 
-        # original_warmfront_eco4_revenue = (
-        #     original_warmfront_remaining_eco4 * eco4_rate +
-        #     (original_warmfront_eco4 - original_warmfront_remaining_eco4) * old_eco4_rate
-        # )
         original_warmfront_eco4_revenue = original_warmfront_eco4 * eco4_rate
         original_warmfront_remaining_eco4_revenue = original_warmfront_remaining_eco4 * eco4_rate
+        original_warmfront_sold_gbis = (
+            original_warmfront_estimates["No. of Tech surveys complete - GBIS"].values[0] * gbis_rate
+        )
 
         # Original warmfront figures - GBIS
 
         original_warmfront_gbis = original_warmfront_estimates["GBIS"].values[0]
         original_warmfront_remaining_gbis = original_warmfront_estimates["GBIS remaining"].values[0]
 
-        # original_warmfront_gbis_revenue = (
-        #     original_warmfront_remaining_gbis * gbis_rate +
-        #     (original_warmfront_gbis - original_warmfront_remaining_gbis) * old_gbis_rate
-        # )
         original_warmfront_gbis_revenue = (
             original_warmfront_gbis * gbis_rate
         )
@@ -3123,7 +3160,7 @@ def forecast_remaining_sales(loader):
 
         # We also need the ha ciga passed to install success rate
         ha_ciga_pass_to_sale = converted_ciga_jobs[converted_ciga_jobs["HA Name"] == ha_name]
-        if not ha_ciga_pass_to_sale.empty:
+        if not ha_ciga_pass_to_sale.empty and ha_ciga_pass_to_sale["# Ciga dependent at installation"].values[0] != 0:
             ha_ciga_pass_to_sale_rate = (
                 ha_ciga_pass_to_sale["# Ciga dependent successfully installed"].values[0] /
                 ha_ciga_pass_to_sale["# Ciga dependent at installation"].values[0]
@@ -3131,7 +3168,9 @@ def forecast_remaining_sales(loader):
         else:
             ha_ciga_pass_to_sale_rate = median_ciga_pass_to_install
 
-        ha_eco4_to_sale = eco4_ciga_independent_passrates[eco4_ciga_independent_passrates["Ha Name"] == ha_name]
+        ha_eco4_to_sale = eco4_ciga_independent_to_install_clipped[
+            eco4_ciga_independent_to_install_clipped["Ha Name"] == ha_name
+            ]
         if not ha_eco4_to_sale.empty:
             ha_eco4_to_sale_rate = (
                 ha_eco4_to_sale['# ECO4 successfully installed'].values[0] /
@@ -3149,12 +3188,6 @@ def forecast_remaining_sales(loader):
             eco4_rate=eco4_rate
         )
 
-        # Calculate the delta compared to Warmfront's original estimate
-        eco4_delta_vs_original_estimate = (
-                                              eco4_post_ciga_total_results[
-                                                  "ECO4 - post CIGA - #"] - original_warmfront_eco4
-                                          ) / original_warmfront_eco4
-
         eco4_post_ciga_remaining_results = calculate_eco4_post_ciga(
             eligiblity_counts=eligiblity_counts_remaining,
             input_data=input_data,
@@ -3164,10 +3197,18 @@ def forecast_remaining_sales(loader):
             eco4_rate=eco4_rate
         )
 
+        # Calculate the delta compared to Warmfront's original remaining
+        if original_warmfront_remaining_eco4 == 0:
+            eco4_delta_vs_original_estimate_remaining = eco4_post_ciga_remaining_results["ECO4 - post CIGA - #"]
+        else:
+            eco4_delta_vs_original_estimate_remaining = ((eco4_post_ciga_remaining_results["ECO4 - post CIGA - #"] -
+                                                          original_warmfront_remaining_eco4) /
+                                                         original_warmfront_remaining_eco4)
+
         # GBIS Figures
         # Estimate the GBIS conversion rate
-        ha_gbis_sale_conversion = gbis_ciga_independent_passrates[
-            gbis_ciga_independent_passrates["Ha Name"] == ha_name
+        ha_gbis_sale_conversion = gbis_to_install_clipped[
+            gbis_to_install_clipped["Ha Name"] == ha_name
             ]
 
         if not ha_gbis_sale_conversion.empty:
@@ -3178,6 +3219,9 @@ def forecast_remaining_sales(loader):
         else:
             ha_gbis_sale_conversion = median_gbis_to_install
 
+        asset_list["ECO Eligibility"].value_counts()
+        asset_list_remaining["ECO Eligibility"].value_counts()
+
         gbis_total = eligiblity_counts[
             eligiblity_counts["ECO Eligibility"] == "gbis"
             ]["count"].sum()
@@ -3185,18 +3229,59 @@ def forecast_remaining_sales(loader):
         gbis_total_revenue = int(gbis_total * gbis_rate)
 
         gbis_remaining = eligiblity_counts_remaining[
-            eligiblity_counts["ECO Eligibility"] == "gbis"
+            eligiblity_counts_remaining["ECO Eligibility"] == "gbis"
             ]["count"].sum()
         gbis_remaining = int(np.round(gbis_remaining * ha_gbis_sale_conversion))
         gbis_remaining_revenue = int(gbis_remaining * gbis_rate)
 
         # GBIS delta
-        if original_warmfront_gbis == 0:
-            gbis_delta_vs_original_estimate = gbis_total
+        if original_warmfront_remaining_gbis == 0:
+            gbis_delta_vs_original_estimate_remaining = gbis_remaining
         else:
-            gbis_delta_vs_original_estimate = (
-                                                  gbis_total - original_warmfront_gbis
-                                              ) / original_warmfront_gbis
+            gbis_delta_vs_original_estimate_remaining = (
+                (gbis_remaining - original_warmfront_remaining_gbis) / original_warmfront_remaining_gbis
+            )
+
+        # Current sales figures
+        # For any sales surveys that are complete, that could still cancel, we apply a conversion rate
+        eco4_actually_sold = 0
+        gbis_actually_sold = 0
+        if not survey_list.empty:
+            surveys_with_eligibility = survey_list.merge(
+                asset_list[["asset_list_row_id", "ECO Eligibility"]],
+                how="left", on="asset_list_row_id"
+            )
+            completed_eco4_sales = surveys_with_eligibility[
+                surveys_with_eligibility["installation_status"] == "ECO4 - installed"
+                ]
+            incomplete_eco4_sales = surveys_with_eligibility[
+                (surveys_with_eligibility["installation_status"] == "ECO4 - in progress") &
+                (~surveys_with_eligibility["ECO Eligibility"].isin(
+                    ["eco4 - passed ciga"])
+                 )
+                ]
+            incomplete_eco4_sales_ciga = surveys_with_eligibility[
+                (surveys_with_eligibility["installation_status"] == "ECO4 - in progress") &
+                (surveys_with_eligibility["ECO Eligibility"].isin(
+                    ["eco4 - passed ciga"])
+                )
+                ]
+
+            eco4_actually_sold = (completed_eco4_sales.shape[0] * eco4_rate) + (
+                incomplete_eco4_sales.shape[0] * ha_eco4_to_sale_rate +
+                incomplete_eco4_sales_ciga.shape[0] * ha_ciga_pass_to_sale_rate
+            ) * eco4_rate
+
+            completed_gbis_sales = surveys_with_eligibility[
+                surveys_with_eligibility["installation_status"] == "GBIS - installed"
+                ]
+            incomplete_gbis_sales = surveys_with_eligibility[
+                (surveys_with_eligibility["installation_status"] == "GBIS - in progress")
+            ]
+
+            gbis_actually_sold = completed_gbis_sales.shape[0] * gbis_rate + (
+                incomplete_gbis_sales.shape[0] * ha_gbis_sale_conversion * gbis_rate
+            )
 
         to_append = {
             ("", "", "", "HA Name"): ha_name,
@@ -3204,29 +3289,33 @@ def forecast_remaining_sales(loader):
             ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): original_warmfront_eco4,
             ("ECO4 original", "", "Remaining - #", ""): original_warmfront_remaining_eco4,
             ("ECO4 original", "", "Total - £", ""): original_warmfront_eco4_revenue,
+            ("ECO4 original", "", "Sold - £", ""): original_warmfront_sold_eco4,
             ("ECO4 original", "", "Remaining - £", ""): original_warmfront_remaining_eco4_revenue,
             # GBIS - original warmfront figures
             ("", "Original Warmfront estimate", "Total - #", "GBIS - November"): original_warmfront_gbis,
             ("GBIS original", "", "Remaining - #", ""): original_warmfront_gbis,
             ("GBIS original", "", "Total - £", ""): original_warmfront_gbis_revenue,
+            ("GBIS original", "", "Sold - £", ""): original_warmfront_sold_gbis,
             ("GBIS original", "", "Remaining - £", ""): original_warmfront_remaining_gbis_revenue,
             # ECO4 - asset list, pre-ciga
             ("", "Warmfront post code list", "Total #", "ECO4 total (pre-ciga)"): eco4_pre_ciga,
             ("ECO4 pre-ciga", "", "Remaining - #", ""): eco4_pre_ciga_remaining,
             ("ECO4 pre-ciga", "", "Total - £", ""): eco4_pre_ciga_revenue,
+            ("ECO4 pre-ciga", "", "Sold - £", ""): eco4_actually_sold,
             ("ECO4 pre-ciga", "", "Remaining - £", ""): eco4_pre_ciga_remaining_revenue,
             # ECO4 - asset list, post ciga, total
-            ("ECO4 post-ciga", "", "Estimated total eligible - #", "ECO4 total (post-ciga)"):
+            ("ECO4 post-ciga", "", "Estimated total eligible - #", "ECO4 total"):
                 eco4_post_ciga_total_results[
                     "ECO4 - post CIGA - #"],
             ("ECO4 post-ciga", "", "Estimated total eligible - £", ""): eco4_post_ciga_total_results[
                 "ECO4 - post CIGA - £"],
-            ("ECO4 post-ciga", "", "Delta vs original estimate - %", ""): eco4_delta_vs_original_estimate,
             # ECO4 - asset list, post ciga, remaining
             ("ECO4 post-ciga", "", "Estimated remaining eligible - #", ""): eco4_post_ciga_remaining_results[
                 "ECO4 - post CIGA - #"],
             ("ECO4 post-ciga", "", "Estimated remaining eligible - £", ""): eco4_post_ciga_remaining_results[
                 "ECO4 - post CIGA - £"],
+            ("ECO4 post-ciga", "", "Delta vs original estimate, remaining - %",
+             ""): eco4_delta_vs_original_estimate_remaining,
             ("ECO4 post-ciga", "", "Of which - confirmed (post CIGA or no CIGA required) - #", ""):
                 eco4_post_ciga_remaining_results["Of which confirmed - #"],
             ("ECO4 post-ciga", "", "Of which - confirmed (post CIGA or no CIGA required) - £", ""):
@@ -3257,13 +3346,15 @@ def forecast_remaining_sales(loader):
             # GBIS postcode list
             ("GBIS Postcode list", "Warmfront post code list", "Total - #", "GBIS total"): gbis_total,
             ("GBIS Postcode list", "Warmfront post code list", "Total - £", "GBIS total"): gbis_total_revenue,
-            ("GBIS Postcode list", "", "Delta vs original estimate - %", ""): gbis_delta_vs_original_estimate,
+            ("GBIS Postcode list", "Warmfront post code list", "Sold - £", "GBIS total"): gbis_actually_sold,
             ("GBIS Postcode list", "Warmfront post code list", "Remaining - #", "GBIS total"): gbis_remaining,
             ("GBIS Postcode list", "Warmfront post code list", "Remaining - £", "GBIS total"): gbis_remaining_revenue,
+            ("GBIS Postcode list", "", "Delta vs original estimate, remaining - %", ""):
+                gbis_delta_vs_original_estimate_remaining,
         }
 
         # Make sure nothing is forgotten due to duplicate multi-index keys
-        if len(to_append) != 33:
+        if len(to_append) != 37:
             raise ValueError("Something went wrong")
 
         results.append(to_append)
@@ -3275,26 +3366,26 @@ def forecast_remaining_sales(loader):
         if col == ('', '', '', 'HA Name'):
             totals_row[col] = "Total"
         elif col in [
-            ("ECO4 post-ciga", "", "Delta vs original estimate - %", ""),
-            ("GBIS Postcode list", "", "Delta vs original estimate - %", "")
+            ("ECO4 post-ciga", "", "Delta vs original estimate, remaining - %", ""),
+            ("GBIS Postcode list", "", "Delta vs original estimate, remaining - %", "")
         ]:
             totals_row[col] = None
         else:
             totals_row[col] = results[col].sum()
 
     # For the delta columns, we calculate the delta on the totals
-    totals_row[("ECO4 post-ciga", "", "Delta vs original estimate - %", "")] = (
+    totals_row[("ECO4 post-ciga", "", "Delta vs original estimate, remaining - %", "")] = (
         (
-            totals_row[("ECO4 post-ciga", "", "Estimated total eligible - #", "ECO4 total (post-ciga)")] -
-            totals_row[("", "Original Warmfront estimate", "Total - #", "ECO4 - November")]
-        ) / totals_row[("", "Original Warmfront estimate", "Total - #", "ECO4 - November")]
+            totals_row[("ECO4 post-ciga", "", "Estimated remaining eligible - #", "")] -
+            totals_row[("ECO4 original", "", "Remaining - #", "")]
+        ) / totals_row[("ECO4 original", "", "Remaining - #", "")]
     )
 
-    totals_row[("GBIS Postcode list", "", "Delta vs original estimate - %", "")] = (
+    totals_row[("GBIS Postcode list", "", "Delta vs original estimate, remaining - %", "")] = (
         (
-            totals_row[("GBIS Postcode list", "Warmfront post code list", "Total - #", "GBIS total")] -
-            totals_row[("", "Original Warmfront estimate", "Total - #", "GBIS - November")]
-        ) / totals_row[("", "Original Warmfront estimate", "Total - #", "GBIS - November")]
+            totals_row[("GBIS Postcode list", "Warmfront post code list", "Remaining - #", "GBIS total")] -
+            totals_row[("GBIS original", "", "Remaining - #", "")]
+        ) / totals_row[("GBIS original", "", "Remaining - #", "")]
     )
 
     blank_row = pd.DataFrame([{col: "" for col in results.columns}])
@@ -3342,6 +3433,15 @@ def forecast_remaining_sales(loader):
     )
     headline_total_delta = round(headline_total_delta, 1)
 
+    headline_eco4_sold_since_november = (
+        totals_row[('ECO4 pre-ciga', '', 'Sold - £', '')] - totals_row[('ECO4 original', '', 'Sold - £', '')]
+    )
+
+    headline_gbis_sold_since_november = (
+        totals_row[("GBIS Postcode list", "Warmfront post code list", "Sold - £", "GBIS total")] -
+        totals_row[('GBIS original', '', 'Sold - £', '')]
+    )
+
     headlines = [
         {
             ("", "", "", "HA Name"): "Headlines",
@@ -3358,16 +3458,22 @@ def forecast_remaining_sales(loader):
                 "ECO4 - November"): headline_eco4_original_remaining_revenue
         },
         {
-            ("", "", "", "HA Name"): "ECO4 Remaining - postcode list - #",
+            ("", "", "", "HA Name"): "ECO4 Sold since November - £",
+            (
+                "", "Original Warmfront estimate", "Total - #",
+                "ECO4 - November"): headline_eco4_sold_since_november
+        },
+        {
+            ("", "", "", "HA Name"): "ECO4 Remaining - postcode list (post CIGA) - #",
             ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): headline_eco4_postcode_list_remaining
         },
         {
-            ("", "", "", "HA Name"): "ECO4 Remaining - postcode list - £",
+            ("", "", "", "HA Name"): "ECO4 Remaining - postcode list (post CIGA) - £",
             ("", "Original Warmfront estimate", "Total - #",
              "ECO4 - November"): headline_eco4_postcode_list_remaining_revenue
         },
         {
-            ("", "", "", "HA Name"): "ECO4 delta %",
+            ("", "", "", "HA Name"): "ECO4 £ remaining delta - %",
             ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(headline_eco4_delta) + "%"
         },
         {
@@ -3380,6 +3486,12 @@ def forecast_remaining_sales(loader):
                 "", "Original Warmfront estimate", "Total - #",
                 "ECO4 - November"): headline_gbis_original_remaining_revenue
         },
+        {
+            ("", "", "", "HA Name"): "GBIS Sold since November - £",
+            (
+                "", "Original Warmfront estimate", "Total - #",
+                "ECO4 - November"): headline_gbis_sold_since_november
+        },
         {
             ("", "", "", "HA Name"): "GBIS Remaining - post code list - #",
             ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): headline_gbis_postcode_list_remaining
@@ -3400,7 +3512,7 @@ def forecast_remaining_sales(loader):
              "ECO4 - November"): headline_original_total_revenue_remaining
         },
         {
-            ("", "", "", "HA Name"): "Total Remaining - post code list - £",
+            ("", "", "", "HA Name"): "Total Remaining - post code list (post CIGA) - £",
             ("", "Original Warmfront estimate", "Total - #",
              "ECO4 - November"): headline_postcode_list_total_revenue_remaining
         },
@@ -3440,14 +3552,16 @@ def forecast_remaining_sales(loader):
             ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(
                 round(median_eco4_to_install * 100, 1)) + "%",
             ("ECO4 original", "", "Remaining - #",
-             ""): " - Sales conversion rate for a ECO4 property that didn't need a CIGA check. Job must not cancel"
+             ""): " - Sales conversion rate for a ECO4 property that didn't need a CIGA check. Surveys that resulted "
+                  "in cancelled install are excluded."
         },
         {
             ("", "", "", "HA Name"): "Median ECO4 (subect to CIGA) sales conversion rate",
             ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(
                 round(median_ciga_pass_to_install * 100, 1)) + "%",
             ("ECO4 original", "", "Remaining - #",
-             ""): " - Sales conversion rate for a ECO4 property that passed a CIGA check. Job must not cancel"
+             ""): " - Sales conversion rate for a ECO4 property that passed a CIGA check. Surveys that resulted in "
+                  "cancelled installs are excluded."
         }
     ]
 
@@ -3462,23 +3576,7 @@ def forecast_remaining_sales(loader):
             pd.DataFrame(assumptions)
         ]
     )
-
-    # header_rows = [
-    #     [name[0] for name in results.columns.values],
-    #     [name[1] for name in results.columns.values],
-    #     [name[2] for name in results.columns.values],
-    #     [name[3] for name in results.columns.values]
-    # ]
-
-    # Step 2: Write the transformed header and DataFrame data to CSV.
-    # Open the file in write mode.
-    import csv
     with open("HA Remaining Analysis.csv", "w", newline="") as file:
-        # writer = csv.writer(file)
-
-        # Write the header rows.
-        # writer.writerows(header_rows)
-
         # Write the DataFrame data without the index (adjust if you want the index).
         results.to_csv(file, header=True, index=False)
 
@@ -3504,8 +3602,12 @@ def app():
     # Grab the December HA figures filepath
     december_figures_filepath = "local_data/ha_data/HA_December_figures.csv"
 
+    # priority_has = [
+    #     "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA38", "HA39", "HA107",
+    # ]
+    # TODO: Remove ECO3 sales from HA25
     priority_has = [
-        "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA38", "HA39", "HA107",
+        "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA28", "HA32", "HA39", "HA107",
     ]
     # Next HAs to do: 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come back on this],
     # Then: 28 [DONE],

From 5b32ac8aad59b1942f80a399d072486ab6db9ec3 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 6 Mar 2024 10:59:07 +0000
Subject: [PATCH 067/262] handling case where property is marked as gbis but
 sold for ECO4

---
 .../ha_15_32/ha_analysis_batch_3.py           | 21 +++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 6d1a3b45..7bfbd7f5 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1509,11 +1509,12 @@ class DataLoader:
                 }
 
                 # We find some cases where properties have sold but are missing CIGA checks
-                survey_list_to_merge = survey_list[["asset_list_row_id"]].copy()
+                survey_list_to_merge = survey_list[["asset_list_row_id", "installation_status"]].copy()
                 survey_list_to_merge["has_a_survey_record"] = True
                 survey_list_to_merge = survey_list_to_merge[~pd.isnull(survey_list_to_merge["asset_list_row_id"])]
 
                 asset_list = asset_list.merge(survey_list_to_merge, how='left', on="asset_list_row_id")
+                # Update the cases where properties have sold, but are missing a CIGA check
                 asset_list["ECO Eligibility"] = np.where(
                     (asset_list["ECO Eligibility"] == "eco4 (subject to ciga)") & (
                         asset_list["has_a_survey_record"] == True
@@ -1521,6 +1522,17 @@ class DataLoader:
                     "eco4 - passed ciga",
                     asset_list["ECO Eligibility"]
                 )
+                # Update the cases where a property has been marked as eligible for GBIS, but sold for ECO4
+                asset_list["ECO Eligibility"] = np.where(
+                    (asset_list["ECO Eligibility"] == "gbis") & (
+                        asset_list["installation_status"].isin(
+                            ["ECO4 - installed", "ECO4 - cancelled"]
+                        )
+                    ),
+                    "eco4",
+                    asset_list["ECO Eligibility"]
+                )
+
                 asset_list = asset_list.drop(columns=["has_a_survey_record"])
 
                 # Update the survey list with installation status
@@ -3199,7 +3211,7 @@ def forecast_remaining_sales(loader):
 
         # Calculate the delta compared to Warmfront's original remaining
         if original_warmfront_remaining_eco4 == 0:
-            eco4_delta_vs_original_estimate_remaining = eco4_post_ciga_remaining_results["ECO4 - post CIGA - #"]
+            eco4_delta_vs_original_estimate_remaining = "N/A"
         else:
             eco4_delta_vs_original_estimate_remaining = ((eco4_post_ciga_remaining_results["ECO4 - post CIGA - #"] -
                                                           original_warmfront_remaining_eco4) /
@@ -3219,9 +3231,6 @@ def forecast_remaining_sales(loader):
         else:
             ha_gbis_sale_conversion = median_gbis_to_install
 
-        asset_list["ECO Eligibility"].value_counts()
-        asset_list_remaining["ECO Eligibility"].value_counts()
-
         gbis_total = eligiblity_counts[
             eligiblity_counts["ECO Eligibility"] == "gbis"
             ]["count"].sum()
@@ -3236,7 +3245,7 @@ def forecast_remaining_sales(loader):
 
         # GBIS delta
         if original_warmfront_remaining_gbis == 0:
-            gbis_delta_vs_original_estimate_remaining = gbis_remaining
+            gbis_delta_vs_original_estimate_remaining = "N/A"
         else:
             gbis_delta_vs_original_estimate_remaining = (
                 (gbis_remaining - original_warmfront_remaining_gbis) / original_warmfront_remaining_gbis

From 9d26c94ae571ce1ba5363e9c850b8017f110bc9d Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 6 Mar 2024 11:35:14 +0000
Subject: [PATCH 068/262] removed stray comma causing bugs

---
 .../ha_15_32/ha_analysis_batch_3.py           | 32 ++++++++++++++++---
 1 file changed, 28 insertions(+), 4 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 7bfbd7f5..e58c7799 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1526,14 +1526,40 @@ class DataLoader:
                 asset_list["ECO Eligibility"] = np.where(
                     (asset_list["ECO Eligibility"] == "gbis") & (
                         asset_list["installation_status"].isin(
-                            ["ECO4 - installed", "ECO4 - cancelled"]
+                            ["ECO4 - installed", "ECO4 - cancelled", "ECO4 - in progress"]
                         )
                     ),
                     "eco4",
                     asset_list["ECO Eligibility"]
                 )
+                # Update the cases where a property was marked as eligible for ECO4, but sold for GBIS
+                asset_list["ECO Eligibility"] = np.where(
+                    (asset_list["ECO Eligibility"].isin(
+                        ["eco4", "eco4 (subject to ciga)", "eco4 - passed ciga", "failed ciga"]
+                    )) & (
+                        asset_list["installation_status"].isin(
+                            ["GBIS - installed", "GBIS - cancelled", "GBIS - in progress"]
+                        )
+                    ),
+                    "gbis",
+                    asset_list["ECO Eligibility"]
+                )
+                # Update the cases where a property is marked as not eligible, but sold for GBIS
+                if ((asset_list["ECO Eligibility"] == "not eligible") & (
+                    asset_list["installation_status"].isin(
+                        ["GBIS - in progress", "GBIS - installed", "GBIS - cancelled"]
+                    ))).sum():
+                    bah
+                asset_list["ECO Eligibility"] = np.where(
+                    (asset_list["ECO Eligibility"] == "not eligible") & (
+                        asset_list["installation_status"].isin(
+                            ["GBIS - in progress", "GBIS - installed", "GBIS - cancelled"]
+                        )),
+                    "gbis",
+                    asset_list["ECO Eligibility"]
+                )
 
-                asset_list = asset_list.drop(columns=["has_a_survey_record"])
+                asset_list = asset_list.drop(columns=["has_a_survey_record", "installation_status"])
 
                 # Update the survey list with installation status
                 self.data[ha_name]["survey_list"] = survey_list
@@ -2897,8 +2923,6 @@ def forecast_remaining_sales(loader):
 
     gbis_rate = 600
     eco4_rate = 1710
-    # old_gbis_rate = 432
-    # old_eco4_rate = 1456
 
     # 1) Calculate the conversion rate from passed CIGA to actual sale
     converted_ciga_jobs = []

From a70260f128aec2785a8000669dc981d8220505a3 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 6 Mar 2024 11:55:02 +0000
Subject: [PATCH 069/262] Update how we handle partially completed jobs

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index e58c7799..060539e1 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1476,7 +1476,7 @@ class DataLoader:
                     # Find partial installations
                     survey_list["installation_status"] = np.where(
                         survey_list["installed_or_cancelled_clean"].str.contains("still to be installed"),
-                        "partially installed",
+                        "in progress",
                         survey_list["installation_status"]
                     )
                     # Find partial cancellations
@@ -1550,6 +1550,7 @@ class DataLoader:
                         ["GBIS - in progress", "GBIS - installed", "GBIS - cancelled"]
                     ))).sum():
                     bah
+
                 asset_list["ECO Eligibility"] = np.where(
                     (asset_list["ECO Eligibility"] == "not eligible") & (
                         asset_list["installation_status"].isin(
@@ -1559,6 +1560,15 @@ class DataLoader:
                     asset_list["ECO Eligibility"]
                 )
 
+                # Update the cases where a property is marked as not eligible, but sold for ECO4
+                asset_list["ECO Eligibility"] = np.where(
+                    (asset_list["ECO Eligibility"] == "not eligible") & (
+                        asset_list["installation_status"].isin(
+                            ["ECO4 - in progress", "ECO4 - installed", "ECO4 - cancelled"]
+                        )
+                    )
+                )
+
                 asset_list = asset_list.drop(columns=["has_a_survey_record", "installation_status"])
 
                 # Update the survey list with installation status

From 4cc467e5142c7eba903d2819d59229643cf93e03 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 6 Mar 2024 11:57:03 +0000
Subject: [PATCH 070/262] fix bug in updating eligibility for initially
 non-eligible rows

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 060539e1..8c03b1ef 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1545,12 +1545,6 @@ class DataLoader:
                     asset_list["ECO Eligibility"]
                 )
                 # Update the cases where a property is marked as not eligible, but sold for GBIS
-                if ((asset_list["ECO Eligibility"] == "not eligible") & (
-                    asset_list["installation_status"].isin(
-                        ["GBIS - in progress", "GBIS - installed", "GBIS - cancelled"]
-                    ))).sum():
-                    bah
-
                 asset_list["ECO Eligibility"] = np.where(
                     (asset_list["ECO Eligibility"] == "not eligible") & (
                         asset_list["installation_status"].isin(
@@ -1566,7 +1560,9 @@ class DataLoader:
                         asset_list["installation_status"].isin(
                             ["ECO4 - in progress", "ECO4 - installed", "ECO4 - cancelled"]
                         )
-                    )
+                    ),
+                    "eco4",
+                    asset_list["ECO Eligibility"]
                 )
 
                 asset_list = asset_list.drop(columns=["has_a_survey_record", "installation_status"])

From 5e991547f7239cf5a84f8e5824d4d9379b825a2a Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 6 Mar 2024 14:08:05 +0000
Subject: [PATCH 071/262] debugging variances, fixed usage of 75% ciga pass rate

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 8c03b1ef..91c198b1 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -3196,8 +3196,8 @@ def forecast_remaining_sales(loader):
             )
         else:
             ha_ciga_conversion_rate = (
-                median_ciga_success_rate if median_ciga_success_rate <= median_ciga_success_rate else
-                median_ciga_success_rate
+                median_ciga_success_rate if median_ciga_success_rate <= maximum_ciga_conversion else
+                maximum_ciga_conversion
             )
 
         # We also need the ha ciga passed to install success rate

From d35d8ea8457ce128ac1fe0c51abd9f83f4e3acaa Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 6 Mar 2024 14:14:50 +0000
Subject: [PATCH 072/262] fixed bug in eligibility counts remaining

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 91c198b1..1e2c5d92 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -3177,7 +3177,7 @@ def forecast_remaining_sales(loader):
         ]["count"].sum()
 
         eco4_pre_ciga_remaining = eligiblity_counts_remaining[
-            eligiblity_counts["ECO Eligibility"].isin(
+            eligiblity_counts_remaining["ECO Eligibility"].isin(
                 ["eco4", "eco4 (subject to ciga)", "eco4 - passed ciga", "failed ciga"]
             )
         ]["count"].sum()

From 680f38963a874eef548883d8f0f365f7958d42b1 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 6 Mar 2024 15:01:33 +0000
Subject: [PATCH 073/262] Added variance columns to output

---
 .../ha_15_32/ha_analysis_batch_3.py           | 49 ++++++++++++++++++-
 1 file changed, 48 insertions(+), 1 deletion(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 1e2c5d92..d4c3f74f 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -2859,21 +2859,30 @@ def calculate_eco4_post_ciga(
             eligiblity_counts["ECO Eligibility"] == "failed ciga"
             ]["count"].sum()
 
+        eco4_no_ciga_needed_or_ciga_passed = eco4_no_ciga_needed + eco4_ciga_passed
+
         eco4_confirmed = (eco4_no_ciga_needed * ha_eco4_to_sale_rate) + (eco4_ciga_passed * ha_ciga_pass_to_sale_rate)
         eco4_confirmed = np.round(eco4_confirmed)
 
+        eco4_no_ciga_needed_cancellations = int(eco4_no_ciga_needed_or_ciga_passed - eco4_confirmed)
+
         if remaining_needing_ciga_check > 0:
             # We update the eco4 post ciga with the converted remaining
             eco4_ciga_expected_remaining_to_pass = np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate)
+
             eco4_remaining_forecast = np.round(
                 eco4_ciga_expected_remaining_to_pass * ha_ciga_pass_to_sale_rate
             )
+            eco4_ciga_needed_cancellations = eco4_ciga_expected_remaining_to_pass - eco4_remaining_forecast
             eco4_estimated_ciga_failures = remaining_needing_ciga_check - eco4_ciga_expected_remaining_to_pass
             eco4_post_ciga = eco4_confirmed + eco4_remaining_forecast
         else:
             eco4_remaining_forecast = 0
             eco4_estimated_ciga_failures = 0
+            eco4_ciga_needed_cancellations = 0
             eco4_post_ciga = eco4_confirmed
+
+        eco4_expected_cancellations = eco4_no_ciga_needed_cancellations + eco4_ciga_needed_cancellations
     else:
         eco4_no_ciga_needed = eligiblity_counts[
             eligiblity_counts["ECO Eligibility"] == "eco4"
@@ -2881,14 +2890,18 @@ def calculate_eco4_post_ciga(
         eco4_confirmed_ciga_failures = 0
         # Multiply by sale conversion
         eco4_confirmed = np.round(eco4_no_ciga_needed * ha_eco4_to_sale_rate)
+        eco4_no_ciga_cancellations = int(eco4_no_ciga_needed - eco4_confirmed)
         eco4_ciga_expected_remaining_to_pass = np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate)
         eco4_estimated_ciga_failures = remaining_needing_ciga_check - eco4_ciga_expected_remaining_to_pass
 
         eco4_remaining_forecast = np.round(
             eco4_ciga_expected_remaining_to_pass * ha_ciga_pass_to_sale_rate
         )
+        eco4_ciga_cancellations = int(eco4_ciga_expected_remaining_to_pass - eco4_remaining_forecast)
         eco4_post_ciga = eco4_confirmed + eco4_remaining_forecast
 
+        eco4_expected_cancellations = eco4_no_ciga_cancellations + eco4_ciga_cancellations
+
     eco4_post_ciga = int(eco4_post_ciga)
     eco4_remaining_forecast = int(eco4_remaining_forecast)
     eco4_confirmed = int(eco4_confirmed)
@@ -2912,6 +2925,9 @@ def calculate_eco4_post_ciga(
         ),
         "Confirmed CIGA failures - £": int(eco4_confirmed_ciga_failures * eco4_rate),
         "Estimated CIGA failures - £": int(eco4_estimated_ciga_failures * eco4_rate),
+        # Expected cencellations
+        "Expected cancellations - #": eco4_expected_cancellations,
+        "Expected cancellations - £": eco4_expected_cancellations * eco4_rate
     }
 
     return results
@@ -3322,6 +3338,28 @@ def forecast_remaining_sales(loader):
                 incomplete_gbis_sales.shape[0] * ha_gbis_sale_conversion * gbis_rate
             )
 
+        # Add in the variance:
+        # We should expect that the pre-ciga total is:
+        # 1) The number of post CIGA successes +
+        # 2) the number of CIGA failures +
+        # 3) The number of cancellations
+        variance_total = eco4_pre_ciga - (
+            eco4_post_ciga_total_results["ECO4 - post CIGA - #"] +
+            eco4_post_ciga_total_results['Estimated total - failed CIGA'] +
+            eco4_post_ciga_total_results["Expected cancellations - #"]
+        )
+        if variance_total != 0:
+            raise ValueError("Something went wrong in variance total")
+
+        variance_remaining = eco4_pre_ciga_remaining - (
+            eco4_post_ciga_remaining_results["ECO4 - post CIGA - #"] +
+            eco4_post_ciga_remaining_results['Estimated total - failed CIGA'] +
+            eco4_post_ciga_remaining_results["Expected cancellations - #"]
+        )
+
+        if variance_remaining != 0:
+            raise ValueError("Something went wrong in variance remaining")
+
         to_append = {
             ("", "", "", "HA Name"): ha_name,
             # ECO4 - original warmfront figures
@@ -3340,6 +3378,8 @@ def forecast_remaining_sales(loader):
             ("", "Warmfront post code list", "Total #", "ECO4 total (pre-ciga)"): eco4_pre_ciga,
             ("ECO4 pre-ciga", "", "Remaining - #", ""): eco4_pre_ciga_remaining,
             ("ECO4 pre-ciga", "", "Total - £", ""): eco4_pre_ciga_revenue,
+            ("ECO4 pre-ciga", "", "VARIANCE - TOTAL", ""): variance_total,
+            ("ECO4 pre-ciga", "", "VARIANCE - REMAINING", ""): variance_remaining,
             ("ECO4 pre-ciga", "", "Sold - £", ""): eco4_actually_sold,
             ("ECO4 pre-ciga", "", "Remaining - £", ""): eco4_pre_ciga_remaining_revenue,
             # ECO4 - asset list, post ciga, total
@@ -3382,6 +3422,13 @@ def forecast_remaining_sales(loader):
             ("ECO4 CIGA failures", "", "Estimated failures - £", ""): eco4_post_ciga_remaining_results[
                 "Estimated CIGA failures - £"
             ],
+            # Expected ECO4 cancellations
+            ("ECO4 Cancellations", "", "Expected cancellations - #", ""): eco4_post_ciga_remaining_results[
+                "Expected cancellations - #"
+            ],
+            ("ECO4 Cancellations", "", "Expected cancellations - £", ""): eco4_post_ciga_remaining_results[
+                "Expected cancellations - £"
+            ],
             # GBIS postcode list
             ("GBIS Postcode list", "Warmfront post code list", "Total - #", "GBIS total"): gbis_total,
             ("GBIS Postcode list", "Warmfront post code list", "Total - £", "GBIS total"): gbis_total_revenue,
@@ -3393,7 +3440,7 @@ def forecast_remaining_sales(loader):
         }
 
         # Make sure nothing is forgotten due to duplicate multi-index keys
-        if len(to_append) != 37:
+        if len(to_append) != 41:
             raise ValueError("Something went wrong")
 
         results.append(to_append)

From e966dfdf6e785cbcc1e2245cce852e842d0def92 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 6 Mar 2024 16:22:20 +0000
Subject: [PATCH 074/262] Adding cancellations to output

---
 .../ha_15_32/ha_analysis_batch_3.py           | 68 +++++++++++++------
 1 file changed, 49 insertions(+), 19 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index d4c3f74f..09b0910e 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -3301,6 +3301,10 @@ def forecast_remaining_sales(loader):
         # For any sales surveys that are complete, that could still cancel, we apply a conversion rate
         eco4_actually_sold = 0
         gbis_actually_sold = 0
+        eco4_confirmed_cancellations = 0
+        eco4_expected_cancellations = 0
+        gbis_confirmed_cancellations = 0
+        gbis_expected_cancellations = 0
         if not survey_list.empty:
             surveys_with_eligibility = survey_list.merge(
                 asset_list[["asset_list_row_id", "ECO Eligibility"]],
@@ -3308,34 +3312,54 @@ def forecast_remaining_sales(loader):
             )
             completed_eco4_sales = surveys_with_eligibility[
                 surveys_with_eligibility["installation_status"] == "ECO4 - installed"
-                ]
+                ].shape[0]
             incomplete_eco4_sales = surveys_with_eligibility[
                 (surveys_with_eligibility["installation_status"] == "ECO4 - in progress") &
                 (~surveys_with_eligibility["ECO Eligibility"].isin(
                     ["eco4 - passed ciga"])
                  )
-                ]
+                ].shape[0]
             incomplete_eco4_sales_ciga = surveys_with_eligibility[
                 (surveys_with_eligibility["installation_status"] == "ECO4 - in progress") &
                 (surveys_with_eligibility["ECO Eligibility"].isin(
                     ["eco4 - passed ciga"])
                 )
-                ]
+                ].shape[0]
 
-            eco4_actually_sold = (completed_eco4_sales.shape[0] * eco4_rate) + (
-                incomplete_eco4_sales.shape[0] * ha_eco4_to_sale_rate +
-                incomplete_eco4_sales_ciga.shape[0] * ha_ciga_pass_to_sale_rate
-            ) * eco4_rate
+            eco4_confirmed_cancellations = surveys_with_eligibility[
+                surveys_with_eligibility["installation_status"] == "ECO4 - cancelled"
+                ].shape[0]
+
+            expected_eco4_sales_no_ciga = np.round(incomplete_eco4_sales * ha_eco4_to_sale_rate)
+            expected_eco4_sales_ciga = np.round(incomplete_eco4_sales_ciga * ha_ciga_pass_to_sale_rate)
+
+            eco4_expected_cancellations = (incomplete_eco4_sales + incomplete_eco4_sales_ciga) - (
+                expected_eco4_sales_no_ciga + expected_eco4_sales_ciga
+            )
+            eco4_expected_cancellations = int(np.round(eco4_expected_cancellations))
+
+            eco4_actually_sold = eco4_rate * (
+                completed_eco4_sales + expected_eco4_sales_no_ciga + expected_eco4_sales_ciga
+            )
 
             completed_gbis_sales = surveys_with_eligibility[
                 surveys_with_eligibility["installation_status"] == "GBIS - installed"
-                ]
+                ].shape[0]
             incomplete_gbis_sales = surveys_with_eligibility[
                 (surveys_with_eligibility["installation_status"] == "GBIS - in progress")
-            ]
+            ].shape[0]
 
-            gbis_actually_sold = completed_gbis_sales.shape[0] * gbis_rate + (
-                incomplete_gbis_sales.shape[0] * ha_gbis_sale_conversion * gbis_rate
+            # Get confirmed cancellations
+            gbis_confirmed_cancellations = surveys_with_eligibility[
+                surveys_with_eligibility["installation_status"] == "GBIS - cancelled"
+                ].shape[0]
+
+            expected_gbis_unconfirmed_sales = incomplete_gbis_sales * ha_gbis_sale_conversion
+
+            gbis_expected_cancellations = int(incomplete_gbis_sales - expected_gbis_unconfirmed_sales)
+
+            gbis_actually_sold = completed_gbis_sales * gbis_rate + (
+                expected_gbis_unconfirmed_sales * gbis_rate
             )
 
         # Add in the variance:
@@ -3381,6 +3405,9 @@ def forecast_remaining_sales(loader):
             ("ECO4 pre-ciga", "", "VARIANCE - TOTAL", ""): variance_total,
             ("ECO4 pre-ciga", "", "VARIANCE - REMAINING", ""): variance_remaining,
             ("ECO4 pre-ciga", "", "Sold - £", ""): eco4_actually_sold,
+            ("ECO4 pre-ciga", "", "Confirmed cancellations - £", ""): eco4_confirmed_cancellations,
+            # This is for jobs that are in-progress and could still cancel
+            ("ECO4 pre-ciga", "", "Unconfirmed cancellations - £", ""): eco4_expected_cancellations,
             ("ECO4 pre-ciga", "", "Remaining - £", ""): eco4_pre_ciga_remaining_revenue,
             # ECO4 - asset list, post ciga, total
             ("ECO4 post-ciga", "", "Estimated total eligible - #", "ECO4 total"):
@@ -3403,6 +3430,13 @@ def forecast_remaining_sales(loader):
                 eco4_post_ciga_remaining_results["Of which forecast - #"],
             ("ECO4 post-ciga", "", "Of which forecast - £", ""):
                 eco4_post_ciga_remaining_results["Of which forecast - £"],
+            # Expected ECO4 cancellations
+            ("ECO4 Cancellations", "", "Of which expected cancellations - #", ""): eco4_post_ciga_remaining_results[
+                "Expected cancellations - #"
+            ],
+            ("ECO4 Cancellations", "", "Of which expected cancellations - £", ""): eco4_post_ciga_remaining_results[
+                "Expected cancellations - £"
+            ],
             # CIGA failures
             ("ECO4 CIGA failures", "", "Estimated total - failed CIGA - #", ""): eco4_post_ciga_remaining_results[
                 'Estimated total - failed CIGA'
@@ -3422,17 +3456,13 @@ def forecast_remaining_sales(loader):
             ("ECO4 CIGA failures", "", "Estimated failures - £", ""): eco4_post_ciga_remaining_results[
                 "Estimated CIGA failures - £"
             ],
-            # Expected ECO4 cancellations
-            ("ECO4 Cancellations", "", "Expected cancellations - #", ""): eco4_post_ciga_remaining_results[
-                "Expected cancellations - #"
-            ],
-            ("ECO4 Cancellations", "", "Expected cancellations - £", ""): eco4_post_ciga_remaining_results[
-                "Expected cancellations - £"
-            ],
             # GBIS postcode list
             ("GBIS Postcode list", "Warmfront post code list", "Total - #", "GBIS total"): gbis_total,
             ("GBIS Postcode list", "Warmfront post code list", "Total - £", "GBIS total"): gbis_total_revenue,
             ("GBIS Postcode list", "Warmfront post code list", "Sold - £", "GBIS total"): gbis_actually_sold,
+            ("GBIS Postcode list", "", "Confirmed cancellations - £", ""): gbis_confirmed_cancellations,
+            # This is for jobs that are in-progress and could still cancel
+            ("GBIS Postcode list", "", "Unconfirmed cancellations - £", ""): gbis_expected_cancellations,
             ("GBIS Postcode list", "Warmfront post code list", "Remaining - #", "GBIS total"): gbis_remaining,
             ("GBIS Postcode list", "Warmfront post code list", "Remaining - £", "GBIS total"): gbis_remaining_revenue,
             ("GBIS Postcode list", "", "Delta vs original estimate, remaining - %", ""):
@@ -3440,7 +3470,7 @@ def forecast_remaining_sales(loader):
         }
 
         # Make sure nothing is forgotten due to duplicate multi-index keys
-        if len(to_append) != 41:
+        if len(to_append) != 45:
             raise ValueError("Something went wrong")
 
         results.append(to_append)

From e2055b3b7dde7a1b001a568c23bb3016fbfa4079 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 6 Mar 2024 19:34:43 +0000
Subject: [PATCH 075/262] fixed variance for HA6

---
 .../ha_15_32/ha_analysis_batch_3.py           | 135 +++++++++++++++++-
 1 file changed, 129 insertions(+), 6 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 09b0910e..8c9f59c2 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -730,6 +730,81 @@ class DataLoader:
             "Post Code"
         ] = "ST5 7BY"
 
+        # PERFORM ADDITIONAL DROPS
+        # Dropping rows based on multiple conditions
+        conditions_to_drop = [
+            (survey_list['Street / Block Name'] == "Bedford Crescent") & (survey_list['Post Code'] == "ST5 3EH") & (
+                survey_list['NO.'] == 23) & (survey_list['INSTALLED OR CANCELLED'].str.contains("NO UPDATE YET")),
+            (survey_list['Street / Block Name'] == "Hereford Avenue") & (survey_list['Post Code'] == "ST5 3EJ") & (
+                survey_list['NO.'] == 92) & (survey_list['INSTALLED OR CANCELLED'].str.contains("NO UPDATE YET")),
+            (survey_list['Street / Block Name'] == "Seabridge Lane") & (survey_list['Post Code'] == "ST5 3EX") & (
+                survey_list['NO.'].isin([16, 18, 42])) & (
+                survey_list['INSTALLED OR CANCELLED'].str.contains("NO UPDATE YET")),
+            (survey_list['Street / Block Name'] == "ESKDALE PLACE") & (survey_list['Post Code'] == "ST5 3QW") & (
+                survey_list['NO.'] == 5) & (survey_list['SUBMISSION DATE'].astype(str) == "2023-03-06 00:00:00"),
+            (survey_list['Street / Block Name'] == "Birch House road") & (survey_list['Post Code'] == "ST6 2LS") & (
+                survey_list['NO.'].isin([56, 58])),
+            (survey_list['Street / Block Name'] == "Blackthorn Place") & (survey_list['Post Code'] == "ST6 2LS") & (
+                survey_list['NO.'].isin([37, 39])),
+            (survey_list['Street / Block Name'] == "Whitethorn Way") & (survey_list['Post Code'] == "ST5 7BT") & (
+                survey_list['NO.'].isin([17, 6])),
+            (survey_list['Street / Block Name'] == "Lion Grove") & (survey_list['Post Code'] == "ST5 7HQ") & (
+                survey_list['NO.'].isin([10, 12])) & (
+                survey_list['INSTALLED OR CANCELLED'].str.contains("NO UPDATE YET")),
+            (survey_list['Street / Block Name'] == "DENRY CRESCENT") & (survey_list['Post Code'] == "ST5 8JW") & (
+                survey_list['NO.'] == 87) & (survey_list['INSTALLED OR CANCELLED'].str.contains("NO UPDATE YET")),
+            (survey_list['Street / Block Name'] == "HOLLINS CRESCENT") & (survey_list['Post Code'] == "ST7 1JW") & (
+                survey_list['NO.'] == 19)
+        ]
+
+        # Combine all conditions with an OR "|"
+        combined_condition = np.logical_or.reduce(conditions_to_drop)
+
+        # Drop rows that meet the combined condition
+        survey_list = survey_list[~combined_condition]
+
+        # Making replacements using np.where
+        survey_list['Post Code'] = np.where(
+            (survey_list['Street / Block Name'] == "Whitethorn Way") & (survey_list['Post Code'] == "ST5 3EH") & (
+                survey_list['NO.'] == 17),
+            "ST5 7BT",
+            survey_list['Post Code']
+        )
+
+        survey_list['Post Code'] = np.where(
+            (survey_list['Street / Block Name'] == "Whitethorn Way") & (survey_list['Post Code'] == "ST5 3ED") & (
+                survey_list['NO.'] == 6),
+            "ST5 7BT",
+            survey_list['Post Code']
+        )
+
+        # Maple avenue (stoke on trent, not newcastle) should be st7 1jw
+        survey_list["Post Code"] = np.where(
+            (survey_list["Street / Block Name"].str.lower().str.contains("maple avenue")) & (
+                survey_list["Post Code"].str.lower() == "st7 1jx"
+            ),
+            "st7 1jw",
+            survey_list["Post Code"]
+        )
+
+        # Hollins Crescent should be st7 1jx
+        survey_list["Post Code"] = np.where(
+            (survey_list["Street / Block Name"].str.lower().str.contains("hollins crescent")) & (
+                survey_list["Post Code"].str.lower() == "st7 1jw"
+            ),
+            "st7 1jx",
+            survey_list["Post Code"]
+        )
+
+        # Additional drops as the above misses some:
+        survey_list = survey_list[
+            ~((survey_list["NO."].astype(str).isin(["18", "42"])) &
+              (survey_list["Street / Block Name"] == "Seabridge Lane") &
+              (survey_list["Post Code"] == "ST5 3EY") &
+              (survey_list["SUBMISSION DATE"].astype(str) == "24.07.2023") &
+              (survey_list["INSTALLED OR CANCELLED"].str.contains("NO UPDATE YET")))
+        ]
+
         return survey_list
 
     @staticmethod
@@ -1176,6 +1251,11 @@ class DataLoader:
         if matching_lookup.shape[0] != survey_list.shape[0]:
             raise ValueError("Mismatch in the number of survey rows and matching lookup rows")
 
+        matching_lookup = matching_lookup[~pd.isnull(matching_lookup["asset_list_row_id"])]
+
+        if matching_lookup["asset_list_row_id"].duplicated().sum():
+            raise ValueError("Duplicated matches in survey list")
+
         # Merge onto the survey list
         survey_list = survey_list.merge(matching_lookup, how='left', on="survey_list_row_id")
 
@@ -1483,7 +1563,7 @@ class DataLoader:
                     # TODO: We might have more indications of partial cancellations
                     survey_list["installation_status"] = np.where(
                         survey_list["installed_or_cancelled_clean"].isin(["loft cancelled"]),
-                        "partially cancelled",
+                        "cancelled",
                         survey_list["installation_status"]
                     )
                 else:
@@ -3174,6 +3254,8 @@ def forecast_remaining_sales(loader):
         if survey_list.empty:
             asset_list_remaining = asset_list.copy()
         else:
+            # For HA6, there are a small number of postcodes that do not match to any item in the asset list
+            survey_list = survey_list[~pd.isnull(survey_list["asset_list_row_id"])]
             asset_list_remaining = asset_list.merge(
                 survey_list[["asset_list_row_id", "installation_status"]],
                 how="left",
@@ -3183,6 +3265,47 @@ def forecast_remaining_sales(loader):
             asset_list_remaining = asset_list_remaining[pd.isnull(asset_list_remaining["installation_status"])]
             asset_list_remaining = asset_list_remaining.drop(columns=["installation_status"])
 
+        # # TODO: TEMP
+        # n_pre_ciga = asset_list[
+        #     asset_list["ECO Eligibility"].isin(
+        #         [
+        #             "eco4 - passed ciga",
+        #             "eco4 (subject to ciga)",
+        #             "failed ciga",
+        #             "eco4"
+        #         ]
+        #     )
+        # ].shape[0]
+        #
+        # n_pre_ciga_remaining = asset_list_remaining[
+        #     asset_list_remaining["ECO Eligibility"].isin(
+        #         [
+        #             "eco4 - passed ciga",
+        #             "eco4 (subject to ciga)",
+        #             "failed ciga",
+        #             "eco4"
+        #         ]
+        #     )
+        # ].shape[0]
+        #
+        # compare_to_ids = asset_list_remaining["asset_list_row_id"].values
+        # assets_diff_ids = [x for x in asset_list["asset_list_row_id"].values if x not in compare_to_ids]
+        # diff = asset_list[asset_list["asset_list_row_id"].isin(assets_diff_ids)]
+        #
+        # n_sold = survey_list[survey_list["installation_status"].str.contains("ECO4")].shape[0]
+        # # cancellations = survey_list[]
+        # asset_list["ECO Eligibility"].value_counts()
+        #
+        # # Revenenue
+        # pre_ciga_revenue = n_pre_ciga * eco4_rate
+        # pre_ciga_remaining_revenue = n_pre_ciga_remaining * eco4_rate
+        # sold_revenue = n_sold * eco4_rate
+        #
+        # pre_ciga_revenue - (pre_ciga_remaining_revenue + sold_revenue)
+        # # MISSING 1 SALE from sold
+        # cancelled = survey_list[survey_list["installation_status"].str.contains("ECO4")].shape[0]
+        # # TODO: END TEMP
+
         eligiblity_counts = pd.DataFrame(asset_list["ECO Eligibility"].value_counts()).reset_index()
         eligiblity_counts_remaining = pd.DataFrame(asset_list_remaining["ECO Eligibility"].value_counts()).reset_index()
 
@@ -3402,13 +3525,13 @@ def forecast_remaining_sales(loader):
             ("", "Warmfront post code list", "Total #", "ECO4 total (pre-ciga)"): eco4_pre_ciga,
             ("ECO4 pre-ciga", "", "Remaining - #", ""): eco4_pre_ciga_remaining,
             ("ECO4 pre-ciga", "", "Total - £", ""): eco4_pre_ciga_revenue,
+            ("ECO4 pre-ciga", "", "Remaining - £", ""): eco4_pre_ciga_remaining_revenue,
             ("ECO4 pre-ciga", "", "VARIANCE - TOTAL", ""): variance_total,
             ("ECO4 pre-ciga", "", "VARIANCE - REMAINING", ""): variance_remaining,
             ("ECO4 pre-ciga", "", "Sold - £", ""): eco4_actually_sold,
-            ("ECO4 pre-ciga", "", "Confirmed cancellations - £", ""): eco4_confirmed_cancellations,
+            ("ECO4 pre-ciga", "", "Confirmed cancellations - £", ""): eco4_confirmed_cancellations * eco4_rate,
             # This is for jobs that are in-progress and could still cancel
-            ("ECO4 pre-ciga", "", "Unconfirmed cancellations - £", ""): eco4_expected_cancellations,
-            ("ECO4 pre-ciga", "", "Remaining - £", ""): eco4_pre_ciga_remaining_revenue,
+            ("ECO4 pre-ciga", "", "Unconfirmed cancellations - £", ""): eco4_expected_cancellations * eco4_rate,
             # ECO4 - asset list, post ciga, total
             ("ECO4 post-ciga", "", "Estimated total eligible - #", "ECO4 total"):
                 eco4_post_ciga_total_results[
@@ -3460,9 +3583,9 @@ def forecast_remaining_sales(loader):
             ("GBIS Postcode list", "Warmfront post code list", "Total - #", "GBIS total"): gbis_total,
             ("GBIS Postcode list", "Warmfront post code list", "Total - £", "GBIS total"): gbis_total_revenue,
             ("GBIS Postcode list", "Warmfront post code list", "Sold - £", "GBIS total"): gbis_actually_sold,
-            ("GBIS Postcode list", "", "Confirmed cancellations - £", ""): gbis_confirmed_cancellations,
+            ("GBIS Postcode list", "", "Confirmed cancellations - £", ""): gbis_confirmed_cancellations * gbis_rate,
             # This is for jobs that are in-progress and could still cancel
-            ("GBIS Postcode list", "", "Unconfirmed cancellations - £", ""): gbis_expected_cancellations,
+            ("GBIS Postcode list", "", "Unconfirmed cancellations - £", ""): gbis_expected_cancellations * gbis_rate,
             ("GBIS Postcode list", "Warmfront post code list", "Remaining - #", "GBIS total"): gbis_remaining,
             ("GBIS Postcode list", "Warmfront post code list", "Remaining - £", "GBIS total"): gbis_remaining_revenue,
             ("GBIS Postcode list", "", "Delta vs original estimate, remaining - %", ""):

From 21082d8d3779a75cae422becf1a6e589ebcbaba6 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 6 Mar 2024 19:46:28 +0000
Subject: [PATCH 076/262] fixed duplication variance for HA16

---
 .../ha_15_32/ha_analysis_batch_3.py           | 20 ++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 8c9f59c2..7859d6d2 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -960,6 +960,21 @@ class DataLoader:
             survey_list["NO."]
         )
 
+        # Delete some duplicated entries
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "york road") &
+              (survey_list["NO."].astype(str) == "12") &
+              (survey_list["Post Code"] == "M44 5HU") &
+              (survey_list["SUBMISSION DATE"].astype(str) == "45229"))
+        ]
+
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "peatfield avenue") &
+              (survey_list["NO."].astype(str) == "23") &
+              (survey_list["Post Code"] == "M27 9XG") &
+              (survey_list["SUBMISSION DATE"].astype(str) == "45236"))
+        ]
+
         return survey_list
 
     @staticmethod
@@ -3265,7 +3280,7 @@ def forecast_remaining_sales(loader):
             asset_list_remaining = asset_list_remaining[pd.isnull(asset_list_remaining["installation_status"])]
             asset_list_remaining = asset_list_remaining.drop(columns=["installation_status"])
 
-        # # TODO: TEMP
+        # TODO: TEMP
         # n_pre_ciga = asset_list[
         #     asset_list["ECO Eligibility"].isin(
         #         [
@@ -3304,6 +3319,9 @@ def forecast_remaining_sales(loader):
         # pre_ciga_revenue - (pre_ciga_remaining_revenue + sold_revenue)
         # # MISSING 1 SALE from sold
         # cancelled = survey_list[survey_list["installation_status"].str.contains("ECO4")].shape[0]
+        # dupes = survey_list[survey_list["asset_list_row_id"].duplicated()]["asset_list_row_id"].values
+        # z = survey_list[survey_list["asset_list_row_id"].isin(dupes)]
+        # z[['NO.', 'Street / Block Name', 'Post Code', 'INSTALLED OR CANCELLED', 'SUBMISSION DATE']]
         # # TODO: END TEMP
 
         eligiblity_counts = pd.DataFrame(asset_list["ECO Eligibility"].value_counts()).reset_index()

From af13467c2c4c9b7fc98e5be1e343399f57c062fb Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 6 Mar 2024 20:04:37 +0000
Subject: [PATCH 077/262] Added gbis variance checks

---
 .../ha_15_32/ha_analysis_batch_3.py           | 83 ++++++++-----------
 1 file changed, 36 insertions(+), 47 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 7859d6d2..553f6271 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -3237,6 +3237,7 @@ def forecast_remaining_sales(loader):
 
     results = []
     for ha_name, input_data in loader.data.items():
+
         # Original warmfront figures - ECO4
         original_warmfront_estimates = december_figures[december_figures["HA Name"] == ha_name]
 
@@ -3280,50 +3281,6 @@ def forecast_remaining_sales(loader):
             asset_list_remaining = asset_list_remaining[pd.isnull(asset_list_remaining["installation_status"])]
             asset_list_remaining = asset_list_remaining.drop(columns=["installation_status"])
 
-        # TODO: TEMP
-        # n_pre_ciga = asset_list[
-        #     asset_list["ECO Eligibility"].isin(
-        #         [
-        #             "eco4 - passed ciga",
-        #             "eco4 (subject to ciga)",
-        #             "failed ciga",
-        #             "eco4"
-        #         ]
-        #     )
-        # ].shape[0]
-        #
-        # n_pre_ciga_remaining = asset_list_remaining[
-        #     asset_list_remaining["ECO Eligibility"].isin(
-        #         [
-        #             "eco4 - passed ciga",
-        #             "eco4 (subject to ciga)",
-        #             "failed ciga",
-        #             "eco4"
-        #         ]
-        #     )
-        # ].shape[0]
-        #
-        # compare_to_ids = asset_list_remaining["asset_list_row_id"].values
-        # assets_diff_ids = [x for x in asset_list["asset_list_row_id"].values if x not in compare_to_ids]
-        # diff = asset_list[asset_list["asset_list_row_id"].isin(assets_diff_ids)]
-        #
-        # n_sold = survey_list[survey_list["installation_status"].str.contains("ECO4")].shape[0]
-        # # cancellations = survey_list[]
-        # asset_list["ECO Eligibility"].value_counts()
-        #
-        # # Revenenue
-        # pre_ciga_revenue = n_pre_ciga * eco4_rate
-        # pre_ciga_remaining_revenue = n_pre_ciga_remaining * eco4_rate
-        # sold_revenue = n_sold * eco4_rate
-        #
-        # pre_ciga_revenue - (pre_ciga_remaining_revenue + sold_revenue)
-        # # MISSING 1 SALE from sold
-        # cancelled = survey_list[survey_list["installation_status"].str.contains("ECO4")].shape[0]
-        # dupes = survey_list[survey_list["asset_list_row_id"].duplicated()]["asset_list_row_id"].values
-        # z = survey_list[survey_list["asset_list_row_id"].isin(dupes)]
-        # z[['NO.', 'Street / Block Name', 'Post Code', 'INSTALLED OR CANCELLED', 'SUBMISSION DATE']]
-        # # TODO: END TEMP
-
         eligiblity_counts = pd.DataFrame(asset_list["ECO Eligibility"].value_counts()).reset_index()
         eligiblity_counts_remaining = pd.DataFrame(asset_list_remaining["ECO Eligibility"].value_counts()).reset_index()
 
@@ -3525,6 +3482,35 @@ def forecast_remaining_sales(loader):
         if variance_remaining != 0:
             raise ValueError("Something went wrong in variance remaining")
 
+        # We also check variances to make sure that the pre-CIGA ECO4 total equals
+        # 1) Pre CIGA remaining +
+        # 2) ECO4 sold +
+        # 3) ECO4 confirmed cancellations +
+        # 4) ECO4 unconfirmed cancellations
+
+        pre_ciga_eco4_variance = (
+            eco4_pre_ciga_revenue -
+            eco4_pre_ciga_remaining_revenue -
+            eco4_actually_sold -
+            eco4_confirmed_cancellations * eco4_rate -
+            eco4_expected_cancellations * eco4_rate
+        )
+
+        if pre_ciga_eco4_variance != 0:
+            raise ValueError("Something went wrong in pre_ciga_eco4_variance")
+
+        # Check GBIS total variance
+        gbis_variance = (
+            gbis_total_revenue -
+            gbis_actually_sold -
+            gbis_confirmed_cancellations * gbis_rate -
+            gbis_expected_cancellations * gbis_rate -
+            gbis_remaining_revenue
+        )
+
+        if gbis_variance != 0:
+            raise ValueError("Something went wrong in gbis_variance")
+
         to_append = {
             ("", "", "", "HA Name"): ha_name,
             # ECO4 - original warmfront figures
@@ -3544,8 +3530,10 @@ def forecast_remaining_sales(loader):
             ("ECO4 pre-ciga", "", "Remaining - #", ""): eco4_pre_ciga_remaining,
             ("ECO4 pre-ciga", "", "Total - £", ""): eco4_pre_ciga_revenue,
             ("ECO4 pre-ciga", "", "Remaining - £", ""): eco4_pre_ciga_remaining_revenue,
-            ("ECO4 pre-ciga", "", "VARIANCE - TOTAL", ""): variance_total,
-            ("ECO4 pre-ciga", "", "VARIANCE - REMAINING", ""): variance_remaining,
+            ("ECO4 pre-ciga", "", "VARIANCE - PRE-CIGA ECO4 TOTAL", ""): pre_ciga_eco4_variance,
+            ("ECO4 pre-ciga", "", "VARIANCE - PRE-CIGA ECO4 TOTAL VS ELIGIBLE & INELIGIBLE", ""): variance_total,
+            ("ECO4 pre-ciga", "", "VARIANCE - PRE-CIGA ECO4 REMAINING VS ELIGIBLE & INELIGIBLE", ""):
+                variance_remaining,
             ("ECO4 pre-ciga", "", "Sold - £", ""): eco4_actually_sold,
             ("ECO4 pre-ciga", "", "Confirmed cancellations - £", ""): eco4_confirmed_cancellations * eco4_rate,
             # This is for jobs that are in-progress and could still cancel
@@ -3600,6 +3588,7 @@ def forecast_remaining_sales(loader):
             # GBIS postcode list
             ("GBIS Postcode list", "Warmfront post code list", "Total - #", "GBIS total"): gbis_total,
             ("GBIS Postcode list", "Warmfront post code list", "Total - £", "GBIS total"): gbis_total_revenue,
+            ("GBIS Postcode list", "Warmfront post code list", "GBIS VARIANCE", "GBIS total"): gbis_variance,
             ("GBIS Postcode list", "Warmfront post code list", "Sold - £", "GBIS total"): gbis_actually_sold,
             ("GBIS Postcode list", "", "Confirmed cancellations - £", ""): gbis_confirmed_cancellations * gbis_rate,
             # This is for jobs that are in-progress and could still cancel
@@ -3611,7 +3600,7 @@ def forecast_remaining_sales(loader):
         }
 
         # Make sure nothing is forgotten due to duplicate multi-index keys
-        if len(to_append) != 45:
+        if len(to_append) != 47:
             raise ValueError("Something went wrong")
 
         results.append(to_append)

From 8dcb6a9be0f903fc06e4c9dcb3218bb1d6db949e Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 6 Mar 2024 21:11:17 +0000
Subject: [PATCH 078/262] 11% through matching ha38

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 553f6271..6998eb4b 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1054,6 +1054,17 @@ class DataLoader:
             'Kingsford Court, Coombe Valley Road', 'Kingsford Court'
         )
 
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            'LESLIE TEW COURT/DERWENT ROAD', 'LESLIE TEW COURT'
+        )
+
+        # There is no 18A LESLIE TEW COURT in the asset list
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "LESLIE TEW COURT") &
+              (survey_list["Post Code"] == "TN10 3TX") &
+              (survey_list["NO."] == "18A"))
+        ]
+
         return survey_list
 
     @staticmethod
@@ -3848,12 +3859,10 @@ def app():
     # Grab the December HA figures filepath
     december_figures_filepath = "local_data/ha_data/HA_December_figures.csv"
 
-    # priority_has = [
-    #     "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA38", "HA39", "HA107",
-    # ]
+    # Add in: "HA25"
     # TODO: Remove ECO3 sales from HA25
     priority_has = [
-        "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA28", "HA32", "HA39", "HA107",
+        "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA28", "HA32", "HA38", "HA39", "HA107",
     ]
     # Next HAs to do: 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come back on this],
     # Then: 28 [DONE],

From 17b5f6e140a90d261b790fee1a4a28f43d1e3a62 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 6 Mar 2024 21:42:17 +0000
Subject: [PATCH 079/262] ha38 23% merged

---
 .../ha_15_32/ha_analysis_batch_3.py           | 50 ++++++++++++++-----
 1 file changed, 38 insertions(+), 12 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 6998eb4b..ff39b190 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1065,6 +1065,24 @@ class DataLoader:
               (survey_list["NO."] == "18A"))
         ]
 
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            'Brindley House, Wellbeck Road', 'Brindley House'
+        )
+
+        # Try taking just the first part of the string, splitting on a /
+        survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.split('/').str[0].str.strip()
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            'HUNTSMAN WAY', 'HUNTSMANS WAY'
+        )
+
+        # Try taking just the first part of the string, splitting on a ,
+        survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.split(',').str[0].str.strip()
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "McCLAREN COURT", "MCLAREN COURT"
+        )
+
         return survey_list
 
     @staticmethod
@@ -1228,6 +1246,10 @@ class DataLoader:
                 if "flat" in str(house_number):
                     house_number = house_number.split("flat")[1].strip()
 
+                # We check if we had an instance of flat x, y
+                if "," in str(house_number):
+                    house_number = house_number.split(",")[0].strip()
+
             df = df[df["matching_address"].str.contains(str(house_number))]
 
             if df.empty:
@@ -1251,19 +1273,23 @@ class DataLoader:
                 df = df[df["HouseNo"].astype(str) == str(house_number)]
                 if df.shape[0] != 1:
                     df = df[df["matching_postcode"].str.lower().str.contains(row["Post Code"].lower().strip())]
-
-                    full_key = str(row["NO."]).lower().strip() + row["Street / Block Name"].lower().strip() + row[
-                        "Town/Area"].lower().strip() + row["Post Code"].lower().strip()
-                    # Remove any spaces from the full key
-                    full_key = full_key.replace(" ", "")
-
-                    df = self.levenstein_match(full_key, df)
-
                     if df.shape[0] != 1:
-                        print(row["Street / Block Name"])
-                        print(house_number)
-                        print(row["Post Code"])
-                        raise ValueError("Investigate")
+                        if "Town/Area" not in row.keys():
+                            full_key = (str(row["NO."]).lower().strip() + row["Street / Block Name"].lower().strip() +
+                                        row["Post Code"].lower().strip())
+                        else:
+                            full_key = str(row["NO."]).lower().strip() + row["Street / Block Name"].lower().strip() + \
+                                       row["Town/Area"].lower().strip() + row["Post Code"].lower().strip()
+                        # Remove any spaces from the full key
+                        full_key = full_key.replace(" ", "")
+
+                        df = self.levenstein_match(full_key, df)
+
+                        if df.shape[0] != 1:
+                            print(row["Street / Block Name"])
+                            print(house_number)
+                            print(row["Post Code"])
+                            raise ValueError("Investigate")
 
             matching_lookup.append(
                 {

From 8e258ff3ca164e2eddcd9cc74d1e7531bf655e4f Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 6 Mar 2024 22:29:18 +0000
Subject: [PATCH 080/262] 44% through matching

---
 .../ha_15_32/ha_analysis_batch_3.py           | 70 ++++++++++++++++++-
 1 file changed, 69 insertions(+), 1 deletion(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index ff39b190..567394a4 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1083,6 +1083,70 @@ class DataLoader:
             "McCLAREN COURT", "MCLAREN COURT"
         )
 
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "ST JAMES CLOISTERS", "ST. JAMES'S CLOISTERS"
+        )
+
+        survey_list["Street / Block Name"] = np.where(
+            ((survey_list["NO."].isin(
+                [
+                    "FLAT 1 22",
+                    "FLAT 2 22",
+                    "FLAT 3 22",
+                    "FLAT 4 22",
+                    "FLAT 5 22",
+                    "FLAT 6 22",
+                ]
+            )) &
+             (survey_list["Street / Block Name"] == "MELTON ROAD")),
+            "22 MELTON ROAD",
+            survey_list["Street / Block Name"]
+        )
+
+        survey_list["Street / Block Name"] = np.where(
+            ((survey_list["NO."].isin(
+                [
+                    "FLAT 1 24",
+                    "FLAT 2 24",
+                    "FLAT 3 24",
+                    "FLAT 4 24",
+                    "FLAT 5 24",
+                    "FLAT 6 24",
+                ]
+            )) &
+             (survey_list["Street / Block Name"] == "MELTON ROAD")),
+            "24 MELTON ROAD",
+            survey_list["Street / Block Name"]
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "TURRETT GREEN COURT SILENT STREET", "TURRET GREEN COURT"
+        )
+
+        # Turret green court flat 1 doesn't exist in the asset list
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "TURRET GREEN COURT") &
+              (survey_list["NO."] == 1))
+        ]
+        # 3, 45 raywell steet doesn't exist in the asset list
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "45 RAYWELL STREET") &
+              (survey_list["NO."] == 3))
+        ]
+
+        # 40 Avondale drive doesn't exist in the asset list
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "Avondale Drive") &
+              (survey_list["NO."] == 40))
+        ]
+        # 17A beech road has the wrong postcode
+        survey_list["Post Code"] = np.where(
+            (survey_list["Street / Block Name"] == "BEECH ROAD") &
+            (survey_list["Post Code"] == "DH6 1JD"),
+            "DH6 1JB",
+            survey_list["Post Code"]
+        )
+
         return survey_list
 
     @staticmethod
@@ -1250,6 +1314,10 @@ class DataLoader:
                 if "," in str(house_number):
                     house_number = house_number.split(",")[0].strip()
 
+                # We may also have a space for an instance of flat x y
+                if " " in str(house_number):
+                    house_number = house_number.split(" ")[0].strip()
+
             df = df[df["matching_address"].str.contains(str(house_number))]
 
             if df.empty:
@@ -1270,7 +1338,7 @@ class DataLoader:
                 raise ValueError("Investigate")
 
             if df.shape[0] != 1:
-                df = df[df["HouseNo"].astype(str) == str(house_number)]
+                df = df[df["HouseNo"].astype(str).str.lower() == str(house_number)]
                 if df.shape[0] != 1:
                     df = df[df["matching_postcode"].str.lower().str.contains(row["Post Code"].lower().strip())]
                     if df.shape[0] != 1:

From 067a66c1b172b63abc419a112525382ce7c2baa3 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 6 Mar 2024 22:45:22 +0000
Subject: [PATCH 081/262] ha38 wip - leaving for now

---
 .../ha_15_32/ha_analysis_batch_3.py           | 54 ++++++++++++++++++-
 1 file changed, 53 insertions(+), 1 deletion(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 567394a4..c4f6307c 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -599,7 +599,52 @@ class DataLoader:
         asset_list['ExtractedHouseNo'] = asset_list['matching_address'].apply(extract_house_no_if_flat)
         asset_list.loc[asset_list['ExtractedHouseNo'].notnull(), 'HouseNo'] = asset_list['ExtractedHouseNo']
         asset_list['matching_address'] = asset_list['matching_address'].apply(rearrange_address_if_flat)
-        # We then need to
+
+        # We update a few specific rows
+        asset_list["HouseNo"] = np.where(
+            (asset_list["Address_Line_1"].isin(
+                [
+                    "10 SOUTH VIEW/ROOM A1",
+                    "10 SOUTH VIEW/ROOM A2",
+                    "10 SOUTH VIEW/ROOM A3",
+                ]
+            )),
+            "10A",
+            asset_list["HouseNo"]
+        )
+
+        asset_list["matching_address"] = np.where(
+            (asset_list["Address_Line_1"].isin(
+                [
+                    "10 SOUTH VIEW/ROOM A1",
+                ]
+            )),
+            "10a, 10 south view/room a1, spennymoor, co. durham, dl16 7df'",
+            asset_list["matching_address"]
+        )
+
+        asset_list["HouseNo"] = np.where(
+            (asset_list["Address_Line_1"].isin(
+                [
+                    "10 SOUTH VIEW/ROOM B1",
+                    "10 SOUTH VIEW/ROOM B2",
+                    "10 SOUTH VIEW/ROOM B3",
+                    "10 SOUTH VIEW/ROOM B4",
+                ]
+            )),
+            "10B",
+            asset_list["HouseNo"]
+        )
+
+        asset_list["matching_address"] = np.where(
+            (asset_list["Address_Line_1"].isin(
+                [
+                    "10 SOUTH VIEW/ROOM B1",
+                ]
+            )),
+            "10b, 10 south view/room b1, spennymoor, co. durham, dl16 7df",
+            asset_list["matching_address"]
+        )
 
         return asset_list
 
@@ -1147,6 +1192,13 @@ class DataLoader:
             survey_list["Post Code"]
         )
 
+        survey_list["Street / Block Name"] = np.where(
+            (survey_list["Street / Block Name"] == "SOUTHVIEW") &
+            (survey_list["Post Code"] == "DL16 7DF"),
+            "SOUTH VIEW",
+            survey_list["Street / Block Name"]
+        )
+
         return survey_list
 
     @staticmethod

From 5c3f6320dd6bfc2ddaac4fefb8786646c50e7945 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 10:42:51 +0000
Subject: [PATCH 082/262] 29% through matching eco3 ha25

---
 .../ha_15_32/ha_analysis_batch_3.py           | 136 +++++++++++++++---
 1 file changed, 117 insertions(+), 19 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index c4f6307c..3ea9649e 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -183,7 +183,7 @@ class DataLoader:
 
     def create_asset_list_matching_address(self, ha_name, asset_list):
 
-        if ha_name in ["HA1", "HA6", "HA16", "HA24", "HA25"]:
+        if ha_name in ["HA1", "HA6", "HA16", "HA24"]:
             asset_list["matching_address"] = asset_list[
                 self.COLUMN_CONFIG[ha_name]["address"]
             ].astype(str).str.lower().str.strip()
@@ -214,6 +214,14 @@ class DataLoader:
                 asset_list["Postcode"].astype(str).str.lower().str.strip()
             )
             asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA25":
+            asset_list["matching_address"] = asset_list[
+                self.COLUMN_CONFIG[ha_name]["address"]
+            ].astype(str).str.lower().str.strip()
+
+            asset_list["matching_postcode"] = asset_list['matching_address'].apply(
+                lambda x: ' '.join(x.split()[-2:]) if pd.notnull(x) else x
+            )
         elif ha_name == "HA28":
             asset_list["matching_address"] = (
                 asset_list["House Number"].astype(str).str.lower().str.strip() + ", " +
@@ -352,6 +360,9 @@ class DataLoader:
             house_numbers = house_numbers.iloc[:, 0:1]
             house_numbers.columns = ['HouseNo']
 
+            # Remove trailing punctuation such as , or ;
+            house_numbers["HouseNo"] = house_numbers["HouseNo"].str.rstrip(',;')
+
             asset_list = pd.concat([asset_list, house_numbers[["HouseNo"]]], axis=1)
 
         return asset_list
@@ -425,27 +436,16 @@ class DataLoader:
         workbook = openpyxl.load_workbook(filepath)
         asset_sheetname = self.get_asset_sheetname(workbook)
 
-        # TODO: TEMP
-        sheetnames_lower = [x.lower() for x in workbook.sheetnames]
-        if any("eco3" in x for x in sheetnames_lower):
-            raise Exception("REMOVE ME")
-
         asset_sheet = workbook[asset_sheetname]
         asset_sheet_colnames = [cell.value for cell in asset_sheet[1]]
         if ha_name == "HA25":
             asset_sheet_colnames[11] = "matching_postcode"
 
-        values_only = not ha_name != "HA25"
-
         rows_data = []
-        if not values_only:
-            for row in asset_sheet.iter_rows(min_row=2, values_only=values_only):
-                row_data = [cell.value for cell in row]  # This will get you the cell values
-                rows_data.append(row_data)
-        else:
-            for row in asset_sheet.iter_rows(min_row=2, values_only=values_only):  # use values_only=True to get values
-                row_data = list(row)  # No need for comprehension, values_only=True returns a tuple of values
-                rows_data.append(row_data)
+
+        for row in asset_sheet.iter_rows(min_row=2, values_only=False):
+            row_data = [cell.value for cell in row]  # This will get you the cell values
+            rows_data.append(row_data)
 
         asset_list = pd.DataFrame(rows_data, columns=asset_sheet_colnames)
 
@@ -477,6 +477,29 @@ class DataLoader:
         if ha_name in ["HA1", "HA25"]:
             return asset_list, pd.DataFrame(), pd.DataFrame()
 
+        # If we have ECO3 surveys, we need to match them, because any properties treated under ECO3 won't be
+        # suitable under ECO4, since their walls will be filled
+        eco3_list = pd.DataFrame()
+        sheetnames_lower = [x.lower() for x in workbook.sheetnames]
+        eco3_sheetname_index = [i for i, x in enumerate(sheetnames_lower) if "eco3" in x.replace(" ", "")]
+        if eco3_sheetname_index:
+            eco3_sheetname = workbook.sheetnames[eco3_sheetname_index[0]]
+            eco3_sheet = workbook[eco3_sheetname]
+            eco3_rows = []
+            for row in eco3_sheet.iter_rows(min_row=2, values_only=False):  # Assuming the first row is headers
+                row_data = [cell.value for cell in row]  # This will get you the cell values
+                eco3_rows.append(row_data)
+
+            eco3_list = pd.DataFrame(eco3_rows, columns=[cell.value for cell in eco3_sheet[1]])
+            # Remove columns that are None
+            eco3_list = eco3_list.loc[:, eco3_list.columns.notnull()]
+            # Remove rows that are completely empty
+            eco3_list = eco3_list.loc[eco3_list.loc[:, eco3_list.columns].notnull().any(axis=1)]
+            eco3_list["eco3_list_row_id"] = [ha_name + "_Eco3_" + str(i) for i in range(0, len(eco3_list))]
+
+            # Perform the eco3 merge
+            eco3_list = self.merge_eco3_to_assets(asset_list, eco3_list, ha_name)
+
         # We check if there is a survey list
         survey_sheetname = self.get_survey_sheetname(workbook)
         survey_sheet = workbook[survey_sheetname]
@@ -518,7 +541,7 @@ class DataLoader:
             ciga_list = self.dedupe_ciga_list(ciga_list)
             ciga_list = self.merge_ciga_to_assets(asset_list, ciga_list, ha_name)
 
-        return asset_list, survey_list, ciga_list
+        return asset_list, survey_list, ciga_list, eco3_list
 
     @staticmethod
     def correct_ha6_asset_list(asset_list):
@@ -1433,6 +1456,79 @@ class DataLoader:
 
         return survey_list
 
+    def merge_eco3_to_assets(self, asset_list, eco3_list, ha_name):
+
+        # We add on a matching postcode without spaces for this
+        # asset_list["matching_postcode_no_space"] = asset_list["matching_postcode"].str.lower().str.replace(" ", "")
+
+        # May need an eco3 list correction function
+
+        # NEADS DRIVE, postcode with bs305dt, is not found in the asset list
+        eco3_list = eco3_list[
+            ~(eco3_list["Post Code"] == "BS305DT")
+        ]
+        # Drop rows with missings postcode
+        eco3_list = eco3_list[
+            ~pd.isnull(eco3_list["Post Code"])
+        ]
+
+        missed_postcodes = []
+        if ha_name == "HA25":
+            missed_postcodes = {
+                postcode.lower() for postcode in eco3_list["Post Code"] if
+                postcode.lower() not in asset_list["matching_postcode"].values
+            }
+            eco3_list = eco3_list[~eco3_list["Post Code"].str.lower().isin(missed_postcodes)]
+
+        matching_lookup = []
+        missed = []
+        for _, row in tqdm(eco3_list.iterrows(), total=len(eco3_list)):
+
+            postcode = row["Post Code"].lower().strip()
+
+            # df will never be empty, since we've already done a check for common postcodes
+            df = asset_list[
+                asset_list["matching_postcode"].str.contains(postcode)
+            ]
+
+            house_number = row["NO "]
+            if isinstance(house_number, str):
+                house_number = house_number.lower().strip()
+
+            if not any(df["matching_address"].str.contains(str(house_number))):
+                if "flat" in str(house_number):
+                    house_number = house_number.split("flat")[1].strip()
+
+                # We check if we had an instance of flat x, y
+                if "," in str(house_number):
+                    house_number = house_number.split(",")[0].strip()
+
+                # We may also have a space for an instance of flat x y
+                if " " in str(house_number):
+                    house_number = house_number.split(" ")[0].strip()
+
+            df = df[df["matching_address"].str.contains(str(house_number))]
+
+            if df.empty:
+                missed.append(row["eco3_list_row_id"])
+                continue
+
+            if df.shape[0] != 1:
+                df = df[df["HouseNo"].astype(str).str.lower() == str(house_number)]
+
+            if df.shape[0] != 1:
+                print(row["Street / Block Name"])
+                print(house_number)
+                print(row["Post Code"])
+                raise ValueError("Investigate")
+
+            matching_lookup.append(
+                {
+                    "eco3_list_row_id": row["eco3_list_row_id"],
+                    "asset_list_row_id": df["asset_list_row_id"].values[0],
+                }
+            )
+
     @staticmethod
     def extract_streetname(address, house_number=None, postcode=None):
         """
@@ -4008,11 +4104,13 @@ def app():
     # Add in: "HA25"
     # TODO: Remove ECO3 sales from HA25
     priority_has = [
-        "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA28", "HA32", "HA38", "HA39", "HA107",
+        "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA20", "HA24", "HA25", "HA28", "HA32", "HA39", "HA107",
     ]
     # Next HAs to do: 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come back on this],
     # Then: 28 [DONE],
-    # 38, 41, 10, 14, 20, 48
+    # 41, 10, 14 [DONE], 20, 48, 50
+    # 38[problematic, but no ECO4]
+    # TODO - do 50 and 25 next
     # Filter down the directories to only the priority HAs
     directories = [d for d in directories if d.split("/")[2] in priority_has]
 

From ef77db10373c653e28c82265460ce9fd3bf3f3bf Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 10:56:27 +0000
Subject: [PATCH 083/262] HA25 eco3 matching 91% complete

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 3ea9649e..ea5b0456 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1516,6 +1516,15 @@ class DataLoader:
             if df.shape[0] != 1:
                 df = df[df["HouseNo"].astype(str).str.lower() == str(house_number)]
 
+            if df.empty:
+                missed.append(row["eco3_list_row_id"])
+                continue
+
+            if df.shape[0] != 1:
+                # Perform a search on streetname
+                street_name_section1 = row["Street / Block Name"].lower().split("/")[0].split(",")[0]
+                df = df[df["matching_address"].str.contains(street_name_section1)]
+
             if df.shape[0] != 1:
                 print(row["Street / Block Name"])
                 print(house_number)

From 022244377d36557f83081e505b8068ab2bd98004 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 12:26:16 +0000
Subject: [PATCH 084/262] working on fixing missed matches in eco3 matching

---
 .../ha_15_32/ha_analysis_batch_3.py           | 84 +++++++++++++++----
 1 file changed, 66 insertions(+), 18 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index ea5b0456..a5845990 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -171,6 +171,10 @@ class DataLoader:
         "HA107": 51,
     }
 
+    UNMATCHED_ECO3 = {
+        "HA25": 94
+    }
+
     def __init__(self, directories, december_figures_filepath, use_cache, rebuild):
         self.directories = directories
         self.use_cache = use_cache
@@ -1458,9 +1462,6 @@ class DataLoader:
 
     def merge_eco3_to_assets(self, asset_list, eco3_list, ha_name):
 
-        # We add on a matching postcode without spaces for this
-        # asset_list["matching_postcode_no_space"] = asset_list["matching_postcode"].str.lower().str.replace(" ", "")
-
         # May need an eco3 list correction function
 
         # NEADS DRIVE, postcode with bs305dt, is not found in the asset list
@@ -1471,8 +1472,17 @@ class DataLoader:
         eco3_list = eco3_list[
             ~pd.isnull(eco3_list["Post Code"])
         ]
+        # We have a bunch of genuine duplicates
+        eco3_list = eco3_list.drop_duplicates(["NO ", "Street / Block Name", "Post Code"])
+
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "HALWILL MEADOOW", "HALWILL MEADOW"
+        )
+
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "Hall Road", "Hall Rd"
+        )
 
-        missed_postcodes = []
         if ha_name == "HA25":
             missed_postcodes = {
                 postcode.lower() for postcode in eco3_list["Post Code"] if
@@ -1480,10 +1490,18 @@ class DataLoader:
             }
             eco3_list = eco3_list[~eco3_list["Post Code"].str.lower().isin(missed_postcodes)]
 
+        # For the asset list, we create a matching address without any punctuation
+        # TODO: We should generally just remove puncutation from addresses when matching
+        asset_list['matching_address_no_punctuation'] = asset_list['matching_address'].str.replace(r'[^\w\s]', '',
+                                                                                                   regex=True)
+        # Remove double spaces
+        asset_list["matching_address_no_punctuation"] = asset_list["matching_address_no_punctuation"].str.replace(
+            "  ", " "
+        )
+
         matching_lookup = []
         missed = []
         for _, row in tqdm(eco3_list.iterrows(), total=len(eco3_list)):
-
             postcode = row["Post Code"].lower().strip()
 
             # df will never be empty, since we've already done a check for common postcodes
@@ -1507,24 +1525,20 @@ class DataLoader:
                 if " " in str(house_number):
                     house_number = house_number.split(" ")[0].strip()
 
-            df = df[df["matching_address"].str.contains(str(house_number))]
+            # We must do the house number filter
+            df = df[df["HouseNo"].astype(str).str.lower() == str(house_number)]
+
+            # Perform a search on streetname
+            # We do this to prevent duplicate matches to properties with the same postcode and house number,
+            # but different streets
+            street_name_section1 = row["Street / Block Name"].lower().split("/")[0].split(",")[0]
+            street_name_section1 = re.sub(r'[^\w\s]', '', street_name_section1)
+            df = df[df["matching_address_no_punctuation"].str.contains(street_name_section1)]
 
             if df.empty:
                 missed.append(row["eco3_list_row_id"])
                 continue
 
-            if df.shape[0] != 1:
-                df = df[df["HouseNo"].astype(str).str.lower() == str(house_number)]
-
-            if df.empty:
-                missed.append(row["eco3_list_row_id"])
-                continue
-
-            if df.shape[0] != 1:
-                # Perform a search on streetname
-                street_name_section1 = row["Street / Block Name"].lower().split("/")[0].split(",")[0]
-                df = df[df["matching_address"].str.contains(street_name_section1)]
-
             if df.shape[0] != 1:
                 print(row["Street / Block Name"])
                 print(house_number)
@@ -1538,6 +1552,40 @@ class DataLoader:
                 }
             )
 
+        # We verify the missed
+        # -HA25 contains 88 missed entries. These are actually 8 unique postcodes, where surveys were conducted
+        # on properties that had house numbers outside of the asset list
+        if len(missed) != self.UNMATCHED_ECO3[ha_name]:
+            raise ValueError(
+                f"Unmatched addresses for {ha_name} is not as expected, got {len(missed)} unmatched"
+            )
+
+        # TODO: 194 missed
+
+        matching_lookup = pd.DataFrame(matching_lookup)
+        # Check dupes as this will cause problems later on
+        if matching_lookup["asset_list_row_id"].duplicated().any():
+            raise ValueError("Duplicated asset list row ids")
+
+        missed_df = eco3_list[eco3_list["eco3_list_row_id"].isin(missed)]
+        missed_df.head(3).tail(1)["eco3_list_row_id"]
+
+        duped_ids = matching_lookup[matching_lookup["asset_list_row_id"].duplicated()]["asset_list_row_id"].tolist()
+        duped_df = matching_lookup[
+            matching_lookup["asset_list_row_id"].isin(duped_ids)
+        ]
+        duped_surveys = eco3_list[
+            eco3_list["eco3_list_row_id"].isin(duped_df["eco3_list_row_id"].values)
+        ].copy()
+
+        duped_surveys = duped_surveys.merge(matching_lookup, how="left", on="eco3_list_row_id")
+
+        duped_surveys[
+            ["NO ", "Street / Block Name", "Post Code", "eco3_list_row_id", "asset_list_row_id"]
+        ].sort_values("asset_list_row_id").head()
+
+        asset_list[asset_list["asset_list_row_id"] == "HA2515145"]["matching_address"].values
+
     @staticmethod
     def extract_streetname(address, house_number=None, postcode=None):
         """

From b09bd63b53c8d9b14f11c1c5b7cb38b28c63afbc Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 12:53:25 +0000
Subject: [PATCH 085/262] done with ha25 matching for now

---
 .../ha_15_32/ha_analysis_batch_3.py           | 66 +++++++++++--------
 1 file changed, 38 insertions(+), 28 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index a5845990..f0813aef 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -172,7 +172,7 @@ class DataLoader:
     }
 
     UNMATCHED_ECO3 = {
-        "HA25": 94
+        "HA25": 119
     }
 
     def __init__(self, directories, december_figures_filepath, use_cache, rebuild):
@@ -478,7 +478,7 @@ class DataLoader:
         # For HA1 and HA25, there is an exception in the structure of the data. We don't have any survey or ciga
         # lists, and so
         # we can return the asset list now
-        if ha_name in ["HA1", "HA25"]:
+        if ha_name in ["HA1"]:
             return asset_list, pd.DataFrame(), pd.DataFrame()
 
         # If we have ECO3 surveys, we need to match them, because any properties treated under ECO3 won't be
@@ -1460,10 +1460,8 @@ class DataLoader:
 
         return survey_list
 
-    def merge_eco3_to_assets(self, asset_list, eco3_list, ha_name):
-
-        # May need an eco3 list correction function
-
+    @staticmethod
+    def correct_ha25_eco3_list(eco3_list):
         # NEADS DRIVE, postcode with bs305dt, is not found in the asset list
         eco3_list = eco3_list[
             ~(eco3_list["Post Code"] == "BS305DT")
@@ -1483,6 +1481,29 @@ class DataLoader:
             "Hall Road", "Hall Rd"
         )
 
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "SPRINGFIELD WAY SAINT DAY", "SPRINGFIELD WAY ST DAY"
+        )
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "BOND SPEAR COURT", "BOND-SPEAR COURT"
+        )
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "ST.MARYS HILL", "ST MARYS HILL"
+        )
+        # Correct the postcode for edmund road
+        eco3_list["Post Code"] = np.where(
+            (eco3_list["Street / Block Name"] == "EDMUND ROAD") &
+            (eco3_list["Post Code"] == "TR14 8QJ"),
+            "TR15 1BY",
+            eco3_list["Post Code"]
+        )
+        return eco3_list
+
+    def merge_eco3_to_assets(self, asset_list, eco3_list, ha_name):
+
+        eco3_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_eco3_list")
+        eco3_list = eco3_list_correction_function(eco3_list)
+
         if ha_name == "HA25":
             missed_postcodes = {
                 postcode.lower() for postcode in eco3_list["Post Code"] if
@@ -1492,8 +1513,9 @@ class DataLoader:
 
         # For the asset list, we create a matching address without any punctuation
         # TODO: We should generally just remove puncutation from addresses when matching
-        asset_list['matching_address_no_punctuation'] = asset_list['matching_address'].str.replace(r'[^\w\s]', '',
-                                                                                                   regex=True)
+        asset_list['matching_address_no_punctuation'] = asset_list['matching_address'].str.replace(
+            r'[^\w\s]', '', regex=True
+        )
         # Remove double spaces
         asset_list["matching_address_no_punctuation"] = asset_list["matching_address_no_punctuation"].str.replace(
             "  ", " "
@@ -1502,6 +1524,8 @@ class DataLoader:
         matching_lookup = []
         missed = []
         for _, row in tqdm(eco3_list.iterrows(), total=len(eco3_list)):
+            # if row["eco3_list_row_id"] == "HA25_Eco3_5422":
+            #     raise Exception()
             postcode = row["Post Code"].lower().strip()
 
             # df will never be empty, since we've already done a check for common postcodes
@@ -1553,38 +1577,24 @@ class DataLoader:
             )
 
         # We verify the missed
-        # -HA25 contains 88 missed entries. These are actually 8 unique postcodes, where surveys were conducted
-        # on properties that had house numbers outside of the asset list
+        # HA25 contains 119 missed entries across 24 unique postcodes; the majority belong to 2 postcodes
+        # where many surveys were conducted on house numbers that are not in the asset list
         if len(missed) != self.UNMATCHED_ECO3[ha_name]:
             raise ValueError(
                 f"Unmatched addresses for {ha_name} is not as expected, got {len(missed)} unmatched"
             )
 
-        # TODO: 194 missed
-
         matching_lookup = pd.DataFrame(matching_lookup)
         # Check dupes as this will cause problems later on
         if matching_lookup["asset_list_row_id"].duplicated().any():
             raise ValueError("Duplicated asset list row ids")
 
-        missed_df = eco3_list[eco3_list["eco3_list_row_id"].isin(missed)]
-        missed_df.head(3).tail(1)["eco3_list_row_id"]
+        # Merge onto eco3 list
+        eco3_list = eco3_list.merge(matching_lookup, how="left", on="eco3_list_row_id")
 
-        duped_ids = matching_lookup[matching_lookup["asset_list_row_id"].duplicated()]["asset_list_row_id"].tolist()
-        duped_df = matching_lookup[
-            matching_lookup["asset_list_row_id"].isin(duped_ids)
-        ]
-        duped_surveys = eco3_list[
-            eco3_list["eco3_list_row_id"].isin(duped_df["eco3_list_row_id"].values)
-        ].copy()
+        asset_list = asset_list.drop(columns=["matching_address_no_punctuation"])
 
-        duped_surveys = duped_surveys.merge(matching_lookup, how="left", on="eco3_list_row_id")
-
-        duped_surveys[
-            ["NO ", "Street / Block Name", "Post Code", "eco3_list_row_id", "asset_list_row_id"]
-        ].sort_values("asset_list_row_id").head()
-
-        asset_list[asset_list["asset_list_row_id"] == "HA2515145"]["matching_address"].values
+        return eco3_list
 
     @staticmethod
     def extract_streetname(address, house_number=None, postcode=None):

From 961b53d523bf7dc82d9e83459861cb3aa2865c93 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 12:58:29 +0000
Subject: [PATCH 086/262] Adding return for HA25

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index f0813aef..7ad50583 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -479,7 +479,7 @@ class DataLoader:
         # lists, and so
         # we can return the asset list now
         if ha_name in ["HA1"]:
-            return asset_list, pd.DataFrame(), pd.DataFrame()
+            return asset_list, pd.DataFrame(), pd.DataFrame(), pd.DataFrame()
 
         # If we have ECO3 surveys, we need to match them, because any properties treated under ECO3 won't be
         # suitable under ECO4, since their walls will be filled
@@ -504,6 +504,10 @@ class DataLoader:
             # Perform the eco3 merge
             eco3_list = self.merge_eco3_to_assets(asset_list, eco3_list, ha_name)
 
+        if ha_name in ["HA25"]:
+            # Accommodate HA25's unique structure: an ECO3 list but no survey or CIGA lists
+            return asset_list, pd.DataFrame(), pd.DataFrame(), eco3_list
+
         # We check if there is a survey list
         survey_sheetname = self.get_survey_sheetname(workbook)
         survey_sheet = workbook[survey_sheetname]
@@ -1592,7 +1596,7 @@ class DataLoader:
         # Merge onto eco3 list
         eco3_list = eco3_list.merge(matching_lookup, how="left", on="eco3_list_row_id")
 
-        asset_list = asset_list.drop(columns=["matching_address_no_punctuation"])
+        asset_list.drop(columns=["matching_address_no_punctuation"], inplace=True)
 
         return eco3_list
 
@@ -1756,7 +1760,7 @@ class DataLoader:
                 continue
             # Load asset list
             logger.info("Loading data for {}".format(ha_name))
-            asset_list, survey_list, ciga_list = self.load_asset_list(
+            asset_list, survey_list, ciga_list, eco3_list = self.load_asset_list(
                 filepath=filepath,
                 ha_name=ha_name,
             )
@@ -1764,7 +1768,8 @@ class DataLoader:
             data[ha_name] = {
                 "asset_list": asset_list,
                 "survey_list": survey_list,
-                "ciga_list": ciga_list
+                "ciga_list": ciga_list,
+                "eco3_list": eco3_list
             }
 
         self.data = data

From 7f88f0e0f59e584d82a6799671e8f1a64a034392 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 13:59:32 +0000
Subject: [PATCH 087/262] Added in the re-labelling of assets based on eco3
 merge

---
 .../ha_15_32/ha_analysis_batch_3.py           | 20 +++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 7ad50583..21509923 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1812,6 +1812,7 @@ class DataLoader:
             asset_list = data_assets["asset_list"].copy()
             survey_list = data_assets["survey_list"].copy()
             ciga_list = data_assets["ciga_list"].copy()
+            eco3_list = data_assets.get("eco3_list", pd.DataFrame())
 
             asset_list_starting_size = asset_list.shape[0]
 
@@ -1859,6 +1860,25 @@ class DataLoader:
             if asset_list.shape[0] != asset_list_starting_size:
                 raise ValueError("The asset list has changed in size")
 
+            # If we have eco3 surveys, we set a property to not eligible
+            if not eco3_list.empty:
+                eco3_list_to_merge = eco3_list[["asset_list_row_id"]].copy()
+                eco3_list_to_merge["has_eco3"] = True
+                asset_list = asset_list.merge(
+                    eco3_list_to_merge, how="left", on="asset_list_row_id"
+                )
+
+                if asset_list.shape[0] != asset_list_starting_size:
+                    raise ValueError("The asset list has changed in size, when merging on eco3")
+
+                # Any rows that have an eco3 survey are set to not eligible
+                asset_list["ECO Eligibility"] = np.where(
+                    asset_list["has_eco3"] == True,
+                    "not eligible",
+                    asset_list["ECO Eligibility"]
+                )
+                asset_list = asset_list.drop(columns=["has_eco3"])
+
             # Report on sales
             sales_report = {}
             if not survey_list.empty:

From 9a0c6c3e8fbae7a23980aa7e75912ef6202ab29d Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 14:18:08 +0000
Subject: [PATCH 088/262] expanded eco3 matching

---
 .../ha_15_32/ha_analysis_batch_3.py            | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 21509923..06bb0d96 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -172,7 +172,7 @@ class DataLoader:
     }
 
     UNMATCHED_ECO3 = {
-        "HA25": 119
+        "HA25": 154
     }
 
     def __init__(self, directories, december_figures_filepath, use_cache, rebuild):
@@ -1508,12 +1508,16 @@ class DataLoader:
         eco3_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_eco3_list")
         eco3_list = eco3_list_correction_function(eco3_list)
 
+        asset_list["matching_postcode_nospace"] = asset_list["matching_postcode"].str.replace(" ", "").str.lower()
+        eco3_list["postcode_no_space"] = eco3_list["Post Code"].str.lower().str.replace(" ", "")
+
         if ha_name == "HA25":
+            # 317 -> 259
             missed_postcodes = {
-                postcode.lower() for postcode in eco3_list["Post Code"] if
-                postcode.lower() not in asset_list["matching_postcode"].values
+                postcode for postcode in eco3_list["postcode_no_space"] if
+                postcode not in asset_list["matching_postcode_nospace"].values
             }
-            eco3_list = eco3_list[~eco3_list["Post Code"].str.lower().isin(missed_postcodes)]
+            eco3_list = eco3_list[~eco3_list["postcode_no_space"].isin(missed_postcodes)]
 
         # For the asset list, we create a matching address without any punctuation
         # TODO: We should generally just remove puncutation from addresses when matching
@@ -1530,11 +1534,11 @@ class DataLoader:
         for _, row in tqdm(eco3_list.iterrows(), total=len(eco3_list)):
             # if row["eco3_list_row_id"] == "HA25_Eco3_5422":
             #     raise Exception()
-            postcode = row["Post Code"].lower().strip()
+            postcode = row["postcode_no_space"]
 
             # df will never be empty, since we've already done a check for common postcodes
             df = asset_list[
-                asset_list["matching_postcode"].str.contains(postcode)
+                asset_list["matching_postcode_nospace"].str.contains(postcode)
             ]
 
             house_number = row["NO "]
@@ -1588,6 +1592,8 @@ class DataLoader:
                 f"Unmatched addresses for {ha_name} is not as expected, got {len(missed)} unmatched"
             )
 
+        # 154 missed, 2827 matched for HA 25
+
         matching_lookup = pd.DataFrame(matching_lookup)
         # Check dupes as this will cause problems later on
         if matching_lookup["asset_list_row_id"].duplicated().any():

From 8b70fb346c0ce51acd24b245bbbecedeaa10d30c Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 15:00:51 +0000
Subject: [PATCH 089/262] matching ha50

---
 .../ha_15_32/ha_analysis_batch_3.py           | 56 ++++++++++++++++---
 1 file changed, 49 insertions(+), 7 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 06bb0d96..4708bf35 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -172,7 +172,8 @@ class DataLoader:
     }
 
     UNMATCHED_ECO3 = {
-        "HA25": 154
+        "HA25": 154,
+        "HA50": 5
     }
 
     def __init__(self, directories, december_figures_filepath, use_cache, rebuild):
@@ -262,6 +263,10 @@ class DataLoader:
                                              asset_list["add_5"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["post_code"].astype(str).str.lower().str.strip()
             asset_list["matching_postcode"] = asset_list["post_code"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA50":
+            asset_list["matching_address"] = asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Post Code"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["Post Code"].astype(str).str.lower().str.strip()
         elif ha_name == "HA107":
             # Create matching_address by concatenating House No, Street, Town, District, Postcode
             asset_list["matching_address"] = asset_list["House No"].astype(str).str.lower().str.strip() + ", " + \
@@ -433,6 +438,8 @@ class DataLoader:
             return "ECO Surveys"
         elif "ECO Survey" in workbook.sheetnames:
             return "ECO Survey"
+        elif "ECO 4 Surveys completed" in workbook.sheetnames:
+            return "ECO 4 Surveys completed"
         else:
             return "ECO surveys"
 
@@ -1289,6 +1296,34 @@ class DataLoader:
 
         return survey_list
 
+    @staticmethod
+    def correct_ha50_survey_list(survey_list):
+
+        survey_list["Post Code"] = np.where(
+            (survey_list["Street / Block Name"] == 'COSELEY STREET') &
+            (survey_list["Post Code"] == 'ST16 1LR'),
+            "ST6 1JU",
+            survey_list["Post Code"]
+        )
+
+        # Remove some of COSELEY STREET, as we have surveys done, outside of the asset list
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "COSELEY STREET") &
+              (survey_list["Post Code"] == "ST6 1JU") &
+              (survey_list["NO."].isin([96])))
+        ]
+
+        survey_list["Post Code"] = survey_list["Post Code"].str.replace("ST33JZ", "ST3 3JZ")
+
+        # Remove some of Jesmond drive as we have surveys done outside of the asset list
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "Jesmond Drive") &
+              (survey_list["Post Code"] == "ST3 3JZ") &
+              (survey_list["NO."].isin([29])))
+        ]
+
+        return survey_list
+
     @staticmethod
     def correct_ha107_survey_list(survey_list):
         # Replace Front Street, East Stockham with Front Street, East Stockwith
@@ -1503,6 +1538,10 @@ class DataLoader:
         )
         return eco3_list
 
+    @staticmethod
+    def correct_ha50_eco3_list(eco3_list):
+        return eco3_list
+
     def merge_eco3_to_assets(self, asset_list, eco3_list, ha_name):
 
         eco3_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_eco3_list")
@@ -1517,6 +1556,7 @@ class DataLoader:
                 postcode for postcode in eco3_list["postcode_no_space"] if
                 postcode not in asset_list["matching_postcode_nospace"].values
             }
+
             eco3_list = eco3_list[~eco3_list["postcode_no_space"].isin(missed_postcodes)]
 
         # For the asset list, we create a matching address without any punctuation
@@ -4199,16 +4239,18 @@ def app():
     # Grab the December HA figures filepath
     december_figures_filepath = "local_data/ha_data/HA_December_figures.csv"
 
-    # Add in: "HA25"
+    # Add in:
     # TODO: Remove ECO3 sales from HA25
     priority_has = [
-        "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA20", "HA24", "HA25", "HA28", "HA32", "HA39", "HA107",
+        "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39", "HA50", "HA107",
     ]
-    # Next HAs to do: 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come back on this],
+    # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come
+    # back on this],
     # Then: 28 [DONE],
-    # 41, 10, 14 [DONE], 20, 48, 50
-    # 38[problematic, but no ECO4]
-    # TODO - do 50 and 25 next
+    # 41, 48, 50
+    # 38[problematic, but no ECO4], 10 problematic (no eligibility),
+    # 20 has barely any in
+    # TODO - do 50
     # Filter down the directories to only the priority HAs
     directories = [d for d in directories if d.split("/")[2] in priority_has]
 

From 3001a98421b377cb31e2c3b667528e8d4b80a150 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 15:02:23 +0000
Subject: [PATCH 090/262] ha50 30% matched

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 4708bf35..901784e1 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1322,6 +1322,10 @@ class DataLoader:
               (survey_list["NO."].isin([29])))
         ]
 
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "BRUNDELL OVAL", "BRUNDALL OVAL"
+        )
+
         return survey_list
 
     @staticmethod

From 4afd012e51bfc3b366dc1e8d1f70281bb1097bd0 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 15:14:53 +0000
Subject: [PATCH 091/262] ha50 51% matched

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 901784e1..bde6f647 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1326,6 +1326,13 @@ class DataLoader:
             "BRUNDELL OVAL", "BRUNDALL OVAL"
         )
 
+        # Remove 4 Linden Place
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "Linden Place") &
+              (survey_list["Post Code"] == "ST3 3AT") &
+              (survey_list["NO."].isin([4])))
+        ]
+
         return survey_list
 
     @staticmethod

From 1146f34eba62ab2b00f610502b17ba6f9425cf43 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 15:24:20 +0000
Subject: [PATCH 092/262] matching 81% complete

---
 .../ha_15_32/ha_analysis_batch_3.py           | 39 +++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index bde6f647..818f6e4f 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1333,6 +1333,45 @@ class DataLoader:
               (survey_list["NO."].isin([4])))
         ]
 
+        # Remove 11 Tilehurst Place
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "Tilehurst Place") &
+              (survey_list["Post Code"] == "ST3 3AP") &
+              (survey_list["NO."].isin([11])))
+        ]
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "deavile road", "DEAVILLE ROAD"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "WOOLISCROFT ROAD", "WOOLLISCROFT ROAD"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Leak Road", "Leek Road"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Springfield road", "Springfields road"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "MILLWARD RD", "MILLWARD ROAD"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "REPINGTON RD", "REPINGTON ROAD"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "ECCELSTONE PLACE", "ECCLESTONE PLACE"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "St. James Place", "St James Place"
+        )
+
         return survey_list
 
     @staticmethod

From 5a1aa3995221ddf125b25c6d619165fdbcab37ff Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 15:33:26 +0000
Subject: [PATCH 093/262] ha50 93% complete

---
 .../ha_15_32/ha_analysis_batch_3.py           | 44 +++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 818f6e4f..3b9bd7ca 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1372,6 +1372,50 @@ class DataLoader:
             "St. James Place", "St James Place"
         )
 
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "CHELL HEATH RD", "CHELL HEATH ROAD"
+        )
+        # Correct postcode
+        survey_list["Post Code"] = np.where(
+            (survey_list["Street / Block Name"] == 'CHELL HEATH ROAD') &
+            (survey_list["Post Code"] == 'ST6 6HU'),
+            "ST6 6HJ",
+            survey_list["Post Code"]
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Franklin Rd", "Franklin Road"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Lodge Rd", "Lodge Road"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "St Matthews Street", "St Matthew Street"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Grove Bank Road", "Grovebank Road"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "OVERSLEY RD", "OVERSLEY ROAD"
+        )
+
+        # Replace all of the " RD" with " ROAD"
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            " RD", " ROAD"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "St. Georges Crescent", "St Georges Crescent"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Tewson Road", "Tewson Green"
+        )
+
         return survey_list
 
     @staticmethod

From d4e378f109deb3c71b87165309a5935b3641a915 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 15:40:37 +0000
Subject: [PATCH 094/262] ha50 matching complete subject to checks

---
 .../ha_15_32/ha_analysis_batch_3.py           | 29 +++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 3b9bd7ca..a5b99a72 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1416,6 +1416,35 @@ class DataLoader:
             "Tewson Road", "Tewson Green"
         )
 
+        # Remove 55 Seabridge Lane
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "Seabridge Lane") &
+              (survey_list["Post Code"] == "ST5 4AG") &
+              (survey_list["NO."].isin([55])))
+        ]
+
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "Tyne Way") &
+              (survey_list["Post Code"] == "ST5 4AX") &
+              (survey_list["NO."].isin([56])))
+        ]
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "St.Bernards Place", "St Bernard Place"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Penarth Road", "Penarth Grove"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "St. Marys Road", "St Marys Road"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Larch Drive", "Larch Grove"
+        )
+
         return survey_list
 
     @staticmethod

From 33b3f51ca4701ede548e6af82f80ae191a3c0710 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 15:54:40 +0000
Subject: [PATCH 095/262] handling dupes for ha50

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index a5b99a72..7124919e 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1445,6 +1445,21 @@ class DataLoader:
             "Larch Drive", "Larch Grove"
         )
 
+        # Drop 31 Lauder Place North, as there is a duplicate. This version also has a wrong postcode
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "LAUDER PLACE NORTH") &
+              (survey_list["Post Code"] == "ST20QS") &
+              (survey_list["NO."].isin([31])))
+        ]
+
+        # Handle dropping of dupes
+        survey_list["street_pruner"] = survey_list["Street / Block Name"].str.lower().str.replace(" ", "")
+        survey_list["postcode_pruner"] = survey_list["Post Code"].str.lower().str.replace(" ", "")
+
+        # Should go to 18
+        survey_list = survey_list.drop_duplicates(["NO.", "street_pruner", "postcode_pruner"])
+        survey_list = survey_list.drop(columns=["street_pruner", "postcode_pruner"])
+
         return survey_list
 
     @staticmethod

From 23eaa5600118f0df54667ea36422153158db8dd5 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 15:57:00 +0000
Subject: [PATCH 096/262] checked ha50 ciga merge

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 7124919e..2feded98 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -168,6 +168,7 @@ class DataLoader:
         "HA15": 3,
         "HA16": 7,
         "HA24": 12,
+        "HA50": 4,
         "HA107": 51,
     }
 
@@ -429,6 +430,8 @@ class DataLoader:
             return "CIGA checks"
         elif "CIGA check" in workbook.sheetnames:
             return "CIGA check"
+        elif "CIGA requested" in workbook.sheetnames:
+            return "CIGA requested"
         else:
             return "CIGA"
 

From 180c0c53eaa48c185c75cf22aee448aac91bbe30 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 16:26:58 +0000
Subject: [PATCH 097/262] done with ha50

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 2feded98..0720a686 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1982,7 +1982,8 @@ class DataLoader:
             "ECO4 GBIS (ECO+)": "GBIS",
             "ECO4 GBIS (ECO+) JJC UNDER 73m²": "GBIS",
             "ECO4 AFFORDABLE WARMTH": "ECO4",
-            "Affordable Warmth": "ECO4"
+            "Affordable Warmth": "ECO4",
+            "ECO4 GBIS (ECO+) JJC UNDER 73m² ": "GBIS",
         }
 
         eco_eligibility_map = {

From c43349a5777326145107a6406779eadcdc6e9dab Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 16:39:47 +0000
Subject: [PATCH 098/262] Added ha41 matching

---
 .../ha_15_32/ha_analysis_batch_3.py           | 22 ++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 0720a686..4cf447aa 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -174,7 +174,8 @@ class DataLoader:
 
     UNMATCHED_ECO3 = {
         "HA25": 154,
-        "HA50": 5
+        "HA41": 26,
+        "HA50": 5,
     }
 
     def __init__(self, directories, december_figures_filepath, use_cache, rebuild):
@@ -264,6 +265,14 @@ class DataLoader:
                                              asset_list["add_5"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["post_code"].astype(str).str.lower().str.strip()
             asset_list["matching_postcode"] = asset_list["post_code"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA41":
+            asset_list["matching_address"] = asset_list["AddressLine1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["AddressLine2"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["AddressLine3"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["AddressLine4"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["AddressLine5"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Postcode"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
         elif ha_name == "HA50":
             asset_list["matching_address"] = asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["Post Code"].astype(str).str.lower().str.strip()
@@ -1683,6 +1692,10 @@ class DataLoader:
     def correct_ha50_eco3_list(eco3_list):
         return eco3_list
 
+    @staticmethod
+    def correct_ha41_eco3_list(eco3_list):
+        return eco3_list
+
     def merge_eco3_to_assets(self, asset_list, eco3_list, ha_name):
 
         eco3_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_eco3_list")
@@ -4384,15 +4397,14 @@ def app():
     # Add in:
     # TODO: Remove ECO3 sales from HA25
     priority_has = [
-        "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39", "HA50", "HA107",
+        "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39", "HA41", "HA50", "HA107",
     ]
     # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come
     # back on this],
     # Then: 28 [DONE],
     # 41, 48, 50
-    # 38[problematic, but no ECO4], 10 problematic (no eligibility),
-    # 20 has barely any in
-    # TODO - do 50
+    # Ignore for now:
+    # TODO: 38[problematic, but no ECO4], 10 problematic (no eligibility), 20 has barely any in
     # Filter down the directories to only the priority HAs
     directories = [d for d in directories if d.split("/")[2] in priority_has]
 

From c4af2251f4fac0af95676b7158e5baf1ad9d3d3c Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 16:41:58 +0000
Subject: [PATCH 099/262] data load for ha41

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 4cf447aa..c2d585a2 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -452,6 +452,8 @@ class DataLoader:
             return "ECO Survey"
         elif "ECO 4 Surveys completed" in workbook.sheetnames:
             return "ECO 4 Surveys completed"
+        elif "ECO4 Surveys" in workbook.sheetnames:
+            return "ECO4 Surveys"
         else:
             return "ECO surveys"
 
@@ -1533,6 +1535,10 @@ class DataLoader:
 
         return survey_list
 
+    @staticmethod
+    def correct_ha41_survey_list(survey_list):
+        return survey_list
+
     @staticmethod
     def levenstein_match(matching_string, df):
         match_to = df["matching_address"].tolist()

From ae714e42a62b1e6def566c6de46b34035d0ab7bb Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 17:11:44 +0000
Subject: [PATCH 100/262] identified 9 additional HAs worth analysing

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index c2d585a2..b22ea273 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -4403,14 +4403,16 @@ def app():
     # Add in:
     # TODO: Remove ECO3 sales from HA25
     priority_has = [
-        "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39", "HA41", "HA50", "HA107",
+        "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39", "HA41", "HA48",
+        "HA50", "HA107",
     ]
     # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come
-    # back on this],
-    # Then: 28 [DONE],
-    # 41, 48, 50
+    # back on this], 28 [DONE], 41 [DONE], 50 [DONE],
+    # 48 [WIP],
+    # Consider for ECO4: 2, 63, 12, 13, 136, 117
+    # COnsider for GBIS: 56, 35, 34
     # Ignore for now:
-    # TODO: 38[problematic, but no ECO4], 10 problematic (no eligibility), 20 has barely any in
+    # 38 [problematic, but no ECO4], 10 problematic (no eligibility), 20 has barely any in
     # Filter down the directories to only the priority HAs
     directories = [d for d in directories if d.split("/")[2] in priority_has]
 

From c84be65e8defa04aa1453f80b53d073c9011a629 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 19:52:08 +0000
Subject: [PATCH 101/262] ha48 ciga unmatched count added

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index b22ea273..56867ef7 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -159,6 +159,10 @@ class DataLoader:
         "HA25": {
             "address": "T1_Address",
             "postcode": "matching_postcode"
+        },
+        "HA48": {
+            "address": "Full Address",
+            "postcode": "Postcode"
         }
     }
 
@@ -170,6 +174,7 @@ class DataLoader:
         "HA24": 12,
         "HA50": 4,
         "HA107": 51,
+        "HA48": 0
     }
 
     UNMATCHED_ECO3 = {
@@ -190,7 +195,7 @@ class DataLoader:
 
     def create_asset_list_matching_address(self, ha_name, asset_list):
 
-        if ha_name in ["HA1", "HA6", "HA16", "HA24"]:
+        if ha_name in ["HA1", "HA6", "HA16", "HA24", "HA48"]:
             asset_list["matching_address"] = asset_list[
                 self.COLUMN_CONFIG[ha_name]["address"]
             ].astype(str).str.lower().str.strip()

From c3fd2ae902bd96250bc5ca376a424ebc8cbc3335 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 20:58:47 +0000
Subject: [PATCH 102/262] Adding HA2, data load done

---
 .../ha_15_32/ha_analysis_batch_3.py           | 34 ++++++++++++-------
 1 file changed, 21 insertions(+), 13 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 56867ef7..74c6d3f5 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -167,6 +167,7 @@ class DataLoader:
     }
 
     UNMATCHED_CIGA = {
+        "HA2": 0,
         "HA6": 117,
         "HA14": 3,
         "HA15": 3,
@@ -202,6 +203,12 @@ class DataLoader:
             asset_list["matching_postcode"] = asset_list[
                 self.COLUMN_CONFIG[ha_name]["postcode"]
             ].astype(str).str.lower().str.strip()
+        elif ha_name == "HA2":
+            # Create matching_address by concatenating Address 1, Address 2, Address 3, Address 4, Postcode
+            asset_list["matching_address"] = asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address Line 2"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Postcode"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
         elif ha_name == "HA7":
             # Create matching_address by concatenating Address 1, Address 2, Address 3, Address 4, Postcode
             asset_list["matching_address"] = asset_list["Address"].astype(str).str.lower().str.strip() + ", " + \
@@ -3794,7 +3801,6 @@ def forecast_remaining_sales(loader):
 
     results = []
     for ha_name, input_data in loader.data.items():
-
         # Original warmfront figures - ECO4
         original_warmfront_estimates = december_figures[december_figures["HA Name"] == ha_name]
 
@@ -4074,13 +4080,13 @@ def forecast_remaining_sales(loader):
             ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): original_warmfront_eco4,
             ("ECO4 original", "", "Remaining - #", ""): original_warmfront_remaining_eco4,
             ("ECO4 original", "", "Total - £", ""): original_warmfront_eco4_revenue,
-            ("ECO4 original", "", "Sold - £", ""): original_warmfront_sold_eco4,
+            ("ECO4 original", "", "Sold or cancelled - £", ""): original_warmfront_sold_eco4,
             ("ECO4 original", "", "Remaining - £", ""): original_warmfront_remaining_eco4_revenue,
             # GBIS - original warmfront figures
             ("", "Original Warmfront estimate", "Total - #", "GBIS - November"): original_warmfront_gbis,
             ("GBIS original", "", "Remaining - #", ""): original_warmfront_gbis,
             ("GBIS original", "", "Total - £", ""): original_warmfront_gbis_revenue,
-            ("GBIS original", "", "Sold - £", ""): original_warmfront_sold_gbis,
+            ("GBIS original", "", "Sold or cancelled - £", ""): original_warmfront_sold_gbis,
             ("GBIS original", "", "Remaining - £", ""): original_warmfront_remaining_gbis_revenue,
             # ECO4 - asset list, pre-ciga
             ("", "Warmfront post code list", "Total #", "ECO4 total (pre-ciga)"): eco4_pre_ciga,
@@ -4237,12 +4243,17 @@ def forecast_remaining_sales(loader):
     headline_total_delta = round(headline_total_delta, 1)
 
     headline_eco4_sold_since_november = (
-        totals_row[('ECO4 pre-ciga', '', 'Sold - £', '')] - totals_row[('ECO4 original', '', 'Sold - £', '')]
+        totals_row[('ECO4 pre-ciga', '', 'Sold - £', '')] +
+        totals_row[('ECO4 pre-ciga', '', 'Confirmed cancellations - £', '')] +  # confirmed canclleations
+        totals_row[('ECO4 pre-ciga', '', 'Unconfirmed cancellations - £', '')] -  # expected cancellations
+        totals_row[('ECO4 original', '', 'Sold or cancelled - £', '')]
     )
 
     headline_gbis_sold_since_november = (
-        totals_row[("GBIS Postcode list", "Warmfront post code list", "Sold - £", "GBIS total")] -
-        totals_row[('GBIS original', '', 'Sold - £', '')]
+        totals_row[("GBIS Postcode list", "Warmfront post code list", "Sold - £", "GBIS total")] +
+        totals_row[("GBIS Postcode list", "", "Confirmed cancellations - £", "")] +  # confirmed cancellations
+        totals_row[("GBIS Postcode list", "", "Unconfirmed cancellations - £", "")] -  # expected cancellations
+        totals_row[('GBIS original', '', 'Sold or cancelled - £', '')]
     )
 
     headlines = [
@@ -4261,7 +4272,7 @@ def forecast_remaining_sales(loader):
                 "ECO4 - November"): headline_eco4_original_remaining_revenue
         },
         {
-            ("", "", "", "HA Name"): "ECO4 Sold since November - £",
+            ("", "", "", "HA Name"): "ECO4 Sold or cancelled since November - £",
             (
                 "", "Original Warmfront estimate", "Total - #",
                 "ECO4 - November"): headline_eco4_sold_since_november
@@ -4290,7 +4301,7 @@ def forecast_remaining_sales(loader):
                 "ECO4 - November"): headline_gbis_original_remaining_revenue
         },
         {
-            ("", "", "", "HA Name"): "GBIS Sold since November - £",
+            ("", "", "", "HA Name"): "GBIS Sold or cancelled since November - £",
             (
                 "", "Original Warmfront estimate", "Total - #",
                 "ECO4 - November"): headline_gbis_sold_since_november
@@ -4399,21 +4410,18 @@ def app():
     rebuild_inputs = False
 
     # List all of the data in the folder
-
     directories = [str(file) for entry in DATA_FOLDER.iterdir() if entry.is_dir()
                    for file in entry.iterdir() if file.suffix == '.xlsx']
     # Grab the December HA figures filepath
     december_figures_filepath = "local_data/ha_data/HA_December_figures.csv"
 
     # Add in:
-    # TODO: Remove ECO3 sales from HA25
     priority_has = [
-        "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39", "HA41", "HA48",
+        "HA1", "HA2", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39", "HA41", "HA48",
         "HA50", "HA107",
     ]
     # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come
-    # back on this], 28 [DONE], 41 [DONE], 50 [DONE],
-    # 48 [WIP],
+    # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE],
     # Consider for ECO4: 2, 63, 12, 13, 136, 117
     # COnsider for GBIS: 56, 35, 34
     # Ignore for now:

From 19850f924445035e3880eaae40f750d21fb12b80 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 21:34:46 +0000
Subject: [PATCH 103/262] fixing up ha63 eco3 list

---
 .../ha_15_32/ha_analysis_batch_3.py           | 46 +++++++++++++++++--
 1 file changed, 42 insertions(+), 4 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 74c6d3f5..aebf0506 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -289,6 +289,10 @@ class DataLoader:
             asset_list["matching_address"] = asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["Post Code"].astype(str).str.lower().str.strip()
             asset_list["matching_postcode"] = asset_list["Post Code"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA63":
+            asset_list["matching_address"] = asset_list["Address1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["POSTCODE"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["POSTCODE"].astype(str).str.lower().str.strip()
         elif ha_name == "HA107":
             # Create matching_address by concatenating House No, Street, Town, District, Postcode
             asset_list["matching_address"] = asset_list["House No"].astype(str).str.lower().str.strip() + ", " + \
@@ -1551,6 +1555,16 @@ class DataLoader:
     def correct_ha41_survey_list(survey_list):
         return survey_list
 
+    @staticmethod
+    def correct_ha63_survey_list(survey_list):
+        # Drop some filler rows
+        survey_list = survey_list[
+            ~survey_list[survey_list.columns[0]].isin(
+                ["NO JOBS SURVEYED JULY 2021 ", "NO JOBS SURVEYED SEPTEMBER 2021"]
+            )
+        ]
+        return survey_list
+
     @staticmethod
     def levenstein_match(matching_string, df):
         match_to = df["matching_address"].tolist()
@@ -1714,6 +1728,26 @@ class DataLoader:
     def correct_ha41_eco3_list(eco3_list):
         return eco3_list
 
+    @staticmethod
+    def correct_ha63_eco3_list(eco3_list):
+        eco3_list = eco3_list[~pd.isnull(eco3_list["Post Code"])]
+        # Some postcode that aren't in the asset list
+        eco3_list = eco3_list[
+            ~eco3_list["Post Code"].isin(
+                ["NR32 15X", "NR30 2BT"]
+            )
+        ]
+
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "POUND COTTAGES - BLOOMSBERRY CLOSE", "POUND COTTAGES"
+        )
+
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "FREDRICK ROAD", "Frederick Road"
+        )
+
+        return eco3_list
+
     def merge_eco3_to_assets(self, asset_list, eco3_list, ha_name):
 
         eco3_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_eco3_list")
@@ -1799,12 +1833,15 @@ class DataLoader:
         # We verify the missed
         # HA25 contains 119 missed entries. These are actually 24 unique postcodes, and the majority belong to 2
         # where many surveys were conducted on house numbers, not in the asset list
+        # 154 missed, 2827 matched for HA 25
         if len(missed) != self.UNMATCHED_ECO3[ha_name]:
             raise ValueError(
                 f"Unmatched addresses for {ha_name} is not as expected, got {len(missed)} unmatched"
             )
 
-        # 154 missed, 2827 matched for HA 25
+        # 41
+        missed_df = eco3_list[eco3_list["eco3_list_row_id"].isin(missed)]
+        missed_df.head(1)["Street / Block Name"]
 
         matching_lookup = pd.DataFrame(matching_lookup)
         # Check dupes as this will cause problems later on
@@ -4418,11 +4455,12 @@ def app():
     # Add in:
     priority_has = [
         "HA1", "HA2", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39", "HA41", "HA48",
-        "HA50", "HA107",
+        "HA50", "HA63", "HA107",
     ]
     # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come
-    # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE],
-    # Consider for ECO4: 2, 63, 12, 13, 136, 117
+    # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE]
+    # 63 [WIP]
+    # Consider for ECO4: 12, 13, 136, 117
     # COnsider for GBIS: 56, 35, 34
     # Ignore for now:
     # 38 [problematic, but no ECO4], 10 problematic (no eligibility), 20 has barely any in

From 47b97fce0a6eec4fe15a967f1721e18908bffccf Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 21:46:44 +0000
Subject: [PATCH 104/262] fixing eco3 matching for ha63

---
 .../ha_15_32/ha_analysis_batch_3.py           | 27 +++++++++++++++----
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index aebf0506..bab5cdab 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -174,6 +174,7 @@ class DataLoader:
         "HA16": 7,
         "HA24": 12,
         "HA50": 4,
+        "HA63": 15,
         "HA107": 51,
         "HA48": 0
     }
@@ -182,6 +183,7 @@ class DataLoader:
         "HA25": 154,
         "HA41": 26,
         "HA50": 5,
+        "HA63": 0
     }
 
     def __init__(self, directories, december_figures_filepath, use_cache, rebuild):
@@ -1746,6 +1748,25 @@ class DataLoader:
             "FREDRICK ROAD", "Frederick Road"
         )
 
+        # For denmark street, remove the space from the house number
+        eco3_list["NO "] = np.where(
+            eco3_list["Street / Block Name"] == "DENMARK STREET",
+            eco3_list["NO "].str.replace(" ", ""),
+            eco3_list["NO "]
+        )
+
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "OLD HOSPITAL MEWS HOSPITAL WALK", "Old Hospital Mews"
+        )
+
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "Portland House, Portland Street", "Portland House"
+        )
+
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "MIDDLE MARKET STREET", "Middle Market Road"
+        )
+
         return eco3_list
 
     def merge_eco3_to_assets(self, asset_list, eco3_list, ha_name):
@@ -1791,7 +1812,7 @@ class DataLoader:
             if isinstance(house_number, str):
                 house_number = house_number.lower().strip()
 
-            if not any(df["matching_address"].str.contains(str(house_number))):
+            if not any(df["HouseNo"].str.contains(str(house_number))):
                 if "flat" in str(house_number):
                     house_number = house_number.split("flat")[1].strip()
 
@@ -1839,10 +1860,6 @@ class DataLoader:
                 f"Unmatched addresses for {ha_name} is not as expected, got {len(missed)} unmatched"
             )
 
-        # 41
-        missed_df = eco3_list[eco3_list["eco3_list_row_id"].isin(missed)]
-        missed_df.head(1)["Street / Block Name"]
-
         matching_lookup = pd.DataFrame(matching_lookup)
         # Check dupes as this will cause problems later on
         if matching_lookup["asset_list_row_id"].duplicated().any():

From 9cd166160bfbe9a3cc89f5d43231c3c8ed5c2ede Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 21:51:16 +0000
Subject: [PATCH 105/262] sorted ha63 facts and figures

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index bab5cdab..2a1a4b16 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -2077,7 +2077,8 @@ class DataLoader:
             "eco4 (subject to ciga/archetype check": "eco4 (subject to ciga)",
             "eco4 (subject to archetype check)": "eco4",
             "eco4 (subject to ciga/archetype)": "eco4 (subject to ciga)",
-            "eco4  (subject to ciga)": "eco4 (subject to ciga)"
+            "eco4  (subject to ciga)": "eco4 (subject to ciga)",
+            "eco4(subject to ciga)": "eco4 (subject to ciga)",
         }
 
         ha_facts_and_figures = []

From 76ef60d06c8d508d4c78e1bda320902880bce96c Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 22:16:05 +0000
Subject: [PATCH 106/262] done with ha12

---
 .../ha_15_32/ha_analysis_batch_3.py           | 58 ++++++++++++++-----
 1 file changed, 45 insertions(+), 13 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 2a1a4b16..4dbf326b 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -148,6 +148,10 @@ class DataLoader:
             "address": "propertyaddress",
             "postcode": "address"  # The 'address' column actually contains postcode
         },
+        "HA12": {
+            "address": "Full Address",
+            "postcode": "Postcode"
+        },
         "HA16": {
             "address": "Address",
             "postcode": "Postcode"
@@ -169,6 +173,7 @@ class DataLoader:
     UNMATCHED_CIGA = {
         "HA2": 0,
         "HA6": 117,
+        "HA12": 6,
         "HA14": 3,
         "HA15": 3,
         "HA16": 7,
@@ -198,7 +203,7 @@ class DataLoader:
 
     def create_asset_list_matching_address(self, ha_name, asset_list):
 
-        if ha_name in ["HA1", "HA6", "HA16", "HA24", "HA48"]:
+        if ha_name in ["HA1", "HA6", "HA12", "HA16", "HA24", "HA48"]:
             asset_list["matching_address"] = asset_list[
                 self.COLUMN_CONFIG[ha_name]["address"]
             ].astype(str).str.lower().str.strip()
@@ -1558,13 +1563,39 @@ class DataLoader:
         return survey_list
 
     @staticmethod
-    def correct_ha63_survey_list(survey_list):
-        # Drop some filler rows
-        survey_list = survey_list[
-            ~survey_list[survey_list.columns[0]].isin(
-                ["NO JOBS SURVEYED JULY 2021 ", "NO JOBS SURVEYED SEPTEMBER 2021"]
-            )
-        ]
+    def correct_ha12_survey_list(survey_list):
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Henstone Road", "Hanstone Road"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Lindern avenue", "Linden Avenue"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "priness way", "Princess Way"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Worth Crecesent", "Worth Crescent"
+        )
+
+        survey_list["Post Code"] = survey_list["Post Code"].str.replace(
+            "DY117HA", "DY11 7HA"
+        )
+
+        survey_list["Post Code"] = survey_list["Post Code"].str.replace(
+            "DY117HF", "DY11 7HF"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Adderbrook Crescent", "Addenbrooke Crescent"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Kinver Road", "Kinver Avenue"
+        )
+
         return survey_list
 
     @staticmethod
@@ -2079,6 +2110,7 @@ class DataLoader:
             "eco4 (subject to ciga/archetype)": "eco4 (subject to ciga)",
             "eco4  (subject to ciga)": "eco4 (subject to ciga)",
             "eco4(subject to ciga)": "eco4 (subject to ciga)",
+            "eco4 subject to ciga": "eco4 (subject to ciga)",
         }
 
         ha_facts_and_figures = []
@@ -4472,13 +4504,13 @@ def app():
 
     # Add in:
     priority_has = [
-        "HA1", "HA2", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39", "HA41", "HA48",
-        "HA50", "HA63", "HA107",
+        "HA1", "HA2", "HA6", "HA7", "HA12", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39", "HA41",
+        "HA48", "HA50", "HA63", "HA107",
     ]
     # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come
-    # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE]
-    # 63 [WIP]
-    # Consider for ECO4: 12, 13, 136, 117
+    # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE]
+    #
+    # Consider for ECO4: 13, 136, 117
     # COnsider for GBIS: 56, 35, 34
     # Ignore for now:
     # 38 [problematic, but no ECO4], 10 problematic (no eligibility), 20 has barely any in

From e3f36fc881925fd845f623d469d0faf9cd6b89c3 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 8 Mar 2024 18:52:32 +0000
Subject: [PATCH 107/262] HA117 data load

---
 .../ha_15_32/ha_analysis_batch_3.py           | 27 +++++++++++++++----
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 4dbf326b..d4de589a 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -188,7 +188,8 @@ class DataLoader:
         "HA25": 154,
         "HA41": 26,
         "HA50": 5,
-        "HA63": 0
+        "HA63": 0,
+        "HA117": 4
     }
 
     def __init__(self, directories, december_figures_filepath, use_cache, rebuild):
@@ -308,6 +309,11 @@ class DataLoader:
                                              asset_list["District"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["Postcode"].astype(str).str.lower().str.strip()
             asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA117":
+            asset_list["matching_address"] = asset_list["Address1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address2"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["PostCode"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["PostCode"].astype(str).str.lower().str.strip()
         else:
             raise NotImplementedError("implement me")
 
@@ -1800,6 +1806,17 @@ class DataLoader:
 
         return eco3_list
 
+    @staticmethod
+    def correct_ha117_eco3_list(eco3_list):
+        # Delete rows where postcode is null - there are some placeholder rows where this happens
+        eco3_list = eco3_list[~pd.isnull(eco3_list["Post Code"])]
+
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "TARRING ROAD", "155 TARRING ROAD"
+        )
+
+        return eco3_list
+
     def merge_eco3_to_assets(self, asset_list, eco3_list, ha_name):
 
         eco3_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_eco3_list")
@@ -4505,13 +4522,13 @@ def app():
     # Add in:
     priority_has = [
         "HA1", "HA2", "HA6", "HA7", "HA12", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39", "HA41",
-        "HA48", "HA50", "HA63", "HA107",
+        "HA48", "HA50", "HA63", "HA107", "HA117"
     ]
     # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come
     # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE]
-    #
-    # Consider for ECO4: 13, 136, 117
-    # COnsider for GBIS: 56, 35, 34
+    # 117 [WIP]
+    # Consider for ECO4: 13
+    # Consider for GBIS: 56, 35, 34
     # Ignore for now:
     # 38 [problematic, but no ECO4], 10 problematic (no eligibility), 20 has barely any in
     # Filter down the directories to only the priority HAs

From 15efd02b8b8220f1d6cc745cb1b4a571be808643 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 8 Mar 2024 19:14:35 +0000
Subject: [PATCH 108/262] done ha117, ha13 next

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index d4de589a..97ac96da 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -2119,15 +2119,19 @@ class DataLoader:
             "ECO4 GBIS (ECO+) JJC UNDER 73m² ": "GBIS",
         }
 
+        # Since it seems like "subject to archetype check" has some failure conditions, for simplicity, we
+        # treat these as similar to subject to CIGA, and therefore unconfirmed worked that could fail. There
+        # are only a small volume of properties for which we see this
         eco_eligibility_map = {
             "not eligble": "not eligible",
             "eco 4(subject to ciga)": "eco4 (subject to ciga)",
             "eco4 (subject to ciga/archetype check": "eco4 (subject to ciga)",
-            "eco4 (subject to archetype check)": "eco4",
+            "eco4 (subject to archetype check)": "eco4 (subject to ciga)",
             "eco4 (subject to ciga/archetype)": "eco4 (subject to ciga)",
             "eco4  (subject to ciga)": "eco4 (subject to ciga)",
             "eco4(subject to ciga)": "eco4 (subject to ciga)",
             "eco4 subject to ciga": "eco4 (subject to ciga)",
+            "eco4 (subject to archetype)": "eco4 (subject to ciga)",
         }
 
         ha_facts_and_figures = []
@@ -4525,9 +4529,9 @@ def app():
         "HA48", "HA50", "HA63", "HA107", "HA117"
     ]
     # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come
-    # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE]
-    # 117 [WIP]
-    # Consider for ECO4: 13
+    # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE]
+    # 13 [WIP]
+    # Consider for ECO4:
     # Consider for GBIS: 56, 35, 34
     # Ignore for now:
     # 38 [problematic, but no ECO4], 10 problematic (no eligibility), 20 has barely any in

From b2b8fd8f84321f369cc3d14b009515759a2eff9a Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 8 Mar 2024 19:20:38 +0000
Subject: [PATCH 109/262] ha13 49% matched

---
 .../ha_15_32/ha_analysis_batch_3.py           | 23 +++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 97ac96da..3edc1490 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -224,6 +224,12 @@ class DataLoader:
                                              asset_list["Address3"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["Postcode"].astype(str).str.lower().str.strip()
             asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA13":
+            asset_list["matching_address"] = asset_list["Address 1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["address 2"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address 3"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Postcode"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
         elif ha_name == "HA14":
             # Create matching_address by concatenating Address 1, Address 2, Address 3, Address 4, Postcode
             asset_list["matching_address"] = asset_list["Address 1"].astype(str).str.lower().str.strip() + ", " + \
@@ -1604,6 +1610,19 @@ class DataLoader:
 
         return survey_list
 
+    @staticmethod
+    def correct_ha13_survey_list(survey_list):
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Woodfarm Road", "WOOD FARM ROAD"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "ALLANDALE ROAD", "ALLANDALE"
+        )
+
+        return survey_list
+
     @staticmethod
     def levenstein_match(matching_string, df):
         match_to = df["matching_address"].tolist()
@@ -4525,8 +4544,8 @@ def app():
 
     # Add in:
     priority_has = [
-        "HA1", "HA2", "HA6", "HA7", "HA12", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39", "HA41",
-        "HA48", "HA50", "HA63", "HA107", "HA117"
+        "HA1", "HA2", "HA6", "HA7", "HA12", "HA13", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39",
+        "HA41", "HA48", "HA50", "HA63", "HA107", "HA117"
     ]
     # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come
     # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE]

From 21117f3e585be18d5da6e49744353f7ed830a483 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 8 Mar 2024 19:32:42 +0000
Subject: [PATCH 110/262] worked through ha13 matching - need to do facts and
 figures

---
 .../ha_15_32/ha_analysis_batch_3.py           | 28 +++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 3edc1490..15a4f438 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -174,6 +174,7 @@ class DataLoader:
         "HA2": 0,
         "HA6": 117,
         "HA12": 6,
+        "HA13": 119,
         "HA14": 3,
         "HA15": 3,
         "HA16": 7,
@@ -1621,6 +1622,30 @@ class DataLoader:
             "ALLANDALE ROAD", "ALLANDALE"
         )
 
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "NEWFIELDS LANE", "NEWFIELD LANE"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "BROADFIELDS ROAD", "BROADFIELD ROAD"
+        )
+
+        survey_list["Post Code"] = survey_list["Post Code"].str.replace(
+            "HP2 5SF+", "HP2 5SF"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "PESCOTT HILL", "PESCOT HILL"
+        )
+
+        # This is a duplicate record
+        survey_list = survey_list[
+            ~((survey_list["NO."] == 33) &
+              (survey_list["Street / Block Name"] == "Turners Hill") &
+              (survey_list["Post Code"] == "HP2 4LH") &
+              (survey_list["INSTALLED OR CANCELLED"] == "NO UPDATE - CHECKED 18.12.23"))
+        ]
+
         return survey_list
 
     @staticmethod
@@ -1652,6 +1677,9 @@ class DataLoader:
                 postcode.lower() not in asset_list["matching_postcode"].values
             ]
 
+        if ha_name == "HA13":
+            missed_postcodes = ["hp17 8le"]
+
         matching_lookup = []
         for _, row in tqdm(survey_list.iterrows(), total=len(survey_list)):
 

From f03485d4f49045e8f68cf7a8dcc5caf58113ede1 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 9 Mar 2024 14:41:38 +0000
Subject: [PATCH 111/262] updating facts and figures to treat archetype
 dependent properties separately

---
 .../ha_15_32/ha_analysis_batch_3.py            | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 15a4f438..c0f3ab12 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -2172,13 +2172,12 @@ class DataLoader:
         eco_eligibility_map = {
             "not eligble": "not eligible",
             "eco 4(subject to ciga)": "eco4 (subject to ciga)",
-            "eco4 (subject to ciga/archetype check": "eco4 (subject to ciga)",
-            "eco4 (subject to archetype check)": "eco4 (subject to ciga)",
-            "eco4 (subject to ciga/archetype)": "eco4 (subject to ciga)",
+            "eco4 (subject to ciga/archetype check": "eco4 (subject to ciga) (subject to archetype)",
+            "eco4 (subject to archetype check)": "eco4 (subject to archetype)",
+            "eco4 (subject to ciga/archetype)": "eco4 (subject to ciga) (subject to archetype)",
             "eco4  (subject to ciga)": "eco4 (subject to ciga)",
             "eco4(subject to ciga)": "eco4 (subject to ciga)",
             "eco4 subject to ciga": "eco4 (subject to ciga)",
-            "eco4 (subject to archetype)": "eco4 (subject to ciga)",
         }
 
         ha_facts_and_figures = []
@@ -2330,7 +2329,7 @@ class DataLoader:
                 asset_list = asset_list.merge(survey_list_to_merge, how='left', on="asset_list_row_id")
                 # Update the cases where properties have sold, but are missing a CIGA check
                 asset_list["ECO Eligibility"] = np.where(
-                    (asset_list["ECO Eligibility"] == "eco4 (subject to ciga)") & (
+                    (asset_list["ECO Eligibility"].str.contains("(subject to ciga)")) & (
                         asset_list["has_a_survey_record"] == True
                     ),
                     "eco4 - passed ciga",
@@ -2349,7 +2348,14 @@ class DataLoader:
                 # Update the cases where a property was marked as eligible for ECO4, but sold for GBIS
                 asset_list["ECO Eligibility"] = np.where(
                     (asset_list["ECO Eligibility"].isin(
-                        ["eco4", "eco4 (subject to ciga)", "eco4 - passed ciga", "failed ciga"]
+                        [
+                            "eco4",
+                            "eco4 (subject to ciga)",
+                            "eco4 - passed ciga",
+                            "failed ciga",
+                            "eco4 (subject to archetype)",
+                            "eco4 (subject to ciga) (subject to archetype)"
+                        ]
                     )) & (
                         asset_list["installation_status"].isin(
                             ["GBIS - installed", "GBIS - cancelled", "GBIS - in progress"]

From c1a15052f246288c5216e2c80849ccef3b2c6be0 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 9 Mar 2024 14:46:26 +0000
Subject: [PATCH 112/262] Handling warning for regex searching of (subject to
 ciga)

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index c0f3ab12..430e5ff7 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -2329,7 +2329,7 @@ class DataLoader:
                 asset_list = asset_list.merge(survey_list_to_merge, how='left', on="asset_list_row_id")
                 # Update the cases where properties have sold, but are missing a CIGA check
                 asset_list["ECO Eligibility"] = np.where(
-                    (asset_list["ECO Eligibility"].str.contains("(subject to ciga)")) & (
+                    (asset_list["ECO Eligibility"].str.contains("subject to ciga")) & (
                         asset_list["has_a_survey_record"] == True
                     ),
                     "eco4 - passed ciga",

From b46da0f6c0140b28d00385f02f29cae91f412b2d Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 9 Mar 2024 15:48:51 +0000
Subject: [PATCH 113/262] adding in archetype check process to model

---
 .../ha_15_32/ha_analysis_batch_3.py           | 99 +++++++++++++++----
 1 file changed, 82 insertions(+), 17 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 430e5ff7..9a959956 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -3658,19 +3658,47 @@ def patch_cleaned(cleaned):
 
 def calculate_eco4_post_ciga(
     eligiblity_counts, input_data, ha_ciga_conversion_rate, ha_ciga_pass_to_sale_rate, ha_eco4_to_sale_rate,
-    eco4_rate
+    eco4_rate, archetype_conversion_rate
 ):
     remaining_needing_ciga_check = eligiblity_counts[
-        eligiblity_counts["ECO Eligibility"] == "eco4 (subject to ciga)"
+        eligiblity_counts["ECO Eligibility"].str.contains("subject to ciga") &
+        ~eligiblity_counts["ECO Eligibility"].str.contains("subject to archetype")
         ]["count"].sum()
 
+    remaining_needing_ciga_and_archetype_check = eligiblity_counts[
+        eligiblity_counts["ECO Eligibility"].str.contains("subject to ciga") &
+        eligiblity_counts["ECO Eligibility"].str.contains("subject to archetype")
+        ]["count"].sum()
+    # We scale this down by the archetype_conversion_rate, and add this on to the remaining_needing_ciga_check
+    remaining_needing_ciga_and_archetype_check_passed = np.round(
+        remaining_needing_ciga_and_archetype_check * archetype_conversion_rate
+    )
+
+    remaining_needing_ciga_check += remaining_needing_ciga_and_archetype_check_passed
+
+    eco4_no_ciga_needed = eligiblity_counts[
+        eligiblity_counts["ECO Eligibility"] == "eco4"
+        ]["count"].sum()
+
+    eco4_no_ciga_archetype_needed = eligiblity_counts[
+        eligiblity_counts["ECO Eligibility"] == "eco4 (subject to archetype)"
+        ]["count"].sum()
+    eco4_no_ciga_archetype_needed_passed = np.round(
+        eco4_no_ciga_archetype_needed * archetype_conversion_rate
+    )
+
+    eco4_no_ciga_needed += eco4_no_ciga_archetype_needed_passed
+
+    failed_archetype_check = int(
+        remaining_needing_ciga_and_archetype_check +
+        eco4_no_ciga_archetype_needed -
+        remaining_needing_ciga_and_archetype_check_passed -
+        eco4_no_ciga_archetype_needed_passed
+    )
+
     has_ciga_check = not input_data["ciga_list"].empty
     if has_ciga_check:
 
-        eco4_no_ciga_needed = eligiblity_counts[
-            eligiblity_counts["ECO Eligibility"] == "eco4"
-            ]["count"].sum()
-
         eco4_ciga_passed = eligiblity_counts[
             eligiblity_counts["ECO Eligibility"] == "eco4 - passed ciga"
             ]["count"].sum()
@@ -3681,8 +3709,10 @@ def calculate_eco4_post_ciga(
 
         eco4_no_ciga_needed_or_ciga_passed = eco4_no_ciga_needed + eco4_ciga_passed
 
-        eco4_confirmed = (eco4_no_ciga_needed * ha_eco4_to_sale_rate) + (eco4_ciga_passed * ha_ciga_pass_to_sale_rate)
-        eco4_confirmed = np.round(eco4_confirmed)
+        eco4_confirmed = np.round(
+            (eco4_no_ciga_needed * ha_eco4_to_sale_rate) +
+            (eco4_ciga_passed * ha_ciga_pass_to_sale_rate)
+        )
 
         eco4_no_ciga_needed_cancellations = int(eco4_no_ciga_needed_or_ciga_passed - eco4_confirmed)
 
@@ -3704,9 +3734,7 @@ def calculate_eco4_post_ciga(
 
         eco4_expected_cancellations = eco4_no_ciga_needed_cancellations + eco4_ciga_needed_cancellations
     else:
-        eco4_no_ciga_needed = eligiblity_counts[
-            eligiblity_counts["ECO Eligibility"] == "eco4"
-            ]["count"].sum()
+
         eco4_confirmed_ciga_failures = 0
         # Multiply by sale conversion
         eco4_confirmed = np.round(eco4_no_ciga_needed * ha_eco4_to_sale_rate)
@@ -3735,6 +3763,9 @@ def calculate_eco4_post_ciga(
         "ECO4 - post CIGA - £": eco4_post_ciga * eco4_rate,
         "Of which confirmed - £": eco4_confirmed * eco4_rate,
         "Of which forecast - £": eco4_remaining_forecast * eco4_rate,
+        # Archetype check failures
+        "Estimated total - failed archetype check - #": failed_archetype_check,
+        "Estimated total - failed archetype check - £": failed_archetype_check * eco4_rate,
         # Ciga failures
         "Estimated total - failed CIGA": int(eco4_confirmed_ciga_failures + eco4_estimated_ciga_failures),
         "Confirmed CIGA failures": eco4_confirmed_ciga_failures,
@@ -3766,6 +3797,14 @@ def forecast_remaining_sales(loader):
     gbis_rate = 600
     eco4_rate = 1710
 
+    # Based on ONS https://www.ons.gov.uk/peoplepopulationandcommunity/housing/bulletins/housingenglandandwales
+    # /census2021
+    # there are 5.7 million terraced properties in England and Wales, of the 19.3 million houses or bungalows. We
+    # therefore apply a 30% discount to homes that depend on an archetype check, since around 30% of them will be
+    # mid-terraced. This 30% is slightly harsh, but we choose to be conservative.
+    # Therefore, the archetype check conversion rate is 70%
+    archetype_conversion_rate = 0.7
+
     # 1) Calculate the conversion rate from passed CIGA to actual sale
     converted_ciga_jobs = []
     for ha_name, input_data in loader.data.items():
@@ -4010,13 +4049,27 @@ def forecast_remaining_sales(loader):
 
         eco4_pre_ciga = eligiblity_counts[
             eligiblity_counts["ECO Eligibility"].isin(
-                ["eco4", "eco4 (subject to ciga)", "eco4 - passed ciga", "failed ciga"]
+                [
+                    "eco4",
+                    "eco4 (subject to ciga)",
+                    "eco4 - passed ciga",
+                    "failed ciga",
+                    "eco4 (subject to ciga) (subject to archetype)",
+                    "eco4 (subject to archetype)"
+                ]
             )
         ]["count"].sum()
 
         eco4_pre_ciga_remaining = eligiblity_counts_remaining[
             eligiblity_counts_remaining["ECO Eligibility"].isin(
-                ["eco4", "eco4 (subject to ciga)", "eco4 - passed ciga", "failed ciga"]
+                [
+                    "eco4",
+                    "eco4 (subject to ciga)",
+                    "eco4 - passed ciga",
+                    "failed ciga",
+                    "eco4 (subject to ciga) (subject to archetype)",
+                    "eco4 (subject to archetype)"
+                ]
             )
         ]["count"].sum()
 
@@ -4065,7 +4118,8 @@ def forecast_remaining_sales(loader):
             ha_ciga_conversion_rate=ha_ciga_conversion_rate,
             ha_ciga_pass_to_sale_rate=ha_ciga_pass_to_sale_rate,
             ha_eco4_to_sale_rate=ha_eco4_to_sale_rate,
-            eco4_rate=eco4_rate
+            eco4_rate=eco4_rate,
+            archetype_conversion_rate=archetype_conversion_rate
         )
 
         eco4_post_ciga_remaining_results = calculate_eco4_post_ciga(
@@ -4074,7 +4128,8 @@ def forecast_remaining_sales(loader):
             ha_ciga_conversion_rate=ha_ciga_conversion_rate,
             ha_ciga_pass_to_sale_rate=ha_ciga_pass_to_sale_rate,
             ha_eco4_to_sale_rate=ha_eco4_to_sale_rate,
-            eco4_rate=eco4_rate
+            eco4_rate=eco4_rate,
+            archetype_conversion_rate=archetype_conversion_rate
         )
 
         # Calculate the delta compared to Warmfront's original remaining
@@ -4111,6 +4166,8 @@ def forecast_remaining_sales(loader):
         gbis_remaining = int(np.round(gbis_remaining * ha_gbis_sale_conversion))
         gbis_remaining_revenue = int(gbis_remaining * gbis_rate)
 
+        survey_list["installation_status"].value_counts()
+
         # GBIS delta
         if original_warmfront_remaining_gbis == 0:
             gbis_delta_vs_original_estimate_remaining = "N/A"
@@ -4176,7 +4233,7 @@ def forecast_remaining_sales(loader):
                 surveys_with_eligibility["installation_status"] == "GBIS - cancelled"
                 ].shape[0]
 
-            expected_gbis_unconfirmed_sales = incomplete_gbis_sales * ha_gbis_sale_conversion
+            expected_gbis_unconfirmed_sales = np.round(incomplete_gbis_sales * ha_gbis_sale_conversion)
 
             gbis_expected_cancellations = int(incomplete_gbis_sales - expected_gbis_unconfirmed_sales)
 
@@ -4187,10 +4244,12 @@ def forecast_remaining_sales(loader):
         # Add in the variance:
         # We should expect that the pre-ciga total is:
         # 1) The number of post CIGA successes +
+        # 2) The number of archetype failures +
         # 2) the number of CIGA failures +
         # 3) The number of cancellations
         variance_total = eco4_pre_ciga - (
             eco4_post_ciga_total_results["ECO4 - post CIGA - #"] +
+            eco4_post_ciga_total_results["Estimated total - failed archetype check - #"] +
             eco4_post_ciga_total_results['Estimated total - failed CIGA'] +
             eco4_post_ciga_total_results["Expected cancellations - #"]
         )
@@ -4199,6 +4258,7 @@ def forecast_remaining_sales(loader):
 
         variance_remaining = eco4_pre_ciga_remaining - (
             eco4_post_ciga_remaining_results["ECO4 - post CIGA - #"] +
+            eco4_post_ciga_remaining_results["Estimated total - failed archetype check - #"] +
             eco4_post_ciga_remaining_results['Estimated total - failed CIGA'] +
             eco4_post_ciga_remaining_results["Expected cancellations - #"]
         )
@@ -4290,6 +4350,11 @@ def forecast_remaining_sales(loader):
             ("ECO4 Cancellations", "", "Of which expected cancellations - £", ""): eco4_post_ciga_remaining_results[
                 "Expected cancellations - £"
             ],
+            # Archetype check failures
+            ("ECO4 CIGA failures", "", "Estimated total - failed Archetype check - #", ""):
+                eco4_post_ciga_remaining_results['Estimated total - failed archetype check - #'],
+            ("ECO4 CIGA failures", "", "Estimated total - failed Archetype check - £", ""):
+                eco4_post_ciga_remaining_results['Estimated total - failed archetype check - £'],
             # CIGA failures
             ("ECO4 CIGA failures", "", "Estimated total - failed CIGA - #", ""): eco4_post_ciga_remaining_results[
                 'Estimated total - failed CIGA'
@@ -4324,7 +4389,7 @@ def forecast_remaining_sales(loader):
         }
 
         # Make sure nothing is forgotten due to duplicate multi-index keys
-        if len(to_append) != 47:
+        if len(to_append) != 49:
             raise ValueError("Something went wrong")
 
         results.append(to_append)

From a7e593ecd9289551d7ef47481ea3dff0c2a70592 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 9 Mar 2024 16:15:16 +0000
Subject: [PATCH 114/262] Added handling of archetype checks and corrected gbis
 calculations

---
 .../ha_15_32/ha_analysis_batch_3.py           | 65 ++++++++++++++-----
 1 file changed, 47 insertions(+), 18 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 9a959956..aca2ce43 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -4154,19 +4154,25 @@ def forecast_remaining_sales(loader):
         else:
             ha_gbis_sale_conversion = median_gbis_to_install
 
-        gbis_total = eligiblity_counts[
+        gbis_total_pre_cancellations = eligiblity_counts[
             eligiblity_counts["ECO Eligibility"] == "gbis"
             ]["count"].sum()
-        gbis_total = int(np.round(gbis_total * ha_gbis_sale_conversion))
-        gbis_total_revenue = int(gbis_total * gbis_rate)
 
-        gbis_remaining = eligiblity_counts_remaining[
+        gbis_total_pre_cancellations_revenue = gbis_total_pre_cancellations * gbis_rate
+        # gbis_total = int(np.round(gbis_total_pre_cancellations * ha_gbis_sale_conversion))
+        # gbis_total_revenue = int(gbis_total * gbis_rate)
+
+        gbis_remaining_pre_cancellations = eligiblity_counts_remaining[
             eligiblity_counts_remaining["ECO Eligibility"] == "gbis"
             ]["count"].sum()
-        gbis_remaining = int(np.round(gbis_remaining * ha_gbis_sale_conversion))
+        gbis_remaining_pre_cancellations_revenue = (
+            gbis_remaining_pre_cancellations * gbis_rate
+        )
+        # These are the GBIS jobs we expect to sell
+        gbis_remaining = int(np.round(gbis_remaining_pre_cancellations * ha_gbis_sale_conversion))
         gbis_remaining_revenue = int(gbis_remaining * gbis_rate)
-
-        survey_list["installation_status"].value_counts()
+        # This is the number we expect to cancel
+        gbis_remaining_expected_cancellations = int(gbis_remaining_pre_cancellations - gbis_remaining) * gbis_rate
 
         # GBIS delta
         if original_warmfront_remaining_gbis == 0:
@@ -4179,9 +4185,10 @@ def forecast_remaining_sales(loader):
         # Current sales figures
         # For any sales surveys that are complete, that could still cancel, we apply a conversion rate
         eco4_actually_sold = 0
-        gbis_actually_sold = 0
         eco4_confirmed_cancellations = 0
         eco4_expected_cancellations = 0
+
+        gbis_actually_sold = 0
         gbis_confirmed_cancellations = 0
         gbis_expected_cancellations = 0
         if not survey_list.empty:
@@ -4284,17 +4291,30 @@ def forecast_remaining_sales(loader):
             raise ValueError("Something went wrong in pre_ciga_eco4_variance")
 
         # Check GBIS total variance
-        gbis_variance = (
-            gbis_total_revenue -
-            gbis_actually_sold -
-            gbis_confirmed_cancellations * gbis_rate -
-            gbis_expected_cancellations * gbis_rate -
-            gbis_remaining_revenue
+        # The total before cancellations should equal:
+        # The number of sold +
+        # The number of confirmed cancelled +
+        # The number of expected cancelled +
+        # The number of remaining
+        gbis_variance = gbis_total_pre_cancellations - (
+            gbis_actually_sold / gbis_rate +
+            gbis_confirmed_cancellations +
+            gbis_expected_cancellations +
+            gbis_remaining_pre_cancellations
         )
 
         if gbis_variance != 0:
             raise ValueError("Something went wrong in gbis_variance")
 
+        # We expect the remaining to equal expected sales + expected cancellations
+        gbis_variance_2 = gbis_remaining_pre_cancellations - (
+            gbis_remaining +
+            gbis_remaining_expected_cancellations
+        )
+
+        if gbis_variance_2 != 0:
+            raise ValueError("Something went wrong in gbis_variance")
+
         to_append = {
             ("", "", "", "HA Name"): ha_name,
             # ECO4 - original warmfront figures
@@ -4375,17 +4395,26 @@ def forecast_remaining_sales(loader):
                 "Estimated CIGA failures - £"
             ],
             # GBIS postcode list
-            ("GBIS Postcode list", "Warmfront post code list", "Total - #", "GBIS total"): gbis_total,
-            ("GBIS Postcode list", "Warmfront post code list", "Total - £", "GBIS total"): gbis_total_revenue,
+            ("GBIS Postcode list", "Warmfront post code list", "Total - #", "GBIS total"): gbis_total_pre_cancellations,
+            ("GBIS Postcode list", "Warmfront post code list", "Total - £", "GBIS total"):
+                gbis_total_pre_cancellations_revenue,
             ("GBIS Postcode list", "Warmfront post code list", "GBIS VARIANCE", "GBIS total"): gbis_variance,
             ("GBIS Postcode list", "Warmfront post code list", "Sold - £", "GBIS total"): gbis_actually_sold,
             ("GBIS Postcode list", "", "Confirmed cancellations - £", ""): gbis_confirmed_cancellations * gbis_rate,
             # This is for jobs that are in-progress and could still cancel
             ("GBIS Postcode list", "", "Unconfirmed cancellations - £", ""): gbis_expected_cancellations * gbis_rate,
-            ("GBIS Postcode list", "Warmfront post code list", "Remaining - #", "GBIS total"): gbis_remaining,
-            ("GBIS Postcode list", "Warmfront post code list", "Remaining - £", "GBIS total"): gbis_remaining_revenue,
+            ("GBIS Postcode list", "Warmfront post code list", "Remaining - #", "GBIS total"):
+                gbis_remaining_pre_cancellations,
+            ("GBIS Postcode list", "Warmfront post code list", "Remaining - £", "GBIS total"):
+                gbis_remaining_pre_cancellations_revenue,
             ("GBIS Postcode list", "", "Delta vs original estimate, remaining - %", ""):
                 gbis_delta_vs_original_estimate_remaining,
+            # Expected cancellations
+            (
+                "GBIS Postcode list", "Of which expected sales - £", "Remaining - £",
+                "GBIS total"): gbis_remaining_revenue,
+            ("GBIS Postcode list", "Of which expected cancellations -£", "Remaining - £", "GBIS total"):
+                gbis_remaining_expected_cancellations
         }
 
         # Make sure nothing is forgotten due to duplicate multi-index keys

From f9957a55d066a294e79efdf196b72e79d82689fb Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 9 Mar 2024 16:19:54 +0000
Subject: [PATCH 115/262] fixed bug in gbis variance 2?

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index aca2ce43..a25f98c6 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -4172,7 +4172,8 @@ def forecast_remaining_sales(loader):
         gbis_remaining = int(np.round(gbis_remaining_pre_cancellations * ha_gbis_sale_conversion))
         gbis_remaining_revenue = int(gbis_remaining * gbis_rate)
         # This is the number we expect to cancel
-        gbis_remaining_expected_cancellations = int(gbis_remaining_pre_cancellations - gbis_remaining) * gbis_rate
+        gbis_remaining_expected_cancellations = int(gbis_remaining_pre_cancellations - gbis_remaining)
+        gbis_remaining_expected_cancellations_revenue = gbis_remaining_expected_cancellations * gbis_rate
 
         # GBIS delta
         if original_warmfront_remaining_gbis == 0:
@@ -4313,7 +4314,7 @@ def forecast_remaining_sales(loader):
         )
 
         if gbis_variance_2 != 0:
-            raise ValueError("Something went wrong in gbis_variance")
+            raise ValueError("Something went wrong in gbis_variance2")
 
         to_append = {
             ("", "", "", "HA Name"): ha_name,
@@ -4414,7 +4415,7 @@ def forecast_remaining_sales(loader):
                 "GBIS Postcode list", "Of which expected sales - £", "Remaining - £",
                 "GBIS total"): gbis_remaining_revenue,
             ("GBIS Postcode list", "Of which expected cancellations -£", "Remaining - £", "GBIS total"):
-                gbis_remaining_expected_cancellations
+                gbis_remaining_expected_cancellations_revenue
         }
 
         # Make sure nothing is forgotten due to duplicate multi-index keys

From 1ccb2cdebdca9a2fc17f0b11ef431bac81309357 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 9 Mar 2024 16:22:28 +0000
Subject: [PATCH 116/262] updated number of expected to append

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index a25f98c6..7ddc9844 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -4419,7 +4419,7 @@ def forecast_remaining_sales(loader):
         }
 
         # Make sure nothing is forgotten due to duplicate multi-index keys
-        if len(to_append) != 49:
+        if len(to_append) != 51:
             raise ValueError("Something went wrong")
 
         results.append(to_append)

From 768a0385e3a2cf7fc29b86b827cfb43d914e4621 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 9 Mar 2024 17:02:33 +0000
Subject: [PATCH 117/262] ha35 data read

---
 .../ha_15_32/ha_analysis_batch_3.py           | 24 +++++++++++++++----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 7ddc9844..ea0078c2 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -276,6 +276,13 @@ class DataLoader:
                 asset_list["POST CODE"].astype(str).str.lower().str.strip()
             )
             asset_list["matching_postcode"] = asset_list["POST CODE"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA35":
+            asset_list["matching_address"] = asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address Line 2"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address Line 3"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address Line 4"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address Post Code"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["Address Post Code"].astype(str).str.lower().str.strip()
         elif ha_name == "HA38":
             asset_list["matching_address"] = asset_list["House_Number"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["Address_Line_1"].astype(str).str.lower().str.strip() + ", " + \
@@ -1648,6 +1655,13 @@ class DataLoader:
 
         return survey_list
 
+    @staticmethod
+    def correct_ha35_survey_list(survey_list):
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "BALLADIER WLAK", "BALLADIER WALK"
+        )
+        return survey_list
+
     @staticmethod
     def levenstein_match(matching_string, df):
         match_to = df["matching_address"].tolist()
@@ -4673,14 +4687,14 @@ def app():
 
     # Add in:
     priority_has = [
-        "HA1", "HA2", "HA6", "HA7", "HA12", "HA13", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39",
-        "HA41", "HA48", "HA50", "HA63", "HA107", "HA117"
+        "HA1", "HA2", "HA6", "HA7", "HA12", "HA13", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA35",
+        "HA39", "HA41", "HA48", "HA50", "HA63", "HA107", "HA117"
     ]
     # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come
-    # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE]
-    # 13 [WIP]
+    # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE], 13 [DONE]
+    # 35 [WIP]
     # Consider for ECO4:
-    # Consider for GBIS: 56, 35, 34
+    # Consider for GBIS: 56, 34
     # Ignore for now:
     # 38 [problematic, but no ECO4], 10 problematic (no eligibility), 20 has barely any in
     # Filter down the directories to only the priority HAs

From 29f2a2abf801e4c01ad89383b18eaac4ed97b0af Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 9 Mar 2024 17:09:43 +0000
Subject: [PATCH 118/262] HA35 done

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index ea0078c2..04ee343c 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -4691,8 +4691,9 @@ def app():
         "HA39", "HA41", "HA48", "HA50", "HA63", "HA107", "HA117"
     ]
     # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come
-    # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE], 13 [DONE]
-    # 35 [WIP]
+    # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE], 13 [DONE],
+    # 35 [DONE]
+    # 34 [WIP]
     # Consider for ECO4:
     # Consider for GBIS: 56, 34
     # Ignore for now:

From 6e4fc23ecc2036e14148b18611cb04aafde8084b Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 9 Mar 2024 18:12:12 +0000
Subject: [PATCH 119/262] fixed dupes for HA34

---
 .../ha_15_32/ha_analysis_batch_3.py           | 104 +++++++++++++++++-
 1 file changed, 98 insertions(+), 6 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 04ee343c..8784481b 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -276,6 +276,12 @@ class DataLoader:
                 asset_list["POST CODE"].astype(str).str.lower().str.strip()
             )
             asset_list["matching_postcode"] = asset_list["POST CODE"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA34":
+            asset_list["matching_address"] = (
+                asset_list[" Address"].astype(str).str.lower().str.strip() + ", " +
+                asset_list[" Postcode"].astype(str).str.lower().str.strip()
+            )
+            asset_list["matching_postcode"] = asset_list[" Postcode"].astype(str).str.lower().str.strip()
         elif ha_name == "HA35":
             asset_list["matching_address"] = asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["Address Line 2"].astype(str).str.lower().str.strip() + ", " + \
@@ -566,7 +572,8 @@ class DataLoader:
             eco3_list["eco3_list_row_id"] = [ha_name + "_Eco3_" + str(i) for i in range(0, len(eco3_list))]
 
             # Perform the eco3 merge
-            eco3_list = self.merge_eco3_to_assets(asset_list, eco3_list, ha_name)
+            if not eco3_list.empty:
+                eco3_list = self.merge_eco3_to_assets(asset_list, eco3_list, ha_name)
 
         if ha_name in ["HA25"]:
             # Accomodate ha25 unique structure
@@ -1657,9 +1664,94 @@ class DataLoader:
 
     @staticmethod
     def correct_ha35_survey_list(survey_list):
-        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
-            "BALLADIER WLAK", "BALLADIER WALK"
+        return survey_list
+
+    @staticmethod
+    def correct_ha34_survey_list(survey_list):
+        # Note in the asset list
+        survey_list = survey_list[
+            survey_list["Post Code"] != "L5 3SS"
+            ]
+
+        survey_list["Post Code"] = survey_list["Post Code"].str.replace(
+            "L177DR", "L17 7DR"
         )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "PENVALLEY CRESENT", "Penvalley Crescent"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "PENLINKEN DRIVE", "Penlinken Drive"
+        )
+
+        # There's no 32 Penlinken Drive in the asset sheet
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "Penlinken Drive") &
+              (survey_list["NO."] == 32))
+        ]
+
+        # There's no 30 Gwent Street in the asset sheet
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "GWENT ST") &
+              (survey_list["NO."] == 30))
+        ]
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "POULTON RD", "Poulton Road"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "ST PAULS RD", "St Pauls Road"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "BROAD LANE, KIRKBY", "BROAD LANE"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "BULLENS RD, KIRKBY", "Bullens Road"
+        )
+
+        # There's no 219 NORTH HILL ST in the asset sheet
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "NORTH HILL ST") &
+              (survey_list["NO."] == 219))
+        ]
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "CROSLAND RD, KIRKBY", "CROSLAND ROAD"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "PARK BROW DRIVE, KIRKBY", "Park Brow Drive"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "CELTIC TREET", "Celtic Street"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "BUCKLAND ROAD", "Buckland Street"
+        )
+
+        # duplicates
+        survey_list = survey_list.drop_duplicates(["Street / Block Name", "NO.", "Post Code"])
+
+        # This is a duplicate with wrong postcode
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "CLARIBEL STREET") &
+              (survey_list["NO."] == 7) &
+              (survey_list["Post Code"] == "L8 8AF"))
+        ]
+
+        survey_list["NO."] = np.where(
+            ((survey_list["NO."] == "187 A") &
+             (survey_list["Post Code"] == "L32 6QF")),
+            "187A",
+            survey_list["NO."]
+        )
+
         return survey_list
 
     @staticmethod
@@ -1685,7 +1777,7 @@ class DataLoader:
         survey_list = survey_list_correction_function(survey_list)
 
         missed_postcodes = []
-        if ha_name == "HA6":
+        if ha_name in ["HA6", "HA34"]:
             missed_postcodes = [
                 postcode.lower() for postcode in survey_list["Post Code"] if
                 postcode.lower() not in asset_list["matching_postcode"].values
@@ -4687,8 +4779,8 @@ def app():
 
     # Add in:
     priority_has = [
-        "HA1", "HA2", "HA6", "HA7", "HA12", "HA13", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA35",
-        "HA39", "HA41", "HA48", "HA50", "HA63", "HA107", "HA117"
+        "HA1", "HA2", "HA6", "HA7", "HA12", "HA13", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA34",
+        "HA35", "HA39", "HA41", "HA48", "HA50", "HA63", "HA107", "HA117"
     ]
     # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come
     # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE], 13 [DONE],

From 27fed2dce320a54a049df279fca5c3abd407275f Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 9 Mar 2024 18:25:22 +0000
Subject: [PATCH 120/262] temp removed HA34 due to issue

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 8784481b..d1f8d546 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -2270,6 +2270,11 @@ class DataLoader:
             "ECO4 AFFORDABLE WARMTH": "ECO4",
             "Affordable Warmth": "ECO4",
             "ECO4 GBIS (ECO+) JJC UNDER 73m² ": "GBIS",
+            "ECO4 PPS": "ECO4",
+            "AFFORDABLE WARMTH / REMEDIAL": "ECO4",
+            "AFF0RDALE WARMTH": "ECO4",
+            "ECO 4 RdSAP CL": "ECO4",
+            "Affordable Warmth (R) ": "ECO4"
         }
 
         # Since it seems like "subject to archetype check" has some failure conditions, for simplicity, we
@@ -4779,15 +4784,17 @@ def app():
 
     # Add in:
     priority_has = [
-        "HA1", "HA2", "HA6", "HA7", "HA12", "HA13", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA34",
+        "HA1", "HA2", "HA6", "HA7", "HA12", "HA13", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32",
+        # "HA34",
         "HA35", "HA39", "HA41", "HA48", "HA50", "HA63", "HA107", "HA117"
     ]
     # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come
     # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE], 13 [DONE],
     # 35 [DONE]
-    # 34 [WIP]
+    #  [WIP]
     # Consider for ECO4:
-    # Consider for GBIS: 56, 34
+    # Consider for GBIS: 56
+    # 34 [bug in the results so leaving out for the moment]
     # Ignore for now:
     # 38 [problematic, but no ECO4], 10 problematic (no eligibility), 20 has barely any in
     # Filter down the directories to only the priority HAs

From 28434f43c8fd9dac176fd68a1b4e20a79a128e9d Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 10 Mar 2024 13:55:44 +0000
Subject: [PATCH 121/262] ha56 wip

---
 .../ha_15_32/ha_analysis_batch_3.py           | 90 +++++++++++++++++--
 1 file changed, 83 insertions(+), 7 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index d1f8d546..064ff8f5 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -317,6 +317,12 @@ class DataLoader:
             asset_list["matching_address"] = asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["Post Code"].astype(str).str.lower().str.strip()
             asset_list["matching_postcode"] = asset_list["Post Code"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA56":
+            asset_list["matching_address"] = asset_list["Address 1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address 2"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address 3"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Post Code"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["Post Code"].astype(str).str.lower().str.strip()
         elif ha_name == "HA63":
             asset_list["matching_address"] = asset_list["Address1"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["POSTCODE"].astype(str).str.lower().str.strip()
@@ -639,6 +645,54 @@ class DataLoader:
 
         return asset_list
 
+    @staticmethod
+    def correct_ha56_asset_list(asset_list):
+        # CH1 4JR has already been surveyed, but it's listed in the asset list
+        # as a single row, when it's actually 32 units, so we just set this
+        # as ineligible
+        asset_list["ECO Eligibility"] = np.where(
+            asset_list["Post Code"] == "CH1 4JR",
+            "Not eligible",
+            asset_list["ECO Eligibility"]
+        )
+
+        # Same for CW8 3EU
+        asset_list["ECO Eligibility"] = np.where(
+            asset_list["Post Code"] == "CW8 3EU",
+            "Not eligible",
+            asset_list["ECO Eligibility"]
+        )
+
+        asset_list["ECO Eligibility"] = np.where(
+            asset_list["Post Code"] == "CW1 3HP",
+            "Not eligible",
+            asset_list["ECO Eligibility"]
+        )
+
+        asset_list["ECO Eligibility"] = np.where(
+            asset_list["Post Code"] == "WA4 2PH",
+            "Not eligible",
+            asset_list["ECO Eligibility"]
+        )
+
+        asset_list["ECO Eligibility"] = np.where(
+            asset_list["Post Code"] == "BD6 1QJ",
+            "Not eligible",
+            asset_list["ECO Eligibility"]
+        )
+
+        asset_list["ECO Eligibility"] = np.where(
+            asset_list["Post Code"] == "L39 1RS",
+            "Not eligible",
+            asset_list["ECO Eligibility"]
+        )
+
+        asset_list["ECO Eligibility"] = np.where(
+            asset_list["Post Code"] == "WA10 2DE",
+            "Not eligible",
+            asset_list["ECO Eligibility"]
+        )
+
     @staticmethod
     def correct_ha14_asset_list(asset_list):
 
@@ -1970,6 +2024,24 @@ class DataLoader:
 
         return eco3_list
 
+    @staticmethod
+    def correct_ha56_eco3_list(eco3_list):
+        eco3_list = eco3_list[~pd.isnull(eco3_list["Post Code"])]
+
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "Mount Pleasant, Crewe", "Mount Pleasant"
+        )
+
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "Dutton Close", "Dutton Way"
+        )
+
+        eco3_list["Post Code"] = eco3_list["Post Code"].str.replace(
+            "Ls63nl", "LS6 3NL"
+        )
+
+        return eco3_list
+
     def merge_eco3_to_assets(self, asset_list, eco3_list, ha_name):
 
         eco3_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_eco3_list")
@@ -1978,8 +2050,8 @@ class DataLoader:
         asset_list["matching_postcode_nospace"] = asset_list["matching_postcode"].str.replace(" ", "").str.lower()
         eco3_list["postcode_no_space"] = eco3_list["Post Code"].str.lower().str.replace(" ", "")
 
-        if ha_name == "HA25":
-            # 317 -> 259
+        if ha_name in ["HA25", "HA56"]:
+            # HA25: 317 -> 259
             missed_postcodes = {
                 postcode for postcode in eco3_list["postcode_no_space"] if
                 postcode not in asset_list["matching_postcode_nospace"].values
@@ -2060,6 +2132,7 @@ class DataLoader:
             raise ValueError(
                 f"Unmatched addresses for {ha_name} is not as expected, got {len(missed)} unmatched"
             )
+        missed_df = eco3_list[eco3_list["eco3_list_row_id"].isin(missed)]
 
         matching_lookup = pd.DataFrame(matching_lookup)
         # Check dupes as this will cause problems later on
@@ -3896,6 +3969,9 @@ def calculate_eco4_post_ciga(
 
 
 def forecast_remaining_sales(loader):
+    # TODO: Skip HA34 for the moment
+    loader.data = {k: v for k, v in loader.data.items() if k != "HA34"}
+
     # Assumptions:
     # We cap the ciga conversion rate at 75% because I expect future HAs to have a lower CIGA conversion rate
     # and I don't want the numbers to change too much, depenent on the CIGA conversation rate
@@ -4523,9 +4599,9 @@ def forecast_remaining_sales(loader):
                 gbis_delta_vs_original_estimate_remaining,
             # Expected cancellations
             (
-                "GBIS Postcode list", "Of which expected sales - £", "Remaining - £",
+                "GBIS Postcode list", "", "Of which expected sales - £ - £",
                 "GBIS total"): gbis_remaining_revenue,
-            ("GBIS Postcode list", "Of which expected cancellations -£", "Remaining - £", "GBIS total"):
+            ("GBIS Postcode list", "", "Of which expected cancellations -£", "GBIS total"):
                 gbis_remaining_expected_cancellations_revenue
         }
 
@@ -4786,14 +4862,14 @@ def app():
     priority_has = [
         "HA1", "HA2", "HA6", "HA7", "HA12", "HA13", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32",
         # "HA34",
-        "HA35", "HA39", "HA41", "HA48", "HA50", "HA63", "HA107", "HA117"
+        "HA35", "HA39", "HA41", "HA48", "HA50", "HA56", "HA63", "HA107", "HA117"
     ]
     # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come
     # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE], 13 [DONE],
     # 35 [DONE]
-    #  [WIP]
+    # 56 [WIP]
     # Consider for ECO4:
-    # Consider for GBIS: 56
+    # Consider for GBIS:
     # 34 [bug in the results so leaving out for the moment]
     # Ignore for now:
     # 38 [problematic, but no ECO4], 10 problematic (no eligibility), 20 has barely any in

From db7b6de87bfb13486a179cbdc547ae375cfc0c8d Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 10 Mar 2024 14:13:20 +0000
Subject: [PATCH 122/262] handle HA56 dupes

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 064ff8f5..62099386 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -189,6 +189,7 @@ class DataLoader:
         "HA25": 154,
         "HA41": 26,
         "HA50": 5,
+        "HA56": 320,
         "HA63": 0,
         "HA117": 4
     }
@@ -693,6 +694,8 @@ class DataLoader:
             asset_list["ECO Eligibility"]
         )
 
+        return asset_list
+
     @staticmethod
     def correct_ha14_asset_list(asset_list):
 
@@ -2040,6 +2043,14 @@ class DataLoader:
             "Ls63nl", "LS6 3NL"
         )
 
+        # Handle a duplicate
+        eco3_list = eco3_list[
+            ~((eco3_list["Street / Block Name"] == "Mount Pleasant") &
+              (eco3_list["Post Code"] == "CW1 3JF") &
+              (eco3_list["NO "] == 5) &
+              (eco3_list["INSTALL/ CANCELLATION DATE"] == "CANCELLED 20.5.2022"))
+        ]
+
         return eco3_list
 
     def merge_eco3_to_assets(self, asset_list, eco3_list, ha_name):
@@ -2128,15 +2139,16 @@ class DataLoader:
         # HA25 contains 119 missed entries. These are actually 24 unique postcodes, and the majority belong to 2
         # where many surveys were conducted on house numbers, not in the asset list
         # 154 missed, 2827 matched for HA 25
+        # For HA56, the number of missed is high at 320, however a big portion of these are due to the block being
+        # listed in the asset list, and individual units being in the survey list
         if len(missed) != self.UNMATCHED_ECO3[ha_name]:
             raise ValueError(
                 f"Unmatched addresses for {ha_name} is not as expected, got {len(missed)} unmatched"
             )
-        missed_df = eco3_list[eco3_list["eco3_list_row_id"].isin(missed)]
 
         matching_lookup = pd.DataFrame(matching_lookup)
         # Check dupes as this will cause problems later on
-        if matching_lookup["asset_list_row_id"].duplicated().any():
+        if matching_lookup["asset_list_row_id"].duplicated().sum():
             raise ValueError("Duplicated asset list row ids")
 
         # Merge onto eco3 list

From 8b3f4d3a520f9148195c6fbd55d3b1d7354d0ee1 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 10 Mar 2024 14:25:47 +0000
Subject: [PATCH 123/262] ha56 survey list matching

---
 .../ha_15_32/ha_analysis_batch_3.py           | 54 +++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 62099386..f9bf3856 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -694,6 +694,20 @@ class DataLoader:
             asset_list["ECO Eligibility"]
         )
 
+        # Already surveyed under ECO4
+        asset_list["ECO Eligibility"] = np.where(
+            asset_list["Post Code"] == "SK17 6NR",
+            "Not eligible",
+            asset_list["ECO Eligibility"]
+        )
+
+        asset_list["ECO Eligibility"] = np.where(
+            ~((asset_list["Post Code"] == "WA5 0EN") &
+              (asset_list["Address 1"] == "Block 17-26 Tavlin Avenue")),
+            "Not eligible",
+            asset_list["ECO Eligibility"]
+        )
+
         return asset_list
 
     @staticmethod
@@ -1811,6 +1825,29 @@ class DataLoader:
 
         return survey_list
 
+    @staticmethod
+    def correct_ha56_survey_list(survey_list):
+        # Not in asset list
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "Samual Street") &
+              (survey_list["NO."].isin([22, 24])) &
+              (survey_list["Post Code"] == "WA5 1BB"))
+        ]
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "STOURTON RD", "Stourton Road"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "BIRKIN RD", "Birkin Road"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "PORTLAND RD", "Portland Road"
+        )
+
+        return survey_list
+
     @staticmethod
     def levenstein_match(matching_string, df):
         match_to = df["matching_address"].tolist()
@@ -1843,6 +1880,10 @@ class DataLoader:
         if ha_name == "HA13":
             missed_postcodes = ["hp17 8le"]
 
+        if ha_name == "HA56":
+            # Multiple properties are listed as blocks, which is a problem for matching
+            missed_postcodes = ["sk17 6nr", "wa5 0en"]
+
         matching_lookup = []
         for _, row in tqdm(survey_list.iterrows(), total=len(survey_list)):
 
@@ -1890,6 +1931,19 @@ class DataLoader:
                 df = df[df["HouseNo"].astype(str).str.lower() == str(house_number)]
                 if df.shape[0] != 1:
                     df = df[df["matching_postcode"].str.lower().str.contains(row["Post Code"].lower().strip())]
+
+                    if df.empty:
+
+                        postcode_lower = row["Post Code"].lower()
+                        if postcode_lower in missed_postcodes:
+                            matching_lookup.append(
+                                {
+                                    "survey_list_row_id": row["survey_list_row_id"],
+                                    "asset_list_row_id": None,
+                                }
+                            )
+                            continue
+
                     if df.shape[0] != 1:
                         if "Town/Area" not in row.keys():
                             full_key = (str(row["NO."]).lower().strip() + row["Street / Block Name"].lower().strip() +

From 4a6711a1403a8661b467a0f7023151829e305822 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 10 Mar 2024 14:35:08 +0000
Subject: [PATCH 124/262] handling ha56 dupes

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index f9bf3856..0030af9d 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1846,6 +1846,13 @@ class DataLoader:
             "PORTLAND RD", "Portland Road"
         )
 
+        # We remove a row, because two rows match to a block listing
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "Tavlin Avenue") &
+              (survey_list["NO."] == 17) &
+              (survey_list["Post Code"] == "WA5 0EN"))
+        ]
+
         return survey_list
 
     @staticmethod

From ba65b6c8e37e5a44492c3342a05513d05d275ac4 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 10 Mar 2024 14:39:15 +0000
Subject: [PATCH 125/262] fixed bug in asset list cleaning

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 0030af9d..b1eda326 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -702,8 +702,8 @@ class DataLoader:
         )
 
         asset_list["ECO Eligibility"] = np.where(
-            ~((asset_list["Post Code"] == "WA5 0EN") &
-              (asset_list["Address 1"] == "Block 17-26 Tavlin Avenue")),
+            ((asset_list["Post Code"] == "WA5 0EN") &
+             (asset_list["Address 1"] == "Block 17-26 Tavlin Avenue")),
             "Not eligible",
             asset_list["ECO Eligibility"]
         )

From 5eb938bf54fbaaf52bb72e7c8972bad5e2d58a46 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 10 Mar 2024 15:40:02 +0000
Subject: [PATCH 126/262] ha18 done

---
 .../ha_15_32/ha_analysis_batch_3.py           | 28 +++++++++++++++++--
 1 file changed, 25 insertions(+), 3 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index b1eda326..676bd613 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -249,6 +249,20 @@ class DataLoader:
                 asset_list["Postcode"].astype(str).str.lower().str.strip()
             )
             asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA18":
+            asset_list["matching_address"] = (
+                asset_list["Address"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Post Code"].astype(str).str.lower().str.strip()
+            )
+            asset_list["matching_postcode"] = asset_list["Post Code"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA19":
+            asset_list["matching_address"] = (
+                asset_list["Address1"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Address2"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Address3"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Postcode"].astype(str).str.lower().str.strip()
+            )
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
         elif ha_name == "HA25":
             asset_list["matching_address"] = asset_list[
                 self.COLUMN_CONFIG[ha_name]["address"]
@@ -495,6 +509,8 @@ class DataLoader:
             return "CIGA checks"
         elif "CIGA check" in workbook.sheetnames:
             return "CIGA check"
+        elif "CIGA Check" in workbook.sheetnames:
+            return "CIGA Check"
         elif "CIGA requested" in workbook.sheetnames:
             return "CIGA requested"
         else:
@@ -1733,6 +1749,10 @@ class DataLoader:
 
         return survey_list
 
+    @staticmethod
+    def correct_ha18_survey_list(survey_list):
+        return survey_list
+
     @staticmethod
     def correct_ha35_survey_list(survey_list):
         return survey_list
@@ -2435,6 +2455,7 @@ class DataLoader:
             "eco4  (subject to ciga)": "eco4 (subject to ciga)",
             "eco4(subject to ciga)": "eco4 (subject to ciga)",
             "eco4 subject to ciga": "eco4 (subject to ciga)",
+            "eco4 (subject to archetype/ciga)": "eco4 (subject to ciga) (subject to archetype)",
         }
 
         ha_facts_and_figures = []
@@ -4933,14 +4954,15 @@ def app():
 
     # Add in:
     priority_has = [
-        "HA1", "HA2", "HA6", "HA7", "HA12", "HA13", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32",
+        "HA1", "HA2", "HA6", "HA7", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18",
+        "HA19", "HA24", "HA25", "HA28", "HA32",
         # "HA34",
         "HA35", "HA39", "HA41", "HA48", "HA50", "HA56", "HA63", "HA107", "HA117"
     ]
     # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come
     # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE], 13 [DONE],
-    # 35 [DONE]
-    # 56 [WIP]
+    # 35 [DONE], 56 [DONE], 19 [DONE]
+    #
     # Consider for ECO4:
     # Consider for GBIS:
     # 34 [bug in the results so leaving out for the moment]

From 5b39cf138df458b749d13fd100de011e6f3ac350 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 10 Mar 2024 15:52:33 +0000
Subject: [PATCH 127/262] ha9 data load

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 676bd613..88ab706b 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -173,6 +173,7 @@ class DataLoader:
     UNMATCHED_CIGA = {
         "HA2": 0,
         "HA6": 117,
+        "HA9": 0,
         "HA12": 6,
         "HA13": 119,
         "HA14": 3,
@@ -226,6 +227,14 @@ class DataLoader:
                                              asset_list["Address3"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["Postcode"].astype(str).str.lower().str.strip()
             asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA9":
+            asset_list["matching_address"] = asset_list["House Number"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address Line 2"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address Line 3"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address Line 4"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Postcode"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
         elif ha_name == "HA13":
             asset_list["matching_address"] = asset_list["Address 1"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["address 2"].astype(str).str.lower().str.strip() + ", " + \
@@ -430,7 +439,7 @@ class DataLoader:
         :return:
         """
 
-        if ha_name in ["HA107"]:
+        if ha_name == "HA107":
             asset_list["HouseNo"] = asset_list["House No"].copy()
         elif ha_name == "HA32":
             asset_list["HouseNo"] = asset_list["Dwelling num"].copy()
@@ -438,6 +447,8 @@ class DataLoader:
             asset_list["HouseNo"] = asset_list["House Number"].copy()
         elif ha_name == "HA38":
             asset_list["HouseNo"] = asset_list["House_Number"].copy()
+        elif ha_name == "HA9":
+            asset_list["HouseNo"] = asset_list["House Number"].copy()
         else:
             split_addresses = asset_list['matching_address'].str.split(',', expand=True)
             house_numbers = split_addresses[0].str.split(' ', expand=True)
@@ -4954,7 +4965,7 @@ def app():
 
     # Add in:
     priority_has = [
-        "HA1", "HA2", "HA6", "HA7", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18",
+        "HA1", "HA2", "HA6", "HA7", "HA9", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18",
         "HA19", "HA24", "HA25", "HA28", "HA32",
         # "HA34",
         "HA35", "HA39", "HA41", "HA48", "HA50", "HA56", "HA63", "HA107", "HA117"

From efbda5cece019d8518b770c0ace444c1179a1d6a Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 10 Mar 2024 16:09:08 +0000
Subject: [PATCH 128/262] ha27 complete

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 88ab706b..fba30f1f 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -280,6 +280,12 @@ class DataLoader:
             asset_list["matching_postcode"] = asset_list['matching_address'].apply(
                 lambda x: ' '.join(x.split()[-2:]) if pd.notnull(x) else x
             )
+        elif ha_name == "HA27":
+            asset_list["matching_address"] = (
+                asset_list[" Address"].astype(str).str.lower().str.strip() + ", " +
+                asset_list[" Postcode"].astype(str).str.lower().str.strip()
+            )
+            asset_list["matching_postcode"] = asset_list[" Postcode"].astype(str).str.lower().str.strip()
         elif ha_name == "HA28":
             asset_list["matching_address"] = (
                 asset_list["House Number"].astype(str).str.lower().str.strip() + ", " +
@@ -582,7 +588,7 @@ class DataLoader:
         # For HA1 and HA25, there is an exception in the structure of the data. We don't have any survey or ciga
         # lists, and so
         # we can return the asset list now
-        if ha_name in ["HA1"]:
+        if ha_name in ["HA1", "HA27"]:
             return asset_list, pd.DataFrame(), pd.DataFrame(), pd.DataFrame()
 
         # If we have ECO3 surveys, we need to match them, because any properties treated under ECO3 won't be
@@ -4966,13 +4972,13 @@ def app():
     # Add in:
     priority_has = [
         "HA1", "HA2", "HA6", "HA7", "HA9", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18",
-        "HA19", "HA24", "HA25", "HA28", "HA32",
+        "HA19", "HA24", "HA25", "HA27", "HA28", "HA32",
         # "HA34",
         "HA35", "HA39", "HA41", "HA48", "HA50", "HA56", "HA63", "HA107", "HA117"
     ]
     # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come
     # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE], 13 [DONE],
-    # 35 [DONE], 56 [DONE], 19 [DONE]
+    # 35 [DONE], 56 [DONE], 19 [DONE], 18 [DONE], 9 [DONE], 27 DONE
     #
     # Consider for ECO4:
     # Consider for GBIS:

From 22f3aca336abafc164439f00ddbdf34649f4f28a Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 10 Mar 2024 16:26:42 +0000
Subject: [PATCH 129/262] ha30 32% matched

---
 .../ha_15_32/ha_analysis_batch_3.py           | 29 +++++++++++++++++--
 1 file changed, 27 insertions(+), 2 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index fba30f1f..bdb0d0c4 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -164,6 +164,10 @@ class DataLoader:
             "address": "T1_Address",
             "postcode": "matching_postcode"
         },
+        "HA30": {
+            "address": "A_Address",
+            "postcode": "A_Postcode"
+        },
         "HA48": {
             "address": "Full Address",
             "postcode": "Postcode"
@@ -207,7 +211,7 @@ class DataLoader:
 
     def create_asset_list_matching_address(self, ha_name, asset_list):
 
-        if ha_name in ["HA1", "HA6", "HA12", "HA16", "HA24", "HA48"]:
+        if ha_name in ["HA1", "HA6", "HA12", "HA16", "HA24", "HA30", "HA48"]:
             asset_list["matching_address"] = asset_list[
                 self.COLUMN_CONFIG[ha_name]["address"]
             ].astype(str).str.lower().str.strip()
@@ -1892,6 +1896,27 @@ class DataLoader:
 
         return survey_list
 
+    @staticmethod
+    def correct_ha30_survey_list(survey_list):
+
+        survey_list = survey_list[~pd.isnull(survey_list["Post Code"])]
+
+        # Split on / and take the first half
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.split("/").str[0]
+
+        # Not in the asset list
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "Horsebridge Road") &
+              (survey_list["NO."] == 286))
+        ]
+
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "DUTTON WAY") &
+              (survey_list["NO."] == 9))
+        ]
+
+        return survey_list
+
     @staticmethod
     def levenstein_match(matching_string, df):
         match_to = df["matching_address"].tolist()
@@ -4972,7 +4997,7 @@ def app():
     # Add in:
     priority_has = [
         "HA1", "HA2", "HA6", "HA7", "HA9", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18",
-        "HA19", "HA24", "HA25", "HA27", "HA28", "HA32",
+        "HA19", "HA24", "HA25", "HA27", "HA28", "HA30", "HA32",
         # "HA34",
         "HA35", "HA39", "HA41", "HA48", "HA50", "HA56", "HA63", "HA107", "HA117"
     ]

From cd81c2b0b29a65b3fd3c59ec5dec7730afdd64ec Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 10 Mar 2024 16:45:59 +0000
Subject: [PATCH 130/262] done ha30 matching

---
 .../ha_15_32/ha_analysis_batch_3.py           | 68 +++++++++++++++++++
 1 file changed, 68 insertions(+)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index bdb0d0c4..71062b16 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1915,6 +1915,74 @@ class DataLoader:
               (survey_list["NO."] == 9))
         ]
 
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "PAYTHORNE CLOSE") &
+              (survey_list["NO."] == 10))
+        ]
+
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "MARCHWOOD ROAD") &
+              (survey_list["NO."] == 11))
+        ]
+
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "Otterburn Close") &
+              (survey_list["NO."] == 4))
+        ]
+
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "Blossom Court") &
+              (survey_list["NO."] == 5))
+        ]
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "St LUKES CLOSE , HUNTINGDON", "St. Lukes Close"
+        )
+
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "St. Lukes Close") &
+              (survey_list["NO."].isin([4, 7, 8])))
+        ]
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "ROMAN WAY , GODMANCHESTER , HUNTINGDON", "Roman Way"
+        )
+
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "Roman Way") &
+              (survey_list["NO."].isin([58])))
+        ]
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "HEADLANDS , FENSTANTON , HUNTINGDON", "Headlands Fenstanton"
+        )
+
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "Headlands Fenstanton") &
+              (survey_list["NO."].isin([126, 134])))
+        ]
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "WALLACE COURT , HUNTINGDON", "Wallace Court"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "CRICKETERS WAY , CHATTERIS", "Cricketers Way"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Jubilee Gardens", "Jubilee Green"
+        )
+
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "Harrow Road") &
+              (survey_list["NO."].isin([10])))
+        ]
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "ST LUKES CLOSE", "St. Lukes Close"
+        )
+
         return survey_list
 
     @staticmethod

From 2810316e22ffe4662ae40c2c3bb9bee2f6af6f83 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 10 Mar 2024 17:14:22 +0000
Subject: [PATCH 131/262] handled bug for HA30

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 71062b16..1ee40dde 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -2566,6 +2566,7 @@ class DataLoader:
             "eco4(subject to ciga)": "eco4 (subject to ciga)",
             "eco4 subject to ciga": "eco4 (subject to ciga)",
             "eco4 (subject to archetype/ciga)": "eco4 (subject to ciga) (subject to archetype)",
+            "eco4( subject to ciga/archetype)": "eco4 (subject to ciga) (subject to archetype)",
         }
 
         ha_facts_and_figures = []
@@ -2716,11 +2717,13 @@ class DataLoader:
 
                 asset_list = asset_list.merge(survey_list_to_merge, how='left', on="asset_list_row_id")
                 # Update the cases where properties have sold, but are missing a CIGA check
+                # If we don't have a CIGA list, we set the value to ECO4
+                set_to = "eco4 - passed ciga" if not ciga_list.empty else "eco4"
                 asset_list["ECO Eligibility"] = np.where(
                     (asset_list["ECO Eligibility"].str.contains("subject to ciga")) & (
                         asset_list["has_a_survey_record"] == True
                     ),
-                    "eco4 - passed ciga",
+                    set_to,
                     asset_list["ECO Eligibility"]
                 )
                 # Update the cases where a property has been marked as eligible for GBIS, but sold for ECO4
@@ -4122,7 +4125,6 @@ def calculate_eco4_post_ciga(
 
         eco4_expected_cancellations = eco4_no_ciga_needed_cancellations + eco4_ciga_needed_cancellations
     else:
-
         eco4_confirmed_ciga_failures = 0
         # Multiply by sale conversion
         eco4_confirmed = np.round(eco4_no_ciga_needed * ha_eco4_to_sale_rate)

From e15b977930c1b65ab39099c8c6a92d05039e96af Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 10 Mar 2024 17:25:07 +0000
Subject: [PATCH 132/262] fixed ha34, completed 30

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 1ee40dde..7d35386d 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -2550,7 +2550,8 @@ class DataLoader:
             "AFFORDABLE WARMTH / REMEDIAL": "ECO4",
             "AFF0RDALE WARMTH": "ECO4",
             "ECO 4 RdSAP CL": "ECO4",
-            "Affordable Warmth (R) ": "ECO4"
+            "Affordable Warmth (R) ": "ECO4",
+            "Affordable Warmth ": "ECO4"
         }
 
         # Since it seems like "subject to archetype check" has some failure conditions, for simplicity, we
@@ -4175,9 +4176,6 @@ def calculate_eco4_post_ciga(
 
 
 def forecast_remaining_sales(loader):
-    # TODO: Skip HA34 for the moment
-    loader.data = {k: v for k, v in loader.data.items() if k != "HA34"}
-
     # Assumptions:
     # We cap the ciga conversion rate at 75% because I expect future HAs to have a lower CIGA conversion rate
     # and I don't want the numbers to change too much, depenent on the CIGA conversation rate
@@ -5066,18 +5064,15 @@ def app():
 
     # Add in:
     priority_has = [
-        "HA1", "HA2", "HA6", "HA7", "HA9", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18",
-        "HA19", "HA24", "HA25", "HA27", "HA28", "HA30", "HA32",
-        # "HA34",
-        "HA35", "HA39", "HA41", "HA48", "HA50", "HA56", "HA63", "HA107", "HA117"
+        "HA1", "HA2", "HA6", "HA7", "HA9", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18", "HA19", "HA24", "HA25",
+        "HA27", "HA28", "HA30", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA50", "HA56", "HA63", "HA107", "HA117"
     ]
     # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come
     # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE], 13 [DONE],
-    # 35 [DONE], 56 [DONE], 19 [DONE], 18 [DONE], 9 [DONE], 27 DONE
+    # 35 [DONE], 56 [DONE], 19 [DONE], 18 [DONE], 9 [DONE], 27 [DONE], 34 [DONE], 30 [DONE]
     #
     # Consider for ECO4:
     # Consider for GBIS:
-    # 34 [bug in the results so leaving out for the moment]
     # Ignore for now:
     # 38 [problematic, but no ECO4], 10 problematic (no eligibility), 20 has barely any in
     # Filter down the directories to only the priority HAs

From 41c17aa1dafe9110c74d6969f2fa06e58d3f0cf8 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 10 Mar 2024 18:13:45 +0000
Subject: [PATCH 133/262] HA54 done

---
 .../ha_15_32/ha_analysis_batch_3.py           | 22 ++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 7d35386d..d556450b 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -168,9 +168,17 @@ class DataLoader:
             "address": "A_Address",
             "postcode": "A_Postcode"
         },
+        "HA31": {
+            "address": "A_Address",
+            "postcode": "matching_postcode"
+        },
         "HA48": {
             "address": "Full Address",
             "postcode": "Postcode"
+        },
+        "HA54": {
+            "address": "Postal Address",
+            "postcode": "matching_postcode"
         }
     }
 
@@ -211,7 +219,7 @@ class DataLoader:
 
     def create_asset_list_matching_address(self, ha_name, asset_list):
 
-        if ha_name in ["HA1", "HA6", "HA12", "HA16", "HA24", "HA30", "HA48"]:
+        if ha_name in ["HA1", "HA6", "HA12", "HA16", "HA24", "HA30", "HA31", "HA48", "HA54"]:
             asset_list["matching_address"] = asset_list[
                 self.COLUMN_CONFIG[ha_name]["address"]
             ].astype(str).str.lower().str.strip()
@@ -559,6 +567,12 @@ class DataLoader:
         if ha_name == "HA25":
             asset_sheet_colnames[11] = "matching_postcode"
 
+        if ha_name == "HA31":
+            asset_sheet_colnames[2] = "matching_postcode"
+
+        if ha_name == "HA54":
+            asset_sheet_colnames[10] = "matching_postcode"
+
         rows_data = []
 
         for row in asset_sheet.iter_rows(min_row=2, values_only=False):
@@ -2568,6 +2582,7 @@ class DataLoader:
             "eco4 subject to ciga": "eco4 (subject to ciga)",
             "eco4 (subject to archetype/ciga)": "eco4 (subject to ciga) (subject to archetype)",
             "eco4( subject to ciga/archetype)": "eco4 (subject to ciga) (subject to archetype)",
+            "eco4 (subject to ciga/ archetype)": "eco4 (subject to ciga) (subject to archetype)"
         }
 
         ha_facts_and_figures = []
@@ -5065,11 +5080,12 @@ def app():
     # Add in:
     priority_has = [
         "HA1", "HA2", "HA6", "HA7", "HA9", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18", "HA19", "HA24", "HA25",
-        "HA27", "HA28", "HA30", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA50", "HA56", "HA63", "HA107", "HA117"
+        "HA27", "HA28", "HA30", "HA31", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA50", "HA54", "HA56", "HA63",
+        "HA107", "HA117"
     ]
     # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come
     # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE], 13 [DONE],
-    # 35 [DONE], 56 [DONE], 19 [DONE], 18 [DONE], 9 [DONE], 27 [DONE], 34 [DONE], 30 [DONE]
+    # 35 [DONE], 56 [DONE], 19 [DONE], 18 [DONE], 9 [DONE], 27 [DONE], 34 [DONE], 30 [DONE], 31 [DONE], 54 [DONE]
     #
     # Consider for ECO4:
     # Consider for GBIS:

From 6a327629bf0ab5284b1b951cc98360597f30ce1f Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 12 Mar 2024 11:09:09 +0000
Subject: [PATCH 134/262] rough attempt to attribute surplus ciga dependent
 eco4 jobs

---
 .../ha_15_32/ha_analysis_batch_3.py           | 144 +++++++++++++-----
 1 file changed, 107 insertions(+), 37 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index d556450b..5ad1aa27 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -176,6 +176,10 @@ class DataLoader:
             "address": "Full Address",
             "postcode": "Postcode"
         },
+        "HA49": {
+            "address": "Property Address Full",
+            "postcode": "Property Postcode"
+        },
         "HA54": {
             "address": "Postal Address",
             "postcode": "matching_postcode"
@@ -219,7 +223,7 @@ class DataLoader:
 
     def create_asset_list_matching_address(self, ha_name, asset_list):
 
-        if ha_name in ["HA1", "HA6", "HA12", "HA16", "HA24", "HA30", "HA31", "HA48", "HA54"]:
+        if ha_name in ["HA1", "HA6", "HA12", "HA16", "HA24", "HA30", "HA31", "HA48", "HA49", "HA54"]:
             asset_list["matching_address"] = asset_list[
                 self.COLUMN_CONFIG[ha_name]["address"]
             ].astype(str).str.lower().str.strip()
@@ -382,6 +386,16 @@ class DataLoader:
                                              asset_list["Address2"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["PostCode"].astype(str).str.lower().str.strip()
             asset_list["matching_postcode"] = asset_list["PostCode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HAXX":
+            asset_list["matching_address"] = asset_list["Address"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["PostCode"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["PostCode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HAXXX":
+            asset_list["matching_address"] = (
+                asset_list["Combined Address"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Postcode"].astype(str).str.lower().str.strip()
+            )
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
         else:
             raise NotImplementedError("implement me")
 
@@ -467,6 +481,8 @@ class DataLoader:
             asset_list["HouseNo"] = asset_list["House_Number"].copy()
         elif ha_name == "HA9":
             asset_list["HouseNo"] = asset_list["House Number"].copy()
+        elif ha_name == "HAXXX":
+            asset_list["HouseNo"] = asset_list["Door Number"].copy()
         else:
             split_addresses = asset_list['matching_address'].str.split(',', expand=True)
             house_numbers = split_addresses[0].str.split(' ', expand=True)
@@ -1999,6 +2015,10 @@ class DataLoader:
 
         return survey_list
 
+    @staticmethod
+    def correct_ha49_survey_list(survey_list):
+        return survey_list
+
     @staticmethod
     def levenstein_match(matching_string, df):
         match_to = df["matching_address"].tolist()
@@ -5080,8 +5100,11 @@ def app():
     # Add in:
     priority_has = [
         "HA1", "HA2", "HA6", "HA7", "HA9", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18", "HA19", "HA24", "HA25",
-        "HA27", "HA28", "HA30", "HA31", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA50", "HA54", "HA56", "HA63",
-        "HA107", "HA117"
+        "HA27", "HA28", "HA30", "HA31", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA49", "HA50", "HA54", "HA56",
+        "HA63", "HA107", "HA117",
+
+        # New HAs
+        "HAXX", "HAXXX",
     ]
     # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come
     # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE], 13 [DONE],
@@ -5100,39 +5123,86 @@ def app():
 
     forecast_remaining_sales(loader)
 
-    # We load in the additional data required to perform the analysis
-    # cleaned = read_from_s3(
-    #     s3_file_name="cleaned_epc_data/cleaned.bson",
-    #     bucket_name="retrofit-data-dev"
-    # )
-    # cleaned = msgpack.unpackb(cleaned, raw=False)
-    # cleaned = patch_cleaned(cleaned)
-    #
-    # cleaning_data = read_dataframe_from_s3_parquet(
-    #     bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
-    # )
-    # created_at = datetime.now().isoformat()
-    #
-    # photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")
-    #
-    # outputs = get_epc_data(
-    #     loader=loader,
-    #     cleaned=cleaned,
-    #     cleaning_data=cleaning_data,
-    #     created_at=created_at,
-    #     photo_supply_lookup=photo_supply_lookup,
-    #     floor_area_decile_thresholds=floor_area_decile_thresholds,
-    #     pull_data=pull_data
-    # )
+    conversion_rate = 0.95
+    archetype_check_conversion = 0.7
+    res = []
+    for k, v in loader.data.items():
+        asset_list = v["asset_list"].copy()
+        agg = asset_list["ECO Eligibility"].value_counts()
+        # Skip any HA that has no eligibility label containing "passed" (i.e. no properties that passed CIGA)
+        if not any("passed" in x for x in agg.index):
+            continue
 
-    # import pickle
-    # with open("ha_analysis.pickle", "wb") as f:
-    #     pickle.dump({"outputs": outputs, "loader": loader}, f)
+        agg = pd.DataFrame(agg).reset_index()
 
-    # To read:
-    # import pickle
-    # with open("ha_analysis.pickle", "rb") as f:
-    #     outputs = pickle.load(f)["outputs"]
-    #
-    # with open("loader.pickle", "rb") as f:
-    #     loader = pickle.load(f)
+        passed_ciga = agg[agg["ECO Eligibility"] == "eco4 - passed ciga"]
+        passed_ciga = passed_ciga["count"].values[0] if not passed_ciga.empty else 0
+
+        failed_ciga = agg[agg["ECO Eligibility"] == "failed ciga"]
+        failed_ciga = failed_ciga["count"].values[0] if not failed_ciga.empty else 0
+
+        ciga_pass_rate = passed_ciga / (passed_ciga + failed_ciga) if (passed_ciga + failed_ciga) > 0 else 1
+
+        dormant_ciga = agg[
+            agg["ECO Eligibility"].str.contains("subject to ciga") &
+            ~agg["ECO Eligibility"].str.contains("subject to archetype")
+            ]
+
+        dormant_ciga = dormant_ciga['count'].values[0] if not dormant_ciga.empty else 0
+
+        dormant_ciga_archetype = agg[
+            agg["ECO Eligibility"].str.contains("subject to ciga") &
+            agg["ECO Eligibility"].str.contains("subject to archetype")
+            ]
+
+        dormant_ciga_archetype = dormant_ciga_archetype['count'].values[0] if not dormant_ciga_archetype.empty else 0
+
+        needing_check = dormant_ciga + dormant_ciga_archetype * archetype_check_conversion
+        needing_check = np.round(needing_check)
+
+        additional_jobs = (dormant_ciga * ciga_pass_rate * conversion_rate) + (
+            dormant_ciga_archetype * archetype_check_conversion * ciga_pass_rate * conversion_rate
+        )
+        additional_jobs = np.round(additional_jobs)
+
+        # We attempt to estimate the uplift and how much of that is attributed to surplus subject to ciga jobs
+        original_estimate = loader.december_figures[
+            loader.december_figures["HA Name"] == k
+            ]
+
+        original_estimate = original_estimate["ECO4"].values[0] if not original_estimate.empty else 0
+        base_eco_figures = agg[
+            agg["ECO Eligibility"].isin(["eco4", "eco4 - passed ciga"])
+        ]["count"].sum()
+        eco4_from_ciga = original_estimate - base_eco_figures
+        eco4_from_ciga = eco4_from_ciga if eco4_from_ciga > 0 else 0
+        surplus_from_dormant = additional_jobs - eco4_from_ciga
+        surplus_from_dormant = 0 if surplus_from_dormant < 0 else surplus_from_dormant
+
+        res.append(
+            {
+                "ha_name": k,
+                "additional_eco4": additional_jobs,
+                "needing_check": needing_check,
+                "surplus_from_dormant": surplus_from_dormant
+            }
+        )
+
+    res = pd.DataFrame(res)
+    # Drop the HAs that are not in the previous draft
+    # In the v2 draft, there are 12 HAs
+
+    v5_surplus = res[
+        ~res["ha_name"].isin(["HA9"])
+    ]["additional_eco4"].sum()
+    # 7212 properties
+    # This is not a perfect difference though, because of the variations in how the numbers are recorded in the November
+    # all HAs sheet. E.g for HA 107, there were 1239 properties identified. In the postcode list, there are 1255,
+    # however 531 are still needing a CIGA check. Therefore their original figures, in this case, included properties
+    # pre-CIGA
+
+    v5_surplus_from_dormant = res[
+        ~res["ha_name"].isin(["HA9"])
+    ]["surplus_from_dormant"].sum()
+    # 5539.0
+    # 9471690

From ddb5de50e550190c74cd5a2be767f2960352143a Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 14 Mar 2024 13:58:29 +0000
Subject: [PATCH 135/262] testing with another stupid effing method

---
 .idea/.gitignore                              |   2 +
 .../ha_15_32/ha_analysis_batch_3.py           | 230 +++++++++++++++++-
 .../epc_attributes/RoofAttributes.py          |  17 +-
 3 files changed, 241 insertions(+), 8 deletions(-)

diff --git a/.idea/.gitignore b/.idea/.gitignore
index 26d33521..8f00030d 100644
--- a/.idea/.gitignore
+++ b/.idea/.gitignore
@@ -1,3 +1,5 @@
 # Default ignored files
 /shelf/
 /workspace.xml
+# GitHub Copilot persisted chat sessions
+/copilot/chatSessions
diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 5ad1aa27..767e13c8 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -7,7 +7,9 @@ import msgpack
 from datetime import datetime
 import pandas as pd
 import numpy as np
-from utils.s3 import read_from_s3, read_dataframe_from_s3_parquet, save_pickle_to_s3, read_pickle_from_s3
+from utils.s3 import (
+    read_from_s3, read_dataframe_from_s3_parquet, save_pickle_to_s3, read_pickle_from_s3, save_dataframe_to_s3_parquet
+)
 from utils.logger import setup_logger
 from dotenv import load_dotenv
 from tqdm import tqdm
@@ -2860,8 +2862,8 @@ def get_property_type_and_built_form(property_meta, ha_name):
         property_type = PROPERTY_TYPE_LOOKUP[ha_name]["property_type"][property_meta["Dwelling type"]]
         built_form = property_meta["built_form"]
     elif ha_name == "HA7":
-        property_type = PROPERTY_TYPE_LOOKUP[ha_name]["property_type"][property_meta["Archetype"]]
-        built_form = PROPERTY_TYPE_LOOKUP[ha_name]["built_form"][property_meta["Property Type"]]
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name]["property_type"].get(property_meta["Archetype"])
+        built_form = PROPERTY_TYPE_LOOKUP[ha_name]["built_form"].get(property_meta["Property Type"])
     elif ha_name == "HA14":
         if property_meta["Asset Type Description"] == "Block - Repair":
             # We try and deduce if it's a flat or house, depending on if it has "room" or "flats" in the address
@@ -4429,6 +4431,12 @@ def forecast_remaining_sales(loader):
     for ha_name, input_data in loader.data.items():
         # Original warmfront figures - ECO4
         original_warmfront_estimates = december_figures[december_figures["HA Name"] == ha_name]
+        if original_warmfront_estimates.empty:
+            # No December figures for this HA: build a zero-filled placeholder row
+            original_warmfront_estimates = december_figures.head(1).copy()
+            for k in original_warmfront_estimates.columns:
+                original_warmfront_estimates[k] = 0
+            original_warmfront_estimates["HA Name"] = ha_name
 
         original_warmfront_eco4 = original_warmfront_estimates["ECO4"].values[0]
         original_warmfront_remaining_eco4 = original_warmfront_estimates["ECO4 remaining"].values[0]
@@ -4742,6 +4750,12 @@ def forecast_remaining_sales(loader):
         if gbis_variance_2 != 0:
             raise ValueError("Something went wrong in gbis_variance2")
 
+        # Update the GBIS sold, since Warmfront often sold more GBIS than expected
+        original_warmfront_gbis_revenue = original_warmfront_sold_gbis + original_warmfront_remaining_gbis_revenue
+        original_warmfront_gbis = (
+            original_warmfront_sold_gbis / gbis_rate + original_warmfront_remaining_gbis_revenue / gbis_rate
+        )
+
         to_append = {
             ("", "", "", "HA Name"): ha_name,
             # ECO4 - original warmfront figures
@@ -5077,6 +5091,216 @@ def forecast_remaining_sales(loader):
         results.to_csv(file, header=True, index=False)
 
 
+def fml_data_pull(loader):
+    has_bruh = ["HA7"]
+    from backend.SearchEpc import SearchEpc
+    epc_api_key = "a2Nvbm5rb3dsZXNzYXJAZ21haWwuY29tOjY5MGJiMWM0NmIyOGI5ZDUxYzAxMzQzYzNiZGNlZGJjZDNmODQwMzA="
+
+    for ha in has_bruh:
+        asset_list = loader.data[ha]["asset_list"].copy()
+        # properties found as eligibile
+        fml = asset_list[asset_list["ECO Eligibility"] != "not eligible"]
+
+        # For each property, search for the latest EPC
+        epc_data = []
+        for _, row in tqdm(fml.iterrows(), total=fml.shape[0]):
+            property_type, built_form = get_property_type_and_built_form(property_meta=row, ha_name=ha)
+            searcher = SearchEpc(
+                address1=row["HouseNo"],
+                postcode=row["matching_postcode"],
+                auth_token=epc_api_key,
+                os_api_key="",
+                property_type=property_type,
+                full_address=row["matching_address"],
+            )
+            searcher.ordnance_survey_client.property_type = property_type
+            searcher.ordnance_survey_client.built_form = built_form
+
+            searcher.find_property(skip_os=True)
+            if searcher.newest_epc is None:
+                continue
+
+            epc = {
+                "asset_list_row_id": row["asset_list_row_id"],
+                **searcher.newest_epc.copy()
+            }
+
+            epc_data.append(epc)
+
+        # Remove None entries
+        epc_data = [x for x in epc_data if x is not None]
+        # Save the data in S3 as a parquet
+        epc_data_df = pd.DataFrame(epc_data)
+        save_pickle_to_s3(
+            data=epc_data_df,
+            bucket_name="retrofit-datalake-dev",
+            s3_file_name=f"ha-analysis/revised/{ha}/epc_data.pickle"
+        )
+
+
+def extract_lower_bound(age_band):
+    if pd.isna(age_band):
+        return 1930
+    try:
+        return int(age_band.split(':')[1].split('-')[0].strip())
+    except (ValueError, IndexError):
+        return 1930
+
+
+def fml_analysis(loader):
+    from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
+    from etl.epc.DataProcessor import EPCDataProcessor
+    assumed_ciga_pass_rate = 0.731
+    has_bruh = ["HA7"]
+
+    results = []
+    for ha_name in has_bruh:
+
+        original_figures = loader.december_figures[
+            loader.december_figures["HA Name"] == ha_name
+            ].copy()
+        original_remaining = original_figures["ECO4 remaining"].values[0]
+
+        # Read in the epc data
+        asset_list = loader.data[ha_name]["asset_list"].copy()
+        # properties found as eligibile
+        fml = asset_list[asset_list["ECO Eligibility"] != "not eligible"]
+        epc_data = read_pickle_from_s3(
+            bucket_name="retrofit-datalake-dev",
+            s3_file_name=f"ha-analysis/revised/{ha_name}/epc_data.pickle"
+        )
+
+        fuck_this = fml.merge(
+            epc_data, how="left", on="asset_list_row_id"
+        )
+        if fuck_this.shape[0] != fml.shape[0]:
+            raise Exception("What the fuck bruv")
+
+        # Take just remaining
+        if not loader.data[ha_name]["survey_list"].empty:
+            raise NotImplementedError("TAKE JUST REMAINING IDIOT")
+
+        insulation_thicknesses = []
+        for _, x in fuck_this.iterrows():
+            if pd.isnull(x["roof-description"]):
+                continue
+            thickness = RoofAttributes(x["roof-description"]).process()["insulation_thickness"]
+            # If there is a + in the thickness, strip it out
+            thickness = str(thickness).replace("+", "")
+            insulation_thicknesses.append(
+                {'uprn': x["uprn"], "roof_insulation_thickness": thickness}
+            )
+        insulation_thicknesses = pd.DataFrame(insulation_thicknesses)
+
+        fuck_this = fuck_this.merge(insulation_thicknesses, how="left", on="uprn")
+        # clean roof insulation
+        fuck_this["roof_insulation_thickness"] = fuck_this["roof_insulation_thickness"].fillna("0")
+        fuck_this["roof_insulation_thickness"] = fuck_this[
+            "roof_insulation_thickness"
+        ].str.replace("below average", "50")
+        fuck_this["roof_insulation_thickness"] = fuck_this[
+            "roof_insulation_thickness"
+        ].str.replace("None", "0")
+        fuck_this["roof_insulation_thickness"] = fuck_this[
+            "roof_insulation_thickness"
+        ].str.replace("none", "0")
+        fuck_this["roof_insulation_thickness"] = fuck_this[
+            "roof_insulation_thickness"
+        ].str.replace("average", "150")
+
+        fuck_this["construction-age-band"] = fuck_this["construction-age-band"].apply(
+            lambda x: EPCDataProcessor.clean_construction_age_band(x)
+        )
+
+        fuck_this['age_lower_bound'] = fuck_this['construction-age-band'].apply(extract_lower_bound)
+
+        had_survey = fuck_this[pd.isnull(fuck_this["estimated"])]
+
+        # proportion with a survey:
+        proportion_with_survey = 100 * had_survey.shape[0] / fuck_this.shape[0]
+
+        # Let's look just at the ECO4 business
+        # For things that had a survey, take the properties that didn't need a CIGA check
+        no_ciga_check_needed = had_survey[
+            had_survey["ECO Eligibility"] == "eco4"
+            ]
+
+        no_ciga_check_needed_with_archetype = no_ciga_check_needed[
+            (no_ciga_check_needed["walls-description"].str.lower().str.contains("cavity") == True) &
+            (no_ciga_check_needed["roof-description"].str.lower().str.contains("pitched") == True) &
+            (no_ciga_check_needed["current-energy-efficiency"].astype(float) <= 80)
+            ]
+        if not no_ciga_check_needed_with_archetype.empty:
+            raise Exception("SORT ME OUT")
+
+        # Characterise no CIGA check needed
+
+        # TODO: WHAT ABOUT PASSED CIGA - don't need to apply the further deduction
+
+        ciga_check_needed = had_survey[
+            had_survey["ECO Eligibility"].str.contains("subject to ciga")
+        ]
+
+        # We take just the cavity walls
+        # UCL paper: https://discovery.ucl.ac.uk/id/eprint/10110371/
+        # This paper is based on London properties
+        # The proportion of EPCs with building characteristics errors are shown to
+        # differ between variables; floor and wall type errors occur in ~10-15% of EPCs,
+        # compared with ~5% for wall insulation and glazing performance
+
+        ciga_check_needed_with_archetype = ciga_check_needed[
+            (ciga_check_needed["walls-description"].str.lower().str.contains("cavity") == True) &
+            (ciga_check_needed["roof-description"].str.lower().str.contains("pitched") == True) &
+            (ciga_check_needed["current-energy-efficiency"].astype(float) <= 80)
+            ]
+
+        # We take properties that could feasibly be within install regions
+        ciga_check_needed_plausible = ciga_check_needed_with_archetype[
+            ciga_check_needed_with_archetype["roof_insulation_thickness"].astype(float) < 270
+            ]
+
+        if not loader.data[ha_name]["ciga_list"].empty:
+            raise NotImplementedError("SORT OUT THE CIGA BRUV")
+        else:
+            ha_ciga_pass_rate = assumed_ciga_pass_rate
+
+        ciga_check_expectation = np.round(ciga_check_needed_plausible.shape[0] * ha_ciga_pass_rate)
+        without_ciga_expectation = no_ciga_check_needed_with_archetype.shape[0]
+
+        # Need to add on the non-ciga
+        total_expectation = ciga_check_expectation + without_ciga_expectation
+
+        if proportion_with_survey < 100:
+            # We estimate the rest
+            without_survey_needing_ciga = fuck_this[
+                (pd.isnull(fuck_this["estimated"]) == False) &
+                (fuck_this["ECO Eligibility"].str.contains("subject to ciga") == True)
+                ]
+
+            # We apply the same conversion rate as the properties with a survey
+            without_survey_without_ciga_expected = np.round(
+                without_survey_needing_ciga.shape[0] * (ciga_check_expectation / ciga_check_needed.shape[0])
+            )
+
+            total_expectation += without_survey_without_ciga_expected
+
+            without_survey_without_ciga = fuck_this[
+                (pd.isnull(fuck_this["estimated"]) == False) & (fuck_this["ECO Eligibility"].isin(["eco4"]))
+                ]
+
+            if not without_survey_without_ciga.empty:
+                raise Exception("Estimate the rest!!")
+
+        results.append(
+            {
+                "HA Name": ha_name,
+                "Original ECO4 Estimate - Remaining": original_remaining,
+                "Proportion with a survey": proportion_with_survey,
+                "total_expectation": total_expectation
+            }
+        )
+
+
 def app():
     """
     This app contains the housin association analysis for HAs 1, 6, 14, 39 and 107.
diff --git a/etl/epc_clean/epc_attributes/RoofAttributes.py b/etl/epc_clean/epc_attributes/RoofAttributes.py
index 9d3b46b4..76f99f09 100644
--- a/etl/epc_clean/epc_attributes/RoofAttributes.py
+++ b/etl/epc_clean/epc_attributes/RoofAttributes.py
@@ -122,6 +122,13 @@ class RoofAttributes(Definitions):
         result["is_valid"] = "invalid" not in description
         description = description.replace("invalid", "")
 
+        # We handle an edge case where the description is "pitched, 150  loft insulation" and is missing the mm
+        if result["is_pitched"] or result["is_loft"]:
+            # Search for a regular expression that matches 150   insulation
+            match = re.search(r"(\d+\+?)\s*insulation", description)
+            if match:
+                result['insulation_thickness'] = match.group(1)
+
         # insulation thickness
         thickness_map = {
             "ceiling insulated": "average",
@@ -137,11 +144,11 @@ class RoofAttributes(Definitions):
                 # Remove the match from the description
                 # description = description.replace(key, "")
                 break
-        else:
-            # Extract insulation thickness in mm, if present
-            match = re.search(r'(\d+\+?)\s*mm', description)
-            if match:
-                result['insulation_thickness'] = match.group(1)
+
+        # Extract insulation thickness in mm, if present
+        match = re.search(r'(\d+\+?)\s*mm', description)
+        if match:
+            result['insulation_thickness'] = match.group(1)
 
         if "insulation_thickness" not in result:
             result['insulation_thickness'] = None

From bee07a253b8285a67c4cb78b9051e2b000de30c0 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 14 Mar 2024 16:10:55 +0000
Subject: [PATCH 136/262] new method wip

---
 .../ha_15_32/ha_analysis_batch_3.py           | 125 +++++++++++++++---
 1 file changed, 105 insertions(+), 20 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 767e13c8..9cadaf9f 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -92,6 +92,27 @@ PROPERTY_TYPE_LOOKUP = {
         'Flat Over Shop': {"property-type": "Flat", "built-form": "Mid-Terrace"},
         'Mid Terraced Town House': {"property-type": "House", "built-form": "Mid-Terrace"},
     },
+    "HA25": {
+        'Flat': 'Flat',
+        'Mid Terrace House': 'House',
+        'Semi Detached House': 'House',
+        'End Terrace House': 'House',
+        'House': 'House',
+        'Semi Detached Bung': 'Bungalow',
+        'Bungalow': 'Bungalow',
+        'End Terrace Bungalow': 'Bungalow',
+        'Maisonnette': 'Maisonette',
+        'Mid Terrace Bungalow': 'Bungalow',
+        'Bedspace': None,
+        'Detached House': 'House',
+        'Bedsit': 'Flat',
+        'Coach House': 'House',
+        'Detached Bungalow': 'Bungalow',
+        'Office Buildings': None,
+        'Guest Room': None,
+        'Mid Terrace Housekeeping ': 'House',
+        'End Terrace Housex': 'House'
+    },
     "HA39": {
         "Semi house": {"property_type": "House", "built_form": "Semi-Detached"},
         "1st floor flat": {"property_type": "Flat", "built_form": None},
@@ -2877,6 +2898,9 @@ def get_property_type_and_built_form(property_meta, ha_name):
                 property_meta["Asset Type Description"]
             ]
 
+        built_form = None
+    elif ha_name == "HA25":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name][property_meta["T1_AssetType"]]
         built_form = None
     elif ha_name == "HA16":
         config = PROPERTY_TYPE_LOOKUP[ha_name][property_meta["Type"]]
@@ -5092,7 +5116,8 @@ def forecast_remaining_sales(loader):
 
 
 def fml_data_pull(loader):
-    has_bruh = ["HA7"]
+    has_bruh = ["HA7", "HA14", "HA25", "HA39", "HA16"]
+    # DO
     from backend.SearchEpc import SearchEpc
     epc_api_key = "a2Nvbm5rb3dsZXNzYXJAZ21haWwuY29tOjY5MGJiMWM0NmIyOGI5ZDUxYzAxMzQzYzNiZGNlZGJjZDNmODQwMzA="
 
@@ -5104,7 +5129,7 @@ def fml_data_pull(loader):
         # For each property, search for the latest EPC
         epc_data = []
         for _, row in tqdm(fml.iterrows(), total=fml.shape[0]):
-            property_type, built_form = get_property_type_and_built_form(property_meta=row, ha_name=ha)
+            property_type, _ = get_property_type_and_built_form(property_meta=row, ha_name=ha)
             searcher = SearchEpc(
                 address1=row["HouseNo"],
                 postcode=row["matching_postcode"],
@@ -5113,8 +5138,9 @@ def fml_data_pull(loader):
                 property_type=property_type,
                 full_address=row["matching_address"],
             )
-            searcher.ordnance_survey_client.property_type = property_type
-            searcher.ordnance_survey_client.built_form = built_form
+            # Force the skipping of estimating the EPC
+            searcher.ordnance_survey_client.property_type = None
+            searcher.ordnance_survey_client.built_form = None
 
             searcher.find_property(skip_os=True)
             if searcher.newest_epc is None:
@@ -5147,11 +5173,32 @@ def extract_lower_bound(age_band):
         return 1930
 
 
+def classify_loft(x):
+    # high confidence
+    if float(x["roof_insulation_thickness"]) <= 100:
+        return "high"
+
+    if float(x["roof_insulation_thickness"]) <= 200:
+        return "medium"
+
+    if float(x["roof_insulation_thickness"]) <= 270 and x["epc_age"] >= 5 * 365:
+        return "medium"
+
+    return "unlikely"
+
+
 def fml_analysis(loader):
     from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
     from etl.epc.DataProcessor import EPCDataProcessor
+    from datetime import datetime
     assumed_ciga_pass_rate = 0.731
-    has_bruh = ["HA7"]
+    has_bruh = ["HA7", "HA14", "HA25", "HA39", "HA16"]
+
+    no_ciga_cavity_descriptions = [
+        "Cavity wall, as built, insulated (assumed)",
+        "Cavity wall, as built, no insulation (assumed)",
+        "Cavity wall, as built, partial insulation (assumed)"
+    ]
 
     results = []
     for ha_name in has_bruh:
@@ -5170,6 +5217,11 @@ def fml_analysis(loader):
             s3_file_name=f"ha-analysis/revised/{ha_name}/epc_data.pickle"
         )
 
+        # time from the inspection to now
+        epc_data["epc_age"] = (datetime.now() - pd.to_datetime(epc_data["inspection-date"])).dt.days
+        if "estimated" not in epc_data.columns:
+            epc_data["estimated"] = None
+
         fuck_this = fml.merge(
             epc_data, how="left", on="asset_list_row_id"
         )
@@ -5178,12 +5230,27 @@ def fml_analysis(loader):
 
         # Take just remaining
         if not loader.data[ha_name]["survey_list"].empty:
-            raise NotImplementedError("TAKE JUST REMAINING IDIOT")
+            survey_list = (
+                loader.data[ha_name]["survey_list"][
+                    ~pd.isnull(loader.data[ha_name]["survey_list"]["asset_list_row_id"])
+                ]
+            )
+            fuck_this = fuck_this.merge(
+                survey_list[["asset_list_row_id", "installation_status"]],
+                how="left",
+                on="asset_list_row_id"
+            )
+            # Anything that has an installation has gone to installation, and therefore is not remaining
+            fuck_this = fuck_this[pd.isnull(fuck_this["installation_status"])]
+            fuck_this = fuck_this.drop(columns=["installation_status"])
 
         insulation_thicknesses = []
         for _, x in fuck_this.iterrows():
             if pd.isnull(x["roof-description"]):
                 continue
+            if x["roof-description"] == "SAP05:Roof":
+                continue
+
             thickness = RoofAttributes(x["roof-description"]).process()["insulation_thickness"]
             # If there is a + in the thickness, strip it out
             thickness = str(thickness).replace("+", "")
@@ -5208,11 +5275,13 @@ def fml_analysis(loader):
             "roof_insulation_thickness"
         ].str.replace("average", "150")
 
-        fuck_this["construction-age-band"] = fuck_this["construction-age-band"].apply(
-            lambda x: EPCDataProcessor.clean_construction_age_band(x)
-        )
+        fuck_this["roof_classiciation"] = fuck_this.apply(lambda x: classify_loft(x), axis=1)
 
-        fuck_this['age_lower_bound'] = fuck_this['construction-age-band'].apply(extract_lower_bound)
+        # fuck_this["construction-age-band"] = fuck_this["construction-age-band"].apply(
+        #     lambda x: EPCDataProcessor.clean_construction_age_band(x)
+        # )
+        #
+        # fuck_this['age_lower_bound'] = fuck_this['construction-age-band'].apply(extract_lower_bound)
 
         had_survey = fuck_this[pd.isnull(fuck_this["estimated"])]
 
@@ -5225,9 +5294,23 @@ def fml_analysis(loader):
             had_survey["ECO Eligibility"] == "eco4"
             ]
 
+        # Walls:
+        # Cavity wall, as built, insulated (assumed)
+        # Cavity wall, as built, no insulation (assumed)
+        # Cavity wall, as built, partial insulation (assumed)
+
+        # Roof:
+        # Less than 100mm = high confidence
+        # Less than 270mm & EPC at least 5 years old = medium confidence
+        # Otherwise, low confidence
+
+        # SAP criteria is EPC C or below
+
+        # Pre is 54 or below
+
         no_ciga_check_needed_with_archetype = no_ciga_check_needed[
-            (no_ciga_check_needed["walls-description"].str.lower().str.contains("cavity") == True) &
-            (no_ciga_check_needed["roof-description"].str.lower().str.contains("pitched") == True) &
+            (no_ciga_check_needed["walls-description"].isin(no_ciga_cavity_descriptions)) &
+            (no_ciga_check_needed["roof_classiciation"].isin(["high", "medium"])) &
             (no_ciga_check_needed["current-energy-efficiency"].astype(float) <= 80)
             ]
         if not no_ciga_check_needed_with_archetype.empty:
@@ -5239,7 +5322,14 @@ def fml_analysis(loader):
 
         ciga_check_needed = had_survey[
             had_survey["ECO Eligibility"].str.contains("subject to ciga")
-        ]
+        ].copy()
+
+        ciga_check_passed = had_survey[
+            had_survey["ECO Eligibility"] == "eco4 - passed ciga"
+            ]
+
+        if not ciga_check_passed.empty:
+            raise Exception("SORT ME BRUV")
 
         # We take just the cavity walls
         # UCL paper: https://discovery.ucl.ac.uk/id/eprint/10110371/
@@ -5248,17 +5338,12 @@ def fml_analysis(loader):
         # differ between variables; floor and wall type errors occur in ~10-15% of EPCs,
         # compared with ~5% for wall insulation and glazing performance
 
-        ciga_check_needed_with_archetype = ciga_check_needed[
+        ciga_check_needed_plausible = ciga_check_needed[
             (ciga_check_needed["walls-description"].str.lower().str.contains("cavity") == True) &
-            (ciga_check_needed["roof-description"].str.lower().str.contains("pitched") == True) &
+            (ciga_check_needed["roof_classiciation"].isin(["high", "medium"])) &
             (ciga_check_needed["current-energy-efficiency"].astype(float) <= 80)
             ]
 
-        # We take properties that could feasibly be within install regions
-        ciga_check_needed_plausible = ciga_check_needed_with_archetype[
-            ciga_check_needed_with_archetype["roof_insulation_thickness"].astype(float) < 270
-            ]
-
         if not loader.data[ha_name]["ciga_list"].empty:
             raise NotImplementedError("SORT OUT THE CIGA BRUV")
         else:

From 9b255029b3f58d9f8653aaf1bbbd0cc43b024803 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 14 Mar 2024 17:36:09 +0000
Subject: [PATCH 137/262] fml fml

---
 .../ha_15_32/ha_analysis_batch_3.py           | 141 ++++++++++++------
 1 file changed, 96 insertions(+), 45 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 9cadaf9f..e1d7db4d 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -20,6 +20,9 @@ from backend.ml_models.api import ModelApi
 from etl.solar.SolarPhotoSupply import SolarPhotoSupply
 from recommendations.recommendation_utils import calculate_cavity_age
 from etl.epc.Record import EPCRecord
+from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
+from etl.epc.DataProcessor import EPCDataProcessor
+from datetime import datetime
 
 EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
 ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env"
@@ -5188,9 +5191,6 @@ def classify_loft(x):
 
 
 def fml_analysis(loader):
-    from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
-    from etl.epc.DataProcessor import EPCDataProcessor
-    from datetime import datetime
     assumed_ciga_pass_rate = 0.731
     has_bruh = ["HA7", "HA14", "HA25", "HA39", "HA16"]
 
@@ -5216,15 +5216,20 @@ def fml_analysis(loader):
             bucket_name="retrofit-datalake-dev",
             s3_file_name=f"ha-analysis/revised/{ha_name}/epc_data.pickle"
         )
+        # We make sure we don't have duplicated. We do a super basic drop duplicates because it shouldn't be a huge
+        # issue at this point
+        epc_data = epc_data.drop_duplicates("uprn")
 
         # time from the inspection to now
         epc_data["epc_age"] = (datetime.now() - pd.to_datetime(epc_data["inspection-date"])).dt.days
         if "estimated" not in epc_data.columns:
-            epc_data["estimated"] = None
+            # For all after HA7, we don't use estimated surveys
+            epc_data["estimated"] = False
 
         fuck_this = fml.merge(
             epc_data, how="left", on="asset_list_row_id"
         )
+        fuck_this["estimated"] = fuck_this["estimated"].fillna(True)
         if fuck_this.shape[0] != fml.shape[0]:
             raise Exception("What the fuck bruv")
 
@@ -5259,7 +5264,15 @@ def fml_analysis(loader):
             )
         insulation_thicknesses = pd.DataFrame(insulation_thicknesses)
 
+        before_merge_shape = fuck_this.shape[0]
         fuck_this = fuck_this.merge(insulation_thicknesses, how="left", on="uprn")
+
+        if fuck_this.shape[0] != before_merge_shape:
+            raise Exception("SOMETHING WENT WRONG")
+
+        if any(fuck_this["ECO Eligibility"].str.contains("subject to archetype")):
+            blah
+
         # clean roof insulation
         fuck_this["roof_insulation_thickness"] = fuck_this["roof_insulation_thickness"].fillna("0")
         fuck_this["roof_insulation_thickness"] = fuck_this[
@@ -5283,7 +5296,7 @@ def fml_analysis(loader):
         #
         # fuck_this['age_lower_bound'] = fuck_this['construction-age-band'].apply(extract_lower_bound)
 
-        had_survey = fuck_this[pd.isnull(fuck_this["estimated"])]
+        had_survey = fuck_this[fuck_this["estimated"] == False]
 
         # proportion with a survey:
         proportion_with_survey = 100 * had_survey.shape[0] / fuck_this.shape[0]
@@ -5294,27 +5307,11 @@ def fml_analysis(loader):
             had_survey["ECO Eligibility"] == "eco4"
             ]
 
-        # Walls:
-        # Cavity wall, as built, insulated (assumed)
-        # Cavity wall, as built, no insulation (assumed)
-        # Cavity wall, as built, partial insulation (assumed)
-
-        # Roof:
-        # Less than 100mm = high confidence
-        # Less than 270mm & EPC at least 5 years old = medium confidence
-        # Otherwise, low confidence
-
-        # SAP criteria is EPC C or below
-
-        # Pre is 54 or below
-
-        no_ciga_check_needed_with_archetype = no_ciga_check_needed[
+        no_ciga_check_needed_eligible = no_ciga_check_needed[
             (no_ciga_check_needed["walls-description"].isin(no_ciga_cavity_descriptions)) &
             (no_ciga_check_needed["roof_classiciation"].isin(["high", "medium"])) &
             (no_ciga_check_needed["current-energy-efficiency"].astype(float) <= 80)
             ]
-        if not no_ciga_check_needed_with_archetype.empty:
-            raise Exception("SORT ME OUT")
 
         # Characterise no CIGA check needed
 
@@ -5327,9 +5324,20 @@ def fml_analysis(loader):
         ciga_check_passed = had_survey[
             had_survey["ECO Eligibility"] == "eco4 - passed ciga"
             ]
+        # These should be treated the same as one that have passed their ciga checks, from a detection perspective
+        ciga_check_passed_eligible = ciga_check_passed[
+            (ciga_check_passed["walls-description"].str.lower().str.contains("cavity") == True) &
+            (ciga_check_passed["roof_classiciation"].isin(["high", "medium"])) &
+            (ciga_check_passed["current-energy-efficiency"].astype(float) <= 80)
+            ]
 
-        if not ciga_check_passed.empty:
-            raise Exception("SORT ME BRUV")
+        if not loader.data[ha_name]["ciga_list"].empty:
+
+            proportions = loader.data[ha_name]["ciga_list"]["Guarantee"].value_counts(normalize=True)
+            ha_ciga_pass_rate = proportions[proportions.index == "No"].values[0]
+
+        else:
+            ha_ciga_pass_rate = assumed_ciga_pass_rate
 
         # We take just the cavity walls
         # UCL paper: https://discovery.ucl.ac.uk/id/eprint/10110371/
@@ -5338,53 +5346,96 @@ def fml_analysis(loader):
         # differ between variables; floor and wall type errors occur in ~10-15% of EPCs,
         # compared with ~5% for wall insulation and glazing performance
 
-        ciga_check_needed_plausible = ciga_check_needed[
+        ciga_check_needed_eligible = ciga_check_needed[
             (ciga_check_needed["walls-description"].str.lower().str.contains("cavity") == True) &
             (ciga_check_needed["roof_classiciation"].isin(["high", "medium"])) &
             (ciga_check_needed["current-energy-efficiency"].astype(float) <= 80)
             ]
 
-        if not loader.data[ha_name]["ciga_list"].empty:
-            raise NotImplementedError("SORT OUT THE CIGA BRUV")
-        else:
-            ha_ciga_pass_rate = assumed_ciga_pass_rate
-
-        ciga_check_expectation = np.round(ciga_check_needed_plausible.shape[0] * ha_ciga_pass_rate)
-        without_ciga_expectation = no_ciga_check_needed_with_archetype.shape[0]
+        ciga_check_expectation = np.round(ciga_check_needed_eligible.shape[0] * ha_ciga_pass_rate)
+        without_ciga_expectation = no_ciga_check_needed_eligible.shape[0]
+        passed_ciga_expectation = ciga_check_passed_eligible.shape[0]
 
         # Need to add on the non-ciga
-        total_expectation = ciga_check_expectation + without_ciga_expectation
+        total_expectation = ciga_check_expectation + without_ciga_expectation + passed_ciga_expectation
 
         if proportion_with_survey < 100:
             # We estimate the rest
             without_survey_needing_ciga = fuck_this[
-                (pd.isnull(fuck_this["estimated"]) == False) &
+                (fuck_this["estimated"] == True) &
                 (fuck_this["ECO Eligibility"].str.contains("subject to ciga") == True)
                 ]
 
-            # We apply the same conversion rate as the properties with a survey
-            without_survey_without_ciga_expected = np.round(
-                without_survey_needing_ciga.shape[0] * (ciga_check_expectation / ciga_check_needed.shape[0])
-            )
+            if without_survey_needing_ciga.empty:
+                without_survey_without_ciga_expected = 0
+            else:
+                # We apply the same conversion rate as the properties with a survey
+                without_survey_without_ciga_expected = np.round(
+                    without_survey_needing_ciga.shape[0] * (ciga_check_expectation / ciga_check_needed.shape[0])
+                )
 
-            total_expectation += without_survey_without_ciga_expected
-
-            without_survey_without_ciga = fuck_this[
-                (pd.isnull(fuck_this["estimated"]) == False) & (fuck_this["ECO Eligibility"].isin(["eco4"]))
+            without_survey_passed_ciga = fuck_this[
+                (fuck_this["estimated"] == True) &
+                (fuck_this["ECO Eligibility"] == "eco4 - passed ciga")
                 ]
 
-            if not without_survey_without_ciga.empty:
-                raise Exception("Estimate the rest!!")
+            if without_survey_passed_ciga.empty:
+                without_survey_passed_ciga_expected = 0
+            else:
+                # We apply the same conversion rate as the properties with a survey
+                without_survey_passed_ciga_expected = np.round(
+                    without_survey_passed_ciga.shape[0] * (passed_ciga_expectation / ciga_check_passed.shape[0])
+                )
+
+            # Finally, no ciga needed
+            without_survey_eco4 = fuck_this[
+                (fuck_this["estimated"] == True) &
+                (fuck_this["ECO Eligibility"] == "eco4")
+                ]
+
+            if without_survey_eco4.empty:
+                without_survey_eco4_expected = 0
+            else:
+                # We apply the same conversion rate as the properties with a survey
+                without_survey_eco4_expected = np.round(
+                    without_survey_eco4.shape[0] * (without_ciga_expectation / no_ciga_check_needed.shape[0])
+                )
+
+            total_expectation = (
+                total_expectation +
+                without_survey_without_ciga_expected +
+                without_survey_passed_ciga_expected +
+                without_survey_eco4_expected
+            )
+
+        surveys = loader.data[ha_name]["survey_list"]
+        sold_now = 0
+        if not surveys.empty:
+            sold_now = surveys[
+                surveys["installation_status"].str.lower().str.contains("eco4")
+            ].shape[0]
+
+        sales_since_nov = sold_now - original_figures["No. of Tech surveys complete - Eco 4"].values[0]
 
         results.append(
             {
                 "HA Name": ha_name,
                 "Original ECO4 Estimate - Remaining": original_remaining,
+                "Of which sold": sales_since_nov,
+                "Of which ECO4 Eligible - Remaining": int(total_expectation),
                 "Proportion with a survey": proportion_with_survey,
-                "total_expectation": total_expectation
             }
         )
 
+    results_df = pd.DataFrame(results)
+
+    results_df["Delta vs November"] = 100 * (
+        results_df["Of which ECO4 Eligible - Remaining"] - results_df["Original ECO4 Estimate - Remaining"]
+    ) / results_df["Original ECO4 Estimate - Remaining"]
+
+    # TODO: Split into high and low confidence?
+    #
+
 
 def app():
     """

From 3b65a71793721d65fd8356c215813a13d384bc4d Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 14 Mar 2024 18:25:50 +0000
Subject: [PATCH 138/262] added in extra shit to output

---
 .../ha_15_32/ha_analysis_batch_3.py           | 47 ++++++++++++++++---
 1 file changed, 41 insertions(+), 6 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index e1d7db4d..53ce69e2 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -5200,6 +5200,22 @@ def fml_analysis(loader):
         "Cavity wall, as built, partial insulation (assumed)"
     ]
 
+    codes = [
+        "HA39", "HA14", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA7",
+        "HA16", "HA107", "HA25", "HA50", "HA41", "HA48", "HA2", "HA63", "HA12",
+        "HA117", "HA13", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27",
+        "HA30", "HA31", "HA54", "HAXX", "HA49", "HAXXX"
+    ]
+
+    values = [
+        706, 2161, 1053, 793, 0, 656, 1200, 1647, 4248, 2703, 1087, 1876, 2135,
+        1078, 775, 538, 518, 401, 466, 2627, 98, 1050, 524, 191, 538, 384, 204,
+        281, 422, 74, 313, 71, 6
+    ]
+
+    # Create a dictionary mapping
+    remaining_eligible_mapping = dict(zip(codes, values))
+
     results = []
     for ha_name in has_bruh:
 
@@ -5207,6 +5223,7 @@ def fml_analysis(loader):
             loader.december_figures["HA Name"] == ha_name
             ].copy()
         original_remaining = original_figures["ECO4 remaining"].values[0]
+        postcode_list_remaining = remaining_eligible_mapping[ha_name]
 
         # Read in the epc data
         asset_list = loader.data[ha_name]["asset_list"].copy()
@@ -5271,7 +5288,7 @@ def fml_analysis(loader):
             raise Exception("SOMETHING WENT WRONG")
 
         if any(fuck_this["ECO Eligibility"].str.contains("subject to archetype")):
-            blah
+            raise Exception("DO THE DAMN ARCHETYPE CHECK BRO")
 
         # clean roof insulation
         fuck_this["roof_insulation_thickness"] = fuck_this["roof_insulation_thickness"].fillna("0")
@@ -5313,6 +5330,13 @@ def fml_analysis(loader):
             (no_ciga_check_needed["current-energy-efficiency"].astype(float) <= 80)
             ]
 
+        # For anything not needing a CIGA check, some of it will be GBIS
+        no_ciga_check_needed_eligible_gbis = no_ciga_check_needed[
+            (no_ciga_check_needed["walls-description"].isin(no_ciga_cavity_descriptions)) &
+            (no_ciga_check_needed["current-energy-efficiency"].astype(float) <= 80) &
+            (~no_ciga_check_needed["asset_list_row_id"].isin(no_ciga_check_needed_eligible["asset_list_row_id"].values))
+            ]
+
         # Characterise no CIGA check needed
 
         # TODO: WHAT ABOUT PASSED CIGA - don't need to apply the further deduction
@@ -5359,6 +5383,8 @@ def fml_analysis(loader):
         # Need to add on the non-ciga
         total_expectation = ciga_check_expectation + without_ciga_expectation + passed_ciga_expectation
 
+        total_gbis_expectation = no_ciga_check_needed_eligible_gbis.shape[0]
+
         if proportion_with_survey < 100:
             # We estimate the rest
             without_survey_needing_ciga = fuck_this[
@@ -5395,12 +5421,17 @@ def fml_analysis(loader):
 
             if without_survey_eco4.empty:
                 without_survey_eco4_expected = 0
+                without_survey_gbis_expected = 0
             else:
                 # We apply the same conversion rate as the properties with a survey
                 without_survey_eco4_expected = np.round(
                     without_survey_eco4.shape[0] * (without_ciga_expectation / no_ciga_check_needed.shape[0])
                 )
 
+                without_survey_gbis_expected = np.round(
+                    without_survey_eco4.shape[0] * (total_gbis_expectation / no_ciga_check_needed.shape[0])
+                )
+
             total_expectation = (
                 total_expectation +
                 without_survey_without_ciga_expected +
@@ -5408,6 +5439,8 @@ def fml_analysis(loader):
                 without_survey_eco4_expected
             )
 
+            total_gbis_expectation = total_gbis_expectation + without_survey_gbis_expected
+
         surveys = loader.data[ha_name]["survey_list"]
         sold_now = 0
         if not surveys.empty:
@@ -5421,20 +5454,22 @@ def fml_analysis(loader):
             {
                 "HA Name": ha_name,
                 "Original ECO4 Estimate - Remaining": original_remaining,
+                "Postcode List - Remaining": postcode_list_remaining,
                 "Of which sold": sales_since_nov,
                 "Of which ECO4 Eligible - Remaining": int(total_expectation),
+                "Of which GBIS Eligibile - Remaining": int(total_gbis_expectation),
                 "Proportion with a survey": proportion_with_survey,
             }
         )
 
     results_df = pd.DataFrame(results)
 
-    results_df["Delta vs November"] = 100 * (
-        results_df["Of which ECO4 Eligible - Remaining"] - results_df["Original ECO4 Estimate - Remaining"]
-    ) / results_df["Original ECO4 Estimate - Remaining"]
+    # results_df["Delta vs November"] = 100 * (
+    #     results_df["Of which ECO4 Eligible - Remaining"] - results_df["Original ECO4 Estimate - Remaining"]
+    # ) / results_df["Original ECO4 Estimate - Remaining"]
 
-    # TODO: Split into high and low confidence?
-    #
+    # TODO: Add in estimated GBIS (for eco jobs, of which look like gbis)
+    # TODO: Change the left hand side number for our post CIGA estimates
 
 
 def app():

From 479a2b08c33e2911a5ae98c3d315903af04e4980 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 14 Mar 2024 19:02:33 +0000
Subject: [PATCH 139/262] ffs

---
 .../ha_15_32/ha_analysis_batch_3.py           | 22 +++++++++++++++++--
 etl/epc_clean/app.py                          |  3 +++
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 53ce69e2..9462642f 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -5119,7 +5119,9 @@ def forecast_remaining_sales(loader):
 
 
 def fml_data_pull(loader):
-    has_bruh = ["HA7", "HA14", "HA25", "HA39", "HA16"]
+    has_bruh = ["HA7", "HA14", "HA25", "HA39", "HA16",
+                # Do these
+                "HA1", "HA13", "HA50", "HA24"]
     # DO
     from backend.SearchEpc import SearchEpc
     epc_api_key = "a2Nvbm5rb3dsZXNzYXJAZ21haWwuY29tOjY5MGJiMWM0NmIyOGI5ZDUxYzAxMzQzYzNiZGNlZGJjZDNmODQwMzA="
@@ -5197,9 +5199,19 @@ def fml_analysis(loader):
     no_ciga_cavity_descriptions = [
         "Cavity wall, as built, insulated (assumed)",
         "Cavity wall, as built, no insulation (assumed)",
-        "Cavity wall, as built, partial insulation (assumed)"
+        "Cavity wall, as built, partial insulation (assumed)",
+        "Cavity wall, no insulation (assumed)",
+        "Cavity wall, partial insulation (assumed)",
+        "Cavity wall,",
+        "Cavity wall, insulated (assumed)",
+        "Cavity wall, no insulation (assumed)",
+        "Cavity wall, as built, insulated (assumed)",
+        "Cavity wall, partial insulation (assumed)",
     ]
 
+    # TODO: There will be some properties that are subject to CIGA that do not look like they ned a CIGA check! pass
+    #  them!
+
     codes = [
         "HA39", "HA14", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA7",
         "HA16", "HA107", "HA25", "HA50", "HA41", "HA48", "HA2", "HA63", "HA12",
@@ -5217,6 +5229,7 @@ def fml_analysis(loader):
     remaining_eligible_mapping = dict(zip(codes, values))
 
     results = []
+    wall_descriptions = []
     for ha_name in has_bruh:
 
         original_figures = loader.december_figures[
@@ -5236,6 +5249,7 @@ def fml_analysis(loader):
         # We make sure we don't have duplicated. We do a super basic drop duplicates because it shouldn't be a huge
         # issue at this point
         epc_data = epc_data.drop_duplicates("uprn")
+        wall_descriptions.extend(epc_data["walls-description"].unique().tolist())
 
         # time from the inspection to now
         epc_data["epc_age"] = (datetime.now() - pd.to_datetime(epc_data["inspection-date"])).dt.days
@@ -5464,6 +5478,10 @@ def fml_analysis(loader):
 
     results_df = pd.DataFrame(results)
 
+    wall_descriptions = list(set(wall_descriptions))
+    from pprint import pprint
+    pprint(wall_descriptions)
+
     # results_df["Delta vs November"] = 100 * (
     #     results_df["Of which ECO4 Eligible - Remaining"] - results_df["Original ECO4 Estimate - Remaining"]
     # ) / results_df["Original ECO4 Estimate - Remaining"]
diff --git a/etl/epc_clean/app.py b/etl/epc_clean/app.py
index 53c1a329..3f1a1a80 100644
--- a/etl/epc_clean/app.py
+++ b/etl/epc_clean/app.py
@@ -36,8 +36,11 @@ def app():
     cleaned_data = {}
     epc_directories = [entry for entry in EPC_DIRECTORY.iterdir() if entry.is_dir()]
 
+    WALLS = []
     for directory in tqdm(epc_directories):
         data = pd.read_csv(directory / "certificates.csv", low_memory=False)
+        z = data["WALLS_DESCRIPTION"].unique().tolist()
+        WALLS.extend(z)
         # Rename the columns to the same format as the api returns
         data.columns = [c.replace("_", "-").lower() for c in data.columns]
         # Take just date before the date threshold

From cc319ab91149f77dd04e691e6bc6b99bb9d39702 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 15 Mar 2024 10:09:26 +0000
Subject: [PATCH 140/262] new ha analysis wip

---
 .../ha_15_32/ha_analysis_batch_3.py            | 18 +++++-------------
 1 file changed, 5 insertions(+), 13 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 9462642f..a0b7e0bb 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -5210,7 +5210,7 @@ def fml_analysis(loader):
     ]
 
     # TODO: There will be some properties that are subject to CIGA that do not look like they ned a CIGA check! pass
-    #  them!
+    #  them! Non-invasices will have checked the wall though
 
     codes = [
         "HA39", "HA14", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA7",
@@ -5352,16 +5352,11 @@ def fml_analysis(loader):
             ]
 
         # Characterise no CIGA check needed
-
-        # TODO: WHAT ABOUT PASSED CIGA - don't need to apply the further deduction
-
         ciga_check_needed = had_survey[
             had_survey["ECO Eligibility"].str.contains("subject to ciga")
         ].copy()
 
-        ciga_check_passed = had_survey[
-            had_survey["ECO Eligibility"] == "eco4 - passed ciga"
-            ]
+        ciga_check_passed = had_survey[had_survey["ECO Eligibility"] == "eco4 - passed ciga"]
         # These should be treated the same as one that have passed their ciga checks, from a detection perspective
         ciga_check_passed_eligible = ciga_check_passed[
             (ciga_check_passed["walls-description"].str.lower().str.contains("cavity") == True) &
@@ -5469,18 +5464,15 @@ def fml_analysis(loader):
                 "HA Name": ha_name,
                 "Original ECO4 Estimate - Remaining": original_remaining,
                 "Postcode List - Remaining": postcode_list_remaining,
-                "Of which sold": sales_since_nov,
+                # "Of which sold": sales_since_nov,
                 "Of which ECO4 Eligible - Remaining": int(total_expectation),
                 "Of which GBIS Eligibile - Remaining": int(total_gbis_expectation),
-                "Proportion with a survey": proportion_with_survey,
+                # "Proportion with a survey": proportion_with_survey,
             }
         )
 
     results_df = pd.DataFrame(results)
-
-    wall_descriptions = list(set(wall_descriptions))
-    from pprint import pprint
-    pprint(wall_descriptions)
+    results_df.to_csv("analysis - revised.csv")
 
     # results_df["Delta vs November"] = 100 * (
     #     results_df["Of which ECO4 Eligible - Remaining"] - results_df["Original ECO4 Estimate - Remaining"]

From 12f780a08989e896235adf96e175d39240c3adbb Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 15 Mar 2024 16:54:48 +0000
Subject: [PATCH 141/262] setting up complete data pull

---
 .../ha_15_32/ha_analysis_batch_3.py           | 380 +++++++++++++++++-
 1 file changed, 369 insertions(+), 11 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index a0b7e0bb..902d48fd 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -42,6 +42,15 @@ PROPERTY_TYPE_LOOKUP = {
             'Detached Local Connect': 'Detached',
         }
     },
+    "HA2": {
+        'HOUSE': 'House',
+        'FLAT': 'Flat',
+        'SHELTERED': None,
+        'BUNGALOW': 'Bungalow',
+        'BED-SIT': None,
+        'MAISONETTE': "Maisonette",
+        'HOSTEL': None
+    },
     "HA6": {
         "property_type": {
             'HOUSE': "House",
@@ -69,6 +78,23 @@ PROPERTY_TYPE_LOOKUP = {
             "End Terraced": "End-Terrace",
         }
     },
+    "HA12": {
+        "House": "House",
+        "Flat": "Flat",
+        "Bungalow": "Bungalow",
+        "Maisonette": "Maisonette",
+        "Bedsit": None,
+    },
+    "HA13": {
+        'House': "House",
+        'Flat': "Flat",
+        'House MT': "House",
+        'House SD': "House",
+        'House ET': "House",
+        'Bungalow MT': "Bungalow",
+        'Bungalow ET': "Bungalow",
+        'ii': None,
+    },
     "HA14": {
         "property_type": {
             "House": "House",
@@ -77,6 +103,13 @@ PROPERTY_TYPE_LOOKUP = {
             "Maisonette": "Maisonette",
         }
     },
+    "HA15": {
+        'House': 'House',
+        'Flat': 'Flat',
+        'Bungalow': 'Bungalow',
+        'Maisonette': 'Maisonette',
+        'Flat over garage': 'Flat',
+    },
     "HA16": {
         'Semi Detached Bungalow': {"property-type": "Bungalow", "built-form": "Semi-Detached"},
         'Mid Terraced House': {"property-type": "House", "built-form": "Mid-Terrace"},
@@ -95,6 +128,30 @@ PROPERTY_TYPE_LOOKUP = {
         'Flat Over Shop': {"property-type": "Flat", "built-form": "Mid-Terrace"},
         'Mid Terraced Town House': {"property-type": "House", "built-form": "Mid-Terrace"},
     },
+    "HA18": {
+        "House": "House",
+        "Flat": "Flat",
+        "Bungalow": "Bungalow",
+        "Maisonette": "Maisonette",
+        "Bedsit": None,
+        "Shop": None,
+        "Hostel": None,
+        "Block": None,
+    },
+    "HA24": {
+        '01 HOUSE': 'House',
+        '02 FLAT': 'Flat',
+        '03 BUNGALOW': 'Bungalow',
+        '10 PBUNGALOW': 'Bungalow',
+        '01 HOUSE MID': 'House',
+        '13 SBUNGALOW': 'Bungalow',
+        '12 SBEDSIT': None,  # BEDSIT does not match the specified property types
+        '14 SFLAT': 'Flat',
+        '05 BEDSIT': None,
+        '04 MAISONETTE': 'Maisonette',
+        '11 PFLAT': 'Flat',
+        '09 PBEDSIT': None
+    },
     "HA25": {
         'Flat': 'Flat',
         'Mid Terrace House': 'House',
@@ -116,6 +173,77 @@ PROPERTY_TYPE_LOOKUP = {
         'Mid Terrace Housekeeping ': 'House',
         'End Terrace Housex': 'House'
     },
+    "HA28": {
+        'Flat': 'Flat',
+        'Semi detached house': 'House',
+        'Terraced house': 'House',
+        'Maisonette flat': 'Maisonette',
+        'Sheltered bedsit': None,
+        'APD flat': 'Flat',
+        'Bungalow terraced': 'Bungalow',
+        'Flat with partition': 'Flat',
+        'Bungalow semi detached': 'Bungalow',
+        'APD Bungalow': 'Bungalow',
+        'Sheltered flat': 'Flat',
+        'Bedsit Flat': 'Flat',
+        'Bedsit bungalow semi detached': 'Bungalow',
+        'Sheltered bungalow terraced': 'Bungalow',
+        'Sheltered bedsit disabled': None,
+        'Bedsit bungalow terraced': 'Bungalow',
+        'Sheltered bungalow semi detached': 'Bungalow',
+        'Sheltered warden flat': 'Flat',
+        'Bungalow detached': 'Bungalow',
+        'Block': None,  # Does not match the specified property types
+        'End Terraced House': 'House',
+        'Mid Terraced House': 'House',
+        '#N/A': None,  # Assuming this is an invalid or missing entry
+        0: None  # Assuming 0 is also an invalid or missing entry
+    },
+    "HA30": {
+        'House': 'House',
+        'Flat': 'Flat',
+        'Bungalow': 'Bungalow',
+        'House with Attached Garage': 'House',
+        'Bed Space': None,  # Assuming this does not fit the specified property types
+        'House with Garage': 'House',
+        'Bungalow with Wheelchair Access': 'Bungalow',
+        'Maisonette': 'Maisonette',
+        'Flat with Wheelchair Access': 'Flat',
+        'Bedsit': None,  # Assuming this does not fit the specified property types
+        'Flat w Wheelchair Access & Car Park': 'Flat',
+        'House with Wheelchair Access': 'House',
+        'Bungalow w Wheelchair Access & Car ': 'Bungalow'
+    },
+    "HA32": {
+        'Bungalow': 'Bungalow',
+        'Flat': 'Flat',
+        'Bungalow Disabled': 'Bungalow',  # "Disabled" properties categorized with their base type
+        'House': 'House',
+        'Dormer Bungalow': 'Bungalow',
+        'Pop-In': None,  # Does not fit the specified property types
+        'Flat Disabled': 'Flat',
+        'Laundry': None,  # Does not fit the specified property types
+        'Bedsit': None,  # Excluded from the given categories
+        'Shed': None,  # Does not fit the specified property types
+        'Store Room': None  # Does not fit the specified property types
+    },
+    "HA34": {
+        'Flat': 'Flat',
+        'House': 'House',
+        'Bungalow': 'Bungalow',
+        'Maisonette': 'Maisonette',
+        'ND': None,
+    },
+    "HA35": {
+        "Flat": "Flat",
+        "Maisonette": "Maisonette",
+        "House": "House",
+        "Bedsit": None,
+        "2 Bedroom Unknown": None,
+        "1 Bedroom Unknown": None,
+        "3 Bedroom Unknown": None,
+        "4 Bedroom Unknown": None,
+    },
     "HA39": {
         "Semi house": {"property_type": "House", "built_form": "Semi-Detached"},
         "1st floor flat": {"property_type": "Flat", "built_form": None},
@@ -140,6 +268,105 @@ PROPERTY_TYPE_LOOKUP = {
         "1st floor flat with study room": {"property_type": "Flat", "built_form": None},
         "2nd floor flat with study": {"property_type": "Flat", "built_form": None},
     },
+    "HA41": {
+        'Garage': None,
+        'House 1919-1945': 'House',
+        'House 1946-1964': 'House',
+        'Flats & Maisonettes post 1974': 'Flat',
+        'Non traditional houses': 'House',
+        'Sheltered': None,
+        'Flats & Maisonettes 1965-1974': 'Flat',
+        'House post 1974': 'House',
+        'Block': None,
+        'Flats & Maisonettes 1946-1964': 'Flat',
+        'House 1965-1974': 'House',
+        'Non traditional flats': 'Flat',
+        'Bungalow 1965-1974': 'Bungalow',
+        'PIMSS EMPTY': None,
+        'Bungalow post 1974': 'Bungalow',
+        'Bungalow 1946-1964': 'Bungalow',
+        'Flats & Maisonettes 1919-1945': 'Flat',
+        'House pre 1919': 'House',
+        'Flats & Maisonettes pre 1919': 'Flat',
+        'Bungalow 1919-1945': 'Bungalow',
+        'Office': None
+    },
+    "HA48": {
+        "House": "House",
+        "Flat": "Flat",
+        "Bungalow": "Bungalow",
+        "Maisonette": "Maisonette",
+        "Unit": None
+    },
+    "HA50": {
+        'House': 'House',
+        'Bungalow': 'Bungalow',
+        'Flat': 'Flat',
+        'House SD': 'House',
+        'House MT': 'House',
+        'House ET': 'House',
+        'Bungalow ET': 'Bungalow',
+        'House SD ': 'House',
+        'House. SD': 'House',
+        'Bungalow SD': 'Bungalow',
+        'Bungalow MT': 'Bungalow',
+        'Bungalow D': 'Bungalow',
+        'House D': 'House',
+        'House. MT': 'House',
+        'House ': 'House',
+        'House ET ': 'House',
+        ' ': None,
+        'Flat?': 'Flat',
+        'Bungalow ': 'Bungalow'
+    },
+    "HA56": {
+        'House Non Specific': 'House',
+        'HOUSE TERRACED': 'House',
+        'HOUSE - SEMI DETACHD': 'House',
+        'Bungalow': 'Bungalow',
+        'House - End Terraced': 'House',
+        'Block': None,
+        'Block with Communal': None,
+        'Bungalow - Terraced': 'Bungalow',
+        'Bungalow - Semi Dtch': 'Bungalow',
+        'Block House with rooms': None,
+        'Bungalow - End Terr': 'Bungalow',
+        'House - Mid Terraced': 'House',
+        'Bungalow - Detached': 'Bungalow',
+        'House - Detached': 'House',
+        'HOUSE THREE STOREY': 'House',
+        'Maisonette': 'Maisonette',
+        'Communal Block': None,
+        'Scheme': None
+    },
+    "HA63": {
+        'Flat': 'Flat',
+        'House - Semi detached': 'House',
+        'House - Detached': 'House',
+        'House - End Terrace': 'House',
+        'House - Mid Terrace': 'House',
+        'Bungalow - Semi detached': 'Bungalow',
+        'Bungalow': 'Bungalow',
+        'Bedsit': None,  # Considering as a non-specific residential category here
+        'Maisonette': 'Maisonette',
+        'Bungalow - End Terrace': 'Bungalow',
+        'Bungalow - Detached': 'Bungalow',
+        'Maisonette - Mid Terrace': 'Maisonette',
+        'Maisonette - End Terrace': 'Maisonette',
+        'Studio Flat': 'Flat',
+        'Maisonette - Detached': 'Maisonette',
+        'Bungalow - Mid Terrace': 'Bungalow',
+        'Bedsit - Mid Terrace': None,
+        'Bedsit - End Terrace': None,
+        'Amenity Block - Semi detached': None,  # Assuming non-residential
+        'Maisonette - Semi Detached': 'Maisonette',
+        'Amenity Block - Detached': None,  # Assuming non-residential
+        'Hostel': None,  # Typically not considered a standard residential property for this context
+        'Bungalow - Attached': 'Bungalow',
+        'Unknown': None,  # Not enough information to categorize
+        'Studio Flat - Mid Terrace': 'Flat',
+        'Chalet - Wheelchair': None  # Specialized type, not categorized here
+    },
     "HA107": {
         "property_type": {
             "HOUSE": "House",
@@ -160,6 +387,27 @@ PROPERTY_TYPE_LOOKUP = {
             "Detached": "Detached",
             "Detatched": "Detached",
         }
+    },
+    "HA117": {
+        "Flat": "Flat",
+        "House": "House",
+        "Bungalow": "Bungalow",
+        "Flat over garage/underpass": "Flat",
+    },
+    "HAXXX": {
+        'mid terraced house': 'House',
+        'semi detached house': 'House',
+        '1st fl 4 in a block': 'Flat',
+        'G/F 4 in a block': 'Flat',
+        'end terraced house': 'House',
+        '1st floor flat': 'Flat',
+        'G/F floor flat': 'Flat',
+        'semi detached bungalow': 'Bungalow',
+        '2nd floor flat': 'Flat',
+        'mid terrace bungalow': 'Bungalow',
+        'detached bungalow': 'Bungalow',
+        'end terrace bungalow': 'Bungalow',
+        'Staff accommodation': None  # Marked as None due to its special nature
     }
 }
 
@@ -2882,12 +3130,36 @@ def get_property_type_and_built_form(property_meta, ha_name):
             property_type = "Flat"
 
         built_form = PROPERTY_TYPE_LOOKUP[ha_name]["built_form"].get(property_meta["Property Type"], None)
+    elif ha_name == "HA2":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Dwelling Type"].strip())
+        built_form = None
     elif ha_name == "HA6":
         property_type = PROPERTY_TYPE_LOOKUP[ha_name]["property_type"][property_meta["Dwelling type"]]
         built_form = property_meta["built_form"]
     elif ha_name == "HA7":
         property_type = PROPERTY_TYPE_LOOKUP[ha_name]["property_type"].get(property_meta["Archetype"])
         built_form = PROPERTY_TYPE_LOOKUP[ha_name]["built_form"].get(property_meta["Property Type"])
+    elif ha_name == "HA9":
+        property_description = property_meta["Asset Type"].strip().lower()
+        if "house" in property_description:
+            return "House", None
+
+        if "flat" in property_description:
+            return "Flat", None
+
+        if "bungalow" in property_description:
+            return "Bungalow", None
+
+        if "maisonette" in property_description:
+            return "Maisonette", None
+
+        return None, None
+    elif ha_name == "HA12":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Asset_Type1"].strip())
+        built_form = None
+    elif ha_name == "HA13":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Type Cd"].strip())
+        built_form = None
     elif ha_name == "HA14":
         if property_meta["Asset Type Description"] == "Block - Repair":
             # We try and deduce if it's a flat or house, depending on if it has "room" or "flats" in the address
@@ -2902,15 +3174,60 @@ def get_property_type_and_built_form(property_meta, ha_name):
             ]
 
         built_form = None
-    elif ha_name == "HA25":
-        property_type = PROPERTY_TYPE_LOOKUP[ha_name][property_meta["T1_AssetType"]]
+    elif ha_name == "HA15":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip())
         built_form = None
     elif ha_name == "HA16":
         config = PROPERTY_TYPE_LOOKUP[ha_name][property_meta["Type"]]
         property_type = config.get("property-type")
         built_form = config.get("built-form")
-    elif ha_name == "HA39":
+    elif ha_name == "HA18":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Asset Type"].strip())
+        built_form = None
+    elif ha_name == "HA19":
+        property_type = property_meta["Dwelling Type"]
+        built_form = None
+    elif ha_name == "HA24":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip())
+        built_form = None
+    elif ha_name == "HA25":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name][property_meta["T1_AssetType"]]
+        built_form = None
+    elif ha_name == "HA27":
+        property_type = property_meta["Property Type"]
+        built_form = None
+    elif ha_name == "HA28":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name][property_meta["Property Type - Academy"]]
+        built_form = None
+    elif ha_name == "HA30":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name][property_meta["A_AssetType"]]
+        built_form = None
+    elif ha_name == "HA31":
+        property_description = property_meta["A_AssetType"].strip().lower()
+        if "house" in property_description:
+            return "House", None
 
+        if "flat" in property_description:
+            return "Flat", None
+
+        if "bungalow" in property_description:
+            return "Bungalow", None
+
+        if "maisonette" in property_description:
+            return "Maisonette", None
+
+        return None, None
+
+    elif ha_name == "HA32":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Dwelling type"].strip())
+        built_form = None
+    elif ha_name == "HA34":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip())
+        built_form = None
+    elif ha_name == "HA35":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type Grouping"].strip())
+        built_form = None
+    elif ha_name == "HA39":
         property_type_config = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["ConstructionStyle"], {})
         property_type = property_type_config.get("property_type", None)
         built_form = property_type_config.get("built_form", None)
@@ -2921,11 +3238,35 @@ def get_property_type_and_built_form(property_meta, ha_name):
                 property_type = "Flat"
             else:
                 property_type = "House"
+    elif ha_name == "HA41":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Archetype"].strip())
+        built_form = None
+    elif ha_name == "HA48":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip())
+        built_form = None
+    elif ha_name == "HA49":
+        property_type = property_meta["Property Class"].strip()
+        built_form = None
+    elif ha_name == "HA54":
+        property_type = property_meta["Property Type"]
+        built_form = None
+    elif ha_name == "HA56":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Dwelling Type Description"].strip())
+        built_form = None
+    elif ha_name == "HA63":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["PropertyType"].strip())
+        built_form = None
     elif ha_name == "HA107":
-
         property_type = property_meta.get("property_type", None)
         built_form = property_meta.get("built_form", None)
-
+    elif ha_name == "HA117":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip())
+        built_form = None
+    elif ha_name == "HAXX":
+        return property_meta["Property Type"].split(":")[0].strip(), None
+    elif ha_name == "HAXXX":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Unit Description"].strip())
+        built_form = None
     else:
         raise NotImplementedError("Implement me")
 
@@ -5119,9 +5460,16 @@ def forecast_remaining_sales(loader):
 
 
 def fml_data_pull(loader):
-    has_bruh = ["HA7", "HA14", "HA25", "HA39", "HA16",
-                # Do these
-                "HA1", "HA13", "HA50", "HA24"]
+    has_bruh = [
+        # "HA7", "HA14", "HA25", "HA39", "HA16", "HA28",
+        # Updated get_property_type_and_built_form, still needs running
+        "HA13", "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12",
+        "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49",
+        # todo
+    ]
+
+    # Can't pull from EPC database because it's based in Scotland
+    # "HAXXX", "HAXX"
     # DO
     from backend.SearchEpc import SearchEpc
     epc_api_key = "a2Nvbm5rb3dsZXNzYXJAZ21haWwuY29tOjY5MGJiMWM0NmIyOGI5ZDUxYzAxMzQzYzNiZGNlZGJjZDNmODQwMzA="
@@ -5134,14 +5482,24 @@ def fml_data_pull(loader):
         # For each property, search for the latest EPC
         epc_data = []
         for _, row in tqdm(fml.iterrows(), total=fml.shape[0]):
+
             property_type, _ = get_property_type_and_built_form(property_meta=row, ha_name=ha)
+
+            if ha == "HAXXX":
+                to_join = [str(x) for x in
+                           [row["Door Number"], row["Address Line 1"], row["Address Line 2"], row["Address Line 3"],
+                            row["Postcode"]] if x is not None]
+                full_address = ", ".join(to_join)
+            else:
+                full_address = row["matching_address"]
+
             searcher = SearchEpc(
-                address1=row["HouseNo"],
+                address1=str(row["HouseNo"]),
                 postcode=row["matching_postcode"],
                 auth_token=epc_api_key,
                 os_api_key="",
                 property_type=property_type,
-                full_address=row["matching_address"],
+                full_address=full_address,
             )
             # Force the skipping of estimating the EPC
             searcher.ordnance_survey_client.property_type = None
@@ -5194,7 +5552,7 @@ def classify_loft(x):
 
 def fml_analysis(loader):
     assumed_ciga_pass_rate = 0.731
-    has_bruh = ["HA7", "HA14", "HA25", "HA39", "HA16"]
+    has_bruh = ["HA7", "HA14", "HA25", "HA39", "HA16", "HA1"]
 
     no_ciga_cavity_descriptions = [
         "Cavity wall, as built, insulated (assumed)",

From 6423ab2fac732a905645260263ebc72149424712 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 15 Mar 2024 17:53:18 +0000
Subject: [PATCH 142/262] data pull pipeline ready

---
 backend/SearchEpc.py                          |  11 +-
 .../ha_15_32/ha_analysis_batch_3.py           | 100 ++++++++++--------
 2 files changed, 61 insertions(+), 50 deletions(-)

diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py
index 3d2df9fb..cc2ee4a9 100644
--- a/backend/SearchEpc.py
+++ b/backend/SearchEpc.py
@@ -147,6 +147,7 @@ class SearchEpc:
         uprn: [int, None] = None,
         size=None,
         property_type=None,
+        fast=False
     ):
         """
         Address lines 1 and postcode are mandatory fields. The other address lines are optional
@@ -187,6 +188,7 @@ class SearchEpc:
         self.size = size if size is not None else 25
 
         self.property_type = property_type
+        self.fast = fast
 
     @classmethod
     def get_house_number(cls, address: str) -> str | None:
@@ -365,9 +367,6 @@ class SearchEpc:
         # Finally, we identify the newest epc and the rest, and then return
         newest_epc, older_epcs = self.filter_newest_epc(list_of_epcs=rows)
 
-        # Retrieve postcode and address
-        address_epc, postcode_epc = self.format_address(newest_epc=newest_epc)
-
         # Ge the uprn from the newest record for this home
         uprns = {r["uprn"] for r in rows if r["uprn"]}
         # We can sometimes have no uprn for a property
@@ -384,6 +383,12 @@ class SearchEpc:
 
         uprn = uprns.pop() if uprns else None
 
+        if self.fast:
+            return newest_epc, [], {}, "", "", None
+
+        # Retrieve postcode and address
+        address_epc, postcode_epc = self.format_address(newest_epc=newest_epc)
+
         return newest_epc, older_epcs, full_sap_epc, address_epc, postcode_epc, uprn
 
     @staticmethod
diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 902d48fd..7db97733 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -5461,9 +5461,9 @@ def forecast_remaining_sales(loader):
 
 def fml_data_pull(loader):
     has_bruh = [
-        # "HA7", "HA14", "HA25", "HA39", "HA16", "HA28",
+        # "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13",
         # Updated get_property_type_and_built_form, still needs running
-        "HA13", "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12",
+        "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12",
         "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49",
         # todo
     ]
@@ -5474,57 +5474,63 @@ def fml_data_pull(loader):
     from backend.SearchEpc import SearchEpc
     epc_api_key = "a2Nvbm5rb3dsZXNzYXJAZ21haWwuY29tOjY5MGJiMWM0NmIyOGI5ZDUxYzAxMzQzYzNiZGNlZGJjZDNmODQwMzA="
 
+    failed_has = []
     for ha in has_bruh:
-        asset_list = loader.data[ha]["asset_list"].copy()
-        # properties found as eligibile
-        fml = asset_list[asset_list["ECO Eligibility"] != "not eligible"]
+        print(f"Pulling data for {ha}")
+        try:
+            asset_list = loader.data[ha]["asset_list"].copy()
+            # properties found as eligible
+            fml = asset_list[asset_list["ECO Eligibility"] != "not eligible"]
 
-        # For each property, search for the latest EPC
-        epc_data = []
-        for _, row in tqdm(fml.iterrows(), total=fml.shape[0]):
+            # For each property, search for the latest EPC
+            epc_data = []
+            for _, row in tqdm(fml.iterrows(), total=fml.shape[0]):
 
-            property_type, _ = get_property_type_and_built_form(property_meta=row, ha_name=ha)
+                property_type, _ = get_property_type_and_built_form(property_meta=row, ha_name=ha)
 
-            if ha == "HAXXX":
-                to_join = [str(x) for x in
-                           [row["Door Number"], row["Address Line 1"], row["Address Line 2"], row["Address Line 3"],
-                            row["Postcode"]] if x is not None]
-                full_address = ", ".join(to_join)
-            else:
-                full_address = row["matching_address"]
+                if ha == "HAXXX":
+                    to_join = [str(x) for x in
+                               [row["Door Number"], row["Address Line 1"], row["Address Line 2"], row["Address Line 3"],
+                                row["Postcode"]] if x is not None]
+                    full_address = ", ".join(to_join)
+                else:
+                    full_address = row["matching_address"]
 
-            searcher = SearchEpc(
-                address1=str(row["HouseNo"]),
-                postcode=row["matching_postcode"],
-                auth_token=epc_api_key,
-                os_api_key="",
-                property_type=property_type,
-                full_address=full_address,
+                searcher = SearchEpc(
+                    address1=str(row["HouseNo"]),
+                    postcode=row["matching_postcode"],
+                    auth_token=epc_api_key,
+                    os_api_key="",
+                    property_type=property_type,
+                    full_address=full_address,
+                    fast=True
+                )
+                # Force the skipping of estimating the EPC
+                searcher.ordnance_survey_client.property_type = None
+                searcher.ordnance_survey_client.built_form = None
+
+                searcher.find_property(skip_os=True)
+                if searcher.newest_epc is None:
+                    continue
+
+                epc = {
+                    "asset_list_row_id": row["asset_list_row_id"],
+                    **searcher.newest_epc.copy()
+                }
+
+                epc_data.append(epc)
+
+            # Remove None entries
+            epc_data = [x for x in epc_data if x is not None]
+            # Save the data in S3 as a pickle
+            epc_data_df = pd.DataFrame(epc_data)
+            save_pickle_to_s3(
+                data=epc_data_df,
+                bucket_name="retrofit-datalake-dev",
+                s3_file_name=f"ha-analysis/revised/{ha}/epc_data.pickle"
             )
-            # Force the skipping of estimating the EPC
-            searcher.ordnance_survey_client.property_type = None
-            searcher.ordnance_survey_client.built_form = None
-
-            searcher.find_property(skip_os=True)
-            if searcher.newest_epc is None:
-                continue
-
-            epc = {
-                "asset_list_row_id": row["asset_list_row_id"],
-                **searcher.newest_epc.copy()
-            }
-
-            epc_data.append(epc)
-
-        # Remove None entries
-        epc_data = [x for x in epc_data if x is not None]
-        # Save the data in S3 as a parquet
-        epc_data_df = pd.DataFrame(epc_data)
-        save_pickle_to_s3(
-            data=epc_data_df,
-            bucket_name="retrofit-datalake-dev",
-            s3_file_name=f"ha-analysis/revised/{ha}/epc_data.pickle"
-        )
+        except Exception as e:
+            failed_has.append(ha)
 
 
 def extract_lower_bound(age_band):

From 4e077053cd73b4e6cd27392440e4e179846f6f9a Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 16 Mar 2024 14:51:39 +0000
Subject: [PATCH 143/262] Adding gbis to output

---
 .../ha_15_32/ha_analysis_batch_3.py           | 92 +++++++++++++++----
 1 file changed, 74 insertions(+), 18 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 7db97733..0ca28927 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -3247,6 +3247,9 @@ def get_property_type_and_built_form(property_meta, ha_name):
     elif ha_name == "HA49":
         property_type = property_meta["Property Class"].strip()
         built_form = None
+    elif ha_name == "HA50":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip())
+        built_form = None
     elif ha_name == "HA54":
         property_type = property_meta["Property Type"]
         built_form = None
@@ -5685,12 +5688,6 @@ def fml_analysis(loader):
 
         fuck_this["roof_classiciation"] = fuck_this.apply(lambda x: classify_loft(x), axis=1)
 
-        # fuck_this["construction-age-band"] = fuck_this["construction-age-band"].apply(
-        #     lambda x: EPCDataProcessor.clean_construction_age_band(x)
-        # )
-        #
-        # fuck_this['age_lower_bound'] = fuck_this['construction-age-band'].apply(extract_lower_bound)
-
         had_survey = fuck_this[fuck_this["estimated"] == False]
 
         # proportion with a survey:
@@ -5716,10 +5713,6 @@ def fml_analysis(loader):
             ]
 
         # Characterise no CIGA check needed
-        ciga_check_needed = had_survey[
-            had_survey["ECO Eligibility"].str.contains("subject to ciga")
-        ].copy()
-
         ciga_check_passed = had_survey[had_survey["ECO Eligibility"] == "eco4 - passed ciga"]
         # These should be treated the same as one that have passed their ciga checks, from a detection perspective
         ciga_check_passed_eligible = ciga_check_passed[
@@ -5743,20 +5736,60 @@ def fml_analysis(loader):
         # differ between variables; floor and wall type errors occur in ~10-15% of EPCs,
         # compared with ~5% for wall insulation and glazing performance
 
+        ciga_check_needed = had_survey[
+            had_survey["ECO Eligibility"].str.contains("subject to ciga")
+        ].copy()
+
         ciga_check_needed_eligible = ciga_check_needed[
             (ciga_check_needed["walls-description"].str.lower().str.contains("cavity") == True) &
             (ciga_check_needed["roof_classiciation"].isin(["high", "medium"])) &
             (ciga_check_needed["current-energy-efficiency"].astype(float) <= 80)
             ]
 
+        # Finally, characterise the properties identified as gbis. Some of them actually look like ECO4 work; the
+        # remainder are then qualified as genuine gbis
+        gbis_identified = had_survey[
+            had_survey["ECO Eligibility"] == "gbis"
+            ].copy()
+
+        gbis_looks_like_eco4 = gbis_identified[
+            (gbis_identified["walls-description"].isin(no_ciga_cavity_descriptions)) &
+            (gbis_identified["roof_classiciation"].isin(["high", "medium"])) &
+            (gbis_identified["current-energy-efficiency"].astype(float) <= 80) &
+            (
+                (
+                    (gbis_identified["property-type"] == "House") &
+                    (gbis_identified["built-form"] != "Mid-Terrace")
+                ) | (
+                    (gbis_identified["property-type"] == "Bungalow") &
+                    (gbis_identified["built-form"].isin(["Detached"]))
+                )
+            )
+            ]
+
+        gbis_qualified = gbis_identified[
+            (gbis_identified["walls-description"].isin(no_ciga_cavity_descriptions)) &
+            (gbis_identified["current-energy-efficiency"].astype(float) <= 80) &
+            (~gbis_identified["asset_list_row_id"].isin(gbis_looks_like_eco4["asset_list_row_id"].values))
+            ]
+
         ciga_check_expectation = np.round(ciga_check_needed_eligible.shape[0] * ha_ciga_pass_rate)
         without_ciga_expectation = no_ciga_check_needed_eligible.shape[0]
         passed_ciga_expectation = ciga_check_passed_eligible.shape[0]
+        identified_as_gbis_looks_like_eco4 = gbis_looks_like_eco4.shape[0]
 
         # Need to add on the non-ciga
-        total_expectation = ciga_check_expectation + without_ciga_expectation + passed_ciga_expectation
+        total_eco4_expectation = (
+            ciga_check_expectation +
+            without_ciga_expectation +
+            passed_ciga_expectation +
+            identified_as_gbis_looks_like_eco4
+        )
 
-        total_gbis_expectation = no_ciga_check_needed_eligible_gbis.shape[0]
+        no_ciga_check_needed_actually_gbis = no_ciga_check_needed_eligible_gbis.shape[0]
+        gbis_qualified = gbis_qualified.shape[0]
+
+        total_gbis_expectation = no_ciga_check_needed_actually_gbis + gbis_qualified
 
         if proportion_with_survey < 100:
             # We estimate the rest
@@ -5805,14 +5838,38 @@ def fml_analysis(loader):
                     without_survey_eco4.shape[0] * (total_gbis_expectation / no_ciga_check_needed.shape[0])
                 )
 
-            total_expectation = (
-                total_expectation +
+            # And gbis
+            without_survey_gbis = fuck_this[
+                (fuck_this["estimated"] == True) &
+                (fuck_this["ECO Eligibility"] == "gbis")
+                ]
+
+            if without_survey_gbis.empty:
+                without_survey_identified_as_gbis_qualified = 0
+                without_survey_identified_as_gbis_eco4 = 0
+            else:
+                # We apply the same conversion rate as the properties with a survey
+                without_survey_identified_as_gbis_qualified = np.round(
+                    without_survey_gbis.shape[0] * (gbis_qualified / gbis_identified.shape[0])
+                )
+
+                without_survey_identified_as_gbis_eco4 = np.round(
+                    without_survey_eco4.shape[0] * (identified_as_gbis_looks_like_eco4 / gbis_identified.shape[0])
+                )
+
+            total_eco4_expectation = (
+                total_eco4_expectation +
                 without_survey_without_ciga_expected +
                 without_survey_passed_ciga_expected +
-                without_survey_eco4_expected
+                without_survey_eco4_expected +
+                without_survey_identified_as_gbis_eco4
             )
 
-            total_gbis_expectation = total_gbis_expectation + without_survey_gbis_expected
+            total_gbis_expectation = (
+                total_gbis_expectation +
+                without_survey_gbis_expected +
+                without_survey_identified_as_gbis_qualified
+            )
 
         surveys = loader.data[ha_name]["survey_list"]
         sold_now = 0
@@ -5829,9 +5886,8 @@ def fml_analysis(loader):
                 "Original ECO4 Estimate - Remaining": original_remaining,
                 "Postcode List - Remaining": postcode_list_remaining,
                 # "Of which sold": sales_since_nov,
-                "Of which ECO4 Eligible - Remaining": int(total_expectation),
+                "Of which ECO4 Eligible - Remaining": int(total_eco4_expectation),
                 "Of which GBIS Eligibile - Remaining": int(total_gbis_expectation),
-                # "Proportion with a survey": proportion_with_survey,
             }
         )
 

From a7ed3b84e560ea3e92517f8568bc7918e352d0e7 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 17 Mar 2024 14:12:49 +0000
Subject: [PATCH 144/262] Added HA8

---
 .../ha_15_32/ha_analysis_batch_3.py           | 98 ++++++++++++++++++-
 1 file changed, 93 insertions(+), 5 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 0ca28927..67139e40 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -517,6 +517,11 @@ class DataLoader:
                                              asset_list["Address3"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["Postcode"].astype(str).str.lower().str.strip()
             asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA8":
+            asset_list["matching_address"] = asset_list["AddressLine1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["AddressLine2"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Postcode"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
         elif ha_name == "HA9":
             asset_list["matching_address"] = asset_list["House Number"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \
@@ -2293,6 +2298,30 @@ class DataLoader:
     def correct_ha49_survey_list(survey_list):
         return survey_list
 
+    @staticmethod
+    def correct_ha8_survey_list(survey_list):
+        # Split on / and take the first half
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.split("/").str[0]
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "WESTONIA COURT HOUSE", "Westonia Court"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Hillesdon Avenue", "Hillesden Avenue"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Weston Street", "Western Street"
+        )
+
+        # Remove placeholder rows where postcode is missing
+        survey_list = survey_list[
+            ~pd.isnull(survey_list["Post Code"])
+        ]
+
+        return survey_list
+
     @staticmethod
     def levenstein_match(matching_string, df):
         match_to = df["matching_address"].tolist()
@@ -5464,7 +5493,7 @@ def forecast_remaining_sales(loader):
 
 def fml_data_pull(loader):
     has_bruh = [
-        # "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13",
+        "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13",
         # Updated get_property_type_and_built_form, still needs running
         "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12",
         "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49",
@@ -5561,7 +5590,13 @@ def classify_loft(x):
 
 def fml_analysis(loader):
     assumed_ciga_pass_rate = 0.731
-    has_bruh = ["HA7", "HA14", "HA25", "HA39", "HA16", "HA1"]
+    has_bruh = [
+        "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13",
+        # Updated get_property_type_and_built_form, still needs running
+        "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12",
+        "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49",
+        # todo
+    ]
 
     no_ciga_cavity_descriptions = [
         "Cavity wall, as built, insulated (assumed)",
@@ -5597,12 +5632,13 @@ def fml_analysis(loader):
 
     results = []
     wall_descriptions = []
-    for ha_name in has_bruh:
+    for ha_name in tqdm(has_bruh):
 
         original_figures = loader.december_figures[
             loader.december_figures["HA Name"] == ha_name
             ].copy()
         original_remaining = original_figures["ECO4 remaining"].values[0]
+        original_gbis_remaining = original_figures["GBIS remaining"].values[0]
         postcode_list_remaining = remaining_eligible_mapping[ha_name]
 
         # Read in the epc data
@@ -5669,7 +5705,54 @@ def fml_analysis(loader):
             raise Exception("SOMETHING WENT WRONG")
 
         if any(fuck_this["ECO Eligibility"].str.contains("subject to archetype")):
-            raise Exception("DO THE DAMN ARCHETYPE CHECK BRO")
+            # We perform the archetype test. If the property is a house, it needs to be detached, semi-detached
+            # or end terrace. If it's a bungalow, it must be detached
+            fuck_this["passes_archetype"] = None
+            fuck_this["passes_archetype"] = np.where(
+                (fuck_this["property-type"] == "House") &
+                (fuck_this["built-form"].isin(["Semi-Detached", "End-Terrace", "Detached"])),
+                True,
+                fuck_this["passes_archetype"]
+            )
+
+            fuck_this["passes_archetype"] = np.where(
+                (fuck_this["property-type"] == "Bungalow") &
+                (fuck_this["built-form"].isin(["Detached"])),
+                True,
+                fuck_this["passes_archetype"]
+            )
+
+            fuck_this["ECO Eligibility"] = np.where(
+                (fuck_this["ECO Eligibility"] == "eco4 (subject to ciga) (subject to archetype)") &
+                (fuck_this["passes_archetype"] == True),
+                "eco4 (subject to ciga)",
+                fuck_this["ECO Eligibility"]
+            )
+
+            # If it failed the archetype check and needs a CIGA, it's not eligible
+            fuck_this["ECO Eligibility"] = np.where(
+                (fuck_this["ECO Eligibility"] == "eco4 (subject to ciga) (subject to archetype)") &
+                (fuck_this["passes_archetype"] != True),
+                "not eligible",
+                fuck_this["ECO Eligibility"]
+            )
+
+            fuck_this["ECO Eligibility"] = np.where(
+                (fuck_this["ECO Eligibility"] == "eco4 (subject to archetype)") &
+                (fuck_this["passes_archetype"] == True),
+                "eco4",
+                fuck_this["ECO Eligibility"]
+            )
+
+            fuck_this["ECO Eligibility"] = np.where(
+                (fuck_this["ECO Eligibility"] == "eco4 (subject to archetype)") &
+                (fuck_this["passes_archetype"] != True),
+                "gbis",
+                fuck_this["ECO Eligibility"]
+            )
+
+            if any(fuck_this["ECO Eligibility"].str.contains("subject to archetype")):
+                raise Exception("DO THE DAMN ARCHETYPE CHECK BRO")
 
         # clean roof insulation
         fuck_this["roof_insulation_thickness"] = fuck_this["roof_insulation_thickness"].fillna("0")
@@ -5685,6 +5768,9 @@ def fml_analysis(loader):
         fuck_this["roof_insulation_thickness"] = fuck_this[
             "roof_insulation_thickness"
         ].str.replace("average", "150")
+        fuck_this["roof_insulation_thickness"] = fuck_this[
+            "roof_insulation_thickness"
+        ].str.replace("above 150", "150")
 
         fuck_this["roof_classiciation"] = fuck_this.apply(lambda x: classify_loft(x), axis=1)
 
@@ -5884,6 +5970,7 @@ def fml_analysis(loader):
             {
                 "HA Name": ha_name,
                 "Original ECO4 Estimate - Remaining": original_remaining,
+                "Original GGBIS Estimate - Remaining": original_gbis_remaining,
                 "Postcode List - Remaining": postcode_list_remaining,
                 # "Of which sold": sales_since_nov,
                 "Of which ECO4 Eligible - Remaining": int(total_eco4_expectation),
@@ -5927,7 +6014,8 @@ def app():
         "HA1", "HA2", "HA6", "HA7", "HA9", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18", "HA19", "HA24", "HA25",
         "HA27", "HA28", "HA30", "HA31", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA49", "HA50", "HA54", "HA56",
         "HA63", "HA107", "HA117",
-
+        # Added as of March 17th
+        "HA8",
         # New HAS
         "HAXX", "HAXXX",
     ]

From 94ad06726320972b02db779b8f2e9440a0ea9c0e Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 17 Mar 2024 14:25:49 +0000
Subject: [PATCH 145/262] done ha11

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 67139e40..920ec1b6 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -530,6 +530,12 @@ class DataLoader:
                                              asset_list["Address Line 4"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["Postcode"].astype(str).str.lower().str.strip()
             asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA11":
+            asset_list["matching_address"] = asset_list["Address 1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address 2"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address 3"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Post Code"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["Post Code"].astype(str).str.lower().str.strip()
         elif ha_name == "HA13":
             asset_list["matching_address"] = asset_list["Address 1"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["address 2"].astype(str).str.lower().str.strip() + ", " + \
@@ -2322,6 +2328,15 @@ class DataLoader:
 
         return survey_list
 
+    @staticmethod
+    def correct_ha11_survey_list(survey_list):
+        # Remove 39 HOLLYWOOD WAY as it's not in the asset list
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "HOLLYWOOD WAY") &
+              (survey_list["NO."] == 39))
+        ]
+        return survey_list
+
     @staticmethod
     def levenstein_match(matching_string, df):
         match_to = df["matching_address"].tolist()
@@ -6015,7 +6030,7 @@ def app():
         "HA27", "HA28", "HA30", "HA31", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA49", "HA50", "HA54", "HA56",
         "HA63", "HA107", "HA117",
         # Added as of March 17th
-        "HA8",
+        "HA8", "HA11",
         # New HAS
         "HAXX", "HAXXX",
     ]

From 9bbcbc881f3f1c50ab8ec422c5b38f04e864e676 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 17 Mar 2024 14:42:24 +0000
Subject: [PATCH 146/262] Added ha21

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 920ec1b6..e9de4695 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -573,6 +573,12 @@ class DataLoader:
                 asset_list["Postcode"].astype(str).str.lower().str.strip()
             )
             asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA21":
+            asset_list["matching_address"] = (
+                asset_list["Address"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["PostCode"].astype(str).str.lower().str.strip()
+            )
+            asset_list["matching_postcode"] = asset_list["PostCode"].astype(str).str.lower().str.strip()
         elif ha_name == "HA25":
             asset_list["matching_address"] = asset_list[
                 self.COLUMN_CONFIG[ha_name]["address"]
@@ -6030,7 +6036,7 @@ def app():
         "HA27", "HA28", "HA30", "HA31", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA49", "HA50", "HA54", "HA56",
         "HA63", "HA107", "HA117",
         # Added as of March 17th
-        "HA8", "HA11",
+        "HA8", "HA11", "HA21",
         # New HAS
         "HAXX", "HAXXX",
     ]
@@ -6038,7 +6044,7 @@ def app():
     # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE], 13 [DONE],
     # 35 [DONE], 56 [DONE], 19 [DONE], 18 [DONE], 9 [DONE], 27 [DONE], 34 [DONE], 30 [DONE], 31 [DONE], 54 [DONE]
     #
-    # Consider for ECO4:
+    # Consider for ECO4: HA 70 - have to merge ECO3 list though, HA17 has LOTs of assets, but the asset list is a mess
     # Consider for GBIS:
     # Ignore for now:
     # 38 [problematic, but no ECO4], 10 problematic (no eligibility), 20 has barely any in

From 897d58eec2ecc1e51d4a46878918f6c019a2705c Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 18 Mar 2024 10:40:12 +0000
Subject: [PATCH 147/262] Added ha44

---
 .../ha_15_32/ha_analysis_batch_3.py           | 189 +++++++++++++++++-
 1 file changed, 178 insertions(+), 11 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index e9de4695..dc96d403 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -78,6 +78,29 @@ PROPERTY_TYPE_LOOKUP = {
             "End Terraced": "End-Terrace",
         }
     },
+    "HA8": {
+        "House": "House",
+        "Flat": "Flat",
+        "Bungalow": "Bungalow",
+        "Maisonette": "Maisonette",
+        "Bedsit": None,
+        "Room": None,
+        "Other": None,
+        "Commerical": None
+    },
+    "HA11": {
+        "Flat": "Flat",
+        "House": "House",
+        "Semi-Det House": "House",
+        "Bedsit": None,
+        "End-Terr House": "House",
+        "Mid-Terr House": "House",
+        "Bungalow": "Bungalow",
+        "Maisonette": "Maisonette",
+        "End Terr Flat": "Flat",
+        "Mid Terr Flat": "Flat",
+        "Detached Flat": "Flat",
+    },
     "HA12": {
         "House": "House",
         "Flat": "Flat",
@@ -244,6 +267,13 @@ PROPERTY_TYPE_LOOKUP = {
         "3 Bedroom Unknown": None,
         "4 Bedroom Unknown": None,
     },
+    "HA37": {
+        "FLT": "Flat",
+        "HSE": "House",
+        "BNW": "Bungalow",
+        "MAS": "Maisonette",
+        "HSL": None
+    },
     "HA39": {
         "Semi house": {"property_type": "House", "built_form": "Semi-Detached"},
         "1st floor flat": {"property_type": "Flat", "built_form": None},
@@ -291,6 +321,21 @@ PROPERTY_TYPE_LOOKUP = {
         'Bungalow 1919-1945': 'Bungalow',
         'Office': None
     },
+    "HA42": {
+        'Flat': 'Flat',
+        'House': 'House',
+        'Flat Basement': 'Flat',
+        'Room': None,
+        'Bedsit Flat': 'Flat',
+        'Maisonette': 'Maisonette',
+        'Scheme Office': None,
+        'Scheme Lounge': None,
+        'Bungalow': 'Bungalow',
+        'Garage': None,
+        'Scheme Sleep Room': None,
+        'Cluster': None,
+        'Scheme Room': None
+    },
     "HA48": {
         "House": "House",
         "Flat": "Flat",
@@ -626,6 +671,12 @@ class DataLoader:
                                              asset_list["Address Line 4"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["Address Post Code"].astype(str).str.lower().str.strip()
             asset_list["matching_postcode"] = asset_list["Address Post Code"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA37":
+            asset_list["matching_address"] = asset_list["ADDRESS LINE 1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["ADDRESS LINE 2"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["ADDRESS LINE 3"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["POSTCODE"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["POSTCODE"].astype(str).str.lower().str.strip()
         elif ha_name == "HA38":
             asset_list["matching_address"] = asset_list["House_Number"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["Address_Line_1"].astype(str).str.lower().str.strip() + ", " + \
@@ -650,6 +701,18 @@ class DataLoader:
                                              asset_list["AddressLine5"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["Postcode"].astype(str).str.lower().str.strip()
             asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA42":
+            asset_list["matching_address"] = asset_list["Dwelling Number"].astype(str).str.lower().str.strip() + " " + \
+                                             asset_list["Street"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Locality"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Town"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Postcode"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA44":
+            asset_list["matching_address"] = asset_list["Address 1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address 2"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Postal Code"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["Postal Code"].astype(str).str.lower().str.strip()
         elif ha_name == "HA50":
             asset_list["matching_address"] = asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["Post Code"].astype(str).str.lower().str.strip()
@@ -1177,6 +1240,66 @@ class DataLoader:
             asset_list["matching_address"]
         )
 
+        asset_list["HouseNo"] = np.where(
+            (asset_list["Address_Line_1"].isin(
+                [
+                    "10 SOUTH VIEW/FLAT C",
+                ]
+            )),
+            "10C",
+            asset_list["HouseNo"]
+        )
+
+        asset_list["matching_address"] = np.where(
+            (asset_list["Address_Line_1"].isin(
+                [
+                    "10 SOUTH VIEW/FLAT C",
+                ]
+            )),
+            "FLAT c, spennymoor, co. durham, dl16 7df, 10c, 10 south view",
+            asset_list["matching_address"]
+        )
+
+        asset_list["HouseNo"] = np.where(
+            (asset_list["Address_Line_1"].isin(
+                [
+                    "10 SOUTH VIEW/FLAT D",
+                ]
+            )),
+            "10D",
+            asset_list["HouseNo"]
+        )
+
+        asset_list["matching_address"] = np.where(
+            (asset_list["Address_Line_1"].isin(
+                [
+                    "10 SOUTH VIEW/FLAT D",
+                ]
+            )),
+            "FLAT d, spennymoor, co. durham, dl16 7df, 10d, 10 south view",
+            asset_list["matching_address"]
+        )
+
+        asset_list["HouseNo"] = np.where(
+            (asset_list["Address_Line_1"].isin(
+                [
+                    "10 SOUTH VIEW/FLAT E",
+                ]
+            )),
+            "10E",
+            asset_list["HouseNo"]
+        )
+
+        asset_list["matching_address"] = np.where(
+            (asset_list["Address_Line_1"].isin(
+                [
+                    "10 SOUTH VIEW/FLAT E",
+                ]
+            )),
+            'FLAT e, spennymoor, co. durham, dl16 7df, 10e, 10 south view',
+            asset_list["matching_address"]
+        )
+
         return asset_list
 
     @staticmethod
@@ -1730,6 +1853,13 @@ class DataLoader:
             survey_list["Street / Block Name"]
         )
 
+        survey_list["Post Code"] = np.where(
+            (survey_list["Street / Block Name"] == "BEECH ROAD") &
+            (survey_list["Post Code"] == "DH6 1JD"),
+            "DH6 1JB",
+            survey_list["Post Code"]
+        )
+
         return survey_list
 
     @staticmethod
@@ -2343,6 +2473,18 @@ class DataLoader:
         ]
         return survey_list
 
+    @staticmethod
+    def correct_ha42_survey_list(survey_list):
+        """Remove "Turnstone Terrace" and "Pegasus place" from the survey street column."""
+        # The asset list has nothing in the street for these, so blank them to match
+        street = survey_list["Street / Block Name"]
+        survey_list["Street / Block Name"] = (
+            street
+            .str.replace("Turnstone Terrace", "")
+            .str.replace("Pegasus place", "")
+        )
+        return survey_list
+
     @staticmethod
     def levenstein_match(matching_string, df):
         match_to = df["matching_address"].tolist()
@@ -2926,7 +3068,7 @@ class DataLoader:
             "eco4 subject to ciga": "eco4 (subject to ciga)",
             "eco4 (subject to archetype/ciga)": "eco4 (subject to ciga) (subject to archetype)",
             "eco4( subject to ciga/archetype)": "eco4 (subject to ciga) (subject to archetype)",
-            "eco4 (subject to ciga/ archetype)": "eco4 (subject to ciga) (subject to archetype)"
+            "eco4 (subject to ciga/ archetype)": "eco4 (subject to ciga) (subject to archetype)",
         }
 
         ha_facts_and_figures = []
@@ -3189,6 +3331,9 @@ def get_property_type_and_built_form(property_meta, ha_name):
     elif ha_name == "HA7":
         property_type = PROPERTY_TYPE_LOOKUP[ha_name]["property_type"].get(property_meta["Archetype"])
         built_form = PROPERTY_TYPE_LOOKUP[ha_name]["built_form"].get(property_meta["Property Type"])
+    elif ha_name == "HA8":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip())
+        built_form = None
     elif ha_name == "HA9":
         property_description = property_meta["Asset Type"].strip().lower()
         if "house" in property_description:
@@ -3204,6 +3349,9 @@ def get_property_type_and_built_form(property_meta, ha_name):
             return "Maisonette", None
 
         return None, None
+    elif ha_name == "HA11":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip())
+        built_form = None
     elif ha_name == "HA12":
         property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Asset_Type1"].strip())
         built_form = None
@@ -3237,6 +3385,21 @@ def get_property_type_and_built_form(property_meta, ha_name):
     elif ha_name == "HA19":
         property_type = property_meta["Dwelling Type"]
         built_form = None
+    elif ha_name == "HA21":
+        property_description = property_meta["Property Type"].strip().lower()
+        if "house" in property_description:
+            return "House", None
+
+        if "flat" in property_description:
+            return "Flat", None
+
+        if "bungalow" in property_description:
+            return "Bungalow", None
+
+        if "maisonette" in property_description:
+            return "Maisonette", None
+
+        return None, None
     elif ha_name == "HA24":
         property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip())
         built_form = None
@@ -3277,6 +3440,9 @@ def get_property_type_and_built_form(property_meta, ha_name):
     elif ha_name == "HA35":
         property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type Grouping"].strip())
         built_form = None
+    elif ha_name == "HA37":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["PROPERTY TYPE"].strip())
+        built_form = None
     elif ha_name == "HA39":
         property_type_config = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["ConstructionStyle"], {})
         property_type = property_type_config.get("property_type", None)
@@ -3291,6 +3457,9 @@ def get_property_type_and_built_form(property_meta, ha_name):
     elif ha_name == "HA41":
         property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Archetype"].strip())
         built_form = None
+    elif ha_name == "HA42":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Dwelling use/type"].strip())
+        built_form = None
     elif ha_name == "HA48":
         property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip())
         built_form = None
@@ -5515,10 +5684,9 @@ def forecast_remaining_sales(loader):
 def fml_data_pull(loader):
     has_bruh = [
         "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13",
-        # Updated get_property_type_and_built_form, still needs running
         "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12",
         "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49",
-        # todo
+        'HA8', 'HA11', 'HA21', 'HA37', 'HA42',
     ]
 
     # Can't pull from EPC database because it's based in Scotland
@@ -5613,10 +5781,9 @@ def fml_analysis(loader):
     assumed_ciga_pass_rate = 0.731
     has_bruh = [
         "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13",
-        # Updated get_property_type_and_built_form, still needs running
         "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12",
         "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49",
-        # todo
+        'HA8', 'HA11', 'HA21', 'HA37', 'HA42',
     ]
 
     no_ciga_cavity_descriptions = [
@@ -5639,7 +5806,7 @@ def fml_analysis(loader):
         "HA39", "HA14", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA7",
         "HA16", "HA107", "HA25", "HA50", "HA41", "HA48", "HA2", "HA63", "HA12",
         "HA117", "HA13", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27",
-        "HA30", "HA31", "HA54", "HAXX", "HA49", "HAXXX"
+        "HA30", "HA31", "HA54", "HAXX", "HA49", "HAXXX",
     ]
 
     values = [
@@ -5660,7 +5827,6 @@ def fml_analysis(loader):
             ].copy()
         original_remaining = original_figures["ECO4 remaining"].values[0]
         original_gbis_remaining = original_figures["GBIS remaining"].values[0]
-        postcode_list_remaining = remaining_eligible_mapping[ha_name]
 
         # Read in the epc data
         asset_list = loader.data[ha_name]["asset_list"].copy()
@@ -5992,10 +6158,10 @@ def fml_analysis(loader):
                 "HA Name": ha_name,
                 "Original ECO4 Estimate - Remaining": original_remaining,
                 "Original GGBIS Estimate - Remaining": original_gbis_remaining,
-                "Postcode List - Remaining": postcode_list_remaining,
+                # "Postcode List - Remaining": postcode_list_remaining,
                 # "Of which sold": sales_since_nov,
-                "Of which ECO4 Eligible - Remaining": int(total_eco4_expectation),
-                "Of which GBIS Eligibile - Remaining": int(total_gbis_expectation),
+                "EPC verified ECO4 Eligible - Remaining": int(total_eco4_expectation),
+                "EPC verified GBIS Eligibile - Remaining": int(total_gbis_expectation),
             }
         )
 
@@ -6036,7 +6202,8 @@ def app():
         "HA27", "HA28", "HA30", "HA31", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA49", "HA50", "HA54", "HA56",
         "HA63", "HA107", "HA117",
         # Added as of March 17th
-        "HA8", "HA11", "HA21",
+        "HA8", "HA11", "HA21", "HA37", "HA42",
+        "HA44",
         # New HAS
         "HAXX", "HAXXX",
     ]

From c58acadb730b6e6ab1ebb700b4669ab3cf171f5b Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 18 Mar 2024 12:19:15 +0000
Subject: [PATCH 148/262] HA51 eco3 matching

---
 .../ha_15_32/ha_analysis_batch_3.py           | 80 ++++++++++++++++---
 1 file changed, 71 insertions(+), 9 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index dc96d403..af9af514 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -491,6 +491,10 @@ class DataLoader:
             "address": "A_Address",
             "postcode": "matching_postcode"
         },
+        "HA45": {
+            "address": "Full postal address",
+            "postcode": "Postcode"
+        },
         "HA48": {
             "address": "Full Address",
             "postcode": "Postcode"
@@ -518,7 +522,8 @@ class DataLoader:
         "HA50": 4,
         "HA63": 15,
         "HA107": 51,
-        "HA48": 0
+        "HA48": 0,
+        "HA45": 0
     }
 
     UNMATCHED_ECO3 = {
@@ -527,7 +532,8 @@ class DataLoader:
         "HA50": 5,
         "HA56": 320,
         "HA63": 0,
-        "HA117": 4
+        "HA117": 4,
+        "HA51": 24
     }
 
     def __init__(self, directories, december_figures_filepath, use_cache, rebuild):
@@ -542,7 +548,7 @@ class DataLoader:
 
     def create_asset_list_matching_address(self, ha_name, asset_list):
 
-        if ha_name in ["HA1", "HA6", "HA12", "HA16", "HA24", "HA30", "HA31", "HA48", "HA49", "HA54"]:
+        if ha_name in ["HA1", "HA6", "HA12", "HA16", "HA24", "HA30", "HA31", "HA45", "HA48", "HA49", "HA54"]:
             asset_list["matching_address"] = asset_list[
                 self.COLUMN_CONFIG[ha_name]["address"]
             ].astype(str).str.lower().str.strip()
@@ -717,6 +723,18 @@ class DataLoader:
             asset_list["matching_address"] = asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["Post Code"].astype(str).str.lower().str.strip()
             asset_list["matching_postcode"] = asset_list["Post Code"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA51":
+            asset_list["matching_address"] = asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address Line 2"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address Line 3"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Postcode"].astype(str).str.lower().str.strip()
+            asset_list["matching_address"] = np.where(
+                asset_list["Block"].str.strip().str.len() > 0,
+                asset_list["Block"].astype(str).str.lower().str.strip() + ", " + \
+                asset_list["matching_address"],
+                asset_list["matching_address"]
+            )
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
         elif ha_name == "HA56":
             asset_list["matching_address"] = asset_list["Address 1"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["Address 2"].astype(str).str.lower().str.strip() + ", " + \
@@ -2485,6 +2503,13 @@ class DataLoader:
         )
         return survey_list
 
+    @staticmethod
+    def correct_ha45_survey_list(survey_list):  # HA45: "Norwich Road" -> "Norwich Avenue" in the street column
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Norwich Road", "Norwich Avenue"
+        )
+        return survey_list
+
     @staticmethod
     def levenstein_match(matching_string, df):
         match_to = df["matching_address"].tolist()
@@ -2744,6 +2769,38 @@ class DataLoader:
 
         return eco3_list
 
+    @staticmethod
+    def correct_ha51_eco3_list(eco3_list):
+        """Apply hand-curated corrections to the HA51 ECO3 list and return it.
+
+        Street and postcode fixes are written to the frame passed in; the row
+        filter then yields a new frame, which is copied before further edits.
+        """
+        street = eco3_list["Street / Block Name"]
+        for wrong, right in (
+            ("HASELEMERE AVENUE", "HASLEMERE AVENUE"),
+            ("THORVILLE GROVE", "THORNVILLE GROVE"),
+            ("MONTBRETA CLOSE", "MONTBRETIA CLOSE"),
+        ):
+            street = street.str.replace(wrong, right)
+        eco3_list["Street / Block Name"] = street
+        eco3_list["Post Code"] = np.where(
+            (eco3_list["Street / Block Name"] == "SYDENHAM ROAD") &
+            (eco3_list["Post Code"] == "CR0 2DW"),
+            "CR0 2ED",
+            eco3_list["Post Code"]
+        )
+        # Not in asset list; .copy() so the write below does not target a slice
+        eco3_list = eco3_list[
+            ~((eco3_list["Street / Block Name"] == "WOODLEY LANE") &
+              (eco3_list["Post Code"] == "SM5 2RJ") &
+              (eco3_list["NO "] == "FLAT 3, 11"))
+        ].copy()
+        # Rewrite the house number "47 B" as "47B"
+        eco3_list["NO "] = np.where(eco3_list["NO "] == "47 B", "47B", eco3_list["NO "])
+
+        return eco3_list
+
     def merge_eco3_to_assets(self, asset_list, eco3_list, ha_name):
 
         eco3_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_eco3_list")
@@ -2752,7 +2809,7 @@ class DataLoader:
         asset_list["matching_postcode_nospace"] = asset_list["matching_postcode"].str.replace(" ", "").str.lower()
         eco3_list["postcode_no_space"] = eco3_list["Post Code"].str.lower().str.replace(" ", "")
 
-        if ha_name in ["HA25", "HA56"]:
+        if ha_name in ["HA25", "HA56", "HA51"]:
             # HA25: 317 -> 259
             missed_postcodes = {
                 postcode for postcode in eco3_list["postcode_no_space"] if
@@ -2774,7 +2831,7 @@ class DataLoader:
         matching_lookup = []
         missed = []
         for _, row in tqdm(eco3_list.iterrows(), total=len(eco3_list)):
-            # if row["eco3_list_row_id"] == "HA25_Eco3_5422":
+            # if row["eco3_list_row_id"] == "HA51_Eco3_22":
             #     raise Exception()
             postcode = row["postcode_no_space"]
 
@@ -2813,6 +2870,12 @@ class DataLoader:
                 missed.append(row["eco3_list_row_id"])
                 continue
 
+            if df.shape[0] > 1:
+                if "flat" in str(row["NO "]).lower():
+                    df = df[df["matching_address"].str.contains("flat")]
+                else:
+                    df = df[~df["matching_address"].str.contains("flat")]
+
             if df.shape[0] != 1:
                 print(row["Street / Block Name"])
                 print(house_number)
@@ -6200,10 +6263,9 @@ def app():
     priority_has = [
         "HA1", "HA2", "HA6", "HA7", "HA9", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18", "HA19", "HA24", "HA25",
         "HA27", "HA28", "HA30", "HA31", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA49", "HA50", "HA54", "HA56",
-        "HA63", "HA107", "HA117",
-        # Added as of March 17th
-        "HA8", "HA11", "HA21", "HA37", "HA42",
-        "HA44",
+        "HA63", "HA107", "HA117", "HA8", "HA11", "HA21", "HA37", "HA42",
+        # Added as of March 18th
+        "HA44", "HA45", "HA51",
         # New HAS
         "HAXX", "HAXXX",
     ]

From e7cd80eba0ef8f11c62506509b5a7d60c7a37ce7 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 18 Mar 2024 12:34:28 +0000
Subject: [PATCH 149/262] Added HA52

---
 .../ha_15_32/ha_analysis_batch_3.py           | 21 +++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index af9af514..056a4190 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -946,6 +946,17 @@ class DataLoader:
         else:
             return "ECO surveys"
 
+    @staticmethod
+    def correct_ha51_asset_list(asset_list):
+        """Use the lower-cased Block as HouseNo for "61 wandle bank" addresses."""
+        # Substring test, so any address containing the text is rewritten
+        at_wandle_bank = asset_list["matching_address"].str.contains("61 wandle bank")
+        block_lower = asset_list["Block"].str.lower()
+        house_no = asset_list["HouseNo"]
+        asset_list["HouseNo"] = np.where(at_wandle_bank, block_lower, house_no)
+
+        return asset_list
+
     def load_asset_list(self, filepath, ha_name):
         workbook = openpyxl.load_workbook(filepath)
         asset_sheetname = self.get_asset_sheetname(workbook)
@@ -2510,6 +2521,16 @@ class DataLoader:
         )
         return survey_list
 
+    @staticmethod
+    def correct_ha51_survey_list(survey_list):
+        """Rename the "NO " column to "NO." and rewrite "Autum Close" as "Autumn Close"."""
+        street = "Street / Block Name"
+        # rename() hands back a new frame; the street fix is applied to that frame
+        survey_list = survey_list.rename(columns={"NO ": "NO."})
+        survey_list[street] = survey_list[street].str.replace("Autum Close", "Autumn Close")
+
+        return survey_list
+
     @staticmethod
     def levenstein_match(matching_string, df):
         match_to = df["matching_address"].tolist()

From e6c9dd7074dfba12668b31651ec1a5d9eab6a27c Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 18 Mar 2024 12:55:37 +0000
Subject: [PATCH 150/262] Done HA52

---
 .../ha_15_32/ha_analysis_batch_3.py           | 37 +++++++++++++++++--
 1 file changed, 33 insertions(+), 4 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 056a4190..bdf15917 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -503,6 +503,10 @@ class DataLoader:
             "address": "Property Address Full",
             "postcode": "Property Postcode"
         },
+        "HA52": {
+            "address": "Postal Address",
+            "postcode": "POSTCODE"
+        },
         "HA54": {
             "address": "Postal Address",
             "postcode": "matching_postcode"
@@ -523,7 +527,8 @@ class DataLoader:
         "HA63": 15,
         "HA107": 51,
         "HA48": 0,
-        "HA45": 0
+        "HA45": 0,
+        "HA52": 5
     }
 
     UNMATCHED_ECO3 = {
@@ -548,7 +553,7 @@ class DataLoader:
 
     def create_asset_list_matching_address(self, ha_name, asset_list):
 
-        if ha_name in ["HA1", "HA6", "HA12", "HA16", "HA24", "HA30", "HA31", "HA45", "HA48", "HA49", "HA54"]:
+        if ha_name in ["HA1", "HA6", "HA12", "HA16", "HA24", "HA30", "HA31", "HA45", "HA48", "HA49", "HA52", "HA54"]:
             asset_list["matching_address"] = asset_list[
                 self.COLUMN_CONFIG[ha_name]["address"]
             ].astype(str).str.lower().str.strip()
@@ -2531,6 +2536,25 @@ class DataLoader:
 
         return survey_list
 
+    @staticmethod
+    def correct_ha52_survey_list(survey_list):
+        """Apply the HA52 street-name fixes, then drop duplicate survey rows.
+
+        Rows count as duplicates when "NO.", "Street / Block Name" and
+        "Post Code" all match; the first row of each group is kept.
+        """
+        street_fixes = (
+            ("Mardalle Avenue", "Mardale Avenue"),
+            ("Ollerton  Close, Grappenhall", "Ollerton Close"),
+            ("Bradshaw Road, Grappenhall", "Bradshaw Lane"),
+        )
+        for found, wanted in street_fixes:
+            fixed = survey_list["Street / Block Name"].str.replace(found, wanted)
+            survey_list["Street / Block Name"] = fixed
+        survey_list = survey_list.drop_duplicates(["NO.", "Street / Block Name", "Post Code"])
+
+        return survey_list
+
     @staticmethod
     def levenstein_match(matching_string, df):
         match_to = df["matching_address"].tolist()
@@ -3165,7 +3189,12 @@ class DataLoader:
             asset_list_starting_size = asset_list.shape[0]
 
             # Change the column name if it's ECO eligibility
-            asset_list = asset_list.rename(columns={"ECO eligibility": "ECO Eligibility"})
+            asset_list = asset_list.rename(
+                columns={
+                    "ECO eligibility": "ECO Eligibility",
+                    "ECO Eligibilty": "ECO Eligibility",
+                },
+            )
             # Remove surplus whitespace from the ECO Eligibility column
             asset_list["ECO Eligibility"] = asset_list["ECO Eligibility"].str.strip()
             # Push to lower case
@@ -6286,7 +6315,7 @@ def app():
         "HA27", "HA28", "HA30", "HA31", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA49", "HA50", "HA54", "HA56",
         "HA63", "HA107", "HA117", "HA8", "HA11", "HA21", "HA37", "HA42",
         # Added as of March 18th
-        "HA44", "HA45", "HA51",
+        "HA44", "HA45", "HA51", "HA52",
         # New HAS
         "HAXX", "HAXXX",
     ]

From 92193d773dbd72aca67da82870d3f7da5a4acfe7 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 18 Mar 2024 13:21:57 +0000
Subject: [PATCH 151/262] fix facts and figures bug for ha51

---
 .../ha_15_32/ha_analysis_batch_3.py           | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index bdf15917..e40bb98b 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -3305,11 +3305,18 @@ class DataLoader:
                     )
                 else:
                     # We have some examples, e.g. HA28, where we do not have the installed or cancelled column
-                    survey_list["installation_status"] = np.where(
-                        survey_list['INSTALL/ CANCELLATION DATE'].str.lower().str.contains("cancelled"),
-                        "cancelled",
-                        "installed",
-                    )
+                    if 'INSTALL/ CANCELLATION DATE' in survey_list.columns:
+                        survey_list["installation_status"] = np.where(
+                            survey_list['INSTALL/ CANCELLATION DATE'].str.lower().str.contains("cancelled"),
+                            "cancelled",
+                            "installed",
+                        )
+                    else:
+                        survey_list["installation_status"] = np.where(
+                            survey_list['INSTALL / CANCELLATION DATE'].str.lower().str.contains("cancelled"),
+                            "cancelled",
+                            "installed",
+                        )
 
                 # Finally, for other cases, we set the status to "in progress"
                 survey_list["installation_status"] = survey_list["installation_status"].fillna("in progress")
@@ -5800,6 +5807,8 @@ def fml_data_pull(loader):
         "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12",
         "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49",
         'HA8', 'HA11', 'HA21', 'HA37', 'HA42',
+        # NEW - add property type
+        'HA44', 'HA45', 'HA51', 'HA52'
     ]
 
     # Can't pull from EPC database because it's based in Scotland

From 443aa585d0c3c35ae34718f0e8338ec48ba7ad3c Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 18 Mar 2024 15:40:52 +0000
Subject: [PATCH 152/262] Adding ha5

---
 .../ha_15_32/ha_analysis_batch_3.py           | 181 +++++++++++++++++-
 1 file changed, 171 insertions(+), 10 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index e40bb98b..009064c6 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -336,6 +336,16 @@ PROPERTY_TYPE_LOOKUP = {
         'Cluster': None,
         'Scheme Room': None
     },
+    "HA45": {
+        'Large block of flats': 'Flat',
+        'Small block of flats/dwelling converted in to flats': 'Flat',
+        'Semi-detached house': 'House',
+        'Mid-terraced house': 'House',
+        'End-terraced house': 'House',
+        'Block of flats': 'Flat',
+        'Detached house': 'House',
+        'Flat in mixed use building': 'Flat',
+    },
     "HA48": {
         "House": "House",
         "Flat": "Flat",
@@ -364,6 +374,30 @@ PROPERTY_TYPE_LOOKUP = {
         'Flat?': 'Flat',
         'Bungalow ': 'Bungalow'
     },
+    "HA51": {
+        'FLAT': 'Flat',
+        'HOUSE': 'House',
+        'MAISONETTE': 'Maisonette',
+        'BEDSIT': None,  # Considering as a non-specific residential category here
+        'BUNGALOW': 'Bungalow',
+    },
+    "HA52": {
+        'House - Mid Terrace': 'House',
+        'Flat - First Floor': 'Flat',
+        'Flat - Ground Floor': 'Flat',
+        'House - Semi-Detached': 'House',
+        'House - End Terrace': 'House',
+        'Flat - Second Floor': 'Flat',
+        'Bedsit': None,  # Considering as a non-specific residential category here
+        'Bungalow - Semi-Detached': 'Bungalow',
+        'Bungalow - Mid Terrace': 'Bungalow',
+        'Bungalow - End Terrace': 'Bungalow',
+        'House - Detached': 'House',
+        'Flat - Third Floor': 'Flat',
+        'House attached to flats': 'House',
+        'Flat - Fourth Floor': 'Flat',
+        'Bungalow - Detached': 'Bungalow'
+    },
     "HA56": {
         'House Non Specific': 'House',
         'HOUSE TERRACED': 'House',
@@ -463,6 +497,10 @@ class DataLoader:
             "address": "Address",
             "postcode": "Address - Postcode"
         },
+        "HA5": {
+            "address": "Address",
+            "postcode": "matching_postcode"
+        },
         "HA6": {
             "address": "propertyaddress",
             "postcode": "address"  # The 'address' column actually contains postcode
@@ -553,7 +591,9 @@ class DataLoader:
 
     def create_asset_list_matching_address(self, ha_name, asset_list):
 
-        if ha_name in ["HA1", "HA6", "HA12", "HA16", "HA24", "HA30", "HA31", "HA45", "HA48", "HA49", "HA52", "HA54"]:
+        if ha_name in [
+            "HA1", "HA5", "HA6", "HA12", "HA16", "HA24", "HA30", "HA31", "HA45", "HA48", "HA49", "HA52", "HA54"
+        ]:
             asset_list["matching_address"] = asset_list[
                 self.COLUMN_CONFIG[ha_name]["address"]
             ].astype(str).str.lower().str.strip()
@@ -750,6 +790,10 @@ class DataLoader:
             asset_list["matching_address"] = asset_list["Address1"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["POSTCODE"].astype(str).str.lower().str.strip()
             asset_list["matching_postcode"] = asset_list["POSTCODE"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA70":
+            asset_list["matching_address"] = asset_list["Address1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["POSTCODE"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["POSTCODE"].astype(str).str.lower().str.strip()
         elif ha_name == "HA107":
             # Create matching_address by concatenating House No, Street, Town, District, Postcode
             asset_list["matching_address"] = asset_list["House No"].astype(str).str.lower().str.strip() + ", " + \
@@ -962,9 +1006,100 @@ class DataLoader:
 
         return asset_list
 
+    def prepare_ha17(self, workbook):
+        """
+        Build the HA17 asset list from the two cavity wall sheets of the workbook.
+
+        Each sheet is loaded into a DataFrame and given matching_address,
+        matching_postcode and property_type columns. The two are stacked and any
+        address containing "<start> to <end> <rest>" is expanded into one row per
+        house number, unless the row's ECO Eligibility is "Not Eligible".
+
+        :param workbook: openpyxl workbook containing the "Blocks List - Cavity Wall
+            only" and "Street Properties - Cavity Wall" sheets
+        :return: DataFrame with asset_list_row_id added, as returned by
+            self.create_asset_list_house_no
+        """
+        def read_sheet(sheet_name, first_data_row):
+            # Row 2 holds the column names on both sheets
+            sheet = workbook[sheet_name]
+            colnames = [cell.value for cell in sheet[2]]
+            rows = [
+                [cell.value for cell in row]
+                for row in sheet.iter_rows(min_row=first_data_row, values_only=False)
+            ]
+            return pd.DataFrame(rows, columns=colnames)
+
+        # Normalise text for matching: cast to str, lower-case and trim
+        def clean(series):
+            return series.astype(str).str.lower().str.strip()
+
+        block_name_col = "Block Name\n[as per Naming Convention procedure]"
+        keep_cols = ["matching_address", "matching_postcode", "property_type", "ECO Eligibility"]
+
+        # Blocks sheet: data starts on row 4 and every block is recorded as a flat
+        blocks_df = read_sheet("Blocks List - Cavity Wall only", first_data_row=4)
+        blocks_df["matching_postcode"] = clean(blocks_df["Postcode"])
+        blocks_df["matching_address"] = (
+            clean(blocks_df[block_name_col]) + ", " +
+            clean(blocks_df["Block Street Name"]) + ", " +
+            blocks_df["matching_postcode"]
+        )
+        blocks_df["property_type"] = "Flat"
+
+        # Street properties sheet: data starts on row 3, property type comes from the sheet
+        street_properties_df = read_sheet("Street Properties - Cavity Wall", first_data_row=3)
+        street_properties_df["matching_postcode"] = clean(street_properties_df["Postcode"])
+        street_properties_df["matching_address"] = (
+            clean(street_properties_df[block_name_col]) + ", " + street_properties_df["matching_postcode"]
+        )
+        street_properties_df["property_type"] = street_properties_df[
+            "Block typology based on dwelling type\n[defined list]"
+        ]
+
+        asset_list_compressed = pd.concat(
+            [blocks_df[keep_cols], street_properties_df[keep_cols]], axis=0
+        )
+
+        # Expand "<start> to <end> <rest>" into "<n> <rest>" for every n in the range;
+        # anything in the address before the range is not carried over
+        range_pattern = re.compile(r"(\d+)\s+to\s+(\d+)\s+(.*)")
+        asset_list = []
+        for _, row in tqdm(asset_list_compressed.iterrows(), total=len(asset_list_compressed)):
+            record = row.to_dict()
+            match = None
+            if record["ECO Eligibility"] != "Not Eligible":
+                match = range_pattern.search(record["matching_address"])
+
+            house_numbers = range(int(match.group(1)), int(match.group(2)) + 1) if match else ()
+            if not house_numbers:
+                # Not eligible, no range found, or a reversed range (end < start):
+                # keep the row as it is instead of silently dropping the asset
+                asset_list.append(record)
+                continue
+
+            rest_of_address = match.group(3)
+            asset_list.extend(
+                {**record, "matching_address": f"{house_number} {rest_of_address}"}
+                for house_number in house_numbers
+            )
+
+        asset_list = pd.DataFrame(asset_list)
+
+        # Add in asset_list_row_id
+        asset_list["asset_list_row_id"] = [f"HA17{i}" for i in range(len(asset_list))]
+
+        # Add on house number
+        asset_list = self.create_asset_list_house_no(ha_name="HA17", asset_list=asset_list)
+        return asset_list
+
     def load_asset_list(self, filepath, ha_name):
         workbook = openpyxl.load_workbook(filepath)
-        asset_sheetname = self.get_asset_sheetname(workbook)
+        if ha_name == "HA17":
+            asset_list = self.prepare_ha17(workbook)
+            return asset_list, pd.DataFrame(), pd.DataFrame(), pd.DataFrame()
+        else:
+            asset_sheetname = self.get_asset_sheetname(workbook)
 
         asset_sheet = workbook[asset_sheetname]
         asset_sheet_colnames = [cell.value for cell in asset_sheet[1]]
@@ -977,6 +1112,9 @@ class DataLoader:
         if ha_name == "HA54":
             asset_sheet_colnames[10] = "matching_postcode"
 
+        if ha_name == "HA5":
+            asset_sheet_colnames[2] = "matching_postcode"
+
         rows_data = []
 
         for row in asset_sheet.iter_rows(min_row=2, values_only=False):
@@ -2555,6 +2693,10 @@ class DataLoader:
 
         return survey_list
 
+    @staticmethod
+    def correct_ha5_survey_list(survey_list):
+        return survey_list
+
     @staticmethod
     def levenstein_match(matching_string, df):
         match_to = df["matching_address"].tolist()
@@ -3431,6 +3573,9 @@ class DataLoader:
 
 
 def get_property_type_and_built_form(property_meta, ha_name):
+    if ha_name in ["HA44"]:
+        return None, None
+
     if ha_name == "HA1":
         property_type = property_meta["Asset Type"]
         # We correct a small error
@@ -3499,6 +3644,8 @@ def get_property_type_and_built_form(property_meta, ha_name):
         config = PROPERTY_TYPE_LOOKUP[ha_name][property_meta["Type"]]
         property_type = config.get("property-type")
         built_form = config.get("built-form")
+    elif ha_name == "HA17":
+        return property_meta["property_type"], None
     elif ha_name == "HA18":
         property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Asset Type"].strip())
         built_form = None
@@ -3580,6 +3727,9 @@ def get_property_type_and_built_form(property_meta, ha_name):
     elif ha_name == "HA42":
         property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Dwelling use/type"].strip())
         built_form = None
+    elif ha_name == "HA45":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property type"].strip())
+        built_form = None
     elif ha_name == "HA48":
         property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip())
         built_form = None
@@ -3589,6 +3739,14 @@ def get_property_type_and_built_form(property_meta, ha_name):
     elif ha_name == "HA50":
         property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip())
         built_form = None
+    elif ha_name == "HA51":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Asset Type"].strip())
+        built_form = None
+    elif ha_name == "HA52":
+        if property_meta["Property Type"] is None:
+            return None, None
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip())
+        built_form = None
     elif ha_name == "HA54":
         property_type = property_meta["Property Type"]
         built_form = None
@@ -5806,9 +5964,9 @@ def fml_data_pull(loader):
         "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13",
         "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12",
         "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49",
-        'HA8', 'HA11', 'HA21', 'HA37', 'HA42',
+        'HA8', 'HA11', 'HA21', 'HA37', 'HA42', 'HA44', 'HA45', 'HA51', 'HA52',
         # NEW - add property type
-        'HA44', 'HA45', 'HA51', 'HA52'
+        "HA17"
     ]
 
     # Can't pull from EPC database because it's based in Scotland
@@ -5905,7 +6063,7 @@ def fml_analysis(loader):
         "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13",
         "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12",
         "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49",
-        'HA8', 'HA11', 'HA21', 'HA37', 'HA42',
+        'HA8', 'HA11', 'HA21', 'HA37', 'HA42', 'HA44', 'HA45', 'HA51', 'HA52'
     ]
 
     no_ciga_cavity_descriptions = [
@@ -6320,11 +6478,11 @@ def app():
 
     # Add in:
     priority_has = [
-        "HA1", "HA2", "HA6", "HA7", "HA9", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18", "HA19", "HA24", "HA25",
-        "HA27", "HA28", "HA30", "HA31", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA49", "HA50", "HA54", "HA56",
-        "HA63", "HA107", "HA117", "HA8", "HA11", "HA21", "HA37", "HA42",
+        "HA1", "HA2", "HA5", "HA6", "HA7", "HA9", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18", "HA19", "HA24",
+        "HA25", "HA27", "HA28", "HA30", "HA31", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA49", "HA50", "HA54",
+        "HA56", "HA63", "HA107", "HA117", "HA8", "HA11", "HA21", "HA37", "HA42",
         # Added as of March 18th
-        "HA44", "HA45", "HA51", "HA52",
+        "HA44", "HA45", "HA51", "HA52", "HA17",
         # New HAS
         "HAXX", "HAXXX",
     ]
@@ -6332,7 +6490,10 @@ def app():
     # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE], 13 [DONE],
     # 35 [DONE], 56 [DONE], 19 [DONE], 18 [DONE], 9 [DONE], 27 [DONE], 34 [DONE], 30 [DONE], 31 [DONE], 54 [DONE]
     #
-    # Consider for ECO4: HA 70 - have to merge ECO3 list though, HA17 has LOTs of assets, but the asset list is a mess
+    # Consider for ECO4:
+    # HA 70 - have to merge ECO3 list though,
+    # HA17 has LOTs of assets, but the asset list is a mess
+    # HA53 but has EPCs done
     # Consider for GBIS:
     # Ignore for now:
     # 38 [problematic, but no ECO4], 10 problematic (no eligibility), 20 has barely any in

From 6ccfff0411ee2af58d6f7dc47b98f2deb70eac5c Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 18 Mar 2024 16:14:11 +0000
Subject: [PATCH 153/262] Added ha20

---
 .../ha_15_32/ha_analysis_batch_3.py           | 50 +++++++++++++++++--
 1 file changed, 46 insertions(+), 4 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 009064c6..627fcede 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -566,7 +566,8 @@ class DataLoader:
         "HA107": 51,
         "HA48": 0,
         "HA45": 0,
-        "HA52": 5
+        "HA52": 5,
+        "HA20": 6
     }
 
     UNMATCHED_ECO3 = {
@@ -669,6 +670,17 @@ class DataLoader:
                 asset_list["Postcode"].astype(str).str.lower().str.strip()
             )
             asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA20":
+            asset_list["matching_address"] = (
+                asset_list["House Name"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Block"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Address Line 2"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Address Line 3"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Address Line 4"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Postcode"].astype(str).str.lower().str.strip()
+            )
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
         elif ha_name == "HA21":
             asset_list["matching_address"] = (
                 asset_list["Address"].astype(str).str.lower().str.strip() + ", " +
@@ -2697,6 +2709,35 @@ class DataLoader:
     def correct_ha5_survey_list(survey_list):
         return survey_list
 
+    @staticmethod
+    def correct_ha20_survey_list(survey_list):
+        """
+        Apply the manual street/block name corrections to the HA20 survey list.
+
+        Each pair below is applied, in order, as a case-sensitive plain-text
+        replacement on the "Street / Block Name" column.
+
+        :param survey_list: DataFrame with a string "Street / Block Name" column
+        :return: the same DataFrame object, with that column overwritten
+        """
+        # (text to find, replacement); the first was noted as "Not in the asset list"
+        street_name_corrections = (
+            ("Abbot Close", "ABBOTS CLOSE"),
+            ("Downbarns Road", "DOWN BARNS ROAD"),
+            ("Austin Lane", "AUSTINS LANE"),
+            ("South Park Way", "SOUTHPARK WAY"),
+            ("OAKLAND ROAD", "OAKWOOD ROAD"),
+            ("ACRE WAY/NORTHWOOD", "ACRE WAY"),
+        )
+
+        street_names = survey_list["Street / Block Name"]
+        for wrong_name, corrected_name in street_name_corrections:
+            # regex=False pins literal matching regardless of the pandas default
+            street_names = street_names.str.replace(wrong_name, corrected_name, regex=False)
+        survey_list["Street / Block Name"] = street_names
+
+        return survey_list
+
     @staticmethod
     def levenstein_match(matching_string, df):
         match_to = df["matching_address"].tolist()
@@ -3301,7 +3342,8 @@ class DataLoader:
             "AFF0RDALE WARMTH": "ECO4",
             "ECO 4 RdSAP CL": "ECO4",
             "Affordable Warmth (R) ": "ECO4",
-            "Affordable Warmth ": "ECO4"
+            "Affordable Warmth ": "ECO4",
+            "ECO 4 AFFORDABLE WARMTH": "ECO4",
         }
 
         # Since it seems like "subject to archetype check" has some failure conditions, for simplicity, we
@@ -6478,11 +6520,11 @@ def app():
 
     # Add in:
     priority_has = [
-        "HA1", "HA2", "HA5", "HA6", "HA7", "HA9", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18", "HA19", "HA24",
+        "HA1", "HA2", "HA6", "HA7", "HA9", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18", "HA19", "HA24",
         "HA25", "HA27", "HA28", "HA30", "HA31", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA49", "HA50", "HA54",
         "HA56", "HA63", "HA107", "HA117", "HA8", "HA11", "HA21", "HA37", "HA42",
         # Added as of March 18th
-        "HA44", "HA45", "HA51", "HA52", "HA17",
+        "HA44", "HA45", "HA51", "HA52", "HA17", "HA5", "HA20",
         # New HAS
         "HAXX", "HAXXX",
     ]

From 3dd30445f92635df45b5da2a756650ca116f3855 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 18 Mar 2024 19:37:11 +0000
Subject: [PATCH 154/262] HA Analysis finalised

---
 .../ha_15_32/ha_analysis_batch_3.py           | 257 +++++++++++++++---
 1 file changed, 225 insertions(+), 32 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 627fcede..2f17ed73 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -51,6 +51,12 @@ PROPERTY_TYPE_LOOKUP = {
         'MAISONETTE': "Maisonette",
         'HOSTEL': None
     },
+    "HA5": {
+        "House": "House",
+        "Flat": "Flat",
+        "Bungalow": "Bungalow",
+        "Bedsit": None
+    },
     "HA6": {
         "property_type": {
             'HOUSE': "House",
@@ -161,6 +167,21 @@ PROPERTY_TYPE_LOOKUP = {
         "Hostel": None,
         "Block": None,
     },
+    "HA20": {
+        "House": "House",
+        "Flat": "Flat",
+        'Sheltered Flat': "Flat",
+        'Maisonette': 'Maisonette',
+        'Bungalow': 'Bungalow',
+        'House. SD': 'House',
+        'House. MT': 'House',
+        'House. ET': 'House',
+        'Sheltered Bungalow': 'Bungalow',
+        'Guest Accomodation': None,
+        'Sheltered House': 'House',
+        'House. MT ': 'House',
+        'House. D': 'House'
+    },
     "HA24": {
         '01 HOUSE': 'House',
         '02 FLAT': 'Flat',
@@ -3632,6 +3653,9 @@ def get_property_type_and_built_form(property_meta, ha_name):
     elif ha_name == "HA2":
         property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Dwelling Type"].strip())
         built_form = None
+    elif ha_name == "HA5":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Asset Type"].strip())
+        built_form = None
     elif ha_name == "HA6":
         property_type = PROPERTY_TYPE_LOOKUP[ha_name]["property_type"][property_meta["Dwelling type"]]
         built_form = property_meta["built_form"]
@@ -3694,6 +3718,9 @@ def get_property_type_and_built_form(property_meta, ha_name):
     elif ha_name == "HA19":
         property_type = property_meta["Dwelling Type"]
         built_form = None
+    elif ha_name == "HA20":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Asset Type"].strip())
+        built_form = None
     elif ha_name == "HA21":
         property_description = property_meta["Property Type"].strip().lower()
         if "house" in property_description:
@@ -5775,6 +5802,7 @@ def forecast_remaining_sales(loader):
         results.append(to_append)
 
     results = pd.DataFrame(results)
+    results.to_csv("pipeline_remaining_raw.csv")
 
     totals_row = {}
     for col in results.columns:
@@ -6006,9 +6034,7 @@ def fml_data_pull(loader):
         "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13",
         "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12",
         "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49",
-        'HA8', 'HA11', 'HA21', 'HA37', 'HA42', 'HA44', 'HA45', 'HA51', 'HA52',
-        # NEW - add property type
-        "HA17"
+        'HA8', 'HA11', 'HA21', 'HA37', 'HA42', 'HA44', 'HA45', 'HA51', 'HA52', "HA17", "HA5", "HA20",
     ]
 
     # Can't pull from EPC database because it's based in Scotland
@@ -6105,7 +6131,7 @@ def fml_analysis(loader):
         "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13",
         "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12",
         "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49",
-        'HA8', 'HA11', 'HA21', 'HA37', 'HA42', 'HA44', 'HA45', 'HA51', 'HA52'
+        'HA8', 'HA11', 'HA21', 'HA37', 'HA42', 'HA44', 'HA45', 'HA51', 'HA52', "HA17", "HA5", "HA20",
     ]
 
     no_ciga_cavity_descriptions = [
@@ -6124,22 +6150,6 @@ def fml_analysis(loader):
     # TODO: There will be some properties that are subject to CIGA that do not look like they ned a CIGA check! pass
     #  them! Non-invasices will have checked the wall though
 
-    codes = [
-        "HA39", "HA14", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA7",
-        "HA16", "HA107", "HA25", "HA50", "HA41", "HA48", "HA2", "HA63", "HA12",
-        "HA117", "HA13", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27",
-        "HA30", "HA31", "HA54", "HAXX", "HA49", "HAXXX",
-    ]
-
-    values = [
-        706, 2161, 1053, 793, 0, 656, 1200, 1647, 4248, 2703, 1087, 1876, 2135,
-        1078, 775, 538, 518, 401, 466, 2627, 98, 1050, 524, 191, 538, 384, 204,
-        281, 422, 74, 313, 71, 6
-    ]
-
-    # Create a dictionary mapping
-    remaining_eligible_mapping = dict(zip(codes, values))
-
     results = []
     wall_descriptions = []
     for ha_name in tqdm(has_bruh):
@@ -6397,9 +6407,13 @@ def fml_analysis(loader):
                 without_survey_without_ciga_expected = 0
             else:
                 # We apply the same conversion rate as the properties with a survey
-                without_survey_without_ciga_expected = np.round(
-                    without_survey_needing_ciga.shape[0] * (ciga_check_expectation / ciga_check_needed.shape[0])
-                )
+
+                if ciga_check_needed.shape[0] == 0 and ciga_check_expectation == 0:
+                    without_survey_without_ciga_expected = without_survey_needing_ciga.shape[0]
+                else:
+                    without_survey_without_ciga_expected = np.round(
+                        without_survey_needing_ciga.shape[0] * (ciga_check_expectation / ciga_check_needed.shape[0])
+                    )
 
             without_survey_passed_ciga = fuck_this[
                 (fuck_this["estimated"] == True) &
@@ -6466,15 +6480,6 @@ def fml_analysis(loader):
                 without_survey_identified_as_gbis_qualified
             )
 
-        surveys = loader.data[ha_name]["survey_list"]
-        sold_now = 0
-        if not surveys.empty:
-            sold_now = surveys[
-                surveys["installation_status"].str.lower().str.contains("eco4")
-            ].shape[0]
-
-        sales_since_nov = sold_now - original_figures["No. of Tech surveys complete - Eco 4"].values[0]
-
         results.append(
             {
                 "HA Name": ha_name,
@@ -6498,6 +6503,194 @@ def fml_analysis(loader):
     # TODO: Change the left hand side number for our post CIGA estimates
 
 
+def create_final_report(eco4_unit_revenue=1710, gbis_unit_revenue=600):
+    """
+    Produce the final volume and revenue outputs for the HA analysis.
+
+    Reads "analysis - revised.csv" (EPC validated results) and
+    "pipeline_remaining_raw.csv" (pipeline results) from the working directory
+    and, for each HA, places the remaining ECO4/GBIS counts from four sources
+    side by side: the All HA Summary, the postcode list pre CIGA, the postcode
+    list post CIGA and the EPC database. The volumes are sanity-checked and
+    written to "HA Analysis Final - volumes.csv"; the same table converted to
+    revenue is written to "HA Analysis Final - revenue.csv".
+
+    :param eco4_unit_revenue: multiplier turning an ECO4 volume into revenue
+    :param gbis_unit_revenue: multiplier turning a GBIS volume into revenue
+    :raises ValueError: if one of the volume sanity checks fails
+    :return: None
+    """
+    epc_validated_results = pd.read_csv("analysis - revised.csv")
+    pipeline_results = pd.read_csv("pipeline_remaining_raw.csv")
+
+    ####################################
+    # Original Warmfront estimates
+    ####################################
+    # Create the volumes result
+    all_ha_summary_remaining = pipeline_results[
+        [
+            "('', '', '', 'HA Name')",
+            "('ECO4 original', '', 'Remaining - #', '')",
+            "('GBIS original', '', 'Remaining - #', '')",
+        ]
+    ].copy().rename(
+        columns={
+            "('', '', '', 'HA Name')": "HA Name",
+            "('ECO4 original', '', 'Remaining - #', '')": "# ECO4 remaining - All HA Summary",
+            "('GBIS original', '', 'Remaining - #', '')": "# GBIS remaining - All HA Summary",
+        }
+    )
+    all_ha_summary_remaining["# Total remaining - All HA Summary"] = (
+        all_ha_summary_remaining["# ECO4 remaining - All HA Summary"] +
+        all_ha_summary_remaining["# GBIS remaining - All HA Summary"]
+    )
+    all_ha_summary_remaining = all_ha_summary_remaining.sort_values("HA Name")
+
+    ####################################
+    # Postcode list - pre-CIGA
+    ####################################
+    postcode_list_pre_ciga_remaining = pipeline_results[
+        [
+            "('', '', '', 'HA Name')",
+            "('ECO4 pre-ciga', '', 'Remaining - #', '')",
+            "('GBIS Postcode list', 'Warmfront post code list', 'Remaining - #', 'GBIS total')",
+        ]
+    ].copy().rename(
+        columns={
+            "('', '', '', 'HA Name')": "HA Name",
+            "('ECO4 pre-ciga', '', 'Remaining - #', '')": "# ECO4 remaining - Postcode list (pre CIGA)",
+            "('GBIS Postcode list', 'Warmfront post code list', 'Remaining - #', 'GBIS total')": (
+                "# GBIS remaining - Postcode list (pre CIGA)"
+            ),
+        }
+    )
+
+    postcode_list_pre_ciga_remaining["# Total remaining - Postcode list (pre CIGA)"] = (
+        postcode_list_pre_ciga_remaining["# ECO4 remaining - Postcode list (pre CIGA)"] +
+        postcode_list_pre_ciga_remaining["# GBIS remaining - Postcode list (pre CIGA)"]
+    )
+    postcode_list_pre_ciga_remaining = postcode_list_pre_ciga_remaining.sort_values("HA Name")
+
+    ####################################
+    # Postcode list - post-CIGA
+    ####################################
+    postcode_list_post_ciga_remaining = pipeline_results[
+        [
+            "('', '', '', 'HA Name')",
+            "('ECO4 post-ciga', '', 'Estimated remaining eligible - #', '')",
+            "('GBIS Postcode list', 'Warmfront post code list', 'Remaining - #', 'GBIS total')",
+        ]
+    ].copy().rename(
+        columns={
+            "('', '', '', 'HA Name')": "HA Name",
+            "('ECO4 post-ciga', '', 'Estimated remaining eligible - #', '')":
+                "# ECO4 remaining - Postcode list (post CIGA)",
+            "('GBIS Postcode list', 'Warmfront post code list', 'Remaining - #', 'GBIS total')": (
+                "# GBIS remaining - Postcode list (post CIGA)"
+            ),
+        }
+    )
+
+    postcode_list_post_ciga_remaining["# Total remaining - Postcode list (post CIGA)"] = (
+        postcode_list_post_ciga_remaining["# ECO4 remaining - Postcode list (post CIGA)"] +
+        postcode_list_post_ciga_remaining["# GBIS remaining - Postcode list (post CIGA)"]
+    )
+    postcode_list_post_ciga_remaining = postcode_list_post_ciga_remaining.sort_values("HA Name")
+
+    ####################################
+    # From EPC Database
+    ####################################
+    from_epc_database = epc_validated_results[
+        [
+            "HA Name",
+            "EPC verified ECO4 Eligible - Remaining",
+            "EPC verified GBIS Eligibile - Remaining"
+        ]
+    ].copy().rename(
+        columns={
+            "EPC verified ECO4 Eligible - Remaining": "# ECO4 remaining - From EPC Database (post CIGA)",
+            "EPC verified GBIS Eligibile - Remaining": "# GBIS remaining - From EPC Database (post CIGA)",
+        }
+    )
+
+    from_epc_database["# Total remaining - From EPC Database (post CIGA)"] = (
+        from_epc_database["# ECO4 remaining - From EPC Database (post CIGA)"] +
+        from_epc_database["# GBIS remaining - From EPC Database (post CIGA)"]
+    )
+    from_epc_database = from_epc_database.sort_values("HA Name")
+
+    ####################################
+    # Combined volumes
+    ####################################
+    # Combine the datasets. The postcode list slices are left-joined onto the
+    # All HA Summary slice; the final inner join keeps only the HAs that are
+    # also present in the EPC validated results
+    volumes = all_ha_summary_remaining.merge(
+        postcode_list_pre_ciga_remaining, how="left", on="HA Name"
+    ).merge(
+        postcode_list_post_ciga_remaining, how="left", on="HA Name"
+    ).merge(
+        from_epc_database, how="inner", on="HA Name"
+    )
+
+    ####################################
+    # Revenue
+    ####################################
+    # For every source, scale the ECO4 and GBIS volumes by the unit revenues and
+    # then rebuild that source's total from the two converted columns
+    revenue = volumes.copy()
+    for source in [
+        "All HA Summary",
+        "Postcode list (pre CIGA)",
+        "Postcode list (post CIGA)",
+        "From EPC Database (post CIGA)",
+    ]:
+        eco4_col = f"# ECO4 remaining - {source}"
+        gbis_col = f"# GBIS remaining - {source}"
+        revenue[eco4_col] = revenue[eco4_col] * eco4_unit_revenue
+        revenue[gbis_col] = revenue[gbis_col] * gbis_unit_revenue
+        revenue[f"# Total remaining - {source}"] = revenue[eco4_col] + revenue[gbis_col]
+
+    # Replace the # with £ in the columns
+    revenue.columns = [col.replace("#", "£") for col in revenue.columns]
+
+    ####################################
+    # Sanity checks on the volumes
+    ####################################
+    # We check that each column gets smaller
+    decreasing_check1 = all(
+        volumes["# ECO4 remaining - Postcode list (pre CIGA)"] >= volumes[
+            '# ECO4 remaining - Postcode list (post CIGA)']
+    )
+    if not decreasing_check1:
+        raise ValueError("decreasing_check1 failed")
+
+    # Just HA32 and HA17 should fail this, and it's due to GBIS jobs looking like ECO4
+    decreasing_check2 = volumes[volumes["# ECO4 remaining - From EPC Database (post CIGA)"] > volumes[
+        "# ECO4 remaining - Postcode list (post CIGA)"]]
+
+    if set(decreasing_check2["HA Name"].tolist()) != {"HA17", "HA32"}:
+        raise ValueError("decreasing_check2 failed")
+
+    # Check for GBIS. NOTE: both GBIS postcode list columns are renamed copies of the
+    # same pipeline column, so with one row per HA this only fails on missing values
+    decreasing_check3 = all(
+        volumes["# GBIS remaining - Postcode list (pre CIGA)"] >= volumes[
+            '# GBIS remaining - Postcode list (post CIGA)']
+    )
+
+    if not decreasing_check3:
+        raise ValueError("decreasing_check3 failed")
+
+    # Don't perform this - this happens for multiple
+    # decreasing_check4 = volumes[volumes["# GBIS remaining - From EPC Database (post CIGA)"] > volumes[
+    #     "# GBIS remaining - Postcode list (post CIGA)"]]
+
+    # Store final outputs
+    volumes.to_csv("HA Analysis Final - volumes.csv")
+    revenue.to_csv("HA Analysis Final - revenue.csv")
+
+
 def app():
     """
     This app contains the housin association analysis for HAs 1, 6, 14, 39 and 107.

From fb31f95457f3a40a60bc6ff502c1c2fe5e8233f1 Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Tue, 19 Mar 2024 11:48:48 +0000
Subject: [PATCH 155/262] add multiprocessing to process directory

---
 etl/epc/Pipeline.py            | 54 ++++++++++++++++++++++++++++------
 etl/epc/property_change_app.py |  1 +
 2 files changed, 46 insertions(+), 9 deletions(-)

diff --git a/etl/epc/Pipeline.py b/etl/epc/Pipeline.py
index 0943b206..36c381ce 100644
--- a/etl/epc/Pipeline.py
+++ b/etl/epc/Pipeline.py
@@ -4,6 +4,7 @@ import pandas as pd
 from typing import List
 from pathlib import Path
 from tqdm import tqdm
+import multiprocessing as mp
 
 from etl.epc.DataProcessor import EPCDataProcessor
 from etl.epc.Record import EPCRecord, EPCDifferenceRecord
@@ -83,6 +84,7 @@ class EPCPipeline:
         epc_cleaning_dataset_key="sap_change_model/cleaning_dataset_rooms.parquet",
         epc_all_equal_rows_key="sap_change_model/all_equal_rows_rooms.parquet",
         epc_compiled_dataset_key="sap_change_model/dataset_rooms.parquet",
+        use_parallel=False,
     ):
         """
         :param directories: List of directories to process
@@ -107,6 +109,7 @@ class EPCPipeline:
         self.epc_cleaning_dataset_key = epc_cleaning_dataset_key
         self.epc_all_equal_rows_key = epc_all_equal_rows_key
         self.epc_compiled_dataset_key = epc_compiled_dataset_key
+        self.use_parallel = use_parallel
 
     def run(self):
         """
@@ -145,8 +148,11 @@ class EPCPipeline:
                 "Directories not specified - Unable to run Training pipeline"
             )
 
-        for directory in tqdm(self.directories):
-            self.process_directory(directory)
+        if self.use_parallel:
+            self.run_training_dataset_parallel_pipeline()
+        else:
+            for directory in tqdm(self.directories):
+                self.process_directory(directory)
 
         save_dataframe_to_s3_parquet(
             df=self.compiled_dataset,
@@ -166,6 +172,41 @@ class EPCPipeline:
             file_key=self.epc_cleaning_dataset_key,
         )
 
+    def run_training_dataset_parallel_pipeline(self):
+        """
+        Run process_directory_task for every directory in a process pool, then
+        merge the returned datasets, cleaning averages and all-equal rows here.
+        """
+        with mp.Pool() as pool:
+            task_outputs = list(
+                tqdm(
+                    pool.imap(self.process_directory_task, self.directories),
+                    total=len(self.directories),
+                ),
+            )
+        if task_outputs:
+            # Single concat: a concat per result would re-copy all earlier rows each time
+            datasets = [output["dataset"] for output in task_outputs]
+            self.compiled_dataset = pd.concat([self.compiled_dataset, *datasets])
+        for output in tqdm(task_outputs):
+            self.compiled_cleaning_averages.append(output["cleaning_averages"])
+            self.compiled_all_equal_rows.extend(output["all_equal_rows"])
+
+    def process_directory_task(self, directory: str) -> dict:
+        """
+        Process one directory and return its results for the parent to merge.
+
+        :param directory: Directory passed through to process_directory
+        :return: dict with "dataset", "cleaning_averages" and "all_equal_rows"
+        """
+        self.process_directory(directory=directory)
+
+        return {
+            "dataset": self.compiled_dataset,
+            "cleaning_averages": self.epc_data_processor.cleaning_averages,
+            "all_equal_rows": self.compiled_all_equal_rows,
+        }
+
     def process_directory(self, directory: Path):
         """
         Process a single directory
@@ -177,12 +218,13 @@ class EPCPipeline:
         self.epc_data_processor.prepare_data(filepath=filepath)
 
         constituency_data = self.epc_data_processor.data
+
         self.compiled_cleaning_averages.append(
             self.epc_data_processor.cleaning_averages
         )
 
         constituency_difference_records = []
-        # self.check_records = []
+
         for uprn, property_data in constituency_data.groupby("uprn", observed=True):
             difference_records = self.process_uprn(
                 uprn=str(uprn), property_data=property_data, directory=directory
@@ -190,12 +232,6 @@ class EPCPipeline:
             if difference_records is not None:
                 constituency_difference_records.extend(difference_records)
 
-        # check_list = []
-        # for check_record in self.check_records:
-        #     check_list.append(check_record["difference_record"])
-
-        # td = TrainingDataset(datasets=check_list, cleaned_lookup=clean_lookup)
-
         constituency_dataset = TrainingDataset(
             datasets=constituency_difference_records, cleaned_lookup=clean_lookup
         )
diff --git a/etl/epc/property_change_app.py b/etl/epc/property_change_app.py
index c8923d6d..c985567d 100644
--- a/etl/epc/property_change_app.py
+++ b/etl/epc/property_change_app.py
@@ -16,6 +16,7 @@ def main():
 
     epc_pipeline = EPCPipeline(
         directories=directories,
+        use_parallel=True,
         epc_data_processor=EPCDataProcessor(run_mode="training"),
     )
 

From 0ea8a40143e09ccb1b6b7c1061dba732e666318f Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Fri, 22 Mar 2024 19:21:21 +0000
Subject: [PATCH 156/262] add requirements for pyarrow, add timestamp
 information to dataset for loose version control

---
 etl/epc/Pipeline.py                | 16 ++++++++++------
 etl/epc/generate_scenarios_data.py |  8 ++++++--
 etl/epc/requirements.txt           |  3 ++-
 3 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/etl/epc/Pipeline.py b/etl/epc/Pipeline.py
index c678c830..6abf05bd 100644
--- a/etl/epc/Pipeline.py
+++ b/etl/epc/Pipeline.py
@@ -1,5 +1,6 @@
 import msgpack
 import pandas as pd
+from datetime import datetime
 
 from typing import List
 from pathlib import Path
@@ -82,9 +83,9 @@ class EPCPipeline:
         run_mode="training",
         epc_local_file="certificates.csv",
         epc_bucket_name="retrofit-data-dev",
-        epc_cleaning_dataset_key="sap_change_model/cleaning_dataset_rooms.parquet",
-        epc_all_equal_rows_key="sap_change_model/all_equal_rows_rooms.parquet",
-        epc_compiled_dataset_key="sap_change_model/dataset_rooms.parquet",
+        epc_cleaning_dataset_key="sap_change_model/{}/cleaning_dataset_rooms.parquet",
+        epc_all_equal_rows_key="sap_change_model/{}/all_equal_rows_rooms.parquet",
+        epc_compiled_dataset_key="sap_change_model/{}/dataset_rooms.parquet",
         use_parallel=False,
     ):
         """
@@ -107,10 +108,13 @@ class EPCPipeline:
         self.run_mode = run_mode
         self.epc_local_file = epc_local_file
         self.epc_bucket_name = epc_bucket_name
-        self.epc_cleaning_dataset_key = epc_cleaning_dataset_key
-        self.epc_all_equal_rows_key = epc_all_equal_rows_key
-        self.epc_compiled_dataset_key = epc_compiled_dataset_key
+
         self.use_parallel = use_parallel
+        self.timeprefix = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
+
+        self.epc_cleaning_dataset_key = epc_cleaning_dataset_key.format(self.timeprefix)
+        self.epc_all_equal_rows_key = epc_all_equal_rows_key.format(self.timeprefix)
+        self.epc_compiled_dataset_key = epc_compiled_dataset_key.format(self.timeprefix)
 
     def run(self):
         """
diff --git a/etl/epc/generate_scenarios_data.py b/etl/epc/generate_scenarios_data.py
index 172e8a27..d5bece8b 100644
--- a/etl/epc/generate_scenarios_data.py
+++ b/etl/epc/generate_scenarios_data.py
@@ -20,6 +20,10 @@ from recommendations.Recommendations import Recommendations
 from utils.logger import setup_logger
 from utils.s3 import read_dataframe_from_s3_parquet, save_dataframe_to_s3_parquet
 
+from datetime import datetime
+
+now = datetime.now().strftime("%d-%m-%Y-%H-%M-%S")
+
 logger = setup_logger()
 
 logger.info("Connecting to db")
@@ -132,7 +136,7 @@ for scenario_property in scenario_properties:
     p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
 
     recommender = Recommendations(property_instance=p, materials=materials)
-    property_recommendations = recommender.recommend()
+    property_recommendations = recommender.recommend("0")
 
     wall_recommendations = recommender.wall_recomender.recommendations
     loft_recommendations = recommender.roof_recommender.recommendations
@@ -247,5 +251,5 @@ all_predictions = model_api.predict_all(
 save_dataframe_to_s3_parquet(
     recommendations_scoring_data,
     "retrofit-data-dev",
-    "scenario_data/recommendations_scoring_data.parquet",
+    f"scenario_data/{now}/recommendations_scoring_data.parquet",
 )
diff --git a/etl/epc/requirements.txt b/etl/epc/requirements.txt
index 9f972bde..87148180 100644
--- a/etl/epc/requirements.txt
+++ b/etl/epc/requirements.txt
@@ -1,4 +1,5 @@
 pandas==2.1.3
 tqdm==4.66.1
 msgpack==1.0.7
-boto3==1.29.6
\ No newline at end of file
+boto3==1.29.6
+pyarrow==15.0.2
\ No newline at end of file

From 724379a86d1bd9b79159f2f8f9e5d8abe9496f5f Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 26 Mar 2024 18:05:08 +0000
Subject: [PATCH 157/262] wrapping up ha analysis

---
 .../ha_15_32/ha_analysis_batch_3.py           | 170 ++++++++++--------
 1 file changed, 94 insertions(+), 76 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 2f17ed73..e414cd00 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -5366,6 +5366,7 @@ def forecast_remaining_sales(loader):
 
     results = []
     for ha_name, input_data in loader.data.items():
+
         # Original warmfront figures - ECO4
         original_warmfront_estimates = december_figures[december_figures["HA Name"] == ha_name]
         if original_warmfront_estimates.empty:
@@ -6032,7 +6033,7 @@ def forecast_remaining_sales(loader):
 def fml_data_pull(loader):
     has_bruh = [
         "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13",
-        "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12",
+        "HA50", "HA24", "HA15", "HA32", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12",
         "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49",
         'HA8', 'HA11', 'HA21', 'HA37', 'HA42', 'HA44', 'HA45', 'HA51', 'HA52', "HA17", "HA5", "HA20",
     ]
@@ -6129,7 +6130,7 @@ def fml_analysis(loader):
     assumed_ciga_pass_rate = 0.731
     has_bruh = [
         "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13",
-        "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12",
+        "HA50", "HA24", "HA15", "HA32", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12",
         "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49",
         'HA8', 'HA11', 'HA21', 'HA37', 'HA42', 'HA44', 'HA45', 'HA51', 'HA52', "HA17", "HA5", "HA20",
     ]
@@ -6738,89 +6739,106 @@ def app():
     loader = DataLoader(directories, december_figures_filepath, use_cache, rebuild_inputs)
     loader.load()
     loader.ha_facts_and_figures()
-
     forecast_remaining_sales(loader)
 
-    conversion_rate = 0.95
-    archetype_check_conversion = 0.7
-    res = []
-    for k, v in loader.data.items():
-        asset_list = v["asset_list"].copy()
-        agg = asset_list["ECO Eligibility"].value_counts()
-        # We find a case where there are properties that have passed CIGA
-        if not any("passed" in x for x in agg.index):
+    # gbis rate
+    # breakdowns = []
+    # for ha, data_assets in loader.data.items():
+    #     asset_list = data_assets["asset_list"].copy()
+    #     breakdown = asset_list["ECO Eligibility"].value_counts().to_dict()
+    #     breakdowns.append(breakdown)
+    # breakdowns = pd.DataFrame(breakdowns)
+    #
+    # installer = []
+    # for ha, data_assets in loader.data.items():
+    #     survey_list = data_assets["survey_list"]
+    #     if survey_list.empty:
+    #         continue
+    #     if "INSTALLER" not in survey_list.columns:
+    #         continue
+    #
+    #     installers = survey_list["INSTALLER"].value_counts().to_dict()
+    #     installers["ha_name"] = ha
+    #     installer.append(installers)
+    # installer = pd.DataFrame(installer)
+    # installer.drop(columns=["ha_name"]).sum().sum()
+
+    # Adhoc - for HA16, get the properties that still need a CIGA check
+    asset_list_ha16 = loader.data["HA16"]["asset_list"].copy()
+    ha_16_need_ciga = asset_list_ha16[
+        asset_list_ha16["ECO Eligibility"].str.contains("subject to ciga")
+    ]
+    completed_cigas = loader.data["HA16"]["ciga_list"].copy()
+    # Store the results
+    ha_16_need_ciga.to_csv("ha16_need_ciga.csv")
+    completed_cigas.to_csv("ha16_completed_cigas.csv")
+
+    # Adhoc - look at the current pipeline and identify how many dormant, CIGA dependent properties there are for
+    # live projects
+
+    # Read excel
+    orderbook_filepath = "local_data/ha_data/Warmfront HA client order book overview_20240129.xlsx"
+    orderbook_workbook = openpyxl.load_workbook(orderbook_filepath)
+    orderbook_sheet = orderbook_workbook["Contractual Info"]
+    orderbook_colnames = [cell.value for cell in orderbook_sheet[1]]
+
+    rows = []
+    for row in orderbook_sheet.iter_rows(min_row=2, values_only=False):
+        row_data = [cell.value for cell in row]  # This will get you the cell values
+        rows.append(row_data)
+
+    orderbook = pd.DataFrame(rows, columns=orderbook_colnames)
+    live_orderbook = orderbook[orderbook["Live, New, or Historic?"] == "LIVE"].copy()
+    live_orderbook['Redacted HA'] = live_orderbook['Redacted HA'].str.replace(" ", "")
+
+    dormant_properties = []
+    missed_has = []
+    for _, customer in live_orderbook.iterrows():
+        if customer['Redacted HA'] not in loader.data.keys():
+            missed_has.append(customer['Redacted HA'])
             continue
+        asset_list = loader.data[customer['Redacted HA']]["asset_list"].copy()
+        survey_list = loader.data[customer['Redacted HA']]["survey_list"].copy()
+        # Remove sold
+        if not survey_list.empty:
+            survey_list = survey_list[~pd.isnull(survey_list["asset_list_row_id"])]
+            asset_list = asset_list.merge(
+                survey_list[["asset_list_row_id", "installation_status"]],
+                how="left",
+                on="asset_list_row_id"
+            )
+            # Rows with an installation_status have already gone to installation, so they are not remaining
+            asset_list = asset_list[pd.isnull(asset_list["installation_status"])]
+            asset_list = asset_list.drop(columns=["installation_status"])
 
-        agg = pd.DataFrame(agg).reset_index()
-
-        passed_ciga = agg[agg["ECO Eligibility"] == "eco4 - passed ciga"]
-        passed_ciga = passed_ciga["count"].values[0] if not passed_ciga.empty else 0
-
-        failed_ciga = agg[agg["ECO Eligibility"] == "failed ciga"]
-        failed_ciga = failed_ciga["count"].values[0] if not failed_ciga.empty else 0
-
-        ciga_pass_rate = passed_ciga / (passed_ciga + failed_ciga) if (passed_ciga + failed_ciga) > 0 else 1
-
-        dormant_ciga = agg[
-            agg["ECO Eligibility"].str.contains("subject to ciga") &
-            ~agg["ECO Eligibility"].str.contains("subject to archetype")
+        # We pull out the properties that need a CIGA check
+        need_ciga = asset_list[asset_list["ECO Eligibility"] == "eco4 (subject to ciga)"]
+        need_archetype = asset_list[asset_list["ECO Eligibility"] == "eco4 (subject to archetype)"]
+        need_ciga_and_archetype = asset_list[
+            asset_list["ECO Eligibility"] == "eco4 (subject to ciga) (subject to archetype)"
             ]
 
-        dormant_ciga = dormant_ciga['count'].values[0] if not dormant_ciga.empty else 0
-
-        dormant_ciga_archetype = agg[
-            agg["ECO Eligibility"].str.contains("subject to ciga") &
-            agg["ECO Eligibility"].str.contains("subject to archetype")
-            ]
-
-        dormant_ciga_archetype = dormant_ciga_archetype['count'].values[0] if not dormant_ciga_archetype.empty else 0
-
-        needing_check = dormant_ciga + dormant_ciga_archetype * archetype_check_conversion
-        needing_check = np.round(needing_check)
-
-        additional_jobs = (dormant_ciga * ciga_pass_rate * conversion_rate) + (
-            dormant_ciga_archetype * archetype_check_conversion * ciga_pass_rate * conversion_rate
-        )
-        additional_jobs = np.round(additional_jobs)
-
-        # We attempt to estimate the uplift and how much of that is attributed to surplus subject to ciga jobs
-        original_estimate = loader.december_figures[
-            loader.december_figures["HA Name"] == k
-            ]
-
-        original_estimate = original_estimate["ECO4"].values[0] if not original_estimate.empty else 0
-        base_eco_figures = agg[
-            agg["ECO Eligibility"].isin(["eco4", "eco4 - passed ciga"])
-        ]["count"].sum()
-        eco4_from_ciga = original_estimate - base_eco_figures
-        eco4_from_ciga = eco4_from_ciga if eco4_from_ciga > 0 else 0
-        surplus_from_dormant = additional_jobs - eco4_from_ciga
-        surplus_from_dormant = 0 if surplus_from_dormant < 0 else surplus_from_dormant
-
-        res.append(
+        dormant_properties.append(
             {
-                "ha_name": k,
-                "additional_eco4": additional_jobs,
-                "needing_check": needing_check,
-                "surplus_from_dormant": surplus_from_dormant
+                "HA Name": customer['Redacted HA'],
+                "Need CIGA": need_ciga.shape[0],
+                "Need Archetype": need_archetype.shape[0],
+                "Need CIGA and Archetype": need_ciga_and_archetype.shape[0]
             }
         )
 
-    res = pd.DataFrame(res)
-    # Drop the HAs that are not in that pervious draft
-    # In the v2 draft, there are 12 HAs
+    dormant_properties = pd.DataFrame(dormant_properties)
+    totals = dormant_properties.sum()
+    totals["HA Name"] = "Total"
 
-    v5_surplus = res[
-        ~res["ha_name"].isin(["HA9"])
-    ]["additional_eco4"].sum()
-    # 7212 properties
-    # This is not a perfect difference though, because of the variations in how the numbers are recorded in the November
-    # all HAs sheet. E.g for HA 107, there were 1239 properties identified. In the postcode list, there are 1255,
-    # however 531 are still needing a CIGA check. Therefore their original figures, in this case, included properties
-    # pre-CIGA
+    dormant_properties = pd.concat([dormant_properties, totals.to_frame().T])
+    dormant_properties.to_csv("dormant_properties.csv")
 
-    v5_surplus_from_dormant = res[
-        ~res["ha_name"].isin(["HA9"])
-    ]["surplus_from_dormant"].sum()
-    # 5539.0
-    # 9471690
+    loader.december_figures["ECO4 remaining"].sum()
+    december_figures = loader.december_figures.copy()
+    december_figures["ECO4 remaining"] = np.where(
+        december_figures["ECO4 remaining"] < 0,
+        0,
+        december_figures["ECO4 remaining"]
+    )
+    december_figures["ECO4 remaining"].sum()

From ebb28236617abff1e3a5f91dd6b06b66a001a4d7 Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Wed, 27 Mar 2024 11:39:51 +0000
Subject: [PATCH 158/262] override scenario data to have average insulation
 thickness, change impact values

---
 etl/epc/generate_scenarios_data.py | 48 +++++++++++++++++++++++++-----
 1 file changed, 41 insertions(+), 7 deletions(-)

diff --git a/etl/epc/generate_scenarios_data.py b/etl/epc/generate_scenarios_data.py
index d5bece8b..f9f66034 100644
--- a/etl/epc/generate_scenarios_data.py
+++ b/etl/epc/generate_scenarios_data.py
@@ -54,9 +54,19 @@ scenario_properties = [
         "postcode": "NN1 5JY",
         "lmk-key": "1459796789102016070507274146560098",
         "measures": [
-            [["internal_wall_insulation"], "11", None, [0]],
-            [["external_wall_insulation"], "10", None, [0]],
-            [["solar", "windows"], "12-15", {"photo_supply_ending": 50}, [0, 1]],
+            [
+                ["internal_wall_insulation"],
+                "11",
+                {"walls_insulation_thickness_ending": "average"},
+                [0],
+            ],
+            [
+                ["external_wall_insulation"],
+                "10",
+                {"walls_insulation_thickness_ending": "average"},
+                [0],
+            ],
+            [["solar", "windows"], "15", {"photo_supply_ending": 50}, [0, 1]],
         ],
     },
     {
@@ -64,7 +74,12 @@ scenario_properties = [
         "postcode": "HP1 2HA",
         "lmk-key": "c14029235739827d5f627dc8aa9bb567d026b267e851e0db0001db24638667b1",
         "measures": [
-            [["cavity_wall_insulation", "loft_insulation"], "15", None, [0, 1]],
+            [
+                ["cavity_wall_insulation", "loft_insulation"],
+                "15",
+                {"walls_insulation_thickness_ending": "average"},
+                [0, 1],
+            ],
         ],
     },
     {
@@ -72,7 +87,12 @@ scenario_properties = [
         "postcode": "HP1 2HE",
         "lmk-key": "99296a6dda21314fef3a61cda59e441e9a2aacf115eb96f4a0fa85696bf7b117",
         "measures": [
-            [["cavity_wall_insulation", "loft_insulation"], "15", None, [0, 1]],
+            [
+                ["cavity_wall_insulation", "loft_insulation"],
+                "15",
+                {"walls_insulation_thickness_ending": "average"},
+                [0, 1],
+            ],
         ],
     },
     {
@@ -80,7 +100,12 @@ scenario_properties = [
         "postcode": "HP1 2AN",
         "lmk-key": "d1e0534be3a44c33003323b21d0e322e3daddc65b5ee71936f89c59ddab96b50",
         "measures": [
-            [["cavity_wall_insulation", "loft_insulation"], "15", None, [0, 1]],
+            [
+                ["cavity_wall_insulation", "loft_insulation"],
+                "15",
+                {"walls_insulation_thickness_ending": "average"},
+                [0, 1],
+            ],
         ],
     },
     {
@@ -88,11 +113,17 @@ scenario_properties = [
         "postcode": "HP1 2HX",
         "lmk-key": "1eae354db522a95188018d9cd0502ed8c609910b6c88f8797d3a25f59b11770a",
         "measures": [
-            [["cavity_wall_insulation", "loft_insulation"], "15", None, [0, 1]],
+            [
+                ["cavity_wall_insulation", "loft_insulation"],
+                "15",
+                {"walls_insulation_thickness_ending": "average"},
+                [0, 1],
+            ],
         ],
     },
 ]
 
+
 recommendations_scoring_data = []
 
 for scenario_property in scenario_properties:
@@ -217,6 +248,9 @@ for scenario_property in scenario_properties:
     recommendations_scoring_data.extend(scoring_list)
 
 recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data)
+recommendations_scoring_data["impact"] = recommendations_scoring_data["impact"].astype(
+    int
+)
 recommendations_scoring_data = recommendations_scoring_data.drop(
     columns=[
         "rdsap_change",

From dbeba4db43645ee999eb49f40c0359457ae0f703 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 27 Mar 2024 18:12:57 +0000
Subject: [PATCH 159/262] set up first basic asset list for gla demo

---
 etl/customers/gla_croydon_demo/asset_list.py  | 145 ++++++++++++++++++
 .../ha_15_32/ha_analysis_batch_3.py           | 109 ++++++++++---
 2 files changed, 232 insertions(+), 22 deletions(-)
 create mode 100644 etl/customers/gla_croydon_demo/asset_list.py

diff --git a/etl/customers/gla_croydon_demo/asset_list.py b/etl/customers/gla_croydon_demo/asset_list.py
new file mode 100644
index 00000000..526c34a0
--- /dev/null
+++ b/etl/customers/gla_croydon_demo/asset_list.py
@@ -0,0 +1,145 @@
+import pandas as pd
+from utils.s3 import save_csv_to_s3
+
+USER_ID = 8
+PORTFOLIO_ID = 67
+
+
+def app():
+    """
+    We shall define a small portfolio of properties, based in Croydon
+    :return:
+    """
+
+    # Firstly, read in the EPC data for Croydon
+    epc_data = pd.read_csv(
+        "local_data/all-domestic-certificates/domestic-E09000008-Croydon/certificates.csv",
+        low_memory=False
+    )
+
+    # Filter on entries where we have a UPRN
+    epc_data = epc_data[~pd.isnull(epc_data["UPRN"])]
+
+    # Get the newest EPC for each UPRN. We use LODGEMENT_DATE as a proxy for this
+    epc_data["LODGEMENT_DATE"] = pd.to_datetime(epc_data["LODGEMENT_DATE"])
+
+    epc_data = epc_data.sort_values("LODGEMENT_DATE", ascending=False).drop_duplicates("UPRN")
+
+    # Now filter on social properties
+    epc_data = epc_data[epc_data["TENURE"].isin(["rental (social)", "Rented (social)"])]
+    # There are 17337 properties with a registered EPC in Croydon
+    # Take below EPC C properties
+    epc_data = epc_data[epc_data["CURRENT_ENERGY_EFFICIENCY"].astype(int) < 69]
+    # 7994 properties are below EPC C (46%)
+
+    # 79% D, 19% E, 1% F, 0.2% G - it probably makes the most sense to focus on E and D properties
+    epc_data["CURRENT_ENERGY_RATING"].value_counts(normalize=True)
+
+    # For the purpose of the sample, take the properties that have had surveys done in the last 2.5 years
+    # This gives us 1023 remaining properties
+    two_years_ago = pd.Timestamp.now() - pd.DateOffset(days=int(2.5 * 365))
+    epc_data = epc_data[epc_data["LODGEMENT_DATE"] >= two_years_ago]
+
+    # Archetype 1: defined below:
+    # 1) House
+    # 2) Unfilled cavity
+    # 3) A roof that could be insulated (flat or pitched with no more than 50mm insulation)
+    # 4) EPC E
+    # Different buckets of properties
+    archetype_1_sample = epc_data[
+        epc_data["PROPERTY_TYPE"].isin(["House"]) &
+        (epc_data["CURRENT_ENERGY_RATING"] == "E") &
+        epc_data["WALLS_DESCRIPTION"].isin(["Cavity wall, as built, no insulation (assumed)"]) &
+        epc_data["ROOF_DESCRIPTION"].isin(
+            [
+                "Pitched, 12 mm loft insulation",
+                "Pitched, 0 mm loft insulation",
+                "Pitched, no insulation",
+                "Pitched, 50 mm loft insulation",
+                "Flat, no insulation (assumed)",
+                "Pitched, no insulation (assumed)"
+            ]
+        )
+        ]
+    archetype_1_sample_asset_list = archetype_1_sample[["UPRN", "ADDRESS1", "POSTCODE"]].copy()
+    archetype_1_sample_asset_list["ARCHETYPE"] = "Archetype 1"
+
+    # Archetype 2: defined below:
+    # 1) Flat
+    # 2) Unfilled cavity
+    # 3) Another property above
+    # 4) EPC E
+    archetype_2_sample = epc_data[
+        epc_data["PROPERTY_TYPE"].isin(["Flat"]) &
+        (epc_data["CURRENT_ENERGY_RATING"] == "E") &
+        epc_data["WALLS_DESCRIPTION"].isin(["Cavity wall, as built, no insulation (assumed)"]) &
+        epc_data["ROOF_DESCRIPTION"].isin(
+            [
+                "(another dwelling above)"
+            ]
+        )
+        ]
+    archetype_2_sample_asset_list = archetype_2_sample[["UPRN", "ADDRESS1", "POSTCODE"]].copy()
+    archetype_2_sample_asset_list["ARCHETYPE"] = "Archetype 2"
+
+    # Archetype 3: defined below:
+    # 1) EPC F
+    # 2) Solid brick wall
+    # 3) House
+    # 4) Pitched roof with no insulation
+    # Just 1 property (more expensive to retrofit)
+    archetype_3_sample = epc_data[
+        epc_data["PROPERTY_TYPE"].isin(["House"]) &
+        (epc_data["CURRENT_ENERGY_RATING"] == "F") &
+        epc_data["ROOF_DESCRIPTION"].isin(["Pitched, no insulation"])
+        ]
+    archetype_3_sample_asset_list = archetype_3_sample[["UPRN", "ADDRESS1", "POSTCODE"]].copy()
+    archetype_3_sample_asset_list["ARCHETYPE"] = "Archetype 3"
+
+    # Archetype 4: defined below:
+    # 1) Maisonette
+    # 2) Empty cavity
+    # 3) EPC E
+    # 14 properties here
+    archetype_4_sample = epc_data[
+        epc_data["PROPERTY_TYPE"].isin(["Maisonette"]) &
+        epc_data["WALLS_DESCRIPTION"].isin(["Cavity wall, as built, no insulation (assumed)"])
+        ]
+    archetype_4_sample_asset_list = archetype_4_sample[["UPRN", "ADDRESS1", "POSTCODE"]].copy()
+    archetype_4_sample_asset_list["ARCHETYPE"] = "Archetype 4"
+
+    asset_list = pd.concat(
+        [
+            archetype_1_sample_asset_list,
+            archetype_2_sample_asset_list,
+            archetype_3_sample_asset_list,
+            archetype_4_sample_asset_list
+        ]
+    )
+
+    asset_list = asset_list.rename(
+        columns={
+            "UPRN": "uprn",
+            "ADDRESS1": "address",
+            "POSTCODE": "postcode",
+            "ARCHETYPE": "archetype"
+        }
+    )
+
+    filename = f"{USER_ID}/{PORTFOLIO_ID}/inputs.csv"
+    save_csv_to_s3(
+        dataframe=asset_list,
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename
+    )
+
+    body = {
+        "portfolio_id": str(PORTFOLIO_ID),
+        "housing_type": "Social",
+        "goal": "Increase EPC",
+        "goal_value": "C",
+        "trigger_file_path": filename,
+        "budget": None,
+        "exclusions": ["floor_insulation"]
+    }
+    print(body)
diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index e414cd00..b4b82d0b 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -6692,6 +6692,92 @@ def create_final_report():
     revenue.to_csv("HA Analysis Final - revenue.csv")
 
 
+def identify_eco_works(loader):
+    # ha_names = [
+    #     "HA16",  # For Housing
+    #     "HA39",  # Rooftop
+    #     "HA41",  # Settle
+    #     "HA23",  # Lambeth
+    #     "HA14",  # EMH
+    #     "HA7",  # Believe
+    #     "HA102",  # Thrive
+    # ]
+
+    # Unitas, fairhive, acis, LHP
+    ha_names = [
+        "HA50",  # Unitas
+        "HA15",  # Fairhive
+        "HA107",  # ACIS
+        "HA24",  # LHP
+    ]
+    names = {
+        "HA50": "Unitas",
+        "HA15": "Fairhive",
+        "HA107": "ACIS",
+        "HA24": "LHP"
+    }
+
+    # gbis rate
+    breakdowns = []
+    # lists = {}
+    for ha, data_assets in loader.data.items():
+        if ha not in ha_names:
+            continue
+
+        asset_list = data_assets["asset_list"].copy()
+        survey_list = data_assets["survey_list"].copy()
+        # Remove things that have sold
+        if not survey_list.empty:
+            asset_list = asset_list.merge(
+                survey_list[["asset_list_row_id", "installation_status"]],
+                how="left",
+                on="asset_list_row_id"
+            )
+            # Rows with an installation_status have already gone to installation, so they are not remaining
+            asset_list = asset_list[pd.isnull(asset_list["installation_status"])]
+            asset_list = asset_list.drop(columns=["installation_status"])
+
+        # Needing a CIGA check
+        needs_cga = asset_list[
+            asset_list["ECO Eligibility"] == "eco4 (subject to ciga)"
+            ].copy()
+
+        eco4 = asset_list[
+            asset_list["ECO Eligibility"] == "eco4"
+            ].copy()
+
+        eco4_passed_ciga = asset_list[
+            asset_list["ECO Eligibility"] == "eco4 - passed ciga"
+            ].copy()
+
+        # lists[ha] = {
+        #     "needs_cga": needs_cga,
+        #     "eco4": eco4,
+        #     "eco4_passed_ciga": eco4_passed_ciga
+        # }
+
+        # Store the data
+        if not needs_cga.empty:
+            needs_cga.to_csv(f"local_data/{names[ha]} - needs ciga.csv")
+
+        if not eco4.empty:
+            eco4.to_csv(f"local_data/{names[ha]} - eco4.csv")
+
+        if not eco4_passed_ciga.empty:
+            eco4_passed_ciga.to_csv(f"local_data/{names[ha]} - eco4 passed ciga.csv")
+
+        summary = {
+            "HA Name": ha,
+            "n_needing_ciga": needs_cga.shape[0],
+            "eco4": eco4.shape[0],
+            "eco4_passed_ciga": eco4_passed_ciga.shape[0]
+        }
+
+        breakdowns.append(summary)
+    breakdowns = pd.DataFrame(breakdowns)
+    breakdowns = breakdowns.fillna(0)
+
+
 def app():
     """
     This app contains the housin association analysis for HAs 1, 6, 14, 39 and 107.
@@ -6739,29 +6825,8 @@ def app():
     loader = DataLoader(directories, december_figures_filepath, use_cache, rebuild_inputs)
     loader.load()
     loader.ha_facts_and_figures()
-    forecast_remaining_sales(loader)
 
-    # gbis rate
-    # breakdowns = []
-    # for ha, data_assets in loader.data.items():
-    #     asset_list = data_assets["asset_list"].copy()
-    #     breakdown = asset_list["ECO Eligibility"].value_counts().to_dict()
-    #     breakdowns.append(breakdown)
-    # breakdowns = pd.DataFrame(breakdowns)
-    #
-    # installer = []
-    # for ha, data_assets in loader.data.items():
-    #     survey_list = data_assets["survey_list"]
-    #     if survey_list.empty:
-    #         continue
-    #     if "INSTALLER" not in survey_list.columns:
-    #         continue
-    #
-    #     installers = survey_list["INSTALLER"].value_counts().to_dict()
-    #     installers["ha_name"] = ha
-    #     installer.append(installers)
-    # installer = pd.DataFrame(installer)
-    # installer.drop(columns=["ha_name"]).sum().sum()
+    forecast_remaining_sales(loader)
 
     # Adhoc - for HA16, get the properties that still need a CIGA check
     asset_list_ha16 = loader.data["HA16"]["asset_list"].copy()

From d34a4d4d963d349877d63a44753549186247a64d Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 28 Mar 2024 14:32:29 +0000
Subject: [PATCH 160/262] allowing passage of uprn to Searcher in api

---
 .idea/Model.iml             | 2 +-
 .idea/misc.xml              | 2 +-
 backend/app/plan/router.py  | 4 ++++
 backend/app/plan/schemas.py | 2 ++
 4 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/.idea/Model.iml b/.idea/Model.iml
index b0f9c00d..4413bb06 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 1122b380..6f308057 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 0b98cf2c..5456cdb6 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -91,10 +91,14 @@ async def trigger_plan(body: PlanTriggerRequest):
         input_properties = []
         for config in tqdm(plan_input):
             # We validate each record in the file. If the record is NOT valid, we need to handle this accordingly
+            uprn = config.get("uprn", None)
+            if uprn:
+                uprn = int(float(uprn))
 
             epc_searcher = SearchEpc(
                 address1=config["address"],
                 postcode=config["postcode"],
+                uprn=uprn,
                 auth_token=get_settings().EPC_AUTH_TOKEN,
                 os_api_key=get_settings().ORDNANCE_SURVEY_API_KEY
             )
diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py
index 9801375f..1e95fb2f 100644
--- a/backend/app/plan/schemas.py
+++ b/backend/app/plan/schemas.py
@@ -8,3 +8,5 @@ class PlanTriggerRequest(BaseModel):
     goal_value: str
     portfolio_id: int
     trigger_file_path: str
+    # optional exclusions list
+    exclusions: list[str] | None = None

From 91eb9c68f1600970541606fdae3869d19ee724cb Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 28 Mar 2024 14:49:19 +0000
Subject: [PATCH 161/262] Adding validation to PlanTriggerRequest

---
 backend/app/plan/schemas.py        | 47 +++++++++++++--
 recommendations/Recommendations.py | 94 +++++++++++++++++-------------
 2 files changed, 95 insertions(+), 46 deletions(-)

diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py
index 1e95fb2f..c13e754e 100644
--- a/backend/app/plan/schemas.py
+++ b/backend/app/plan/schemas.py
@@ -1,12 +1,51 @@
-from pydantic import BaseModel
+from pydantic import BaseModel, conlist, validator
+from typing import Optional
 
 
 class PlanTriggerRequest(BaseModel):
-    budget: float | None = None
+    budget: Optional[float] = None
     goal: str
     housing_type: str
     goal_value: str
     portfolio_id: int
     trigger_file_path: str
-    # optional exclusions list
-    exclusions: list[str] | None = None
+    exclusions: Optional[conlist(str, min_items=1)] = None
+
+    # Pre-defined list of possibilities for exclusions
+    _allowed_exclusions = {
+        "wall_insulation",
+        "ventilation",
+        "roof_insulation",
+        "floor_insulation",
+        "windows",
+        "fireplace",
+        "heating",
+        "hot_water",
+        "lighting",
+        "solar_pv"
+    }
+
+    _allowed_goals = {"Increase EPC"}
+
+    _allowed_housing_types = {"Social", "Private"}
+
+    # Validator to ensure exclusions are within the pre-defined possibilities
+    @validator('exclusions', each_item=True)
+    def check_exclusions(self, v):
+        if v not in self._allowed_exclusions:
+            raise ValueError(f"{v} is not an allowed exclusion")
+        return v
+
+    # Validator to ensure that the goal is within the pre-defined possibilities
+    @validator('goal')
+    def check_goal(self, v):
+        if v not in self._allowed_goals:
+            raise ValueError(f"{v} is not a valid goal")
+        return v
+
+    # Validator to ensure that the housing type is within the pre-defined possibilities
+    @validator('housing_type')
+    def check_housing_type(self, v):
+        if v not in self.allowed_housing_types:
+            raise ValueError(f"{v} is not a valid housing type")
+        return v
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 9f838e1c..d3436ef0 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -22,7 +22,8 @@ class Recommendations:
     def __init__(
         self,
         property_instance: Property,
-        materials: List
+        materials: List,
+        exclusions: List[str] = None,
     ):
         """
         :param property_instance: Instance of the Property class, for the home associated to property_id
@@ -31,6 +32,7 @@ class Recommendations:
 
         self.property_instance = property_instance
         self.materials = materials
+        self.exclusions = exclusions if exclusions else []
 
         self.floor_recommender = FloorRecommendations(property_instance=property_instance, materials=materials)
         self.wall_recomender = WallRecommendations(property_instance=property_instance, materials=materials)
@@ -58,67 +60,75 @@ class Recommendations:
         property_recommendations = []
         phase = 0
 
-        print("WALL RECOMMENDATIONS HAVE BEEN COMMENTED OUT TEMPORARILY - ADD ME BACK IN")
-        if portfolio_id != 66:
-            # Building Fabric
+        # Building Fabric
+        if "wall_insulation" not in self.exclusions:
             self.wall_recomender.recommend(phase=phase)
             if self.wall_recomender.recommendations:
                 property_recommendations.append(self.wall_recomender.recommendations)
                 phase += 1
 
-            # Ventilation recommendations
-            # We only produce a ventilation recommendation if the property is recommended to have wall or roof
-            # insulation
-            # We will not attribute a SAP impact to the ventilation recommendation, since we've seen that this has no
-            # real impact on the SAP score. Therefore, we don't need to include phasing for ventilation. If we have any
-            # wall or roof recommendations, we will ensure that ventilation is included in the simulation
+        # Ventilation recommendations
+        # We only produce a ventilation recommendation if the property is recommended to have wall or roof
+        # insulation
+        # We will not attribute a SAP impact to the ventilation recommendation, since we've seen that this has no
+        # real impact on the SAP score. Therefore, we don't need to include phasing for ventilation. If we have any
+        # wall or roof recommendations, we will ensure that ventilation is included in the simulation
+        if "ventilation" not in self.exclusions:
             if self.wall_recomender.recommendations or self.roof_recommender.recommendations:
                 self.ventilation_recomender.recommend()
                 if self.ventilation_recomender.recommendation:
                     property_recommendations.append(self.ventilation_recomender.recommendation)
 
-        self.roof_recommender.recommend(phase=phase)
-        if self.roof_recommender.recommendations:
-            property_recommendations.append(self.roof_recommender.recommendations)
-            phase += 1
+        if "roof_insulation" not in self.exclusions:
+            self.roof_recommender.recommend(phase=phase)
+            if self.roof_recommender.recommendations:
+                property_recommendations.append(self.roof_recommender.recommendations)
+                phase += 1
 
-        self.floor_recommender.recommend(phase=phase)
-        if self.floor_recommender.recommendations:
-            property_recommendations.append(self.floor_recommender.recommendations)
-            phase += 1
+        if "floor_insulation" not in self.exclusions:
+            self.floor_recommender.recommend(phase=phase)
+            if self.floor_recommender.recommendations:
+                property_recommendations.append(self.floor_recommender.recommendations)
+                phase += 1
 
-        self.windows_recommender.recommend(phase=phase)
-        if self.windows_recommender.recommendation:
-            property_recommendations.append(self.windows_recommender.recommendation)
-            phase += 1
+        if "windows" not in self.exclusions:
+            self.windows_recommender.recommend(phase=phase)
+            if self.windows_recommender.recommendation:
+                property_recommendations.append(self.windows_recommender.recommendation)
+                phase += 1
 
-        self.fireplace_recommender.recommend(phase=phase)
-        if self.fireplace_recommender.recommendation:
-            property_recommendations.append(self.fireplace_recommender.recommendation)
-            phase += 1
+        if "fireplace" not in self.exclusions:
+            self.fireplace_recommender.recommend(phase=phase)
+            if self.fireplace_recommender.recommendation:
+                property_recommendations.append(self.fireplace_recommender.recommendation)
+                phase += 1
 
         # Heating and Electical systems
-        self.heating_recommender.recommend(phase=phase)
-        if self.heating_recommender.recommendations:
-            property_recommendations.append(self.heating_recommender.recommendations)
-            phase += 1
+        if "heating" not in self.exclusions:
+            self.heating_recommender.recommend(phase=phase)
+            if self.heating_recommender.recommendations:
+                property_recommendations.append(self.heating_recommender.recommendations)
+                phase += 1
 
         # Hot water
-        self.hotwater_recommender.recommend(phase=phase)
-        if self.hotwater_recommender.recommendations:
-            property_recommendations.append(self.hotwater_recommender.recommendations)
-            phase += 1
+        if "hot_water" not in self.exclusions:
+            self.hotwater_recommender.recommend(phase=phase)
+            if self.hotwater_recommender.recommendations:
+                property_recommendations.append(self.hotwater_recommender.recommendations)
+                phase += 1
 
-        self.lighting_recommender.recommend(phase=phase)
-        if self.lighting_recommender.recommendation:
-            property_recommendations.append(self.lighting_recommender.recommendation)
-            phase += 1
+        if "lighting" not in self.exclusions:
+            self.lighting_recommender.recommend(phase=phase)
+            if self.lighting_recommender.recommendation:
+                property_recommendations.append(self.lighting_recommender.recommendation)
+                phase += 1
 
         # Renewables
-        self.solar_recommender.recommend(phase=phase)
-        if self.solar_recommender.recommendation:
-            property_recommendations.append(self.solar_recommender.recommendation)
-            phase += 1
+        if "solar_pv" not in self.exclusions:
+            self.solar_recommender.recommend(phase=phase)
+            if self.solar_recommender.recommendation:
+                property_recommendations.append(self.solar_recommender.recommendation)
+                phase += 1
 
         # We insert temporary ids into the recommendations which is important for the optimiser later
         property_recommendations = self.insert_temp_recommendation_id(property_recommendations)

From 22a3e21f523b79da4ec65fa12d8d901242c5cfb6 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 28 Mar 2024 14:52:24 +0000
Subject: [PATCH 162/262] update validation of PlanTriggerRequest to use cls
 rather than self

---
 backend/app/plan/router.py         |  4 +---
 backend/app/plan/schemas.py        | 12 ++++++------
 recommendations/Recommendations.py |  2 +-
 3 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 5456cdb6..e25c04a5 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -170,9 +170,7 @@ async def trigger_plan(body: PlanTriggerRequest):
             p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
 
             recommender = Recommendations(property_instance=p, materials=materials)
-            # TODO: portfolio id as an input is temp
-            print("DELETE PORTFOLIO ID AS AN INPUT!!")
-            property_recommendations, property_representative_recommendations = recommender.recommend(body.portfolio_id)
+            property_recommendations, property_representative_recommendations = recommender.recommend()
 
             if not property_recommendations:
                 continue
diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py
index c13e754e..b8a99704 100644
--- a/backend/app/plan/schemas.py
+++ b/backend/app/plan/schemas.py
@@ -31,21 +31,21 @@ class PlanTriggerRequest(BaseModel):
 
     # Validator to ensure exclusions are within the pre-defined possibilities
     @validator('exclusions', each_item=True)
-    def check_exclusions(self, v):
-        if v not in self._allowed_exclusions:
+    def check_exclusions(cls, v):
+        if v not in cls._allowed_exclusions:
             raise ValueError(f"{v} is not an allowed exclusion")
         return v
 
     # Validator to ensure that the goal is within the pre-defined possibilities
     @validator('goal')
-    def check_goal(self, v):
-        if v not in self._allowed_goals:
+    def check_goal(cls, v):
+        if v not in cls._allowed_goals:
             raise ValueError(f"{v} is not a valid goal")
         return v
 
     # Validator to ensure that the housing type is within the pre-defined possibilities
     @validator('housing_type')
-    def check_housing_type(self, v):
-        if v not in self.allowed_housing_types:
+    def check_housing_type(cls, v):
+        if v not in cls._allowed_housing_types:
             raise ValueError(f"{v} is not a valid housing type")
         return v
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index d3436ef0..b2e6d991 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -47,7 +47,7 @@ class Recommendations:
         self.heating_recommender = HeatingRecommender(property_instance=property_instance)
         self.hotwater_recommender = HotwaterRecommendations(property_instance=property_instance)
 
-    def recommend(self, portfolio_id):
+    def recommend(self):
 
         """
         This method runs the recommendations for the individual measures and then appends them to a list for output

From 8dbd69eef9140efdb3feab6933f195c762a2ba8c Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 28 Mar 2024 15:54:31 +0000
Subject: [PATCH 163/262] Updating router for chunked scoring

---
 backend/Property.py        |  2 +-
 backend/app/plan/router.py | 36 ++++++++++++++++++++++++++----------
 2 files changed, 27 insertions(+), 11 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index f86e33dc..d97ce8cf 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -233,7 +233,7 @@ class Property:
                 output["walls_thermal_transmittance_ending"] = recommendation["new_u_value"]
                 # Setting the insulation thickness here to above average should be tested further because we
                 # don't see a high volume of instances for this
-                output["walls_insulation_thickness_ending"] = "above average"
+                output["walls_insulation_thickness_ending"] = "average"
                 output["walls_energy_eff_ending"] = "Good"
 
                 # Note: often when the wall is insulatied, the internal/external insulation is not noted so we should
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index e25c04a5..bcbc4332 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -41,6 +41,7 @@ from backend.ml_models.Valuation import PropertyValuation
 logger = setup_logger()
 
 BATCH_SIZE = 5
+SCORING_BATCH_SIZE = 400
 
 
 def patch_epc(config, epc_records):
@@ -164,7 +165,7 @@ async def trigger_plan(body: PlanTriggerRequest):
         recommendations = {}
         recommendations_scoring_data = []
         representative_recommendations = {}
-        for p in input_properties:
+        for p in tqdm(input_properties):
 
             # Property recommendations
             p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
@@ -196,15 +197,30 @@ async def trigger_plan(body: PlanTriggerRequest):
 
         model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at)
 
-        all_predictions = model_api.predict_all(
-            df=recommendations_scoring_data,
-            bucket=get_settings().DATA_BUCKET,
-            prediction_buckets={
-                "sap_change_predictions": get_settings().SAP_PREDICTIONS_BUCKET,
-                "heat_demand_predictions": get_settings().HEAT_PREDICTIONS_BUCKET,
-                "carbon_change_predictions": get_settings().CARBON_PREDICTIONS_BUCKET
-            }
-        )
+        all_predictions = {
+            "sap_change_predictions": pd.DataFrame(),
+            "heat_demand_predictions": pd.DataFrame(),
+            "carbon_change_predictions": pd.DataFrame()
+        }
+        to_loop_over = range(0, recommendations_scoring_data.shape[0], SCORING_BATCH_SIZE)
+        for chunk in tqdm(to_loop_over, total=len(to_loop_over)):
+            predictions_dict = model_api.predict_all(
+                df=recommendations_scoring_data.iloc[chunk:chunk + SCORING_BATCH_SIZE],
+                bucket=get_settings().DATA_BUCKET,
+                prediction_buckets={
+                    "sap_change_predictions": get_settings().SAP_PREDICTIONS_BUCKET,
+                    "heat_demand_predictions": get_settings().HEAT_PREDICTIONS_BUCKET,
+                    "carbon_change_predictions": get_settings().CARBON_PREDICTIONS_BUCKET
+                }
+            )
+
+            # Append the predictions to the predictions dictionary
+            for key, scored in predictions_dict.items():
+                all_predictions[key] = pd.concat([all_predictions[key], scored])
+
+        # TODO: TEMP
+        # all_predictions["heat_demand_predictions"] = all_predictions["sap_change_predictions"].copy()
+        # all_predictions["carbon_change_predictions"] = all_predictions["sap_change_predictions"].copy()
 
         # Insert the predictions into the recommendations and run the optimiser
         # TODO: If a recommendation has a negative impact on SAP, we should remove it - this seems to have become a

From bd15ce65c2b05cdffe7304121d1fd8282fea55cb Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 28 Mar 2024 16:29:23 +0000
Subject: [PATCH 164/262] debugging optimisation with ventilation, when
 ventilation already exists

---
 backend/app/plan/router.py         | 16 +++++++++-------
 recommendations/Recommendations.py | 18 ++++++++++--------
 2 files changed, 19 insertions(+), 15 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index bcbc4332..a0d93190 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -170,7 +170,7 @@ async def trigger_plan(body: PlanTriggerRequest):
             # Property recommendations
             p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
 
-            recommender = Recommendations(property_instance=p, materials=materials)
+            recommender = Recommendations(property_instance=p, materials=materials, exclusions=body.exclusions)
             property_recommendations, property_representative_recommendations = recommender.recommend()
 
             if not property_recommendations:
@@ -196,6 +196,7 @@ async def trigger_plan(body: PlanTriggerRequest):
         )
 
         model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at)
+        # model_api.MODEL_PREFIXES = ["sap_change_predictions"]
 
         all_predictions = {
             "sap_change_predictions": pd.DataFrame(),
@@ -274,14 +275,15 @@ async def trigger_plan(body: PlanTriggerRequest):
             if any(x in [r["type"] for r in solution] for x in [
                 "internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation"
             ]):
-                ventilation_rec = [
-                    r for r in recommendations_with_impact if r[0]["type"] == "mechanical_ventilation"
-                ][0]
-
-                selected_recommendations = set(
-                    list(selected_recommendations) + [ventilation_rec[0]["recommendation_id"]]
+                ventilation_rec = next(
+                    (r[0] for r in recommendations_with_impact if r[0]["type"] == "mechanical_ventilation"),
+                    None
                 )
 
+                # If a matching recommendation was found, add its ID to the selected recommendations
+                if ventilation_rec:
+                    selected_recommendations.add(ventilation_rec["recommendation_id"])
+
             # We check if the selected recommendation is wall ventilation and if so, we make sure
             # mechanical ventilation is selected
 
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index b2e6d991..944fec7a 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -67,11 +67,19 @@ class Recommendations:
                 property_recommendations.append(self.wall_recomender.recommendations)
                 phase += 1
 
+        if "roof_insulation" not in self.exclusions:
+            self.roof_recommender.recommend(phase=phase)
+            if self.roof_recommender.recommendations:
+                property_recommendations.append(self.roof_recommender.recommendations)
+                phase += 1
+
         # Ventilation recommendations
         # We only produce a ventilation recommendation if the property is recommended to have wall or roof
         # insulation
-        # We will not attribute a SAP impact to the ventilation recommendation, since we've seen that this has no
-        # real impact on the SAP score. Therefore, we don't need to include phasing for ventilation. If we have any
+        # We will not attribute a SAP impact to the ventilation recommendation, since we've seen that this
+        # has no
+        # real impact on the SAP score. Therefore, we don't need to include phasing for ventilation. If we
+        # have any
         # wall or roof recommendations, we will ensure that ventilation is included in the simulation
         if "ventilation" not in self.exclusions:
             if self.wall_recomender.recommendations or self.roof_recommender.recommendations:
@@ -79,12 +87,6 @@ class Recommendations:
                 if self.ventilation_recomender.recommendation:
                     property_recommendations.append(self.ventilation_recomender.recommendation)
 
-        if "roof_insulation" not in self.exclusions:
-            self.roof_recommender.recommend(phase=phase)
-            if self.roof_recommender.recommendations:
-                property_recommendations.append(self.roof_recommender.recommendations)
-                phase += 1
-
         if "floor_insulation" not in self.exclusions:
             self.floor_recommender.recommend(phase=phase)
             if self.floor_recommender.recommendations:

From 72a4feb6af3967dc6ce00bb4df7d7d47c4772dc1 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 28 Mar 2024 17:18:08 +0000
Subject: [PATCH 165/262] minor tweak to asset list to make uprn int

---
 etl/customers/gla_croydon_demo/asset_list.py | 8 ++++++--
 etl/customers/gla_croydon_demo/slides.py     | 0
 2 files changed, 6 insertions(+), 2 deletions(-)
 create mode 100644 etl/customers/gla_croydon_demo/slides.py

diff --git a/etl/customers/gla_croydon_demo/asset_list.py b/etl/customers/gla_croydon_demo/asset_list.py
index 526c34a0..01220d0a 100644
--- a/etl/customers/gla_croydon_demo/asset_list.py
+++ b/etl/customers/gla_croydon_demo/asset_list.py
@@ -36,7 +36,7 @@ def app():
     epc_data["CURRENT_ENERGY_RATING"].value_counts(normalize=True)
 
     # For the purpose of the sample, take the properties have surveys done in the last 2 years
-    # This gives us 1023 remaining properties
+    # This gives us 1167 remaining properties
     two_years_ago = pd.Timestamp.now() - pd.DateOffset(days=int(2.5 * 365))
     epc_data = epc_data[epc_data["LODGEMENT_DATE"] >= two_years_ago]
 
@@ -45,7 +45,7 @@ def app():
     # 2) Unfilled cavity
     # 3) A roof that could be insulated (flat or pitched with no more than 50mm insulation)
     # 4) EPC E
-    # Different buckets of properties
+    # 12 properties
     archetype_1_sample = epc_data[
         epc_data["PROPERTY_TYPE"].isin(["House"]) &
         (epc_data["CURRENT_ENERGY_RATING"] == "E") &
@@ -69,6 +69,7 @@ def app():
     # 2) Unfilled cavity
     # 3) Another property above
     # 4) EPC E
+    # 14 properties here
     archetype_2_sample = epc_data[
         epc_data["PROPERTY_TYPE"].isin(["Flat"]) &
         (epc_data["CURRENT_ENERGY_RATING"] == "E") &
@@ -108,6 +109,7 @@ def app():
     archetype_4_sample_asset_list = archetype_4_sample[["UPRN", "ADDRESS1", "POSTCODE"]].copy()
     archetype_4_sample_asset_list["ARCHETYPE"] = "Archetype 4"
 
+    # 41 total properties
     asset_list = pd.concat(
         [
             archetype_1_sample_asset_list,
@@ -126,6 +128,8 @@ def app():
         }
     )
 
+    asset_list["uprn"] = asset_list["uprn"].astype(int)
+
     filename = f"{USER_ID}/{PORTFOLIO_ID}/inputs.csv"
     save_csv_to_s3(
         dataframe=asset_list,
diff --git a/etl/customers/gla_croydon_demo/slides.py b/etl/customers/gla_croydon_demo/slides.py
new file mode 100644
index 00000000..e69de29b

From 80fc7c821e0923918252edde9b90ab32a18cc765 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 28 Mar 2024 17:38:52 +0000
Subject: [PATCH 166/262] moved reading csv function

---
 backend/app/plan/router.py               |  7 ++--
 backend/app/utils.py                     | 21 -----------
 etl/customers/gla_croydon_demo/slides.py | 44 ++++++++++++++++++++++++
 utils/s3.py                              | 24 +++++++++++--
 4 files changed, 69 insertions(+), 27 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index a0d93190..2067d796 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -24,7 +24,7 @@ from backend.app.db.models.portfolio import rating_lookup
 from backend.app.dependencies import validate_token
 from backend.app.plan.schemas import PlanTriggerRequest
 from backend.app.plan.utils import get_cleaned
-from backend.app.utils import epc_to_sap_lower_bound, read_csv_from_s3, sap_to_epc
+from backend.app.utils import epc_to_sap_lower_bound, sap_to_epc
 
 from backend.ml_models.api import ModelApi
 from backend.Property import Property
@@ -35,7 +35,7 @@ from recommendations.optimiser.GainOptimiser import GainOptimiser
 from recommendations.optimiser.optimiser_functions import prepare_input_measures
 from recommendations.Recommendations import Recommendations
 from utils.logger import setup_logger
-from utils.s3 import read_dataframe_from_s3_parquet
+from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3
 from backend.ml_models.Valuation import PropertyValuation
 
 logger = setup_logger()
@@ -196,7 +196,7 @@ async def trigger_plan(body: PlanTriggerRequest):
         )
 
         model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at)
-        # model_api.MODEL_PREFIXES = ["sap_change_predictions"]
+        # model_api.MODEL_PREFIXES = ['sap_change_predictions', 'carbon_change_predictions']
 
         all_predictions = {
             "sap_change_predictions": pd.DataFrame(),
@@ -221,7 +221,6 @@ async def trigger_plan(body: PlanTriggerRequest):
 
         # TODO: TEMP
         # all_predictions["heat_demand_predictions"] = all_predictions["sap_change_predictions"].copy()
-        # all_predictions["carbon_change_predictions"] = all_predictions["sap_change_predictions"].copy()
 
         # Insert the predictions into the recommendations and run the optimiser
         # TODO: If a recommendation has a negative impact on SAP, we should remove it - this seems to have become a
diff --git a/backend/app/utils.py b/backend/app/utils.py
index ba5509e1..b3843206 100644
--- a/backend/app/utils.py
+++ b/backend/app/utils.py
@@ -1,6 +1,4 @@
 import boto3
-import csv
-from io import StringIO
 import string
 import secrets
 import logging
@@ -41,25 +39,6 @@ def setup_logger(log_file=None, level=logging.INFO, overwrite_handler=False):
     return logger
 
 
-def read_csv_from_s3(bucket_name, filepath):
-    s3 = boto3.client('s3')
-
-    # Get the object from s3
-    s3_object = s3.get_object(Bucket=bucket_name, Key=filepath)
-
-    # Read the CSV body from the s3 object
-    body = s3_object['Body'].read()
-
-    # Use StringIO to create a file-like object from the string
-    csv_data = StringIO(body.decode('utf-8'))
-
-    # Use csv library to read it into a list of dictionaries
-    reader = csv.DictReader(csv_data)
-    data = list(reader)
-
-    return data
-
-
 def generate_api_key():
     # Define the characters that will be used to generate the api key
     characters = string.ascii_letters + string.digits
diff --git a/etl/customers/gla_croydon_demo/slides.py b/etl/customers/gla_croydon_demo/slides.py
index e69de29b..5954f604 100644
--- a/etl/customers/gla_croydon_demo/slides.py
+++ b/etl/customers/gla_croydon_demo/slides.py
@@ -0,0 +1,44 @@
+"""
+This script contains the code to generate the data required to populate the slides
+We connect to the database and extract the data for the portfolio needed so it is recommended to use
+an environment akin to the backend to run this script
+"""
+import pandas as pd
+import numpy as np
+from backend.app.db.connection import db_engine
+from sqlalchemy.orm import sessionmaker
+from utils.s3 import read_csv_from_s3
+from etl.customers.slide_utils import (
+    plot_epc_distribution,
+    get_property_details_by_portfolio_id,
+    get_plan_by_portfolio_id,
+    get_properties_with_default_recommendations,
+    create_powerpoint,
+    create_recommendations_summary
+)
+
+USER_ID = 8
+PORTFOLIO_ID_1 = 67
+EPC_TARGET_1 = "C"
+SAP_TARGET_1 = 69
+CUSTOMER_KEY = "gla-demo"
+
+
+def app():
+    # Connect to database
+    session = sessionmaker(bind=db_engine)()
+
+    ########################################################################
+    # Get the data we need
+    ########################################################################
+
+    portfolio_id = PORTFOLIO_ID_1
+
+    # Get the asset list
+    asset_list = read_csv_from_s3(
+        "retrofit-plan-inputs-dev", f"{USER_ID}/{portfolio_id}/inputs.csv"
+    )
+
+    # Get the properties for the portfolio
+    properties = get_properties_with_default_recommendations(session, portfolio_id)
+    properties_df = pd.DataFrame(properties)
diff --git a/utils/s3.py b/utils/s3.py
index 8d36bdb3..fd5992ce 100644
--- a/utils/s3.py
+++ b/utils/s3.py
@@ -1,9 +1,10 @@
 import pickle
 import boto3
-from io import BytesIO, StringIO
-from botocore.exceptions import NoCredentialsError, PartialCredentialsError
+import csv
 import pandas as pd
+from io import BytesIO, StringIO
 from utils.logger import setup_logger
+from botocore.exceptions import NoCredentialsError, PartialCredentialsError
 
 logger = setup_logger()
 
@@ -224,3 +225,22 @@ def read_excel_from_s3(bucket_name, file_key, header_row):
     df.reset_index(drop=True, inplace=True)
 
     return df
+
+
+def read_csv_from_s3(bucket_name, filepath):
+    s3 = boto3.client('s3')
+
+    # Get the object from s3
+    s3_object = s3.get_object(Bucket=bucket_name, Key=filepath)
+
+    # Read the CSV body from the s3 object
+    body = s3_object['Body'].read()
+
+    # Use StringIO to create a file-like object from the string
+    csv_data = StringIO(body.decode('utf-8'))
+
+    # Use csv library to read it into a list of dictionaries
+    reader = csv.DictReader(csv_data)
+    data = list(reader)
+
+    return data

From 053218b3fd9ef7bec918baed43473f3d3485fa4e Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 2 Apr 2024 11:18:58 +0100
Subject: [PATCH 167/262] updated price cap figures

---
 backend/app/plan/router.py                   |  4 --
 backend/ml_models/AnnualBillSavings.py       | 10 ++---
 etl/customers/gla_croydon_demo/asset_list.py | 40 +++++++++++-------
 etl/customers/gla_croydon_demo/slides.py     | 43 ++++++++++++++++++++
 4 files changed, 73 insertions(+), 24 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 2067d796..50b8a837 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -196,7 +196,6 @@ async def trigger_plan(body: PlanTriggerRequest):
         )
 
         model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at)
-        # model_api.MODEL_PREFIXES = ['sap_change_predictions', 'carbon_change_predictions']
 
         all_predictions = {
             "sap_change_predictions": pd.DataFrame(),
@@ -219,9 +218,6 @@ async def trigger_plan(body: PlanTriggerRequest):
             for key, scored in predictions_dict.items():
                 all_predictions[key] = pd.concat([all_predictions[key], scored])
 
-        # TODO: TEMP
-        # all_predictions["heat_demand_predictions"] = all_predictions["sap_change_predictions"].copy()
-
         # Insert the predictions into the recommendations and run the optimiser
         # TODO: If a recommendation has a negative impact on SAP, we should remove it - this seems to have become a
         #       possibility with heating system
diff --git a/backend/ml_models/AnnualBillSavings.py b/backend/ml_models/AnnualBillSavings.py
index 99fae4db..4a433a7f 100644
--- a/backend/ml_models/AnnualBillSavings.py
+++ b/backend/ml_models/AnnualBillSavings.py
@@ -10,13 +10,13 @@ class AnnualBillSavings:
     AVERAGE_ELECTRICITY_CONSUMPTION = 2700
     AVERAGE_GAS_CONSUMPTION = 11500
 
-    # Latest price cap figures from Ofgem are for January 2024
-    # https://www.ofgem.gov.uk/publications/changes-energy-price-cap-1-january-2024
-    ELECTRICITY_PRICE_CAP = 0.29
-    GAS_PRICE_CAP = 0.07
+    # Latest price cap figures from Ofgem are for April 2024
+    # https://www.ofgem.gov.uk/publications/new-energy-price-cap-level-april-june-2024-starts-today
+    ELECTRICITY_PRICE_CAP = 0.245
+    GAS_PRICE_CAP = 0.0604
 
     # This is a weighted mean of the price caps, using the consumption figures above as weights
-    PRICE_FACTOR = 0.11183098591549295
+    PRICE_FACTOR = 0.09549999999999999
 
     EPC_BANDS = ["G", "F", "E", "D", "C", "B", "A"]
 
diff --git a/etl/customers/gla_croydon_demo/asset_list.py b/etl/customers/gla_croydon_demo/asset_list.py
index 01220d0a..a0475807 100644
--- a/etl/customers/gla_croydon_demo/asset_list.py
+++ b/etl/customers/gla_croydon_demo/asset_list.py
@@ -35,20 +35,20 @@ def app():
     # 79% D, 19% E, 1% F, 0.2% G - it probably makes the most sense to focus on E and D properties
     epc_data["CURRENT_ENERGY_RATING"].value_counts(normalize=True)
 
-    # For the purpose of the sample, take the properties have surveys done in the last 2 years
-    # This gives us 1167 remaining properties
-    two_years_ago = pd.Timestamp.now() - pd.DateOffset(days=int(2.5 * 365))
-    epc_data = epc_data[epc_data["LODGEMENT_DATE"] >= two_years_ago]
+    # For the purpose of the sample, take the properties that have had surveys done in the last 3 years
+    # This gives us 1351 remaining properties
+    three_years_ago = pd.Timestamp.now() - pd.DateOffset(days=int(3 * 365))
+    epc_data = epc_data[epc_data["LODGEMENT_DATE"] >= three_years_ago]
 
     # Archetype 1: defined below:
     # 1) House
     # 2) Unfilled cavity
     # 3) A roof that could be insulated (flat or pitched with no more than 50mm insulation)
-    # 4) EPC E
-    # 12 properties
+    # 4) EPC E or D
+    # 24 properties
     archetype_1_sample = epc_data[
         epc_data["PROPERTY_TYPE"].isin(["House"]) &
-        (epc_data["CURRENT_ENERGY_RATING"] == "E") &
+        (epc_data["CURRENT_ENERGY_RATING"].isin(["D", "E"])) &
         epc_data["WALLS_DESCRIPTION"].isin(["Cavity wall, as built, no insulation (assumed)"]) &
         epc_data["ROOF_DESCRIPTION"].isin(
             [
@@ -69,10 +69,10 @@ def app():
     # 2) Unfilled cavity
     # 3) Another property above
     # 4) EPC E
-    # 14 properties here
+    # 57 properties here
     archetype_2_sample = epc_data[
         epc_data["PROPERTY_TYPE"].isin(["Flat"]) &
-        (epc_data["CURRENT_ENERGY_RATING"] == "E") &
+        (epc_data["CURRENT_ENERGY_RATING"].isin(["E", "D"])) &
         epc_data["WALLS_DESCRIPTION"].isin(["Cavity wall, as built, no insulation (assumed)"]) &
         epc_data["ROOF_DESCRIPTION"].isin(
             [
@@ -88,11 +88,18 @@ def app():
     # 2) Solid brick wall
     # 3) House
     # 4) Pitched roof with no insulation
-    # Just 1 property (more expensive to retrofit)
+    # Just 7 properties (more expensive to retrofit)
     archetype_3_sample = epc_data[
         epc_data["PROPERTY_TYPE"].isin(["House"]) &
-        (epc_data["CURRENT_ENERGY_RATING"] == "F") &
-        epc_data["ROOF_DESCRIPTION"].isin(["Pitched, no insulation"])
+        (epc_data["CURRENT_ENERGY_RATING"].isin(["F", "G"])) &
+        epc_data["ROOF_DESCRIPTION"].isin(
+            [
+                "Pitched, no insulation",
+                "Pitched, limited insulation (assumed)",
+                "Pitched, 100 mm loft insulation",
+                "Pitched, no insulation (assumed)",
+            ]
+        )
         ]
     archetype_3_sample_asset_list = archetype_3_sample[["UPRN", "ADDRESS1", "POSTCODE"]].copy()
     archetype_3_sample_asset_list["ARCHETYPE"] = "Archetype 3"
@@ -101,15 +108,18 @@ def app():
     # 1) Maisonette
     # 2) Empty cavity
     # 3) EPC E
-    # 14 properties here
+    # 16 properties here
     archetype_4_sample = epc_data[
         epc_data["PROPERTY_TYPE"].isin(["Maisonette"]) &
-        epc_data["WALLS_DESCRIPTION"].isin(["Cavity wall, as built, no insulation (assumed)"])
+        epc_data["WALLS_DESCRIPTION"].isin(
+            ["Cavity wall, as built, no insulation (assumed)"]
+        )
         ]
+
     archetype_4_sample_asset_list = archetype_4_sample[["UPRN", "ADDRESS1", "POSTCODE"]].copy()
     archetype_4_sample_asset_list["ARCHETYPE"] = "Archetype 4"
 
-    # 41 total properties
+    # 104 total properties
     asset_list = pd.concat(
         [
             archetype_1_sample_asset_list,
diff --git a/etl/customers/gla_croydon_demo/slides.py b/etl/customers/gla_croydon_demo/slides.py
index 5954f604..ebca7dc3 100644
--- a/etl/customers/gla_croydon_demo/slides.py
+++ b/etl/customers/gla_croydon_demo/slides.py
@@ -38,7 +38,50 @@ def app():
     asset_list = read_csv_from_s3(
         "retrofit-plan-inputs-dev", f"{USER_ID}/{portfolio_id}/inputs.csv"
     )
+    asset_list = pd.DataFrame(asset_list)
 
     # Get the properties for the portfolio
     properties = get_properties_with_default_recommendations(session, portfolio_id)
     properties_df = pd.DataFrame(properties)
+
+    # We now pull the data for the property details
+    property_details = get_property_details_by_portfolio_id(session, portfolio_id)
+    property_details_df = pd.DataFrame(property_details)
+    # Merge on uprn
+    property_details_df = property_details_df.merge(
+        properties_df[["uprn", "id"]].rename(columns={"id": "property_id"}),
+        on="property_id"
+    )
+
+    plans = get_plan_by_portfolio_id(session, portfolio_id)
+    plans_df = pd.DataFrame(plans)
+
+    # Unnest the recommendations. Each recommendation is a list of dictionaries
+    recommendations_exploded = properties_df["recommendations"].explode().tolist()
+    recommendations_df = pd.DataFrame([r for r in recommendations_exploded if not pd.isnull(r)])
+    # Add uprn on
+    recommendations_df = recommendations_df.merge(
+        properties_df[["uprn", "id"]].rename(columns={"id": "property_id"}),
+        how="left",
+        on="property_id"
+    )
+
+    # Summary information by each archetype
+    archetype_1 = asset_list[asset_list["archetype"] == "Archetype 1"]
+
+    recommendations_arch_1_summary = create_recommendations_summary(
+        recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_1["uprn"].values)],
+        properties_df[properties_df["uprn"].astype(str).isin(archetype_1["uprn"].values)],
+        SAP_TARGET_1
+    )
+
+    # Take the mean, median and maximum of each value
+    arch_1_recommendation_means = recommendations_arch_1_summary.mean()
+
+    arch_1_property_details = property_details_df[
+        property_details_df["uprn"].astype(str).isin(archetype_1["uprn"].values)
+    ]
+
+    arch_1_property_details_means = arch_1_property_details.mean()
+
+    arch_1_recommendation_means["total_bill_savings"] / arch_1_property_details_means["adjusted_energy_consumption"]

From 08a657eb9f505a10608377eff1c0c10b76bd2f0a Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 3 Apr 2024 12:18:08 +0100
Subject: [PATCH 168/262] Adding costs for ttzc

---
 backend/ml_models/AnnualBillSavings.py       |  13 +++
 etl/customers/gla_croydon_demo/asset_list.py |  13 +++
 etl/customers/gla_croydon_demo/slides.py     | 100 ++++++++++++++---
 etl/customers/slide_utils.py                 |  22 +++-
 recommendations/Costs.py                     |  83 +++++++++++++-
 recommendations/HeatingControlRecommender.py | 108 +++++++++++++++++++
 recommendations/HeatingRecommender.py        |  17 +++
 7 files changed, 338 insertions(+), 18 deletions(-)

diff --git a/backend/ml_models/AnnualBillSavings.py b/backend/ml_models/AnnualBillSavings.py
index 4a433a7f..9be9d78a 100644
--- a/backend/ml_models/AnnualBillSavings.py
+++ b/backend/ml_models/AnnualBillSavings.py
@@ -18,6 +18,9 @@ class AnnualBillSavings:
     # This is a weighted mean of the price caps, using the consumption figures above as weights
     PRICE_FACTOR = 0.09549999999999999
 
+    # Daily standing charge, based on the average across England, Scotland and Wales; includes VAT
+    DAILY_STANDARD_CHARGE = 0.3143
+
     EPC_BANDS = ["G", "F", "E", "D", "C", "B", "A"]
 
     @classmethod
@@ -38,6 +41,16 @@ class AnnualBillSavings:
         """
         return cls.ELECTRICITY_PRICE_CAP * kwh
 
+    @classmethod
+    def calculate_annual_bill(cls, kwh):
+        """
+        This method will estimate the total annual bill for a property
+        :param kwh: The total kwh consumption
+        :return: An estimate for annual bill
+        """
+
+        return cls.PRICE_FACTOR * kwh + cls.DAILY_STANDARD_CHARGE * 365
+
     @classmethod
     def adjust_energy_to_metered(cls, epc_energy_consumption, current_epc_rating):
         """
diff --git a/etl/customers/gla_croydon_demo/asset_list.py b/etl/customers/gla_croydon_demo/asset_list.py
index a0475807..3a3f02a3 100644
--- a/etl/customers/gla_croydon_demo/asset_list.py
+++ b/etl/customers/gla_croydon_demo/asset_list.py
@@ -140,6 +140,19 @@ def app():
 
     asset_list["uprn"] = asset_list["uprn"].astype(int)
 
+    # We end up with some properties that are currently an EPC C, but we do not have this data in the download, so we
+    # manually remove
+    # 1) 3 Reid Close, CR5 3BL
+    # 2) Flat 6, Collier Court 2A, St. Peters Road CR0 1HD
+    asset_list = asset_list[
+        ~asset_list["uprn"].isin(
+            [
+                100020576460,
+                100020624352,
+            ]
+        )
+    ]
+
     filename = f"{USER_ID}/{PORTFOLIO_ID}/inputs.csv"
     save_csv_to_s3(
         dataframe=asset_list,
diff --git a/etl/customers/gla_croydon_demo/slides.py b/etl/customers/gla_croydon_demo/slides.py
index ebca7dc3..1d217226 100644
--- a/etl/customers/gla_croydon_demo/slides.py
+++ b/etl/customers/gla_croydon_demo/slides.py
@@ -16,11 +16,15 @@ from etl.customers.slide_utils import (
     create_powerpoint,
     create_recommendations_summary
 )
+from backend.ml_models.AnnualBillSavings import AnnualBillSavings
 
 USER_ID = 8
 PORTFOLIO_ID_1 = 67
+PORTFOLIO_ID_2 = 68
 EPC_TARGET_1 = "C"
+EPC_TARGET_2 = "A"
 SAP_TARGET_1 = 69
+SAP_TARGET_2 = 100
 CUSTOMER_KEY = "gla-demo"
 
 
@@ -32,11 +36,13 @@ def app():
     # Get the data we need
     ########################################################################
 
-    portfolio_id = PORTFOLIO_ID_1
+    # TODO: Update to portfolio desired
+    # portfolio_id = PORTFOLIO_ID_1
+    portfolio_id = PORTFOLIO_ID_2
 
     # Get the asset list
     asset_list = read_csv_from_s3(
-        "retrofit-plan-inputs-dev", f"{USER_ID}/{portfolio_id}/inputs.csv"
+        "retrofit-plan-inputs-dev", f"{USER_ID}/67/inputs.csv"
     )
     asset_list = pd.DataFrame(asset_list)
 
@@ -47,6 +53,10 @@ def app():
     # We now pull the data for the property details
     property_details = get_property_details_by_portfolio_id(session, portfolio_id)
     property_details_df = pd.DataFrame(property_details)
+    # We estimate bills based on the adjusted_energy_consumption
+    property_details_df["energy_bill"] = property_details_df["adjusted_energy_consumption"].apply(
+        lambda x: AnnualBillSavings.calculate_annual_bill(x)
+    )
     # Merge on uprn
     property_details_df = property_details_df.merge(
         properties_df[["uprn", "id"]].rename(columns={"id": "property_id"}),
@@ -66,22 +76,84 @@ def app():
         on="property_id"
     )
 
-    # Summary information by each archetype
-    archetype_1 = asset_list[asset_list["archetype"] == "Archetype 1"]
-
-    recommendations_arch_1_summary = create_recommendations_summary(
-        recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_1["uprn"].values)],
-        properties_df[properties_df["uprn"].astype(str).isin(archetype_1["uprn"].values)],
+    recommendations_summary = create_recommendations_summary(
+        recommendations_df,
+        properties_df,
+        property_details_df,
         SAP_TARGET_1
     )
 
-    # Take the mean, median and maximum of each value
-    arch_1_recommendation_means = recommendations_arch_1_summary.mean()
+    # Calculate % changes of carbon, energy and bills
+    recommendations_summary["carbon_percent_change"] = (
+        recommendations_summary["total_carbon"] / recommendations_summary["current_co2"]
+    )
 
-    arch_1_property_details = property_details_df[
-        property_details_df["uprn"].astype(str).isin(archetype_1["uprn"].values)
+    recommendations_summary["energy_percent_change"] = (
+        recommendations_summary["adjusted_heat_demand"] / recommendations_summary["current_energy"]
+    )
+
+    recommendations_summary["bills_percent_change"] = (
+        recommendations_summary["total_bill_savings"] / recommendations_summary["current_energy_bill"]
+    )
+
+    # Summary information by each archetype
+    ########################
+    # Archetype 1
+    ########################
+    archetype_1 = asset_list[asset_list["archetype"] == "Archetype 1"]
+    recommendations_arch_1_summary = recommendations_summary[
+        recommendations_summary["uprn"].astype(str).isin(archetype_1["uprn"].values)
     ]
 
-    arch_1_property_details_means = arch_1_property_details.mean()
+    # Take the minimum, maximum and mean of each value
+    arch_1_recommendation_min = recommendations_arch_1_summary.min()
+    arch_1_recommendation_max = recommendations_arch_1_summary.max()
+    arch_1_recommendation_means = recommendations_arch_1_summary.mean()
 
-    arch_1_recommendation_means["total_bill_savings"] / arch_1_property_details_means["adjusted_energy_consumption"]
+    ########################
+    # Archetype 2
+    ########################
+    archetype_2 = asset_list[asset_list["archetype"] == "Archetype 2"]
+    recommendations_arch_2_summary = recommendations_summary[
+        recommendations_summary["uprn"].astype(str).isin(archetype_2["uprn"].values)
+    ]
+
+    # Take the minimum, maximum and mean (rounded to 2 d.p.) of each value
+    arch_2_recommendation_min = recommendations_arch_2_summary.min()
+    arch_2_recommendation_max = recommendations_arch_2_summary.max()
+    arch_2_recommendation_means = recommendations_arch_2_summary.mean().round(2)
+
+    ########################
+    # Archetype 3
+    ########################
+    archetype_3 = asset_list[asset_list["archetype"] == "Archetype 3"]
+    recommendations_arch_3_summary = recommendations_summary[
+        recommendations_summary["uprn"].astype(str).isin(archetype_3["uprn"].values)
+    ]
+
+    # Take the minimum, maximum and mean of each value
+    arch_3_recommendation_min = recommendations_arch_3_summary.min()
+    arch_3_recommendation_max = recommendations_arch_3_summary.max()
+    arch_3_recommendation_means = recommendations_arch_3_summary.mean()
+
+    ########################
+    # Archetype 4
+    ########################
+    archetype_4 = asset_list[asset_list["archetype"] == "Archetype 4"]
+    recommendations_arch_4_summary = recommendations_summary[
+        recommendations_summary["uprn"].astype(str).isin(archetype_4["uprn"].values)
+    ]
+
+    # Take the minimum, maximum and mean of each value
+    arch_4_recommendation_min = recommendations_arch_4_summary.min()
+    arch_4_recommendation_max = recommendations_arch_4_summary.max()
+    arch_4_recommendation_means = recommendations_arch_4_summary.mean()
+
+    property_details_df[
+        property_details_df["uprn"].astype(str).isin(archetype_4["uprn"].values)
+    ]["total_floor_area"].mean()
+
+    ########################
+    # Overview
+    ########################
+    overview_totals = recommendations_summary.sum()
diff --git a/etl/customers/slide_utils.py b/etl/customers/slide_utils.py
index d1efce47..9170ab17 100644
--- a/etl/customers/slide_utils.py
+++ b/etl/customers/slide_utils.py
@@ -246,7 +246,7 @@ def create_powerpoint(data, save_location):
     prs.save(save_location)
 
 
-def create_recommendations_summary(recommendations_df, properties_df, sap_target):
+def create_recommendations_summary(recommendations_df, properties_df, property_details_df, sap_target):
     # Aggregate the impact of the recommendations
     # We want:
     # Total number of sap points
@@ -259,13 +259,15 @@ def create_recommendations_summary(recommendations_df, properties_df, sap_target
         total_valuation_impact=("property_valuation_increase", "sum"),
         total_bill_savings=("energy_cost_savings", "sum"),
         total_cost=("estimated_cost", "sum"),
-        total_carbon=("co2_equivalent_savings", "sum")
+        total_carbon=("co2_equivalent_savings", "sum"),
+        adjusted_heat_demand=("adjusted_heat_demand", "sum")
     ).reset_index()
-    # Merge on current sap points
+    # Merge on current sap points, current CO2, current adjusted_heat_demand, current annual bill
     recommendations_summary = recommendations_summary.merge(
         properties_df[["id", "uprn", "current_sap_points"]].rename(columns={"id": "property_id"}), on="property_id",
         how="left"
     )
+
     recommendations_summary["expected_sap_points"] = (
         recommendations_summary["current_sap_points"] + recommendations_summary["total_sap_points"]
     )
@@ -274,4 +276,18 @@ def create_recommendations_summary(recommendations_df, properties_df, sap_target
     )
     recommendations_summary["sap_difference"] = sap_target - recommendations_summary["expected_sap_points"]
 
+    if property_details_df is not None:
+        recommendations_summary = recommendations_summary.merge(
+            property_details_df[["uprn", "co2_emissions", "adjusted_energy_consumption", "energy_bill"]].rename(
+                columns={
+                    "id": "property_id",
+                    "co2_emissions": "current_co2",
+                    "adjusted_energy_consumption": "current_energy",
+                    "energy_bill": "current_energy_bill"
+                }
+            ),
+            on="uprn",
+            how="left"
+        )
+
     return recommendations_summary
diff --git a/recommendations/Costs.py b/recommendations/Costs.py
index b2874f28..47844657 100644
--- a/recommendations/Costs.py
+++ b/recommendations/Costs.py
@@ -42,7 +42,22 @@ BATTERY_COST = 3500
 
 # This is based on https://www.checkatrade.com/blog/cost-guides/cost-smart-thermostat/
 SMART_APPLIANCE_THERMOSTAT_COST = 400
-PROGRAMMER_COST = 200
+PROGRAMMER_COST = 120
+ROOM_THERMOSTAT_COST = 150
+TRVS_COST = 35
+
+# Cost for TTZC
+# Smart thermostat based on checkatrade https://www.checkatrade.com/blog/cost-guides/cost-smart-thermostat/
+# Based on the Nest system
+TTZC_SMART_THERMOSTAT_COST = 205
+TTZC_SMART_THERMOSTAT_LABOUR_HOURS = 2
+TTZC_ELECTRICIAN_HOURLY_RATE = 45
+# Based on cost of a Nest temperature sensor
+TTZC_ROOM_TEMPERATURE_SENSOR_COST = 50
+TTZC_ROOM_TEMPERATURE_SENSOR_LABOUR_HOURS = 0.17  # (Assume ~ 10 mins install per sensor)
+# Based on an average cost of smart radiator valves
+TTZC_SMART_RADIATOR_VALUES = 50
+TTZC_SMART_RADIATOR_VALUES_LABOUR_HOURS = 0.37  # (Assume ~ 15-30 mins install per valve)
 
 
 class Costs:
@@ -998,3 +1013,69 @@ class Costs:
             "labour_hours": 0,
             "labour_days": 0,
         }
+
+    def roomstat_programmer_trvs(
+        self, number_heated_rooms, has_programmer, has_trvs, has_room_thermostat
+    ):
+        """
+
+        :return:
+        """
+
+        total_cost = 0
+        labour_hours = 0
+
+        if not has_programmer:
+            total_cost += PROGRAMMER_COST
+            labour_hours += 1
+
+        if not has_trvs:
+            total_cost += TRVS_COST * number_heated_rooms
+            labour_hours += 0.25 * number_heated_rooms
+
+        if not has_room_thermostat:
+            total_cost += ROOM_THERMOSTAT_COST
+            labour_hours += 0.5
+
+        subtotal_before_vat = total_cost / (1 + self.VAT_RATE)
+        vat = total_cost - subtotal_before_vat
+
+        return {
+            "total": total_cost,
+            "subtotal": subtotal_before_vat,
+            "vat": vat,
+            "labour_hours": labour_hours,
+            "labour_days": 1,
+        }
+
+    def time_and_temperature_zone_control(self, number_heated_rooms):
+
+        # The product costs are inclusive of VAT
+        product_costs = (
+            TTZC_SMART_THERMOSTAT_COST +
+            TTZC_ROOM_TEMPERATURE_SENSOR_COST * number_heated_rooms +
+            TTZC_SMART_RADIATOR_VALUES * number_heated_rooms
+        )
+        labour_hours = (
+            TTZC_SMART_THERMOSTAT_LABOUR_HOURS +
+            TTZC_ROOM_TEMPERATURE_SENSOR_LABOUR_HOURS * number_heated_rooms +
+            TTZC_SMART_RADIATOR_VALUES_LABOUR_HOURS * number_heated_rooms
+        )
+        labour_costs = TTZC_ELECTRICIAN_HOURLY_RATE * labour_hours
+        # Add contingency and preliminaries to the labour to account for the complexity of the job
+        labour_costs = labour_costs * (1 + self.CONTINGENCY + self.PRELIMINARIES)
+
+        vat = labour_costs * self.VAT_RATE
+
+        subtotal_before_vat = product_costs + labour_costs
+        total_cost = subtotal_before_vat + vat
+
+        labour_days = np.ceil(labour_hours / 8)
+
+        return {
+            "total": total_cost,
+            "subtotal": subtotal_before_vat,
+            "vat": vat,
+            "labour_hours": labour_hours,
+            "labour_days": labour_days,
+        }
diff --git a/recommendations/HeatingControlRecommender.py b/recommendations/HeatingControlRecommender.py
index 81597f61..99b41469 100644
--- a/recommendations/HeatingControlRecommender.py
+++ b/recommendations/HeatingControlRecommender.py
@@ -27,6 +27,14 @@ class HeatingControlRecommender:
             self.recommend_high_heat_retention_controls()
             return
 
+        if heating_description in ["Boiler and radiators, mains gas"]:
+            # We can recommend roomstat programmer trvs
+            self.recommend_roomstat_programmer_trvs()
+            # We can also recommend time and temperature zone controls
+            self.recommend_time_temperature_zone_controls()
+
+            return
+
     def recommend_room_heaters_electric_controls(self):
         """
         If the home has Room heaters, electric, we start by identifying potential heating controls that could
@@ -105,3 +113,103 @@ class HeatingControlRecommender:
 
         # We don't implement any other recommendations right now
         return
+
+    def recommend_roomstat_programmer_trvs(self):
+        """
+        If the home has a boiler and radiators, mains gas, we start by identifying potential heating controls that could
+        be upgraded, that would provide a practical impact.
+
+        The criteria for recommending an upgrade to heating controls are (one of these must be true)
+        1) There are no controls
+        2) No programmer
+        3) No room thermostat
+        4) No TRVs
+
+
+        :return:
+        """
+
+        # We check if we have the conditions to recommend this upgrade
+
+        needs_programmer = self.property.main_heating_controls["switch_system"] is None
+        needs_room_thermostat = self.property.main_heating_controls["thermostatic_control"] is None
+        needs_trvs = self.property.main_heating_controls["trvs"] is None
+
+        can_recommend = (
+            (self.property.main_heating_controls["no_control"] is not None) or
+            needs_programmer or
+            needs_room_thermostat or
+            needs_trvs
+        )
+
+        if not can_recommend:
+            return
+
+        ending_config = MainheatControlAttributes("Programmer, room thermostat and TRVS").process()
+        # We use this to determine how we should be updating the config
+        simulation_config = check_simulation_difference(
+            new_config=ending_config, old_config=self.property.main_heating_controls
+        )
+        # This upgrade will only take the heating controls to good energy efficiency
+        # If the current system is below good, we make it good
+        if self.property.data["mainheatc-energy-eff"] in ["Poor", "Very Poor", "Average"]:
+            simulation_config["mainheatc_energy_eff_ending"] = "Good"
+
+        has_programmer = not needs_programmer
+        has_room_thermostat = not needs_room_thermostat
+        has_trvs = not needs_trvs
+
+        self.recommendation.append(
+            {
+                "description": "upgrade heating controls to Room thermostat, programmer and TRVs",
+                **self.costs.roomstat_programmer_trvs(
+                    number_heated_rooms=int(self.property.data["number-heated-rooms"]),
+                    has_programmer=has_programmer,
+                    has_room_thermostat=has_room_thermostat,
+                    has_trvs=has_trvs
+                ),
+                "simulation_config": simulation_config
+            }
+        )
+
+        return
+
+    def recommend_time_temperature_zone_controls(self):
+        """
+        If the home has a boiler, we can recommend time and temperature zone controls. This is a more advanced
+        and more efficient control system than the standard controls that come with a boiler. However, it may come
+        with a higher cost and more involved usage
+        :return:
+        """
+
+        # We check the current heating controls are not already TTZC and are not already rated 'Very Good'
+
+        # Conditions for installation are as follows:
+        # 1) The current heating controls are not time and temperature zone controls
+        # 2) The current heating controls are not already at 'Very Good' or above
+
+        if (
+            (self.property["thermostatic_control"] == "time and temperature zone control") or
+            (self.property.data["mainheatc-energy-eff"] in ["Very Good"])
+        ):
+            # No recommendation needed
+            return
+
+        ending_config = MainheatControlAttributes("Time and temperature zone control").process()
+
+        # We use this to determine how we should be updating the config
+        simulation_config = check_simulation_difference(
+            new_config=ending_config, old_config=self.property.main_heating_controls
+        )
+
+        # If the current system is below very good, we make it very good
+        if self.property.data["mainheatc-energy-eff"] in ["Poor", "Very Poor", "Average", "Good"]:
+            simulation_config["mainheatc_energy_eff_ending"] = "Very Good"
+
+        self.recommendation.append(
+            {
+                "description": "upgrade heating controls to Room thermostat, programmer and TRVs",
+                **self.costs.time_and_temperature_zone_control(),
+                "simulation_config": simulation_config
+            }
+        )
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 11ae3da6..6467bd2f 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -26,6 +26,11 @@ class HeatingRecommender:
             self.recommend_electric_storage_heaters(phase=phase, system_change=True, heating_controls_only=False)
             return
 
+        # if the property has mains heating with boiler and radiators, we recommend optimal heating controls
+        if self.property.main_heating["clean_description"] in ["Boiler and radiators, mains gas"]:
+            self.recommend_roomstat_programmer_trvs(phase=phase)
+            return
+
     @staticmethod
     def check_simulation_difference(old_config, new_config):
         """
@@ -182,3 +187,15 @@ class HeatingRecommender:
         )
 
         self.recommendations.extend(recommendations)
+
+    def recommend_roomstat_programmer_trvs(self, phase):
+        """
+
+        :param phase:
+        :return:
+        """
+        # We recommend the heating controls
+        controls_recommender = HeatingControlRecommender(self.property)
+        controls_recommender.recommend(heating_description="Boiler and radiators, mains gas")
+
+        controls_recommender.recommendation

From 45552f5e06d3b814729cc57b6ca4329d19a8c31e Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 3 Apr 2024 14:39:28 +0100
Subject: [PATCH 169/262] Added costing for boiler

---
 recommendations/Costs.py                     | 51 ++++++++++++
 recommendations/HeatingControlRecommender.py |  6 +-
 recommendations/HeatingRecommender.py        | 83 +++++++++++++++++++-
 recommendations/Recommendations.py           |  3 +
 4 files changed, 137 insertions(+), 6 deletions(-)

diff --git a/recommendations/Costs.py b/recommendations/Costs.py
index 47844657..e5ceb0c0 100644
--- a/recommendations/Costs.py
+++ b/recommendations/Costs.py
@@ -59,6 +59,26 @@ TTZC_ROOM_TEMPERATURE_SENSOR_LABOUR_HOURS = 0.17  # (Assume ~ 10 mins install pe
 TTZC_SMART_RADIATOR_VALUES = 50
 TTZC_SMART_RADIATOR_VALUES_LABOUR_HOURS = 0.37  # (Assume ~ 15-30 mins install per valve)
 
+# Low carbon combi boiler - median value based on £2200 - £3000 range
+LOW_CARBON_COMBI_BOILER = 2200
+
+# boiler prices based on
+# https://www.greenmatch.co.uk/boilers/30kw-boiler
+# https://www.greenmatch.co.uk/boilers/35kw-boiler
+# https://www.greenmatch.co.uk/boilers/40kw-boiler
+# These are exclusive of installation costs
+COMBI_BOILER_COSTS = {
+    "30kw": 1550,
+    "35kw": 1610,
+    "40kw": 1625
+}
+
+CONVENTIONAL_BOILER_COSTS = {
+    "30kw": 1117,
+    "35kw": 1546,
+    "40kw": 1776
+}
+
 
 class Costs:
     """
@@ -1079,3 +1099,34 @@ class Costs:
             "labour_hours": labour_hours,
             "labour_days": labour_days,
         }
+
+    def low_carbon_boiler(self, is_combi, size):
+        """
+        Based on a basic estimate of median value £2600 to install a low carbon combi boiler
+        :return:
+        """
+
+        unit_cost = COMBI_BOILER_COSTS[size] if is_combi else CONVENTIONAL_BOILER_COSTS[size]
+        # The unit cost is the cost without VAT
+        # We now need to estimate the cost of the works
+        labour_days = 2
+        labour_rate = 500
+
+        # Average cost of installation is 1 day (maybe 2 days) at £300 per day
+        # https://www.checkatrade.com/blog/cost-guides/new-boiler-cost/
+        # To be pessimistic, assume 2 days work and £500 day rate
+        labour_cost = labour_rate * self.labour_adjustment_factor * labour_days
+        # Add contingency and preliminaries
+        labour_cost = labour_cost * (1 + self.CONTINGENCY + self.PRELIMINARIES)
+        vat = labour_cost * self.VAT_RATE
+
+        subtotal_before_vat = unit_cost + labour_cost
+        total_cost = subtotal_before_vat + vat
+
+        return {
+            "total": total_cost,
+            "subtotal": subtotal_before_vat,
+            "vat": vat,
+            "labour_hours": labour_days * 8,
+            "labour_days": labour_days,
+        }
diff --git a/recommendations/HeatingControlRecommender.py b/recommendations/HeatingControlRecommender.py
index 99b41469..547ea497 100644
--- a/recommendations/HeatingControlRecommender.py
+++ b/recommendations/HeatingControlRecommender.py
@@ -189,7 +189,7 @@ class HeatingControlRecommender:
         # 2) The current heating controls are not already at 'Very Good' or above
 
         if (
-            (self.property["thermostatic_control"] == "time and temperature zone control") or
+            (self.property.main_heating_controls["thermostatic_control"] == "time and temperature zone control") or
             (self.property.data["mainheatc-energy-eff"] in ["Very Good"])
         ):
             # No recommendation needed
@@ -209,7 +209,9 @@ class HeatingControlRecommender:
         self.recommendation.append(
             {
                 "description": "upgrade heating controls to Room thermostat, programmer and TRVs",
-                **self.costs.time_and_temperature_zone_control(),
+                **self.costs.time_and_temperature_zone_control(
+                    number_heated_rooms=int(self.property.data["number-heated-rooms"])
+                ),
                 "simulation_config": simulation_config
             }
         )
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 6467bd2f..c7064274 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -28,7 +28,7 @@ class HeatingRecommender:
 
         # if the property has mains heating with boiler and radiators, we recommend optimal heating controls
         if self.property.main_heating["clean_description"] in ["Boiler and radiators, mains gas"]:
-            self.recommend_roomstat_programmer_trvs(phase=phase)
+            self.recommend_boiler_upgrades(phase=phase)
             return
 
     @staticmethod
@@ -188,14 +188,89 @@ class HeatingRecommender:
 
         self.recommendations.extend(recommendations)
 
-    def recommend_roomstat_programmer_trvs(self, phase):
-        """
+    @staticmethod
+    def estimate_boiler_size(property_type, built_form, floor_area, floor_height, num_heated_rooms):
+        # Step 1: Base size estimation based on property type (as a starting point)
+        base_size = {
+            'Flat': 25,
+            'House': 30,
+            'Maisonette': 28,
+            'Bungalow': 27
+        }
 
+        # Step 2: Calculate the volume of the property
+        volume = floor_area * floor_height
+
+        # Step 3: Adjust base size for built form (to account for heat retention)
+        form_adjustment = {
+            'Mid-Terrace': 0,
+            'End-Terrace': 2,
+            'Semi-Detached': 4,
+            'Detached': 6
+        }
+
+        # Step 4: Further adjust for the total volume and number of heated rooms
+        volume_adjustment = (volume / 100)  # Simplified adjustment factor for volume
+        rooms_adjustment = (num_heated_rooms - 5) * 0.5  # Assuming base case of 5 rooms
+
+        # Calculate the estimated boiler size
+        estimated_size = base_size[property_type] + form_adjustment[built_form] + volume_adjustment + rooms_adjustment
+
+        # Step 5: Align with available boiler sizes and ensure it does not exceed 40kW, as it's rare to need more
+        available_sizes = [30, 35, 40, 45, 50]
+        estimated_size = min(max(estimated_size, 30), 40)  # Ensure within 30kW to 40kW range
+
+        # Find the closest available size (after clamping, this rounds up or down to align with 30, 35 or 40)
+        closest_size = min(available_sizes, key=lambda x: abs(x - estimated_size))
+
+        return closest_size
+
+    def recommend_boiler_upgrades(self, phase):
+        """
+        This boiler recommendation will only recommend a like-for-like upgrade, since changing the system
+        is generally more expensive
         :param phase:
         :return:
         """
+
+        # We now recommend boiler upgrades, if applicable
+        if self.property.data["mainheat-energy-eff"] in ["Very Poor", "Poor", "Average"]:
+            boiler_size = self.estimate_boiler_size(
+                property_type=self.property.data["property-type"],
+                built_form=self.property.data["built-form"],
+                floor_area=self.property.floor_area,
+                floor_height=self.property.floor_height,
+                num_heated_rooms=self.property.data["number-heated-rooms"],
+            )
+
+            # If heating and hot water come from the mains, we need a combi boiler, otherwise we need a regular boiler
+            is_combi = self.property.hotwater["clean_description"] in ["From main system"]
+            if is_combi:
+                description = "Upgrade to a low carbon combi boiler"
+            else:
+                description = "Upgrade to a low carbon boiler"
+
+            self.recommendations.append(
+                {
+                    "phase": phase,
+                    "parts": [
+                        # TODO
+                    ],
+                    "type": "heating",
+                    "description": description,
+                    "starting_u_value": None,
+                    "new_u_value": None,
+                    "sap_points": None,
+                    **self.costs.low_carbon_boiler(is_combi=is_combi, size=f"{boiler_size}kw")
+                }
+            )
+
         # We recommend the heating controls
         controls_recommender = HeatingControlRecommender(self.property)
         controls_recommender.recommend(heating_description="Boiler and radiators, mains gas")
+        # We may have 2 recommendations from the heating controls
 
-        controls_recommender.recommendation
+        # The heating controls recommendation is distrinct from the boiler upgrade recommendation
+        # We insert phase into the recommendations for heating controls
+        for recommendation in controls_recommender.recommendation:
+            recommendation["phase"] = phase
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 944fec7a..d9a0a0fd 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -110,6 +110,9 @@ class Recommendations:
             self.heating_recommender.recommend(phase=phase)
             if self.heating_recommender.recommendations:
                 property_recommendations.append(self.heating_recommender.recommendations)
+                # We check if we have distinct heating and heating controls recommendations
+                # If so, we increment by 2 (one of the heating system, one for the heating controls)
+                # otherwise we incremenet by 1
                 phase += 1
 
         # Hot water

From 09bbeaecae8156faedf090a28bfe0bcae231f0d2 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 3 Apr 2024 14:57:11 +0100
Subject: [PATCH 170/262] incorporate heating and heating control
 recommendations

---
 recommendations/HeatingRecommender.py | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index c7064274..676a4b06 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -233,6 +233,8 @@ class HeatingRecommender:
         :return:
         """
 
+        recommendation_phase = phase
+
         # We now recommend boiler upgrades, if applicable
         if self.property.data["mainheat-energy-eff"] in ["Very Poor", "Poor", "Average"]:
             boiler_size = self.estimate_boiler_size(
@@ -252,7 +254,7 @@ class HeatingRecommender:
 
             self.recommendations.append(
                 {
-                    "phase": phase,
+                    "phase": recommendation_phase,
                     "parts": [
                         # TODO
                     ],
@@ -261,16 +263,21 @@ class HeatingRecommender:
                     "starting_u_value": None,
                     "new_u_value": None,
                     "sap_points": None,
+                    "simulation_config": {"mainheat_energy_eff_ending": "Good"},
                     **self.costs.low_carbon_boiler(is_combi=is_combi, size=f"{boiler_size}kw")
                 }
             )
 
+            # We increment the recommendation phase, in the case of us having heating control recommendations
+            recommendation_phase += 1
+
         # We recommend the heating controls
         controls_recommender = HeatingControlRecommender(self.property)
         controls_recommender.recommend(heating_description="Boiler and radiators, mains gas")
         # We may have 2 recommendations from the heating controls
 
-        # The heating controls recommendation is distrinct from the boiler upgrade recommendation
-        # We insert phase into the recommendations for heating controls
-        for recommendation in controls_recommender.recommendation:
-            recommendation["phase"] = phase
+        if controls_recommender.recommendation:
+            # The heating controls recommendation is distrinct from the boiler upgrade recommendation
+            # We insert phase into the recommendations for heating controls
+            for recommendation in controls_recommender.recommendation:
+                recommendation["phase"] = recommendation_phase

From 9130ad55fffc21858ca7061d26a2f6ecb8d66e3d Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 3 Apr 2024 14:59:42 +0100
Subject: [PATCH 171/262] Added missing controls to output

---
 recommendations/HeatingRecommender.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 676a4b06..9658aaa3 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -281,3 +281,5 @@ class HeatingRecommender:
             # We insert phase into the recommendations for heating controls
             for recommendation in controls_recommender.recommendation:
                 recommendation["phase"] = recommendation_phase
+
+        self.recommendations.extend(controls_recommender.recommendation)

From a9c2bf1b9c0be1192edbeb50ba01401d1e55578f Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 3 Apr 2024 15:06:44 +0100
Subject: [PATCH 172/262] added correct incrementing of phase

---
 recommendations/HeatingControlRecommender.py | 8 ++++++++
 recommendations/Recommendations.py           | 4 +++-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/recommendations/HeatingControlRecommender.py b/recommendations/HeatingControlRecommender.py
index 547ea497..e224f243 100644
--- a/recommendations/HeatingControlRecommender.py
+++ b/recommendations/HeatingControlRecommender.py
@@ -161,6 +161,7 @@ class HeatingControlRecommender:
 
         self.recommendation.append(
             {
+                "type": "heating_control",
                 "description": "upgrade heating controls to Room thermostat, programmer and TRVs",
                 **self.costs.roomstat_programmer_trvs(
                     number_heated_rooms=int(self.property.data["number-heated-rooms"]),
@@ -168,6 +169,9 @@ class HeatingControlRecommender:
                     has_room_thermostat=has_room_thermostat,
                     has_trvs=has_trvs
                 ),
+                "starting_u_value": None,
+                "new_u_value": None,
+                "sap_points": None,
                 "simulation_config": simulation_config
             }
         )
@@ -208,10 +212,14 @@ class HeatingControlRecommender:
 
         self.recommendation.append(
             {
+                "type": "heating_control",
                 "description": "upgrade heating controls to Room thermostat, programmer and TRVs",
                 **self.costs.time_and_temperature_zone_control(
                     number_heated_rooms=int(self.property.data["number-heated-rooms"])
                 ),
+                "starting_u_value": None,
+                "new_u_value": None,
+                "sap_points": None,
                 "simulation_config": simulation_config
             }
         )
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index d9a0a0fd..902023dc 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -113,7 +113,9 @@ class Recommendations:
                 # We check if we have distinct heating and heating controls recommendations
                 # If so, we increment by 2 (one of the heating system, one for the heating controls)
                 # otherwise we incremenet by 1
-                phase += 1
+                max_used_phase = max([rec["phase"] for rec in self.heating_recommender.recommendations])
+                amount_to_increment = max_used_phase - phase + 1
+                phase += amount_to_increment
 
         # Hot water
         if "hot_water" not in self.exclusions:

From 2234269ca62611c9f0285acc0f79491ce98cf277 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 3 Apr 2024 15:14:19 +0100
Subject: [PATCH 173/262] added simulation

---
 backend/Property.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index d97ce8cf..82108bbb 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -344,7 +344,7 @@ class Property:
                 else:
                     output["glazed_type_ending"] = "double glazing installed during or after 2002"
 
-            if recommendation["type"] in ["heating", "hot_water_tank_insulation"]:
+            if recommendation["type"] in ["heating", "hot_water_tank_insulation", "heating_control"]:
                 # We update the data, as defined in the recommendaton
 
                 simulation_config = recommendation["simulation_config"]
@@ -364,7 +364,8 @@ class Property:
                 "internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation",
                 "loft_insulation", "room_roof_insulation", "flat_roof_insulation",
                 "solid_floor_insulation", "suspended_floor_insulation", "exposed_floor_insulation",
-                "windows_glazing", "solar_pv", "heating", "hot_water_tank_insulation"
+                "windows_glazing", "solar_pv", "heating", "hot_water_tank_insulation",
+                "heating_control",
             ]:
                 raise NotImplementedError("Implement me, given type %s" % recommendation["type"])
 

From f2cec8de11305c7d763a712050f0da685001bd7f Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 3 Apr 2024 16:30:45 +0100
Subject: [PATCH 174/262] fixed description for ttaz

---
 recommendations/HeatingControlRecommender.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/recommendations/HeatingControlRecommender.py b/recommendations/HeatingControlRecommender.py
index e224f243..7010ad53 100644
--- a/recommendations/HeatingControlRecommender.py
+++ b/recommendations/HeatingControlRecommender.py
@@ -162,6 +162,7 @@ class HeatingControlRecommender:
         self.recommendation.append(
             {
                 "type": "heating_control",
+                "parts": [],
                 "description": "upgrade heating controls to Room thermostat, programmer and TRVs",
                 **self.costs.roomstat_programmer_trvs(
                     number_heated_rooms=int(self.property.data["number-heated-rooms"]),
@@ -213,7 +214,8 @@ class HeatingControlRecommender:
         self.recommendation.append(
             {
                 "type": "heating_control",
-                "description": "upgrade heating controls to Room thermostat, programmer and TRVs",
+                "parts": [],
+                "description": "Upgrade heating controls to Time and Temperature Zone Controls",
                 **self.costs.time_and_temperature_zone_control(
                     number_heated_rooms=int(self.property.data["number-heated-rooms"])
                 ),

From 519dc6cfcb31ce4093ae0e6cace03ba30920e5e7 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 3 Apr 2024 19:17:27 +0100
Subject: [PATCH 175/262] added off-gas property recommendations

---
 backend/app/plan/router.py                   |   1 +
 etl/customers/gla_croydon_demo/asset_list.py |  42 +++-
 etl/customers/gla_croydon_demo/slides.py     | 200 ++++++++++++++++++-
 recommendations/HeatingControlRecommender.py |   2 +-
 recommendations/HeatingRecommender.py        |  12 +-
 5 files changed, 247 insertions(+), 10 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 50b8a837..4868749d 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -389,6 +389,7 @@ async def trigger_plan(body: PlanTriggerRequest):
 
         # Commit final changes
         session.commit()
+
     except IntegrityError:
         logger.error("Database integrity error occurred", exc_info=True)
         session.rollback()
diff --git a/etl/customers/gla_croydon_demo/asset_list.py b/etl/customers/gla_croydon_demo/asset_list.py
index 3a3f02a3..52e9422c 100644
--- a/etl/customers/gla_croydon_demo/asset_list.py
+++ b/etl/customers/gla_croydon_demo/asset_list.py
@@ -4,6 +4,23 @@ from utils.s3 import save_csv_to_s3
 USER_ID = 8
 PORTFOLIO_ID = 67
 
+archetype_1_uprns = [100020604138, 200001188299, 100020578756, 200001187196, 200001192253, 100020581792, 200001188304,
+                     100020625813, 100020618060, 100020585305, 100020617489, 100020615039, 100020618076, 100020588913,
+                     200001187197, 100020671205, 100020576940, 100020619814, 100020576472, 100020618083]
+archetype_2_uprns = [100020698027, 10001007455, 100020653785, 10090383198, 100020665632, 100020620659, 100020615603,
+                     100020609610, 100020625597, 100020665656, 100020665640, 100020587905, 100020665630, 100020624351,
+                     100020625451, 100020624348, 100020666735, 100020653786, 100020576458, 100020657902, 100020624350,
+                     100020637405, 100020666734, 100020616325, 100020666716, 100020653783, 100020665645, 100020642337,
+                     100020665638, 100022904981, 100020688226, 100020630285, 100020626800, 100020665634, 100022907528,
+                     100020665652, 100020624347, 100020666721, 100020585002, 10014055968, 10001008257, 100020621438,
+                     100020576459, 100020665643, 100020665654, 100022917303]
+archetype_3_uprns = [100020577523, 100020616446, 100020605342, 100020594652, 100020585394, 100020601138, 100020597485,
+                     100020614883, 100020633162, 100020697787, 200001185785, 100020646842, 100020581449, 100020595611,
+                     100020641814, 100020575611, 100020652986, 100020654671, 100020647336, 100020610518, 100020607980,
+                     100020692380, 100020581690]
+archetype_4_uprns = [100020650603, 100020582907, 100020605116, 100020650607, 100020589325, 100020655500, 100020642537,
+                     200001187539, 100020631683, 100020610165, 100020596436, 100020598277, 100020660228]
+
 
 def app():
     """
@@ -84,14 +101,15 @@ def app():
     archetype_2_sample_asset_list["ARCHETYPE"] = "Archetype 2"
 
     # Archetype 3: defined below:
-    # 1) EPC F
+    # 1) EPC E or below
     # 2) Solid brick wall
     # 3) House
     # 4) Pitched roof with no insulation
     # Just 7 properties (more expensive to retrofit)
     archetype_3_sample = epc_data[
         epc_data["PROPERTY_TYPE"].isin(["House"]) &
-        (epc_data["CURRENT_ENERGY_RATING"].isin(["F", "G"])) &
+        (epc_data["CURRENT_ENERGY_RATING"].isin(["E", "F", "G"])) &
+        epc_data["WALLS_DESCRIPTION"].isin(["Solid brick, as built, no insulation (assumed)"]) &
         epc_data["ROOF_DESCRIPTION"].isin(
             [
                 "Pitched, no insulation",
@@ -119,7 +137,6 @@ def app():
     archetype_4_sample_asset_list = archetype_4_sample[["UPRN", "ADDRESS1", "POSTCODE"]].copy()
     archetype_4_sample_asset_list["ARCHETYPE"] = "Archetype 4"
 
-    # 104 total properties
     asset_list = pd.concat(
         [
             archetype_1_sample_asset_list,
@@ -152,6 +169,25 @@ def app():
             ]
         )
     ]
+    # We have slightly too many properties, so we take a random sample of each archetype
+    # achetype_1_size = 20
+    # achetype_2_size = 46
+    # achetype_3_size = 23
+    # achetype_4_size = 13
+    # archetype_1_uprns = asset_list[asset_list["archetype"] == "Archetype 1"]["uprn"].sample(
+    #     int(achetype_1_size)
+    # ).tolist()
+    # archetype_2_uprns = asset_list[asset_list["archetype"] == "Archetype 2"]["uprn"].sample(
+    #     int(achetype_2_size)
+    # ).tolist()
+    # archetype_3_uprns = asset_list[asset_list["archetype"] == "Archetype 3"]["uprn"].sample(
+    #     int(achetype_3_size)
+    # ).tolist()
+    # archetype_4_uprns = asset_list[asset_list["archetype"] == "Archetype 4"]["uprn"].sample(
+    #     int(achetype_4_size)
+    # ).tolist()
+    uprns_to_keep = archetype_1_uprns + archetype_2_uprns + archetype_3_uprns + archetype_4_uprns
+    asset_list = asset_list[asset_list["uprn"].isin(uprns_to_keep)]
 
     filename = f"{USER_ID}/{PORTFOLIO_ID}/inputs.csv"
     save_csv_to_s3(
diff --git a/etl/customers/gla_croydon_demo/slides.py b/etl/customers/gla_croydon_demo/slides.py
index 1d217226..e6c4b5b8 100644
--- a/etl/customers/gla_croydon_demo/slides.py
+++ b/etl/customers/gla_croydon_demo/slides.py
@@ -27,8 +27,24 @@ SAP_TARGET_1 = 69
 SAP_TARGET_2 = 100
 CUSTOMER_KEY = "gla-demo"
 
+# Sample UPRNS
+archetype_1_sample = ['100020618076', '100020619814', '100020581792', '100020671205', '100020585305', '100020606853',
+                      '100020625813', '100020618042', '200001188304', '200001187196', '100020603026', '100020604138',
+                      '100020615039', '200001188299', '100020618060', '200001192253']
 
-def app():
+archetype_2_sample = ['100020616325', '100020665634', '100020665654', '100020665638', '100020587936', '100020587905',
+                      '100020665645', '100020625597', '100022907528', '100020665630', '100020624348', '10001008257',
+                      '100020666735', '100020698027', '100020624351', '100020665656', '100020666716', '100020665632',
+                      '100020666715', '100020645639', '200001191309', '100020625451', '100020624347', '100020665658',
+                      '100020585002', '100022917303', '100020665650', '100020667737', '100020620659', '100022904981',
+                      '100020642337', '100020657902', '100020615603', '100020626800', '100020665647', '100020665643']
+
+archetype_3_sample = ['100020607980', '200001193193', '100020581690', '100020665611']
+archetype_4_sample = ['100020631683', '100020607667', '100020660228', '100020605116', '200001187539', '100020582907',
+                      '100020610165', '100020650607', '100020655500', '100020598277', '100020642537']
+
+
+def scenario_1():
     # Connect to database
     session = sessionmaker(bind=db_engine)()
 
@@ -36,9 +52,7 @@ def app():
     # Get the data we need
     ########################################################################
 
-    # TODO: Update to portfolio desired
-    # portfolio_id = PORTFOLIO_ID_1
-    portfolio_id = PORTFOLIO_ID_2
+    portfolio_id = PORTFOLIO_ID_1
 
     # Get the asset list
     asset_list = read_csv_from_s3(
@@ -157,3 +171,181 @@ def app():
     # Overview
     ########################
     overview_totals = recommendations_summary.sum()
+
+
+def make_sample():
+    # sample_proportion = 67 / 102
+    # Get the asset list
+    asset_list = read_csv_from_s3(
+        "retrofit-plan-inputs-dev", f"{USER_ID}/67/inputs.csv"
+    )
+    asset_list = pd.DataFrame(asset_list)
+
+    # From the asset list, we deduce how many properties we need
+    archetype_1_sample_size = 16
+    archetype_2_sample_size = 36
+    archetype_3_sample_size = 4
+    archetype_4_sample_size = 11
+
+    # We take the sample and we'll keep the uprns static
+    archetype_1_sample = asset_list[
+        asset_list["archetype"] == "Archetype 1"
+        ].sample(archetype_1_sample_size)["uprn"].to_list()
+
+    archetype_2_sample = asset_list[
+        asset_list["archetype"] == "Archetype 2"
+        ].sample(archetype_2_sample_size)["uprn"].to_list()
+
+    archetype_3_sample = asset_list[
+        asset_list["archetype"] == "Archetype 3"
+        ].sample(archetype_3_sample_size)["uprn"].to_list()
+
+    archetype_4_sample = asset_list[
+        asset_list["archetype"] == "Archetype 4"
+        ].sample(archetype_4_sample_size)["uprn"].to_list()
+
+
+def scenario_2():
+    # Connect to database
+    session = sessionmaker(bind=db_engine)()
+
+    ########################################################################
+    # Get the data we need
+    ########################################################################
+
+    portfolio_id = PORTFOLIO_ID_2
+
+    # Get the asset list
+    asset_list = read_csv_from_s3(
+        "retrofit-plan-inputs-dev", f"{USER_ID}/67/inputs.csv"
+    )
+    asset_list = pd.DataFrame(asset_list)
+
+    sample_uprns = archetype_1_sample + archetype_2_sample + archetype_3_sample + archetype_4_sample
+
+    # Filter on sample uprns
+    asset_list = asset_list[asset_list["uprn"].astype(str).isin(sample_uprns)]
+
+    # Get the properties for the portfolio
+    properties = get_properties_with_default_recommendations(session, portfolio_id)
+    properties_df = pd.DataFrame(properties)
+    properties_df = properties_df[properties_df["uprn"].astype(str).isin(sample_uprns)]
+
+    # We now pull the data for the property details
+    property_details = get_property_details_by_portfolio_id(session, portfolio_id)
+    property_details_df = pd.DataFrame(property_details)
+    property_details_df = property_details_df[property_details_df["property_id"].isin(properties_df["id"].values)]
+    # We estimate bills based on the adjusted_energy_consumption
+    property_details_df["energy_bill"] = property_details_df["adjusted_energy_consumption"].apply(
+        lambda x: AnnualBillSavings.calculate_annual_bill(x)
+    )
+    # Merge on uprn
+    property_details_df = property_details_df.merge(
+        properties_df[["uprn", "id"]].rename(columns={"id": "property_id"}),
+        on="property_id"
+    )
+
+    plans = get_plan_by_portfolio_id(session, portfolio_id)
+    plans_df = pd.DataFrame(plans)
+
+    # Unnest the recommendations. Each recommendation is a list of dictionaries
+    recommendations_exploded = properties_df["recommendations"].explode().tolist()
+    recommendations_df = pd.DataFrame([r for r in recommendations_exploded if not pd.isnull(r)])
+    # Add uprn on
+    recommendations_df = recommendations_df.merge(
+        properties_df[["uprn", "id"]].rename(columns={"id": "property_id"}),
+        how="left",
+        on="property_id"
+    )
+
+    recommendations_summary = create_recommendations_summary(
+        recommendations_df,
+        properties_df,
+        property_details_df,
+        SAP_TARGET_1
+    )
+
+    # Calculate % changes of energ, co2 and abs
+    recommendations_summary["carbon_percent_change"] = (
+        recommendations_summary["total_carbon"] / recommendations_summary["current_co2"]
+    )
+
+    recommendations_summary["energy_percent_change"] = (
+        recommendations_summary["adjusted_heat_demand"] / recommendations_summary["current_energy"]
+    )
+
+    recommendations_summary["bills_percent_change"] = (
+        recommendations_summary["total_bill_savings"] / recommendations_summary["current_energy_bill"]
+    )
+
+    ########################
+    # Overview
+    ########################
+    overview_totals = recommendations_summary.sum()
+    overview_means = recommendations_summary.mean()
+
+    ########################
+    # Measures
+    ########################
+    measures_count = recommendations_df.groupby("type")["id"].count().reset_index()
+
+    z = recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_3_sample)]
+
+    recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_3_sample)]["type"].value_counts()
+
+    # Summary information by each archetype
+    ########################
+    # Archetype 1
+    ########################
+    archetype_1 = asset_list[asset_list["archetype"] == "Archetype 1"]
+    recommendations_arch_1_summary = recommendations_summary[
+        recommendations_summary["uprn"].astype(str).isin(archetype_1["uprn"].values)
+    ]
+
+    # Take the mean, median and maximum of each value
+    arch_1_recommendation_min = recommendations_arch_1_summary.min()
+    arch_1_recommendation_max = recommendations_arch_1_summary.max()
+    arch_1_recommendation_means = recommendations_arch_1_summary.mean()
+
+    ########################
+    # Archetype 2
+    ########################
+    archetype_2 = asset_list[asset_list["archetype"] == "Archetype 2"]
+    recommendations_arch_2_summary = recommendations_summary[
+        recommendations_summary["uprn"].astype(str).isin(archetype_2["uprn"].values)
+    ]
+
+    # Take the mean, median and maximum of each value
+    arch_2_recommendation_min = recommendations_arch_2_summary.min()
+    arch_2_recommendation_max = recommendations_arch_2_summary.max()
+    arch_2_recommendation_means = recommendations_arch_2_summary.mean().round(2)
+
+    ########################
+    # Archetype 3
+    ########################
+    archetype_3 = asset_list[asset_list["archetype"] == "Archetype 3"]
+    recommendations_arch_3_summary = recommendations_summary[
+        recommendations_summary["uprn"].astype(str).isin(archetype_3["uprn"].values)
+    ]
+
+    # Take the mean, median and maximum of each value
+    arch_3_recommendation_min = recommendations_arch_3_summary.min()
+    arch_3_recommendation_max = recommendations_arch_3_summary.max()
+    arch_3_recommendation_means = recommendations_arch_3_summary.mean()
+
+    ########################
+    # Archetype 4
+    ########################
+    archetype_4 = asset_list[asset_list["archetype"] == "Archetype 4"]
+    recommendations_arch_4_summary = recommendations_summary[
+        recommendations_summary["uprn"].astype(str).isin(archetype_4["uprn"].values)
+    ]
+
+    # Take the mean, median and maximum of each value
+    arch_4_recommendation_min = recommendations_arch_4_summary.min()
+    arch_4_recommendation_max = recommendations_arch_4_summary.max()
+    arch_4_recommendation_means = recommendations_arch_4_summary.mean()
+
+    property_details_df[
+        property_details_df["uprn"].astype(str).isin(archetype_4["uprn"].values)
+    ]["total_floor_area"].mean()
diff --git a/recommendations/HeatingControlRecommender.py b/recommendations/HeatingControlRecommender.py
index 7010ad53..95b5e3b1 100644
--- a/recommendations/HeatingControlRecommender.py
+++ b/recommendations/HeatingControlRecommender.py
@@ -215,7 +215,7 @@ class HeatingControlRecommender:
             {
                 "type": "heating_control",
                 "parts": [],
-                "description": "Upgrade heating controls to Time and Temperature Zone Controls",
+                "description": "Upgrade heating controls to Smart Thermostats, room sensors and smart radiator valves",
                 **self.costs.time_and_temperature_zone_control(
                     number_heated_rooms=int(self.property.data["number-heated-rooms"])
                 ),
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 9658aaa3..8b20c0cd 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -19,9 +19,17 @@ class HeatingRecommender:
         self.recommendations = []
         # This first iteration of the recommender will provide very basic recommendation
         # We recommend heating controls based on the main heating system
-        if self.property.main_heating["clean_description"] in [
+
+        has_electric_heating_description = self.property.main_heating["clean_description"] in [
             "Room heaters, electric", "Electric storage heaters", "Electric storage heaters, radiators"
-        ]:
+        ]
+
+        no_heating_no_mains = (
+            self.property.main_heating["clean_description"] in ["No system present, electric heaters assumed"] and
+            not self.property.data["mains-gas-flag"]
+        )
+
+        if has_electric_heating_description or no_heating_no_mains:
             # Recommend high heat retention storage heaters
             self.recommend_electric_storage_heaters(phase=phase, system_change=True, heating_controls_only=False)
             return

From 47ebf866ee141c8ed91a7191b5bb75ef49246950 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 3 Apr 2024 20:02:37 +0100
Subject: [PATCH 176/262] fixed sample in slides

---
 backend/app/plan/router.py               |  1 -
 etl/customers/gla_croydon_demo/slides.py | 35 +++++++++++----------
 recommendations/HeatingRecommender.py    | 39 ++++++++++++++++++++++--
 3 files changed, 55 insertions(+), 20 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 4868749d..50b8a837 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -389,7 +389,6 @@ async def trigger_plan(body: PlanTriggerRequest):
 
         # Commit final changes
         session.commit()
-
     except IntegrityError:
         logger.error("Database integrity error occurred", exc_info=True)
         session.rollback()
diff --git a/etl/customers/gla_croydon_demo/slides.py b/etl/customers/gla_croydon_demo/slides.py
index e6c4b5b8..cbd1f7e4 100644
--- a/etl/customers/gla_croydon_demo/slides.py
+++ b/etl/customers/gla_croydon_demo/slides.py
@@ -28,20 +28,22 @@ SAP_TARGET_2 = 100
 CUSTOMER_KEY = "gla-demo"
 
 # Sample UPRNS
-archetype_1_sample = ['100020618076', '100020619814', '100020581792', '100020671205', '100020585305', '100020606853',
-                      '100020625813', '100020618042', '200001188304', '200001187196', '100020603026', '100020604138',
-                      '100020615039', '200001188299', '100020618060', '200001192253']
+archetype_1_sample = ['100020604138', '200001192253', '100020581792', '100020576940', '200001187196', '100020618060',
+                      '100020625813', '100020578756', '100020618076', '200001187197', '100020619814', '100020617489',
+                      '100020588913']
 
-archetype_2_sample = ['100020616325', '100020665634', '100020665654', '100020665638', '100020587936', '100020587905',
-                      '100020665645', '100020625597', '100022907528', '100020665630', '100020624348', '10001008257',
-                      '100020666735', '100020698027', '100020624351', '100020665656', '100020666716', '100020665632',
-                      '100020666715', '100020645639', '200001191309', '100020625451', '100020624347', '100020665658',
-                      '100020585002', '100022917303', '100020665650', '100020667737', '100020620659', '100022904981',
-                      '100020642337', '100020657902', '100020615603', '100020626800', '100020665647', '100020665643']
+archetype_2_sample = ['100020585002', '100020615603', '100020665652', '100020626800', '100020624347', '100020624348',
+                      '100020576459', '10001007455', '100020666716', '100020609610', '100020625451', '100020625597',
+                      '100020624351', '100020665634', '100020624350', '100020665640', '100020665632', '100022917303',
+                      '100020665656', '10014055968', '100020630285', '100020665638', '100020616325', '100020637405',
+                      '100020698027', '100020657902', '100020688226', '100020653786', '100020642337', '100020665643']
 
-archetype_3_sample = ['100020607980', '200001193193', '100020581690', '100020665611']
-archetype_4_sample = ['100020631683', '100020607667', '100020660228', '100020605116', '200001187539', '100020582907',
-                      '100020610165', '100020650607', '100020655500', '100020598277', '100020642537']
+archetype_3_sample = ['100020594652', '100020697787', '100020577523', '100020633162', '100020601138', '100020595611',
+                      '100020597485', '100020614883', '100020605342', '100020654671', '100020575611', '100020607980',
+                      '200001185785', '100020616446', '100020692380']
+
+archetype_4_sample = ['100020596436', '100020610165', '200001187539', '100020655500', '100020582907', '100020598277',
+                      '100020650607', '100020605116', '100020650603']
 
 
 def scenario_1():
@@ -182,10 +184,11 @@ def make_sample():
     asset_list = pd.DataFrame(asset_list)
 
     # From the asset list, we deduce how many properties we need
-    archetype_1_sample_size = 16
-    archetype_2_sample_size = 36
-    archetype_3_sample_size = 4
-    archetype_4_sample_size = 11
+    # Need to figure out the sizes
+    archetype_1_sample_size = 13
+    archetype_2_sample_size = 30
+    archetype_3_sample_size = 15
+    archetype_4_sample_size = 9
 
     # We take the sample and we'll keep the uprns static
     archetype_1_sample = asset_list[
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 8b20c0cd..9d2e99e3 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -4,6 +4,7 @@ from recommendations.Costs import Costs
 from recommendations.recommendation_utils import check_simulation_difference
 from backend.Property import Property
 from etl.epc_clean.epc_attributes.MainheatAttributes import MainHeatAttributes
+from etl.epc_clean.epc_attributes.HotWaterAttributes import HotWaterAttributes
 from recommendations.HeatingControlRecommender import HeatingControlRecommender
 
 
@@ -35,7 +36,14 @@ class HeatingRecommender:
             return
 
         # if the property has mains heating with boiler and radiators, we recommend optimal heating controls
-        if self.property.main_heating["clean_description"] in ["Boiler and radiators, mains gas"]:
+        has_boiler = self.property.main_heating["clean_description"] in ["Boiler and radiators, mains gas"]
+
+        # We also check that the property doesn't have a heating system, but it has access to the mains gas
+        no_heating_has_mains = self.property.main_heating["clean_description"] in [
+            'No system present, electric heaters assumed'
+        ] and self.property.data["mains-gas-flag"]
+
+        if has_boiler or no_heating_has_mains:
             self.recommend_boiler_upgrades(phase=phase)
             return
 
@@ -254,12 +262,37 @@ class HeatingRecommender:
             )
 
             # If heating and hot water come from the mains, we need a combi boiler, otherwise we need a regular boiler
-            is_combi = self.property.hotwater["clean_description"] in ["From main system"]
+            hotwater_from_mains = self.property.hotwater["clean_description"] in ["From main system"]
+            access_to_mains_no_system = self.property.main_heating["clean_description"] in [
+                'No system present, electric heaters assumed'
+            ] and self.property.data["mains-gas-flag"]
+            is_combi = hotwater_from_mains or access_to_mains_no_system
             if is_combi:
                 description = "Upgrade to a low carbon combi boiler"
             else:
                 description = "Upgrade to a low carbon boiler"
 
+            simulation_config = {"mainheat_energy_eff_ending": "Good"}
+            if access_to_mains_no_system:
+                # Installation of a boiler improves the hot water system so we need to reflect this in
+                # the outcome of the recommendation
+                heating_ending_config = MainHeatAttributes("Boiler and radiators, mains gas").process()
+                hotwater_ending_config = HotWaterAttributes("From main system").process()
+
+                heating_simulation_config = check_simulation_difference(
+                    new_config=heating_ending_config, old_config=self.property.main_heating
+                )
+                hotwater_simulation_config = check_simulation_difference(
+                    new_config=hotwater_ending_config, old_config=self.property.hotwater
+                )
+
+                simulation_config = {
+                    **simulation_config,
+                    **heating_simulation_config,
+                    **hotwater_simulation_config,
+                    "hot_water_energy_eff_ending": "Good"
+                }
+
             self.recommendations.append(
                 {
                     "phase": recommendation_phase,
@@ -271,7 +304,7 @@ class HeatingRecommender:
                     "starting_u_value": None,
                     "new_u_value": None,
                     "sap_points": None,
-                    "simulation_config": {"mainheat_energy_eff_ending": "Good"},
+                    "simulation_config": simulation_config,
                     **self.costs.low_carbon_boiler(is_combi=is_combi, size=f"{boiler_size}kw")
                 }
             )

From 93830f90bb785a3f7f17e77a1ef8285d4aed966e Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 4 Apr 2024 16:35:14 +0100
Subject: [PATCH 177/262] removed low carbon from boiler terminology

---
 backend/ml_models/AnnualBillSavings.py   |   6 +-
 etl/customers/gla_croydon_demo/slides.py | 424 ++++++++++++++++++++++-
 recommendations/HeatingRecommender.py    |  15 +-
 3 files changed, 431 insertions(+), 14 deletions(-)

diff --git a/backend/ml_models/AnnualBillSavings.py b/backend/ml_models/AnnualBillSavings.py
index 9be9d78a..99d67126 100644
--- a/backend/ml_models/AnnualBillSavings.py
+++ b/backend/ml_models/AnnualBillSavings.py
@@ -19,7 +19,8 @@ class AnnualBillSavings:
     PRICE_FACTOR = 0.09549999999999999
 
     # Daily standard charge, based on average across England, Scotland and Wales, and includes VAT
-    DAILY_STANDARD_CHARGE = 0.3143
+    DAILY_STANDARD_CHARGE_GAS = 0.3143
+    DAILY_STANDARD_CHARGE_ELECTRICITY = 0.601
 
     EPC_BANDS = ["G", "F", "E", "D", "C", "B", "A"]
 
@@ -45,11 +46,12 @@ class AnnualBillSavings:
     def calculate_annual_bill(cls, kwh):
         """
         This method will estimate the total annual bill for a property
+        It assumes both gas & electricity are used, so both daily standard charges are billed for all 365 days
         :param kwh: The total kwh consumption
         :return: An estimate for annual bill
         """
 
-        return cls.PRICE_FACTOR * kwh + cls.DAILY_STANDARD_CHARGE * 365
+        return cls.PRICE_FACTOR * kwh + (cls.DAILY_STANDARD_CHARGE_GAS + cls.DAILY_STANDARD_CHARGE_ELECTRICITY) * 365
 
     @classmethod
     def adjust_energy_to_metered(cls, epc_energy_consumption, current_epc_rating):
diff --git a/etl/customers/gla_croydon_demo/slides.py b/etl/customers/gla_croydon_demo/slides.py
index cbd1f7e4..9f791bbd 100644
--- a/etl/customers/gla_croydon_demo/slides.py
+++ b/etl/customers/gla_croydon_demo/slides.py
@@ -112,6 +112,49 @@ def scenario_1():
         recommendations_summary["total_bill_savings"] / recommendations_summary["current_energy_bill"]
     )
 
+    ########################
+    # Overview
+    ########################
+    overview_totals = recommendations_summary.sum()
+    overview_means = recommendations_summary.mean()
+
+    ########################
+    # Measures
+    ########################
+    measures_count = recommendations_df.groupby("type")["id"].count().reset_index()
+    wall_insulation_measures = measures_count[
+        measures_count["type"].isin(["cavity_wall_insulation", "external_wall_insulation", "internal_wall_insulation"])
+    ]["id"].sum()
+    ventilation_measures = measures_count[
+        measures_count["type"].isin(["mechanical_ventilation"])
+    ]["id"].sum()
+    roof_insulation_measures = measures_count[
+        measures_count["type"].isin(["loft_insulation", "flat_roof_insulation"])
+    ]["id"].sum()
+    floor_insulation_measures = measures_count[
+        measures_count["type"].isin(["solid_floor_insulation", "suspended_floor_insulation"])
+    ]["id"].sum()
+    windows = measures_count[
+        measures_count["type"].isin(["windows_glazing"])
+    ]["id"].sum()
+    heating = measures_count[
+        measures_count["type"].isin(["heating"])
+    ]["id"].sum()
+    heating_controls = measures_count[
+        measures_count["type"].isin(["heating_control"])
+    ]["id"].sum()
+    solar = measures_count[
+        measures_count["type"].isin(["solar_pv"])
+    ]["id"].sum()
+    other = measures_count[
+        ~measures_count["type"].isin([
+            "cavity_wall_insulation", "external_wall_insulation", "internal_wall_insulation",
+            "loft_insulation", "flat_roof_insulation", "solid_floor_insulation",
+            "suspended_floor_insulation", "windows_glazing", "heating", "heating_control", "solar_pv",
+            "mechanical_ventilation"
+        ])
+    ]["id"].sum()
+
     # Summary information by each archetype
     ########################
     # Archetype 1
@@ -121,10 +164,54 @@ def scenario_1():
         recommendations_summary["uprn"].astype(str).isin(archetype_1["uprn"].values)
     ]
 
+    arch_1_property_details = property_details_df[
+        property_details_df["uprn"].astype(str).isin(archetype_1["uprn"].values)
+    ]
+    arch_1_property_details["co2_emissions"].sum() / property_details_df["co2_emissions"].sum()
+
     # Take the mean, median and maximum of each value
-    arch_1_recommendation_min = recommendations_arch_1_summary.min()
-    arch_1_recommendation_max = recommendations_arch_1_summary.max()
-    arch_1_recommendation_means = recommendations_arch_1_summary.mean()
+    cols_to_keep = ["total_cost", "total_carbon", "total_bill_savings", "total_sap_points", "adjusted_heat_demand",
+                    "energy_percent_change", "carbon_percent_change", "bills_percent_change"]
+    arch_1_recommendation_min = recommendations_arch_1_summary.min()[cols_to_keep]
+    arch_1_recommendation_max = recommendations_arch_1_summary.max()[cols_to_keep]
+    arch_1_recommendation_means = recommendations_arch_1_summary.mean()[cols_to_keep]
+    arch_1_totals = recommendations_arch_1_summary.sum()[cols_to_keep]
+
+    annual_total_co2 = recommendations_arch_1_summary["total_carbon"].sum()
+    annual_total_bills = recommendations_arch_1_summary["total_bill_savings"].sum()
+    annual_total_energy_savings = recommendations_arch_1_summary["adjusted_heat_demand"].sum()
+    archetype_measures = \
+        recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_1["uprn"].values)].groupby("type")[
+            "id"].count().reset_index()
+
+    cost_text = (f"{round(arch_1_recommendation_means['total_cost'], 2)}: "
+                 f"{arch_1_recommendation_min['total_cost']} - {arch_1_recommendation_max['total_cost']}")
+
+    sap_text = (f"{round(arch_1_recommendation_means['total_sap_points'], 2)}: "
+                f"{arch_1_recommendation_min['total_sap_points']} - {arch_1_recommendation_max['total_sap_points']}")
+
+    energy_text = (f"{round(arch_1_recommendation_means['adjusted_heat_demand'], 2)}: "
+                   f"{arch_1_recommendation_min['adjusted_heat_demand']} - "
+                   f"{arch_1_recommendation_max['adjusted_heat_demand']}")
+
+    energy_percent_text = (f"{round(arch_1_recommendation_means['energy_percent_change'], 2)}: "
+                           f"{arch_1_recommendation_min['energy_percent_change']} - "
+                           f"{arch_1_recommendation_max['energy_percent_change']}")
+
+    carbon_text = (f"{round(arch_1_recommendation_means['total_carbon'], 2)}: "
+                   f"{arch_1_recommendation_min['total_carbon']} - {arch_1_recommendation_max['total_carbon']}")
+
+    carbon_percent_text = (f"{round(arch_1_recommendation_means['carbon_percent_change'], 2)}: "
+                           f"{arch_1_recommendation_min['carbon_percent_change']} - "
+                           f"{arch_1_recommendation_max['carbon_percent_change']}")
+
+    bill_text = (f"{round(arch_1_recommendation_means['total_bill_savings'], 2)}: "
+                 f"{arch_1_recommendation_min['total_bill_savings']} - "
+                 f"{arch_1_recommendation_max['total_bill_savings']}")
+
+    bill_percent_text = (f"{round(arch_1_recommendation_means['bills_percent_change'], 2)}: "
+                         f"{arch_1_recommendation_min['bills_percent_change']} - "
+                         f"{arch_1_recommendation_max['bills_percent_change']}")
 
     ########################
     # Archetype 2
@@ -134,11 +221,53 @@ def scenario_1():
         recommendations_summary["uprn"].astype(str).isin(archetype_2["uprn"].values)
     ]
 
+    arch_2_property_details = property_details_df[
+        property_details_df["uprn"].astype(str).isin(archetype_2["uprn"].values)
+    ]
+    arch_2_property_details["co2_emissions"].sum() / property_details_df["co2_emissions"].sum()
+
     # Take the mean, median and maximum of each value
     arch_2_recommendation_min = recommendations_arch_2_summary.min()
     arch_2_recommendation_max = recommendations_arch_2_summary.max()
     arch_2_recommendation_means = recommendations_arch_2_summary.mean().round(2)
 
+    total_cost = recommendations_arch_2_summary["total_cost"].sum()
+    annual_total_co2 = recommendations_arch_2_summary["total_carbon"].sum()
+    annual_total_bills = recommendations_arch_2_summary["total_bill_savings"].sum()
+    annual_total_energy_savings = recommendations_arch_2_summary["adjusted_heat_demand"].sum()
+    archetype_measures = \
+        recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_2["uprn"].values)].groupby("type")[
+            "id"].count().reset_index()
+
+    cost_text = (f"{round(arch_2_recommendation_means['total_cost'], 2)}: "
+                 f"{arch_2_recommendation_min['total_cost']} - {arch_2_recommendation_max['total_cost']}")
+
+    sap_text = (f"{round(arch_2_recommendation_means['total_sap_points'], 2)}: "
+                f"{arch_2_recommendation_min['total_sap_points']} - {arch_2_recommendation_max['total_sap_points']}")
+
+    energy_text = (f"{round(arch_2_recommendation_means['adjusted_heat_demand'], 2)}: "
+                   f"{arch_2_recommendation_min['adjusted_heat_demand']} - "
+                   f"{arch_2_recommendation_max['adjusted_heat_demand']}")
+
+    energy_percent_text = (f"{round(arch_2_recommendation_means['energy_percent_change'], 2)}: "
+                           f"{arch_2_recommendation_min['energy_percent_change']} - "
+                           f"{arch_2_recommendation_max['energy_percent_change']}")
+
+    carbon_text = (f"{round(arch_2_recommendation_means['total_carbon'], 2)}: "
+                   f"{arch_2_recommendation_min['total_carbon']} - {arch_2_recommendation_max['total_carbon']}")
+
+    carbon_percent_text = (f"{round(arch_2_recommendation_means['carbon_percent_change'], 2)}: "
+                           f"{arch_2_recommendation_min['carbon_percent_change']} - "
+                           f"{arch_2_recommendation_max['carbon_percent_change']}")
+
+    bill_text = (f"{round(arch_2_recommendation_means['total_bill_savings'], 2)}: "
+                 f"{arch_2_recommendation_min['total_bill_savings']} - "
+                 f"{arch_2_recommendation_max['total_bill_savings']}")
+
+    bill_percent_text = (f"{round(arch_2_recommendation_means['bills_percent_change'], 2)}: "
+                         f"{arch_2_recommendation_min['bills_percent_change']} - "
+                         f"{arch_2_recommendation_max['bills_percent_change']}")
+
     ########################
     # Archetype 3
     ########################
@@ -147,11 +276,53 @@ def scenario_1():
         recommendations_summary["uprn"].astype(str).isin(archetype_3["uprn"].values)
     ]
 
+    arch_3_property_details = property_details_df[
+        property_details_df["uprn"].astype(str).isin(archetype_3["uprn"].values)
+    ]
+    arch_3_property_details["co2_emissions"].sum() / property_details_df["co2_emissions"].sum()
+
     # Take the mean, median and maximum of each value
     arch_3_recommendation_min = recommendations_arch_3_summary.min()
     arch_3_recommendation_max = recommendations_arch_3_summary.max()
     arch_3_recommendation_means = recommendations_arch_3_summary.mean()
 
+    total_cost = recommendations_arch_3_summary["total_cost"].sum()
+    annual_total_co2 = recommendations_arch_3_summary["total_carbon"].sum()
+    annual_total_bills = recommendations_arch_3_summary["total_bill_savings"].sum()
+    annual_total_energy_savings = recommendations_arch_3_summary["adjusted_heat_demand"].sum()
+    archetype_measures = \
+        recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_3["uprn"].values)].groupby("type")[
+            "id"].count().reset_index()
+
+    cost_text = (f"{round(arch_3_recommendation_means['total_cost'], 2)}: "
+                 f"{arch_3_recommendation_min['total_cost']} - {arch_3_recommendation_max['total_cost']}")
+
+    sap_text = (f"{round(arch_3_recommendation_means['total_sap_points'], 2)}: "
+                f"{arch_3_recommendation_min['total_sap_points']} - {arch_3_recommendation_max['total_sap_points']}")
+
+    energy_text = (f"{round(arch_3_recommendation_means['adjusted_heat_demand'], 2)}: "
+                   f"{arch_3_recommendation_min['adjusted_heat_demand']} - "
+                   f"{arch_3_recommendation_max['adjusted_heat_demand']}")
+
+    energy_percent_text = (f"{round(arch_3_recommendation_means['energy_percent_change'], 2)}: "
+                           f"{arch_3_recommendation_min['energy_percent_change']} - "
+                           f"{arch_3_recommendation_max['energy_percent_change']}")
+
+    carbon_text = (f"{round(arch_3_recommendation_means['total_carbon'], 2)}: "
+                   f"{arch_3_recommendation_min['total_carbon']} - {arch_3_recommendation_max['total_carbon']}")
+
+    carbon_percent_text = (f"{round(arch_3_recommendation_means['carbon_percent_change'], 2)}: "
+                           f"{arch_3_recommendation_min['carbon_percent_change']} - "
+                           f"{arch_3_recommendation_max['carbon_percent_change']}")
+
+    bill_text = (f"{round(arch_3_recommendation_means['total_bill_savings'], 2)}: "
+                 f"{arch_3_recommendation_min['total_bill_savings']} - "
+                 f"{arch_3_recommendation_max['total_bill_savings']}")
+
+    bill_percent_text = (f"{round(arch_3_recommendation_means['bills_percent_change'], 2)}: "
+                         f"{arch_3_recommendation_min['bills_percent_change']} - "
+                         f"{arch_3_recommendation_max['bills_percent_change']}")
+
     ########################
     # Archetype 4
     ########################
@@ -160,14 +331,52 @@ def scenario_1():
         recommendations_summary["uprn"].astype(str).isin(archetype_4["uprn"].values)
     ]
 
+    arch_4_property_details = property_details_df[
+        property_details_df["uprn"].astype(str).isin(archetype_4["uprn"].values)
+    ]
+    arch_4_property_details["co2_emissions"].sum() / property_details_df["co2_emissions"].sum()
+
     # Take the mean, median and maximum of each value
     arch_4_recommendation_min = recommendations_arch_4_summary.min()
     arch_4_recommendation_max = recommendations_arch_4_summary.max()
     arch_4_recommendation_means = recommendations_arch_4_summary.mean()
 
-    property_details_df[
-        property_details_df["uprn"].astype(str).isin(archetype_4["uprn"].values)
-    ]["total_floor_area"].mean()
+    total_cost = recommendations_arch_4_summary["total_cost"].sum()
+    annual_total_co2 = recommendations_arch_4_summary["total_carbon"].sum()
+    annual_total_bills = recommendations_arch_4_summary["total_bill_savings"].sum()
+    annual_total_energy_savings = recommendations_arch_4_summary["adjusted_heat_demand"].sum()
+    archetype_measures = \
+        recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_4["uprn"].values)].groupby("type")[
+            "id"].count().reset_index()
+
+    cost_text = (f"{round(arch_4_recommendation_means['total_cost'], 2)}: "
+                 f"{arch_4_recommendation_min['total_cost']} - {arch_4_recommendation_max['total_cost']}")
+
+    sap_text = (f"{round(arch_4_recommendation_means['total_sap_points'], 2)}: "
+                f"{arch_4_recommendation_min['total_sap_points']} - {arch_4_recommendation_max['total_sap_points']}")
+
+    energy_text = (f"{round(arch_4_recommendation_means['adjusted_heat_demand'], 2)}: "
+                   f"{arch_4_recommendation_min['adjusted_heat_demand']} - "
+                   f"{arch_4_recommendation_max['adjusted_heat_demand']}")
+
+    energy_percent_text = (f"{round(arch_4_recommendation_means['energy_percent_change'], 2)}: "
+                           f"{arch_4_recommendation_min['energy_percent_change']} - "
+                           f"{arch_4_recommendation_max['energy_percent_change']}")
+
+    carbon_text = (f"{round(arch_4_recommendation_means['total_carbon'], 2)}: "
+                   f"{arch_4_recommendation_min['total_carbon']} - {arch_4_recommendation_max['total_carbon']}")
+
+    carbon_percent_text = (f"{round(arch_4_recommendation_means['carbon_percent_change'], 2)}: "
+                           f"{arch_4_recommendation_min['carbon_percent_change']} - "
+                           f"{arch_4_recommendation_max['carbon_percent_change']}")
+
+    bill_text = (f"{round(arch_4_recommendation_means['total_bill_savings'], 2)}: "
+                 f"{arch_4_recommendation_min['total_bill_savings']} - "
+                 f"{arch_4_recommendation_max['total_bill_savings']}")
+
+    bill_percent_text = (f"{round(arch_4_recommendation_means['bills_percent_change'], 2)}: "
+                         f"{arch_4_recommendation_min['bills_percent_change']} - "
+                         f"{arch_4_recommendation_max['bills_percent_change']}")
 
     ########################
     # Overview
@@ -291,6 +500,38 @@ def scenario_2():
     # Measures
     ########################
     measures_count = recommendations_df.groupby("type")["id"].count().reset_index()
+    wall_insulation_measures = measures_count[
+        measures_count["type"].isin(["cavity_wall_insulation", "external_wall_insulation", "internal_wall_insulation"])
+    ]["id"].sum()
+    ventilation_measures = measures_count[
+        measures_count["type"].isin(["mechanical_ventilation"])
+    ]["id"].sum()
+    roof_insulation_measures = measures_count[
+        measures_count["type"].isin(["loft_insulation", "flat_roof_insulation"])
+    ]["id"].sum()
+    floor_insulation_measures = measures_count[
+        measures_count["type"].isin(["solid_floor_insulation", "suspended_floor_insulation"])
+    ]["id"].sum()
+    windows = measures_count[
+        measures_count["type"].isin(["windows_glazing"])
+    ]["id"].sum()
+    heating = measures_count[
+        measures_count["type"].isin(["heating"])
+    ]["id"].sum()
+    heating_controls = measures_count[
+        measures_count["type"].isin(["heating_control"])
+    ]["id"].sum()
+    solar = measures_count[
+        measures_count["type"].isin(["solar_pv"])
+    ]["id"].sum()
+    other = measures_count[
+        ~measures_count["type"].isin([
+            "cavity_wall_insulation", "external_wall_insulation", "internal_wall_insulation",
+            "loft_insulation", "flat_roof_insulation", "solid_floor_insulation",
+            "suspended_floor_insulation", "windows_glazing", "heating", "heating_control", "solar_pv",
+            "mechanical_ventilation"
+        ])
+    ]["id"].sum()
 
     z = recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_3_sample)]
 
@@ -305,11 +546,54 @@ def scenario_2():
         recommendations_summary["uprn"].astype(str).isin(archetype_1["uprn"].values)
     ]
 
+    arch_1_property_details = property_details_df[
+        property_details_df["uprn"].astype(str).isin(archetype_1["uprn"].values)
+    ]
+    arch_1_property_details["co2_emissions"].sum() / property_details_df["co2_emissions"].sum()
+
     # Take the mean, median and maximum of each value
     arch_1_recommendation_min = recommendations_arch_1_summary.min()
     arch_1_recommendation_max = recommendations_arch_1_summary.max()
     arch_1_recommendation_means = recommendations_arch_1_summary.mean()
 
+    arch_1_totals = recommendations_arch_1_summary.sum()
+
+    annual_total_co2 = recommendations_arch_1_summary["total_carbon"].sum()
+    annual_total_bills = recommendations_arch_1_summary["total_bill_savings"].sum()
+    annual_total_energy_savings = recommendations_arch_1_summary["adjusted_heat_demand"].sum()
+    archetype_measures = \
+        recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_1["uprn"].values)].groupby("type")[
+            "id"].count().reset_index()
+
+    cost_text = (f"{round(arch_1_recommendation_means['total_cost'], 2)}: "
+                 f"{arch_1_recommendation_min['total_cost']} - {arch_1_recommendation_max['total_cost']}")
+
+    sap_text = (f"{round(arch_1_recommendation_means['total_sap_points'], 2)}: "
+                f"{arch_1_recommendation_min['total_sap_points']} - {arch_1_recommendation_max['total_sap_points']}")
+
+    energy_text = (f"{round(arch_1_recommendation_means['adjusted_heat_demand'], 2)}: "
+                   f"{arch_1_recommendation_min['adjusted_heat_demand']} - "
+                   f"{arch_1_recommendation_max['adjusted_heat_demand']}")
+
+    energy_percent_text = (f"{round(arch_1_recommendation_means['energy_percent_change'], 2)}: "
+                           f"{arch_1_recommendation_min['energy_percent_change']} - "
+                           f"{arch_1_recommendation_max['energy_percent_change']}")
+
+    carbon_text = (f"{round(arch_1_recommendation_means['total_carbon'], 2)}: "
+                   f"{arch_1_recommendation_min['total_carbon']} - {arch_1_recommendation_max['total_carbon']}")
+
+    carbon_percent_text = (f"{round(arch_1_recommendation_means['carbon_percent_change'], 2)}: "
+                           f"{arch_1_recommendation_min['carbon_percent_change']} - "
+                           f"{arch_1_recommendation_max['carbon_percent_change']}")
+
+    bill_text = (f"{round(arch_1_recommendation_means['total_bill_savings'], 2)}: "
+                 f"{arch_1_recommendation_min['total_bill_savings']} - "
+                 f"{arch_1_recommendation_max['total_bill_savings']}")
+
+    bill_percent_text = (f"{round(arch_1_recommendation_means['bills_percent_change'], 2)}: "
+                         f"{arch_1_recommendation_min['bills_percent_change']} - "
+                         f"{arch_1_recommendation_max['bills_percent_change']}")
+
     ########################
     # Archetype 2
     ########################
@@ -318,11 +602,53 @@ def scenario_2():
         recommendations_summary["uprn"].astype(str).isin(archetype_2["uprn"].values)
     ]
 
+    arch_2_property_details = property_details_df[
+        property_details_df["uprn"].astype(str).isin(archetype_2["uprn"].values)
+    ]
+    arch_2_property_details["co2_emissions"].sum() / property_details_df["co2_emissions"].sum()
+
     # Take the mean, median and maximum of each value
     arch_2_recommendation_min = recommendations_arch_2_summary.min()
     arch_2_recommendation_max = recommendations_arch_2_summary.max()
     arch_2_recommendation_means = recommendations_arch_2_summary.mean().round(2)
 
+    total_cost = recommendations_arch_2_summary["total_cost"].sum()
+    annual_total_co2 = recommendations_arch_2_summary["total_carbon"].sum()
+    annual_total_bills = recommendations_arch_2_summary["total_bill_savings"].sum()
+    annual_total_energy_savings = recommendations_arch_2_summary["adjusted_heat_demand"].sum()
+    archetype_measures = \
+        recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_2["uprn"].values)].groupby("type")[
+            "id"].count().reset_index()
+
+    cost_text = (f"{round(arch_2_recommendation_means['total_cost'], 2)}: "
+                 f"{arch_2_recommendation_min['total_cost']} - {arch_2_recommendation_max['total_cost']}")
+
+    sap_text = (f"{round(arch_2_recommendation_means['total_sap_points'], 2)}: "
+                f"{arch_2_recommendation_min['total_sap_points']} - {arch_2_recommendation_max['total_sap_points']}")
+
+    energy_text = (f"{round(arch_2_recommendation_means['adjusted_heat_demand'], 2)}: "
+                   f"{arch_2_recommendation_min['adjusted_heat_demand']} - "
+                   f"{arch_2_recommendation_max['adjusted_heat_demand']}")
+
+    energy_percent_text = (f"{round(arch_2_recommendation_means['energy_percent_change'], 2)}: "
+                           f"{arch_2_recommendation_min['energy_percent_change']} - "
+                           f"{arch_2_recommendation_max['energy_percent_change']}")
+
+    carbon_text = (f"{round(arch_2_recommendation_means['total_carbon'], 2)}: "
+                   f"{arch_2_recommendation_min['total_carbon']} - {arch_2_recommendation_max['total_carbon']}")
+
+    carbon_percent_text = (f"{round(arch_2_recommendation_means['carbon_percent_change'], 2)}: "
+                           f"{arch_2_recommendation_min['carbon_percent_change']} - "
+                           f"{arch_2_recommendation_max['carbon_percent_change']}")
+
+    bill_text = (f"{round(arch_2_recommendation_means['total_bill_savings'], 2)}: "
+                 f"{arch_2_recommendation_min['total_bill_savings']} - "
+                 f"{arch_2_recommendation_max['total_bill_savings']}")
+
+    bill_percent_text = (f"{round(arch_2_recommendation_means['bills_percent_change'], 2)}: "
+                         f"{arch_2_recommendation_min['bills_percent_change']} - "
+                         f"{arch_2_recommendation_max['bills_percent_change']}")
+
     ########################
     # Archetype 3
     ########################
@@ -331,11 +657,53 @@ def scenario_2():
         recommendations_summary["uprn"].astype(str).isin(archetype_3["uprn"].values)
     ]
 
+    arch_3_property_details = property_details_df[
+        property_details_df["uprn"].astype(str).isin(archetype_3["uprn"].values)
+    ]
+    arch_3_property_details["co2_emissions"].sum() / property_details_df["co2_emissions"].sum()
+
     # Take the mean, median and maximum of each value
     arch_3_recommendation_min = recommendations_arch_3_summary.min()
     arch_3_recommendation_max = recommendations_arch_3_summary.max()
     arch_3_recommendation_means = recommendations_arch_3_summary.mean()
 
+    total_cost = recommendations_arch_3_summary["total_cost"].sum()
+    annual_total_co2 = recommendations_arch_3_summary["total_carbon"].sum()
+    annual_total_bills = recommendations_arch_3_summary["total_bill_savings"].sum()
+    annual_total_energy_savings = recommendations_arch_3_summary["adjusted_heat_demand"].sum()
+    archetype_measures = \
+        recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_3["uprn"].values)].groupby("type")[
+            "id"].count().reset_index()
+
+    cost_text = (f"{round(arch_3_recommendation_means['total_cost'], 2)}: "
+                 f"{arch_3_recommendation_min['total_cost']} - {arch_3_recommendation_max['total_cost']}")
+
+    sap_text = (f"{round(arch_3_recommendation_means['total_sap_points'], 2)}: "
+                f"{arch_3_recommendation_min['total_sap_points']} - {arch_3_recommendation_max['total_sap_points']}")
+
+    energy_text = (f"{round(arch_3_recommendation_means['adjusted_heat_demand'], 2)}: "
+                   f"{arch_3_recommendation_min['adjusted_heat_demand']} - "
+                   f"{arch_3_recommendation_max['adjusted_heat_demand']}")
+
+    energy_percent_text = (f"{round(arch_3_recommendation_means['energy_percent_change'], 2)}: "
+                           f"{arch_3_recommendation_min['energy_percent_change']} - "
+                           f"{arch_3_recommendation_max['energy_percent_change']}")
+
+    carbon_text = (f"{round(arch_3_recommendation_means['total_carbon'], 2)}: "
+                   f"{arch_3_recommendation_min['total_carbon']} - {arch_3_recommendation_max['total_carbon']}")
+
+    carbon_percent_text = (f"{round(arch_3_recommendation_means['carbon_percent_change'], 2)}: "
+                           f"{arch_3_recommendation_min['carbon_percent_change']} - "
+                           f"{arch_3_recommendation_max['carbon_percent_change']}")
+
+    bill_text = (f"{round(arch_3_recommendation_means['total_bill_savings'], 2)}: "
+                 f"{arch_3_recommendation_min['total_bill_savings']} - "
+                 f"{arch_3_recommendation_max['total_bill_savings']}")
+
+    bill_percent_text = (f"{round(arch_3_recommendation_means['bills_percent_change'], 2)}: "
+                         f"{arch_3_recommendation_min['bills_percent_change']} - "
+                         f"{arch_3_recommendation_max['bills_percent_change']}")
+
     ########################
     # Archetype 4
     ########################
@@ -344,11 +712,49 @@ def scenario_2():
         recommendations_summary["uprn"].astype(str).isin(archetype_4["uprn"].values)
     ]
 
+    arch_4_property_details = property_details_df[
+        property_details_df["uprn"].astype(str).isin(archetype_4["uprn"].values)
+    ]
+    arch_4_property_details["co2_emissions"].sum() / property_details_df["co2_emissions"].sum()
+
     # Take the mean, median and maximum of each value
     arch_4_recommendation_min = recommendations_arch_4_summary.min()
     arch_4_recommendation_max = recommendations_arch_4_summary.max()
     arch_4_recommendation_means = recommendations_arch_4_summary.mean()
 
-    property_details_df[
-        property_details_df["uprn"].astype(str).isin(archetype_4["uprn"].values)
-    ]["total_floor_area"].mean()
+    total_cost = recommendations_arch_4_summary["total_cost"].sum()
+    annual_total_co2 = recommendations_arch_4_summary["total_carbon"].sum()
+    annual_total_bills = recommendations_arch_4_summary["total_bill_savings"].sum()
+    annual_total_energy_savings = recommendations_arch_4_summary["adjusted_heat_demand"].sum()
+    archetype_measures = \
+        recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_4["uprn"].values)].groupby("type")[
+            "id"].count().reset_index()
+
+    cost_text = (f"{round(arch_4_recommendation_means['total_cost'], 2)}: "
+                 f"{arch_4_recommendation_min['total_cost']} - {arch_4_recommendation_max['total_cost']}")
+
+    sap_text = (f"{round(arch_4_recommendation_means['total_sap_points'], 2)}: "
+                f"{arch_4_recommendation_min['total_sap_points']} - {arch_4_recommendation_max['total_sap_points']}")
+
+    energy_text = (f"{round(arch_4_recommendation_means['adjusted_heat_demand'], 2)}: "
+                   f"{arch_4_recommendation_min['adjusted_heat_demand']} - "
+                   f"{arch_4_recommendation_max['adjusted_heat_demand']}")
+
+    energy_percent_text = (f"{round(arch_4_recommendation_means['energy_percent_change'], 2)}: "
+                           f"{arch_4_recommendation_min['energy_percent_change']} - "
+                           f"{arch_4_recommendation_max['energy_percent_change']}")
+
+    carbon_text = (f"{round(arch_4_recommendation_means['total_carbon'], 2)}: "
+                   f"{arch_4_recommendation_min['total_carbon']} - {arch_4_recommendation_max['total_carbon']}")
+
+    carbon_percent_text = (f"{round(arch_4_recommendation_means['carbon_percent_change'], 2)}: "
+                           f"{arch_4_recommendation_min['carbon_percent_change']} - "
+                           f"{arch_4_recommendation_max['carbon_percent_change']}")
+
+    bill_text = (f"{round(arch_4_recommendation_means['total_bill_savings'], 2)}: "
+                 f"{arch_4_recommendation_min['total_bill_savings']} - "
+                 f"{arch_4_recommendation_max['total_bill_savings']}")
+
+    bill_percent_text = (f"{round(arch_4_recommendation_means['bills_percent_change'], 2)}: "
+                         f"{arch_4_recommendation_min['bills_percent_change']} - "
+                         f"{arch_4_recommendation_max['bills_percent_change']}")
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 9d2e99e3..2c075820 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -186,9 +186,18 @@ class HeatingRecommender:
         # This upgrade will only take the heating system to average energy efficiency
         heating_simulation_config["mainheat_energy_eff_ending"] = "Average"
 
+        # If the property is off-gas and has no heating system in place, the number of heated rooms will actually
+        # be 0, so we fall back to the number of rooms (less one when the property has more than one room)
+        number_heated_rooms = (
+            self.property.data["number-heated-rooms"] if self.property.data["number-heated-rooms"] > 0
+            else (
+                self.property.number_of_rooms - 1 if self.property.number_of_rooms > 1 else
+                self.property.number_of_rooms
+            )
+        )
         # Upgrade to electric storage heaters
         costs = self.costs.high_heat_electric_storage_heaters(
-            number_heated_rooms=self.property.data["number-heated-rooms"]
+            number_heated_rooms=number_heated_rooms
         )
         description = "Install high heat retention electric storage heaters"
 
@@ -268,9 +277,9 @@ class HeatingRecommender:
             ] and self.property.data["mains-gas-flag"]
             is_combi = hotwater_from_mains or access_to_mains_no_system
             if is_combi:
-                description = "Upgrade to a low carbon combi boiler"
+                description = "Upgrade to a new combi boiler"
             else:
-                description = "Upgrade to a low carbon boiler"
+                description = "Upgrade to a new boiler"
 
             simulation_config = {"mainheat_energy_eff_ending": "Good"}
             if access_to_mains_no_system:

From e182d7acd77aa9dfc56a03650c59ffb3d763aa36 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 5 Apr 2024 10:19:22 +0100
Subject: [PATCH 178/262] change calculation of energy savings to use adjusted
 heat demand, not heat demand

---
 backend/app/db/functions/portfolio_functions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/app/db/functions/portfolio_functions.py b/backend/app/db/functions/portfolio_functions.py
index a8a882bd..ead8280f 100644
--- a/backend/app/db/functions/portfolio_functions.py
+++ b/backend/app/db/functions/portfolio_functions.py
@@ -11,7 +11,7 @@ def aggregate_portfolio_recommendations(
         session.query(
             func.sum(Recommendation.estimated_cost).label("cost"),
             func.sum(Recommendation.total_work_hours).label("total_work_hours"),
-            func.sum(Recommendation.heat_demand).label("energy_savings"),
+            func.sum(Recommendation.adjusted_heat_demand).label("energy_savings"),
             func.sum(Recommendation.co2_equivalent_savings).label("co2_equivalent_savings"),
             func.sum(Recommendation.energy_cost_savings).label("energy_cost_savings"),
         )

From 02e72c569513b846cd1348caa17d20a786507c7b Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 5 Apr 2024 14:02:48 +0100
Subject: [PATCH 179/262] prevent hot water tank insulation recommendations
 when no heating system is in place

---
 recommendations/HotwaterRecommendations.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/recommendations/HotwaterRecommendations.py b/recommendations/HotwaterRecommendations.py
index 298671a2..667f5f69 100644
--- a/recommendations/HotwaterRecommendations.py
+++ b/recommendations/HotwaterRecommendations.py
@@ -22,8 +22,13 @@ class HotwaterRecommendations:
 
         # This first iteration of the recommender will provide very basic recommendation
         # We recommend heating controls based on the main heating system
-        if (self.property.hotwater["heater_type"] in ["electric immersion"]) & \
-            (self.property.data["hot-water-energy-eff"] == "Very Poor"):
+        # If there is not system present, we do not recommend anything, since we will have a separate recommendation
+        # suggesting system upgrades (e.g. boiler replacement)
+        if (
+            (self.property.hotwater["heater_type"] in ["electric immersion"]) &
+            (self.property.data["hot-water-energy-eff"] == "Very Poor") &
+            (self.property.hotwater["no_system_present"] is None)
+        ):
             self.recommend_tank_insulation(phase=phase)
             return
 

From 4134fdbb755f4a25e8162bfb851709372d0c5677 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 5 Apr 2024 15:00:24 +0100
Subject: [PATCH 180/262] Added pruning of solar panel options to prevent
 systems much too large or much too small

---
 recommendations/SolarPvRecommendations.py | 46 +++++++++++++++++++----
 1 file changed, 38 insertions(+), 8 deletions(-)

diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py
index 3a89b213..744351be 100644
--- a/recommendations/SolarPvRecommendations.py
+++ b/recommendations/SolarPvRecommendations.py
@@ -8,6 +8,9 @@ class SolarPvRecommendations:
     # Wattage per panel - this is based on the average wattage of a solar panel being between 250w and 420w
     SOLAR_PANEL_WATTAGE = 250
 
+    MAX_SYSTEM_WATTAGE = 4200
+    MIN_SYSTEM_WATTAGE = 2500
+
     def __init__(self, property_instance):
         """
         :param property_instance: Instance of the Property class, for the home associated to property_id
@@ -18,6 +21,19 @@ class SolarPvRecommendations:
 
         self.recommendation = []
 
+    @staticmethod
+    def trim_solar_wattage_options(scenarios_with_wattage):
+        # Initialize the list with the first element, assuming the list is not empty
+        trimmed_list = [scenarios_with_wattage[0]]
+
+        # Iterate over the list starting from the second element
+        for scenario in scenarios_with_wattage[1:]:
+            # Compare the second element (index 1) of the current tuple with the last tuple in the trimmed list
+            if scenario[1] > trimmed_list[-1][1]:
+                trimmed_list.append(scenario)
+
+        return trimmed_list
+
     def recommend(self, phase):
         """
         We check if a property is potentially suitable for solar PV based on the following criteria:
@@ -46,26 +62,40 @@ class SolarPvRecommendations:
             self.property.solar_pv_percentage - 0.1, self.property.solar_pv_percentage,
             self.property.solar_pv_percentage + 0.1
         ]
-        # We make sure we haven't gone too low or high
-        roof_coverage_scenarios = [v for v in roof_coverage_scenarios if 0 <= v <= 1]
+        # We make sure we haven't gone too low or high - we allow no more than 60% coverage
+        roof_coverage_scenarios = [v for v in roof_coverage_scenarios if 0 <= v <= 0.6]
+        # If we only have two scenarios, we add a coverage scenario 10% less than the smallest
+        if len(roof_coverage_scenarios) == 2:
+            roof_coverage_scenarios.insert(0, roof_coverage_scenarios[0] - 0.1)
         battery_scenarios = [False, True]
 
-        # I now produce the cross product of the scenarios
-        scenarios = [(roof, battery) for roof in roof_coverage_scenarios for battery in battery_scenarios]
-
-        for roof_coverage, has_battery in scenarios:
+        scenarios_with_wattage = []
+        for roof_coverage in roof_coverage_scenarios:
             # We now have a property which is potentially suitable for solar PV
             solar_pv_roof_area = self.property.get_solar_pv_roof_area(roof_coverage)
 
             number_solar_panels = np.floor(solar_pv_roof_area / self.SOLAR_PANEL_AREA)
             solar_panel_wattage = number_solar_panels * self.SOLAR_PANEL_WATTAGE
+            solar_panel_wattage = np.clip(
+                a=solar_panel_wattage, a_min=self.MIN_SYSTEM_WATTAGE, a_max=self.MAX_SYSTEM_WATTAGE
+            )
+            scenarios_with_wattage.append((roof_coverage, solar_panel_wattage))
 
+        # We trim the scenarios, so that we don't have duplicate wattages
+        scenarios_with_wattage = self.trim_solar_wattage_options(scenarios_with_wattage)
+
+        # Produce the cross product of the scenarios
+        scenarios = [
+            (roof, wattage, battery) for roof, wattage in scenarios_with_wattage for battery in battery_scenarios
+        ]
+        # Each scenario already carries the wattage derived from its roof coverage in the loop above
+
+        for roof_coverage, solar_panel_wattage, has_battery in scenarios:
+            # We now have a property which is potentially suitable for solar PV
             roof_coverage_percent = round(roof_coverage * 100)
-
             # Given the wattage, we estimate the cost of the solar PV system. This is based on the MCS database
             # of solar PV installations
             cost_result = self.costs.solar_pv(wattage=solar_panel_wattage, has_battery=has_battery)
-
             kw = np.floor(solar_panel_wattage / 100) / 10
 
             if has_battery:

From ec6fc84911d1a8ac3689c9f07b866fda98086212 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 5 Apr 2024 15:14:55 +0100
Subject: [PATCH 181/262] updating solar panel logic

---
 recommendations/SolarPvRecommendations.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py
index 744351be..4cf1c1fc 100644
--- a/recommendations/SolarPvRecommendations.py
+++ b/recommendations/SolarPvRecommendations.py
@@ -8,8 +8,8 @@ class SolarPvRecommendations:
     # Wattage per panel - this is based on the average wattage of a solar panel being between 250w and 420w
     SOLAR_PANEL_WATTAGE = 250
 
-    MAX_SYSTEM_WATTAGE = 4200
-    MIN_SYSTEM_WATTAGE = 2500
+    MAX_SYSTEM_WATTAGE = 6000
+    MIN_SYSTEM_WATTAGE = 1000
 
     def __init__(self, property_instance):
         """
@@ -60,8 +60,9 @@ class SolarPvRecommendations:
         # 2) With and without battery
         roof_coverage_scenarios = [
             self.property.solar_pv_percentage - 0.1, self.property.solar_pv_percentage,
-            self.property.solar_pv_percentage + 0.1
         ]
+        if self.property.solar_pv_percentage <= 0.4:
+            roof_coverage_scenarios.append(self.property.solar_pv_percentage + 0.1)
         # We make sure we haven't gone too low or high - we allow no more than 60% coverage
         roof_coverage_scenarios = [v for v in roof_coverage_scenarios if 0 <= v <= 0.6]
         # If we only have two scenarios, we add a coverage scenario 10% less than the smallest
@@ -76,6 +77,10 @@ class SolarPvRecommendations:
 
             number_solar_panels = np.floor(solar_pv_roof_area / self.SOLAR_PANEL_AREA)
             solar_panel_wattage = number_solar_panels * self.SOLAR_PANEL_WATTAGE
+
+            if solar_panel_wattage < self.MIN_SYSTEM_WATTAGE:
+                continue
+
             solar_panel_wattage = np.clip(
                 a=solar_panel_wattage, a_min=self.MIN_SYSTEM_WATTAGE, a_max=self.MAX_SYSTEM_WATTAGE
             )

From 6258c347d68ecd1156387f9e2a532d099e2be2c3 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 5 Apr 2024 16:06:30 +0100
Subject: [PATCH 182/262] updating boiler recommendation to impact mains fuel
 and consider the impact on the main fuel

---
 etl/customers/gla_croydon_demo/asset_list.py |  4 ++
 recommendations/HeatingRecommender.py        | 58 +++++++++++++++-----
 2 files changed, 49 insertions(+), 13 deletions(-)

diff --git a/etl/customers/gla_croydon_demo/asset_list.py b/etl/customers/gla_croydon_demo/asset_list.py
index 52e9422c..777cba83 100644
--- a/etl/customers/gla_croydon_demo/asset_list.py
+++ b/etl/customers/gla_croydon_demo/asset_list.py
@@ -34,6 +34,10 @@ def app():
         low_memory=False
     )
 
+    z = epc_data.groupby(["MAINHEAT_DESCRIPTION", "MAINHEATCONT_DESCRIPTION", "MAIN_FUEL"]).size().reset_index(
+        name="count")
+    z = z[z["MAINHEAT_DESCRIPTION"] == "Boiler and radiators, mains gas"]
+
     # Filter on entries where we have a UPRN
     epc_data = epc_data[~pd.isnull(epc_data["UPRN"])]
 
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 2c075820..f602ecab 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -5,6 +5,7 @@ from recommendations.recommendation_utils import check_simulation_difference
 from backend.Property import Property
 from etl.epc_clean.epc_attributes.MainheatAttributes import MainHeatAttributes
 from etl.epc_clean.epc_attributes.HotWaterAttributes import HotWaterAttributes
+from etl.epc_clean.epc_attributes.MainFuelAttributes import MainFuelAttributes
 from recommendations.HeatingControlRecommender import HeatingControlRecommender
 
 
@@ -44,7 +45,7 @@ class HeatingRecommender:
         ] and self.property.data["mains-gas-flag"]
 
         if has_boiler or no_heating_has_mains:
-            self.recommend_boiler_upgrades(phase=phase)
+            self.recommend_boiler_upgrades(phase=phase, no_heating_has_mains=no_heating_has_mains)
             return
 
     @staticmethod
@@ -250,17 +251,20 @@ class HeatingRecommender:
 
         return closest_size
 
-    def recommend_boiler_upgrades(self, phase):
+    def recommend_boiler_upgrades(self, phase, no_heating_has_mains):
         """
         This boiler recommendation will only recommend a like-for-like upgrade, since changing the system
         is generally more expensive
         :param phase:
+        :param no_heating_has_mains: indicates if the property has no heating system, but has access to the mains gas
         :return:
         """
 
         recommendation_phase = phase
 
         # We now recommend boiler upgrades, if applicable
+        simulation_config = {}
+        boiler_costs = {}
         if self.property.data["mainheat-energy-eff"] in ["Very Poor", "Poor", "Average"]:
             boiler_size = self.estimate_boiler_size(
                 property_type=self.property.data["property-type"],
@@ -272,21 +276,20 @@ class HeatingRecommender:
 
             # If heating and hot water come from the mains, we need a combi boiler, otherwise we need a regular boiler
             hotwater_from_mains = self.property.hotwater["clean_description"] in ["From main system"]
-            access_to_mains_no_system = self.property.main_heating["clean_description"] in [
-                'No system present, electric heaters assumed'
-            ] and self.property.data["mains-gas-flag"]
-            is_combi = hotwater_from_mains or access_to_mains_no_system
+
+            is_combi = hotwater_from_mains or no_heating_has_mains
             if is_combi:
                 description = "Upgrade to a new combi boiler"
             else:
                 description = "Upgrade to a new boiler"
 
             simulation_config = {"mainheat_energy_eff_ending": "Good"}
-            if access_to_mains_no_system:
+            if no_heating_has_mains:
                 # Installation of a boiler improves the hot water system so we need to reflect this in
                 # the outcome of the recommendation
                 heating_ending_config = MainHeatAttributes("Boiler and radiators, mains gas").process()
                 hotwater_ending_config = HotWaterAttributes("From main system").process()
+                fuel_ending_config = MainFuelAttributes("mains gas (not community)").process()
 
                 heating_simulation_config = check_simulation_difference(
                     new_config=heating_ending_config, old_config=self.property.main_heating
@@ -294,14 +297,20 @@ class HeatingRecommender:
                 hotwater_simulation_config = check_simulation_difference(
                     new_config=hotwater_ending_config, old_config=self.property.hotwater
                 )
+                fuel_simulation_config = check_simulation_difference(
+                    new_config=fuel_ending_config, old_config=self.property.main_fuel
+                )
 
                 simulation_config = {
                     **simulation_config,
                     **heating_simulation_config,
                     **hotwater_simulation_config,
+                    **fuel_simulation_config,
                     "hot_water_energy_eff_ending": "Good"
                 }
 
+            boiler_costs = self.costs.low_carbon_boiler(is_combi=is_combi, size=f"{boiler_size}kw")
+
             self.recommendations.append(
                 {
                     "phase": recommendation_phase,
@@ -314,22 +323,45 @@ class HeatingRecommender:
                     "new_u_value": None,
                     "sap_points": None,
                     "simulation_config": simulation_config,
-                    **self.costs.low_carbon_boiler(is_combi=is_combi, size=f"{boiler_size}kw")
+                    **boiler_costs
                 }
             )
 
-            # We increment the recommendation phase, in the case of us having heating control recommendations
-            recommendation_phase += 1
-
         # We recommend the heating controls
+        # If the property did not previously have a boiler, we combine them with the boiler recommendation
         controls_recommender = HeatingControlRecommender(self.property)
         controls_recommender.recommend(heating_description="Boiler and radiators, mains gas")
         # We may have 2 recommendations from the heating controls
 
-        if controls_recommender.recommendation:
+        if not controls_recommender.recommendation:
+            return
+
+        if no_heating_has_mains:
+            # We combine the heating and controls recommendations
+            boiler_recommendation = self.recommendations[0].copy()
+            combined_recommendations = []
+            for controls_recommendation in controls_recommender.recommendation:
+                combined_recommendation = self.combine_heating_and_controls(
+                    controls_recommendations=[controls_recommendation],
+                    heating_simulation_config=simulation_config,
+                    costs=boiler_costs,
+                    description=boiler_recommendation["description"],
+                    phase=recommendation_phase,
+                    heating_controls_only=False,
+                    system_change=True
+                )
+                combined_recommendations.extend(combined_recommendation)
+
+            # Overwrite the existing boiler recommendation
+            self.recommendations = combined_recommendations
+        else:
+            # We increment the recommendation phase, since the heating controls are separate from the boiler upgrade
+            recommendation_phase += 1
             # The heating controls recommendation is distrinct from the boiler upgrade recommendation
             # We insert phase into the recommendations for heating controls
             for recommendation in controls_recommender.recommendation:
                 recommendation["phase"] = recommendation_phase
 
-        self.recommendations.extend(controls_recommender.recommendation)
+            self.recommendations.extend(controls_recommender.recommendation)
+
+        return

From 35a288fd7406c630fddde596360fa35e53d3fdd4 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 5 Apr 2024 16:47:15 +0100
Subject: [PATCH 183/262] Updating recommendations

---
 backend/Property.py                          | 5 -----
 etl/customers/gla_croydon_demo/asset_list.py | 3 +--
 recommendations/HeatingRecommender.py        | 7 ++++++-
 recommendations/HotwaterRecommendations.py   | 5 +++--
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index 19f15b02..d3dd8395 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -298,11 +298,6 @@ class Property:
                 if recommendation["type"] == "cavity_wall_insulation":
                     output["is_filled_cavity_ending"] = True
 
-                # TODO: perhaps detrimental
-                # When making a recommendation for the wall, we will also update the ventilation
-                # if output["mechanical_ventilation_ending"] == 'natural':
-                #     output["mechanical_ventilation_ending"] = 'mechanical, extract only'
-
             else:
                 if output["walls_thermal_transmittance_ending"] is None:
                     raise ValueError("We should not have a None value for the u value")
diff --git a/etl/customers/gla_croydon_demo/asset_list.py b/etl/customers/gla_croydon_demo/asset_list.py
index 777cba83..7dde8926 100644
--- a/etl/customers/gla_croydon_demo/asset_list.py
+++ b/etl/customers/gla_croydon_demo/asset_list.py
@@ -34,8 +34,7 @@ def app():
         low_memory=False
     )
 
-    z = epc_data.groupby(["MAINHEAT_DESCRIPTION", "MAINHEATCONT_DESCRIPTION", "MAIN_FUEL"]).size().reset_index(
-        name="count")
+    z = epc_data.groupby(["WALLS_DESCRIPTION", "WALLS_ENERGY_EFF"]).size().reset_index(name="count")
     z = z[z["MAINHEAT_DESCRIPTION"] == "Boiler and radiators, mains gas"]
 
     # Filter on entries where we have a UPRN
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index f602ecab..aec1f419 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -104,8 +104,13 @@ class HeatingRecommender:
                     **recommendation_simulation_config,
                     **controls_recommendations[0]["simulation_config"]
                 }
+                controls_description = controls_recommendations[0]['description']
+                # Make the first letter of the description lowercase
+                controls_description = (
+                    controls_description[0].lower() + controls_description[1:]
+                )
 
-                recommendation_description = f"{description} and {controls_recommendations[0]['description']}"
+                recommendation_description = f"{description} and {controls_description}"
 
             recommendation = {
                 "phase": phase,
diff --git a/recommendations/HotwaterRecommendations.py b/recommendations/HotwaterRecommendations.py
index 667f5f69..7f77597f 100644
--- a/recommendations/HotwaterRecommendations.py
+++ b/recommendations/HotwaterRecommendations.py
@@ -22,8 +22,9 @@ class HotwaterRecommendations:
 
         # This first iteration of the recommender will provide very basic recommendation
         # We recommend heating controls based on the main heating system
-        # If there is not system present, we do not recommend anything, since we will have a separate recommendation
-        # suggesting system upgrades (e.g. boiler replacement)
+
+        # If there is no system present, but access to the mains, we
+
         if (
             (self.property.hotwater["heater_type"] in ["electric immersion"]) &
             (self.property.data["hot-water-energy-eff"] == "Very Poor") &

From 0142e6fe5fcbcffc836bc139df48cf31e77545f1 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 8 Apr 2024 15:29:52 +0100
Subject: [PATCH 184/262] wip matching completed surveys back to the asset list

---
 .idea/Model.iml                               |  2 +-
 .idea/misc.xml                                |  2 +-
 .../ha_15_32/ha_analysis_batch_3.py           | 78 +++++++++++++++++++
 3 files changed, 80 insertions(+), 2 deletions(-)

diff --git a/.idea/Model.iml b/.idea/Model.iml
index 4413bb06..b0f9c00d 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 6f308057..1122b380 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index b4b82d0b..de2c0e6a 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -6907,3 +6907,81 @@ def app():
         december_figures["ECO4 remaining"]
     )
     december_figures["ECO4 remaining"].sum()
+
+    # Adhoc - for UNITAS, stripping out additional surveys that have been completed
+    unitas_data = loader.data["HA50"].copy()
+    unitas_asset_list = unitas_data["asset_list"].copy()
+    unitas_survey_sheet = unitas_data["survey_list"].copy()
+    # We remove the surveyed properties from the asset sheet
+    unitas_survey_sheet = unitas_survey_sheet[~pd.isnull(unitas_survey_sheet["asset_list_row_id"])]
+    unitas_asset_list = unitas_asset_list.merge(
+        unitas_survey_sheet[["asset_list_row_id", "installation_status"]],
+        how="left",
+        on="asset_list_row_id"
+    )
+    unitas_asset_list = unitas_asset_list[pd.isnull(unitas_asset_list["installation_status"])]
+    unitas_asset_list = unitas_asset_list.drop(columns=["installation_status"])
+
+    # We read in the data for the further completed surveys
+    unitas_phase_1_workbook = openpyxl.load_workbook(
+        "local_data/ha_data/UNITAS ( STOKE) MASTER ROLLING SHEET UPDATED 8.4.24 K - no password.xlsx"
+    )
+    phase_1_worksheet = unitas_phase_1_workbook["ECO 4 - PHASE 1"]
+    phase_2_worksheet = unitas_phase_1_workbook["ECO4 - PHASE 2"]
+    phase1_colnames = [cell.value for cell in phase_1_worksheet[1]]
+    phase_1_rows_data = []
+    for row in phase_1_worksheet.iter_rows(min_row=2, values_only=False):
+        row_data = [cell.value for cell in row]  # This will get you the cell values
+        phase_1_rows_data.append(row_data)
+
+    phase_1_surveys = pd.DataFrame(phase_1_rows_data, columns=phase1_colnames)
+
+    # Correct phase 1 surveys in the same fashion as the previous approach
+    phase_1_surveys = DataLoader.correct_ha50_survey_list(phase_1_surveys.copy())
+
+    # We check all phase 1 surveys are contained in the data we had before
+    additional = []
+    for _, row in tqdm(phase_1_surveys.iterrows(), total=len(phase_1_surveys)):
+        # We look for the entry in the old survey sheet:
+        # matched_uprn = unitas_survey_sheet[unitas_survey_sheet["EPR UPRN NUMBER"] == row["UPRN"]]
+        # if matched_uprn.shape[0] == 1:
+        #     continue
+
+        matched_1 = unitas_survey_sheet[
+            (unitas_survey_sheet["Post Code"] == row["Post Code"]) &
+            (unitas_survey_sheet["NO."] == row["NO."])
+            ]
+
+        if matched_1.shape[0] == 1:
+            continue
+
+        matched_2 = unitas_survey_sheet[
+            (unitas_survey_sheet["Street / Block Name"] == row["Street / Block Name"]) &
+            (unitas_survey_sheet["NO."] == row["NO."])
+            ]
+
+        if matched_2.shape[0] == 1:
+            continue
+
+        additional.append(row.to_dict())
+    additional = pd.DataFrame(additional)
+
+    phase_2_rows_data = []
+    for row in phase_2_worksheet.iter_rows(min_row=2, values_only=False):
+        row_data = [cell.value for cell in row]  # This will get you the cell values
+        phase_2_rows_data.append(row_data)
+
+    phase2_colnames = [cell.value for cell in phase_2_worksheet[1]]
+    phase_2_surveys = pd.DataFrame(phase_2_rows_data, columns=phase2_colnames)
+    # Drop all of the occurances of "OFFICE USE ONLY" columns
+    phase_2_surveys = phase_2_surveys.drop(columns=[c for c in phase_2_surveys.columns if "OFFICE USE ONLY" in c])
+    common_columns = list({c for c in phase_2_surveys.columns if c in additional.columns})
+    additional_filtered = additional[common_columns]
+
+    further_unitas_completed_surveys = pd.concat(
+        [phase_2_surveys, additional_filtered],
+        axis=0,
+        ignore_index=True
+    )
+
+    # We match these back to the asset list

From dc80313eca2119703e161c6a6ad1c9380f1cc886 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 9 Apr 2024 14:57:55 +0100
Subject: [PATCH 185/262] merging EPC data and survey outcomes to asset list

---
 .../ha_15_32/ha_analysis_batch_3.py           | 413 ++++++++++++++----
 1 file changed, 334 insertions(+), 79 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index de2c0e6a..35bb63fe 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -3459,7 +3459,7 @@ class DataLoader:
                     "not eligible",
                     asset_list["ECO Eligibility"]
                 )
-                asset_list = asset_list.drop(columns=["has_eco3"])
+                # asset_list = asset_list.drop(columns=["has_eco3"])
 
             # Report on sales
             sales_report = {}
@@ -6778,6 +6778,339 @@ def identify_eco_works(loader):
     breakdowns = breakdowns.fillna(0)
 
 
+def unitas_data_prep(loader):
+    #####
+    # Adhoc - for UNITAS, stripping out additional surveys that have been completed
+    unitas_data = loader.data["HA50"].copy()
+    unitas_asset_list = unitas_data["asset_list"].copy()
+    unitas_survey_sheet = unitas_data["survey_list"].copy()
+
+    # We remove the surveyed properties from the asset sheet
+    unitas_survey_sheet = unitas_survey_sheet[~pd.isnull(unitas_survey_sheet["asset_list_row_id"])]
+    unitas_asset_list = unitas_asset_list.merge(
+        unitas_survey_sheet[["asset_list_row_id", "installation_status"]],
+        how="left",
+        on="asset_list_row_id"
+    )
+    unitas_asset_list = unitas_asset_list[pd.isnull(unitas_asset_list["installation_status"])]
+    unitas_asset_list = unitas_asset_list.drop(columns=["installation_status"])
+
+    # We read in the data for the further completed surveys
+    unitas_phase_1_workbook = openpyxl.load_workbook(
+        "local_data/ha_data/UNITAS ( STOKE) MASTER ROLLING SHEET UPDATED 8.4.24 K - no password.xlsx"
+    )
+    phase_1_worksheet = unitas_phase_1_workbook["ECO 4 - PHASE 1"]
+    phase_2_worksheet = unitas_phase_1_workbook["ECO4 - PHASE 2"]
+    phase1_colnames = [cell.value for cell in phase_1_worksheet[1]]
+    phase_1_rows_data = []
+    for row in phase_1_worksheet.iter_rows(min_row=2, values_only=False):
+        row_data = [cell.value for cell in row]  # This will get you the cell values
+        phase_1_rows_data.append(row_data)
+
+    phase_1_surveys = pd.DataFrame(phase_1_rows_data, columns=phase1_colnames)
+
+    # Correct phase 1 surveys in the same fashion as the previous approach
+    phase_1_surveys = DataLoader.correct_ha50_survey_list(phase_1_surveys.copy())
+
+    # We check all phase 1 surveys are contained in the data we had before
+    additional = []
+    for _, row in tqdm(phase_1_surveys.iterrows(), total=len(phase_1_surveys)):
+        # We look for the entry in the old survey sheet:
+        # matched_uprn = unitas_survey_sheet[unitas_survey_sheet["EPR UPRN NUMBER"] == row["UPRN"]]
+        # if matched_uprn.shape[0] == 1:
+        #     continue
+
+        matched_1 = unitas_survey_sheet[
+            (unitas_survey_sheet["Post Code"] == row["Post Code"]) &
+            (unitas_survey_sheet["NO."] == row["NO."])
+            ]
+
+        if matched_1.shape[0] == 1:
+            continue
+
+        matched_2 = unitas_survey_sheet[
+            (unitas_survey_sheet["Street / Block Name"] == row["Street / Block Name"]) &
+            (unitas_survey_sheet["NO."] == row["NO."])
+            ]
+
+        if matched_2.shape[0] == 1:
+            continue
+
+        additional.append(row.to_dict())
+    additional = pd.DataFrame(additional)
+
+    phase_2_rows_data = []
+    for row in phase_2_worksheet.iter_rows(min_row=2, values_only=False):
+        row_data = [cell.value for cell in row]  # This will get you the cell values
+        phase_2_rows_data.append(row_data)
+
+    phase2_colnames = [cell.value for cell in phase_2_worksheet[1]]
+    phase_2_surveys = pd.DataFrame(phase_2_rows_data, columns=phase2_colnames)
+    # Drop all of the occurances of "OFFICE USE ONLY" columns
+    phase_2_surveys = phase_2_surveys.drop(columns=[c for c in phase_2_surveys.columns if "OFFICE USE ONLY" in c])
+    common_columns = list({c for c in phase_2_surveys.columns if c in additional.columns})
+    additional_filtered = additional[common_columns]
+
+    further_unitas_completed_surveys = pd.concat(
+        [phase_2_surveys, additional_filtered],
+        axis=0,
+        ignore_index=True
+    )
+
+    # Add a phase 2 key
+    further_unitas_completed_surveys["survey_list_row_id"] = [
+        "unitas_phase_2" + str(i) for i in further_unitas_completed_surveys.index
+    ]
+
+    not_in_asset_list = [
+        "unitas_phase_20", "unitas_phase_234", "unitas_phase_2163", "unitas_phase_2173", "unitas_phase_2374"
+    ]
+
+    additional_postcodes = ["st28bg"]
+
+    full_asset_list = unitas_data["asset_list"].copy()
+    full_asset_list["matching_postcode"] = full_asset_list["matching_postcode"].str.lower().str.replace(" ", "")
+    further_unitas_completed_surveys["Post Code"] = further_unitas_completed_surveys["Post Code"].str.replace(
+        "ST 5DT", "ST3 5DT"
+    )
+
+    # We match these back to the asset list
+    matching_lookup = []
+    for _, row in tqdm(further_unitas_completed_surveys.iterrows(), total=len(further_unitas_completed_surveys)):
+
+        if row["survey_list_row_id"] in not_in_asset_list:
+            continue
+
+        postcode_lower = row["Post Code"].lower().strip().replace(" ", "")
+        if postcode_lower in additional_postcodes:
+            continue
+
+        # Confirmed not in asset lsit
+        # Filter asset list on postcode
+        df = full_asset_list[
+            full_asset_list["matching_postcode"].str.contains(postcode_lower)
+        ]
+
+        df = df[df["HouseNo"] == str(row["NO."])]
+
+        if df.shape[0] != 1:
+            raise Exception("NOT FOUND")
+
+        matching_lookup.append(
+            {
+                "survey_list_row_id": row["survey_list_row_id"],
+                "asset_list_row_id": df["asset_list_row_id"].values[0],
+            }
+        )
+
+    matching_lookup = pd.DataFrame(matching_lookup)
+    matching_lookup["phase_2_surveyed"] = True
+
+    # We merge this onto the asset list and remove the rows
+    unitas_asset_list = unitas_asset_list.merge(
+        matching_lookup, how="left", on="asset_list_row_id"
+    )
+    # Drop rows where phase_2_surveyed is populated
+    unitas_asset_list = unitas_asset_list[
+        pd.isnull(unitas_asset_list["phase_2_surveyed"])
+    ]
+
+    # We add in the new CIGA submissions
+    unitas_round_2_ciga_workbook = openpyxl.load_workbook("local_data/ha_data/Unitas second round CIGA checks.xlsx")
+    ciga_round_2_worksheet = unitas_round_2_ciga_workbook["Worksheet"]
+    ciga_round_2_colnames = [cell.value for cell in ciga_round_2_worksheet[1]]
+    round_2_rows_data = []
+    for row in ciga_round_2_worksheet.iter_rows(min_row=2, values_only=False):
+        row_data = [cell.value for cell in row]  # This will get you the cell values
+        round_2_rows_data.append(row_data)
+
+    ciga_round_2 = pd.DataFrame(round_2_rows_data, columns=ciga_round_2_colnames)
+    # We merge the ciga sheet to the asset list
+    ciga_dependent_asset_list = unitas_asset_list[
+        unitas_asset_list["ECO Eligibility"].str.contains("subject to ciga")
+    ].copy()
+
+    # We merge the ciga sheet to the asset list
+    ciga_round_2_matched = ciga_dependent_asset_list.merge(
+        ciga_round_2, how="inner", on=["Address Line 1", "Post Code"]
+    )
+    # Filter on just the properties that had no guarantee
+    ciga_round_2_matched = ciga_round_2_matched[ciga_round_2_matched["Guarantee"] == "No"]
+
+    # ECO Eligibility
+    # not eligible              9227
+    # failed ciga               2711
+    # eco4 (subject to ciga)    2238
+    # eco4 - passed ciga         901
+    # gbis                       114
+    # eco4                        91
+
+    # We filter on the properties we're looking to re-survey
+    unitas_properties_to_survey = unitas_asset_list[
+        unitas_asset_list["ECO Eligibility"].isin(
+            [
+                "eco4 - passed ciga",
+                "eco4"
+            ]
+        )
+    ].copy()
+
+    unitas_properties_to_survey = pd.concat(
+        [
+            unitas_properties_to_survey,
+            ciga_round_2_matched[unitas_properties_to_survey.columns]
+        ]
+    )
+
+    epc_api_key = "a2Nvbm5rb3dsZXNzYXJAZ21haWwuY29tOjY5MGJiMWM0NmIyOGI5ZDUxYzAxMzQzYzNiZGNlZGJjZDNmODQwMzA="
+
+    # We now retrieve the lastest EPC data
+    epc_data = []
+    for _, unitas_property in tqdm(unitas_properties_to_survey.iterrows(), total=len(unitas_properties_to_survey)):
+        property_type, _ = get_property_type_and_built_form(property_meta=unitas_property, ha_name="HA50")
+
+        full_address = unitas_property["matching_address"]
+
+        searcher = SearchEpc(
+            address1=str(unitas_property["HouseNo"]),
+            postcode=unitas_property["matching_postcode"],
+            auth_token=epc_api_key,
+            os_api_key="",
+            property_type=property_type,
+            full_address=full_address,
+            fast=True
+        )
+        # Force the skipping of estimating the EPC
+        searcher.ordnance_survey_client.property_type = None
+        searcher.ordnance_survey_client.built_form = None
+
+        searcher.find_property(skip_os=True)
+        if searcher.newest_epc is None:
+            continue
+
+        epc = {
+            "asset_list_row_id": unitas_property["asset_list_row_id"],
+            **searcher.newest_epc.copy()
+        }
+
+        epc_data.append(epc)
+
+    epc_df = pd.DataFrame(epc_data)
+    # Pull out just the columns we need
+    epc_df = epc_df[
+        [
+            "asset_list_row_id",
+            "address1", "postcode",
+            "current-energy-efficiency",
+            "current-energy-rating",
+            "inspection-date",
+            "transaction-type",
+            "built-form"
+        ]
+    ]
+
+    epc_df["EPC Rating"] = (
+        epc_df["current-energy-efficiency"].astype(str) +
+        epc_df["current-energy-rating"].astype(str)
+    )
+
+    # Merge onto the Unitas data:
+    unitas_properties_to_survey_full = unitas_properties_to_survey.merge(
+        epc_df[
+            [
+                "asset_list_row_id",
+                "EPC Rating",
+                "inspection-date",
+                "transaction-type",
+                "built-form"
+            ]
+        ],
+        how="left",
+        on="asset_list_row_id"
+    )
+
+    unitas_properties_to_survey_full["ECO Eligibility"] = unitas_properties_to_survey_full["ECO Eligibility"].replace(
+        "eco4 (subject to ciga)", "eco4 - passed ciga, phase 2 check"
+    )
+
+    for col in ["EPC Rating", "inspection-date", "transaction-type", "built-form"]:
+        unitas_properties_to_survey_full[col] = np.where(
+            pd.isnull(unitas_properties_to_survey_full[col]),
+            "No EPC found",
+            unitas_properties_to_survey_full[col]
+        )
+        unitas_properties_to_survey_full[col] = unitas_properties_to_survey_full[col].fillna(
+            "No EPC found"
+        )
+        unitas_properties_to_survey_full[col] = unitas_properties_to_survey_full[col].astype(str)
+
+    unitas_properties_to_survey_full = unitas_properties_to_survey_full.rename(
+        columns={
+            "inspection-date": "Last EPC Inspection Date",
+            "transaction-type": "Last EPC Reason",
+            "built-form": "Last EPC Built Form",
+        }
+    )
+
+    # We now match to the survey outcomes
+    unitas_survey_outcomes_workbook = openpyxl.load_workbook(
+        "local_data/ha_data/UNITAS - survey outcomes 26.03.2024.xlsx"
+    )
+    unitas_survey_outcomes_worksheet = unitas_survey_outcomes_workbook["OUTCOMES"]
+    unitas_outcomes_colnames = [cell.value for cell in unitas_survey_outcomes_worksheet[2]]
+    outcomes_rows_data = []
+    for row in unitas_survey_outcomes_worksheet.iter_rows(min_row=3, values_only=False):
+        row_data = [cell.value for cell in row]  # This will get you the cell values
+        outcomes_rows_data.append(row_data)
+
+    unitas_outcomes = pd.DataFrame(outcomes_rows_data, columns=unitas_outcomes_colnames)
+    unitas_outcomes = unitas_outcomes.rename(
+        columns={
+            "Notes                 (If 'no answer' under outcomes, have you checked around the property for access "
+            "issues where possible?)": "Notes"
+        }
+    )
+
+    unitas_outcomes["Postcode"].unique()
+    eg1 = unitas_properties_to_survey_full[
+        (unitas_properties_to_survey_full["Post Code"] == "ST6 6RF")
+    ]
+    eg1_outcomes = unitas_outcomes[
+        (unitas_outcomes["Postcode"] == "ST6 6RF")
+    ]
+
+    # Merge outcomes onto properties to survey. Will probably have to do algorithmically
+    full_asset_list["matching_postcode_nospace"] = full_asset_list["matching_postcode"].str.lower().str.replace(" ", "")
+    outcome_matching = []
+    for _, outcome in tqdm(unitas_outcomes.iterrows(), total=len(unitas_outcomes)):
+        # We search for the corresponding entry in the asset list
+        postcode_lower = outcome["Postcode"].lower().strip().replace(" ", "")
+
+        # Confirmed not in asset lsit
+        # Filter asset list on postcode
+        df = unitas_properties_to_survey_full[
+            unitas_properties_to_survey_full["matching_postcode_nospace"].str.contains(postcode_lower)
+        ]
+
+        df = df[df["HouseNo"] == str(outcome["No."])]
+        if df.empty:
+            continue
+
+        if df.shape[0] == 1:
+            outcome_matching.append(
+                {
+                    "asset_list_row_id": df["asset_list_row_id"].values[0],
+                    **outcome.to_dict()
+                }
+            )
+            continue
+
+        raise Exception("something went wrong")
+
+    # Store as an excel
+    unitas_properties_to_survey_full.to_excel("Unitas - phase 2 properties to Survey.xlsx")
+
+
 def app():
     """
     This app contains the housin association analysis for HAs 1, 6, 14, 39 and 107.
@@ -6907,81 +7240,3 @@ def app():
         december_figures["ECO4 remaining"]
     )
     december_figures["ECO4 remaining"].sum()
-
-    # Adhoc - for UNITAS, stripping out additional surveys that have been completed
-    unitas_data = loader.data["HA50"].copy()
-    unitas_asset_list = unitas_data["asset_list"].copy()
-    unitas_survey_sheet = unitas_data["survey_list"].copy()
-    # We remove the surveyed properties from the asset sheet
-    unitas_survey_sheet = unitas_survey_sheet[~pd.isnull(unitas_survey_sheet["asset_list_row_id"])]
-    unitas_asset_list = unitas_asset_list.merge(
-        unitas_survey_sheet[["asset_list_row_id", "installation_status"]],
-        how="left",
-        on="asset_list_row_id"
-    )
-    unitas_asset_list = unitas_asset_list[pd.isnull(unitas_asset_list["installation_status"])]
-    unitas_asset_list = unitas_asset_list.drop(columns=["installation_status"])
-
-    # We read in the data for the further completed surveys
-    unitas_phase_1_workbook = openpyxl.load_workbook(
-        "local_data/ha_data/UNITAS ( STOKE) MASTER ROLLING SHEET UPDATED 8.4.24 K - no password.xlsx"
-    )
-    phase_1_worksheet = unitas_phase_1_workbook["ECO 4 - PHASE 1"]
-    phase_2_worksheet = unitas_phase_1_workbook["ECO4 - PHASE 2"]
-    phase1_colnames = [cell.value for cell in phase_1_worksheet[1]]
-    phase_1_rows_data = []
-    for row in phase_1_worksheet.iter_rows(min_row=2, values_only=False):
-        row_data = [cell.value for cell in row]  # This will get you the cell values
-        phase_1_rows_data.append(row_data)
-
-    phase_1_surveys = pd.DataFrame(phase_1_rows_data, columns=phase1_colnames)
-
-    # Correct phase 1 surveys in the same fashion as the previous approach
-    phase_1_surveys = DataLoader.correct_ha50_survey_list(phase_1_surveys.copy())
-
-    # We check all phase 1 surveys are contained in the data we had before
-    additional = []
-    for _, row in tqdm(phase_1_surveys.iterrows(), total=len(phase_1_surveys)):
-        # We look for the entry in the old survey sheet:
-        # matched_uprn = unitas_survey_sheet[unitas_survey_sheet["EPR UPRN NUMBER"] == row["UPRN"]]
-        # if matched_uprn.shape[0] == 1:
-        #     continue
-
-        matched_1 = unitas_survey_sheet[
-            (unitas_survey_sheet["Post Code"] == row["Post Code"]) &
-            (unitas_survey_sheet["NO."] == row["NO."])
-            ]
-
-        if matched_1.shape[0] == 1:
-            continue
-
-        matched_2 = unitas_survey_sheet[
-            (unitas_survey_sheet["Street / Block Name"] == row["Street / Block Name"]) &
-            (unitas_survey_sheet["NO."] == row["NO."])
-            ]
-
-        if matched_2.shape[0] == 1:
-            continue
-
-        additional.append(row.to_dict())
-    additional = pd.DataFrame(additional)
-
-    phase_2_rows_data = []
-    for row in phase_2_worksheet.iter_rows(min_row=2, values_only=False):
-        row_data = [cell.value for cell in row]  # This will get you the cell values
-        phase_2_rows_data.append(row_data)
-
-    phase2_colnames = [cell.value for cell in phase_2_worksheet[1]]
-    phase_2_surveys = pd.DataFrame(phase_2_rows_data, columns=phase2_colnames)
-    # Drop all of the occurances of "OFFICE USE ONLY" columns
-    phase_2_surveys = phase_2_surveys.drop(columns=[c for c in phase_2_surveys.columns if "OFFICE USE ONLY" in c])
-    common_columns = list({c for c in phase_2_surveys.columns if c in additional.columns})
-    additional_filtered = additional[common_columns]
-
-    further_unitas_completed_surveys = pd.concat(
-        [phase_2_surveys, additional_filtered],
-        axis=0,
-        ignore_index=True
-    )
-
-    # We match these back to the asset list

From f0c4ca0143ee886ba84960b00e3f2700b6047429 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 10 Apr 2024 11:14:33 +0100
Subject: [PATCH 186/262] completed unitas

---
 .../ha_15_32/ha_analysis_batch_3.py           | 46 ++++++++++++++++++-
 1 file changed, 45 insertions(+), 1 deletion(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 35bb63fe..f99c7b1a 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -7106,9 +7106,53 @@ def unitas_data_prep(loader):
             continue
 
         raise Exception("something went wrong")
+    outcome_matching = pd.DataFrame(outcome_matching)
+
+    # We can have duplicate matches, so we format the Date letter sent column and retrieve the newest outcome
+    outcome_matching["Date letters sent"] = outcome_matching["Date letters sent"].str.lower()
+    outcome_matching["Extracted Date"] = outcome_matching["Date letters sent"].str.extract(
+        r'(?:w[./]c )(\d{2}\.\d{2}\.\d{4})')
+    outcome_matching["Extracted Date"] = pd.to_datetime(outcome_matching["Extracted Date"], format='%d.%m.%Y')
+    # We sort by asset_list_row_id and extracted date, and retrieve the newest
+    outcome_matching = outcome_matching.sort_values(["asset_list_row_id", "Extracted Date"], ascending=[True, False])
+
+    # Some properties will have multiple outcomes - for these, we re-format
+    outcome_matching_grouped = []
+    for asset_list_row_id, grouped_data in outcome_matching.groupby("asset_list_row_id"):
+        if grouped_data.shape[0] == 1:
+            outcome_matching_grouped.append(
+                {
+                    "Number of previous visits": 1,
+                    **grouped_data.to_dict("records")[0]
+                }
+            )
+            continue
+        if grouped_data.shape[0] == 2:
+            newest_visit = grouped_data.head(1)
+            oldest_visit = grouped_data.tail(1)[['Outcomes', 'Surveyor', 'Notes', 'Date letters sent']].add_suffix(
+                " second visit")
+            to_append = {
+                "Number of previous visits": 2,
+                **newest_visit.to_dict("records")[0],
+                **oldest_visit.to_dict("records")[0]
+            }
+            outcome_matching_grouped.append(to_append)
+        else:
+            raise Exception("something went wrong")
+
+    outcome_matching_grouped = pd.DataFrame(outcome_matching_grouped)
+
+    unitas_properties_to_survey_with_outcomes = unitas_properties_to_survey_full.merge(
+        outcome_matching_grouped, how="left", on="asset_list_row_id"
+    )
+    unitas_properties_to_survey_with_outcomes["Number of previous visits"] = (
+        unitas_properties_to_survey_with_outcomes["Number of previous visits"].fillna(0)
+    )
 
     # Store as an excel
-    unitas_properties_to_survey_full.to_excel("Unitas - phase 2 properties to Survey.xlsx")
+    unitas_properties_to_survey_with_outcomes.to_excel("Unitas - phase 2 properties to Survey.xlsx")
+
+    unitas_properties_to_survey_with_outcomes["Last EPC Built Form"].value_counts()
 
 
 def app():

From cf7627a8d7fa06df445faf7637e06eefd7f8764b Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 10 Apr 2024 12:04:17 +0100
Subject: [PATCH 187/262] started setting up asset list and gathering council
 tax bands

---
 etl/customers/immo/pilot/asset_list.py | 44 ++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)
 create mode 100644 etl/customers/immo/pilot/asset_list.py

diff --git a/etl/customers/immo/pilot/asset_list.py b/etl/customers/immo/pilot/asset_list.py
new file mode 100644
index 00000000..33f79729
--- /dev/null
+++ b/etl/customers/immo/pilot/asset_list.py
@@ -0,0 +1,44 @@
+import os
+
+import pandas as pd
+from tqdm import tqdm
+
+from dotenv import load_dotenv
+from utils.s3 import read_excel_from_s3
+from backend.SearchEpc import SearchEpc
+from epc_api.client import EpcClient
+from utils.s3 import save_csv_to_s3
+
+# Read in the .env file in backend
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+USER_ID = 8
+PORTFOLIO_ID = 70
+
+council_tax_bands = [
+    {'address': '8 Corporation Road', 'postcode': 'DY2 7PX', 'band': 'A'},
+    {'address': '21 Wells Road', 'postcode': 'DY5 3TB', 'band': 'A'},
+    {'address': '27 Milton Road', 'postcode': 'WV14 8HZ', 'band': 'A'},
+    {'address': '195 Ashenhurst Road', 'postcode': 'DY1 2JB', 'band': 'A'},
+    {'address': '53 Bromley', 'postcode': 'DY5 4PJ', 'band': 'A'},
+    {'address': '91 Osprey Drive', 'postcode': 'DY1 2JS', 'band': 'B'},
+    {'address': '47 Fairfield Road', 'postcode': 'DY8 5UJ', 'band': 'B'},
+    {'address': '150 Huntingtree Road', 'postcode': 'B63 4HP', 'band': 'C'},
+    {'address': '6 Beech Road', 'postcode': 'DY1 4BP', 'band': 'A'},
+    {'address': '5 Oaklands', 'postcode': 'B62 0JA', 'band': 'A'},
+]
+
+
+def app():
+    raw_asset_list = read_excel_from_s3(
+        bucket_name="retrofit-datalake-dev",
+        file_key="customers/Immo/IMMO Sample Assets_Dudley.xlsx",
+        header_row=0
+    )
+    raw_asset_list = raw_asset_list.drop(columns=["Unnamed: 0"])
+    # Extract address and postcode
+    raw_asset_list["address"] = raw_asset_list["Full Address"].str.split(",").str[0]
+    raw_asset_list["postcode"] = raw_asset_list["Full Address"].str.split(",").str[-1].str.strip()
+
+    raw_asset_list[["address", "postcode"]].to_dict("records")

From b791ecb054f0e5be39f91f78771f74ed80fe904d Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 10 Apr 2024 12:08:47 +0100
Subject: [PATCH 188/262] set up asset list

---
 etl/customers/immo/pilot/asset_list.py | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/etl/customers/immo/pilot/asset_list.py b/etl/customers/immo/pilot/asset_list.py
index 33f79729..269ffe00 100644
--- a/etl/customers/immo/pilot/asset_list.py
+++ b/etl/customers/immo/pilot/asset_list.py
@@ -41,4 +41,23 @@ def app():
     raw_asset_list["address"] = raw_asset_list["Full Address"].str.split(",").str[0]
     raw_asset_list["postcode"] = raw_asset_list["Full Address"].str.split(",").str[-1].str.strip()
 
-    raw_asset_list[["address", "postcode"]].to_dict("records")
+    council_tax_bands = pd.DataFrame(council_tax_bands)
+    asset_list = raw_asset_list.merge(council_tax_bands, how="left", on=["address", "postcode"])
+
+    # Store the data in s3
+    filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
+    save_csv_to_s3(
+        dataframe=asset_list,
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename
+    )
+
+    body = {
+        "portfolio_id": str(PORTFOLIO_ID),
+        "housing_type": "Private",
+        "goal": "Increase EPC",
+        "goal_value": "A",
+        "trigger_file_path": filename,
+        "budget": None,
+    }
+    print(body)

From 5079170a25066e4ed3ab96c7a5034f1ddce5ada2 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 10 Apr 2024 13:34:15 +0100
Subject: [PATCH 189/262] pulled valuations for immo pilot from Zoopla

---
 .idea/Model.iml                |  2 +-
 .idea/misc.xml                 |  2 +-
 backend/app/plan/router.py     | 10 ++++++++++
 backend/ml_models/Valuation.py | 11 +++++++++++
 4 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/.idea/Model.iml b/.idea/Model.iml
index b0f9c00d..4413bb06 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 1122b380..6f308057 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 50b8a837..c71533fa 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -142,6 +142,16 @@ async def trigger_plan(body: PlanTriggerRequest):
                 )
             )
 
+        z = []
+        for p in input_properties:
+            z.append(
+                {
+                    "uprn": p.uprn,
+                    "address": p.address,
+                    "postcode": p.postcode,
+                }
+            )
+
         if not input_properties:
             return Response(status_code=204)
 
diff --git a/backend/ml_models/Valuation.py b/backend/ml_models/Valuation.py
index 2bb7de32..251c016a 100644
--- a/backend/ml_models/Valuation.py
+++ b/backend/ml_models/Valuation.py
@@ -52,6 +52,17 @@ class PropertyValuation:
         10070056829: 76_000,
         10070056920: 76_000,
         10023345463: 76_000,
+        # IMMO Dudley Pilot - search by going to https://www.zoopla.co.uk/property/uprn/{uprn}/
+        90070461: 172_000,  # Based on Zoopla
+        90022227: 181_000,  # Based on Zoopla
+        90106884: 180_000,  # Based on Zoopla
+        90051858: 201_000,  # Based on Zoopla
+        90060989: 172_000,  # Based on Zoopla
+        90048026: 196_000,  # Based on Zoopla
+        90077535: 192_000,  # Based on Zoopla
+        90093693: 279_000,  # Based on Zoopla
+        90055152: 149_000,  # Based on Zoopla
+        90028499: 238_000,  # Based on Zoopla
     }
 
     # We base our valuation uplifts on a number of sources

From 5ac5cd7737a5b632258d130ea0e36057c25b0b6a Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 10 Apr 2024 14:02:19 +0100
Subject: [PATCH 190/262] fixing bug when setting phase for heating controls,
 without a recommendation

---
 backend/app/plan/router.py            | 10 ----------
 recommendations/HeatingRecommender.py |  7 ++++++-
 2 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index c71533fa..50b8a837 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -142,16 +142,6 @@ async def trigger_plan(body: PlanTriggerRequest):
                 )
             )
 
-        z = []
-        for p in input_properties:
-            z.append(
-                {
-                    "uprn": p.uprn,
-                    "address": p.address,
-                    "postcode": p.postcode,
-                }
-            )
-
         if not input_properties:
             return Response(status_code=204)
 
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index aec1f419..91730053 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -361,7 +361,12 @@ class HeatingRecommender:
             self.recommendations = combined_recommendations
         else:
             # We increment the recommendation phase, since the heating controls are separate from the boiler upgrade
-            recommendation_phase += 1
+            # but we'll only upgrade if we have a heating recommendation
+            has_heating_recommendation = any(
+                recommendation["type"] == "heating" for recommendation in self.recommendations
+            )
+            if has_heating_recommendation:
+                recommendation_phase += 1
             # The heating controls recommendation is distrinct from the boiler upgrade recommendation
             # We insert phase into the recommendations for heating controls
             for recommendation in controls_recommender.recommendation:

From 4e4199345511c2aa8e838581cebe9e7c307c1475 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 10 Apr 2024 17:20:49 +0100
Subject: [PATCH 191/262] savings

---
 etl/customers/immo/pilot/asset_list.py           | 13 +------------
 recommendations/optimiser/optimiser_functions.py |  6 +-----
 2 files changed, 2 insertions(+), 17 deletions(-)

diff --git a/etl/customers/immo/pilot/asset_list.py b/etl/customers/immo/pilot/asset_list.py
index 269ffe00..7939a555 100644
--- a/etl/customers/immo/pilot/asset_list.py
+++ b/etl/customers/immo/pilot/asset_list.py
@@ -1,18 +1,7 @@
-import os
-
 import pandas as pd
-from tqdm import tqdm
-
-from dotenv import load_dotenv
 from utils.s3 import read_excel_from_s3
-from backend.SearchEpc import SearchEpc
-from epc_api.client import EpcClient
 from utils.s3 import save_csv_to_s3
 
-# Read in the .env file in backend
-load_dotenv(dotenv_path="backend/.env")
-EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
-
 USER_ID = 8
 PORTFOLIO_ID = 70
 
@@ -28,6 +17,7 @@ council_tax_bands = [
     {'address': '6 Beech Road', 'postcode': 'DY1 4BP', 'band': 'A'},
     {'address': '5 Oaklands', 'postcode': 'B62 0JA', 'band': 'A'},
 ]
+council_tax_bands = pd.DataFrame(council_tax_bands)
 
 
 def app():
@@ -41,7 +31,6 @@ def app():
     raw_asset_list["address"] = raw_asset_list["Full Address"].str.split(",").str[0]
     raw_asset_list["postcode"] = raw_asset_list["Full Address"].str.split(",").str[-1].str.strip()
 
-    council_tax_bands = pd.DataFrame(council_tax_bands)
     asset_list = raw_asset_list.merge(council_tax_bands, how="left", on=["address", "postcode"])
 
     # Store the data in s3
diff --git a/recommendations/optimiser/optimiser_functions.py b/recommendations/optimiser/optimiser_functions.py
index 27838d6e..9860c5ea 100644
--- a/recommendations/optimiser/optimiser_functions.py
+++ b/recommendations/optimiser/optimiser_functions.py
@@ -20,10 +20,6 @@ def prepare_input_measures(property_recommendations, goal, housing_type):
     if not goal_key:
         raise NotImplementedError("Not implemented this gain type - investigate me")
 
-    # We don't include suspended and solid floor insulation as possible measures in private housing, because
-    # of the need to decant the tenant
-    ignored_measures = ["suspended_floor_insulation", "solid_floor_insulation"] if housing_type == "Private" else []
-
     input_measures = []
     for recs in property_recommendations:
         input_measures.append(
@@ -34,7 +30,7 @@ def prepare_input_measures(property_recommendations, goal, housing_type):
                     "gain": rec[goal_key],
                     "type": rec["type"]
                 }
-                for rec in recs if rec["type"] not in ignored_measures
+                for rec in recs
             ]
         )
 

From 346b798c192e4c071640123379c021373d965543 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 10 Apr 2024 17:26:00 +0100
Subject: [PATCH 192/262] removed whitespace

---
 backend/app/plan/router.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 50b8a837..bbf9261b 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -75,7 +75,6 @@ async def trigger_plan(body: PlanTriggerRequest):
     logger.info("Connecting to db")
     session = sessionmaker(bind=db_engine)()
     created_at = datetime.now().isoformat()
-
     # TODO: We should store the trigger file path in the database with the plan so we can track the file that
     #       triggered the plan
 

From e0e60f8c9822aec63e1acb74bdb037a8a4840210 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 10 Apr 2024 17:26:23 +0100
Subject: [PATCH 193/262] added whitespace

---
 backend/app/plan/router.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index bbf9261b..4b4d45e7 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -75,6 +75,7 @@ async def trigger_plan(body: PlanTriggerRequest):
     logger.info("Connecting to db")
     session = sessionmaker(bind=db_engine)()
     created_at = datetime.now().isoformat()
+    
     # TODO: We should store the trigger file path in the database with the plan so we can track the file that
     #       triggered the plan
 

From 505fe0736becf7ad649d24ff68bf902825239b02 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 10 Apr 2024 18:46:51 +0100
Subject: [PATCH 194/262] Updating optimiser to only optimise solar
 recommendations that include the battery

---
 backend/app/plan/router.py                       |  7 ++-----
 recommendations/SolarPvRecommendations.py        |  3 ++-
 recommendations/optimiser/optimiser_functions.py | 12 +++++++-----
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 4b4d45e7..6f179c79 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -75,7 +75,7 @@ async def trigger_plan(body: PlanTriggerRequest):
     logger.info("Connecting to db")
     session = sessionmaker(bind=db_engine)()
     created_at = datetime.now().isoformat()
-    
+
     # TODO: We should store the trigger file path in the database with the plan so we can track the file that
     #       triggered the plan
 
@@ -242,7 +242,7 @@ async def trigger_plan(body: PlanTriggerRequest):
                 expected_adjusted_energy=expected_adjusted_energy
             )
 
-            input_measures = prepare_input_measures(recommendations_with_impact, body.goal, body.housing_type)
+            input_measures = prepare_input_measures(recommendations_with_impact, body.goal)
 
             current_sap_points = int(property_instance.data["current-energy-efficiency"])
             target_sap_points = epc_to_sap_lower_bound(body.goal_value)
@@ -279,9 +279,6 @@ async def trigger_plan(body: PlanTriggerRequest):
                 if ventilation_rec:
                     selected_recommendations.add(ventilation_rec["recommendation_id"])
 
-            # We check if the selected recommendation is wall ventilation and if so, we make sure
-            # mechanical ventilation is selected
-
             # We'll use the set of selected recommendations to filter the recommendations to upload
             final_recommendations = [
                 [
diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py
index 4cf1c1fc..f75003ce 100644
--- a/recommendations/SolarPvRecommendations.py
+++ b/recommendations/SolarPvRecommendations.py
@@ -122,6 +122,7 @@ class SolarPvRecommendations:
                     **cost_result,
                     # This is required for simulating the SAP impact. solar_pv_percentage is between 0 & 1 so we scale
                     # back up here
-                    "photo_supply": 100 * roof_coverage
+                    "photo_supply": 100 * roof_coverage,
+                    "has_battery": has_battery
                 }
             )
diff --git a/recommendations/optimiser/optimiser_functions.py b/recommendations/optimiser/optimiser_functions.py
index 9860c5ea..6159b930 100644
--- a/recommendations/optimiser/optimiser_functions.py
+++ b/recommendations/optimiser/optimiser_functions.py
@@ -1,17 +1,13 @@
-def prepare_input_measures(property_recommendations, goal, housing_type):
+def prepare_input_measures(property_recommendations, goal):
     """
     Basic function to convert recommendations_to_upload to a format that is
     suitable for the optimiser - large
     :param property_recommendations:   object containing the recommendations, created in the plan trigger api
     :param goal:    goal to be optimised for, should be one of the keys in gain_map. E.g. if the gain is SAP points,
                     the goal should reflect that desired gain
-    :param housing_type:    type of housing the recommendations are for - should be one of "Social" or "Private"
     :return:    Nested list of input measures
     """
 
-    if housing_type not in ["Social", "Private"]:
-        raise ValueError("Invalid housing type - investigate me")
-
     goal_map = {
         "Increase EPC": "sap_points"
     }
@@ -22,6 +18,12 @@ def prepare_input_measures(property_recommendations, goal, housing_type):
 
     input_measures = []
     for recs in property_recommendations:
+        if recs[0]["type"] == "solar_pv":
+            # if the recommendation is a solar recommendation without a battery, we exclude it from the optimisation.
+            # That will ensure that the optimiser only considers solar recommendations with batteries, so we don't
+            # under-report the potential cost
+            recs = [r for r in recs if recs["has_battery"]]
+
         input_measures.append(
             [
                 {

From f04b79d6800fce396fdbc5494b66f221d43a9826 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 10 Apr 2024 18:54:58 +0100
Subject: [PATCH 195/262] fixed bug with selecting battery solar recommendations

---
 recommendations/optimiser/optimiser_functions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/recommendations/optimiser/optimiser_functions.py b/recommendations/optimiser/optimiser_functions.py
index 6159b930..d6353eea 100644
--- a/recommendations/optimiser/optimiser_functions.py
+++ b/recommendations/optimiser/optimiser_functions.py
@@ -22,7 +22,7 @@ def prepare_input_measures(property_recommendations, goal):
             # if the recommendation is a solar recommendation without a battery, we exclude it from the optimisation.
             # That will ensure that the optimiser only considers solar recommendations with batteries, so we don't
             # under-report the potential cost
-            recs = [r for r in recs if recs["has_battery"]]
+            recs = [r for r in recs if r["has_battery"]]
 
         input_measures.append(
             [

From 43af0de04732ba737459a1f04ccb50950287c235 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 11 Apr 2024 10:30:36 +0100
Subject: [PATCH 196/262] Updated conditions under which we recommend loft insulation, so
 it is not recommended if the home has more than 200mm insulation in place
 already

---
 recommendations/RoofRecommendations.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/recommendations/RoofRecommendations.py b/recommendations/RoofRecommendations.py
index eb1c6c4f..8d6a91e7 100644
--- a/recommendations/RoofRecommendations.py
+++ b/recommendations/RoofRecommendations.py
@@ -20,8 +20,9 @@ class RoofRecommendations:
 
     DIMINISHING_RETURNS_U_VALUE = 0.14
 
-    # It is recommended that lofts should have at least 270mm of insulation
-    MINIMUM_LOFT_ISULATION_MM = 270
+    # It is recommended that lofts should have at least 270mm of insulation. If the property has more than 200mm of
+    # loft insulation in place already, we do not recommend anything for the moment
+    MINIMUM_LOFT_ISULATION_MM = 200
     # Flat roof should have at least 100mm of insulation
     MINIMUM_FLAT_ROOF_ISULATION_MM = 100
 
@@ -71,7 +72,7 @@ class RoofRecommendations:
         # Building regulations part L recommend installing at least 270mm of insulation, however generally we
         # experience diminishing returns in terms of SAP once we go beyond around 150mm of insulation
         # This only holds true for pitched roofs.
-        if (insulation_thickness >= self.MINIMUM_LOFT_ISULATION_MM) and self.property.roof["is_pitched"]:
+        if (insulation_thickness > self.MINIMUM_LOFT_ISULATION_MM) and self.property.roof["is_pitched"]:
             return
 
         if (insulation_thickness >= self.MINIMUM_FLAT_ROOF_ISULATION_MM) and self.property.roof["is_flat"]:

From db6fd58af4e89dcbdbecd436f2a9328ea6924521 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 11 Apr 2024 10:56:17 +0100
Subject: [PATCH 197/262] changing the logic we use to recommend a combi boiler

---
 backend/Property.py                   | 13 +++++++++-
 recommendations/HeatingRecommender.py | 36 ++++++++++++++++++---------
 2 files changed, 36 insertions(+), 13 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index d3dd8395..6f2e648d 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -55,7 +55,13 @@ class Property:
 
     DATA_ANOMALY_MATCHES = DATA_ANOMALY_MATCHES
 
-    def __init__(self, id, postcode, address, epc_record):
+    # Surplus information, that can be provided as optional inputs, by a customer
+    n_bathrooms = None
+    n_bedrooms = None
+
+    def __init__(
+        self, id, postcode, address, epc_record, **kwargs
+    ):
 
         self.epc_record = epc_record
 
@@ -133,6 +139,11 @@ class Property:
 
         self.recommendations_scoring_data = []
 
+    def parse_kwargs(self, kwargs):
+        # We extract the elements from kwargs that we recognise. Anything additional is ignored
+        self.n_bathrooms = kwargs.get("n_bathrooms", None)
+        self.n_bedrooms = kwargs.get("n_bedrooms", None)
+
     def create_base_difference_epc_record(self, cleaned_lookup: dict):
         """
         Creates a EPCDifferenceRecord object, which is used to store the difference between the current and
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 91730053..d4fe0a90 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -34,7 +34,6 @@ class HeatingRecommender:
         if has_electric_heating_description or no_heating_no_mains:
             # Recommend high heat retention storage heaters
             self.recommend_electric_storage_heaters(phase=phase, system_change=True, heating_controls_only=False)
-            return
 
         # if the property has mains heating with boiler and radiators, we recommend optimal heating controls
         has_boiler = self.property.main_heating["clean_description"] in ["Boiler and radiators, mains gas"]
@@ -44,9 +43,16 @@ class HeatingRecommender:
             'No system present, electric heaters assumed'
         ] and self.property.data["mains-gas-flag"]
 
-        if has_boiler or no_heating_has_mains:
-            self.recommend_boiler_upgrades(phase=phase, no_heating_has_mains=no_heating_has_mains)
-            return
+        # We also check if the property has electric heating, but it has access to the mains gas
+        electic_heating_has_mains = has_electric_heating_description and self.property.data["mains-gas-flag"]
+
+        if has_boiler or no_heating_has_mains or electic_heating_has_mains:
+            # This indicates that the home previously did not have a boiler in place and so would require
+            # an overhaul to the system
+            system_change = not has_boiler
+            self.recommend_boiler_upgrades(phase=phase, system_change=system_change)
+
+        return
 
     @staticmethod
     def check_simulation_difference(old_config, new_config):
@@ -256,12 +262,14 @@ class HeatingRecommender:
 
         return closest_size
 
-    def recommend_boiler_upgrades(self, phase, no_heating_has_mains):
+    def recommend_boiler_upgrades(self, phase, system_change):
         """
         This boiler recommendation will only recommend a like-for-like upgrade, since changing the system
         is generally more expensive
         :param phase:
-        :param no_heating_has_mains: indicaes if the property has no heating system, but has access to the mains gas
+        :param system_change: Indicates if the property would be undergoing a heating system change. This could be true
+                              if the home didn't have a heating system in place, or if the home had electric heating
+                              previously
         :return:
         """
 
@@ -279,17 +287,21 @@ class HeatingRecommender:
                 num_heated_rooms=self.property.data["number-heated-rooms"],
             )
 
-            # If heating and hot water come from the mains, we need a combi boiler, otherwise we need a regular boiler
-            hotwater_from_mains = self.property.hotwater["clean_description"] in ["From main system"]
-
-            is_combi = hotwater_from_mains or no_heating_has_mains
+            # We recommend a combi boiler under the following conditions
+            # 1) If there are 4 or fewer rooms (we don't use heqted rooms because none of the rooms could be
+            #    heated if there is no existing heating system).
+            # 2) There is more than 1 bathroom
+            is_combi = (
+                (self.property.data["number-heated-rooms"] <= 4) or
+                (self.property.n_bathrooms not in [None, 0, 1])
+            )
             if is_combi:
                 description = "Upgrade to a new combi boiler"
             else:
-                description = "Upgrade to a new boiler"
+                description = "Upgrade to a new gas condensing boiler"
 
             simulation_config = {"mainheat_energy_eff_ending": "Good"}
-            if no_heating_has_mains:
+            if system_change:
                 # Installation of a boiler improves the hot water system so we need to reflect this in
                 # the outcome of the recommendation
                 heating_ending_config = MainHeatAttributes("Boiler and radiators, mains gas").process()

From ac8cf271698788d4479626dae19f09a0027c79aa Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 11 Apr 2024 11:20:29 +0100
Subject: [PATCH 198/262] created extract kwargs to read bathrooms and bedrooms

---
 backend/Property.py        | 22 ++++++++++++++++++++++
 backend/app/plan/router.py |  1 +
 2 files changed, 23 insertions(+)

diff --git a/backend/Property.py b/backend/Property.py
index 6f2e648d..5fe9716e 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -139,6 +139,28 @@ class Property:
 
         self.recommendations_scoring_data = []
 
+    @classmethod
+    def extract_kwargs(cls, kwargs):
+        """
+        This method is to be used in the router, to extract the kwargs from the request and prevent any errors such as
+        non-integer values, or inputs that clash with the __init__ method of this class
+        :param kwargs:
+        :return:
+        """
+        n_bathrooms = kwargs.get("n_bathrooms", None)
+        if n_bathrooms is not None:
+            # We add on a small value to ensure that the number of bathrooms is rounded up, in case the value is 0.5
+            n_bathrooms = int(round(n_bathrooms + 1e-5))
+
+        n_bedrooms = kwargs.get("n_bedrooms", None)
+        if n_bedrooms is not None:
+            n_bedrooms = int(round(n_bedrooms + 1e-5))
+
+        return {
+            "n_bathrooms": n_bathrooms,
+            "n_bedrooms": n_bedrooms,
+        }
+
     def parse_kwargs(self, kwargs):
         # We extract the elements from kwargs that we recognise. Anything additional is ignored
         self.n_bathrooms = kwargs.get("n_bathrooms", None)
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 6f179c79..7dc11bb9 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -139,6 +139,7 @@ async def trigger_plan(body: PlanTriggerRequest):
                     address=epc_searcher.address_clean,
                     postcode=epc_searcher.postcode_clean,
                     epc_record=prepared_epc,
+                    **Property.extract_kwargs(config)
                 )
             )
 

From 2aa2e5947e6d29acf5c82962788a18ad9daf3351 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 11 Apr 2024 11:36:14 +0100
Subject: [PATCH 199/262] adding bedrooms and bathrooms to asset list for immo

---
 .idea/Model.iml                        | 2 +-
 .idea/misc.xml                         | 2 +-
 etl/customers/immo/pilot/asset_list.py | 8 ++++++++
 3 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/.idea/Model.iml b/.idea/Model.iml
index 4413bb06..b0f9c00d 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 6f308057..1122b380 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/etl/customers/immo/pilot/asset_list.py b/etl/customers/immo/pilot/asset_list.py
index 7939a555..9756e00b 100644
--- a/etl/customers/immo/pilot/asset_list.py
+++ b/etl/customers/immo/pilot/asset_list.py
@@ -33,6 +33,14 @@ def app():
 
     asset_list = raw_asset_list.merge(council_tax_bands, how="left", on=["address", "postcode"])
 
+    # We're provided with number of bathrooms and number of bedrooms.
+    asset_list = asset_list.rename(
+        columns={
+            "No. of Beds": "n_bedrooms",
+            "No. of WC's": "n_bathrooms"
+        }
+    )
+
     # Store the data in s3
     filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
     save_csv_to_s3(

From 606fd3a615e2188f78e2721aef9732e5d0d76328 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 11 Apr 2024 11:49:48 +0100
Subject: [PATCH 200/262] Adding parsing of kwargs to Property class

---
 .idea/Model.iml            |  2 +-
 .idea/misc.xml             |  2 +-
 backend/Property.py        |  6 ++++--
 backend/app/plan/router.py | 20 ++++++++++----------
 4 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/.idea/Model.iml b/.idea/Model.iml
index b0f9c00d..4413bb06 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 1122b380..6f308057 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/backend/Property.py b/backend/Property.py
index 5fe9716e..950c1ac9 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -139,6 +139,8 @@ class Property:
 
         self.recommendations_scoring_data = []
 
+        self.parse_kwargs(kwargs)
+
     @classmethod
     def extract_kwargs(cls, kwargs):
         """
@@ -150,11 +152,11 @@ class Property:
         n_bathrooms = kwargs.get("n_bathrooms", None)
         if n_bathrooms is not None:
             # We add on a small value to ensure that the number of bathrooms is rounded up, in case the value is 0.5
-            n_bathrooms = int(round(n_bathrooms + 1e-5))
+            n_bathrooms = int(round(float(n_bathrooms) + 1e-5))
 
         n_bedrooms = kwargs.get("n_bedrooms", None)
         if n_bedrooms is not None:
-            n_bedrooms = int(round(n_bedrooms + 1e-5))
+            n_bedrooms = int(round(float(n_bedrooms) + 1e-5))
 
         return {
             "n_bathrooms": n_bathrooms,
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 7dc11bb9..3cb2027d 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -109,16 +109,16 @@ async def trigger_plan(body: PlanTriggerRequest):
                 session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn
             )
             # if a new record was not created, we don't produduce recommendations
-            if not is_new:
-                continue
-
-            create_property_targets(
-                session,
-                property_id=property_id,
-                portfolio_id=body.portfolio_id,
-                epc_target=body.goal_value,
-                heat_demand_target=None
-            )
+            # if not is_new:
+            #     continue
+            #
+            # create_property_targets(
+            #     session,
+            #     property_id=property_id,
+            #     portfolio_id=body.portfolio_id,
+            #     epc_target=body.goal_value,
+            #     heat_demand_target=None
+            # )
 
             epc_records = {
                 'original_epc': epc_searcher.newest_epc.copy(),

From 69424149510c38f59d1d847cbcef740a287da23b Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 11 Apr 2024 15:40:50 +0100
Subject: [PATCH 201/262] Updating heating recommender to recommend heating
 controls, with the heating change

---
 backend/app/plan/router.py            | 21 ++++++++++-----------
 recommendations/HeatingRecommender.py |  6 +++---
 2 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 3cb2027d..4b91566e 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -108,17 +108,16 @@ async def trigger_plan(body: PlanTriggerRequest):
             property_id, is_new = create_property(
                 session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn
             )
-            # if a new record was not created, we don't produduce recommendations
-            # if not is_new:
-            #     continue
-            #
-            # create_property_targets(
-            #     session,
-            #     property_id=property_id,
-            #     portfolio_id=body.portfolio_id,
-            #     epc_target=body.goal_value,
-            #     heat_demand_target=None
-            # )
+            if not is_new:
+                continue
+
+            create_property_targets(
+                session,
+                property_id=property_id,
+                portfolio_id=body.portfolio_id,
+                epc_target=body.goal_value,
+                heat_demand_target=None
+            )
 
             epc_records = {
                 'original_epc': epc_searcher.newest_epc.copy(),
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index d4fe0a90..6e4b2230 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -48,7 +48,7 @@ class HeatingRecommender:
 
         if has_boiler or no_heating_has_mains or electic_heating_has_mains:
             # This indicates that the home previously did not have a boiler in place and so would require
-            # an overhaul to the system
+            # an overhaul to the system - right now, this is all reasons, apart from if there is an existing boiler
             system_change = not has_boiler
             self.recommend_boiler_upgrades(phase=phase, system_change=system_change)
 
@@ -353,8 +353,8 @@ class HeatingRecommender:
         if not controls_recommender.recommendation:
             return
 
-        if no_heating_has_mains:
-            # We combine the heating and controls recommendations
+        if system_change:
+            # We combine the heating and controls recommendations, in the case of a system change
             boiler_recommendation = self.recommendations[0].copy()
             combined_recommendations = []
             for controls_recommendation in controls_recommender.recommendation:

From 014d51c0605e853351b621fbeafdf8ca3b870cbf Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 11 Apr 2024 16:09:31 +0100
Subject: [PATCH 202/262] fixing the case where we recommend a boiler and new
 heating controls, as well as an improved electrical system

---
 recommendations/HeatingRecommender.py | 36 +++++++++++++--------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 6e4b2230..1813e5e8 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -278,6 +278,7 @@ class HeatingRecommender:
         # We now recommend boiler upgrades, if applicable
         simulation_config = {}
         boiler_costs = {}
+        boiler_recommendation = {}
         if self.property.data["mainheat-energy-eff"] in ["Very Poor", "Poor", "Average"]:
             boiler_size = self.estimate_boiler_size(
                 property_type=self.property.data["property-type"],
@@ -290,10 +291,12 @@ class HeatingRecommender:
             # We recommend a combi boiler under the following conditions
             # 1) If there are 4 or fewer rooms (we don't use heqted rooms because none of the rooms could be
             #    heated if there is no existing heating system).
-            # 2) There is more than 1 bathroom
+            # 2) There are 1 or fewer bathrooms
+            # Otherwise, we recommend a gas condensing boiler, which will server a larger property, that has multiple
+            # bathrooms
             is_combi = (
                 (self.property.data["number-heated-rooms"] <= 4) or
-                (self.property.n_bathrooms not in [None, 0, 1])
+                (self.property.n_bathrooms in [None, 0, 1])
             )
             if is_combi:
                 description = "Upgrade to a new combi boiler"
@@ -328,21 +331,19 @@ class HeatingRecommender:
 
             boiler_costs = self.costs.low_carbon_boiler(is_combi=is_combi, size=f"{boiler_size}kw")
 
-            self.recommendations.append(
-                {
-                    "phase": recommendation_phase,
-                    "parts": [
-                        # TODO
-                    ],
-                    "type": "heating",
-                    "description": description,
-                    "starting_u_value": None,
-                    "new_u_value": None,
-                    "sap_points": None,
-                    "simulation_config": simulation_config,
-                    **boiler_costs
-                }
-            )
+            boiler_recommendation = {
+                "phase": recommendation_phase,
+                "parts": [
+                    # TODO
+                ],
+                "type": "heating",
+                "description": description,
+                "starting_u_value": None,
+                "new_u_value": None,
+                "sap_points": None,
+                "simulation_config": simulation_config,
+                **boiler_costs
+            }
 
         # We recommend the heating controls
         # If the property did not previously have a boiler, we combine
@@ -355,7 +356,6 @@ class HeatingRecommender:
 
         if system_change:
             # We combine the heating and controls recommendations, in the case of a system change
-            boiler_recommendation = self.recommendations[0].copy()
             combined_recommendations = []
             for controls_recommendation in controls_recommender.recommendation:
                 combined_recommendation = self.combine_heating_and_controls(

From 88f43bcc822b4550540c88e7363d920937563072 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 11 Apr 2024 16:49:44 +0100
Subject: [PATCH 203/262] fixed the combi boiler logic

---
 recommendations/HeatingControlRecommender.py | 3 ++-
 recommendations/HeatingRecommender.py        | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/recommendations/HeatingControlRecommender.py b/recommendations/HeatingControlRecommender.py
index 95b5e3b1..76eaba4f 100644
--- a/recommendations/HeatingControlRecommender.py
+++ b/recommendations/HeatingControlRecommender.py
@@ -215,7 +215,8 @@ class HeatingControlRecommender:
             {
                 "type": "heating_control",
                 "parts": [],
-                "description": "Upgrade heating controls to Smart Thermostats, room sensors and smart radiator valves",
+                "description": "Upgrade heating controls to Smart Thermostats, room sensors and smart radiator valves "
+                               "(time & temperature zone control)",
                 **self.costs.time_and_temperature_zone_control(
                     number_heated_rooms=int(self.property.data["number-heated-rooms"])
                 ),
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 1813e5e8..bd4d87a2 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -295,7 +295,7 @@ class HeatingRecommender:
             # Otherwise, we recommend a gas condensing boiler, which will server a larger property, that has multiple
             # bathrooms
             is_combi = (
-                (self.property.data["number-heated-rooms"] <= 4) or
+                (self.property.data["number-heated-rooms"] <= 4) and
                 (self.property.n_bathrooms in [None, 0, 1])
             )
             if is_combi:
@@ -370,7 +370,7 @@ class HeatingRecommender:
                 combined_recommendations.extend(combined_recommendation)
 
             # Overwrite the existing boiler recommendation
-            self.recommendations = combined_recommendations
+            self.recommendations.extend(combined_recommendations)
         else:
             # We increment the recommendation phase, since the heating controls are separate from the boiler upgrade
             # but we'll only upgrade if we have a heating recommendation

From 61584a6320bfd50bb4f18266a09cc1bb1e4e2ba1 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 11 Apr 2024 18:14:38 +0100
Subject: [PATCH 204/262] extend recommendations to cover portable electric
 heaters

---
 recommendations/Costs.py              | 18 ++++++++++++-
 recommendations/HeatingRecommender.py | 37 ++++++++++++++++++++++++---
 2 files changed, 50 insertions(+), 5 deletions(-)

diff --git a/recommendations/Costs.py b/recommendations/Costs.py
index e5ceb0c0..f4ac259b 100644
--- a/recommendations/Costs.py
+++ b/recommendations/Costs.py
@@ -79,6 +79,10 @@ CONVENTIONAL_BOILER_COSTS = {
     "40kw": 1776
 }
 
+# Assumes 3 hours to remove each heater (including re-decorating)
+ROOM_HEATER_REMOVAL_COST = 120
+ROOM_HEATER_REMOVAL_LABOUR_HOURS = 3
+
 
 class Costs:
     """
@@ -1100,7 +1104,7 @@ class Costs:
             "labour_days": labour_days,
         }
 
-    def low_carbon_boiler(self, is_combi, size):
+    def boiler(self, is_combi, size, exising_room_heaters, n_heated_rooms):
         """
         Based on a basic estimate of median value £2600 to install a low carbon combi boiler
         :return:
@@ -1118,6 +1122,18 @@ class Costs:
         labour_cost = labour_rate * self.labour_adjustment_factor * labour_days
         # Add contingency and preliminaries
         labour_cost = labour_cost * (1 + self.CONTINGENCY + self.PRELIMINARIES)
+
+        # if there are existing room heaters, we need to add the cost of removing them
+        if exising_room_heaters:
+            removal_cost = ROOM_HEATER_REMOVAL_COST * n_heated_rooms
+            removal_labour_hours = ROOM_HEATER_REMOVAL_LABOUR_HOURS * n_heated_rooms
+        else:
+            removal_cost = 0
+            removal_labour_hours = 0
+
+        labour_cost = labour_cost + removal_cost
+        labour_days = labour_days + (removal_labour_hours / 8)
+
         vat = labour_cost * self.VAT_RATE
 
         subtotal_before_vat = unit_cost + labour_cost
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index bd4d87a2..14509eea 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -43,14 +43,36 @@ class HeatingRecommender:
             'No system present, electric heaters assumed'
         ] and self.property.data["mains-gas-flag"]
 
+        has_gas_heaters = (
+            self.property.main_heating["clean_description"] in ["Room heaters, mains gas"] and
+            self.property.data["mains-gas-flag"]
+        )
+
         # We also check if the property has electric heating, but it has access to the mains gas
         electic_heating_has_mains = has_electric_heating_description and self.property.data["mains-gas-flag"]
 
-        if has_boiler or no_heating_has_mains or electic_heating_has_mains:
+        portable_heaters_has_mains = (
+            self.property.main_heating["clean_description"] in ["Portable electric heaters assumed for most rooms"] and
+            self.property.data["mains-gas-flag"]
+        )
+
+        if (
+            has_boiler or
+            no_heating_has_mains or
+            electic_heating_has_mains or
+            has_gas_heaters or
+            portable_heaters_has_mains
+        ):
             # This indicates that the home previously did not have a boiler in place and so would require
             # an overhaul to the system - right now, this is all reasons, apart from if there is an existing boiler
             system_change = not has_boiler
-            self.recommend_boiler_upgrades(phase=phase, system_change=system_change)
+            exising_room_heaters = self.property.main_heating["clean_description"] in [
+                "Room heaters, electric", "Room heaters, mains gas"
+            ]
+
+            self.recommend_boiler_upgrades(
+                phase=phase, system_change=system_change, exising_room_heaters=exising_room_heaters
+            )
 
         return
 
@@ -262,7 +284,7 @@ class HeatingRecommender:
 
         return closest_size
 
-    def recommend_boiler_upgrades(self, phase, system_change):
+    def recommend_boiler_upgrades(self, phase, system_change, exising_room_heaters):
         """
         This boiler recommendation will only recommend a like-for-like upgrade, since changing the system
         is generally more expensive
@@ -270,6 +292,8 @@ class HeatingRecommender:
         :param system_change: Indicates if the property would be undergoing a heating system change. This could be true
                               if the home didn't have a heating system in place, or if the home had electric heating
                               previously
+        :param exising_room_heaters: Indicates if the property had room heaters previously - if so, a boiler
+                                     recommendation will need to be accompanied by removal of the room heaters
         :return:
         """
 
@@ -329,7 +353,12 @@ class HeatingRecommender:
                     "hot_water_energy_eff_ending": "Good"
                 }
 
-            boiler_costs = self.costs.low_carbon_boiler(is_combi=is_combi, size=f"{boiler_size}kw")
+            boiler_costs = self.costs.boiler(
+                is_combi=is_combi,
+                size=f"{boiler_size}kw",
+                exising_room_heaters=exising_room_heaters,
+                n_heated_rooms=self.property.data["number-heated-rooms"]
+            )
 
             boiler_recommendation = {
                 "phase": recommendation_phase,

From 3ecd7a974276bb6f4296124c6acf7e55f280e574 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 11 Apr 2024 19:14:49 +0100
Subject: [PATCH 205/262] added simulation for secondary heating

---
 backend/Property.py                   |  6 ++-
 recommendations/Costs.py              | 45 ++++++++++++++++------
 recommendations/HeatingRecommender.py |  2 +-
 recommendations/Recommendations.py    |  8 ++++
 recommendations/SecondaryHeating.py   | 55 +++++++++++++++++++++++++++
 5 files changed, 102 insertions(+), 14 deletions(-)
 create mode 100644 recommendations/SecondaryHeating.py

diff --git a/backend/Property.py b/backend/Property.py
index 950c1ac9..0f5e7e77 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -456,7 +456,9 @@ class Property:
                         "double glazing installed during or after 2002"
                     )
 
-            if recommendation["type"] in ["heating", "hot_water_tank_insulation", "heating_control"]:
+            if recommendation["type"] in [
+                "heating", "hot_water_tank_insulation", "heating_control", "secondary_heating"
+            ]:
                 # We update the data, as defined in the recommendaton
 
                 simulation_config = recommendation["simulation_config"]
@@ -477,7 +479,7 @@ class Property:
                 "loft_insulation", "room_roof_insulation", "flat_roof_insulation",
                 "solid_floor_insulation", "suspended_floor_insulation", "exposed_floor_insulation",
                 "windows_glazing", "solar_pv", "heating", "hot_water_tank_insulation",
-                "heating_control",
+                "heating_control", "secondary_heating"
             ]:
                 raise NotImplementedError(
                     "Implement me, given type %s" % recommendation["type"]
diff --git a/recommendations/Costs.py b/recommendations/Costs.py
index f4ac259b..45c17102 100644
--- a/recommendations/Costs.py
+++ b/recommendations/Costs.py
@@ -1104,6 +1104,28 @@ class Costs:
             "labour_days": labour_days,
         }
 
+    def heater_removal(self, n_rooms):
+        """
+        Estimates the costs of removal of heaters, including the redecoration costs of the space behind the heater
+        :return:
+        """
+
+        removal_cost = ROOM_HEATER_REMOVAL_COST * n_rooms
+        removal_labour_hours = ROOM_HEATER_REMOVAL_LABOUR_HOURS * n_rooms
+
+        vat = removal_cost * self.VAT_RATE
+
+        subtotal_before_vat = removal_cost
+        total_cost = subtotal_before_vat + vat
+
+        return {
+            "total": total_cost,
+            "subtotal": subtotal_before_vat,
+            "vat": vat,
+            "labour_hours": removal_labour_hours,
+            "labour_days": np.ceil(removal_labour_hours / 8),
+        }
+
     def boiler(self, is_combi, size, exising_room_heaters, n_heated_rooms):
         """
         Based on a basic estimate of median value £2600 to install a low carbon combi boiler
@@ -1114,6 +1136,7 @@ class Costs:
         # The unit cost is the cost without VAT
         # We now need to estimate the cost of the works
         labour_days = 2
+        labour_hours = labour_days * 8
         labour_rate = 500
 
         # Average cost of installation is 1 (maybe 2days) at £300 per day
@@ -1123,26 +1146,26 @@ class Costs:
         # Add contingency and preliminaries
         labour_cost = labour_cost * (1 + self.CONTINGENCY + self.PRELIMINARIES)
 
-        # if there are existing room heaters, we need to add the cost of removing them
-        if exising_room_heaters:
-            removal_cost = ROOM_HEATER_REMOVAL_COST * n_heated_rooms
-            removal_labour_hours = ROOM_HEATER_REMOVAL_LABOUR_HOURS * n_heated_rooms
-        else:
-            removal_cost = 0
-            removal_labour_hours = 0
-
-        labour_cost = labour_cost + removal_cost
-        labour_days = labour_days + (removal_labour_hours / 8)
+        # labour_days = labour_days + (removal_labour_hours / 8)
 
         vat = labour_cost * self.VAT_RATE
 
         subtotal_before_vat = unit_cost + labour_cost
         total_cost = subtotal_before_vat + vat
 
+        # if there are existing room heaters, we need to add the cost of removing them
+        if exising_room_heaters:
+            removal_costing = self.heater_removal(n_rooms=n_heated_rooms)
+            # Add the totals to the existing totals
+            total_cost += removal_costing["total"]
+            subtotal_before_vat += removal_costing["subtotal"]
+            labour_hours += removal_costing["labour_hours"]
+            labour_days += removal_costing["labour_days"]
+
         return {
             "total": total_cost,
             "subtotal": subtotal_before_vat,
             "vat": vat,
-            "labour_hours": labour_days * 8,
+            "labour_hours": labour_hours,
             "labour_days": labour_days,
         }
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 14509eea..92457a27 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -319,7 +319,7 @@ class HeatingRecommender:
             # Otherwise, we recommend a gas condensing boiler, which will server a larger property, that has multiple
             # bathrooms
             is_combi = (
-                (self.property.data["number-heated-rooms"] <= 4) and
+                (self.property.number_of_rooms <= 4) and
                 (self.property.n_bathrooms in [None, 0, 1])
             )
             if is_combi:
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 902023dc..68fead16 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -11,6 +11,7 @@ from recommendations.SolarPvRecommendations import SolarPvRecommendations
 from recommendations.WindowsRecommendations import WindowsRecommendations
 from recommendations.HeatingRecommender import HeatingRecommender
 from recommendations.HotwaterRecommendations import HotwaterRecommendations
+from recommendations.SecondaryHeating import SecondaryHeating
 from backend.ml_models.AnnualBillSavings import AnnualBillSavings
 
 
@@ -46,6 +47,7 @@ class Recommendations:
         self.solar_recommender = SolarPvRecommendations(property_instance=property_instance)
         self.heating_recommender = HeatingRecommender(property_instance=property_instance)
         self.hotwater_recommender = HotwaterRecommendations(property_instance=property_instance)
+        self.secondary_heating_recommender = SecondaryHeating(property_instance=property_instance)
 
     def recommend(self):
 
@@ -130,6 +132,12 @@ class Recommendations:
                 property_recommendations.append(self.lighting_recommender.recommendation)
                 phase += 1
 
+        if "secondary_heating" not in self.exclusions:
+            self.secondary_heating_recommender.recommend(phase=phase)
+            if self.secondary_heating_recommender.recommendation:
+                property_recommendations.append(self.secondary_heating_recommender.recommendation)
+                phase += 1
+
         # Renewables
         if "solar_pv" not in self.exclusions:
             self.solar_recommender.recommend(phase=phase)
diff --git a/recommendations/SecondaryHeating.py b/recommendations/SecondaryHeating.py
new file mode 100644
index 00000000..f31c4c05
--- /dev/null
+++ b/recommendations/SecondaryHeating.py
@@ -0,0 +1,55 @@
+from recommendations.Costs import Costs
+from backend.Property import Property
+
+
+class SecondaryHeating:
+    """
+    This class recommends the removal of the secondary heating system for properties that have a primary heating
+    system.
+    """
+
+    # The list of existing heating systems that are accepted
+    ACCEPTED_MAINHEAT_DESCRIPTIONS = ["Boiler and radiators, mains gas"]
+    ACCEPTED_SECONDHEAT_DESCRIPTIONS = ["Room heaters, electric"]
+    # These are the heaters where works are required to remove them
+    FIXED_HEATER_DESCRIPTIONS = ["Room heaters, electric"]
+
+    def __init__(self, property_instance: Property):
+        self.property = property_instance
+        self.costs = Costs(self.property)
+
+        self.recommendation = []
+
+    def recommend(self, phase: int):
+        # Reset
+        self.recommendation = []
+
+        if self.property.main_heating["clean_description"] not in self.ACCEPTED_MAINHEAT_DESCRIPTIONS:
+            return
+
+        # TODO: We need to clean secondary data
+        if self.property.data['secondheat-description'] not in self.ACCEPTED_SECONDHEAT_DESCRIPTIONS:
+            return
+
+        if self.property.data['secondheat-description'] in self.FIXED_HEATER_DESCRIPTIONS:
+            # Fixed heaters carry a removal cost; for any other heater type there is no cost
+            n_rooms = self.property.data['number-heated-rooms']
+        else:
+            n_rooms = 0
+
+        costs = self.costs.heater_removal(n_rooms=n_rooms)
+        self.recommendation.append(
+            {
+                "phase": phase,
+                "parts": [],
+                "type": "secondary_heating",
+                "description": "Remove the secondary heating system",
+                "starting_u_value": None,
+                "new_u_value": None,
+                "sap_points": None,
+                **costs,
+                "simulation_config": {
+                    "secondheat_description_ending": "None"
+                }
+            }
+        )

From 0b75ec9210e7c7c097bf4e6b5d2d87cb273af6cd Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 12 Apr 2024 14:41:19 +0100
Subject: [PATCH 206/262] Added patches and overrides to immo asset list

---
 .idea/Model.iml                               |  2 +-
 .idea/misc.xml                                |  2 +-
 .../AirSourceHeatPumpEfficiency.py            | 78 +++++++++++++++++++
 etl/air_source_heat_pump/app.py               | 24 ++++++
 etl/customers/immo/pilot/asset_list.py        | 70 ++++++++++++++++-
 5 files changed, 172 insertions(+), 4 deletions(-)
 create mode 100644 etl/air_source_heat_pump/AirSourceHeatPumpEfficiency.py
 create mode 100644 etl/air_source_heat_pump/app.py

diff --git a/.idea/Model.iml b/.idea/Model.iml
index 4413bb06..b0f9c00d 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 6f308057..1122b380 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/etl/air_source_heat_pump/AirSourceHeatPumpEfficiency.py b/etl/air_source_heat_pump/AirSourceHeatPumpEfficiency.py
new file mode 100644
index 00000000..2ba82e77
--- /dev/null
+++ b/etl/air_source_heat_pump/AirSourceHeatPumpEfficiency.py
@@ -0,0 +1,78 @@
+import pandas as pd
+from tqdm import tqdm
+from utils.s3 import save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet
+from utils.logger import setup_logger
+from etl.epc.settings import EARLIEST_EPC_DATE
+
+logger = setup_logger()
+
+
+class AirSourceHeatPumpEfficiency:
+
+    def __init__(self, file_directories, cleaned_lookup):
+        """
+        :param file_directories: A list of directories where files are stored.
+        :param cleaned_lookup: A dictionary containing cleaned lookup data.
+        """
+        self.file_directories = file_directories
+        self.cleaned_lookup = cleaned_lookup
+
+        self.results = []
+
+    def create_dataset(self):
+        logger.info("Creating solar photo supply dataset")
+        for dir in tqdm(self.file_directories):
+            filepath = dir / "certificates.csv"
+            df = pd.read_csv(filepath, low_memory=False)
+            df = df[~pd.isnull(df["UPRN"])]
+            df["UPRN"] = df["UPRN"].astype(int).astype(str)
+            # Take entries after SAP12
+            df["LODGEMENT_DATE"] = pd.to_datetime(df["LODGEMENT_DATE"])
+            df = df[df["LODGEMENT_DATE"] > EARLIEST_EPC_DATE]
+
+            df = df[
+                ~df["TENURE"].isin(
+                    [
+                        "unknown",
+                        "Not defined - use in the case of a new dwelling for which the intended tenure in not known. "
+                        "It is not to be used for an existing dwelling"
+                    ]
+                )
+            ]
+
+            # Take entries that contain an air source heat pump
+            df = df[
+                df["MAINHEAT_DESCRIPTION"].str.contains("air source heat pump", case=False, na=False)
+            ]
+            # Get the columns we're interested in
+            df = df[
+                [
+                    "MAINHEAT_DESCRIPTION",
+                    "MAINHEAT_ENERGY_EFF",
+                    "MAINHEATCONT_DESCRIPTION",
+                    "MAINHEATC_ENERGY_EFF",
+                    "MAIN_FUEL",
+                    "HOTWATER_DESCRIPTION",
+                    "HOT_WATER_ENERGY_EFF",
+                    "MAINS_GAS_FLAG"
+                ]
+            ]
+
+            counts = df.groupby(
+                [
+                    "MAINHEAT_DESCRIPTION",
+                    "MAINHEAT_ENERGY_EFF",
+                    "MAINHEATCONT_DESCRIPTION",
+                    "MAINHEATC_ENERGY_EFF",
+                    "MAIN_FUEL",
+                    "HOTWATER_DESCRIPTION",
+                    "HOT_WATER_ENERGY_EFF",
+                    "MAINS_GAS_FLAG"
+                ]
+            ).size().reset_index(name="count")
+
+            # Drop rows that have a missing PROPERTY_TYPE, BUILT_FORM, CONSTRUCTION_AGE_BAND, TOTAL_FLOOR_AREA
+            for col in ["PROPERTY_TYPE", "BUILT_FORM", "CONSTRUCTION_AGE_BAND", "TOTAL_FLOOR_AREA"]:
+                df = df[~pd.isnull(df[col])]
+            # Take newest LODGEMENT_DATE per UPRN
+            df = df.sort_values(by="LODGEMENT_DATE", ascending=False).drop_duplicates(subset=["UPRN"])
diff --git a/etl/air_source_heat_pump/app.py b/etl/air_source_heat_pump/app.py
new file mode 100644
index 00000000..ac87b34b
--- /dev/null
+++ b/etl/air_source_heat_pump/app.py
@@ -0,0 +1,24 @@
+from pathlib import Path
+from backend.app.plan.utils import get_cleaned
+from etl.air_source_heat_pump.AirSourceHeatPumpEfficiency import AirSourceHeatPumpEfficiency
+
+DATA_DIRECTORY = Path(__file__).parent / "local_data" / "all-domestic-certificates"
+
+
+def app():
+    """
+    This code reads in the EPC dataset and looks at the efficiency values for heating systems that include air source
+    heat pumps. This dataset is then used to inform the recommendations for the air source heat pump, so we know
+    how to set the simulation
+    :return:
+    """
+
+    directories = [entry for entry in DATA_DIRECTORY.iterdir() if entry.is_dir()]
+    cleaned_lookup = get_cleaned()
+
+    ashp_data_client = AirSourceHeatPumpEfficiency(
+        file_directories=directories,
+        cleaned_lookup=cleaned_lookup
+    )
+
+    ashp_data_client.create_dataset()
diff --git a/etl/customers/immo/pilot/asset_list.py b/etl/customers/immo/pilot/asset_list.py
index 9756e00b..0da8f885 100644
--- a/etl/customers/immo/pilot/asset_list.py
+++ b/etl/customers/immo/pilot/asset_list.py
@@ -19,6 +19,40 @@ council_tax_bands = [
 ]
 council_tax_bands = pd.DataFrame(council_tax_bands)
 
+# This is information we need to override on the EPC itself, for instance if a new survey has been conducted and
+# that has not reached the API
+patches = [
+    {
+        'address': '6 Beech Road', 'postcode': 'DY1 4BP',
+        'walls-description': 'Mixed: Filled cavity and external insulated solid brick',
+        'walls-energy-eff': 'Good',
+        'roof-description': 'Pitched, 12 mm loft insulation',
+        'roof-energy-eff': 'Very Poor',
+        'windows-description': 'Fully double glazed',
+        'windows-energy-eff': 'Good',
+        'mainheat-description': 'Room heaters, electric',
+        'mainheat-energy-eff': 'Very Poor',
+        'mainheatcont-description': 'Appliance thermostats',
+        'mainheatc-energy-eff': 'Good',
+        'lighting-description': 'Low energy lighting in 25% of fixed outlets',
+        'lighting-energy-eff': 'Good',
+        'floor-description': 'Mixed: Solid no insulation and suspended no insulation',
+        'secondheat-description': 'None',
+        'current-energy-efficiency': '32',
+    }
+]
+
+# This is information that is found as a result of the non-invasives, that mean that certain measures
+# have been installed already. To reflect this in the front end, it is included in the recommendation, however
+# the cost is removed and instead, a message is presented saying that the measure is already installed.
+overrides = [
+    {
+        'address': '5 Oaklands',
+        'postcode': 'B62 0JA',
+        "overrides": ["windows_glazing"]
+    }
+]
+
 
 def app():
     raw_asset_list = read_excel_from_s3(
@@ -41,7 +75,7 @@ def app():
         }
     )
 
-    # Store the data in s3
+    # Store the asset list in s3
     filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
     save_csv_to_s3(
         dataframe=asset_list,
@@ -49,12 +83,44 @@ def app():
         file_name=filename
     )
 
+    # Store overrides in s3
+    overrides_filename = f"{USER_ID}/{PORTFOLIO_ID}/overrides.json"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(overrides),
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=overrides_filename
+    )
+
+    # Store patches in s3
+    patches_filename = f"{USER_ID}/{PORTFOLIO_ID}/patches.json"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(patches),
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=patches_filename
+    )
+
+    # EPC C portfolio
     body = {
         "portfolio_id": str(PORTFOLIO_ID),
         "housing_type": "Private",
         "goal": "Increase EPC",
-        "goal_value": "A",
+        "goal_value": "C",
         "trigger_file_path": filename,
+        "overrides_file_path": overrides_filename,
+        "patches_file_path": patches_filename,
+        "budget": None,
+    }
+    print(body)
+
+    # EPC B portfolio
+    body = {
+        "portfolio_id": str(PORTFOLIO_ID + 1),
+        "housing_type": "Private",
+        "goal": "Increase EPC",
+        "goal_value": "B",
+        "trigger_file_path": filename,
+        "overrides_file_path": overrides_filename,
+        "patches_file_path": patches_filename,
         "budget": None,
     }
     print(body)

From ab180f65225507c6d666516fd70259a7c0ec4ac5 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 12 Apr 2024 15:06:12 +0100
Subject: [PATCH 207/262] Added overrides and patches to router

---
 .idea/Model.iml                        |  2 +-
 .idea/misc.xml                         |  2 +-
 backend/Property.py                    |  6 ++++-
 backend/app/plan/router.py             | 34 ++++++++++++++++++--------
 backend/app/plan/schemas.py            |  2 ++
 etl/customers/immo/pilot/asset_list.py |  4 +--
 6 files changed, 35 insertions(+), 15 deletions(-)

diff --git a/.idea/Model.iml b/.idea/Model.iml
index b0f9c00d..4413bb06 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 1122b380..6f308057 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/backend/Property.py b/backend/Property.py
index 0f5e7e77..882e450c 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -60,7 +60,7 @@ class Property:
     n_bedrooms = None
 
     def __init__(
-        self, id, postcode, address, epc_record, **kwargs
+        self, id, postcode, address, epc_record, overrides=None, **kwargs
     ):
 
         self.epc_record = epc_record
@@ -74,6 +74,10 @@ class Property:
         }
         self.old_data = epc_record.get("old_data")
         self.property_dimensions = None
+        # This is a list of measures that have already been installed in the property, typically found as a result
+        # of the non-invasive surveys. We reflect that this has been installed in the recommendations, but remove the
+        # cost and instead, provide a message that the measure has already been installed
+        self.overrides = overrides
 
         self.uprn = epc_record.get("uprn")
         self.full_sap_epc = epc_record.get("full_sap_epc")
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 4b91566e..8d39c97f 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -44,20 +44,15 @@ BATCH_SIZE = 5
 SCORING_BATCH_SIZE = 400
 
 
-def patch_epc(config, epc_records):
+def patch_epc(patch, epc_records):
     """
     This utility function is useful to patch the epc data if we have data from the customer
     :return:
     """
 
-    number_habitable_rooms = config.get("number-habitable-rooms", None)
-    number_heated_rooms = config.get("number-heated-rooms", None)
-
-    if number_habitable_rooms is not None:
-        epc_records["original_epc"]["number-habitable-rooms"] = int(number_habitable_rooms)
-
-    if number_heated_rooms is not None:
-        epc_records["original_epc"]["number-heated-rooms"] = int(number_heated_rooms)
+    for patch_variable, patch_value in patch.items():
+        if patch_variable in epc_records["original_epc"]:
+            epc_records["original_epc"][patch_variable] = patch_value
 
     return epc_records
 
@@ -85,6 +80,17 @@ async def trigger_plan(body: PlanTriggerRequest):
         session.begin()
         logger.info("Getting the inputs")
         plan_input = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.trigger_file_path)
+        # If we have patches or overrides, we should read them in here
+        patches = []
+        if body.patches_file_path:
+            patches = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.patches_file_path)
+
+        overrides = []
+        if body.overrides_file_path:
+            overrides = read_csv_from_s3(
+                bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.overrides_file_path
+            )
+
         cleaning_data = read_dataframe_from_s3_parquet(
             bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet",
         )
@@ -124,7 +130,11 @@ async def trigger_plan(body: PlanTriggerRequest):
                 'full_sap_epc': epc_searcher.full_sap_epc.copy(),
                 'old_data': epc_searcher.older_epcs.copy(),
             }
-            epc_records = patch_epc(config, epc_records)
+
+            patch = next((
+                x for x in patches if (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
+            ), None)
+            epc_records = patch_epc(patch, epc_records)
 
             prepared_epc = EPCRecord(
                 epc_records=epc_records,
@@ -132,12 +142,16 @@ async def trigger_plan(body: PlanTriggerRequest):
                 cleaning_data=cleaning_data
             )
 
+            overrides = next((
+                x for x in overrides if (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
+            ), None)
             input_properties.append(
                 Property(
                     id=property_id,
                     address=epc_searcher.address_clean,
                     postcode=epc_searcher.postcode_clean,
                     epc_record=prepared_epc,
+                    overrides=overrides,
                     **Property.extract_kwargs(config)
                 )
             )
diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py
index b8a99704..ec49e41e 100644
--- a/backend/app/plan/schemas.py
+++ b/backend/app/plan/schemas.py
@@ -9,6 +9,8 @@ class PlanTriggerRequest(BaseModel):
     goal_value: str
     portfolio_id: int
     trigger_file_path: str
+    overrides_file_path: Optional[str] = None
+    patches_file_path: Optional[str] = None
     exclusions: Optional[conlist(str, min_items=1)] = None
 
     # Pre-defined list of possibilities for exclusions
diff --git a/etl/customers/immo/pilot/asset_list.py b/etl/customers/immo/pilot/asset_list.py
index 0da8f885..15681d42 100644
--- a/etl/customers/immo/pilot/asset_list.py
+++ b/etl/customers/immo/pilot/asset_list.py
@@ -24,7 +24,7 @@ council_tax_bands = pd.DataFrame(council_tax_bands)
 patches = [
     {
         'address': '6 Beech Road', 'postcode': 'DY1 4BP',
-        'walls-description': 'Mixed: Filled cavity and external insulated solid brick',
+        'walls-description': 'Cavity wall, filled cavity',
         'walls-energy-eff': 'Good',
         'roof-description': 'Pitched, 12 mm loft insulation',
         'roof-energy-eff': 'Very Poor',
@@ -36,7 +36,7 @@ patches = [
         'mainheatc-energy-eff': 'Good',
         'lighting-description': 'Low energy lighting in 25% of fixed outlets',
         'lighting-energy-eff': 'Good',
-        'floor-description': 'Mixed: Solid no insulation and suspended no insulation',
+        'floor-description': 'Solid, no insulation (assumed)',
         'secondheat-description': 'None',
         'current-energy-efficiency': '32',
     }

From 8e2d823693f53ad47a4fe857fd8f24d84c0c4ec1 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 12 Apr 2024 15:11:51 +0100
Subject: [PATCH 208/262] corrected parsing of overrides

---
 backend/Property.py                    | 4 ++--
 backend/app/plan/router.py             | 8 ++++----
 etl/customers/immo/pilot/asset_list.py | 3 +++
 3 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index 882e450c..3fac3667 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -60,7 +60,7 @@ class Property:
     n_bedrooms = None
 
     def __init__(
-        self, id, postcode, address, epc_record, overrides=None, **kwargs
+        self, id, postcode, address, epc_record, override=None, **kwargs
     ):
 
         self.epc_record = epc_record
@@ -77,7 +77,7 @@ class Property:
         # This is a list of measures that have already been installed in the property, typically found as a result
         # of the non-invasive surveys. We reflect that this has been installed in the recommendations, but remove the
         # cost and instead, provide a message that the measure has already been installed
-        self.overrides = overrides
+        self.override = override
 
         self.uprn = epc_record.get("uprn")
         self.full_sap_epc = epc_record.get("full_sap_epc")
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 8d39c97f..08ce0dcc 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -133,7 +133,7 @@ async def trigger_plan(body: PlanTriggerRequest):
 
             patch = next((
                 x for x in patches if (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
-            ), None)
+            ), {})
             epc_records = patch_epc(patch, epc_records)
 
             prepared_epc = EPCRecord(
@@ -142,16 +142,16 @@ async def trigger_plan(body: PlanTriggerRequest):
                 cleaning_data=cleaning_data
             )
 
-            overrides = next((
+            override = next((
                 x for x in overrides if (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
-            ), None)
+            ), {})
             input_properties.append(
                 Property(
                     id=property_id,
                     address=epc_searcher.address_clean,
                     postcode=epc_searcher.postcode_clean,
                     epc_record=prepared_epc,
-                    overrides=overrides,
+                    override=override,
                     **Property.extract_kwargs(config)
                 )
             )
diff --git a/etl/customers/immo/pilot/asset_list.py b/etl/customers/immo/pilot/asset_list.py
index 15681d42..07ebe884 100644
--- a/etl/customers/immo/pilot/asset_list.py
+++ b/etl/customers/immo/pilot/asset_list.py
@@ -39,6 +39,9 @@ patches = [
         'floor-description': 'Solid, no insulation (assumed)',
         'secondheat-description': 'None',
         'current-energy-efficiency': '32',
+        'energy-consumption-current': '491',
+        'co2-emissions-current': '5.0',
+        'potential-energy-efficiency': '87'
     }
 ]
 

From 0ede95cc4a7499ad0db1c6eda5ef6e012ab9f763 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 12 Apr 2024 15:25:08 +0100
Subject: [PATCH 209/262] added override to wall insulation

---
 backend/Property.py                     |  4 +++-
 recommendations/WallRecommendations.py  | 15 ++++++++++++++-
 recommendations/recommendation_utils.py | 12 ++++++++++++
 3 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index 3fac3667..d000be28 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -1,4 +1,5 @@
 import os
+import ast
 from itertools import groupby
 import pandas as pd
 
@@ -77,7 +78,8 @@ class Property:
         # This is a list of measures that have already been installed in the property, typically found as a result
         # of the non-invasive surveys. We reflect that this has been installed in the recommendations, but remove the
         # cost and instead, provide a message that the measure has already been installed
-        self.override = override
+
+        self.override = ast.literal_eval(override['overrides']) if override is not None else []
 
         self.uprn = epc_record.get("uprn")
         self.full_sap_epc = epc_record.get("full_sap_epc")
diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py
index 6b59c148..3acc17f0 100644
--- a/recommendations/WallRecommendations.py
+++ b/recommendations/WallRecommendations.py
@@ -8,7 +8,7 @@ from backend.Property import Property
 from BaseUtility import Definitions
 from recommendations.recommendation_utils import (
     r_value_per_mm_to_u_value, calculate_u_value_uplift, is_diminishing_returns, update_lowest_selected_u_value,
-    get_recommended_part, get_wall_u_value
+    get_recommended_part, get_wall_u_value, override_costs
 )
 from recommendations.config import PARTIALLY_FILLED_PERCENTAGE_ASSUMPTION
 from recommendations.Costs import Costs
@@ -221,6 +221,10 @@ class WallRecommendations(Definitions):
                     material=material.to_dict(),
                 )
 
+                is_override = "cavity_wall_insulation" in cost_result
+                if is_override:
+                    cost_result = override_costs(cost_result)
+
                 recommendations.append(
                     {
                         "phase": phase,
@@ -237,6 +241,7 @@ class WallRecommendations(Definitions):
                         "starting_u_value": u_value,
                         "new_u_value": new_u_value,
                         "sap_points": None,
+                        "is_override": is_override,
                         **cost_result
                     }
                 )
@@ -277,12 +282,19 @@ class WallRecommendations(Definitions):
                             material=material.to_dict(),
                             non_insulation_materials=non_insulation_materials
                         )
+                        is_override = "internal_wall_insulation" in cost_result
+                        if is_override:
+                            cost_result = override_costs(cost_result)
+
                     elif material["type"] == "external_wall_insulation":
                         cost_result = self.costs.external_wall_insulation(
                             wall_area=self.property.insulation_wall_area,
                             material=material.to_dict(),
                             non_insulation_materials=non_insulation_materials
                         )
+                        is_override = "external_wall_insulation" in cost_result
+                        if is_override:
+                            cost_result = override_costs(cost_result)
                     else:
                         raise ValueError("Invalid material type")
 
@@ -301,6 +313,7 @@ class WallRecommendations(Definitions):
                             "description": self._make_description(material),
                             "starting_u_value": u_value,
                             "new_u_value": new_u_value,
+                            "is_override": is_override,
                             "sap_points": None,
                             **cost_result
                         }
diff --git a/recommendations/recommendation_utils.py b/recommendations/recommendation_utils.py
index 0d5f9743..a3043c31 100644
--- a/recommendations/recommendation_utils.py
+++ b/recommendations/recommendation_utils.py
@@ -767,3 +767,15 @@ def check_simulation_difference(old_config, new_config):
     differences = {key + "_ending": new_config[key] for key in new_config if old_config[key] != new_config[key]}
 
     return differences
+
+
+def override_costs(costs):
+    """
+    If a measure is overridden (i.e. already installed), its costs should be zero. Sets every value in costs to 0
+    :param costs: Dictionary of costing, as returned by the Costs class. It is modified in place
+    :return: The same costs dictionary, with every value set to 0
+    """
+    for k in costs:
+        costs[k] = 0
+
+    return costs

From 1c5ccb2c8c46a613851dfaf153a16ee4242eaf0a Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 12 Apr 2024 15:26:11 +0100
Subject: [PATCH 210/262] added override to roof insulation

---
 recommendations/RoofRecommendations.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/recommendations/RoofRecommendations.py b/recommendations/RoofRecommendations.py
index 8d6a91e7..ed087228 100644
--- a/recommendations/RoofRecommendations.py
+++ b/recommendations/RoofRecommendations.py
@@ -5,7 +5,7 @@ from typing import List
 from datatypes.enums import QuantityUnits
 from recommendations.recommendation_utils import (
     get_roof_u_value, r_value_per_mm_to_u_value, calculate_u_value_uplift, is_diminishing_returns,
-    update_lowest_selected_u_value, get_recommended_part, convert_thickness_to_numeric
+    update_lowest_selected_u_value, get_recommended_part, convert_thickness_to_numeric, override_costs
 )
 from recommendations.Costs import Costs
 
@@ -207,12 +207,18 @@ class RoofRecommendations:
                             floor_area=self.property.insulation_floor_area,
                             material=material
                         )
+                        is_override = "loft_insulation" in cost_result
+                        if is_override:
+                            cost_result = override_costs(cost_result)
                     elif material["type"] == "flat_roof_insulation":
                         cost_result = self.costs.flat_roof_insulation(
                             floor_area=self.property.insulation_floor_area,
                             material=material,
                             non_insulation_materials=non_insulation_materials
                         )
+                        is_override = "flat_roof_insulation" in cost_result
+                        if is_override:
+                            cost_result = override_costs(cost_result)
                     else:
                         raise ValueError("Invalid material type")
 
@@ -232,6 +238,7 @@ class RoofRecommendations:
                             "starting_u_value": u_value,
                             "new_u_value": new_u_value,
                             "sap_points": None,
+                            "is_override": is_override,
                             **cost_result
                         }
                     )

From adcd31c8f4e69e92ff592a03103eb60f1c06617a Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 12 Apr 2024 15:27:58 +0100
Subject: [PATCH 211/262] correcting override in walls and roof

---
 recommendations/RoofRecommendations.py        | 4 ++--
 recommendations/VentilationRecommendations.py | 4 ++++
 recommendations/WallRecommendations.py        | 6 +++---
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/recommendations/RoofRecommendations.py b/recommendations/RoofRecommendations.py
index ed087228..5ba7e82e 100644
--- a/recommendations/RoofRecommendations.py
+++ b/recommendations/RoofRecommendations.py
@@ -207,7 +207,7 @@ class RoofRecommendations:
                             floor_area=self.property.insulation_floor_area,
                             material=material
                         )
-                        is_override = "loft_insulation" in cost_result
+                        is_override = "loft_insulation" in self.property.override
                         if is_override:
                             cost_result = override_costs(cost_result)
                     elif material["type"] == "flat_roof_insulation":
@@ -216,7 +216,7 @@ class RoofRecommendations:
                             material=material,
                             non_insulation_materials=non_insulation_materials
                         )
-                        is_override = "flat_roof_insulation" in cost_result
+                        is_override = "flat_roof_insulation" in self.property.override
                         if is_override:
                             cost_result = override_costs(cost_result)
                     else:
diff --git a/recommendations/VentilationRecommendations.py b/recommendations/VentilationRecommendations.py
index 1657b759..aa6299e0 100644
--- a/recommendations/VentilationRecommendations.py
+++ b/recommendations/VentilationRecommendations.py
@@ -56,6 +56,10 @@ class VentilationRecommendations(Definitions):
         part[0]["quantity"] = n_units
         part[0]["quantity_unit"] = "part"
 
+        is_override = "cavity_wall_insulation" in cost_result
+        if is_override:
+            cost_result = override_costs(cost_result)
+
         # We recommend installing two mechanical ventilation systems
         self.recommendation = [
             {
diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py
index 3acc17f0..471a62cb 100644
--- a/recommendations/WallRecommendations.py
+++ b/recommendations/WallRecommendations.py
@@ -221,7 +221,7 @@ class WallRecommendations(Definitions):
                     material=material.to_dict(),
                 )
 
-                is_override = "cavity_wall_insulation" in cost_result
+                is_override = "cavity_wall_insulation" in self.property.override
                 if is_override:
                     cost_result = override_costs(cost_result)
 
@@ -282,7 +282,7 @@ class WallRecommendations(Definitions):
                             material=material.to_dict(),
                             non_insulation_materials=non_insulation_materials
                         )
-                        is_override = "internal_wall_insulation" in cost_result
+                        is_override = "internal_wall_insulation" in self.property.override
                         if is_override:
                             cost_result = override_costs(cost_result)
 
@@ -292,7 +292,7 @@ class WallRecommendations(Definitions):
                             material=material.to_dict(),
                             non_insulation_materials=non_insulation_materials
                         )
-                        is_override = "external_wall_insulation" in cost_result
+                        is_override = "external_wall_insulation" in self.property.override
                         if is_override:
                             cost_result = override_costs(cost_result)
                     else:

From fadff714d2c3227eb835b94951ed09b25ff870c4 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 12 Apr 2024 15:29:41 +0100
Subject: [PATCH 212/262] add override to ventilation

---
 recommendations/VentilationRecommendations.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/recommendations/VentilationRecommendations.py b/recommendations/VentilationRecommendations.py
index aa6299e0..07f7cf1e 100644
--- a/recommendations/VentilationRecommendations.py
+++ b/recommendations/VentilationRecommendations.py
@@ -50,16 +50,16 @@ class VentilationRecommendations(Definitions):
 
         part = self.materials.copy()
 
-        estimated_cost = n_units * part[0]["cost"]
+        is_override = "cavity_wall_insulation" in self.property.override
+
+        estimated_cost = n_units * part[0]["cost"] if not is_override else 0
+        labour_hours = 4 * n_units if not is_override else 0
+        labour_days = 4 * n_units / 8.0 if not is_override else 0
 
         part[0]["total"] = estimated_cost
         part[0]["quantity"] = n_units
         part[0]["quantity_unit"] = "part"
 
-        is_override = "cavity_wall_insulation" in cost_result
-        if is_override:
-            cost_result = override_costs(cost_result)
-
         # We recommend installing two mechanical ventilation systems
         self.recommendation = [
             {
@@ -76,7 +76,7 @@ class VentilationRecommendations(Definitions):
                 "energy_cost_savings": 0,
                 "total": estimated_cost,
                 # We use a very simple and rough estimate of 4 hours per unit
-                "labour_hours": 4 * n_units,
-                "labour_days": 4 * n_units / 8.0  # Assume 8 hour day
+                "labour_hours": labour_hours,
+                "labour_days": labour_days  # Assume 8 hour day
             }
         ]

From 493db6c4a01dcf825fe49d77cfc8fcb974a7d1e1 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 12 Apr 2024 15:31:07 +0100
Subject: [PATCH 213/262] added floor insulation to override

---
 recommendations/FloorRecommendations.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/recommendations/FloorRecommendations.py b/recommendations/FloorRecommendations.py
index 713d5f92..1744a928 100644
--- a/recommendations/FloorRecommendations.py
+++ b/recommendations/FloorRecommendations.py
@@ -8,7 +8,7 @@ from datatypes.enums import QuantityUnits
 from backend.Property import Property
 from recommendations.recommendation_utils import (
     r_value_per_mm_to_u_value, calculate_u_value_uplift, is_diminishing_returns, update_lowest_selected_u_value,
-    get_recommended_part, get_floor_u_value
+    get_recommended_part, get_floor_u_value, override_costs
 )
 from recommendations.Costs import Costs
 
@@ -192,12 +192,22 @@ class FloorRecommendations(Definitions):
                             material=material.to_dict(),
                             non_insulation_materials=non_insulation_materials
                         )
+
+                        is_override = "suspended_floor_insulation" in self.property.override
+                        if is_override:
+                            cost_result = override_costs(cost_result)
+
                     elif material["type"] == "solid_floor_insulation":
                         cost_result = self.costs.solid_floor_insulation(
                             insulation_floor_area=self.property.insulation_floor_area,
                             material=material.to_dict(),
                             non_insulation_materials=non_insulation_materials
                         )
+
+                        is_override = "solid_floor_insulation" in self.property.override
+                        if is_override:
+                            cost_result = override_costs(cost_result)
+
                     else:
                         raise NotImplementedError("Implement me!")
 

From b052c9925f9064d2462442cccecac08bc511cc21 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 12 Apr 2024 15:41:52 +0100
Subject: [PATCH 214/262] Added heating override

---
 recommendations/FireplaceRecommendations.py |  4 ++-
 recommendations/FloorRecommendations.py     |  2 +-
 recommendations/HeatingRecommender.py       | 22 ++++++++++++-----
 recommendations/WindowsRecommendations.py   | 27 +++++++++++++--------
 4 files changed, 37 insertions(+), 18 deletions(-)

diff --git a/recommendations/FireplaceRecommendations.py b/recommendations/FireplaceRecommendations.py
index 5d620d49..c1114f31 100644
--- a/recommendations/FireplaceRecommendations.py
+++ b/recommendations/FireplaceRecommendations.py
@@ -32,7 +32,8 @@ class FireplaceRecommendations(Definitions):
         if number_open_fireplaces == 0:
             return
 
-        estimated_cost = number_open_fireplaces * self.COST_OF_WORK
+        is_override = "sealing_open_fireplace" in self.property.override
+        estimated_cost = number_open_fireplaces * self.COST_OF_WORK if not is_override else 0
 
         # We recommend installing two mechanical ventilation systems
         self.recommendation = [
@@ -44,6 +45,7 @@ class FireplaceRecommendations(Definitions):
                 "starting_u_value": None,
                 "new_u_value": None,
                 "sap_points": None,
+                "is_override": is_override,
                 "total": estimated_cost,
                 # Take a very basic estimate of 6 hours, multipled by the number of open fireplaces to seal
                 "labour_hours": 6 * number_open_fireplaces,
diff --git a/recommendations/FloorRecommendations.py b/recommendations/FloorRecommendations.py
index 1744a928..b7bd370c 100644
--- a/recommendations/FloorRecommendations.py
+++ b/recommendations/FloorRecommendations.py
@@ -207,7 +207,6 @@ class FloorRecommendations(Definitions):
                         is_override = "solid_floor_insulation" in self.property.override
                         if is_override:
                             cost_result = override_costs(cost_result)
-
                     else:
                         raise NotImplementedError("Implement me!")
 
@@ -227,6 +226,7 @@ class FloorRecommendations(Definitions):
                             "starting_u_value": u_value,
                             "new_u_value": new_u_value,
                             "sap_points": None,
+                            "is_override": is_override,
                             **cost_result
                         }
                     )
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 92457a27..27e4985a 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -1,7 +1,7 @@
 import pandas as pd
 
 from recommendations.Costs import Costs
-from recommendations.recommendation_utils import check_simulation_difference
+from recommendations.recommendation_utils import check_simulation_difference, override_costs
 from backend.Property import Property
 from etl.epc_clean.epc_attributes.MainheatAttributes import MainHeatAttributes
 from etl.epc_clean.epc_attributes.HotWaterAttributes import HotWaterAttributes
@@ -33,7 +33,7 @@ class HeatingRecommender:
 
         if has_electric_heating_description or no_heating_no_mains:
             # Recommend high heat retention storage heaters
-            self.recommend_electric_storage_heaters(phase=phase, system_change=True, heating_controls_only=False)
+            self.recommend_hhr_storage_heaters(phase=phase, system_change=True, heating_controls_only=False)
 
         # if the property has mains heating with boiler and radiators, we recommend optimal heating controls
         has_boiler = self.property.main_heating["clean_description"] in ["Boiler and radiators, mains gas"]
@@ -89,9 +89,8 @@ class HeatingRecommender:
 
         return differences
 
-    @staticmethod
     def combine_heating_and_controls(
-        controls_recommendations, heating_simulation_config, costs, description, phase, heating_controls_only,
+        self, controls_recommendations, heating_simulation_config, costs, description, phase, heating_controls_only,
         system_change
     ):
         """
@@ -140,6 +139,11 @@ class HeatingRecommender:
 
                 recommendation_description = f"{description} and {controls_description}"
 
+            is_override = "cavity_wall_insulation" in self.property.override
+            if is_override:
+                total_costs = override_costs(total_costs)
+                recommendation_description = "Heating system has already been upgraded, no further action needed."
+
             recommendation = {
                 "phase": phase,
                 "parts": [
@@ -150,6 +154,7 @@ class HeatingRecommender:
                 "starting_u_value": None,
                 "new_u_value": None,
                 "sap_points": None,
+                "is_override": is_override,
                 **total_costs,
                 "simulation_config": recommendation_simulation_config
             }
@@ -181,9 +186,8 @@ class HeatingRecommender:
 
         return output
 
-    def recommend_electric_storage_heaters(self, phase, system_change, heating_controls_only):
+    def recommend_hhr_storage_heaters(self, phase, system_change, heating_controls_only):
         """
-        We recommend electric storage heaters as an upgrade to the heating system.
         We will recommend upgrading to a high heat retention storage system, if the current system is not already
         high heat retention storage
 
@@ -360,6 +364,11 @@ class HeatingRecommender:
                 n_heated_rooms=self.property.data["number-heated-rooms"]
             )
 
+            is_override = "heating" in self.property.override
+            if is_override:
+                boiler_costs = override_costs(boiler_costs)
+                description = "Heating system has already been upgraded, no further action needed."
+
             boiler_recommendation = {
                 "phase": recommendation_phase,
                 "parts": [
@@ -370,6 +379,7 @@ class HeatingRecommender:
                 "starting_u_value": None,
                 "new_u_value": None,
                 "sap_points": None,
+                "is_override": is_override,
                 "simulation_config": simulation_config,
                 **boiler_costs
             }
diff --git a/recommendations/WindowsRecommendations.py b/recommendations/WindowsRecommendations.py
index d7404e3b..b2fe20a6 100644
--- a/recommendations/WindowsRecommendations.py
+++ b/recommendations/WindowsRecommendations.py
@@ -4,6 +4,7 @@ import numpy as np
 
 from backend.Property import Property
 from recommendations.Costs import Costs
+from recommendation_utils import override_costs
 
 
 class WindowsRecommendations:
@@ -70,18 +71,23 @@ class WindowsRecommendations:
             is_secondary_glazing=is_secondary_glazing
         )
 
-        glazing_type = "secondary glazing" if is_secondary_glazing else "double glazing"
-        if self.property.windows["glazing_coverage"] in ["partial", "most"]:
-            description = f"Install {glazing_type} to the remaining windows"
+        is_override = "windows_glazing" in self.property.override
+        if is_override:
+            cost_result = override_costs(cost_result)
+            description = "The property already has double glazing installed. No further action is required."
         else:
-            description = f"Install {glazing_type} to all windows"
+            glazing_type = "secondary glazing" if is_secondary_glazing else "double glazing"
+            if self.property.windows["glazing_coverage"] in ["partial", "most"]:
+                description = f"Install {glazing_type} to the remaining windows"
+            else:
+                description = f"Install {glazing_type} to all windows"
 
-        if self.property.is_listed:
-            description += ". Secondary glazing recommended due to listed building status"
-        elif self.property.is_heritage:
-            description += ". Secondary glazing recommended due to herigate building status"
-        elif self.property.in_conservation_area:
-            description += ". Secondary glazing recommended due to conservation area status"
+            if self.property.is_listed:
+                description += ". Secondary glazing recommended due to listed building status"
+            elif self.property.is_heritage:
+                description += ". Secondary glazing recommended due to herigate building status"
+            elif self.property.in_conservation_area:
+                description += ". Secondary glazing recommended due to conservation area status"
 
         self.recommendation = [
             {
@@ -92,6 +98,7 @@ class WindowsRecommendations:
                 "starting_u_value": None,
                 "new_u_value": None,
                 "sap_points": None,
+                "is_override": is_override,
                 **cost_result,
                 "is_secondary_glazing": is_secondary_glazing
             }

From 1ee115fa7e73f170d559a24026680677f89aaf5d Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 12 Apr 2024 15:48:44 +0100
Subject: [PATCH 215/262] Added overrides

---
 recommendations/HotwaterRecommendations.py | 11 ++++++++++-
 recommendations/LightingRecommendations.py |  7 +++++++
 recommendations/SecondaryHeating.py        | 12 +++++++++++-
 recommendations/SolarPvRecommendations.py  |  6 ++++++
 4 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/recommendations/HotwaterRecommendations.py b/recommendations/HotwaterRecommendations.py
index 7f77597f..88cfa932 100644
--- a/recommendations/HotwaterRecommendations.py
+++ b/recommendations/HotwaterRecommendations.py
@@ -1,5 +1,6 @@
 from backend.Property import Property
 from recommendations.Costs import Costs
+from recommendations.recommendation_utils import override_costs
 
 
 class HotwaterRecommendations:
@@ -41,6 +42,13 @@ class HotwaterRecommendations:
 
         recommendation_cost = self.costs.hot_water_tank_insulation()
 
+        is_override = "hot_water_tank_insulation" in self.property.override
+        if is_override:
+            recommendation_cost = override_costs(recommendation_cost)
+            description = "Insulation tank has already been insulated, no further action required"
+        else:
+            description = "Insulate hot water tank"
+
         self.recommendations.append(
             {
                 "phase": phase,
@@ -48,10 +56,11 @@ class HotwaterRecommendations:
                     # TODO
                 ],
                 "type": "hot_water_tank_insulation",
-                "description": "Insulate the hot water tank with an insulation jacket",
+                "description": description,
                 "starting_u_value": None,
                 "new_u_value": None,
                 "sap_points": None,
+                "is_override": is_override,
                 **recommendation_cost,
                 "simulation_config": {"hot_water_energy_eff_ending": "Average"}
             }
diff --git a/recommendations/LightingRecommendations.py b/recommendations/LightingRecommendations.py
index 352c4d8a..9e4c8e43 100644
--- a/recommendations/LightingRecommendations.py
+++ b/recommendations/LightingRecommendations.py
@@ -1,6 +1,7 @@
 from backend.Property import Property
 from typing import List
 from recommendations.Costs import Costs
+from recommendations.recommendation_utils import override_costs
 
 
 class LightingRecommendations:
@@ -91,6 +92,11 @@ class LightingRecommendations:
 
         heat_demand_change, carbon_change = self.estimate_lighting_impact(number_non_lel_outlets)
 
+        is_override = "low_energy_lighting" in self.property.override
+        if is_override:
+            cost_result = override_costs(cost_result)
+            description = "Low energy lighting has already been installed, no further action required"
+
         self.recommendation = [
             {
                 "phase": phase,
@@ -99,6 +105,7 @@ class LightingRecommendations:
                 "description": description,
                 "starting_u_value": None,
                 "new_u_value": None,
+                "is_override": is_override,
                 # For SAP points, we use the fact that lighting is usually worth 2 points and we scale this to
                 # the proportion of lights that will be set to low energy
                 "sap_points": round(2 * (number_non_lel_outlets / number_lighting_outlets), 2),
diff --git a/recommendations/SecondaryHeating.py b/recommendations/SecondaryHeating.py
index f31c4c05..e426977e 100644
--- a/recommendations/SecondaryHeating.py
+++ b/recommendations/SecondaryHeating.py
@@ -1,4 +1,5 @@
 from recommendations.Costs import Costs
+from recommendations.recommendation_utils import override_costs
 from backend.Property import Property
 
 
@@ -38,15 +39,24 @@ class SecondaryHeating:
             n_rooms = 0
 
         costs = self.costs.heater_removal(n_rooms=n_rooms)
+
+        is_override = "secondary_heating" in self.property.override
+        if is_override:
+            costs = override_costs(costs)
+            description = "Secondary heating system has already been removed, no further action required"
+        else:
+            description = "Remove the secondary heating system"
+
         self.recommendation.append(
             {
                 "phase": phase,
                 "parts": [],
                 "type": "secondary_heating",
-                "description": "Remove the secondary heating system",
+                "description": description,
                 "starting_u_value": None,
                 "new_u_value": None,
                 "sap_points": None,
+                "is_override": is_override,
                 **costs,
                 "simulation_config": {
                     "secondheat_description_ending": "None"
diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py
index f75003ce..72fcdf4b 100644
--- a/recommendations/SolarPvRecommendations.py
+++ b/recommendations/SolarPvRecommendations.py
@@ -1,5 +1,6 @@
 import numpy as np
 from recommendations.Costs import Costs
+from recommendations.recommendation_utils import override_costs
 
 
 class SolarPvRecommendations:
@@ -110,6 +111,10 @@ class SolarPvRecommendations:
                 description = (f"Install a {kw} kilowatt-peak (kWp) solar photovoltaic (PV) p"
                                f"anel system on {round(roof_coverage_percent)}% the roof.")
 
+            is_override = "solar_pv" in self.property.override
+            if is_override:
+                cost_result = override_costs(cost_result)
+
             self.recommendation.append(
                 {
                     "phase": phase,
@@ -119,6 +124,7 @@ class SolarPvRecommendations:
                     "starting_u_value": None,
                     "new_u_value": None,
                     "sap_points": None,
+                    "is_override": is_override,
                     **cost_result,
                     # This is required for simulating the SAP impact. solar_pv_percentage is between 0 & 1 so we scale
                     # back up here

From 14a1f35fb16cbf1199afbd66ce50f598b5d7a10b Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 12 Apr 2024 16:27:26 +0100
Subject: [PATCH 216/262] amended system change costs for first time central
 heating

---
 recommendations/Costs.py              | 72 +++++++++++++++++++++++++--
 recommendations/HeatingRecommender.py |  9 +++-
 2 files changed, 77 insertions(+), 4 deletions(-)

diff --git a/recommendations/Costs.py b/recommendations/Costs.py
index 45c17102..0e67b352 100644
--- a/recommendations/Costs.py
+++ b/recommendations/Costs.py
@@ -83,6 +83,14 @@ CONVENTIONAL_BOILER_COSTS = {
 ROOM_HEATER_REMOVAL_COST = 120
 ROOM_HEATER_REMOVAL_LABOUR_HOURS = 3
 
+# This is a cost quoted by Jim for a system flush - existig system will run more efficiently
+SYSTEM_FLUSH_COST = 250
+
+SINGLE_RADIATOR_COST = 150
+DOUBLE_RADIATOR_COST = 300
+FLUE_COST = 600
+PIPEWORK_COST = 750  # Min cost is £500
+
 
 class Costs:
     """
@@ -1126,9 +1134,45 @@ class Costs:
             "labour_days": np.ceil(removal_labour_hours / 8),
         }
 
-    def boiler(self, is_combi, size, exising_room_heaters, n_heated_rooms):
+    @staticmethod
+    def _estimate_n_radiators(number_habitable_rooms, total_floor_area, property_type, built_form):
+        # Base number of radiators: one per habitable room
+        base_radiators = number_habitable_rooms
+
+        # Additional radiators for non-habitable essential areas (e.g., kitchens, hallways)
+        additional_radiators = 3  # Initial assumption
+
+        # Adjust additional radiators based on property type
+        if property_type == 'Flat':
+            additional_radiators -= 1  # Flats may need fewer radiators due to less exposure
+        elif property_type in ['House', 'Bungalow', 'Maisonette']:
+            # Multiple floors in Maisonette may require additional heating points
+            additional_radiators += 2  # Houses and bungalows might need more due to greater exposure
+        else:
+            raise Exception("Invalid property type")
+
+        # Adjust total radiator needs based on built form
+        form_factor = {
+            'Mid-Terrace': 0.95,
+            'Semi-Detached': 1.05,
+            'Detached': 1.25,
+            'End-Terrace': 1.05
+        }
+
+        # Calculate total heating power needed and number of radiators based on standard output
+        total_heating_power_required = total_floor_area * 80  # Watts per square meter
+        radiator_output = 1000  # Average wattage per radiator
+        total_radiators_based_on_power = (total_heating_power_required / radiator_output) * form_factor[built_form]
+
+        # Final estimation taking the higher of calculated needs or base room count
+        estimated_radiators = max(total_radiators_based_on_power, base_radiators + additional_radiators)
+        return round(estimated_radiators)
+
+    def boiler(self, is_combi, size, exising_room_heaters, system_change, n_heated_rooms, n_rooms):
         """
         Based on a basic estimate of median value £2600 to install a low carbon combi boiler
+        First time central heating vosts can als be found here:
+        https://www.checkatrade.com/blog/cost-guides/central-heating-installation-cost/
         :return:
         """
 
@@ -1137,11 +1181,11 @@ class Costs:
         # We now need to estimate the cost of the works
         labour_days = 2
         labour_hours = labour_days * 8
-        labour_rate = 500
+        labour_rate = 300
 
         # Average cost of installation is 1 (maybe 2days) at £300 per day
         # https://www.checkatrade.com/blog/cost-guides/new-boiler-cost/
-        # To be pessimistic, assume 2 days work and £500 day rate
+        # To be pessimistic, assume 2 days work
         labour_cost = labour_rate * self.labour_adjustment_factor * labour_days
         # Add contingency and preliminaries
         labour_cost = labour_cost * (1 + self.CONTINGENCY + self.PRELIMINARIES)
@@ -1161,6 +1205,28 @@ class Costs:
             subtotal_before_vat += removal_costing["subtotal"]
             labour_hours += removal_costing["labour_hours"]
             labour_days += removal_costing["labour_days"]
+            vat += removal_costing["vat"]
+
+        if system_change:
+            # We need the cost of radiators
+            n_radiators = self._estimate_n_radiators(
+                number_habitable_rooms=n_rooms,
+                total_floor_area=self.property.floor_area,
+                property_type=self.property.data["property-type"],
+                built_form=self.property.data["built-form"]
+            )
+
+            additionals_labour_cost = labour_rate * self.labour_adjustment_factor
+            radiator_cost = DOUBLE_RADIATOR_COST * n_radiators
+            system_change_cost = radiator_cost + FLUE_COST + PIPEWORK_COST + additionals_labour_cost
+            system_change_cost_before_vat = system_change_cost / (1 + self.VAT_RATE)
+            system_change_vat = system_change_cost - system_change_cost_before_vat
+            # We add an extra labour day for the system change
+            labour_days += 1
+            labour_hours += 8
+            total_cost += system_change_cost
+            subtotal_before_vat += system_change_cost_before_vat
+            vat += system_change_vat
 
         return {
             "total": total_cost,
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 27e4985a..d83b755e 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -18,6 +18,11 @@ class HeatingRecommender:
         self.recommendations = []
 
     def recommend(self, phase=0):
+
+        # TODO: We could have a system flush recommendation for an existing boiler, where there is no need to replace
+        #       the boiler, but instead flushing the system will make it run more efficiently. There is a cost for this
+        #       in the Costs class, stored as SYSTEM_FLUSH_COST
+
         self.recommendations = []
         # This first iteration of the recommender will provide very basic recommendation
         # We recommend heating controls based on the main heating system
@@ -361,7 +366,9 @@ class HeatingRecommender:
                 is_combi=is_combi,
                 size=f"{boiler_size}kw",
                 exising_room_heaters=exising_room_heaters,
-                n_heated_rooms=self.property.data["number-heated-rooms"]
+                system_change=system_change,
+                n_heated_rooms=self.property.data["number-heated-rooms"],
+                n_rooms=self.property.number_of_rooms
             )
 
             is_override = "heating" in self.property.override

From 94f9979f561c5a64acea1fc871c38a9d4868f8e0 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 12 Apr 2024 16:31:11 +0100
Subject: [PATCH 217/262] fixed override bug

---
 backend/Property.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/Property.py b/backend/Property.py
index d000be28..2892b86e 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -79,7 +79,7 @@ class Property:
         # of the non-invasive surveys. We reflect that this has been installed in the recommendations, but remove the
         # cost and instead, provide a message that the measure has already been installed
 
-        self.override = ast.literal_eval(override['overrides']) if override is not None else []
+        self.override = ast.literal_eval(override['overrides']) if override else []
 
         self.uprn = epc_record.get("uprn")
         self.full_sap_epc = epc_record.get("full_sap_epc")

From d8caacae97006638aed112e7c8682a0a23372690 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 12 Apr 2024 17:46:06 +0100
Subject: [PATCH 218/262] creating non-invasive survey results WIP

---
 .idea/Model.iml                           |   2 +-
 .idea/misc.xml                            |   2 +-
 etl/customers/immo/pilot/non_invasive.py  | 131 ++++++++++++++++++++++
 etl/customers/immo/pilot/requirements.txt |   1 +
 4 files changed, 134 insertions(+), 2 deletions(-)
 create mode 100644 etl/customers/immo/pilot/non_invasive.py
 create mode 100644 etl/customers/immo/pilot/requirements.txt

diff --git a/.idea/Model.iml b/.idea/Model.iml
index 4413bb06..b0f9c00d 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 6f308057..1122b380 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/etl/customers/immo/pilot/non_invasive.py b/etl/customers/immo/pilot/non_invasive.py
new file mode 100644
index 00000000..cb978059
--- /dev/null
+++ b/etl/customers/immo/pilot/non_invasive.py
@@ -0,0 +1,131 @@
+import extract_msg
+
+
+def parse_msg_body(text):
+    # Split the text into lines
+    lines = text.split('\r\n')
+
+    # Dictionary to hold the parsed data
+    data = {}
+
+    # Process each line
+    for line in lines:
+        # Remove all asterisks and extra whitespace
+        clean_line = line.replace('*', '').strip()
+
+        if clean_line:  # Ensure the line is not empty after cleaning
+            # Attempt to split clean '=' if present
+            if '=' in clean_line:
+                clean_line = clean_line.replace(' = ', ': ')
+
+            # Use line content as a key with a default value indicating presence
+            # Generate a unique key for lines without '='
+            data[f"Info{len(data) + 1}"] = clean_line
+
+    return data
+
+
+def app():
+    """
+    This code retrieves the results of the non-invasive surveys, to be stored in S3
+    :return:
+    """
+
+    # filepath = ("/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/5 Oaklands B62 "
+    #             "0JA/Immo - 5 Oaklands Halesowen B62 0JA.msg")
+    # filepath = ("/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/6 Beech Rd DY1 "
+    #             "4BP/IMMO - 6 Beech Road Dudley DY1 4BP.msg")
+    # filepath = (
+    #     "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/8 Corporation Rd DY2 "
+    #     "7PX/IMMO - 8 Corporation Road Dudley DY2 7PX.msg"
+    # )
+    # filepath = (
+    #     "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/21 Wells Rd DY5 3TB/"
+    #     "IMMO - 21 Wells Road Brierley Hill DY5 3TB.msg"
+    # )
+    filepath = (
+        "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/47 Fairfield Rd DY8 "
+        "5UJ/IMMO - 47 Fairfield Road Wordsley Stourbridge DY8 5UJ.msg"
+    )
+
+    with extract_msg.Message(filepath) as msg:
+        sender = msg.sender
+        recipients = msg.to
+        subject = msg.subject
+        body = msg.body
+        # If the msg has attachments, they can be extracted as well
+        attachments = msg.attachments
+
+    from pprint import pprint
+    pprint(parse_msg_body(body))
+
+    # We manually create the non-invasive notes for the pilot
+    non_invasive_notes = [
+        {
+            'address': '5 Oaklands',
+            'postcode': 'B62 0JA',
+            'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a '
+                               'CIGA check and extracting the cavity, replacing with bead insulation. '
+                               'There is a shared alleyway with the neighbour, that is a solid brick wall.',
+            'Wall Render': 'Partial render between top of ground floor window and bottom of 1st floor window',
+            'Existing solar PV': 'No existing solar',
+            'Orientation': 'Front house direction: North East, Back house direction: South West',
+            'Access to mains?': 'Property has access to the mains',
+        },
+        {
+            'address': '6 Beech Road',
+            'postcode': 'DY1 4BP',
+            'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'Wall Insulation': '1st floor is solid brick with external wall insulation. 2nd floor is cavity, '
+                               'retro drilled, containing loose fibre insulation. Consider getting a '
+                               'CIGA check and extracting the cavity, replacing with bead insulation.',
+            'Wall Render': None,
+            'Existing solar PV': 'No existing solar',
+            'Orientation': 'Side house direction: North East',
+            'Access to mains?': 'Property has access to the mains',
+        },
+        {
+            'address': '8 Corporation Road',
+            'postcode': 'DY2 7PX',
+            'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'Wall Insulation': "External wall insulation",
+            'Wall Render': "Render finish throughout",
+            'Existing solar PV': 'No existing solar',
+            'Orientation': 'Front house direction: North East, Back house direction: South West',
+            'Access to mains?': None,
+        },
+        {
+
+            'address': '21 Wells Road',
+            'postcode': 'DY5 3TB',
+            'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a '
+                               'CIGA check and extracting the cavity, replacing with bead insulation.',
+            'Wall Render': None,
+            'Existing solar PV': 'No existing solar',
+            'Orientation': 'Front house direction: East, Back house direction: West',
+            'Access to mains?': 'Property has access to the mains',
+        },
+        {
+            'address': '47 Fairfield Road',
+            'postcode': 'DY8 5UJ',
+            'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a '
+                               'CIGA check and extracting the cavity, replacing with bead insulation.',
+            'Wall Render': None,
+            'Existing solar PV': 'No existing solar',
+            'Orientation': 'Front house direction: East, Back house direction: West',
+            'Access to mains?': 'Property has access to the mains',
+        },
+        {
+            'address': None,
+            'postcode': None,
+            'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'Wall Insulation': None,
+            'Wall Render': None,
+            'Existing solar PV': None,
+            'Orientation': None,
+            'Access to mains?': None,
+        },
+    ]
diff --git a/etl/customers/immo/pilot/requirements.txt b/etl/customers/immo/pilot/requirements.txt
new file mode 100644
index 00000000..4673ab35
--- /dev/null
+++ b/etl/customers/immo/pilot/requirements.txt
@@ -0,0 +1 @@
+extract-msg

From a158f2353c0f84bb005924441166ef56a899f59c Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 13 Apr 2024 15:36:58 +0100
Subject: [PATCH 219/262] manually created non-invasive notes

---
 etl/customers/immo/pilot/non_invasive.py | 63 ++++++++++++++++++++----
 1 file changed, 54 insertions(+), 9 deletions(-)

diff --git a/etl/customers/immo/pilot/non_invasive.py b/etl/customers/immo/pilot/non_invasive.py
index cb978059..c2b8ea64 100644
--- a/etl/customers/immo/pilot/non_invasive.py
+++ b/etl/customers/immo/pilot/non_invasive.py
@@ -43,9 +43,17 @@ def app():
     #     "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/21 Wells Rd DY5 3TB/"
     #     "IMMO - 21 Wells Road Brierley Hill DY5 3TB.msg"
     # )
+    # filepath = (
+    #     "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/47 Fairfield Rd DY8 "
+    #     "5UJ/IMMO - 47 Fairfield Road Wordsley Stourbridge DY8 5UJ.msg"
+    # )
+    # filepath = (
+    #     "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/91 Osprey Drive DY1 "
+    #     "2JS/IMMO - 91 Osprey Drive Dudley DY1 2JS.msg"
+    # )
     filepath = (
-        "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/47 Fairfield Rd DY8 "
-        "5UJ/IMMO - 47 Fairfield Road Wordsley Stourbridge DY8 5UJ.msg"
+        "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/195 Ashenhurst Rd DY1 "
+        "2JB/IMMO - 195 Ashenhurst Road Dudley DY1 2JB.msg"
     )
 
     with extract_msg.Message(filepath) as msg:
@@ -119,13 +127,50 @@ def app():
             'Access to mains?': 'Property has access to the mains',
         },
         {
-            'address': None,
-            'postcode': None,
+            'address': '53 Bromley',
+            'postcode': 'DY5 4PJ',
             'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
-            'Wall Insulation': None,
-            'Wall Render': None,
-            'Existing solar PV': None,
-            'Orientation': None,
-            'Access to mains?': None,
+            'Wall Insulation': "Filled at build, partially filled - celotex/king board, 50mm cavity remaining - "
+                               "recommends a cavity wall fill",
+            "Roof": "Hipped roof",
+            'Existing solar PV': 'No existing solar',
+            'Orientation': "Front house direction: North, Back house direction: South, Side house direction: West",
+            'Access to mains?': 'Property has access to the mains',
+        },
+        {
+            'address': '91 Osprey Drive',
+            'postcode': 'DY1 2JS',
+            'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a '
+                               'CIGA check and extracting the cavity, replacing with bead insulation.',
+            'Wall Render': 'Tile hung front and rear of property',
+            'Existing solar PV': 'No existing solar',
+            'Orientation': 'Side house direction: East',
+            'Access to mains?': 'Property has access to the mains',
+        },
+        {
+            'address': '150 Huntingtree Road',
+            'postcode': 'B63 4HP',
+            'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'Heating': 'Electric (storage heaters)',
+            'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a '
+                               'CIGA check and extracting the cavity, replacing with bead insulation.',
+            "Roof": "Hipped roof",
+            'Existing solar PV': 'No existing solar',
+            'Orientation': "Front house direction: North West, Back house direction: South East, Side house direction: "
+                           "North East",
+        },
+        {
+            'address': '195 Ashenhurst Road',
+            'postcode': 'DY1 2JB',
+            'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a '
+                               'CIGA check and extracting the cavity, replacing with bead insulation.',
+            'Wall Render': "Solid render front and rear of property",
+            'Existing solar PV': 'No existing solar',
+            'Orientation': 'Front house direction: South, Back house direction: North',
+            'Access to mains?': 'Property has access to the mains',
         },
     ]
+
+    # TODO: Push the non-invasive results straight to the database from here

From 485c01cbd69cf8b562b2d53da0ae03915edf8d93 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 13 Apr 2024 16:14:01 +0100
Subject: [PATCH 220/262] Added uprns to non-invasive notes

---
 etl/customers/immo/pilot/non_invasive.py | 35 ++++++++++++++++++------
 1 file changed, 27 insertions(+), 8 deletions(-)

diff --git a/etl/customers/immo/pilot/non_invasive.py b/etl/customers/immo/pilot/non_invasive.py
index c2b8ea64..0a376388 100644
--- a/etl/customers/immo/pilot/non_invasive.py
+++ b/etl/customers/immo/pilot/non_invasive.py
@@ -51,18 +51,17 @@ def app():
     #     "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/91 Osprey Drive DY1 "
     #     "2JS/IMMO - 91 Osprey Drive Dudley DY1 2JS.msg"
     # )
+    # filepath = (
+    #     "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/195 Ashenhurst Rd DY1 "
+    #     "2JB/IMMO - 195 Ashenhurst Road Dudley DY1 2JB.msg"
+    # )
     filepath = (
-        "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/195 Ashenhurst Rd DY1 "
-        "2JB/IMMO - 195 Ashenhurst Road Dudley DY1 2JB.msg"
+        "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/27 Milton Rd DY1 2JB/IMMO "
+        "- 27 Milton Road Coseley Bilston WV14 8HZ.msg"
     )
 
     with extract_msg.Message(filepath) as msg:
-        sender = msg.sender
-        recipients = msg.to
-        subject = msg.subject
         body = msg.body
-        # If the msg has attachments, they can be extracted as well
-        attachments = msg.attachments
 
     from pprint import pprint
     pprint(parse_msg_body(body))
@@ -70,6 +69,7 @@ def app():
     # We manually create the non-invasive notes for the pilot
     non_invasive_notes = [
         {
+            'uprn': 90028499,
             'address': '5 Oaklands',
             'postcode': 'B62 0JA',
             'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
@@ -82,6 +82,7 @@ def app():
             'Access to mains?': 'Property has access to the mains',
         },
         {
+            'uprn': 90055152,
             'address': '6 Beech Road',
             'postcode': 'DY1 4BP',
             'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
@@ -94,6 +95,7 @@ def app():
             'Access to mains?': 'Property has access to the mains',
         },
         {
+            'uprn': 90070461,
             'address': '8 Corporation Road',
             'postcode': 'DY2 7PX',
             'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
@@ -104,7 +106,7 @@ def app():
             'Access to mains?': None,
         },
         {
-
+            'uprn': 90022227,
             'address': '21 Wells Road',
             'postcode': 'DY5 3TB',
             'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
@@ -116,6 +118,7 @@ def app():
             'Access to mains?': 'Property has access to the mains',
         },
         {
+            'uprn': 90077535,
             'address': '47 Fairfield Road',
             'postcode': 'DY8 5UJ',
             'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
@@ -127,6 +130,7 @@ def app():
             'Access to mains?': 'Property has access to the mains',
         },
         {
+            'uprn': 90060989,
             'address': '53 Bromley',
             'postcode': 'DY5 4PJ',
             'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
@@ -138,6 +142,7 @@ def app():
             'Access to mains?': 'Property has access to the mains',
         },
         {
+            'uprn': 90048026,
             'address': '91 Osprey Drive',
             'postcode': 'DY1 2JS',
             'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
@@ -149,6 +154,7 @@ def app():
             'Access to mains?': 'Property has access to the mains',
         },
         {
+            'uprn': 90093693,
             'address': '150 Huntingtree Road',
             'postcode': 'B63 4HP',
             'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
@@ -161,6 +167,7 @@ def app():
                            "North East",
         },
         {
+            'uprn': 90051858,
             'address': '195 Ashenhurst Road',
             'postcode': 'DY1 2JB',
             'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
@@ -171,6 +178,18 @@ def app():
             'Orientation': 'Front house direction: South, Back house direction: North',
             'Access to mains?': 'Property has access to the mains',
         },
+        {
+            'uprn': 90106884,
+            'address': '27 Milton Road',
+            'postcode': 'WV14 8HZ',
+            'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a '
+                               'CIGA check and extracting the cavity, replacing with bead insulation.',
+            'Wall Render': "Solid render front and rear of property",
+            'Existing solar PV': 'No existing solar',
+            'Orientation': 'Front house direction: South East, Back house direction: North West',
+            'Access to mains?': 'Property has access to the mains',
+        },
     ]
 
     # TODO: Push the non-invasive results straight to the database from here

From 65f83930d56290fc73846ca4c8626ac46e3cd7c6 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 13 Apr 2024 16:25:12 +0100
Subject: [PATCH 221/262] added is_override to storage of recommendation

---
 .../db/functions/recommendations_functions.py |  3 ++-
 .../app/db/models/non_intrusive_surveys.py    | 24 +++++++++++++++++++
 backend/app/db/models/recommendations.py      |  1 +
 3 files changed, 27 insertions(+), 1 deletion(-)
 create mode 100644 backend/app/db/models/non_intrusive_surveys.py

diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py
index 1426e339..43daec77 100644
--- a/backend/app/db/functions/recommendations_functions.py
+++ b/backend/app/db/functions/recommendations_functions.py
@@ -85,7 +85,8 @@ def upload_recommendations(session: Session, recommendations_to_upload, property
             "co2_equivalent_savings": rec["co2_equivalent_savings"],
             "total_work_hours": rec["labour_hours"],
             "energy_cost_savings": rec["energy_cost_savings"],
-            "labour_days": rec["labour_days"]
+            "labour_days": rec["labour_days"],
+            "is_override": rec["is_override"],
         }
         for rec in recommendations_to_upload
     ]
diff --git a/backend/app/db/models/non_intrusive_surveys.py b/backend/app/db/models/non_intrusive_surveys.py
new file mode 100644
index 00000000..c5f3734a
--- /dev/null
+++ b/backend/app/db/models/non_intrusive_surveys.py
@@ -0,0 +1,24 @@
+from sqlalchemy import Column, BigInteger, String, Float, Boolean, TIMESTAMP, ForeignKey, Enum, Integer
+from sqlalchemy.orm import declarative_base
+from sqlalchemy.sql import func
+from backend.app.db.models.portfolio import Portfolio, PropertyModel
+from backend.app.db.models.materials import Material
+from datatypes.enums import QuantityUnits
+
+Base = declarative_base()
+
+
+class NonIntrusiveSurvey(Base):
+    __tablename__ = 'non_intrusive_survey'
+
+    id = Column(BigInteger, primary_key=True, autoincrement=True)
+    uprn = Column(Integer, nullable=False)
+    survey_date = Column(TIMESTAMP, nullable=False)
+    surveyor = Column(String, nullable=False)
+
+
+class NonIntrusiveSurveyNotes(Base):
+    id = Column(BigInteger, primary_key=True, autoincrement=True)
+    survey_id = Column(BigInteger, ForeignKey('non_intrusive_survey.id'), nullable=False)
+    title = Column(String, nullable=False)
+    note = Column(String, nullable=False)
diff --git a/backend/app/db/models/recommendations.py b/backend/app/db/models/recommendations.py
index a492f2f2..be5ff30c 100644
--- a/backend/app/db/models/recommendations.py
+++ b/backend/app/db/models/recommendations.py
@@ -30,6 +30,7 @@ class Recommendation(Base):
     rental_yield_increase = Column(Float)
     total_work_hours = Column(Float)
     labour_days = Column(Float)
+    is_override = Column(Boolean, nullable=False, default=False)
 
 
 class RecommendationMaterials(Base):

From aaa279463eea2505b3d36ee46c26b33b17955e77 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 13 Apr 2024 16:37:28 +0100
Subject: [PATCH 222/262] Added is_override to heating controls

---
 .idea/Model.iml                              |  2 +-
 .idea/misc.xml                               |  2 +-
 recommendations/HeatingControlRecommender.py | 46 ++++++++++++++------
 3 files changed, 35 insertions(+), 15 deletions(-)

diff --git a/.idea/Model.iml b/.idea/Model.iml
index b0f9c00d..4413bb06 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 1122b380..6f308057 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/recommendations/HeatingControlRecommender.py b/recommendations/HeatingControlRecommender.py
index 76eaba4f..63218163 100644
--- a/recommendations/HeatingControlRecommender.py
+++ b/recommendations/HeatingControlRecommender.py
@@ -1,5 +1,5 @@
 from recommendations.Costs import Costs
-from recommendations.recommendation_utils import check_simulation_difference
+from recommendations.recommendation_utils import check_simulation_difference, override_costs
 from backend.Property import Property
 from etl.epc_clean.epc_attributes.MainheatControlAttributes import MainheatControlAttributes
 
@@ -159,20 +159,30 @@ class HeatingControlRecommender:
         has_room_thermostat = not needs_room_thermostat
         has_trvs = not needs_trvs
 
+        cost_result = self.costs.roomstat_programmer_trvs(
+            number_heated_rooms=int(self.property.data["number-heated-rooms"]),
+            has_programmer=has_programmer,
+            has_room_thermostat=has_room_thermostat,
+            has_trvs=has_trvs
+        )
+
+        description = "upgrade heating controls to Room thermostat, programmer and TRVs"
+
+        is_override = "heating_control" in self.property.override
+        if is_override:
+            cost_result = override_costs(cost_result)
+            description = "Heating controls have already been upgraded, no further action needed."
+
         self.recommendation.append(
             {
                 "type": "heating_control",
                 "parts": [],
-                "description": "upgrade heating controls to Room thermostat, programmer and TRVs",
-                **self.costs.roomstat_programmer_trvs(
-                    number_heated_rooms=int(self.property.data["number-heated-rooms"]),
-                    has_programmer=has_programmer,
-                    has_room_thermostat=has_room_thermostat,
-                    has_trvs=has_trvs
-                ),
+                "description": description,
+                **cost_result,
                 "starting_u_value": None,
                 "new_u_value": None,
                 "sap_points": None,
+                "is_override": is_override,
                 "simulation_config": simulation_config
             }
         )
@@ -211,18 +221,28 @@ class HeatingControlRecommender:
         if self.property.data["mainheatc-energy-eff"] in ["Poor", "Very Poor", "Average", "Good"]:
             simulation_config["mainheatc_energy_eff_ending"] = "Very Good"
 
+        cost_result = self.costs.time_and_temperature_zone_control(
+            number_heated_rooms=int(self.property.data["number-heated-rooms"])
+        )
+
+        description = ("Upgrade heating controls to Smart Thermostats, room sensors and smart radiator valves (time & "
+                       "temperature zone control)")
+
+        is_override = "heating_control" in self.property.override
+        if is_override:
+            cost_result = override_costs(cost_result)
+            description = "Heating controls have already been upgraded, no further action needed."
+
         self.recommendation.append(
             {
                 "type": "heating_control",
                 "parts": [],
-                "description": "Upgrade heating controls to Smart Thermostats, room sensors and smart radiator valves "
-                               "(time & temperature zone control)",
-                **self.costs.time_and_temperature_zone_control(
-                    number_heated_rooms=int(self.property.data["number-heated-rooms"])
-                ),
+                "description": description,
+                **cost_result,
                 "starting_u_value": None,
                 "new_u_value": None,
                 "sap_points": None,
+                "is_override": is_override,
                 "simulation_config": simulation_config
             }
         )

From 527291b4395eb8b5563f52fd8449faee569d6789 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 13 Apr 2024 16:40:13 +0100
Subject: [PATCH 223/262] Added is_override to mechanical ventilation
 recommendation

---
 recommendations/VentilationRecommendations.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/recommendations/VentilationRecommendations.py b/recommendations/VentilationRecommendations.py
index 07f7cf1e..7ffcda08 100644
--- a/recommendations/VentilationRecommendations.py
+++ b/recommendations/VentilationRecommendations.py
@@ -69,6 +69,7 @@ class VentilationRecommendations(Definitions):
                 "description": f"Install {n_units} {part[0]['description']} units",
                 "starting_u_value": None,
                 "new_u_value": None,
+                "is_override": is_override,
                 "sap_points": 0,
                 "heat_demand": 0,
                 "adjusted_heat_demand": 0,

From 34d6a075289b0c2d31d75a1bad8ea5c969f12fca Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 13 Apr 2024 17:07:42 +0100
Subject: [PATCH 224/262] Pushed non-intrusive survey results to db

---
 .../app/db/functions/non_intrusive_surveys.py | 50 ++++++++++
 .../app/db/models/non_intrusive_surveys.py    |  8 +-
 etl/customers/immo/pilot/non_invasive.py      | 99 +++++++++++--------
 3 files changed, 110 insertions(+), 47 deletions(-)
 create mode 100644 backend/app/db/functions/non_intrusive_surveys.py

diff --git a/backend/app/db/functions/non_intrusive_surveys.py b/backend/app/db/functions/non_intrusive_surveys.py
new file mode 100644
index 00000000..93348121
--- /dev/null
+++ b/backend/app/db/functions/non_intrusive_surveys.py
@@ -0,0 +1,50 @@
+from sqlalchemy.orm import Session
+from backend.app.db.models.non_intrusive_surveys import NonIntrusiveSurvey, NonIntrusiveSurveyNotes
+
+
+def upload_non_intrusive_survey_notes(session: Session, non_invasive_notes, batch_size=500):
+    """
+    Uploads a list of non-intrusive survey notes into the database in batches. Each dictionary in the list represents
+    one survey and its associated notes.
+
+    :param session: SQLAlchemy Session object through which all database transactions are handled.
+    :param non_invasive_notes: List of dictionaries where each dictionary contains survey details including 'uprn',
+                               'survey_date', 'surveyor', and other notes as key-value pairs.
+    :param batch_size: The size of each batch to be processed (default is 500).
+    :return: None
+    """
+
+    # Helper function to process each batch
+    def process_batch(batch):
+        surveys = []
+        notes = []
+
+        for note in batch:
+            survey = NonIntrusiveSurvey(
+                uprn=note['uprn'],
+                survey_date=note['survey_date'],
+                surveyor=note['surveyor']
+            )
+            surveys.append(survey)
+
+        session.add_all(surveys)
+        session.flush()  # Get IDs for surveys
+
+        for note, survey in zip(batch, surveys):
+            for key, value in note.items():
+                if key not in ['uprn', 'survey_date', 'surveyor']:
+                    notes.append(NonIntrusiveSurveyNotes(
+                        survey_id=survey.id,
+                        title=key,
+                        note=value
+                    ))
+
+        session.bulk_save_objects(notes)
+        session.commit()
+
+    # Split the data into batches and process each batch
+    total = len(non_invasive_notes)
+    for start in range(0, total, batch_size):
+        end = min(start + batch_size, total)
+        batch = non_invasive_notes[start:end]
+        process_batch(batch)
diff --git a/backend/app/db/models/non_intrusive_surveys.py b/backend/app/db/models/non_intrusive_surveys.py
index c5f3734a..bc2d8adc 100644
--- a/backend/app/db/models/non_intrusive_surveys.py
+++ b/backend/app/db/models/non_intrusive_surveys.py
@@ -1,9 +1,5 @@
-from sqlalchemy import Column, BigInteger, String, Float, Boolean, TIMESTAMP, ForeignKey, Enum, Integer
+from sqlalchemy import Column, BigInteger, String, TIMESTAMP, ForeignKey, Integer
 from sqlalchemy.orm import declarative_base
-from sqlalchemy.sql import func
-from backend.app.db.models.portfolio import Portfolio, PropertyModel
-from backend.app.db.models.materials import Material
-from datatypes.enums import QuantityUnits
 
 Base = declarative_base()
 
@@ -18,6 +14,8 @@ class NonIntrusiveSurvey(Base):
 
 
 class NonIntrusiveSurveyNotes(Base):
+    __tablename__ = 'non_intrusive_survey_notes'
+
     id = Column(BigInteger, primary_key=True, autoincrement=True)
     survey_id = Column(BigInteger, ForeignKey('non_intrusive_survey.id'), nullable=False)
     title = Column(String, nullable=False)
diff --git a/etl/customers/immo/pilot/non_invasive.py b/etl/customers/immo/pilot/non_invasive.py
index 0a376388..6dc22c62 100644
--- a/etl/customers/immo/pilot/non_invasive.py
+++ b/etl/customers/immo/pilot/non_invasive.py
@@ -1,4 +1,8 @@
-import extract_msg
+# import extract_msg
+from datetime import datetime
+from sqlalchemy.orm import sessionmaker
+from backend.app.db.connection import db_engine
+from backend.app.db.functions.non_intrusive_surveys import upload_non_intrusive_survey_notes
 
 
 def parse_msg_body(text):
@@ -55,24 +59,25 @@ def app():
     #     "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/195 Ashenhurst Rd DY1 "
     #     "2JB/IMMO - 195 Ashenhurst Road Dudley DY1 2JB.msg"
     # )
-    filepath = (
-        "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/27 Milton Rd DY1 2JB/IMMO "
-        "- 27 Milton Road Coseley Bilston WV14 8HZ.msg"
-    )
-
-    with extract_msg.Message(filepath) as msg:
-        body = msg.body
-
-    from pprint import pprint
-    pprint(parse_msg_body(body))
+    # filepath = (
+    #     "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/27 Milton Rd DY1 2JB/IMMO "
+    #     "- 27 Milton Road Coseley Bilston WV14 8HZ.msg"
+    # )
+    #
+    # with extract_msg.Message(filepath) as msg:
+    #     body = msg.body
+    #
+    # from pprint import pprint
+    # pprint(parse_msg_body(body))
 
     # We manually create the non-invasive notes for the pilot
     non_invasive_notes = [
         {
             'uprn': 90028499,
-            'address': '5 Oaklands',
-            'postcode': 'B62 0JA',
-            'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            # 'address': '5 Oaklands',
+            # 'postcode': 'B62 0JA',
+            'surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'survey_date': datetime.strptime('2024-04-11', '%Y-%m-%d'),
             'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a '
                                'CIGA check and extracting the cavity, replacing with bead insulation. '
                                'There is a shared alleyway with the neighbour, that is a solid brick wall.',
@@ -83,9 +88,10 @@ def app():
         },
         {
             'uprn': 90055152,
-            'address': '6 Beech Road',
-            'postcode': 'DY1 4BP',
-            'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            # 'address': '6 Beech Road',
+            # 'postcode': 'DY1 4BP',
+            'surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'survey_date': datetime.strptime('2024-04-11', '%Y-%m-%d'),
             'Wall Insulation': '1st floor is solid brick with external wall insulation. 2nd floor is cavity, '
                                'retro drilled, containing loose fibre insulation. Consider getting a '
                                'CIGA check and extracting the cavity, replacing with bead insulation.',
@@ -96,9 +102,10 @@ def app():
         },
         {
             'uprn': 90070461,
-            'address': '8 Corporation Road',
-            'postcode': 'DY2 7PX',
-            'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            # 'address': '8 Corporation Road',
+            # 'postcode': 'DY2 7PX',
+            'surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'survey_date': datetime.strptime('2024-04-11', '%Y-%m-%d'),
             'Wall Insulation': "External wall insulation",
             'Wall Render': "Render finish throughout",
             'Existing solar PV': 'No existing solar',
@@ -107,9 +114,10 @@ def app():
         },
         {
             'uprn': 90022227,
-            'address': '21 Wells Road',
-            'postcode': 'DY5 3TB',
-            'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            # 'address': '21 Wells Road',
+            # 'postcode': 'DY5 3TB',
+            'surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'survey_date': datetime.strptime('2024-04-11', '%Y-%m-%d'),
             'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a '
                                'CIGA check and extracting the cavity, replacing with bead insulation.',
             'Wall Render': None,
@@ -119,9 +127,10 @@ def app():
         },
         {
             'uprn': 90077535,
-            'address': '47 Fairfield Road',
-            'postcode': 'DY8 5UJ',
-            'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            # 'address': '47 Fairfield Road',
+            # 'postcode': 'DY8 5UJ',
+            'surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'survey_date': datetime.strptime('2024-04-11', '%Y-%m-%d'),
             'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a '
                                'CIGA check and extracting the cavity, replacing with bead insulation.',
             'Wall Render': None,
@@ -131,9 +140,10 @@ def app():
         },
         {
             'uprn': 90060989,
-            'address': '53 Bromley',
-            'postcode': 'DY5 4PJ',
-            'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            # 'address': '53 Bromley',
+            # 'postcode': 'DY5 4PJ',
+            'surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'survey_date': datetime.strptime('2024-04-11', '%Y-%m-%d'),
             'Wall Insulation': "Filled at build, partially filled - celotex/king board, 50mm cavity remaining - "
                                "recommends a cavity wall fill",
             "Roof": "Hipped roof",
@@ -143,9 +153,10 @@ def app():
         },
         {
             'uprn': 90048026,
-            'address': '91 Osprey Drive',
-            'postcode': 'DY1 2JS',
-            'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            # 'address': '91 Osprey Drive',
+            # 'postcode': 'DY1 2JS',
+            'surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'survey_date': datetime.strptime('2024-04-11', '%Y-%m-%d'),
             'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a '
                                'CIGA check and extracting the cavity, replacing with bead insulation.',
             'Wall Render': 'Tile hung front and rear of property',
@@ -155,9 +166,10 @@ def app():
         },
         {
             'uprn': 90093693,
-            'address': '150 Huntingtree Road',
-            'postcode': 'B63 4HP',
-            'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            # 'address': '150 Huntingtree Road',
+            # 'postcode': 'B63 4HP',
+            'surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'survey_date': datetime.strptime('2024-04-11', '%Y-%m-%d'),
             'Heating': 'Electric (storage heaters)',
             'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a '
                                'CIGA check and extracting the cavity, replacing with bead insulation.',
@@ -168,9 +180,10 @@ def app():
         },
         {
             'uprn': 90051858,
-            'address': '195 Ashenhurst Road',
-            'postcode': 'DY1 2JB',
-            'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            # 'address': '195 Ashenhurst Road',
+            # 'postcode': 'DY1 2JB',
+            'surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'survey_date': datetime.strptime('2024-04-11', '%Y-%m-%d'),
             'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a '
                                'CIGA check and extracting the cavity, replacing with bead insulation.',
             'Wall Render': "Solid render front and rear of property",
@@ -180,9 +193,10 @@ def app():
         },
         {
             'uprn': 90106884,
-            'address': '27 Milton Road',
-            'postcode': 'WV14 8HZ',
-            'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            # 'address': '27 Milton Road',
+            # 'postcode': 'WV14 8HZ',
+            'surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'survey_date': datetime.strptime('2024-04-11', '%Y-%m-%d'),
             'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a '
                                'CIGA check and extracting the cavity, replacing with bead insulation.',
             'Wall Render': "Solid render front and rear of property",
@@ -192,4 +206,5 @@ def app():
         },
     ]
 
-    # TODO: Push the non-invasive results straight to the database from here
+    session = sessionmaker(bind=db_engine)()
+    upload_non_intrusive_survey_notes(session=session, non_invasive_notes=non_invasive_notes, batch_size=500)

From 954fa9d32c5d30bd63098b74512b006b47bf3056 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 14 Apr 2024 14:57:13 +0100
Subject: [PATCH 225/262] changed is_override to already_installed in
 recommendations

---
 backend/Property.py                           |  4 ++--
 backend/app/plan/router.py                    | 17 +++++++++--------
 backend/app/plan/schemas.py                   |  2 +-
 recommendations/FireplaceRecommendations.py   |  6 +++---
 recommendations/FloorRecommendations.py       | 10 +++++-----
 recommendations/HeatingControlRecommender.py  | 12 ++++++------
 recommendations/HeatingRecommender.py         | 12 ++++++------
 recommendations/HotwaterRecommendations.py    |  6 +++---
 recommendations/LightingRecommendations.py    |  6 +++---
 recommendations/RoofRecommendations.py        | 10 +++++-----
 recommendations/SecondaryHeating.py           |  6 +++---
 recommendations/SolarPvRecommendations.py     |  6 +++---
 recommendations/VentilationRecommendations.py | 10 +++++-----
 recommendations/WallRecommendations.py        | 16 ++++++++--------
 recommendations/WindowsRecommendations.py     |  6 +++---
 15 files changed, 65 insertions(+), 64 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index 2892b86e..a8ed9129 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -61,7 +61,7 @@ class Property:
     n_bedrooms = None
 
     def __init__(
-        self, id, postcode, address, epc_record, override=None, **kwargs
+        self, id, postcode, address, epc_record, already_installed=None, **kwargs
     ):
 
         self.epc_record = epc_record
@@ -79,7 +79,7 @@ class Property:
         # of the non-invasive surveys. We reflect that this has been installed in the recommendations, but remove the
         # cost and instead, provide a message that the measure has already been installed
 
-        self.override = ast.literal_eval(override['overrides']) if override else []
+        self.already_installed = ast.literal_eval(already_installed['already_installed']) if already_installed else []
 
         self.uprn = epc_record.get("uprn")
         self.full_sap_epc = epc_record.get("full_sap_epc")
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 08ce0dcc..49e14872 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -74,7 +74,7 @@ async def trigger_plan(body: PlanTriggerRequest):
     # TODO: We should store the trigger file path in the database with the plan so we can track the file that
     #       triggered the plan
 
-    # TODO: Create the ability to congigure/switch off certain measures
+    # TODO: if the measure is already installed, it should actually be the very first phase
 
     try:
         session.begin()
@@ -85,10 +85,10 @@ async def trigger_plan(body: PlanTriggerRequest):
         if body.patches_file_path:
             patches = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.patches_file_path)
 
-        overrides = []
-        if body.overrides_file_path:
-            overrides = read_csv_from_s3(
-                bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.overrides_file_path
+        already_installed = []
+        if body.already_installed_file_path:
+            already_installed = read_csv_from_s3(
+                bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.already_installed_file_path
             )
 
         cleaning_data = read_dataframe_from_s3_parquet(
@@ -142,8 +142,9 @@ async def trigger_plan(body: PlanTriggerRequest):
                 cleaning_data=cleaning_data
             )
 
-            override = next((
-                x for x in overrides if (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
+            property_already_installed = next((
+                x for x in already_installed if
+                (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
             ), {})
             input_properties.append(
                 Property(
@@ -151,7 +152,7 @@ async def trigger_plan(body: PlanTriggerRequest):
                     address=epc_searcher.address_clean,
                     postcode=epc_searcher.postcode_clean,
                     epc_record=prepared_epc,
-                    override=override,
+                    already_installed=property_already_installed,
                     **Property.extract_kwargs(config)
                 )
             )
diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py
index ec49e41e..76eb49d2 100644
--- a/backend/app/plan/schemas.py
+++ b/backend/app/plan/schemas.py
@@ -9,7 +9,7 @@ class PlanTriggerRequest(BaseModel):
     goal_value: str
     portfolio_id: int
     trigger_file_path: str
-    overrides_file_path: Optional[str] = None
+    already_installed_file_path: Optional[str] = None
     patches_file_path: Optional[str] = None
     exclusions: Optional[conlist(str, min_items=1)] = None
 
diff --git a/recommendations/FireplaceRecommendations.py b/recommendations/FireplaceRecommendations.py
index c1114f31..601a8eb0 100644
--- a/recommendations/FireplaceRecommendations.py
+++ b/recommendations/FireplaceRecommendations.py
@@ -32,8 +32,8 @@ class FireplaceRecommendations(Definitions):
         if number_open_fireplaces == 0:
             return
 
-        is_override = "sealing_open_fireplace" in self.property.override
-        estimated_cost = number_open_fireplaces * self.COST_OF_WORK if not is_override else 0
+        already_installed = "sealing_open_fireplace" in self.property.already_installed
+        estimated_cost = number_open_fireplaces * self.COST_OF_WORK if not already_installed else 0
 
         # We recommend installing two mechanical ventilation systems
         self.recommendation = [
@@ -45,7 +45,7 @@ class FireplaceRecommendations(Definitions):
                 "starting_u_value": None,
                 "new_u_value": None,
                 "sap_points": None,
-                "is_override": is_override,
+                "already_installed": already_installed,
                 "total": estimated_cost,
                 # Take a very basic estimate of 6 hours, multipled by the number of open fireplaces to seal
                 "labour_hours": 6 * number_open_fireplaces,
diff --git a/recommendations/FloorRecommendations.py b/recommendations/FloorRecommendations.py
index b7bd370c..3f764d83 100644
--- a/recommendations/FloorRecommendations.py
+++ b/recommendations/FloorRecommendations.py
@@ -193,8 +193,8 @@ class FloorRecommendations(Definitions):
                             non_insulation_materials=non_insulation_materials
                         )
 
-                        is_override = "suspended_floor_insulation" in self.property.override
-                        if is_override:
+                        already_installed = "suspended_floor_insulation" in self.property.already_installed
+                        if already_installed:
                             cost_result = override_costs(cost_result)
 
                     elif material["type"] == "solid_floor_insulation":
@@ -204,8 +204,8 @@ class FloorRecommendations(Definitions):
                             non_insulation_materials=non_insulation_materials
                         )
 
-                        is_override = "solid_floor_insulation" in self.property.override
-                        if is_override:
+                        already_installed = "solid_floor_insulation" in self.property.already_installed
+                        if already_installed:
                             cost_result = override_costs(cost_result)
                     else:
                         raise NotImplementedError("Implement me!")
@@ -226,7 +226,7 @@ class FloorRecommendations(Definitions):
                             "starting_u_value": u_value,
                             "new_u_value": new_u_value,
                             "sap_points": None,
-                            "is_override": is_override,
+                            "already_installed": already_installed,
                             **cost_result
                         }
                     )
diff --git a/recommendations/HeatingControlRecommender.py b/recommendations/HeatingControlRecommender.py
index 63218163..d24ad811 100644
--- a/recommendations/HeatingControlRecommender.py
+++ b/recommendations/HeatingControlRecommender.py
@@ -168,8 +168,8 @@ class HeatingControlRecommender:
 
         description = "upgrade heating controls to Room thermostat, programmer and TRVs"
 
-        is_override = "heating_control" in self.property.override
-        if is_override:
+        already_installed = "heating_control" in self.property.already_installed
+        if already_installed:
             cost_result = override_costs(cost_result)
             description = "Heating controls have already been upgraded, no further action needed."
 
@@ -182,7 +182,7 @@ class HeatingControlRecommender:
                 "starting_u_value": None,
                 "new_u_value": None,
                 "sap_points": None,
-                "is_override": is_override,
+                "already_installed": already_installed,
                 "simulation_config": simulation_config
             }
         )
@@ -228,8 +228,8 @@ class HeatingControlRecommender:
         description = ("Upgrade heating controls to Smart Thermostats, room sensors and smart radiator valves (time & "
                        "temperature zone control)")
 
-        is_override = "heating_control" in self.property.override
-        if is_override:
+        already_installed = "heating_control" in self.property.already_installed
+        if already_installed:
             cost_result = override_costs(cost_result)
             description = "Heating controls have already been upgraded, no further action needed."
 
@@ -242,7 +242,7 @@ class HeatingControlRecommender:
                 "starting_u_value": None,
                 "new_u_value": None,
                 "sap_points": None,
-                "is_override": is_override,
+                "already_installed": already_installed,
                 "simulation_config": simulation_config
             }
         )
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index d83b755e..432dc6a6 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -144,8 +144,8 @@ class HeatingRecommender:
 
                 recommendation_description = f"{description} and {controls_description}"
 
-            is_override = "cavity_wall_insulation" in self.property.override
-            if is_override:
+            already_installed = "cavity_wall_insulation" in self.property.already_installed
+            if already_installed:
                 total_costs = override_costs(total_costs)
                 recommendation_description = "Heating system has already been upgraded, no further action needed."
 
@@ -159,7 +159,7 @@ class HeatingRecommender:
                 "starting_u_value": None,
                 "new_u_value": None,
                 "sap_points": None,
-                "is_override": is_override,
+                "already_installed": already_installed,
                 **total_costs,
                 "simulation_config": recommendation_simulation_config
             }
@@ -371,8 +371,8 @@ class HeatingRecommender:
                 n_rooms=self.property.number_of_rooms
             )
 
-            is_override = "heating" in self.property.override
-            if is_override:
+            already_installed = "heating" in self.property.already_installed
+            if already_installed:
                 boiler_costs = override_costs(boiler_costs)
                 description = "Heating system has already been upgraded, no further action needed."
 
@@ -386,7 +386,7 @@ class HeatingRecommender:
                 "starting_u_value": None,
                 "new_u_value": None,
                 "sap_points": None,
-                "is_override": is_override,
+                "already_installed": already_installed,
                 "simulation_config": simulation_config,
                 **boiler_costs
             }
diff --git a/recommendations/HotwaterRecommendations.py b/recommendations/HotwaterRecommendations.py
index 88cfa932..9c5c7045 100644
--- a/recommendations/HotwaterRecommendations.py
+++ b/recommendations/HotwaterRecommendations.py
@@ -42,8 +42,8 @@ class HotwaterRecommendations:
 
         recommendation_cost = self.costs.hot_water_tank_insulation()
 
-        is_override = "hot_water_tank_insulation" in self.property.override
-        if is_override:
+        already_installed = "hot_water_tank_insulation" in self.property.already_installed
+        if already_installed:
             recommendation_cost = override_costs(recommendation_cost)
             description = "Insulation tank has already been insulated, no further action required"
         else:
@@ -60,7 +60,7 @@ class HotwaterRecommendations:
                 "starting_u_value": None,
                 "new_u_value": None,
                 "sap_points": None,
-                "is_override": is_override,
+                "already_installed": already_installed,
                 **recommendation_cost,
                 "simulation_config": {"hot_water_energy_eff_ending": "Average"}
             }
diff --git a/recommendations/LightingRecommendations.py b/recommendations/LightingRecommendations.py
index 9e4c8e43..31720579 100644
--- a/recommendations/LightingRecommendations.py
+++ b/recommendations/LightingRecommendations.py
@@ -92,8 +92,8 @@ class LightingRecommendations:
 
         heat_demand_change, carbon_change = self.estimate_lighting_impact(number_non_lel_outlets)
 
-        is_override = "low_energy_lighting" in self.property.override
-        if is_override:
+        already_installed = "low_energy_lighting" in self.property.already_installed
+        if already_installed:
             cost_result = override_costs(cost_result)
             description = "Low energy lighting has already been installed, no further action required"
 
@@ -105,7 +105,7 @@ class LightingRecommendations:
                 "description": description,
                 "starting_u_value": None,
                 "new_u_value": None,
-                "is_override": is_override,
+                "already_installed": already_installed,
                 # For SAP points, we use the fact that lighting is usually worth 2 points and we scale this to
                 # the proportion of lights that will be set to low energy
                 "sap_points": round(2 * (number_non_lel_outlets / number_lighting_outlets), 2),
diff --git a/recommendations/RoofRecommendations.py b/recommendations/RoofRecommendations.py
index 5ba7e82e..dc5ee7db 100644
--- a/recommendations/RoofRecommendations.py
+++ b/recommendations/RoofRecommendations.py
@@ -207,8 +207,8 @@ class RoofRecommendations:
                             floor_area=self.property.insulation_floor_area,
                             material=material
                         )
-                        is_override = "loft_insulation" in self.property.override
-                        if is_override:
+                        already_installed = "loft_insulation" in self.property.already_installed
+                        if already_installed:
                             cost_result = override_costs(cost_result)
                     elif material["type"] == "flat_roof_insulation":
                         cost_result = self.costs.flat_roof_insulation(
@@ -216,8 +216,8 @@ class RoofRecommendations:
                             material=material,
                             non_insulation_materials=non_insulation_materials
                         )
-                        is_override = "flat_roof_insulation" in self.property.override
-                        if is_override:
+                        already_installed = "flat_roof_insulation" in self.property.already_installed
+                        if already_installed:
                             cost_result = override_costs(cost_result)
                     else:
                         raise ValueError("Invalid material type")
@@ -238,7 +238,7 @@ class RoofRecommendations:
                             "starting_u_value": u_value,
                             "new_u_value": new_u_value,
                             "sap_points": None,
-                            "is_override": is_override,
+                            "already_installed": already_installed,
                             **cost_result
                         }
                     )
diff --git a/recommendations/SecondaryHeating.py b/recommendations/SecondaryHeating.py
index e426977e..5d763510 100644
--- a/recommendations/SecondaryHeating.py
+++ b/recommendations/SecondaryHeating.py
@@ -40,8 +40,8 @@ class SecondaryHeating:
 
         costs = self.costs.heater_removal(n_rooms=n_rooms)
 
-        is_override = "secondary_heating" in self.property.override
-        if is_override:
+        already_installed = "secondary_heating" in self.property.already_installed
+        if already_installed:
             costs = override_costs(costs)
             description = "Secondary heating system has already been removed, no further action required"
         else:
@@ -56,7 +56,7 @@ class SecondaryHeating:
                 "starting_u_value": None,
                 "new_u_value": None,
                 "sap_points": None,
-                "is_override": is_override,
+                "already_installed": already_installed,
                 **costs,
                 "simulation_config": {
                     "secondheat_description_ending": "None"
diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py
index 72fcdf4b..58cf9735 100644
--- a/recommendations/SolarPvRecommendations.py
+++ b/recommendations/SolarPvRecommendations.py
@@ -111,8 +111,8 @@ class SolarPvRecommendations:
                 description = (f"Install a {kw} kilowatt-peak (kWp) solar photovoltaic (PV) p"
                                f"anel system on {round(roof_coverage_percent)}% the roof.")
 
-            is_override = "solar_pv" in self.property.override
-            if is_override:
+            already_installed = "solar_pv" in self.property.already_installed
+            if already_installed:
                 cost_result = override_costs(cost_result)
 
             self.recommendation.append(
@@ -124,7 +124,7 @@ class SolarPvRecommendations:
                     "starting_u_value": None,
                     "new_u_value": None,
                     "sap_points": None,
-                    "is_override": is_override,
+                    "already_installed": already_installed,
                     **cost_result,
                     # This is required for simulating the SAP impact. solar_pv_percentage is between 0 & 1 so we scale
                     # back up here
diff --git a/recommendations/VentilationRecommendations.py b/recommendations/VentilationRecommendations.py
index 7ffcda08..5b36bd9c 100644
--- a/recommendations/VentilationRecommendations.py
+++ b/recommendations/VentilationRecommendations.py
@@ -50,11 +50,11 @@ class VentilationRecommendations(Definitions):
 
         part = self.materials.copy()
 
-        is_override = "cavity_wall_insulation" in self.property.override
+        already_installed = "cavity_wall_insulation" in self.property.already_installed
 
-        estimated_cost = n_units * part[0]["cost"] if not is_override else 0
-        labour_hours = 4 * n_units if not is_override else 0
-        labour_days = 4 * n_units / 8.0 if not is_override else 0
+        estimated_cost = n_units * part[0]["cost"] if not already_installed else 0
+        labour_hours = 4 * n_units if not already_installed else 0
+        labour_days = 4 * n_units / 8.0 if not already_installed else 0
 
         part[0]["total"] = estimated_cost
         part[0]["quantity"] = n_units
@@ -69,7 +69,7 @@ class VentilationRecommendations(Definitions):
                 "description": f"Install {n_units} {part[0]['description']} units",
                 "starting_u_value": None,
                 "new_u_value": None,
-                "is_override": is_override,
+                "already_installed": already_installed,
                 "sap_points": 0,
                 "heat_demand": 0,
                 "adjusted_heat_demand": 0,
diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py
index 471a62cb..feb2620b 100644
--- a/recommendations/WallRecommendations.py
+++ b/recommendations/WallRecommendations.py
@@ -221,8 +221,8 @@ class WallRecommendations(Definitions):
                     material=material.to_dict(),
                 )
 
-                is_override = "cavity_wall_insulation" in self.property.override
-                if is_override:
+                already_installed = "cavity_wall_insulation" in self.property.already_installed
+                if already_installed:
                     cost_result = override_costs(cost_result)
 
                 recommendations.append(
@@ -241,7 +241,7 @@ class WallRecommendations(Definitions):
                         "starting_u_value": u_value,
                         "new_u_value": new_u_value,
                         "sap_points": None,
-                        "is_override": is_override,
+                        "already_installed": already_installed,
                         **cost_result
                     }
                 )
@@ -282,8 +282,8 @@ class WallRecommendations(Definitions):
                             material=material.to_dict(),
                             non_insulation_materials=non_insulation_materials
                         )
-                        is_override = "internal_wall_insulation" in self.property.override
-                        if is_override:
+                        already_installed = "internal_wall_insulation" in self.property.already_installed
+                        if already_installed:
                             cost_result = override_costs(cost_result)
 
                     elif material["type"] == "external_wall_insulation":
@@ -292,8 +292,8 @@ class WallRecommendations(Definitions):
                             material=material.to_dict(),
                             non_insulation_materials=non_insulation_materials
                         )
-                        is_override = "external_wall_insulation" in self.property.override
-                        if is_override:
+                        already_installed = "external_wall_insulation" in self.property.already_installed
+                        if already_installed:
                             cost_result = override_costs(cost_result)
                     else:
                         raise ValueError("Invalid material type")
@@ -313,7 +313,7 @@ class WallRecommendations(Definitions):
                             "description": self._make_description(material),
                             "starting_u_value": u_value,
                             "new_u_value": new_u_value,
-                            "is_override": is_override,
+                            "already_installed": already_installed,
                             "sap_points": None,
                             **cost_result
                         }
diff --git a/recommendations/WindowsRecommendations.py b/recommendations/WindowsRecommendations.py
index b2fe20a6..b7c2823a 100644
--- a/recommendations/WindowsRecommendations.py
+++ b/recommendations/WindowsRecommendations.py
@@ -71,8 +71,8 @@ class WindowsRecommendations:
             is_secondary_glazing=is_secondary_glazing
         )
 
-        is_override = "windows_glazing" in self.property.override
-        if is_override:
+        already_installed = "windows_glazing" in self.property.already_installed
+        if already_installed:
             cost_result = override_costs(cost_result)
             description = "The property already has double glazing installed. No further action is required."
         else:
@@ -98,7 +98,7 @@ class WindowsRecommendations:
                 "starting_u_value": None,
                 "new_u_value": None,
                 "sap_points": None,
-                "is_override": is_override,
+                "already_installed": already_installed,
                 **cost_result,
                 "is_secondary_glazing": is_secondary_glazing
             }

From c58389a26695d863d003a4cf2c9f26515f9898ea Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 14 Apr 2024 14:57:46 +0100
Subject: [PATCH 226/262] updated push to db

---
 backend/app/db/functions/recommendations_functions.py | 2 +-
 backend/app/db/models/recommendations.py              | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py
index 43daec77..b22ce92f 100644
--- a/backend/app/db/functions/recommendations_functions.py
+++ b/backend/app/db/functions/recommendations_functions.py
@@ -86,7 +86,7 @@ def upload_recommendations(session: Session, recommendations_to_upload, property
             "total_work_hours": rec["labour_hours"],
             "energy_cost_savings": rec["energy_cost_savings"],
             "labour_days": rec["labour_days"],
-            "is_override": rec["is_override"],
+            "already_installed": rec["already_installed"],
         }
         for rec in recommendations_to_upload
     ]
diff --git a/backend/app/db/models/recommendations.py b/backend/app/db/models/recommendations.py
index be5ff30c..186f87a8 100644
--- a/backend/app/db/models/recommendations.py
+++ b/backend/app/db/models/recommendations.py
@@ -30,7 +30,7 @@ class Recommendation(Base):
     rental_yield_increase = Column(Float)
     total_work_hours = Column(Float)
     labour_days = Column(Float)
-    is_override = Column(Boolean, nullable=False, default=False)
+    already_installed = Column(Boolean, nullable=False, default=False)
 
 
 class RecommendationMaterials(Base):

From f1e3bca9bff0c68ba9ce068c91a91268da794cb0 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 14 Apr 2024 14:59:30 +0100
Subject: [PATCH 227/262] updated asset list for immo to reference already
 installed

---
 etl/customers/immo/pilot/asset_list.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/etl/customers/immo/pilot/asset_list.py b/etl/customers/immo/pilot/asset_list.py
index 07ebe884..d8839924 100644
--- a/etl/customers/immo/pilot/asset_list.py
+++ b/etl/customers/immo/pilot/asset_list.py
@@ -48,7 +48,7 @@ patches = [
 # This is information that is found as a result of the non-invasives, that mean that certain measures
 # have been installed already. To reflect this in the front end, it is included in the recommendation, however
 # the cost is removed and instead, a message is presented saying that the measure is already installed.
-overrides = [
+already_installed = [
     {
         'address': '5 Oaklands',
         'postcode': 'B62 0JA',
@@ -87,11 +87,11 @@ def app():
     )
 
     # Store overrides in s3
-    overrides_filename = f"{USER_ID}/{PORTFOLIO_ID}/overrides.json"
+    already_installed_filename = f"{USER_ID}/{PORTFOLIO_ID}/already_installed.json"
     save_csv_to_s3(
-        dataframe=pd.DataFrame(overrides),
+        dataframe=pd.DataFrame(already_installed),
         bucket_name="retrofit-plan-inputs-dev",
-        file_name=overrides_filename
+        file_name=already_installed_filename
     )
 
     # Store patches in s3
@@ -109,7 +109,7 @@ def app():
         "goal": "Increase EPC",
         "goal_value": "C",
         "trigger_file_path": filename,
-        "overrides_file_path": overrides_filename,
+        "already_installed_file_path": already_installed_filename,
         "patches_file_path": patches_filename,
         "budget": None,
     }
@@ -122,7 +122,7 @@ def app():
         "goal": "Increase EPC",
         "goal_value": "B",
         "trigger_file_path": filename,
-        "overrides_file_path": overrides_filename,
+        "already_installed_file_path": already_installed_filename,
         "patches_file_path": patches_filename,
         "budget": None,
     }

From 046ac3dc39bc7c478a91fcaa58bddc30508c5166 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 14 Apr 2024 15:05:31 +0100
Subject: [PATCH 228/262] fixed bug in already installed

---
 etl/customers/immo/pilot/asset_list.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/etl/customers/immo/pilot/asset_list.py b/etl/customers/immo/pilot/asset_list.py
index d8839924..e587cc25 100644
--- a/etl/customers/immo/pilot/asset_list.py
+++ b/etl/customers/immo/pilot/asset_list.py
@@ -52,7 +52,7 @@ already_installed = [
     {
         'address': '5 Oaklands',
         'postcode': 'B62 0JA',
-        "overrides": ["windows_glazing"]
+        "already_installed": ["windows_glazing"]
     }
 ]
 

From 56bf3c121fbc0d4bb31a5e1b073b80daac7dba51 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 15 Apr 2024 13:31:46 +0100
Subject: [PATCH 229/262] Adding cdn to terraform

---
 infrastructure/terraform/main.tf              |  9 +++
 .../terraform/modules/cloudfront/main.tf      | 65 +++++++++++++++++++
 .../terraform/modules/cloudfront/variables.tf |  9 +++
 3 files changed, 83 insertions(+)
 create mode 100644 infrastructure/terraform/modules/cloudfront/main.tf
 create mode 100644 infrastructure/terraform/modules/cloudfront/variables.tf

diff --git a/infrastructure/terraform/main.tf b/infrastructure/terraform/main.tf
index d545cdf8..1d0562dd 100644
--- a/infrastructure/terraform/main.tf
+++ b/infrastructure/terraform/main.tf
@@ -181,4 +181,13 @@ module "lambda_carbon_prediction_ecr" {
 module "lambda_heat_prediction_ecr" {
   ecr_name = "lambda-heat-prediction-${var.stage}"
   source   = "./modules/ecr"
+}
+
+##############################################
+# CDN - Cloudfront
+##############################################
+module "cloudfront_distribution" {
+  source      = "./modules/cloudfront"
+  bucket_name = module.s3.bucket_name
+  stage       = var.stage
 }
\ No newline at end of file
diff --git a/infrastructure/terraform/modules/cloudfront/main.tf b/infrastructure/terraform/modules/cloudfront/main.tf
new file mode 100644
index 00000000..fbb88160
--- /dev/null
+++ b/infrastructure/terraform/modules/cloudfront/main.tf
@@ -0,0 +1,65 @@
+resource "aws_cloudfront_distribution" "s3_distribution" {
+  origin {
+    domain_name = "${aws_s3_bucket.bucket.bucket_regional_domain_name}"
+    origin_id   = "S3-${var.bucket_name}"
+
+    s3_origin_config {
+      origin_access_identity = aws_cloudfront_origin_access_identity.oai.cloudfront_access_identity_path
+    }
+  }
+
+  enabled = true
+
+  default_cache_behavior {
+    allowed_methods        = ["GET", "HEAD"]
+    cached_methods         = ["GET", "HEAD"]
+    target_origin_id       = "S3-${var.bucket_name}"
+    viewer_protocol_policy = "redirect-to-https"
+    compress               = true
+
+    forwarded_values {
+      query_string = false
+      cookies {
+        forward = "none"
+      }
+    }
+
+    min_ttl     = 0
+    default_ttl = 86400
+    max_ttl     = 31536000
+  }
+
+  price_class = "PriceClass_All"
+
+  restrictions {
+    geo_restriction {
+      restriction_type = "none"
+    }
+  }
+
+  viewer_certificate {
+    cloudfront_default_certificate = true
+  }
+}
+
+resource "aws_cloudfront_origin_access_identity" "oai" {
+  comment = "OAI for ${var.bucket_name}"
+}
+
+resource "aws_s3_bucket_policy" "bucket_policy" {
+  bucket = aws_s3_bucket.bucket.id
+
+  policy = jsonencode({
+    Version   = "2012-10-17"
+    Statement = [
+      {
+        Effect    = "Allow"
+        Principal = {
+          AWS = "arn:aws:iam::cloudfront:user/CloudFront Origin Access Identity ${aws_cloudfront_origin_access_identity.oai.id}"
+        }
+        Action   = "s3:GetObject"
+        Resource = "${aws_s3_bucket.bucket.arn}/*"
+      },
+    ]
+  })
+}
diff --git a/infrastructure/terraform/modules/cloudfront/variables.tf b/infrastructure/terraform/modules/cloudfront/variables.tf
new file mode 100644
index 00000000..433edc24
--- /dev/null
+++ b/infrastructure/terraform/modules/cloudfront/variables.tf
@@ -0,0 +1,9 @@
+variable "bucket_name" {
+  description = "The name of the bucket"
+  type        = string
+}
+
+variable "stage" {
+  description = "The deployment stage"
+  type        = string
+}

From ce546b56f7db4a88d82ee3f72148d2b4fe64f1c2 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 15 Apr 2024 13:38:23 +0100
Subject: [PATCH 230/262] passing additional data to cloudfront distribution

---
 infrastructure/terraform/main.tf                  |  9 ++++++---
 .../terraform/modules/cloudfront/main.tf          |  6 +++---
 .../terraform/modules/cloudfront/variables.tf     | 15 +++++++++++++++
 infrastructure/terraform/modules/s3/outputs.tf    | 12 ++++++++++++
 4 files changed, 36 insertions(+), 6 deletions(-)

diff --git a/infrastructure/terraform/main.tf b/infrastructure/terraform/main.tf
index 1d0562dd..fde25487 100644
--- a/infrastructure/terraform/main.tf
+++ b/infrastructure/terraform/main.tf
@@ -187,7 +187,10 @@ module "lambda_heat_prediction_ecr" {
 # CDN - Cloudfront
 ##############################################
 module "cloudfront_distribution" {
-  source      = "./modules/cloudfront"
-  bucket_name = module.s3.bucket_name
-  stage       = var.stage
+  source             = "./modules/cloudfront"
+  bucket_name        = module.s3.bucket_name
+  bucket_id          = module.s3.bucket_id
+  bucket_arn         = module.s3.bucket_arn
+  bucket_domain_name = module.s3.bucket_domain_name
+  stage              = var.stage
 }
\ No newline at end of file
diff --git a/infrastructure/terraform/modules/cloudfront/main.tf b/infrastructure/terraform/modules/cloudfront/main.tf
index fbb88160..281ff09f 100644
--- a/infrastructure/terraform/modules/cloudfront/main.tf
+++ b/infrastructure/terraform/modules/cloudfront/main.tf
@@ -1,6 +1,6 @@
 resource "aws_cloudfront_distribution" "s3_distribution" {
   origin {
-    domain_name = "${aws_s3_bucket.bucket.bucket_regional_domain_name}"
+    domain_name = var.bucket_domain_name
     origin_id   = "S3-${var.bucket_name}"
 
     s3_origin_config {
@@ -47,7 +47,7 @@ resource "aws_cloudfront_origin_access_identity" "oai" {
 }
 
 resource "aws_s3_bucket_policy" "bucket_policy" {
-  bucket = aws_s3_bucket.bucket.id
+  bucket = var.bucket_id
 
   policy = jsonencode({
     Version   = "2012-10-17"
@@ -58,7 +58,7 @@ resource "aws_s3_bucket_policy" "bucket_policy" {
           AWS = "arn:aws:iam::cloudfront:user/CloudFront Origin Access Identity ${aws_cloudfront_origin_access_identity.oai.id}"
         }
         Action   = "s3:GetObject"
-        Resource = "${aws_s3_bucket.bucket.arn}/*"
+        Resource = "${var.bucket_arn}/*"
       },
     ]
   })
diff --git a/infrastructure/terraform/modules/cloudfront/variables.tf b/infrastructure/terraform/modules/cloudfront/variables.tf
index 433edc24..88f770a8 100644
--- a/infrastructure/terraform/modules/cloudfront/variables.tf
+++ b/infrastructure/terraform/modules/cloudfront/variables.tf
@@ -7,3 +7,18 @@ variable "stage" {
   description = "The deployment stage"
   type        = string
 }
+
+variable "bucket_id" {
+  description = "The ID of the S3 bucket"
+  type        = string
+}
+
+variable "bucket_arn" {
+  description = "The ARN of the S3 bucket"
+  type        = string
+}
+
+variable "bucket_domain_name" {
+  description = "The regional domain name of the S3 bucket"
+  type        = string
+}
\ No newline at end of file
diff --git a/infrastructure/terraform/modules/s3/outputs.tf b/infrastructure/terraform/modules/s3/outputs.tf
index a5e7ddb4..7668dbc4 100644
--- a/infrastructure/terraform/modules/s3/outputs.tf
+++ b/infrastructure/terraform/modules/s3/outputs.tf
@@ -2,3 +2,15 @@ output "bucket_name" {
   description = "The name of the S3 bucket"
   value       = aws_s3_bucket.bucket.bucket
 }
+
+output "bucket_id" {
+  value = aws_s3_bucket.bucket.id
+}
+
+output "bucket_arn" {
+  value = aws_s3_bucket.bucket.arn
+}
+
+output "bucket_domain_name" {
+  value = aws_s3_bucket.bucket.bucket_regional_domain_name
+}
\ No newline at end of file

From e6f9416c8e4b3452f42c47044503c4fdcd68b7cf Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 15 Apr 2024 14:05:36 +0100
Subject: [PATCH 231/262] upgrade db instance version

---
 infrastructure/terraform/main.tf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/infrastructure/terraform/main.tf b/infrastructure/terraform/main.tf
index fde25487..55266e10 100644
--- a/infrastructure/terraform/main.tf
+++ b/infrastructure/terraform/main.tf
@@ -66,7 +66,7 @@ resource "aws_security_group" "allow_db" {
 resource "aws_db_instance" "default" {
   allocated_storage      = var.allocated_storage
   engine                 = "postgres"
-  engine_version         = "14.7"
+  engine_version         = "14.10"
   instance_class         = var.instance_class
   db_name                = var.database_name
   username               = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string)["db_assessment_model_username"]

From 6076eb4f24905ad026c7a0dca9eb3d15f7678a5b Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 15 Apr 2024 14:50:00 +0100
Subject: [PATCH 232/262] building photo upload app

---
 .idea/Model.iml                               |   2 +-
 .idea/misc.xml                                |   2 +-
 etl/non_invasive_surveys/photos/README.md     |  19 +++
 etl/non_invasive_surveys/photos/app.py        | 120 ++++++++++++++++++
 .../photos/requirements.txt                   |   3 +
 5 files changed, 144 insertions(+), 2 deletions(-)
 create mode 100644 etl/non_invasive_surveys/photos/README.md
 create mode 100644 etl/non_invasive_surveys/photos/app.py
 create mode 100644 etl/non_invasive_surveys/photos/requirements.txt

diff --git a/.idea/Model.iml b/.idea/Model.iml
index 4413bb06..c75af922 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="non_invasive_surveys-photos" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 6f308057..1f2c584d 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="non_invasive_surveys-photos" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/etl/non_invasive_surveys/photos/README.md b/etl/non_invasive_surveys/photos/README.md
new file mode 100644
index 00000000..9dbe951f
--- /dev/null
+++ b/etl/non_invasive_surveys/photos/README.md
@@ -0,0 +1,19 @@
+# Non Intrusive Surveys - photo upload
+
+This folder contains photos taken during non-intrusive surveys. Photos are stored in folders named after the survey ID.
+
+## Getting started
+
+Install the required packages by running the following command:
+
+```bash
+pip install -r requirements.txt
+```
+
+## Usage
+
+The main application is found in the app.py file. To run the application, use the following command:
+
+```bash
+python app.py
+```
\ No newline at end of file
diff --git a/etl/non_invasive_surveys/photos/app.py b/etl/non_invasive_surveys/photos/app.py
new file mode 100644
index 00000000..1b6790f9
--- /dev/null
+++ b/etl/non_invasive_surveys/photos/app.py
@@ -0,0 +1,120 @@
+import boto3
+from PIL import Image
+from pathlib import Path
+from dotenv import load_dotenv
+
+# Inputs
+ENV_FILEPATH = "etl/non_invasive_surveys/photos/.env"
+PHOTO_DIRECTORY = "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data"
+FOLDER_UPRN_LOOKUP = {
+    "91 Osprey Drive DY1 2JS": 90048026,
+    "195 Ashenhurst Rd DY1 2JB": 90051858,
+    "6 Beech Rd DY1 4BP": 90055152,
+    "53 Bromley DY5 4PJ": 90060989,
+    "5 Oaklands B62 0JA": 90028499,
+    "47 Fairfield Rd DY8 5UJ": 90077535,
+    "150 Huntingtree Rd B63 4HP": 90093693,
+    "27 Milton Rd DY1 2JB": 90106884,
+    "21 Wells Rd DY5 3TB": 90022227,
+    "8 Corporation Rd DY2 7PX": 90070461
+}
+
+
+def list_subdirectories(directory_path):
+    """
+    List all subdirectories within a given directory.
+
+    :param directory_path: Path to the directory.
+    :return: A list of paths to the subdirectories.
+    """
+    directory = Path(directory_path)
+    subdirectories = [subdir for subdir in directory.iterdir() if subdir.is_dir()]
+    return subdirectories
+
+
+def list_files_in_directory(directory_path, file_extension=".jpg"):
+    """
+    List all files with a specific extension within a given directory and its subdirectories.
+
+    :param directory_path: Path to the directory to scan.
+    :param file_extension: File extension to filter by.
+    :return: A list of paths to the files.
+    """
+    # Convert the directory path to a Path object if it's not already one
+    directory = Path(directory_path) if not isinstance(directory_path, Path) else directory_path
+
+    # List all files of the specified type in the directory and subdirectories
+    file_list = [file for file in directory.rglob(f'*{file_extension}')]
+
+    return file_list
+
+
+def create_images(input_path):
+    # Load the image
+    with Image.open(input_path) as img:
+        # Create a thumbnail
+        thumbnail = img.copy()
+        thumbnail.thumbnail((128, 128), Image.ANTIALIAS)  # Resize to 128x128 (or any desired size)
+        thumbnail.save('thumbnail.jpg')
+
+        # Create a 1080p version
+        full_hd = img.copy()
+        full_hd.thumbnail((1920, 1080), Image.ANTIALIAS)  # Resize to 1080p
+        full_hd.save('1080p.jpg')
+
+    # Return paths to the processed images
+    return 'thumbnail.jpg', '1080p.jpg', input_path
+
+
+def upload_to_s3(bucket_name, file_path, object_name):
+    s3_client = boto3.client('s3')
+    s3_client.upload_file(file_path, bucket_name, object_name)
+    print(f"Uploaded {object_name} to S3 bucket {bucket_name}")
+
+
+def upload_photos_to_s3(bucket_name, photo_paths):
+    # Upload each photo
+    for path in photo_paths:
+        object_name = path.split('/')[-1]  # Assuming the path format is folder/filename
+        upload_to_s3(bucket_name, path, object_name)
+
+
+def generate_cdn_url(distribution_domain, object_name):
+    return f"https://{distribution_domain}/{object_name}"
+
+
+def process_and_upload_images(input_image_path, bucket_name, distribution_domain):
+    # Create images
+    thumbnail, full_hd, original = create_images(input_image_path)
+
+    # Upload images
+    upload_photos_to_s3(bucket_name, [thumbnail, full_hd, original])
+
+    # Generate CDN links
+    cdn_links = [generate_cdn_url(distribution_domain, path.split('/')[-1]) for path in [thumbnail, full_hd, original]]
+
+    return cdn_links
+
+
+def app():
+    """
+    This application is tasked with uploading the photos, recorded during the non-invasive surveys, to s3 and the
+    database.
+    To begin with, this app will simply read the files from the local machine, however we will come up with a more
+    efficient way to do this in the future.
+
+    :return:
+    """
+
+    # List all files in the directory using pathlib
+    property_directories = list_subdirectories(PHOTO_DIRECTORY)
+
+    # For each property, we want to list all of the photos in the directory
+    for property_dir in property_directories:
+        photo_files = list_files_in_directory(property_dir)
+
+        # We now want to convert each file, and upload it to s3
+        for photo_filepath in photo_files:
+            process_and_upload_images(
+                photo_filepath, "retrofit-datalake-dev", "cdn.retrofit.com"
+            )
diff --git a/etl/non_invasive_surveys/photos/requirements.txt b/etl/non_invasive_surveys/photos/requirements.txt
new file mode 100644
index 00000000..2199a0b4
--- /dev/null
+++ b/etl/non_invasive_surveys/photos/requirements.txt
@@ -0,0 +1,3 @@
+Pillow
+boto3
+python-dotenv
\ No newline at end of file

From d3a175468330774214e4c7225157dd4481cb60cd Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 15 Apr 2024 15:20:38 +0100
Subject: [PATCH 233/262] modifying photo upload code

---
 etl/non_invasive_surveys/photos/app.py | 43 ++++++++++++++++++++------
 1 file changed, 34 insertions(+), 9 deletions(-)

diff --git a/etl/non_invasive_surveys/photos/app.py b/etl/non_invasive_surveys/photos/app.py
index 1b6790f9..ffd993a6 100644
--- a/etl/non_invasive_surveys/photos/app.py
+++ b/etl/non_invasive_surveys/photos/app.py
@@ -1,4 +1,5 @@
 import boto3
+import os
 from PIL import Image
 from pathlib import Path
 from dotenv import load_dotenv
@@ -19,6 +20,10 @@ FOLDER_UPRN_LOOKUP = {
     "8 Corporation Rd DY2 7PX": 90070461
 }
 
+load_dotenv(ENV_FILEPATH)
+CLOUDFRONT_DISTRIBUTION_DOMAIN_NAME = os.getenv("CLOUDFRONT_DISTRIBUTION_DOMAIN_NAME", None)
+CDN_BUCKET_NAME = os.getenv("CDN_BUCKET_NAME", None)
+
 
 def list_subdirectories(directory_path):
     """
@@ -49,21 +54,33 @@ def list_files_in_directory(directory_path, file_extension=".jpg"):
     return file_list
 
 
-def create_images(input_path):
+def create_images(input_path, uprn):
+    # Need to create local directory if it doesn't exist
+    os.makedirs(f"non_invasive_photos/{uprn}", exist_ok=True)
+
     # Load the image
     with Image.open(input_path) as img:
+        # Define output paths
+        thumbnail_path = f"non_invasive_photos/{uprn}/thumbnail.jpg"
+        full_hd_path = f"non_invasive_photos/{uprn}/1080p.jpg"
+        webp_path = f"non_invasive_photos/{uprn}/webp.webp"  # Save as WebP format
+
         # Create a thumbnail
         thumbnail = img.copy()
-        thumbnail.thumbnail((128, 128), Image.ANTIALIAS)  # Resize to 128x128 (or any desired size)
-        thumbnail.save('thumbnail.jpg')
+        thumbnail.thumbnail((128, 128), Image.Resampling.LANCZOS)  # High-quality downsampling
+        thumbnail.save(thumbnail_path, 'JPEG', quality=85)  # Save as JPEG with quality setting
 
         # Create a 1080p version
         full_hd = img.copy()
-        full_hd.thumbnail((1920, 1080), Image.ANTIALIAS)  # Resize to 1080p
-        full_hd.save('1080p.jpg')
+        full_hd.thumbnail((1920, 1080), Image.Resampling.LANCZOS)
+        full_hd.save(full_hd_path, 'JPEG', quality=90)  # Slightly higher quality for larger image
+
+        # Convert to WebP for better compression
+        webp = img.copy()
+        webp.save(webp_path, 'WEBP', quality=90)
 
     # Return paths to the processed images
-    return 'thumbnail.jpg', '1080p.jpg', input_path
+    return thumbnail_path, full_hd_path, webp_path
 
 
 def upload_to_s3(bucket_name, file_path, object_name):
@@ -83,9 +100,9 @@ def generate_cdn_url(distribution_domain, object_name):
     return f"https://{distribution_domain}/{object_name}"
 
 
-def process_and_upload_images(input_image_path, bucket_name, distribution_domain):
+def process_and_upload_images(uprn, input_image_path, bucket_name, distribution_domain):
     # Create images
-    thumbnail, full_hd, original = create_images(input_image_path)
+    thumbnail, full_hd, original = create_images(str(uprn), input_image_path)
 
     # Upload images
     upload_photos_to_s3(bucket_name, [thumbnail, full_hd, original])
@@ -93,6 +110,10 @@ def process_and_upload_images(input_image_path, bucket_name, distribution_domain
     # Generate CDN links
     cdn_links = [generate_cdn_url(distribution_domain, path.split('/')[-1]) for path in [thumbnail, full_hd, original]]
 
+    # Delete local files
+    for path in [thumbnail, full_hd, original]:
+        os.remove(path)
+
     return cdn_links
 
 
@@ -112,9 +133,13 @@ def app():
     # For each property, we want to list all of the photos in the directory
     for property_dir in property_directories:
         photo_files = list_files_in_directory(property_dir)
+        uprn = FOLDER_UPRN_LOOKUP[property_dir.name]
 
         # We now want to convert each file, and upload it to s3
         for photo_filepath in photo_files:
             process_and_upload_images(
-                photo_filepath, "retrofit-datalake-dev", "cdn.retrofit.com"
+                uprn=uprn,
+                input_image_path=photo_filepath,
+                bucket_name=CDN_BUCKET_NAME,
+                distribution_domain=CLOUDFRONT_DISTRIBUTION_DOMAIN_NAME
             )

From 5d3440815d7616bf3af37ca68136a73d610f071a Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 15 Apr 2024 16:33:15 +0100
Subject: [PATCH 234/262] Pushing non-invasive photos to app wip

---
 .../photos/README.md                          |  0
 .../photos/app.py                             | 28 +++++++++++--------
 .../photos/requirements.txt                   |  0
 3 files changed, 16 insertions(+), 12 deletions(-)
 rename etl/{non_invasive_surveys => non_intrusive_surveys}/photos/README.md (100%)
 rename etl/{non_invasive_surveys => non_intrusive_surveys}/photos/app.py (84%)
 rename etl/{non_invasive_surveys => non_intrusive_surveys}/photos/requirements.txt (100%)

diff --git a/etl/non_invasive_surveys/photos/README.md b/etl/non_intrusive_surveys/photos/README.md
similarity index 100%
rename from etl/non_invasive_surveys/photos/README.md
rename to etl/non_intrusive_surveys/photos/README.md
diff --git a/etl/non_invasive_surveys/photos/app.py b/etl/non_intrusive_surveys/photos/app.py
similarity index 84%
rename from etl/non_invasive_surveys/photos/app.py
rename to etl/non_intrusive_surveys/photos/app.py
index ffd993a6..c531355b 100644
--- a/etl/non_invasive_surveys/photos/app.py
+++ b/etl/non_intrusive_surveys/photos/app.py
@@ -5,7 +5,7 @@ from pathlib import Path
 from dotenv import load_dotenv
 
 # Inputs
-ENV_FILEPATH = "etl/non_invasive_surveys/photos/.env"
+ENV_FILEPATH = "etl/non_intrusive_surveys/photos/.env"
 PHOTO_DIRECTORY = "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data"
 FOLDER_UPRN_LOOKUP = {
     "91 Osprey Drive DY1 2JS": 90048026,
@@ -55,25 +55,29 @@ def list_files_in_directory(directory_path, file_extension=".jpg"):
 
 
 def create_images(input_path, uprn):
+    # Define the base directory path
+    base_directory = f"non_intrusive_photos/{uprn}"
+    print(f"Creating directory: {base_directory}")  # Debug: print the directory to be created
+
     # Need to create local directory if it doesn't exist
-    os.makedirs(f"non_invasive_photos/{uprn}", exist_ok=True)
+    os.makedirs(base_directory, exist_ok=True)
+
+    # Define output paths
+    thumbnail_path = os.path.join(base_directory, "thumbnail.jpg")
+    full_hd_path = os.path.join(base_directory, "1080p.jpg")
+    webp_path = os.path.join(base_directory, "webp.webp")  # Save as WebP format
 
     # Load the image
     with Image.open(input_path) as img:
-        # Define output paths
-        thumbnail_path = f"non_invasive_photos/{uprn}/thumbnail.jpg"
-        full_hd_path = f"non_invasive_photos/{uprn}/1080p.jpg"
-        webp_path = f"non_invasive_photos/{uprn}/webp.webp"  # Save as WebP format
-
         # Create a thumbnail
         thumbnail = img.copy()
-        thumbnail.thumbnail((128, 128), Image.Resampling.LANCZOS)  # High-quality downsampling
-        thumbnail.save(thumbnail_path, 'JPEG', quality=85)  # Save as JPEG with quality setting
+        thumbnail.thumbnail((128, 128), Image.Resampling.LANCZOS)
+        thumbnail.save(thumbnail_path, 'JPEG', quality=85)
 
         # Create a 1080p version
         full_hd = img.copy()
         full_hd.thumbnail((1920, 1080), Image.Resampling.LANCZOS)
-        full_hd.save(full_hd_path, 'JPEG', quality=90)  # Slightly higher quality for larger image
+        full_hd.save(full_hd_path, 'JPEG', quality=90)
 
         # Convert to WebP for better compression
         webp = img.copy()
@@ -102,10 +106,10 @@ def generate_cdn_url(distribution_domain, object_name):
 
 def process_and_upload_images(uprn, input_image_path, bucket_name, distribution_domain):
     # Create images
-    thumbnail, full_hd, original = create_images(str(uprn), input_image_path)
+    thumbnail, full_hd, original = create_images(input_image_path, uprn=str(uprn))
 
     # Upload images
-    upload_photos_to_s3(bucket_name, [thumbnail, full_hd, original])
+    upload_photos_to_s3(bucket_name, photo_paths=[thumbnail, full_hd, original])
 
     # Generate CDN links
     cdn_links = [generate_cdn_url(distribution_domain, path.split('/')[-1]) for path in [thumbnail, full_hd, original]]
diff --git a/etl/non_invasive_surveys/photos/requirements.txt b/etl/non_intrusive_surveys/photos/requirements.txt
similarity index 100%
rename from etl/non_invasive_surveys/photos/requirements.txt
rename to etl/non_intrusive_surveys/photos/requirements.txt

From d6fa81939d6a0f7752728953250b3554995a5297 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 15 Apr 2024 23:41:24 +0100
Subject: [PATCH 235/262] creating new aggregations for front end

---
 .idea/Model.iml                               |   2 +-
 .idea/misc.xml                                |   2 +-
 backend/Property.py                           |   8 +-
 .../app/db/functions/portfolio_functions.py   |   3 +-
 backend/app/plan/router.py                    | 128 +++++++++++++++++-
 recommendations/Recommendations.py            |  11 +-
 6 files changed, 146 insertions(+), 8 deletions(-)

diff --git a/.idea/Model.iml b/.idea/Model.iml
index c75af922..4413bb06 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="non_invasive_surveys-photos" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 1f2c584d..6f308057 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="non_invasive_surveys-photos" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/backend/Property.py b/backend/Property.py
index a8ed9129..7b5a6bc3 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -142,6 +142,8 @@ class Property:
 
         self.current_adjusted_energy = None
         self.expected_adjusted_energy = None
+        self.current_energy_bill = None
+        self.expected_energy_bill = None
 
         self.recommendations_scoring_data = []
 
@@ -892,12 +894,16 @@ class Property:
 
         return component_data
 
-    def set_adjusted_energy(self, current_adjusted_energy, expected_adjusted_energy):
+    def set_adjusted_energy(
+        self, current_adjusted_energy, expected_adjusted_energy, current_energy_bill, expected_energy_bill
+    ):
         """
         Stores these values for usage later
         """
         self.current_adjusted_energy = current_adjusted_energy
         self.expected_adjusted_energy = expected_adjusted_energy
+        self.current_energy_bill = current_energy_bill
+        self.expected_energy_bill = expected_energy_bill
 
     def set_windows_count(self):
         """
diff --git a/backend/app/db/functions/portfolio_functions.py b/backend/app/db/functions/portfolio_functions.py
index ead8280f..69203368 100644
--- a/backend/app/db/functions/portfolio_functions.py
+++ b/backend/app/db/functions/portfolio_functions.py
@@ -4,7 +4,7 @@ from backend.app.db.models.portfolio import Portfolio
 
 
 def aggregate_portfolio_recommendations(
-    session, portfolio_id: int, total_valuation_increase: float, labour_days: float
+    session, portfolio_id: int, total_valuation_increase: float, labour_days: float, aggregated_data: dict
 ):
     # Aggregate multiple fields
     aggregates = (
@@ -27,6 +27,7 @@ def aggregate_portfolio_recommendations(
         "energy_savings": aggregates.energy_savings or 0,
         "co2_equivalent_savings": aggregates.co2_equivalent_savings or 0,
         "energy_cost_savings": aggregates.energy_cost_savings or 0,
+        **aggregated_data
     }
 
     # Get the portfolio and update the fields
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 49e14872..b8b2d5c8 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -1,3 +1,4 @@
+import json
 from datetime import datetime
 
 from tqdm import tqdm
@@ -57,6 +58,109 @@ def patch_epc(patch, epc_records):
     return epc_records
 
 
+def extract_portfolio_aggregation_data(
+    input_properties, total_valuation_increase, recommendations, new_epc_bands
+):
+    # We aggregate a number of metrics for the portfolio:
+    # 1) A breakdown of the number of properties in each EPC band
+    #    a) before retrofit
+    #    b) after retrofit
+    # 2) Number of units
+    # 3) Co2/unit
+    #    a) before retrofit
+    #    b) after retrofit
+    # 4) Energy bills/unit
+    #    a) before retrofit
+    #    b) after retrofit
+    # 5) Average valuation improvement/unit
+    # 6) Total cost
+    # 7) Cost per unit
+    # 8) £ per CO2 saved
+    # 9) £ per SAP point
+
+    # We need to construct the underlying data for this
+
+    # Helper function to reformat the EPC data
+    def reformat_epc_data(epc_counts):
+        # Define all possible EPC bands in the required order
+        epc_bands = ["G", "F", "E", "D", "C", "B", "A"]
+
+        # Create the formatted data list by checking each band in the order
+        formatted_data = []
+        for band in epc_bands:
+            # Get the count from the dictionary, defaulting to 0 if not present
+            count = epc_counts.get(band, 0)
+            # Append the formatted dictionary to the list
+            formatted_data.append({"name": band, band: count})
+
+        return formatted_data
+
+    n_units = len(input_properties)
+
+    agg_data = []
+    for p in input_properties:
+        # Get the recommendations for the property
+        property_recommendations = recommendations.get(p.id, [])
+        if not property_recommendations:
+            continue
+        # Get just the default recommendations
+        default_recommendations = [r for r in property_recommendations if r["default"]]
+
+        # We can now calculate multiple outputs based on default recommendations
+        carbon_savings = sum([r["co2_equivalent_savings"] for r in default_recommendations])
+
+        pre_retrofit_co2 = p.data["co2-emissions-current"]
+        post_retrofit_co2 = pre_retrofit_co2 - carbon_savings
+
+        pre_retrofit_energy_bill = p.current_energy_bill
+        post_retrofit_energy_bill = p.expected_energy_bill
+
+        cost = sum([r["total"] for r in default_recommendations])
+        sap_point_improvement = sum([r["sap_points"] for r in default_recommendations])
+
+        agg_data.append({
+            "pre_retrofit_epc": p.data["current-energy-rating"],
+            "post_retrofit_epc": new_epc_bands[p.id],
+            "pre_retrofit_co2": pre_retrofit_co2,
+            "post_retrofit_co2": post_retrofit_co2,
+            "pre_retrofit_energy_bill": pre_retrofit_energy_bill,
+            "post_retrofit_energy_bill": post_retrofit_energy_bill,
+            "cost": cost,
+            "sap_point_improvement": sap_point_improvement
+        })
+
+    agg_data = pd.DataFrame(agg_data)
+
+    n_units_to_retrofit = len(agg_data)
+
+    valuation_improvment_per_unit = total_valuation_increase / n_units_to_retrofit
+
+    total_carbon_saved = agg_data["pre_retrofit_co2"].sum() - agg_data["post_retrofit_co2"].sum()
+    total_sap_points = agg_data["sap_point_improvement"].sum()
+
+    aggregation_data = {
+        "epc_breakdown_pre_retrofit": json.dumps(
+            reformat_epc_data(agg_data["pre_retrofit_epc"].value_counts().to_dict())
+        ),
+        "epc_breakdown_post_retrofit": json.dumps(
+            reformat_epc_data(agg_data["post_retrofit_epc"].value_counts().to_dict())
+        ),
+        "number_of_properties": n_units,
+        "n_units_to_retrofit": n_units_to_retrofit,
+        "co2_per_unit_pre_retrofit": agg_data["pre_retrofit_co2"].mean(),
+        "co2_per_unit_post_retrofit": agg_data["post_retrofit_co2"].mean(),
+        "energy_bill_per_unit_pre_retrofit": agg_data["pre_retrofit_energy_bill"].mean(),
+        "energy_bill_per_unit_post_retrofit": agg_data["post_retrofit_energy_bill"].mean(),
+        "valuation_improvement_per_unit": valuation_improvment_per_unit,
+        "total_cost": agg_data["cost"].sum(),
+        "cost_per_unit": agg_data["cost"].mean(),
+        "cost_per_co2_saved": agg_data["cost"].sum() / total_carbon_saved,
+        "cost_per_sap_point": agg_data["cost"].sum() / total_sap_points
+    }
+
+    return aggregation_data
+
+
 router = APIRouter(
     prefix="/plan",
     tags=["plan"],
@@ -243,7 +347,13 @@ async def trigger_plan(body: PlanTriggerRequest):
 
             property_instance = [p for p in input_properties if p.id == property_id][0]
 
-            recommendations_with_impact, current_adjusted_energy, expected_adjusted_energy = (
+            (
+                recommendations_with_impact,
+                current_adjusted_energy,
+                expected_adjusted_energy,
+                current_energy_bill,
+                expected_energy_bill
+            ) = (
                 Recommendations.calculate_recommendation_impact(
                     property_instance=property_instance,
                     all_predictions=all_predictions,
@@ -254,7 +364,9 @@ async def trigger_plan(body: PlanTriggerRequest):
             # Store the resulting adjusted energy in the property instance
             property_instance.set_adjusted_energy(
                 current_adjusted_energy=current_adjusted_energy,
-                expected_adjusted_energy=expected_adjusted_energy
+                expected_adjusted_energy=expected_adjusted_energy,
+                current_energy_bill=current_energy_bill,
+                expected_energy_bill=expected_energy_bill
             )
 
             input_measures = prepare_input_measures(recommendations_with_impact, body.goal)
@@ -316,6 +428,7 @@ async def trigger_plan(body: PlanTriggerRequest):
         logger.info("Uploading recommendations to the database")
         property_valuation_increases = []
         session.commit()
+        new_epc_bands = {}
         for i in range(0, len(input_properties), BATCH_SIZE):
             try:
                 # Take a slice of the input_properties list to make a batch
@@ -327,6 +440,7 @@ async def trigger_plan(body: PlanTriggerRequest):
                     total_sap_points = sum([r["sap_points"] for r in default_recommendations])
                     new_sap_points = float(p.data["current-energy-efficiency"]) + total_sap_points
                     new_epc = sap_to_epc(new_sap_points)
+                    new_epc_bands[p.id] = new_epc
 
                     valuations = PropertyValuation.estimate(property_instance=p, target_epc=new_epc)
 
@@ -392,11 +506,19 @@ async def trigger_plan(body: PlanTriggerRequest):
             [sum(r["labour_days"] for r in rec_group if r["default"]) for p_id, rec_group in recommendations.items()]
         ))
 
+        aggregated_data = extract_portfolio_aggregation_data(
+            input_properties=input_properties,
+            total_valuation_increase=total_valuation_increase,
+            recommendations=recommendations,
+            new_epc_bands=new_epc_bands
+        )
+
         aggregate_portfolio_recommendations(
             session,
             portfolio_id=body.portfolio_id,
             total_valuation_increase=total_valuation_increase,
-            labour_days=labour_days
+            labour_days=labour_days,
+            aggregated_data=aggregated_data
         )
 
         # Commit final changes
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 68fead16..659b41a8 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -281,6 +281,9 @@ class Recommendations:
             current_adjusted_energy - expected_adjusted_energy
         )
 
+        current_energy_bill = AnnualBillSavings.calculate_annual_bill(current_adjusted_energy)
+        expected_energy_bill = AnnualBillSavings.calculate_annual_bill(expected_adjusted_energy)
+
         for recommendations_by_type in property_recommendations:
             for rec in recommendations_by_type:
 
@@ -355,4 +358,10 @@ class Recommendations:
                     rec["heat_demand"] is None) or (rec["energy_cost_savings"] is None):
                     raise ValueError("sap points, co2 or heat demand is missing")
 
-        return property_recommendations, current_adjusted_energy, expected_adjusted_energy
+        return (
+            property_recommendations,
+            current_adjusted_energy,
+            expected_adjusted_energy,
+            current_energy_bill,
+            expected_energy_bill
+        )

From cc6277c191dea07ce1a8a26b8083e1eebdd2887b Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 15 Apr 2024 23:52:10 +0100
Subject: [PATCH 236/262] extended outputs

---
 backend/app/plan/router.py | 29 ++++++++++++++++++++---------
 1 file changed, 20 insertions(+), 9 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index b8b2d5c8..f7a825db 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -115,6 +115,9 @@ def extract_portfolio_aggregation_data(
         pre_retrofit_energy_bill = p.current_energy_bill
         post_retrofit_energy_bill = p.expected_energy_bill
 
+        pre_retrofit_energy_consumption = p.current_adjusted_energy
+        post_retrofit_energy_consumption = p.expected_adjusted_energy
+
         cost = sum([r["total"] for r in default_recommendations])
         sap_point_improvement = sum([r["sap_points"] for r in default_recommendations])
 
@@ -125,6 +128,8 @@ def extract_portfolio_aggregation_data(
             "post_retrofit_co2": post_retrofit_co2,
             "pre_retrofit_energy_bill": pre_retrofit_energy_bill,
             "post_retrofit_energy_bill": post_retrofit_energy_bill,
+            "pre_retrofit_energy_consumption": pre_retrofit_energy_consumption,
+            "post_retrofit_energy_consumption": post_retrofit_energy_consumption,
             "cost": cost,
             "sap_point_improvement": sap_point_improvement
         })
@@ -138,6 +143,9 @@ def extract_portfolio_aggregation_data(
     total_carbon_saved = agg_data["pre_retrofit_co2"].sum() - agg_data["post_retrofit_co2"].sum()
     total_sap_points = agg_data["sap_point_improvement"].sum()
 
+    def format_money(amount):
+        return f"£{amount:,.0f}"
+
     aggregation_data = {
         "epc_breakdown_pre_retrofit": json.dumps(
             reformat_epc_data(agg_data["pre_retrofit_epc"].value_counts().to_dict())
@@ -147,15 +155,18 @@ def extract_portfolio_aggregation_data(
         ),
         "number_of_properties": n_units,
         "n_units_to_retrofit": n_units_to_retrofit,
-        "co2_per_unit_pre_retrofit": agg_data["pre_retrofit_co2"].mean(),
-        "co2_per_unit_post_retrofit": agg_data["post_retrofit_co2"].mean(),
-        "energy_bill_per_unit_pre_retrofit": agg_data["pre_retrofit_energy_bill"].mean(),
-        "energy_bill_per_unit_post_retrofit": agg_data["post_retrofit_energy_bill"].mean(),
-        "valuation_improvement_per_unit": valuation_improvment_per_unit,
-        "total_cost": agg_data["cost"].sum(),
-        "cost_per_unit": agg_data["cost"].mean(),
-        "cost_per_co2_saved": agg_data["cost"].sum() / total_carbon_saved,
-        "cost_per_sap_point": agg_data["cost"].sum() / total_sap_points
+        "co2_per_unit_pre_retrofit": str(round(agg_data["pre_retrofit_co2"].mean(), 2)) + "t",
+        "co2_per_unit_post_retrofit": str(round(agg_data["post_retrofit_co2"].mean(), 2)) + "t",
+        "energy_bill_per_unit_pre_retrofit": format_money(agg_data["pre_retrofit_energy_bill"].mean()),
+        "energy_bill_per_unit_post_retrofit": format_money(agg_data["post_retrofit_energy_bill"].mean()),
+        "energy_consumption_per_unit_pre_retrofit": str(
+            round(agg_data["pre_retrofit_energy_consumption"].mean())) + "kWh",
+        "energy_consumption_per_unit_post_retrofit": str(
+            round(agg_data["post_retrofit_energy_consumption"].mean())) + "kWh",
+        "valuation_improvement_per_unit": format_money(valuation_improvment_per_unit),
+        "cost_per_unit": format_money(agg_data["cost"].mean()),
+        "cost_per_co2_saved": format_money(agg_data["cost"].sum() / total_carbon_saved),
+        "cost_per_sap_point": format_money(agg_data["cost"].sum() / total_sap_points)
     }
 
     return aggregation_data

From 83d472a7108019fb7ea9f21c9196a5abba154ad0 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 16 Apr 2024 03:05:26 +0100
Subject: [PATCH 237/262] debugging

---
 backend/app/db/models/portfolio.py | 15 +++++++++++++++
 backend/app/plan/router.py         | 18 ++++++++++++------
 recommendations/Recommendations.py |  3 +++
 3 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/backend/app/db/models/portfolio.py b/backend/app/db/models/portfolio.py
index 830866e6..aa0146c0 100644
--- a/backend/app/db/models/portfolio.py
+++ b/backend/app/db/models/portfolio.py
@@ -45,6 +45,21 @@ class Portfolio(Base):
     labour_days = Column(Float)
     created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
     updated_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
+    # Aggregations for summary
+    epc_breakdown_pre_retrofit = Column(Text)
+    epc_breakdown_post_retrofit = Column(Text)
+    n_units_to_retrofit = Column(Integer)
+    co2_per_unit_pre_retrofit = Column(Text)
+    co2_per_unit_post_retrofit = Column(Text)
+    energy_bill_per_unit_pre_retrofit = Column(Text)
+    energy_bill_per_unit_post_retrofit = Column(Text)
+    energy_consumption_per_unit_pre_retrofit = Column(Text)
+    energy_consumption_per_unit_post_retrofit = Column(Text)
+    valuation_improvement_per_unit = Column(Text)
+    cost_per_unit = Column(Text)
+    cost_per_co2_saved = Column(Text)
+    cost_per_sap_point = Column(Text)
+    valuation_return_on_investment = Column(Text)
 
 
 class PropertyCreationStatus(enum.Enum):
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index f7a825db..661858b7 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -99,10 +99,9 @@ def extract_portfolio_aggregation_data(
 
     agg_data = []
     for p in input_properties:
-        # Get the recommendations for the property
+        # Get the recommendations for the property - we include all properties, even ones without recommendations
         property_recommendations = recommendations.get(p.id, [])
-        if not property_recommendations:
-            continue
+
         # Get just the default recommendations
         default_recommendations = [r for r in property_recommendations if r["default"]]
 
@@ -113,11 +112,16 @@ def extract_portfolio_aggregation_data(
         post_retrofit_co2 = pre_retrofit_co2 - carbon_savings
 
         pre_retrofit_energy_bill = p.current_energy_bill
-        post_retrofit_energy_bill = p.expected_energy_bill
+        post_retrofit_energy_bill = p.current_energy_bill - sum(
+            [r["energy_cost_savings"] for r in default_recommendations]
+        )
 
         pre_retrofit_energy_consumption = p.current_adjusted_energy
-        post_retrofit_energy_consumption = p.expected_adjusted_energy
+        post_retrofit_energy_consumption = p.current_adjusted_energy - sum(
+            [r["adjusted_heat_demand"] for r in default_recommendations]
+        )
 
+        # Add up the cost and SAP point improvement of the default recommendations
         cost = sum([r["total"] for r in default_recommendations])
         sap_point_improvement = sum([r["sap_points"] for r in default_recommendations])
 
@@ -166,7 +170,9 @@ def extract_portfolio_aggregation_data(
         "valuation_improvement_per_unit": format_money(valuation_improvment_per_unit),
         "cost_per_unit": format_money(agg_data["cost"].mean()),
         "cost_per_co2_saved": format_money(agg_data["cost"].sum() / total_carbon_saved),
-        "cost_per_sap_point": format_money(agg_data["cost"].sum() / total_sap_points)
+        "cost_per_sap_point": format_money(agg_data["cost"].sum() / total_sap_points),
+        "valuation_return_on_investment": str(round(total_valuation_increase / agg_data["cost"].sum(), 2))
+        # TODO: Could we add 10yr carbon credits value?
     }
 
     return aggregation_data
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 659b41a8..e626ecfa 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -272,6 +272,8 @@ class Recommendations:
             current_epc_rating=property_instance.data["current-energy-rating"],
         )
 
+        # TODO: This isn't quite right as this is based on EVERY possible measure, not just the ones that are
+        #       actually implemented
         expected_adjusted_energy = AnnualBillSavings.adjust_energy_to_metered(
             epc_energy_consumption=expected_heat_demand,
             current_epc_rating=property_instance.data["current-energy-rating"],
@@ -281,6 +283,7 @@ class Recommendations:
             current_adjusted_energy - expected_adjusted_energy
         )
 
+        # TODO: We should determine if the home is gas & electricity or just electricity
         current_energy_bill = AnnualBillSavings.calculate_annual_bill(current_adjusted_energy)
         expected_energy_bill = AnnualBillSavings.calculate_annual_bill(expected_adjusted_energy)
 

From 0f7e815379eacb6d76100a25186cd38e23d9b8c3 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 16 Apr 2024 11:18:36 +0100
Subject: [PATCH 238/262] updating text for valuation improvement

---
 backend/app/plan/router.py | 49 +++++++++++++++++++++++++++++++-------
 1 file changed, 41 insertions(+), 8 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 661858b7..45d87dd3 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -59,7 +59,7 @@ def patch_epc(patch, epc_records):
 
 
 def extract_portfolio_aggregation_data(
-    input_properties, total_valuation_increase, recommendations, new_epc_bands
+    input_properties, total_valuation_increase, recommendations, new_epc_bands, property_value_increase_ranges
 ):
     # We aggregate a number of metrics for the portfolio:
     # 1) A breakdown of the number of properties in each EPC band
@@ -69,7 +69,7 @@ def extract_portfolio_aggregation_data(
     # 3) Co2/unit
     #    a) before retrofit
     #    b) after retrofit
-    # 4) Energy bulls/unit
+    # 4) Energy bill/unit
     #    a) before retrofit
     #    b) after retrofit
     # 5) Average valuation improvement/unit
@@ -105,6 +105,8 @@ def extract_portfolio_aggregation_data(
         # Get just the default recommendations
         default_recommendations = [r for r in property_recommendations if r["default"]]
 
+        has_recommendations = len(default_recommendations) > 0
+
         # We can now calculate multiple outputs based on default recommendations
         carbon_savings = sum([r["co2_equivalent_savings"] for r in default_recommendations])
 
@@ -125,6 +127,15 @@ def extract_portfolio_aggregation_data(
         cost = sum([r["total"] for r in default_recommendations])
         sap_point_improvement = sum([r["sap_points"] for r in default_recommendations])
 
+        lower_bound_valuation_uplift = (
+            property_value_increase_ranges[p.id]["lower_bound_increased_value"] -
+            property_value_increase_ranges[p.id]["current_value"]
+        )
+        upper_bound_valuation_uplift = (
+            property_value_increase_ranges[p.id]["upper_bound_increased_value"] -
+            property_value_increase_ranges[p.id]["current_value"]
+        )
+
         agg_data.append({
             "pre_retrofit_epc": p.data["current-energy-rating"],
             "post_retrofit_epc": new_epc_bands[p.id],
@@ -135,14 +146,22 @@ def extract_portfolio_aggregation_data(
             "pre_retrofit_energy_consumption": pre_retrofit_energy_consumption,
             "post_retrofit_energy_consumption": post_retrofit_energy_consumption,
             "cost": cost,
-            "sap_point_improvement": sap_point_improvement
+            "sap_point_improvement": sap_point_improvement,
+            "lower_bound_valuation_uplift": lower_bound_valuation_uplift,
+            "upper_bound_valuation_uplift": upper_bound_valuation_uplift,
+            "has_recommendations": has_recommendations
         })
 
     agg_data = pd.DataFrame(agg_data)
 
-    n_units_to_retrofit = len(agg_data)
+    n_units_to_retrofit = agg_data["has_recommendations"].sum()
 
-    valuation_improvment_per_unit = total_valuation_increase / n_units_to_retrofit
+    valuation_improvement_lower_bound_per_unit = (
+        agg_data["lower_bound_valuation_uplift"].mean()
+    )
+    valuation_improvement_upper_bound_per_unit = (
+        agg_data["upper_bound_valuation_uplift"].mean()
+    )
 
     total_carbon_saved = agg_data["pre_retrofit_co2"].sum() - agg_data["post_retrofit_co2"].sum()
     total_sap_points = agg_data["sap_point_improvement"].sum()
@@ -150,6 +169,17 @@ def extract_portfolio_aggregation_data(
     def format_money(amount):
         return f"£{amount:,.0f}"
 
+    valuation_improvment_per_unit = format_money(
+        total_valuation_increase / n_units) + (f" ({format_money(valuation_improvement_lower_bound_per_unit)} - "
+                                               f"{format_money(valuation_improvement_upper_bound_per_unit)})")
+
+    valuation_return_on_investment = (
+        str(round(total_valuation_increase / agg_data["cost"].sum(), 2)) +
+        f" ("
+        f"{agg_data['lower_bound_valuation_uplift'].sum() / agg_data['cost'].sum():,.2f} - "
+        f"{agg_data['upper_bound_valuation_uplift'].sum() / agg_data['cost'].sum():,.2f})"
+    )
+
     aggregation_data = {
         "epc_breakdown_pre_retrofit": json.dumps(
             reformat_epc_data(agg_data["pre_retrofit_epc"].value_counts().to_dict())
@@ -167,11 +197,11 @@ def extract_portfolio_aggregation_data(
             round(agg_data["pre_retrofit_energy_consumption"].mean())) + "kWh",
         "energy_consumption_per_unit_post_retrofit": str(
             round(agg_data["post_retrofit_energy_consumption"].mean())) + "kWh",
-        "valuation_improvement_per_unit": format_money(valuation_improvment_per_unit),
+        "valuation_improvement_per_unit": valuation_improvment_per_unit,
         "cost_per_unit": format_money(agg_data["cost"].mean()),
         "cost_per_co2_saved": format_money(agg_data["cost"].sum() / total_carbon_saved),
         "cost_per_sap_point": format_money(agg_data["cost"].sum() / total_sap_points),
-        "valuation_return_on_investment": str(round(total_valuation_increase / agg_data["cost"].sum(), 2))
+        "valuation_return_on_investment": valuation_return_on_investment,
         # TODO: Could we add 10yr carbon credits value?
     }
 
@@ -446,6 +476,7 @@ async def trigger_plan(body: PlanTriggerRequest):
         property_valuation_increases = []
         session.commit()
         new_epc_bands = {}
+        property_value_increase_ranges = {}
         for i in range(0, len(input_properties), BATCH_SIZE):
             try:
                 # Take a slice of the input_properties list to make a batch
@@ -460,6 +491,7 @@ async def trigger_plan(body: PlanTriggerRequest):
                     new_epc_bands[p.id] = new_epc
 
                     valuations = PropertyValuation.estimate(property_instance=p, target_epc=new_epc)
+                    property_value_increase_ranges[p.id] = valuations
 
                     # Your existing operations
                     property_details_epc = p.get_property_details_epc(
@@ -527,7 +559,8 @@ async def trigger_plan(body: PlanTriggerRequest):
             input_properties=input_properties,
             total_valuation_increase=total_valuation_increase,
             recommendations=recommendations,
-            new_epc_bands=new_epc_bands
+            new_epc_bands=new_epc_bands,
+            property_value_increase_ranges=property_value_increase_ranges
         )
 
         aggregate_portfolio_recommendations(

From 02399667798370cab35608dc5edac17db7de1960 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 16 Apr 2024 11:32:15 +0100
Subject: [PATCH 239/262] setting up non-invasive recommendations

---
 etl/customers/immo/pilot/asset_list.py | 29 +++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/etl/customers/immo/pilot/asset_list.py b/etl/customers/immo/pilot/asset_list.py
index e587cc25..614fa8a0 100644
--- a/etl/customers/immo/pilot/asset_list.py
+++ b/etl/customers/immo/pilot/asset_list.py
@@ -21,6 +21,7 @@ council_tax_bands = pd.DataFrame(council_tax_bands)
 
 # This is information we need to override on the EPC itself, for instance if a new survey has been conducted and
 # that has not reached the API
+# For 53 Bromley, the non-invasives found the walls to be partially filled
 patches = [
     {
         'address': '6 Beech Road', 'postcode': 'DY1 4BP',
@@ -42,7 +43,11 @@ patches = [
         'energy-consumption-current': '491',
         'co2-emissions-current': '5.0',
         'potential-energy-efficiency': '87'
-    }
+    },
+    {
+        'address': '53 Bromley', 'postcode': 'DY5 4PJ',
+        'walls-description': 'Cavity wall, partial insulation',
+    },
 ]
 
 # This is information that is found as a result of the non-invasives, that mean that certain measures
@@ -56,6 +61,19 @@ already_installed = [
     }
 ]
 
+non_invasive_recommendations = [
+    {'address': '8 Corporation Road', 'postcode': 'DY2 7PX', 'recommendations': []},
+    {'address': '21 Wells Road', 'postcode': 'DY5 3TB', 'recommendations': ['cavity_extract_and_refill']},
+    {'address': '27 Milton Road', 'postcode': 'WV14 8HZ', 'recommendations': ['cavity_extract_and_refill']},
+    {'address': '195 Ashenhurst Road', 'postcode': 'DY1 2JB', 'recommendations': ['cavity_extract_and_refill']},
+    {'address': '53 Bromley', 'postcode': 'DY5 4PJ', 'recommendations': ['cavity_surveyed_as_filled_is_partial']},
+    {'address': '91 Osprey Drive', 'postcode': 'DY1 2JS', 'recommendations': ['cavity_extract_and_refill']},
+    {'address': '47 Fairfield Road', 'postcode': 'DY8 5UJ', 'recommendations': ['cavity_extract_and_refill']},
+    {'address': '150 Huntingtree Road', 'postcode': 'B63 4HP', 'recommendations': ['cavity_extract_and_refill']},
+    {'address': '6 Beech Road', 'postcode': 'DY1 4BP', 'recommendations': []},
+    {'address': '5 Oaklands', 'postcode': 'B62 0JA', 'recommendations': ['cavity_extract_and_refill']},
+]
+
 
 def app():
     raw_asset_list = read_excel_from_s3(
@@ -102,6 +120,14 @@ def app():
         file_name=patches_filename
     )
 
+    # Store non-invasive recommendations in S3
+    non_invasive_recommendations_filename = f"{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.json"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(non_invasive_recommendations),
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=non_invasive_recommendations_filename
+    )
+
     # EPC C portoflio
     body = {
         "portfolio_id": str(PORTFOLIO_ID),
@@ -111,6 +137,7 @@ def app():
         "trigger_file_path": filename,
         "already_installed_file_path": already_installed_filename,
         "patches_file_path": patches_filename,
+        "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
         "budget": None,
     }
     print(body)

From b3e7675488b7004cc98f171b8d78793188345148 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 16 Apr 2024 11:38:58 +0100
Subject: [PATCH 240/262] added non-invasive recommendations to property class

---
 backend/Property.py                    |  7 ++++++-
 backend/app/plan/router.py             | 13 +++++++++++++
 backend/app/plan/schemas.py            |  1 +
 etl/customers/immo/pilot/asset_list.py |  1 +
 4 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/backend/Property.py b/backend/Property.py
index 7b5a6bc3..2d1dbd5d 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -61,7 +61,8 @@ class Property:
     n_bedrooms = None
 
     def __init__(
-        self, id, postcode, address, epc_record, already_installed=None, **kwargs
+        self, id, postcode, address, epc_record, already_installed=None, property_non_invasive_recommendations=None,
+        **kwargs
     ):
 
         self.epc_record = epc_record
@@ -80,6 +81,10 @@ class Property:
         # cost and instead, provide a message that the measure has already been installed
 
         self.already_installed = ast.literal_eval(already_installed['already_installed']) if already_installed else []
+        self.non_invasive_recommendations = (
+            ast.literal_eval(property_non_invasive_recommendations['recommendations']) if
+            property_non_invasive_recommendations else []
+        )
 
         self.uprn = epc_record.get("uprn")
         self.full_sap_epc = epc_record.get("full_sap_epc")
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 45d87dd3..e5a2aa79 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -242,6 +242,12 @@ async def trigger_plan(body: PlanTriggerRequest):
                 bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.already_installed_file_path
             )
 
+        non_invasive_recommendations = []
+        if body.non_invasive_recommendations_file_path:
+            non_invasive_recommendations = read_csv_from_s3(
+                bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.non_invasive_recommendations_file_path
+            )
+
         cleaning_data = read_dataframe_from_s3_parquet(
             bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet",
         )
@@ -297,6 +303,12 @@ async def trigger_plan(body: PlanTriggerRequest):
                 x for x in already_installed if
                 (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
             ), {})
+
+            property_non_invasive_recommendations = next((
+                x for x in non_invasive_recommendations if
+                (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
+            ), {})
+
             input_properties.append(
                 Property(
                     id=property_id,
@@ -304,6 +316,7 @@ async def trigger_plan(body: PlanTriggerRequest):
                     postcode=epc_searcher.postcode_clean,
                     epc_record=prepared_epc,
                     already_installed=property_already_installed,
+                    non_invasive_recommendations=property_non_invasive_recommendations,
                     **Property.extract_kwargs(config)
                 )
             )
diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py
index 76eb49d2..59c0ebef 100644
--- a/backend/app/plan/schemas.py
+++ b/backend/app/plan/schemas.py
@@ -11,6 +11,7 @@ class PlanTriggerRequest(BaseModel):
     trigger_file_path: str
     already_installed_file_path: Optional[str] = None
     patches_file_path: Optional[str] = None
+    non_invasive_recommendations_file_path: Optional[str] = None
     exclusions: Optional[conlist(str, min_items=1)] = None
 
     # Pre-defined list of possibilities for exclusions
diff --git a/etl/customers/immo/pilot/asset_list.py b/etl/customers/immo/pilot/asset_list.py
index 614fa8a0..57fa5957 100644
--- a/etl/customers/immo/pilot/asset_list.py
+++ b/etl/customers/immo/pilot/asset_list.py
@@ -151,6 +151,7 @@ def app():
         "trigger_file_path": filename,
         "already_installed_file_path": already_installed_filename,
         "patches_file_path": patches_filename,
+        "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
         "budget": None,
     }
     print(body)

From 0c1fb0360fa1473d4123e3a41c3a82f65d9a3512 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 16 Apr 2024 11:50:02 +0100
Subject: [PATCH 241/262] fixed patching of partial cwi description

---
 backend/app/plan/router.py             | 2 ++
 etl/customers/immo/pilot/asset_list.py | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index e5a2aa79..7200d2ef 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -52,6 +52,8 @@ def patch_epc(patch, epc_records):
     """
 
     for patch_variable, patch_value in patch.items():
+        if patch_value == "":
+            continue
         if patch_variable in epc_records["original_epc"]:
             epc_records["original_epc"][patch_variable] = patch_value
 
diff --git a/etl/customers/immo/pilot/asset_list.py b/etl/customers/immo/pilot/asset_list.py
index 57fa5957..6329a2be 100644
--- a/etl/customers/immo/pilot/asset_list.py
+++ b/etl/customers/immo/pilot/asset_list.py
@@ -46,7 +46,7 @@ patches = [
     },
     {
         'address': '53 Bromley', 'postcode': 'DY5 4PJ',
-        'walls-description': 'Cavity wall, partial insulation',
+        'walls-description': 'Cavity wall, partial insulation (assumed)',
     },
 ]
 

From 4cf4d67ac91610d19e418aa33ae794a37c1be505 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 16 Apr 2024 13:21:14 +0100
Subject: [PATCH 242/262] Added cavity extraction and re-fill recommendation
 and costing

---
 backend/Property.py                    | 17 ++++++++++++++---
 backend/app/plan/router.py             | 14 ++++++++------
 recommendations/Costs.py               | 13 ++++++++++++-
 recommendations/Recommendations.py     | 19 ++++++++++++++-----
 recommendations/WallRecommendations.py | 17 +++++++++++++++--
 5 files changed, 63 insertions(+), 17 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index 2d1dbd5d..2e6cbbb6 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -61,7 +61,7 @@ class Property:
     n_bedrooms = None
 
     def __init__(
-        self, id, postcode, address, epc_record, already_installed=None, property_non_invasive_recommendations=None,
+        self, id, postcode, address, epc_record, already_installed=None, non_invasive_recommendations=None,
         **kwargs
     ):
 
@@ -82,8 +82,8 @@ class Property:
 
         self.already_installed = ast.literal_eval(already_installed['already_installed']) if already_installed else []
         self.non_invasive_recommendations = (
-            ast.literal_eval(property_non_invasive_recommendations['recommendations']) if
-            property_non_invasive_recommendations else []
+            ast.literal_eval(non_invasive_recommendations['recommendations']) if
+            non_invasive_recommendations else []
         )
 
         self.uprn = epc_record.get("uprn")
@@ -284,6 +284,7 @@ class Property:
                     recommendation_record=recommendation_record,
                     recommendations=previous_phase_representatives + [rec],
                     primary_recommendation_id=rec["recommendation_id"],
+                    non_invasive_recommendations=self.non_invasive_recommendations,
                 )
                 self.recommendations_scoring_data.append(scoring_dict)
 
@@ -293,6 +294,7 @@ class Property:
         recommendation_record,
         recommendations: list,
         primary_recommendation_id: int,
+        non_invasive_recommendations: list = None,
     ):
         """
         This function will iterate through a list of recommendations and apply a simulation for each recommendation
@@ -301,10 +303,12 @@ class Property:
         :param recommendation_record: The record of the property, which will be updated
         :param recommendations: The list of recommendations to apply
         :param primary_recommendation_id: The id of the primary recommendation, which is used to identify the record
+        :param non_invasive_recommendations: The list of non-invasive recommendations
         :return: The updated recommendation record
         """
 
         output = recommendation_record.copy()
+        non_invasive_recommendations = [] if non_invasive_recommendations is None else non_invasive_recommendations
 
         for col in [
             "walls_insulation_thickness",
@@ -323,6 +327,13 @@ class Property:
                 "external_wall_insulation",
                 "cavity_wall_insulation",
             ]:
+
+                # # If we have a non-invasive recommendation that the cavity wall is partially filled, we skip the
+                # # cavity wall insulation recommendation (since on the EPC, the property will look like how it did
+                # # before any works)
+                # if "cavity_surveyed_as_filled_is_partial" in non_invasive_recommendations:
+                #     continue
+
                 # The upgrade made here is to the u-value of the walls and the description of the
                 # insulation thickness
                 output["walls_thermal_transmittance_ending"] = recommendation[
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 7200d2ef..9854abe8 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -171,11 +171,13 @@ def extract_portfolio_aggregation_data(
     def format_money(amount):
         return f"£{amount:,.0f}"
 
-    valuation_improvment_per_unit = format_money(
-        total_valuation_increase / n_units) + (f" ({format_money(valuation_improvement_lower_bound_per_unit)} - "
-                                               f"{format_money(valuation_improvement_upper_bound_per_unit)})")
+    valuation_improvment_per_unit = str(
+        format_money(
+            total_valuation_increase / n_units) + (f" ({format_money(valuation_improvement_lower_bound_per_unit)} - "
+                                                   f"{format_money(valuation_improvement_upper_bound_per_unit)})")
+    )
 
-    valuation_return_on_investment = (
+    valuation_return_on_investment = str(
         str(round(total_valuation_increase / agg_data["cost"].sum(), 2)) +
         f" ("
         f"{agg_data['lower_bound_valuation_uplift'].sum() / agg_data['cost'].sum():,.2f} - "
@@ -189,8 +191,8 @@ def extract_portfolio_aggregation_data(
         "epc_breakdown_post_retrofit": json.dumps(
             reformat_epc_data(agg_data["post_retrofit_epc"].value_counts().to_dict())
         ),
-        "number_of_properties": n_units,
-        "n_units_to_retrofit": n_units_to_retrofit,
+        "number_of_properties": int(n_units),
+        "n_units_to_retrofit": int(n_units_to_retrofit),
         "co2_per_unit_pre_retrofit": str(round(agg_data["pre_retrofit_co2"].mean(), 2)) + "t",
         "co2_per_unit_post_retrofit": str(round(agg_data["post_retrofit_co2"].mean(), 2)) + "t",
         "energy_bill_per_unit_pre_retrofit": format_money(agg_data["pre_retrofit_energy_bill"].mean()),
diff --git a/recommendations/Costs.py b/recommendations/Costs.py
index 0e67b352..852bb11f 100644
--- a/recommendations/Costs.py
+++ b/recommendations/Costs.py
@@ -91,6 +91,10 @@ DOUBLE_RADIATOR_COST = 300
 FLUE_COST = 600
 PIPEWORK_COST = 750  # Min cost is £500
 
+# This is the cost per meter squared for cavity extraction
+# https://www.checkatrade.com/blog/cost-guides/cavity-wall-insulation-removal-cost/
+CAVITY_EXTRACTION_COST = 21.5
+
 
 class Costs:
     """
@@ -173,7 +177,7 @@ class Costs:
         if not self.labour_adjustment_factor:
             raise ValueError("Labour adjustment factor not found")
 
-    def cavity_wall_insulation(self, wall_area, material):
+    def cavity_wall_insulation(self, wall_area, material, is_extraction_and_refill=False):
         """
         Calculates the total cost for cavity wall insulation based on material and labor costs,
         including contingency, preliminaries, profit, and VAT.
@@ -208,6 +212,13 @@ class Costs:
         # Assume a team of 2
         labour_days = (labour_hours / 8) / 2
 
+        if is_extraction_and_refill:
+            # bump up the cost of the work
+            total_cost = total_cost + CAVITY_EXTRACTION_COST * wall_area
+            # Additional 2 days work
+            labour_hours = labour_hours + (2 * 8)
+            labour_days = labour_days + 2
+
         return {
             "total": total_cost,
             "subtotal": subtotal_before_vat,
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index e626ecfa..5960d7be 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -149,12 +149,14 @@ class Recommendations:
         property_recommendations = self.insert_temp_recommendation_id(property_recommendations)
 
         # We also need to create the representative recommendations for each recommendation type
-        property_representative_recommendations = self.create_representative_recommendations(property_recommendations)
+        property_representative_recommendations = self.create_representative_recommendations(
+            property_recommendations, non_invasive_recommendations=self.property_instance.non_invasive_recommendations
+        )
 
         return property_recommendations, property_representative_recommendations
 
     @staticmethod
-    def create_representative_recommendations(property_recommendations):
+    def create_representative_recommendations(property_recommendations, non_invasive_recommendations):
         """
         This method will create a representative recommendation for each recommendation type
         In order to create a representative recommendation, we choose the recommendation that has:
@@ -169,6 +171,13 @@ class Recommendations:
 
         for recommendations_by_type in property_recommendations:
 
+            # If the property was initially surveyed as filled, but the cavity was only partially filled, we don't
+            # want to include the cavity wall insulation recommendation in the defaults
+            # if (recommendations_by_type[0].get("type") == "cavity_wall_insulation") and (
+            #     "cavity_surveyed_as_filled_is_partial" in non_invasive_recommendations
+            # ):
+            #     continue
+
             if recommendations_by_type[0].get("type") == "mechanical_ventilation":
                 continue
 
@@ -238,13 +247,13 @@ class Recommendations:
 
         property_sap_predictions = all_predictions["sap_change_predictions"][
             all_predictions["sap_change_predictions"]["property_id"] == str(property_instance.id)
-            ]
+            ].copy()
         property_heat_predictions = all_predictions["heat_demand_predictions"][
             all_predictions["heat_demand_predictions"]["property_id"] == str(property_instance.id)
-            ]
+            ].copy()
         property_carbon_predictions = all_predictions["carbon_change_predictions"][
             all_predictions["carbon_change_predictions"]["property_id"] == str(property_instance.id)
-            ]
+            ].copy()
 
         property_recommendations = recommendations[property_instance.id].copy()
 
diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py
index feb2620b..20fc453c 100644
--- a/recommendations/WallRecommendations.py
+++ b/recommendations/WallRecommendations.py
@@ -113,7 +113,9 @@ class WallRecommendations(Definitions):
         insulation_thickness = self.property.walls["insulation_thickness"]
 
         # We check if the wall is already insulated and if so, we exit
-        if (insulation_thickness in ["average", "above average"]) or self.property.walls["is_filled_cavity"]:
+        if ((insulation_thickness in ["average", "above average"]) or self.property.walls["is_filled_cavity"]) and (
+            "cavity_extract_and_refill" not in self.property.non_invasive_recommendations
+        ):
             return
 
         if u_value:
@@ -216,15 +218,26 @@ class WallRecommendations(Definitions):
             if new_u_value <= self.BUILDING_REGULATIONS_PART_L_CAVITY_WALL_MAX_U_VALUE:
                 lowest_selected_u_value = update_lowest_selected_u_value(lowest_selected_u_value, new_u_value)
 
+                is_extraction_and_refill = "cavity_extract_and_refill" in self.property.non_invasive_recommendations
+
                 cost_result = self.costs.cavity_wall_insulation(
                     wall_area=self.property.insulation_wall_area,
                     material=material.to_dict(),
+                    is_extraction_and_refill=is_extraction_and_refill
                 )
 
                 already_installed = "cavity_wall_insulation" in self.property.already_installed
                 if already_installed:
                     cost_result = override_costs(cost_result)
 
+                if is_extraction_and_refill:
+                    description = f"Extract and refill cavity wall insulation with {material['description']}"
+                else:
+                    description = self._make_description(material)
+
+                # Limit the new u-value to the best possible value our installers can achieve
+                new_u_value = max(0.31, new_u_value)
+
                 recommendations.append(
                     {
                         "phase": phase,
@@ -237,7 +250,7 @@ class WallRecommendations(Definitions):
                             )
                         ],
                         "type": "cavity_wall_insulation",
-                        "description": self._make_description(material),
+                        "description": description,
                         "starting_u_value": u_value,
                         "new_u_value": new_u_value,
                         "sap_points": None,

From e000c87cad98963e8c734a5cf8990a5a7b713217 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 18 Apr 2024 12:16:13 +0100
Subject: [PATCH 243/262] added patches for immo pilot 2

---
 etl/customers/immo/pilot/asset_list_2.py | 126 +++++++++++++++++++++++
 1 file changed, 126 insertions(+)
 create mode 100644 etl/customers/immo/pilot/asset_list_2.py

diff --git a/etl/customers/immo/pilot/asset_list_2.py b/etl/customers/immo/pilot/asset_list_2.py
new file mode 100644
index 00000000..f722a490
--- /dev/null
+++ b/etl/customers/immo/pilot/asset_list_2.py
@@ -0,0 +1,126 @@
+import pandas as pd
+from utils.s3 import read_excel_from_s3
+from utils.s3 import save_csv_to_s3
+
+USER_ID = 8
+PORTFOLIO_ID = 72
+
+# For
+patches = [
+    {
+        'address': '116 Parkes Hall Road',
+        'postcode': 'DY1 3RJ',
+        'walls-description': 'Cavity wall, filled cavity',
+        'walls-energy-eff': 'Average',
+        'roof-description': 'Pitched, 270 mm loft insulation',
+        'roof-energy-eff': 'Good',
+        'windows-description': 'Fully double glazed',
+        'windows-energy-eff': 'Good',
+        'mainheat-description': 'Boiler and radiators, mains gas',
+        'mainheat-energy-eff': 'Good',
+        'mainheatcont-description': 'Programmer, room thermostat and TRVs',
+        'mainheatc-energy-eff': 'Good',
+        'lighting-description': 'Low energy lighting in 27% of fixed outlets',
+        'lighting-energy-eff': 'Good',
+        'floor-description': 'Solid, no insulation (assumed)',
+        'secondheat-description': 'None',
+        'current-energy-efficiency': '73',
+        'current-energy-rating': 'C',
+        'energy-consumption-current': '184',
+        'co2-emissions-current': '2.4',
+        'potential-energy-efficiency': '88',
+        'total-floor-area': '73',
+        'construction-age-band': 'England and Wales: 1930-1949',
+        'property-type': 'House',
+        'built-form': 'Mid-Terrace',
+    }
+]
+
+# This is information that is found as a result of the non-invasives, that mean that certain measures
+# have been installed already. To reflect this in the front end, it is included in the recommendation, however
+# the cost is removed and instead, a message is presented saying that the measure is already installed.
+already_installed = []
+
+non_invasive_recommendations = []
+
+
+def app():
+    raw_asset_list = read_excel_from_s3(
+        bucket_name="retrofit-datalake-dev",
+        file_key="customers/Immo/Dudley Asset List - Hestia - pilot2.xlsx",
+        header_row=0
+    )
+
+    raw_asset_list = raw_asset_list[raw_asset_list["in_pilot"]].copy()
+
+    # Extract address and postcode
+    raw_asset_list["address"] = raw_asset_list["Full Address"].str.split(",").str[0]
+    raw_asset_list["postcode"] = raw_asset_list["Full Address"].str.split(",").str[-1].str.strip()
+
+    # We're provided with number of bathrooms and number of bedrooms.
+    asset_list = raw_asset_list.rename(
+        columns={
+            "No. of Beds": "n_bedrooms",
+            "No. of WC's": "n_bathrooms"
+        }
+    )
+
+    # Store the asset list in s3
+    filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
+    save_csv_to_s3(
+        dataframe=asset_list,
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename
+    )
+
+    # Store overrides in s3
+    already_installed_filename = f"{USER_ID}/{PORTFOLIO_ID}/already_installed.json"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(already_installed),
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=already_installed_filename
+    )
+
+    # Store patches in s3
+    patches_filename = f"{USER_ID}/{PORTFOLIO_ID}/patches.json"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(patches),
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=patches_filename
+    )
+
+    # Store non-invasive recommendations in S3
+    non_invasive_recommendations_filename = f"{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.json"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(non_invasive_recommendations),
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=non_invasive_recommendations_filename
+    )
+
+    # EPC C portfolio
+    body = {
+        "portfolio_id": str(PORTFOLIO_ID),
+        "housing_type": "Private",
+        "goal": "Increase EPC",
+        "goal_value": "C",
+        "trigger_file_path": filename,
+        "already_installed_file_path": already_installed_filename,
+        "patches_file_path": patches_filename,
+        "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
+        "budget": None,
+    }
+    print(body)
+
+    # EPC B portfolio
+    body = {
+        "portfolio_id": str(PORTFOLIO_ID + 1),
+        "housing_type": "Private",
+        "goal": "Increase EPC",
+        "goal_value": "B",
+        "trigger_file_path": filename,
+        "already_installed_file_path": already_installed_filename,
+        "patches_file_path": patches_filename,
+        "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
+        "budget": None,
+    }
+    print(body)

From acada27061d09f47ac76ecd2785c95eb39e741d3 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 18 Apr 2024 15:16:46 +0100
Subject: [PATCH 244/262] rounding up roof coverage %

---
 backend/SearchEpc.py                      |  9 +++++++--
 backend/app/plan/router.py                | 11 +++++++++--
 backend/ml_models/Valuation.py            |  8 ++++++++
 etl/customers/immo/pilot/asset_list_2.py  | 21 ++++++++++++++++++---
 etl/epc/Record.py                         |  2 +-
 recommendations/SolarPvRecommendations.py | 10 +++++++---
 6 files changed, 50 insertions(+), 11 deletions(-)

diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py
index cc2ee4a9..44178792 100644
--- a/backend/SearchEpc.py
+++ b/backend/SearchEpc.py
@@ -709,8 +709,13 @@ class SearchEpc:
                 self.full_sap_epc = {}
 
                 # Finally, set a standardised address 1 and postcode
-                self.address_clean = self.ordnance_survey_client.address_os
-                self.postcode_clean = self.ordnance_survey_client.postcode_os
+                self.address_clean = (
+                    self.ordnance_survey_client.address_os if self.ordnance_survey_client.address_os else self.address1
+                )
+                self.postcode_clean = (
+                    self.ordnance_survey_client.postcode_os if self.ordnance_survey_client.postcode_os else
+                    self.postcode
+                )
             return
 
         os_response = self.ordnance_survey_client.get_places_api()
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 9854abe8..a8464ee6 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -52,6 +52,10 @@ def patch_epc(patch, epc_records):
     """
 
     for patch_variable, patch_value in patch.items():
+
+        if patch_variable in ["address", "postcode"]:
+            continue
+
         if patch_value == "":
             continue
         if patch_variable in epc_records["original_epc"]:
@@ -268,9 +272,12 @@ async def trigger_plan(body: PlanTriggerRequest):
                 postcode=config["postcode"],
                 uprn=uprn,
                 auth_token=get_settings().EPC_AUTH_TOKEN,
-                os_api_key=get_settings().ORDNANCE_SURVEY_API_KEY
+                os_api_key=get_settings().ORDNANCE_SURVEY_API_KEY,
             )
-            epc_searcher.find_property()
+            epc_searcher.ordnance_survey_client.built_form = config.get("built_form", None)
+            epc_searcher.ordnance_survey_client.property_type = config.get("property_type", None)
+            # For the moment, our OS API access is unavailable, so we skip and interpolate
+            epc_searcher.find_property(skip_os=True)
             # Create a record in db
             property_id, is_new = create_property(
                 session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn
diff --git a/backend/ml_models/Valuation.py b/backend/ml_models/Valuation.py
index 251c016a..39ea5a98 100644
--- a/backend/ml_models/Valuation.py
+++ b/backend/ml_models/Valuation.py
@@ -63,6 +63,14 @@ class PropertyValuation:
         90093693: 279_000,  # Based on Zoopla
         90055152: 149_000,  # Based on Zoopla
         90028499: 238_000,  # Based on Zoopla
+        # IMMO Dudley Pilot 2- search by going to https://www.zoopla.co.uk/property/uprn/{uprn}/
+        90039318: 177_000,  # Based on Zoopla
+        90038384: 170_000,  # Based on Zoopla
+        90105380: 185_000,  # Based on Zoopla
+        90124001: 165_000,  # Based on Zoopla
+        90013980: 148_000,  # Based on Zoopla
+        90087154: 184_000,  # Based on Zoopla
+        90046817: 167_000,  # Based on Zoopla
     }
 
     # We base our valuation uplifts on a number of sources
diff --git a/etl/customers/immo/pilot/asset_list_2.py b/etl/customers/immo/pilot/asset_list_2.py
index f722a490..121e7a81 100644
--- a/etl/customers/immo/pilot/asset_list_2.py
+++ b/etl/customers/immo/pilot/asset_list_2.py
@@ -10,6 +10,7 @@ patches = [
     {
         'address': '116 Parkes Hall Road',
         'postcode': 'DY1 3RJ',
+        'uprn': '90046817',
         'walls-description': 'Cavity wall, filled cavity',
         'walls-energy-eff': 'Average',
         'roof-description': 'Pitched, 270 mm loft insulation',
@@ -21,7 +22,7 @@ patches = [
         'mainheatcont-description': 'Programmer, room thermostat and TRVs',
         'mainheatc-energy-eff': 'Good',
         'lighting-description': 'Low energy lighting in 27% of fixed outlets',
-        'lighting-energy-eff': 'Good',
+        'lighting-energy-eff': 'Average',
         'floor-description': 'Solid, no insulation (assumed)',
         'secondheat-description': 'None',
         'current-energy-efficiency': '73',
@@ -39,7 +40,11 @@ patches = [
 # This is information that is found as a result of the non-invasives, that mean that certain measures
 # have been installed already. To reflect this in the front end, it is included in the recommendation, however
 # the cost is removed and instead, a message is presented saying that the measure is already installed.
-already_installed = []
+already_installed = [
+    {
+        'address': '28 Sangwin Road', 'postcode': 'WV14 9EQ', "already_installed": ["loft_insulation"]
+    }
+]
 
 non_invasive_recommendations = []
 
@@ -58,13 +63,23 @@ def app():
     raw_asset_list["postcode"] = raw_asset_list["Full Address"].str.split(",").str[-1].str.strip()
 
     # We're provided with number of bathrooms and number of bedrooms.
+    # The UPRNs are not the official ones
     asset_list = raw_asset_list.rename(
         columns={
             "No. of Beds": "n_bedrooms",
-            "No. of WC's": "n_bathrooms"
+            "No. of WC's": "n_bathrooms",
+            'Property Type': 'property_type',
+            'Architype': 'built_form'
         }
     )
 
+    # Remap the values
+    asset_list["built_form"] = asset_list["built_form"].map({
+        "SEMI DETACHED": "Semi-Detached",
+        "MID TERRACE": "Mid-Terrace",
+        "END TERRACE": "End-Terrace",
+    })
+
     # Store the asset list in s3
     filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
     save_csv_to_s3(
diff --git a/etl/epc/Record.py b/etl/epc/Record.py
index e74330a2..9a965c6a 100644
--- a/etl/epc/Record.py
+++ b/etl/epc/Record.py
@@ -191,7 +191,7 @@ class EPCRecord:
         This method will clean the records using the data processor
         """
         epc_data_processor = EPCDataProcessor(
-            data=self.epc_record_as_dataframe("prepared_epc"),
+            data=self.epc_record_as_dataframe("prepared_epc").copy(),
             run_mode="newdata",
             cleaning_averages=self.cleaning_data,
         )
diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py
index 58cf9735..b44557ab 100644
--- a/recommendations/SolarPvRecommendations.py
+++ b/recommendations/SolarPvRecommendations.py
@@ -56,14 +56,18 @@ class SolarPvRecommendations:
         if not is_valid_property_type or not is_valid_roof_type or not has_no_existing_solar_pv:
             return
 
+        solar_pv_percentage = self.property.solar_pv_percentage
+        # We round up to the nearest 10%
+        solar_pv_percentage = np.ceil(solar_pv_percentage * 10) / 10
+
         # For the solar recommendations, we produce the following scenarios:
         # 1) Solar panels only, we present a high, medium and low coverage
         # 2) With and without battery
         roof_coverage_scenarios = [
-            self.property.solar_pv_percentage - 0.1, self.property.solar_pv_percentage,
+            solar_pv_percentage - 0.1, solar_pv_percentage,
         ]
-        if self.property.solar_pv_percentage <= 0.4:
-            roof_coverage_scenarios.append(self.property.solar_pv_percentage + 0.1)
+        if solar_pv_percentage <= 0.4:
+            roof_coverage_scenarios.append(solar_pv_percentage + 0.1)
         # We make sure we haven't gone too low or high - we allow no more than 60% coverage
         roof_coverage_scenarios = [v for v in roof_coverage_scenarios if 0 <= v <= 0.6]
         # If we only have two scenarios, we add a coverage scenario 10% less than the smallest

From db2586061598471f182fc338668618dfd4109a61 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 18 Apr 2024 16:01:41 +0100
Subject: [PATCH 245/262] Completed pilot 2

---
 etl/customers/immo/pilot/asset_list_2.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/etl/customers/immo/pilot/asset_list_2.py b/etl/customers/immo/pilot/asset_list_2.py
index 121e7a81..1b4fad9a 100644
--- a/etl/customers/immo/pilot/asset_list_2.py
+++ b/etl/customers/immo/pilot/asset_list_2.py
@@ -43,6 +43,15 @@ patches = [
 already_installed = [
     {
         'address': '28 Sangwin Road', 'postcode': 'WV14 9EQ', "already_installed": ["loft_insulation"]
+    },
+    {
+        'address': '51 Hillwood Road', 'postcode': 'B62 8NQ', "already_installed": ["loft_insulation"]
+    },
+    {
+        'address': '47 Watsons Close', 'postcode': 'DY2 7HL', "already_installed": ["loft_insulation"]
+    },
+    {
+        'address': '44 Hatfield Road', 'postcode': 'DY9 7LW', "already_installed": ["loft_insulation"]
     }
 ]
 

From 3593b7ae9ebd4245985a2dabc80446b23f00d84e Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 19 Apr 2024 13:54:04 +0100
Subject: [PATCH 246/262] Added boiler upgrade recommendation

---
 etl/customers/gla_croydon_demo/asset_list.py |  5 ++--
 recommendations/Costs.py                     | 12 ++------
 recommendations/HeatingRecommender.py        | 31 +++++++++-----------
 3 files changed, 20 insertions(+), 28 deletions(-)

diff --git a/etl/customers/gla_croydon_demo/asset_list.py b/etl/customers/gla_croydon_demo/asset_list.py
index 7dde8926..1655979b 100644
--- a/etl/customers/gla_croydon_demo/asset_list.py
+++ b/etl/customers/gla_croydon_demo/asset_list.py
@@ -34,8 +34,9 @@ def app():
         low_memory=False
     )
 
-    z = epc_data.groupby(["WALLS_DESCRIPTION", "WALLS_ENERGY_EFF"]).size().reset_index(name="count")
-    z = z[z["MAINHEAT_DESCRIPTION"] == "Boiler and radiators, mains gas"]
+    z = epc_data[epc_data["MAINHEAT_DESCRIPTION"] == "Boiler and radiators, mains gas"]
+    z["HOTWATER_DESCRIPTION"].value_counts()
+    z["MAIN_FUEL"].value_counts()
 
     # Filter on entries where we have a UPRN
     epc_data = epc_data[~pd.isnull(epc_data["UPRN"])]
diff --git a/recommendations/Costs.py b/recommendations/Costs.py
index 852bb11f..d7a8ad2f 100644
--- a/recommendations/Costs.py
+++ b/recommendations/Costs.py
@@ -67,18 +67,12 @@ LOW_CARBON_COMBI_BOILER = 2200
 # https://www.greenmatch.co.uk/boilers/35kw-boiler
 # https://www.greenmatch.co.uk/boilers/40kw-boiler
 # These are exclusive of installation costs
-COMBI_BOILER_COSTS = {
+CONDENSING_BOILER_COSTS = {
     "30kw": 1550,
     "35kw": 1610,
     "40kw": 1625
 }
 
-CONVENTIONAL_BOILER_COSTS = {
-    "30kw": 1117,
-    "35kw": 1546,
-    "40kw": 1776
-}
-
 # Assumes 3 hours to remove each heater (including re-decorating)
 ROOM_HEATER_REMOVAL_COST = 120
 ROOM_HEATER_REMOVAL_LABOUR_HOURS = 3
@@ -1179,7 +1173,7 @@ class Costs:
         estimated_radiators = max(total_radiators_based_on_power, base_radiators + additional_radiators)
         return round(estimated_radiators)
 
-    def boiler(self, is_combi, size, exising_room_heaters, system_change, n_heated_rooms, n_rooms):
+    def boiler(self, size, exising_room_heaters, system_change, n_heated_rooms, n_rooms):
         """
         Based on a basic estimate of median value £2600 to install a low carbon combi boiler
         First time central heating vosts can als be found here:
@@ -1187,7 +1181,7 @@ class Costs:
         :return:
         """
 
-        unit_cost = COMBI_BOILER_COSTS[size] if is_combi else CONVENTIONAL_BOILER_COSTS[size]
+        unit_cost = CONDENSING_BOILER_COSTS[size]
         # The unit cost is the cost without VAT
         # We now need to estimate the cost of the works
         labour_days = 2
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 432dc6a6..2423901a 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -312,7 +312,15 @@ class HeatingRecommender:
         simulation_config = {}
         boiler_costs = {}
         boiler_recommendation = {}
-        if self.property.data["mainheat-energy-eff"] in ["Very Poor", "Poor", "Average"]:
+
+        has_inefficient_space_heating = self.property.data["mainheat-energy-eff"] in ["Very Poor", "Poor", "Average"]
+
+        has_inefficient_mains_water = (
+            self.property.hotwater["clean_description"] in ["From main system"] and
+            self.property.data["hot-water-energy-eff"] in ["Very Poor", "Poor", "Average"]
+        )
+
+        if has_inefficient_space_heating or has_inefficient_mains_water:
             boiler_size = self.estimate_boiler_size(
                 property_type=self.property.data["property-type"],
                 built_form=self.property.data["built-form"],
@@ -321,22 +329,12 @@ class HeatingRecommender:
                 num_heated_rooms=self.property.data["number-heated-rooms"],
             )
 
-            # We recommend a combi boiler under the following conditions
-            # 1) If there are 4 or fewer rooms (we don't use heqted rooms because none of the rooms could be
-            #    heated if there is no existing heating system).
-            # 2) There 1 or fewer bathrooms
-            # Otherwise, we recommend a gas condensing boiler, which will server a larger property, that has multiple
-            # bathrooms
-            is_combi = (
-                (self.property.number_of_rooms <= 4) and
-                (self.property.n_bathrooms in [None, 0, 1])
-            )
-            if is_combi:
-                description = "Upgrade to a new combi boiler"
-            else:
-                description = "Upgrade to a new gas condensing boiler"
+            description = "Upgrade to a new condensing boiler"
 
-            simulation_config = {"mainheat_energy_eff_ending": "Good"}
+            simulation_config = {
+                "mainheat_energy_eff_ending": "Good",
+                "hot_water_energy_eff_ending": "Good"
+            }
             if system_change:
                 # Installation of a boiler improves the hot water system so we need to reflect this in
                 # the outcome of the recommendation
@@ -363,7 +361,6 @@ class HeatingRecommender:
                 }
 
             boiler_costs = self.costs.boiler(
-                is_combi=is_combi,
                 size=f"{boiler_size}kw",
                 exising_room_heaters=exising_room_heaters,
                 system_change=system_change,

From 391cb356ee12270aa9f5a4ffeff6a917f07ff05e Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 19 Apr 2024 14:07:47 +0100
Subject: [PATCH 247/262] debugging recommendation when we have independent
 boiler upgrade and heating controls

---
 recommendations/HeatingRecommender.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 2423901a..aa5cabdb 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -394,9 +394,13 @@ class HeatingRecommender:
         controls_recommender.recommend(heating_description="Boiler and radiators, mains gas")
         # We may have 2 recommendations from the heating controls
 
-        if not controls_recommender.recommendation:
+        if not controls_recommender.recommendation and not boiler_recommendation:
             return
 
+        if not system_change and len(boiler_recommendation):
+            # If there is not a system change, we add the boiler recommendation at point.
+            self.recommendations.append(boiler_recommendation)
+
         if system_change:
             # We combine the heating and controls recommendations, in the case of a system change
             combined_recommendations = []

From 8bd899bcba8739b3232ec254fa799ff8497efb0f Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 19 Apr 2024 16:43:13 +0100
Subject: [PATCH 248/262] debugging structure of heating recommendations

---
 backend/app/plan/router.py            | 1 +
 recommendations/HeatingRecommender.py | 8 ++++----
 recommendations/Recommendations.py    | 9 +++++++--
 3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index a8464ee6..06d1aadf 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -380,6 +380,7 @@ async def trigger_plan(body: PlanTriggerRequest):
 
         logger.info("Preparing data for scoring in sap change api")
         recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data)
+
         recommendations_scoring_data = recommendations_scoring_data.drop(
             columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
                      "carbon_ending"]
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index aa5cabdb..fe5cdd46 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -399,7 +399,7 @@ class HeatingRecommender:
 
         if not system_change and len(boiler_recommendation):
             # If there is not a system change, we add the boiler recommendation at point.
-            self.recommendations.append(boiler_recommendation)
+            self.recommendations.append([boiler_recommendation])
 
         if system_change:
             # We combine the heating and controls recommendations, in the case of a system change
@@ -417,12 +417,12 @@ class HeatingRecommender:
                 combined_recommendations.extend(combined_recommendation)
 
             # Overwrite the existing boiler recommendation
-            self.recommendations.extend(combined_recommendations)
+            self.recommendations.append(combined_recommendations)
         else:
             # We increment the recommendation phase, since the heating controls are separate from the boiler upgrade
             # but we'll only upgrade if we have a heating recommendation
             has_heating_recommendation = any(
-                recommendation["type"] == "heating" for recommendation in self.recommendations
+                rec["type"] == "heating" for recommendation in self.recommendations for rec in recommendation
             )
             if has_heating_recommendation:
                 recommendation_phase += 1
@@ -431,6 +431,6 @@ class HeatingRecommender:
             for recommendation in controls_recommender.recommendation:
                 recommendation["phase"] = recommendation_phase
 
-            self.recommendations.extend(controls_recommender.recommendation)
+            self.recommendations.append(controls_recommender.recommendation)
 
         return
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 5960d7be..aba75ad9 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -111,11 +111,16 @@ class Recommendations:
         if "heating" not in self.exclusions:
             self.heating_recommender.recommend(phase=phase)
             if self.heating_recommender.recommendations:
-                property_recommendations.append(self.heating_recommender.recommendations)
+                if len(self.heating_recommender.recommendations) == 1:
+                    property_recommendations.append(self.heating_recommender.recommendations)
+                else:
+                    property_recommendations.extend(self.heating_recommender.recommendations)
                 # We check if we have distinct heating and heating controls recommendations
                 # If so, we increment by 2 (one of the heating system, one for the heating controls)
                 # otherwise we incremenet by 1
-                max_used_phase = max([rec["phase"] for rec in self.heating_recommender.recommendations])
+                max_used_phase = max(
+                    [rec["phase"] for recs in self.heating_recommender.recommendations for rec in recs]
+                )
                 amount_to_increment = max_used_phase - phase + 1
                 phase += amount_to_increment
 

From 7bdf2147badefd9f43250ac0eedc933f6378b842 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 19 Apr 2024 18:38:16 +0100
Subject: [PATCH 249/262] restructured output of heating and heating control
 recommendations

---
 backend/app/plan/router.py            | 20 ++++++++++----------
 recommendations/HeatingRecommender.py | 16 +++++++++-------
 recommendations/Recommendations.py    | 19 +++++++++++++------
 3 files changed, 32 insertions(+), 23 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 06d1aadf..ebaf482d 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -282,16 +282,16 @@ async def trigger_plan(body: PlanTriggerRequest):
             property_id, is_new = create_property(
                 session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn
             )
-            if not is_new:
-                continue
-
-            create_property_targets(
-                session,
-                property_id=property_id,
-                portfolio_id=body.portfolio_id,
-                epc_target=body.goal_value,
-                heat_demand_target=None
-            )
+            # if not is_new:
+            #     continue
+            #
+            # create_property_targets(
+            #     session,
+            #     property_id=property_id,
+            #     portfolio_id=body.portfolio_id,
+            #     epc_target=body.goal_value,
+            #     heat_demand_target=None
+            # )
 
             epc_records = {
                 'original_epc': epc_searcher.newest_epc.copy(),
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index fe5cdd46..537125a1 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -15,7 +15,8 @@ class HeatingRecommender:
         self.property = property_instance
         self.costs = Costs(self.property)
 
-        self.recommendations = []
+        self.heating_recommendations = []
+        self.heating_control_recommendations = []
 
     def recommend(self, phase=0):
 
@@ -23,7 +24,8 @@ class HeatingRecommender:
         #       the boiler, but instead flushing the system will make it run more efficiently. There is a cost for this
         #       in the Costs class, stored as SYSTEM_FLUSH_COST
 
-        self.recommendations = []
+        self.heating_recommendations = []
+        self.heating_control_recommendations = []
         # This first iteration of the recommender will provide very basic recommendation
         # We recommend heating controls based on the main heating system
 
@@ -254,7 +256,7 @@ class HeatingRecommender:
             system_change=system_change
         )
 
-        self.recommendations.extend(recommendations)
+        self.heating_recommendations.extend(recommendations)
 
     @staticmethod
     def estimate_boiler_size(property_type, built_form, floor_area, floor_height, num_heated_rooms):
@@ -399,7 +401,7 @@ class HeatingRecommender:
 
         if not system_change and len(boiler_recommendation):
             # If there is not a system change, we add the boiler recommendation at point.
-            self.recommendations.append([boiler_recommendation])
+            self.heating_recommendations.extend([boiler_recommendation])
 
         if system_change:
             # We combine the heating and controls recommendations, in the case of a system change
@@ -417,12 +419,12 @@ class HeatingRecommender:
                 combined_recommendations.extend(combined_recommendation)
 
             # Overwrite the existing boiler recommendation
-            self.recommendations.append(combined_recommendations)
+            self.heating_recommendations.extend(combined_recommendations)
         else:
             # We increment the recommendation phase, since the heating controls are separate from the boiler upgrade
             # but we'll only upgrade if we have a heating recommendation
             has_heating_recommendation = any(
-                rec["type"] == "heating" for recommendation in self.recommendations for rec in recommendation
+                rec["type"] == "heating" for rec in self.heating_recommendations
             )
             if has_heating_recommendation:
                 recommendation_phase += 1
@@ -431,6 +433,6 @@ class HeatingRecommender:
             for recommendation in controls_recommender.recommendation:
                 recommendation["phase"] = recommendation_phase
 
-            self.recommendations.append(controls_recommender.recommendation)
+            self.heating_control_recommendations.extend(controls_recommender.recommendation)
 
         return
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index aba75ad9..06dc2d61 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -110,16 +110,23 @@ class Recommendations:
         # Heating and Electical systems
         if "heating" not in self.exclusions:
             self.heating_recommender.recommend(phase=phase)
-            if self.heating_recommender.recommendations:
-                if len(self.heating_recommender.recommendations) == 1:
-                    property_recommendations.append(self.heating_recommender.recommendations)
-                else:
-                    property_recommendations.extend(self.heating_recommender.recommendations)
+            if (
+                self.heating_recommender.heating_recommendations or
+                self.heating_recommender.heating_control_recommendations
+            ):
+                if self.heating_recommender.heating_recommendations:
+                    property_recommendations.append(self.heating_recommender.heating_recommendations)
+
+                if self.heating_recommender.heating_control_recommendations:
+                    property_recommendations.append(self.heating_recommender.heating_control_recommendations)
+
                 # We check if we have distinct heating and heating controls recommendations
                 # If so, we increment by 2 (one of the heating system, one for the heating controls)
                 # otherwise we incremenet by 1
                 max_used_phase = max(
-                    [rec["phase"] for recs in self.heating_recommender.recommendations for rec in recs]
+                    [rec["phase"] for rec in
+                     self.heating_recommender.heating_recommendations +
+                     self.heating_recommender.heating_control_recommendations]
                 )
                 amount_to_increment = max_used_phase - phase + 1
                 phase += amount_to_increment

From 5a879572f46fba68fc136f2d0681805119e60ccb Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 23 Apr 2024 15:34:29 +0100
Subject: [PATCH 250/262] final modifications for immo pilot

---
 etl/customers/immo/pilot/asset_list_2.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/etl/customers/immo/pilot/asset_list_2.py b/etl/customers/immo/pilot/asset_list_2.py
index 1b4fad9a..52260f57 100644
--- a/etl/customers/immo/pilot/asset_list_2.py
+++ b/etl/customers/immo/pilot/asset_list_2.py
@@ -51,7 +51,9 @@ already_installed = [
         'address': '47 Watsons Close', 'postcode': 'DY2 7HL', "already_installed": ["loft_insulation"]
     },
     {
-        'address': '44 Hatfield Road', 'postcode': 'DY9 7LW', "already_installed": ["loft_insulation"]
+        'address': '44 Hatfield Road',
+        'postcode': 'DY9 7LW',
+        "already_installed": ["loft_insulation", "cavity_wall_insulation"]
     }
 ]
 

From 7a275deb6df6a231bde60d64d78ba3b04ab32f38 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 23 Apr 2024 17:12:39 +0100
Subject: [PATCH 251/262] route march code

---
 .idea/Model.iml                      |  2 +-
 .idea/misc.xml                       |  2 +-
 etl/customers/guiness/route_march.py | 98 ++++++++++++++++++++++++++++
 3 files changed, 100 insertions(+), 2 deletions(-)
 create mode 100644 etl/customers/guiness/route_march.py

diff --git a/.idea/Model.iml b/.idea/Model.iml
index 4413bb06..b0f9c00d 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 6f308057..1122b380 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/etl/customers/guiness/route_march.py b/etl/customers/guiness/route_march.py
new file mode 100644
index 00000000..28f350d3
--- /dev/null
+++ b/etl/customers/guiness/route_march.py
@@ -0,0 +1,98 @@
+import os
+
+import pandas as pd
+from tqdm import tqdm
+
+from dotenv import load_dotenv
+from utils.s3 import read_excel_from_s3
+from backend.SearchEpc import SearchEpc
+from epc_api.client import EpcClient
+from utils.s3 import save_csv_to_s3
+
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+
+def app():
+    """
+    This app is satisfying an ad hoc request to retrieve EPC data for properties owned by Guiness, to help plan the
+    route march
+
+    These properties were provided to us by Ecosurv
+    :return:
+    """
+    asset_list = read_excel_from_s3(
+        bucket_name="retrofit-datalake-dev",
+        file_key="customers/guiness/TGP CW Properties PV.xlsx",
+        header_row=0
+    )
+
+    epc_data = []
+    for _, guiness_property in tqdm(asset_list.iterrows(), total=len(asset_list)):
+
+        searcher = SearchEpc(
+            address1=str(guiness_property["Address"]),
+            postcode=guiness_property["POSTCODES"],
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key="",
+            property_type=None,
+            fast=True
+        )
+        # Force the skipping of estimating the EPC
+        searcher.ordnance_survey_client.property_type = None
+        searcher.ordnance_survey_client.built_form = None
+
+        searcher.find_property(skip_os=True)
+        if searcher.newest_epc is None:
+            continue
+
+        epc = {
+            "asset_list_address": guiness_property["Address"],
+            "asset_list_postcode": guiness_property["POSTCODES"],
+            **searcher.newest_epc.copy()
+        }
+
+        epc_data.append(epc)
+
+    epc_df = pd.DataFrame(epc_data)
+
+    # Retrieve just the data we need
+    epc_df = epc_df[
+        [
+            "asset_list_address",
+            "asset_list_postcode",
+            "uprn",
+            "property-type",
+            "built-form",
+            "inspection-date",
+            "current-energy-rating",
+            "current-energy-efficiency",
+            "roof-description",
+            "walls-description",
+            "transaction-type"
+        ]
+    ]
+
+    asset_list = asset_list.merge(
+        epc_df, how="left", left_on=["Address", "POSTCODES"], right_on=["asset_list_address", "asset_list_postcode"]
+    )
+
+    # De-dupe on the address and postcode, since 137 Badger Avenue was duplicated
+    asset_list = asset_list.drop_duplicates(subset=["Address", "POSTCODES"])
+    asset_list = asset_list.drop(columns=["asset_list_address", "asset_list_postcode"])
+
+    # Rename the columns
+    asset_list = asset_list.rename(columns={
+        "property-type": "Property Type",
+        "built-form": "Archetype",
+        "inspection-date": "Last EPC Inspection Date",
+        "current-energy-rating": "Last survey EPC Rating",
+        "current-energy-efficiency": "Last survey SAP Score",
+        "roof-description": "Roof Construction",
+        "walls-description": "Wall Construction",
+        "transaction-type": "Last EPC Reason"
+    })
+
+    # Store as an excel
+    filename = "Guiness EPC data.xlsx"
+    asset_list.to_excel(filename, index=False)

From 03ca16bfc5c94d8325f5c20e5a82aabbb66e014d Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 26 Apr 2024 14:06:48 +0100
Subject: [PATCH 252/262] Added rightmove property valuation increase estimates

---
 .idea/Model.iml                              |  2 +-
 .idea/misc.xml                               |  2 +-
 backend/app/plan/router.py                   | 20 +++----
 backend/ml_models/Valuation.py               | 39 +++++++++++-
 etl/customers/gla_croydon_demo/asset_list.py |  4 --
 etl/customers/goldman/asset_list.py          | 63 ++++++++++++++++++++
 etl/customers/goldman/epc_f_g_properties.py  | 25 ++++++++
 recommendations/HeatingRecommender.py        |  1 -
 8 files changed, 137 insertions(+), 19 deletions(-)
 create mode 100644 etl/customers/goldman/asset_list.py
 create mode 100644 etl/customers/goldman/epc_f_g_properties.py

diff --git a/.idea/Model.iml b/.idea/Model.iml
index b0f9c00d..4413bb06 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 1122b380..6f308057 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index ebaf482d..06d1aadf 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -282,16 +282,16 @@ async def trigger_plan(body: PlanTriggerRequest):
             property_id, is_new = create_property(
                 session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn
             )
-            # if not is_new:
-            #     continue
-            #
-            # create_property_targets(
-            #     session,
-            #     property_id=property_id,
-            #     portfolio_id=body.portfolio_id,
-            #     epc_target=body.goal_value,
-            #     heat_demand_target=None
-            # )
+            if not is_new:
+                continue
+
+            create_property_targets(
+                session,
+                property_id=property_id,
+                portfolio_id=body.portfolio_id,
+                epc_target=body.goal_value,
+                heat_demand_target=None
+            )
 
             epc_records = {
                 'original_epc': epc_searcher.newest_epc.copy(),
diff --git a/backend/ml_models/Valuation.py b/backend/ml_models/Valuation.py
index 39ea5a98..5c781979 100644
--- a/backend/ml_models/Valuation.py
+++ b/backend/ml_models/Valuation.py
@@ -71,6 +71,14 @@ class PropertyValuation:
         90013980: 148_000,  # Based on Zoopla
         90087154: 184_000,  # Based on Zoopla
         90046817: 167_000,  # Based on Zoopla
+        # Goldman Sachs Pilot for intro - search by going to https://www.zoopla.co.uk/property/uprn/{uprn}/
+        100070358888: 153_000,  # Based on Zoopla
+        10090436544: 282_000,  # Based on Zoopla
+        100070365751: 177_000,  # Based on Zoopla
+        10095952767: 168_000,  # Based on Zoopla
+        100070520130: 177_000,  # Based on Zoopla
+        100070333957: 185_000,  # Based on Zoopla
+        100070543258: 211_000,  # Based on Zoopla
     }
 
     # We base our valuation uplifts on a number of sources
@@ -108,6 +116,29 @@ class PropertyValuation:
         # {"start": "D", "end": "A", "increase_percentage": 0.017},
     ]
 
+    # Found here: https://www.rightmove.co.uk/news/articles/property-news/green-premium-epc-ratings/
+    # F -> C is + 15%
+    # E -> C is +7%
+    # D -> C is +3%
+    RIGHTMOVE_MAPPING = [
+        {"start": "G", "end": "C", "increase_percentage": 0.15},
+        {"start": "G", "end": "B", "increase_percentage": 0.15},
+        {"start": "G", "end": "A", "increase_percentage": 0.15},
+
+        {"start": "F", "end": "C", "increase_percentage": 0.15},
+        {"start": "F", "end": "B", "increase_percentage": 0.15},
+        {"start": "F", "end": "A", "increase_percentage": 0.15},
+
+        {"start": "E", "end": "C", "increase_percentage": 0.07},
+        {"start": "E", "end": "B", "increase_percentage": 0.07},
+        {"start": "E", "end": "A", "increase_percentage": 0.07},
+
+        {"start": "D", "end": "C", "increase_percentage": 0.03},
+        {"start": "D", "end": "B", "increase_percentage": 0.03},
+        {"start": "D", "end": "A", "increase_percentage": 0.03},
+
+    ]
+
     EPC_BANDS = ["G", "F", "E", "D", "C", "B", "A"]
 
     @classmethod
@@ -159,14 +190,18 @@ class PropertyValuation:
 
         msm_increase, lloyds_increase = cls.get_increase(epc_band_range)
 
-        # We now use the knight frank and nationwide data to get further valuation evidence, if we have it
+        # We now use the knight frank, nationwide and Rightmove data to get further valuation evidence, if we have it
         kf_increase = [x for x in cls.KNIGHT_FRANK_MAPPING if x["start"] == current_epc and x["end"] == target_epc]
         nw_increase = [x for x in cls.NATIONWIDE_MAPPING if x["start"] == current_epc and x["end"] == target_epc]
+        rm_increase = [x for x in cls.RIGHTMOVE_MAPPING if x["start"] == current_epc and x["end"] == target_epc]
 
         kf_increase = kf_increase[0]["increase_percentage"] if kf_increase else None
         nw_increase = nw_increase[0]["increase_percentage"] if nw_increase else None
+        rm_increase = rm_increase[0]["increase_percentage"] if rm_increase else None
 
-        all_increases = [x for x in [msm_increase, lloyds_increase, kf_increase, nw_increase] if x is not None]
+        all_increases = [
+            x for x in [msm_increase, lloyds_increase, kf_increase, nw_increase, rm_increase] if x is not None
+        ]
 
         max_increase = max(all_increases)
         min_increase = min(all_increases)
diff --git a/etl/customers/gla_croydon_demo/asset_list.py b/etl/customers/gla_croydon_demo/asset_list.py
index 1655979b..52e9422c 100644
--- a/etl/customers/gla_croydon_demo/asset_list.py
+++ b/etl/customers/gla_croydon_demo/asset_list.py
@@ -34,10 +34,6 @@ def app():
         low_memory=False
     )
 
-    z = epc_data[epc_data["MAINHEAT_DESCRIPTION"] == "Boiler and radiators, mains gas"]
-    z["HOTWATER_DESCRIPTION"].value_counts()
-    z["MAIN_FUEL"].value_counts()
-
     # Filter on entries where we have a UPRN
     epc_data = epc_data[~pd.isnull(epc_data["UPRN"])]
 
diff --git a/etl/customers/goldman/asset_list.py b/etl/customers/goldman/asset_list.py
new file mode 100644
index 00000000..afe3c64c
--- /dev/null
+++ b/etl/customers/goldman/asset_list.py
@@ -0,0 +1,63 @@
+import pandas as pd
+from utils.s3 import read_excel_from_s3
+from utils.s3 import save_csv_to_s3
+
+PORTFOLIO_ID = 75
+USER_ID = 8
+
+
+def app():
+    asset_list = [
+        {
+            "address": "19 Emily Gardens",
+            "postcode": "B16 0ED",
+        },
+        {
+            "address": "Flat 6 41 Bradford Street",
+            "postcode": "B5 6HX",
+        },
+        {
+            "address": "197 FIELD LANE",
+            "postcode": "B32 4HL",
+        },
+        {
+            "address": "FLAT 4 108 SUMMER ROAD",
+            "postcode": "B23 6DY",
+        },
+        {
+            "address": "1, St. Benedicts Road",
+            "postcode": "B10 9DP",
+        },
+        {
+            "address": "29 COOKSEY LANE",
+            "postcode": "B44 9QL",
+        },
+        {
+            "address": "40 TRITTIFORD ROAD",
+            "postcode": "B13 0HG",
+        }
+    ]
+
+    asset_list = pd.DataFrame(asset_list)
+
+    # Store the asset list in s3
+    filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
+    save_csv_to_s3(
+        dataframe=asset_list,
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename
+    )
+
+    # EPC C portfolio
+    body = {
+        "portfolio_id": str(PORTFOLIO_ID),
+        "housing_type": "Private",
+        "goal": "Increase EPC",
+        "goal_value": "B",
+        "trigger_file_path": filename,
+        "already_installed_file_path": "",
+        "patches_file_path": "",
+        "non_invasive_recommendations_file_path": "",
+        "budget": None,
+    }
+    print(body)
diff --git a/etl/customers/goldman/epc_f_g_properties.py b/etl/customers/goldman/epc_f_g_properties.py
new file mode 100644
index 00000000..28197126
--- /dev/null
+++ b/etl/customers/goldman/epc_f_g_properties.py
@@ -0,0 +1,25 @@
+import pandas as pd
+
+
+def app():
+    """
+    Pulling the list of EPC G & F properties in Birmingham for Goldman Sachs
+    """
+    epc_data = pd.read_csv(
+        "local_data/all-domestic-certificates/domestic-E08000025-Birmingham/certificates.csv",
+        low_memory=False
+    )
+
+    epc_data = epc_data[~pd.isnull(epc_data["UPRN"])]
+    epc_data["UPRN"] = epc_data["UPRN"].astype(int).astype(str)
+
+    # Get the newest EPC for each UPRN. We use LODGEMENT_DATETIME as a proxy for this
+    epc_data["LODGEMENT_DATETIME"] = pd.to_datetime(epc_data["LODGEMENT_DATETIME"], format='mixed')
+
+    epc_data = epc_data.sort_values("LODGEMENT_DATETIME", ascending=False).drop_duplicates("UPRN")
+
+    # Get G & F properties
+    epc_data = epc_data[epc_data["CURRENT_ENERGY_RATING"].isin(["G", "F"])]
+
+    # Save as an excel
+    epc_data.to_excel("Birmingham EPC F & G Properties.xlsx", index=False)
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 537125a1..8988d2a6 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -359,7 +359,6 @@ class HeatingRecommender:
                     **heating_simulation_config,
                     **hotwater_simulation_config,
                     **fuel_simulation_config,
-                    "hot_water_energy_eff_ending": "Good"
                 }
 
             boiler_costs = self.costs.boiler(

From 155a8c568c595207e4d69cd2f766eeec4b5129f1 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 30 Apr 2024 17:41:33 +0100
Subject: [PATCH 253/262] working through the air source heat pump
 recommendations, added route march code for livewest

---
 .idea/Model.iml                               |   2 +-
 .idea/misc.xml                                |   2 +-
 .../AirSourceHeatPumpEfficiency.py            |  46 +++++-
 etl/customers/livewest/route_march.py         | 135 +++++++++++++++++
 .../places_for_people/route_march.py          | 137 ++++++++++++++++++
 recommendations/Costs.py                      |  29 ++++
 recommendations/HeatingControlRecommender.py  |   3 +
 recommendations/HeatingRecommender.py         | 127 +++++++++++++++-
 .../tests/test_air_source_heat_pump.py        |  77 ++++++++++
 9 files changed, 546 insertions(+), 12 deletions(-)
 create mode 100644 etl/customers/livewest/route_march.py
 create mode 100644 etl/customers/places_for_people/route_march.py
 create mode 100644 recommendations/tests/test_air_source_heat_pump.py

diff --git a/.idea/Model.iml b/.idea/Model.iml
index 4413bb06..b0f9c00d 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 6f308057..1122b380 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/etl/air_source_heat_pump/AirSourceHeatPumpEfficiency.py b/etl/air_source_heat_pump/AirSourceHeatPumpEfficiency.py
index 2ba82e77..044cc830 100644
--- a/etl/air_source_heat_pump/AirSourceHeatPumpEfficiency.py
+++ b/etl/air_source_heat_pump/AirSourceHeatPumpEfficiency.py
@@ -21,6 +21,8 @@ class AirSourceHeatPumpEfficiency:
 
     def create_dataset(self):
         logger.info("Creating solar photo supply dataset")
+
+        all_counts = []
         for dir in tqdm(self.file_directories):
             filepath = dir / "certificates.csv"
             df = pd.read_csv(filepath, low_memory=False)
@@ -44,9 +46,15 @@ class AirSourceHeatPumpEfficiency:
             df = df[
                 df["MAINHEAT_DESCRIPTION"].str.contains("air source heat pump", case=False, na=False)
             ]
+
+            # Drop rows that have a missing PROPERTY_TYPE, BUILT_FORM, CONSTRUCTION_AGE_BAND, TOTAL_FLOOR_AREA
+            for col in ["PROPERTY_TYPE", "BUILT_FORM", "CONSTRUCTION_AGE_BAND", "TOTAL_FLOOR_AREA"]:
+                df = df[~pd.isnull(df[col])]
             # Get the columns we're interested in
             df = df[
                 [
+                    "PROPERTY_TYPE",
+                    "BUILT_FORM",
                     "MAINHEAT_DESCRIPTION",
                     "MAINHEAT_ENERGY_EFF",
                     "MAINHEATCONT_DESCRIPTION",
@@ -60,6 +68,8 @@ class AirSourceHeatPumpEfficiency:
 
             counts = df.groupby(
                 [
+                    "PROPERTY_TYPE",
+                    "BUILT_FORM",
                     "MAINHEAT_DESCRIPTION",
                     "MAINHEAT_ENERGY_EFF",
                     "MAINHEATCONT_DESCRIPTION",
@@ -71,8 +81,34 @@ class AirSourceHeatPumpEfficiency:
                 ]
             ).size().reset_index(name="count")
 
-            # Drop rows that have a missing PROPERTY_TYPE, BUILT_FORM, CONSTRUCTION_AGE_BAND, TOTAL_FLOOR_AREA
-            for col in ["PROPERTY_TYPE", "BUILT_FORM", "CONSTRUCTION_AGE_BAND", "TOTAL_FLOOR_AREA"]:
-                df = df[~pd.isnull(df[col])]
-            # Take newest LODGEMENT_DATE per UPRN
-            df = df.sort_values(by="LODGEMENT_DATE", ascending=False).drop_duplicates(subset=["UPRN"])
+            all_counts.append(counts)
+
+        all_counts = pd.concat(all_counts)
+
+        all_counts_agg = all_counts.groupby(
+            [
+                "PROPERTY_TYPE",
+                "BUILT_FORM",
+                "MAINHEAT_DESCRIPTION",
+                "MAINHEAT_ENERGY_EFF",
+                "MAINHEATCONT_DESCRIPTION",
+                "MAINHEATC_ENERGY_EFF",
+                "MAIN_FUEL",
+                "HOTWATER_DESCRIPTION",
+                "HOT_WATER_ENERGY_EFF",
+                "MAINS_GAS_FLAG"
+            ]
+        )["count"].sum().reset_index()
+
+        all_counts_agg.groupby("PROPERTY_TYPE")["count"].sum()
+        # In houses, 68% of the cases where we see air source heat pumps are in detached and semi-detached houses
+        all_counts_agg[all_counts_agg["PROPERTY_TYPE"] == "House"]["BUILT_FORM"].value_counts(normalize=True)
+
+        all_counts_agg[all_counts_agg["PROPERTY_TYPE"] == "Flat"]["BUILT_FORM"].value_counts()
+
+        # In Bungalows, 74% of cases where we see air source heat pumps are in detached and semi-detached houses
+        all_counts_agg[all_counts_agg["PROPERTY_TYPE"] == "Bungalow"]["BUILT_FORM"].value_counts(normalize=True)
+
+        # TODO: Research options for mid and end-terrace houses
+        # TODO: Research the options for flats - we see them appear in flats, but practically speaking, how does the
+        #       install process work?
diff --git a/etl/customers/livewest/route_march.py b/etl/customers/livewest/route_march.py
new file mode 100644
index 00000000..713ee56a
--- /dev/null
+++ b/etl/customers/livewest/route_march.py
@@ -0,0 +1,135 @@
+import os
+
+import pandas as pd
+from tqdm import tqdm
+
+from dotenv import load_dotenv
+from utils.s3 import read_excel_from_s3
+from backend.SearchEpc import SearchEpc
+from epc_api.client import EpcClient
+from utils.s3 import save_csv_to_s3
+
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+
+def route_march_may_2024():
+    """
+    This code pulls supplementary data for a route march that is expected to happen in May 2024. This code
+    was authored on the 30th April 2024.
+    """
+
+    asset_list = read_excel_from_s3(
+        bucket_name="retrofit-datalake-dev",
+        file_key="customers/Livewest/Livewest proposed route march Apr-May 2024.xlsx",
+        header_row=1
+    )
+    asset_list = pd.read_excel("/Users/khalimconn-kowlessar/Downloads/Livewest proposed route march Apr-May 2024.xlsx")
+
+    epc_data = []
+    for _, unit in tqdm(asset_list.iterrows(), total=len(asset_list)):
+
+        lst = [unit["NO"], unit["ADDRESS 1"], unit["ADDRESS 2"], unit["ADDRESS 3"], unit["POSTCODE"]]
+        lst = [str(x).strip() for x in lst if not pd.isnull(x)]
+
+        full_address = ", ".join(lst)
+
+        searcher = SearchEpc(
+            address1=str(unit["NO"]),
+            postcode=unit["POSTCODE"],
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key="",
+            property_type=None,
+            fast=True,
+            full_address=full_address
+        )
+        # Force the skipping of estimating the EPC
+        searcher.ordnance_survey_client.property_type = None
+        searcher.ordnance_survey_client.built_form = None
+
+        searcher.find_property(skip_os=True)
+        if searcher.newest_epc is None:
+            # We try with a different address 1
+            add1 = str(unit["NO"]).lower()
+            add1 = (
+                add1
+                .replace("flat", "")
+                .replace("ft", "")
+                .replace("t", "").strip()
+            )
+
+            searcher = SearchEpc(
+                address1=add1,
+                postcode=unit["POSTCODE"],
+                auth_token=EPC_AUTH_TOKEN,
+                os_api_key="",
+                property_type=None,
+                fast=True,
+                full_address=full_address
+            )
+            # Force the skipping of estimating the EPC
+            searcher.ordnance_survey_client.property_type = None
+            searcher.ordnance_survey_client.built_form = None
+
+            searcher.find_property(skip_os=True)
+
+            if searcher.newest_epc is None:
+                continue
+
+        epc = {
+            "asset_list_house_no": unit["NO"],
+            "asset_list_address1": unit["ADDRESS 1"],
+            "asset_list_postcode": unit["POSTCODE"],
+            **searcher.newest_epc.copy()
+        }
+
+        epc_data.append(epc)
+
+    epc_df = pd.DataFrame(epc_data)
+
+    #
+
+    # Retrieve just the data we need
+    epc_df = epc_df[
+        [
+            "asset_list_house_no",
+            "asset_list_address1",
+            "asset_list_postcode",
+            "uprn",
+            "address",
+            "property-type",
+            "built-form",
+            "inspection-date",
+            "current-energy-rating",
+            "current-energy-efficiency",
+            "roof-description",
+            "walls-description",
+            "transaction-type"
+        ]
+    ].rename(columns={"address": "Matched EPC Address"})
+
+    asset_list = asset_list.merge(
+        epc_df,
+        how="left",
+        left_on=["NO", "ADDRESS 1", "POSTCODE"],
+        right_on=["asset_list_house_no", "asset_list_address1", "asset_list_postcode"]
+    )
+
+    asset_list = asset_list.drop_duplicates(subset=["NO", "ADDRESS 1", "POSTCODE"])
+    asset_list = asset_list.drop(columns=["asset_list_house_no", "asset_list_address1", "asset_list_postcode"])
+
+    # Rename the columns
+    asset_list = asset_list.rename(columns={
+        "property-type": "Property Type",
+        "built-form": "Archetype",
+        "inspection-date": "Last EPC Inspection Date",
+        "current-energy-rating": "Last survey EPC Rating",
+        "current-energy-efficiency": "Last survey SAP Score",
+        "roof-description": "Roof Construction",
+        "walls-description": "Wall Construction",
+        "transaction-type": "Last EPC Reason"
+    })
+
+    # Store as an excel
+    filename = "Livewest EPC data.xlsx"
+    asset_list.to_excel(filename, index=False)
diff --git a/etl/customers/places_for_people/route_march.py b/etl/customers/places_for_people/route_march.py
new file mode 100644
index 00000000..c38c71d3
--- /dev/null
+++ b/etl/customers/places_for_people/route_march.py
@@ -0,0 +1,137 @@
+import os
+
+import pandas as pd
+from tqdm import tqdm
+
+from dotenv import load_dotenv
+from utils.s3 import read_excel_from_s3
+from backend.SearchEpc import SearchEpc
+from epc_api.client import EpcClient
+from utils.s3 import save_csv_to_s3
+
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+
+def app():
+    """
+    This app satisfies an ad hoc request to retrieve EPC data for properties owned by Places For People, to help
+    plan the route march
+
+    These properties were provided to us by Ecosurv
+    :return:
+    """
+    asset_list = read_excel_from_s3(
+        bucket_name="retrofit-datalake-dev",
+        file_key="customers/Places For People/PFP ROUTE MARCH PHASE 1.xlsx",
+        header_row=1
+    )
+
+    epc_data = []
+    for _, pfp_property in tqdm(asset_list.iterrows(), total=len(asset_list)):
+
+        lst = [
+            pfp_property["ADDRESS"],
+            pfp_property["ADDRESS.1"],
+            pfp_property["ADDRESS.2"],
+            pfp_property["POSTCODE"]
+        ]
+        lst = [str(x).strip() for x in lst if not pd.isnull(x)]
+
+        full_address = ", ".join(lst)
+
+        searcher = SearchEpc(
+            address1=str(pfp_property["ADDRESS"]),
+            postcode=pfp_property["POSTCODE"],
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key="",
+            property_type=None,
+            fast=True,
+            full_address=full_address
+        )
+        # Force the skipping of estimating the EPC
+        searcher.ordnance_survey_client.property_type = None
+        searcher.ordnance_survey_client.built_form = None
+
+        searcher.find_property(skip_os=True)
+        if searcher.newest_epc is None:
+            # We try with a different address 1
+            add1 = str(pfp_property["ADDRESS"]).lower()
+            add1 = add1.replace("ft", "").replace("t", "").strip()
+
+            searcher = SearchEpc(
+                address1=add1,
+                postcode=pfp_property["POSTCODE"],
+                auth_token=EPC_AUTH_TOKEN,
+                os_api_key="",
+                property_type=None,
+                fast=True,
+                full_address=full_address
+            )
+            # Force the skipping of estimating the EPC
+            searcher.ordnance_survey_client.property_type = None
+            searcher.ordnance_survey_client.built_form = None
+
+            searcher.find_property(skip_os=True)
+
+            if searcher.newest_epc is None:
+                continue
+
+        epc = {
+            "asset_list_address": pfp_property["ADDRESS"],
+            "asset_list_address1": pfp_property["ADDRESS.1"],
+            "asset_list_postcode": pfp_property["POSTCODE"],
+            **searcher.newest_epc.copy()
+        }
+
+        epc_data.append(epc)
+
+    epc_df = pd.DataFrame(epc_data)
+
+    # 702
+
+    # Retrieve just the data we need
+    epc_df = epc_df[
+        [
+            "asset_list_address",
+            "asset_list_address1",
+            "asset_list_postcode",
+            "uprn",
+            "address",
+            "property-type",
+            "built-form",
+            "inspection-date",
+            "current-energy-rating",
+            "current-energy-efficiency",
+            "roof-description",
+            "walls-description",
+            "transaction-type"
+        ]
+    ].rename(columns={"address": "Matched EPC Address"})
+
+    asset_list = asset_list.merge(
+        epc_df,
+        how="left",
+        left_on=["ADDRESS", "ADDRESS.1", "POSTCODE"],
+        right_on=["asset_list_address", "asset_list_address1", "asset_list_postcode"]
+    )
+
+    # De-dupe on the address and postcode, since 137 Badger Avenue was duplicated
+    asset_list = asset_list.drop_duplicates(subset=["ADDRESS", "ADDRESS.1", "POSTCODE"])
+    asset_list = asset_list.drop(columns=["asset_list_address", "asset_list_address1", "asset_list_postcode"])
+
+    # Rename the columns
+    asset_list = asset_list.rename(columns={
+        "property-type": "Property Type",
+        "built-form": "Archetype",
+        "inspection-date": "Last EPC Inspection Date",
+        "current-energy-rating": "Last survey EPC Rating",
+        "current-energy-efficiency": "Last survey SAP Score",
+        "roof-description": "Roof Construction",
+        "walls-description": "Wall Construction",
+        "transaction-type": "Last EPC Reason"
+    })
+
+    # Store as an excel
+    filename = "Places For People EPC data.xlsx"
+    asset_list.to_excel(filename, index=False)
diff --git a/recommendations/Costs.py b/recommendations/Costs.py
index d7a8ad2f..113bb6f8 100644
--- a/recommendations/Costs.py
+++ b/recommendations/Costs.py
@@ -37,6 +37,24 @@ MCS_SOLAR_PV_COST_DATA = {
     "average_cost_per_kwh-Northern Ireland": 2126.09,
 }
 
+# This data is based on the MCS database
+MCS_AIR_SOURCE_HEAT_PUMP_COST_DATA = {
+    "Outer London": None,
+    "Inner London": None,
+    "South East England": None,
+    "South West England": None,
+    "East of England": None,
+    "East Midlands": None,
+    "West Midlands": None,
+    "North East England": None,
+    "North West England": None,
+    "Yorkshire and the Humber": None,
+    "Wales": None,
+    "Scotland": None,
+    "Northern Ireland": None,
+}
+BOILER_UPGRADE_SCHEME_ASHP_VALUE = 7500
+
 # This is based on quotes from installers
 BATTERY_COST = 3500
 
@@ -1240,3 +1258,14 @@ class Costs:
             "labour_hours": labour_hours,
             "labour_days": labour_days,
         }
+
+    def air_source_heat_pump(self):
+        """
+        Based on the region and type of property, this function will produce a cost estimation for an air source heat
+        pump. This cost will include the boiler upgrade scheme grant
+
+        :return:
+        """
+
+        regional_cost = MCS_AIR_SOURCE_HEAT_PUMP_COST_DATA[self.region]
+        pass
diff --git a/recommendations/HeatingControlRecommender.py b/recommendations/HeatingControlRecommender.py
index d24ad811..76da6c37 100644
--- a/recommendations/HeatingControlRecommender.py
+++ b/recommendations/HeatingControlRecommender.py
@@ -35,6 +35,9 @@ class HeatingControlRecommender:
 
             return
 
+        if heating_description in ["Air source heat pump, radiators, electric"]:
+            self.recommend_time_temperature_zone_controls()
+
     def recommend_room_heaters_electric_controls(self):
         """
         If the home has Room heaters, electric, we start by identifying potential heating controls that could
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 8988d2a6..b197d817 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -1,6 +1,4 @@
-import pandas as pd
-
-from recommendations.Costs import Costs
+from recommendations.Costs import Costs, BOILER_UPGRADE_SCHEME_ASHP_VALUE
 from recommendations.recommendation_utils import check_simulation_difference, override_costs
 from backend.Property import Property
 from etl.epc_clean.epc_attributes.MainheatAttributes import MainHeatAttributes
@@ -18,7 +16,14 @@ class HeatingRecommender:
         self.heating_recommendations = []
         self.heating_control_recommendations = []
 
-    def recommend(self, phase=0):
+    def recommend(self, has_cavity_and_loft_recommendations, phase=0):
+        """
+        Produces heating recommendations
+        :param has_cavity_and_loft_recommendations: boolean indicating if we have produced a cavity or loft insulation
+        recommendation. If there are cavity or loft recommendations, the property would need to complete those measures
+        before being able to get the boiler upgrade scheme benefits. The front-end messaging notes this requirement
+        :param phase: indicates the phase of the retrofit programme
+        """
 
         # TODO: We could have a system flush recommendation for an existing boiler, where there is no need to replace
         #       the boiler, but instead flushing the system will make it run more efficiently. There is a cost for this
@@ -81,8 +86,120 @@ class HeatingRecommender:
                 phase=phase, system_change=system_change, exising_room_heaters=exising_room_heaters
             )
 
+        # We recommend air source heat pumps
+        # Heat pumps are suitable for all property types:
+        # https://energysavingtrust.org.uk/from-flats-to-terraced-houses-heat-pumps-are-suitable-for-all-property-types/
+        # Just seems least probable for flats, so we'll allow houses and bungalows
+        # In the future, we'll allow overrides, so that non-intrusive surveys can contradict these conditions
+        # and either allow or prevent the recommendation of an air source heat pump
+
+        suitable_property_types = self.property.data["property-type"] in ["House", "Bungalow"]
+        has_air_source_heat_pump = self.property.main_heating["has_air_source_heat_pump"]
+
+        if suitable_property_types and not has_air_source_heat_pump:
+            self.recommend_air_source_heat_pump(
+                phase=phase, has_cavity_and_loft_recommendations=has_cavity_and_loft_recommendations
+            )
+
         return
 
+    def recommend_air_source_heat_pump(self, phase, has_cavity_and_loft_recommendations):
+        """
+        This method will implement the recommendation for an air source heat pump
+        This is ultimately an overhaul to the heating system and so is recommended as an alternative to other
+        heating system recommendations
+        :return:
+        """
+
+        controls_recommender = HeatingControlRecommender(self.property)
+        controls_recommender.recommend(heating_description="Air source heat pump, radiators, electric")
+
+        ashp_costs = self.costs.air_source_heat_pump()
+        # We add the costs of the heating controls, onto each key in the costs dictionary
+        if controls_recommender.recommendation:
+            for key in ashp_costs:
+                ashp_costs[key] += controls_recommender.recommendation[0][key]
+
+        already_installed = "air_source_heat_pump" in self.property.already_installed
+        if already_installed:
+            ashp_costs = override_costs(ashp_costs)
+            description = "The property already has an air source heat pump, no further action needed."
+        else:
+            if controls_recommender.recommendation:
+                description = ("Install an air source heat pump, and upgrade heating controls to Smart Thermostats, "
+                               "room sensors and smart radiator valves (time & temperature zone control) ")
+            else:
+                description = "Install an air source heat pump."
+
+            # If the property does not have existing cavity and loft insulation, we include a note that the cost
+            # includes the boiler upgrade scheme and that the cavity and loft need to be treated, to ensure access
+            # to the funding
+            if has_cavity_and_loft_recommendations:
+                description = description + (f" The cost of works includes the £"
+                                             f"{BOILER_UPGRADE_SCHEME_ASHP_VALUE} boiler upgrade scheme grant. "
+                                             f"You must ensure that the property has an insulated cavity and "
+                                             f"270mm+ loft insulation to qualify for the grant")
+            else:
+                description = description + (f" The cost of works includes the £"
+                                             f"{BOILER_UPGRADE_SCHEME_ASHP_VALUE} boiler upgrade scheme grant")
+
+        simulation_config = {
+            "mainheat_energy_eff_ending": "Good",
+            "hot_water_energy_eff_ending": "Good"
+        }
+        # Installation of an air source heat pump improves the hot water system so we need to reflect this in
+        # the outcome of the recommendation
+        heating_ending_config = MainHeatAttributes("Air source heat pump, radiators, electric").process()
+        hotwater_ending_config = HotWaterAttributes("From main system").process()
+
+        # If the property does not currently have electric main fuel, we'll simulate the change
+        fuel_ending_config = {}
+        if self.property.main_fuel["fuel_type"] != "electricity":
+            fuel_ending_config = MainFuelAttributes("electricity (not community)").process()
+
+        # Check the simulation differences
+        heating_simulation_config = check_simulation_difference(
+            new_config=heating_ending_config, old_config=self.property.main_heating
+        )
+        hotwater_simulation_config = check_simulation_difference(
+            new_config=hotwater_ending_config, old_config=self.property.hotwater
+        )
+        fuel_simulation_config = check_simulation_difference(
+            new_config=fuel_ending_config, old_config=self.property.main_fuel
+        )
+
+        simulation_config = {
+            **simulation_config,
+            **heating_simulation_config,
+            **hotwater_simulation_config,
+            **fuel_simulation_config,
+        }
+
+        if controls_recommender.recommendation:
+            # We should have just the single recommendation for heat controls, which is time
+            # and temperature zone controls
+            simulation_config = {
+                **simulation_config,
+                **controls_recommender.recommendation[0]["simulation_config"]
+            }
+
+        ashp_recommendation = {
+            "phase": phase,
+            "parts": [
+                # TODO
+            ],
+            "type": "heating",
+            "description": description,
+            "starting_u_value": None,
+            "new_u_value": None,
+            "sap_points": None,
+            "already_installed": already_installed,
+            "simulation_config": simulation_config,
+            **ashp_costs
+        }
+
+        self.heating_recommendations.append(ashp_recommendation)
+
     @staticmethod
     def check_simulation_difference(old_config, new_config):
         """
@@ -146,7 +263,7 @@ class HeatingRecommender:
 
                 recommendation_description = f"{description} and {controls_description}"
 
-            already_installed = "cavity_wall_insulation" in self.property.already_installed
+            already_installed = "heating_controls" in self.property.already_installed
             if already_installed:
                 total_costs = override_costs(total_costs)
                 recommendation_description = "Heating system has already been upgraded, no further action needed."
diff --git a/recommendations/tests/test_air_source_heat_pump.py b/recommendations/tests/test_air_source_heat_pump.py
new file mode 100644
index 00000000..d80afc6e
--- /dev/null
+++ b/recommendations/tests/test_air_source_heat_pump.py
@@ -0,0 +1,77 @@
+from backend.Property import Property
+from recommendations.HeatingRecommender import HeatingRecommender
+from etl.epc.Record import EPCRecord
+
+
+class TestAirSourceHeatPump:
+
+    def test_eligible(self):
+        # This tests a house, which will be suitable for an air source heat pump
+        epc_record = EPCRecord()
+        epc_record.prepared_epc = {
+            "county": "Broxbourne",
+            "mainheat-energy-eff": "Good",
+            "hot-water-energy-eff": "Good",
+            "mainheatc-energy-eff": "Good",
+            "number-heated-rooms": 5,
+            "property-type": "House",
+            "built-form": "Semi-Detached"
+        }
+
+        property_instance = Property(id=0, address="fake", postcode="fake", epc_record=epc_record)
+        property_instance.main_heating = {
+            'original_description': 'Boiler and radiators, mains gas',
+            "clean_description": "Boiler and radiators, mains gas",
+            'has_radiators': True,
+            'has_fan_coil_units': False, 'has_pipes_in_screed_above_insulation': False,
+            'has_pipes_in_insulated_timber_floor': False, 'has_pipes_in_concrete_slab': False, 'has_boiler': True,
+            'has_air_source_heat_pump': False,
+            'has_room_heaters': False, 'has_electric_storage_heaters': False,
+            'has_warm_air': False,
+            'has_electric_underfloor_heating': False,
+            'has_electric_ceiling_heating': False, 'has_community_scheme': False,
+            'has_ground_source_heat_pump': False, 'has_no_system_present': False,
+            'has_portable_electric_heaters': False,
+            'has_water_source_heat_pump': False, 'has_electric': False,
+            'has_mains_gas': True, 'has_wood_logs': False,
+            'has_coal': False, 'has_oil': False, 'has_wood_pellets': False,
+            'has_anthracite': False,
+            'has_dual_fuel_mineral_and_wood': False, 'has_smokeless_fuel': False,
+            'has_lpg': False, 'has_assumed': False,
+            'has_electricaire': False, 'has_assumed_for_most_rooms': False,
+            'has_underfloor_heating': False,
+            "has_electric_heat_pumps": False,
+            "has_micro-cogeneration": False
+        }
+        property_instance.main_fuel = {
+            'original_description': 'mains gas (not community)', 'fuel_type': 'mains gas',
+            'tariff_type': None,
+            'is_community': False, 'no_individual_heating_or_community_network': False,
+            'complex_fuel_type': None
+        }
+        property_instance.hotwater = {
+            'original_description': 'From main system',
+            'clean_description': 'From main system',
+            'heater_type': None,
+            'system_type': 'from main system',
+            'thermostat_characteristics': None, 'heating_scope': None,
+            'energy_recovery': None, 'tariff_type': None,
+            'extra_features': None, 'chp_systems': None, 'distribution_system': None,
+            'no_system_present': None,
+            'assumed': False, "appliance": None
+        }
+        property_instance.main_heating_controls = {
+            'original_description': 'Programmer, room thermostat and TRVs',
+            'thermostatic_control': 'room thermostat', 'charging_system': None, 'switch_system': 'programmer',
+            'no_control': None, 'dhw_control': None, 'community_heating': None, 'multiple_room_thermostats': False,
+            'auxiliary_systems': None, 'trvs': 'trvs', 'rate_control': None
+
+        }
+
+        recommender = HeatingRecommender(property_instance=property_instance)
+
+        assert not recommender.heating_recommendations
+
+        recommender.recommend(phase=0)
+
+        assert recommender.recommendation is None

From cce9c64fdc029b0f3fa35445f5784ad5698b7b29 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 2 May 2024 00:37:36 +0100
Subject: [PATCH 254/262] Adding company ownership matching code for goldman
 poc

---
 backend/SearchEpc.py                        | 34 ++++----
 etl/customers/goldman/property_ownership.py | 87 +++++++++++++++++++++
 etl/customers/livewest/route_march.py       |  3 +-
 3 files changed, 104 insertions(+), 20 deletions(-)
 create mode 100644 etl/customers/goldman/property_ownership.py

diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py
index 44178792..06eea258 100644
--- a/backend/SearchEpc.py
+++ b/backend/SearchEpc.py
@@ -193,33 +193,31 @@ class SearchEpc:
     @classmethod
     def get_house_number(cls, address: str) -> str | None:
         """
-        This method will use the usaddress library to parse an address and extract the house number
-        :return:
+        This method uses the usaddress library to parse an address and extract the primary house or flat number.
         """
+        try:
+            parsed = usaddress.parse(address)
+            # First, try to get the 'OccupancyIdentifier' if 'OccupancyType' is detected
+            for part, type_ in parsed:
+                if type_ == 'OccupancyIdentifier':
+                    return part  # This assumes the first 'OccupancyIdentifier' after 'OccupancyType' is the primary
+                    # number
 
-        parsed = usaddress.parse(address)
-        parsed_house_number = [x for x in parsed if (x[1] == "AddressNumber")]
-        parsed_house_number = parsed_house_number[0][0] if parsed_house_number else None
+            # Fallback to 'AddressNumber' if no 'OccupancyIdentifier' is found
+            address_number = next((part for part, type_ in parsed if type_ == 'AddressNumber'), None)
+            if address_number:
+                return address_number.replace(",", "")  # Remove any trailing commas
 
-        if parsed_house_number is None:
-            # Because usaddress isn't optimal for parsing addresses with some prefixes such as 'Flat',
-            # we also add a custom approach
-
-            # Pattern to look for 'Flat' or 'Apartment' followed by a number, or just a number at the beginning
+            # Further fallback to custom regex (in case usaddress completely fails)
             pattern = r'(?i)(?:flat|apartment)\s*(\d+)|^\s*(\d+)'
-
             match = re.search(pattern, address)
-
             if match:
-                # Return the first non-None group found
                 return next(g for g in match.groups() if g is not None)
-            else:
-                return None
 
-        # Remove training commas
-        parsed_house_number = parsed_house_number.replace(",", "")
+        except Exception as e:
+            print(f"Error parsing address: {e}")
 
-        return parsed_house_number
+        return None
 
     @staticmethod
     def extract_numeric_housenumber_part(house_number: str | None) -> int | None:
diff --git a/etl/customers/goldman/property_ownership.py b/etl/customers/goldman/property_ownership.py
new file mode 100644
index 00000000..17db71b2
--- /dev/null
+++ b/etl/customers/goldman/property_ownership.py
@@ -0,0 +1,87 @@
+import pandas as pd
+from tqdm import tqdm
+from backend.SearchEpc import SearchEpc
+
+
+def aggregate_matches(matching_lookup, company_ownership):
+    df = matching_lookup.merge(company_ownership, how="left", on="Title Number")
+    counts = (
+        df.groupby(["Company Registration No. (1)", "Proprietor Name (1)"])["UPRN"]
+        .count()
+        .reset_index(name="number_of_properties")
+    )
+    counts = counts.sort_values("number_of_properties", ascending=False)
+
+    return counts
+
+
+def app():
+    """
+    This script is for scoping property ownership for EPC F & G rated properties in Birmingam, for Goldman Sachs
+    """
+
+    properties = pd.read_excel("Birmingham EPC F & G Properties.xlsx")
+    company_ownership = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/CCOD_FULL_2024_04.csv")
+    # FIlter on relevant postcodes
+    company_ownership = company_ownership[
+        company_ownership["Postcode"].str.lower().isin(properties["POSTCODE"].str.lower().unique())]
+
+    # Now we filter properties the other way around
+    properties = properties[properties["POSTCODE"].str.lower().isin(company_ownership["Postcode"].str.lower().unique())]
+    # We end up with 7.4k entires on a postcode match, however we need to now do a direct address match
+
+    ignore_title_numbers = [
+        "WM922695",  # Land at the back of 17 Plumstead Road, Birmingham (B44 0EA): relates to WM154788
+        "WM426374",  # land on the south side of 15 Carlyle Road, Edgbaston, Birmingham (B16 9BH): relates to WM537591
+        "WM44948",
+    ]
+    company_ownership = company_ownership[~company_ownership["Title Number"].isin(ignore_title_numbers)]
+    # Remove entries where the address begins with the term "land adjoining":
+
+    company_ownership = company_ownership[~company_ownership["Property Address"].str.startswith("land adjoining")]
+
+    freehold_matching_lookup = []
+    leasehold_matching_lookup = []
+    for _, address in tqdm(properties.iterrows(), total=len(properties)):
+        filtered = company_ownership[
+            company_ownership["Postcode"].str.lower() == address["POSTCODE"].lower()
+            ].copy()
+
+        filtered["house_number"] = filtered["Property Address"].apply(SearchEpc.get_house_number)
+        house_no = SearchEpc.get_house_number(address["ADDRESS1"])
+
+        filtered = filtered[filtered["house_number"] == house_no]
+
+        if filtered.empty:
+            continue
+
+        filtered_freehold = filtered[filtered["Tenure"] == "Freehold"]
+        filtered_leasehold = filtered[filtered["Tenure"] == "Leasehold"]
+
+        if filtered_freehold.shape[0] > 1:
+            raise ValueError("Multiple freehold matches")
+
+        if filtered_leasehold.shape[0] > 1:
+            raise ValueError("Multiple leasehold matches")
+
+        if not filtered_leasehold.empty:
+            leasehold_matching_lookup.append(
+                {
+                    "UPRN": address["UPRN"],
+                    "Title Number": filtered_leasehold["Title Number"].values[0]
+                }
+            )
+
+        if not filtered_freehold.empty:
+            freehold_matching_lookup.append(
+                {
+                    "UPRN": address["UPRN"],
+                    "Title Number": filtered_freehold["Title Number"].values[0]
+                }
+            )
+
+    freehold_matching_lookup = pd.DataFrame(freehold_matching_lookup)
+    leasehold_matching_lookup = pd.DataFrame(leasehold_matching_lookup)
+
+    freehold_aggregate = aggregate_matches(freehold_matching_lookup, company_ownership)
+    leasehold_aggregate = aggregate_matches(leasehold_matching_lookup, company_ownership)
diff --git a/etl/customers/livewest/route_march.py b/etl/customers/livewest/route_march.py
index 713ee56a..9e69fd43 100644
--- a/etl/customers/livewest/route_march.py
+++ b/etl/customers/livewest/route_march.py
@@ -22,9 +22,8 @@ def route_march_may_2024():
     asset_list = read_excel_from_s3(
         bucket_name="retrofit-datalake-dev",
         file_key="customers/Livewest/Livewest proposed route march Apr-May 2024.xlsx",
-        header_row=1
+        header_row=0
     )
-    asset_list = pd.read_excel("/Users/khalimconn-kowlessar/Downloads/Livewest proposed route march Apr-May 2024.xlsx")
 
     epc_data = []
     for _, unit in tqdm(asset_list.iterrows(), total=len(asset_list)):

From 76ef5c897a2471473058a39d765f55e452a82db5 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 2 May 2024 00:41:47 +0100
Subject: [PATCH 255/262] handling genuine dual leasehold ownership

---
 etl/customers/goldman/property_ownership.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/etl/customers/goldman/property_ownership.py b/etl/customers/goldman/property_ownership.py
index 17db71b2..4a6faede 100644
--- a/etl/customers/goldman/property_ownership.py
+++ b/etl/customers/goldman/property_ownership.py
@@ -42,6 +42,7 @@ def app():
 
     freehold_matching_lookup = []
     leasehold_matching_lookup = []
+    shared_leasehold_match = []
     for _, address in tqdm(properties.iterrows(), total=len(properties)):
         filtered = company_ownership[
             company_ownership["Postcode"].str.lower() == address["POSTCODE"].lower()
@@ -62,9 +63,10 @@ def app():
             raise ValueError("Multiple freehold matches")
 
         if filtered_leasehold.shape[0] > 1:
-            raise ValueError("Multiple leasehold matches")
-
-        if not filtered_leasehold.empty:
+            matched = filtered_leasehold[["Title Number"]].copy()
+            matched.insert(0, "UPRN", address["UPRN"])
+            shared_leasehold_match.append(matched)
+        elif not filtered_leasehold.empty:
             leasehold_matching_lookup.append(
                 {
                     "UPRN": address["UPRN"],

From 5cb35e1d9eb3beec22d772293208fef09c18fbba Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 2 May 2024 18:33:25 +0100
Subject: [PATCH 256/262] working on property ownership pipeline

---
 backend/SearchEpc.py                          |  13 +-
 etl/customers/goldman/property_ownership.py   | 369 ++++++++++++++++--
 etl/customers/vander_elliot/__init__.py       |   0
 .../vander_elliot/single_property_pilot.py    |  56 +++
 recommendations/HeatingRecommender.py         |  14 +-
 recommendations/Recommendations.py            |   2 +-
 recommendations/SolarPvRecommendations.py     |   2 +-
 7 files changed, 418 insertions(+), 38 deletions(-)
 create mode 100644 etl/customers/vander_elliot/__init__.py
 create mode 100644 etl/customers/vander_elliot/single_property_pilot.py

diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py
index 06eea258..db9ec4ff 100644
--- a/backend/SearchEpc.py
+++ b/backend/SearchEpc.py
@@ -196,6 +196,13 @@ class SearchEpc:
         This method uses the usaddress library to parse an address and extract the primary house or flat number.
         """
         try:
+
+            # Custom regex to catch a broad range of cases
+            pattern = r'(?i)(?:flat|apartment)\s*(\d+)|^\s*(\d+)'
+            match = re.search(pattern, address)
+            if match:
+                return next(g for g in match.groups() if g is not None)
+
             parsed = usaddress.parse(address)
             # First, try to get the 'OccupancyIdentifier' if 'OccupancyType' is detected
             for part, type_ in parsed:
@@ -208,12 +215,6 @@ class SearchEpc:
             if address_number:
                 return address_number.replace(",", "")  # Remove any trailing commas
 
-            # Further fallback to custom regex (in case usaddress completely fails)
-            pattern = r'(?i)(?:flat|apartment)\s*(\d+)|^\s*(\d+)'
-            match = re.search(pattern, address)
-            if match:
-                return next(g for g in match.groups() if g is not None)
-
         except Exception as e:
             print(f"Error parsing address: {e}")
 
diff --git a/etl/customers/goldman/property_ownership.py b/etl/customers/goldman/property_ownership.py
index 4a6faede..abc2645d 100644
--- a/etl/customers/goldman/property_ownership.py
+++ b/etl/customers/goldman/property_ownership.py
@@ -1,27 +1,248 @@
+import re
 import pandas as pd
 from tqdm import tqdm
+import Levenshtein
 from backend.SearchEpc import SearchEpc
 
+# Average value of a property in the midlands in 2024 was £238,000. Since these are EPC F & G properties, we assume
+# £213,165 since they trade at a discount. This is based on the rightmove study where moving from an EPC F/G -> C has a
+# +15% impact on valuation and D -> C has a +3% impact on valuation.
+# The mode EPC rating is D, so we associate the £238k valuation with an EPC D property
+# Therefore value_of_F * 1.15 = value_of_D * 1.03
+# Therefore value_of_F = value_of_D * 1.03/1.15 = 238k * (1.03/1.15) = 213165
+PROPERTY_VALUE_ESTIMATE = 213_165
 
-def aggregate_matches(matching_lookup, company_ownership):
-    df = matching_lookup.merge(company_ownership, how="left", on="Title Number")
+
+def aggregate_matches(matching_lookup, company_ownership, properties):
+    df = matching_lookup.merge(
+        company_ownership, how="left", on="Title Number"
+    ).merge(
+        properties[["UPRN", "LOCAL_AUTHORITY_LABEL"]], how="left", on="UPRN"
+    )
     counts = (
-        df.groupby(["Company Registration No. (1)", "Proprietor Name (1)"])["UPRN"]
+        df.groupby(["Company Registration No. (1)", "Proprietor Name (1)", "LOCAL_AUTHORITY_LABEL"])["UPRN"]
         .count()
         .reset_index(name="number_of_properties")
     )
     counts = counts.sort_values("number_of_properties", ascending=False)
 
-    return counts
+    pivot_counts = counts.pivot_table(
+        index=["Company Registration No. (1)", "Proprietor Name (1)"],  # Rows: companies and proprietors
+        columns="LOCAL_AUTHORITY_LABEL",  # Columns: each local authority
+        values="number_of_properties",  # The counts of properties
+        fill_value=0  # Fill missing values with 0 (where there are no properties owned)
+    ).reset_index()
+
+    total_counts = (
+        df.groupby(["Company Registration No. (1)", "Proprietor Name (1)"])["UPRN"]
+        .count()
+        .reset_index(name="total_number_of_properties")
+    )
+
+    pivot_counts = pivot_counts.merge(
+        total_counts, how="left", on=["Company Registration No. (1)", "Proprietor Name (1)"]
+    )
+
+    pivot_counts = pivot_counts.sort_values("total_number_of_properties", ascending=False)
+
+    pivot_counts["approx_value"] = PROPERTY_VALUE_ESTIMATE * pivot_counts["total_number_of_properties"]
+    pivot_counts["cumulative_value"] = pivot_counts["approx_value"].cumsum()
+
+    return pivot_counts
+
+
+def find_f_g_properties(paths):
+    data = []
+    for path in tqdm(paths):
+        epc_data = pd.read_csv(path, low_memory=False)
+
+        epc_data = epc_data[~pd.isnull(epc_data["UPRN"])]
+        epc_data["UPRN"] = epc_data["UPRN"].astype(int).astype(str)
+
+        # Get the newest EPC for each UPRN. We use LODGEMENT_DATETIME as a proxy for this
+        epc_data["LODGEMENT_DATETIME"] = pd.to_datetime(epc_data["LODGEMENT_DATETIME"], format='mixed')
+
+        epc_data = epc_data.sort_values("LODGEMENT_DATETIME", ascending=False).drop_duplicates("UPRN")
+
+        # Get G & F properties
+        epc_data = epc_data[epc_data["CURRENT_ENERGY_RATING"].isin(["G", "F"])]
+        data.append(epc_data)
+
+    data = pd.concat(data)
+
+    # Save as an excel
+    data.to_excel("EPC F & G Properties.xlsx", index=False)
+
+
+def remove_text_in_brackets(address: str) -> str:
+    """
+    Removes any text within parentheses, including the parentheses themselves.
+
+    Parameters:
+    - address (str): The address string to clean.
+
+    Returns:
+    - str: The cleaned address with text in parentheses removed.
+    """
+    # Regex to find and remove content in parentheses
+    cleaned_address = re.sub(r'\s*\([^)]*\)', '', address)
+    return cleaned_address
+
+
+def extract_numeric_part(house_number: str) -> str:
+    """
+    Extracts only the numeric part from a house number that may contain letters.
+
+    Parameters:
+    - house_number (str): The house number string possibly containing letters.
+
+    Returns:
+    - str: The numeric part of the house number.
+    """
+    # Use regular expression to replace all non-digit characters with nothing
+    numeric_part = re.sub(r'\D', '', house_number)
+    return numeric_part
+
+
+def levenstein_match(matching_string, df, address_col):
+    match_to = df[address_col].tolist()
+    # Strip out punctuation and spaces
+    match_to = [re.sub(r'[^\w\s]', '', x) for x in match_to]
+    match_to = [x.replace(" ", "") for x in match_to]
+
+    # Perform matching between full key and match_to
+    distances = [Levenshtein.distance(matching_string, s) for s in match_to]
+    best_match_index = distances.index(min(distances))
+    # We might want to consider a threshold for the distance; however, for the moment
+    # we simply take the closest match regardless of how large its distance is
+    df = df.iloc[best_match_index:best_match_index + 1]
+
+    return df
+
+
+def extract_range_from_house_number(house_number_range: str):
+    """
+    Detects if the house number includes a numeric range (formatted as 'x-y') and extracts all values within this range.
+    Non-numeric strings containing hyphens are ignored.
+
+    Parameters:
+    - house_number_range (str): The house number string that might contain a range.
+
+    Returns:
+    - list of str: A list of all numbers within the range if it is a range; otherwise, returns None.
+    """
+
+    if not house_number_range:
+        return None
+
+    if '-' in house_number_range:
+        parts = house_number_range.split('-')
+        if len(parts) == 2 and parts[0].isdigit() and parts[1].isdigit():
+            # Both parts are numeric, so it's a valid range
+            start, end = map(int, parts)  # Convert parts to integers
+            return [str(x) for x in range(start, end + 1)]
+        else:
+            # Not a valid numeric range
+            return None
+    else:
+        # No hyphen present or not a range
+        return None
+
+
+def is_in_range(row, house_no):
+    """ Check if the house number is within the range provided in the row. """
+    if row and any(house_no == num for num in row):
+        return True
+    return False
+
+
+def remove_duplicate_matches(matching_lookup, properties, company_ownership):
+    duplicated_titles = matching_lookup[matching_lookup["Title Number"].duplicated()]["Title Number"].unique()
+
+    to_drop = []
+    for dupe_title in duplicated_titles:
+        dupe_data = matching_lookup[matching_lookup["Title Number"] == dupe_title].copy()
+        matched_addresses = dupe_data.merge(
+            properties[["UPRN", "ADDRESS"]].rename(columns={"ADDRESS": "epc_address"}),
+            how="left", on="UPRN"
+        ).merge(
+            company_ownership[["Title Number", "Property Address"]],
+            how="left", on="Title Number"
+        )
+        # We perform levenstein to get the best match
+        best_match = levenstein_match(
+            matching_string=matched_addresses["Property Address"].values[0],
+            df=matched_addresses,
+            address_col="epc_address"
+        )
+        matches_to_drop = matched_addresses[
+            ~matched_addresses["UPRN"].isin(best_match["UPRN"].values)
+        ]
+
+        to_drop.append(
+            matches_to_drop[["UPRN", "Title Number"]].copy()
+        )
+
+    to_drop = pd.concat(to_drop)
+
+    if not to_drop.empty:
+        merged = pd.merge(matching_lookup, to_drop, on=['UPRN', 'Title Number'], how='left', indicator=True)
+        merged[merged['_merge'] == 'left_only'].drop(columns=['_merge'])
+
+        return merged
+
+    return matching_lookup
 
 
 def app():
     """
     This script is for scoping property ownership for EPC F & G rated properties in Birmingam, for Goldman Sachs
     """
+    # paths = [
+    #     "local_data/all-domestic-certificates/domestic-E08000025-Birmingham/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E08000031-Wolverhampton/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E08000026-Coventry/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E06000016-Leicester/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E06000015-Derby/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E06000021-Stoke-on-Trent/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E06000018-Nottingham/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E07000154-Northampton/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E06000061-North-Northamptonshire/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E06000062-West-Northamptonshire/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E07000152-East-Northamptonshire/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E07000155-South-Northamptonshire/certificates.csv",
+    #     #
+    #     "local_data/all-domestic-certificates/domestic-E08000027-Dudley/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E08000029-Solihull/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E07000234-Bromsgrove/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E08000030-Walsall/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E08000028-Sandwell/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E06000019-Herefordshire-County-of/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E06000020-Telford-and-Wrekin/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E07000218-North-Warwickshire/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E07000222-Warwick/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E07000237-Worcester/certificates.csv",
+    #     # East midlands
+    #     "local_data/all-domestic-certificates/domestic-E07000035-Derbyshire-Dales/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E07000038-North-East-Derbyshire/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E07000039-South-Derbyshire/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E06000012-North-East-Lincolnshire/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E06000013-North-Lincolnshire/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E07000138-Lincoln/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E07000134-North-West-Leicestershire/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E06000017-Rutland/certificates.csv",
+    # ]
+    # paths = list(set(paths))
+    # find_f_g_properties(paths)
 
-    properties = pd.read_excel("Birmingham EPC F & G Properties.xlsx")
+    properties = pd.read_excel("EPC F & G Properties.xlsx")
     company_ownership = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/CCOD_FULL_2024_04.csv")
+    company_ownership["is_overseas"] = False
+    overseas_company_ownership = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/OCOD_FULL_2024_04 2.csv")
+    overseas_company_ownership["is_overseas"] = True
+
+    company_ownership = pd.concat([company_ownership, overseas_company_ownership])
+
     # FIlter on relevant postcodes
     company_ownership = company_ownership[
         company_ownership["Postcode"].str.lower().isin(properties["POSTCODE"].str.lower().unique())]
@@ -29,6 +250,10 @@ def app():
     # Now we filter properties the other way around
     properties = properties[properties["POSTCODE"].str.lower().isin(company_ownership["Postcode"].str.lower().unique())]
     # We end up with 7.4k entires on a postcode match, however we need to now do a direct address match
+    # Keep only private rentals and owner-occupied properties
+    properties = properties[
+        properties["TENURE"].isin(["rental (private)", "Rented (private)", "owner-occupied", "Owner-occupied"])
+    ]
 
     ignore_title_numbers = [
         "WM922695",  # Land at the back of 17 Plumstead Road, Birmingham (B44 0EA): relates to WM154788
@@ -36,22 +261,78 @@ def app():
         "WM44948",
     ]
     company_ownership = company_ownership[~company_ownership["Title Number"].isin(ignore_title_numbers)]
-    # Remove entries where the address begins with the term "land adjoining":
 
-    company_ownership = company_ownership[~company_ownership["Property Address"].str.startswith("land adjoining")]
+    # Remove entries where the address begins with the term "land adjoining", or other records that don't reference the
+    # the property itself
+    starting_terms = [
+        "land adjoining", "land on the", "land to the rear of", "land and buildings on the",
+        "garage adjoining", "car park adjoining", "the land adjoining", "land and buildings adjoining",
+        "all royal mines"
+    ]
+    for starting_term in starting_terms:
+        company_ownership = company_ownership[
+            ~company_ownership["Property Address"].str.lower().str.startswith()
+        ]
 
-    freehold_matching_lookup = []
-    leasehold_matching_lookup = []
+    biggest_ownership = (
+        company_ownership
+        .groupby(["Company Registration No. (1)", "Proprietor Name (1)"])["Title Number"]
+        .count()
+        .reset_index(name="n_owned_properties")
+    )
+    biggest_ownership = biggest_ownership.sort_values("n_owned_properties", ascending=False)
+
+    freehold_matching_lookup = []  # 634
+    leasehold_matching_lookup = []  # 86
     shared_leasehold_match = []
+    shared_freehold_match = []
     for _, address in tqdm(properties.iterrows(), total=len(properties)):
+        match_type = "exact"
         filtered = company_ownership[
             company_ownership["Postcode"].str.lower() == address["POSTCODE"].lower()
             ].copy()
 
-        filtered["house_number"] = filtered["Property Address"].apply(SearchEpc.get_house_number)
+        # Remove postcode and remove trailing commas
+        filtered["house_number"] = (
+            filtered["Property Address"]
+            .apply(remove_text_in_brackets)
+            .apply(SearchEpc.get_house_number)
+            .str.lower()
+            .str.replace(",", "")
+        )
         house_no = SearchEpc.get_house_number(address["ADDRESS1"])
+        if house_no is not None:
+            house_no = house_no.replace(",", "")
 
-        filtered = filtered[filtered["house_number"] == house_no]
+        if house_no is None:
+            # It's hard for us to get a reliable match
+            # filtered = filtered[filtered["Property Address"].str.contains(address["ADDRESS1"])]
+            # if filtered.shape[0] > 1:
+            #     raise Exception("No valid - maybe we should do levenstein?")
+            continue
+
+        else:
+
+            if house_no not in filtered["house_number"].values:
+                # If this happens, we check house_number for a x-y range of addresses
+                filtered["house_number_range"] = filtered["house_number"].apply(extract_range_from_house_number)
+                # If we have found a house number range, we check if the house number is in the range and if not,
+                # we drop the row
+                filtered['is_in_range'] = filtered['house_number_range'].apply(lambda x: is_in_range(x, house_no))
+
+                if filtered['is_in_range'].any():
+                    # If house_no is found in any range, keep only rows where it is in range
+                    filtered = filtered[filtered['is_in_range']]
+                else:
+                    # If house_no is not found in any range, filter out rows where 'house_number_range' is not None
+                    filtered = filtered[filtered['house_number_range'].isnull()]
+
+                # Strip out letters from house_no and house_number
+                house_no = extract_numeric_part(house_no)
+                filtered["house_number"] = filtered["house_number"].astype(str).apply(extract_numeric_part)
+                match_type = "approximate"
+
+            filtered = filtered[filtered["house_number"] == house_no]
 
         if filtered.empty:
             continue
@@ -60,7 +341,17 @@ def app():
         filtered_leasehold = filtered[filtered["Tenure"] == "Leasehold"]
 
         if filtered_freehold.shape[0] > 1:
-            raise ValueError("Multiple freehold matches")
+            matched = filtered_leasehold[["Title Number"]].copy()
+            matched.insert(0, "UPRN", address["UPRN"])
+            shared_freehold_match.append(matched)
+        elif not filtered_freehold.empty:
+            freehold_matching_lookup.append(
+                {
+                    "UPRN": address["UPRN"],
+                    "Title Number": filtered_freehold["Title Number"].values[0],
+                    "match_type": match_type,
+                }
+            )
 
         if filtered_leasehold.shape[0] > 1:
             matched = filtered_leasehold[["Title Number"]].copy()
@@ -70,20 +361,52 @@ def app():
             leasehold_matching_lookup.append(
                 {
                     "UPRN": address["UPRN"],
-                    "Title Number": filtered_leasehold["Title Number"].values[0]
-                }
-            )
-
-        if not filtered_freehold.empty:
-            freehold_matching_lookup.append(
-                {
-                    "UPRN": address["UPRN"],
-                    "Title Number": filtered_freehold["Title Number"].values[0]
+                    "Title Number": filtered_leasehold["Title Number"].values[0],
+                    "match_type": match_type,
                 }
             )
 
     freehold_matching_lookup = pd.DataFrame(freehold_matching_lookup)
     leasehold_matching_lookup = pd.DataFrame(leasehold_matching_lookup)
+    shared_leasehold_match = pd.concat(shared_leasehold_match)
 
-    freehold_aggregate = aggregate_matches(freehold_matching_lookup, company_ownership)
-    leasehold_aggregate = aggregate_matches(leasehold_matching_lookup, company_ownership)
+    # The approximate matches aren't very good
+    freehold_matching_lookup = freehold_matching_lookup[freehold_matching_lookup["match_type"] == "exact"]
+    leasehold_matching_lookup = leasehold_matching_lookup[leasehold_matching_lookup["match_type"] == "exact"]
+
+    # There are some cases where we have duplicates
+    freehold_matching_lookup = remove_duplicate_matches(freehold_matching_lookup, properties, company_ownership)
+    leasehold_matching_lookup = remove_duplicate_matches(leasehold_matching_lookup, properties, company_ownership)
+
+    matched_addresses = freehold_matching_lookup.merge(
+        properties[["UPRN", "ADDRESS"]].rename(columns={"ADDRESS": "epc_address"}),
+        how="left", on="UPRN"
+    ).merge(
+        company_ownership[["Title Number", "Property Address"]],
+        how="left", on="Title Number"
+    )
+
+    # shared_freehold_match = pd.DataFrame(shared_freehold_match)
+    # Store these files
+    freehold_matching_lookup.to_excel("freehold_matching_lookup.xlsx")
+    leasehold_matching_lookup.to_excel("leasehold_matching_lookup.xlsx")
+    shared_leasehold_match.to_excel("shared_leasehold_match.xlsx")
+    # shared_freehold_match.to_excel("shared_freehold_match.xlsx")
+
+    freehold_aggregate = aggregate_matches(freehold_matching_lookup, company_ownership, properties)
+    leasehold_aggregate = aggregate_matches(leasehold_matching_lookup, company_ownership, properties)
+
+    combined_aggregate = aggregate_matches(
+        pd.concat([freehold_matching_lookup, leasehold_matching_lookup]), company_ownership, properties
+    )
+
+    investment_20m = combined_aggregate[combined_aggregate["cumulative_value"] <= 20_500_000]
+    investment_50m = combined_aggregate[combined_aggregate["cumulative_value"] <= 51_000_000]
+
+    z = company_ownership[
+        (company_ownership["Company Registration No. (1)"] == freehold_aggregate["Company Registration No. (1)"].values[
+            0]) &
+        (company_ownership["Title Number"].isin(freehold_matching_lookup["Title Number"].values))
+        ]
+
+    df = freehold_matching_lookup.merge(company_ownership, how="left", on="Title Number")
diff --git a/etl/customers/vander_elliot/__init__.py b/etl/customers/vander_elliot/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/etl/customers/vander_elliot/single_property_pilot.py b/etl/customers/vander_elliot/single_property_pilot.py
new file mode 100644
index 00000000..99624dfc
--- /dev/null
+++ b/etl/customers/vander_elliot/single_property_pilot.py
@@ -0,0 +1,56 @@
+import pandas as pd
+from utils.s3 import read_excel_from_s3
+from utils.s3 import save_csv_to_s3
+
+PORTFOLIO_ID = 77
+USER_ID = 8
+
+patches = [
+    {
+        "address": "79 Perryn Road",
+        "postcode": "W3 7LT",
+        "roof-description": "Pitched, no insulation (assumed)"
+    }
+]
+
+
+def app():
+    asset_list = [
+        {
+            'uprn': 12103117,
+            "address": "79 Perryn Road",
+            "postcode": "W3 7LT",
+        },
+
+    ]
+
+    asset_list = pd.DataFrame(asset_list)
+
+    # Store the asset list in s3
+    filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
+    save_csv_to_s3(
+        dataframe=asset_list,
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename
+    )
+
+    # Store patches in s3
+    patches_filename = f"{USER_ID}/{PORTFOLIO_ID}/patches.json"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(patches),
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=patches_filename
+    )
+
+    body = {
+        "portfolio_id": str(PORTFOLIO_ID),
+        "housing_type": "Private",
+        "goal": "Increase EPC",
+        "goal_value": "B",
+        "trigger_file_path": filename,
+        "already_installed_file_path": "",
+        "patches_file_path": patches_filename,
+        "non_invasive_recommendations_file_path": "",
+        "budget": None,
+    }
+    print(body)
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index b197d817..b42a9d5b 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -93,13 +93,13 @@ class HeatingRecommender:
         # In the future, we'll allow overrides, so that non-intrusive surveys can contradict these conditions
         # and either allow or prevent the recommendation of an air source heat pump
 
-        suitable_property_types = self.property.data["property-type"] in ["House", "Bungalow"]
-        has_air_source_heat_pump = self.property.main_heating["has_air_source_heat_pump"]
-
-        if suitable_property_types and not has_air_source_heat_pump:
-            self.recommend_air_source_heat_pump(
-                phase=phase, has_cavity_and_loft_recommendations=has_cavity_and_loft_recommendations
-            )
+        # suitable_property_types = self.property.data["property-type"] in ["House", "Bungalow"]
+        # has_air_source_heat_pump = self.property.main_heating["has_air_source_heat_pump"]
+        #
+        # if suitable_property_types and not has_air_source_heat_pump:
+        #     self.recommend_air_source_heat_pump(
+        #         phase=phase, has_cavity_and_loft_recommendations=has_cavity_and_loft_recommendations
+        #     )
 
         return
 
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 06dc2d61..1a6d7a1c 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -109,7 +109,7 @@ class Recommendations:
 
         # Heating and Electical systems
         if "heating" not in self.exclusions:
-            self.heating_recommender.recommend(phase=phase)
+            self.heating_recommender.recommend(phase=phase, has_cavity_and_loft_recommendations=None)
             if (
                 self.heating_recommender.heating_recommendations or
                 self.heating_recommender.heating_control_recommendations
diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py
index b44557ab..58d4b123 100644
--- a/recommendations/SolarPvRecommendations.py
+++ b/recommendations/SolarPvRecommendations.py
@@ -44,7 +44,7 @@ class SolarPvRecommendations:
         :return:
         """
 
-        is_valid_property_type = self.property.data["property-type"] in ["House", "Bungalow"]
+        is_valid_property_type = self.property.data["property-type"] in ["House", "Bungalow", "Maisonette"]
         is_valid_roof_type = (
             self.property.roof["is_flat"] or self.property.roof["is_pitched"] or self.property.roof["is_roof_room"]
         )

From 9f9799cfa8a65a2714a91bd47a68dc57538758d0 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 3 May 2024 13:35:49 +0100
Subject: [PATCH 257/262] finishing property ownership

---
 etl/customers/goldman/property_ownership.py | 27 +++------------------
 1 file changed, 4 insertions(+), 23 deletions(-)

diff --git a/etl/customers/goldman/property_ownership.py b/etl/customers/goldman/property_ownership.py
index abc2645d..89e7c976 100644
--- a/etl/customers/goldman/property_ownership.py
+++ b/etl/customers/goldman/property_ownership.py
@@ -255,13 +255,6 @@ def app():
         properties["TENURE"].isin(["rental (private)", "Rented (private)", "owner-occupied", "Owner-occupied"])
     ]
 
-    ignore_title_numbers = [
-        "WM922695",  # Land at the back of 17 Plumstead Road, Birmingham (B44 0EA): relates to WM154788
-        "WM426374",  # land on the south side of 15 Carlyle Road, Edgbaston, Birmingham (B16 9BH): relates to WM537591
-        "WM44948",
-    ]
-    company_ownership = company_ownership[~company_ownership["Title Number"].isin(ignore_title_numbers)]
-
     # Remove entries where the address begins with the term "land adjoining", or other records that don't reference the
     # the property itself
     starting_terms = [
@@ -271,17 +264,9 @@ def app():
     ]
     for starting_term in starting_terms:
         company_ownership = company_ownership[
-            ~company_ownership["Property Address"].str.lower().str.startswith()
+            ~company_ownership["Property Address"].str.lower().str.startswith(starting_term)
         ]
 
-    biggest_ownership = (
-        company_ownership
-        .groupby(["Company Registration No. (1)", "Proprietor Name (1)"])["Title Number"]
-        .count()
-        .reset_index(name="n_owned_properties")
-    )
-    biggest_ownership = biggest_ownership.sort_values("n_owned_properties", ascending=False)
-
     freehold_matching_lookup = []  # 634
     leasehold_matching_lookup = []  # 86
     shared_leasehold_match = []
@@ -400,13 +385,9 @@ def app():
         pd.concat([freehold_matching_lookup, leasehold_matching_lookup]), company_ownership, properties
     )
 
+    df = pd.concat([freehold_matching_lookup, leasehold_matching_lookup])
+
     investment_20m = combined_aggregate[combined_aggregate["cumulative_value"] <= 20_500_000]
     investment_50m = combined_aggregate[combined_aggregate["cumulative_value"] <= 51_000_000]
 
-    z = company_ownership[
-        (company_ownership["Company Registration No. (1)"] == freehold_aggregate["Company Registration No. (1)"].values[
-            0]) &
-        (company_ownership["Title Number"].isin(freehold_matching_lookup["Title Number"].values))
-        ]
-
-    df = freehold_matching_lookup.merge(company_ownership, how="left", on="Title Number")
+    properties["WALLS_DESCRIPTION"].value_counts(normalize=True)

From 7ec795f5bb247d5a441501e64b5e4a9b61a0d53d Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 3 May 2024 15:46:03 +0100
Subject: [PATCH 258/262] completing ashp recommendations

---
 .idea/Model.iml                       |  2 +-
 .idea/misc.xml                        |  2 +-
 recommendations/Costs.py              | 48 ++++++++++++++++++---------
 recommendations/HeatingRecommender.py | 30 +++++++++--------
 recommendations/Recommendations.py    | 11 +++++-
 5 files changed, 60 insertions(+), 33 deletions(-)

diff --git a/.idea/Model.iml b/.idea/Model.iml
index b0f9c00d..4413bb06 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 1122b380..6f308057 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/recommendations/Costs.py b/recommendations/Costs.py
index 113bb6f8..fd3c1692 100644
--- a/recommendations/Costs.py
+++ b/recommendations/Costs.py
@@ -37,21 +37,22 @@ MCS_SOLAR_PV_COST_DATA = {
     "average_cost_per_kwh-Northern Ireland": 2126.09,
 }
 
-# This data is based on the MCS database
+# This data is based on the MCS database. We use the larger of the 2023 and 2024 averages,
+# to be conservative
 MCS_AIR_SOURCE_HEAT_PUMP_COST_DATA = {
-    "Outer London": None,
-    "Inner London": None,
-    "South East England": None,
-    "South West England": None,
-    "East of England": None,
-    "East Midlands": None,
-    "West Midlands": None,
-    "North East England": None,
-    "North West England": None,
-    "Yorkshire and the Humber": None,
-    "Wales": None,
-    "Scotland": None,
-    "Northern Ireland": None,
+    "Outer London": 13220,
+    "Inner London": 13220,
+    "South East England": 13547,
+    "South West England": 12776,
+    "East of England": 12585,
+    "East Midlands": 12239,
+    "West Midlands": 13182,
+    "North East England": 11829,
+    "North West England": 11714,
+    "Yorkshire and the Humber": 11919,
+    "Wales": 13701,
+    "Scotland": 12586,
+    "Northern Ireland": 12000,  # There are hardly any air source heat pump installs going on in Northern Ireland
 }
 BOILER_UPGRADE_SCHEME_ASHP_VALUE = 7500
 
@@ -1264,8 +1265,23 @@ class Costs:
         Based on the region and type of property, this function will produce a cost estimation for an air source heat
         pump. This cost will include the boiler upgrade scheme grant
 
-        :return:
         """
 
+        # This is the average cost of a project, we'll add some additional contingency
         regional_cost = MCS_AIR_SOURCE_HEAT_PUMP_COST_DATA[self.region]
-        pass
+
+        total_cost = regional_cost * (1 + self.CONTINGENCY) - BOILER_UPGRADE_SCHEME_ASHP_VALUE
+        subtotal_before_vat = total_cost / (1 + self.VAT_RATE)
+        vat = total_cost - subtotal_before_vat
+
+        # We assume 3 days installation
+        labour_days = 3
+        labour_hours = labour_days * 8
+
+        return {
+            "total": total_cost,
+            "subtotal": subtotal_before_vat,
+            "vat": vat,
+            "labour_hours": labour_hours,
+            "labour_days": labour_days,
+        }
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index b42a9d5b..a51803f2 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -16,10 +16,10 @@ class HeatingRecommender:
         self.heating_recommendations = []
         self.heating_control_recommendations = []
 
-    def recommend(self, has_cavity_and_loft_recommendations, phase=0):
+    def recommend(self, has_cavity_or_loft_recommendations, phase=0):
         """
         Produces heating recommendations
-        :param has_cavity_and_loft_recommendations: boolean indicating if we have produced a cavity or loft insulation
+        :param has_cavity_or_loft_recommendations: boolean indicating if we have produced a cavity or loft insulation
         recommendation. If there are cavity or loft recommendations, the property would need to complete those measures
         before being able to get the boiler upgrade scheme benefits. The messaging in the front end would be to
         :param phase: indicates the phase of the retrofit programme
@@ -93,17 +93,17 @@ class HeatingRecommender:
         # In the future, we'll allow overrides, so that non-intrusive surveys can contradict these conditions
         # and either allow or prevent the recommendation of an air source heat pump
 
-        # suitable_property_types = self.property.data["property-type"] in ["House", "Bungalow"]
-        # has_air_source_heat_pump = self.property.main_heating["has_air_source_heat_pump"]
-        #
-        # if suitable_property_types and not has_air_source_heat_pump:
-        #     self.recommend_air_source_heat_pump(
-        #         phase=phase, has_cavity_and_loft_recommendations=has_cavity_and_loft_recommendations
-        #     )
+        suitable_property_type = self.property.data["property-type"] in ["House", "Bungalow"]
+        has_air_source_heat_pump = self.property.main_heating["has_air_source_heat_pump"]
+
+        if suitable_property_type and not has_air_source_heat_pump:
+            self.recommend_air_source_heat_pump(
+                phase=phase, has_cavity_or_loft_recommendations=has_cavity_or_loft_recommendations
+            )
 
         return
 
-    def recommend_air_source_heat_pump(self, phase, has_cavity_and_loft_recommendations):
+    def recommend_air_source_heat_pump(self, phase, has_cavity_or_loft_recommendations):
         """
         This method will implement the recommendation for an air source heat pump
         This is ultimately an overhaul to the heating system and so is recommended as an alternative to other
@@ -127,20 +127,20 @@ class HeatingRecommender:
         else:
             if controls_recommender.recommendation:
                 description = ("Install an air source heat pump, and upgrade heating controls to Smart Thermostats, "
-                               "room sensors and smart radiator valves (time & temperature zone control) ")
+                               "room sensors and smart radiator valves (time & temperature zone control).")
             else:
                 description = "Install an air source heat pump."
 
             # If the property does not have existing cavity and loft insulation, we include a note that the cost
             # includes the boiler upgrade scheme and that the cavity and loft need to be treated, to ensure access
             # to the funding
-            if has_cavity_and_loft_recommendations:
-                description = description + (f" The cost of works includes the £"
+            if has_cavity_or_loft_recommendations:
+                description = description + (f" The cost includes the £"
                                              f"{BOILER_UPGRADE_SCHEME_ASHP_VALUE} boiler upgrade scheme grant. "
                                              f"You must ensure that the property has an insulated cavity and "
                                              f"270mm+ loft insulation to qualify for the grant")
             else:
-                description = description + (f" The cost of works includes the £"
+                description = description + (f" The cost includes the £"
                                              f"{BOILER_UPGRADE_SCHEME_ASHP_VALUE} boiler upgrade scheme grant")
 
         simulation_config = {
@@ -178,6 +178,8 @@ class HeatingRecommender:
         if controls_recommender.recommendation:
             # We should have just the single recommendation for heat controls, which is time
             # and temperature zone controls
+            if len(controls_recommender.recommendation) != 1:
+                raise NotImplementedError("More than one heat controls recommendation for air source heat pump")
             simulation_config = {
                 **simulation_config,
                 **controls_recommender.recommendation[0]["simulation_config"]
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 1a6d7a1c..0942ab12 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -109,7 +109,16 @@ class Recommendations:
 
         # Heating and Electical systems
         if "heating" not in self.exclusions:
-            self.heating_recommender.recommend(phase=phase, has_cavity_and_loft_recommendations=None)
+
+            cavity_or_loft_recommendations = [
+                r for r in self.wall_recomender.recommendations + self.roof_recommender.recommendations
+                if r["type"] in ["cavity_wall_insulation", "loft_insulation"]
+            ]
+            has_cavity_or_loft_recommendations = len(cavity_or_loft_recommendations) > 0
+
+            self.heating_recommender.recommend(
+                phase=phase, has_cavity_or_loft_recommendations=has_cavity_or_loft_recommendations
+            )
             if (
                 self.heating_recommender.heating_recommendations or
                 self.heating_recommender.heating_control_recommendations

From f21221d721049444c82bce084199421aab19ce23 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 3 May 2024 16:08:14 +0100
Subject: [PATCH 259/262] working on ashp recommendations

---
 recommendations/Recommendations.py | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 0942ab12..c8113cdc 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -123,11 +123,28 @@ class Recommendations:
                 self.heating_recommender.heating_recommendations or
                 self.heating_recommender.heating_control_recommendations
             ):
-                if self.heating_recommender.heating_recommendations:
-                    property_recommendations.append(self.heating_recommender.heating_recommendations)
 
-                if self.heating_recommender.heating_control_recommendations:
-                    property_recommendations.append(self.heating_recommender.heating_control_recommendations)
+                # We split into first and second phase recommendations
+                first_phase_recommendations = [
+                    r for r in (
+                        self.heating_recommender.heating_recommendations +
+                        self.heating_recommender.heating_control_recommendations
+                    )
+                    if r["phase"] == phase
+                ]
+                second_phase_recommendations = [
+                    r for r in (
+                        self.heating_recommender.heating_recommendations +
+                        self.heating_recommender.heating_control_recommendations
+                    )
+                    if r["phase"] == phase + 1
+                ]
+
+                if first_phase_recommendations:
+                    property_recommendations.append(first_phase_recommendations)
+
+                if second_phase_recommendations:
+                    property_recommendations.append(second_phase_recommendations)
 
                 # We check if we have distinct heating and heating controls recommendations
                 # If so, we increment by 2 (one of the heating system, one for the heating controls)

From 56472f201e9dee48d8fa31b9dced73acc7fcc37d Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 7 May 2024 16:56:14 +0100
Subject: [PATCH 260/262] Added ashp unit tests

---
 .idea/Model.iml                               |   2 +-
 .idea/misc.xml                                |   2 +-
 etl/customers/goldman/property_ownership.py   |  14 +
 .../tests/test_air_source_heat_pump.py        | 867 ++++++++++++++++++
 4 files changed, 883 insertions(+), 2 deletions(-)

diff --git a/.idea/Model.iml b/.idea/Model.iml
index 4413bb06..b0f9c00d 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 6f308057..1122b380 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/etl/customers/goldman/property_ownership.py b/etl/customers/goldman/property_ownership.py
index 89e7c976..24922f68 100644
--- a/etl/customers/goldman/property_ownership.py
+++ b/etl/customers/goldman/property_ownership.py
@@ -391,3 +391,17 @@ def app():
     investment_50m = combined_aggregate[combined_aggregate["cumulative_value"] <= 51_000_000]
 
     properties["WALLS_DESCRIPTION"].value_counts(normalize=True)
+
+
+def company_aggregation():
+    company_ownership = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/CCOD_FULL_2024_04.csv")
+    aggregation = (
+        company_ownership
+        .groupby(["Proprietor Name (1)", "Company Registration No. (1)"])
+        ["Property Address"]
+        .count()
+        .reset_index(name="Number of Properties")
+    )
+    aggregation = aggregation.sort_values("Number of Properties", ascending=False)
+
+    aggregation.to_excel("Company ownership aggregation.xlsx")
diff --git a/recommendations/tests/test_air_source_heat_pump.py b/recommendations/tests/test_air_source_heat_pump.py
index d80afc6e..0d69b10d 100644
--- a/recommendations/tests/test_air_source_heat_pump.py
+++ b/recommendations/tests/test_air_source_heat_pump.py
@@ -1,6 +1,154 @@
+import pandas as pd
+import msgpack
+from datetime import datetime
+
+from utils.s3 import read_dataframe_from_s3_parquet, read_from_s3
 from backend.Property import Property
 from recommendations.HeatingRecommender import HeatingRecommender
+from recommendations.Recommendations import Recommendations
 from etl.epc.Record import EPCRecord
+from etl.solar.SolarPhotoSupply import SolarPhotoSupply
+from backend.ml_models.api import ModelApi
+
+
+def find_examples():
+    """ Some scrappy helper code to find EPC examples"""
+    # Let's look for some testing data, where the only thing different pre and post is the installation of an
+    # air source heat pump
+    data = read_dataframe_from_s3_parquet(
+        bucket_name="retrofit-data-dev",
+        file_key="sap_change_model/2024-03-24-15-51-13/dataset_no_cleaning.parquet"
+    )
+
+    # Firstly, take records where before there was no air source heat pump and afterwards there was
+    data = data[
+        data["has_air_source_heat_pump_ending"] & ~data["has_air_source_heat_pump"]
+        ]
+
+    # Start with a property that has a boiler
+    data = data[data["has_boiler"]]
+
+    static_columns = [
+        # Walls
+        'walls_thermal_transmittance_ending',
+        'is_filled_cavity_ending',
+        'is_park_home_ending',
+        'walls_insulation_thickness_ending',
+        'external_insulation_ending',
+        'internal_insulation_ending',
+        # Floors
+        # 'floor_thermal_transmittance_ending',  # Don't subset on this, because it changes based on floor area
+        'floor_insulation_thickness_ending',
+        # Roof
+        'roof_thermal_transmittance_ending',
+        'is_at_rafters_ending',
+        'roof_insulation_thickness_ending',
+        # Hot water - air source heat pump will change the hot water system (probably from whatever it was -> main)
+        # 'heater_type_ending',
+        # 'system_type_ending',
+        # 'thermostat_characteristics_ending',
+        # 'heating_scope_ending',
+        # 'energy_recovery_ending',
+        # 'hotwater_tariff_type_ending',
+        # 'extra_features_ending',
+        # 'chp_systems_ending',
+        # 'distribution_system_ending',
+        # 'no_system_present_ending',
+        # 'appliance_ending',
+        # Heating - Will change when installing an ASHP
+        # 'has_radiators_ending',
+        # 'has_fan_coil_units_ending',
+        # 'has_pipes_in_screed_above_insulation_ending',
+        # 'has_pipes_in_insulated_timber_floor_ending',
+        # 'has_pipes_in_concrete_slab_ending',
+        # 'has_boiler_ending',
+        # 'has_air_source_heat_pump_ending',  # We want the air source heat pump to change
+        # 'has_room_heaters_ending',
+        # 'has_electric_storage_heaters_ending',
+        # 'has_warm_air_ending',
+        # 'has_electric_underfloor_heating_ending',
+        # 'has_electric_ceiling_heating_ending',
+        # 'has_community_scheme_ending',
+        # 'has_ground_source_heat_pump_ending',
+        # 'has_no_system_present_ending',
+        # 'has_portable_electric_heaters_ending',
+        # 'has_water_source_heat_pump_ending',
+        # 'has_electric_heat_pump_ending',
+        # 'has_micro-cogeneration_ending',
+        # 'has_solar_assisted_heat_pump_ending',
+        # 'has_exhaust_source_heat_pump_ending',
+        # 'has_community_heat_pump_ending',
+        # 'has_electric_ending',
+        # 'has_mains_gas_ending',
+        # 'has_wood_logs_ending', 'has_coal_ending', 'has_oil_ending',
+        # 'has_wood_pellets_ending', 'has_anthracite_ending', 'has_dual_fuel_mineral_and_wood_ending',
+        # 'has_smokeless_fuel_ending', 'has_lpg_ending', 'has_b30k_ending', 'has_electricaire_ending',
+        # 'has_assumed_for_most_rooms_ending', 'has_underfloor_heating_ending',
+        # 'thermostatic_control_ending',
+        # 'charging_system_ending',
+        # 'switch_system_ending',
+        # 'no_control_ending',
+        # 'dhw_control_ending',
+        # 'community_heating_ending',
+        # 'multiple_room_thermostats_ending',
+        # 'auxiliary_systems_ending',
+        # 'trvs_ending',
+        # 'rate_control_ending',
+        # Window
+        'glazing_type_ending',
+        # Fuel - could change with ASHP
+        # 'fuel_type_ending',
+        # 'main-fuel_tariff_type_ending',
+        # 'is_community_ending',
+        # 'no_individual_heating_or_community_network_ending',
+        # 'complex_fuel_type_ending',
+
+        'mechanical_ventilation_ending', 'secondheat_description_ending', 'glazed_type_ending',
+        'multi_glaze_proportion_ending', 'low_energy_lighting_ending', 'number_open_fireplaces_ending',
+        'solar_water_heating_flag_ending',
+        'photo_supply_ending',
+        'energy_tariff_ending',
+        'extension_count_ending',
+        'total_floor_area_ending',
+        # 'hot_water_energy_eff_ending',
+        'floor_energy_eff_ending',
+        'windows_energy_eff_ending',
+        'walls_energy_eff_ending',
+        'sheating_energy_eff_ending',
+        'roof_energy_eff_ending',
+        # 'mainheat_energy_eff_ending',
+        # 'mainheatc_energy_eff_ending',
+        'lighting_energy_eff_ending',
+        'number_habitable_rooms_ending',
+        'number_heated_rooms_ending',
+    ]
+
+    for col in static_columns:
+
+        base_starting = col.split("_ending")[0]
+        if base_starting + "_starting" in data.columns:
+            starting_col = base_starting + "_starting"
+        else:
+            starting_col = base_starting
+        # Filter
+        print("Column: %s" % col)
+        print("Starting size: %s" % data.shape[0])
+        data = data[data[starting_col] == data[col]]
+        print("Ending size: %s" % data.shape[0])
+
+        z = data[['uprn', col, starting_col]]
+
+    # Great example UPRNs
+    # 100030969273
+    # 10034685399 - Completely transforms the heating and hot water systems in the home (goes from oil -> electricity)
+    # 100091200828 - goes from a liquid petroleum gas boiler to ashp
+
+    # Look for starting with a gas boiler
+    data[
+        data["has_boiler"] & data["has_radiators"] & data["has_mains_gas"] & ~data["has_boiler_ending"]
+        ]
+
+    # UPRN: 100011776843
 
 
 class TestAirSourceHeatPump:
@@ -75,3 +223,722 @@ class TestAirSourceHeatPump:
         recommender.recommend(phase=0)
 
         assert recommender.recommendation is None
+
+    def test_air_source_heat_pump_gas_boiler_starting(self):
+        starting_epc = {
+            'low-energy-fixed-light-count': '', 'address': '430 Gidlow Lane', 'uprn-source': 'Energy Assessor',
+            'floor-height': '2.62', 'heating-cost-potential': '599', 'unheated-corridor-length': '',
+            'hot-water-cost-potential': '67', 'construction-age-band': 'England and Wales: 1950-1966',
+            'potential-energy-rating': 'C', 'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Good',
+            'lighting-energy-eff': 'Very Good', 'environment-impact-potential': '72',
+            'glazed-type': 'double glazing installed during or after 2002', 'heating-cost-current': '913',
+            'address3': '', 'mainheatcont-description': 'Programmer, no room thermostat', 'sheating-energy-eff': 'N/A',
+            'property-type': 'House', 'local-authority-label': 'Wigan', 'fixed-lighting-outlets-count': '9',
+            'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '210',
+            'county': '', 'postcode': 'WN6 8RG', 'solar-water-heating-flag': 'N', 'constituency': 'E14001039',
+            'co2-emissions-potential': '2.6', 'number-heated-rooms': '4',
+            'floor-description': 'Solid, no insulation (assumed)', 'energy-consumption-potential': '180',
+            'local-authority': 'E08000010', 'built-form': 'Mid-Terrace', 'number-open-fireplaces': '0',
+            'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2022-02-15',
+            'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '78', 'address1': '430 Gidlow Lane',
+            'heat-loss-corridor': '', 'flat-storey-count': '', 'constituency-label': 'Wigan',
+            'roof-energy-eff': 'Very Poor', 'total-floor-area': '80.0', 'building-reference-number': '10002334112',
+            'environment-impact-current': '38', 'co2-emissions-current': '6.2',
+            'roof-description': 'Pitched, no insulation (assumed)', 'floor-energy-eff': 'N/A',
+            'number-habitable-rooms': '4', 'address2': '', 'hot-water-env-eff': 'Poor', 'posttown': 'WIGAN',
+            'mainheatc-energy-eff': 'Very Poor', 'main-fuel': 'mains gas (not community)',
+            'lighting-env-eff': 'Very Good', 'windows-energy-eff': 'Good', 'floor-env-eff': 'N/A',
+            'sheating-env-eff': 'N/A', 'lighting-description': 'Low energy lighting in all fixed outlets',
+            'roof-env-eff': 'Very Poor', 'walls-energy-eff': 'Average', 'photo-supply': '0.0',
+            'lighting-cost-potential': '67', 'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100',
+            'main-heating-controls': '', 'lodgement-datetime': '2022-02-23 16:39:41', 'flat-top-storey': '',
+            'current-energy-rating': 'E', 'secondheat-description': 'Room heaters, mains gas',
+            'walls-env-eff': 'Average', 'transaction-type': 'ECO assessment', 'uprn': '100011776843',
+            'current-energy-efficiency': '45', 'energy-consumption-current': '441',
+            'mainheat-description': 'Boiler and radiators, mains gas', 'lighting-cost-current': '67',
+            'lodgement-date': '2022-02-23', 'extension-count': '1', 'mainheatc-env-eff': 'Very Poor',
+            'lmk-key': '46cb404438a6d88ddff8965cab8b3027ec15c32d93e0b6a5f0381a5109b9bb0d', 'wind-turbine-count': '0',
+            'tenure': 'Owner-occupied', 'floor-level': '', 'potential-energy-efficiency': '77',
+            'hot-water-energy-eff': 'Poor', 'low-energy-lighting': '100',
+            'walls-description': 'Cavity wall, filled cavity',
+            'hotwater-description': 'From main system, no cylinder thermostat'
+        }
+
+        ending_epc = {
+            'low-energy-fixed-light-count': '', 'address': '430 Gidlow Lane', 'uprn-source': 'Energy Assessor',
+            'floor-height': '2.62', 'heating-cost-potential': '803', 'unheated-corridor-length': '',
+            'hot-water-cost-potential': '292', 'construction-age-band': 'England and Wales: 1950-1966',
+            'potential-energy-rating': 'C', 'mainheat-energy-eff': 'Very Good', 'windows-env-eff': 'Good',
+            'lighting-energy-eff': 'Very Good', 'environment-impact-potential': '78',
+            'glazed-type': 'double glazing installed during or after 2002', 'heating-cost-current': '861',
+            'address3': '', 'mainheatcont-description': 'Time and temperature zone control',
+            'sheating-energy-eff': 'N/A', 'property-type': 'House', 'local-authority-label': 'Wigan',
+            'fixed-lighting-outlets-count': '9', 'energy-tariff': 'Single', 'mechanical-ventilation': 'natural',
+            'hot-water-cost-current': '434', 'county': '', 'postcode': 'WN6 8RG', 'solar-water-heating-flag': 'N',
+            'constituency': 'E14001039', 'co2-emissions-potential': '2.0', 'number-heated-rooms': '4',
+            'floor-description': 'Solid, no insulation (assumed)', 'energy-consumption-potential': '147',
+            'local-authority': 'E08000010', 'built-form': 'Mid-Terrace', 'number-open-fireplaces': '0',
+            'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2022-05-11',
+            'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '43', 'address1': '430 Gidlow Lane',
+            'heat-loss-corridor': '', 'flat-storey-count': '', 'constituency-label': 'Wigan',
+            'roof-energy-eff': 'Very Poor', 'total-floor-area': '80.0', 'building-reference-number': '10002334112',
+            'environment-impact-current': '63', 'co2-emissions-current': '3.4',
+            'roof-description': 'Pitched, no insulation (assumed)', 'floor-energy-eff': 'N/A',
+            'number-habitable-rooms': '4', 'address2': '', 'hot-water-env-eff': 'Poor', 'posttown': 'WIGAN',
+            'mainheatc-energy-eff': 'Very Good', 'main-fuel': 'electricity (not community)',
+            'lighting-env-eff': 'Very Good', 'windows-energy-eff': 'Good', 'floor-env-eff': 'N/A',
+            'sheating-env-eff': 'N/A', 'lighting-description': 'Low energy lighting in all fixed outlets',
+            'roof-env-eff': 'Very Poor', 'walls-energy-eff': 'Average', 'photo-supply': '0.0',
+            'lighting-cost-potential': '67', 'mainheat-env-eff': 'Very Good', 'multi-glaze-proportion': '100',
+            'main-heating-controls': '', 'lodgement-datetime': '2022-06-06 13:01:20', 'flat-top-storey': '',
+            'current-energy-rating': 'E', 'secondheat-description': 'Room heaters, mains gas',
+            'walls-env-eff': 'Average', 'transaction-type': 'ECO assessment', 'uprn': '100011776843',
+            'current-energy-efficiency': '53', 'energy-consumption-current': '252',
+            'mainheat-description': 'Air source heat pump, radiators, electric', 'lighting-cost-current': '67',
+            'lodgement-date': '2022-06-06', 'extension-count': '1', 'mainheatc-env-eff': 'Very Good',
+            'lmk-key': '672d5947f3d4a55d97255af71651d6127a939418fa66a687070af77e0ba90df2', 'wind-turbine-count': '0',
+            'tenure': 'Owner-occupied', 'floor-level': '', 'potential-energy-efficiency': '70',
+            'hot-water-energy-eff': 'Very Poor', 'low-energy-lighting': '100',
+            'walls-description': 'Cavity wall, filled cavity', 'hotwater-description': 'From main system'
+        }
+
+        # differences = []
+        # for k, v in ending_epc.items():
+        #     if v != starting_epc[k]:
+        #         differences.append(
+        #             {
+        #                 "variable": k,
+        #                 "starting_value": starting_epc[k],
+        #                 "ending_value": v
+        #             }
+        #         )
+        # differences = pd.DataFrame(differences)
+        #
+        # diffs = differences[
+        #     differences["variable"].isin(
+        #         [
+        #             "mainheat-energy-eff",
+        #             "mainheatcont-description",
+        #             "mainheatc-energy-eff",
+        #             "main-fuel",
+        #             "mainheat-env-eff",
+        #             "mainheat-description",
+        #             "hot-water-energy-eff",
+        #             "hotwater-description"
+        #         ]
+        #     )
+        # ]
+
+        cleaning_data = read_dataframe_from_s3_parquet(
+            bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
+        )
+
+        cleaned = read_from_s3(
+            s3_file_name="cleaned_epc_data/cleaned.bson",
+            bucket_name="retrofit-data-dev"
+        )
+        cleaned = msgpack.unpackb(cleaned, raw=False)
+
+        photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")
+
+        epc = EPCRecord(
+            epc_records={
+                'original_epc': starting_epc,
+                'full_sap_epc': {},
+                'old_data': []
+            },
+            run_mode="newdata",
+            cleaning_data=cleaning_data
+        )
+
+        home = Property(
+            id=0,
+            address="",
+            postcode="",
+            epc_record=epc,
+            already_installed={},
+            non_invasive_recommendations={},
+        )
+        home.in_conservation_area = False
+        home.is_listed = False
+        home.is_heritage = False
+        home.restricted_measures = True
+        home.get_components(
+            cleaned=cleaned,
+            photo_supply_lookup=photo_supply_lookup,
+            floor_area_decile_thresholds=floor_area_decile_thresholds
+        )
+
+        recommender = HeatingRecommender(property_instance=home)
+        recommender.recommend_air_source_heat_pump(phase=0, has_cavity_or_loft_recommendations=False)
+
+        # Patch - for this property, the hot water energy efficiency is very poor. It's not clear why this is,
+        # but we insert this for this test
+        recommender.heating_recommendations[0]["simulation_config"]["hot_water_energy_eff_ending"] = "Very Poor"
+
+        property_recommendations = Recommendations.insert_temp_recommendation_id([recommender.heating_recommendations])
+
+        assert len(recommender.heating_recommendations) == 1
+
+        home.create_base_difference_epc_record(cleaned_lookup=cleaned)
+        home.adjust_difference_record_with_recommendations(
+            property_recommendations, []
+        )
+
+        scoring_data = pd.DataFrame(home.recommendations_scoring_data).drop(
+            columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
+                     "carbon_ending"]
+        )
+
+        model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat())
+        model_api.MODEL_PREFIXES = ["sap_change_predictions"]
+
+        predictions_dict = model_api.predict_all(
+            df=scoring_data,
+            bucket="retrofit-data-dev",
+            prediction_buckets={
+                "sap_change_predictions": "retrofit-sap-predictions-dev",
+            }
+        )
+        assert predictions_dict["sap_change_predictions"]["predictions"].values[0] == 52.2
+
+    def test_air_source_heat_pump_gas_boiler_starting_2(self):
+        """
+        This property seems to have minuscule movement in SAP - just 2 points
+        :return:
+        """
+
+        starting_epc = {
+            'low-energy-fixed-light-count': '', 'address': '31 Whinney Hill Park', 'uprn-source': 'Energy Assessor',
+            'floor-height': '2.3', 'heating-cost-potential': '394', 'unheated-corridor-length': '',
+            'hot-water-cost-potential': '48', 'construction-age-band': 'England and Wales: 1967-1975',
+            'potential-energy-rating': 'B', 'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Average',
+            'lighting-energy-eff': 'Good', 'environment-impact-potential': '87',
+            'glazed-type': 'double glazing, unknown install date', 'heating-cost-current': '487', 'address3': '',
+            'mainheatcont-description': 'Programmer, room thermostat and TRVs', 'sheating-energy-eff': 'N/A',
+            'property-type': 'Bungalow', 'local-authority-label': 'Calderdale', 'fixed-lighting-outlets-count': '5',
+            'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '86',
+            'county': '', 'postcode': 'HD6 2PX', 'solar-water-heating-flag': 'N', 'constituency': 'E14000614',
+            'co2-emissions-potential': '0.8', 'number-heated-rooms': '2',
+            'floor-description': 'Solid, no insulation (assumed)', 'energy-consumption-potential': '105',
+            'local-authority': 'E08000033', 'built-form': 'End-Terrace', 'number-open-fireplaces': '0',
+            'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2021-11-25',
+            'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '56', 'address1': '31 Whinney Hill Park',
+            'heat-loss-corridor': '', 'flat-storey-count': '', 'constituency-label': 'Calder Valley',
+            'roof-energy-eff': 'Good', 'total-floor-area': '44.0', 'building-reference-number': '10001772583',
+            'environment-impact-current': '62', 'co2-emissions-current': '2.5',
+            'roof-description': 'Pitched, 250 mm loft insulation', 'floor-energy-eff': 'N/A',
+            'number-habitable-rooms': '2', 'address2': '', 'hot-water-env-eff': 'Good', 'posttown': 'BRIGHOUSE',
+            'mainheatc-energy-eff': 'Good', 'main-fuel': 'mains gas (not community)', 'lighting-env-eff': 'Good',
+            'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A',
+            'lighting-description': 'Low energy lighting in 60% of fixed outlets', 'roof-env-eff': 'Good',
+            'walls-energy-eff': 'Average', 'photo-supply': '0.0', 'lighting-cost-potential': '40',
+            'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '',
+            'lodgement-datetime': '2021-11-25 11:39:35', 'flat-top-storey': '', 'current-energy-rating': 'D',
+            'secondheat-description': 'Room heaters, electric', 'walls-env-eff': 'Average',
+            'transaction-type': 'rental', 'uprn': '100051304421', 'current-energy-efficiency': '62',
+            'energy-consumption-current': '322', 'mainheat-description': 'Boiler and radiators, mains gas',
+            'lighting-cost-current': '56', 'lodgement-date': '2021-11-25', 'extension-count': '0',
+            'mainheatc-env-eff': 'Good', 'lmk-key': '077f70657e9c3f1f0ce5392798398398616b159493b2a8ca2338961596631c27',
+            'wind-turbine-count': '0', 'tenure': 'Rented (social)', 'floor-level': '',
+            'potential-energy-efficiency': '86', 'hot-water-energy-eff': 'Good', 'low-energy-lighting': '60',
+            'walls-description': 'Cavity wall, filled cavity', 'hotwater-description': 'From main system'
+        }
+
+        ending_epc = {
+            'low-energy-fixed-light-count': '', 'address': '31 Whinney Hill Park',
+            'uprn-source': 'Energy Assessor', 'floor-height': '2.3', 'heating-cost-potential': '277',
+            'unheated-corridor-length': '', 'hot-water-cost-potential': '266',
+            'construction-age-band': 'England and Wales: 1967-1975', 'potential-energy-rating': 'B',
+            'mainheat-energy-eff': 'Very Good', 'windows-env-eff': 'Average', 'lighting-energy-eff': 'Good',
+            'environment-impact-potential': '90', 'glazed-type': 'double glazing, unknown install date',
+            'heating-cost-current': '331', 'address3': '',
+            'mainheatcont-description': 'Programmer and room thermostat', 'sheating-energy-eff': 'N/A',
+            'property-type': 'Bungalow', 'local-authority-label': 'Calderdale',
+            'fixed-lighting-outlets-count': '5', 'energy-tariff': 'Single',
+            'mechanical-ventilation': 'natural', 'hot-water-cost-current': '404', 'county': '',
+            'postcode': 'HD6 2PX', 'solar-water-heating-flag': 'N', 'constituency': 'E14000614',
+            'co2-emissions-potential': '0.7', 'number-heated-rooms': '2',
+            'floor-description': 'Solid, no insulation (assumed)', 'energy-consumption-potential': '92',
+            'local-authority': 'E08000033', 'built-form': 'End-Terrace', 'number-open-fireplaces': '0',
+            'windows-description': 'Fully double glazed', 'glazed-area': 'Normal',
+            'inspection-date': '2021-11-25', 'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '48',
+            'address1': '31 Whinney Hill Park', 'heat-loss-corridor': '', 'flat-storey-count': '',
+            'constituency-label': 'Calder Valley', 'roof-energy-eff': 'Good', 'total-floor-area': '44.0',
+            'building-reference-number': '10001772583', 'environment-impact-current': '68',
+            'co2-emissions-current': '2.1', 'roof-description': 'Pitched, 250 mm loft insulation',
+            'floor-energy-eff': 'N/A', 'number-habitable-rooms': '2', 'address2': '',
+            'hot-water-env-eff': 'Poor', 'posttown': 'BRIGHOUSE', 'mainheatc-energy-eff': 'Average',
+            'main-fuel': 'electricity (not community)', 'lighting-env-eff': 'Good',
+            'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A',
+            'lighting-description': 'Low energy lighting in 60% of fixed outlets', 'roof-env-eff': 'Good',
+            'walls-energy-eff': 'Average', 'photo-supply': '0.0', 'lighting-cost-potential': '40',
+            'mainheat-env-eff': 'Very Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '',
+            'lodgement-datetime': '2022-03-23 16:06:21', 'flat-top-storey': '', 'current-energy-rating': 'D',
+            'secondheat-description': 'Room heaters, electric', 'walls-env-eff': 'Average',
+            'transaction-type': 'rental', 'uprn': '100051304421', 'current-energy-efficiency': '64',
+            'energy-consumption-current': '283',
+            'mainheat-description': 'Air source heat pump, radiators, electric',
+            'lighting-cost-current': '57', 'lodgement-date': '2022-03-23', 'extension-count': '0',
+            'mainheatc-env-eff': 'Average',
+            'lmk-key': '6296248141447b53426a40f1c39da17dad5f4786485db55ee38737891111a4d4',
+            'wind-turbine-count': '0', 'tenure': 'Rented (social)', 'floor-level': '',
+            'potential-energy-efficiency': '89', 'hot-water-energy-eff': 'Very Poor',
+            'low-energy-lighting': '60', 'walls-description': 'Cavity wall, filled cavity',
+            'hotwater-description': 'From main system'
+        }
+
+        # differences = []
+        # for k, v in ending_epc.items():
+        #     if v != starting_epc[k]:
+        #         differences.append(
+        #             {
+        #                 "variable": k,
+        #                 "starting_value": starting_epc[k],
+        #                 "ending_value": v
+        #             }
+        #         )
+        # differences = pd.DataFrame(differences)
+        #
+        # diffs = differences[
+        #     differences["variable"].isin(
+        #         [
+        #             "mainheat-energy-eff",
+        #             "mainheatcont-description",
+        #             "mainheatc-energy-eff",
+        #             "main-fuel",
+        #             "mainheat-env-eff",
+        #             "mainheat-description",
+        #             "hot-water-energy-eff",
+        #             "hotwater-description"
+        #         ]
+        #     )
+        # ]
+
+        cleaning_data = read_dataframe_from_s3_parquet(
+            bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
+        )
+
+        cleaned = read_from_s3(
+            s3_file_name="cleaned_epc_data/cleaned.bson",
+            bucket_name="retrofit-data-dev"
+        )
+        cleaned = msgpack.unpackb(cleaned, raw=False)
+
+        photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")
+
+        epc = EPCRecord(
+            epc_records={
+                'original_epc': starting_epc,
+                'full_sap_epc': {},
+                'old_data': []
+            },
+            run_mode="newdata",
+            cleaning_data=cleaning_data
+        )
+
+        home = Property(
+            id=0,
+            address="",
+            postcode="",
+            epc_record=epc,
+            already_installed={},
+            non_invasive_recommendations={},
+        )
+        home.in_conservation_area = False
+        home.is_listed = False
+        home.is_heritage = False
+        home.restricted_measures = True
+        home.get_components(
+            cleaned=cleaned,
+            photo_supply_lookup=photo_supply_lookup,
+            floor_area_decile_thresholds=floor_area_decile_thresholds
+        )
+
+        recommender = HeatingRecommender(property_instance=home)
+        recommender.recommend_air_source_heat_pump(phase=0, has_cavity_or_loft_recommendations=False)
+        property_recommendations = Recommendations.insert_temp_recommendation_id([recommender.heating_recommendations])
+
+        assert len(recommender.heating_recommendations) == 1
+
+        home.create_base_difference_epc_record(cleaned_lookup=cleaned)
+        home.adjust_difference_record_with_recommendations(
+            property_recommendations, []
+        )
+
+        scoring_data = pd.DataFrame(home.recommendations_scoring_data).drop(
+            columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
+                     "carbon_ending"]
+        )
+
+        model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat())
+        model_api.MODEL_PREFIXES = ["sap_change_predictions"]
+
+        predictions_dict = model_api.predict_all(
+            df=scoring_data,
+            bucket="retrofit-data-dev",
+            prediction_buckets={
+                "sap_change_predictions": "retrofit-sap-predictions-dev",
+            }
+        )
+        assert predictions_dict["sap_change_predictions"]["predictions"].values[0] == 69.3
+
+        # In actuality with this property, the heating controls get downgraded, so we test a manual patch of this
+        patched_simulation_config = {
+            'mainheat_energy_eff_ending': "Very Good",
+            'hot_water_energy_eff_ending': 'Very Poor',
+            'has_boiler_ending': False,
+            'has_air_source_heat_pump_ending': True,
+            'has_electric_ending': True,
+            'has_mains_gas_ending': False,
+            'fuel_type_ending': 'electricity',
+            'trvs_ending': None,
+            "mainheatc_energy_eff_ending": 'Average'
+        }
+
+        # PATCHING
+        property_recommendations_patch = Recommendations.insert_temp_recommendation_id(
+            [recommender.heating_recommendations]
+        )
+        property_recommendations_patch[0][0]["simulation_config"] = patched_simulation_config
+
+        home.create_base_difference_epc_record(cleaned_lookup=cleaned)
+        home.adjust_difference_record_with_recommendations(
+            property_recommendations_patch, []
+        )
+
+        scoring_data_patch = pd.DataFrame(home.recommendations_scoring_data).drop(
+            columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
+                     "carbon_ending"]
+        )
+
+        model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat())
+        model_api.MODEL_PREFIXES = ["sap_change_predictions"]
+
+        predictions_dict_patch = model_api.predict_all(
+            df=scoring_data_patch,
+            bucket="retrofit-data-dev",
+            prediction_buckets={
+                "sap_change_predictions": "retrofit-sap-predictions-dev",
+            }
+        )
+        # The error is only 0.3, so the model is working
+        assert predictions_dict_patch["sap_change_predictions"]["predictions"].values[0] == 64.3
+        assert ending_epc["current-energy-efficiency"] == '64'
+
+    def test_air_source_heat_pump_lpg_boiler(self):  # ASHP recommendation for an LPG-boiler home, scored raw and patched
+        starting_epc = {  # EPC lodged 2023-10-30: LPG boiler, rating F / 32
+            'low-energy-fixed-light-count': '', 'address': 'Holly Lodge, The Drive, Perry',
+            'uprn-source': 'Energy Assessor', 'floor-height': '2.8', 'heating-cost-potential': '1628',
+            'unheated-corridor-length': '', 'hot-water-cost-potential': '175',
+            'construction-age-band': 'England and Wales: 1950-1966', 'potential-energy-rating': 'D',
+            'mainheat-energy-eff': 'Poor', 'windows-env-eff': 'Average', 'lighting-energy-eff': 'Average',
+            'environment-impact-potential': '70', 'glazed-type': 'double glazing, unknown install date',
+            'heating-cost-current': '2158', 'address3': 'Perry',
+            'mainheatcont-description': 'No time or thermostatic control of room temperature',
+            'sheating-energy-eff': 'N/A', 'property-type': 'Bungalow', 'local-authority-label': 'Huntingdonshire',
+            'fixed-lighting-outlets-count': '12', 'energy-tariff': 'Single', 'mechanical-ventilation': 'natural',
+            'hot-water-cost-current': '257', 'county': 'Cambridgeshire', 'postcode': 'PE28 0SX',
+            'solar-water-heating-flag': 'N', 'constituency': 'E14000757', 'co2-emissions-potential': '3.3',
+            'number-heated-rooms': '5', 'floor-description': 'Solid, no insulation (assumed)',
+            'energy-consumption-potential': '128', 'local-authority': 'E07000011', 'built-form': 'Semi-Detached',
+            'number-open-fireplaces': '0', 'windows-description': 'Fully double glazed', 'glazed-area': 'Normal',
+            'inspection-date': '2023-08-31', 'mains-gas-flag': 'N', 'co2-emiss-curr-per-floor-area': '51',
+            'address1': 'Holly Lodge', 'heat-loss-corridor': '', 'flat-storey-count': '',
+            'constituency-label': 'Huntingdon', 'roof-energy-eff': 'Good', 'total-floor-area': '117.0',
+            'building-reference-number': '10005199915', 'environment-impact-current': '50',
+            'co2-emissions-current': '5.9', 'roof-description': 'Pitched, 270 mm loft insulation',
+            'floor-energy-eff': 'N/A', 'number-habitable-rooms': '5', 'address2': 'The Drive',
+            'hot-water-env-eff': 'Good', 'posttown': 'HUNTINGDON', 'mainheatc-energy-eff': 'Very Poor',
+            'main-fuel': 'LPG (not community)', 'lighting-env-eff': 'Average', 'windows-energy-eff': 'Average',
+            'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A',
+            'lighting-description': 'Low energy lighting in 33% of fixed outlets', 'roof-env-eff': 'Good',
+            'walls-energy-eff': 'Average', 'photo-supply': '0.0', 'lighting-cost-potential': '166',
+            'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '',
+            'lodgement-datetime': '2023-10-30 13:46:54', 'flat-top-storey': '', 'current-energy-rating': 'F',
+            'secondheat-description': 'Room heaters, electric', 'walls-env-eff': 'Average',
+            'transaction-type': 'ECO assessment', 'uprn': '100091200828', 'current-energy-efficiency': '32',
+            'energy-consumption-current': '243', 'mainheat-description': 'Boiler and radiators, LPG',
+            'lighting-cost-current': '277', 'lodgement-date': '2023-10-30', 'extension-count': '0',
+            'mainheatc-env-eff': 'Very Poor',
+            'lmk-key': 'f1d3bd4b8b50bc9b006231ccb158537c408523b748b3f4ef7e98cd03b144afa5', 'wind-turbine-count': '0',
+            'tenure': 'Owner-occupied', 'floor-level': '', 'potential-energy-efficiency': '56',
+            'hot-water-energy-eff': 'Poor', 'low-energy-lighting': '33',
+            'walls-description': 'Cavity wall, filled cavity', 'hotwater-description': 'From main system'
+        }
+
+        ending_epc = {  # EPC lodged 2023-11-01 with an air source heat pump, rating A / 92; reference only
+            'low-energy-fixed-light-count': '', 'address': 'Holly Lodge, The Drive, Perry',
+            'uprn-source': 'Energy Assessor', 'floor-height': '2.8', 'heating-cost-potential': '917',
+            'unheated-corridor-length': '', 'hot-water-cost-potential': '328',
+            'construction-age-band': 'England and Wales: 1950-1966', 'potential-energy-rating': 'A',
+            'mainheat-energy-eff': 'Very Good', 'windows-env-eff': 'Average', 'lighting-energy-eff': 'Average',
+            'environment-impact-potential': '96', 'glazed-type': 'double glazing, unknown install date',
+            'heating-cost-current': '1098', 'address3': 'Perry',
+            'mainheatcont-description': 'Programmer, TRVs and bypass', 'sheating-energy-eff': 'N/A',
+            'property-type': 'Bungalow', 'local-authority-label': 'Huntingdonshire',
+            'fixed-lighting-outlets-count': '12', 'energy-tariff': 'Single', 'mechanical-ventilation': 'natural',
+            'hot-water-cost-current': '328', 'county': 'Cambridgeshire', 'postcode': 'PE28 0SX',
+            'solar-water-heating-flag': 'N', 'constituency': 'E14000757', 'co2-emissions-potential': '0.3',
+            'number-heated-rooms': '5', 'floor-description': 'Solid, no insulation (assumed)',
+            'energy-consumption-potential': '16', 'local-authority': 'E07000011', 'built-form': 'Semi-Detached',
+            'number-open-fireplaces': '0', 'windows-description': 'Fully double glazed', 'glazed-area': 'Normal',
+            'inspection-date': '2023-10-05', 'mains-gas-flag': 'N', 'co2-emiss-curr-per-floor-area': '6',
+            'address1': 'Holly Lodge', 'heat-loss-corridor': '', 'flat-storey-count': '',
+            'constituency-label': 'Huntingdon', 'roof-energy-eff': 'Good', 'total-floor-area': '117.0',
+            'building-reference-number': '10005199915', 'environment-impact-current': '92',
+            'co2-emissions-current': '0.7', 'roof-description': 'Pitched, 270 mm loft insulation',
+            'floor-energy-eff': 'N/A', 'number-habitable-rooms': '5', 'address2': 'The Drive',
+            'hot-water-env-eff': 'Very Good', 'posttown': 'HUNTINGDON', 'mainheatc-energy-eff': 'Average',
+            'main-fuel': 'electricity (not community)', 'lighting-env-eff': 'Average', 'windows-energy-eff': 'Average',
+            'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A',
+            'lighting-description': 'Low energy lighting in 33% of fixed outlets', 'roof-env-eff': 'Good',
+            'walls-energy-eff': 'Average', 'photo-supply': '', 'lighting-cost-potential': '166',
+            'mainheat-env-eff': 'Very Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '',
+            'lodgement-datetime': '2023-11-01 16:29:16', 'flat-top-storey': '', 'current-energy-rating': 'A',
+            'secondheat-description': 'Room heaters, electric', 'walls-env-eff': 'Average',
+            'transaction-type': 'ECO assessment', 'uprn': '100091200828', 'current-energy-efficiency': '92',
+            'energy-consumption-current': '37', 'mainheat-description': 'Air source heat pump, radiators, electric',
+            'lighting-cost-current': '277', 'lodgement-date': '2023-11-01', 'extension-count': '0',
+            'mainheatc-env-eff': 'Average',
+            'lmk-key': 'cb7f2838b727907767c8c2a385cd22f722b1e4745463391d910d228e52124515', 'wind-turbine-count': '0',
+            'tenure': 'Owner-occupied', 'floor-level': '', 'potential-energy-efficiency': '95',
+            'hot-water-energy-eff': 'Good', 'low-energy-lighting': '33',
+            'walls-description': 'Cavity wall, filled cavity', 'hotwater-description': 'From main system'
+        }
+
+        cleaning_data = read_dataframe_from_s3_parquet(
+            bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
+        )
+
+        cleaned = read_from_s3(
+            s3_file_name="cleaned_epc_data/cleaned.bson",
+            bucket_name="retrofit-data-dev"
+        )
+        cleaned = msgpack.unpackb(cleaned, raw=False)  # decode the msgpack payload returned by read_from_s3
+
+        photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")
+
+        epc = EPCRecord(
+            epc_records={
+                'original_epc': starting_epc,
+                'full_sap_epc': {},
+                'old_data': []
+            },
+            run_mode="newdata",
+            cleaning_data=cleaning_data
+        )
+
+        home = Property(
+            id=0,
+            address="",
+            postcode="",
+            epc_record=epc,
+            already_installed={},
+            non_invasive_recommendations={},
+        )
+        home.in_conservation_area = False
+        home.is_listed = False
+        home.is_heritage = False
+        home.restricted_measures = True
+        home.get_components(
+            cleaned=cleaned,
+            photo_supply_lookup=photo_supply_lookup,
+            floor_area_decile_thresholds=floor_area_decile_thresholds
+        )
+
+        recommender = HeatingRecommender(property_instance=home)
+        recommender.recommend_air_source_heat_pump(phase=0, has_cavity_or_loft_recommendations=False)
+        property_recommendations = Recommendations.insert_temp_recommendation_id([recommender.heating_recommendations])
+
+        assert len(recommender.heating_recommendations) == 1  # only the ASHP recommender was invoked
+
+        home.create_base_difference_epc_record(cleaned_lookup=cleaned)
+        home.adjust_difference_record_with_recommendations(
+            property_recommendations, []
+        )
+
+        scoring_data = pd.DataFrame(home.recommendations_scoring_data).drop(  # presumably the outcome columns - confirm
+            columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
+                     "carbon_ending"]
+        )
+
+        model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat())
+        model_api.MODEL_PREFIXES = ["sap_change_predictions"]  # NOTE(review): presumably limits predict_all to this model - confirm
+
+        predictions_dict = model_api.predict_all(
+            df=scoring_data,
+            bucket="retrofit-data-dev",
+            prediction_buckets={
+                "sap_change_predictions": "retrofit-sap-predictions-dev",
+            }
+        )
+        # We predict a huge uplift but not quite as much as the EPC, due to some distinct differences between our
+        # recommendation and the EPC
+        assert predictions_dict["sap_change_predictions"]["predictions"].values[0] == 81.3  # NOTE(review): exact float equality; brittle if the model or its rounding changes
+        assert ending_epc['current-energy-efficiency'] == '92'  # always true for the literal above; records the reference rating
+
+        # PATCH
+        # We patch the simulation config to mirror the ending EPC, to see whether the prediction moves closer to it
+        patched_simulation_config = {
+            'mainheat_energy_eff_ending': "Very Good",
+            'hot_water_energy_eff_ending': 'Good',
+            'has_boiler_ending': False,
+            'has_air_source_heat_pump_ending': True,
+            'has_electric_ending': True,
+            'has_lpg_ending': False,
+            'fuel_type_ending': 'electricity',
+            'switch_system_ending': 'programmer',
+            'no_control_ending': None,
+            'auxiliary_systems_ending': 'bypass',
+            'trvs_ending': 'trvs',
+            "mainheatc_energy_eff_ending": 'Average'
+        }
+
+        # PATCHING
+        property_recommendations_patch = Recommendations.insert_temp_recommendation_id(
+            [recommender.heating_recommendations]
+        )
+        property_recommendations_patch[0][0]["simulation_config"] = patched_simulation_config  # replace the first recommendation's config
+
+        home.create_base_difference_epc_record(cleaned_lookup=cleaned)  # same `home` is reused for the patched run
+        home.adjust_difference_record_with_recommendations(
+            property_recommendations_patch, []
+        )
+
+        scoring_data_patch = pd.DataFrame(home.recommendations_scoring_data).drop(
+            columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
+                     "carbon_ending"]
+        )
+
+        model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat())
+        model_api.MODEL_PREFIXES = ["sap_change_predictions"]
+
+        predictions_dict_patch = model_api.predict_all(
+            df=scoring_data_patch,
+            bucket="retrofit-data-dev",
+            prediction_buckets={
+                "sap_change_predictions": "retrofit-sap-predictions-dev",
+            }
+        )
+
+        assert predictions_dict_patch["sap_change_predictions"]["predictions"].values[0] == 88.9
+        # We still underpredict but the improvement is notable
+
+    def test_offgrid(self):
+        """
+        We test on a property we've worked with before, where we compare two options
+        a) Upgrading to a boiler
+        b) Upgrading to a heat pump
+        :return:
+        """
+
+        starting_epc = {  # EPC lodged 2024-03-13: electric room heaters, rating F / 32
+            'low-energy-fixed-light-count': '', 'address': '6 Beech Road', 'uprn-source': 'Energy Assessor',
+            'floor-height': '2.4', 'heating-cost-potential': '612', 'unheated-corridor-length': '',
+            'hot-water-cost-potential': '123', 'construction-age-band': 'England and Wales: 1930-1949',
+            'potential-energy-rating': 'B', 'mainheat-energy-eff': 'Very Poor', 'windows-env-eff': 'Good',
+            'lighting-energy-eff': 'Good', 'environment-impact-potential': '87',
+            'glazed-type': 'double glazing installed during or after 2002', 'heating-cost-current': '2278',
+            'address3': '', 'mainheatcont-description': 'Appliance thermostats', 'sheating-energy-eff': 'N/A',
+            'property-type': 'House', 'local-authority-label': 'Dudley', 'fixed-lighting-outlets-count': '9',
+            'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '604',
+            'county': '', 'postcode': 'DY1 4BP', 'solar-water-heating-flag': 'N', 'constituency': 'E14000671',
+            'co2-emissions-potential': '1.0', 'number-heated-rooms': '4',
+            'floor-description': 'Solid, no insulation (assumed)', 'energy-consumption-potential': '93',
+            'local-authority': 'E08000027', 'built-form': 'End-Terrace', 'number-open-fireplaces': '0',
+            'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2024-03-13',
+            'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '83', 'address1': '6 Beech Road',
+            'heat-loss-corridor': '', 'flat-storey-count': '', 'constituency-label': 'Dudley North',
+            'roof-energy-eff': 'Very Poor', 'total-floor-area': '60.0', 'building-reference-number': '10005780080',
+            'environment-impact-current': '41', 'co2-emissions-current': '5.0',
+            'roof-description': 'Pitched, 12 mm loft insulation', 'floor-energy-eff': 'N/A',
+            'number-habitable-rooms': '4', 'address2': '', 'hot-water-env-eff': 'Poor', 'posttown': 'DUDLEY',
+            'mainheatc-energy-eff': 'Good', 'main-fuel': 'electricity (not community)', 'lighting-env-eff': 'Good',
+            'windows-energy-eff': 'Good', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A',
+            'lighting-description': 'Low energy lighting in 67% of fixed outlets', 'roof-env-eff': 'Very Poor',
+            'walls-energy-eff': 'Average', 'photo-supply': '0.0', 'lighting-cost-potential': '113',
+            'mainheat-env-eff': 'Poor', 'multi-glaze-proportion': '100', 'main-heating-controls': '',
+            'lodgement-datetime': '2024-03-13 11:29:11', 'flat-top-storey': '', 'current-energy-rating': 'F',
+            'secondheat-description': 'None', 'walls-env-eff': 'Average', 'transaction-type': 'rental',
+            'uprn': '90055152', 'current-energy-efficiency': '32', 'energy-consumption-current': '491',
+            'mainheat-description': 'Room heaters, electric', 'lighting-cost-current': '113',
+            'lodgement-date': '2024-03-13', 'extension-count': '1', 'mainheatc-env-eff': 'Good',
+            'lmk-key': '78ddf851b660e599a0894924d0e6b503980f5e0ad1aa711f8411718dc2989c44', 'wind-turbine-count': '0',
+            'tenure': 'Rented (social)', 'floor-level': '', 'potential-energy-efficiency': '87',
+            'hot-water-energy-eff': 'Very Poor', 'low-energy-lighting': '67',
+            'walls-description': 'Cavity wall, filled cavity',
+            'hotwater-description': 'Electric immersion, standard tariff'
+        }
+
+        cleaning_data = read_dataframe_from_s3_parquet(
+            bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
+        )
+
+        cleaned = read_from_s3(
+            s3_file_name="cleaned_epc_data/cleaned.bson",
+            bucket_name="retrofit-data-dev"
+        )
+        cleaned = msgpack.unpackb(cleaned, raw=False)  # decode the msgpack payload returned by read_from_s3
+
+        photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")
+
+        epc = EPCRecord(
+            epc_records={
+                'original_epc': starting_epc,
+                'full_sap_epc': {},
+                'old_data': []
+            },
+            run_mode="newdata",
+            cleaning_data=cleaning_data
+        )
+
+        home = Property(
+            id=0,
+            address="",
+            postcode="",
+            epc_record=epc,
+            already_installed={},
+            non_invasive_recommendations={},
+        )
+        home.in_conservation_area = False
+        home.is_listed = False
+        home.is_heritage = False
+        home.restricted_measures = True
+        home.get_components(
+            cleaned=cleaned,
+            photo_supply_lookup=photo_supply_lookup,
+            floor_area_decile_thresholds=floor_area_decile_thresholds
+        )
+
+        recommender = HeatingRecommender(property_instance=home)
+        recommender.recommend_air_source_heat_pump(phase=0, has_cavity_or_loft_recommendations=False)
+        recommender.recommend_boiler_upgrades(phase=0, system_change=True, exising_room_heaters=False)  # NOTE(review): "exising" spelling must match the callee's parameter - confirm
+
+        assert len(recommender.heating_recommendations) == 3  # NOTE(review): presumably 1 ASHP + 2 boiler options - confirm
+
+        property_recommendations = Recommendations.insert_temp_recommendation_id([recommender.heating_recommendations])
+
+        home.create_base_difference_epc_record(cleaned_lookup=cleaned)
+        home.adjust_difference_record_with_recommendations(
+            property_recommendations, []
+        )
+
+        scoring_data = pd.DataFrame(home.recommendations_scoring_data).drop(  # presumably the outcome columns - confirm
+            columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
+                     "carbon_ending"]
+        )
+
+        model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat())
+        model_api.MODEL_PREFIXES = ["sap_change_predictions"]  # NOTE(review): presumably limits predict_all to this model - confirm
+
+        predictions_dict = model_api.predict_all(
+            df=scoring_data,
+            bucket="retrofit-data-dev",
+            prediction_buckets={
+                "sap_change_predictions": "retrofit-sap-predictions-dev",
+            }
+        )
+
+        # The ASHP isn't better under SAP, compared to a gas boiler with good heat controls
+        assert predictions_dict["sap_change_predictions"]["predictions"].tolist() == [66.9, 65.5, 65.9]  # NOTE(review): exact float equality; brittle if the model or its rounding changes

From f0936bd1d48e70e0afc726d9e34e44de61b92ab8 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 7 May 2024 17:46:51 +0100
Subject: [PATCH 261/262] Added an extra test for solar

---
 .../tests/test_solar_pv_recommendations.py    | 161 ++++++++++++++++++
 1 file changed, 161 insertions(+)

diff --git a/recommendations/tests/test_solar_pv_recommendations.py b/recommendations/tests/test_solar_pv_recommendations.py
index 5481cb17..e912f373 100644
--- a/recommendations/tests/test_solar_pv_recommendations.py
+++ b/recommendations/tests/test_solar_pv_recommendations.py
@@ -2,6 +2,13 @@ import pytest
 from recommendations.SolarPvRecommendations import SolarPvRecommendations
 from backend.Property import Property
 from etl.epc.Record import EPCRecord
+import pandas as pd
+from datetime import datetime
+from utils.s3 import read_dataframe_from_s3_parquet, read_from_s3
+from etl.solar.SolarPhotoSupply import SolarPhotoSupply
+from recommendations.Recommendations import Recommendations
+from backend.ml_models.api import ModelApi
+import msgpack
 
 
 class TestSolarPvRecommendations:
@@ -82,3 +89,157 @@ class TestSolarPvRecommendations:
                 'photo_supply': 4000
             }
         ]
+
+    def test_model(self):
+        """
+        This function tests the recommendation engine, in conjunction with the model
+        :return:
+        """
+        # EPC lodged 2021-12-01 with photo-supply 0.0 and a current-energy-efficiency of 54
+        starting_epc = {
+            'low-energy-fixed-light-count': '', 'address': '27 Cromwell Street', 'uprn-source': 'Energy Assessor',
+            'floor-height': '2.5', 'heating-cost-potential': '443', 'unheated-corridor-length': '',
+            'hot-water-cost-potential': '53', 'construction-age-band': 'England and Wales: before 1900',
+            'potential-energy-rating': 'B', 'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Average',
+            'lighting-energy-eff': 'Very Poor', 'environment-impact-potential': '85',
+            'glazed-type': 'double glazing installed before 2002', 'heating-cost-current': '904', 'address3': '',
+            'mainheatcont-description': 'Programmer, room thermostat and TRVs', 'sheating-energy-eff': 'N/A',
+            'property-type': 'House', 'local-authority-label': 'West Lindsey', 'fixed-lighting-outlets-count': '10',
+            'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '79',
+            'county': 'Lincolnshire', 'postcode': 'DN21 1DH', 'solar-water-heating-flag': 'N',
+            'constituency': 'E14000707', 'co2-emissions-potential': '1.5', 'number-heated-rooms': '5',
+            'floor-description': 'Suspended, no insulation (assumed)', 'energy-consumption-potential': '92',
+            'local-authority': 'E07000142', 'built-form': 'Mid-Terrace', 'number-open-fireplaces': '0',
+            'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2021-11-17',
+            'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '61', 'address1': '27 Cromwell Street',
+            'heat-loss-corridor': '', 'flat-storey-count': '', 'constituency-label': 'Gainsborough',
+            'roof-energy-eff': 'Very Poor', 'total-floor-area': '89.0', 'building-reference-number': '10001989430',
+            'environment-impact-current': '47', 'co2-emissions-current': '5.4',
+            'roof-description': 'Pitched, no insulation (assumed)', 'floor-energy-eff': 'N/A',
+            'number-habitable-rooms': '5', 'address2': '', 'hot-water-env-eff': 'Good', 'posttown': 'GAINSBOROUGH',
+            'mainheatc-energy-eff': 'Good', 'main-fuel': 'mains gas (not community)', 'lighting-env-eff': 'Very Poor',
+            'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A',
+            'lighting-description': 'No low energy lighting', 'roof-env-eff': 'Very Poor',
+            'walls-energy-eff': 'Very Poor', 'photo-supply': '0.0', 'lighting-cost-potential': '72',
+            'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '',
+            'lodgement-datetime': '2021-12-01 10:12:23', 'flat-top-storey': '', 'current-energy-rating': 'E',
+            'secondheat-description': 'Room heaters, mains gas', 'walls-env-eff': 'Very Poor',
+            'transaction-type': 'ECO assessment', 'uprn': '100030949912', 'current-energy-efficiency': '54',
+            'energy-consumption-current': '346', 'mainheat-description': 'Boiler and radiators, mains gas',
+            'lighting-cost-current': '144', 'lodgement-date': '2021-12-01', 'extension-count': '2',
+            'mainheatc-env-eff': 'Good', 'lmk-key': '3ec5533af02ec78361c1f9bea8dd2e878c2c6fa6cf59e5cc505c3eeb038e0f91',
+            'wind-turbine-count': '0', 'tenure': 'Owner-occupied', 'floor-level': '',
+            'potential-energy-efficiency': '86', 'hot-water-energy-eff': 'Good', 'low-energy-lighting': '0',
+            'walls-description': 'Solid brick, as built, no insulation (assumed)',
+            'hotwater-description': 'From main system'
+        }
+        # Later EPC for the same UPRN (lodged 2021-12-21): photo-supply 50.0, current-energy-efficiency 65
+        ending_epc = {
+            'low-energy-fixed-light-count': '', 'address': '27 Cromwell Street', 'uprn-source': 'Energy Assessor',
+            'floor-height': '2.5', 'heating-cost-potential': '443', 'unheated-corridor-length': '',
+            'hot-water-cost-potential': '53', 'construction-age-band': 'England and Wales: before 1900',
+            'potential-energy-rating': 'B', 'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Average',
+            'lighting-energy-eff': 'Very Poor', 'environment-impact-potential': '86',
+            'glazed-type': 'double glazing installed before 2002', 'heating-cost-current': '904', 'address3': '',
+            'mainheatcont-description': 'Programmer, room thermostat and TRVs', 'sheating-energy-eff': 'N/A',
+            'property-type': 'House', 'local-authority-label': 'West Lindsey', 'fixed-lighting-outlets-count': '10',
+            'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '79',
+            'county': 'Lincolnshire', 'postcode': 'DN21 1DH', 'solar-water-heating-flag': 'N',
+            'constituency': 'E14000707', 'co2-emissions-potential': '1.4', 'number-heated-rooms': '5',
+            'floor-description': 'Suspended, no insulation (assumed)', 'energy-consumption-potential': '84',
+            'local-authority': 'E07000142', 'built-form': 'Mid-Terrace', 'number-open-fireplaces': '0',
+            'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2021-12-21',
+            'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '49', 'address1': '27 Cromwell Street',
+            'heat-loss-corridor': '', 'flat-storey-count': '', 'constituency-label': 'Gainsborough',
+            'roof-energy-eff': 'Very Poor', 'total-floor-area': '89.0', 'building-reference-number': '10001989430',
+            'environment-impact-current': '55', 'co2-emissions-current': '4.4',
+            'roof-description': 'Pitched, no insulation (assumed)', 'floor-energy-eff': 'N/A',
+            'number-habitable-rooms': '5', 'address2': '', 'hot-water-env-eff': 'Good', 'posttown': 'GAINSBOROUGH',
+            'mainheatc-energy-eff': 'Good', 'main-fuel': 'mains gas (not community)', 'lighting-env-eff': 'Very Poor',
+            'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A',
+            'lighting-description': 'No low energy lighting', 'roof-env-eff': 'Very Poor',
+            'walls-energy-eff': 'Very Poor', 'photo-supply': '50.0', 'lighting-cost-potential': '72',
+            'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '',
+            'lodgement-datetime': '2021-12-21 17:33:09', 'flat-top-storey': '', 'current-energy-rating': 'D',
+            'secondheat-description': 'Room heaters, mains gas', 'walls-env-eff': 'Very Poor',
+            'transaction-type': 'ECO assessment', 'uprn': '100030949912', 'current-energy-efficiency': '65',
+            'energy-consumption-current': '277', 'mainheat-description': 'Boiler and radiators, mains gas',
+            'lighting-cost-current': '144', 'lodgement-date': '2021-12-21', 'extension-count': '2',
+            'mainheatc-env-eff': 'Good', 'lmk-key': 'b0b19583c59afbc69db12f4d6c98cd8837e80da3214d577c426eb3e672d424fc',
+            'wind-turbine-count': '0', 'tenure': 'Owner-occupied', 'floor-level': '',
+            'potential-energy-efficiency': '88', 'hot-water-energy-eff': 'Good', 'low-energy-lighting': '0',
+            'walls-description': 'Solid brick, as built, no insulation (assumed)',
+            'hotwater-description': 'From main system'
+        }
+        # Fixtures are read from the retrofit-data-dev bucket, so this test presumably needs S3 access
+        cleaning_data = read_dataframe_from_s3_parquet(
+            bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
+        )
+        # cleaned.bson is decoded with msgpack despite its .bson extension
+        cleaned = read_from_s3(
+            s3_file_name="cleaned_epc_data/cleaned.bson",
+            bucket_name="retrofit-data-dev"
+        )
+        cleaned = msgpack.unpackb(cleaned, raw=False)
+
+        photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")
+        # Only starting_epc is fed to the pipeline; ending_epc is used solely in the final assertion
+        epc = EPCRecord(
+            epc_records={
+                'original_epc': starting_epc,
+                'full_sap_epc': {},
+                'old_data': []
+            },
+            run_mode="newdata",
+            cleaning_data=cleaning_data
+        )
+
+        home = Property(
+            id=0,
+            address="",
+            postcode="",
+            epc_record=epc,
+            already_installed={},
+            non_invasive_recommendations={},
+        )
+        home.in_conservation_area = False
+        home.is_listed = False
+        home.is_heritage = False
+        home.restricted_measures = True
+        home.get_components(
+            cleaned=cleaned,
+            photo_supply_lookup=photo_supply_lookup,
+            floor_area_decile_thresholds=floor_area_decile_thresholds
+        )
+
+        recommender = SolarPvRecommendations(property_instance=home)
+        recommender.recommend(phase=0)
+        # Keep only the recommendations at photo_supply == 50, the same figure as ending_epc's photo-supply
+        coverage_50_percent = [x for x in recommender.recommendation if x["photo_supply"] == 50]
+        assert len(coverage_50_percent) == 2
+
+        property_recommendations = Recommendations.insert_temp_recommendation_id([coverage_50_percent])
+
+        home.create_base_difference_epc_record(cleaned_lookup=cleaned)
+        home.adjust_difference_record_with_recommendations(
+            property_recommendations, []
+        )
+        # Drop the change/ending columns before scoring (presumably the model's targets - TODO confirm)
+        scoring_data = pd.DataFrame(home.recommendations_scoring_data).drop(
+            columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
+                     "carbon_ending"]
+        )
+
+        model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat())
+        model_api.MODEL_PREFIXES = ["sap_change_predictions"]
+
+        predictions_dict = model_api.predict_all(
+            df=scoring_data,
+            bucket="retrofit-data-dev",
+            prediction_buckets={
+                "sap_change_predictions": "retrofit-sap-predictions-dev",
+            }
+        )
+        # Two predictions expected, presumably one per kept recommendation; the EPC assert restates the literal
+        assert predictions_dict["sap_change_predictions"]["predictions"].tolist() == [65.9, 65.9]
+        assert ending_epc["current-energy-efficiency"] == '65'

From 732f3eb356e61b444f7fff002d7f22f13051d5c3 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 7 May 2024 17:59:30 +0100
Subject: [PATCH 262/262] Added additional test for solar

---
 .../tests/test_solar_pv_recommendations.py    | 164 ++++++++++++++++++
 1 file changed, 164 insertions(+)

diff --git a/recommendations/tests/test_solar_pv_recommendations.py b/recommendations/tests/test_solar_pv_recommendations.py
index e912f373..fbbfe3a1 100644
--- a/recommendations/tests/test_solar_pv_recommendations.py
+++ b/recommendations/tests/test_solar_pv_recommendations.py
@@ -243,3 +243,167 @@ class TestSolarPvRecommendations:
 
         assert predictions_dict["sap_change_predictions"]["predictions"].tolist() == [65.9, 65.9]
         assert ending_epc["current-energy-efficiency"] == '65'
+
+    def test_model2(self):
+        data[["uprn", "sap_ending"]]
+        #
+
+        searcher = SearchEpc(
+            address1="",
+            postcode="",
+            auth_token="a2Nvbm5rb3dsZXNzYXJAZ21haWwuY29tOjY5MGJiMWM0NmIyOGI5ZDUxYzAxMzQzYzNiZGNlZGJjZDNmODQwMzA=",
+            os_api_key="",
+            full_address="",
+            uprn=100030952942,
+        )
+        searcher.find_property(False)
+
+        ending_epc = {
+            'low-energy-fixed-light-count': '', 'address': '6 Kenmare Crescent',
+            'uprn-source': 'Energy Assessor', 'floor-height': '2.49', 'heating-cost-potential': '464',
+            'unheated-corridor-length': '', 'hot-water-cost-potential': '46',
+            'construction-age-band': 'England and Wales: 1967-1975', 'potential-energy-rating': 'B',
+            'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Average', 'lighting-energy-eff': 'Very Good',
+            'environment-impact-potential': '91', 'glazed-type': 'not defined', 'heating-cost-current': '535',
+            'address3': '', 'mainheatcont-description': 'Programmer, room thermostat and TRVs',
+            'sheating-energy-eff': 'N/A', 'property-type': 'Bungalow',
+            'local-authority-label': 'West Lindsey', 'fixed-lighting-outlets-count': '9',
+            'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '69',
+            'county': 'Lincolnshire', 'postcode': 'DN21 1PR', 'solar-water-heating-flag': 'N',
+            'constituency': 'E14000707', 'co2-emissions-potential': '0.7', 'number-heated-rooms': '3',
+            'floor-description': 'Suspended, no insulation (assumed)', 'energy-consumption-potential': '56',
+            'local-authority': 'E07000142', 'built-form': 'Semi-Detached', 'number-open-fireplaces': '0',
+            'windows-description': 'Fully double glazed', 'glazed-area': 'Much More Than Typical',
+            'inspection-date': '2022-08-24', 'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '18',
+            'address1': '6 Kenmare Crescent', 'heat-loss-corridor': '', 'flat-storey-count': '',
+            'constituency-label': 'Gainsborough', 'roof-energy-eff': 'Very Good', 'total-floor-area': '66.0',
+            'building-reference-number': '10002845316', 'environment-impact-current': '85',
+            'co2-emissions-current': '1.2', 'roof-description': 'Pitched, 300 mm loft insulation',
+            'floor-energy-eff': 'N/A', 'number-habitable-rooms': '3', 'address2': '',
+            'hot-water-env-eff': 'Good', 'posttown': 'GAINSBOROUGH', 'mainheatc-energy-eff': 'Good',
+            'main-fuel': 'mains gas (not community)', 'lighting-env-eff': 'Very Good',
+            'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A',
+            'lighting-description': 'Low energy lighting in all fixed outlets', 'roof-env-eff': 'Very Good',
+            'walls-energy-eff': 'Average', 'photo-supply': '40.0', 'lighting-cost-potential': '65',
+            'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '',
+            'lodgement-datetime': '2022-08-24 15:39:42', 'flat-top-storey': '', 'current-energy-rating': 'B',
+            'secondheat-description': 'Room heaters, electric', 'walls-env-eff': 'Average',
+            'transaction-type': 'ECO assessment', 'uprn': '100030952942', 'current-energy-efficiency': '87',
+            'energy-consumption-current': '100', 'mainheat-description': 'Boiler and radiators, mains gas',
+            'lighting-cost-current': '65', 'lodgement-date': '2022-08-24', 'extension-count': '0',
+            'mainheatc-env-eff': 'Good',
+            'lmk-key': 'e20be883431b1fed15db7fa1f52634fb7655d2b80c2fdad37df779f93ec4dafd',
+            'wind-turbine-count': '0', 'tenure': 'Owner-occupied', 'floor-level': '',
+            'potential-energy-efficiency': '91', 'hot-water-energy-eff': 'Good', 'low-energy-lighting': '100',
+            'walls-description': 'Cavity wall, filled cavity', 'hotwater-description': 'From main system'
+        }
+        starting_epc = {
+            'low-energy-fixed-light-count': '', 'address': '6 Kenmare Crescent', 'uprn-source': 'Energy Assessor',
+            'floor-height': '2.49', 'heating-cost-potential': '464', 'unheated-corridor-length': '',
+            'hot-water-cost-potential': '46', 'construction-age-band': 'England and Wales: 1967-1975',
+            'potential-energy-rating': 'B', 'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Average',
+            'lighting-energy-eff': 'Very Good', 'environment-impact-potential': '85', 'glazed-type': 'not defined',
+            'heating-cost-current': '535', 'address3': '',
+            'mainheatcont-description': 'Programmer, room thermostat and TRVs', 'sheating-energy-eff': 'N/A',
+            'property-type': 'Bungalow', 'local-authority-label': 'West Lindsey', 'fixed-lighting-outlets-count': '9',
+            'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '69',
+            'county': 'Lincolnshire', 'postcode': 'DN21 1PR', 'solar-water-heating-flag': 'N',
+            'constituency': 'E14000707', 'co2-emissions-potential': '1.2', 'number-heated-rooms': '3',
+            'floor-description': 'Suspended, no insulation (assumed)', 'energy-consumption-potential': '102',
+            'local-authority': 'E07000142', 'built-form': 'Semi-Detached', 'number-open-fireplaces': '0',
+            'windows-description': 'Fully double glazed', 'glazed-area': 'Much More Than Typical',
+            'inspection-date': '2022-05-31', 'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '40',
+            'address1': '6 Kenmare Crescent', 'heat-loss-corridor': '', 'flat-storey-count': '',
+            'constituency-label': 'Gainsborough', 'roof-energy-eff': 'Very Good', 'total-floor-area': '66.0',
+            'building-reference-number': '10002845316', 'environment-impact-current': '68',
+            'co2-emissions-current': '2.6', 'roof-description': 'Pitched, 300 mm loft insulation',
+            'floor-energy-eff': 'N/A', 'number-habitable-rooms': '3', 'address2': '', 'hot-water-env-eff': 'Good',
+            'posttown': 'GAINSBOROUGH', 'mainheatc-energy-eff': 'Good', 'main-fuel': 'mains gas (not community)',
+            'lighting-env-eff': 'Very Good', 'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A',
+            'sheating-env-eff': 'N/A', 'lighting-description': 'Low energy lighting in all fixed outlets',
+            'roof-env-eff': 'Very Good', 'walls-energy-eff': 'Average', 'photo-supply': '0.0',
+            'lighting-cost-potential': '65', 'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100',
+            'main-heating-controls': '', 'lodgement-datetime': '2022-06-15 08:38:02', 'flat-top-storey': '',
+            'current-energy-rating': 'D', 'secondheat-description': 'Room heaters, electric',
+            'walls-env-eff': 'Average', 'transaction-type': 'ECO assessment', 'uprn': '100030952942',
+            'current-energy-efficiency': '68', 'energy-consumption-current': '227',
+            'mainheat-description': 'Boiler and radiators, mains gas', 'lighting-cost-current': '65',
+            'lodgement-date': '2022-06-15', 'extension-count': '0', 'mainheatc-env-eff': 'Good',
+            'lmk-key': 'ce181970b7077cb9b4626242bfb010b30a0e48541b5f22427e81f1adbeeec4f2', 'wind-turbine-count': '0',
+            'tenure': 'Owner-occupied', 'floor-level': '', 'potential-energy-efficiency': '85',
+            'hot-water-energy-eff': 'Good', 'low-energy-lighting': '100',
+            'walls-description': 'Cavity wall, filled cavity', 'hotwater-description': 'From main system'
+        }
+
+        cleaning_data = read_dataframe_from_s3_parquet(
+            bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
+        )
+
+        cleaned = read_from_s3(
+            s3_file_name="cleaned_epc_data/cleaned.bson",
+            bucket_name="retrofit-data-dev"
+        )
+        cleaned = msgpack.unpackb(cleaned, raw=False)
+
+        photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")
+
+        epc = EPCRecord(
+            epc_records={
+                'original_epc': starting_epc,
+                'full_sap_epc': {},
+                'old_data': []
+            },
+            run_mode="newdata",
+            cleaning_data=cleaning_data
+        )
+
+        home = Property(
+            id=0,
+            address="",
+            postcode="",
+            epc_record=epc,
+            already_installed={},
+            non_invasive_recommendations={},
+        )
+        home.in_conservation_area = False
+        home.is_listed = False
+        home.is_heritage = False
+        home.restricted_measures = True
+        home.get_components(
+            cleaned=cleaned,
+            photo_supply_lookup=photo_supply_lookup,
+            floor_area_decile_thresholds=floor_area_decile_thresholds
+        )
+
+        recommender = SolarPvRecommendations(property_instance=home)
+        recommender.recommend(phase=0)
+
+        coverage_40_percent = [x for x in recommender.recommendation if x["photo_supply"] == 40]
+        assert len(coverage_40_percent) == 2
+
+        property_recommendations = Recommendations.insert_temp_recommendation_id([coverage_40_percent])
+
+        home.create_base_difference_epc_record(cleaned_lookup=cleaned)
+        home.adjust_difference_record_with_recommendations(
+            property_recommendations, []
+        )
+
+        scoring_data = pd.DataFrame(home.recommendations_scoring_data).drop(
+            columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
+                     "carbon_ending"]
+        )
+
+        model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat())
+        model_api.MODEL_PREFIXES = ["sap_change_predictions"]
+
+        predictions_dict = model_api.predict_all(
+            df=scoring_data,
+            bucket="retrofit-data-dev",
+            prediction_buckets={
+                "sap_change_predictions": "retrofit-sap-predictions-dev",
+            }
+        )
+
+        assert predictions_dict["sap_change_predictions"]["predictions"].tolist() == [87.1, 87.1]
+        assert ending_epc["current-energy-efficiency"] == '87'
+        assert starting_epc["current-energy-efficiency"] == '68'