From 615f2289e758c136e73dfaac88d0ff906785f03a Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 22 Feb 2024 12:39:46 +0000
Subject: [PATCH 001/248] Debugging list loading

---
 .idea/Model.iml                               |  2 +-
 .idea/misc.xml                                |  2 +-
 .../ha_15_32/ha_analysis_batch_3.py           | 81 +++++++------------
 3 files changed, 29 insertions(+), 56 deletions(-)
diff --git a/.idea/Model.iml b/.idea/Model.iml
index 4413bb06..b0f9c00d 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 6f308057..1122b380 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 92956337..7bb8b40c 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -131,9 +131,17 @@ class DataLoader:
 
         return ciga_list
 
+    @staticmethod
+    def get_sheetname(workbook):
+        if "Asset List" in workbook.sheetnames:
+            return "Asset List"
+        else:
+            return "Assets"
+
     def load_asset_list(self, filepath, ha_name):
         workbook = openpyxl.load_workbook(filepath)
-        asset_sheet = workbook["Assets"]
+        sheetname = self.get_sheetname(workbook)
+        asset_sheet = workbook[sheetname]
         asset_sheet_colnames = [cell.value for cell in asset_sheet[1]]
 
         rows_data = []
@@ -170,8 +178,10 @@ class DataLoader:
             # Remove columns that are None
             survey_list = survey_list.loc[:, survey_list.columns.notnull()]
             survey_list["survey_list_row_id"] = [ha_name + "_survey_" + str(i) for i in range(0, len(survey_list))]
+
             # Perform survey list merge
-            survey_list = self.merge_surveys_to_assets(asset_list, survey_list, ha_name)
+            if not survey_list.empty:
+                survey_list = self.merge_surveys_to_assets(asset_list, survey_list, ha_name)
 
         # We check if there are CIGA checks
         ciga_list = pd.DataFrame()
@@ -185,9 +195,10 @@ class DataLoader:
             ciga_list = pd.DataFrame(ciga_rows, columns=[cell.value for cell in ciga_sheet[1]])
             # Remove columns that are None
             ciga_list = ciga_list.loc[:, ciga_list.columns.notnull()]
-            ciga_list = self.create_ciga_list_house_no(ha_name, ciga_list)
             # Perform ciga list merge
-            ciga_list = self.merge_ciga_to_assets(asset_list, ciga_list, ha_name)
+            if not ciga_list.empty:
+                ciga_list = self.create_ciga_list_house_no(ha_name, ciga_list)
+                ciga_list = self.merge_ciga_to_assets(asset_list, ciga_list, ha_name)
 
         return asset_list, survey_list, ciga_list
 
@@ -208,6 +219,10 @@ class DataLoader:
 
         return asset_list
 
+    @staticmethod
+    def correct_ha39_asset_list(asset_list):
+        return asset_list
+
     @staticmethod
     def correct_ha6_survey_list(survey_list):
 
@@ -337,6 +352,10 @@ class DataLoader:
 
         return survey_list
 
+    @staticmethod
+    def correct_ha39_survey_list(survey_list):
+        return survey_list
+
     def merge_surveys_to_assets(self, asset_list, survey_list, ha_name):
 
         # Correct the asset list
@@ -491,23 +510,10 @@ class DataLoader:
                 ha_name=ha_name,
             )
 
-            if file_config.get("survey_list"):
-                # TODO: Delete this
-                logger.info("Loading survey list for {}".format(ha_name))
-                survey_list, matched_lookup = self.load_survey_list(
-                    asset_list=asset_list,
-                    file_path=file_config["survey_list"]["filepath"],
-                    ha_name=ha_name,
-                    sheet_name=file_config["survey_list"]["sheetname"]
-                )
-            else:
-                survey_list = None
-                matched_lookup = None
-
             data[ha_name] = {
                 "asset_list": asset_list,
                 "survey_list": survey_list,
-                "matched_lookup": matched_lookup
+                "ciga_list": ciga_list
             }
 
         self.data = data
@@ -1288,42 +1294,9 @@ def app():
     # List all of the data in the folder
     directories = [str(list(entry.iterdir())[0]) for entry in DATA_FOLDER.iterdir() if entry.is_dir()]
 
-    files = {
-        "ha_1": {
-            "asset_list": {
-                "filepath": "local_data/ha_data/HA1/ACCENT GROUP.xlsx",
-                "sheetname": "Energy data"
-            }
-        },
-        "ha_6": {
-            "asset_list": {
-                "filepath": "etl/eligibility/ha_15_32/HA 6 - ASSET LIST.xlsx",
-                "sheetname": "HA 6"
-            },
-            "survey_list": {
-                "filepath": "etl/eligibility/ha_15_32/HA 6 - SURVEY LIST.xlsx",
-                "sheetname": "HA 6"
-            }
-        },
-        "ha_14": {
-            "asset_list": {
-                "filepath": "etl/eligibility/ha_15_32/HA 14 - ASSET LIST.xlsx",
-                "sheetname": "HA 14"
-            }
-        },
-        "ha_39": {
-            "asset_list": {
-                "filepath": "etl/eligibility/ha_15_32/HA 39 - ASSET LIST.xlsx",
-                "sheetname": "Sheet1"
-            }
-        },
-        "ha_107": {
-            "asset_list": {
-                "filepath": "etl/eligibility/ha_15_32/HA 107 - ASSET LIST.xlsx",
-                "sheetname": "HA 107"
-            }
-        }
-    }
+    priority_has = ["HA1", "HA6", "HA14", "HA39", "HA107"]
+    # Filter down the directories to only the priority HAs
+    directories = [d for d in directories if d.split("/")[2] in priority_has]
 
     loader = DataLoader(directories, use_cache)
     loader.load()

From a1b2f9bf5bdd2d059c6327612fe2cb83c5be1687 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 22 Feb 2024 12:42:04 +0000
Subject: [PATCH 002/248] Added ciga list id

---
 .../ha_15_32/ha_analysis_batch_3.py           | 24 ++++++++++++-------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 7bb8b40c..fffc9daf 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -195,6 +195,7 @@ class DataLoader:
             ciga_list = pd.DataFrame(ciga_rows, columns=[cell.value for cell in ciga_sheet[1]])
             # Remove columns that are None
             ciga_list = ciga_list.loc[:, ciga_list.columns.notnull()]
+            survey_list["survey_list_row_id"] = [ha_name + "_ciga_" + str(i) for i in range(0, len(survey_list))]
             # Perform ciga list merge
             if not ciga_list.empty:
                 ciga_list = self.create_ciga_list_house_no(ha_name, ciga_list)
@@ -440,14 +441,14 @@ class DataLoader:
                     df = df[df["matching_postcode"].str.lower().str.contains(row["Post Code"].lower())]
                     if df.shape[0] != 1:
                         postcode_lower = row["Post Code"].lower()
-                        if postcode_lower in missed_postcodes:
-                            matching_lookup.append(
-                                {
-                                    "survey_list_row_id": row["survey_list_row_id"],
-                                    "asset_list_row_id": None,
-                                }
-                            )
-                            continue
+                        # if postcode_lower in missed_postcodes:
+                        #     matching_lookup.append(
+                        #         {
+                        #             "survey_list_row_id": row["survey_list_row_id"],
+                        #             "asset_list_row_id": None,
+                        #         }
+                        #     )
+                        #     continue
 
                         print(row["Street / Block Name"])
                         print(house_number)
@@ -456,13 +457,18 @@ class DataLoader:
 
             matching_lookup.append(
                 {
-                    "survey_list_row_id": row["survey_list_row_id"],
+                    "ciga_list_row_id": row["ciga_list_row_id"],
                     "asset_list_row_id": df["asset_list_row_id"].values[0],
                 }
             )
 
         matching_lookup = pd.DataFrame(matching_lookup)
 
+        # Merge onto the ciga list
+        ciga_list = ciga_list.merge(matching_lookup, how='left', on="ciga_list_row_id")
+
+        return ciga_list
+
     @staticmethod
     def identify_built_form_ha6(property_string):
         """

From d3bff08df8a4ce0d786acc10f9ab605abc938131 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 22 Feb 2024 12:53:01 +0000
Subject: [PATCH 003/248] debugging survey matching for ha14

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index fffc9daf..d27bf8e8 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -221,7 +221,7 @@ class DataLoader:
         return asset_list
 
     @staticmethod
-    def correct_ha39_asset_list(asset_list):
+    def correct_ha14_asset_list(asset_list):
         return asset_list
 
     @staticmethod
@@ -354,7 +354,15 @@ class DataLoader:
         return survey_list
 
     @staticmethod
-    def correct_ha39_survey_list(survey_list):
+    def correct_ha14_survey_list(survey_list):
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Godfrey Road", "Godfrey Drive"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Oiliver Road", "Oliver Road"
+        )
+
         return survey_list
 
     def merge_surveys_to_assets(self, asset_list, survey_list, ha_name):
@@ -389,7 +397,7 @@ class DataLoader:
             if df.shape[0] != 1:
                 df = df[df["HouseNo"] == str(house_number)]
                 if df.shape[0] != 1:
-                    df = df[df["matching_postcode"].str.lower().str.contains(row["Post Code"].lower())]
+                    df = df[df["matching_postcode"].str.lower().str.contains(row["Post Code"].lower().strip())]
                     if df.shape[0] != 1:
                         postcode_lower = row["Post Code"].lower()
                         if postcode_lower in missed_postcodes:

From c6daf520467b0c994a67f7746b51450f36b6bea7 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 22 Feb 2024 16:00:23 +0000
Subject: [PATCH 004/248] Trying to handle streetname extraction and edge case
 in ciga matching

---
 .../ha_15_32/ha_analysis_batch_3.py           | 192 +++++++++++++-----
 1 file changed, 143 insertions(+), 49 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index d27bf8e8..cb4b9885 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1,4 +1,5 @@
 import os
+import re
 import openpyxl
 from pathlib import Path
 import msgpack
@@ -36,6 +37,10 @@ class DataLoader:
         }
     }
 
+    UNMATCHED_CIGA = {
+        "HA14": 6
+    }
+
     def __init__(self, directories, use_cache):
         self.directories = directories
         self.use_cache = use_cache
@@ -101,6 +106,9 @@ class DataLoader:
         else:
             split_addresses = asset_list['matching_address'].str.split(',', expand=True)
             house_numbers = split_addresses[0].str.split(' ', expand=True)
+            # If we have "flat" or valley" as the house number, then the house number is actually in the second column
+            house_numbers[0] = np.where(house_numbers[0].isin(["flat", "valley"]), house_numbers[1], house_numbers[0])
+
             # THe first column should be HouseNo - we aren't interested in the other columns, but we don't know how
             # many columns there might be
             house_numbers = house_numbers.iloc[:, 0:1]
@@ -117,7 +125,7 @@ class DataLoader:
         :return:
         """
 
-        if ha_name in ["HA6"]:
+        if ha_name in ["HA6", "HA14"]:
             split_addresses = ciga_list['Matched Address'].str.split(',', expand=True)
             house_numbers = split_addresses[0].str.split(' ', expand=True)
             # THe first column should be HouseNo - we aren't interested in the other columns, but we don't know how
@@ -132,16 +140,23 @@ class DataLoader:
         return ciga_list
 
     @staticmethod
-    def get_sheetname(workbook):
+    def get_asset_sheetname(workbook):
         if "Asset List" in workbook.sheetnames:
             return "Asset List"
         else:
             return "Assets"
 
+    @staticmethod
+    def get_ciga_sheetname(workbook):
+        if "CIGA Checks" in workbook.sheetnames:
+            return "CIGA Checks"
+        else:
+            return "CIGA"
+
     def load_asset_list(self, filepath, ha_name):
         workbook = openpyxl.load_workbook(filepath)
-        sheetname = self.get_sheetname(workbook)
-        asset_sheet = workbook[sheetname]
+        asset_sheetname = self.get_asset_sheetname(workbook)
+        asset_sheet = workbook[asset_sheetname]
         asset_sheet_colnames = [cell.value for cell in asset_sheet[1]]
 
         rows_data = []
@@ -165,41 +180,46 @@ class DataLoader:
 
         asset_list = self.append_asset_list_built_form(ha_name=ha_name, asset_list=asset_list)
 
+        # We correct the asset list if it needs it
+        # Correct the asset list
+        correction_function_name = f"correct_{ha_name.lower()}_asset_list"
+        if hasattr(self, correction_function_name):
+            asset_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_asset_list")
+            asset_list = asset_list_correction_function(asset_list)
+
         # We check if there is a survey list
-        survey_list = pd.DataFrame()
-        if "ECO Surveys" in workbook.sheetnames:
-            survey_sheet = workbook["ECO Surveys"]
-            survey_rows = []
-            for row in survey_sheet.iter_rows(min_row=2, values_only=False):  # Assuming the first row is headers
-                row_data = [cell.value for cell in row]  # This will get you the cell values
-                survey_rows.append(row_data)
+        survey_sheetname = "ECO Surveys"
+        survey_sheet = workbook[survey_sheetname]
+        survey_rows = []
+        for row in survey_sheet.iter_rows(min_row=2, values_only=False):  # Assuming the first row is headers
+            row_data = [cell.value for cell in row]  # This will get you the cell values
+            survey_rows.append(row_data)
 
-            survey_list = pd.DataFrame(survey_rows, columns=[cell.value for cell in survey_sheet[1]])
-            # Remove columns that are None
-            survey_list = survey_list.loc[:, survey_list.columns.notnull()]
-            survey_list["survey_list_row_id"] = [ha_name + "_survey_" + str(i) for i in range(0, len(survey_list))]
+        survey_list = pd.DataFrame(survey_rows, columns=[cell.value for cell in survey_sheet[1]])
+        # Remove columns that are None
+        survey_list = survey_list.loc[:, survey_list.columns.notnull()]
+        survey_list["survey_list_row_id"] = [ha_name + "_survey_" + str(i) for i in range(0, len(survey_list))]
 
-            # Perform survey list merge
-            if not survey_list.empty:
-                survey_list = self.merge_surveys_to_assets(asset_list, survey_list, ha_name)
+        # Perform survey list merge
+        if not survey_list.empty:
+            survey_list = self.merge_surveys_to_assets(asset_list, survey_list, ha_name)
 
         # We check if there are CIGA checks
-        ciga_list = pd.DataFrame()
-        if "CIGA Checks" in workbook.sheetnames:
-            ciga_sheet = workbook["CIGA Checks"]
-            ciga_rows = []
-            for row in ciga_sheet.iter_rows(min_row=2, values_only=False):
-                row_data = [cell.value for cell in row]  # This will get you the cell values
-                ciga_rows.append(row_data)
+        ciga_sheetname = self.get_ciga_sheetname(workbook)
+        ciga_sheet = workbook[ciga_sheetname]
+        ciga_rows = []
+        for row in ciga_sheet.iter_rows(min_row=2, values_only=False):
+            row_data = [cell.value for cell in row]  # This will get you the cell values
+            ciga_rows.append(row_data)
 
-            ciga_list = pd.DataFrame(ciga_rows, columns=[cell.value for cell in ciga_sheet[1]])
-            # Remove columns that are None
-            ciga_list = ciga_list.loc[:, ciga_list.columns.notnull()]
-            survey_list["survey_list_row_id"] = [ha_name + "_ciga_" + str(i) for i in range(0, len(survey_list))]
-            # Perform ciga list merge
-            if not ciga_list.empty:
-                ciga_list = self.create_ciga_list_house_no(ha_name, ciga_list)
-                ciga_list = self.merge_ciga_to_assets(asset_list, ciga_list, ha_name)
+        ciga_list = pd.DataFrame(ciga_rows, columns=[cell.value for cell in ciga_sheet[1]])
+        # Remove columns that are None
+        ciga_list = ciga_list.loc[:, ciga_list.columns.notnull()]
+        ciga_list["ciga_list_row_id"] = [ha_name + "_ciga_" + str(i) for i in range(0, len(ciga_list))]
+        # Perform ciga list merge
+        if not ciga_list.empty:
+            ciga_list = self.create_ciga_list_house_no(ha_name, ciga_list)
+            ciga_list = self.merge_ciga_to_assets(asset_list, ciga_list, ha_name)
 
         return asset_list, survey_list, ciga_list
 
@@ -222,6 +242,21 @@ class DataLoader:
 
     @staticmethod
     def correct_ha14_asset_list(asset_list):
+
+        # For 5 Queens Court, DE72 3NP, the postcode is actually DE72 3QZ
+        asset_list.loc[
+            (asset_list["Address 1"] == "5 Queens Court") &
+            (asset_list["Postcode"].str.strip() == "DE72 3NP"),
+            "matching_postcode"
+        ] = "DE72 3QZ"
+
+        # We then correct the matching_address
+        asset_list.loc[
+            (asset_list["Address 1"] == "5 Queens Court") &
+            (asset_list["Postcode"].str.strip() == "DE72 3NP"),
+            "matching_address"
+        ] = "5 queens court, garfield avenue, draycott, derby, de72 3qz"
+
         return asset_list
 
     @staticmethod
@@ -363,13 +398,22 @@ class DataLoader:
             "Oiliver Road", "Oliver Road"
         )
 
+        # For postodes DE7 4FB, DE7 4EZ, it's actually spelled WINDERMERE AVENUE, not WINDEREMERE AVENUE (without the
+        # extra e)
+        survey_list.loc[
+            (survey_list["Street / Block Name"] == "WINDEREMERE AVENUE") &
+            (survey_list["Post Code"].isin(["DE7 4FB", "DE7 4EZ"])),
+            "Street / Block Name"
+        ] = "WINDERMERE AVENUE"
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "MACDONALD SQAURE", "MACDONALD SQUARE"
+        )
+
         return survey_list
 
     def merge_surveys_to_assets(self, asset_list, survey_list, ha_name):
 
-        # Correct the asset list
-        asset_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_asset_list")
-        asset_list = asset_list_correction_function(asset_list)
         # Correct the survey list
         survey_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_survey_list")
         survey_list = survey_list_correction_function(survey_list)
@@ -411,7 +455,7 @@ class DataLoader:
 
                         print(row["Street / Block Name"])
                         print(house_number)
-                        print(row["Post Code"].lower())
+                        print(row["Post Code"])
                         raise ValueError("Investigate")
 
             matching_lookup.append(
@@ -428,8 +472,38 @@ class DataLoader:
 
         return survey_list
 
+    @staticmethod
+    def extract_streetname(address, house_number=None, postcode=None):
+        """
+        Cleans an address by removing the house number and postcode, and converts everything to lower case.
+
+        :param address: The full address as a string.
+        :param house_number: The house number to remove, as a string or integer.
+        :param postcode: The postcode to remove, as a string.
+        :return: The cleaned address.
+        """
+        # Convert everything to lower case
+        address = address.lower()
+
+        if house_number is not None:
+            # Remove the house number
+            address = re.sub(r'\b{}\b'.format(house_number), '', address, flags=re.IGNORECASE).strip()
+
+        if postcode is not None:
+            # Remove the postcode
+            address = re.sub(r'\b{}\b'.format(re.escape(postcode)), '', address, flags=re.IGNORECASE).strip()
+
+        # Get first section before a comma
+        address = address.split(",")[0]
+        # Additional cleaning to remove extra spaces and commas left over
+        address = re.sub(r'\s+', ' ', address)  # Replace multiple spaces with a single space
+        address = re.sub(r'\s*,\s*', ', ', address)  # Clean up space around commas
+
+        return address
+
     def merge_ciga_to_assets(self, asset_list, ciga_list, ha_name):
         matching_lookup = []
+        unmatched_addresses = []
         for _, row in tqdm(ciga_list.iterrows(), total=len(ciga_list)):
 
             house_number = row["HouseNo"]
@@ -442,22 +516,35 @@ class DataLoader:
             ].copy()
 
             df = df[df["HouseNo"] == str(house_number)]
+            # For ciga, we skip
+            if df.empty:
+                if row["Matched Postcode"] == "LE3 3EE":
+                    dew
+                unmatched_addresses.append(
+                    {
+                        "ciga_list_row_id": row["ciga_list_row_id"],
+                        "HouseNo": house_number,
+                        "Matched Postcode": row["Matched Postcode"]
+                    }
+                )
+                continue
             # TODO: Might need to consider street name at some point
             if df.shape[0] != 1:
 
-                if df.shape[0] != 1:
-                    df = df[df["matching_postcode"].str.lower().str.contains(row["Post Code"].lower())]
-                    if df.shape[0] != 1:
-                        postcode_lower = row["Post Code"].lower()
-                        # if postcode_lower in missed_postcodes:
-                        #     matching_lookup.append(
-                        #         {
-                        #             "survey_list_row_id": row["survey_list_row_id"],
-                        #             "asset_list_row_id": None,
-                        #         }
-                        #     )
-                        #     continue
+                # We split house number and postcode out of the matched address for ciga
+                street_name = self.extract_streetname(
+                    address=row["Matched Address"], house_number=house_number, postcode=row["Matched Postcode"]
+                )
+                df = df[df["matching_address"].str.contains(street_name)]
 
+                if df.shape[0] != 1:
+                    # The final check we do here is to check for the presence of flat in the address
+                    if "flat" in row["Matched Address"]:
+                        df = df[df["matching_address"].str.contains("flat")]
+                    else:
+                        df = df[df["matching_address"].str.contains("flat") == False]
+
+                    if df.shape[0] != 1:
                         print(row["Street / Block Name"])
                         print(house_number)
                         print(row["Post Code"].lower())
@@ -470,6 +557,13 @@ class DataLoader:
                 }
             )
 
+        # We have an acceptable number of ciga failures for each HA
+        if len(unmatched_addresses) != self.UNMATCHED_CIGA[ha_name]:
+            raise ValueError(f"Unmatched addresses for {ha_name} is not as expected")
+
+        # In ciga: 35 Valley Drive, Leicester, LE3 3EE
+        #
+
         matching_lookup = pd.DataFrame(matching_lookup)
 
         # Merge onto the ciga list

From 75102704cdfeacaac68194c9646e23f208e48baf Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 22 Feb 2024 16:05:31 +0000
Subject: [PATCH 005/248] ciga matching for ha14

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index cb4b9885..1a28500b 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -38,7 +38,9 @@ class DataLoader:
     }
 
     UNMATCHED_CIGA = {
-        "HA14": 6
+        # We expect 4 unmatched addresses, which have been validated manually as being in the ciga file but not
+        # the asset list
+        "HA14": 4
     }
 
     def __init__(self, directories, use_cache):
@@ -518,8 +520,6 @@ class DataLoader:
             df = df[df["HouseNo"] == str(house_number)]
             # For ciga, we skip
             if df.empty:
-                if row["Matched Postcode"] == "LE3 3EE":
-                    dew
                 unmatched_addresses.append(
                     {
                         "ciga_list_row_id": row["ciga_list_row_id"],
@@ -528,18 +528,18 @@ class DataLoader:
                     }
                 )
                 continue
-            # TODO: Might need to consider street name at some point
+            
             if df.shape[0] != 1:
 
                 # We split house number and postcode out of the matched address for ciga
                 street_name = self.extract_streetname(
                     address=row["Matched Address"], house_number=house_number, postcode=row["Matched Postcode"]
                 )
-                df = df[df["matching_address"].str.contains(street_name)]
+                df = df[df["matching_address"].str.replace(",", "").str.contains(street_name)]
 
                 if df.shape[0] != 1:
                     # The final check we do here is to check for the presence of flat in the address
-                    if "flat" in row["Matched Address"]:
+                    if "flat" in row["Matched Address"].lower():
                         df = df[df["matching_address"].str.contains("flat")]
                     else:
                         df = df[df["matching_address"].str.contains("flat") == False]

From 32352bbde145c6a0c76f503c766e7fca80c2af99 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 22 Feb 2024 17:46:11 +0000
Subject: [PATCH 006/248] working on survey match for ha107

---
 .../ha_15_32/ha_analysis_batch_3.py           | 45 +++++++++++++------
 1 file changed, 32 insertions(+), 13 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 1a28500b..9e850c0e 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -40,7 +40,9 @@ class DataLoader:
     UNMATCHED_CIGA = {
         # We expect 4 unmatched addresses, which have been validated manually as being in the ciga file but not
         # the asset list
-        "HA14": 4
+        "HA14": 4,
+        # There's just too many unmatched here - if we identify some homes that
+        "HA6": 117
     }
 
     def __init__(self, directories, use_cache):
@@ -78,11 +80,11 @@ class DataLoader:
         elif ha_name == "HA107":
             # Create matching_address by concatenating House No, Street, Town, District, Postcode
             asset_list["matching_address"] = asset_list["House No"].astype(str).str.lower().str.strip() + ", " + \
-                                             asset_list["Street"].str.lower().str.strip() + ", " + \
-                                             asset_list["Town"].str.lower().str.strip() + ", " + \
-                                             asset_list["District"].str.lower().str.strip() + ", " + \
-                                             asset_list["Postcode"].str.lower().str.strip()
-            asset_list["matching_postcode"] = asset_list["Postcode"].str.lower().str.strip()
+                                             asset_list["Street"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Town"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["District"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Postcode"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
         else:
             raise NotImplementedError("implement me")
 
@@ -155,6 +157,13 @@ class DataLoader:
         else:
             return "CIGA"
 
+    @staticmethod
+    def get_survey_sheetname(workbook):
+        if "ECO Surveys" in workbook.sheetnames:
+            return "ECO Surveys"
+        else:
+            return "ECO surveys"
+
     def load_asset_list(self, filepath, ha_name):
         workbook = openpyxl.load_workbook(filepath)
         asset_sheetname = self.get_asset_sheetname(workbook)
@@ -189,8 +198,13 @@ class DataLoader:
             asset_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_asset_list")
             asset_list = asset_list_correction_function(asset_list)
 
+        # For HA1, there is an exception in the structure of the data. We don't have any survey or ciga lists, and so
+        # we can return the asset list now
+        if ha_name == "HA1":
+            return asset_list, pd.DataFrame(), pd.DataFrame()
+
         # We check if there is a survey list
-        survey_sheetname = "ECO Surveys"
+        survey_sheetname = self.get_survey_sheetname(workbook)
         survey_sheet = workbook[survey_sheetname]
         survey_rows = []
         for row in survey_sheet.iter_rows(min_row=2, values_only=False):  # Assuming the first row is headers
@@ -217,6 +231,9 @@ class DataLoader:
         ciga_list = pd.DataFrame(ciga_rows, columns=[cell.value for cell in ciga_sheet[1]])
         # Remove columns that are None
         ciga_list = ciga_list.loc[:, ciga_list.columns.notnull()]
+        # Remove rows with missing postcode which happens in a small number of cases
+        ciga_list = ciga_list[~pd.isnull(ciga_list["Matched Postcode"])]
+
         ciga_list["ciga_list_row_id"] = [ha_name + "_ciga_" + str(i) for i in range(0, len(ciga_list))]
         # Perform ciga list merge
         if not ciga_list.empty:
@@ -414,6 +431,10 @@ class DataLoader:
 
         return survey_list
 
+    @staticmethod
+    def correct_ha107_survey_list(survey_list):
+        return survey_list
+
     def merge_surveys_to_assets(self, asset_list, survey_list, ha_name):
 
         # Correct the survey list
@@ -441,7 +462,7 @@ class DataLoader:
 
             df = df[df["matching_address"].str.contains(str(house_number))]
             if df.shape[0] != 1:
-                df = df[df["HouseNo"] == str(house_number)]
+                df = df[df["HouseNo"].astype(str) == str(house_number)]
                 if df.shape[0] != 1:
                     df = df[df["matching_postcode"].str.lower().str.contains(row["Post Code"].lower().strip())]
                     if df.shape[0] != 1:
@@ -506,6 +527,7 @@ class DataLoader:
     def merge_ciga_to_assets(self, asset_list, ciga_list, ha_name):
         matching_lookup = []
         unmatched_addresses = []
+
         for _, row in tqdm(ciga_list.iterrows(), total=len(ciga_list)):
 
             house_number = row["HouseNo"]
@@ -528,7 +550,7 @@ class DataLoader:
                     }
                 )
                 continue
-            
+
             if df.shape[0] != 1:
 
                 # We split house number and postcode out of the matched address for ciga
@@ -561,9 +583,6 @@ class DataLoader:
         if len(unmatched_addresses) != self.UNMATCHED_CIGA[ha_name]:
             raise ValueError(f"Unmatched addresses for {ha_name} is not as expected")
 
-        # In ciga: 35 Valley Drive, Leicester, LE3 3EE
-        #
-
         matching_lookup = pd.DataFrame(matching_lookup)
 
         # Merge onto the ciga list
@@ -612,7 +631,7 @@ class DataLoader:
         for filepath in self.directories:
             ha_name = filepath.split("/")[2]
             # Load asset list
-            logger.info("Loading asset list for {}".format(ha_name))
+            logger.info("Loading data for {}".format(ha_name))
             asset_list, survey_list, ciga_list = self.load_asset_list(
                 filepath=filepath,
                 ha_name=ha_name,

From d038d668b8fa8360577ef0f83403e3d4cb6e854e Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 22 Feb 2024 17:52:20 +0000
Subject: [PATCH 007/248] ha107 matching 73% complete

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 9e850c0e..46581eca 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -433,6 +433,16 @@ class DataLoader:
 
     @staticmethod
     def correct_ha107_survey_list(survey_list):
+        # Replace Front Street, East Stockham with Front Street, East Stockwith
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Front Street, East Stockham", "Front Street, East Stockwith"
+        )
+
+        # Replace "HONEYHOLE L;ANE" with "HONEYHOLES LANE"
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "HONEYHOLE L;ANE", "HONEYHOLES LANE"
+        )
+
         return survey_list
 
     def merge_surveys_to_assets(self, asset_list, survey_list, ha_name):

From ccb764d4a968efeaef67a068f1cc21f92dfe7000 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 22 Feb 2024 18:01:24 +0000
Subject: [PATCH 008/248] ha107 matching 74% done

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 46581eca..60ef485a 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -443,6 +443,16 @@ class DataLoader:
             "HONEYHOLE L;ANE", "HONEYHOLES LANE"
         )
 
+        # Replace "Croft Lane Cherry Willingham, Lincoln" with "Croft Lane, Cherry Willingham, Lincoln"
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Croft Lane Cherry Willingham, Lincoln", "Croft Lane, Cherry Willingham, Lincoln"
+        )
+
+        # Replace "Snelland Road Wickenby, Lincoln" with "Snelland Road, Wickenby, Lincoln"
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Snelland Road Wickenby, Lincoln", "Snelland Road, Wickenby, Lincoln"
+        )
+
         return survey_list
 
     def merge_surveys_to_assets(self, asset_list, survey_list, ha_name):

From cef20c6e2cf97275146f36f97349f4d0a46d2410 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 23 Feb 2024 12:08:44 +0000
Subject: [PATCH 009/248] completed matching for ha107, added Levenshtein method

---
 .../ha_15_32/ha_analysis_batch_3.py           | 64 +++++++++++++++++++
 1 file changed, 64 insertions(+)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 60ef485a..bf3e6d31 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1,6 +1,7 @@
 import os
 import re
 import openpyxl
+import Levenshtein
 from pathlib import Path
 import msgpack
 from datetime import datetime
@@ -453,6 +454,41 @@ class DataLoader:
             "Snelland Road Wickenby, Lincoln", "Snelland Road, Wickenby, Lincoln"
         )
 
+        # Replace Reasby Road Snelland, Lincoln with Reasby Road, Snelland, Lincoln
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Reasby Road Snelland, Lincoln", "Reasby Road, Snelland, Lincoln"
+        )
+
+        # Replace Silver Street Bardney, Lincoln with Silver Street, Bardney, Lincoln
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Silver Street Bardney, Lincoln", "Silver Street, Bardney, Lincoln"
+        )
+
+        # Replace Manor Close Bardney, Lincoln with Manor Close, Bardney, Lincoln
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Manor Close Bardney, Lincoln", "Manor Close, Bardney, Lincoln"
+        )
+
+        # Replace Ferry Road Southrey, Lincoln with Ferry Road, Southrey, Lincoln
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Ferry Road Southrey, Lincoln", "Ferry Road, Southrey, Lincoln"
+        )
+
+        # Replace Harvey Kent Gardens Bardney, Lincoln with Harvey Kent Gardens, Bardney, Lincoln
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Harvey Kent Gardens Bardney, Lincoln", "Harvey Kent Gardens, Bardney, Lincoln"
+        )
+
+        # Replace Wragby Road Bardney, Lincoln with Wragby Road, Bardney, Lincoln
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Wragby Road Bardney, Lincoln", "Wragby Road, Bardney, Lincoln"
+        )
+
+        # Replace SPRINKHILL ROAD with SPINKHILL ROAD
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "SPRINKHILL ROAD", "SPINKHILL ROAD"
+        )
+
         return survey_list
 
     def merge_surveys_to_assets(self, asset_list, survey_list, ha_name):
@@ -481,10 +517,35 @@ class DataLoader:
             ].copy()
 
             df = df[df["matching_address"].str.contains(str(house_number))]
+
+            if df.empty:
+                print(row["Street / Block Name"])
+                print(house_number)
+                print(row["Post Code"])
+                raise ValueError("Investigate")
+
             if df.shape[0] != 1:
                 df = df[df["HouseNo"].astype(str) == str(house_number)]
                 if df.shape[0] != 1:
                     df = df[df["matching_postcode"].str.lower().str.contains(row["Post Code"].lower().strip())]
+
+                    full_key = str(row["NO."]).lower().strip() + row["Street / Block Name"].lower().strip() + row[
+                        "Town/Area"].lower().strip() + row["Post Code"].lower().strip()
+                    # Remove any spaces from the full key
+                    full_key = full_key.replace(" ", "")
+
+                    match_to = df["matching_address"].tolist()
+                    # Strip out punctuation and spaces
+                    match_to = [re.sub(r'[^\w\s]', '', x) for x in match_to]
+                    match_to = [x.replace(" ", "") for x in match_to]
+
+                    # Perform matching between full key and match_to
+                    distances = [Levenshtein.distance(full_key, s) for s in match_to]
+                    best_match_index = distances.index(min(distances))
+                    # We might want to consider a threshold for the distance, however for the momeny,
+                    # we don't consider this for the moment
+                    df = df.iloc[best_match_index:best_match_index + 1]
+
                     if df.shape[0] != 1:
                         postcode_lower = row["Post Code"].lower()
                         if postcode_lower in missed_postcodes:
@@ -510,6 +571,9 @@ class DataLoader:
 
         matching_lookup = pd.DataFrame(matching_lookup)
 
+        if matching_lookup.shape[0] != survey_list.shape[0]:
+            raise ValueError("Mismatch in the number of survey rows and matching lookup rows")
+
         # Merge onto the survey list
         survey_list = survey_list.merge(matching_lookup, how='left', on="survey_list_row_id")
 

From bc0a2b8e37eab7dcfc4130b18b5c3ebe1c0953cc Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 23 Feb 2024 12:11:00 +0000
Subject: [PATCH 010/248] debugging location of dropping nulls from ciga list

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index bf3e6d31..f1709d6e 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -232,12 +232,11 @@ class DataLoader:
         ciga_list = pd.DataFrame(ciga_rows, columns=[cell.value for cell in ciga_sheet[1]])
         # Remove columns that are None
         ciga_list = ciga_list.loc[:, ciga_list.columns.notnull()]
-        # Remove rows with missing postcode which happens in a small number of cases
-        ciga_list = ciga_list[~pd.isnull(ciga_list["Matched Postcode"])]
-
-        ciga_list["ciga_list_row_id"] = [ha_name + "_ciga_" + str(i) for i in range(0, len(ciga_list))]
         # Perform ciga list merge
         if not ciga_list.empty:
+            # Remove rows with missing postcode which happens in a small number of cases
+            ciga_list = ciga_list[~pd.isnull(ciga_list["Matched Postcode"])]
+            ciga_list["ciga_list_row_id"] = [ha_name + "_ciga_" + str(i) for i in range(0, len(ciga_list))]
             ciga_list = self.create_ciga_list_house_no(ha_name, ciga_list)
             ciga_list = self.merge_ciga_to_assets(asset_list, ciga_list, ha_name)
 

From 5a451f2f8239aaac05237c93b99c435de83a8652 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 23 Feb 2024 12:20:46 +0000
Subject: [PATCH 011/248] fixed logic for missed postcodes for ha6

---
 .../ha_15_32/ha_analysis_batch_3.py           | 21 ++++++++++---------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index f1709d6e..95ca3901 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -518,6 +518,17 @@ class DataLoader:
             df = df[df["matching_address"].str.contains(str(house_number))]
 
             if df.empty:
+
+                postcode_lower = row["Post Code"].lower()
+                if postcode_lower in missed_postcodes:
+                    matching_lookup.append(
+                        {
+                            "survey_list_row_id": row["survey_list_row_id"],
+                            "asset_list_row_id": None,
+                        }
+                    )
+                    continue
+
                 print(row["Street / Block Name"])
                 print(house_number)
                 print(row["Post Code"])
@@ -546,16 +557,6 @@ class DataLoader:
                     df = df.iloc[best_match_index:best_match_index + 1]
 
                     if df.shape[0] != 1:
-                        postcode_lower = row["Post Code"].lower()
-                        if postcode_lower in missed_postcodes:
-                            matching_lookup.append(
-                                {
-                                    "survey_list_row_id": row["survey_list_row_id"],
-                                    "asset_list_row_id": None,
-                                }
-                            )
-                            continue
-
                         print(row["Street / Block Name"])
                         print(house_number)
                         print(row["Post Code"])

From 75183902c193a8c5634b8cbc9c7bf045dd5a0898 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 23 Feb 2024 15:54:28 +0000
Subject: [PATCH 012/248] completed creation of matching tables

---
 .../ha_15_32/ha_analysis_batch_3.py           | 63 ++++++++++++++-----
 1 file changed, 48 insertions(+), 15 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 95ca3901..2d95a946 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -43,7 +43,8 @@ class DataLoader:
         # the asset list
         "HA14": 4,
         # There's just too many unmatched here - if we identify some homes that
-        "HA6": 117
+        "HA6": 117,
+        "HA107": 52
     }
 
     def __init__(self, directories, use_cache):
@@ -130,7 +131,7 @@ class DataLoader:
         :return:
         """
 
-        if ha_name in ["HA6", "HA14"]:
+        if ha_name in ["HA6", "HA14", "HA107"]:
             split_addresses = ciga_list['Matched Address'].str.split(',', expand=True)
             house_numbers = split_addresses[0].str.split(' ', expand=True)
             # THe first column should be HouseNo - we aren't interested in the other columns, but we don't know how
@@ -153,8 +154,11 @@ class DataLoader:
 
     @staticmethod
     def get_ciga_sheetname(workbook):
+
         if "CIGA Checks" in workbook.sheetnames:
             return "CIGA Checks"
+        elif "CIGA checks" in workbook.sheetnames:
+            return "CIGA checks"
         else:
             return "CIGA"
 
@@ -490,6 +494,22 @@ class DataLoader:
 
         return survey_list
 
+    @staticmethod
+    def levenstein_match(matching_string, df):
+        match_to = df["matching_address"].tolist()
+        # Strip out punctuation and spaces
+        match_to = [re.sub(r'[^\w\s]', '', x) for x in match_to]
+        match_to = [x.replace(" ", "") for x in match_to]
+
+        # Perform matching between full key and match_to
+        distances = [Levenshtein.distance(matching_string, s) for s in match_to]
+        best_match_index = distances.index(min(distances))
+        # We might want to consider a threshold for the distance, however for the momeny,
+        # we don't consider this for the moment
+        df = df.iloc[best_match_index:best_match_index + 1]
+
+        return df
+
     def merge_surveys_to_assets(self, asset_list, survey_list, ha_name):
 
         # Correct the survey list
@@ -544,17 +564,7 @@ class DataLoader:
                     # Remove any spaces from the full key
                     full_key = full_key.replace(" ", "")
 
-                    match_to = df["matching_address"].tolist()
-                    # Strip out punctuation and spaces
-                    match_to = [re.sub(r'[^\w\s]', '', x) for x in match_to]
-                    match_to = [x.replace(" ", "") for x in match_to]
-
-                    # Perform matching between full key and match_to
-                    distances = [Levenshtein.distance(full_key, s) for s in match_to]
-                    best_match_index = distances.index(min(distances))
-                    # We might want to consider a threshold for the distance, however for the momeny,
-                    # we don't consider this for the moment
-                    df = df.iloc[best_match_index:best_match_index + 1]
+                    df = self.levenstein_match(full_key, df)
 
                     if df.shape[0] != 1:
                         print(row["Street / Block Name"])
@@ -623,7 +633,7 @@ class DataLoader:
                 asset_list["matching_address"].str.contains(row["Matched Postcode"].lower().strip())
             ].copy()
 
-            df = df[df["HouseNo"] == str(house_number)]
+            df = df[df["HouseNo"].astype(str) == str(house_number)]
             # For ciga, we skip
             if df.empty:
                 unmatched_addresses.append(
@@ -641,7 +651,9 @@ class DataLoader:
                 street_name = self.extract_streetname(
                     address=row["Matched Address"], house_number=house_number, postcode=row["Matched Postcode"]
                 )
-                df = df[df["matching_address"].str.replace(",", "").str.contains(street_name)]
+                # We check if any of the rows contains the street name and if they do, filter
+                if any(df["matching_address"].str.replace(",", "").str.contains(street_name)):
+                    df = df[df["matching_address"].str.replace(",", "").str.contains(street_name)]
 
                 if df.shape[0] != 1:
                     # The final check we do here is to check for the presence of flat in the address
@@ -650,6 +662,13 @@ class DataLoader:
                     else:
                         df = df[df["matching_address"].str.contains("flat") == False]
 
+                    if df.shape[0] != 1:
+                        full_key = str(row["HouseNo"]).lower().strip() + row["Matched Address"].lower().strip() + row[
+                            "Matched Postcode"].lower().strip()
+                        # Remove any spaces from the full key
+                        full_key = full_key.replace(" ", "")
+                        df = self.levenstein_match(full_key, df)
+
                     if df.shape[0] != 1:
                         print(row["Street / Block Name"])
                         print(house_number)
@@ -737,6 +756,19 @@ class DataLoader:
             s3_file_name="ha-analysis/batch3-inputs.pickle",
         )
 
+    def ha_facts_and_figures(self):
+        """
+        This function will return a dictionary of facts and figures for each HA
+        :return:
+        """
+        ha_facts_and_figures = []
+        for ha_name, data_assets in self.data.items():
+            asset_list = data_assets["asset_list"]
+            survey_list = data_assets["survey_list"]
+            ciga_list = data_assets["ciga_list"]
+
+        return ha_facts_and_figures
+
 
 def get_epc_data(
     loader, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds, pull_data=True
@@ -1511,6 +1543,7 @@ def app():
 
     loader = DataLoader(directories, use_cache)
     loader.load()
+    loader.ha_facts_and_figures()
 
     # TODO: We probably need to make sure that we have all of the columns that we need
 

From 6693ab4ca6e12a6b9da112e8c8a3d48b1fe6ad87 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 23 Feb 2024 17:13:18 +0000
Subject: [PATCH 013/248] Added in read of december figures

---
 .../ha_15_32/ha_analysis_batch_3.py           | 55 +++++++++++++++++--
 1 file changed, 49 insertions(+), 6 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 2d95a946..dbe12e92 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -47,11 +47,13 @@ class DataLoader:
         "HA107": 52
     }
 
-    def __init__(self, directories, use_cache):
+    def __init__(self, directories, december_figures_filepath, use_cache):
         self.directories = directories
         self.use_cache = use_cache
+        self.december_figures_filepath = december_figures_filepath
 
         self.data = {}
+        self.december_figures = None
 
     def create_asset_list_matching_address(self, ha_name, asset_list):
 
@@ -730,6 +732,11 @@ class DataLoader:
             )
             return
 
+        # Get the december figures, which is just a csv
+        self.december_figures = pd.read_csv(self.december_figures_filepath)
+        # Remove the spaces in HA Name
+        self.december_figures["HA Name"] = december_figures["HA Name"].str.replace(" ", "")
+
         data = {}
         for filepath in self.directories:
             ha_name = filepath.split("/")[2]
@@ -763,9 +770,43 @@ class DataLoader:
         """
         ha_facts_and_figures = []
         for ha_name, data_assets in self.data.items():
-            asset_list = data_assets["asset_list"]
-            survey_list = data_assets["survey_list"]
-            ciga_list = data_assets["ciga_list"]
+            asset_list = data_assets["asset_list"].copy()
+            survey_list = data_assets["survey_list"].copy()
+            ciga_list = data_assets["ciga_list"].copy()
+
+            asset_list["ECO Eligibility"].value_counts()
+
+            # We merge on ciga and update the status to reflect if it has failed ciga or not
+            # If Guarantee is Yes, this means that there is a guarantee in place, and the property failed the CIGA
+            # check
+            asset_list = asset_list.merge(
+                ciga_list[["asset_list_row_id", "Guarantee"]],
+                how='left',
+                on="asset_list_row_id"
+            )
+
+            asset_list["ECO Eligibility"].value_counts()
+
+            asset_list["ECO Eligibility"] = np.where(
+                (
+                    asset_list["ECO Eligibility"].str.contains("(Subject to CIGA)", regex=False) &
+                    (asset_list["Guarantee"] == "Yes")
+                ),
+                "Failed CIGA",
+                asset_list["ECO Eligibility"]
+            )
+
+            # We replace any remaining "Subject to CIGA" with pass Ciga
+            asset_list["ECO Eligibility"] = np.where(
+                asset_list["ECO Eligibility"].str.contains("Subject to CIGA", regex=False),
+                "Pass CIGA",
+                asset_list["ECO Eligibility"]
+            )
+
+            asset_list = asset_list.drop(columns=["Guarantee"])
+
+            # Update the asset list with the categorisations
+            self.data[ha_name]["asset_list"] = asset_list
 
         return ha_facts_and_figures
 
@@ -1532,16 +1573,18 @@ def app():
     :return:
     """
 
-    use_cache = False
+    use_cache = True
 
     # List all of the data in the folder
     directories = [str(list(entry.iterdir())[0]) for entry in DATA_FOLDER.iterdir() if entry.is_dir()]
+    # Grab the December HA figures filepath
+    december_figures_filepath = "local_data/ha_data/HA_December_figures.csv"
 
     priority_has = ["HA1", "HA6", "HA14", "HA39", "HA107"]
     # Filter down the directories to only the priority HAs
     directories = [d for d in directories if d.split("/")[2] in priority_has]
 
-    loader = DataLoader(directories, use_cache)
+    loader = DataLoader(directories, december_figures_filepath, use_cache)
     loader.load()
     loader.ha_facts_and_figures()
 

From 8b48dbac9e5e9f25e3c738c1322b1f3a9fbb11db Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 26 Feb 2024 13:37:50 +0000
Subject: [PATCH 014/248] working on eco eligibility code

---
 .../ha_15_32/ha_analysis_batch_3.py           | 153 ++++++++++++++----
 1 file changed, 122 insertions(+), 31 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index dbe12e92..fdc00876 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -725,6 +725,13 @@ class DataLoader:
 
     def load(self):
 
+        # Get the december figures, which is just a csv
+        self.december_figures = pd.read_csv(self.december_figures_filepath)
+        # Remove the spaces in HA Name
+        self.december_figures["HA Name"] = self.december_figures["HA Name"].str.replace(" ", "")
+        self.december_figures["ECO4"] = self.december_figures["ECO4"].astype("Int64")
+        self.december_figures["GBIS"] = self.december_figures["GBIS"].astype("Int64")
+
         if self.use_cache:
             self.data = read_pickle_from_s3(
                 bucket_name="retrofit-datalake-dev",
@@ -732,11 +739,6 @@ class DataLoader:
             )
             return
 
-        # Get the december figures, which is just a csv
-        self.december_figures = pd.read_csv(self.december_figures_filepath)
-        # Remove the spaces in HA Name
-        self.december_figures["HA Name"] = december_figures["HA Name"].str.replace(" ", "")
-
         data = {}
         for filepath in self.directories:
             ha_name = filepath.split("/")[2]
@@ -768,46 +770,135 @@ class DataLoader:
         This function will return a dictionary of facts and figures for each HA
         :return:
         """
+
+        scheme_map = {
+            "ECO4": "ECO4",
+            "AFFORDABLE WARMTH": "ECO4",
+        }
+
+        eco_eligibility_map = {
+            "not eligble": "not eligible"
+        }
+
         ha_facts_and_figures = []
         for ha_name, data_assets in self.data.items():
             asset_list = data_assets["asset_list"].copy()
             survey_list = data_assets["survey_list"].copy()
             ciga_list = data_assets["ciga_list"].copy()
 
-            asset_list["ECO Eligibility"].value_counts()
+            # Change the column name if it's ECO eligibility
+            asset_list = asset_list.rename(columns={"ECO eligibility": "ECO Eligibility"})
+            # Remove surplus whitespace from the ECO Eligibility column
+            asset_list["ECO Eligibility"] = asset_list["ECO Eligibility"].str.strip()
+            # Push to lower case
+            asset_list["ECO Eligibility"] = asset_list["ECO Eligibility"].str.lower()
+            # Remap
+            asset_list["ECO Eligibility"] = asset_list["ECO Eligibility"].map(eco_eligibility_map)
 
-            # We merge on ciga and update the status to reflect if it has failed ciga or not
-            # If Guarantee is Yes, this means that there is a guarantee in place, and the property failed the CIGA
-            # check
-            asset_list = asset_list.merge(
-                ciga_list[["asset_list_row_id", "Guarantee"]],
-                how='left',
-                on="asset_list_row_id"
-            )
+            if not ciga_list.empty:
+                # We merge on ciga and update the status to reflect if it has failed ciga or not
+                # If Guarantee is Yes, this means that there is a guarantee in place, and the property failed the CIGA
+                # check
+                asset_list = asset_list.merge(
+                    ciga_list[["asset_list_row_id", "Guarantee"]],
+                    how='left',
+                    on="asset_list_row_id"
+                )
 
-            asset_list["ECO Eligibility"].value_counts()
+                asset_list["ECO Eligibility"].value_counts()
 
-            asset_list["ECO Eligibility"] = np.where(
-                (
-                    asset_list["ECO Eligibility"].str.contains("(Subject to CIGA)", regex=False) &
-                    (asset_list["Guarantee"] == "Yes")
-                ),
-                "Failed CIGA",
-                asset_list["ECO Eligibility"]
-            )
+                asset_list["ECO Eligibility"] = np.where(
+                    (
+                        asset_list["ECO Eligibility"].str.contains("(subject to ciga)", regex=False) &
+                        (asset_list["Guarantee"] == "Yes")
+                    ),
+                    "failed ciga",
+                    asset_list["ECO Eligibility"]
+                )
 
-            # We replace any remaining "Subject to CIGA" with pass Ciga
-            asset_list["ECO Eligibility"] = np.where(
-                asset_list["ECO Eligibility"].str.contains("Subject to CIGA", regex=False),
-                "Pass CIGA",
-                asset_list["ECO Eligibility"]
-            )
+                # We replace any remaining "Subject to CIGA" with pass Ciga
+                asset_list["ECO Eligibility"] = np.where(
+                    asset_list["ECO Eligibility"].str.contains("Subject to CIGA", regex=False),
+                    "eco4 - passed ciga",
+                    asset_list["ECO Eligibility"]
+                )
 
-            asset_list = asset_list.drop(columns=["Guarantee"])
+                asset_list = asset_list.drop(columns=["Guarantee"])
 
-            # Update the asset list with the categorisations
+            # Update the asset list with the categorisations and rename changes
             self.data[ha_name]["asset_list"] = asset_list
 
+            # Report on sales
+            sales_report = {}
+            if not survey_list.empty:
+                scheme_column = survey_list.columns[0]
+                # We clean up the survey list installation or cancelled
+                survey_list["installed_or_cancelled_clean"] = survey_list["INSTALLED OR CANCELLED"].str.lower()
+                # Remove all punctuation
+                survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.replace(
+                    r'[^\w\s]', '', regex=True
+                )
+                # Remove double spaces
+                survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.replace(
+                    r'\s+', ' ', regex=True
+                )
+                # Remove trailing spaces
+                survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.strip()
+
+                # Remap the values in the scheme column
+                survey_list[scheme_column] = survey_list[scheme_column].map(scheme_map)
+
+                survey_list["installation_status"] = None
+                survey_list["installation_status"] = np.where(
+                    survey_list["installed_or_cancelled_clean"].isin(["installed", "installed see notes"]),
+                    "installed",
+                    survey_list["installation_status"]
+                )
+                survey_list["installation_status"] = np.where(
+                    survey_list["installed_or_cancelled_clean"].isin(["cancelled"]),
+                    "cancelled",
+                    survey_list["installation_status"]
+                )
+                # Find partial installations
+                survey_list["installation_status"] = np.where(
+                    survey_list["installed_or_cancelled_clean"].str.contains("still to be installed"),
+                    "partially installed",
+                    survey_list["installation_status"]
+                )
+                # Find partial cancellations
+                # TODO: We might have more indications of partial cancellations
+                survey_list["installation_status"] = np.where(
+                    survey_list["installed_or_cancelled_clean"].isin(["loft cancelled"]),
+                    "partially cancelled",
+                    survey_list["installation_status"]
+                )
+
+                # Finally, for other cases, we set the status to "in progress"
+                survey_list["installation_status"] = survey_list["installation_status"].fillna("in progress")
+
+                # We concatenate the scheme name with the installation status
+                survey_list["installation_status"] = (
+                    survey_list[scheme_column] + " - " + survey_list["installation_status"]
+                )
+
+                # We get the sales
+                sales_report = survey_list["installation_status"].value_counts().to_dict()
+
+            ha_facts_and_figures.append(
+                {
+                    "HA Name": ha_name,
+                    **asset_list["ECO Eligibility"].value_counts().to_dict(),
+                    **sales_report
+                }
+            )
+
+        ha_facts_and_figures = pd.DataFrame(ha_facts_and_figures)
+        ha_facts_and_figures = ha_facts_and_figures.drop(
+            columns=["not eligible"]
+        )
+
+        ha_facts_and_figures = self.december_figures.merge(ha_facts_and_figures, how="inner", on="HA Name")
+
         return ha_facts_and_figures
 
 

From ae2cc3fab57687bdc83d4aef4d60c23bd3a3b5e8 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 26 Feb 2024 14:14:19 +0000
Subject: [PATCH 015/248] working on ha facts and figures

---
 .../ha_15_32/ha_analysis_batch_3.py           | 25 +++++++++++--------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index fdc00876..d75a9f34 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -42,7 +42,7 @@ class DataLoader:
         # We expect 4 unmatched addresses, which have been validated manually as being in the ciga file but not
         # the asset list
         "HA14": 4,
-        # There's just too many unmatched here - if we identify some homes that
+        # There's just too many unmatched here
         "HA6": 117,
         "HA107": 52
     }
@@ -786,6 +786,8 @@ class DataLoader:
             survey_list = data_assets["survey_list"].copy()
             ciga_list = data_assets["ciga_list"].copy()
 
+            asset_list_starting_size = asset_list.shape[0]
+
             # Change the column name if it's ECO eligibility
             asset_list = asset_list.rename(columns={"ECO eligibility": "ECO Eligibility"})
             # Remove surplus whitespace from the ECO Eligibility column
@@ -793,19 +795,17 @@ class DataLoader:
             # Push to lower case
             asset_list["ECO Eligibility"] = asset_list["ECO Eligibility"].str.lower()
             # Remap
-            asset_list["ECO Eligibility"] = asset_list["ECO Eligibility"].map(eco_eligibility_map)
+            asset_list["ECO Eligibility"] = asset_list["ECO Eligibility"].replace(eco_eligibility_map)
 
             if not ciga_list.empty:
                 # We merge on ciga and update the status to reflect if it has failed ciga or not
                 # If Guarantee is Yes, this means that there is a guarantee in place, and the property failed the CIGA
                 # check
-                asset_list = asset_list.merge(
-                    ciga_list[["asset_list_row_id", "Guarantee"]],
-                    how='left',
-                    on="asset_list_row_id"
-                )
 
-                asset_list["ECO Eligibility"].value_counts()
+                ciga_list_to_merge = ciga_list[["asset_list_row_id", "Guarantee"]].copy()
+                ciga_list_to_merge = ciga_list_to_merge[~pd.isnull(ciga_list_to_merge["asset_list_row_id"])]
+
+                asset_list = asset_list.merge(ciga_list_to_merge, how='left', on="asset_list_row_id")
 
                 asset_list["ECO Eligibility"] = np.where(
                     (
@@ -818,7 +818,10 @@ class DataLoader:
 
                 # We replace any remaining "Subject to CIGA" with pass Ciga
                 asset_list["ECO Eligibility"] = np.where(
-                    asset_list["ECO Eligibility"].str.contains("Subject to CIGA", regex=False),
+                    (
+                        asset_list["ECO Eligibility"].str.contains("(subject to ciga)", regex=False) &
+                        (asset_list["Guarantee"] == "No")
+                    ),
                     "eco4 - passed ciga",
                     asset_list["ECO Eligibility"]
                 )
@@ -826,6 +829,8 @@ class DataLoader:
                 asset_list = asset_list.drop(columns=["Guarantee"])
 
             # Update the asset list with the categorisations and rename changes
+            if asset_list.shape[0] != asset_list_starting_size:
+                raise ValueError("The asset list has changed in size")
             self.data[ha_name]["asset_list"] = asset_list
 
             # Report on sales
@@ -846,7 +851,7 @@ class DataLoader:
                 survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.strip()
 
                 # Remap the values in the scheme column
-                survey_list[scheme_column] = survey_list[scheme_column].map(scheme_map)
+                survey_list[scheme_column] = survey_list[scheme_column].replace(scheme_map)
 
                 survey_list["installation_status"] = None
                 survey_list["installation_status"] = np.where(

From 8ef0198606486cf3eee9abf84723181ef221ea6b Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 26 Feb 2024 16:22:50 +0000
Subject: [PATCH 016/248] handling deduping ciga match

---
 .../ha_15_32/ha_analysis_batch_3.py           | 21 +++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index d75a9f34..6ffe50e3 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -41,7 +41,7 @@ class DataLoader:
     UNMATCHED_CIGA = {
         # We expect 4 unmatched addresses, which have been validated manually as being in the ciga file but not
         # the asset list
-        "HA14": 4,
+        "HA14": 3,
         # There's just too many unmatched here
         "HA6": 117,
         "HA107": 52
@@ -147,6 +147,17 @@ class DataLoader:
 
         return ciga_list
 
+    @staticmethod
+    def dedupe_ciga_list(ciga_list):
+        """Add a normalised "unique_key" column in place and return only the first row for each key."""
+        ciga_list["unique_key"] = ciga_list["Matched Address"] + ciga_list["Matched Postcode"]
+        # Remove spaces from the unique key (literal match)
+        ciga_list["unique_key"] = ciga_list["unique_key"].str.replace(" ", "", regex=False)
+        # Remove punctuation - pandas >= 2.0 only treats the pattern as a regex when regex=True
+        ciga_list["unique_key"] = ciga_list["unique_key"].str.replace(r'[^\w\s]', '', regex=True)
+        # Drop duplicated keys
+        return ciga_list[~ciga_list["unique_key"].duplicated()]
+
     @staticmethod
     def get_asset_sheetname(workbook):
         if "Asset List" in workbook.sheetnames:
@@ -244,6 +255,7 @@ class DataLoader:
             ciga_list = ciga_list[~pd.isnull(ciga_list["Matched Postcode"])]
             ciga_list["ciga_list_row_id"] = [ha_name + "_ciga_" + str(i) for i in range(0, len(ciga_list))]
             ciga_list = self.create_ciga_list_house_no(ha_name, ciga_list)
+            ciga_list = self.dedupe_ciga_list(ciga_list)
             ciga_list = self.merge_ciga_to_assets(asset_list, ciga_list, ha_name)
 
         return asset_list, survey_list, ciga_list
@@ -686,10 +698,15 @@ class DataLoader:
 
         # We have an acceptable number of ciga failures for each HA
         if len(unmatched_addresses) != self.UNMATCHED_CIGA[ha_name]:
-            raise ValueError(f"Unmatched addresses for {ha_name} is not as expected")
+            raise ValueError(
+                f"Unmatched addresses for {ha_name} is not as expected, got {len(unmatched_addresses)} unmatched")
 
         matching_lookup = pd.DataFrame(matching_lookup)
 
+        # Check dupes as this will cause problems later on
+        if matching_lookup["asset_list_row_id"].duplicated().any():
+            raise ValueError("Duplicated asset list row ids")
+
         # Merge onto the ciga list
         ciga_list = ciga_list.merge(matching_lookup, how='left', on="ciga_list_row_id")
 

From 78f5226ad7a5ec81e4da1ca6f9e78565146e0457 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 26 Feb 2024 16:38:14 +0000
Subject: [PATCH 017/248] put together ha facts and figures

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 6ffe50e3..bd4d5128 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -44,7 +44,7 @@ class DataLoader:
         "HA14": 3,
         # There's just too many unmatched here
         "HA6": 117,
-        "HA107": 52
+        "HA107": 51
     }
 
     def __init__(self, directories, december_figures_filepath, use_cache):
@@ -54,6 +54,7 @@ class DataLoader:
 
         self.data = {}
         self.december_figures = None
+        self.ha_facts_and_figures = None
 
     def create_asset_list_matching_address(self, ha_name, asset_list):
 
@@ -794,7 +795,8 @@ class DataLoader:
         }
 
         eco_eligibility_map = {
-            "not eligble": "not eligible"
+            "not eligble": "not eligible",
+            "eco 4(subject to ciga)": "eco4 (subject to ciga)",
         }
 
         ha_facts_and_figures = []
@@ -919,9 +921,15 @@ class DataLoader:
             columns=["not eligible"]
         )
 
-        ha_facts_and_figures = self.december_figures.merge(ha_facts_and_figures, how="inner", on="HA Name")
+        ha_facts_and_figures = ha_facts_and_figures.fillna(0)
+        # Make all columns apart from HA Name integers
+        for col in ha_facts_and_figures.columns[1:]:
+            ha_facts_and_figures[col] = ha_facts_and_figures[col].astype(int)
 
-        return ha_facts_and_figures
+        ha_facts_and_figures = self.december_figures.merge(ha_facts_and_figures, how="inner", on="HA Name")
+        ha_facts_and_figures = ha_facts_and_figures.fillna(0)
+
+        self.ha_facts_and_figures = ha_facts_and_figures
 
 
 def get_epc_data(

From c18740eebda1a2b307a91e215f78fdeafcad8402 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 26 Feb 2024 18:44:11 +0000
Subject: [PATCH 018/248] updating eligibility detection

---
 etl/eligibility/Eligibility.py                |  57 +--
 .../ha_15_32/ha_analysis_batch_3.py           | 402 ++++++++++--------
 2 files changed, 249 insertions(+), 210 deletions(-)

diff --git a/etl/eligibility/Eligibility.py b/etl/eligibility/Eligibility.py
index 906ff594..b09d2df5 100644
--- a/etl/eligibility/Eligibility.py
+++ b/etl/eligibility/Eligibility.py
@@ -340,7 +340,6 @@ class Eligibility:
 
         # Check if the property is suitable for cavity wall
         self.cavity_insulation()
-        self.loft_insulation()
 
         self.gbis_warmfront = (self.cavity["suitability"]) and (
             int(self.epc["current-energy-efficiency"]) <= 68
@@ -384,43 +383,49 @@ class Eligibility:
         if current_sap >= 69:
             self.eco4_warmfront = {
                 "eligible": False,
-                "message": "sap too high",
+                "message": "SAP too high",
                 "cavity_type": self.cavity["type"],
                 "loft_type": self.loft["thickness_classification"]
             }
             return
 
-        if post_retrofit_sap is None:
-
-            if current_sap >= 55:
-                message = "Possibly eligible but property currently EPC D"
-            else:
-                message = "subject to post retrofit sap" if is_eligible else "not eligible"
-
-            # Update the message to flag properties that failed just because of a full cavity.
-            # We need to double check that the wall is a cavity, that the loft is suitable and that the
-            # sap is within reason
-            # We can then estimate the age of the cavity fill
-            if not is_eligible and (current_sap < 69) and self.loft["suitability"] and self.walls["is_cavity_wall"]:
-                message = "Failed due to full cavity - check cavity age"
-
+        if not is_eligible and current_sap >= 55:
             self.eco4_warmfront = {
-                "eligible": is_eligible,
-                "message": message,
+                "eligible": False,
+                "message": "failed fabric and SAP check",
                 "cavity_type": self.cavity["type"],
                 "loft_type": self.loft["thickness_classification"]
             }
             return
 
-        is_eligible = is_eligible & (post_retrofit_sap >= 69)
+        if not is_eligible and current_sap < 55:
+            self.eco4_warmfront = {
+                "eligible": False,
+                "message": "failed fabric check",
+                "cavity_type": self.cavity["type"],
+                "loft_type": self.loft["thickness_classification"]
+            }
+            return
 
-        self.eco4_warmfront = {
-            "eligible": is_eligible,
-            "message": None,
-            "cavity_type": self.cavity["type"],
-            "loft_type": self.loft["thickness_classification"]
-        }
-        return
+        if is_eligible and current_sap >= 55:
+            self.eco4_warmfront = {
+                "eligible": True,
+                "message": "Meets fabric, fails SAP check",
+                "cavity_type": self.cavity["type"],
+                "loft_type": self.loft["thickness_classification"]
+            }
+            return
+
+        if is_eligible and current_sap < 55:
+            self.eco4_warmfront = {
+                "eligible": True,
+                "message": "Meets fabric and SAP check",
+                "cavity_type": self.cavity["type"],
+                "loft_type": self.loft["thickness_classification"]
+            }
+            return
+
+        raise ValueError("Implement me")
 
     def check_gbis(self):
 
diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index bd4d5128..5dd9b6e1 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -25,6 +25,84 @@ DATA_FOLDER = Path(__file__).parent / "local_data" / "ha_data"
 logger = setup_logger()
 load_dotenv(ENV_FILE)
 
+PROPERTY_TYPE_LOOKUP = {  # Per-HA lookups from raw asset list labels to property_type / built_form values
+    "HA1": {
+        "built_form": {
+            'Mid Terrace': 'Mid-Terrace',
+            'Semi-Detached': 'Semi-Detached',
+            'End Terrace': 'End-Terrace',
+            'Detached': 'Detached',
+            'Enclosed Mid': 'Mid-Terrace',
+            'Detached Local Connect': 'Detached',
+        }
+    },
+    "HA6": {
+        "property_type": {
+            'HOUSE': "House",
+            'GROUND FLOOR FLAT': "Flat",
+            'UPPER FLOOR FLAT': "Flat",
+            'MAISONETTE': "Maisonette",
+            'BUNGALOW': "Bungalow",
+            'WARDEN BUNGALOW': "Bungalow",
+            'WARDEN FLAT': "Flat",
+            'EXTRACARE SCHEME': "Flat",
+        }
+    },
+    "HA14": {
+        "property_type": {
+            "House": "House",
+            "Flat": "Flat",
+            "Bungalow": "Bungalow",
+            "Maisonette": "Maisonette",
+        }
+    },
+    "HA39": {  # Keyed directly by the raw "ConstructionStyle" label; each entry holds both output fields
+        "Semi house": {"property_type": "House", "built_form": "Semi-Detached"},
+        "1st floor flat": {"property_type": "Flat", "built_form": None},
+        "Mid terrace house": {"property_type": "House", "built_form": "Mid-Terrace"},
+        "Ground floor flat": {"property_type": "Flat", "built_form": None},
+        "End terrace house": {"property_type": "House", "built_form": "End-Terrace"},
+        "Semi bungalow": {"property_type": "Bungalow", "built_form": "Semi-Detached"},
+        "End terrace bungalow": {"property_type": "Bungalow", "built_form": "End-Terrace"},
+        "2nd floor flat": {"property_type": "Flat", "built_form": None},
+        "Mid terrace bungalow": {"property_type": "Bungalow", "built_form": "Mid-Terrace"},
+        "3rd floor flat": {"property_type": "Flat", "built_form": None},
+        "Detached bungalow": {"property_type": "Bungalow", "built_form": "Detached"},
+        "Maisonette": {"property_type": "Maisonette", "built_form": None},
+        "Detached house": {"property_type": "House", "built_form": "Detached"},
+        "Lower ground floor flat": {"property_type": "Flat", "built_form": None},
+        "Dormer bungalow": {"property_type": "Bungalow", "built_form": None},
+        "Basement flat": {"property_type": "Flat", "built_form": None},
+        "Cluster House": {"property_type": "House", "built_form": "Detached"},
+        "2nd/3rd floor duplex flat": {"property_type": "Flat", "built_form": None},
+        "Ground floor flat with study": {"property_type": "Flat", "built_form": None},
+        "4th floor flat": {"property_type": "Flat", "built_form": None},
+        "1st floor flat with study room": {"property_type": "Flat", "built_form": None},
+        "2nd floor flat with study": {"property_type": "Flat", "built_form": None},
+    },
+    "HA107": {
+        "property_type": {
+            "HOUSE": "House",
+            "BUNGALOW": "Bungalow",
+            "GRD FLOOR FLAT": "Flat",
+            "FIRST FLOOR FLAT": "Flat",
+            "SHELTERED BUNGALOW": "Bungalow",
+            "MAISONETTE": "Maisonette",
+            "SECOND FLOOR FLAT": "Flat",
+            "SHELTERED FIRST FLR": "Flat",
+            "SHELTERED GROUND FLR": "Flat",
+            "GRD FLOOR BED SIT": "House"  # NOTE(review): HA1 remaps bedsits to "Flat" - confirm "House" here
+        },
+        "built_form": {
+            "Semi Detached": "Semi-Detached",
+            "Mid Terrace": "Mid-Terrace",
+            "End Terrace": "End-Terrace",
+            "Detached": "Detached",
+            "Detatched": "Detached",  # presumably a misspelling present in the HA data - TODO confirm
+        }
+    }
+}
+
 
 class DataLoader:
     COLUMN_CONFIG = {
@@ -54,7 +132,7 @@ class DataLoader:
 
         self.data = {}
         self.december_figures = None
-        self.ha_facts_and_figures = None
+        self.facts_and_figures = None
 
     def create_asset_list_matching_address(self, ha_name, asset_list):
 
@@ -929,7 +1007,77 @@ class DataLoader:
         ha_facts_and_figures = self.december_figures.merge(ha_facts_and_figures, how="inner", on="HA Name")
         ha_facts_and_figures = ha_facts_and_figures.fillna(0)
 
-        self.ha_facts_and_figures = ha_facts_and_figures
+        self.facts_and_figures = ha_facts_and_figures
+
+
+def get_property_type_and_built_form(property_meta, ha_name):
+    """
+    Work out the property type and built form for a single property, using the rules for its HA.
+    :param property_meta: record for one property, read by that HA's raw column names
+    :param ha_name: "HA1", "HA6", "HA14", "HA39" or "HA107"; any other value raises NotImplementedError
+    :return: (property_type, built_form); either can be None when no rule or lookup entry applies
+    """
+    if ha_name == "HA1":
+        property_type = property_meta["Asset Type"]
+        # We correct a small data error, where the asset type is recorded as "a"
+        if property_type == "a":
+            property_type = "House"
+
+        # Remap bedsits and rooms to flats
+        if property_type in ["Bedsit", "Room"]:
+            property_type = "Flat"
+
+        built_form = PROPERTY_TYPE_LOOKUP[ha_name]["built_form"].get(property_meta["Property Type"], None)
+    elif ha_name == "HA6":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name]["property_type"][property_meta["Dwelling type"]]
+        built_form = property_meta["built_form"]
+    elif ha_name == "HA14":
+        if property_meta["Asset Type Description"] == "Block - Repair":
+            # We deduce house vs flat from the address: "room" in Address 1 gives a house, anything else a flat
+            if "room" in property_meta["Address 1"].lower():
+                property_type = "House"
+            else:
+                property_type = "Flat"
+        else:
+            property_type = PROPERTY_TYPE_LOOKUP[ha_name]["property_type"][
+                property_meta["Asset Type Description"]
+            ]
+
+        built_form = None
+    elif ha_name == "HA39":
+        # The HA39 lookup is keyed by ConstructionStyle and gives both the property type and built form
+        property_type_config = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["ConstructionStyle"], {})
+        property_type = property_type_config.get("property_type", None)
+        built_form = property_type_config.get("built_form", None)
+
+        if property_type is None:
+            # No lookup entry: a flat if "flat" appears in the matching address, otherwise a house
+            if "flat" in property_meta["matching_address"]:
+                property_type = "Flat"
+            else:
+                property_type = "House"
+    elif ha_name == "HA107":
+        dwelling_style = property_meta["Dwelling Style"]
+        if isinstance(dwelling_style, str):
+            dwelling_style = dwelling_style.strip()
+
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name]["property_type"].get(property_meta["DwellingType"])
+        built_form = PROPERTY_TYPE_LOOKUP[ha_name]["built_form"].get(dwelling_style, None)
+        # Fallbacks when DwellingType is not in the lookup - applied in order, so a later match overrides
+        if property_type is None:
+            if built_form in ["Semi-Detached", "Mid-Terrace", "End-Terrace", "Detached"]:
+                property_type = "House"
+            if "flat" in property_meta["Wall Construction"].lower():
+                property_type = "Flat"
+            if (property_meta["DwellingType"] == "UNKNOWN") & (property_meta["Dwelling Style"] == 0):
+                # Handle a few specific cases
+                property_type = "Bungalow"
+            if property_meta["Street"] == "School View":
+                property_type = "Bungalow"
+    else:
+        raise NotImplementedError("Implement me")
+
+    return property_type, built_form
 
 
 def get_epc_data(
@@ -938,84 +1086,6 @@ def get_epc_data(
     if not loader.data:
         raise ValueError("Data not found - please run loader.load() first")
 
-    property_type_lookup = {
-        "ha_1": {
-            "built_form": {
-                'Mid Terrace': 'Mid-Terrace',
-                'Semi-Detached': 'Semi-Detached',
-                'End Terrace': 'End-Terrace',
-                'Detached': 'Detached',
-                'Enclosed Mid': 'Mid-Terrace',
-                'Detached Local Connect': 'Detached',
-            }
-        },
-        "ha_6": {
-            "property_type": {
-                'HOUSE': "House",
-                'GROUND FLOOR FLAT': "Flat",
-                'UPPER FLOOR FLAT': "Flat",
-                'MAISONETTE': "Maisonette",
-                'BUNGALOW': "Bungalow",
-                'WARDEN BUNGALOW': "Bungalow",
-                'WARDEN FLAT': "Flat",
-                'EXTRACARE SCHEME': "Flat",
-            }
-        },
-        "ha_14": {
-            "property_type": {
-                "House": "House",
-                "Flat": "Flat",
-                "Bungalow": "Bungalow",
-                "Maisonette": "Maisonette",
-            }
-        },
-        "ha_39": {
-            "Semi house": {"property_type": "House", "built_form": "Semi-Detached"},
-            "1st floor flat": {"property_type": "Flat", "built_form": None},
-            "Mid terrace house": {"property_type": "House", "built_form": "Mid-Terrace"},
-            "Ground floor flat": {"property_type": "Flat", "built_form": None},
-            "End terrace house": {"property_type": "House", "built_form": "End-Terrace"},
-            "Semi bungalow": {"property_type": "Bungalow", "built_form": "Semi-Detached"},
-            "End terrace bungalow": {"property_type": "Bungalow", "built_form": "End-Terrace"},
-            "2nd floor flat": {"property_type": "Flat", "built_form": None},
-            "Mid terrace bungalow": {"property_type": "Bungalow", "built_form": "Mid-Terrace"},
-            "3rd floor flat": {"property_type": "Flat", "built_form": None},
-            "Detached bungalow": {"property_type": "Bungalow", "built_form": "Detached"},
-            "Maisonette": {"property_type": "Maisonette", "built_form": None},
-            "Detached house": {"property_type": "House", "built_form": "Detached"},
-            "Lower ground floor flat": {"property_type": "Flat", "built_form": None},
-            "Dormer bungalow": {"property_type": "Bungalow", "built_form": None},
-            "Basement flat": {"property_type": "Flat", "built_form": None},
-            "Cluster House": {"property_type": "House", "built_form": "Detached"},
-            "2nd/3rd floor duplex flat": {"property_type": "Flat", "built_form": None},
-            "Ground floor flat with study": {"property_type": "Flat", "built_form": None},
-            "4th floor flat": {"property_type": "Flat", "built_form": None},
-            "1st floor flat with study room": {"property_type": "Flat", "built_form": None},
-            "2nd floor flat with study": {"property_type": "Flat", "built_form": None},
-        },
-        "ha_107": {
-            "property_type": {
-                "HOUSE": "House",
-                "BUNGALOW": "Bungalow",
-                "GRD FLOOR FLAT": "Flat",
-                "FIRST FLOOR FLAT": "Flat",
-                "SHELTERED BUNGALOW": "Bungalow",
-                "MAISONETTE": "Maisonette",
-                "SECOND FLOOR FLAT": "Flat",
-                "SHELTERED FIRST FLR": "Flat",
-                "SHELTERED GROUND FLR": "Flat",
-                "GRD FLOOR BED SIT": "House"
-            },
-            "built_form": {
-                "Semi Detached": "Semi-Detached",
-                "Mid Terrace": "Mid-Terrace",
-                "End Terrace": "End-Terrace",
-                "Detached": "Detached",
-                "Detatched": "Detached",
-            }
-        }
-    }
-
     outputs = {}
     for ha_name, data_assets in loader.data.items():
 
@@ -1049,77 +1119,15 @@ def get_epc_data(
             if property_meta["matching_postcode"] is None:
                 continue
 
-            if ha_name == "ha_1":
-                property_type = property_meta["Asset Type"]
-                # We correct a small error
-                if property_type == "a":
-                    property_type = "House"
-
-                # Remap bedsits to flats
-                if property_type in ["Bedsit", "Room"]:
-                    property_type = "Flat"
-
-                built_form = property_type_lookup[ha_name]["built_form"].get(property_meta["Property Type"], None)
-            elif ha_name == "ha_6":
-                property_type = property_type_lookup[ha_name]["property_type"][property_meta["Dwelling type"]]
-                built_form = property_meta["built_form"]
-            elif ha_name == "ha_14":
-                if property_meta["Asset Type Description"] == "Block - Repair":
-                    # We try and deduce if it's a flat or house, depending on if it has "room" or "flats" in the address
-                    if "room" in property_meta["Address 1"].lower():
-                        property_type = "House"
-                    else:
-                        property_type = "Flat"
-
-                else:
-                    property_type = property_type_lookup[ha_name]["property_type"][
-                        property_meta["Asset Type Description"]
-                    ]
-
-                built_form = None
-            elif ha_name == "ha_39":
-
-                property_type_config = property_type_lookup[ha_name].get(property_meta["ConstructionStyle"], {})
-                property_type = property_type_config.get("property_type", None)
-                built_form = property_type_config.get("built_form", None)
-
-                if property_type is None:
-                    # We check for the presence of room or flat
-                    if "flat" in property_meta["matching_address"]:
-                        property_type = "Flat"
-                    else:
-                        property_type = "House"
-            elif ha_name == "ha_107":
-
-                dwelling_style = property_meta["Dwelling Style"]
-                if isinstance(dwelling_style, str):
-                    dwelling_style = dwelling_style.strip()
-
-                property_type = property_type_lookup[ha_name]["property_type"].get(property_meta["DwellingType"])
-                built_form = property_type_lookup[ha_name]["built_form"].get(dwelling_style, None)
-
-                if property_type is None:
-                    if built_form in ["Semi-Detached", "Mid-Terrace", "End-Terrace", "Detached"]:
-                        property_type = "House"
-
-                    if "flat" in property_meta["Wall Construction"].lower():
-                        property_type = "Flat"
-
-                    if (property_meta["DwellingType"] == "UNKNOWN") & (property_meta["Dwelling Style"] == 0):
-                        # Hand a few specific cases
-                        property_type = "Bungalow"
-
-                    if property_meta["Street"] == "School View":
-                        property_type = "Bungalow"
-
-            else:
-                raise NotImplementedError("Implement me")
+            property_type, built_form = get_property_type_and_built_form(
+                property_meta=property_meta, ha_name=ha_name
+            )
 
             searcher = SearchEpc(
                 address1=str(property_meta["HouseNo"]),
                 postcode=property_meta["matching_postcode"],
                 auth_token=EPC_AUTH_TOKEN,
-                os_api_key=None,
+                os_api_key="",
                 full_address=property_meta["matching_address"]
             )
             searcher.ordnance_survey_client.property_type = property_type
@@ -1150,9 +1158,21 @@ def get_epc_data(
             eligibility.check_gbis_warmfront()
             eligibility.check_eco4_warmfront()
 
-            if (not eligibility.eco4_warmfront["eligible"]) and (
-                not eligibility.gbis_warmfront
-            ) and consider_penultimate_epc:
+            # We check the conditions for checking the penultimate epc
+            identified_for_gbis = property_meta["ECO Eligibility"] == "gbis"
+            identified_for_eco4 = property_meta["ECO Eligibility"] in ["eco4"]
+
+            # condition 1 - identified for gbis and not eligible
+            condition_1 = (
+                identified_for_gbis and not eligibility.gbis_warmfront and not eligibility.eco4_warmfront["eligible"]
+            ) & consider_penultimate_epc
+
+            # condition 2 - identified for eco4 and not eligible
+            condition_2 = (
+                identified_for_eco4 and not eligibility.eco4_warmfront["eligible"]
+            ) & consider_penultimate_epc
+
+            if identified_for_gbis and not eligibility.gbis_warmfront and not eligibility.eco4_warmfront["eligible"]:
                 # We check the penultimate epc
                 eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned)
                 eligibility.check_gbis_warmfront()
@@ -1161,6 +1181,10 @@ def get_epc_data(
                 # We don't update just to make data cleaning easier
                 if penultimate_epc.get("estimated") is None:
                     older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]]
+            elif identified_for_eco4 and not eligibility.eco4_warmfront["eligible"]:
+
+            else:
+                blah
 
             # If the property is a cavity wall and it's filled, we produce an estimate for the age of the cavity
             # Loft MUST be suitable
@@ -1199,6 +1223,7 @@ def get_epc_data(
                 {
                     "row_id": property_meta["asset_list_row_id"],
                     "uprn": eligibility.epc["uprn"],
+                    "is_estimated": searcher.newest_epc.get("estimated") is not None,
                     "property_type": eligibility.epc["property-type"],
                     "gbis_eligible": eligibility.gbis_warmfront,
                     "eco4_eligible": eligibility.eco4_warmfront["eligible"],
@@ -1219,7 +1244,6 @@ def get_epc_data(
                     "cavity_age": cavity_age,
                     **eligibility.walls,
                     **eligibility.roof,
-                    "is_estimated": searcher.newest_epc.get("estimated") is not None,
                     "eligibility_cavity_type": eligibility.eco4_warmfront["cavity_type"],
                     "eligibility_loft_type": eligibility.eco4_warmfront["loft_type"]
                 }
@@ -1687,38 +1711,7 @@ def analyse_ha_data(outputs, loader):
                 writer.sheets[sheet].set_column(i, i, width)
 
 
-def app():
-    """
-    This app contains the housin association analysis for HAs 1, 6, 14, 39 and 107.
-    Only HA 6 has surveys
-    :return:
-    """
-
-    use_cache = True
-
-    # List all of the data in the folder
-    directories = [str(list(entry.iterdir())[0]) for entry in DATA_FOLDER.iterdir() if entry.is_dir()]
-    # Grab the December HA figures filepath
-    december_figures_filepath = "local_data/ha_data/HA_December_figures.csv"
-
-    priority_has = ["HA1", "HA6", "HA14", "HA39", "HA107"]
-    # Filter down the directories to only the priority HAs
-    directories = [d for d in directories if d.split("/")[2] in priority_has]
-
-    loader = DataLoader(directories, december_figures_filepath, use_cache)
-    loader.load()
-    loader.ha_facts_and_figures()
-
-    # TODO: We probably need to make sure that we have all of the columns that we need
-
-    # We load in the additional data required to perform the analysis
-
-    cleaned = read_from_s3(
-        s3_file_name="cleaned_epc_data/cleaned.bson",
-        bucket_name="retrofit-data-dev"
-    )
-    cleaned = msgpack.unpackb(cleaned, raw=False)
-
+def patch_cleaned(cleaned):
     # Patch to handle the a missing description
     cleaned["floor-description"].extend(
         [
@@ -1762,16 +1755,57 @@ def app():
             x["another_property_below"] = True
             x["thermal_transmittance"] = 0
 
+    return cleaned
+
+
+def app():
+    """
+    This app contains the housin association analysis for HAs 1, 6, 14, 39 and 107.
+    Only HA 6 has surveys
+    :return:
+    """
+
+    # Determines if we want to use the cached data in s3
+    use_cache = True
+    # Determines if we want to perform the data pull
+    pull_data = True
+
+    # List all of the data in the folder
+    directories = [str(list(entry.iterdir())[0]) for entry in DATA_FOLDER.iterdir() if entry.is_dir()]
+    # Grab the December HA figures filepath
+    december_figures_filepath = "local_data/ha_data/HA_December_figures.csv"
+
+    priority_has = ["HA1", "HA6", "HA14", "HA39", "HA107"]
+    # Filter down the directories to only the priority HAs
+    directories = [d for d in directories if d.split("/")[2] in priority_has]
+
+    loader = DataLoader(directories, december_figures_filepath, use_cache)
+    loader.load()
+    loader.ha_facts_and_figures()
+
+    # We load in the additional data required to perform the analysis
+    cleaned = read_from_s3(
+        s3_file_name="cleaned_epc_data/cleaned.bson",
+        bucket_name="retrofit-data-dev"
+    )
+    cleaned = msgpack.unpackb(cleaned, raw=False)
+    cleaned = patch_cleaned(cleaned)
+
     cleaning_data = read_dataframe_from_s3_parquet(
         bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
     )
-
     created_at = datetime.now().isoformat()
 
     photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")
 
     outputs = get_epc_data(
-        loader, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds, pull_data=False
+        loader=loader,
+        cleaned=cleaned,
+        cleaning_data=cleaning_data,
+        created_at=created_at,
+        photo_supply_lookup=photo_supply_lookup,
+        floor_area_decile_thresholds=floor_area_decile_thresholds,
+        pull_data=pull_data
     )
 
     # for ha_name, datasets in outputs.items():

From 807ce14790600dce8a810847f47bc216bcddf6b3 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 26 Feb 2024 19:09:19 +0000
Subject: [PATCH 019/248] updating the code to do eligibility

---
 .../ha_15_32/ha_analysis_batch_3.py           | 42 +++++++++++++------
 1 file changed, 29 insertions(+), 13 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 5dd9b6e1..3d0964c6 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1164,15 +1164,33 @@ def get_epc_data(
 
             # condition 1 - identified for gbis and not eligible
             condition_1 = (
-                identified_for_gbis and not eligibility.gbis_warmfront and not eligibility.eco4_warmfront["eligible"]
-            ) & consider_penultimate_epc
+                              identified_for_gbis and not eligibility.gbis_warmfront and not eligibility.eco4_warmfront[
+                              "eligible"]
+                          ) & consider_penultimate_epc
 
             # condition 2 - identified for eco4 and not eligible
-            condition_2 = (
-                identified_for_eco4 and not eligibility.eco4_warmfront["eligible"]
-            ) & consider_penultimate_epc
+            condition_2 = (identified_for_eco4 and not eligibility.eco4_warmfront[
+                "eligible"]) & consider_penultimate_epc
 
-            if identified_for_gbis and not eligibility.gbis_warmfront and not eligibility.eco4_warmfront["eligible"]:
+            # successfully identigied gbis
+            condition_3 = (
+                identified_for_gbis and (eligibility.gbis_warmfront or eligibility.eco4_warmfront["eligible"])
+            )
+
+            # Nothing identified
+            condition_4 = (
+                not identified_for_gbis and not identified_for_eco4 and not eligibility.gbis_warmfront and not
+            eligibility.eco4_warmfront["eligible"]
+            )
+
+            # Not identified but seemingly eligible for eco4 or gbis
+            condition_5 = (
+                not identified_for_gbis and not identified_for_eco4 and (
+                eligibility.eco4_warmfront["eligible"] or eligibility.gbis_warmfront
+            )
+            )
+
+            if condition_1 or condition_2:
                 # We check the penultimate epc
                 eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned)
                 eligibility.check_gbis_warmfront()
@@ -1181,10 +1199,11 @@ def get_epc_data(
                 # We don't update just to make data cleaning easier
                 if penultimate_epc.get("estimated") is None:
                     older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]]
-            elif identified_for_eco4 and not eligibility.eco4_warmfront["eligible"]:
-
+            elif condition_3 or condition_4 or condition_5:
+                # If we have successfully identified for gbis, we don't need to check the penultimate epc
+                pass
             else:
-                blah
+                NotImplementedError("Implement me")
 
             # If the property is a cavity wall and it's filled, we produce an estimate for the age of the cavity
             # Loft MUST be suitable
@@ -1229,10 +1248,7 @@ def get_epc_data(
                     "eco4_eligible": eligibility.eco4_warmfront["eligible"],
                     "eco4_message": eligibility.eco4_warmfront["message"],
                     "sap": float(eligibility.epc["current-energy-efficiency"]),
-                    "gbis_eligible_future": eligibility.gbis["eligible"],
-                    "gbis_eligible_future_message": eligibility.gbis["message"],
-                    "eco4_eligible_future": eligibility.eco4["eligible"],
-                    "eco4_eligible_future_message": eligibility.eco4["message"],
+
                     # Property components
                     "roof": eligibility.roof["clean_description"],
                     "walls": eligibility.walls["clean_description"],

From 69dcc73363c43d12076b887707db802384046e07 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 26 Feb 2024 19:18:58 +0000
Subject: [PATCH 020/248] debugging null lodgement-date

---
 backend/SearchEpc.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py
index 4f6fd33d..4a3f371a 100644
--- a/backend/SearchEpc.py
+++ b/backend/SearchEpc.py
@@ -609,7 +609,11 @@ class SearchEpc:
         # Insert an estimated lodgement datetime, with a weighted average
         estimated_epc["lodgement-datetime"] = self.calculate_weighted_lodgement_datetime(epc_data=epc_data)
         # Extract logement date
-        estimated_epc["lodgement-date"] = estimated_epc["lodgement-datetime"].strftime("%Y-%m-%d")
+        # It is possible that there is still no lodgement date, so we need to handle this
+        if pd.isnull(estimated_epc["lodgement-datetime"]):
+            estimated_epc["lodgement-date"] = None
+        else:
+            estimated_epc["lodgement-date"] = estimated_epc["lodgement-datetime"].strftime("%Y-%m-%d")
 
         estimated_epc["postcode"] = self.postcode
         estimated_epc["uprn"] = self.uprn

From b80ffda392e0601f08dd376cfaacba73e733fc9c Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 26 Feb 2024 19:29:46 +0000
Subject: [PATCH 021/248] updating eligibility pipeline to factor in ciga

---
 .../ha_15_32/ha_analysis_batch_3.py           | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 3d0964c6..ecbb4e0a 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1159,8 +1159,11 @@ def get_epc_data(
             eligibility.check_eco4_warmfront()
 
             # We check the conditions for checking the penultimate epc
-            identified_for_gbis = property_meta["ECO Eligibility"] == "gbis"
+            identified_for_gbis = property_meta["ECO Eligibility"] in ["gbis"]
             identified_for_eco4 = property_meta["ECO Eligibility"] in ["eco4"]
+            subject_to_ciga = property_meta["ECO Eligibility"] in [
+                "eco4 (subject to ciga)", "eco4 - passed ciga", "failed ciga"
+            ]
 
             # condition 1 - identified for gbis and not eligible
             condition_1 = (
@@ -1179,8 +1182,11 @@ def get_epc_data(
 
             # Nothing identified
             condition_4 = (
-                not identified_for_gbis and not identified_for_eco4 and not eligibility.gbis_warmfront and not
-            eligibility.eco4_warmfront["eligible"]
+                not identified_for_gbis
+                and not identified_for_eco4
+                and not eligibility.gbis_warmfront
+                and not subject_to_ciga
+                and not eligibility.eco4_warmfront["eligible"]
             )
 
             # Not identified but seemingly eligible for eco4 or gbis
@@ -1190,6 +1196,10 @@ def get_epc_data(
             )
             )
 
+            condition_6 = (
+                subject_to_ciga and not eligibility.eco4_warmfront["eligible"]
+            )
+
             if condition_1 or condition_2:
                 # We check the penultimate epc
                 eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned)
@@ -1199,8 +1209,7 @@ def get_epc_data(
                 # We don't update just to make data cleaning easier
                 if penultimate_epc.get("estimated") is None:
                     older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]]
-            elif condition_3 or condition_4 or condition_5:
-                # If we have successfully identified for gbis, we don't need to check the penultimate epc
+            elif condition_3 or condition_4 or condition_5 or condition_6:
                 pass
             else:
                 NotImplementedError("Implement me")

From 281c6f626c833a482a199ba120e1b0e8b1869cf1 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 26 Feb 2024 23:23:29 +0000
Subject: [PATCH 022/248] working on eligibility

---
 backend/Property.py                           |   3 +-
 etl/eligibility/Eligibility.py                |  90 ++++++++--
 etl/eligibility/ha_15_32/app.py               |  18 +-
 .../ha_15_32/ha_analysis_batch_3.py           | 156 +++++++++---------
 4 files changed, 167 insertions(+), 100 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index 4a55e504..f86e33dc 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -147,7 +147,8 @@ class Property:
         # self.base_difference_record.df
 
     def adjust_difference_record_with_recommendations(
-        self, property_recommendations,
+        self,
+        property_recommendations,
         property_representative_recommendations
     ):
         """
diff --git a/etl/eligibility/Eligibility.py b/etl/eligibility/Eligibility.py
index b09d2df5..bda34923 100644
--- a/etl/eligibility/Eligibility.py
+++ b/etl/eligibility/Eligibility.py
@@ -145,6 +145,7 @@ class Eligibility:
                 "reason": None,
                 "thickness_classification": thickness_classification
             }
+            return
 
         # Insulation is already thick enough
         self.loft = {
@@ -164,8 +165,10 @@ class Eligibility:
         """
 
         is_cavity = self.walls["is_cavity_wall"]
-        is_empty = (not self.walls["is_filled_cavity"]) or (
+        is_empty = (not self.walls["is_filled_cavity"])
+        is_as_built = (
             self.walls["is_as_built"] and self.walls["insulation_thickness"] not in ["average", "above average"]
+            and self.walls["is_assumed"]
         )
         is_partial_filled = "partial" in self.walls["clean_description"].lower()
         # We look for potentially under performing cavities - anything that is assumed, as built and insulated
@@ -175,6 +178,7 @@ class Eligibility:
 
         is_unfilled_cavity = is_cavity and (is_empty and not is_partial_filled)
         is_partial_filled_cavity = is_cavity and is_partial_filled
+        is_assumed_filled_cavity = is_cavity and is_as_built
         is_underperforming_cavity = is_cavity and is_underperforming
 
         # Check if it has internal or external wall insulation
@@ -195,6 +199,13 @@ class Eligibility:
             }
             return
 
+        if is_assumed_filled_cavity:
+            self.cavity = {
+                "suitability": True,
+                "type": "as built assumed",
+            }
+            return
+
         if is_partial_filled_cavity:
             self.cavity = {
                 "suitability": True,
@@ -345,7 +356,7 @@ class Eligibility:
             int(self.epc["current-energy-efficiency"]) <= 68
         )
 
-    def check_eco4_warmfront(self, post_retrofit_sap=None):
+    def check_eco4_warmfront(self):
         """
         This funciton will check if the property is eligible for funding under the ECO4 scheme
 
@@ -377,49 +388,100 @@ class Eligibility:
         self.cavity_insulation()
         self.loft_insulation()
 
-        # make sure conditions 2 and 3 are true
-        is_eligible = self.cavity["suitability"] & self.loft["suitability"]
-
-        if current_sap >= 69:
+        # Case 1: No conditions meet
+        if not self.cavity["suitability"] and (self.loft["thickness"] > 100) and current_sap >= 55:
             self.eco4_warmfront = {
                 "eligible": False,
-                "message": "SAP too high",
+                "strict": False,
+                "message": "All conditions fail",
                 "cavity_type": self.cavity["type"],
                 "loft_type": self.loft["thickness_classification"]
             }
             return
 
-        if not is_eligible and current_sap >= 55:
+        # Case 2 - perfect match
+        if (self.cavity["type"] == "empty") and (self.loft["thickness"] <= 100) and (current_sap < 55):
             self.eco4_warmfront = {
-                "eligible": False,
-                "message": "failed fabric and SAP check",
+                "eligible": True,
+                "strict": True,
+                "message": "Perfect suitability",
                 "cavity_type": self.cavity["type"],
                 "loft_type": self.loft["thickness_classification"]
             }
             return
 
-        if not is_eligible and current_sap < 55:
+        # Case 2.5 - near perfect match - but we would not recommend this using the model
+        if self.cavity["suitability"] and (self.loft["thickness"] <= 100) and (current_sap < 55):
+            self.eco4_warmfront = {
+                "eligible": True,
+                "strict": True,
+                "message": "Perfect suitability",
+                "cavity_type": self.cavity["type"],
+                "loft_type": self.loft["thickness_classification"]
+            }
+            return
+
+        # Case 3 - cavity is suitable, loft is not, sap is good
+        if self.cavity["suitability"] and (self.loft["thickness"] > 100) and (current_sap < 55):
+            self.eco4_warmfront = {
+                "eligible": True,
+                "strict": False,
+                "message": "Meets cavity and sap",
+                "cavity_type": self.cavity["type"],
+                "loft_type": self.loft["thickness_classification"]
+            }
+            return
+
+        # Case 4 - cavity is not suitable, loft is, sap is not - we say this is not elifible
+        if not self.cavity["suitability"] and (self.loft["thickness"] <= 100) and (current_sap < 55):
             self.eco4_warmfront = {
                 "eligible": False,
+                "strict": False,
                 "message": "failed fabric check",
                 "cavity_type": self.cavity["type"],
                 "loft_type": self.loft["thickness_classification"]
             }
             return
 
-        if is_eligible and current_sap >= 55:
+        # Case 5 - cavity and loft suitable, sap too high
+        if self.cavity["suitability"] and (self.loft["thickness"] <= 100) and (current_sap >= 55):
             self.eco4_warmfront = {
                 "eligible": True,
+                "strict": False,
                 "message": "Meets fabric, fails SAP check",
                 "cavity_type": self.cavity["type"],
                 "loft_type": self.loft["thickness_classification"]
             }
             return
 
-        if is_eligible and current_sap < 55:
+        # Case 6 - meets just cavity
+        if self.cavity["suitability"] and (self.loft["thickness"] > 100) and (current_sap >= 55):
             self.eco4_warmfront = {
                 "eligible": True,
-                "message": "Meets fabric and SAP check",
+                "strict": False,
+                "message": "Meets just cavity",
+                "cavity_type": self.cavity["type"],
+                "loft_type": self.loft["thickness_classification"]
+            }
+            return
+
+        # Case 7 - fails cavity, loft but meets sap
+        if not self.cavity["suitability"] and (self.loft["thickness"] > 100) and (current_sap < 55):
+            self.eco4_warmfront = {
+                "eligible": False,
+                "strict": False,
+                "message": "Fails cavity nd lodt, meets SAP",
+                "cavity_type": self.cavity["type"],
+                "loft_type": self.loft["thickness_classification"]
+            }
+            return
+
+        # Case 8 - fails cavity, meets loft, fails sap
+        if not self.cavity["suitability"] and (self.loft["thickness"] <= 100) and (current_sap >= 55):
+            self.eco4_warmfront = {
+                "eligible": False,
+                "strict": False,
+                "message": "Fails cavity, meets loft, fails SAP",
                 "cavity_type": self.cavity["type"],
                 "loft_type": self.loft["thickness_classification"]
             }
diff --git a/etl/eligibility/ha_15_32/app.py b/etl/eligibility/ha_15_32/app.py
index a68bf272..378a0e83 100644
--- a/etl/eligibility/ha_15_32/app.py
+++ b/etl/eligibility/ha_15_32/app.py
@@ -387,17 +387,19 @@ def prepare_model_data_row(
     }
 
     simulations = [
-        [cavity_simulation],
-        [loft_simulation]
+        cavity_simulation,
+        loft_simulation
     ]
 
-    p.adjust_difference_record_with_recommendations(simulations)
+    recommendation_record = p.base_difference_record.df.to_dict("records")[0].copy()
+    scoring_dict = p.create_recommendation_scoring_data(
+        property_id=p.id,
+        recommendation_record=recommendation_record,
+        recommendations=simulations,
+        primary_recommendation_id=cavity_simulation["recommendation_id"]
+    )
 
-    # Make sure we definitely have the correct data
-    cavity_scoring = [x for x in p.recommendations_scoring_data if "cavity" in x["id"]][0]
-    loft_scoring = [x for x in p.recommendations_scoring_data if "loft" in x["id"]][0]
-
-    return [cavity_scoring, loft_scoring]
+    return [scoring_dict]
 
 
 def get_ha_32data(ha_data, cleaned, cleaning_data, created_at):
diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index ecbb4e0a..239fce65 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1114,7 +1114,7 @@ def get_epc_data(
         results = []
         scoring_data = []
         nodata = []
-        for index, property_meta in tqdm(asset_list.iterrows(), total=len(asset_list)):
+        for index, property_meta in tqdm(eco4.iterrows(), total=len(eco4)):
 
             if property_meta["matching_postcode"] is None:
                 continue
@@ -1226,10 +1226,6 @@ def get_epc_data(
                 # We check the age of the cavity and if it's particularly old, we flag it
                 cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned)
 
-            # Full checks
-            eligibility.check_gbis()
-            eligibility.check_eco4()
-
             if eligibility.eco4_warmfront["eligible"]:
                 if eligibility.epc["uprn"] == "":
                     eligibility.epc["uprn"] = int(property_meta["asset_list_row_id"].split(ha_name)[1])
@@ -1256,8 +1252,8 @@ def get_epc_data(
                     "gbis_eligible": eligibility.gbis_warmfront,
                     "eco4_eligible": eligibility.eco4_warmfront["eligible"],
                     "eco4_message": eligibility.eco4_warmfront["message"],
+                    "eco4_strict": eligibility.eco4_warmfront["strict"],
                     "sap": float(eligibility.epc["current-energy-efficiency"]),
-
                     # Property components
                     "roof": eligibility.roof["clean_description"],
                     "walls": eligibility.walls["clean_description"],
@@ -1267,91 +1263,97 @@ def get_epc_data(
                     "date_epc": eligibility.epc["lodgement-date"],
                     "loft_thickness": eligibility.roof["insulation_thickness"],
                     "cavity_age": cavity_age,
-                    **eligibility.walls,
-                    **eligibility.roof,
                     "eligibility_cavity_type": eligibility.eco4_warmfront["cavity_type"],
                     "eligibility_loft_type": eligibility.eco4_warmfront["loft_type"]
                 }
             )
 
-        scoring_df = pd.DataFrame(scoring_data)
-        scoring_df = scoring_df.drop(
-            columns=[
-                "rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
-                "carbon_ending"
-            ]
-        )
-
-        model_api = ModelApi(portfolio_id="-".join([ha_name, "eligibility"]), timestamp=created_at)
-
-        # scoring_df["is_community"].value_counts()
-        # scoring_df[scoring_df["is_community"] == "Unknown"]
-        # property_meta = asset_list[asset_list["asset_list_row_id"] == "ha_67238"].squeeze()
-
-        all_predictions = model_api.predict_all(
-            df=scoring_df,
-            bucket="retrofit-data-dev",
-            prediction_buckets={
-                "sap_change_predictions": "retrofit-sap-predictions-dev",
-                "heat_demand_predictions": "retrofit-heat-predictions-dev",
-                "carbon_change_predictions": "retrofit-carbon-predictions-dev"
-            }
-        )
-
         results_df = pd.DataFrame(results)
+        scoring_df = pd.DataFrame(scoring_data)
+        results_df["post_install_sap"] = None
+        results_df["eligibility_classification"] = None
 
-        predictions = all_predictions["sap_change_predictions"].copy()
+        eco4 = asset_list[asset_list["ECO Eligibility"] == "eco4"]
+        z = results_df[results_df["row_id"].isin(eco4["asset_list_row_id"])]
+        z["walls"].value_counts()
+        z1 = z[z["walls"] == "Cavity wall, as built, no insulation"]
+        k = z1[z1["roof"] == "Pitched, 100 mm loft insulation"]
+        property_meta = asset_list[asset_list["asset_list_row_id"] == k["row_id"].values[0]].squeeze()
+        z[z["walls"] == "Cavity wall, as built, insulated"]["roof"].value_counts()
+        z[z["walls"] == "Cavity wall, as built, insulated"]["roof"].value_counts()
 
-        predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
-            results_df[["row_id", "sap"]], how="left", on="row_id"
-        )
-        predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
-        predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()
+        if not scoring_df.empty:
+            scoring_df = scoring_df.drop(
+                columns=[
+                    "rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
+                    "carbon_ending"
+                ]
+            )
 
-        results_df = results_df.merge(
-            predictions[["sap_uplift", "row_id"]],
-            how="left",
-            on="row_id"
-        )
-        results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]
+            model_api = ModelApi(portfolio_id="-".join([ha_name, "eligibility"]), timestamp=created_at)
 
-        eligibility_assessment = []
-        for _, row in results_df[results_df["eco4_eligible"] == True].iterrows():
-            # The upgrade requirements are dependent on the current SAP
-
-            # If the property is an F or G, it only needs to upgrade to an %
-            if row["sap"] <= 38:
-                if row["post_install_sap"] >= 57:
-                    eligibility_classification = "highest confidence"
-                elif row["post_install_sap"] >= 55:
-                    eligibility_classification = "high confidence"
-                elif row["post_install_sap"] >= 53:
-                    eligibility_classification = "medium confidence"
-                else:
-                    eligibility_classification = "unlikely"
-            else:
-
-                if row["post_install_sap"] >= 71:
-                    eligibility_classification = "highest confidence"
-                elif row["post_install_sap"] >= 69:
-                    eligibility_classification = "high confidence"
-                elif row["post_install_sap"] >= 67:
-                    eligibility_classification = "medium confidence"
-                else:
-                    eligibility_classification = "unlikely"
-
-            eligibility_assessment.append(
-                {
-                    "row_id": row["row_id"],
-                    "eligibility_classification": eligibility_classification
+            all_predictions = model_api.predict_all(
+                df=scoring_df,
+                bucket="retrofit-data-dev",
+                prediction_buckets={
+                    "sap_change_predictions": "retrofit-sap-predictions-dev",
+                    "heat_demand_predictions": "retrofit-heat-predictions-dev",
+                    "carbon_change_predictions": "retrofit-carbon-predictions-dev"
                 }
             )
 
-        eligibility_assessment = pd.DataFrame(eligibility_assessment)
+            predictions = all_predictions["sap_change_predictions"].copy()
 
-        results_df = results_df.merge(
-            eligibility_assessment, how="left", on="row_id"
-        )
+            predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
+                results_df[["row_id", "sap"]], how="left", on="row_id"
+            )
+            predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
+            predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()
+
+            results_df = results_df.merge(
+                predictions[["sap_uplift", "row_id"]],
+                how="left",
+                on="row_id"
+            )
+            results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]
+
+            eligibility_assessment = []
+            for _, row in results_df[results_df["eco4_eligible"] == True].iterrows():
+                # The upgrade requirements are dependent on the current SAP
+
+                # If the property is an F or G, it only needs to upgrade to an %
+                if row["sap"] <= 38:
+                    if row["post_install_sap"] >= 57:
+                        eligibility_classification = "highest confidence"
+                    elif row["post_install_sap"] >= 55:
+                        eligibility_classification = "high confidence"
+                    elif row["post_install_sap"] >= 53:
+                        eligibility_classification = "medium confidence"
+                    else:
+                        eligibility_classification = "unlikely"
+                else:
+
+                    if row["post_install_sap"] >= 71:
+                        eligibility_classification = "highest confidence"
+                    elif row["post_install_sap"] >= 69:
+                        eligibility_classification = "high confidence"
+                    elif row["post_install_sap"] >= 67:
+                        eligibility_classification = "medium confidence"
+                    else:
+                        eligibility_classification = "unlikely"
+
+                eligibility_assessment.append(
+                    {
+                        "row_id": row["row_id"],
+                        "eligibility_classification": eligibility_classification
+                    }
+                )
+
+            eligibility_assessment = pd.DataFrame(eligibility_assessment)
+
+            results_df = results_df.merge(
+                eligibility_assessment, how="left", on="row_id"
+            )
 
         # We store the results in S3 as a pickle
         save_pickle_to_s3(

From f4d27aa68dea5595037d55e7ad8c54cc9d7967ad Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 26 Feb 2024 23:30:06 +0000
Subject: [PATCH 023/248] fixing eligibility

---
 etl/eligibility/Eligibility.py | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/etl/eligibility/Eligibility.py b/etl/eligibility/Eligibility.py
index bda34923..15e3158f 100644
--- a/etl/eligibility/Eligibility.py
+++ b/etl/eligibility/Eligibility.py
@@ -421,8 +421,19 @@ class Eligibility:
             }
             return
 
+        # Case 3 - cavity is suitable, loft is within 150mm, sap is good
+        if self.cavity["suitability"] and (self.loft["thickness"] <= 150) and (current_sap < 55):
+            self.eco4_warmfront = {
+                "eligible": True,
+                "strict": False,
+                "message": "Meets cavity, loft borderline, meets sap",
+                "cavity_type": self.cavity["type"],
+                "loft_type": self.loft["thickness_classification"]
+            }
+            return
+
         # Case 3 - cavity is suitable, loft is not, sap is good
-        if self.cavity["suitability"] and (self.loft["thickness"] > 100) and (current_sap < 55):
+        if self.cavity["suitability"] and (self.loft["thickness"] > 150) and (current_sap < 55):
             self.eco4_warmfront = {
                 "eligible": True,
                 "strict": False,
@@ -444,7 +455,7 @@ class Eligibility:
             return
 
         # Case 5 - cavity and loft suitable, sap too high
-        if self.cavity["suitability"] and (self.loft["thickness"] <= 100) and (current_sap >= 55):
+        if self.cavity["suitability"] and (self.loft["thickness"] <= 150) and (current_sap >= 55):
             self.eco4_warmfront = {
                 "eligible": True,
                 "strict": False,
@@ -470,7 +481,7 @@ class Eligibility:
             self.eco4_warmfront = {
                 "eligible": False,
                 "strict": False,
-                "message": "Fails cavity nd lodt, meets SAP",
+                "message": "Fails cavity and loft, meets SAP",
                 "cavity_type": self.cavity["type"],
                 "loft_type": self.loft["thickness_classification"]
             }

From 97ce8dc32ea0edd3d24ecefe942a0eb4e8df418e Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 26 Feb 2024 23:36:45 +0000
Subject: [PATCH 024/248] fixing eligibility

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 239fce65..1ba75e2b 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1114,7 +1114,7 @@ def get_epc_data(
         results = []
         scoring_data = []
         nodata = []
-        for index, property_meta in tqdm(eco4.iterrows(), total=len(eco4)):
+        for index, property_meta in tqdm(asset_list.iterrows(), total=len(asset_list)):
 
             if property_meta["matching_postcode"] is None:
                 continue
@@ -1218,10 +1218,7 @@ def get_epc_data(
             # Loft MUST be suitable
             cavity_age = None
             if (
-                eligibility.walls["is_cavity_wall"] and
-                eligibility.walls["is_filled_cavity"] and
-                eligibility.loft["suitability"] and
-                eligibility.eco4_warmfront["message"] == "Failed due to full cavity - check cavity age"
+                identified_for_eco4 and not eligibility.eco4_warmfront["eligible"]
             ):
                 # We check the age of the cavity and if it's particularly old, we flag it
                 cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned)

From 0fbf00451291a09349c0bdeeb67bbc80bd4dc9bc Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 27 Feb 2024 10:20:55 +0000
Subject: [PATCH 025/248] Expanding gbis eligibility checks

---
 etl/eligibility/Eligibility.py                | 44 +++++++++++++++++--
 .../ha_15_32/ha_analysis_batch_3.py           | 20 +++++----
 etl/epc/Dataset.py                            | 16 +++----
 3 files changed, 59 insertions(+), 21 deletions(-)

diff --git a/etl/eligibility/Eligibility.py b/etl/eligibility/Eligibility.py
index 15e3158f..f7a5ed98 100644
--- a/etl/eligibility/Eligibility.py
+++ b/etl/eligibility/Eligibility.py
@@ -352,9 +352,41 @@ class Eligibility:
         # Check if the property is suitable for cavity wall
         self.cavity_insulation()
 
-        self.gbis_warmfront = (self.cavity["suitability"]) and (
-            int(self.epc["current-energy-efficiency"]) <= 68
-        )
+        current_sap = int(self.epc["current-energy-efficiency"])
+        # We have a strict suitability check and a non-strict check
+
+        # Perfect strictness
+        if (self.cavity["type"] == "empty") and (current_sap < 69):
+            self.gbis_warmfront = {
+                "eligible": True,
+                "strict": True,
+                "message": "Perfect suitability",
+            }
+            return
+
+        # Near perfect
+        if self.cavity["suitability"] and (current_sap < 55):
+            self.gbis_warmfront = {
+                "eligible": True,
+                "strict": True,
+                "message": "Near perfect suitability",
+            }
+            return
+
+        # Suitable cavity, but high sap
+        if self.cavity["suitability"] and (current_sap >= 55):
+            self.gbis_warmfront = {
+                "eligible": True,
+                "strict": False,
+                "message": "Meets cavity, fails SAP check",
+            }
+            return
+
+        self.gbis_warmfront = {
+            "eligible": False,
+            "strict": False,
+            "message": "All conditions fail",
+        }
 
     def check_eco4_warmfront(self):
         """
@@ -388,6 +420,10 @@ class Eligibility:
         self.cavity_insulation()
         self.loft_insulation()
 
+        # We put in a placeholder when the roof is not a loft
+        if self.loft["reason"] == "roof not loft":
+            self.loft["thickness"] = 999
+
         # Case 1: No conditions meet
         if not self.cavity["suitability"] and (self.loft["thickness"] > 100) and current_sap >= 55:
             self.eco4_warmfront = {
@@ -415,7 +451,7 @@ class Eligibility:
             self.eco4_warmfront = {
                 "eligible": True,
                 "strict": True,
-                "message": "Perfect suitability",
+                "message": "Near perfect suitability",
                 "cavity_type": self.cavity["type"],
                 "loft_type": self.loft["thickness_classification"]
             }
diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 1ba75e2b..28efadd0 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1270,15 +1270,6 @@ def get_epc_data(
         results_df["post_install_sap"] = None
         results_df["eligibility_classification"] = None
 
-        eco4 = asset_list[asset_list["ECO Eligibility"] == "eco4"]
-        z = results_df[results_df["row_id"].isin(eco4["asset_list_row_id"])]
-        z["walls"].value_counts()
-        z1 = z[z["walls"] == "Cavity wall, as built, no insulation"]
-        k = z1[z1["roof"] == "Pitched, 100 mm loft insulation"]
-        property_meta = asset_list[asset_list["asset_list_row_id"] == k["row_id"].values[0]].squeeze()
-        z[z["walls"] == "Cavity wall, as built, insulated"]["roof"].value_counts()
-        z[z["walls"] == "Cavity wall, as built, insulated"]["roof"].value_counts()
-
         if not scoring_df.empty:
             scoring_df = scoring_df.drop(
                 columns=[
@@ -1763,6 +1754,17 @@ def patch_cleaned(cleaned):
         ]
     )
 
+    cleaned["roof-description"].extend(
+        [
+            {'original_description': 'Pitched, 300+mm loft insulation',
+             'clean_description': 'Pitched, 300+ mm loft insulation', 'thermal_transmittance': None,
+             'thermal_transmittance_unit': None, 'is_pitched': True, 'is_roof_room': False, 'is_loft': True,
+             'is_flat': False, 'is_thatched': False, 'is_at_rafters': False, 'is_assumed': False,
+             'has_dwelling_above': False, 'is_valid': True, 'insulation_thickness': '300+'
+             }
+        ]
+    )
+
     # Patch mainheatcont-description
     cleaned["mainheatcont-description"].extend(
         [
diff --git a/etl/epc/Dataset.py b/etl/epc/Dataset.py
index dac829e2..7040d66c 100644
--- a/etl/epc/Dataset.py
+++ b/etl/epc/Dataset.py
@@ -203,11 +203,11 @@ class TrainingDataset(BaseDataset):
         common_cols = [[col + "_starting", col + "_ending"] for col in common_cols]
 
         self.df = self.df.loc[
-            :,
-            no_suffix_cols
-            + only_ending_cols
-            + [col for cols in common_cols for col in cols],
-        ]
+                  :,
+                  no_suffix_cols
+                  + only_ending_cols
+                  + [col for cols in common_cols for col in cols],
+                  ]
 
     def _remove_abnormal_change_in_floor_area(self):
         """
@@ -509,7 +509,7 @@ class TrainingDataset(BaseDataset):
                     expanded_df["is_sandstone_or_limestone"]
                     == expanded_df["is_sandstone_or_limestone_ending"]
                 )
-            ]
+                ]
         elif component == "floor":
             expanded_df = expanded_df[
                 (expanded_df["is_suspended"] == expanded_df["is_suspended_ending"])
@@ -526,7 +526,7 @@ class TrainingDataset(BaseDataset):
                     expanded_df["is_to_external_air"]
                     == expanded_df["is_to_external_air_ending"]
                 )
-            ]
+                ]
         elif component == "roof":
             expanded_df = expanded_df[
                 (expanded_df["is_pitched"] == expanded_df["is_pitched_ending"])
@@ -539,7 +539,7 @@ class TrainingDataset(BaseDataset):
                     expanded_df["has_dwelling_above"]
                     == expanded_df["has_dwelling_above_ending"]
                 )
-            ]
+                ]
 
         return expanded_df
 

From 7b080094fdf08daf720ac01c10bfad380a917062 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 27 Feb 2024 11:02:12 +0000
Subject: [PATCH 026/248] created distributed scoring for prediction

---
 .../ha_15_32/ha_analysis_batch_3.py           | 46 ++++++++++++-------
 1 file changed, 30 insertions(+), 16 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 28efadd0..3dc4d45f 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1166,10 +1166,9 @@ def get_epc_data(
             ]
 
             # condition 1 - identified for gbis and not eligible
-            condition_1 = (
-                              identified_for_gbis and not eligibility.gbis_warmfront and not eligibility.eco4_warmfront[
-                              "eligible"]
-                          ) & consider_penultimate_epc
+            condition_1 = (identified_for_gbis and not eligibility.gbis_warmfront
+                           and not eligibility.eco4_warmfront["eligible"]
+                           ) & consider_penultimate_epc
 
             # condition 2 - identified for eco4 and not eligible
             condition_2 = (identified_for_eco4 and not eligibility.eco4_warmfront[
@@ -1246,10 +1245,12 @@ def get_epc_data(
                     "uprn": eligibility.epc["uprn"],
                     "is_estimated": searcher.newest_epc.get("estimated") is not None,
                     "property_type": eligibility.epc["property-type"],
-                    "gbis_eligible": eligibility.gbis_warmfront,
                     "eco4_eligible": eligibility.eco4_warmfront["eligible"],
                     "eco4_message": eligibility.eco4_warmfront["message"],
                     "eco4_strict": eligibility.eco4_warmfront["strict"],
+                    "gbis_eligible": eligibility.gbis_warmfront["eligible"],
+                    "gbis_message": eligibility.gbis_warmfront["message"],
+                    "gbis_strict": eligibility.gbis_warmfront["strict"],
                     "sap": float(eligibility.epc["current-energy-efficiency"]),
                     # Property components
                     "roof": eligibility.roof["clean_description"],
@@ -1279,24 +1280,32 @@ def get_epc_data(
             )
 
             model_api = ModelApi(portfolio_id="-".join([ha_name, "eligibility"]), timestamp=created_at)
+            model_api.MODEL_PREFIXES = ["sap_change_predictions"]
 
-            all_predictions = model_api.predict_all(
-                df=scoring_df,
-                bucket="retrofit-data-dev",
-                prediction_buckets={
-                    "sap_change_predictions": "retrofit-sap-predictions-dev",
-                    "heat_demand_predictions": "retrofit-heat-predictions-dev",
-                    "carbon_change_predictions": "retrofit-carbon-predictions-dev"
-                }
-            )
+            scoring_df["id"] = scoring_df["id"] + "phase=0"
+            # We split up the scoring_df and score
+            predictions = []
+            to_loop_over = range(0, scoring_df.shape[0], 400)
+            for chunk in tqdm(to_loop_over, total=len(to_loop_over)):
+                predictions_dict = model_api.predict_all(
+                    df=scoring_df.iloc[chunk:chunk + 400],
+                    bucket="retrofit-data-dev",
+                    prediction_buckets={
+                        "sap_change_predictions": "retrofit-sap-predictions-dev",
+                    }
+                )
 
-            predictions = all_predictions["sap_change_predictions"].copy()
+                predictions.append(predictions_dict["sap_change_predictions"])
+
+            predictions = pd.concat(predictions)
+            predictions_size = predictions.shape[0]
 
             predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
                 results_df[["row_id", "sap"]], how="left", on="row_id"
             )
+            if predictions.shape[0] != predictions_size:
+                raise ValueError("Predictions size has changed")
             predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
-            predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()
 
             results_df = results_df.merge(
                 predictions[["sap_uplift", "row_id"]],
@@ -1339,9 +1348,12 @@ def get_epc_data(
 
             eligibility_assessment = pd.DataFrame(eligibility_assessment)
 
+            # Make sure the results haven't changed in size
             results_df = results_df.merge(
                 eligibility_assessment, how="left", on="row_id"
             )
+            if results_df.shape[0] != len(results):
+                raise ValueError("results has changed size")
 
         # We store the results in S3 as a pickle
         save_pickle_to_s3(
@@ -1809,6 +1821,8 @@ def app():
     loader.load()
     loader.ha_facts_and_figures()
 
+    loader.facts_and_figures.to_csv("facts_and_figures.csv", index=False)
+
     # We load in the additional data required to perform the analysis
     cleaned = read_from_s3(
         s3_file_name="cleaned_epc_data/cleaned.bson",

From 3ef346b248ed89e04a08d07a0231db987809521b Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 27 Feb 2024 13:12:54 +0000
Subject: [PATCH 027/248] patching roof description in cleaned further

---
 .../ha_15_32/ha_analysis_batch_3.py           | 60 ++++++++++++++++++-
 etl/epc/Dataset.py                            | 28 +++++++++
 2 files changed, 87 insertions(+), 1 deletion(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 3dc4d45f..e261710e 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1755,7 +1755,16 @@ def patch_cleaned(cleaned):
         ]
     )
 
-    # We treat unknown loft insulation as no insulation
+    cleaned["roof-description"].extend(
+        [
+            {'original_description': 'Pitched, Unknown loft insulation', 'clean_description': 'Pitched, no insulation',
+             'thermal_transmittance': None, 'thermal_transmittance_unit': None, 'is_pitched': True,
+             'is_roof_room': False,
+             'is_loft': False, 'is_flat': False, 'is_thatched': False, 'is_at_rafters': False, 'is_assumed': True,
+             'has_dwelling_above': False, 'is_valid': True, 'insulation_thickness': 'none'}
+        ]
+    )
+
     cleaned["roof-description"].extend(
         [
             {'original_description': 'Pitched, Unknown loft insulation', 'clean_description': 'Pitched, no insulation',
@@ -1777,6 +1786,55 @@ def patch_cleaned(cleaned):
         ]
     )
 
+    thermal_transmittance_values = list(np.arange(0, 2, 0.01))
+    for ttv in thermal_transmittance_values:
+        ttv_roundeded = round(ttv, 2)
+        # We look for an instance of that thermal transmittance value
+        rec = [
+            x for x in cleaned["roof-description"] if
+            (x["thermal_transmittance"] == ttv_roundeded) and "Average thermal transmittance" in x["clean_description"]
+        ]
+
+        if rec:
+            continue
+        else:
+            # We patch the record
+            cleaned["roof-description"].extend(
+                [{'original_description': f'Average thermal transmittance {ttv_roundeded} W/m-¦K',
+                  'clean_description': f'Average thermal transmittance {ttv_roundeded} w/m-¦k',
+                  'thermal_transmittance': ttv_roundeded,
+                  'thermal_transmittance_unit': 'w/m-¦k', 'is_pitched': False, 'is_roof_room': False, 'is_loft': False,
+                  'is_flat': False, 'is_thatched': False, 'is_at_rafters': False, 'is_assumed': False,
+                  'has_dwelling_above': False, 'is_valid': True, 'insulation_thickness': None}]
+            )
+
+    # We also patch a funny unit value we found
+    for ttv in thermal_transmittance_values:
+        ttv_rounded = round(ttv, 2)
+        # We look for an instance of that thermal transmittance value
+        rec = [
+            x for x in cleaned["roof-description"] if
+            (x["thermal_transmittance"] == ttv_rounded) and "Average thermal transmittance" in x["clean_description"]
+            and x["thermal_transmittance_unit"] == "w/m?K"
+        ]
+
+        if rec:
+            continue
+        else:
+            # We patch the record
+            ttv_string = str(ttv_rounded)
+            if len(ttv_string) == 3:
+                ttv_string = f"{ttv_string}0"
+
+            cleaned["roof-description"].extend(
+                [{'original_description': f'Average thermal transmittance {ttv_string} W/m?K',
+                  'clean_description': f'Average thermal transmittance {ttv_string} w/m-¦k',
+                  'thermal_transmittance': ttv_rounded,
+                  'thermal_transmittance_unit': 'w/m-¦k', 'is_pitched': False, 'is_roof_room': False, 'is_loft': False,
+                  'is_flat': False, 'is_thatched': False, 'is_at_rafters': False, 'is_assumed': False,
+                  'has_dwelling_above': False, 'is_valid': True, 'insulation_thickness': None}]
+            )
+
     # Patch mainheatcont-description
     cleaned["mainheatcont-description"].extend(
         [
diff --git a/etl/epc/Dataset.py b/etl/epc/Dataset.py
index 7040d66c..cf241747 100644
--- a/etl/epc/Dataset.py
+++ b/etl/epc/Dataset.py
@@ -658,6 +658,34 @@ class TrainingDataset(BaseDataset):
 
         components_to_expand = cols_to_drop.keys()
 
+        for comp in list(components_to_expand):
+            if comp == "main-fuel":
+                cleaned_key = "main-fuel"
+                left_on_starting = "main_fuel_starting"
+                left_on_ending = "main_fuel_ending"
+                original_cols = ["main_fuel_starting", "main_fuel_ending"]
+            else:
+                cleaned_key = f"{comp}-description"
+                left_on_starting = f"{comp}_description_starting"
+                left_on_ending = f"{comp}_description_ending"
+                original_cols = [
+                    f"{comp}_description_starting",
+                    f"{comp}_description_ending",
+                ]
+            df = pd.DataFrame(cleaned_lookup[cleaned_key])
+            # Check for the existence
+            filtered_1 = df[df["original_description"] == self.df[left_on_starting].values[0]]
+            filtered_2 = df[df["original_description"] == self.df[left_on_ending].values[0]]
+            if filtered_1.empty:
+                print(comp)
+                print(self.df[left_on_starting].values[0])
+
+            if filtered_2.empty:
+                print(f"Original description {self.df[left_on_ending].values[0]} not found in lookup")
+
+        z = pd.DataFrame(cleaned_lookup["roof-description"])
+        z[z["original_description"] == "Average thermal transmittance 0.20 W/m?K"]
+
         for component in components_to_expand:
             # TODO: change cleaned dataframe to have underscores instead of dashes
             if component == "main-fuel":

From 730ad0fd7144b2b5e86d98b8c3ef4e5d71ccd0cb Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 27 Feb 2024 13:13:28 +0000
Subject: [PATCH 028/248] removing temp code

---
 etl/epc/Dataset.py | 28 ----------------------------
 1 file changed, 28 deletions(-)

diff --git a/etl/epc/Dataset.py b/etl/epc/Dataset.py
index cf241747..7040d66c 100644
--- a/etl/epc/Dataset.py
+++ b/etl/epc/Dataset.py
@@ -658,34 +658,6 @@ class TrainingDataset(BaseDataset):
 
         components_to_expand = cols_to_drop.keys()
 
-        for comp in list(components_to_expand):
-            if comp == "main-fuel":
-                cleaned_key = "main-fuel"
-                left_on_starting = "main_fuel_starting"
-                left_on_ending = "main_fuel_ending"
-                original_cols = ["main_fuel_starting", "main_fuel_ending"]
-            else:
-                cleaned_key = f"{comp}-description"
-                left_on_starting = f"{comp}_description_starting"
-                left_on_ending = f"{comp}_description_ending"
-                original_cols = [
-                    f"{comp}_description_starting",
-                    f"{comp}_description_ending",
-                ]
-            df = pd.DataFrame(cleaned_lookup[cleaned_key])
-            # Check for the existence
-            filtered_1 = df[df["original_description"] == self.df[left_on_starting].values[0]]
-            filtered_2 = df[df["original_description"] == self.df[left_on_ending].values[0]]
-            if filtered_1.empty:
-                print(comp)
-                print(self.df[left_on_starting].values[0])
-
-            if filtered_2.empty:
-                print(f"Original description {self.df[left_on_ending].values[0]} not found in lookup")
-
-        z = pd.DataFrame(cleaned_lookup["roof-description"])
-        z[z["original_description"] == "Average thermal transmittance 0.20 W/m?K"]
-
         for component in components_to_expand:
             # TODO: change cleaned dataframe to have underscores instead of dashes
             if component == "main-fuel":

From d573c4d8a0ae911edd0e2f181eceb4087e3e78e4 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 27 Feb 2024 15:15:05 +0000
Subject: [PATCH 029/248] added try except mechanism

---
 .../ha_15_32/ha_analysis_batch_3.py           | 35 ++++++++++++-------
 etl/epc/Record.py                             | 32 ++++++++---------
 2 files changed, 38 insertions(+), 29 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index e261710e..da484daa 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1089,6 +1089,9 @@ def get_epc_data(
     outputs = {}
     for ha_name, data_assets in loader.data.items():
 
+        if ha_name == "HA39":
+            continue
+
         if not pull_data:
             # Then we retrieve the data from S3
             processed_ha_results = read_pickle_from_s3(
@@ -1114,6 +1117,7 @@ def get_epc_data(
         results = []
         scoring_data = []
         nodata = []
+        failed_model_rows = []
         for index, property_meta in tqdm(asset_list.iterrows(), total=len(asset_list)):
 
             if property_meta["matching_postcode"] is None:
@@ -1225,19 +1229,24 @@ def get_epc_data(
             if eligibility.eco4_warmfront["eligible"]:
                 if eligibility.epc["uprn"] == "":
                     eligibility.epc["uprn"] = int(property_meta["asset_list_row_id"].split(ha_name)[1])
-
-                scoring_dictionary = prepare_model_data_row(
-                    property_id=property_meta["asset_list_row_id"],
-                    modelling_epc=eligibility.epc,
-                    cleaned=cleaned,
-                    cleaning_data=cleaning_data,
-                    created_at=created_at,
-                    old_data=older_epcs,
-                    full_sap_epc=full_sap_epc,
-                    photo_supply_lookup=photo_supply_lookup,
-                    floor_area_decile_thresholds=floor_area_decile_thresholds
-                )
-                scoring_data.extend(scoring_dictionary)
+                try:
+                    scoring_dictionary = prepare_model_data_row(
+                        property_id=property_meta["asset_list_row_id"],
+                        modelling_epc=eligibility.epc,
+                        cleaned=cleaned,
+                        cleaning_data=cleaning_data,
+                        created_at=created_at,
+                        old_data=older_epcs,
+                        full_sap_epc=full_sap_epc,
+                        photo_supply_lookup=photo_supply_lookup,
+                        floor_area_decile_thresholds=floor_area_decile_thresholds
+                    )
+                    scoring_data.extend(scoring_dictionary)
+                except Exception as e:
+                    # If we fail, we just keep a record of it
+                    failed_model_rows.append(
+                        property_meta["asset_list_row_id"]
+                    )
 
             results.append(
                 {
diff --git a/etl/epc/Record.py b/etl/epc/Record.py
index c793716f..e74330a2 100644
--- a/etl/epc/Record.py
+++ b/etl/epc/Record.py
@@ -725,26 +725,26 @@ class EPCRecord:
         if self.prepared_epc["construction-age-band"] in DATA_ANOMALY_MATCHES:
             if self.old_data:
                 # Take the most recent
-                max_datetime = max(
-                    [
-                        old_record["lodgement-datetime"]
-                        for old_record in self.old_data
-                        if old_record["construction-age-band"]
-                           not in DATA_ANOMALY_MATCHES
-                    ]
-                )
-
-                most_recent = [
-                    old_record
+                old_age_bands = [
+                    old_record["lodgement-datetime"]
                     for old_record in self.old_data
-                    if old_record["lodgement-datetime"] == max_datetime
+                    if old_record["construction-age-band"] not in DATA_ANOMALY_MATCHES
                 ]
 
-                self.prepared_epc["construction-age-band"] = (
-                    EPCDataProcessor.clean_construction_age_band(
-                        most_recent[0]["construction-age-band"]
+                if old_age_bands:
+                    max_datetime = max(old_age_bands)
+
+                    most_recent = [
+                        old_record
+                        for old_record in self.old_data
+                        if old_record["lodgement-datetime"] == max_datetime
+                    ]
+
+                    self.prepared_epc["construction-age-band"] = (
+                        EPCDataProcessor.clean_construction_age_band(
+                            most_recent[0]["construction-age-band"]
+                        )
                     )
-                )
 
         self.construction_age_band = self.prepared_epc["construction-age-band"]
         self.age_band = england_wales_age_band_lookup.get(self.construction_age_band)

From b26e44b465e5c832a65b5bd09767f1015c2dfc1a Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 27 Feb 2024 15:45:33 +0000
Subject: [PATCH 030/248] Extending to HA 7

---
 .../ha_15_32/ha_analysis_batch_3.py           | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index da484daa..2fb26e73 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -48,6 +48,10 @@ PROPERTY_TYPE_LOOKUP = {
             'EXTRACARE SCHEME': "Flat",
         }
     },
+    "HA7": {
+        "property_type": {},
+        "built_form": {}
+    },
     "HA14": {
         "property_type": {
             "House": "House",
@@ -143,6 +147,13 @@ class DataLoader:
             asset_list["matching_postcode"] = asset_list[
                 self.COLUMN_CONFIG[ha_name]["postcode"]
             ].str.lower().str.strip()
+        elif ha_name == "HA7":
+            # Create matching_address by concatenating Address 1, Address 2, Address 3, Address 4, Postcode
+            asset_list["matching_address"] = asset_list["Address"].str.lower().str.strip() + ", " + \
+                                             asset_list["Address2"].str.lower().str.strip() + ", " + \
+                                             asset_list["Address3"].str.lower().str.strip() + ", " + \
+                                             asset_list["Postcode"].str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["Postcode"].str.lower().str.strip()
         elif ha_name == "HA14":
             # Create matching_address by concatenating Address 1, Address 2, Address 3, Address 4, Postcode
             asset_list["matching_address"] = asset_list["Address 1"].str.lower().str.strip() + ", " + \
@@ -241,6 +252,8 @@ class DataLoader:
     def get_asset_sheetname(workbook):
         if "Asset List" in workbook.sheetnames:
             return "Asset List"
+        elif "Asset" in workbook.sheetnames and "Assets" not in workbook.sheetnames:
+            return "Asset"
         else:
             return "Assets"
 
@@ -311,6 +324,8 @@ class DataLoader:
         survey_list = pd.DataFrame(survey_rows, columns=[cell.value for cell in survey_sheet[1]])
         # Remove columns that are None
         survey_list = survey_list.loc[:, survey_list.columns.notnull()]
+        # Remove rows that are completely empty
+        survey_list = survey_list.loc[survey_list.loc[:, survey_list.columns].notnull().any(axis=1)]
         survey_list["survey_list_row_id"] = [ha_name + "_survey_" + str(i) for i in range(0, len(survey_list))]
 
         # Perform survey list merge
@@ -328,6 +343,8 @@ class DataLoader:
         ciga_list = pd.DataFrame(ciga_rows, columns=[cell.value for cell in ciga_sheet[1]])
         # Remove columns that are None
         ciga_list = ciga_list.loc[:, ciga_list.columns.notnull()]
+        # Remove rows that are completely None
+        ciga_list = ciga_list.loc[ciga_list.loc[:, ciga_list.columns].notnull().any(axis=1)]
         # Perform ciga list merge
         if not ciga_list.empty:
             # Remove rows with missing postcode which happens in a small number of cases
@@ -1880,7 +1897,7 @@ def app():
     # Grab the December HA figures filepath
     december_figures_filepath = "local_data/ha_data/HA_December_figures.csv"
 
-    priority_has = ["HA1", "HA6", "HA14", "HA39", "HA107"]
+    priority_has = ["HA1", "HA6", "HA7", "HA14", "HA39", "HA107"]
     # Filter down the directories to only the priority HAs
     directories = [d for d in directories if d.split("/")[2] in priority_has]
 

From eb216e55d39817a6d7bdd6c582c6da6826050ac9 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 27 Feb 2024 16:45:37 +0000
Subject: [PATCH 031/248] Handling missing dates in SearchEpc class

---
 backend/SearchEpc.py                            | 15 ++++++++++-----
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py |  1 +
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py
index 4a3f371a..3d2df9fb 100644
--- a/backend/SearchEpc.py
+++ b/backend/SearchEpc.py
@@ -30,7 +30,7 @@ vartypes = {
     'environment-impact-potential': "Int64",
     'glazed-type': 'str',
     'heating-cost-current': 'float',
-    'address3': 'str',
+    # 'address3': 'str',
     'mainheatcont-description': 'str',
     'sheating-energy-eff': 'str',
     'property-type': 'str',
@@ -40,7 +40,7 @@ vartypes = {
     'mechanical-ventilation': 'str',
     'hot-water-cost-current': 'str',
     'county': 'str',
-    'postcode': 'str',
+    # 'postcode': 'str',
     'solar-water-heating-flag': 'str',
     'constituency': 'str',
     'co2-emissions-potential': 'float',
@@ -55,7 +55,7 @@ vartypes = {
     # 'inspection-date': str,
     'mains-gas-flag': 'str',
     'co2-emiss-curr-per-floor-area': 'float',
-    'address1': 'str',
+    # 'address1': 'str',
     'heat-loss-corridor': 'str',
     'flat-storey-count': "Int64",
     'constituency-label': 'str',
@@ -67,7 +67,7 @@ vartypes = {
     'roof-description': 'str',
     'floor-energy-eff': 'str',
     'number-habitable-rooms': 'float',
-    'address2': 'str',
+    # 'address2': 'str',
     'hot-water-env-eff': 'str',
     'posttown': 'str',
     'mainheatc-energy-eff': 'str',
@@ -98,7 +98,7 @@ vartypes = {
     # 'lodgement-date',
     'extension-count': "Int64",
     'mainheatc-env-eff': 'str',
-    'lmk-key': 'str',
+    # 'lmk-key': 'str',
     'wind-turbine-count': "Int64",
     'tenure': 'str',
     'floor-level': 'str',
@@ -575,6 +575,11 @@ class SearchEpc:
             property_type=property_type
         )
 
+        # If the lodgement date is missing, we fill it with the inspection date
+        epc_data["lodgement-datetime"] = epc_data["lodgement-datetime"].fillna(epc_data["inspection-date"])
+        # If we still have missing dates, we set it to the mean of the non NA dates
+        epc_data["lodgement-datetime"] = epc_data["lodgement-datetime"].fillna(epc_data["lodgement-datetime"].mean())
+
         # For each attribute, we need to determine the datatype and use an appropriate method
         # to estimate.
         estimated_epc = {}
diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 2fb26e73..a8f0bfa9 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1135,6 +1135,7 @@ def get_epc_data(
         scoring_data = []
         nodata = []
         failed_model_rows = []
+        # Failed at index 13691
         for index, property_meta in tqdm(asset_list.iterrows(), total=len(asset_list)):
 
             if property_meta["matching_postcode"] is None:

From 2a4d16162abc8bcda788950d44a0762148e8904d Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 27 Feb 2024 18:01:29 +0000
Subject: [PATCH 032/248] Added ha7

---
 .../ha_15_32/ha_analysis_batch_3.py           | 24 ++++++++++++-------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index a8f0bfa9..889ae776 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -49,8 +49,19 @@ PROPERTY_TYPE_LOOKUP = {
         }
     },
     "HA7": {
-        "property_type": {},
-        "built_form": {}
+        "property_type": {
+            "House": "House",
+            "Flat": "Flat",
+            "Bungalow": "Bungalow",
+            "Maisonette": "Maisonette",
+        },
+        "built_form": {
+            "Semi Detached": "Semi-Detached",
+            "Mid Terrace": "Mid-Terrace",
+            "End Terrace": "End-Terrace",
+            "Detached": "Detached",
+            "End Terraced": "End-Terrace",
+        }
     },
     "HA14": {
         "property_type": {
@@ -1042,6 +1053,9 @@ def get_property_type_and_built_form(property_meta, ha_name):
     elif ha_name == "HA6":
         property_type = PROPERTY_TYPE_LOOKUP[ha_name]["property_type"][property_meta["Dwelling type"]]
         built_form = property_meta["built_form"]
+    elif ha_name == "HA7":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name]["property_type"][property_meta["Archetype"]]
+        built_form = PROPERTY_TYPE_LOOKUP[ha_name]["built_form"][property_meta["Property Type"]]
     elif ha_name == "HA14":
         if property_meta["Asset Type Description"] == "Block - Repair":
             # We try and deduce if it's a flat or house, depending on if it has "room" or "flats" in the address
@@ -1106,9 +1120,6 @@ def get_epc_data(
     outputs = {}
     for ha_name, data_assets in loader.data.items():
 
-        if ha_name == "HA39":
-            continue
-
         if not pull_data:
             # Then we retrieve the data from S3
             processed_ha_results = read_pickle_from_s3(
@@ -1135,7 +1146,6 @@ def get_epc_data(
         scoring_data = []
         nodata = []
         failed_model_rows = []
-        # Failed at index 13691
         for index, property_meta in tqdm(asset_list.iterrows(), total=len(asset_list)):
 
             if property_meta["matching_postcode"] is None:
@@ -1906,8 +1916,6 @@ def app():
     loader.load()
     loader.ha_facts_and_figures()
 
-    loader.facts_and_figures.to_csv("facts_and_figures.csv", index=False)
-
     # We load in the additional data required to perform the analysis
     cleaned = read_from_s3(
         s3_file_name="cleaned_epc_data/cleaned.bson",

From 9ca6c179bca70cfffd34da4e278e144ff8263e24 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 27 Feb 2024 18:34:49 +0000
Subject: [PATCH 033/248] Adding HA16

---
 .../ha_15_32/ha_analysis_batch_3.py           | 139 +++++++++++++++++-
 1 file changed, 135 insertions(+), 4 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 889ae776..a707cfa5 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -128,6 +128,10 @@ class DataLoader:
         "HA6": {
             "address": "propertyaddress",
             "postcode": "address"  # The 'address' column actually contains postcode
+        },
+        "HA16": {
+            "address": "Address",
+            "postcode": "Postcode"
         }
     }
 
@@ -135,9 +139,10 @@ class DataLoader:
         # We expect 4 unmatched addresses, which have been validated manually as being in the ciga file but not
         # the asset list
         "HA14": 3,
+        "HA16": 7,
         # There's just too many unmatched here
         "HA6": 117,
-        "HA107": 51
+        "HA107": 51,
     }
 
     def __init__(self, directories, december_figures_filepath, use_cache):
@@ -151,7 +156,7 @@ class DataLoader:
 
     def create_asset_list_matching_address(self, ha_name, asset_list):
 
-        if ha_name in ["HA1", "HA6"]:
+        if ha_name in ["HA1", "HA6", "HA16"]:
             asset_list["matching_address"] = asset_list[
                 self.COLUMN_CONFIG[ha_name]["address"]
             ].str.lower().str.strip()
@@ -173,6 +178,7 @@ class DataLoader:
                                              asset_list["Address 4"].str.lower().str.strip() + ", " + \
                                              asset_list["Postcode"].str.lower().str.strip()
             asset_list["matching_postcode"] = asset_list["Postcode"].str.lower().str.strip()
+
         elif ha_name == "HA39":
             # Create matching_address by concatenating add_1, add_2, add_3, add_4, add_5, post_code
             asset_list["matching_address"] = asset_list["add_1"].astype(str).str.lower().str.strip() + ", " + \
@@ -234,7 +240,7 @@ class DataLoader:
         :return:
         """
 
-        if ha_name in ["HA6", "HA14", "HA107"]:
+        if ha_name in ["HA6", "HA14", "HA107", "HA16"]:
             split_addresses = ciga_list['Matched Address'].str.split(',', expand=True)
             house_numbers = split_addresses[0].str.split(' ', expand=True)
             # THe first column should be HouseNo - we aren't interested in the other columns, but we don't know how
@@ -556,6 +562,129 @@ class DataLoader:
 
         return survey_list
 
+    @staticmethod
+    def correct_ha16_survey_list(survey_list):
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("/", ", ")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.lower()
+        survey_list["Street / Block Name"] = np.where(
+            survey_list["Street / Block Name"] == "REEDS RD",
+            "Reeds ROAD",
+            survey_list["Street / Block Name"]
+        )
+        # Replace the standalone word "rd" (matched on word boundaries) with "road"
+        survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.replace(r'\brd\b', 'road',
+                                                                                            regex=True)
+
+        # Replace " , " with ", "
+        survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.replace(
+            " , ", ', ',
+        )
+        # Fix "{place} ,{place}" with "{place}, {place}"
+        survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.replace(r'\s*,\s*', ', ',
+                                                                                            regex=True)
+        # Strip whitespace
+        survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.strip()
+
+        # Correct errors
+        survey_list["Post Code"] = np.where(
+            survey_list["Post Code"] == "M38 0SA",
+            "M38 9SA",
+            survey_list["Post Code"]
+        )
+
+        survey_list["Post Code"] = np.where(
+            (survey_list["Street / Block Name"] == "nelson drive") & (survey_list["Post Code"] == "M44 5JE"),
+            "M44 5JF",
+            survey_list["Post Code"]
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("eccels", "eccles")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("chatley, road",
+                                                                                            "chatley road")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("vaughen", "Vaughan")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("cresent", "crescent")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("plantation road",
+                                                                                            "plantation avenue")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("how clough drive",
+                                                                                            "howclough drive")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("brockhurst lane",
+                                                                                            "brookhurst lane")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("biirch road",
+                                                                                            "birch road")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("hadson road",
+                                                                                            "hodson road")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("harbonne avennue",
+                                                                                            "narbonne avenue")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "cumberland road, cadishead",
+            "cumberland avenue, cadishead")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("aston field drive",
+                                                                                            "ashton field drive")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("wedgewood road",
+                                                                                            "wedgwood road")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("hamilton close",
+                                                                                            "hamilton avenue")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "lichens crescent, fitton hill",
+            "lichens crescent")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("south croft, fitton hill",
+                                                                                            "south croft")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(", fitton hill", "")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("firtree dr",
+                                                                                            "fir tree avenue")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("hawthorne road",
+                                                                                            "hawthorn crescent")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("rein lee avenue",
+                                                                                            "reins lee avenue")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("westerhill road",
+                                                                                            "wester hill road")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("st martins road",
+                                                                                            "saint martins road")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("timperley avenue",
+                                                                                            "timperley close")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("eastwood road",
+                                                                                            "eastwood avenue")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("new road", "new street")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("grassmere road",
+                                                                                            "grasmere road")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("hulton road",
+                                                                                            "hulton avenue")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("beechfield avenue",
+                                                                                            "beechfield road")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("princess avenue",
+                                                                                            "princes avenue")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("edge ford crecent",
+                                                                                            "edge fold crescent")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("conniston avenue",
+                                                                                            "coniston avenue")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("blackthorne crescent",
+                                                                                            "blackthorn crescent")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("wellstock road",
+                                                                                            "wellstock lane")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("brackley avenue",
+                                                                                            "brackley street")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("brook avenue swinton",
+                                                                                            "brook avenue, swinton")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("green avenue swinton",
+                                                                                            "green avenue, swinton")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("grasmere avenue wardley",
+                                                                                            "grasmere avenue, wardley")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("mardale avenue wardle",
+                                                                                            "mardale avenue, wardle")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("carleach grove",
+                                                                                            "cartleach Grove")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("arbour grove",
+                                                                                            "arbor Grove")
+
+        # Replacement for clively avenue 66-68
+        survey_list["NO."] = np.where(
+            survey_list["NO."] == "66-68",
+            "66",
+            survey_list["NO."]
+        )
+
+        return survey_list
+
     @staticmethod
     def correct_ha107_survey_list(survey_list):
         # Replace Front Street, East Stockham with Front Street, East Stockwith
@@ -898,6 +1027,8 @@ class DataLoader:
         scheme_map = {
             "ECO4": "ECO4",
             "AFFORDABLE WARMTH": "ECO4",
+            "ECO4 A/W": "ECO4",
+            "ECO4 GBIS (ECO+)": "GBIS"
         }
 
         eco_eligibility_map = {
@@ -1908,7 +2039,7 @@ def app():
     # Grab the December HA figures filepath
     december_figures_filepath = "local_data/ha_data/HA_December_figures.csv"
 
-    priority_has = ["HA1", "HA6", "HA7", "HA14", "HA39", "HA107"]
+    priority_has = ["HA1", "HA6", "HA7", "HA14", "HA16", "HA39", "HA107"]
     # Filter down the directories to only the priority HAs
     directories = [d for d in directories if d.split("/")[2] in priority_has]
 

From 102600b19651964c4b6c7945307a8defd454f9d1 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 27 Feb 2024 18:40:17 +0000
Subject: [PATCH 034/248] Added HA16

---
 .../ha_15_32/ha_analysis_batch_3.py           | 22 +++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index a707cfa5..ee23f238 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -71,6 +71,24 @@ PROPERTY_TYPE_LOOKUP = {
             "Maisonette": "Maisonette",
         }
     },
+    "HA16": {
+        'Semi Detached Bungalow': {"property-type": "Bungalow", "built-form": "Semi-Detached"},
+        'Mid Terraced House': {"property-type": "House", "built-form": "Mid-Terrace"},
+        'End Terraced House': {"property-type": "House", "built-form": "End-Terrace"},
+        'Low Rise Flat': {"property-type": "Flat", "built-form": "Mid-Terrace"},
+        'Semi-Detached House': {"property-type": "House", "built-form": "Semi-Detached"},
+        'Detached Bungalow': {"property-type": "Bungalow", "built-form": "Detached"},
+        'End Terraced Bungalow': {"property-type": "Bungalow", "built-form": "End-Terrace"},
+        'Mid Terraced Bungalow': {"property-type": "Bungalow", "built-form": "Mid-Terrace"},
+        'Medium Rise Flat': {"property-type": "Flat", "built-form": "Mid-Terrace"},
+        'Detached House': {"property-type": "House", "built-form": "Detached"},
+        'Cottage Flat': {"property-type": "Flat", "built-form": "Semi-Detached"},
+        'Maisonette Medium Rise': {"property-type": "Flat", "built-form": "Mid-Terrace"},
+        'Maisonette Over Shop': {"property-type": "Flat", "built-form": "Mid-Terrace"},
+        'End Terraced Town House': {"property-type": "House", "built-form": "End-Terrace"},
+        'Flat Over Shop': {"property-type": "Flat", "built-form": "Mid-Terrace"},
+        'Mid Terraced Town House': {"property-type": "House", "built-form": "Mid-Terrace"},
+    },
     "HA39": {
         "Semi house": {"property_type": "House", "built_form": "Semi-Detached"},
         "1st floor flat": {"property_type": "Flat", "built_form": None},
@@ -1201,6 +1219,10 @@ def get_property_type_and_built_form(property_meta, ha_name):
             ]
 
         built_form = None
+    elif ha_name == "HA16":
+        config = PROPERTY_TYPE_LOOKUP[ha_name][property_meta["Type"]]
+        property_type = config.get("property-type")
+        built_form = config.get("built-form")
     elif ha_name == "HA39":
 
         property_type_config = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["ConstructionStyle"], {})

From a1c19b5b8883ead263880c2d589bd76da76d6403 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 27 Feb 2024 19:01:32 +0000
Subject: [PATCH 035/248] Adding ha24 wip

---
 .../ha_15_32/ha_analysis_batch_3.py           | 47 ++++++++++++++++++-
 1 file changed, 45 insertions(+), 2 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index ee23f238..94df8ceb 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -150,6 +150,10 @@ class DataLoader:
         "HA16": {
             "address": "Address",
             "postcode": "Postcode"
+        },
+        "HA24": {
+            "address": "Address",
+            "postcode": "Postcode"
         }
     }
 
@@ -174,7 +178,7 @@ class DataLoader:
 
     def create_asset_list_matching_address(self, ha_name, asset_list):
 
-        if ha_name in ["HA1", "HA6", "HA16"]:
+        if ha_name in ["HA1", "HA6", "HA16", "HA24"]:
             asset_list["matching_address"] = asset_list[
                 self.COLUMN_CONFIG[ha_name]["address"]
             ].str.lower().str.strip()
@@ -289,6 +293,8 @@ class DataLoader:
             return "Asset List"
         elif "Asset" in workbook.sheetnames and "Assets" not in workbook.sheetnames:
             return "Asset"
+        elif "Decent Homes Stock" in workbook.sheetnames:
+            return "Decent Homes Stock"
         else:
             return "Assets"
 
@@ -703,6 +709,43 @@ class DataLoader:
 
         return survey_list
 
+    @staticmethod
+    def correct_ha24_survey_list(survey_list):
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("/", ", ")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.lower()
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.strip()
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "council house, nidds lane", "nidds lane"
+        )
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "wirral avenue", "wirrall avenue"
+        )
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "st ives road", "st. ives crescent"
+        )
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "sundringham road", "sandringham road"
+        )
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "milton avenue", "milton road"
+        )
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "st ives crescent", "st. ives crescent"
+        )
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "council house, waterbelly lane", "waterbelly lane"
+        )
+        # Generally remove "council house, " wherever it appears in the street name
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "council house, ", ""
+        )
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "st. leodegars close", "st leodegars close"
+        )
+
+        return survey_list
+
     @staticmethod
     def correct_ha107_survey_list(survey_list):
         # Replace Front Street, East Stockham with Front Street, East Stockwith
@@ -2061,7 +2104,7 @@ def app():
     # Grab the December HA figures filepath
     december_figures_filepath = "local_data/ha_data/HA_December_figures.csv"
 
-    priority_has = ["HA1", "HA6", "HA7", "HA14", "HA16", "HA39", "HA107"]
+    priority_has = ["HA1", "HA6", "HA7", "HA14", "HA16", "HA24", "HA39", "HA107"]
     # Filter down the directories to only the priority HAs
     directories = [d for d in directories if d.split("/")[2] in priority_has]
 

From e9bfd63c3588206cd9e7c79b25c6067b617bf436 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 27 Feb 2024 21:00:23 +0000
Subject: [PATCH 036/248] Fixed getting property type and built form for ha107

---
 .../ha_15_32/ha_analysis_batch_3.py           | 77 ++++++++++++++-----
 1 file changed, 57 insertions(+), 20 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 94df8ceb..5cbfb90c 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -223,12 +223,67 @@ class DataLoader:
 
         return asset_list
 
+    @staticmethod
+    def extract_property_info_ha107(properties):
+        property_types = {
+            "House": "House",
+            "Flat": "Flat",
+            "Bungalow": "Bungalow",
+            "Maisonette": "Maisonette",
+            "Bedsit": None
+        }
+
+        built_forms = {
+            "Detached": "Detached",
+            "Semi Detached": "Semi-Detached",
+            "End Terrace": "End-Terrace",
+            "Mid Terrace": "Mid-Terrace"
+        }
+
+        # Function to extract property type and built form from a description
+        def extract_from_description(description):
+            property_type = None
+            built_form = None
+
+            for key in property_types:
+                if key in description:
+                    property_type = property_types[key]
+                    break
+
+            for key in built_forms:
+                if key in description:
+                    built_form = built_forms[key]
+                    break
+
+            return property_type, built_form
+
+        # Process each property in the list
+        results = []
+        for property_description in properties:
+            property_type, built_form = extract_from_description(property_description)
+            results.append(
+                {
+                    "Property type": property_description,
+                    "property_type": property_type,
+                    "built_form": built_form
+                }
+            )
+        results = pd.DataFrame(results)
+
+        return results
+
     def append_asset_list_built_form(self, ha_name, asset_list):
 
         # Finally, we process property_type or built form, where needed
         if ha_name == "HA6":
             asset_list["built_form"] = asset_list["Property Type"].apply(self.identify_built_form_ha6)
 
+        if ha_name == "HA107":
+            mapped_df = self.extract_property_info_ha107(asset_list["Property type"].unique())
+            asset_list = asset_list.merge(
+                mapped_df, how="left", on="Property type"
+            )
+
         return asset_list
 
     @staticmethod
@@ -1280,26 +1335,8 @@ def get_property_type_and_built_form(property_meta, ha_name):
                 property_type = "House"
     elif ha_name == "HA107":
 
-        dwelling_style = property_meta["Dwelling Style"]
-        if isinstance(dwelling_style, str):
-            dwelling_style = dwelling_style.strip()
-
-        property_type = PROPERTY_TYPE_LOOKUP[ha_name]["property_type"].get(property_meta["DwellingType"])
-        built_form = PROPERTY_TYPE_LOOKUP[ha_name]["built_form"].get(dwelling_style, None)
-
-        if property_type is None:
-            if built_form in ["Semi-Detached", "Mid-Terrace", "End-Terrace", "Detached"]:
-                property_type = "House"
-
-            if "flat" in property_meta["Wall Construction"].lower():
-                property_type = "Flat"
-
-            if (property_meta["DwellingType"] == "UNKNOWN") & (property_meta["Dwelling Style"] == 0):
-                # Hand a few specific cases
-                property_type = "Bungalow"
-
-            if property_meta["Street"] == "School View":
-                property_type = "Bungalow"
+        property_type = property_meta.get("property_type", None)
+        built_form = property_meta.get("built_form", None)
 
     else:
         raise NotImplementedError("Implement me")

From 6ae21bbcb023139961eb69749ac1380a7d3ac001 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 28 Feb 2024 12:31:48 +0000
Subject: [PATCH 037/248] Creating the output structure

---
 etl/eligibility/Eligibility.py                |  11 +-
 .../ha_15_32/ha_analysis_batch_3.py           | 548 +++++++-----------
 2 files changed, 220 insertions(+), 339 deletions(-)

diff --git a/etl/eligibility/Eligibility.py b/etl/eligibility/Eligibility.py
index f7a5ed98..b594579f 100644
--- a/etl/eligibility/Eligibility.py
+++ b/etl/eligibility/Eligibility.py
@@ -365,7 +365,7 @@ class Eligibility:
             return
 
         # Near perfect
-        if self.cavity["suitability"] and (current_sap < 55):
+        if self.cavity["suitability"] and (current_sap < 69):
             self.gbis_warmfront = {
                 "eligible": True,
                 "strict": True,
@@ -373,15 +373,6 @@ class Eligibility:
             }
             return
 
-        # Suitable cavity, but high sap
-        if self.cavity["suitability"] and (current_sap >= 55):
-            self.gbis_warmfront = {
-                "eligible": True,
-                "strict": False,
-                "message": "Meets cavity, fails SAP check",
-            }
-            return
-
         self.gbis_warmfront = {
             "eligible": False,
             "strict": False,
diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 5cbfb90c..61c4a243 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1646,10 +1646,26 @@ def get_epc_data(
 
 
 def get_col_widths(dataframe):
-    # First we find the maximum length of the index column
-    idx_max = max([len(str(s)) for s in dataframe.index.values] + [len(str(dataframe.index.name))])
-    # Then, we concatenate this to the max of the lengths of column name and its max value for each column, row-wise
-    return [idx_max] + [max(dataframe[col].astype(str).map(len).max(), len(col)) for col in dataframe.columns]
+    # Define a maximum width for any column to prevent excessively wide columns
+    max_allowed_width = 25
+
+    # Calculate widths for columns
+    widths = []
+
+    if isinstance(dataframe.columns, pd.MultiIndex):
+        # For MultiIndex, calculate max width considering the header and data
+        header_widths = [max(len(str(item)) for item in col) + 2 for col in dataframe.columns.values]  # +2 for padding
+        for i, column in enumerate(dataframe.columns):
+            max_data_width = max(dataframe[column].astype(str).apply(len).max(), header_widths[i])
+            widths.append(min(max_data_width, max_allowed_width))
+    else:
+        # For non-MultiIndex, calculate width normally
+        for col in dataframe.columns:
+            # Calculate the max length of data or column name and limit it
+            max_length = max(dataframe[col].astype(str).apply(len).max(), len(str(col)) + 2)  # +2 for padding
+            widths.append(min(max_length, max_allowed_width))
+
+    return widths
 
 
 def analyse_ha_data(outputs, loader):
@@ -1671,42 +1687,13 @@ def analyse_ha_data(outputs, loader):
     :return:
     """
 
-    eco4_rate = 1710
-    gbis_rate = 600
-
     ha_analysis_results = []
-    ha_revenue_results = []
     for ha_name, datasets in outputs.items():
-
         inputs = [x for k, x in loader.data.items() if k == ha_name][0]
-        # TODO: This is placeholder because we don't have the schemes that the properties have been qualified for
-        #       yet
-        #
-        import random
-        randomly_allocated_schemes = random.choices(["ECO4", "GBIS"], k=inputs["asset_list"].shape[0])
-        inputs["asset_list"]["randomly_allocated_schemes"] = randomly_allocated_schemes
-        inputs["asset_list"]["funding_scheme"] = None
-        inputs["asset_list"]["funding_scheme"] = np.where(
-            inputs["asset_list"]["row_meaning"] == "identified potential eco works (CWI)",
-            inputs["asset_list"]["randomly_allocated_schemes"],
-            inputs["asset_list"]["funding_scheme"]
-        )
-
-        # TODO: Also temp, just for HA 6
-        if ha_name == "ha_6":
-            inputs["survey_list"]["funding_scheme"] = None
-            inputs["survey_list"]["funding_scheme"] = np.where(
-                inputs["survey_list"][
-                    'AFFORDABLE WARMTH                 OR EPC FOR HOUSING ASSOCIATION '] == "AFFORDABLE WARMTH",
-                "ECO4",
-                "GBIS"
-            )
-
-        # End placholder
 
         results_df = datasets["results_df"].copy()
 
-        analysis_data = inputs["asset_list"][['asset_list_row_id', "row_meaning", "funding_scheme"]].rename(
+        analysis_data = inputs["asset_list"][['asset_list_row_id', "ECO Eligibility"]].rename(
             columns={"row_meaning": "asset_identification_status"}
         ).merge(
             results_df,
@@ -1715,293 +1702,236 @@ def analyse_ha_data(outputs, loader):
             left_on="asset_list_row_id"
         )
 
-        # We now merge the survey list onto the analysis data and remove anything that is sold, to give us just what is
-        # remaining
+        ################################################################################################
+        # We take the properties that strictly qualified under eco
+        ################################################################################################
 
-        if inputs["matched_lookup"] is not None:
-            analysis_data = analysis_data.merge(
-                inputs["matched_lookup"], how="left", on="asset_list_row_id"
+        eco4_identified = analysis_data[analysis_data["ECO Eligibility"] == "eco4"].copy()
+        eco4_identified["identification_type"] = None
+        eco4_identified["identification_type"] = np.where(
+            (eco4_identified["eco4_eligible"] == True) & (eco4_identified["eco4_strict"] == True),
+            "strict",
+            eco4_identified["identification_type"]
+        )
+
+        eco4_identified["identification_type"] = np.where(
+            (eco4_identified["eco4_eligible"] == True) & (eco4_identified["eco4_strict"] == False),
+            "expansive",
+            eco4_identified["identification_type"]
+        )
+        ################################################################################################
+        # We take the properties dependent on CIGA
+        ################################################################################################
+
+        ciga_dependent_identified = analysis_data[
+            analysis_data["ECO Eligibility"].isin(
+                [
+                    "eco4 (subject to ciga)",
+                    "eco4 - passed ciga"
+                ]
             )
-            # Drop any rows that have a survey_list_row_id
-            analysis_data = analysis_data[pd.isnull(analysis_data["survey_list_row_id"])]
+        ].copy()
 
-        # If we have a survey list, we merge this onto the results
-        n_properties_in_asset_list = analysis_data["asset_list_row_id"].nunique()
-
-        properties_sold = (
-            inputs["survey_list"].groupby("funding_scheme")["survey_list_row_id"].nunique().reset_index() if
-            inputs["survey_list"] is not None else pd.DataFrame(columns=["funding_scheme"])
-        )
-        properties_sold_eco4 = (
-            properties_sold[properties_sold["funding_scheme"] == "ECO4"]["survey_list_row_id"].values[0] if
-            (not properties_sold.empty) and ("ECO4" in properties_sold["funding_scheme"].values) else 0
-        )
-        properties_sold_gbis = (
-            properties_sold[properties_sold["funding_scheme"] == "GBIS"]["survey_list_row_id"].values[0] if
-            (not properties_sold.empty) and ("GBIS" in properties_sold["funding_scheme"].values) else 0
+        # These are properties that show filled cavity
+        ciga_dependent_identified["identification_type"] = None
+        ciga_dependent_identified["identification_type"] = np.where(
+            ciga_dependent_identified["eco4_message"].isin(
+                [
+                    "Perfect suitability",
+                    "Meets cavity and sap",
+                    "Fails cavity, meets loft, fails SAP",
+                    "Meets fabric, fails SAP check",
+                    "Meets cavity, loft borderline, meets sap",
+                ]
+            ),
+            "strict",
+            ciga_dependent_identified["identification_type"]
         )
 
-        # We now calculate the number of remaining properties, by scheme
-        remaining_properties = analysis_data[
-            analysis_data["asset_identification_status"] == "identified potential eco works (CWI)"
-            ].copy()
-        remaining_properties["prospect_type"] = None
-
-        remaining_properties_by_scheme = (
-            remaining_properties.groupby("funding_scheme")["asset_list_row_id"].nunique().reset_index()
+        ciga_dependent_identified["identification_type"] = np.where(
+            (ciga_dependent_identified["eco4_message"].isin(["All conditions fail", "failed fabric check"])) &
+            (ciga_dependent_identified["walls"].isin(["Cavity wall, filled cavity"])),
+            "expansive",
+            ciga_dependent_identified["identification_type"]
         )
 
-        n_remaining_properties_eco4 = remaining_properties_by_scheme[
-            remaining_properties_by_scheme["funding_scheme"] == "ECO4"
-            ]["asset_list_row_id"].values[0]
+        ciga_dependent_identified["identification_type"] = np.where(
+            (ciga_dependent_identified["eco4_message"].isin(["Meets just cavity"])) | (
+                ciga_dependent_identified["walls"].isin(["Cavity wall, filled cavity"])
+            ),
+            "expansive",
+            ciga_dependent_identified["identification_type"]
+        )
 
-        n_remaining_properties_gbis = remaining_properties_by_scheme[
-            remaining_properties_by_scheme["funding_scheme"] == "GBIS"
-            ]["asset_list_row_id"].values[0]
+        ################################################################################################
+        # We properties that qualified for gbis
+        ################################################################################################
+        gbis_identified = analysis_data[analysis_data["ECO Eligibility"] == "gbis"].copy()
+        gbis_identified["identification_type"] = None
+        gbis_identified["identification_type"] = np.where(
+            (gbis_identified["gbis_eligible"] == True) & (gbis_identified["sap"] < 69),
+            "strict",
+            gbis_identified["identification_type"]
+        )
 
-        # For the remaining properties, we use the results of the eligibility process to classify the property into
-        # one of multiple categories
-        #
-        # For properties that have been identified as ECO4
-        # 1) Strict ECO4 candidate - Has required fabric and EPC is D or below. We consider D or below here, because
-        #    Warmfront regularly re-surveys properties which then fall within the SAP requirement
-        #    - This is not the very strictest definition of ECO4 eligible, but we aim to characterise the properties
-        #      here and re-surveying is a common practicce by Warmfront. Additionally, many of the social homes have
-        #      very old EPCs which may score lower when re-done
-        # 2) Meets Fabric requirements, not SAP
-        #    Warmfront has identified the property as eligible, but the EPC is not D or below. We consider this but
-        #    label is separately as not a strict
-        # 3) Subject to CIGA check - Meets loft conditions but shows a filled cavity.
-        #    - we don't have a SAP constraint here because the EPC is (currently) showing what the property might
-        #      actually look like after retrofit and so the EPC currently being a C or above means little, because
-        #      the updated EPC, showing an empty cavity, could bring the property within
-        # 4) Loft insulation too thick - Meets empty cavity but shows a loft with between 101 and 270mm insulation.
-        #   - No SAP constraint, for the same reason as in category 2)
-        # 5) Looks like GBIS instead
-        # 6) Does not look like ECO4 candidate
-        #
-        # For properties that have been identified as GBIS
-        # 1) Strict GBIS candidates
-        # 2) Properties that actually look like strict GBIS candidates
-        # 3) Subject to CIGA check - Filled cavity
-        # 4) Does not look like a GBIS candidate
+        gbis_identified["identification_type"] = np.where(
+            (gbis_identified["gbis_eligible"] == True) & (gbis_identified["sap"] >= 69),
+            "expansive",
+            gbis_identified["identification_type"]
+        )
 
-        remaining_eco4_df = remaining_properties[
-            remaining_properties["funding_scheme"] == "ECO4"
-            ].copy()
+        # Finally, we look at the properties that have not been identified by Warmfront
+        not_identified = analysis_data[
+            analysis_data["ECO Eligibility"].isin(
+                [
+                    "not eligible"
+                ]
+            )
+        ].copy()
 
-        ####################################
+        surplus_eco4 = not_identified[
+            (not_identified["eco4_eligible"] == True) & (not_identified["eco4_message"].isin(
+                ["Perfect suitability", "Meets cavity, loft borderline, meets sap", "Near perfect suitability"]
+            ))
+            ]
+
+        surplus_gbis = not_identified[
+            (not_identified["gbis_eligible"] == True) & (
+                ~not_identified["asset_list_row_id"].isin(surplus_eco4["asset_list_row_id"].values)
+            ) & (not_identified["sap"] < 69) & (
+                (not_identified["cavity_type"].isin(["empty", "partial insulation"])) | (
+                not_identified["walls"].str.contains("partial", case=False, na=False)
+            )
+            )
+            ]
+        surplus_gbis = surplus_gbis[surplus_gbis["is_estimated"] == False]
+
+        # Output variables
         # ECO4
-        ####################################
-
-        # 1) We identify this if:
-        #   - remaining_properties["eco4_eligible"] == True
-
-        remaining_eco4_df["prospect_type"] = np.where(
-            (remaining_eco4_df["eco4_eligible"] == True),
-            "strict ECO4",
-            remaining_eco4_df["prospect_type"]
+        n_properties_in_asset_list = inputs["asset_list"].shape[0]
+        n_warmfront_identified_eco4 = eco4_identified.shape[0] + ciga_dependent_identified.shape[0]
+        eco4_of_which_identified_strict = (
+            eco4_identified[eco4_identified["identification_type"] == "strict"].shape[0] +
+            ciga_dependent_identified[ciga_dependent_identified["identification_type"] == "strict"].shape[0]
         )
-
-        # 2) Meets fabric requirements
-        remaining_eco4_df["prospect_type"] = np.where(
-            (
-                (remaining_eco4_df["eco4_message"] == "sap too high") &
-                remaining_eco4_df["eligibility_cavity_type"].isin(["partial", "empty"]) &
-                remaining_eco4_df["eligibility_loft_type"].isin(["0-100mm"]) &
-                pd.isnull(remaining_eco4_df["prospect_type"])
-            ),
-            "ECO4 if SAP downgrade",
-            remaining_eco4_df["prospect_type"]
+        eco4_of_which_identified_expansive = (
+            eco4_identified[eco4_identified["identification_type"] == "expansive"].shape[0] +
+            ciga_dependent_identified[ciga_dependent_identified["identification_type"] == "expansive"].shape[0]
         )
-
-        # 3) We identify this if it has a filled cavity but meets the loft conditions
-        # TODO: Consider if we should also allow 100-270mm or if we should add some slight tolerance (e.g. 150mm)
-        #       to account for measurement error
-        remaining_eco4_df["prospect_type"] = np.where(
-            (
-                remaining_eco4_df["eligibility_cavity_type"].isin(["full"]) &
-                remaining_eco4_df["eligibility_loft_type"].isin(["0-100mm"])
-            ),
-            "ECO4 - Filled cavity - subject to CIGA check",
-            remaining_eco4_df["prospect_type"]
-        )
-
-        # 4) We identify this by ensuring the cavity if empty or partial, and the loft has between 101 and 270mm
-        remaining_eco4_df["prospect_type"] = np.where(
-            (
-                remaining_eco4_df["eligibility_cavity_type"].isin(["empty", "partial"]) &
-                remaining_eco4_df["eligibility_loft_type"].isin(["100-270mm"])
-            ),
-            "ECO4 prospect - empty cavity, loft insulation below regulation",
-            remaining_eco4_df["prospect_type"]
-        )
-
-        # 5) Looks like GBIS instead
-        remaining_eco4_df["prospect_type"] = np.where(
-            (remaining_eco4_df["gbis_eligible"] == True) & pd.isnull(remaining_eco4_df["prospect_type"]),
-            "Looks like GBIS",
-            remaining_eco4_df["prospect_type"]
-        )
-
-        # 6) This is everything else (i.e. both the cavity is full and the loft insulation is above 100mm)
-        remaining_eco4_df["prospect_type"] = remaining_eco4_df["prospect_type"].fillna(
-            "Does not look like ECO4 candidate"
-        )
-
-        ####################################
         # GBIS
-        ####################################
-
-        remaining_gbis = remaining_properties[
-            remaining_properties["funding_scheme"] == "GBIS"
-            ].copy()
-
-        # 1) Strict GBIS candidates
-        remaining_gbis["prospect_type"] = np.where(
-            (
-                (remaining_gbis["gbis_eligible"] == True) & (remaining_gbis["eco4_eligible"] == False)
-            ),
-            "strict GBIS",
-            remaining_gbis["prospect_type"]
-        )
-
-        # 2) GBIS candidates that look like strict ECO4 candidates
-        remaining_gbis["prospect_type"] = np.where(
-            (remaining_gbis["eco4_eligible"] == True),
-            "GBIS - Upgradable to ECO4",
-            remaining_gbis["prospect_type"]
-        )
-
-        # 3) Subject to CIGA check - Filled cavity
-        remaining_gbis["prospect_type"] = np.where(
-            (
-                remaining_gbis["eligibility_cavity_type"].isin(["full"]) &
-                pd.isnull(remaining_gbis["prospect_type"])
-            ),
-            "GBIS - Filled cavity - subject to CIGA check",
-            remaining_gbis["prospect_type"]
-        )
-
-        # 4) Everything else
-        remaining_gbis["prospect_type"] = remaining_gbis["prospect_type"].fillna(
-            "Does not look like GBIS candidate"
-        )
-
-        ####################################
-        # Surplus properties
-        ####################################
-
-        # Take properties that were not identified by Warmfront and identify those that look like they would qualify
-        # under the strictest criteria
-        surplus_df = analysis_data[
-            analysis_data["asset_identification_status"] != "identified potential eco works (CWI)"
-            ].copy()
-
-        eco4_surplus = surplus_df[
-            (
-                (surplus_df["eco4_eligible"] == True) & (surplus_df["eco4_message"] == "subject to post retrofit sap") &
-                (
-                    surplus_df["eligibility_classification"].isin(
-                        ["high confidence", "highest confidence", "medium confidence"]
-                    )
-                )
-            )
-        ].copy()
-
-        gbis_surplus = surplus_df[
-            (
-                (surplus_df["gbis_eligible"] == True) & (surplus_df["eco4_eligible"] == False) & (
-                surplus_df["eligibility_cavity_type"].isin(["empty", "partial"])
-            )
-            )
-        ].copy()
-
-        # Perform some checks to make sure we have all of the values
-        remaining_eco4_dict = remaining_eco4_df["prospect_type"].value_counts().to_dict()
-        if n_remaining_properties_eco4 != sum([v for k, v in remaining_eco4_dict.items()]):
-            raise ValueError(
-                "Number of remaining properties does not match the number of properties in remaining ECO4 dict"
-            )
-
-        remaining_gbis_dict = remaining_gbis["prospect_type"].value_counts().to_dict()
-        if n_remaining_properties_gbis != sum([v for k, v in remaining_gbis_dict.items()]):
-            raise ValueError(
-                "Number of remaining properties does not match the number of properties in remaining GBIS dict"
-            )
+        n_warmfront_identified_gbis = gbis_identified.shape[0]
+        gbis_of_which_identified_strict = gbis_identified[gbis_identified["identification_type"] == "strict"].shape[0]
+        gbis_of_which_identified_expansive = \
+            gbis_identified[gbis_identified["identification_type"] == "expansive"].shape[0]
 
         to_append = {
-            "ha_name": ha_name,
-            "n_properties_in_asset_list": n_properties_in_asset_list,
+            ("", "HA Name"): ha_name,
+            ("", "# Properties in asset list"): n_properties_in_asset_list,
             ############
             # ECO4
             ############
-            "properties_sold_eco4": properties_sold_eco4,
-            "n_remaining_properties_eco4": n_remaining_properties_eco4,
-            **remaining_eco4_dict,
+            ("ECO4", "# Properties identieid by Warmfront"): n_warmfront_identified_eco4,
+            ("ECO4", "Of which identified by model - strict"): eco4_of_which_identified_strict,
+            ("ECO4", "Of which identified by model - expansive"): eco4_of_which_identified_expansive,
+            ("ECO4", "Of which identified by model - total"): (
+                eco4_of_which_identified_strict + eco4_of_which_identified_expansive),
+            ("ECO4", "Additional properties"): surplus_eco4.shape[0],
             ############
             # GBIS
             ############
-            "properties_sold_gbis": properties_sold_gbis,
-            "n_remaining_properties_gbis": n_remaining_properties_gbis,
-            **remaining_gbis_dict,
-            ############
-            # GBIS
-            ############
-            "n_eco4_surplus": eco4_surplus.shape[0],
-            "n_gbis_surplus": gbis_surplus.shape[0],
+            ("GBIS", "# Properties identieid by Warmfront"): n_warmfront_identified_gbis,
+            ("GBIS", "Of which identified by model - strict"): gbis_of_which_identified_strict,
+            ("GBIS", "Of which identified by model - expansive"): gbis_of_which_identified_expansive,
+            ("GBIS", "Of which identified by model - total"): (
+                gbis_of_which_identified_strict + gbis_of_which_identified_expansive
+            ),
+            ("GBIS", "Additional properties"): surplus_gbis.shape[0]
         }
 
         ha_analysis_results.append(to_append)
 
-        revenue_to_append = {
-            "ha_name": ha_name,
-            "£ Remaining from asset list": (
-                n_remaining_properties_eco4 * eco4_rate + n_remaining_properties_gbis * gbis_rate
-            ),
-            "Of which: Strict": (
-                to_append.get('strict ECO4', 0) * eco4_rate + to_append.get('strict GBIS', 0) * gbis_rate +
-                to_append.get('GBIS - Upgradable to ECO4', 0) * gbis_rate
-            ),
-            "Of which: Subject to CIGA": (
-                to_append.get("ECO4 - Filled cavity - subject to CIGA check", 0) * eco4_rate +
-                to_append.get("GBIS - Filled cavity - subject to CIGA check", 0) * gbis_rate
-            ),
-            "Of which: Prospect, not perfect strict prospect": (
-                to_append.get("ECO4 prospect - empty cavity, loft insulation below regulation", 0) * eco4_rate +
-                to_append.get("ECO4 if SAP downgrade", 0) * eco4_rate
-            ),
-            "Of which: Potential downgrade to GBIS": to_append["Looks like GBIS"] * eco4_rate,
-            "Of which: Does not look like prospect": (
-                to_append.get("Does not look like ECO4 candidate", 0) * eco4_rate +
-                to_append.get("Does not look like GBIS candidate", 0) * gbis_rate
-            ),
-            "Surplus: Unidentified properties": eco4_surplus.shape[0] * eco4_rate + gbis_surplus.shape[0] * gbis_rate,
-            "Surplus: GBIS Updates to ECO4": to_append.get("GBIS - Upgradable to ECO4", 0) * (eco4_rate - gbis_rate)
-        }
-
-        # Perform a quick check:
-        if revenue_to_append["£ Remaining from asset list"] - (
-            revenue_to_append["Of which: Strict"] + revenue_to_append["Of which: Subject to CIGA"] +
-            revenue_to_append["Of which: Prospect, not perfect strict prospect"] +
-            revenue_to_append["Of which: Potential downgrade to GBIS"] +
-            revenue_to_append["Of which: Does not look like prospect"]
-        ) > 1:
-            raise ValueError("Error between top level revenue figures and breakdown - investigate me")
-
-        ha_revenue_results.append(revenue_to_append)
-
     ha_analysis_results = pd.DataFrame(ha_analysis_results)
-    ha_revenue_results = pd.DataFrame(ha_revenue_results)
+    ha_analysis_results.columns = pd.MultiIndex.from_tuples(ha_analysis_results.columns)
 
+    facts_and_figures = loader.facts_and_figures.copy()
+    facts_and_figures["ha_number"] = facts_and_figures["HA Name"].str.extract(r'(\d+)').astype(int)
+    facts_and_figures = facts_and_figures.sort_values("ha_number")
+    facts_and_figures = facts_and_figures.drop(columns=["ha_number"])
+
+    # Rename some of the cols
+    facts_and_figures = facts_and_figures.rename(
+        columns={
+            # ECO4 cols
+            "ECO4": "ECO4 - December",
+            "GBIS": "GBIS - December",
+            "eco4 (subject to ciga)": "ECO4 - subject to ciga",
+            "eco4": "ECO4 - doesn't need CIGA",
+            "eco4 - passed ciga": "ECO4 - passed CIGA",
+            "failed ciga": "ECO4 - failed CIGA",
+            "ECO4 - partially cancelled": "ECO4 - Install downgrade to GBIS",
+            "ECO4 - in progress": "ECO4 - Install in progress",
+            "ECO4 - cancelled": "ECO4 - Install cancelled",
+            # GBIS cols
+            "gbis": "GBIS total (asset list)"
+        }
+    )
+    # We calculate the eco4 total from the asset list
+    # 1) If ciga checks have been completed (i.e. ECO4 - passed ciga > 0) this sum is
+    # ECO4 - doesn't need CIGA + ECO4 - passed CIGA
+    # 2) if ciga checks haven't been completed (i.e. ECO4 - passed ciga is missing), this sum is
+    # ECO4 - doesn't need CIGA + ECO4 - subject to ciga
+    facts_and_figures["ECO4 total (asset list)"] = np.where(
+        facts_and_figures["ECO4 - passed CIGA"] > 0,
+        facts_and_figures["ECO4 - doesn't need CIGA"] + facts_and_figures["ECO4 - passed CIGA"],
+        facts_and_figures["ECO4 - doesn't need CIGA"] + facts_and_figures["ECO4 - subject to ciga"]
+    )
+
+    # Re-arrange the columns
+    facts_and_figures = facts_and_figures[
+        [
+            'HA Name',
+            'ECO4 - December',
+            'GBIS - December',
+            'ECO4 total (asset list)',
+            'GBIS total (asset list)',
+            'ECO4 - subject to ciga',
+            "ECO4 - doesn't need CIGA",
+            'ECO4 - passed CIGA',
+            'ECO4 - failed CIGA',
+            'ECO4 - installed',
+            'ECO4 - Install in progress',
+            'ECO4 - Install cancelled',
+            'ECO4 - partially installed',
+            'ECO4 - Install downgrade to GBIS',
+        ]
+    ]
+    # Addd a note to flag any rows where ECO4 (
+    # subject to ciga is greater than 0) and (ECO4 - passed ciga is greater than 0
+    # )
+    facts_and_figures["Missed CIGA checks opportunity"] = None
+    facts_and_figures["Missed CIGA checks opportunity"] = np.where(
+        (facts_and_figures["ECO4 - subject to ciga"] > 0) & (facts_and_figures["ECO4 - passed CIGA"] > 0),
+        "potential opportunity of " + facts_and_figures["ECO4 - subject to ciga"].astype(
+            str) + " ECO4 properties needing a CIGA check",
+        facts_and_figures["Missed CIGA checks opportunity"]
+    )
+
+    # Re arrage the columns
+
+    # Also sort ha_analysis_results by ha number
+    ha_analysis_results["ha_number"] = ha_analysis_results[("", "HA Name")].str.extract(r'(\d+)').astype(int)
+    ha_analysis_results = ha_analysis_results.sort_values("ha_number")
+    ha_analysis_results = ha_analysis_results.drop(columns=["ha_number"])
+
+    # We save 2 sheets
     # Automate creation of the excel
     # Create a Pandas Excel writer using XlsxWriter as the engine
-    with pd.ExcelWriter('HA Analysis - batch3.xlsx', engine='xlsxwriter') as writer:
+    with pd.ExcelWriter('HA Analysis Results.xlsx', engine='xlsxwriter') as writer:
         # Write each dataframe to a different worksheet without the index
-        for df, sheet in [(ha_revenue_results, 'Total Revenue'),
-                          (ha_analysis_results, 'By ECO4 and GBIS')]:
+        for df, sheet in [(facts_and_figures, 'HA Facts and Figures'),
+                          (ha_analysis_results, 'Asset Identification')]:
 
-            df.to_excel(writer, sheet_name=sheet, index=False)
+            df.to_excel(writer, sheet_name=sheet)
 
             # Auto-adjust columns' width
             for i, width in enumerate(get_col_widths(df)):
@@ -2134,7 +2064,7 @@ def app():
     # Determines if we want to use the cached data in s3
     use_cache = True
     # Determines if we want to perform the data pull
-    pull_data = True
+    pull_data = False
 
     # List all of the data in the folder
     directories = [str(list(entry.iterdir())[0]) for entry in DATA_FOLDER.iterdir() if entry.is_dir()]
@@ -2173,43 +2103,3 @@ def app():
         floor_area_decile_thresholds=floor_area_decile_thresholds,
         pull_data=pull_data
     )
-
-    # for ha_name, datasets in outputs.items():
-    #     datasets["results_df"] = datasets["results_df"].drop(
-    #         columns=["eligibility_cavity_type", "eligibility_loft_type"]
-    #     )
-    #
-    #     # Re-do
-    #     res = []
-    #     for _, row in tqdm(datasets["results_df"].iterrows(), total=datasets["results_df"].shape[0]):
-    #         epc = {
-    #             "walls-description": row["walls"],
-    #             "roof-description": row["roof"],
-    #             "floor-description": "",
-    #             "tenure": "",
-    #             "current-energy-efficiency": row["sap"],
-    #         }
-    #         eligibility = Eligibility(epc=epc, cleaned=cleaned)
-    #         eligibility.check_eco4_warmfront()
-    #         res.append(
-    #             {
-    #                 "row_id": row["row_id"],
-    #                 "eligibility_cavity_type": eligibility.eco4_warmfront["cavity_type"],
-    #                 "eligibility_loft_type": eligibility.eco4_warmfront["loft_type"]
-    #             }
-    #         )
-    #
-    #     # Merge back on
-    #     res = pd.DataFrame(res)
-    #     datasets["results_df"] = datasets["results_df"].merge(res, how="left", on="row_id")
-    #
-    #     # Re-save in s3
-    #     save_pickle_to_s3(
-    #         data={
-    #             "results_df": datasets["results_df"],
-    #             "scoring_df": datasets["scoring_df"],
-    #             "nodata": datasets["nodata"]
-    #         },
-    #         bucket_name="retrofit-datalake-dev",
-    #         s3_file_name=f"ha-analysis/{ha_name}/processed_results.pickle"
-    #     )

From 8b8e2bf902f8cc6c588eab8b64253580f3364694 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 1 Mar 2024 16:29:19 +0000
Subject: [PATCH 038/248] working on new forecast approach for warmfront
 remaining sales

---
 .../ha_15_32/ha_analysis_batch_3.py           | 811 +++++++++++++++++-
 utils/s3.py                                   |   2 +-
 2 files changed, 768 insertions(+), 45 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 61c4a243..bb27029e 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -17,6 +17,7 @@ from etl.eligibility.ha_15_32.app import prepare_model_data_row
 from backend.ml_models.api import ModelApi
 from etl.solar.SolarPhotoSupply import SolarPhotoSupply
 from recommendations.recommendation_utils import calculate_cavity_age
+from etl.epc.Record import EPCRecord
 
 EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
 ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env"
@@ -181,25 +182,25 @@ class DataLoader:
         if ha_name in ["HA1", "HA6", "HA16", "HA24"]:
             asset_list["matching_address"] = asset_list[
                 self.COLUMN_CONFIG[ha_name]["address"]
-            ].str.lower().str.strip()
+            ].astype(str).str.lower().str.strip()
             asset_list["matching_postcode"] = asset_list[
                 self.COLUMN_CONFIG[ha_name]["postcode"]
-            ].str.lower().str.strip()
+            ].astype(str).str.lower().str.strip()
         elif ha_name == "HA7":
             # Create matching_address by concatenating Address 1, Address 2, Address 3, Address 4, Postcode
-            asset_list["matching_address"] = asset_list["Address"].str.lower().str.strip() + ", " + \
-                                             asset_list["Address2"].str.lower().str.strip() + ", " + \
-                                             asset_list["Address3"].str.lower().str.strip() + ", " + \
-                                             asset_list["Postcode"].str.lower().str.strip()
-            asset_list["matching_postcode"] = asset_list["Postcode"].str.lower().str.strip()
+            asset_list["matching_address"] = asset_list["Address"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address2"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address3"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Postcode"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
         elif ha_name == "HA14":
             # Create matching_address by concatenating Address 1, Address 2, Address 3, Address 4, Postcode
-            asset_list["matching_address"] = asset_list["Address 1"].str.lower().str.strip() + ", " + \
-                                             asset_list["Address 2"].str.lower().str.strip() + ", " + \
-                                             asset_list["Address 3"].str.lower().str.strip() + ", " + \
-                                             asset_list["Address 4"].str.lower().str.strip() + ", " + \
-                                             asset_list["Postcode"].str.lower().str.strip()
-            asset_list["matching_postcode"] = asset_list["Postcode"].str.lower().str.strip()
+            asset_list["matching_address"] = asset_list["Address 1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address 2"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address 3"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address 4"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Postcode"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
 
         elif ha_name == "HA39":
             # Create matching_address by concatenating add_1, add_2, add_3, add_4, add_5, post_code
@@ -209,7 +210,7 @@ class DataLoader:
                                              asset_list["add_4"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["add_5"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["post_code"].astype(str).str.lower().str.strip()
-            asset_list["matching_postcode"] = asset_list["post_code"].str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["post_code"].astype(str).str.lower().str.strip()
         elif ha_name == "HA107":
             # Create matching_address by concatenating House No, Street, Town, District, Postcode
             asset_list["matching_address"] = asset_list["House No"].astype(str).str.lower().str.strip() + ", " + \
@@ -1098,8 +1099,8 @@ class DataLoader:
         self.december_figures = pd.read_csv(self.december_figures_filepath)
         # Remove the spaces in HA Name
         self.december_figures["HA Name"] = self.december_figures["HA Name"].str.replace(" ", "")
-        self.december_figures["ECO4"] = self.december_figures["ECO4"].astype("Int64")
-        self.december_figures["GBIS"] = self.december_figures["GBIS"].astype("Int64")
+        for col in ["ECO4", "GBIS", "ECO4 remaining", "GBIS remaining"]:
+            self.december_figures[col] = self.december_figures[col].astype("Int64")
 
         if self.use_cache:
             self.data = read_pickle_from_s3(
@@ -1203,7 +1204,6 @@ class DataLoader:
             # Update the asset list with the categorisations and rename changes
             if asset_list.shape[0] != asset_list_starting_size:
                 raise ValueError("The asset list has changed in size")
-            self.data[ha_name]["asset_list"] = asset_list
 
             # Report on sales
             sales_report = {}
@@ -1259,7 +1259,31 @@ class DataLoader:
                 )
 
                 # We get the sales
-                sales_report = survey_list["installation_status"].value_counts().to_dict()
+                sales_report = {
+                    "ECO4 - surveys sold": survey_list.shape[0],
+                    **survey_list["installation_status"].value_counts().to_dict()
+                }
+
+                # We find some cases where properties have sold but are missing CIGA checks
+                survey_list_to_merge = survey_list[["asset_list_row_id"]].copy()
+                survey_list_to_merge["has_a_survey_record"] = True
+                survey_list_to_merge = survey_list_to_merge[~pd.isnull(survey_list_to_merge["asset_list_row_id"])]
+
+                asset_list = asset_list.merge(survey_list_to_merge, how='left', on="asset_list_row_id")
+                asset_list["ECO Eligibility"] = np.where(
+                    (asset_list["ECO Eligibility"] == "eco4 (subject to ciga)") & (
+                        asset_list["has_a_survey_record"] == True
+                    ),
+                    "eco4 - passed ciga",
+                    asset_list["ECO Eligibility"]
+                )
+                asset_list = asset_list.drop(columns=["has_a_survey_record"])
+
+                # Update the survey list with installation status
+                self.data[ha_name]["survey_list"] = survey_list
+
+            # Insert updated asset list
+            self.data[ha_name]["asset_list"] = asset_list
 
             ha_facts_and_figures.append(
                 {
@@ -1687,7 +1711,21 @@ def analyse_ha_data(outputs, loader):
     :return:
     """
 
+    eco4_rate = 1710
+    gbis_rate = 600
+    old_eco4_rate = 1456
+    old_gbis_rate = 432
+
+    epc_c_threshold = 80
+    scheme_map = {
+        "ECO4": "ECO4",
+        "AFFORDABLE WARMTH": "ECO4",
+        "ECO4 A/W": "ECO4",
+        "ECO4 GBIS (ECO+)": "GBIS"
+    }
+
     ha_analysis_results = []
+    total_revenue_results = []
     for ha_name, datasets in outputs.items():
         inputs = [x for k, x in loader.data.items() if k == ha_name][0]
 
@@ -1702,6 +1740,88 @@ def analyse_ha_data(outputs, loader):
             left_on="asset_list_row_id"
         )
 
+        analysis_data["is_remaining"] = True
+
+        n_sold_eco4 = 0
+        n_sold_gbis = 0
+        if not inputs["survey_list"].empty:
+            # Merge on the survey list and signal everything that is remaining or not (i.e. anything that hasn't had
+            # a survey)
+            survey_list = inputs["survey_list"].copy()
+
+            # TODO: TEMP
+            scheme_column = survey_list.columns[0]
+            # We clean up the survey list installation or cancelled
+            survey_list["installed_or_cancelled_clean"] = survey_list["INSTALLED OR CANCELLED"].str.lower()
+            # Remove all punctuation
+            survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.replace(
+                r'[^\w\s]', '', regex=True
+            )
+            # Collapse runs of whitespace into a single space
+            survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.replace(
+                r'\s+', ' ', regex=True
+            )
+            # Remove leading and trailing spaces
+            survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.strip()
+
+            # Remap the values in the scheme column
+            survey_list[scheme_column] = survey_list[scheme_column].replace(scheme_map)
+
+            survey_list["installation_status"] = None
+            survey_list["installation_status"] = np.where(
+                survey_list["installed_or_cancelled_clean"].isin(["installed", "installed see notes"]),
+                "installed",
+                survey_list["installation_status"]
+            )
+            survey_list["installation_status"] = np.where(
+                survey_list["installed_or_cancelled_clean"].isin(["cancelled"]),
+                "cancelled",
+                survey_list["installation_status"]
+            )
+            # Find partial installations
+            survey_list["installation_status"] = np.where(
+                survey_list["installed_or_cancelled_clean"].str.contains("still to be installed"),
+                "partially installed",
+                survey_list["installation_status"]
+            )
+            # Find partial cancellations
+            # TODO: We might have more indications of partial cancellations
+            survey_list["installation_status"] = np.where(
+                survey_list["installed_or_cancelled_clean"].isin(["loft cancelled"]),
+                "partially cancelled",
+                survey_list["installation_status"]
+            )
+
+            # Finally, for other cases, we set the status to "in progress"
+            survey_list["installation_status"] = survey_list["installation_status"].fillna("in progress")
+
+            # We concatenate the scheme name with the installation status
+            survey_list["installation_status"] = (
+                survey_list[scheme_column] + " - " + survey_list["installation_status"]
+            )
+
+            # TODO: END TEMP
+
+            survey_list_to_merge = survey_list[["asset_list_row_id", scheme_column]].copy()
+            survey_list_to_merge["is_remaining"] = False
+            analysis_data = analysis_data.drop(columns="is_remaining").merge(
+                survey_list_to_merge,
+                how="left", on="asset_list_row_id"
+            )
+            analysis_data["is_remaining"] = analysis_data["is_remaining"].fillna(True)
+
+            n_sold_eco4 = survey_list_to_merge[survey_list_to_merge[scheme_column] == "ECO4"].shape[0]
+            n_sold_gbis = survey_list_to_merge[survey_list_to_merge[scheme_column] == "GBIS"].shape[0]
+
+        # Take just remaining
+        analysis_data = analysis_data[analysis_data["is_remaining"]]
+
+        # Also, if the HA has started selling, we remove any that are still subject to ciga
+        n_eco4_missed_subject_to_ciga = 0
+        if not inputs["survey_list"].empty:
+            n_eco4_missed_subject_to_ciga = (analysis_data["ECO Eligibility"] == "eco4 (subject to ciga)").sum()
+            analysis_data = analysis_data[analysis_data["ECO Eligibility"] != "eco4 (subject to ciga)"]
+
         ################################################################################################
         # We take the properties that strictly qualified under eco
         ################################################################################################
@@ -1714,8 +1834,11 @@ def analyse_ha_data(outputs, loader):
             eco4_identified["identification_type"]
         )
 
+        # For expansive, the property can be no higher than an EPC C
         eco4_identified["identification_type"] = np.where(
-            (eco4_identified["eco4_eligible"] == True) & (eco4_identified["eco4_strict"] == False),
+            (eco4_identified["eco4_eligible"] == True) & (eco4_identified["eco4_strict"] == False) & (
+                eco4_identified["sap"] <= epc_c_threshold
+            ),
             "expansive",
             eco4_identified["identification_type"]
         )
@@ -1743,21 +1866,17 @@ def analyse_ha_data(outputs, loader):
                     "Meets fabric, fails SAP check",
                     "Meets cavity, loft borderline, meets sap",
                 ]
-            ),
+            ) & (ciga_dependent_identified["sap"] <= epc_c_threshold),
             "strict",
             ciga_dependent_identified["identification_type"]
         )
 
         ciga_dependent_identified["identification_type"] = np.where(
-            (ciga_dependent_identified["eco4_message"].isin(["All conditions fail", "failed fabric check"])) &
-            (ciga_dependent_identified["walls"].isin(["Cavity wall, filled cavity"])),
-            "expansive",
-            ciga_dependent_identified["identification_type"]
-        )
-
-        ciga_dependent_identified["identification_type"] = np.where(
-            (ciga_dependent_identified["eco4_message"].isin(["Meets just cavity"])) | (
+            ((ciga_dependent_identified["eco4_message"].isin(["Meets just cavity"])) | (
                 ciga_dependent_identified["walls"].isin(["Cavity wall, filled cavity"])
+            )) & (
+                (ciga_dependent_identified["sap"] <= epc_c_threshold) &
+                pd.isnull(ciga_dependent_identified["identification_type"])
             ),
             "expansive",
             ciga_dependent_identified["identification_type"]
@@ -1775,7 +1894,9 @@ def analyse_ha_data(outputs, loader):
         )
 
         gbis_identified["identification_type"] = np.where(
-            (gbis_identified["gbis_eligible"] == True) & (gbis_identified["sap"] >= 69),
+            (gbis_identified["gbis_eligible"] == True) & (gbis_identified["sap"] <= epc_c_threshold) & (
+                pd.isnull(gbis_identified["identification_type"])
+            ),
             "expansive",
             gbis_identified["identification_type"]
         )
@@ -1806,9 +1927,16 @@ def analyse_ha_data(outputs, loader):
             ]
         surplus_gbis = surplus_gbis[surplus_gbis["is_estimated"] == False]
 
-        # Output variables
+        # Output variables - the data was sent to us in December, but the remaining figures are
+        # what was in November
+        november_remaining = loader.december_figures[loader.december_figures["HA Name"] == ha_name]
+
         # ECO4
-        n_properties_in_asset_list = inputs["asset_list"].shape[0]
+        n_properties_remaining_in_asset_list = inputs["asset_list"].shape[0]
+        november_eco4_remaining = max(november_remaining["ECO4 remaining"].values[0], 0)
+        november_eco4_sold = november_remaining["No. of Tech surveys complete - Eco 4"].values[0]
+        eco4_sales_since_november = n_sold_eco4 - november_eco4_sold
+
         n_warmfront_identified_eco4 = eco4_identified.shape[0] + ciga_dependent_identified.shape[0]
         eco4_of_which_identified_strict = (
             eco4_identified[eco4_identified["identification_type"] == "strict"].shape[0] +
@@ -1820,26 +1948,37 @@ def analyse_ha_data(outputs, loader):
         )
         # GBIS
         n_warmfront_identified_gbis = gbis_identified.shape[0]
+        november_gbis_remaining = max(november_remaining["GBIS remaining"].values[0], 0)
+        november_gbis_sold = november_remaining["No. of Tech surveys complete - GBIS"].values[0]
+        gbis_sales_since_november = n_sold_gbis - november_gbis_sold
         gbis_of_which_identified_strict = gbis_identified[gbis_identified["identification_type"] == "strict"].shape[0]
         gbis_of_which_identified_expansive = \
             gbis_identified[gbis_identified["identification_type"] == "expansive"].shape[0]
 
         to_append = {
             ("", "HA Name"): ha_name,
-            ("", "# Properties in asset list"): n_properties_in_asset_list,
+            ("", "# properties in asset list"): n_properties_remaining_in_asset_list,
             ############
             # ECO4
             ############
-            ("ECO4", "# Properties identieid by Warmfront"): n_warmfront_identified_eco4,
+            ("ECO4", "# remaining November file"): november_eco4_remaining,
+            ("ECO4", "# sold in November file"): november_eco4_sold,
+            ("ECO4", "# sold (survey list)"): n_sold_eco4,
+            ("ECO4", "# that missed CIGA check"): n_eco4_missed_subject_to_ciga,
+            ("ECO4", "# Remaining properties (asset list)"): n_warmfront_identified_eco4,
             ("ECO4", "Of which identified by model - strict"): eco4_of_which_identified_strict,
             ("ECO4", "Of which identified by model - expansive"): eco4_of_which_identified_expansive,
             ("ECO4", "Of which identified by model - total"): (
-                eco4_of_which_identified_strict + eco4_of_which_identified_expansive),
+                eco4_of_which_identified_strict + eco4_of_which_identified_expansive
+            ),
             ("ECO4", "Additional properties"): surplus_eco4.shape[0],
             ############
             # GBIS
             ############
-            ("GBIS", "# Properties identieid by Warmfront"): n_warmfront_identified_gbis,
+            ("GBIS", "# remaining November file"): november_gbis_remaining,
+            ("GBIS", "# sold in November file"): november_gbis_sold,
+            ("GBIS", "# sold (survey list)"): n_sold_gbis,
+            ("GBIS", "# Remaining properties (asset list)"): n_warmfront_identified_gbis,
             ("GBIS", "Of which identified by model - strict"): gbis_of_which_identified_strict,
             ("GBIS", "Of which identified by model - expansive"): gbis_of_which_identified_expansive,
             ("GBIS", "Of which identified by model - total"): (
@@ -1850,6 +1989,24 @@ def analyse_ha_data(outputs, loader):
 
         ha_analysis_results.append(to_append)
 
+        # Calculate the revenue results
+        to_append_revenue = {
+            ("", "HA Name"): ha_name,
+            # Eco4 revenue
+            ("ECO4", "£ remaining November file"): november_eco4_remaining * eco4_rate,
+            ("ECO4", "£ sold November file"): november_eco4_sold * old_eco4_rate,
+            ("ECO4", "£ sold since November"): eco4_sales_since_november * eco4_rate,
+            ("ECO4", "£ stuck at ciga check"): n_eco4_missed_subject_to_ciga * eco4_rate,
+            ("ECO4", "£ remaining (asset list)"): n_warmfront_identified_eco4 * eco4_rate,
+            ("ECO4", "Of which identified by model - strict"): eco4_of_which_identified_strict * eco4_rate,
+            ("ECO4", "Of which identified by model - expansive"): eco4_of_which_identified_expansive * eco4_rate,
+            ("ECO4", "Of which identified by model - total"): eco4_rate * (
+                eco4_of_which_identified_strict + eco4_of_which_identified_expansive
+            ),
+            ("ECO4", "Additional properties"): eco4_rate * surplus_eco4.shape[0],
+        }
+        total_revenue_results.append(to_append_revenue)
+
     ha_analysis_results = pd.DataFrame(ha_analysis_results)
     ha_analysis_results.columns = pd.MultiIndex.from_tuples(ha_analysis_results.columns)
 
@@ -1862,8 +2019,8 @@ def analyse_ha_data(outputs, loader):
     facts_and_figures = facts_and_figures.rename(
         columns={
             # ECO4 cols
-            "ECO4": "ECO4 - December",
-            "GBIS": "GBIS - December",
+            "ECO4": "ECO4 - November",
+            "GBIS": "GBIS - November",
             "eco4 (subject to ciga)": "ECO4 - subject to ciga",
             "eco4": "ECO4 - doesn't need CIGA",
             "eco4 - passed ciga": "ECO4 - passed CIGA",
@@ -1880,19 +2037,27 @@ def analyse_ha_data(outputs, loader):
     # ECO4 - doesn't need CIGA + ECO4 - passed CIGA
     # 2) if ciga checks haven't been completed (i.e. ECO4 - passed ciga is missing), this sum is
     # ECO4 - doesn't need CIGA + ECO4 - subject to ciga
-    facts_and_figures["ECO4 total (asset list)"] = np.where(
+    facts_and_figures["ECO4 total (asset list - pre ciga)"] = (
+        facts_and_figures["ECO4 - doesn't need CIGA"] +
+        facts_and_figures["ECO4 - subject to ciga"] +
+        facts_and_figures["ECO4 - passed CIGA"]
+    )
+
+    facts_and_figures["ECO4 total (asset list - post ciga)"] = None
+    facts_and_figures["ECO4 total (asset list - post ciga)"] = np.where(
         facts_and_figures["ECO4 - passed CIGA"] > 0,
         facts_and_figures["ECO4 - doesn't need CIGA"] + facts_and_figures["ECO4 - passed CIGA"],
-        facts_and_figures["ECO4 - doesn't need CIGA"] + facts_and_figures["ECO4 - subject to ciga"]
+        facts_and_figures["ECO4 total (asset list - post ciga)"]
     )
 
     # Re-arrange the columns
     facts_and_figures = facts_and_figures[
         [
             'HA Name',
-            'ECO4 - December',
-            'GBIS - December',
-            'ECO4 total (asset list)',
+            'ECO4 - November',
+            'GBIS - November',
+            'ECO4 total (asset list - pre ciga)',
+            'ECO4 total (asset list - post ciga)',
             'GBIS total (asset list)',
             'ECO4 - subject to ciga',
             "ECO4 - doesn't need CIGA",
@@ -1916,6 +2081,8 @@ def analyse_ha_data(outputs, loader):
         facts_and_figures["Missed CIGA checks opportunity"]
     )
 
+    facts_and_figures.to_csv("Facts and figures sample.csv")
+
     # Re arrage the columns
 
     # Also sort ha_analysis_results by ha number
@@ -1937,6 +2104,333 @@ def analyse_ha_data(outputs, loader):
             for i, width in enumerate(get_col_widths(df)):
                 writer.sheets[sheet].set_column(i, i, width)
 
+    # Inspection: - Looking into the proportion of homes with "cavity, as built, insulated (assumed)" as their
+    #               description, and what proportion of time they get identified via non-invasive surveys
+
+    # true_eco4_assets = []
+    # ciga_dependent_assets = []
+    # not_eligible = []
+    # as_built_insulated = []
+    # date_cols = {
+    #     "HA39": "date_built",
+    #     "HA14": "Built In Year",
+    #     "HA6": "Construction Year",
+    #     "HA1": "Build Date",
+    #     "HA107": "YEAR BUILT"
+    # }
+    # for ha_name, data_objects in outputs.items():
+    #     inputs = [x for k, x in loader.data.items() if k == ha_name][0]
+    #
+    #     date_col = date_cols[ha_name]
+    #     results_df = data_objects["results_df"].copy()
+    #     df = inputs["asset_list"][['asset_list_row_id', "ECO Eligibility", date_col]].rename(
+    #         columns={"row_meaning": "asset_identification_status", date_col: "date_built"}
+    #     ).merge(
+    #         results_df,
+    #         how="left",
+    #         right_on="row_id",
+    #         left_on="asset_list_row_id"
+    #     )
+    #
+    #     # take the true ECO4
+    #     true_eco4 = df[df["ECO Eligibility"] == "eco4"].copy()
+    #     ciga_dependent = df[
+    #         df["ECO Eligibility"].isin(
+    #             [
+    #                 "eco4 (subject to ciga)",
+    #                 "failed ciga",
+    #                 "eco4 - passed ciga"
+    #             ]
+    #         )
+    #     ]
+    #     insulated_assumed = df[df["walls"] == "Cavity wall, as built, insulated"].copy()
+    #     # We convert date built to datetime
+    #     try:
+    #         insulated_assumed = insulated_assumed[~pd.isnull(insulated_assumed["date_built"])]
+    #         insulated_assumed["year_built"] = pd.to_datetime(insulated_assumed["date_built"].astype(str)).dt.year
+    #         as_built_insulated.append(insulated_assumed)
+    #     except Exception as e:
+    #         print("oh well")
+    #
+    #     true_eco4_assets.append(true_eco4)
+    #     ciga_dependent_assets.append(ciga_dependent)
+    #
+    # true_eco4_assets = pd.concat(true_eco4_assets)
+    # ciga_dependent_assets = pd.concat(ciga_dependent_assets)
+    # as_built_insulated = pd.concat(as_built_insulated)
+    #
+    # true_eco4_assets["walls"].value_counts(normalize=True)
+    # ciga_dependent_assets["walls"].value_counts(normalize=True)
+    #
+    # from recommendations.recommendation_utils import extract_insulation_thickness
+    #
+    # true_eco4_assets["roof_insulation_thickness"] = true_eco4_assets["roof"].apply(
+    #     lambda x: extract_insulation_thickness(x)
+    # )
+    #
+    # true_eco4_assets["e"] = true_eco4_assets.merge(
+    #     pd.DataFrame(cleaned["roof-description"])[["original_description", "insulation_thickness"]],
+    #     how="left",
+    #     left_on="roof",
+    #     right_on="original_description"
+    # )
+    #
+    # true_eco4_assets["sap"].mean()
+    #
+    # true_eco4_assets["insulation_thickness"].isin(
+    #     ["250", "150", "200", "100", "75", "50"]
+    # ).sum() / true_eco4_assets.shape[0]
+    #
+    # true_eco4_assets["insulation_thickness"].isin(
+    #     ["100"]
+    # ).sum() / true_eco4_assets.shape[0]
+    #
+    # as_built_insulated.groupby("property_type")["ECO Eligibility"].value_counts(normalize=True)
+
+
+def get_propensity_model_data(
+    loader, cleaned, cleaning_data, created_at, photo_supply_lookup,
+    floor_area_decile_thresholds, pull_data=True
+):
+    """
+    For each HA with a survey list, sample the asset list, look up EPCs and pickle single-EPC rows to S3.
+
+    :return: list of DataFrames, one per processed HA
+    """
+    # TODO: Set a seed!
+    model_data = []
+    for ha_name, data_assets in loader.data.items():
+        logger.info("Processing HA: %s", ha_name)
+        if data_assets["survey_list"].empty:
+            continue
+
+        number_sold = data_assets["survey_list"].shape[0]
+        # For each HA, we pull in the data required, and store it in S3
+        asset_list = data_assets["asset_list"].copy()
+        # We determine the number of properties that we should select that are eligible
+        asset_list_size = asset_list.shape[0]
+        # Number eligible
+        n_eligibile = asset_list[asset_list["ECO Eligibility"] != "not eligible"].shape[0]
+        success_rate = n_eligibile / asset_list_size
+        needed_sample_size = np.ceil(number_sold / success_rate)
+        number_negative_samples = int(needed_sample_size - number_sold)
+
+        sold_asset_list_ids = data_assets["survey_list"]["asset_list_row_id"].tolist()
+        negative_sample_asset_list_ids = asset_list["asset_list_row_id"].sample(number_negative_samples).tolist()
+        sample_ids = sold_asset_list_ids + negative_sample_asset_list_ids
+        sample_asset_list = asset_list[asset_list["asset_list_row_id"].isin(sample_ids)]
+
+        # In order to have the most confidence, we should take just properties that have 1 EPC. We might need to
+        # cut down the number of properties that we include because of this
+        # Note: This is an imbalanced problem so we will need to build a model accommodating of that
+
+        data = []
+        errors = []
+        for index, property_meta in tqdm(sample_asset_list.iterrows(), total=len(sample_asset_list)):
+            # NOTE(review): matching_postcode comes from astype(str), so missing may be "nan" not None - confirm
+            if property_meta["matching_postcode"] is None:
+                continue
+
+            property_type, built_form = get_property_type_and_built_form(
+                property_meta=property_meta, ha_name=ha_name
+            )
+
+            searcher = SearchEpc(
+                address1=str(property_meta["HouseNo"]),
+                postcode=property_meta["matching_postcode"],
+                auth_token=EPC_AUTH_TOKEN,
+                os_api_key="",
+                full_address=property_meta["matching_address"]
+            )
+            searcher.ordnance_survey_client.property_type = property_type
+            searcher.ordnance_survey_client.built_form = built_form
+            searcher.find_property(skip_os=True)
+
+            if searcher.newest_epc is None:
+                continue
+
+            if searcher.newest_epc.get("estimated"):
+                # We insert the row ID as our proxy for UPRN
+                searcher.newest_epc["uprn"] = int(property_meta["asset_list_row_id"].split(ha_name)[1])
+
+            newest_epc = searcher.newest_epc
+            older_epcs = searcher.older_epcs
+            full_sap_epc = searcher.full_sap_epc
+
+            # If we have more than 1 EPC for the moment we just continue
+            if older_epcs or full_sap_epc:
+                continue
+            try:
+                # We clean up the data
+                epc_records = {
+                    'original_epc': newest_epc.copy(),
+                    'full_sap_epc': full_sap_epc.copy(),
+                    'old_data': older_epcs.copy(),
+                }
+                epc_record = EPCRecord(
+                    epc_records=epc_records,
+                    run_mode="newdata",
+                    cleaning_data=cleaning_data
+                )
+
+                # If we have some data, continue
+                data.append(
+                    {
+                        "ECO Eligibility": property_meta["ECO Eligibility"],
+                        "asset_list_row_id": property_meta["asset_list_row_id"],
+                        **epc_record.get("prepared_epc")
+                    }
+                )
+            except Exception as e:
+                errors.append(
+                    {
+                        "error": str(e),
+                        "asset_list_row_id": property_meta["asset_list_row_id"],
+                        "matching_postcode": property_meta["matching_postcode"],
+                        "matching_address": property_meta["matching_address"]
+                    }
+                )
+
+        data = pd.DataFrame(data)
+        # We store the results in S3 as a pickle
+        save_pickle_to_s3(
+            data=data,
+            bucket_name="retrofit-datalake-dev",
+            s3_file_name=f"propensity_model_data/{ha_name}/train.pickle"
+        )
+
+        # Store the errors
+        if errors:
+            save_pickle_to_s3(
+                data=errors,
+                bucket_name="retrofit-datalake-dev",
+                s3_file_name=f"propensity_model_data/{ha_name}/errors.pickle"
+            )
+
+        model_data.append(data)
+
+    return model_data
+
+
+def conversion_model(loader):
+    """
+    Proof-of-concept conversion model: print EPC features ranked by XGBoost importance for ECO4 sales.
+
+    Reads each HA's pickled training sample from S3, labels rows whose installation status is
+    "ECO4 - in progress" or "ECO4 - installed" as positives, re-balances per HA and fits an XGBClassifier.
+
+    :param loader: object whose ``data`` maps HA name to a dict holding "asset_list" and "survey_list"
+    :return: None, the sorted feature importances are printed
+    """
+    model_data = []
+    for ha_name in loader.data:
+        try:
+            picked = read_pickle_from_s3(
+                bucket_name="retrofit-datalake-dev",
+                s3_file_name=f"propensity_model_data/{ha_name}/train.pickle"
+            )
+            data = pd.DataFrame(picked)
+
+            # We merge on the sales data
+            sales_data = loader.data[ha_name]["survey_list"].copy()
+            data = data.merge(
+                sales_data[["asset_list_row_id", "installation_status"]],
+                how="left",
+                on="asset_list_row_id"
+            )
+            data["ha_name"] = ha_name
+        except Exception:
+            # Best effort: skip HAs whose data cannot be loaded, but keep the traceback in the log
+            logger.exception("Error reading in the data for %s", ha_name)
+            continue
+
+        model_data.append(data)
+
+    model_data = pd.concat(model_data)
+
+    model_data["response"] = model_data["installation_status"].isin(
+        ["ECO4 - in progress", "ECO4 - installed"]
+    ).astype(int)
+
+    # Because of how we pulled the data, we need to re-balance the sample
+    balanced_sample = []
+    for ha_name in model_data["ha_name"].unique():
+        df = model_data[model_data["ha_name"] == ha_name]
+        positive_samples = df[df["response"] == 1]
+        negative_samples = df[df["response"] != 1]
+
+        asset_list = loader.data[ha_name]["asset_list"]
+        asset_list_size = asset_list.shape[0]
+        n_eligibile = asset_list[asset_list["ECO Eligibility"] != "not eligible"].shape[0]
+        success_rate = n_eligibile / asset_list_size
+        needed_sample_size = np.ceil(positive_samples.shape[0] / success_rate)
+        # Cap at what is available: sample() raises when asked for more rows than exist
+        number_negative_samples = min(
+            int(needed_sample_size - positive_samples.shape[0]), negative_samples.shape[0]
+        )
+        negative_samples_subset = negative_samples.sample(number_negative_samples)
+
+        balanced_sample.append(pd.concat([positive_samples, negative_samples_subset]))
+
+    balanced_sample = pd.concat(balanced_sample)
+
+    # Drop the identifier, address and label columns so that only the model features and the
+    # response remain
+    balanced_sample = balanced_sample.drop(
+        columns=['ECO Eligibility', 'asset_list_row_id', 'address', 'uprn_source', 'address3', 'local_authority_label',
+                 'county', 'postcode', 'constituency', 'local_authority', 'inspection_date', 'address1',
+                 'constituency_label', 'building_reference_number', 'address2', 'posttown', 'lodgement_datetime',
+                 'uprn', 'lodgement_date', 'lmk_key', 'installation_status', 'ha_name']
+    )
+
+    # POC model
+    df = balanced_sample.copy()
+    # Fill missing values with means, if they exist
+    numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns
+    df[numeric_cols] = df[numeric_cols].fillna(df[numeric_cols].mean())
+
+    categorical_cols = df.select_dtypes(include=['object', 'category']).columns
+    df[categorical_cols] = df[categorical_cols].fillna("other")
+
+    # Reduce the number of categories to a specific number and the rest to other
+    max_n_categories = 10
+    for col in categorical_cols:
+        top_categories = df[col].value_counts().nlargest(max_n_categories).index
+        df[col] = df[col].where(df[col].isin(top_categories), other="other")
+
+    # Use a model based approach to feature selection
+    import xgboost as xgb
+    from sklearn.model_selection import train_test_split
+
+    # The outcome column is 'response'
+    X = df.drop(columns=['response'])
+    y = df['response']
+
+    # Encoding categorical variables if not already done
+    X = pd.get_dummies(X, drop_first=True)
+
+    # Splitting the data into train and test sets
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+    # Initialize an XGBoost classifier
+    model = xgb.XGBClassifier()
+
+    # Fit the model
+    model.fit(X_train, y_train)
+
+    # Get feature importances
+    feature_importances = model.feature_importances_
+
+    # Map feature importances to their corresponding column names
+    feature_importance_dict = dict(zip(X.columns, feature_importances))
+
+    # Sort features by importance
+    sorted_features = sorted(feature_importance_dict.items(), key=lambda item: item[1], reverse=True)
+
+    # Display sorted features
+    for feature, importance in sorted_features:
+        print(f"{feature}: {importance}")
+
 
 def patch_cleaned(cleaned):
     # Patch to handle the a missing description
@@ -2054,6 +2548,218 @@ def patch_cleaned(cleaned):
     return cleaned
 
 
+def forecast_remaining_sales(loader):
+    # Assumptions:
+    # We cap the ciga conversion rate at 75% because I expect future HAs to have a lower CIGA conversion rate
+    # and I don't want the numbers to change too much, depending on the CIGA conversion rate
+    maximum_ciga_conversion = 0.75
+
+    gbis_rate = 600
+    eco4_rate = 1710
+    old_gbis_rate = 432
+    old_eco4_rate = 1456
+
+    # 1) Calculate the conversion rate from passed CIGA to actual sale
+    converted_ciga_jobs = []
+    for ha_name, input_data in loader.data.items():
+        asset_list = input_data["asset_list"].copy()
+        survey_list = input_data["survey_list"].copy()
+
+        if survey_list.empty:
+            continue
+
+        ciga_dependent_assets = asset_list[
+            asset_list["ECO Eligibility"] == "eco4 - passed ciga"
+            ]
+
+        # These are now the ciga dependent assets at installation
+        ciga_dependent_assets_at_installation = ciga_dependent_assets.merge(
+            survey_list[["asset_list_row_id", "installation_status"]],
+            how="inner",
+            on="asset_list_row_id"
+        )
+
+        # We then calculate how many get cancelled
+        ciga_dependent_assets_sold = ciga_dependent_assets_at_installation[
+            ciga_dependent_assets_at_installation["installation_status"].isin(
+                [
+                    "ECO4 - installed", "ECO4 - in progress"
+                ]
+            )
+        ]
+
+        ciga_dependent_assets_failed = ciga_dependent_assets_at_installation[
+            ~ciga_dependent_assets_at_installation["installation_status"].isin(
+                [
+                    "ECO4 - installed", "ECO4 - in progress"
+                ]
+            )
+        ]
+
+        converted_ciga_jobs.append(
+            {
+                "HA Name": ha_name,
+                "# Ciga dependent at installation": ciga_dependent_assets_at_installation.shape[0],
+                "# Ciga dependent successfully installed": ciga_dependent_assets_sold.shape[0],
+                "# Ciga dependent failed install": ciga_dependent_assets_failed.shape[0]
+            }
+        )
+
+    converted_ciga_jobs = pd.DataFrame(converted_ciga_jobs)
+
+    # We calculate a ciga pass to install conversion rate
+    median_ciga_pass_to_install = (
+        converted_ciga_jobs["# Ciga dependent successfully installed"].sum() /
+        converted_ciga_jobs["# Ciga dependent at installation"].sum()
+    )
+
+    # 2) Calculate the conversion rate from CIGA dependent ciga passed
+    ciga_passrates = []
+    for ha_name, input_data in loader.data.items():
+
+        # If we don't have a ciga list, we can't do anything
+        if input_data["ciga_list"].empty:
+            continue
+
+        # Select the assets whose CIGA check has completed (passed or failed)
+        asset_list = input_data["asset_list"].copy()
+
+        ciga_completed_assets = asset_list[
+            asset_list["ECO Eligibility"].isin(
+                [
+                    "eco4 - passed ciga",
+                    "failed ciga"
+                ]
+            )
+        ]
+
+        ciga_passed = ciga_completed_assets[
+            ciga_completed_assets["ECO Eligibility"].isin(
+                [
+                    "eco4 - passed ciga"
+                ]
+            )
+        ]
+
+        ciga_passrates.append(
+            {
+                "Ha Name": ha_name,
+                "# CIGA dependent": ciga_completed_assets.shape[0],
+                "# CIGA passed": ciga_passed.shape[0],
+            }
+        )
+
+    ciga_passrates = pd.DataFrame(ciga_passrates)
+
+    median_ciga_pass_to_install = ciga_passrates["# CIGA passed"].sum() / ciga_passrates["# CIGA dependent"].sum()
+
+    # 3) Calculate the conversion rate of an ECO4 and a GBISjob, that doesn't need ciga, to install
+    eco4_ciga_independent_passrates = []
+    gbis_ciga_independent_passrates = []
+    for ha_name, input_data in loader.data.items():
+        asset_list = input_data["asset_list"].copy()
+        survey_list = input_data["survey_list"].copy()
+
+        if survey_list.empty:
+            continue
+
+        # For properties that were identified as a typical ECO4 job, we calculate the number of properties that
+        # installed
+        # vs cancelled
+
+        typical_eco4 = asset_list[asset_list["ECO Eligibility"] == "eco4"]
+        typical_gbis = asset_list[asset_list["ECO Eligibility"] == "gbis"]
+
+        # Merge on the surveys
+        typical_eco4_installed = typical_eco4.merge(
+            survey_list[["asset_list_row_id", "installation_status"]], how="inner", on="asset_list_row_id"
+        )
+
+        if not typical_eco4_installed.empty:
+            typical_eco4_sold = typical_eco4_installed[
+                typical_eco4_installed["installation_status"].isin(
+                    [
+                        "ECO4 - installed", "ECO4 - in progress"
+                    ]
+                )
+            ]
+
+            eco4_ciga_independent_passrates.append(
+                {
+                    "Ha Name": ha_name,
+                    "# ECO4 at install stage": typical_eco4_installed.shape[0],
+                    "# ECO4 successfully installed": typical_eco4_sold.shape[0]
+                }
+            )
+
+        typical_gbis_installed = typical_gbis.merge(
+            survey_list[["asset_list_row_id", "installation_status"]], how="inner", on="asset_list_row_id"
+        )
+        if not typical_gbis_installed.empty:
+            typical_gbis_sold = typical_gbis_installed[
+                typical_gbis_installed["installation_status"].isin(
+                    [
+                        "GBIS - in progress", "GBIS - installed"
+                    ]
+                )
+            ]
+
+            gbis_ciga_independent_passrates.append(
+                {
+                    "Ha Name": ha_name,
+                    "# GBIS at install stage": typical_gbis_installed.shape[0],
+                    "# GBIS successfully installed": typical_gbis_sold.shape[0]
+                }
+            )
+
+    eco4_ciga_independent_passrates = pd.DataFrame(eco4_ciga_independent_passrates)
+    gbis_ciga_independent_passrates = pd.DataFrame(gbis_ciga_independent_passrates)
+
+    median_eco4_to_install = (
+        eco4_ciga_independent_passrates["# ECO4 successfully installed"].sum() /
+        eco4_ciga_independent_passrates["# ECO4 at install stage"].sum()
+    )
+
+    median_gbis_to_install = (
+        gbis_ciga_independent_passrates["# GBIS successfully installed"].sum() /
+        gbis_ciga_independent_passrates["# GBIS at install stage"].sum()
+    )
+
+    # Produce the final output
+    december_figures = loader.december_figures.copy()
+    december_figures = december_figures.fillna(0)
+    results = []
+    for ha_name, input_data in loader.data.items():
+        # Original warmfront figures
+        original_warmfront_estimates = december_figures[december_figures["HA Name"] == ha_name]
+
+        original_warmfront_eco4 = original_warmfront_estimates["ECO4"].values[0]
+        original_warmfront_remaining_eco4 = original_warmfront_estimates["ECO4 remaining"].values[0]
+        original_warmfront_gbis = original_warmfront_estimates["GBIS"].values[0]
+        original_warmfront_remaining_gbis = original_warmfront_estimates["GBIS remaining"].values[0]
+
+        original_warmfront_eco4_revenue = (
+            original_warmfront_remaining_eco4 * eco4_rate +
+            (original_warmfront_eco4 - original_warmfront_remaining_eco4) * old_eco4_rate
+        )
+        original_warmfront_remaining_eco4_revenue = original_warmfront_remaining_eco4 * eco4_rate
+
+        original_warmfront_gbis_revenue = (
+            original_warmfront_remaining_gbis * gbis_rate +
+            (original_warmfront_gbis - original_warmfront_remaining_gbis) * old_gbis_rate
+        )
+
+        results.append(
+            {
+                ("", "", "HA Name"): ha_name,
+                ("Original Warmfront estimate", "Total - #", "ECO4 - November"): original_warmfront_eco4,
+                ("", "Remaining - #", ""): original_warmfront_remaining_eco4,
+                ("", "Total - £", ""): original_warmfront_eco4_revenue,
+                ("", "Remaining - £", ""): original_warmfront_remaining_eco4_revenue,
+            }
+        )
+
+
 def app():
     """
     This app contains the housin association analysis for HAs 1, 6, 14, 39 and 107.
@@ -2067,11 +2773,14 @@ def app():
     pull_data = False
 
     # List all of the data in the folder
-    directories = [str(list(entry.iterdir())[0]) for entry in DATA_FOLDER.iterdir() if entry.is_dir()]
+
+    directories = [str(file) for entry in DATA_FOLDER.iterdir() if entry.is_dir()
+                   for file in entry.iterdir() if file.suffix == '.xlsx']
     # Grab the December HA figures filepath
     december_figures_filepath = "local_data/ha_data/HA_December_figures.csv"
 
-    priority_has = ["HA1", "HA6", "HA7", "HA14", "HA16", "HA24", "HA39", "HA107"]
+    # priority_has = ["HA1", "HA6", "HA7", "HA14", "HA16", "HA24", "HA39", "HA107"]
+    priority_has = ["HA1", "HA6", "HA7", "HA14", "HA16", "HA39", "HA107"]
     # Filter down the directories to only the priority HAs
     directories = [d for d in directories if d.split("/")[2] in priority_has]
 
@@ -2103,3 +2812,17 @@ def app():
         floor_area_decile_thresholds=floor_area_decile_thresholds,
         pull_data=pull_data
     )
+
+    analyse_ha_data(outputs, loader)
+
+    # import pickle
+    # with open("ha_analysis.pickle", "wb") as f:
+    #     pickle.dump({"outputs": outputs, "loader": loader}, f)
+
+    # To read:
+    # import pickle
+    # with open("ha_analysis.pickle", "rb") as f:
+    #     outputs = pickle.load(f)["outputs"]
+    #
+    # with open("loader.pickle", "rb") as f:
+    #     loader = pickle.load(f)
diff --git a/utils/s3.py b/utils/s3.py
index cb55094a..8d36bdb3 100644
--- a/utils/s3.py
+++ b/utils/s3.py
@@ -184,7 +184,7 @@ def read_pickle_from_s3(bucket_name, s3_file_name):
         logger.errpr("Incomplete credentials provided.")
         return None
     except Exception as e:
-        logger.errpr(f'Failed to download data from {bucket_name}/{s3_file_name}: {str(e)}')
+        logger.error(f'Failed to download data from {bucket_name}/{s3_file_name}: {str(e)}')
         return None
 
     # Deserialize data from pickle format

From 9e679bd3fdb6e38a263f804ffdb07dda3892e7b1 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 1 Mar 2024 16:59:22 +0000
Subject: [PATCH 039/248] working on new forecast methodology

---
 .../ha_15_32/ha_analysis_batch_3.py           | 81 +++++++++++++++++--
 1 file changed, 73 insertions(+), 8 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index bb27029e..21af73ff 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -2728,15 +2728,22 @@ def forecast_remaining_sales(loader):
     # Produce the final output
     december_figures = loader.december_figures.copy()
     december_figures = december_figures.fillna(0)
+    # If remaining is negative, it means they actually sold more than they initially thought, so we set
+    # remaining to 0
+    december_figures["ECO4 remaining"] = np.where(
+        december_figures["ECO4 remaining"] < 0, 0, december_figures["ECO4 remaining"]
+    )
+    december_figures["GBIS remaining"] = np.where(
+        december_figures["GBIS remaining"] < 0, 0, december_figures["GBIS remaining"]
+    )
+
     results = []
     for ha_name, input_data in loader.data.items():
-        # Original warmfront figures
+        # Original warmfront figures - ECO4
         original_warmfront_estimates = december_figures[december_figures["HA Name"] == ha_name]
 
         original_warmfront_eco4 = original_warmfront_estimates["ECO4"].values[0]
         original_warmfront_remaining_eco4 = original_warmfront_estimates["ECO4 remaining"].values[0]
-        original_warmfront_gbis = original_warmfront_estimates["GBIS"].values[0]
-        original_warmfront_remaining_gbis = original_warmfront_estimates["GBIS remaining"].values[0]
 
         original_warmfront_eco4_revenue = (
             original_warmfront_remaining_eco4 * eco4_rate +
@@ -2744,21 +2751,79 @@ def forecast_remaining_sales(loader):
         )
         original_warmfront_remaining_eco4_revenue = original_warmfront_remaining_eco4 * eco4_rate
 
+        # Original warmfront figures - GBIS
+
+        original_warmfront_gbis = original_warmfront_estimates["GBIS"].values[0]
+        original_warmfront_remaining_gbis = original_warmfront_estimates["GBIS remaining"].values[0]
+
         original_warmfront_gbis_revenue = (
             original_warmfront_remaining_gbis * gbis_rate +
             (original_warmfront_gbis - original_warmfront_remaining_gbis) * old_gbis_rate
         )
+        original_warmfront_remaining_gbis_revenue = original_warmfront_remaining_gbis * gbis_rate
+
+        # Asset list
+        asset_list = input_data["asset_list"].copy()
+        survey_list = input_data["survey_list"].copy()
+
+        asset_list_remaining = asset_list.merge(
+            survey_list[["asset_list_row_id", "installation_status"]],
+            how="left",
+            on="asset_list_row_id"
+        )
+        asset_list_remaining = asset_list_remaining[pd.isnull(asset_list_remaining["installation_status"])]
+
+        eligiblity_counts = pd.DataFrame(asset_list["ECO Eligibility"].value_counts()).reset_index()
+        eligiblity_counts_remaining = pd.DataFrame(asset_list_remaining["ECO Eligibility"].value_counts()).reset_index()
+
+        eco4_pre_ciga = eligiblity_counts[
+            eligiblity_counts["ECO Eligibility"].isin(
+                ["eco4", "eco4 (subject to ciga)", "eco4 - passed ciga", "failed ciga"]
+            )
+        ]["count"].sum()
+
+        eco4_pre_ciga_remaining = eligiblity_counts_remaining[
+            eligiblity_counts["ECO Eligibility"].isin(
+                ["eco4", "eco4 (subject to ciga)", "eco4 - passed ciga", "failed ciga"]
+            )
+        ]["count"].sum()
+
+        eco4_pre_ciga_revenue = eco4_pre_ciga * eco4_rate
+        eco4_pre_ciga_remaining_revenue = eco4_pre_ciga_remaining * eco4_rate
+
+        # We check if the property has done a CIGA check
+        has_ciga_check = not input_data["ciga_list"].empty
+
+        if has_ciga_check:
+            eco4_post_ciga = eligiblity_counts[
+                eligiblity_counts["ECO Eligibility"].isin(
+                    ["eco4", "eco4 (subject to ciga)", "eco4 - passed ciga", "failed ciga"]
+                )
+            ]["count"].sum()
 
         results.append(
             {
-                ("", "", "HA Name"): ha_name,
-                ("Original Warmfront estimate", "Total - #", "ECO4 - November"): original_warmfront_eco4,
-                ("", "Remaining - #", ""): original_warmfront_remaining_eco4,
-                ("", "Total - £", ""): original_warmfront_eco4_revenue,
-                ("", "Remaining - £", ""): original_warmfront_remaining_eco4_revenue,
+                ("", "", "", "HA Name"): ha_name,
+                # ECO4 - original warmfront figures
+                ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): original_warmfront_eco4,
+                ("ECO4", "", "Remaining - #", ""): original_warmfront_remaining_eco4,
+                ("ECO4", "", "Total - £", ""): original_warmfront_eco4_revenue,
+                ("ECO4", "", "Remaining - £", ""): original_warmfront_remaining_eco4_revenue,
+                # GBIS - original warmfront figures
+                ("", "Original Warmfront estimate", "Total - #", "GBIS - November"): original_warmfront_gbis,
+                ("GBIS", "", "Remaining - #", ""): original_warmfront_gbis,
+                ("GBIS", "", "Total - £", ""): original_warmfront_gbis_revenue,
+                ("GBIS", "", "Remaining - £", ""): original_warmfront_remaining_gbis_revenue,
+                # ECO4 - asset list
+                ("", "Warmfront post code list", "Total #", "ECO4 total (pre-ciga)"): eco4_pre_ciga,
+                ("ECO4", "", "Remaining - #", ""): eco4_pre_ciga_remaining,
+                ("ECO4", "", "Total - £", ""): eco4_pre_ciga_revenue,
+                ("ECO4", "", "Remaining - £", ""): eco4_pre_ciga_remaining_revenue,
             }
         )
 
+    results = pd.DataFrame(results)
+
 
 def app():
     """

From a81f1f2520479e706479bada1761aaa92bb01a44 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 1 Mar 2024 17:37:57 +0000
Subject: [PATCH 040/248] Adding in eligible properties left estimation

---
 .../ha_15_32/ha_analysis_batch_3.py           | 101 ++++++++++++------
 1 file changed, 69 insertions(+), 32 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 21af73ff..cf9dfa53 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -2613,7 +2613,7 @@ def forecast_remaining_sales(loader):
         converted_ciga_jobs["# Ciga dependent at installation"].sum()
     )
 
-    # 2) Calculate the conversion rate from CIGA dependent ciga passed
+    # 2) Calculate the conversion rate from CIGA dependent to ciga passed
     ciga_passrates = []
     for ha_name, input_data in loader.data.items():
 
@@ -2651,7 +2651,7 @@ def forecast_remaining_sales(loader):
 
     ciga_passrates = pd.DataFrame(ciga_passrates)
 
-    median_ciga_pass_to_install = ciga_passrates["# CIGA passed"].sum() / ciga_passrates["# CIGA dependent"].sum()
+    median_ciga_success_rate = ciga_passrates["# CIGA passed"].sum() / ciga_passrates["# CIGA dependent"].sum()
 
     # 3) Calculate the conversion rate of an ECO4 and a GBISjob, that doesn't need ciga, to install
     eco4_ciga_independent_passrates = []
@@ -2762,16 +2762,20 @@ def forecast_remaining_sales(loader):
         )
         original_warmfront_remaining_gbis_revenue = original_warmfront_remaining_gbis * gbis_rate
 
-        # Asset list
+        # Asset list - ECO4
         asset_list = input_data["asset_list"].copy()
         survey_list = input_data["survey_list"].copy()
 
-        asset_list_remaining = asset_list.merge(
-            survey_list[["asset_list_row_id", "installation_status"]],
-            how="left",
-            on="asset_list_row_id"
-        )
-        asset_list_remaining = asset_list_remaining[pd.isnull(asset_list_remaining["installation_status"])]
+        if survey_list.empty:
+            asset_list_remaining = asset_list.copy()
+        else:
+            asset_list_remaining = asset_list.merge(
+                survey_list[["asset_list_row_id", "installation_status"]],
+                how="left",
+                on="asset_list_row_id"
+            )
+            asset_list_remaining = asset_list_remaining[pd.isnull(asset_list_remaining["installation_status"])]
+            asset_list_remaining = asset_list_remaining.drop(columns=["installation_status"])
 
         eligiblity_counts = pd.DataFrame(asset_list["ECO Eligibility"].value_counts()).reset_index()
         eligiblity_counts_remaining = pd.DataFrame(asset_list_remaining["ECO Eligibility"].value_counts()).reset_index()
@@ -2791,36 +2795,69 @@ def forecast_remaining_sales(loader):
         eco4_pre_ciga_revenue = eco4_pre_ciga * eco4_rate
         eco4_pre_ciga_remaining_revenue = eco4_pre_ciga_remaining * eco4_rate
 
-        # We check if the property has done a CIGA check
-        has_ciga_check = not input_data["ciga_list"].empty
+        # Total Eligible - this is what passed ciga checks + strict. If we don't have what passed CIGA, we estimate
+        # We check if the HA has done a CIGA check. Also, if we have assets dormant at CIGA, we estimate what will
+        # convert
+        # We estimate a conversion for anything left post CIGA
+        ha_ciga_conversion = ciga_passrates[ciga_passrates["Ha Name"] == ha_name]
+        if not ha_ciga_conversion.empty:
+            ha_ciga_conversion_rate = (
+                ha_ciga_conversion["# CIGA passed"].values[0] / ha_ciga_conversion["# CIGA dependent"].values[0]
+            )
+        else:
+            ha_ciga_conversion_rate = (
+                median_ciga_success_rate if median_ciga_success_rate <= median_ciga_success_rate else
+                median_ciga_success_rate
+            )
 
+        remaining_needing_ciga_check = eligiblity_counts[
+            eligiblity_counts["ECO Eligibility"] == "eco4 (subject to ciga)"
+            ]["count"].sum()
+
+        has_ciga_check = not input_data["ciga_list"].empty
         if has_ciga_check:
             eco4_post_ciga = eligiblity_counts[
                 eligiblity_counts["ECO Eligibility"].isin(
-                    ["eco4", "eco4 (subject to ciga)", "eco4 - passed ciga", "failed ciga"]
+                    ["eco4", "eco4 - passed ciga", "failed ciga"]
                 )
             ]["count"].sum()
 
-        results.append(
-            {
-                ("", "", "", "HA Name"): ha_name,
-                # ECO4 - original warmfront figures
-                ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): original_warmfront_eco4,
-                ("ECO4", "", "Remaining - #", ""): original_warmfront_remaining_eco4,
-                ("ECO4", "", "Total - £", ""): original_warmfront_eco4_revenue,
-                ("ECO4", "", "Remaining - £", ""): original_warmfront_remaining_eco4_revenue,
-                # GBIS - original warmfront figures
-                ("", "Original Warmfront estimate", "Total - #", "GBIS - November"): original_warmfront_gbis,
-                ("GBIS", "", "Remaining - #", ""): original_warmfront_gbis,
-                ("GBIS", "", "Total - £", ""): original_warmfront_gbis_revenue,
-                ("GBIS", "", "Remaining - £", ""): original_warmfront_remaining_gbis_revenue,
-                # ECO4 - asset list
-                ("", "Warmfront post code list", "Total #", "ECO4 total (pre-ciga)"): eco4_pre_ciga,
-                ("ECO4", "", "Remaining - #", ""): eco4_pre_ciga_remaining,
-                ("ECO4", "", "Total - £", ""): eco4_pre_ciga_revenue,
-                ("ECO4", "", "Remaining - £", ""): eco4_pre_ciga_remaining_revenue,
-            }
-        )
+            if remaining_needing_ciga_check > 0:
+                # We update the eco4 post ciga with the converted remaining
+                eco4_post_ciga += np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate)
+        else:
+            eco4_post_ciga = eligiblity_counts[
+                                 eligiblity_counts["ECO Eligibility"] == "eco4"
+                                 ]["count"].sum() + np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate)
+
+        eco4_post_ciga = int(eco4_post_ciga)
+
+        to_append = {
+            ("", "", "", "HA Name"): ha_name,
+            # ECO4 - original warmfront figures
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): original_warmfront_eco4,
+            ("ECO4 original", "", "Remaining - #", ""): original_warmfront_remaining_eco4,
+            ("ECO4 original", "", "Total - £", ""): original_warmfront_eco4_revenue,
+            ("ECO4 original", "", "Remaining - £", ""): original_warmfront_remaining_eco4_revenue,
+            # GBIS - original warmfront figures
+            ("", "Original Warmfront estimate", "Total - #", "GBIS - November"): original_warmfront_gbis,
+            ("GBIS original", "", "Remaining - #", ""): original_warmfront_gbis,
+            ("GBIS original", "", "Total - £", ""): original_warmfront_gbis_revenue,
+            ("GBIS original", "", "Remaining - £", ""): original_warmfront_remaining_gbis_revenue,
+            # ECO4 - asset list, pre-ciga
+            ("", "Warmfront post code list", "Total #", "ECO4 total (pre-ciga)"): eco4_pre_ciga,
+            ("ECO4 pre-ciga", "", "Remaining - #", ""): eco4_pre_ciga_remaining,
+            ("ECO4 pre-ciga", "", "Total - £", ""): eco4_pre_ciga_revenue,
+            ("ECO4 pre-ciga", "", "Remaining - £", ""): eco4_pre_ciga_remaining_revenue,
+            # ECO4 - asset list, post ciga
+            ("ECO4 post-ciga", "", "Estimated total eligible - #", ""): eco4_post_ciga,
+        }
+
+        # Make sure nothing is forgotten due to duplicate multi-index keys
+        if len(to_append) != 14:
+            raise ValueError("Something went wrong")
+
+        results.append(to_append)
 
     results = pd.DataFrame(results)
 

From 6544adc6c3c9d811f789a0372a33a19bd32beb78 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 1 Mar 2024 17:47:51 +0000
Subject: [PATCH 041/248] Added eligibility calculations

---
 .../ha_15_32/ha_analysis_batch_3.py           | 55 ++++++++++++-------
 1 file changed, 35 insertions(+), 20 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index cf9dfa53..8a46703e 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -2548,6 +2548,33 @@ def patch_cleaned(cleaned):
     return cleaned
 
 
+def calculate_eco4_post_ciga(eligiblity_counts, input_data, ha_ciga_conversion_rate, eco4_rate):
+    remaining_needing_ciga_check = eligiblity_counts[
+        eligiblity_counts["ECO Eligibility"] == "eco4 (subject to ciga)"
+        ]["count"].sum()
+
+    has_ciga_check = not input_data["ciga_list"].empty
+    if has_ciga_check:
+        eco4_post_ciga = eligiblity_counts[
+            eligiblity_counts["ECO Eligibility"].isin(
+                ["eco4", "eco4 - passed ciga", "failed ciga"]
+            )
+        ]["count"].sum()
+
+        if remaining_needing_ciga_check > 0:
+            # We update the eco4 post ciga with the converted remaining
+            eco4_post_ciga += np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate)
+    else:
+        eco4_post_ciga = (
+            eligiblity_counts[eligiblity_counts["ECO Eligibility"] == "eco4"]["count"].sum() +
+            np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate)
+        )
+    eco4_post_ciga = int(eco4_post_ciga)
+    eco4_post_ciga_revenue = eco4_post_ciga * eco4_rate
+
+    return eco4_post_ciga, eco4_post_ciga_revenue
+
+
 def forecast_remaining_sales(loader):
     # Assumptions:
     # We cap the ciga conversion rate at 75% because I expect future HAs to have a lower CIGA conversion rate
@@ -2810,27 +2837,13 @@ def forecast_remaining_sales(loader):
                 median_ciga_success_rate
             )
 
-        remaining_needing_ciga_check = eligiblity_counts[
-            eligiblity_counts["ECO Eligibility"] == "eco4 (subject to ciga)"
-            ]["count"].sum()
+        eco4_post_ciga, eco4_post_ciga_revenue = calculate_eco4_post_ciga(
+            eligiblity_counts, input_data, ha_ciga_conversion_rate, eco4_rate
+        )
 
-        has_ciga_check = not input_data["ciga_list"].empty
-        if has_ciga_check:
-            eco4_post_ciga = eligiblity_counts[
-                eligiblity_counts["ECO Eligibility"].isin(
-                    ["eco4", "eco4 - passed ciga", "failed ciga"]
-                )
-            ]["count"].sum()
-
-            if remaining_needing_ciga_check > 0:
-                # We update the eco4 post ciga with the converted remaining
-                eco4_post_ciga += np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate)
-        else:
-            eco4_post_ciga = eligiblity_counts[
-                                 eligiblity_counts["ECO Eligibility"] == "eco4"
-                                 ]["count"].sum() + np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate)
-
-        eco4_post_ciga = int(eco4_post_ciga)
+        eco4_post_ciga_remaining, eco4_post_ciga_remaining_revenue = calculate_eco4_post_ciga(
+            eligiblity_counts_remaining, input_data, ha_ciga_conversion_rate, eco4_rate
+        )
 
         to_append = {
             ("", "", "", "HA Name"): ha_name,
@@ -2851,6 +2864,8 @@ def forecast_remaining_sales(loader):
             ("ECO4 pre-ciga", "", "Remaining - £", ""): eco4_pre_ciga_remaining_revenue,
             # ECO4 - asset list, post ciga
             ("ECO4 post-ciga", "", "Estimated total eligible - #", ""): eco4_post_ciga,
+            ("ECO4 post-ciga", "", "Estimated remaining eligible - #", ""): eco4_post_ciga_remaining,
+            ("ECO4 post-ciga", "", "Estimated total eligible - £", ""): eco4_post_ciga_revenue,
         }
 
         # Make sure nothing is forgotten due to duplicate multi-index keys

From 5c686f5ec471b3c5c84b307e0851e2a0462934c0 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 1 Mar 2024 17:56:45 +0000
Subject: [PATCH 042/248] working on forecast

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 8a46703e..0bf34e70 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -2841,6 +2841,9 @@ def forecast_remaining_sales(loader):
             eligiblity_counts, input_data, ha_ciga_conversion_rate, eco4_rate
         )
 
+        # Calculate the delta compared to Warmfront's original estimate
+        eco4_delta_vs_original_estimate = 200 * (eco4_post_ciga - original_warmfront_eco4) / original_warmfront_eco4
+
         eco4_post_ciga_remaining, eco4_post_ciga_remaining_revenue = calculate_eco4_post_ciga(
             eligiblity_counts_remaining, input_data, ha_ciga_conversion_rate, eco4_rate
         )
@@ -2862,14 +2865,17 @@ def forecast_remaining_sales(loader):
             ("ECO4 pre-ciga", "", "Remaining - #", ""): eco4_pre_ciga_remaining,
             ("ECO4 pre-ciga", "", "Total - £", ""): eco4_pre_ciga_revenue,
             ("ECO4 pre-ciga", "", "Remaining - £", ""): eco4_pre_ciga_remaining_revenue,
-            # ECO4 - asset list, post ciga
+            # ECO4 - asset list, post ciga, total
             ("ECO4 post-ciga", "", "Estimated total eligible - #", ""): eco4_post_ciga,
-            ("ECO4 post-ciga", "", "Estimated remaining eligible - #", ""): eco4_post_ciga_remaining,
             ("ECO4 post-ciga", "", "Estimated total eligible - £", ""): eco4_post_ciga_revenue,
+            ("ECO4 post-ciga", "", "Delta vs original estimate", ""): eco4_delta_vs_original_estimate,
+            # ECO4 - asset list, post ciga, remaining
+            ("ECO4 post-ciga", "", "Estimated remaining eligible - #", ""): eco4_post_ciga_remaining,
+            ("ECO4 post-ciga", "", "Estimated remaining total eligible - £", ""): eco4_post_ciga_remaining_revenue,
         }
 
         # Make sure nothing is forgotten due to duplicate multi-index keys
-        if len(to_append) != 14:
+        if len(to_append) != 18:
             raise ValueError("Something went wrong")
 
         results.append(to_append)

From c47af474b92282a1159c2866e8810e8e883db7bd Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 1 Mar 2024 18:13:11 +0000
Subject: [PATCH 043/248] Added in remaining breakdowns into forecast and
 confirmed

---
 .../ha_15_32/ha_analysis_batch_3.py           | 59 ++++++++++++++-----
 1 file changed, 45 insertions(+), 14 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 0bf34e70..77c18e80 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -2555,24 +2555,40 @@ def calculate_eco4_post_ciga(eligiblity_counts, input_data, ha_ciga_conversion_r
 
     has_ciga_check = not input_data["ciga_list"].empty
     if has_ciga_check:
-        eco4_post_ciga = eligiblity_counts[
+        eco4_confirmed = eligiblity_counts[
             eligiblity_counts["ECO Eligibility"].isin(
-                ["eco4", "eco4 - passed ciga", "failed ciga"]
+                ["eco4", "eco4 - passed ciga"]
             )
         ]["count"].sum()
 
         if remaining_needing_ciga_check > 0:
             # We update the eco4 post ciga with the converted remaining
-            eco4_post_ciga += np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate)
+            eco4_remaining_forecast = np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate)
+            eco4_post_ciga = eco4_confirmed + eco4_remaining_forecast
+        else:
+            eco4_remaining_forecast = 0
+            eco4_post_ciga = eco4_confirmed
     else:
+        eco4_confirmed = 0
+        eco4_remaining_forecast = np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate)
         eco4_post_ciga = (
-            eligiblity_counts[eligiblity_counts["ECO Eligibility"] == "eco4"]["count"].sum() +
-            np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate)
+            eligiblity_counts[eligiblity_counts["ECO Eligibility"] == "eco4"]["count"].sum() + eco4_remaining_forecast
         )
     eco4_post_ciga = int(eco4_post_ciga)
-    eco4_post_ciga_revenue = eco4_post_ciga * eco4_rate
+    eco4_remaining_forecast = int(eco4_remaining_forecast)
 
-    return eco4_post_ciga, eco4_post_ciga_revenue
+    results = {
+        # Counts
+        "ECO4 - post CIGA - #": eco4_post_ciga,
+        "Of which confirmed - #": eco4_confirmed,
+        "Of which forecast - #": eco4_remaining_forecast,
+        # Revenue
+        "ECO4 - post CIGA - £": eco4_post_ciga * eco4_rate,
+        "Of which confirmed - £": eco4_confirmed * eco4_rate,
+        "Of which forecast - £": eco4_remaining_forecast * eco4_rate,
+    }
+
+    return results
 
 
 def forecast_remaining_sales(loader):
@@ -2837,14 +2853,16 @@ def forecast_remaining_sales(loader):
                 median_ciga_success_rate
             )
 
-        eco4_post_ciga, eco4_post_ciga_revenue = calculate_eco4_post_ciga(
+        eco4_post_ciga_total_results = calculate_eco4_post_ciga(
             eligiblity_counts, input_data, ha_ciga_conversion_rate, eco4_rate
         )
 
         # Calculate the delta compared to Warmfront's original estimate
-        eco4_delta_vs_original_estimate = 200 * (eco4_post_ciga - original_warmfront_eco4) / original_warmfront_eco4
+        eco4_delta_vs_original_estimate = 100 * (
+            eco4_post_ciga_total_results["ECO4 - post CIGA - #"] - original_warmfront_eco4
+        ) / original_warmfront_eco4
 
-        eco4_post_ciga_remaining, eco4_post_ciga_remaining_revenue = calculate_eco4_post_ciga(
+        eco4_post_ciga_remaining_results = calculate_eco4_post_ciga(
             eligiblity_counts_remaining, input_data, ha_ciga_conversion_rate, eco4_rate
         )
 
@@ -2866,12 +2884,25 @@ def forecast_remaining_sales(loader):
             ("ECO4 pre-ciga", "", "Total - £", ""): eco4_pre_ciga_revenue,
             ("ECO4 pre-ciga", "", "Remaining - £", ""): eco4_pre_ciga_remaining_revenue,
             # ECO4 - asset list, post ciga, total
-            ("ECO4 post-ciga", "", "Estimated total eligible - #", ""): eco4_post_ciga,
-            ("ECO4 post-ciga", "", "Estimated total eligible - £", ""): eco4_post_ciga_revenue,
+            ("ECO4 post-ciga", "", "Estimated total eligible - #", ""): eco4_post_ciga_total_results[
+                "ECO4 - post CIGA - #"],
+            ("ECO4 post-ciga", "", "Estimated total eligible - £", ""): eco4_post_ciga_total_results[
+                "ECO4 - post CIGA - £"],
             ("ECO4 post-ciga", "", "Delta vs original estimate", ""): eco4_delta_vs_original_estimate,
             # ECO4 - asset list, post ciga, remaining
-            ("ECO4 post-ciga", "", "Estimated remaining eligible - #", ""): eco4_post_ciga_remaining,
-            ("ECO4 post-ciga", "", "Estimated remaining total eligible - £", ""): eco4_post_ciga_remaining_revenue,
+            ("ECO4 post-ciga", "", "Estimated remaining eligible - #", ""): eco4_post_ciga_remaining_results[
+                "ECO4 - post CIGA - #"],
+            ("ECO4 post-ciga", "", "Estimated remaining total eligible - £", ""): eco4_post_ciga_remaining_results[
+                "ECO4 - post CIGA - £"],
+            ("ECO4 post-ciga", "", "Of which - confirmed (post CIGA or no CIGA required) - #", ""):
+                eco4_post_ciga_remaining_results["Of which confirmed - #"],
+            ("ECO4 post-ciga", "", "Of which - confirmed (post CIGA or no CIGA required) - £", ""):
+                eco4_post_ciga_remaining_results["Of which confirmed - £"],
+            ("ECO4 post-ciga", "", "Of which forecast - #", ""):
+                eco4_post_ciga_remaining_results["Of which forecast - #"],
+            ("ECO4 post-ciga", "", "Of which forecast - £", ""):
+                eco4_post_ciga_remaining_results["Of which forecast - £"],
+            # CIGA failures
         }
 
         # Make sure nothing is forgotten due to duplicate multi-index keys

From 752f0b0f8384a1082161abf31c18638864c45f1e Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 1 Mar 2024 18:37:47 +0000
Subject: [PATCH 044/248] splitting out post ciga figures

---
 .../ha_15_32/ha_analysis_batch_3.py           | 71 +++++++++++++++----
 1 file changed, 59 insertions(+), 12 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 77c18e80..4f33bf34 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -2548,34 +2548,52 @@ def patch_cleaned(cleaned):
     return cleaned
 
 
-def calculate_eco4_post_ciga(eligiblity_counts, input_data, ha_ciga_conversion_rate, eco4_rate):
+def calculate_eco4_post_ciga(
+    eligiblity_counts, input_data, ha_ciga_conversion_rate, ha_ciga_pass_to_sale_rate, ha_eco4_to_sale_rate,
+    eco4_rate
+):
     remaining_needing_ciga_check = eligiblity_counts[
         eligiblity_counts["ECO Eligibility"] == "eco4 (subject to ciga)"
         ]["count"].sum()
 
     has_ciga_check = not input_data["ciga_list"].empty
     if has_ciga_check:
-        eco4_confirmed = eligiblity_counts[
-            eligiblity_counts["ECO Eligibility"].isin(
-                ["eco4", "eco4 - passed ciga"]
-            )
-        ]["count"].sum()
+
+        eco4_no_ciga_needed = eligiblity_counts[
+            eligiblity_counts["ECO Eligibility"] == "eco4"
+            ]["count"].sum()
+
+        eco4_ciga_passed = eligiblity_counts[
+            eligiblity_counts["ECO Eligibility"] == "eco4 - passed ciga"
+            ]["count"].sum()
+
+        eco4_confirmed = (eco4_no_ciga_needed * ha_eco4_to_sale_rate) + (eco4_ciga_passed * ha_ciga_pass_to_sale_rate)
+        eco4_confirmed = np.round(eco4_confirmed)
 
         if remaining_needing_ciga_check > 0:
             # We update the eco4 post ciga with the converted remaining
-            eco4_remaining_forecast = np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate)
+            eco4_remaining_forecast = np.round(
+                remaining_needing_ciga_check * ha_ciga_conversion_rate * ha_ciga_pass_to_sale_rate
+            )
             eco4_post_ciga = eco4_confirmed + eco4_remaining_forecast
         else:
             eco4_remaining_forecast = 0
             eco4_post_ciga = eco4_confirmed
     else:
-        eco4_confirmed = 0
-        eco4_remaining_forecast = np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate)
+        eco4_no_ciga_needed = eligiblity_counts[
+            eligiblity_counts["ECO Eligibility"] == "eco4"
+            ]["count"].sum()
+        eco4_confirmed = np.round(eco4_no_ciga_needed * ha_eco4_to_sale_rate)
+        eco4_remaining_forecast = np.round(
+            remaining_needing_ciga_check * ha_ciga_conversion_rate * ha_ciga_pass_to_sale_rate
+        )
         eco4_post_ciga = (
             eligiblity_counts[eligiblity_counts["ECO Eligibility"] == "eco4"]["count"].sum() + eco4_remaining_forecast
         )
+
     eco4_post_ciga = int(eco4_post_ciga)
     eco4_remaining_forecast = int(eco4_remaining_forecast)
+    eco4_confirmed = int(eco4_confirmed)
 
     results = {
         # Counts
@@ -2853,8 +2871,32 @@ def forecast_remaining_sales(loader):
                 median_ciga_success_rate
             )
 
+        # We also need the ha ciga passed to install success rate
+        ha_ciga_pass_to_sale = converted_ciga_jobs[converted_ciga_jobs["HA Name"] == ha_name]
+        if not ha_ciga_pass_to_sale.empty:
+            ha_ciga_pass_to_sale_rate = (
+                ha_ciga_pass_to_sale["# Ciga dependent successfully installed"].values[0] /
+                ha_ciga_pass_to_sale["# Ciga dependent at installation"].values[0]
+            )
+        else:
+            ha_ciga_pass_to_sale_rate = median_ciga_pass_to_install
+
+        ha_eco4_to_sale = eco4_ciga_independent_passrates[eco4_ciga_independent_passrates["Ha Name"] == ha_name]
+        if not ha_eco4_to_sale.empty:
+            ha_eco4_to_sale_rate = (
+                ha_eco4_to_sale['# ECO4 successfully installed'].values[0] /
+                ha_eco4_to_sale['# ECO4 at install stage'].values[0]
+            )
+        else:
+            ha_eco4_to_sale_rate = median_eco4_to_install
+
         eco4_post_ciga_total_results = calculate_eco4_post_ciga(
-            eligiblity_counts, input_data, ha_ciga_conversion_rate, eco4_rate
+            eligiblity_counts=eligiblity_counts,
+            input_data=input_data,
+            ha_ciga_conversion_rate=ha_ciga_conversion_rate,
+            ha_ciga_pass_to_sale_rate=ha_ciga_pass_to_sale_rate,
+            ha_eco4_to_sale_rate=ha_eco4_to_sale_rate,
+            eco4_rate=eco4_rate
         )
 
         # Calculate the delta compared to Warmfront's original estimate
@@ -2863,7 +2905,12 @@ def forecast_remaining_sales(loader):
         ) / original_warmfront_eco4
 
         eco4_post_ciga_remaining_results = calculate_eco4_post_ciga(
-            eligiblity_counts_remaining, input_data, ha_ciga_conversion_rate, eco4_rate
+            eligiblity_counts=eligiblity_counts_remaining,
+            input_data=input_data,
+            ha_ciga_conversion_rate=ha_ciga_conversion_rate,
+            ha_ciga_pass_to_sale_rate=ha_ciga_pass_to_sale_rate,
+            ha_eco4_to_sale_rate=ha_eco4_to_sale_rate,
+            eco4_rate=eco4_rate
         )
 
         to_append = {
@@ -2906,7 +2953,7 @@ def forecast_remaining_sales(loader):
         }
 
         # Make sure nothing is forgotten due to duplicate multi-index keys
-        if len(to_append) != 18:
+        if len(to_append) != 22:
             raise ValueError("Something went wrong")
 
         results.append(to_append)

From 56ee7224f58e7363a1732ed46aaebd29a71f7acd Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 1 Mar 2024 19:53:28 +0000
Subject: [PATCH 045/248] Added gbis remaining columns

---
 .../ha_15_32/ha_analysis_batch_3.py           | 1100 +++++++++--------
 1 file changed, 592 insertions(+), 508 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 4f33bf34..191ca74c 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1692,500 +1692,500 @@ def get_col_widths(dataframe):
     return widths
 
 
-def analyse_ha_data(outputs, loader):
-    """
-    The approach we take within this function is the following:
-    For properties that have been identified by warmfront as eligible properties, characterise them by scheme. The
-    characterisation can be broken down as the following:
-    1) The property has been identified by Warmfront and is eligible for ECO4/GBIS work, under the strictest criteria
-    2) The property has been identified by Warmfront, however it has a full cavity, and therefore would be subject to
-    a CIGA check
-    3) The property has been identified by Warmfront, but the EPC shows that the property has more than 100mm loft
-    insulation
-    4) The property has been identified by Warmfront, but doesn't look like a property that would likely qualify under
-    any cirsumstances, given the available data
-
-    Then, for any property that has NOT been identifid by Warmfront, we identify properties that look like they would
-    qualify under the strictest criteria, and mark these as potential additional opportunities.
-
-    :return:
-    """
-
-    eco4_rate = 1710
-    gbis_rate = 600
-    old_eco4_rate = 1456
-    old_gbis_rate = 432
-
-    epc_c_threshold = 80
-    scheme_map = {
-        "ECO4": "ECO4",
-        "AFFORDABLE WARMTH": "ECO4",
-        "ECO4 A/W": "ECO4",
-        "ECO4 GBIS (ECO+)": "GBIS"
-    }
-
-    ha_analysis_results = []
-    total_revenue_results = []
-    for ha_name, datasets in outputs.items():
-        inputs = [x for k, x in loader.data.items() if k == ha_name][0]
-
-        results_df = datasets["results_df"].copy()
-
-        analysis_data = inputs["asset_list"][['asset_list_row_id', "ECO Eligibility"]].rename(
-            columns={"row_meaning": "asset_identification_status"}
-        ).merge(
-            results_df,
-            how="left",
-            right_on="row_id",
-            left_on="asset_list_row_id"
-        )
-
-        analysis_data["is_remaining"] = True
-
-        n_sold_eco4 = 0
-        n_sold_gbis = 0
-        if not inputs["survey_list"].empty:
-            # Merge on the survey list and signal everything that is remaining or not (i.e. anything that hasn't had
-            # a survey)
-            survey_list = inputs["survey_list"].copy()
-
-            # TODO: TEMP
-            scheme_column = survey_list.columns[0]
-            # We clean up the survey list installation or cancelled
-            survey_list["installed_or_cancelled_clean"] = survey_list["INSTALLED OR CANCELLED"].str.lower()
-            # Remove all punctuation
-            survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.replace(
-                r'[^\w\s]', '', regex=True
-            )
-            # Remove double spaces
-            survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.replace(
-                r'\s+', ' ', regex=True
-            )
-            # Remove trailing spaces
-            survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.strip()
-
-            # Remap the values in the scheme column
-            survey_list[scheme_column] = survey_list[scheme_column].replace(scheme_map)
-
-            survey_list["installation_status"] = None
-            survey_list["installation_status"] = np.where(
-                survey_list["installed_or_cancelled_clean"].isin(["installed", "installed see notes"]),
-                "installed",
-                survey_list["installation_status"]
-            )
-            survey_list["installation_status"] = np.where(
-                survey_list["installed_or_cancelled_clean"].isin(["cancelled"]),
-                "cancelled",
-                survey_list["installation_status"]
-            )
-            # Find partial installations
-            survey_list["installation_status"] = np.where(
-                survey_list["installed_or_cancelled_clean"].str.contains("still to be installed"),
-                "partially installed",
-                survey_list["installation_status"]
-            )
-            # Find partial cancellations
-            # TODO: We might have more indications of partial cancellations
-            survey_list["installation_status"] = np.where(
-                survey_list["installed_or_cancelled_clean"].isin(["loft cancelled"]),
-                "partially cancelled",
-                survey_list["installation_status"]
-            )
-
-            # Finally, for other cases, we set the status to "in progress"
-            survey_list["installation_status"] = survey_list["installation_status"].fillna("in progress")
-
-            # We concatenate the scheme name with the installation status
-            survey_list["installation_status"] = (
-                survey_list[scheme_column] + " - " + survey_list["installation_status"]
-            )
-
-            # TODO: END TEMP
-
-            survey_list_to_merge = survey_list[["asset_list_row_id", scheme_column]].copy()
-            survey_list_to_merge["is_remaining"] = False
-            analysis_data = analysis_data.drop(columns="is_remaining").merge(
-                survey_list_to_merge,
-                how="left", on="asset_list_row_id"
-            )
-            analysis_data["is_remaining"] = analysis_data["is_remaining"].fillna(True)
-
-            n_sold_eco4 = survey_list_to_merge[survey_list_to_merge[scheme_column] == "ECO4"].shape[0]
-            n_sold_gbis = survey_list_to_merge[survey_list_to_merge[scheme_column] == "GBIS"].shape[0]
-
-        # Take just remaining
-        analysis_data = analysis_data[analysis_data["is_remaining"]]
-
-        # Also, if the HA has started selling, we remove any that are still subject to ciga
-        n_eco4_missed_subject_to_ciga = 0
-        if not inputs["survey_list"].empty:
-            n_eco4_missed_subject_to_ciga = (analysis_data["ECO Eligibility"] == "eco4 (subject to ciga)").sum()
-            analysis_data = analysis_data[analysis_data["ECO Eligibility"] != "eco4 (subject to ciga)"]
-
-        ################################################################################################
-        # We take the properties that strictly qualified under eco
-        ################################################################################################
-
-        eco4_identified = analysis_data[analysis_data["ECO Eligibility"] == "eco4"].copy()
-        eco4_identified["identification_type"] = None
-        eco4_identified["identification_type"] = np.where(
-            (eco4_identified["eco4_eligible"] == True) & (eco4_identified["eco4_strict"] == True),
-            "strict",
-            eco4_identified["identification_type"]
-        )
-
-        # For expansive, the property can be no higher than an EPC C
-        eco4_identified["identification_type"] = np.where(
-            (eco4_identified["eco4_eligible"] == True) & (eco4_identified["eco4_strict"] == False) & (
-                eco4_identified["sap"] <= epc_c_threshold
-            ),
-            "expansive",
-            eco4_identified["identification_type"]
-        )
-        ################################################################################################
-        # We take the properties dependent on CIGA
-        ################################################################################################
-
-        ciga_dependent_identified = analysis_data[
-            analysis_data["ECO Eligibility"].isin(
-                [
-                    "eco4 (subject to ciga)",
-                    "eco4 - passed ciga"
-                ]
-            )
-        ].copy()
-
-        # These are properties that show filled cavity
-        ciga_dependent_identified["identification_type"] = None
-        ciga_dependent_identified["identification_type"] = np.where(
-            ciga_dependent_identified["eco4_message"].isin(
-                [
-                    "Perfect suitability",
-                    "Meets cavity and sap",
-                    "Fails cavity, meets loft, fails SAP",
-                    "Meets fabric, fails SAP check",
-                    "Meets cavity, loft borderline, meets sap",
-                ]
-            ) & (ciga_dependent_identified["sap"] <= epc_c_threshold),
-            "strict",
-            ciga_dependent_identified["identification_type"]
-        )
-
-        ciga_dependent_identified["identification_type"] = np.where(
-            ((ciga_dependent_identified["eco4_message"].isin(["Meets just cavity"])) | (
-                ciga_dependent_identified["walls"].isin(["Cavity wall, filled cavity"])
-            )) & (
-                (ciga_dependent_identified["sap"] <= epc_c_threshold) &
-                pd.isnull(ciga_dependent_identified["identification_type"])
-            ),
-            "expansive",
-            ciga_dependent_identified["identification_type"]
-        )
-
-        ################################################################################################
-        # We properties that qualified for gbis
-        ################################################################################################
-        gbis_identified = analysis_data[analysis_data["ECO Eligibility"] == "gbis"].copy()
-        gbis_identified["identification_type"] = None
-        gbis_identified["identification_type"] = np.where(
-            (gbis_identified["gbis_eligible"] == True) & (gbis_identified["sap"] < 69),
-            "strict",
-            gbis_identified["identification_type"]
-        )
-
-        gbis_identified["identification_type"] = np.where(
-            (gbis_identified["gbis_eligible"] == True) & (gbis_identified["sap"] <= epc_c_threshold) & (
-                pd.isnull(gbis_identified["identification_type"])
-            ),
-            "expansive",
-            gbis_identified["identification_type"]
-        )
-
-        # Finally, we look at the properties that have not been identified by Warmfront
-        not_identified = analysis_data[
-            analysis_data["ECO Eligibility"].isin(
-                [
-                    "not eligible"
-                ]
-            )
-        ].copy()
-
-        surplus_eco4 = not_identified[
-            (not_identified["eco4_eligible"] == True) & (not_identified["eco4_message"].isin(
-                ["Perfect suitability", "Meets cavity, loft borderline, meets sap", "Near perfect suitability"]
-            ))
-            ]
-
-        surplus_gbis = not_identified[
-            (not_identified["gbis_eligible"] == True) & (
-                ~not_identified["asset_list_row_id"].isin(surplus_eco4["asset_list_row_id"].values)
-            ) & (not_identified["sap"] < 69) & (
-                (not_identified["cavity_type"].isin(["empty", "partial insulation"])) | (
-                not_identified["walls"].str.contains("partial", case=False, na=False)
-            )
-            )
-            ]
-        surplus_gbis = surplus_gbis[surplus_gbis["is_estimated"] == False]
-
-        # Output variables - the data was sent to us in December, but the remaining figures are
-        # what was in November
-        november_remaining = loader.december_figures[loader.december_figures["HA Name"] == ha_name]
-
-        # ECO4
-        n_properties_remaining_in_asset_list = inputs["asset_list"].shape[0]
-        november_eco4_remaining = max(november_remaining["ECO4 remaining"].values[0], 0)
-        november_eco4_sold = november_remaining["No. of Tech surveys complete - Eco 4"].values[0]
-        eco4_sales_since_november = n_sold_eco4 - november_eco4_sold
-
-        n_warmfront_identified_eco4 = eco4_identified.shape[0] + ciga_dependent_identified.shape[0]
-        eco4_of_which_identified_strict = (
-            eco4_identified[eco4_identified["identification_type"] == "strict"].shape[0] +
-            ciga_dependent_identified[ciga_dependent_identified["identification_type"] == "strict"].shape[0]
-        )
-        eco4_of_which_identified_expansive = (
-            eco4_identified[eco4_identified["identification_type"] == "expansive"].shape[0] +
-            ciga_dependent_identified[ciga_dependent_identified["identification_type"] == "expansive"].shape[0]
-        )
-        # GBIS
-        n_warmfront_identified_gbis = gbis_identified.shape[0]
-        november_gbis_remaining = max(november_remaining["GBIS remaining"].values[0], 0)
-        november_gbis_sold = november_remaining["No. of Tech surveys complete - GBIS"].values[0]
-        gbis_sales_since_november = n_sold_gbis - november_gbis_sold
-        gbis_of_which_identified_strict = gbis_identified[gbis_identified["identification_type"] == "strict"].shape[0]
-        gbis_of_which_identified_expansive = \
-            gbis_identified[gbis_identified["identification_type"] == "expansive"].shape[0]
-
-        to_append = {
-            ("", "HA Name"): ha_name,
-            ("", "# properties in asset list"): n_properties_remaining_in_asset_list,
-            ############
-            # ECO4
-            ############
-            ("ECO4", "# remaining November file"): november_eco4_remaining,
-            ("ECO4", "# sold in November file"): november_eco4_sold,
-            ("ECO4", "# sold (survey list)"): n_sold_eco4,
-            ("ECO4", "# that missed CIGA check"): n_eco4_missed_subject_to_ciga,
-            ("ECO4", "# Remaining properties (asset list)"): n_warmfront_identified_eco4,
-            ("ECO4", "Of which identified by model - strict"): eco4_of_which_identified_strict,
-            ("ECO4", "Of which identified by model - expansive"): eco4_of_which_identified_expansive,
-            ("ECO4", "Of which identified by model - total"): (
-                eco4_of_which_identified_strict + eco4_of_which_identified_expansive
-            ),
-            ("ECO4", "Additional properties"): surplus_eco4.shape[0],
-            ############
-            # GBIS
-            ############
-            ("GBIS", "# remaining November file"): november_gbis_remaining,
-            ("GBIS", "# sold in November file"): november_gbis_sold,
-            ("GBIS", "# sold (survey list)"): n_sold_gbis,
-            ("GBIS", "# Remaining properties (asset list)"): n_warmfront_identified_gbis,
-            ("GBIS", "Of which identified by model - strict"): gbis_of_which_identified_strict,
-            ("GBIS", "Of which identified by model - expansive"): gbis_of_which_identified_expansive,
-            ("GBIS", "Of which identified by model - total"): (
-                gbis_of_which_identified_strict + gbis_of_which_identified_expansive
-            ),
-            ("GBIS", "Additional properties"): surplus_gbis.shape[0]
-        }
-
-        ha_analysis_results.append(to_append)
-
-        # Calculate the revenue results
-        to_append_revenue = {
-            ("", "HA Name"): ha_name,
-            # Eco4 revenue
-            ("ECO4", "£ remaining November file"): november_eco4_remaining * eco4_rate,
-            ("ECO4", "£ sold November file"): november_eco4_sold * old_eco4_rate,
-            ("ECO4", "£ sold since November"): eco4_sales_since_november * eco4_rate,
-            ("ECO4", "£ stuck at ciga check"): n_eco4_missed_subject_to_ciga * eco4_rate,
-            ("ECO4", "£ remaining (asset list)"): n_warmfront_identified_eco4 * eco4_rate,
-            ("ECO4", "Of which identified by model - strict"): eco4_of_which_identified_strict * eco4_rate,
-            ("ECO4", "Of which identified by model - expansive"): eco4_of_which_identified_expansive * eco4_rate,
-            ("ECO4", "Of which identified by model - total"): eco4_rate * (
-                eco4_of_which_identified_strict + eco4_of_which_identified_expansive
-            ),
-            ("ECO4", "Additional properties"): eco4_rate * surplus_eco4.shape[0],
-        }
-        total_revenue_results.append(to_append_revenue)
-
-    ha_analysis_results = pd.DataFrame(ha_analysis_results)
-    ha_analysis_results.columns = pd.MultiIndex.from_tuples(ha_analysis_results.columns)
-
-    facts_and_figures = loader.facts_and_figures.copy()
-    facts_and_figures["ha_number"] = facts_and_figures["HA Name"].str.extract(r'(\d+)').astype(int)
-    facts_and_figures = facts_and_figures.sort_values("ha_number")
-    facts_and_figures = facts_and_figures.drop(columns=["ha_number"])
-
-    # Rename some of the cols
-    facts_and_figures = facts_and_figures.rename(
-        columns={
-            # ECO4 cols
-            "ECO4": "ECO4 - November",
-            "GBIS": "GBIS - November",
-            "eco4 (subject to ciga)": "ECO4 - subject to ciga",
-            "eco4": "ECO4 - doesn't need CIGA",
-            "eco4 - passed ciga": "ECO4 - passed CIGA",
-            "failed ciga": "ECO4 - failed CIGA",
-            "ECO4 - partially cancelled": "ECO4 - Install downgrade to GBIS",
-            "ECO4 - in progress": "ECO4 - Install in progress",
-            "ECO4 - cancelled": "ECO4 - Install cancelled",
-            # GBIS cols
-            "gbis": "GBIS total (asset list)"
-        }
-    )
-    # We calculate the eco4 total from the asset list
-    # 1) If ciga checks have been completed (i.e. ECO4 - passed ciga > 0) this sum is
-    # ECO4 - doesn't need CIGA + ECO4 - passed CIGA
-    # 2) if ciga checks haven't been completed (i.e. ECO4 - passed ciga is missing), this sum is
-    # ECO4 - doesn't need CIGA + ECO4 - subject to ciga
-    facts_and_figures["ECO4 total (asset list - pre ciga)"] = (
-        facts_and_figures["ECO4 - doesn't need CIGA"] +
-        facts_and_figures["ECO4 - subject to ciga"] +
-        facts_and_figures["ECO4 - passed CIGA"]
-    )
-
-    facts_and_figures["ECO4 total (asset list - post ciga)"] = None
-    facts_and_figures["ECO4 total (asset list - post ciga)"] = np.where(
-        facts_and_figures["ECO4 - passed CIGA"] > 0,
-        facts_and_figures["ECO4 - doesn't need CIGA"] + facts_and_figures["ECO4 - passed CIGA"],
-        facts_and_figures["ECO4 total (asset list - post ciga)"]
-    )
-
-    # Re-arrange the columns
-    facts_and_figures = facts_and_figures[
-        [
-            'HA Name',
-            'ECO4 - November',
-            'GBIS - November',
-            'ECO4 total (asset list - pre ciga)',
-            'ECO4 total (asset list - post ciga)',
-            'GBIS total (asset list)',
-            'ECO4 - subject to ciga',
-            "ECO4 - doesn't need CIGA",
-            'ECO4 - passed CIGA',
-            'ECO4 - failed CIGA',
-            'ECO4 - installed',
-            'ECO4 - Install in progress',
-            'ECO4 - Install cancelled',
-            'ECO4 - partially installed',
-            'ECO4 - Install downgrade to GBIS',
-        ]
-    ]
-    # Addd a note to flag any rows where ECO4 (
-    # subject to ciga is greater than 0) and (ECO4 - passed ciga is greater than 0
-    # )
-    facts_and_figures["Missed CIGA checks opportunity"] = None
-    facts_and_figures["Missed CIGA checks opportunity"] = np.where(
-        (facts_and_figures["ECO4 - subject to ciga"] > 0) & (facts_and_figures["ECO4 - passed CIGA"] > 0),
-        "potential opportunity of " + facts_and_figures["ECO4 - subject to ciga"].astype(
-            str) + " ECO4 properties needing a CIGA check",
-        facts_and_figures["Missed CIGA checks opportunity"]
-    )
-
-    facts_and_figures.to_csv("Facts and figures sample.csv")
-
-    # Re arrage the columns
-
-    # Also sort ha_analysis_results by ha number
-    ha_analysis_results["ha_number"] = ha_analysis_results[("", "HA Name")].str.extract(r'(\d+)').astype(int)
-    ha_analysis_results = ha_analysis_results.sort_values("ha_number")
-    ha_analysis_results = ha_analysis_results.drop(columns=["ha_number"])
-
-    # We save 2 sheets
-    # Automate creation of the excel
-    # Create a Pandas Excel writer using XlsxWriter as the engine
-    with pd.ExcelWriter('HA Analysis Results.xlsx', engine='xlsxwriter') as writer:
-        # Write each dataframe to a different worksheet without the index
-        for df, sheet in [(facts_and_figures, 'HA Facts and Figures'),
-                          (ha_analysis_results, 'Asset Identification')]:
-
-            df.to_excel(writer, sheet_name=sheet)
-
-            # Auto-adjust columns' width
-            for i, width in enumerate(get_col_widths(df)):
-                writer.sheets[sheet].set_column(i, i, width)
-
-    # Inspection: - Looking into the proportion of homes with "cavity, as built, insulated (assumed)" as their
-    #               description, and what proportion of time they get identified via non-invasive surveys
-
-    # true_eco4_assets = []
-    # ciga_dependent_assets = []
-    # not_eligible = []
-    # as_built_insulated = []
-    # date_cols = {
-    #     "HA39": "date_built",
-    #     "HA14": "Built In Year",
-    #     "HA6": "Construction Year",
-    #     "HA1": "Build Date",
-    #     "HA107": "YEAR BUILT"
-    # }
-    # for ha_name, data_objects in outputs.items():
-    #     inputs = [x for k, x in loader.data.items() if k == ha_name][0]
-    #
-    #     date_col = date_cols[ha_name]
-    #     results_df = data_objects["results_df"].copy()
-    #     df = inputs["asset_list"][['asset_list_row_id', "ECO Eligibility", date_col]].rename(
-    #         columns={"row_meaning": "asset_identification_status", date_col: "date_built"}
-    #     ).merge(
-    #         results_df,
-    #         how="left",
-    #         right_on="row_id",
-    #         left_on="asset_list_row_id"
-    #     )
-    #
-    #     # take the true ECO4
-    #     true_eco4 = df[df["ECO Eligibility"] == "eco4"].copy()
-    #     ciga_dependent = df[
-    #         df["ECO Eligibility"].isin(
-    #             [
-    #                 "eco4 (subject to ciga)",
-    #                 "failed ciga",
-    #                 "eco4 - passed ciga"
-    #             ]
-    #         )
-    #     ]
-    #     insulated_assumed = df[df["walls"] == "Cavity wall, as built, insulated"].copy()
-    #     # We convert date built to datetime
-    #     try:
-    #         insulated_assumed = insulated_assumed[~pd.isnull(insulated_assumed["date_built"])]
-    #         insulated_assumed["year_built"] = pd.to_datetime(insulated_assumed["date_built"].astype(str)).dt.year
-    #         as_built_insulated.append(insulated_assumed)
-    #     except Exception as e:
-    #         print("oh well")
-    #
-    #     true_eco4_assets.append(true_eco4)
-    #     ciga_dependent_assets.append(ciga_dependent)
-    #
-    # true_eco4_assets = pd.concat(true_eco4_assets)
-    # ciga_dependent_assets = pd.concat(ciga_dependent_assets)
-    # as_built_insulated = pd.concat(as_built_insulated)
-    #
-    # true_eco4_assets["walls"].value_counts(normalize=True)
-    # ciga_dependent_assets["walls"].value_counts(normalize=True)
-    #
-    # from recommendations.recommendation_utils import extract_insulation_thickness
-    #
-    # true_eco4_assets["roof_insulation_thickness"] = true_eco4_assets["roof"].apply(
-    #     lambda x: extract_insulation_thickness(x)
-    # )
-    #
-    # true_eco4_assets["e"] = true_eco4_assets.merge(
-    #     pd.DataFrame(cleaned["roof-description"])[["original_description", "insulation_thickness"]],
-    #     how="left",
-    #     left_on="roof",
-    #     right_on="original_description"
-    # )
-    #
-    # true_eco4_assets["sap"].mean()
-    #
-    # true_eco4_assets["insulation_thickness"].isin(
-    #     ["250", "150", "200", "100", "75", "50"]
-    # ).sum() / true_eco4_assets.shape[0]
-    #
-    # true_eco4_assets["insulation_thickness"].isin(
-    #     ["100"]
-    # ).sum() / true_eco4_assets.shape[0]
-    #
-    # as_built_insulated.groupby("property_type")["ECO Eligibility"].value_counts(normalize=True)
+# def analyse_ha_data(outputs, loader):
+#     """
+#     The approach we take within this function is the following:
+#     For properties that have been identified by warmfront as eligible properties, characterise them by scheme. The
+#     characterisation can be broken down as the following:
+#     1) The property has been identified by Warmfront and is eligible for ECO4/GBIS work, under the strictest criteria
+#     2) The property has been identified by Warmfront, however it has a full cavity, and therefore would be subject to
+#     a CIGA check
+#     3) The property has been identified by Warmfront, but the EPC shows that the property has more than 100mm loft
+#     insulation
+#     4) The property has been identified by Warmfront, but doesn't look like a property that would likely qualify under
+#     any circumstances, given the available data
+#
+#     Then, for any property that has NOT been identified by Warmfront, we identify properties that look like they would
+#     qualify under the strictest criteria, and mark these as potential additional opportunities.
+#
+#     :return:
+#     """
+#
+#     eco4_rate = 1710
+#     gbis_rate = 600
+#     # old_eco4_rate = 1456
+#     old_gbis_rate = 432
+#
+#     epc_c_threshold = 80
+#     scheme_map = {
+#         "ECO4": "ECO4",
+#         "AFFORDABLE WARMTH": "ECO4",
+#         "ECO4 A/W": "ECO4",
+#         "ECO4 GBIS (ECO+)": "GBIS"
+#     }
+#
+#     ha_analysis_results = []
+#     total_revenue_results = []
+#     for ha_name, datasets in outputs.items():
+#         inputs = [x for k, x in loader.data.items() if k == ha_name][0]
+#
+#         results_df = datasets["results_df"].copy()
+#
+#         analysis_data = inputs["asset_list"][['asset_list_row_id', "ECO Eligibility"]].rename(
+#             columns={"row_meaning": "asset_identification_status"}
+#         ).merge(
+#             results_df,
+#             how="left",
+#             right_on="row_id",
+#             left_on="asset_list_row_id"
+#         )
+#
+#         analysis_data["is_remaining"] = True
+#
+#         n_sold_eco4 = 0
+#         n_sold_gbis = 0
+#         if not inputs["survey_list"].empty:
+#             # Merge on the survey list and signal everything that is remaining or not (i.e. anything that hasn't had
+#             # a survey)
+#             survey_list = inputs["survey_list"].copy()
+#
+#             # TODO: TEMP
+#             scheme_column = survey_list.columns[0]
+#             # We clean up the survey list installation or cancelled
+#             survey_list["installed_or_cancelled_clean"] = survey_list["INSTALLED OR CANCELLED"].str.lower()
+#             # Remove all punctuation
+#             survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.replace(
+#                 r'[^\w\s]', '', regex=True
+#             )
+#             # Remove double spaces
+#             survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.replace(
+#                 r'\s+', ' ', regex=True
+#             )
+#             # Remove trailing spaces
+#             survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.strip()
+#
+#             # Remap the values in the scheme column
+#             survey_list[scheme_column] = survey_list[scheme_column].replace(scheme_map)
+#
+#             survey_list["installation_status"] = None
+#             survey_list["installation_status"] = np.where(
+#                 survey_list["installed_or_cancelled_clean"].isin(["installed", "installed see notes"]),
+#                 "installed",
+#                 survey_list["installation_status"]
+#             )
+#             survey_list["installation_status"] = np.where(
+#                 survey_list["installed_or_cancelled_clean"].isin(["cancelled"]),
+#                 "cancelled",
+#                 survey_list["installation_status"]
+#             )
+#             # Find partial installations
+#             survey_list["installation_status"] = np.where(
+#                 survey_list["installed_or_cancelled_clean"].str.contains("still to be installed"),
+#                 "partially installed",
+#                 survey_list["installation_status"]
+#             )
+#             # Find partial cancellations
+#             # TODO: We might have more indications of partial cancellations
+#             survey_list["installation_status"] = np.where(
+#                 survey_list["installed_or_cancelled_clean"].isin(["loft cancelled"]),
+#                 "partially cancelled",
+#                 survey_list["installation_status"]
+#             )
+#
+#             # Finally, for other cases, we set the status to "in progress"
+#             survey_list["installation_status"] = survey_list["installation_status"].fillna("in progress")
+#
+#             # We concatenate the scheme name with the installation status
+#             survey_list["installation_status"] = (
+#                 survey_list[scheme_column] + " - " + survey_list["installation_status"]
+#             )
+#
+#             # TODO: END TEMP
+#
+#             survey_list_to_merge = survey_list[["asset_list_row_id", scheme_column]].copy()
+#             survey_list_to_merge["is_remaining"] = False
+#             analysis_data = analysis_data.drop(columns="is_remaining").merge(
+#                 survey_list_to_merge,
+#                 how="left", on="asset_list_row_id"
+#             )
+#             analysis_data["is_remaining"] = analysis_data["is_remaining"].fillna(True)
+#
+#             n_sold_eco4 = survey_list_to_merge[survey_list_to_merge[scheme_column] == "ECO4"].shape[0]
+#             n_sold_gbis = survey_list_to_merge[survey_list_to_merge[scheme_column] == "GBIS"].shape[0]
+#
+#         # Take just remaining
+#         analysis_data = analysis_data[analysis_data["is_remaining"]]
+#
+#         # Also, if the HA has started selling, we remove any that are still subject to ciga
+#         n_eco4_missed_subject_to_ciga = 0
+#         if not inputs["survey_list"].empty:
+#             n_eco4_missed_subject_to_ciga = (analysis_data["ECO Eligibility"] == "eco4 (subject to ciga)").sum()
+#             analysis_data = analysis_data[analysis_data["ECO Eligibility"] != "eco4 (subject to ciga)"]
+#
+#         ################################################################################################
+#         # We take the properties that strictly qualified under eco
+#         ################################################################################################
+#
+#         eco4_identified = analysis_data[analysis_data["ECO Eligibility"] == "eco4"].copy()
+#         eco4_identified["identification_type"] = None
+#         eco4_identified["identification_type"] = np.where(
+#             (eco4_identified["eco4_eligible"] == True) & (eco4_identified["eco4_strict"] == True),
+#             "strict",
+#             eco4_identified["identification_type"]
+#         )
+#
+#         # For expansive, the property can be no higher than an EPC C
+#         eco4_identified["identification_type"] = np.where(
+#             (eco4_identified["eco4_eligible"] == True) & (eco4_identified["eco4_strict"] == False) & (
+#                 eco4_identified["sap"] <= epc_c_threshold
+#             ),
+#             "expansive",
+#             eco4_identified["identification_type"]
+#         )
+#         ################################################################################################
+#         # We take the properties dependent on CIGA
+#         ################################################################################################
+#
+#         ciga_dependent_identified = analysis_data[
+#             analysis_data["ECO Eligibility"].isin(
+#                 [
+#                     "eco4 (subject to ciga)",
+#                     "eco4 - passed ciga"
+#                 ]
+#             )
+#         ].copy()
+#
+#         # These are properties that show filled cavity
+#         ciga_dependent_identified["identification_type"] = None
+#         ciga_dependent_identified["identification_type"] = np.where(
+#             ciga_dependent_identified["eco4_message"].isin(
+#                 [
+#                     "Perfect suitability",
+#                     "Meets cavity and sap",
+#                     "Fails cavity, meets loft, fails SAP",
+#                     "Meets fabric, fails SAP check",
+#                     "Meets cavity, loft borderline, meets sap",
+#                 ]
+#             ) & (ciga_dependent_identified["sap"] <= epc_c_threshold),
+#             "strict",
+#             ciga_dependent_identified["identification_type"]
+#         )
+#
+#         ciga_dependent_identified["identification_type"] = np.where(
+#             ((ciga_dependent_identified["eco4_message"].isin(["Meets just cavity"])) | (
+#                 ciga_dependent_identified["walls"].isin(["Cavity wall, filled cavity"])
+#             )) & (
+#                 (ciga_dependent_identified["sap"] <= epc_c_threshold) &
+#                 pd.isnull(ciga_dependent_identified["identification_type"])
+#             ),
+#             "expansive",
+#             ciga_dependent_identified["identification_type"]
+#         )
+#
+#         ################################################################################################
+#         # We take the properties that qualified for gbis
+#         ################################################################################################
+#         gbis_identified = analysis_data[analysis_data["ECO Eligibility"] == "gbis"].copy()
+#         gbis_identified["identification_type"] = None
+#         gbis_identified["identification_type"] = np.where(
+#             (gbis_identified["gbis_eligible"] == True) & (gbis_identified["sap"] < 69),
+#             "strict",
+#             gbis_identified["identification_type"]
+#         )
+#
+#         gbis_identified["identification_type"] = np.where(
+#             (gbis_identified["gbis_eligible"] == True) & (gbis_identified["sap"] <= epc_c_threshold) & (
+#                 pd.isnull(gbis_identified["identification_type"])
+#             ),
+#             "expansive",
+#             gbis_identified["identification_type"]
+#         )
+#
+#         # Finally, we look at the properties that have not been identified by Warmfront
+#         not_identified = analysis_data[
+#             analysis_data["ECO Eligibility"].isin(
+#                 [
+#                     "not eligible"
+#                 ]
+#             )
+#         ].copy()
+#
+#         surplus_eco4 = not_identified[
+#             (not_identified["eco4_eligible"] == True) & (not_identified["eco4_message"].isin(
+#                 ["Perfect suitability", "Meets cavity, loft borderline, meets sap", "Near perfect suitability"]
+#             ))
+#             ]
+#
+#         surplus_gbis = not_identified[
+#             (not_identified["gbis_eligible"] == True) & (
+#                 ~not_identified["asset_list_row_id"].isin(surplus_eco4["asset_list_row_id"].values)
+#             ) & (not_identified["sap"] < 69) & (
+#                 (not_identified["cavity_type"].isin(["empty", "partial insulation"])) | (
+#                 not_identified["walls"].str.contains("partial", case=False, na=False)
+#             )
+#             )
+#             ]
+#         surplus_gbis = surplus_gbis[surplus_gbis["is_estimated"] == False]
+#
+#         # Output variables - the data was sent to us in December, but the remaining figures are
+#         # what was in November
+#         november_remaining = loader.december_figures[loader.december_figures["HA Name"] == ha_name]
+#
+#         # ECO4
+#         n_properties_remaining_in_asset_list = inputs["asset_list"].shape[0]
+#         november_eco4_remaining = max(november_remaining["ECO4 remaining"].values[0], 0)
+#         november_eco4_sold = november_remaining["No. of Tech surveys complete - Eco 4"].values[0]
+#         eco4_sales_since_november = n_sold_eco4 - november_eco4_sold
+#
+#         n_warmfront_identified_eco4 = eco4_identified.shape[0] + ciga_dependent_identified.shape[0]
+#         eco4_of_which_identified_strict = (
+#             eco4_identified[eco4_identified["identification_type"] == "strict"].shape[0] +
+#             ciga_dependent_identified[ciga_dependent_identified["identification_type"] == "strict"].shape[0]
+#         )
+#         eco4_of_which_identified_expansive = (
+#             eco4_identified[eco4_identified["identification_type"] == "expansive"].shape[0] +
+#             ciga_dependent_identified[ciga_dependent_identified["identification_type"] == "expansive"].shape[0]
+#         )
+#         # GBIS
+#         n_warmfront_identified_gbis = gbis_identified.shape[0]
+#         november_gbis_remaining = max(november_remaining["GBIS remaining"].values[0], 0)
+#         november_gbis_sold = november_remaining["No. of Tech surveys complete - GBIS"].values[0]
+#         gbis_sales_since_november = n_sold_gbis - november_gbis_sold
+#         gbis_of_which_identified_strict = gbis_identified[gbis_identified["identification_type"] == "strict"].shape[0]
+#         gbis_of_which_identified_expansive = \
+#             gbis_identified[gbis_identified["identification_type"] == "expansive"].shape[0]
+#
+#         to_append = {
+#             ("", "HA Name"): ha_name,
+#             ("", "# properties in asset list"): n_properties_remaining_in_asset_list,
+#             ############
+#             # ECO4
+#             ############
+#             ("ECO4", "# remaining November file"): november_eco4_remaining,
+#             ("ECO4", "# sold in November file"): november_eco4_sold,
+#             ("ECO4", "# sold (survey list)"): n_sold_eco4,
+#             ("ECO4", "# that missed CIGA check"): n_eco4_missed_subject_to_ciga,
+#             ("ECO4", "# Remaining properties (asset list)"): n_warmfront_identified_eco4,
+#             ("ECO4", "Of which identified by model - strict"): eco4_of_which_identified_strict,
+#             ("ECO4", "Of which identified by model - expansive"): eco4_of_which_identified_expansive,
+#             ("ECO4", "Of which identified by model - total"): (
+#                 eco4_of_which_identified_strict + eco4_of_which_identified_expansive
+#             ),
+#             ("ECO4", "Additional properties"): surplus_eco4.shape[0],
+#             ############
+#             # GBIS
+#             ############
+#             ("GBIS", "# remaining November file"): november_gbis_remaining,
+#             ("GBIS", "# sold in November file"): november_gbis_sold,
+#             ("GBIS", "# sold (survey list)"): n_sold_gbis,
+#             ("GBIS", "# Remaining properties (asset list)"): n_warmfront_identified_gbis,
+#             ("GBIS", "Of which identified by model - strict"): gbis_of_which_identified_strict,
+#             ("GBIS", "Of which identified by model - expansive"): gbis_of_which_identified_expansive,
+#             ("GBIS", "Of which identified by model - total"): (
+#                 gbis_of_which_identified_strict + gbis_of_which_identified_expansive
+#             ),
+#             ("GBIS", "Additional properties"): surplus_gbis.shape[0]
+#         }
+#
+#         ha_analysis_results.append(to_append)
+#
+#         # Calculate the revenue results
+#         to_append_revenue = {
+#             ("", "HA Name"): ha_name,
+#             # Eco4 revenue
+#             ("ECO4", "£ remaining November file"): november_eco4_remaining * eco4_rate,
+#             ("ECO4", "£ sold November file"): november_eco4_sold * old_eco4_rate,
+#             ("ECO4", "£ sold since November"): eco4_sales_since_november * eco4_rate,
+#             ("ECO4", "£ stuck at ciga check"): n_eco4_missed_subject_to_ciga * eco4_rate,
+#             ("ECO4", "£ remaining (asset list)"): n_warmfront_identified_eco4 * eco4_rate,
+#             ("ECO4", "Of which identified by model - strict"): eco4_of_which_identified_strict * eco4_rate,
+#             ("ECO4", "Of which identified by model - expansive"): eco4_of_which_identified_expansive * eco4_rate,
+#             ("ECO4", "Of which identified by model - total"): eco4_rate * (
+#                 eco4_of_which_identified_strict + eco4_of_which_identified_expansive
+#             ),
+#             ("ECO4", "Additional properties"): eco4_rate * surplus_eco4.shape[0],
+#         }
+#         total_revenue_results.append(to_append_revenue)
+#
+#     ha_analysis_results = pd.DataFrame(ha_analysis_results)
+#     ha_analysis_results.columns = pd.MultiIndex.from_tuples(ha_analysis_results.columns)
+#
+#     facts_and_figures = loader.facts_and_figures.copy()
+#     facts_and_figures["ha_number"] = facts_and_figures["HA Name"].str.extract(r'(\d+)').astype(int)
+#     facts_and_figures = facts_and_figures.sort_values("ha_number")
+#     facts_and_figures = facts_and_figures.drop(columns=["ha_number"])
+#
+#     # Rename some of the cols
+#     facts_and_figures = facts_and_figures.rename(
+#         columns={
+#             # ECO4 cols
+#             "ECO4": "ECO4 - November",
+#             "GBIS": "GBIS - November",
+#             "eco4 (subject to ciga)": "ECO4 - subject to ciga",
+#             "eco4": "ECO4 - doesn't need CIGA",
+#             "eco4 - passed ciga": "ECO4 - passed CIGA",
+#             "failed ciga": "ECO4 - failed CIGA",
+#             "ECO4 - partially cancelled": "ECO4 - Install downgrade to GBIS",
+#             "ECO4 - in progress": "ECO4 - Install in progress",
+#             "ECO4 - cancelled": "ECO4 - Install cancelled",
+#             # GBIS cols
+#             "gbis": "GBIS total (asset list)"
+#         }
+#     )
+#     # We calculate the eco4 total from the asset list
+#     # 1) If ciga checks have been completed (i.e. ECO4 - passed ciga > 0) this sum is
+#     # ECO4 - doesn't need CIGA + ECO4 - passed CIGA
+#     # 2) if ciga checks haven't been completed (i.e. ECO4 - passed ciga is missing), this sum is
+#     # ECO4 - doesn't need CIGA + ECO4 - subject to ciga
+#     facts_and_figures["ECO4 total (asset list - pre ciga)"] = (
+#         facts_and_figures["ECO4 - doesn't need CIGA"] +
+#         facts_and_figures["ECO4 - subject to ciga"] +
+#         facts_and_figures["ECO4 - passed CIGA"]
+#     )
+#
+#     facts_and_figures["ECO4 total (asset list - post ciga)"] = None
+#     facts_and_figures["ECO4 total (asset list - post ciga)"] = np.where(
+#         facts_and_figures["ECO4 - passed CIGA"] > 0,
+#         facts_and_figures["ECO4 - doesn't need CIGA"] + facts_and_figures["ECO4 - passed CIGA"],
+#         facts_and_figures["ECO4 total (asset list - post ciga)"]
+#     )
+#
+#     # Re-arrange the columns
+#     facts_and_figures = facts_and_figures[
+#         [
+#             'HA Name',
+#             'ECO4 - November',
+#             'GBIS - November',
+#             'ECO4 total (asset list - pre ciga)',
+#             'ECO4 total (asset list - post ciga)',
+#             'GBIS total (asset list)',
+#             'ECO4 - subject to ciga',
+#             "ECO4 - doesn't need CIGA",
+#             'ECO4 - passed CIGA',
+#             'ECO4 - failed CIGA',
+#             'ECO4 - installed',
+#             'ECO4 - Install in progress',
+#             'ECO4 - Install cancelled',
+#             'ECO4 - partially installed',
+#             'ECO4 - Install downgrade to GBIS',
+#         ]
+#     ]
+#     # Add a note to flag any rows where
+#     # (ECO4 - subject to ciga is greater than 0) and
+#     # (ECO4 - passed ciga is greater than 0)
+#     facts_and_figures["Missed CIGA checks opportunity"] = None
+#     facts_and_figures["Missed CIGA checks opportunity"] = np.where(
+#         (facts_and_figures["ECO4 - subject to ciga"] > 0) & (facts_and_figures["ECO4 - passed CIGA"] > 0),
+#         "potential opportunity of " + facts_and_figures["ECO4 - subject to ciga"].astype(
+#             str) + " ECO4 properties needing a CIGA check",
+#         facts_and_figures["Missed CIGA checks opportunity"]
+#     )
+#
+#     facts_and_figures.to_csv("Facts and figures sample.csv")
+#
+#     # Re-arrange the columns
+#
+#     # Also sort ha_analysis_results by ha number
+#     ha_analysis_results["ha_number"] = ha_analysis_results[("", "HA Name")].str.extract(r'(\d+)').astype(int)
+#     ha_analysis_results = ha_analysis_results.sort_values("ha_number")
+#     ha_analysis_results = ha_analysis_results.drop(columns=["ha_number"])
+#
+#     # We save 2 sheets
+#     # Automate creation of the excel
+#     # Create a Pandas Excel writer using XlsxWriter as the engine
+#     with pd.ExcelWriter('HA Analysis Results.xlsx', engine='xlsxwriter') as writer:
+#         # Write each dataframe to a different worksheet without the index
+#         for df, sheet in [(facts_and_figures, 'HA Facts and Figures'),
+#                           (ha_analysis_results, 'Asset Identification')]:
+#
+#             df.to_excel(writer, sheet_name=sheet)
+#
+#             # Auto-adjust columns' width
+#             for i, width in enumerate(get_col_widths(df)):
+#                 writer.sheets[sheet].set_column(i, i, width)
+#
+#     # Inspection: - Looking into the proportion of homes with "cavity, as built, insulated (assumed)" as their
+#     #               description, and what proportion of time they get identified via non-invasive surveys
+#
+#     # true_eco4_assets = []
+#     # ciga_dependent_assets = []
+#     # not_eligible = []
+#     # as_built_insulated = []
+#     # date_cols = {
+#     #     "HA39": "date_built",
+#     #     "HA14": "Built In Year",
+#     #     "HA6": "Construction Year",
+#     #     "HA1": "Build Date",
+#     #     "HA107": "YEAR BUILT"
+#     # }
+#     # for ha_name, data_objects in outputs.items():
+#     #     inputs = [x for k, x in loader.data.items() if k == ha_name][0]
+#     #
+#     #     date_col = date_cols[ha_name]
+#     #     results_df = data_objects["results_df"].copy()
+#     #     df = inputs["asset_list"][['asset_list_row_id', "ECO Eligibility", date_col]].rename(
+#     #         columns={"row_meaning": "asset_identification_status", date_col: "date_built"}
+#     #     ).merge(
+#     #         results_df,
+#     #         how="left",
+#     #         right_on="row_id",
+#     #         left_on="asset_list_row_id"
+#     #     )
+#     #
+#     #     # take the true ECO4
+#     #     true_eco4 = df[df["ECO Eligibility"] == "eco4"].copy()
+#     #     ciga_dependent = df[
+#     #         df["ECO Eligibility"].isin(
+#     #             [
+#     #                 "eco4 (subject to ciga)",
+#     #                 "failed ciga",
+#     #                 "eco4 - passed ciga"
+#     #             ]
+#     #         )
+#     #     ]
+#     #     insulated_assumed = df[df["walls"] == "Cavity wall, as built, insulated"].copy()
+#     #     # We convert date built to datetime
+#     #     try:
+#     #         insulated_assumed = insulated_assumed[~pd.isnull(insulated_assumed["date_built"])]
+#     #         insulated_assumed["year_built"] = pd.to_datetime(insulated_assumed["date_built"].astype(str)).dt.year
+#     #         as_built_insulated.append(insulated_assumed)
+#     #     except Exception as e:
+#     #         print("oh well")
+#     #
+#     #     true_eco4_assets.append(true_eco4)
+#     #     ciga_dependent_assets.append(ciga_dependent)
+#     #
+#     # true_eco4_assets = pd.concat(true_eco4_assets)
+#     # ciga_dependent_assets = pd.concat(ciga_dependent_assets)
+#     # as_built_insulated = pd.concat(as_built_insulated)
+#     #
+#     # true_eco4_assets["walls"].value_counts(normalize=True)
+#     # ciga_dependent_assets["walls"].value_counts(normalize=True)
+#     #
+#     # from recommendations.recommendation_utils import extract_insulation_thickness
+#     #
+#     # true_eco4_assets["roof_insulation_thickness"] = true_eco4_assets["roof"].apply(
+#     #     lambda x: extract_insulation_thickness(x)
+#     # )
+#     #
+#     # true_eco4_assets["e"] = true_eco4_assets.merge(
+#     #     pd.DataFrame(cleaned["roof-description"])[["original_description", "insulation_thickness"]],
+#     #     how="left",
+#     #     left_on="roof",
+#     #     right_on="original_description"
+#     # )
+#     #
+#     # true_eco4_assets["sap"].mean()
+#     #
+#     # true_eco4_assets["insulation_thickness"].isin(
+#     #     ["250", "150", "200", "100", "75", "50"]
+#     # ).sum() / true_eco4_assets.shape[0]
+#     #
+#     # true_eco4_assets["insulation_thickness"].isin(
+#     #     ["100"]
+#     # ).sum() / true_eco4_assets.shape[0]
+#     #
+#     # as_built_insulated.groupby("property_type")["ECO Eligibility"].value_counts(normalize=True)
 
 
 def get_propensity_model_data(
@@ -2567,29 +2567,39 @@ def calculate_eco4_post_ciga(
             eligiblity_counts["ECO Eligibility"] == "eco4 - passed ciga"
             ]["count"].sum()
 
+        eco4_confirmed_ciga_failures = eligiblity_counts[
+            eligiblity_counts["ECO Eligibility"] == "failed ciga"
+            ]["count"].sum()
+
         eco4_confirmed = (eco4_no_ciga_needed * ha_eco4_to_sale_rate) + (eco4_ciga_passed * ha_ciga_pass_to_sale_rate)
         eco4_confirmed = np.round(eco4_confirmed)
 
         if remaining_needing_ciga_check > 0:
             # We update the eco4 post ciga with the converted remaining
+            eco4_ciga_expected_remaining_to_pass = np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate)
             eco4_remaining_forecast = np.round(
-                remaining_needing_ciga_check * ha_ciga_conversion_rate * ha_ciga_pass_to_sale_rate
+                eco4_ciga_expected_remaining_to_pass * ha_ciga_pass_to_sale_rate
             )
+            eco4_estimated_ciga_failures = remaining_needing_ciga_check - eco4_ciga_expected_remaining_to_pass
             eco4_post_ciga = eco4_confirmed + eco4_remaining_forecast
         else:
             eco4_remaining_forecast = 0
+            eco4_estimated_ciga_failures = 0
             eco4_post_ciga = eco4_confirmed
     else:
         eco4_no_ciga_needed = eligiblity_counts[
             eligiblity_counts["ECO Eligibility"] == "eco4"
             ]["count"].sum()
+        eco4_confirmed_ciga_failures = 0
+        # Multiply by sale conversion
         eco4_confirmed = np.round(eco4_no_ciga_needed * ha_eco4_to_sale_rate)
+        eco4_ciga_expected_remaining_to_pass = np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate)
+        eco4_estimated_ciga_failures = remaining_needing_ciga_check - eco4_ciga_expected_remaining_to_pass
+
         eco4_remaining_forecast = np.round(
-            remaining_needing_ciga_check * ha_ciga_conversion_rate * ha_ciga_pass_to_sale_rate
-        )
-        eco4_post_ciga = (
-            eligiblity_counts[eligiblity_counts["ECO Eligibility"] == "eco4"]["count"].sum() + eco4_remaining_forecast
+            eco4_ciga_expected_remaining_to_pass * ha_ciga_pass_to_sale_rate
         )
+        eco4_post_ciga = eco4_confirmed + eco4_remaining_forecast
 
     eco4_post_ciga = int(eco4_post_ciga)
     eco4_remaining_forecast = int(eco4_remaining_forecast)
@@ -2604,6 +2614,16 @@ def calculate_eco4_post_ciga(
         "ECO4 - post CIGA - £": eco4_post_ciga * eco4_rate,
         "Of which confirmed - £": eco4_confirmed * eco4_rate,
         "Of which forecast - £": eco4_remaining_forecast * eco4_rate,
+        # Ciga failures
+        "Estimated total - failed CIGA": int(eco4_confirmed_ciga_failures + eco4_estimated_ciga_failures),
+        "Confirmed CIGA failures": eco4_confirmed_ciga_failures,
+        "Estimated CIGA failures": int(eco4_estimated_ciga_failures),
+        # Ciga failures cost
+        "Estimated total - failed CIGA - £": int(
+            (eco4_confirmed_ciga_failures + eco4_estimated_ciga_failures) * eco4_rate
+        ),
+        "Confirmed CIGA failures - £": int(eco4_confirmed_ciga_failures * eco4_rate),
+        "Estimated CIGA failures - £": int(eco4_estimated_ciga_failures * eco4_rate),
     }
 
     return results
@@ -2617,8 +2637,8 @@ def forecast_remaining_sales(loader):
 
     gbis_rate = 600
     eco4_rate = 1710
-    old_gbis_rate = 432
-    old_eco4_rate = 1456
+    # old_gbis_rate = 432
+    # old_eco4_rate = 1456
 
     # 1) Calculate the conversion rate from passed CIGA to actual sale
     converted_ciga_jobs = []
@@ -2800,16 +2820,18 @@ def forecast_remaining_sales(loader):
 
     results = []
     for ha_name, input_data in loader.data.items():
+
         # Original warmfront figures - ECO4
         original_warmfront_estimates = december_figures[december_figures["HA Name"] == ha_name]
 
         original_warmfront_eco4 = original_warmfront_estimates["ECO4"].values[0]
         original_warmfront_remaining_eco4 = original_warmfront_estimates["ECO4 remaining"].values[0]
 
-        original_warmfront_eco4_revenue = (
-            original_warmfront_remaining_eco4 * eco4_rate +
-            (original_warmfront_eco4 - original_warmfront_remaining_eco4) * old_eco4_rate
-        )
+        # original_warmfront_eco4_revenue = (
+        #     original_warmfront_remaining_eco4 * eco4_rate +
+        #     (original_warmfront_eco4 - original_warmfront_remaining_eco4) * old_eco4_rate
+        # )
+        original_warmfront_eco4_revenue = original_warmfront_eco4 * eco4_rate
         original_warmfront_remaining_eco4_revenue = original_warmfront_remaining_eco4 * eco4_rate
 
         # Original warmfront figures - GBIS
@@ -2817,9 +2839,12 @@ def forecast_remaining_sales(loader):
         original_warmfront_gbis = original_warmfront_estimates["GBIS"].values[0]
         original_warmfront_remaining_gbis = original_warmfront_estimates["GBIS remaining"].values[0]
 
+        # original_warmfront_gbis_revenue = (
+        #     original_warmfront_remaining_gbis * gbis_rate +
+        #     (original_warmfront_gbis - original_warmfront_remaining_gbis) * old_gbis_rate
+        # )
         original_warmfront_gbis_revenue = (
-            original_warmfront_remaining_gbis * gbis_rate +
-            (original_warmfront_gbis - original_warmfront_remaining_gbis) * old_gbis_rate
+            original_warmfront_gbis * gbis_rate
         )
         original_warmfront_remaining_gbis_revenue = original_warmfront_remaining_gbis * gbis_rate
 
@@ -2835,6 +2860,7 @@ def forecast_remaining_sales(loader):
                 how="left",
                 on="asset_list_row_id"
             )
+            # Anything that has an installation has gone to installation, and therefore is not remaining
             asset_list_remaining = asset_list_remaining[pd.isnull(asset_list_remaining["installation_status"])]
             asset_list_remaining = asset_list_remaining.drop(columns=["installation_status"])
 
@@ -2913,6 +2939,32 @@ def forecast_remaining_sales(loader):
             eco4_rate=eco4_rate
         )
 
+        # GBIS Figures
+        # Estimate the GBIS conversion rate
+        ha_gbis_sale_conversion = gbis_ciga_independent_passrates[
+            gbis_ciga_independent_passrates["Ha Name"] == ha_name
+            ]
+
+        if not ha_gbis_sale_conversion.empty:
+            ha_gbis_sale_conversion = (
+                ha_gbis_sale_conversion["# GBIS successfully installed"].values[0] /
+                ha_gbis_sale_conversion["# GBIS at install stage"].values[0]
+            )
+        else:
+            ha_gbis_sale_conversion = median_gbis_to_install
+
+        gbis_total = eligiblity_counts[
+            eligiblity_counts["ECO Eligibility"] == "gbis"
+            ]["count"].sum()
+        gbis_total = np.round(gbis_total * ha_gbis_sale_conversion)
+        gbis_total_revenue = gbis_total * gbis_rate
+
+        gbis_remaining = eligiblity_counts_remaining[
+            eligiblity_counts["ECO Eligibility"] == "gbis"
+            ]["count"].sum()
+        gbis_remaining = np.round(gbis_remaining * ha_gbis_sale_conversion)
+        gbis_remaining_revenue = gbis_remaining * gbis_rate
+
         to_append = {
             ("", "", "", "HA Name"): ha_name,
             # ECO4 - original warmfront figures
@@ -2950,16 +3002,48 @@ def forecast_remaining_sales(loader):
             ("ECO4 post-ciga", "", "Of which forecast - £", ""):
                 eco4_post_ciga_remaining_results["Of which forecast - £"],
             # CIGA failures
+            ("ECO4 CIGA failures", "", "Estimated total - failed CIGA - #", ""): eco4_post_ciga_remaining_results[
+                'Estimated total - failed CIGA'
+            ],
+            ("ECO4 CIGA failures", "", "Estimated total - failed CIGA - £", ""): eco4_post_ciga_remaining_results[
+                'Estimated total - failed CIGA - £'
+            ],
+            ("ECO4 CIGA failures", "", "Confirmed failures - #", ""): eco4_post_ciga_remaining_results[
+                "Confirmed CIGA failures"
+            ],
+            ("ECO4 CIGA failures", "", "Confirmed failures - £", ""): eco4_post_ciga_remaining_results[
+                "Confirmed CIGA failures - £"
+            ],
+            ("ECO4 CIGA failures", "", "Estimated failures - #", ""): eco4_post_ciga_remaining_results[
+                "Estimated CIGA failures"
+            ],
+            ("ECO4 CIGA failures", "", "Estimated failures - £", ""): eco4_post_ciga_remaining_results[
+                "Estimated CIGA failures - £"
+            ],
+            # GBIS postcode list
+            ("", "Warmfront post code list", "Total - #", "GBIS total"): gbis_total,
+            ("", "Warmfront post code list", "Remaining - #", "GBIS total"): gbis_remaining,
+            ("", "Warmfront post code list", "Total - £", "GBIS total"): gbis_total_revenue,
+            ("", "Warmfront post code list", "Remaining - £", "GBIS total"): gbis_remaining_revenue,
         }
 
         # Make sure nothing is forgotten due to duplicate multi-index keys
-        if len(to_append) != 22:
+        if len(to_append) != 32:
             raise ValueError("Something went wrong")
 
         results.append(to_append)
 
     results = pd.DataFrame(results)
 
+    # TODO: Add a blank row and then a total row
+
+    assumptions = {
+        "ECO4 new rate": eco4_rate,
+        "GBIS new rate": gbis_rate,
+        # "ECO4 old rate": old_eco4_rate,
+        # "GBIS old rate": old_gbis_rate,
+    }
+
 
 def app():
     """

From 2ba37d55e65a746fdb58588aa2768851a83a3887 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 1 Mar 2024 20:06:57 +0000
Subject: [PATCH 046/248] Added assumptions table

---
 .../ha_15_32/ha_analysis_batch_3.py           | 45 ++++++++++++++-----
 1 file changed, 35 insertions(+), 10 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 191ca74c..ac4d3a0c 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -2956,14 +2956,14 @@ def forecast_remaining_sales(loader):
         gbis_total = eligiblity_counts[
             eligiblity_counts["ECO Eligibility"] == "gbis"
             ]["count"].sum()
-        gbis_total = np.round(gbis_total * ha_gbis_sale_conversion)
-        gbis_total_revenue = gbis_total * gbis_rate
+        gbis_total = int(np.round(gbis_total * ha_gbis_sale_conversion))
+        gbis_total_revenue = int(gbis_total * gbis_rate)
 
         gbis_remaining = eligiblity_counts_remaining[
             eligiblity_counts["ECO Eligibility"] == "gbis"
             ]["count"].sum()
-        gbis_remaining = np.round(gbis_remaining * ha_gbis_sale_conversion)
-        gbis_remaining_revenue = gbis_remaining * gbis_rate
+        gbis_remaining = int(np.round(gbis_remaining * ha_gbis_sale_conversion))
+        gbis_remaining_revenue = int(gbis_remaining * gbis_rate)
 
         to_append = {
             ("", "", "", "HA Name"): ha_name,
@@ -3037,12 +3037,37 @@ def forecast_remaining_sales(loader):
 
     # TODO: Add a blank row and then a total row
 
-    assumptions = {
-        "ECO4 new rate": eco4_rate,
-        "GBIS new rate": gbis_rate,
-        # "ECO4 old rate": old_eco4_rate,
-        # "GBIS old rate": old_gbis_rate,
-    }
+    assumptions = [
+        {
+            ("", "", "", "HA Name"): "ECO4 rate",
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): "£" + str(eco4_rate)
+        },
+        {
+            ("", "", "", "HA Name"): "GBIS rate",
+            ("ECO4 original", "", "Remaining - #", ""): "£" + str(gbis_rate)
+        },
+        {
+            ("", "", "", "HA Name"): "Median CIGA pass rate",
+            ("ECO4 original", "", "Total - £", ""): str(round(median_ciga_success_rate * 100, 1)) + "%",
+        },
+        {
+            ("", "", "", "HA Name"): "Maximum allowed CIGA pass rate",
+            ("ECO4 original", "", "Total - £", ""): str(round(maximum_ciga_conversion * 100, 1)) + "%",
+            ("ECO4 original", "", "Remaining - £", ""): "- Maximum allowed CIGA conversion for HAs without CIGA checks"
+        },
+        {
+            ("", "", "", "HA Name"): "Median ECO4 (no CIGA) sales conversion rate",
+            ("ECO4 original", "", "Total - £", ""): str(round(median_eco4_to_install * 100, 1)) + "%",
+            ("ECO4 original", "", "Remaining - £",
+             ""): " - Sales conversion rate for a ECO4 property that didn't need a CIGA check"
+        },
+        {
+            ("", "", "", "HA Name"): "Median ECO4 (subect to CIGA) sales conversion rate",
+            ("ECO4 original", "", "Total - £", ""): str(round(median_ciga_pass_to_install * 100, 1)) + "%",
+            ("ECO4 original", "", "Remaining - £",
+             ""): " - Sales conversion rate for a ECO4 property that passed a CIGA check"
+        }
+    ]
 
 
 def app():

From 57a7edf62511207f7d7af176414b5b269f3b1aa1 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 1 Mar 2024 20:18:44 +0000
Subject: [PATCH 047/248] Collating results: append totals row, blank rows and assumptions

---
 .../ha_15_32/ha_analysis_batch_3.py           | 22 ++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index ac4d3a0c..7da6bb3a 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -3035,9 +3035,21 @@ def forecast_remaining_sales(loader):
 
     results = pd.DataFrame(results)
 
-    # TODO: Add a blank row and then a total row
+    totals_row = {}
+    for col in results.columns:
+        if col == ('', '', '', 'HA Name'):
+            totals_row[col] = "Total"
+        elif col == ("ECO4 post-ciga", "", "Delta vs original estimate", ""):
+            totals_row[col] = results[col].mean()
+        else:
+            totals_row[col] = results[col].sum()
+
+    blank_row = pd.DataFrame([{col: "" for col in results.columns}])
 
     assumptions = [
+        {
+            ("", "", "", "HA Name"): "Assumptions",
+        },
         {
             ("", "", "", "HA Name"): "ECO4 rate",
             ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): "£" + str(eco4_rate)
@@ -3059,16 +3071,20 @@ def forecast_remaining_sales(loader):
             ("", "", "", "HA Name"): "Median ECO4 (no CIGA) sales conversion rate",
             ("ECO4 original", "", "Total - £", ""): str(round(median_eco4_to_install * 100, 1)) + "%",
             ("ECO4 original", "", "Remaining - £",
-             ""): " - Sales conversion rate for a ECO4 property that didn't need a CIGA check"
+             ""): " - Sales conversion rate for a ECO4 property that didn't need a CIGA check. Job must not cancel"
         },
         {
             ("", "", "", "HA Name"): "Median ECO4 (subect to CIGA) sales conversion rate",
             ("ECO4 original", "", "Total - £", ""): str(round(median_ciga_pass_to_install * 100, 1)) + "%",
             ("ECO4 original", "", "Remaining - £",
-             ""): " - Sales conversion rate for a ECO4 property that passed a CIGA check"
+             ""): " - Sales conversion rate for a ECO4 property that passed a CIGA check. Job must not cancel"
         }
     ]
 
+    results = pd.concat(
+        [results, pd.DataFrame([totals_row]), blank_row, blank_row, pd.DataFrame(assumptions)]
+    )
+
 
 def app():
     """

From 028c2edce7ab951987379a7c653324e5863426ae Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 1 Mar 2024 20:48:43 +0000
Subject: [PATCH 048/248] Added headlines

---
 .../ha_15_32/ha_analysis_batch_3.py           | 129 +++++++++++++++++-
 1 file changed, 126 insertions(+), 3 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 7da6bb3a..1c320f9c 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -2991,7 +2991,7 @@ def forecast_remaining_sales(loader):
             # ECO4 - asset list, post ciga, remaining
             ("ECO4 post-ciga", "", "Estimated remaining eligible - #", ""): eco4_post_ciga_remaining_results[
                 "ECO4 - post CIGA - #"],
-            ("ECO4 post-ciga", "", "Estimated remaining total eligible - £", ""): eco4_post_ciga_remaining_results[
+            ("ECO4 post-ciga", "", "Estimated remaining eligible - £", ""): eco4_post_ciga_remaining_results[
                 "ECO4 - post CIGA - £"],
             ("ECO4 post-ciga", "", "Of which - confirmed (post CIGA or no CIGA required) - #", ""):
                 eco4_post_ciga_remaining_results["Of which confirmed - #"],
@@ -3046,6 +3046,126 @@ def forecast_remaining_sales(loader):
 
     blank_row = pd.DataFrame([{col: "" for col in results.columns}])
 
+    # Put together a Warmfront original remaining ECO4 vs asset list remaining ECO4 and same for GBIS, as well as totals
+
+    # ECO4 Headlines
+    headline_eco4_original_remaining = totals_row[("ECO4 original", "", "Remaining - #", "")]
+    headline_eco4_original_remaining_revenue = totals_row[("ECO4 original", "", "Remaining - £", "")]
+    headline_eco4_postcode_list_remaining = totals_row[("ECO4 post-ciga", "", "Estimated remaining eligible - #", "")]
+    headline_eco4_postcode_list_remaining_revenue = totals_row[
+        ("ECO4 post-ciga", "", "Estimated remaining eligible - £", "")
+    ]
+    headline_eco4_delta = 100 * (
+        (headline_eco4_postcode_list_remaining - headline_eco4_original_remaining) /
+        headline_eco4_original_remaining
+    )
+    headline_eco4_delta = round(headline_eco4_delta, 1)
+
+    # GBIS Headlines
+    headline_gbis_original_remaining = totals_row[("GBIS original", "", "Remaining - #", "")]
+    headline_gbis_original_remaining_revenue = totals_row[("GBIS original", "", "Remaining - £", "")]
+    headline_gbis_postcode_list_remaining = totals_row[("", "Warmfront post code list", "Remaining - #", "GBIS total")]
+    headline_gbis_postcode_list_remaining_revenue = totals_row[
+        ("", "Warmfront post code list", "Remaining - £", "GBIS total")
+    ]
+    headline_gbis_delta = 100 * (
+        (headline_gbis_postcode_list_remaining - headline_gbis_original_remaining) /
+        headline_gbis_original_remaining
+    )
+    headline_gbis_delta = round(headline_gbis_delta, 1)
+
+    headline_original_total_revenue_remaining = (
+        headline_eco4_original_remaining_revenue + headline_gbis_original_remaining_revenue
+    )
+
+    headline_postcode_list_total_revenue_remaining = (
+        headline_eco4_postcode_list_remaining_revenue + headline_gbis_postcode_list_remaining_revenue
+    )
+    headline_total_delta = 100 * (
+        (headline_postcode_list_total_revenue_remaining - headline_original_total_revenue_remaining) /
+        headline_original_total_revenue_remaining
+    )
+    headline_total_delta = round(headline_total_delta, 1)
+
+    headlines = [
+        {
+            ("", "", "", "HA Name"): "Headlines",
+        },
+        {
+            ("", "", "", "HA Name"): "ECO4 Remaining - November - #",
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(
+                headline_eco4_original_remaining
+            )
+        },
+        {
+            ("", "", "", "HA Name"): "ECO4 Remaining - November - £",
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): "£" + str(
+                headline_eco4_original_remaining_revenue
+            )
+        },
+        {
+            ("", "", "", "HA Name"): "ECO4 Remaining - postcode list - #",
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(
+                headline_eco4_postcode_list_remaining
+            )
+        },
+        {
+            ("", "", "", "HA Name"): "ECO4 Remaining - postcode list - £",
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): "£" + str(
+                headline_eco4_postcode_list_remaining_revenue
+            )
+        },
+        {
+            ("", "", "", "HA Name"): "ECO4 delta %",
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(headline_eco4_delta) + "%"
+        },
+        {
+            ("", "", "", "HA Name"): "GBIS Remaining - November - #",
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(
+                headline_gbis_original_remaining
+            )
+        },
+        {
+            ("", "", "", "HA Name"): "GBIS Remaining - November - £",
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): "£" + str(
+                headline_gbis_original_remaining_revenue
+            )
+        },
+        {
+            ("", "", "", "HA Name"): "GBIS Remaining - post code list - #",
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(
+                headline_gbis_postcode_list_remaining
+            )
+        },
+        {
+            ("", "", "", "HA Name"): "GBIS Remaining - post code list - £",
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): "£" + str(
+                headline_gbis_postcode_list_remaining_revenue
+            )
+        },
+        {
+            ("", "", "", "HA Name"): "GBIS delta %",
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(headline_gbis_delta) + "%"
+        },
+        # Total revenue
+        {
+            ("", "", "", "HA Name"): "Total Remaining - November - £",
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): "£" + str(
+                headline_original_total_revenue_remaining
+            )
+        },
+        {
+            ("", "", "", "HA Name"): "Total Remaining - post code list - £",
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): "£" + str(
+                headline_postcode_list_total_revenue_remaining
+            )
+        },
+        {
+            ("", "", "", "HA Name"): "Total Remaining delta %",
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(headline_total_delta) + "%"
+        },
+    ]
+
     assumptions = [
         {
             ("", "", "", "HA Name"): "Assumptions",
@@ -3065,7 +3185,9 @@ def forecast_remaining_sales(loader):
         {
             ("", "", "", "HA Name"): "Maximum allowed CIGA pass rate",
             ("ECO4 original", "", "Total - £", ""): str(round(maximum_ciga_conversion * 100, 1)) + "%",
-            ("ECO4 original", "", "Remaining - £", ""): "- Maximum allowed CIGA conversion for HAs without CIGA checks"
+            ("ECO4 original", "", "Remaining - £",
+             ""): "- Maximum allowed CIGA conversion for HAs without CIGA checks We do not allow above this to be "
+                  "conservative"
         },
         {
             ("", "", "", "HA Name"): "Median ECO4 (no CIGA) sales conversion rate",
@@ -3082,7 +3204,8 @@ def forecast_remaining_sales(loader):
     ]
 
     results = pd.concat(
-        [results, pd.DataFrame([totals_row]), blank_row, blank_row, pd.DataFrame(assumptions)]
+        [results, pd.DataFrame([headlines]), pd.DataFrame([totals_row]), blank_row, blank_row,
+         pd.DataFrame(assumptions)]
     )
 
 

From 721bfb19fcc3bd70fe02081e14e4abde22f9a13e Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 1 Mar 2024 21:33:45 +0000
Subject: [PATCH 049/248] Added percentage delta aggregations to the totals row

---
 .../ha_15_32/ha_analysis_batch_3.py           | 74 ++++++++++++++++---
 1 file changed, 64 insertions(+), 10 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 1c320f9c..3341e34c 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -2965,6 +2965,14 @@ def forecast_remaining_sales(loader):
         gbis_remaining = int(np.round(gbis_remaining * ha_gbis_sale_conversion))
         gbis_remaining_revenue = int(gbis_remaining * gbis_rate)
 
+        # GBIS delta
+        if original_warmfront_gbis == 0:
+            gbis_delta_vs_original_estimate = 100 * gbis_total
+        else:
+            gbis_delta_vs_original_estimate = 100 * (
+                gbis_total - original_warmfront_gbis
+            ) / original_warmfront_gbis
+
         to_append = {
             ("", "", "", "HA Name"): ha_name,
             # ECO4 - original warmfront figures
@@ -2987,7 +2995,7 @@ def forecast_remaining_sales(loader):
                 "ECO4 - post CIGA - #"],
             ("ECO4 post-ciga", "", "Estimated total eligible - £", ""): eco4_post_ciga_total_results[
                 "ECO4 - post CIGA - £"],
-            ("ECO4 post-ciga", "", "Delta vs original estimate", ""): eco4_delta_vs_original_estimate,
+            ("ECO4 post-ciga", "", "Delta vs original estimate - %", ""): eco4_delta_vs_original_estimate,
             # ECO4 - asset list, post ciga, remaining
             ("ECO4 post-ciga", "", "Estimated remaining eligible - #", ""): eco4_post_ciga_remaining_results[
                 "ECO4 - post CIGA - #"],
@@ -3021,14 +3029,15 @@ def forecast_remaining_sales(loader):
                 "Estimated CIGA failures - £"
             ],
             # GBIS postcode list
-            ("", "Warmfront post code list", "Total - #", "GBIS total"): gbis_total,
-            ("", "Warmfront post code list", "Remaining - #", "GBIS total"): gbis_remaining,
-            ("", "Warmfront post code list", "Total - £", "GBIS total"): gbis_total_revenue,
-            ("", "Warmfront post code list", "Remaining - £", "GBIS total"): gbis_remaining_revenue,
+            ("GBIS Postcode list", "Warmfront post code list", "Total - #", "GBIS total"): gbis_total,
+            ("GBIS Postcode list", "Warmfront post code list", "Total - £", "GBIS total"): gbis_total_revenue,
+            ("GBIS Postcode list", "", "Delta vs original estimate - %", ""): gbis_delta_vs_original_estimate,
+            ("GBIS Postcode list", "Warmfront post code list", "Remaining - #", "GBIS total"): gbis_remaining,
+            ("GBIS Postcode list", "Warmfront post code list", "Remaining - £", "GBIS total"): gbis_remaining_revenue,
         }
 
         # Make sure nothing is forgotten due to duplicate multi-index keys
-        if len(to_append) != 32:
+        if len(to_append) != 33:
             raise ValueError("Something went wrong")
 
         results.append(to_append)
@@ -3039,11 +3048,31 @@ def forecast_remaining_sales(loader):
     for col in results.columns:
         if col == ('', '', '', 'HA Name'):
             totals_row[col] = "Total"
-        elif col == ("ECO4 post-ciga", "", "Delta vs original estimate", ""):
-            totals_row[col] = results[col].mean()
+        elif col in [
+            ("ECO4 post-ciga", "", "Delta vs original estimate - %", ""),
+            ("GBIS Postcode list", "", "Delta vs original estimate - %", "")
+        ]:
+            totals_row[col] = None
         else:
             totals_row[col] = results[col].sum()
 
+    # For the delta columns, we calculate the delta on the totals
+    totals_row[("ECO4 post-ciga", "", "Delta vs original estimate - %", "")] = round(
+        100 * (
+            totals_row[("ECO4 post-ciga", "", "Estimated total eligible - #", "")] -
+            totals_row[("", "Original Warmfront estimate", "Total - #", "ECO4 - November")]
+        ) / totals_row[("", "Original Warmfront estimate", "Total - #", "ECO4 - November")],
+        1
+    )
+
+    totals_row[("GBIS Postcode list", "", "Delta vs original estimate - %", "")] = round(
+        100 * (
+            totals_row[("GBIS Postcode list", "Warmfront post code list", "Total - #", "GBIS total")] -
+            totals_row[("", "Original Warmfront estimate", "Total - #", "GBIS - November")]
+        ) / totals_row[("", "Original Warmfront estimate", "Total - #", "GBIS - November")],
+        1
+    )
+
     blank_row = pd.DataFrame([{col: "" for col in results.columns}])
 
     # Put together a Warmfront original remaining ECO4 vs asset list remaining ECO4 and same for GBIS, as well as totals
@@ -3204,10 +3233,35 @@ def forecast_remaining_sales(loader):
     ]
 
     results = pd.concat(
-        [results, pd.DataFrame([headlines]), pd.DataFrame([totals_row]), blank_row, blank_row,
-         pd.DataFrame(assumptions)]
+        [
+            results,
+            pd.DataFrame([totals_row]),
+            pd.DataFrame(headlines),
+            blank_row,
+            blank_row,
+            pd.DataFrame(assumptions)
+        ]
     )
 
+    # header_rows = [
+    #     [name[0] for name in results.columns.values],
+    #     [name[1] for name in results.columns.values],
+    #     [name[2] for name in results.columns.values],
+    #     [name[3] for name in results.columns.values]
+    # ]
+
+    # Step 2: Write the transformed header and DataFrame data to CSV.
+    # Open the file in write mode.
+    import csv
+    with open("HA Remaining Analysis.csv", "w", newline="") as file:
+        # writer = csv.writer(file)
+
+        # Write the header rows.
+        # writer.writerows(header_rows)
+
+        # Write the DataFrame data without the index (adjust if you want the index).
+        results.to_csv(file, header=True, index=False)
+
 
 def app():
     """

From f9d1a90689ef742fd32217b606c6a919b766d974 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 1 Mar 2024 22:17:11 +0000
Subject: [PATCH 050/248] Fixing some formatting bugs

---
 .../ha_15_32/ha_analysis_batch_3.py           | 86 +++++++++++--------
 1 file changed, 48 insertions(+), 38 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 3341e34c..6309d2e2 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -2820,6 +2820,8 @@ def forecast_remaining_sales(loader):
 
     results = []
     for ha_name, input_data in loader.data.items():
+        if ha_name == "HA16":
+            dew
 
         # Original warmfront figures - ECO4
         original_warmfront_estimates = december_figures[december_figures["HA Name"] == ha_name]
@@ -2991,8 +2993,9 @@ def forecast_remaining_sales(loader):
             ("ECO4 pre-ciga", "", "Total - £", ""): eco4_pre_ciga_revenue,
             ("ECO4 pre-ciga", "", "Remaining - £", ""): eco4_pre_ciga_remaining_revenue,
             # ECO4 - asset list, post ciga, total
-            ("ECO4 post-ciga", "", "Estimated total eligible - #", ""): eco4_post_ciga_total_results[
-                "ECO4 - post CIGA - #"],
+            ("ECO4 post-ciga", "", "Estimated total eligible - #", "ECO4 total (post-ciga)"):
+                eco4_post_ciga_total_results[
+                    "ECO4 - post CIGA - #"],
             ("ECO4 post-ciga", "", "Estimated total eligible - £", ""): eco4_post_ciga_total_results[
                 "ECO4 - post CIGA - £"],
             ("ECO4 post-ciga", "", "Delta vs original estimate - %", ""): eco4_delta_vs_original_estimate,
@@ -3059,7 +3062,7 @@ def forecast_remaining_sales(loader):
     # For the delta columns, we calculate the delta on the totals
     totals_row[("ECO4 post-ciga", "", "Delta vs original estimate - %", "")] = round(
         100 * (
-            totals_row[("ECO4 post-ciga", "", "Estimated total eligible - #", "")] -
+            totals_row[("ECO4 post-ciga", "", "Estimated total eligible - #", "ECO4 total (post-ciga)")] -
             totals_row[("", "Original Warmfront estimate", "Total - #", "ECO4 - November")]
         ) / totals_row[("", "Original Warmfront estimate", "Total - #", "ECO4 - November")],
         1
@@ -3093,9 +3096,11 @@ def forecast_remaining_sales(loader):
     # GBIS Headlines
     headline_gbis_original_remaining = totals_row[("GBIS original", "", "Remaining - #", "")]
     headline_gbis_original_remaining_revenue = totals_row[("GBIS original", "", "Remaining - £", "")]
-    headline_gbis_postcode_list_remaining = totals_row[("", "Warmfront post code list", "Remaining - #", "GBIS total")]
+    headline_gbis_postcode_list_remaining = totals_row[
+        ("GBIS Postcode list", "Warmfront post code list", "Remaining - #", "GBIS total")
+    ]
     headline_gbis_postcode_list_remaining_revenue = totals_row[
-        ("", "Warmfront post code list", "Remaining - £", "GBIS total")
+        ("GBIS Postcode list", "Warmfront post code list", "Remaining - £", "GBIS total")
     ]
     headline_gbis_delta = 100 * (
         (headline_gbis_postcode_list_remaining - headline_gbis_original_remaining) /
@@ -3205,29 +3210,33 @@ def forecast_remaining_sales(loader):
         },
         {
             ("", "", "", "HA Name"): "GBIS rate",
-            ("ECO4 original", "", "Remaining - #", ""): "£" + str(gbis_rate)
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): "£" + str(gbis_rate)
         },
         {
             ("", "", "", "HA Name"): "Median CIGA pass rate",
-            ("ECO4 original", "", "Total - £", ""): str(round(median_ciga_success_rate * 100, 1)) + "%",
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(
+                round(median_ciga_success_rate * 100, 1)) + "%",
         },
         {
             ("", "", "", "HA Name"): "Maximum allowed CIGA pass rate",
-            ("ECO4 original", "", "Total - £", ""): str(round(maximum_ciga_conversion * 100, 1)) + "%",
-            ("ECO4 original", "", "Remaining - £",
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(
+                round(maximum_ciga_conversion * 100, 1)) + "%",
+            ("ECO4 original", "", "Remaining - #",
              ""): "- Maximum allowed CIGA conversion for HAs without CIGA checks We do not allow above this to be "
                   "conservative"
         },
         {
             ("", "", "", "HA Name"): "Median ECO4 (no CIGA) sales conversion rate",
-            ("ECO4 original", "", "Total - £", ""): str(round(median_eco4_to_install * 100, 1)) + "%",
-            ("ECO4 original", "", "Remaining - £",
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(
+                round(median_eco4_to_install * 100, 1)) + "%",
+            ("ECO4 original", "", "Remaining - #",
              ""): " - Sales conversion rate for a ECO4 property that didn't need a CIGA check. Job must not cancel"
         },
         {
             ("", "", "", "HA Name"): "Median ECO4 (subect to CIGA) sales conversion rate",
-            ("ECO4 original", "", "Total - £", ""): str(round(median_ciga_pass_to_install * 100, 1)) + "%",
-            ("ECO4 original", "", "Remaining - £",
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(
+                round(median_ciga_pass_to_install * 100, 1)) + "%",
+            ("ECO4 original", "", "Remaining - #",
              ""): " - Sales conversion rate for a ECO4 property that passed a CIGA check. Job must not cancel"
         }
     ]
@@ -3236,6 +3245,7 @@ def forecast_remaining_sales(loader):
         [
             results,
             pd.DataFrame([totals_row]),
+            blank_row,
             pd.DataFrame(headlines),
             blank_row,
             blank_row,
@@ -3291,32 +3301,32 @@ def app():
     loader.load()
     loader.ha_facts_and_figures()
 
+    forecast_remaining_sales(loader)
+
     # We load in the additional data required to perform the analysis
-    cleaned = read_from_s3(
-        s3_file_name="cleaned_epc_data/cleaned.bson",
-        bucket_name="retrofit-data-dev"
-    )
-    cleaned = msgpack.unpackb(cleaned, raw=False)
-    cleaned = patch_cleaned(cleaned)
-
-    cleaning_data = read_dataframe_from_s3_parquet(
-        bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
-    )
-    created_at = datetime.now().isoformat()
-
-    photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")
-
-    outputs = get_epc_data(
-        loader=loader,
-        cleaned=cleaned,
-        cleaning_data=cleaning_data,
-        created_at=created_at,
-        photo_supply_lookup=photo_supply_lookup,
-        floor_area_decile_thresholds=floor_area_decile_thresholds,
-        pull_data=pull_data
-    )
-
-    analyse_ha_data(outputs, loader)
+    # cleaned = read_from_s3(
+    #     s3_file_name="cleaned_epc_data/cleaned.bson",
+    #     bucket_name="retrofit-data-dev"
+    # )
+    # cleaned = msgpack.unpackb(cleaned, raw=False)
+    # cleaned = patch_cleaned(cleaned)
+    #
+    # cleaning_data = read_dataframe_from_s3_parquet(
+    #     bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
+    # )
+    # created_at = datetime.now().isoformat()
+    #
+    # photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")
+    #
+    # outputs = get_epc_data(
+    #     loader=loader,
+    #     cleaned=cleaned,
+    #     cleaning_data=cleaning_data,
+    #     created_at=created_at,
+    #     photo_supply_lookup=photo_supply_lookup,
+    #     floor_area_decile_thresholds=floor_area_decile_thresholds,
+    #     pull_data=pull_data
+    # )
 
     # import pickle
     # with open("ha_analysis.pickle", "wb") as f:

From 0497290b7cac36b4519b3db4c0f9d1d1be4932b5 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 1 Mar 2024 22:17:51 +0000
Subject: [PATCH 051/248] removed temp code

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 6309d2e2..ec9469dc 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -2820,8 +2820,6 @@ def forecast_remaining_sales(loader):
 
     results = []
     for ha_name, input_data in loader.data.items():
-        if ha_name == "HA16":
-            dew
 
         # Original warmfront figures - ECO4
         original_warmfront_estimates = december_figures[december_figures["HA Name"] == ha_name]

From fbd808a54d3314d9821d5fad5456e951558959c9 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 1 Mar 2024 22:27:50 +0000
Subject: [PATCH 052/248] re-formatting percentages

---
 .../ha_15_32/ha_analysis_batch_3.py           | 64 ++++++++-----------
 1 file changed, 27 insertions(+), 37 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index ec9469dc..0daf239b 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -2926,9 +2926,10 @@ def forecast_remaining_sales(loader):
         )
 
         # Calculate the delta compared to Warmfront's original estimate
-        eco4_delta_vs_original_estimate = 100 * (
-            eco4_post_ciga_total_results["ECO4 - post CIGA - #"] - original_warmfront_eco4
-        ) / original_warmfront_eco4
+        eco4_delta_vs_original_estimate = (
+                                              eco4_post_ciga_total_results[
+                                                  "ECO4 - post CIGA - #"] - original_warmfront_eco4
+                                          ) / original_warmfront_eco4
 
         eco4_post_ciga_remaining_results = calculate_eco4_post_ciga(
             eligiblity_counts=eligiblity_counts_remaining,
@@ -2967,11 +2968,11 @@ def forecast_remaining_sales(loader):
 
         # GBIS delta
         if original_warmfront_gbis == 0:
-            gbis_delta_vs_original_estimate = 100 * gbis_total
+            gbis_delta_vs_original_estimate = gbis_total
         else:
-            gbis_delta_vs_original_estimate = 100 * (
-                gbis_total - original_warmfront_gbis
-            ) / original_warmfront_gbis
+            gbis_delta_vs_original_estimate = (
+                                                  gbis_total - original_warmfront_gbis
+                                              ) / original_warmfront_gbis
 
         to_append = {
             ("", "", "", "HA Name"): ha_name,
@@ -3125,27 +3126,23 @@ def forecast_remaining_sales(loader):
         },
         {
             ("", "", "", "HA Name"): "ECO4 Remaining - November - #",
-            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(
-                headline_eco4_original_remaining
-            )
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): headline_eco4_original_remaining
+
         },
         {
             ("", "", "", "HA Name"): "ECO4 Remaining - November - £",
-            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): "£" + str(
-                headline_eco4_original_remaining_revenue
-            )
+            (
+                "", "Original Warmfront estimate", "Total - #",
+                "ECO4 - November"): headline_eco4_original_remaining_revenue
         },
         {
             ("", "", "", "HA Name"): "ECO4 Remaining - postcode list - #",
-            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(
-                headline_eco4_postcode_list_remaining
-            )
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): headline_eco4_postcode_list_remaining
         },
         {
             ("", "", "", "HA Name"): "ECO4 Remaining - postcode list - £",
-            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): "£" + str(
-                headline_eco4_postcode_list_remaining_revenue
-            )
+            ("", "Original Warmfront estimate", "Total - #",
+             "ECO4 - November"): headline_eco4_postcode_list_remaining_revenue
         },
         {
             ("", "", "", "HA Name"): "ECO4 delta %",
@@ -3153,27 +3150,22 @@ def forecast_remaining_sales(loader):
         },
         {
             ("", "", "", "HA Name"): "GBIS Remaining - November - #",
-            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(
-                headline_gbis_original_remaining
-            )
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): headline_gbis_original_remaining
         },
         {
             ("", "", "", "HA Name"): "GBIS Remaining - November - £",
-            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): "£" + str(
-                headline_gbis_original_remaining_revenue
-            )
+            (
+                "", "Original Warmfront estimate", "Total - #",
+                "ECO4 - November"): headline_gbis_original_remaining_revenue
         },
         {
             ("", "", "", "HA Name"): "GBIS Remaining - post code list - #",
-            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(
-                headline_gbis_postcode_list_remaining
-            )
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): headline_gbis_postcode_list_remaining
         },
         {
             ("", "", "", "HA Name"): "GBIS Remaining - post code list - £",
-            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): "£" + str(
-                headline_gbis_postcode_list_remaining_revenue
-            )
+            ("", "Original Warmfront estimate", "Total - #",
+             "ECO4 - November"): headline_gbis_postcode_list_remaining_revenue
         },
         {
             ("", "", "", "HA Name"): "GBIS delta %",
@@ -3182,15 +3174,13 @@ def forecast_remaining_sales(loader):
         # Total revenue
         {
             ("", "", "", "HA Name"): "Total Remaining - November - £",
-            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): "£" + str(
-                headline_original_total_revenue_remaining
-            )
+            ("", "Original Warmfront estimate", "Total - #",
+             "ECO4 - November"): headline_original_total_revenue_remaining
         },
         {
             ("", "", "", "HA Name"): "Total Remaining - post code list - £",
-            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): "£" + str(
-                headline_postcode_list_total_revenue_remaining
-            )
+            ("", "Original Warmfront estimate", "Total - #",
+             "ECO4 - November"): headline_postcode_list_total_revenue_remaining
         },
         {
             ("", "", "", "HA Name"): "Total Remaining delta %",

From 46f5ee8ea43e719dc4f0c8c472de68b62d974270 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 1 Mar 2024 22:34:02 +0000
Subject: [PATCH 053/248] formatting percentage

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 0daf239b..b5c6835b 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -3059,20 +3059,18 @@ def forecast_remaining_sales(loader):
             totals_row[col] = results[col].sum()
 
     # For the delta columns, we calculate the delta on the totals
-    totals_row[("ECO4 post-ciga", "", "Delta vs original estimate - %", "")] = round(
-        100 * (
+    totals_row[("ECO4 post-ciga", "", "Delta vs original estimate - %", "")] = (
+        (
             totals_row[("ECO4 post-ciga", "", "Estimated total eligible - #", "ECO4 total (post-ciga)")] -
             totals_row[("", "Original Warmfront estimate", "Total - #", "ECO4 - November")]
-        ) / totals_row[("", "Original Warmfront estimate", "Total - #", "ECO4 - November")],
-        1
+        ) / totals_row[("", "Original Warmfront estimate", "Total - #", "ECO4 - November")]
     )
 
-    totals_row[("GBIS Postcode list", "", "Delta vs original estimate - %", "")] = round(
-        100 * (
+    totals_row[("GBIS Postcode list", "", "Delta vs original estimate - %", "")] = (
+        (
             totals_row[("GBIS Postcode list", "Warmfront post code list", "Total - #", "GBIS total")] -
             totals_row[("", "Original Warmfront estimate", "Total - #", "GBIS - November")]
-        ) / totals_row[("", "Original Warmfront estimate", "Total - #", "GBIS - November")],
-        1
+        ) / totals_row[("", "Original Warmfront estimate", "Total - #", "GBIS - November")]
     )
 
     blank_row = pd.DataFrame([{col: "" for col in results.columns}])

From d9e9be4389d371176a8f83ec5f83f0fcbabbeb8b Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 1 Mar 2024 23:48:27 +0000
Subject: [PATCH 054/248] Added HA25

---
 .../ha_15_32/ha_analysis_batch_3.py           | 79 ++++++++++++-------
 1 file changed, 51 insertions(+), 28 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index b5c6835b..baaa4050 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -159,19 +159,18 @@ class DataLoader:
     }
 
     UNMATCHED_CIGA = {
-        # We expect 4 unmatched addresses, which have been validated manually as being in the ciga file but not
-        # the asset list
+        "HA6": 117,
         "HA14": 3,
         "HA16": 7,
-        # There's just too many unmatched here
-        "HA6": 117,
+        "HA24": 12,
         "HA107": 51,
     }
 
-    def __init__(self, directories, december_figures_filepath, use_cache):
+    def __init__(self, directories, december_figures_filepath, use_cache, rebuild):
         self.directories = directories
         self.use_cache = use_cache
         self.december_figures_filepath = december_figures_filepath
+        self.rebuild = rebuild
 
         self.data = {}
         self.december_figures = None
@@ -312,23 +311,20 @@ class DataLoader:
         return asset_list
 
     @staticmethod
-    def create_ciga_list_house_no(ha_name, ciga_list):
+    def create_ciga_list_house_no(ciga_list):
         """
         This function will append the House number onto the asset list
         :return:
         """
 
-        if ha_name in ["HA6", "HA14", "HA107", "HA16"]:
-            split_addresses = ciga_list['Matched Address'].str.split(',', expand=True)
-            house_numbers = split_addresses[0].str.split(' ', expand=True)
-            # THe first column should be HouseNo - we aren't interested in the other columns, but we don't know how
-            # many columns there might be
-            house_numbers = house_numbers.iloc[:, 0:1]
-            house_numbers.columns = ['HouseNo']
+        split_addresses = ciga_list['Matched Address'].str.split(',', expand=True)
+        house_numbers = split_addresses[0].str.split(' ', expand=True)
+        # THe first column should be HouseNo - we aren't interested in the other columns, but we don't know how
+        # many columns there might be
+        house_numbers = house_numbers.iloc[:, 0:1]
+        house_numbers.columns = ['HouseNo']
 
-            ciga_list = pd.concat([ciga_list, house_numbers[["HouseNo"]]], axis=1)
-        else:
-            raise NotImplementedError("Implement me")
+        ciga_list = pd.concat([ciga_list, house_numbers[["HouseNo"]]], axis=1)
 
         return ciga_list
 
@@ -447,7 +443,7 @@ class DataLoader:
             # Remove rows with missing postcode which happens in a small number of cases
             ciga_list = ciga_list[~pd.isnull(ciga_list["Matched Postcode"])]
             ciga_list["ciga_list_row_id"] = [ha_name + "_ciga_" + str(i) for i in range(0, len(ciga_list))]
-            ciga_list = self.create_ciga_list_house_no(ha_name, ciga_list)
+            ciga_list = self.create_ciga_list_house_no(ciga_list)
             ciga_list = self.dedupe_ciga_list(ciga_list)
             ciga_list = self.merge_ciga_to_assets(asset_list, ciga_list, ha_name)
 
@@ -800,6 +796,10 @@ class DataLoader:
             "st. leodegars close", "st leodegars close"
         )
 
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "montgomery crescent", "montgomery road"
+        )
+
         return survey_list
 
     @staticmethod
@@ -1102,16 +1102,18 @@ class DataLoader:
         for col in ["ECO4", "GBIS", "ECO4 remaining", "GBIS remaining"]:
             self.december_figures[col] = self.december_figures[col].astype("Int64")
 
-        if self.use_cache:
-            self.data = read_pickle_from_s3(
+        if self.use_cache and not self.rebuild:
+            data = read_pickle_from_s3(
                 bucket_name="retrofit-datalake-dev",
                 s3_file_name="ha-analysis/batch3-inputs.pickle",
             )
-            return
+        else:
+            data = {}
 
-        data = {}
         for filepath in self.directories:
             ha_name = filepath.split("/")[2]
+            if ha_name in data:
+                continue
             # Load asset list
             logger.info("Loading data for {}".format(ha_name))
             asset_list, survey_list, ciga_list = self.load_asset_list(
@@ -2635,6 +2637,10 @@ def forecast_remaining_sales(loader):
     # and I don't want the numbers to change too much, depenent on the CIGA conversation rate
     maximum_ciga_conversion = 0.75
 
+    # This is a hard limit to the allowed conversion rates to final sale. These are typically very
+    # high but there are some anomalies, amongst surveys that are early on
+    sales_conversion_lower_bound = 0.8
+
     gbis_rate = 600
     eco4_rate = 1710
     # old_gbis_rate = 432
@@ -2796,14 +2802,30 @@ def forecast_remaining_sales(loader):
     eco4_ciga_independent_passrates = pd.DataFrame(eco4_ciga_independent_passrates)
     gbis_ciga_independent_passrates = pd.DataFrame(gbis_ciga_independent_passrates)
 
+    eco4_ciga_independent_passrates["conversion"] = (
+        eco4_ciga_independent_passrates["# ECO4 successfully installed"] /
+        eco4_ciga_independent_passrates["# ECO4 at install stage"]
+    )
+    eco4_ciga_independent_passrates_clipped = eco4_ciga_independent_passrates[
+        eco4_ciga_independent_passrates["conversion"] >= sales_conversion_lower_bound
+        ]
+
+    gbis_ciga_independent_passrates["conversion"] = (
+        gbis_ciga_independent_passrates["# GBIS successfully installed"] /
+        gbis_ciga_independent_passrates["# GBIS at install stage"]
+    )
+    gbis_ciga_independent_passrates_clipped = gbis_ciga_independent_passrates[
+        gbis_ciga_independent_passrates["conversion"] >= sales_conversion_lower_bound
+        ]
+
     median_eco4_to_install = (
-        eco4_ciga_independent_passrates["# ECO4 successfully installed"].sum() /
-        eco4_ciga_independent_passrates["# ECO4 at install stage"].sum()
+        eco4_ciga_independent_passrates_clipped["# ECO4 successfully installed"].sum() /
+        eco4_ciga_independent_passrates_clipped["# ECO4 at install stage"].sum()
     )
 
     median_gbis_to_install = (
-        gbis_ciga_independent_passrates["# GBIS successfully installed"].sum() /
-        gbis_ciga_independent_passrates["# GBIS at install stage"].sum()
+        gbis_ciga_independent_passrates_clipped["# GBIS successfully installed"].sum() /
+        gbis_ciga_independent_passrates_clipped["# GBIS at install stage"].sum()
     )
 
     # Produce the final output
@@ -3270,6 +3292,8 @@ def app():
     use_cache = True
     # Determines if we want to perform the data pull
     pull_data = False
+    # Override to re-build all inputs
+    rebuild_inputs = False
 
     # List all of the data in the folder
 
@@ -3278,12 +3302,11 @@ def app():
     # Grab the December HA figures filepath
     december_figures_filepath = "local_data/ha_data/HA_December_figures.csv"
 
-    # priority_has = ["HA1", "HA6", "HA7", "HA14", "HA16", "HA24", "HA39", "HA107"]
-    priority_has = ["HA1", "HA6", "HA7", "HA14", "HA16", "HA39", "HA107"]
+    priority_has = ["HA1", "HA6", "HA7", "HA14", "HA16", "HA24", "HA25", "HA39", "HA107"]
     # Filter down the directories to only the priority HAs
     directories = [d for d in directories if d.split("/")[2] in priority_has]
 
-    loader = DataLoader(directories, december_figures_filepath, use_cache)
+    loader = DataLoader(directories, december_figures_filepath, use_cache, rebuild_inputs)
     loader.load()
     loader.ha_facts_and_figures()
 

From cbd4a0052ef005e00ce143c16306b5f0b782c4ed Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 1 Mar 2024 23:52:19 +0000
Subject: [PATCH 055/248] Starting HA25

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index baaa4050..0c9f685f 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -347,6 +347,8 @@ class DataLoader:
             return "Asset"
         elif "Decent Homes Stock" in workbook.sheetnames:
             return "Decent Homes Stock"
+        elif "Report" in workbook.sheetnames:
+            return "Report"
         else:
             return "Assets"
 

From fc022b8a22d571651ba21fff9fd4c5901b18e20f Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 2 Mar 2024 12:34:22 +0000
Subject: [PATCH 056/248] Added data load for HA25

---
 .../ha_15_32/ha_analysis_batch_3.py           | 32 +++++++++++++++----
 1 file changed, 25 insertions(+), 7 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 0c9f685f..4ae881d2 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -155,6 +155,10 @@ class DataLoader:
         "HA24": {
             "address": "Address",
             "postcode": "Postcode"
+        },
+        "HA25": {
+            "address": "T1_Address",
+            "postcode": "matching_postcode"
         }
     }
 
@@ -178,7 +182,7 @@ class DataLoader:
 
     def create_asset_list_matching_address(self, ha_name, asset_list):
 
-        if ha_name in ["HA1", "HA6", "HA16", "HA24"]:
+        if ha_name in ["HA1", "HA6", "HA16", "HA24", "HA25"]:
             asset_list["matching_address"] = asset_list[
                 self.COLUMN_CONFIG[ha_name]["address"]
             ].astype(str).str.lower().str.strip()
@@ -374,13 +378,23 @@ class DataLoader:
         asset_sheetname = self.get_asset_sheetname(workbook)
         asset_sheet = workbook[asset_sheetname]
         asset_sheet_colnames = [cell.value for cell in asset_sheet[1]]
+        if ha_name == "HA25":
+            asset_sheet_colnames[11] = "matching_postcode"
+
+        values_only = not ha_name != "HA25"
 
         rows_data = []
-        for row in asset_sheet.iter_rows(min_row=2, values_only=False):
-            row_data = [cell.value for cell in row]  # This will get you the cell values
-            rows_data.append(row_data)
+        if not values_only:
+            for row in asset_sheet.iter_rows(min_row=2, values_only=values_only):
+                row_data = [cell.value for cell in row]  # This will get you the cell values
+                rows_data.append(row_data)
+        else:
+            for row in asset_sheet.iter_rows(min_row=2, values_only=values_only):  # use values_only=True to get values
+                row_data = list(row)  # No need for comprehension, values_only=True returns a tuple of values
+                rows_data.append(row_data)
 
         asset_list = pd.DataFrame(rows_data, columns=asset_sheet_colnames)
+
         asset_list = asset_list.loc[:, asset_list.columns.notnull()]
 
         # Remove entirely empty rows - consider all rows apart from row_color
@@ -403,9 +417,10 @@ class DataLoader:
             asset_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_asset_list")
             asset_list = asset_list_correction_function(asset_list)
 
-        # For HA1, there is an exception in the structure of the data. We don't have any survey or ciga lists, and so
+        # For HA1 and HA25, there is an exception in the structure of the data. We don't have any survey or ciga
+        # lists, and so
         # we can return the asset list now
-        if ha_name == "HA1":
+        if ha_name in ["HA1", "HA25"]:
             return asset_list, pd.DataFrame(), pd.DataFrame()
 
         # We check if there is a survey list
@@ -1149,7 +1164,8 @@ class DataLoader:
             "ECO4": "ECO4",
             "AFFORDABLE WARMTH": "ECO4",
             "ECO4 A/W": "ECO4",
-            "ECO4 GBIS (ECO+)": "GBIS"
+            "ECO4 GBIS (ECO+)": "GBIS",
+            "ECO4 GBIS (ECO+) JJC UNDER 73m²": "GBIS"
         }
 
         eco_eligibility_map = {
@@ -3305,6 +3321,8 @@ def app():
     december_figures_filepath = "local_data/ha_data/HA_December_figures.csv"
 
     priority_has = ["HA1", "HA6", "HA7", "HA14", "HA16", "HA24", "HA25", "HA39", "HA107"]
+    # Next HAs to do: 15, 32, 33,
+    # Then: 28, 41, 38, 10, 14, 20, 48
     # Filter down the directories to only the priority HAs
     directories = [d for d in directories if d.split("/")[2] in priority_has]
 

From 9a69f8741ece9fdb740cb1b9855f53e639637f44 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 2 Mar 2024 12:54:19 +0000
Subject: [PATCH 057/248] adding HA15

---
 .../ha_15_32/ha_analysis_batch_3.py           | 32 +++++++++++++++++--
 1 file changed, 30 insertions(+), 2 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 4ae881d2..81ed2301 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -165,6 +165,7 @@ class DataLoader:
     UNMATCHED_CIGA = {
         "HA6": 117,
         "HA14": 3,
+        "HA15": 3,
         "HA16": 7,
         "HA24": 12,
         "HA107": 51,
@@ -204,7 +205,15 @@ class DataLoader:
                                              asset_list["Address 4"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["Postcode"].astype(str).str.lower().str.strip()
             asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
-
+        elif ha_name == "HA15":
+            asset_list["matching_address"] = (
+                asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Address Line 2"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Address Line 3"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Address Line 4"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Postcode"].astype(str).str.lower().str.strip()
+            )
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
         elif ha_name == "HA39":
             # Create matching_address by concatenating add_1, add_2, add_3, add_4, add_5, post_code
             asset_list["matching_address"] = asset_list["add_1"].astype(str).str.lower().str.strip() + ", " + \
@@ -502,6 +511,15 @@ class DataLoader:
 
         return asset_list
 
+    @staticmethod
+    def correct_ha15_asset_list(asset_list):
+        asset_list["matching_postcode"] = np.where(
+            asset_list["Address Line 1"] == "103 Priory Crescent",
+            "hp19 9ny",
+            asset_list["matching_postcode"]
+        )
+        return asset_list
+
     @staticmethod
     def correct_ha6_survey_list(survey_list):
 
@@ -655,6 +673,14 @@ class DataLoader:
 
         return survey_list
 
+    @staticmethod
+    def correct_ha15_survey_list(survey_list):
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Mary Mac Manus Drive, Milton Keynes", "Mary Mac Manus Drive"
+        )
+
+        return survey_list
+
     @staticmethod
     def correct_ha16_survey_list(survey_list):
         survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("/", ", ")
@@ -3320,7 +3346,9 @@ def app():
     # Grab the December HA figures filepath
     december_figures_filepath = "local_data/ha_data/HA_December_figures.csv"
 
-    priority_has = ["HA1", "HA6", "HA7", "HA14", "HA16", "HA24", "HA25", "HA39", "HA107"]
+    priority_has = [
+        "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA39", "HA107"
+    ]
     # Next HAs to do: 15, 32, 33,
     # Then: 28, 41, 38, 10, 14, 20, 48
     # Filter down the directories to only the priority HAs

From dad2fc74c889112cbed0a67578fb013e21b276f9 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 2 Mar 2024 13:10:28 +0000
Subject: [PATCH 058/248] HA15 checked and added

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 81ed2301..1ae05d16 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1191,12 +1191,15 @@ class DataLoader:
             "AFFORDABLE WARMTH": "ECO4",
             "ECO4 A/W": "ECO4",
             "ECO4 GBIS (ECO+)": "GBIS",
-            "ECO4 GBIS (ECO+) JJC UNDER 73m²": "GBIS"
+            "ECO4 GBIS (ECO+) JJC UNDER 73m²": "GBIS",
+            "ECO4 AFFORDABLE WARMTH": "ECO4"
         }
 
         eco_eligibility_map = {
             "not eligble": "not eligible",
             "eco 4(subject to ciga)": "eco4 (subject to ciga)",
+            "eco4 (subject to ciga/archetype check": "eco4 (subject to ciga)",
+            "eco4 (subject to archetype check)": "eco4"
         }
 
         ha_facts_and_figures = []

From 9eccfca70dda75ac1c49084bcd63ec3734e3dd23 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 2 Mar 2024 13:26:54 +0000
Subject: [PATCH 059/248] fixing merge

---
 .../ha_15_32/ha_analysis_batch_3.py           | 67 ++++++++++++++++++-
 1 file changed, 65 insertions(+), 2 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 1ae05d16..1f99d23c 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -214,6 +214,13 @@ class DataLoader:
                 asset_list["Postcode"].astype(str).str.lower().str.strip()
             )
             asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA32":
+            asset_list["matching_address"] = (
+                asset_list["Dwelling num"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Street"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Postcode"].astype(str).str.lower().str.strip()
+            )
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
         elif ha_name == "HA39":
             # Create matching_address by concatenating add_1, add_2, add_3, add_4, add_5, post_code
             asset_list["matching_address"] = asset_list["add_1"].astype(str).str.lower().str.strip() + ", " + \
@@ -308,6 +315,8 @@ class DataLoader:
 
         if ha_name in ["HA107"]:
             asset_list["HouseNo"] = asset_list["House No"].copy()
+        elif ha_name == "HA32":
+            asset_list["HouseNo"] = asset_list["Dwelling num"].copy()
         else:
             split_addresses = asset_list['matching_address'].str.split(',', expand=True)
             house_numbers = split_addresses[0].str.split(' ', expand=True)
@@ -520,6 +529,16 @@ class DataLoader:
         )
         return asset_list
 
+    @staticmethod
+    def correct_ha32_asset_list(asset_list):
+        asset_list["Postcode"] = np.where(
+            (asset_list["Street"] == "Norton Grove") & (asset_list["Postcode"] == "HU4 6HQ") & (
+                asset_list["Dwelling num"] == "7"),
+            "hu4 6hg",
+            asset_list["Postcode"]
+        )
+        return asset_list
+
     @staticmethod
     def correct_ha6_survey_list(survey_list):
 
@@ -845,6 +864,50 @@ class DataLoader:
 
         return survey_list
 
+    @staticmethod
+    def correct_ha32_survey_list(survey_list):
+        survey_list["Street / Block Name"] = np.where(
+            survey_list["Street / Block Name"] == "Coxwold",
+            "Coxwold Grove",
+            survey_list["Street / Block Name"]
+        )
+
+        # Update the Barringhton Avenue with their correct spelling: Barrington Avenue
+        survey_list["Street / Block Name"] = np.where(
+            survey_list["Street / Block Name"] == "Barringhton Avenue",
+            "Barrington Avenue",
+            survey_list["Street / Block Name"]
+        )
+
+        # Update how the Rustenburn addresses are listed in the identified addresses
+        survey_list["Street / Block Name"] = np.where(
+            survey_list["Street / Block Name"] == "Rustenburg",
+            "Rustenburg Street",
+            survey_list["Street / Block Name"]
+        )
+
+        # Update how the MALIN LODGE, RONALDSWAY CLOSE addresses are listed in the identified addresses
+        survey_list["Street / Block Name"] = np.where(
+            survey_list["Street / Block Name"] == "MALIN LODGE, RONALDSWAY CLOSE",
+            "Malin Lodge",
+            survey_list["Street / Block Name"]
+        )
+
+        # Update how the Feroes Close are listed in the identified addresses
+        survey_list["Street / Block Name"] = np.where(
+            survey_list["Street / Block Name"] == "Feroes Close",
+            "Faroes Close",
+            survey_list["Street / Block Name"]
+        )
+
+        survey_list["Street / Block Name"] = np.where(
+            survey_list["Street / Block Name"] == 'FORESTER  WAY',
+            'FORESTER WAY',
+            survey_list["Street / Block Name"]
+        )
+
+        return survey_list
+
     @staticmethod
     def correct_ha107_survey_list(survey_list):
         # Replace Front Street, East Stockham with Front Street, East Stockwith
@@ -3350,9 +3413,9 @@ def app():
     december_figures_filepath = "local_data/ha_data/HA_December_figures.csv"
 
     priority_has = [
-        "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA39", "HA107"
+        "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA32", "HA39", "HA107"
     ]
-    # Next HAs to do: 15, 32, 33,
+    # Next HAs to do: 15[DONE], 32, 33,
     # Then: 28, 41, 38, 10, 14, 20, 48
     # Filter down the directories to only the priority HAs
     directories = [d for d in directories if d.split("/")[2] in priority_has]

From 2828b005cbb3676216827fcb5dc70630f8ecb393 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 3 Mar 2024 15:06:31 +0000
Subject: [PATCH 060/248] fixing HA32 merge

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 1f99d23c..c84a2c5c 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -906,6 +906,19 @@ class DataLoader:
             survey_list["Street / Block Name"]
         )
 
+        survey_list["Street / Block Name"] = np.where(
+            survey_list["Street / Block Name"] == '6 Zeigfeld',
+            'Ziegfeld Court',
+            survey_list["Street / Block Name"]
+        )
+
+        # Malin Lodge, Ronaldsway Close
+        survey_list["Street / Block Name"] = np.where(
+            survey_list["Street / Block Name"] == 'Malin Lodge, Ronaldsway Close',
+            'Malin Lodge',
+            survey_list["Street / Block Name"]
+        )
+
         return survey_list
 
     @staticmethod

From 811f141c45b1fcfa52c9f1d685690389df55f531 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 3 Mar 2024 15:35:49 +0000
Subject: [PATCH 061/248] started working on ha33 but paused

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index c84a2c5c..9bd04884 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -221,6 +221,12 @@ class DataLoader:
                 asset_list["Postcode"].astype(str).str.lower().str.strip()
             )
             asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA33":
+            asset_list["matching_address"] = (
+                asset_list["ADDRESS"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["POST CODE"].astype(str).str.lower().str.strip()
+            )
+            asset_list["matching_postcode"] = asset_list["POST CODE"].astype(str).str.lower().str.strip()
         elif ha_name == "HA39":
             # Create matching_address by concatenating add_1, add_2, add_3, add_4, add_5, post_code
             asset_list["matching_address"] = asset_list["add_1"].astype(str).str.lower().str.strip() + ", " + \
@@ -3426,9 +3432,9 @@ def app():
     december_figures_filepath = "local_data/ha_data/HA_December_figures.csv"
 
     priority_has = [
-        "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA32", "HA39", "HA107"
+        "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39", "HA107",
     ]
-    # Next HAs to do: 15[DONE], 32, 33,
+    # Next HAs to do: 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come back on this],
     # Then: 28, 41, 38, 10, 14, 20, 48
     # Filter down the directories to only the priority HAs
     directories = [d for d in directories if d.split("/")[2] in priority_has]

From cb39590f618e7c6ff382e76cc461792101a9741a Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 3 Mar 2024 15:48:05 +0000
Subject: [PATCH 062/248] debugging matching for HA28

---
 .../ha_15_32/ha_analysis_batch_3.py           | 23 +++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 9bd04884..7481724b 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -214,6 +214,13 @@ class DataLoader:
                 asset_list["Postcode"].astype(str).str.lower().str.strip()
             )
             asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA28":
+            asset_list["matching_address"] = (
+                asset_list["House Number"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Street 1"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Postcode"].astype(str).str.lower().str.strip()
+            )
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
         elif ha_name == "HA32":
             asset_list["matching_address"] = (
                 asset_list["Dwelling num"].astype(str).str.lower().str.strip() + ", " +
@@ -323,6 +330,8 @@ class DataLoader:
             asset_list["HouseNo"] = asset_list["House No"].copy()
         elif ha_name == "HA32":
             asset_list["HouseNo"] = asset_list["Dwelling num"].copy()
+        elif ha_name == "HA28":
+            asset_list["HouseNo"] = asset_list["House Number"].copy()
         else:
             split_addresses = asset_list['matching_address'].str.split(',', expand=True)
             house_numbers = split_addresses[0].str.split(' ', expand=True)
@@ -371,6 +380,8 @@ class DataLoader:
     def get_asset_sheetname(workbook):
         if "Asset List" in workbook.sheetnames:
             return "Asset List"
+        elif "Asset list" in workbook.sheetnames:
+            return "Asset list"
         elif "Asset" in workbook.sheetnames and "Assets" not in workbook.sheetnames:
             return "Asset"
         elif "Decent Homes Stock" in workbook.sheetnames:
@@ -394,6 +405,8 @@ class DataLoader:
     def get_survey_sheetname(workbook):
         if "ECO Surveys" in workbook.sheetnames:
             return "ECO Surveys"
+        elif "ECO Survey" in workbook.sheetnames:
+            return "ECO Survey"
         else:
             return "ECO surveys"
 
@@ -870,6 +883,12 @@ class DataLoader:
 
         return survey_list
 
+    @staticmethod
+    def correct_ha28_survey_list(survey_list):
+        # Rename the "No" column to "No." to align with the other survey sheets
+        survey_list = survey_list.rename(columns={"NO ": "NO."})
+        return survey_list
+
     @staticmethod
     def correct_ha32_survey_list(survey_list):
         survey_list["Street / Block Name"] = np.where(
@@ -1027,6 +1046,10 @@ class DataLoader:
                 asset_list["matching_address"].str.contains(row["Street / Block Name"].lower().strip())
             ].copy()
 
+            if str(house_number) not in df["matching_address"].values:
+                if "flat" in str(house_number):
+                    house_number = house_number.split("flat")[1].strip()
+
             df = df[df["matching_address"].str.contains(str(house_number))]
 
             if df.empty:

From 0909b811ee7aea834784f0deb947308593ce7cdd Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 3 Mar 2024 15:57:49 +0000
Subject: [PATCH 063/248] fixed matching for ha28

---
 .../ha_15_32/ha_analysis_batch_3.py           | 23 ++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 7481724b..b954a651 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -887,6 +887,27 @@ class DataLoader:
     def correct_ha28_survey_list(survey_list):
         # Rename the "No" column to "No." to align with the other survey sheets
         survey_list = survey_list.rename(columns={"NO ": "NO."})
+
+        survey_list["Post Code"] = np.where(
+            survey_list["Post Code"] == "ME75HA",
+            "ME7 5HA",
+            survey_list["Post Code"]
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "ANDREW MANOR/BRITTON ST", "ANDREW MANOR"
+        )
+
+        survey_list["Post Code"] = np.where(
+            survey_list["Post Code"] == "ME75TW",
+            "ME7 5TW",
+            survey_list["Post Code"]
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "ST MARKS HOUSE/SAXON ST", "ST MARKS HOUSE"
+        )
+
         return survey_list
 
     @staticmethod
@@ -1046,7 +1067,7 @@ class DataLoader:
                 asset_list["matching_address"].str.contains(row["Street / Block Name"].lower().strip())
             ].copy()
 
-            if str(house_number) not in df["matching_address"].values:
+            if not any(df["matching_address"].str.contains(str(house_number))):
                 if "flat" in str(house_number):
                     house_number = house_number.split("flat")[1].strip()
 

From 87c77e53c03ec83286718d6ef6bb5593466a48b1 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 3 Mar 2024 16:22:42 +0000
Subject: [PATCH 064/248] handling facts and figures for ha28

---
 .../ha_15_32/ha_analysis_batch_3.py           | 92 +++++++++++--------
 1 file changed, 53 insertions(+), 39 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index b954a651..3ded09ba 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -398,6 +398,8 @@ class DataLoader:
             return "CIGA Checks"
         elif "CIGA checks" in workbook.sheetnames:
             return "CIGA checks"
+        elif "CIGA check" in workbook.sheetnames:
+            return "CIGA check"
         else:
             return "CIGA"
 
@@ -1318,14 +1320,16 @@ class DataLoader:
             "ECO4 A/W": "ECO4",
             "ECO4 GBIS (ECO+)": "GBIS",
             "ECO4 GBIS (ECO+) JJC UNDER 73m²": "GBIS",
-            "ECO4 AFFORDABLE WARMTH": "ECO4"
+            "ECO4 AFFORDABLE WARMTH": "ECO4",
+            "Affordable Warmth": "ECO4"
         }
 
         eco_eligibility_map = {
             "not eligble": "not eligible",
             "eco 4(subject to ciga)": "eco4 (subject to ciga)",
             "eco4 (subject to ciga/archetype check": "eco4 (subject to ciga)",
-            "eco4 (subject to archetype check)": "eco4"
+            "eco4 (subject to archetype check)": "eco4",
+            "eco4 (subject to ciga/archetype)": "eco4 (subject to ciga)",
         }
 
         ha_facts_and_figures = []
@@ -1384,46 +1388,56 @@ class DataLoader:
             sales_report = {}
             if not survey_list.empty:
                 scheme_column = survey_list.columns[0]
-                # We clean up the survey list installation or cancelled
-                survey_list["installed_or_cancelled_clean"] = survey_list["INSTALLED OR CANCELLED"].str.lower()
-                # Remove all punctuation
-                survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.replace(
-                    r'[^\w\s]', '', regex=True
-                )
-                # Remove double spaces
-                survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.replace(
-                    r'\s+', ' ', regex=True
-                )
-                # Remove trailing spaces
-                survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.strip()
-
                 # Remap the values in the scheme column
                 survey_list[scheme_column] = survey_list[scheme_column].replace(scheme_map)
+                # We clean up the survey list installation or cancelled
+                if "INSTALLED OR CANCELLED" in survey_list.columns:
+                    survey_list["installed_or_cancelled_clean"] = survey_list["INSTALLED OR CANCELLED"].str.lower()
+                    # Remove all punctuation
+                    survey_list["installed_or_cancelled_clean"] = survey_list[
+                        "installed_or_cancelled_clean"].str.replace(
+                        r'[^\w\s]', '', regex=True
+                    )
+                    # Remove double spaces
+                    survey_list["installed_or_cancelled_clean"] = survey_list[
+                        "installed_or_cancelled_clean"].str.replace(
+                        r'\s+', ' ', regex=True
+                    )
+                    # Remove trailing spaces
+                    survey_list["installed_or_cancelled_clean"] = survey_list[
+                        "installed_or_cancelled_clean"].str.strip()
 
-                survey_list["installation_status"] = None
-                survey_list["installation_status"] = np.where(
-                    survey_list["installed_or_cancelled_clean"].isin(["installed", "installed see notes"]),
-                    "installed",
-                    survey_list["installation_status"]
-                )
-                survey_list["installation_status"] = np.where(
-                    survey_list["installed_or_cancelled_clean"].isin(["cancelled"]),
-                    "cancelled",
-                    survey_list["installation_status"]
-                )
-                # Find partial installations
-                survey_list["installation_status"] = np.where(
-                    survey_list["installed_or_cancelled_clean"].str.contains("still to be installed"),
-                    "partially installed",
-                    survey_list["installation_status"]
-                )
-                # Find partial cancellations
-                # TODO: We might have more indications of partial cancellations
-                survey_list["installation_status"] = np.where(
-                    survey_list["installed_or_cancelled_clean"].isin(["loft cancelled"]),
-                    "partially cancelled",
-                    survey_list["installation_status"]
-                )
+                    survey_list["installation_status"] = None
+                    survey_list["installation_status"] = np.where(
+                        survey_list["installed_or_cancelled_clean"].isin(["installed", "installed see notes"]),
+                        "installed",
+                        survey_list["installation_status"]
+                    )
+                    survey_list["installation_status"] = np.where(
+                        survey_list["installed_or_cancelled_clean"].isin(["cancelled"]),
+                        "cancelled",
+                        survey_list["installation_status"]
+                    )
+                    # Find partial installations
+                    survey_list["installation_status"] = np.where(
+                        survey_list["installed_or_cancelled_clean"].str.contains("still to be installed"),
+                        "partially installed",
+                        survey_list["installation_status"]
+                    )
+                    # Find partial cancellations
+                    # TODO: We might have more indications of partial cancellations
+                    survey_list["installation_status"] = np.where(
+                        survey_list["installed_or_cancelled_clean"].isin(["loft cancelled"]),
+                        "partially cancelled",
+                        survey_list["installation_status"]
+                    )
+                else:
+                    # We have some examples, e.g. HA28, where we do not have the installed or cancelled column
+                    survey_list["installation_status"] = np.where(
+                        survey_list['INSTALL/ CANCELLATION DATE'].str.lower().str.contains("cancelled"),
+                        "cancelled",
+                        "installed",
+                    )
 
                 # Finally, for other cases, we set the status to "in progress"
                 survey_list["installation_status"] = survey_list["installation_status"].fillna("in progress")

From f8948ff60f9e00d9501bd2f71f4269152cf3ab51 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 3 Mar 2024 16:47:10 +0000
Subject: [PATCH 065/248] ha38 wip

---
 .../ha_15_32/ha_analysis_batch_3.py           | 20 +++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 3ded09ba..4af7d9b9 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -234,6 +234,13 @@ class DataLoader:
                 asset_list["POST CODE"].astype(str).str.lower().str.strip()
             )
             asset_list["matching_postcode"] = asset_list["POST CODE"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA38":
+            asset_list["matching_address"] = asset_list["House_Number"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address_Line_1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address_Line_2"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address_Line_3"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Postcode"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
         elif ha_name == "HA39":
             # Create matching_address by concatenating add_1, add_2, add_3, add_4, add_5, post_code
             asset_list["matching_address"] = asset_list["add_1"].astype(str).str.lower().str.strip() + ", " + \
@@ -332,6 +339,8 @@ class DataLoader:
             asset_list["HouseNo"] = asset_list["Dwelling num"].copy()
         elif ha_name == "HA28":
             asset_list["HouseNo"] = asset_list["House Number"].copy()
+        elif ha_name == "HA38":
+            asset_list["HouseNo"] = asset_list["House_Number"].copy()
         else:
             split_addresses = asset_list['matching_address'].str.split(',', expand=True)
             house_numbers = split_addresses[0].str.split(' ', expand=True)
@@ -912,6 +921,12 @@ class DataLoader:
 
         return survey_list
 
+    @staticmethod
+    def correct_ha38_survey_list(survey_list):
+        # Rename the "No" column to "No." to align with the other survey sheets
+        survey_list = survey_list.rename(columns={"NO ": "NO."})
+        return survey_list
+
     @staticmethod
     def correct_ha32_survey_list(survey_list):
         survey_list["Street / Block Name"] = np.where(
@@ -3490,10 +3505,11 @@ def app():
     december_figures_filepath = "local_data/ha_data/HA_December_figures.csv"
 
     priority_has = [
-        "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39", "HA107",
+        "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA38", "HA39", "HA107",
     ]
     # Next HAs to do: 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come back on this],
-    # Then: 28, 41, 38, 10, 14, 20, 48
+    # Then: 28 [DONE],
+    # 38, 41, 10, 14, 20, 48
     # Filter down the directories to only the priority HAs
     directories = [d for d in directories if d.split("/")[2] in priority_has]
 

From abe0e627dbe1c89209de2f867c2abe4eef419d2e Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 6 Mar 2024 10:24:42 +0000
Subject: [PATCH 066/248] Fixing bug with gbis remaining counts

---
 .../ha_15_32/ha_analysis_batch_3.py           | 266 ++++++++++++------
 1 file changed, 184 insertions(+), 82 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 4af7d9b9..6d1a3b45 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -424,6 +424,12 @@ class DataLoader:
     def load_asset_list(self, filepath, ha_name):
         workbook = openpyxl.load_workbook(filepath)
         asset_sheetname = self.get_asset_sheetname(workbook)
+
+        # TODO: TEMP
+        sheetnames_lower = [x.lower() for x in workbook.sheetnames]
+        if any("eco3" in x for x in sheetnames_lower):
+            raise Exception("REMOVE ME")
+
         asset_sheet = workbook[asset_sheetname]
         asset_sheet_colnames = [cell.value for cell in asset_sheet[1]]
         if ha_name == "HA25":
@@ -569,6 +575,34 @@ class DataLoader:
         )
         return asset_list
 
+    @staticmethod
+    def correct_ha38_asset_list(asset_list):
+        # For Kingsford court, the house number is at the end of the address
+        def rearrange_address_if_flat(address):
+            if '/flat' in address.lower():
+                parts = address.split('/flat', 1)
+                return f"FLAT{parts[1]}, {parts[0]}"
+            return address
+
+        def extract_house_no_if_flat(address):
+            if '/flat' in address.lower():
+                # Attempt to extract the house number following "/flat"
+                try:
+                    house_no = address.split('/flat ')[1].split(' ')[0]
+                    # Remove trailing comma
+                    house_no = house_no.replace(",", "")
+                except IndexError:
+                    house_no = None
+                return house_no
+            return None
+
+        asset_list['ExtractedHouseNo'] = asset_list['matching_address'].apply(extract_house_no_if_flat)
+        asset_list.loc[asset_list['ExtractedHouseNo'].notnull(), 'HouseNo'] = asset_list['ExtractedHouseNo']
+        asset_list['matching_address'] = asset_list['matching_address'].apply(rearrange_address_if_flat)
+        # We then need to
+
+        return asset_list
+
     @staticmethod
     def correct_ha6_survey_list(survey_list):
 
@@ -925,6 +959,11 @@ class DataLoader:
     def correct_ha38_survey_list(survey_list):
         # Rename the "No" column to "No." to align with the other survey sheets
         survey_list = survey_list.rename(columns={"NO ": "NO."})
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            'Kingsford Court, Coombe Valley Road', 'Kingsford Court'
+        )
+
         return survey_list
 
     @staticmethod
@@ -1345,6 +1384,7 @@ class DataLoader:
             "eco4 (subject to ciga/archetype check": "eco4 (subject to ciga)",
             "eco4 (subject to archetype check)": "eco4",
             "eco4 (subject to ciga/archetype)": "eco4 (subject to ciga)",
+            "eco4  (subject to ciga)": "eco4 (subject to ciga)"
         }
 
         ha_facts_and_figures = []
@@ -2943,8 +2983,8 @@ def forecast_remaining_sales(loader):
     median_ciga_success_rate = ciga_passrates["# CIGA passed"].sum() / ciga_passrates["# CIGA dependent"].sum()
 
     # 3) Calculate the conversion rate of an ECO4 and a GBISjob, that doesn't need ciga, to install
-    eco4_ciga_independent_passrates = []
-    gbis_ciga_independent_passrates = []
+    eco4_ciga_independent_to_install = []
+    gbis_to_install = []
     for ha_name, input_data in loader.data.items():
         asset_list = input_data["asset_list"].copy()
         survey_list = input_data["survey_list"].copy()
@@ -2973,7 +3013,7 @@ def forecast_remaining_sales(loader):
                 )
             ]
 
-            eco4_ciga_independent_passrates.append(
+            eco4_ciga_independent_to_install.append(
                 {
                     "Ha Name": ha_name,
                     "# ECO4 at install stage": typical_eco4_installed.shape[0],
@@ -2993,7 +3033,7 @@ def forecast_remaining_sales(loader):
                 )
             ]
 
-            gbis_ciga_independent_passrates.append(
+            gbis_to_install.append(
                 {
                     "Ha Name": ha_name,
                     "# GBIS at install stage": typical_gbis_installed.shape[0],
@@ -3001,33 +3041,33 @@ def forecast_remaining_sales(loader):
                 }
             )
 
-    eco4_ciga_independent_passrates = pd.DataFrame(eco4_ciga_independent_passrates)
-    gbis_ciga_independent_passrates = pd.DataFrame(gbis_ciga_independent_passrates)
+    eco4_ciga_independent_to_install = pd.DataFrame(eco4_ciga_independent_to_install)
+    gbis_to_install = pd.DataFrame(gbis_to_install)
 
-    eco4_ciga_independent_passrates["conversion"] = (
-        eco4_ciga_independent_passrates["# ECO4 successfully installed"] /
-        eco4_ciga_independent_passrates["# ECO4 at install stage"]
+    eco4_ciga_independent_to_install["conversion"] = (
+        eco4_ciga_independent_to_install["# ECO4 successfully installed"] /
+        eco4_ciga_independent_to_install["# ECO4 at install stage"]
     )
-    eco4_ciga_independent_passrates_clipped = eco4_ciga_independent_passrates[
-        eco4_ciga_independent_passrates["conversion"] >= sales_conversion_lower_bound
+    eco4_ciga_independent_to_install_clipped = eco4_ciga_independent_to_install[
+        eco4_ciga_independent_to_install["conversion"] >= sales_conversion_lower_bound
         ]
 
-    gbis_ciga_independent_passrates["conversion"] = (
-        gbis_ciga_independent_passrates["# GBIS successfully installed"] /
-        gbis_ciga_independent_passrates["# GBIS at install stage"]
+    gbis_to_install["conversion"] = (
+        gbis_to_install["# GBIS successfully installed"] /
+        gbis_to_install["# GBIS at install stage"]
     )
-    gbis_ciga_independent_passrates_clipped = gbis_ciga_independent_passrates[
-        gbis_ciga_independent_passrates["conversion"] >= sales_conversion_lower_bound
+    gbis_to_install_clipped = gbis_to_install[
+        gbis_to_install["conversion"] >= sales_conversion_lower_bound
         ]
 
     median_eco4_to_install = (
-        eco4_ciga_independent_passrates_clipped["# ECO4 successfully installed"].sum() /
-        eco4_ciga_independent_passrates_clipped["# ECO4 at install stage"].sum()
+        eco4_ciga_independent_to_install_clipped["# ECO4 successfully installed"].sum() /
+        eco4_ciga_independent_to_install_clipped["# ECO4 at install stage"].sum()
     )
 
     median_gbis_to_install = (
-        gbis_ciga_independent_passrates_clipped["# GBIS successfully installed"].sum() /
-        gbis_ciga_independent_passrates_clipped["# GBIS at install stage"].sum()
+        gbis_to_install_clipped["# GBIS successfully installed"].sum() /
+        gbis_to_install_clipped["# GBIS at install stage"].sum()
     )
 
     # Produce the final output
@@ -3044,29 +3084,26 @@ def forecast_remaining_sales(loader):
 
     results = []
     for ha_name, input_data in loader.data.items():
-
         # Original warmfront figures - ECO4
         original_warmfront_estimates = december_figures[december_figures["HA Name"] == ha_name]
 
         original_warmfront_eco4 = original_warmfront_estimates["ECO4"].values[0]
         original_warmfront_remaining_eco4 = original_warmfront_estimates["ECO4 remaining"].values[0]
+        original_warmfront_sold_eco4 = (
+            original_warmfront_estimates["No. of Tech surveys complete - Eco 4"].values[0] * eco4_rate
+        )
 
-        # original_warmfront_eco4_revenue = (
-        #     original_warmfront_remaining_eco4 * eco4_rate +
-        #     (original_warmfront_eco4 - original_warmfront_remaining_eco4) * old_eco4_rate
-        # )
         original_warmfront_eco4_revenue = original_warmfront_eco4 * eco4_rate
         original_warmfront_remaining_eco4_revenue = original_warmfront_remaining_eco4 * eco4_rate
+        original_warmfront_sold_gbis = (
+            original_warmfront_estimates["No. of Tech surveys complete - GBIS"].values[0] * gbis_rate
+        )
 
         # Original warmfront figures - GBIS
 
         original_warmfront_gbis = original_warmfront_estimates["GBIS"].values[0]
         original_warmfront_remaining_gbis = original_warmfront_estimates["GBIS remaining"].values[0]
 
-        # original_warmfront_gbis_revenue = (
-        #     original_warmfront_remaining_gbis * gbis_rate +
-        #     (original_warmfront_gbis - original_warmfront_remaining_gbis) * old_gbis_rate
-        # )
         original_warmfront_gbis_revenue = (
             original_warmfront_gbis * gbis_rate
         )
@@ -3123,7 +3160,7 @@ def forecast_remaining_sales(loader):
 
         # We also need the ha ciga passed to install success rate
         ha_ciga_pass_to_sale = converted_ciga_jobs[converted_ciga_jobs["HA Name"] == ha_name]
-        if not ha_ciga_pass_to_sale.empty:
+        if not ha_ciga_pass_to_sale.empty and ha_ciga_pass_to_sale["# Ciga dependent at installation"].values[0] != 0:
             ha_ciga_pass_to_sale_rate = (
                 ha_ciga_pass_to_sale["# Ciga dependent successfully installed"].values[0] /
                 ha_ciga_pass_to_sale["# Ciga dependent at installation"].values[0]
@@ -3131,7 +3168,9 @@ def forecast_remaining_sales(loader):
         else:
             ha_ciga_pass_to_sale_rate = median_ciga_pass_to_install
 
-        ha_eco4_to_sale = eco4_ciga_independent_passrates[eco4_ciga_independent_passrates["Ha Name"] == ha_name]
+        ha_eco4_to_sale = eco4_ciga_independent_to_install_clipped[
+            eco4_ciga_independent_to_install_clipped["Ha Name"] == ha_name
+            ]
         if not ha_eco4_to_sale.empty:
             ha_eco4_to_sale_rate = (
                 ha_eco4_to_sale['# ECO4 successfully installed'].values[0] /
@@ -3149,12 +3188,6 @@ def forecast_remaining_sales(loader):
             eco4_rate=eco4_rate
         )
 
-        # Calculate the delta compared to Warmfront's original estimate
-        eco4_delta_vs_original_estimate = (
-                                              eco4_post_ciga_total_results[
-                                                  "ECO4 - post CIGA - #"] - original_warmfront_eco4
-                                          ) / original_warmfront_eco4
-
         eco4_post_ciga_remaining_results = calculate_eco4_post_ciga(
             eligiblity_counts=eligiblity_counts_remaining,
             input_data=input_data,
@@ -3164,10 +3197,18 @@ def forecast_remaining_sales(loader):
             eco4_rate=eco4_rate
         )
 
+        # Calculate the delta compared to Warmfront's original remaining
+        if original_warmfront_remaining_eco4 == 0:
+            eco4_delta_vs_original_estimate_remaining = eco4_post_ciga_remaining_results["ECO4 - post CIGA - #"]
+        else:
+            eco4_delta_vs_original_estimate_remaining = ((eco4_post_ciga_remaining_results["ECO4 - post CIGA - #"] -
+                                                          original_warmfront_remaining_eco4) /
+                                                         original_warmfront_remaining_eco4)
+
         # GBIS Figures
         # Estimate the GBIS conversion rate
-        ha_gbis_sale_conversion = gbis_ciga_independent_passrates[
-            gbis_ciga_independent_passrates["Ha Name"] == ha_name
+        ha_gbis_sale_conversion = gbis_to_install_clipped[
+            gbis_to_install_clipped["Ha Name"] == ha_name
             ]
 
         if not ha_gbis_sale_conversion.empty:
@@ -3178,6 +3219,9 @@ def forecast_remaining_sales(loader):
         else:
             ha_gbis_sale_conversion = median_gbis_to_install
 
+        asset_list["ECO Eligibility"].value_counts()
+        asset_list_remaining["ECO Eligibility"].value_counts()
+
         gbis_total = eligiblity_counts[
             eligiblity_counts["ECO Eligibility"] == "gbis"
             ]["count"].sum()
@@ -3185,18 +3229,59 @@ def forecast_remaining_sales(loader):
         gbis_total_revenue = int(gbis_total * gbis_rate)
 
         gbis_remaining = eligiblity_counts_remaining[
-            eligiblity_counts["ECO Eligibility"] == "gbis"
+            eligiblity_counts_remaining["ECO Eligibility"] == "gbis"
             ]["count"].sum()
         gbis_remaining = int(np.round(gbis_remaining * ha_gbis_sale_conversion))
         gbis_remaining_revenue = int(gbis_remaining * gbis_rate)
 
         # GBIS delta
-        if original_warmfront_gbis == 0:
-            gbis_delta_vs_original_estimate = gbis_total
+        if original_warmfront_remaining_gbis == 0:
+            gbis_delta_vs_original_estimate_remaining = gbis_remaining
         else:
-            gbis_delta_vs_original_estimate = (
-                                                  gbis_total - original_warmfront_gbis
-                                              ) / original_warmfront_gbis
+            gbis_delta_vs_original_estimate_remaining = (
+                (gbis_remaining - original_warmfront_remaining_gbis) / original_warmfront_remaining_gbis
+            )
+
+        # Current sales figures
+        # For any sales surveys that are complete, that could still cancel, we apply a conversion rate
+        eco4_actually_sold = 0
+        gbis_actually_sold = 0
+        if not survey_list.empty:
+            surveys_with_eligibility = survey_list.merge(
+                asset_list[["asset_list_row_id", "ECO Eligibility"]],
+                how="left", on="asset_list_row_id"
+            )
+            completed_eco4_sales = surveys_with_eligibility[
+                surveys_with_eligibility["installation_status"] == "ECO4 - installed"
+                ]
+            incomplete_eco4_sales = surveys_with_eligibility[
+                (surveys_with_eligibility["installation_status"] == "ECO4 - in progress") &
+                (~surveys_with_eligibility["ECO Eligibility"].isin(
+                    ["eco4 - passed ciga"])
+                 )
+                ]
+            incomplete_eco4_sales_ciga = surveys_with_eligibility[
+                (surveys_with_eligibility["installation_status"] == "ECO4 - in progress") &
+                (surveys_with_eligibility["ECO Eligibility"].isin(
+                    ["eco4 - passed ciga"])
+                )
+                ]
+
+            eco4_actually_sold = (completed_eco4_sales.shape[0] * eco4_rate) + (
+                incomplete_eco4_sales.shape[0] * ha_eco4_to_sale_rate +
+                incomplete_eco4_sales_ciga.shape[0] * ha_ciga_pass_to_sale_rate
+            ) * eco4_rate
+
+            completed_gbis_sales = surveys_with_eligibility[
+                surveys_with_eligibility["installation_status"] == "GBIS - installed"
+                ]
+            incomplete_gbis_sales = surveys_with_eligibility[
+                (surveys_with_eligibility["installation_status"] == "GBIS - in progress")
+            ]
+
+            gbis_actually_sold = completed_gbis_sales.shape[0] * gbis_rate + (
+                incomplete_gbis_sales.shape[0] * ha_gbis_sale_conversion * gbis_rate
+            )
 
         to_append = {
             ("", "", "", "HA Name"): ha_name,
@@ -3204,29 +3289,33 @@ def forecast_remaining_sales(loader):
             ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): original_warmfront_eco4,
             ("ECO4 original", "", "Remaining - #", ""): original_warmfront_remaining_eco4,
             ("ECO4 original", "", "Total - £", ""): original_warmfront_eco4_revenue,
+            ("ECO4 original", "", "Sold - £", ""): original_warmfront_sold_eco4,
             ("ECO4 original", "", "Remaining - £", ""): original_warmfront_remaining_eco4_revenue,
             # GBIS - original warmfront figures
             ("", "Original Warmfront estimate", "Total - #", "GBIS - November"): original_warmfront_gbis,
             ("GBIS original", "", "Remaining - #", ""): original_warmfront_gbis,
             ("GBIS original", "", "Total - £", ""): original_warmfront_gbis_revenue,
+            ("GBIS original", "", "Sold - £", ""): original_warmfront_sold_gbis,
             ("GBIS original", "", "Remaining - £", ""): original_warmfront_remaining_gbis_revenue,
             # ECO4 - asset list, pre-ciga
             ("", "Warmfront post code list", "Total #", "ECO4 total (pre-ciga)"): eco4_pre_ciga,
             ("ECO4 pre-ciga", "", "Remaining - #", ""): eco4_pre_ciga_remaining,
             ("ECO4 pre-ciga", "", "Total - £", ""): eco4_pre_ciga_revenue,
+            ("ECO4 pre-ciga", "", "Sold - £", ""): eco4_actually_sold,
             ("ECO4 pre-ciga", "", "Remaining - £", ""): eco4_pre_ciga_remaining_revenue,
             # ECO4 - asset list, post ciga, total
-            ("ECO4 post-ciga", "", "Estimated total eligible - #", "ECO4 total (post-ciga)"):
+            ("ECO4 post-ciga", "", "Estimated total eligible - #", "ECO4 total"):
                 eco4_post_ciga_total_results[
                     "ECO4 - post CIGA - #"],
             ("ECO4 post-ciga", "", "Estimated total eligible - £", ""): eco4_post_ciga_total_results[
                 "ECO4 - post CIGA - £"],
-            ("ECO4 post-ciga", "", "Delta vs original estimate - %", ""): eco4_delta_vs_original_estimate,
             # ECO4 - asset list, post ciga, remaining
             ("ECO4 post-ciga", "", "Estimated remaining eligible - #", ""): eco4_post_ciga_remaining_results[
                 "ECO4 - post CIGA - #"],
             ("ECO4 post-ciga", "", "Estimated remaining eligible - £", ""): eco4_post_ciga_remaining_results[
                 "ECO4 - post CIGA - £"],
+            ("ECO4 post-ciga", "", "Delta vs original estimate, remaining - %",
+             ""): eco4_delta_vs_original_estimate_remaining,
             ("ECO4 post-ciga", "", "Of which - confirmed (post CIGA or no CIGA required) - #", ""):
                 eco4_post_ciga_remaining_results["Of which confirmed - #"],
             ("ECO4 post-ciga", "", "Of which - confirmed (post CIGA or no CIGA required) - £", ""):
@@ -3257,13 +3346,15 @@ def forecast_remaining_sales(loader):
             # GBIS postcode list
             ("GBIS Postcode list", "Warmfront post code list", "Total - #", "GBIS total"): gbis_total,
             ("GBIS Postcode list", "Warmfront post code list", "Total - £", "GBIS total"): gbis_total_revenue,
-            ("GBIS Postcode list", "", "Delta vs original estimate - %", ""): gbis_delta_vs_original_estimate,
+            ("GBIS Postcode list", "Warmfront post code list", "Sold - £", "GBIS total"): gbis_actually_sold,
             ("GBIS Postcode list", "Warmfront post code list", "Remaining - #", "GBIS total"): gbis_remaining,
             ("GBIS Postcode list", "Warmfront post code list", "Remaining - £", "GBIS total"): gbis_remaining_revenue,
+            ("GBIS Postcode list", "", "Delta vs original estimate, remaining - %", ""):
+                gbis_delta_vs_original_estimate_remaining,
         }
 
         # Make sure nothing is forgotten due to duplicate multi-index keys
-        if len(to_append) != 33:
+        if len(to_append) != 37:
             raise ValueError("Something went wrong")
 
         results.append(to_append)
@@ -3275,26 +3366,26 @@ def forecast_remaining_sales(loader):
         if col == ('', '', '', 'HA Name'):
             totals_row[col] = "Total"
         elif col in [
-            ("ECO4 post-ciga", "", "Delta vs original estimate - %", ""),
-            ("GBIS Postcode list", "", "Delta vs original estimate - %", "")
+            ("ECO4 post-ciga", "", "Delta vs original estimate, remaining - %", ""),
+            ("GBIS Postcode list", "", "Delta vs original estimate, remaining - %", "")
         ]:
             totals_row[col] = None
         else:
             totals_row[col] = results[col].sum()
 
     # For the delta columns, we calculate the delta on the totals
-    totals_row[("ECO4 post-ciga", "", "Delta vs original estimate - %", "")] = (
+    totals_row[("ECO4 post-ciga", "", "Delta vs original estimate, remaining - %", "")] = (
         (
-            totals_row[("ECO4 post-ciga", "", "Estimated total eligible - #", "ECO4 total (post-ciga)")] -
-            totals_row[("", "Original Warmfront estimate", "Total - #", "ECO4 - November")]
-        ) / totals_row[("", "Original Warmfront estimate", "Total - #", "ECO4 - November")]
+            totals_row[("ECO4 post-ciga", "", "Estimated remaining eligible - #", "")] -
+            totals_row[("ECO4 original", "", "Remaining - #", "")]
+        ) / totals_row[("ECO4 original", "", "Remaining - #", "")]
     )
 
-    totals_row[("GBIS Postcode list", "", "Delta vs original estimate - %", "")] = (
+    totals_row[("GBIS Postcode list", "", "Delta vs original estimate, remaining - %", "")] = (
         (
-            totals_row[("GBIS Postcode list", "Warmfront post code list", "Total - #", "GBIS total")] -
-            totals_row[("", "Original Warmfront estimate", "Total - #", "GBIS - November")]
-        ) / totals_row[("", "Original Warmfront estimate", "Total - #", "GBIS - November")]
+            totals_row[("GBIS Postcode list", "Warmfront post code list", "Remaining - #", "GBIS total")] -
+            totals_row[("GBIS original", "", "Remaining - #", "")]
+        ) / totals_row[("GBIS original", "", "Remaining - #", "")]
     )
 
     blank_row = pd.DataFrame([{col: "" for col in results.columns}])
@@ -3342,6 +3433,15 @@ def forecast_remaining_sales(loader):
     )
     headline_total_delta = round(headline_total_delta, 1)
 
+    headline_eco4_sold_since_november = (
+        totals_row[('ECO4 pre-ciga', '', 'Sold - £', '')] - totals_row[('ECO4 original', '', 'Sold - £', '')]
+    )
+
+    headline_gbis_sold_since_november = (
+        totals_row[("GBIS Postcode list", "Warmfront post code list", "Sold - £", "GBIS total")] -
+        totals_row[('GBIS original', '', 'Sold - £', '')]
+    )
+
     headlines = [
         {
             ("", "", "", "HA Name"): "Headlines",
@@ -3358,16 +3458,22 @@ def forecast_remaining_sales(loader):
                 "ECO4 - November"): headline_eco4_original_remaining_revenue
         },
         {
-            ("", "", "", "HA Name"): "ECO4 Remaining - postcode list - #",
+            ("", "", "", "HA Name"): "ECO4 Sold since November - £",
+            (
+                "", "Original Warmfront estimate", "Total - #",
+                "ECO4 - November"): headline_eco4_sold_since_november
+        },
+        {
+            ("", "", "", "HA Name"): "ECO4 Remaining - postcode list (post CIGA) - #",
             ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): headline_eco4_postcode_list_remaining
         },
         {
-            ("", "", "", "HA Name"): "ECO4 Remaining - postcode list - £",
+            ("", "", "", "HA Name"): "ECO4 Remaining - postcode list (post CIGA) - £",
             ("", "Original Warmfront estimate", "Total - #",
              "ECO4 - November"): headline_eco4_postcode_list_remaining_revenue
         },
         {
-            ("", "", "", "HA Name"): "ECO4 delta %",
+            ("", "", "", "HA Name"): "ECO4 £ remaining delta - %",
             ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(headline_eco4_delta) + "%"
         },
         {
@@ -3380,6 +3486,12 @@ def forecast_remaining_sales(loader):
                 "", "Original Warmfront estimate", "Total - #",
                 "ECO4 - November"): headline_gbis_original_remaining_revenue
         },
+        {
+            ("", "", "", "HA Name"): "GBIS Sold since November - £",
+            (
+                "", "Original Warmfront estimate", "Total - #",
+                "ECO4 - November"): headline_gbis_sold_since_november
+        },
         {
             ("", "", "", "HA Name"): "GBIS Remaining - post code list - #",
             ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): headline_gbis_postcode_list_remaining
@@ -3400,7 +3512,7 @@ def forecast_remaining_sales(loader):
              "ECO4 - November"): headline_original_total_revenue_remaining
         },
         {
-            ("", "", "", "HA Name"): "Total Remaining - post code list - £",
+            ("", "", "", "HA Name"): "Total Remaining - post code list (post CIGA) - £",
             ("", "Original Warmfront estimate", "Total - #",
              "ECO4 - November"): headline_postcode_list_total_revenue_remaining
         },
@@ -3440,14 +3552,16 @@ def forecast_remaining_sales(loader):
             ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(
                 round(median_eco4_to_install * 100, 1)) + "%",
             ("ECO4 original", "", "Remaining - #",
-             ""): " - Sales conversion rate for a ECO4 property that didn't need a CIGA check. Job must not cancel"
+             ""): " - Sales conversion rate for a ECO4 property that didn't need a CIGA check. Surveys that resulted "
+                  "in cancelled install are excluded."
         },
         {
             ("", "", "", "HA Name"): "Median ECO4 (subect to CIGA) sales conversion rate",
             ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(
                 round(median_ciga_pass_to_install * 100, 1)) + "%",
             ("ECO4 original", "", "Remaining - #",
-             ""): " - Sales conversion rate for a ECO4 property that passed a CIGA check. Job must not cancel"
+             ""): " - Sales conversion rate for a ECO4 property that passed a CIGA check. Surveys that resulted in "
+                  "cancelled installs are excluded."
         }
     ]
 
@@ -3462,23 +3576,7 @@ def forecast_remaining_sales(loader):
             pd.DataFrame(assumptions)
         ]
     )
-
-    # header_rows = [
-    #     [name[0] for name in results.columns.values],
-    #     [name[1] for name in results.columns.values],
-    #     [name[2] for name in results.columns.values],
-    #     [name[3] for name in results.columns.values]
-    # ]
-
-    # Step 2: Write the transformed header and DataFrame data to CSV.
-    # Open the file in write mode.
-    import csv
     with open("HA Remaining Analysis.csv", "w", newline="") as file:
-        # writer = csv.writer(file)
-
-        # Write the header rows.
-        # writer.writerows(header_rows)
-
         # Write the DataFrame data without the index (adjust if you want the index).
         results.to_csv(file, header=True, index=False)
 
@@ -3504,8 +3602,12 @@ def app():
     # Grab the December HA figures filepath
     december_figures_filepath = "local_data/ha_data/HA_December_figures.csv"
 
+    # priority_has = [
+    #     "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA38", "HA39", "HA107",
+    # ]
+    # TODO: Remove ECO3 sales from HA25
     priority_has = [
-        "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA38", "HA39", "HA107",
+        "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA28", "HA32", "HA39", "HA107",
     ]
     # Next HAs to do: 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come back on this],
     # Then: 28 [DONE],

From 5b32ac8aad59b1942f80a399d072486ab6db9ec3 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 6 Mar 2024 10:59:07 +0000
Subject: [PATCH 067/248] handling case where property is marked as gbis but
 sold for ECO4

---
 .../ha_15_32/ha_analysis_batch_3.py           | 21 +++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 6d1a3b45..7bfbd7f5 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1509,11 +1509,12 @@ class DataLoader:
                 }
 
                 # We find some cases where properties have sold but are missing CIGA checks
-                survey_list_to_merge = survey_list[["asset_list_row_id"]].copy()
+                survey_list_to_merge = survey_list[["asset_list_row_id", "installation_status"]].copy()
                 survey_list_to_merge["has_a_survey_record"] = True
                 survey_list_to_merge = survey_list_to_merge[~pd.isnull(survey_list_to_merge["asset_list_row_id"])]
 
                 asset_list = asset_list.merge(survey_list_to_merge, how='left', on="asset_list_row_id")
+                # Update the cases where properties have sold, but are missing a CIGA check
                 asset_list["ECO Eligibility"] = np.where(
                     (asset_list["ECO Eligibility"] == "eco4 (subject to ciga)") & (
                         asset_list["has_a_survey_record"] == True
@@ -1521,6 +1522,17 @@ class DataLoader:
                     "eco4 - passed ciga",
                     asset_list["ECO Eligibility"]
                 )
+                # Update the cases where a property has been marked as eligible for GBIS, but sold for ECO4
+                asset_list["ECO Eligibility"] = np.where(
+                    (asset_list["ECO Eligibility"] == "gbis") & (
+                        asset_list["installation_status"].isin(
+                            ["ECO4 - installed", "ECO4 - cancelled"]
+                        )
+                    ),
+                    "eco4",
+                    asset_list["ECO Eligibility"]
+                )
+
                 asset_list = asset_list.drop(columns=["has_a_survey_record"])
 
                 # Update the survey list with installation status
@@ -3199,7 +3211,7 @@ def forecast_remaining_sales(loader):
 
         # Calculate the delta compared to Warmfront's original remaining
         if original_warmfront_remaining_eco4 == 0:
-            eco4_delta_vs_original_estimate_remaining = eco4_post_ciga_remaining_results["ECO4 - post CIGA - #"]
+            eco4_delta_vs_original_estimate_remaining = "N/A"
         else:
             eco4_delta_vs_original_estimate_remaining = ((eco4_post_ciga_remaining_results["ECO4 - post CIGA - #"] -
                                                           original_warmfront_remaining_eco4) /
@@ -3219,9 +3231,6 @@ def forecast_remaining_sales(loader):
         else:
             ha_gbis_sale_conversion = median_gbis_to_install
 
-        asset_list["ECO Eligibility"].value_counts()
-        asset_list_remaining["ECO Eligibility"].value_counts()
-
         gbis_total = eligiblity_counts[
             eligiblity_counts["ECO Eligibility"] == "gbis"
             ]["count"].sum()
@@ -3236,7 +3245,7 @@ def forecast_remaining_sales(loader):
 
         # GBIS delta
         if original_warmfront_remaining_gbis == 0:
-            gbis_delta_vs_original_estimate_remaining = gbis_remaining
+            gbis_delta_vs_original_estimate_remaining = "N/A"
         else:
             gbis_delta_vs_original_estimate_remaining = (
                 (gbis_remaining - original_warmfront_remaining_gbis) / original_warmfront_remaining_gbis

From 9d26c94ae571ce1ba5363e9c850b8017f110bc9d Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 6 Mar 2024 11:35:14 +0000
Subject: [PATCH 068/248] removed stray comma causing bugs

---
 .../ha_15_32/ha_analysis_batch_3.py           | 32 ++++++++++++++++---
 1 file changed, 28 insertions(+), 4 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 7bfbd7f5..e58c7799 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1526,14 +1526,40 @@ class DataLoader:
                 asset_list["ECO Eligibility"] = np.where(
                     (asset_list["ECO Eligibility"] == "gbis") & (
                         asset_list["installation_status"].isin(
-                            ["ECO4 - installed", "ECO4 - cancelled"]
+                            ["ECO4 - installed", "ECO4 - cancelled", "ECO4 - in progress"]
                         )
                     ),
                     "eco4",
                     asset_list["ECO Eligibility"]
                 )
+                # Update the cases where a property was marked as eligible for ECO4, but sold for GBIS
+                asset_list["ECO Eligibility"] = np.where(
+                    (asset_list["ECO Eligibility"].isin(
+                        ["eco4", "eco4 (subject to ciga)", "eco4 - passed ciga", "failed ciga"]
+                    )) & (
+                        asset_list["installation_status"].isin(
+                            ["GBIS - installed", "GBIS - cancelled", "GBIS - in progress"]
+                        )
+                    ),
+                    "gbis",
+                    asset_list["ECO Eligibility"]
+                )
+                # Update the cases where a property is marked as not eligible, but sold for GBIS
+                if ((asset_list["ECO Eligibility"] == "not eligible") & (
+                    asset_list["installation_status"].isin(
+                        ["GBIS - in progress", "GBIS - installed", "GBIS - cancelled"]
+                    ))).sum():
+                    bah
+                asset_list["ECO Eligibility"] = np.where(
+                    (asset_list["ECO Eligibility"] == "not eligible") & (
+                        asset_list["installation_status"].isin(
+                            ["GBIS - in progress", "GBIS - installed", "GBIS - cancelled"]
+                        )),
+                    "gbis",
+                    asset_list["ECO Eligibility"]
+                )
 
-                asset_list = asset_list.drop(columns=["has_a_survey_record"])
+                asset_list = asset_list.drop(columns=["has_a_survey_record", "installation_status"])
 
                 # Update the survey list with installation status
                 self.data[ha_name]["survey_list"] = survey_list
@@ -2897,8 +2923,6 @@ def forecast_remaining_sales(loader):
 
     gbis_rate = 600
     eco4_rate = 1710
-    # old_gbis_rate = 432
-    # old_eco4_rate = 1456
 
     # 1) Calculate the conversion rate from passed CIGA to actual sale
     converted_ciga_jobs = []

From a70260f128aec2785a8000669dc981d8220505a3 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 6 Mar 2024 11:55:02 +0000
Subject: [PATCH 069/248] Update how we handle partially completed jobs

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index e58c7799..060539e1 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1476,7 +1476,7 @@ class DataLoader:
                     # Find partial installations
                     survey_list["installation_status"] = np.where(
                         survey_list["installed_or_cancelled_clean"].str.contains("still to be installed"),
-                        "partially installed",
+                        "in progress",
                         survey_list["installation_status"]
                     )
                     # Find partial cancellations
@@ -1550,6 +1550,7 @@ class DataLoader:
                         ["GBIS - in progress", "GBIS - installed", "GBIS - cancelled"]
                     ))).sum():
                     bah
+
                 asset_list["ECO Eligibility"] = np.where(
                     (asset_list["ECO Eligibility"] == "not eligible") & (
                         asset_list["installation_status"].isin(
@@ -1559,6 +1560,15 @@ class DataLoader:
                     asset_list["ECO Eligibility"]
                 )
 
+                # Update the cases where a property is marked as not eligible, but sold for ECO4
+                asset_list["ECO Eligibility"] = np.where(
+                    (asset_list["ECO Eligibility"] == "not eligible") & (
+                        asset_list["installation_status"].isin(
+                            ["ECO4 - in progress", "ECO4 - installed", "ECO4 - cancelled"]
+                        )
+                    )
+                )
+
                 asset_list = asset_list.drop(columns=["has_a_survey_record", "installation_status"])
 
                 # Update the survey list with installation status

From 4cc467e5142c7eba903d2819d59229643cf93e03 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 6 Mar 2024 11:57:03 +0000
Subject: [PATCH 070/248] fix bug in updating eligibility for initially
 non-eligible rows

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 060539e1..8c03b1ef 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1545,12 +1545,6 @@ class DataLoader:
                     asset_list["ECO Eligibility"]
                 )
                 # Update the cases where a property is marked as not eligible, but sold for GBIS
-                if ((asset_list["ECO Eligibility"] == "not eligible") & (
-                    asset_list["installation_status"].isin(
-                        ["GBIS - in progress", "GBIS - installed", "GBIS - cancelled"]
-                    ))).sum():
-                    bah
-
                 asset_list["ECO Eligibility"] = np.where(
                     (asset_list["ECO Eligibility"] == "not eligible") & (
                         asset_list["installation_status"].isin(
@@ -1566,7 +1560,9 @@ class DataLoader:
                         asset_list["installation_status"].isin(
                             ["ECO4 - in progress", "ECO4 - installed", "ECO4 - cancelled"]
                         )
-                    )
+                    ),
+                    "eco4",
+                    asset_list["ECO Eligibility"]
                 )
 
                 asset_list = asset_list.drop(columns=["has_a_survey_record", "installation_status"])

From 5e991547f7239cf5a84f8e5824d4d9379b825a2a Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 6 Mar 2024 14:08:05 +0000
Subject: [PATCH 071/248] debugging variances, fixed usage of 75% ciga pass rate

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 8c03b1ef..91c198b1 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -3196,8 +3196,8 @@ def forecast_remaining_sales(loader):
             )
         else:
             ha_ciga_conversion_rate = (
-                median_ciga_success_rate if median_ciga_success_rate <= median_ciga_success_rate else
-                median_ciga_success_rate
+                median_ciga_success_rate if median_ciga_success_rate <= maximum_ciga_conversion else
+                maximum_ciga_conversion
             )
 
         # We also need the ha ciga passed to install success rate

From d35d8ea8457ce128ac1fe0c51abd9f83f4e3acaa Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 6 Mar 2024 14:14:50 +0000
Subject: [PATCH 072/248] fixed bug in eligibility counts remaining

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 91c198b1..1e2c5d92 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -3177,7 +3177,7 @@ def forecast_remaining_sales(loader):
         ]["count"].sum()
 
         eco4_pre_ciga_remaining = eligiblity_counts_remaining[
-            eligiblity_counts["ECO Eligibility"].isin(
+            eligiblity_counts_remaining["ECO Eligibility"].isin(
                 ["eco4", "eco4 (subject to ciga)", "eco4 - passed ciga", "failed ciga"]
             )
         ]["count"].sum()

From 680f38963a874eef548883d8f0f365f7958d42b1 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 6 Mar 2024 15:01:33 +0000
Subject: [PATCH 073/248] Added variance columns to output

---
 .../ha_15_32/ha_analysis_batch_3.py           | 49 ++++++++++++++++++-
 1 file changed, 48 insertions(+), 1 deletion(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 1e2c5d92..d4c3f74f 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -2859,21 +2859,30 @@ def calculate_eco4_post_ciga(
             eligiblity_counts["ECO Eligibility"] == "failed ciga"
             ]["count"].sum()
 
+        eco4_no_ciga_needed_or_ciga_passed = eco4_no_ciga_needed + eco4_ciga_passed
+
         eco4_confirmed = (eco4_no_ciga_needed * ha_eco4_to_sale_rate) + (eco4_ciga_passed * ha_ciga_pass_to_sale_rate)
         eco4_confirmed = np.round(eco4_confirmed)
 
+        eco4_no_ciga_needed_cancellations = int(eco4_no_ciga_needed_or_ciga_passed - eco4_confirmed)
+
         if remaining_needing_ciga_check > 0:
             # We update the eco4 post ciga with the converted remaining
             eco4_ciga_expected_remaining_to_pass = np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate)
+
             eco4_remaining_forecast = np.round(
                 eco4_ciga_expected_remaining_to_pass * ha_ciga_pass_to_sale_rate
             )
+            eco4_ciga_needed_cancellations = eco4_ciga_expected_remaining_to_pass - eco4_remaining_forecast
             eco4_estimated_ciga_failures = remaining_needing_ciga_check - eco4_ciga_expected_remaining_to_pass
             eco4_post_ciga = eco4_confirmed + eco4_remaining_forecast
         else:
             eco4_remaining_forecast = 0
             eco4_estimated_ciga_failures = 0
+            eco4_ciga_needed_cancellations = 0
             eco4_post_ciga = eco4_confirmed
+
+        eco4_expected_cancellations = eco4_no_ciga_needed_cancellations + eco4_ciga_needed_cancellations
     else:
         eco4_no_ciga_needed = eligiblity_counts[
             eligiblity_counts["ECO Eligibility"] == "eco4"
@@ -2881,14 +2890,18 @@ def calculate_eco4_post_ciga(
         eco4_confirmed_ciga_failures = 0
         # Multiply by sale conversion
         eco4_confirmed = np.round(eco4_no_ciga_needed * ha_eco4_to_sale_rate)
+        eco4_no_ciga_cancellations = int(eco4_no_ciga_needed - eco4_confirmed)
         eco4_ciga_expected_remaining_to_pass = np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate)
         eco4_estimated_ciga_failures = remaining_needing_ciga_check - eco4_ciga_expected_remaining_to_pass
 
         eco4_remaining_forecast = np.round(
             eco4_ciga_expected_remaining_to_pass * ha_ciga_pass_to_sale_rate
         )
+        eco4_ciga_cancellations = int(eco4_ciga_expected_remaining_to_pass - eco4_remaining_forecast)
         eco4_post_ciga = eco4_confirmed + eco4_remaining_forecast
 
+        eco4_expected_cancellations = eco4_no_ciga_cancellations + eco4_ciga_cancellations
+
     eco4_post_ciga = int(eco4_post_ciga)
     eco4_remaining_forecast = int(eco4_remaining_forecast)
     eco4_confirmed = int(eco4_confirmed)
@@ -2912,6 +2925,9 @@ def calculate_eco4_post_ciga(
         ),
         "Confirmed CIGA failures - £": int(eco4_confirmed_ciga_failures * eco4_rate),
         "Estimated CIGA failures - £": int(eco4_estimated_ciga_failures * eco4_rate),
+        # Expected cancellations
+        "Expected cancellations - #": eco4_expected_cancellations,
+        "Expected cancellations - £": eco4_expected_cancellations * eco4_rate
     }
 
     return results
@@ -3322,6 +3338,28 @@ def forecast_remaining_sales(loader):
                 incomplete_gbis_sales.shape[0] * ha_gbis_sale_conversion * gbis_rate
             )
 
+        # Add in the variance:
+        # We should expect that the pre-ciga total is:
+        # 1) The number of post CIGA successes +
+        # 2) the number of CIGA failures +
+        # 3) The number of cancellations
+        variance_total = eco4_pre_ciga - (
+            eco4_post_ciga_total_results["ECO4 - post CIGA - #"] +
+            eco4_post_ciga_total_results['Estimated total - failed CIGA'] +
+            eco4_post_ciga_total_results["Expected cancellations - #"]
+        )
+        if variance_total != 0:
+            raise ValueError("Something went wrong in variance total")
+
+        variance_remaining = eco4_pre_ciga_remaining - (
+            eco4_post_ciga_remaining_results["ECO4 - post CIGA - #"] +
+            eco4_post_ciga_remaining_results['Estimated total - failed CIGA'] +
+            eco4_post_ciga_remaining_results["Expected cancellations - #"]
+        )
+
+        if variance_remaining != 0:
+            raise ValueError("Something went wrong in variance remaining")
+
         to_append = {
             ("", "", "", "HA Name"): ha_name,
             # ECO4 - original warmfront figures
@@ -3340,6 +3378,8 @@ def forecast_remaining_sales(loader):
             ("", "Warmfront post code list", "Total #", "ECO4 total (pre-ciga)"): eco4_pre_ciga,
             ("ECO4 pre-ciga", "", "Remaining - #", ""): eco4_pre_ciga_remaining,
             ("ECO4 pre-ciga", "", "Total - £", ""): eco4_pre_ciga_revenue,
+            ("ECO4 pre-ciga", "", "VARIANCE - TOTAL", ""): variance_total,
+            ("ECO4 pre-ciga", "", "VARIANCE - REMAINING", ""): variance_remaining,
             ("ECO4 pre-ciga", "", "Sold - £", ""): eco4_actually_sold,
             ("ECO4 pre-ciga", "", "Remaining - £", ""): eco4_pre_ciga_remaining_revenue,
             # ECO4 - asset list, post ciga, total
@@ -3382,6 +3422,13 @@ def forecast_remaining_sales(loader):
             ("ECO4 CIGA failures", "", "Estimated failures - £", ""): eco4_post_ciga_remaining_results[
                 "Estimated CIGA failures - £"
             ],
+            # Expected ECO4 cancellations
+            ("ECO4 Cancellations", "", "Expected cancellations - #", ""): eco4_post_ciga_remaining_results[
+                "Expected cancellations - #"
+            ],
+            ("ECO4 Cancellations", "", "Expected cancellations - £", ""): eco4_post_ciga_remaining_results[
+                "Expected cancellations - £"
+            ],
             # GBIS postcode list
             ("GBIS Postcode list", "Warmfront post code list", "Total - #", "GBIS total"): gbis_total,
             ("GBIS Postcode list", "Warmfront post code list", "Total - £", "GBIS total"): gbis_total_revenue,
@@ -3393,7 +3440,7 @@ def forecast_remaining_sales(loader):
         }
 
         # Make sure nothing is forgotten due to duplicate multi-index keys
-        if len(to_append) != 37:
+        if len(to_append) != 41:
             raise ValueError("Something went wrong")
 
         results.append(to_append)

From e966dfdf6e785cbcc1e2245cce852e842d0def92 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 6 Mar 2024 16:22:20 +0000
Subject: [PATCH 074/248] Adding cancellations to output

---
 .../ha_15_32/ha_analysis_batch_3.py           | 68 +++++++++++++------
 1 file changed, 49 insertions(+), 19 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index d4c3f74f..09b0910e 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -3301,6 +3301,10 @@ def forecast_remaining_sales(loader):
         # For any sales surveys that are complete, that could still cancel, we apply a conversion rate
         eco4_actually_sold = 0
         gbis_actually_sold = 0
+        eco4_confirmed_cancellations = 0
+        eco4_expected_cancellations = 0
+        gbis_confirmed_cancellations = 0
+        gbis_expected_cancellations = 0
         if not survey_list.empty:
             surveys_with_eligibility = survey_list.merge(
                 asset_list[["asset_list_row_id", "ECO Eligibility"]],
@@ -3308,34 +3312,54 @@ def forecast_remaining_sales(loader):
             )
             completed_eco4_sales = surveys_with_eligibility[
                 surveys_with_eligibility["installation_status"] == "ECO4 - installed"
-                ]
+                ].shape[0]
             incomplete_eco4_sales = surveys_with_eligibility[
                 (surveys_with_eligibility["installation_status"] == "ECO4 - in progress") &
                 (~surveys_with_eligibility["ECO Eligibility"].isin(
                     ["eco4 - passed ciga"])
                  )
-                ]
+                ].shape[0]
             incomplete_eco4_sales_ciga = surveys_with_eligibility[
                 (surveys_with_eligibility["installation_status"] == "ECO4 - in progress") &
                 (surveys_with_eligibility["ECO Eligibility"].isin(
                     ["eco4 - passed ciga"])
                 )
-                ]
+                ].shape[0]
 
-            eco4_actually_sold = (completed_eco4_sales.shape[0] * eco4_rate) + (
-                incomplete_eco4_sales.shape[0] * ha_eco4_to_sale_rate +
-                incomplete_eco4_sales_ciga.shape[0] * ha_ciga_pass_to_sale_rate
-            ) * eco4_rate
+            eco4_confirmed_cancellations = surveys_with_eligibility[
+                surveys_with_eligibility["installation_status"] == "ECO4 - cancelled"
+                ].shape[0]
+
+            expected_eco4_sales_no_ciga = np.round(incomplete_eco4_sales * ha_eco4_to_sale_rate)
+            expected_eco4_sales_ciga = np.round(incomplete_eco4_sales_ciga * ha_ciga_pass_to_sale_rate)
+
+            eco4_expected_cancellations = (incomplete_eco4_sales + incomplete_eco4_sales_ciga) - (
+                expected_eco4_sales_no_ciga + expected_eco4_sales_ciga
+            )
+            eco4_expected_cancellations = int(np.round(eco4_expected_cancellations))
+
+            eco4_actually_sold = eco4_rate * (
+                completed_eco4_sales + expected_eco4_sales_no_ciga + expected_eco4_sales_ciga
+            )
 
             completed_gbis_sales = surveys_with_eligibility[
                 surveys_with_eligibility["installation_status"] == "GBIS - installed"
-                ]
+                ].shape[0]
             incomplete_gbis_sales = surveys_with_eligibility[
                 (surveys_with_eligibility["installation_status"] == "GBIS - in progress")
-            ]
+            ].shape[0]
 
-            gbis_actually_sold = completed_gbis_sales.shape[0] * gbis_rate + (
-                incomplete_gbis_sales.shape[0] * ha_gbis_sale_conversion * gbis_rate
+            # Get confirmed cancellations
+            gbis_confirmed_cancellations = surveys_with_eligibility[
+                surveys_with_eligibility["installation_status"] == "GBIS - cancelled"
+                ].shape[0]
+
+            expected_gbis_unconfirmed_sales = incomplete_gbis_sales * ha_gbis_sale_conversion
+
+            gbis_expected_cancellations = int(incomplete_gbis_sales - expected_gbis_unconfirmed_sales)
+
+            gbis_actually_sold = completed_gbis_sales * gbis_rate + (
+                expected_gbis_unconfirmed_sales * gbis_rate
             )
 
         # Add in the variance:
@@ -3381,6 +3405,9 @@ def forecast_remaining_sales(loader):
             ("ECO4 pre-ciga", "", "VARIANCE - TOTAL", ""): variance_total,
             ("ECO4 pre-ciga", "", "VARIANCE - REMAINING", ""): variance_remaining,
             ("ECO4 pre-ciga", "", "Sold - £", ""): eco4_actually_sold,
+            ("ECO4 pre-ciga", "", "Confirmed cancellations - £", ""): eco4_confirmed_cancellations,
+            # This is for jobs that are in-progress and could still cancel
+            ("ECO4 pre-ciga", "", "Unconfirmed cancellations - £", ""): eco4_expected_cancellations,
             ("ECO4 pre-ciga", "", "Remaining - £", ""): eco4_pre_ciga_remaining_revenue,
             # ECO4 - asset list, post ciga, total
             ("ECO4 post-ciga", "", "Estimated total eligible - #", "ECO4 total"):
@@ -3403,6 +3430,13 @@ def forecast_remaining_sales(loader):
                 eco4_post_ciga_remaining_results["Of which forecast - #"],
             ("ECO4 post-ciga", "", "Of which forecast - £", ""):
                 eco4_post_ciga_remaining_results["Of which forecast - £"],
+            # Expected ECO4 cancellations
+            ("ECO4 Cancellations", "", "Of which expected cancellations - #", ""): eco4_post_ciga_remaining_results[
+                "Expected cancellations - #"
+            ],
+            ("ECO4 Cancellations", "", "Of which expected cancellations - £", ""): eco4_post_ciga_remaining_results[
+                "Expected cancellations - £"
+            ],
             # CIGA failures
             ("ECO4 CIGA failures", "", "Estimated total - failed CIGA - #", ""): eco4_post_ciga_remaining_results[
                 'Estimated total - failed CIGA'
@@ -3422,17 +3456,13 @@ def forecast_remaining_sales(loader):
             ("ECO4 CIGA failures", "", "Estimated failures - £", ""): eco4_post_ciga_remaining_results[
                 "Estimated CIGA failures - £"
             ],
-            # Expected ECO4 cancellations
-            ("ECO4 Cancellations", "", "Expected cancellations - #", ""): eco4_post_ciga_remaining_results[
-                "Expected cancellations - #"
-            ],
-            ("ECO4 Cancellations", "", "Expected cancellations - £", ""): eco4_post_ciga_remaining_results[
-                "Expected cancellations - £"
-            ],
             # GBIS postcode list
             ("GBIS Postcode list", "Warmfront post code list", "Total - #", "GBIS total"): gbis_total,
             ("GBIS Postcode list", "Warmfront post code list", "Total - £", "GBIS total"): gbis_total_revenue,
             ("GBIS Postcode list", "Warmfront post code list", "Sold - £", "GBIS total"): gbis_actually_sold,
+            ("GBIS Postcode list", "", "Confirmed cancellations - £", ""): gbis_confirmed_cancellations,
+            # This is for jobs that are in-progress and could still cancel
+            ("GBIS Postcode list", "", "Unconfirmed cancellations - £", ""): gbis_expected_cancellations,
             ("GBIS Postcode list", "Warmfront post code list", "Remaining - #", "GBIS total"): gbis_remaining,
             ("GBIS Postcode list", "Warmfront post code list", "Remaining - £", "GBIS total"): gbis_remaining_revenue,
             ("GBIS Postcode list", "", "Delta vs original estimate, remaining - %", ""):
@@ -3440,7 +3470,7 @@ def forecast_remaining_sales(loader):
         }
 
         # Make sure nothing is forgotten due to duplicate multi-index keys
-        if len(to_append) != 41:
+        if len(to_append) != 45:
             raise ValueError("Something went wrong")
 
         results.append(to_append)

From e2055b3b7dde7a1b001a568c23bb3016fbfa4079 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 6 Mar 2024 19:34:43 +0000
Subject: [PATCH 075/248] fixed variance for HA6

---
 .../ha_15_32/ha_analysis_batch_3.py           | 135 +++++++++++++++++-
 1 file changed, 129 insertions(+), 6 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 09b0910e..8c9f59c2 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -730,6 +730,81 @@ class DataLoader:
             "Post Code"
         ] = "ST5 7BY"
 
+        # PERFORM ADDITIONAL DROPS
+        # Dropping rows based on multiple conditions
+        conditions_to_drop = [
+            (survey_list['Street / Block Name'] == "Bedford Crescent") & (survey_list['Post Code'] == "ST5 3EH") & (
+                survey_list['NO.'] == 23) & (survey_list['INSTALLED OR CANCELLED'].str.contains("NO UPDATE YET")),
+            (survey_list['Street / Block Name'] == "Hereford Avenue") & (survey_list['Post Code'] == "ST5 3EJ") & (
+                survey_list['NO.'] == 92) & (survey_list['INSTALLED OR CANCELLED'].str.contains("NO UPDATE YET")),
+            (survey_list['Street / Block Name'] == "Seabridge Lane") & (survey_list['Post Code'] == "ST5 3EX") & (
+                survey_list['NO.'].isin([16, 18, 42])) & (
+                survey_list['INSTALLED OR CANCELLED'].str.contains("NO UPDATE YET")),
+            (survey_list['Street / Block Name'] == "ESKDALE PLACE") & (survey_list['Post Code'] == "ST5 3QW") & (
+                survey_list['NO.'] == 5) & (survey_list['SUBMISSION DATE'].astype(str) == "2023-03-06 00:00:00"),
+            (survey_list['Street / Block Name'] == "Birch House road") & (survey_list['Post Code'] == "ST6 2LS") & (
+                survey_list['NO.'].isin([56, 58])),
+            (survey_list['Street / Block Name'] == "Blackthorn Place") & (survey_list['Post Code'] == "ST6 2LS") & (
+                survey_list['NO.'].isin([37, 39])),
+            (survey_list['Street / Block Name'] == "Whitethorn Way") & (survey_list['Post Code'] == "ST5 7BT") & (
+                survey_list['NO.'].isin([17, 6])),
+            (survey_list['Street / Block Name'] == "Lion Grove") & (survey_list['Post Code'] == "ST5 7HQ") & (
+                survey_list['NO.'].isin([10, 12])) & (
+                survey_list['INSTALLED OR CANCELLED'].str.contains("NO UPDATE YET")),
+            (survey_list['Street / Block Name'] == "DENRY CRESCENT") & (survey_list['Post Code'] == "ST5 8JW") & (
+                survey_list['NO.'] == 87) & (survey_list['INSTALLED OR CANCELLED'].str.contains("NO UPDATE YET")),
+            (survey_list['Street / Block Name'] == "HOLLINS CRESCENT") & (survey_list['Post Code'] == "ST7 1JW") & (
+                survey_list['NO.'] == 19)
+        ]
+
+        # Combine all conditions with an OR "|"
+        combined_condition = np.logical_or.reduce(conditions_to_drop)
+
+        # Drop rows that meet the combined condition
+        survey_list = survey_list[~combined_condition]
+
+        # Making replacements using np.where
+        survey_list['Post Code'] = np.where(
+            (survey_list['Street / Block Name'] == "Whitethorn Way") & (survey_list['Post Code'] == "ST5 3EH") & (
+                survey_list['NO.'] == 17),
+            "ST5 7BT",
+            survey_list['Post Code']
+        )
+
+        survey_list['Post Code'] = np.where(
+            (survey_list['Street / Block Name'] == "Whitethorn Way") & (survey_list['Post Code'] == "ST5 3ED") & (
+                survey_list['NO.'] == 6),
+            "ST5 7BT",
+            survey_list['Post Code']
+        )
+
+        # Maple avenue (stoke on trent, not newcastle) should be st7 1jw
+        survey_list["Post Code"] = np.where(
+            (survey_list["Street / Block Name"].str.lower().str.contains("maple avenue")) & (
+                survey_list["Post Code"].str.lower() == "st7 1jx"
+            ),
+            "st7 1jw",
+            survey_list["Post Code"]
+        )
+
+        # Hollins Crescent should be st7 1jx
+        survey_list["Post Code"] = np.where(
+            (survey_list["Street / Block Name"].str.lower().str.contains("hollins crescent")) & (
+                survey_list["Post Code"].str.lower() == "st7 1jw"
+            ),
+            "st7 1jx",
+            survey_list["Post Code"]
+        )
+
+        # Additional drops as the above misses some:
+        survey_list = survey_list[
+            ~((survey_list["NO."].astype(str).isin(["18", "42"])) &
+              (survey_list["Street / Block Name"] == "Seabridge Lane") &
+              (survey_list["Post Code"] == "ST5 3EY") &
+              (survey_list["SUBMISSION DATE"].astype(str) == "24.07.2023") &
+              (survey_list["INSTALLED OR CANCELLED"].str.contains("NO UPDATE YET")))
+        ]
+
         return survey_list
 
     @staticmethod
@@ -1176,6 +1251,11 @@ class DataLoader:
         if matching_lookup.shape[0] != survey_list.shape[0]:
             raise ValueError("Mismatch in the number of survey rows and matching lookup rows")
 
+        matching_lookup = matching_lookup[~pd.isnull(matching_lookup["asset_list_row_id"])]
+
+        if matching_lookup["asset_list_row_id"].duplicated().sum():
+            raise ValueError("Duplicated matches in survey list")
+
         # Merge onto the survey list
         survey_list = survey_list.merge(matching_lookup, how='left', on="survey_list_row_id")
 
@@ -1483,7 +1563,7 @@ class DataLoader:
                     # TODO: We might have more indications of partial cancellations
                     survey_list["installation_status"] = np.where(
                         survey_list["installed_or_cancelled_clean"].isin(["loft cancelled"]),
-                        "partially cancelled",
+                        "cancelled",
                         survey_list["installation_status"]
                     )
                 else:
@@ -3174,6 +3254,8 @@ def forecast_remaining_sales(loader):
         if survey_list.empty:
             asset_list_remaining = asset_list.copy()
         else:
+            # For HA6, there are a small number of postcodes that do not match to any item in the asset list
+            survey_list = survey_list[~pd.isnull(survey_list["asset_list_row_id"])]
             asset_list_remaining = asset_list.merge(
                 survey_list[["asset_list_row_id", "installation_status"]],
                 how="left",
@@ -3183,6 +3265,47 @@ def forecast_remaining_sales(loader):
             asset_list_remaining = asset_list_remaining[pd.isnull(asset_list_remaining["installation_status"])]
             asset_list_remaining = asset_list_remaining.drop(columns=["installation_status"])
 
+        # # TODO: TEMP
+        # n_pre_ciga = asset_list[
+        #     asset_list["ECO Eligibility"].isin(
+        #         [
+        #             "eco4 - passed ciga",
+        #             "eco4 (subject to ciga)",
+        #             "failed ciga",
+        #             "eco4"
+        #         ]
+        #     )
+        # ].shape[0]
+        #
+        # n_pre_ciga_remaining = asset_list_remaining[
+        #     asset_list_remaining["ECO Eligibility"].isin(
+        #         [
+        #             "eco4 - passed ciga",
+        #             "eco4 (subject to ciga)",
+        #             "failed ciga",
+        #             "eco4"
+        #         ]
+        #     )
+        # ].shape[0]
+        #
+        # compare_to_ids = asset_list_remaining["asset_list_row_id"].values
+        # assets_diff_ids = [x for x in asset_list["asset_list_row_id"].values if x not in compare_to_ids]
+        # diff = asset_list[asset_list["asset_list_row_id"].isin(assets_diff_ids)]
+        #
+        # n_sold = survey_list[survey_list["installation_status"].str.contains("ECO4")].shape[0]
+        # # cancellations = survey_list[]
+        # asset_list["ECO Eligibility"].value_counts()
+        #
+        # # Revenenue
+        # pre_ciga_revenue = n_pre_ciga * eco4_rate
+        # pre_ciga_remaining_revenue = n_pre_ciga_remaining * eco4_rate
+        # sold_revenue = n_sold * eco4_rate
+        #
+        # pre_ciga_revenue - (pre_ciga_remaining_revenue + sold_revenue)
+        # # MISSING 1 SALE from sold
+        # cancelled = survey_list[survey_list["installation_status"].str.contains("ECO4")].shape[0]
+        # # TODO: END TEMP
+
         eligiblity_counts = pd.DataFrame(asset_list["ECO Eligibility"].value_counts()).reset_index()
         eligiblity_counts_remaining = pd.DataFrame(asset_list_remaining["ECO Eligibility"].value_counts()).reset_index()
 
@@ -3402,13 +3525,13 @@ def forecast_remaining_sales(loader):
             ("", "Warmfront post code list", "Total #", "ECO4 total (pre-ciga)"): eco4_pre_ciga,
             ("ECO4 pre-ciga", "", "Remaining - #", ""): eco4_pre_ciga_remaining,
             ("ECO4 pre-ciga", "", "Total - £", ""): eco4_pre_ciga_revenue,
+            ("ECO4 pre-ciga", "", "Remaining - £", ""): eco4_pre_ciga_remaining_revenue,
             ("ECO4 pre-ciga", "", "VARIANCE - TOTAL", ""): variance_total,
             ("ECO4 pre-ciga", "", "VARIANCE - REMAINING", ""): variance_remaining,
             ("ECO4 pre-ciga", "", "Sold - £", ""): eco4_actually_sold,
-            ("ECO4 pre-ciga", "", "Confirmed cancellations - £", ""): eco4_confirmed_cancellations,
+            ("ECO4 pre-ciga", "", "Confirmed cancellations - £", ""): eco4_confirmed_cancellations * eco4_rate,
             # This is for jobs that are in-progress and could still cancel
-            ("ECO4 pre-ciga", "", "Unconfirmed cancellations - £", ""): eco4_expected_cancellations,
-            ("ECO4 pre-ciga", "", "Remaining - £", ""): eco4_pre_ciga_remaining_revenue,
+            ("ECO4 pre-ciga", "", "Unconfirmed cancellations - £", ""): eco4_expected_cancellations * eco4_rate,
             # ECO4 - asset list, post ciga, total
             ("ECO4 post-ciga", "", "Estimated total eligible - #", "ECO4 total"):
                 eco4_post_ciga_total_results[
@@ -3460,9 +3583,9 @@ def forecast_remaining_sales(loader):
             ("GBIS Postcode list", "Warmfront post code list", "Total - #", "GBIS total"): gbis_total,
             ("GBIS Postcode list", "Warmfront post code list", "Total - £", "GBIS total"): gbis_total_revenue,
             ("GBIS Postcode list", "Warmfront post code list", "Sold - £", "GBIS total"): gbis_actually_sold,
-            ("GBIS Postcode list", "", "Confirmed cancellations - £", ""): gbis_confirmed_cancellations,
+            ("GBIS Postcode list", "", "Confirmed cancellations - £", ""): gbis_confirmed_cancellations * gbis_rate,
             # This is for jobs that are in-progress and could still cancel
-            ("GBIS Postcode list", "", "Unconfirmed cancellations - £", ""): gbis_expected_cancellations,
+            ("GBIS Postcode list", "", "Unconfirmed cancellations - £", ""): gbis_expected_cancellations * gbis_rate,
             ("GBIS Postcode list", "Warmfront post code list", "Remaining - #", "GBIS total"): gbis_remaining,
             ("GBIS Postcode list", "Warmfront post code list", "Remaining - £", "GBIS total"): gbis_remaining_revenue,
             ("GBIS Postcode list", "", "Delta vs original estimate, remaining - %", ""):

From 21082d8d3779a75cae422becf1a6e589ebcbaba6 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 6 Mar 2024 19:46:28 +0000
Subject: [PATCH 076/248] fixed duplication variance for HA16

---
 .../ha_15_32/ha_analysis_batch_3.py           | 20 ++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 8c9f59c2..7859d6d2 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -960,6 +960,21 @@ class DataLoader:
             survey_list["NO."]
         )
 
+        # Delete some duplicated entries
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "york road") &
+              (survey_list["NO."].astype(str) == "12") &
+              (survey_list["Post Code"] == "M44 5HU") &
+              (survey_list["SUBMISSION DATE"].astype(str) == "45229"))
+        ]
+
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "peatfield avenue") &
+              (survey_list["NO."].astype(str) == "23") &
+              (survey_list["Post Code"] == "M27 9XG") &
+              (survey_list["SUBMISSION DATE"].astype(str) == "45236"))
+        ]
+
         return survey_list
 
     @staticmethod
@@ -3265,7 +3280,7 @@ def forecast_remaining_sales(loader):
             asset_list_remaining = asset_list_remaining[pd.isnull(asset_list_remaining["installation_status"])]
             asset_list_remaining = asset_list_remaining.drop(columns=["installation_status"])
 
-        # # TODO: TEMP
+        # TODO: TEMP
         # n_pre_ciga = asset_list[
         #     asset_list["ECO Eligibility"].isin(
         #         [
@@ -3304,6 +3319,9 @@ def forecast_remaining_sales(loader):
         # pre_ciga_revenue - (pre_ciga_remaining_revenue + sold_revenue)
         # # MISSING 1 SALE from sold
         # cancelled = survey_list[survey_list["installation_status"].str.contains("ECO4")].shape[0]
+        # dupes = survey_list[survey_list["asset_list_row_id"].duplicated()]["asset_list_row_id"].values
+        # z = survey_list[survey_list["asset_list_row_id"].isin(dupes)]
+        # z[['NO.', 'Street / Block Name', 'Post Code', 'INSTALLED OR CANCELLED', 'SUBMISSION DATE']]
         # # TODO: END TEMP
 
         eligiblity_counts = pd.DataFrame(asset_list["ECO Eligibility"].value_counts()).reset_index()

From af13467c2c4c9b7fc98e5be1e343399f57c062fb Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 6 Mar 2024 20:04:37 +0000
Subject: [PATCH 077/248] Added gbis variance checks

---
 .../ha_15_32/ha_analysis_batch_3.py           | 83 ++++++++-----------
 1 file changed, 36 insertions(+), 47 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 7859d6d2..553f6271 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -3237,6 +3237,7 @@ def forecast_remaining_sales(loader):
 
     results = []
     for ha_name, input_data in loader.data.items():
+
         # Original warmfront figures - ECO4
         original_warmfront_estimates = december_figures[december_figures["HA Name"] == ha_name]
 
@@ -3280,50 +3281,6 @@ def forecast_remaining_sales(loader):
             asset_list_remaining = asset_list_remaining[pd.isnull(asset_list_remaining["installation_status"])]
             asset_list_remaining = asset_list_remaining.drop(columns=["installation_status"])
 
-        # TODO: TEMP
-        # n_pre_ciga = asset_list[
-        #     asset_list["ECO Eligibility"].isin(
-        #         [
-        #             "eco4 - passed ciga",
-        #             "eco4 (subject to ciga)",
-        #             "failed ciga",
-        #             "eco4"
-        #         ]
-        #     )
-        # ].shape[0]
-        #
-        # n_pre_ciga_remaining = asset_list_remaining[
-        #     asset_list_remaining["ECO Eligibility"].isin(
-        #         [
-        #             "eco4 - passed ciga",
-        #             "eco4 (subject to ciga)",
-        #             "failed ciga",
-        #             "eco4"
-        #         ]
-        #     )
-        # ].shape[0]
-        #
-        # compare_to_ids = asset_list_remaining["asset_list_row_id"].values
-        # assets_diff_ids = [x for x in asset_list["asset_list_row_id"].values if x not in compare_to_ids]
-        # diff = asset_list[asset_list["asset_list_row_id"].isin(assets_diff_ids)]
-        #
-        # n_sold = survey_list[survey_list["installation_status"].str.contains("ECO4")].shape[0]
-        # # cancellations = survey_list[]
-        # asset_list["ECO Eligibility"].value_counts()
-        #
-        # # Revenenue
-        # pre_ciga_revenue = n_pre_ciga * eco4_rate
-        # pre_ciga_remaining_revenue = n_pre_ciga_remaining * eco4_rate
-        # sold_revenue = n_sold * eco4_rate
-        #
-        # pre_ciga_revenue - (pre_ciga_remaining_revenue + sold_revenue)
-        # # MISSING 1 SALE from sold
-        # cancelled = survey_list[survey_list["installation_status"].str.contains("ECO4")].shape[0]
-        # dupes = survey_list[survey_list["asset_list_row_id"].duplicated()]["asset_list_row_id"].values
-        # z = survey_list[survey_list["asset_list_row_id"].isin(dupes)]
-        # z[['NO.', 'Street / Block Name', 'Post Code', 'INSTALLED OR CANCELLED', 'SUBMISSION DATE']]
-        # # TODO: END TEMP
-
         eligiblity_counts = pd.DataFrame(asset_list["ECO Eligibility"].value_counts()).reset_index()
         eligiblity_counts_remaining = pd.DataFrame(asset_list_remaining["ECO Eligibility"].value_counts()).reset_index()
 
@@ -3525,6 +3482,35 @@ def forecast_remaining_sales(loader):
         if variance_remaining != 0:
             raise ValueError("Something went wrong in variance remaining")
 
+        # We also check variances to make sure that the pre-CIGA ECO4 total equals
+        # 1) Pre CIGA remaining +
+        # 2) ECO4 sold +
+        # 3) ECO4 confirmed cancellations +
+        # 4) ECO4 unconfirmed cancellations
+
+        pre_ciga_eco4_variance = (
+            eco4_pre_ciga_revenue -
+            eco4_pre_ciga_remaining_revenue -
+            eco4_actually_sold -
+            eco4_confirmed_cancellations * eco4_rate -
+            eco4_expected_cancellations * eco4_rate
+        )
+
+        if pre_ciga_eco4_variance != 0:
+            raise ValueError("Something went wrong in pre_ciga_eco4_variance")
+
+        # Check GBIS total variance
+        gbis_variance = (
+            gbis_total_revenue -
+            gbis_actually_sold -
+            gbis_confirmed_cancellations * gbis_rate -
+            gbis_expected_cancellations * gbis_rate -
+            gbis_remaining_revenue
+        )
+
+        if gbis_variance != 0:
+            raise ValueError("Something went wrong in gbis_variance")
+
         to_append = {
             ("", "", "", "HA Name"): ha_name,
             # ECO4 - original warmfront figures
@@ -3544,8 +3530,10 @@ def forecast_remaining_sales(loader):
             ("ECO4 pre-ciga", "", "Remaining - #", ""): eco4_pre_ciga_remaining,
             ("ECO4 pre-ciga", "", "Total - £", ""): eco4_pre_ciga_revenue,
             ("ECO4 pre-ciga", "", "Remaining - £", ""): eco4_pre_ciga_remaining_revenue,
-            ("ECO4 pre-ciga", "", "VARIANCE - TOTAL", ""): variance_total,
-            ("ECO4 pre-ciga", "", "VARIANCE - REMAINING", ""): variance_remaining,
+            ("ECO4 pre-ciga", "", "VARIANCE - PRE-CIGA ECO4 TOTAL", ""): pre_ciga_eco4_variance,
+            ("ECO4 pre-ciga", "", "VARIANCE - PRE-CIGA ECO4 TOTAL VS ELIGIBLE & INELIGIBLE", ""): variance_total,
+            ("ECO4 pre-ciga", "", "VARIANCE - PRE-CIGA ECO4 REMAINING VS ELIGIBLE & INELIGIBLE", ""):
+                variance_remaining,
             ("ECO4 pre-ciga", "", "Sold - £", ""): eco4_actually_sold,
             ("ECO4 pre-ciga", "", "Confirmed cancellations - £", ""): eco4_confirmed_cancellations * eco4_rate,
             # This is for jobs that are in-progress and could still cancel
@@ -3600,6 +3588,7 @@ def forecast_remaining_sales(loader):
             # GBIS postcode list
             ("GBIS Postcode list", "Warmfront post code list", "Total - #", "GBIS total"): gbis_total,
             ("GBIS Postcode list", "Warmfront post code list", "Total - £", "GBIS total"): gbis_total_revenue,
+            ("GBIS Postcode list", "Warmfront post code list", "GBIS VARIANCE", "GBIS total"): gbis_variance,
             ("GBIS Postcode list", "Warmfront post code list", "Sold - £", "GBIS total"): gbis_actually_sold,
             ("GBIS Postcode list", "", "Confirmed cancellations - £", ""): gbis_confirmed_cancellations * gbis_rate,
             # This is for jobs that are in-progress and could still cancel
@@ -3611,7 +3600,7 @@ def forecast_remaining_sales(loader):
         }
 
         # Make sure nothing is forgotten due to duplicate multi-index keys
-        if len(to_append) != 45:
+        if len(to_append) != 47:
             raise ValueError("Something went wrong")
 
         results.append(to_append)

From 8dcb6a9be0f903fc06e4c9dcb3218bb1d6db949e Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 6 Mar 2024 21:11:17 +0000
Subject: [PATCH 078/248] 11% through matching ha38

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 553f6271..6998eb4b 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1054,6 +1054,17 @@ class DataLoader:
             'Kingsford Court, Coombe Valley Road', 'Kingsford Court'
         )
 
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            'LESLIE TEW COURT/DERWENT ROAD', 'LESLIE TEW COURT'
+        )
+
+        # There is no 18A LESLIE TEW COURT in the asset list
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "LESLIE TEW COURT") &
+              (survey_list["Post Code"] == "TN10 3TX") &
+              (survey_list["NO."] == "18A"))
+        ]
+
         return survey_list
 
     @staticmethod
@@ -3848,12 +3859,10 @@ def app():
     # Grab the December HA figures filepath
     december_figures_filepath = "local_data/ha_data/HA_December_figures.csv"
 
-    # priority_has = [
-    #     "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA38", "HA39", "HA107",
-    # ]
+    # Add in: "HA25"
     # TODO: Remove ECO3 sales from HA25
     priority_has = [
-        "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA28", "HA32", "HA39", "HA107",
+        "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA28", "HA32", "HA38", "HA39", "HA107",
     ]
     # Next HAs to do: 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come back on this],
     # Then: 28 [DONE],

From 17b5f6e140a90d261b790fee1a4a28f43d1e3a62 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 6 Mar 2024 21:42:17 +0000
Subject: [PATCH 079/248] ha38 23% merged

---
 .../ha_15_32/ha_analysis_batch_3.py           | 50 ++++++++++++++-----
 1 file changed, 38 insertions(+), 12 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 6998eb4b..ff39b190 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1065,6 +1065,24 @@ class DataLoader:
               (survey_list["NO."] == "18A"))
         ]
 
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            'Brindley House, Wellbeck Road', 'Brindley House'
+        )
+
+        # Try taking just the first part of the string, splitting on a /
+        survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.split('/').str[0].str.strip()
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            'HUNTSMAN WAY', 'HUNTSMANS WAY'
+        )
+
+        # Try taking just the first part of the string, splitting on a ,
+        survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.split(',').str[0].str.strip()
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "McCLAREN COURT", "MCLAREN COURT"
+        )
+
         return survey_list
 
     @staticmethod
@@ -1228,6 +1246,10 @@ class DataLoader:
                 if "flat" in str(house_number):
                     house_number = house_number.split("flat")[1].strip()
 
+                # We check if we had an instance of flat x, y
+                if "," in str(house_number):
+                    house_number = house_number.split(",")[0].strip()
+
             df = df[df["matching_address"].str.contains(str(house_number))]
 
             if df.empty:
@@ -1251,19 +1273,23 @@ class DataLoader:
                 df = df[df["HouseNo"].astype(str) == str(house_number)]
                 if df.shape[0] != 1:
                     df = df[df["matching_postcode"].str.lower().str.contains(row["Post Code"].lower().strip())]
-
-                    full_key = str(row["NO."]).lower().strip() + row["Street / Block Name"].lower().strip() + row[
-                        "Town/Area"].lower().strip() + row["Post Code"].lower().strip()
-                    # Remove any spaces from the full key
-                    full_key = full_key.replace(" ", "")
-
-                    df = self.levenstein_match(full_key, df)
-
                     if df.shape[0] != 1:
-                        print(row["Street / Block Name"])
-                        print(house_number)
-                        print(row["Post Code"])
-                        raise ValueError("Investigate")
+                        if "Town/Area" not in row.keys():
+                            full_key = (str(row["NO."]).lower().strip() + row["Street / Block Name"].lower().strip() +
+                                        row["Post Code"].lower().strip())
+                        else:
+                            full_key = str(row["NO."]).lower().strip() + row["Street / Block Name"].lower().strip() + \
+                                       row["Town/Area"].lower().strip() + row["Post Code"].lower().strip()
+                        # Remove any spaces from the full key
+                        full_key = full_key.replace(" ", "")
+
+                        df = self.levenstein_match(full_key, df)
+
+                        if df.shape[0] != 1:
+                            print(row["Street / Block Name"])
+                            print(house_number)
+                            print(row["Post Code"])
+                            raise ValueError("Investigate")
 
             matching_lookup.append(
                 {

From 8e258ff3ca164e2eddcd9cc74d1e7531bf655e4f Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 6 Mar 2024 22:29:18 +0000
Subject: [PATCH 080/248] 44% through matching

---
 .../ha_15_32/ha_analysis_batch_3.py           | 70 ++++++++++++++++++-
 1 file changed, 69 insertions(+), 1 deletion(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index ff39b190..567394a4 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1083,6 +1083,70 @@ class DataLoader:
             "McCLAREN COURT", "MCLAREN COURT"
         )
 
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "ST JAMES CLOISTERS", "ST. JAMES'S CLOISTERS"
+        )
+
+        survey_list["Street / Block Name"] = np.where(
+            ((survey_list["NO."].isin(
+                [
+                    "FLAT 1 22",
+                    "FLAT 2 22",
+                    "FLAT 3 22",
+                    "FLAT 4 22",
+                    "FLAT 5 22",
+                    "FLAT 6 22",
+                ]
+            )) &
+             (survey_list["Street / Block Name"] == "MELTON ROAD")),
+            "22 MELTON ROAD",
+            survey_list["Street / Block Name"]
+        )
+
+        survey_list["Street / Block Name"] = np.where(
+            ((survey_list["NO."].isin(
+                [
+                    "FLAT 1 24",
+                    "FLAT 2 24",
+                    "FLAT 3 24",
+                    "FLAT 4 24",
+                    "FLAT 5 24",
+                    "FLAT 6 24",
+                ]
+            )) &
+             (survey_list["Street / Block Name"] == "MELTON ROAD")),
+            "24 MELTON ROAD",
+            survey_list["Street / Block Name"]
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "TURRETT GREEN COURT SILENT STREET", "TURRET GREEN COURT"
+        )
+
+        # Turret green court flat 1 doesn't exist in the asset list
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "TURRET GREEN COURT") &
+              (survey_list["NO."] == 1))
+        ]
+        # 3, 45 raywell steet doesn't exist in the asset list
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "45 RAYWELL STREET") &
+              (survey_list["NO."] == 3))
+        ]
+
+        # 40 Avondale drive doesn't exist in the asset list
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "Avondale Drive") &
+              (survey_list["NO."] == 40))
+        ]
+        # 17A beech road has the wrong postcode
+        survey_list["Post Code"] = np.where(
+            (survey_list["Street / Block Name"] == "BEECH ROAD") &
+            (survey_list["Post Code"] == "DH6 1JD"),
+            "DH6 1JB",
+            survey_list["Post Code"]
+        )
+
         return survey_list
 
     @staticmethod
@@ -1250,6 +1314,10 @@ class DataLoader:
                 if "," in str(house_number):
                     house_number = house_number.split(",")[0].strip()
 
+                # We may also have a space for an instance of flat x y
+                if " " in str(house_number):
+                    house_number = house_number.split(" ")[0].strip()
+
             df = df[df["matching_address"].str.contains(str(house_number))]
 
             if df.empty:
@@ -1270,7 +1338,7 @@ class DataLoader:
                 raise ValueError("Investigate")
 
             if df.shape[0] != 1:
-                df = df[df["HouseNo"].astype(str) == str(house_number)]
+                df = df[df["HouseNo"].astype(str).str.lower() == str(house_number)]
                 if df.shape[0] != 1:
                     df = df[df["matching_postcode"].str.lower().str.contains(row["Post Code"].lower().strip())]
                     if df.shape[0] != 1:

From 067a66c1b172b63abc419a112525382ce7c2baa3 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 6 Mar 2024 22:45:22 +0000
Subject: [PATCH 081/248] ha38 wip - leaving for now

---
 .../ha_15_32/ha_analysis_batch_3.py           | 54 ++++++++++++++++++-
 1 file changed, 53 insertions(+), 1 deletion(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 567394a4..c4f6307c 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -599,7 +599,52 @@ class DataLoader:
         asset_list['ExtractedHouseNo'] = asset_list['matching_address'].apply(extract_house_no_if_flat)
         asset_list.loc[asset_list['ExtractedHouseNo'].notnull(), 'HouseNo'] = asset_list['ExtractedHouseNo']
         asset_list['matching_address'] = asset_list['matching_address'].apply(rearrange_address_if_flat)
-        # We then need to
+
+        # We update a few specific rows
+        asset_list["HouseNo"] = np.where(
+            (asset_list["Address_Line_1"].isin(
+                [
+                    "10 SOUTH VIEW/ROOM A1",
+                    "10 SOUTH VIEW/ROOM A2",
+                    "10 SOUTH VIEW/ROOM A3",
+                ]
+            )),
+            "10A",
+            asset_list["HouseNo"]
+        )
+
+        asset_list["matching_address"] = np.where(
+            (asset_list["Address_Line_1"].isin(
+                [
+                    "10 SOUTH VIEW/ROOM A1",
+                ]
+            )),
+            "10a, 10 south view/room a1, spennymoor, co. durham, dl16 7df'",
+            asset_list["matching_address"]
+        )
+
+        asset_list["HouseNo"] = np.where(
+            (asset_list["Address_Line_1"].isin(
+                [
+                    "10 SOUTH VIEW/ROOM B1",
+                    "10 SOUTH VIEW/ROOM B2",
+                    "10 SOUTH VIEW/ROOM B3",
+                    "10 SOUTH VIEW/ROOM B4",
+                ]
+            )),
+            "10B",
+            asset_list["HouseNo"]
+        )
+
+        asset_list["matching_address"] = np.where(
+            (asset_list["Address_Line_1"].isin(
+                [
+                    "10 SOUTH VIEW/ROOM B1",
+                ]
+            )),
+            "10b, 10 south view/room b1, spennymoor, co. durham, dl16 7df",
+            asset_list["matching_address"]
+        )
 
         return asset_list
 
@@ -1147,6 +1192,13 @@ class DataLoader:
             survey_list["Post Code"]
         )
 
+        survey_list["Street / Block Name"] = np.where(
+            (survey_list["Street / Block Name"] == "SOUTHVIEW") &
+            (survey_list["Post Code"] == "DL16 7DF"),
+            "SOUTH VIEW",
+            survey_list["Street / Block Name"]
+        )
+
         return survey_list
 
     @staticmethod

From 5c3f6320dd6bfc2ddaac4fefb8786646c50e7945 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 10:42:51 +0000
Subject: [PATCH 082/248] 29% through matching eco3 ha25

---
 .../ha_15_32/ha_analysis_batch_3.py           | 136 +++++++++++++++---
 1 file changed, 117 insertions(+), 19 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index c4f6307c..3ea9649e 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -183,7 +183,7 @@ class DataLoader:
 
     def create_asset_list_matching_address(self, ha_name, asset_list):
 
-        if ha_name in ["HA1", "HA6", "HA16", "HA24", "HA25"]:
+        if ha_name in ["HA1", "HA6", "HA16", "HA24"]:
             asset_list["matching_address"] = asset_list[
                 self.COLUMN_CONFIG[ha_name]["address"]
             ].astype(str).str.lower().str.strip()
@@ -214,6 +214,14 @@ class DataLoader:
                 asset_list["Postcode"].astype(str).str.lower().str.strip()
             )
             asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA25":
+            asset_list["matching_address"] = asset_list[
+                self.COLUMN_CONFIG[ha_name]["address"]
+            ].astype(str).str.lower().str.strip()
+
+            asset_list["matching_postcode"] = asset_list['matching_address'].apply(
+                lambda x: ' '.join(x.split()[-2:]) if pd.notnull(x) else x
+            )
         elif ha_name == "HA28":
             asset_list["matching_address"] = (
                 asset_list["House Number"].astype(str).str.lower().str.strip() + ", " +
@@ -352,6 +360,9 @@ class DataLoader:
             house_numbers = house_numbers.iloc[:, 0:1]
             house_numbers.columns = ['HouseNo']
 
+            # Remove trailing punctuation such as , or ;
+            house_numbers["HouseNo"] = house_numbers["HouseNo"].str.rstrip(',;')
+
             asset_list = pd.concat([asset_list, house_numbers[["HouseNo"]]], axis=1)
 
         return asset_list
@@ -425,27 +436,16 @@ class DataLoader:
         workbook = openpyxl.load_workbook(filepath)
         asset_sheetname = self.get_asset_sheetname(workbook)
 
-        # TODO: TEMP
-        sheetnames_lower = [x.lower() for x in workbook.sheetnames]
-        if any("eco3" in x for x in sheetnames_lower):
-            raise Exception("REMOVE ME")
-
         asset_sheet = workbook[asset_sheetname]
         asset_sheet_colnames = [cell.value for cell in asset_sheet[1]]
         if ha_name == "HA25":
             asset_sheet_colnames[11] = "matching_postcode"
 
-        values_only = not ha_name != "HA25"
-
         rows_data = []
-        if not values_only:
-            for row in asset_sheet.iter_rows(min_row=2, values_only=values_only):
-                row_data = [cell.value for cell in row]  # This will get you the cell values
-                rows_data.append(row_data)
-        else:
-            for row in asset_sheet.iter_rows(min_row=2, values_only=values_only):  # use values_only=True to get values
-                row_data = list(row)  # No need for comprehension, values_only=True returns a tuple of values
-                rows_data.append(row_data)
+
+        for row in asset_sheet.iter_rows(min_row=2, values_only=False):
+            row_data = [cell.value for cell in row]  # This will get you the cell values
+            rows_data.append(row_data)
 
         asset_list = pd.DataFrame(rows_data, columns=asset_sheet_colnames)
 
@@ -477,6 +477,29 @@ class DataLoader:
         if ha_name in ["HA1", "HA25"]:
             return asset_list, pd.DataFrame(), pd.DataFrame()
 
+        # If we have ECO3 surveys, we need to match them, because any properties treated under ECO3 won't be
+        # suitable under ECO4, since their walls will be filled
+        eco3_list = pd.DataFrame()
+        sheetnames_lower = [x.lower() for x in workbook.sheetnames]
+        eco3_sheetname_index = [i for i, x in enumerate(sheetnames_lower) if "eco3" in x.replace(" ", "")]
+        if eco3_sheetname_index:
+            eco3_sheetname = workbook.sheetnames[eco3_sheetname_index[0]]
+            eco3_sheet = workbook[eco3_sheetname]
+            eco3_rows = []
+            for row in eco3_sheet.iter_rows(min_row=2, values_only=False):  # Assuming the first row is headers
+                row_data = [cell.value for cell in row]  # This will get you the cell values
+                eco3_rows.append(row_data)
+
+            eco3_list = pd.DataFrame(eco3_rows, columns=[cell.value for cell in eco3_sheet[1]])
+            # Remove columns that are None
+            eco3_list = eco3_list.loc[:, eco3_list.columns.notnull()]
+            # Remove rows that are completely empty
+            eco3_list = eco3_list.loc[eco3_list.loc[:, eco3_list.columns].notnull().any(axis=1)]
+            eco3_list["eco3_list_row_id"] = [ha_name + "_Eco3_" + str(i) for i in range(0, len(eco3_list))]
+
+            # Perform the eco3 merge
+            eco3_list = self.merge_eco3_to_assets(asset_list, eco3_list, ha_name)
+
         # We check if there is a survey list
         survey_sheetname = self.get_survey_sheetname(workbook)
         survey_sheet = workbook[survey_sheetname]
@@ -518,7 +541,7 @@ class DataLoader:
             ciga_list = self.dedupe_ciga_list(ciga_list)
             ciga_list = self.merge_ciga_to_assets(asset_list, ciga_list, ha_name)
 
-        return asset_list, survey_list, ciga_list
+        return asset_list, survey_list, ciga_list, eco3_list
 
     @staticmethod
     def correct_ha6_asset_list(asset_list):
@@ -1433,6 +1456,79 @@ class DataLoader:
 
         return survey_list
 
+    def merge_eco3_to_assets(self, asset_list, eco3_list, ha_name):
+
+        # We add on a matching postcode without spaces for this
+        # asset_list["matching_postcode_no_space"] = asset_list["matching_postcode"].str.lower().str.replace(" ", "")
+
+        # May need an eco3 list correction function
+
+        # NEADS DRIVE, postcode with bs305dt, is not found in the asset list
+        eco3_list = eco3_list[
+            ~(eco3_list["Post Code"] == "BS305DT")
+        ]
+        # Drop rows with missings postcode
+        eco3_list = eco3_list[
+            ~pd.isnull(eco3_list["Post Code"])
+        ]
+
+        missed_postcodes = []
+        if ha_name == "HA25":
+            missed_postcodes = {
+                postcode.lower() for postcode in eco3_list["Post Code"] if
+                postcode.lower() not in asset_list["matching_postcode"].values
+            }
+            eco3_list = eco3_list[~eco3_list["Post Code"].str.lower().isin(missed_postcodes)]
+
+        matching_lookup = []
+        missed = []
+        for _, row in tqdm(eco3_list.iterrows(), total=len(eco3_list)):
+
+            postcode = row["Post Code"].lower().strip()
+
+            # df will never be empty, since we've already done a check for common postcodes
+            df = asset_list[
+                asset_list["matching_postcode"].str.contains(postcode)
+            ]
+
+            house_number = row["NO "]
+            if isinstance(house_number, str):
+                house_number = house_number.lower().strip()
+
+            if not any(df["matching_address"].str.contains(str(house_number))):
+                if "flat" in str(house_number):
+                    house_number = house_number.split("flat")[1].strip()
+
+                # We check if we had an instance of flat x, y
+                if "," in str(house_number):
+                    house_number = house_number.split(",")[0].strip()
+
+                # We may also have a space for an instance of flat x y
+                if " " in str(house_number):
+                    house_number = house_number.split(" ")[0].strip()
+
+            df = df[df["matching_address"].str.contains(str(house_number))]
+
+            if df.empty:
+                missed.append(row["eco3_list_row_id"])
+                continue
+
+            if df.shape[0] != 1:
+                df = df[df["HouseNo"].astype(str).str.lower() == str(house_number)]
+
+            if df.shape[0] != 1:
+                print(row["Street / Block Name"])
+                print(house_number)
+                print(row["Post Code"])
+                raise ValueError("Investigate")
+
+            matching_lookup.append(
+                {
+                    "eco3_list_row_id": row["eco3_list_row_id"],
+                    "asset_list_row_id": df["asset_list_row_id"].values[0],
+                }
+            )
+
     @staticmethod
     def extract_streetname(address, house_number=None, postcode=None):
         """
@@ -4008,11 +4104,13 @@ def app():
     # Add in: "HA25"
     # TODO: Remove ECO3 sales from HA25
     priority_has = [
-        "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA28", "HA32", "HA38", "HA39", "HA107",
+        "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA20", "HA24", "HA25", "HA28", "HA32", "HA39", "HA107",
     ]
     # Next HAs to do: 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come back on this],
     # Then: 28 [DONE],
-    # 38, 41, 10, 14, 20, 48
+    # 41, 10, 14 [DONE], 20, 48, 50
+    # 38[problematic, but no ECO4]
+    # TODO - do 50 and 25 next
     # Filter down the directories to only the priority HAs
     directories = [d for d in directories if d.split("/")[2] in priority_has]
 

From ef77db10373c653e28c82265460ce9fd3bf3f3bf Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 10:56:27 +0000
Subject: [PATCH 083/248] HA25 eco3 matching 91% complete

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 3ea9649e..ea5b0456 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1516,6 +1516,15 @@ class DataLoader:
             if df.shape[0] != 1:
                 df = df[df["HouseNo"].astype(str).str.lower() == str(house_number)]
 
+            if df.empty:
+                missed.append(row["eco3_list_row_id"])
+                continue
+
+            if df.shape[0] != 1:
+                # Perform a search on streetname
+                street_name_section1 = row["Street / Block Name"].lower().split("/")[0].split(",")[0]
+                df = df[df["matching_address"].str.contains(street_name_section1)]
+
             if df.shape[0] != 1:
                 print(row["Street / Block Name"])
                 print(house_number)

From 022244377d36557f83081e505b8068ab2bd98004 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 12:26:16 +0000
Subject: [PATCH 084/248] working on fixing missed matches in eco3 matching

---
 .../ha_15_32/ha_analysis_batch_3.py           | 84 +++++++++++++++----
 1 file changed, 66 insertions(+), 18 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index ea5b0456..a5845990 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -171,6 +171,10 @@ class DataLoader:
         "HA107": 51,
     }
 
+    UNMATCHED_ECO3 = {
+        "HA25": 94
+    }
+
     def __init__(self, directories, december_figures_filepath, use_cache, rebuild):
         self.directories = directories
         self.use_cache = use_cache
@@ -1458,9 +1462,6 @@ class DataLoader:
 
     def merge_eco3_to_assets(self, asset_list, eco3_list, ha_name):
 
-        # We add on a matching postcode without spaces for this
-        # asset_list["matching_postcode_no_space"] = asset_list["matching_postcode"].str.lower().str.replace(" ", "")
-
         # May need an eco3 list correction function
 
         # NEADS DRIVE, postcode with bs305dt, is not found in the asset list
@@ -1471,8 +1472,17 @@ class DataLoader:
         eco3_list = eco3_list[
             ~pd.isnull(eco3_list["Post Code"])
         ]
+        # We have a bunch of genuine duplicates
+        eco3_list = eco3_list.drop_duplicates(["NO ", "Street / Block Name", "Post Code"])
+
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "HALWILL MEADOOW", "HALWILL MEADOW"
+        )
+
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "Hall Road", "Hall Rd"
+        )
 
-        missed_postcodes = []
         if ha_name == "HA25":
             missed_postcodes = {
                 postcode.lower() for postcode in eco3_list["Post Code"] if
@@ -1480,10 +1490,18 @@ class DataLoader:
             }
             eco3_list = eco3_list[~eco3_list["Post Code"].str.lower().isin(missed_postcodes)]
 
+        # For the asset list, we create a matching address without any punctuation
+        # TODO: We should generally just remove puncutation from addresses when matching
+        asset_list['matching_address_no_punctuation'] = asset_list['matching_address'].str.replace(r'[^\w\s]', '',
+                                                                                                   regex=True)
+        # Remove double spaces
+        asset_list["matching_address_no_punctuation"] = asset_list["matching_address_no_punctuation"].str.replace(
+            "  ", " "
+        )
+
         matching_lookup = []
         missed = []
         for _, row in tqdm(eco3_list.iterrows(), total=len(eco3_list)):
-
             postcode = row["Post Code"].lower().strip()
 
             # df will never be empty, since we've already done a check for common postcodes
@@ -1507,24 +1525,20 @@ class DataLoader:
                 if " " in str(house_number):
                     house_number = house_number.split(" ")[0].strip()
 
-            df = df[df["matching_address"].str.contains(str(house_number))]
+            # We must do the house number filter
+            df = df[df["HouseNo"].astype(str).str.lower() == str(house_number)]
+
+            # Perform a search on streetname
+            # We do this to prevent duplicate matches to properties with the same postcode and house number,
+            # but different streets
+            street_name_section1 = row["Street / Block Name"].lower().split("/")[0].split(",")[0]
+            street_name_section1 = re.sub(r'[^\w\s]', '', street_name_section1)
+            df = df[df["matching_address_no_punctuation"].str.contains(street_name_section1)]
 
             if df.empty:
                 missed.append(row["eco3_list_row_id"])
                 continue
 
-            if df.shape[0] != 1:
-                df = df[df["HouseNo"].astype(str).str.lower() == str(house_number)]
-
-            if df.empty:
-                missed.append(row["eco3_list_row_id"])
-                continue
-
-            if df.shape[0] != 1:
-                # Perform a search on streetname
-                street_name_section1 = row["Street / Block Name"].lower().split("/")[0].split(",")[0]
-                df = df[df["matching_address"].str.contains(street_name_section1)]
-
             if df.shape[0] != 1:
                 print(row["Street / Block Name"])
                 print(house_number)
@@ -1538,6 +1552,40 @@ class DataLoader:
                 }
             )
 
+        # We verify the missed
+        # -HA25 contains 88 missed entries. These are actually 8 unique postcodes, where surveys were conducted
+        # on properties that had house numbers outside of the asset list
+        if len(missed) != self.UNMATCHED_ECO3[ha_name]:
+            raise ValueError(
+                f"Unmatched addresses for {ha_name} is not as expected, got {len(missed)} unmatched"
+            )
+
+        # TODO: 194 missed
+
+        matching_lookup = pd.DataFrame(matching_lookup)
+        # Check dupes as this will cause problems later on
+        if matching_lookup["asset_list_row_id"].duplicated().any():
+            raise ValueError("Duplicated asset list row ids")
+
+        missed_df = eco3_list[eco3_list["eco3_list_row_id"].isin(missed)]
+        missed_df.head(3).tail(1)["eco3_list_row_id"]
+
+        duped_ids = matching_lookup[matching_lookup["asset_list_row_id"].duplicated()]["asset_list_row_id"].tolist()
+        duped_df = matching_lookup[
+            matching_lookup["asset_list_row_id"].isin(duped_ids)
+        ]
+        duped_surveys = eco3_list[
+            eco3_list["eco3_list_row_id"].isin(duped_df["eco3_list_row_id"].values)
+        ].copy()
+
+        duped_surveys = duped_surveys.merge(matching_lookup, how="left", on="eco3_list_row_id")
+
+        duped_surveys[
+            ["NO ", "Street / Block Name", "Post Code", "eco3_list_row_id", "asset_list_row_id"]
+        ].sort_values("asset_list_row_id").head()
+
+        asset_list[asset_list["asset_list_row_id"] == "HA2515145"]["matching_address"].values
+
     @staticmethod
     def extract_streetname(address, house_number=None, postcode=None):
         """

From b09bd63b53c8d9b14f11c1c5b7cb38b28c63afbc Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 12:53:25 +0000
Subject: [PATCH 085/248] done with ha25 matching for now

---
 .../ha_15_32/ha_analysis_batch_3.py           | 66 +++++++++++--------
 1 file changed, 38 insertions(+), 28 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index a5845990..f0813aef 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -172,7 +172,7 @@ class DataLoader:
     }
 
     UNMATCHED_ECO3 = {
-        "HA25": 94
+        "HA25": 119
     }
 
     def __init__(self, directories, december_figures_filepath, use_cache, rebuild):
@@ -478,7 +478,7 @@ class DataLoader:
         # For HA1 and HA25, there is an exception in the structure of the data. We don't have any survey or ciga
         # lists, and so
         # we can return the asset list now
-        if ha_name in ["HA1", "HA25"]:
+        if ha_name in ["HA1"]:
             return asset_list, pd.DataFrame(), pd.DataFrame()
 
         # If we have ECO3 surveys, we need to match them, because any properties treated under ECO3 won't be
@@ -1460,10 +1460,8 @@ class DataLoader:
 
         return survey_list
 
-    def merge_eco3_to_assets(self, asset_list, eco3_list, ha_name):
-
-        # May need an eco3 list correction function
-
+    @staticmethod
+    def correct_ha25_eco3_list(eco3_list):
         # NEADS DRIVE, postcode with bs305dt, is not found in the asset list
         eco3_list = eco3_list[
             ~(eco3_list["Post Code"] == "BS305DT")
@@ -1483,6 +1481,29 @@ class DataLoader:
             "Hall Road", "Hall Rd"
         )
 
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "SPRINGFIELD WAY SAINT DAY", "SPRINGFIELD WAY ST DAY"
+        )
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "BOND SPEAR COURT", "BOND-SPEAR COURT"
+        )
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "ST.MARYS HILL", "ST MARYS HILL"
+        )
+        # Correct the postcode for edmund road
+        eco3_list["Post Code"] = np.where(
+            (eco3_list["Street / Block Name"] == "EDMUND ROAD") &
+            (eco3_list["Post Code"] == "TR14 8QJ"),
+            "TR15 1BY",
+            eco3_list["Post Code"]
+        )
+        return eco3_list
+
+    def merge_eco3_to_assets(self, asset_list, eco3_list, ha_name):
+
+        eco3_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_eco3_list")
+        eco3_list = eco3_list_correction_function(eco3_list)
+
         if ha_name == "HA25":
             missed_postcodes = {
                 postcode.lower() for postcode in eco3_list["Post Code"] if
@@ -1492,8 +1513,9 @@ class DataLoader:
 
         # For the asset list, we create a matching address without any punctuation
         # TODO: We should generally just remove puncutation from addresses when matching
-        asset_list['matching_address_no_punctuation'] = asset_list['matching_address'].str.replace(r'[^\w\s]', '',
-                                                                                                   regex=True)
+        asset_list['matching_address_no_punctuation'] = asset_list['matching_address'].str.replace(
+            r'[^\w\s]', '', regex=True
+        )
         # Remove double spaces
         asset_list["matching_address_no_punctuation"] = asset_list["matching_address_no_punctuation"].str.replace(
             "  ", " "
@@ -1502,6 +1524,8 @@ class DataLoader:
         matching_lookup = []
         missed = []
         for _, row in tqdm(eco3_list.iterrows(), total=len(eco3_list)):
+            # if row["eco3_list_row_id"] == "HA25_Eco3_5422":
+            #     raise Exception()
             postcode = row["Post Code"].lower().strip()
 
             # df will never be empty, since we've already done a check for common postcodes
@@ -1553,38 +1577,24 @@ class DataLoader:
             )
 
         # We verify the missed
-        # -HA25 contains 88 missed entries. These are actually 8 unique postcodes, where surveys were conducted
-        # on properties that had house numbers outside of the asset list
+        # HA25 contains 119 missed entries. These are actually 24 unique postcodes, and the majority belong to 2
+        # where many surveys were conducted on house numbers, not in the asset list
         if len(missed) != self.UNMATCHED_ECO3[ha_name]:
             raise ValueError(
                 f"Unmatched addresses for {ha_name} is not as expected, got {len(missed)} unmatched"
             )
 
-        # TODO: 194 missed
-
         matching_lookup = pd.DataFrame(matching_lookup)
         # Check dupes as this will cause problems later on
         if matching_lookup["asset_list_row_id"].duplicated().any():
             raise ValueError("Duplicated asset list row ids")
 
-        missed_df = eco3_list[eco3_list["eco3_list_row_id"].isin(missed)]
-        missed_df.head(3).tail(1)["eco3_list_row_id"]
+        # Merge onto eco3 list
+        eco3_list = eco3_list.merge(matching_lookup, how="left", on="eco3_list_row_id")
 
-        duped_ids = matching_lookup[matching_lookup["asset_list_row_id"].duplicated()]["asset_list_row_id"].tolist()
-        duped_df = matching_lookup[
-            matching_lookup["asset_list_row_id"].isin(duped_ids)
-        ]
-        duped_surveys = eco3_list[
-            eco3_list["eco3_list_row_id"].isin(duped_df["eco3_list_row_id"].values)
-        ].copy()
+        asset_list = asset_list.drop(columns=["matching_address_no_punctuation"])
 
-        duped_surveys = duped_surveys.merge(matching_lookup, how="left", on="eco3_list_row_id")
-
-        duped_surveys[
-            ["NO ", "Street / Block Name", "Post Code", "eco3_list_row_id", "asset_list_row_id"]
-        ].sort_values("asset_list_row_id").head()
-
-        asset_list[asset_list["asset_list_row_id"] == "HA2515145"]["matching_address"].values
+        return eco3_list
 
     @staticmethod
     def extract_streetname(address, house_number=None, postcode=None):

From 961b53d523bf7dc82d9e83459861cb3aa2865c93 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 12:58:29 +0000
Subject: [PATCH 086/248] Adding return for HA25

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index f0813aef..7ad50583 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -479,7 +479,7 @@ class DataLoader:
         # lists, and so
         # we can return the asset list now
         if ha_name in ["HA1"]:
-            return asset_list, pd.DataFrame(), pd.DataFrame()
+            return asset_list, pd.DataFrame(), pd.DataFrame(), pd.DataFrame()
 
         # If we have ECO3 surveys, we need to match them, because any properties treated under ECO3 won't be
         # suitable under ECO4, since their walls will be filled
@@ -504,6 +504,10 @@ class DataLoader:
             # Perform the eco3 merge
             eco3_list = self.merge_eco3_to_assets(asset_list, eco3_list, ha_name)
 
+        if ha_name in ["HA25"]:
+            # Accomodate ha25 unique structure
+            return asset_list, pd.DataFrame(), pd.DataFrame(), eco3_list
+
         # We check if there is a survey list
         survey_sheetname = self.get_survey_sheetname(workbook)
         survey_sheet = workbook[survey_sheetname]
@@ -1592,7 +1596,7 @@ class DataLoader:
         # Merge onto eco3 list
         eco3_list = eco3_list.merge(matching_lookup, how="left", on="eco3_list_row_id")
 
-        asset_list = asset_list.drop(columns=["matching_address_no_punctuation"])
+        asset_list.drop(columns=["matching_address_no_punctuation"], inplace=True)
 
         return eco3_list
 
@@ -1756,7 +1760,7 @@ class DataLoader:
                 continue
             # Load asset list
             logger.info("Loading data for {}".format(ha_name))
-            asset_list, survey_list, ciga_list = self.load_asset_list(
+            asset_list, survey_list, ciga_list, eco3_list = self.load_asset_list(
                 filepath=filepath,
                 ha_name=ha_name,
             )
@@ -1764,7 +1768,8 @@ class DataLoader:
             data[ha_name] = {
                 "asset_list": asset_list,
                 "survey_list": survey_list,
-                "ciga_list": ciga_list
+                "ciga_list": ciga_list,
+                "eco3_list": eco3_list
             }
 
         self.data = data

From 7f88f0e0f59e584d82a6799671e8f1a64a034392 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 13:59:32 +0000
Subject: [PATCH 087/248] Added in the re-labelling of assets based on eco3
 merge

---
 .../ha_15_32/ha_analysis_batch_3.py           | 20 +++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 7ad50583..21509923 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1812,6 +1812,7 @@ class DataLoader:
             asset_list = data_assets["asset_list"].copy()
             survey_list = data_assets["survey_list"].copy()
             ciga_list = data_assets["ciga_list"].copy()
+            eco3_list = data_assets.get("eco3_list", pd.DataFrame())
 
             asset_list_starting_size = asset_list.shape[0]
 
@@ -1859,6 +1860,25 @@ class DataLoader:
             if asset_list.shape[0] != asset_list_starting_size:
                 raise ValueError("The asset list has changed in size")
 
+            # If we have eco3 surveys, we set a property to not eligible
+            if not eco3_list.empty:
+                eco3_list_to_merge = eco3_list[["asset_list_row_id"]].copy()
+                eco3_list_to_merge["has_eco3"] = True
+                asset_list = asset_list.merge(
+                    eco3_list_to_merge, how="left", on="asset_list_row_id"
+                )
+
+                if asset_list.shape[0] != asset_list_starting_size:
+                    raise ValueError("The asset list has changed in size, when merging on eco3")
+
+                # Any rows that have an eco3 survey are set to not eligible
+                asset_list["ECO Eligibility"] = np.where(
+                    asset_list["has_eco3"] == True,
+                    "not eligible",
+                    asset_list["ECO Eligibility"]
+                )
+                asset_list = asset_list.drop(columns=["has_eco3"])
+
             # Report on sales
             sales_report = {}
             if not survey_list.empty:

From 9a0c6c3e8fbae7a23980aa7e75912ef6202ab29d Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 14:18:08 +0000
Subject: [PATCH 088/248] expanded eco3 matching

---
 .../ha_15_32/ha_analysis_batch_3.py            | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 21509923..06bb0d96 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -172,7 +172,7 @@ class DataLoader:
     }
 
     UNMATCHED_ECO3 = {
-        "HA25": 119
+        "HA25": 154
     }
 
     def __init__(self, directories, december_figures_filepath, use_cache, rebuild):
@@ -1508,12 +1508,16 @@ class DataLoader:
         eco3_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_eco3_list")
         eco3_list = eco3_list_correction_function(eco3_list)
 
+        asset_list["matching_postcode_nospace"] = asset_list["matching_postcode"].str.replace(" ", "").str.lower()
+        eco3_list["postcode_no_space"] = eco3_list["Post Code"].str.lower().str.replace(" ", "")
+
         if ha_name == "HA25":
+            # 317 -> 259
             missed_postcodes = {
-                postcode.lower() for postcode in eco3_list["Post Code"] if
-                postcode.lower() not in asset_list["matching_postcode"].values
+                postcode for postcode in eco3_list["postcode_no_space"] if
+                postcode not in asset_list["matching_postcode_nospace"].values
             }
-            eco3_list = eco3_list[~eco3_list["Post Code"].str.lower().isin(missed_postcodes)]
+            eco3_list = eco3_list[~eco3_list["postcode_no_space"].isin(missed_postcodes)]
 
         # For the asset list, we create a matching address without any punctuation
         # TODO: We should generally just remove puncutation from addresses when matching
@@ -1530,11 +1534,11 @@ class DataLoader:
         for _, row in tqdm(eco3_list.iterrows(), total=len(eco3_list)):
             # if row["eco3_list_row_id"] == "HA25_Eco3_5422":
             #     raise Exception()
-            postcode = row["Post Code"].lower().strip()
+            postcode = row["postcode_no_space"]
 
             # df will never be empty, since we've already done a check for common postcodes
             df = asset_list[
-                asset_list["matching_postcode"].str.contains(postcode)
+                asset_list["matching_postcode_nospace"].str.contains(postcode)
             ]
 
             house_number = row["NO "]
@@ -1588,6 +1592,8 @@ class DataLoader:
                 f"Unmatched addresses for {ha_name} is not as expected, got {len(missed)} unmatched"
             )
 
+        # 154 missed, 2827 matched for HA 25
+
         matching_lookup = pd.DataFrame(matching_lookup)
         # Check dupes as this will cause problems later on
         if matching_lookup["asset_list_row_id"].duplicated().any():

From 8b70fb346c0ce51acd24b245bbbecedeaa10d30c Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 15:00:51 +0000
Subject: [PATCH 089/248] matching ha50

---
 .../ha_15_32/ha_analysis_batch_3.py           | 56 ++++++++++++++++---
 1 file changed, 49 insertions(+), 7 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 06bb0d96..4708bf35 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -172,7 +172,8 @@ class DataLoader:
     }
 
     UNMATCHED_ECO3 = {
-        "HA25": 154
+        "HA25": 154,
+        "HA50": 5
     }
 
     def __init__(self, directories, december_figures_filepath, use_cache, rebuild):
@@ -262,6 +263,10 @@ class DataLoader:
                                              asset_list["add_5"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["post_code"].astype(str).str.lower().str.strip()
             asset_list["matching_postcode"] = asset_list["post_code"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA50":
+            asset_list["matching_address"] = asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Post Code"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["Post Code"].astype(str).str.lower().str.strip()
         elif ha_name == "HA107":
             # Create matching_address by concatenating House No, Street, Town, District, Postcode
             asset_list["matching_address"] = asset_list["House No"].astype(str).str.lower().str.strip() + ", " + \
@@ -433,6 +438,8 @@ class DataLoader:
             return "ECO Surveys"
         elif "ECO Survey" in workbook.sheetnames:
             return "ECO Survey"
+        elif "ECO 4 Surveys completed" in workbook.sheetnames:
+            return "ECO 4 Surveys completed"
         else:
             return "ECO surveys"
 
@@ -1289,6 +1296,34 @@ class DataLoader:
 
         return survey_list
 
+    @staticmethod
+    def correct_ha50_survey_list(survey_list):
+
+        survey_list["Post Code"] = np.where(
+            (survey_list["Street / Block Name"] == 'COSELEY STREET') &
+            (survey_list["Post Code"] == 'ST16 1LR'),
+            "ST6 1JU",
+            survey_list["Post Code"]
+        )
+
+        # Remove some of COSELEY STREET, as we have surveys done, outside of the asset list
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "COSELEY STREET") &
+              (survey_list["Post Code"] == "ST6 1JU") &
+              (survey_list["NO."].isin([96])))
+        ]
+
+        survey_list["Post Code"] = survey_list["Post Code"].str.replace("ST33JZ", "ST3 3JZ")
+
+        # Remove some of Jesmond drive as we have surveys done outside of the asset list
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "Jesmond Drive") &
+              (survey_list["Post Code"] == "ST3 3JZ") &
+              (survey_list["NO."].isin([29])))
+        ]
+
+        return survey_list
+
     @staticmethod
     def correct_ha107_survey_list(survey_list):
         # Replace Front Street, East Stockham with Front Street, East Stockwith
@@ -1503,6 +1538,10 @@ class DataLoader:
         )
         return eco3_list
 
+    @staticmethod
+    def correct_ha50_eco3_list(eco3_list):
+        return eco3_list
+
     def merge_eco3_to_assets(self, asset_list, eco3_list, ha_name):
 
         eco3_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_eco3_list")
@@ -1517,6 +1556,7 @@ class DataLoader:
                 postcode for postcode in eco3_list["postcode_no_space"] if
                 postcode not in asset_list["matching_postcode_nospace"].values
             }
+
             eco3_list = eco3_list[~eco3_list["postcode_no_space"].isin(missed_postcodes)]
 
         # For the asset list, we create a matching address without any punctuation
@@ -4199,16 +4239,18 @@ def app():
     # Grab the December HA figures filepath
     december_figures_filepath = "local_data/ha_data/HA_December_figures.csv"
 
-    # Add in: "HA25"
+    # Add in:
     # TODO: Remove ECO3 sales from HA25
     priority_has = [
-        "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA20", "HA24", "HA25", "HA28", "HA32", "HA39", "HA107",
+        "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39", "HA50", "HA107",
     ]
-    # Next HAs to do: 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come back on this],
+    # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come
+    # back on this],
     # Then: 28 [DONE],
-    # 41, 10, 14 [DONE], 20, 48, 50
-    # 38[problematic, but no ECO4]
-    # TODO - do 50 and 25 next
+    # 41, 48, 50
+    # 38[problematic, but no ECO4], 10 problematic (no eligibility),
+    # 20 has barely any in
+    # TODO - do 50
     # Filter down the directories to only the priority HAs
     directories = [d for d in directories if d.split("/")[2] in priority_has]
 

From 3001a98421b377cb31e2c3b667528e8d4b80a150 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 15:02:23 +0000
Subject: [PATCH 090/248] ha50 30% matched

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 4708bf35..901784e1 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1322,6 +1322,10 @@ class DataLoader:
               (survey_list["NO."].isin([29])))
         ]
 
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "BRUNDELL OVAL", "BRUNDALL OVAL"
+        )
+
         return survey_list
 
     @staticmethod

From 4afd012e51bfc3b366dc1e8d1f70281bb1097bd0 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 15:14:53 +0000
Subject: [PATCH 091/248] ha50 51% matched

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 901784e1..bde6f647 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1326,6 +1326,13 @@ class DataLoader:
             "BRUNDELL OVAL", "BRUNDALL OVAL"
         )
 
+        # Remove 4 Linden Place
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "Linden Place") &
+              (survey_list["Post Code"] == "ST3 3AT") &
+              (survey_list["NO."].isin([4])))
+        ]
+
         return survey_list
 
     @staticmethod

From 1146f34eba62ab2b00f610502b17ba6f9425cf43 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 15:24:20 +0000
Subject: [PATCH 092/248] matching 81% complete

---
 .../ha_15_32/ha_analysis_batch_3.py           | 39 +++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index bde6f647..818f6e4f 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1333,6 +1333,45 @@ class DataLoader:
               (survey_list["NO."].isin([4])))
         ]
 
+        # Remove 11 Tilehurst Place
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "Tilehurst Place") &
+              (survey_list["Post Code"] == "ST3 3AP") &
+              (survey_list["NO."].isin([11])))
+        ]
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "deavile road", "DEAVILLE ROAD"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "WOOLISCROFT ROAD", "WOOLLISCROFT ROAD"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Leak Road", "Leek Road"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Springfield road", "Springfields road"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "MILLWARD RD", "MILLWARD ROAD"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "REPINGTON RD", "REPINGTON ROAD"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "ECCELSTONE PLACE", "ECCLESTONE PLACE"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "St. James Place", "St James Place"
+        )
+
         return survey_list
 
     @staticmethod

From 5a1aa3995221ddf125b25c6d619165fdbcab37ff Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 15:33:26 +0000
Subject: [PATCH 093/248] ha50 93% complete

---
 .../ha_15_32/ha_analysis_batch_3.py           | 44 +++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 818f6e4f..3b9bd7ca 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1372,6 +1372,50 @@ class DataLoader:
             "St. James Place", "St James Place"
         )
 
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "CHELL HEATH RD", "CHELL HEATH ROAD"
+        )
+        # Correct postcode
+        survey_list["Post Code"] = np.where(
+            (survey_list["Street / Block Name"] == 'CHELL HEATH ROAD') &
+            (survey_list["Post Code"] == 'ST6 6HU'),
+            "ST6 6HJ",
+            survey_list["Post Code"]
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Franklin Rd", "Franklin Road"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Lodge Rd", "Lodge Road"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "St Matthews Street", "St Matthew Street"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Grove Bank Road", "Grovebank Road"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "OVERSLEY RD", "OVERSLEY ROAD"
+        )
+
+        # Replace all of the " RD" with " ROAD"
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            " RD", " ROAD"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "St. Georges Crescent", "St Georges Crescent"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Tewson Road", "Tewson Green"
+        )
+
         return survey_list
 
     @staticmethod

From d4e378f109deb3c71b87165309a5935b3641a915 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 15:40:37 +0000
Subject: [PATCH 094/248] ha50 matching complete subject to checks

---
 .../ha_15_32/ha_analysis_batch_3.py           | 29 +++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 3b9bd7ca..a5b99a72 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1416,6 +1416,35 @@ class DataLoader:
             "Tewson Road", "Tewson Green"
         )
 
+        # Remove 55 Seabridge Lane
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "Seabridge Lane") &
+              (survey_list["Post Code"] == "ST5 4AG") &
+              (survey_list["NO."].isin([55])))
+        ]
+
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "Tyne Way") &
+              (survey_list["Post Code"] == "ST5 4AX") &
+              (survey_list["NO."].isin([56])))
+        ]
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "St.Bernards Place", "St Bernard Place"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Penarth Road", "Penarth Grove"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "St. Marys Road", "St Marys Road"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Larch Drive", "Larch Grove"
+        )
+
         return survey_list
 
     @staticmethod

From 33b3f51ca4701ede548e6af82f80ae191a3c0710 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 15:54:40 +0000
Subject: [PATCH 095/248] handling dupes for ha50

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index a5b99a72..7124919e 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1445,6 +1445,21 @@ class DataLoader:
             "Larch Drive", "Larch Grove"
         )
 
+        # Drop 31 Lauder place north, as there is a duplicate. THis version also has a wrong postcode
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "LAUDER PLACE NORTH") &
+              (survey_list["Post Code"] == "ST20QS") &
+              (survey_list["NO."].isin([31])))
+        ]
+
+        # Handle dropping of dupes
+        survey_list["street_pruner"] = survey_list["Street / Block Name"].str.lower().str.replace(" ", "")
+        survey_list["postcode_pruner"] = survey_list["Post Code"].str.lower().str.replace(" ", "")
+
+        # Should go to 18
+        survey_list = survey_list.drop_duplicates(["NO.", "street_pruner", "postcode_pruner"])
+        survey_list = survey_list.drop(columns=["street_pruner", "postcode_pruner"])
+
         return survey_list
 
     @staticmethod

From 23eaa5600118f0df54667ea36422153158db8dd5 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 15:57:00 +0000
Subject: [PATCH 096/248] checked ha50 ciga merge

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 7124919e..2feded98 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -168,6 +168,7 @@ class DataLoader:
         "HA15": 3,
         "HA16": 7,
         "HA24": 12,
+        "HA50": 4,
         "HA107": 51,
     }
 
@@ -429,6 +430,8 @@ class DataLoader:
             return "CIGA checks"
         elif "CIGA check" in workbook.sheetnames:
             return "CIGA check"
+        elif "CIGA requested" in workbook.sheetnames:
+            return "CIGA requested"
         else:
             return "CIGA"
 

From 180c0c53eaa48c185c75cf22aee448aac91bbe30 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 16:26:58 +0000
Subject: [PATCH 097/248] done with ha50

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 2feded98..0720a686 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1982,7 +1982,8 @@ class DataLoader:
             "ECO4 GBIS (ECO+)": "GBIS",
             "ECO4 GBIS (ECO+) JJC UNDER 73m²": "GBIS",
             "ECO4 AFFORDABLE WARMTH": "ECO4",
-            "Affordable Warmth": "ECO4"
+            "Affordable Warmth": "ECO4",
+            "ECO4 GBIS (ECO+) JJC UNDER 73m² ": "GBIS",
         }
 
         eco_eligibility_map = {

From c43349a5777326145107a6406779eadcdc6e9dab Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 16:39:47 +0000
Subject: [PATCH 098/248] Added ha41 matching

---
 .../ha_15_32/ha_analysis_batch_3.py           | 22 ++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 0720a686..4cf447aa 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -174,7 +174,8 @@ class DataLoader:
 
     UNMATCHED_ECO3 = {
         "HA25": 154,
-        "HA50": 5
+        "HA41": 26,
+        "HA50": 5,
     }
 
     def __init__(self, directories, december_figures_filepath, use_cache, rebuild):
@@ -264,6 +265,14 @@ class DataLoader:
                                              asset_list["add_5"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["post_code"].astype(str).str.lower().str.strip()
             asset_list["matching_postcode"] = asset_list["post_code"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA41":
+            asset_list["matching_address"] = asset_list["AddressLine1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["AddressLine2"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["AddressLine3"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["AddressLine4"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["AddressLine5"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Postcode"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
         elif ha_name == "HA50":
             asset_list["matching_address"] = asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["Post Code"].astype(str).str.lower().str.strip()
@@ -1683,6 +1692,10 @@ class DataLoader:
     def correct_ha50_eco3_list(eco3_list):
         return eco3_list
 
+    @staticmethod
+    def correct_ha41_eco3_list(eco3_list):
+        return eco3_list
+
     def merge_eco3_to_assets(self, asset_list, eco3_list, ha_name):
 
         eco3_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_eco3_list")
@@ -4384,15 +4397,14 @@ def app():
     # Add in:
     # TODO: Remove ECO3 sales from HA25
     priority_has = [
-        "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39", "HA50", "HA107",
+        "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39", "HA41", "HA50", "HA107",
     ]
     # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come
     # back on this],
     # Then: 28 [DONE],
     # 41, 48, 50
-    # 38[problematic, but no ECO4], 10 problematic (no eligibility),
-    # 20 has barely any in
-    # TODO - do 50
+    # Ignore for now:
+    # TODO: 38[problematic, but no ECO4], 10 problematic (no eligibility), 20 has barely any in
     # Filter down the directories to only the priority HAs
     directories = [d for d in directories if d.split("/")[2] in priority_has]
 

From c4af2251f4fac0af95676b7158e5baf1ad9d3d3c Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 16:41:58 +0000
Subject: [PATCH 099/248] data load for ha41

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 4cf447aa..c2d585a2 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -452,6 +452,8 @@ class DataLoader:
             return "ECO Survey"
         elif "ECO 4 Surveys completed" in workbook.sheetnames:
             return "ECO 4 Surveys completed"
+        elif "ECO4 Surveys" in workbook.sheetnames:
+            return "ECO4 Surveys"
         else:
             return "ECO surveys"
 
@@ -1533,6 +1535,10 @@ class DataLoader:
 
         return survey_list
 
+    @staticmethod
+    def correct_ha41_survey_list(survey_list):
+        return survey_list
+
     @staticmethod
     def levenstein_match(matching_string, df):
         match_to = df["matching_address"].tolist()

From ae714e42a62b1e6def566c6de46b34035d0ab7bb Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 17:11:44 +0000
Subject: [PATCH 100/248] identified 9 additional HAs worth analysing

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index c2d585a2..b22ea273 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -4403,14 +4403,16 @@ def app():
     # Add in:
     # TODO: Remove ECO3 sales from HA25
     priority_has = [
-        "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39", "HA41", "HA50", "HA107",
+        "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39", "HA41", "HA48",
+        "HA50", "HA107",
     ]
     # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come
-    # back on this],
-    # Then: 28 [DONE],
-    # 41, 48, 50
+    # back on this], 28 [DONE], 41 [DONE], 50 [DONE],
+    # 48 [WIP],
+    # Consider for ECO4: 2, 63, 12, 13, 136, 117
+    # COnsider for GBIS: 56, 35, 34
     # Ignore for now:
-    # TODO: 38[problematic, but no ECO4], 10 problematic (no eligibility), 20 has barely any in
+    # 38 [problematic, but no ECO4], 10 problematic (no eligibility), 20 has barely any in
     # Filter down the directories to only the priority HAs
     directories = [d for d in directories if d.split("/")[2] in priority_has]
 

From c84be65e8defa04aa1453f80b53d073c9011a629 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 19:52:08 +0000
Subject: [PATCH 101/248] ha48 ciga unmatched count added

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index b22ea273..56867ef7 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -159,6 +159,10 @@ class DataLoader:
         "HA25": {
             "address": "T1_Address",
             "postcode": "matching_postcode"
+        },
+        "HA48": {
+            "address": "Full Address",
+            "postcode": "Postcode"
         }
     }
 
@@ -170,6 +174,7 @@ class DataLoader:
         "HA24": 12,
         "HA50": 4,
         "HA107": 51,
+        "HA48": 0
     }
 
     UNMATCHED_ECO3 = {
@@ -190,7 +195,7 @@ class DataLoader:
 
     def create_asset_list_matching_address(self, ha_name, asset_list):
 
-        if ha_name in ["HA1", "HA6", "HA16", "HA24"]:
+        if ha_name in ["HA1", "HA6", "HA16", "HA24", "HA48"]:
             asset_list["matching_address"] = asset_list[
                 self.COLUMN_CONFIG[ha_name]["address"]
             ].astype(str).str.lower().str.strip()

From c3fd2ae902bd96250bc5ca376a424ebc8cbc3335 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 20:58:47 +0000
Subject: [PATCH 102/248] Adding HA2, data load done

---
 .../ha_15_32/ha_analysis_batch_3.py           | 34 ++++++++++++-------
 1 file changed, 21 insertions(+), 13 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 56867ef7..74c6d3f5 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -167,6 +167,7 @@ class DataLoader:
     }
 
     UNMATCHED_CIGA = {
+        "HA2": 0,
         "HA6": 117,
         "HA14": 3,
         "HA15": 3,
@@ -202,6 +203,12 @@ class DataLoader:
             asset_list["matching_postcode"] = asset_list[
                 self.COLUMN_CONFIG[ha_name]["postcode"]
             ].astype(str).str.lower().str.strip()
+        elif ha_name == "HA2":
+            # Create matching_address by concatenating Address 1, Address 2, Address 3, Address 4, Postcode
+            asset_list["matching_address"] = asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address Line 2"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Postcode"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
         elif ha_name == "HA7":
             # Create matching_address by concatenating Address 1, Address 2, Address 3, Address 4, Postcode
             asset_list["matching_address"] = asset_list["Address"].astype(str).str.lower().str.strip() + ", " + \
@@ -3794,7 +3801,6 @@ def forecast_remaining_sales(loader):
 
     results = []
     for ha_name, input_data in loader.data.items():
-
         # Original warmfront figures - ECO4
         original_warmfront_estimates = december_figures[december_figures["HA Name"] == ha_name]
 
@@ -4074,13 +4080,13 @@ def forecast_remaining_sales(loader):
             ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): original_warmfront_eco4,
             ("ECO4 original", "", "Remaining - #", ""): original_warmfront_remaining_eco4,
             ("ECO4 original", "", "Total - £", ""): original_warmfront_eco4_revenue,
-            ("ECO4 original", "", "Sold - £", ""): original_warmfront_sold_eco4,
+            ("ECO4 original", "", "Sold or cancelled - £", ""): original_warmfront_sold_eco4,
             ("ECO4 original", "", "Remaining - £", ""): original_warmfront_remaining_eco4_revenue,
             # GBIS - original warmfront figures
             ("", "Original Warmfront estimate", "Total - #", "GBIS - November"): original_warmfront_gbis,
             ("GBIS original", "", "Remaining - #", ""): original_warmfront_gbis,
             ("GBIS original", "", "Total - £", ""): original_warmfront_gbis_revenue,
-            ("GBIS original", "", "Sold - £", ""): original_warmfront_sold_gbis,
+            ("GBIS original", "", "Sold or cancelled - £", ""): original_warmfront_sold_gbis,
             ("GBIS original", "", "Remaining - £", ""): original_warmfront_remaining_gbis_revenue,
             # ECO4 - asset list, pre-ciga
             ("", "Warmfront post code list", "Total #", "ECO4 total (pre-ciga)"): eco4_pre_ciga,
@@ -4237,12 +4243,17 @@ def forecast_remaining_sales(loader):
     headline_total_delta = round(headline_total_delta, 1)
 
     headline_eco4_sold_since_november = (
-        totals_row[('ECO4 pre-ciga', '', 'Sold - £', '')] - totals_row[('ECO4 original', '', 'Sold - £', '')]
+        totals_row[('ECO4 pre-ciga', '', 'Sold - £', '')] +
+        totals_row[('ECO4 pre-ciga', '', 'Confirmed cancellations - £', '')] +  # confirmed canclleations
+        totals_row[('ECO4 pre-ciga', '', 'Unconfirmed cancellations - £', '')] -  # expected cancellations
+        totals_row[('ECO4 original', '', 'Sold or cancelled - £', '')]
     )
 
     headline_gbis_sold_since_november = (
-        totals_row[("GBIS Postcode list", "Warmfront post code list", "Sold - £", "GBIS total")] -
-        totals_row[('GBIS original', '', 'Sold - £', '')]
+        totals_row[("GBIS Postcode list", "Warmfront post code list", "Sold - £", "GBIS total")] +
+        totals_row[("GBIS Postcode list", "", "Confirmed cancellations - £", "")] +  # confirmed cancellations
+        totals_row[("GBIS Postcode list", "", "Unconfirmed cancellations - £", "")] -  # expected cancellations
+        totals_row[('GBIS original', '', 'Sold or cancelled - £', '')]
     )
 
     headlines = [
@@ -4261,7 +4272,7 @@ def forecast_remaining_sales(loader):
                 "ECO4 - November"): headline_eco4_original_remaining_revenue
         },
         {
-            ("", "", "", "HA Name"): "ECO4 Sold since November - £",
+            ("", "", "", "HA Name"): "ECO4 Sold or cancelled since November - £",
             (
                 "", "Original Warmfront estimate", "Total - #",
                 "ECO4 - November"): headline_eco4_sold_since_november
@@ -4290,7 +4301,7 @@ def forecast_remaining_sales(loader):
                 "ECO4 - November"): headline_gbis_original_remaining_revenue
         },
         {
-            ("", "", "", "HA Name"): "GBIS Sold since November - £",
+            ("", "", "", "HA Name"): "GBIS Sold or cancelled since November - £",
             (
                 "", "Original Warmfront estimate", "Total - #",
                 "ECO4 - November"): headline_gbis_sold_since_november
@@ -4399,21 +4410,18 @@ def app():
     rebuild_inputs = False
 
     # List all of the data in the folder
-
     directories = [str(file) for entry in DATA_FOLDER.iterdir() if entry.is_dir()
                    for file in entry.iterdir() if file.suffix == '.xlsx']
     # Grab the December HA figures filepath
     december_figures_filepath = "local_data/ha_data/HA_December_figures.csv"
 
     # Add in:
-    # TODO: Remove ECO3 sales from HA25
     priority_has = [
-        "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39", "HA41", "HA48",
+        "HA1", "HA2", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39", "HA41", "HA48",
         "HA50", "HA107",
     ]
     # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come
-    # back on this], 28 [DONE], 41 [DONE], 50 [DONE],
-    # 48 [WIP],
+    # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE],
     # Consider for ECO4: 2, 63, 12, 13, 136, 117
     # COnsider for GBIS: 56, 35, 34
     # Ignore for now:

From 19850f924445035e3880eaae40f750d21fb12b80 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 21:34:46 +0000
Subject: [PATCH 103/248] fixing up ha63 eco3 list

---
 .../ha_15_32/ha_analysis_batch_3.py           | 46 +++++++++++++++++--
 1 file changed, 42 insertions(+), 4 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 74c6d3f5..aebf0506 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -289,6 +289,10 @@ class DataLoader:
             asset_list["matching_address"] = asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["Post Code"].astype(str).str.lower().str.strip()
             asset_list["matching_postcode"] = asset_list["Post Code"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA63":
+            asset_list["matching_address"] = asset_list["Address1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["POSTCODE"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["POSTCODE"].astype(str).str.lower().str.strip()
         elif ha_name == "HA107":
             # Create matching_address by concatenating House No, Street, Town, District, Postcode
             asset_list["matching_address"] = asset_list["House No"].astype(str).str.lower().str.strip() + ", " + \
@@ -1551,6 +1555,16 @@ class DataLoader:
     def correct_ha41_survey_list(survey_list):
         return survey_list
 
+    @staticmethod
+    def correct_ha63_survey_list(survey_list):
+        # Drop some filler rows
+        survey_list = survey_list[
+            ~survey_list[survey_list.columns[0]].isin(
+                ["NO JOBS SURVEYED JULY 2021 ", "NO JOBS SURVEYED SEPTEMBER 2021"]
+            )
+        ]
+        return survey_list
+
     @staticmethod
     def levenstein_match(matching_string, df):
         match_to = df["matching_address"].tolist()
@@ -1714,6 +1728,26 @@ class DataLoader:
     def correct_ha41_eco3_list(eco3_list):
         return eco3_list
 
+    @staticmethod
+    def correct_ha63_eco3_list(eco3_list):
+        eco3_list = eco3_list[~pd.isnull(eco3_list["Post Code"])]
+        # Some postcode that aren't in the asset list
+        eco3_list = eco3_list[
+            ~eco3_list["Post Code"].isin(
+                ["NR32 15X", "NR30 2BT"]
+            )
+        ]
+
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "POUND COTTAGES - BLOOMSBERRY CLOSE", "POUND COTTAGES"
+        )
+
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "FREDRICK ROAD", "Frederick Road"
+        )
+
+        return eco3_list
+
     def merge_eco3_to_assets(self, asset_list, eco3_list, ha_name):
 
         eco3_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_eco3_list")
@@ -1799,12 +1833,15 @@ class DataLoader:
         # We verify the missed
         # HA25 contains 119 missed entries. These are actually 24 unique postcodes, and the majority belong to 2
         # where many surveys were conducted on house numbers, not in the asset list
+        # 154 missed, 2827 matched for HA 25
         if len(missed) != self.UNMATCHED_ECO3[ha_name]:
             raise ValueError(
                 f"Unmatched addresses for {ha_name} is not as expected, got {len(missed)} unmatched"
             )
 
-        # 154 missed, 2827 matched for HA 25
+        # 41
+        missed_df = eco3_list[eco3_list["eco3_list_row_id"].isin(missed)]
+        missed_df.head(1)["Street / Block Name"]
 
         matching_lookup = pd.DataFrame(matching_lookup)
         # Check dupes as this will cause problems later on
@@ -4418,11 +4455,12 @@ def app():
     # Add in:
     priority_has = [
         "HA1", "HA2", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39", "HA41", "HA48",
-        "HA50", "HA107",
+        "HA50", "HA63", "HA107",
     ]
     # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come
-    # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE],
-    # Consider for ECO4: 2, 63, 12, 13, 136, 117
+    # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE]
+    # 63 [WIP]
+    # Consider for ECO4: 12, 13, 136, 117
     # COnsider for GBIS: 56, 35, 34
     # Ignore for now:
     # 38 [problematic, but no ECO4], 10 problematic (no eligibility), 20 has barely any in

From 47b97fce0a6eec4fe15a967f1721e18908bffccf Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 21:46:44 +0000
Subject: [PATCH 104/248] fixing eco3 matching for ha63

---
 .../ha_15_32/ha_analysis_batch_3.py           | 27 +++++++++++++++----
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index aebf0506..bab5cdab 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -174,6 +174,7 @@ class DataLoader:
         "HA16": 7,
         "HA24": 12,
         "HA50": 4,
+        "HA63": 15,
         "HA107": 51,
         "HA48": 0
     }
@@ -182,6 +183,7 @@ class DataLoader:
         "HA25": 154,
         "HA41": 26,
         "HA50": 5,
+        "HA63": 0
     }
 
     def __init__(self, directories, december_figures_filepath, use_cache, rebuild):
@@ -1746,6 +1748,25 @@ class DataLoader:
             "FREDRICK ROAD", "Frederick Road"
         )
 
+        # For denmark street, remove the space from the house number
+        eco3_list["NO "] = np.where(
+            eco3_list["Street / Block Name"] == "DENMARK STREET",
+            eco3_list["NO "].str.replace(" ", ""),
+            eco3_list["NO "]
+        )
+
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "OLD HOSPITAL MEWS HOSPITAL WALK", "Old Hospital Mews"
+        )
+
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "Portland House, Portland Street", "Portland House"
+        )
+
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "MIDDLE MARKET STREET", "Middle Market Road"
+        )
+
         return eco3_list
 
     def merge_eco3_to_assets(self, asset_list, eco3_list, ha_name):
@@ -1791,7 +1812,7 @@ class DataLoader:
             if isinstance(house_number, str):
                 house_number = house_number.lower().strip()
 
-            if not any(df["matching_address"].str.contains(str(house_number))):
+            if not any(df["HouseNo"].str.contains(str(house_number))):
                 if "flat" in str(house_number):
                     house_number = house_number.split("flat")[1].strip()
 
@@ -1839,10 +1860,6 @@ class DataLoader:
                 f"Unmatched addresses for {ha_name} is not as expected, got {len(missed)} unmatched"
             )
 
-        # 41
-        missed_df = eco3_list[eco3_list["eco3_list_row_id"].isin(missed)]
-        missed_df.head(1)["Street / Block Name"]
-
         matching_lookup = pd.DataFrame(matching_lookup)
         # Check dupes as this will cause problems later on
         if matching_lookup["asset_list_row_id"].duplicated().any():

From 9cd166160bfbe9a3cc89f5d43231c3c8ed5c2ede Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 21:51:16 +0000
Subject: [PATCH 105/248] sorted ha63 facts and figures

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index bab5cdab..2a1a4b16 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -2077,7 +2077,8 @@ class DataLoader:
             "eco4 (subject to ciga/archetype check": "eco4 (subject to ciga)",
             "eco4 (subject to archetype check)": "eco4",
             "eco4 (subject to ciga/archetype)": "eco4 (subject to ciga)",
-            "eco4  (subject to ciga)": "eco4 (subject to ciga)"
+            "eco4  (subject to ciga)": "eco4 (subject to ciga)",
+            "eco4(subject to ciga)": "eco4 (subject to ciga)",
         }
 
         ha_facts_and_figures = []

From 76ef60d06c8d508d4c78e1bda320902880bce96c Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 7 Mar 2024 22:16:05 +0000
Subject: [PATCH 106/248] done with ha12

---
 .../ha_15_32/ha_analysis_batch_3.py           | 58 ++++++++++++++-----
 1 file changed, 45 insertions(+), 13 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 2a1a4b16..4dbf326b 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -148,6 +148,10 @@ class DataLoader:
             "address": "propertyaddress",
             "postcode": "address"  # The 'address' column actually contains postcode
         },
+        "HA12": {
+            "address": "Full Address",
+            "postcode": "Postcode"
+        },
         "HA16": {
             "address": "Address",
             "postcode": "Postcode"
@@ -169,6 +173,7 @@ class DataLoader:
     UNMATCHED_CIGA = {
         "HA2": 0,
         "HA6": 117,
+        "HA12": 6,
         "HA14": 3,
         "HA15": 3,
         "HA16": 7,
@@ -198,7 +203,7 @@ class DataLoader:
 
     def create_asset_list_matching_address(self, ha_name, asset_list):
 
-        if ha_name in ["HA1", "HA6", "HA16", "HA24", "HA48"]:
+        if ha_name in ["HA1", "HA6", "HA12", "HA16", "HA24", "HA48"]:
             asset_list["matching_address"] = asset_list[
                 self.COLUMN_CONFIG[ha_name]["address"]
             ].astype(str).str.lower().str.strip()
@@ -1558,13 +1563,39 @@ class DataLoader:
         return survey_list
 
     @staticmethod
-    def correct_ha63_survey_list(survey_list):
-        # Drop some filler rows
-        survey_list = survey_list[
-            ~survey_list[survey_list.columns[0]].isin(
-                ["NO JOBS SURVEYED JULY 2021 ", "NO JOBS SURVEYED SEPTEMBER 2021"]
-            )
-        ]
+    def correct_ha12_survey_list(survey_list):
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Henstone Road", "Hanstone Road"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Lindern avenue", "Linden Avenue"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "priness way", "Princess Way"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Worth Crecesent", "Worth Crescent"
+        )
+
+        survey_list["Post Code"] = survey_list["Post Code"].str.replace(
+            "DY117HA", "DY11 7HA"
+        )
+
+        survey_list["Post Code"] = survey_list["Post Code"].str.replace(
+            "DY117HF", "DY11 7HF"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Adderbrook Crescent", "Addenbrooke Crescent"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Kinver Road", "Kinver Avenue"
+        )
+
         return survey_list
 
     @staticmethod
@@ -2079,6 +2110,7 @@ class DataLoader:
             "eco4 (subject to ciga/archetype)": "eco4 (subject to ciga)",
             "eco4  (subject to ciga)": "eco4 (subject to ciga)",
             "eco4(subject to ciga)": "eco4 (subject to ciga)",
+            "eco4 subject to ciga": "eco4 (subject to ciga)",
         }
 
         ha_facts_and_figures = []
@@ -4472,13 +4504,13 @@ def app():
 
     # Add in:
     priority_has = [
-        "HA1", "HA2", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39", "HA41", "HA48",
-        "HA50", "HA63", "HA107",
+        "HA1", "HA2", "HA6", "HA7", "HA12", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39", "HA41",
+        "HA48", "HA50", "HA63", "HA107",
     ]
     # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come
-    # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE]
-    # 63 [WIP]
-    # Consider for ECO4: 12, 13, 136, 117
+    # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE]
+    #
+    # Consider for ECO4: 13, 136, 117
     # COnsider for GBIS: 56, 35, 34
     # Ignore for now:
     # 38 [problematic, but no ECO4], 10 problematic (no eligibility), 20 has barely any in

From e3f36fc881925fd845f623d469d0faf9cd6b89c3 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 8 Mar 2024 18:52:32 +0000
Subject: [PATCH 107/248] HA117 data load

---
 .../ha_15_32/ha_analysis_batch_3.py           | 27 +++++++++++++++----
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 4dbf326b..d4de589a 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -188,7 +188,8 @@ class DataLoader:
         "HA25": 154,
         "HA41": 26,
         "HA50": 5,
-        "HA63": 0
+        "HA63": 0,
+        "HA117": 4
     }
 
     def __init__(self, directories, december_figures_filepath, use_cache, rebuild):
@@ -308,6 +309,11 @@ class DataLoader:
                                              asset_list["District"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["Postcode"].astype(str).str.lower().str.strip()
             asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA117":
+            asset_list["matching_address"] = asset_list["Address1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address2"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["PostCode"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["PostCode"].astype(str).str.lower().str.strip()
         else:
             raise NotImplementedError("implement me")
 
@@ -1800,6 +1806,17 @@ class DataLoader:
 
         return eco3_list
 
+    @staticmethod
+    def correct_ha117_eco3_list(eco3_list):
+        # Delete rows where postcode is null - there are some placeholder rows where this happens
+        eco3_list = eco3_list[~pd.isnull(eco3_list["Post Code"])]
+
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "TARRING ROAD", "155 TARRING ROAD"
+        )
+
+        return eco3_list
+
     def merge_eco3_to_assets(self, asset_list, eco3_list, ha_name):
 
         eco3_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_eco3_list")
@@ -4505,13 +4522,13 @@ def app():
     # Add in:
     priority_has = [
         "HA1", "HA2", "HA6", "HA7", "HA12", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39", "HA41",
-        "HA48", "HA50", "HA63", "HA107",
+        "HA48", "HA50", "HA63", "HA107", "HA117"
     ]
     # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come
     # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE]
-    #
-    # Consider for ECO4: 13, 136, 117
-    # COnsider for GBIS: 56, 35, 34
+    # 117 [WIP]
+    # Consider for ECO4: 13
+    # Consider for GBIS: 56, 35, 34
     # Ignore for now:
     # 38 [problematic, but no ECO4], 10 problematic (no eligibility), 20 has barely any in
     # Filter down the directories to only the priority HAs

From 15efd02b8b8220f1d6cc745cb1b4a571be808643 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 8 Mar 2024 19:14:35 +0000
Subject: [PATCH 108/248] done ha117, ha13 next

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index d4de589a..97ac96da 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -2119,15 +2119,19 @@ class DataLoader:
             "ECO4 GBIS (ECO+) JJC UNDER 73m² ": "GBIS",
         }
 
+        # Since it seems like "subject to archetype check" has some failure conditions, for simplicity, we
+        # treat these as similar to subject to CIGA, and therefore unconfirmed worked that could fail. There
+        # are only a small volume of properties for which we see this
         eco_eligibility_map = {
             "not eligble": "not eligible",
             "eco 4(subject to ciga)": "eco4 (subject to ciga)",
             "eco4 (subject to ciga/archetype check": "eco4 (subject to ciga)",
-            "eco4 (subject to archetype check)": "eco4",
+            "eco4 (subject to archetype check)": "eco4 (subject to ciga)",
             "eco4 (subject to ciga/archetype)": "eco4 (subject to ciga)",
             "eco4  (subject to ciga)": "eco4 (subject to ciga)",
             "eco4(subject to ciga)": "eco4 (subject to ciga)",
             "eco4 subject to ciga": "eco4 (subject to ciga)",
+            "eco4 (subject to archetype)": "eco4 (subject to ciga)",
         }
 
         ha_facts_and_figures = []
@@ -4525,9 +4529,9 @@ def app():
         "HA48", "HA50", "HA63", "HA107", "HA117"
     ]
     # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come
-    # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE]
-    # 117 [WIP]
-    # Consider for ECO4: 13
+    # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE]
+    # 13 [WIP]
+    # Consider for ECO4:
     # Consider for GBIS: 56, 35, 34
     # Ignore for now:
     # 38 [problematic, but no ECO4], 10 problematic (no eligibility), 20 has barely any in

From b2b8fd8f84321f369cc3d14b009515759a2eff9a Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 8 Mar 2024 19:20:38 +0000
Subject: [PATCH 109/248] ha13 49% matched

---
 .../ha_15_32/ha_analysis_batch_3.py           | 23 +++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 97ac96da..3edc1490 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -224,6 +224,12 @@ class DataLoader:
                                              asset_list["Address3"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["Postcode"].astype(str).str.lower().str.strip()
             asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA13":
+            asset_list["matching_address"] = asset_list["Address 1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["address 2"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address 3"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Postcode"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
         elif ha_name == "HA14":
             # Create matching_address by concatenating Address 1, Address 2, Address 3, Address 4, Postcode
             asset_list["matching_address"] = asset_list["Address 1"].astype(str).str.lower().str.strip() + ", " + \
@@ -1604,6 +1610,19 @@ class DataLoader:
 
         return survey_list
 
+    @staticmethod
+    def correct_ha13_survey_list(survey_list):
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Woodfarm Road", "WOOD FARM ROAD"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "ALLANDALE ROAD", "ALLANDALE"
+        )
+
+        return survey_list
+
     @staticmethod
     def levenstein_match(matching_string, df):
         match_to = df["matching_address"].tolist()
@@ -4525,8 +4544,8 @@ def app():
 
     # Add in:
     priority_has = [
-        "HA1", "HA2", "HA6", "HA7", "HA12", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39", "HA41",
-        "HA48", "HA50", "HA63", "HA107", "HA117"
+        "HA1", "HA2", "HA6", "HA7", "HA12", "HA13", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39",
+        "HA41", "HA48", "HA50", "HA63", "HA107", "HA117"
     ]
     # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come
     # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE]

From 21117f3e585be18d5da6e49744353f7ed830a483 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 8 Mar 2024 19:32:42 +0000
Subject: [PATCH 110/248] worked through ha13 matching - need to do facts and
 figures

---
 .../ha_15_32/ha_analysis_batch_3.py           | 28 +++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 3edc1490..15a4f438 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -174,6 +174,7 @@ class DataLoader:
         "HA2": 0,
         "HA6": 117,
         "HA12": 6,
+        "HA13": 119,
         "HA14": 3,
         "HA15": 3,
         "HA16": 7,
@@ -1621,6 +1622,30 @@ class DataLoader:
             "ALLANDALE ROAD", "ALLANDALE"
         )
 
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "NEWFIELDS LANE", "NEWFIELD LANE"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "BROADFIELDS ROAD", "BROADFIELD ROAD"
+        )
+
+        survey_list["Post Code"] = survey_list["Post Code"].str.replace(
+            "HP2 5SF+", "HP2 5SF"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "PESCOTT HILL", "PESCOT HILL"
+        )
+
+        # This is a duplicate record
+        survey_list = survey_list[
+            ~((survey_list["NO."] == 33) &
+              (survey_list["Street / Block Name"] == "Turners Hill") &
+              (survey_list["Post Code"] == "HP2 4LH") &
+              (survey_list["INSTALLED OR CANCELLED"] == "NO UPDATE - CHECKED 18.12.23"))
+        ]
+
         return survey_list
 
     @staticmethod
@@ -1652,6 +1677,9 @@ class DataLoader:
                 postcode.lower() not in asset_list["matching_postcode"].values
             ]
 
+        if ha_name == "HA13":
+            missed_postcodes = ["hp17 8le"]
+
         matching_lookup = []
         for _, row in tqdm(survey_list.iterrows(), total=len(survey_list)):
 

From f03485d4f49045e8f68cf7a8dcc5caf58113ede1 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 9 Mar 2024 14:41:38 +0000
Subject: [PATCH 111/248] updating facts and figures to treat archetype
 dependent properties separately

---
 .../ha_15_32/ha_analysis_batch_3.py            | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 15a4f438..c0f3ab12 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -2172,13 +2172,12 @@ class DataLoader:
         eco_eligibility_map = {
             "not eligble": "not eligible",
             "eco 4(subject to ciga)": "eco4 (subject to ciga)",
-            "eco4 (subject to ciga/archetype check": "eco4 (subject to ciga)",
-            "eco4 (subject to archetype check)": "eco4 (subject to ciga)",
-            "eco4 (subject to ciga/archetype)": "eco4 (subject to ciga)",
+            "eco4 (subject to ciga/archetype check": "eco4 (subject to ciga) (subject to archetype)",
+            "eco4 (subject to archetype check)": "eco4 (subject to archetype)",
+            "eco4 (subject to ciga/archetype)": "eco4 (subject to ciga) (subject to archetype)",
             "eco4  (subject to ciga)": "eco4 (subject to ciga)",
             "eco4(subject to ciga)": "eco4 (subject to ciga)",
             "eco4 subject to ciga": "eco4 (subject to ciga)",
-            "eco4 (subject to archetype)": "eco4 (subject to ciga)",
         }
 
         ha_facts_and_figures = []
@@ -2330,7 +2329,7 @@ class DataLoader:
                 asset_list = asset_list.merge(survey_list_to_merge, how='left', on="asset_list_row_id")
                 # Update the cases where properties have sold, but are missing a CIGA check
                 asset_list["ECO Eligibility"] = np.where(
-                    (asset_list["ECO Eligibility"] == "eco4 (subject to ciga)") & (
+                    (asset_list["ECO Eligibility"].str.contains("(subject to ciga)")) & (
                         asset_list["has_a_survey_record"] == True
                     ),
                     "eco4 - passed ciga",
@@ -2349,7 +2348,14 @@ class DataLoader:
                 # Update the cases where a property was marked as eligible for ECO4, but sold for GBIS
                 asset_list["ECO Eligibility"] = np.where(
                     (asset_list["ECO Eligibility"].isin(
-                        ["eco4", "eco4 (subject to ciga)", "eco4 - passed ciga", "failed ciga"]
+                        [
+                            "eco4",
+                            "eco4 (subject to ciga)",
+                            "eco4 - passed ciga",
+                            "failed ciga",
+                            "eco4 (subject to archetype)",
+                            "eco4 (subject to ciga) (subject to archetype)"
+                        ]
                     )) & (
                         asset_list["installation_status"].isin(
                             ["GBIS - installed", "GBIS - cancelled", "GBIS - in progress"]

From c1a15052f246288c5216e2c80849ccef3b2c6be0 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 9 Mar 2024 14:46:26 +0000
Subject: [PATCH 112/248] Handling warning for regex searching of (subject to
 ciga)

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index c0f3ab12..430e5ff7 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -2329,7 +2329,7 @@ class DataLoader:
                 asset_list = asset_list.merge(survey_list_to_merge, how='left', on="asset_list_row_id")
                 # Update the cases where properties have sold, but are missing a CIGA check
                 asset_list["ECO Eligibility"] = np.where(
-                    (asset_list["ECO Eligibility"].str.contains("(subject to ciga)")) & (
+                    (asset_list["ECO Eligibility"].str.contains("subject to ciga")) & (
                         asset_list["has_a_survey_record"] == True
                     ),
                     "eco4 - passed ciga",

From b46da0f6c0140b28d00385f02f29cae91f412b2d Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 9 Mar 2024 15:48:51 +0000
Subject: [PATCH 113/248] adding in archetype check process to model

---
 .../ha_15_32/ha_analysis_batch_3.py           | 99 +++++++++++++++----
 1 file changed, 82 insertions(+), 17 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 430e5ff7..9a959956 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -3658,19 +3658,47 @@ def patch_cleaned(cleaned):
 
 def calculate_eco4_post_ciga(
     eligiblity_counts, input_data, ha_ciga_conversion_rate, ha_ciga_pass_to_sale_rate, ha_eco4_to_sale_rate,
-    eco4_rate
+    eco4_rate, archetype_conversion_rate
 ):
     remaining_needing_ciga_check = eligiblity_counts[
-        eligiblity_counts["ECO Eligibility"] == "eco4 (subject to ciga)"
+        eligiblity_counts["ECO Eligibility"].str.contains("subject to ciga") &
+        ~eligiblity_counts["ECO Eligibility"].str.contains("subject to archetype")
         ]["count"].sum()
 
+    remaining_needing_ciga_and_archetype_check = eligiblity_counts[
+        eligiblity_counts["ECO Eligibility"].str.contains("subject to ciga") &
+        eligiblity_counts["ECO Eligibility"].str.contains("subject to archetype")
+        ]["count"].sum()
+    # We scale this down by the archetype_conversion_rate, and add this on to the remaining_needing_ciga_check
+    remaining_needing_ciga_and_archetype_check_passed = np.round(
+        remaining_needing_ciga_and_archetype_check * archetype_conversion_rate
+    )
+
+    remaining_needing_ciga_check += remaining_needing_ciga_and_archetype_check_passed
+
+    eco4_no_ciga_needed = eligiblity_counts[
+        eligiblity_counts["ECO Eligibility"] == "eco4"
+        ]["count"].sum()
+
+    eco4_no_ciga_archetype_needed = eligiblity_counts[
+        eligiblity_counts["ECO Eligibility"] == "eco4 (subject to archetype)"
+        ]["count"].sum()
+    eco4_no_ciga_archetype_needed_passed = np.round(
+        eco4_no_ciga_archetype_needed * archetype_conversion_rate
+    )
+
+    eco4_no_ciga_needed += eco4_no_ciga_archetype_needed_passed
+
+    failed_archetype_check = int(
+        remaining_needing_ciga_and_archetype_check +
+        eco4_no_ciga_archetype_needed -
+        remaining_needing_ciga_and_archetype_check_passed -
+        eco4_no_ciga_archetype_needed_passed
+    )
+
     has_ciga_check = not input_data["ciga_list"].empty
     if has_ciga_check:
 
-        eco4_no_ciga_needed = eligiblity_counts[
-            eligiblity_counts["ECO Eligibility"] == "eco4"
-            ]["count"].sum()
-
         eco4_ciga_passed = eligiblity_counts[
             eligiblity_counts["ECO Eligibility"] == "eco4 - passed ciga"
             ]["count"].sum()
@@ -3681,8 +3709,10 @@ def calculate_eco4_post_ciga(
 
         eco4_no_ciga_needed_or_ciga_passed = eco4_no_ciga_needed + eco4_ciga_passed
 
-        eco4_confirmed = (eco4_no_ciga_needed * ha_eco4_to_sale_rate) + (eco4_ciga_passed * ha_ciga_pass_to_sale_rate)
-        eco4_confirmed = np.round(eco4_confirmed)
+        eco4_confirmed = np.round(
+            (eco4_no_ciga_needed * ha_eco4_to_sale_rate) +
+            (eco4_ciga_passed * ha_ciga_pass_to_sale_rate)
+        )
 
         eco4_no_ciga_needed_cancellations = int(eco4_no_ciga_needed_or_ciga_passed - eco4_confirmed)
 
@@ -3704,9 +3734,7 @@ def calculate_eco4_post_ciga(
 
         eco4_expected_cancellations = eco4_no_ciga_needed_cancellations + eco4_ciga_needed_cancellations
     else:
-        eco4_no_ciga_needed = eligiblity_counts[
-            eligiblity_counts["ECO Eligibility"] == "eco4"
-            ]["count"].sum()
+
         eco4_confirmed_ciga_failures = 0
         # Multiply by sale conversion
         eco4_confirmed = np.round(eco4_no_ciga_needed * ha_eco4_to_sale_rate)
@@ -3735,6 +3763,9 @@ def calculate_eco4_post_ciga(
         "ECO4 - post CIGA - £": eco4_post_ciga * eco4_rate,
         "Of which confirmed - £": eco4_confirmed * eco4_rate,
         "Of which forecast - £": eco4_remaining_forecast * eco4_rate,
+        # Archetype check failures
+        "Estimated total - failed archetype check - #": failed_archetype_check,
+        "Estimated total - failed archetype check - £": failed_archetype_check * eco4_rate,
         # Ciga failures
         "Estimated total - failed CIGA": int(eco4_confirmed_ciga_failures + eco4_estimated_ciga_failures),
         "Confirmed CIGA failures": eco4_confirmed_ciga_failures,
@@ -3766,6 +3797,14 @@ def forecast_remaining_sales(loader):
     gbis_rate = 600
     eco4_rate = 1710
 
+    # Based on ONS https://www.ons.gov.uk/peoplepopulationandcommunity/housing/bulletins/housingenglandandwales
+    # /census2021
+    # there are 5.7 million terraced properties in England and Wales, of the 19.3 million houses or bungalows. We
+    # therefore apply a 30% discount to homes that are dependent on an archetype check, since around 30% of them will
+    # be mid terraced. This 30% is slightly harsh, but we want to be conservative.
+    # Therefore, the archetype check conversion rate is 70%
+    archetype_conversion_rate = 0.7
+
     # 1) Calculate the conversion rate from passed CIGA to actual sale
     converted_ciga_jobs = []
     for ha_name, input_data in loader.data.items():
@@ -4010,13 +4049,27 @@ def forecast_remaining_sales(loader):
 
         eco4_pre_ciga = eligiblity_counts[
             eligiblity_counts["ECO Eligibility"].isin(
-                ["eco4", "eco4 (subject to ciga)", "eco4 - passed ciga", "failed ciga"]
+                [
+                    "eco4",
+                    "eco4 (subject to ciga)",
+                    "eco4 - passed ciga",
+                    "failed ciga",
+                    "eco4 (subject to ciga) (subject to archetype)",
+                    "eco4 (subject to archetype)"
+                ]
             )
         ]["count"].sum()
 
         eco4_pre_ciga_remaining = eligiblity_counts_remaining[
             eligiblity_counts_remaining["ECO Eligibility"].isin(
-                ["eco4", "eco4 (subject to ciga)", "eco4 - passed ciga", "failed ciga"]
+                [
+                    "eco4",
+                    "eco4 (subject to ciga)",
+                    "eco4 - passed ciga",
+                    "failed ciga",
+                    "eco4 (subject to ciga) (subject to archetype)",
+                    "eco4 (subject to archetype)"
+                ]
             )
         ]["count"].sum()
 
@@ -4065,7 +4118,8 @@ def forecast_remaining_sales(loader):
             ha_ciga_conversion_rate=ha_ciga_conversion_rate,
             ha_ciga_pass_to_sale_rate=ha_ciga_pass_to_sale_rate,
             ha_eco4_to_sale_rate=ha_eco4_to_sale_rate,
-            eco4_rate=eco4_rate
+            eco4_rate=eco4_rate,
+            archetype_conversion_rate=archetype_conversion_rate
         )
 
         eco4_post_ciga_remaining_results = calculate_eco4_post_ciga(
@@ -4074,7 +4128,8 @@ def forecast_remaining_sales(loader):
             ha_ciga_conversion_rate=ha_ciga_conversion_rate,
             ha_ciga_pass_to_sale_rate=ha_ciga_pass_to_sale_rate,
             ha_eco4_to_sale_rate=ha_eco4_to_sale_rate,
-            eco4_rate=eco4_rate
+            eco4_rate=eco4_rate,
+            archetype_conversion_rate=archetype_conversion_rate
         )
 
         # Calculate the delta compared to Warmfront's original remaining
@@ -4111,6 +4166,8 @@ def forecast_remaining_sales(loader):
         gbis_remaining = int(np.round(gbis_remaining * ha_gbis_sale_conversion))
         gbis_remaining_revenue = int(gbis_remaining * gbis_rate)
 
+        survey_list["installation_status"].value_counts()
+
         # GBIS delta
         if original_warmfront_remaining_gbis == 0:
             gbis_delta_vs_original_estimate_remaining = "N/A"
@@ -4176,7 +4233,7 @@ def forecast_remaining_sales(loader):
                 surveys_with_eligibility["installation_status"] == "GBIS - cancelled"
                 ].shape[0]
 
-            expected_gbis_unconfirmed_sales = incomplete_gbis_sales * ha_gbis_sale_conversion
+            expected_gbis_unconfirmed_sales = np.round(incomplete_gbis_sales * ha_gbis_sale_conversion)
 
             gbis_expected_cancellations = int(incomplete_gbis_sales - expected_gbis_unconfirmed_sales)
 
@@ -4187,10 +4244,12 @@ def forecast_remaining_sales(loader):
         # Add in the variance:
         # We should expect that the pre-ciga total is:
         # 1) The number of post CIGA successes +
+        # 2) The number of archetype failures +
         # 2) the number of CIGA failures +
         # 3) The number of cancellations
         variance_total = eco4_pre_ciga - (
             eco4_post_ciga_total_results["ECO4 - post CIGA - #"] +
+            eco4_post_ciga_total_results["Estimated total - failed archetype check - #"] +
             eco4_post_ciga_total_results['Estimated total - failed CIGA'] +
             eco4_post_ciga_total_results["Expected cancellations - #"]
         )
@@ -4199,6 +4258,7 @@ def forecast_remaining_sales(loader):
 
         variance_remaining = eco4_pre_ciga_remaining - (
             eco4_post_ciga_remaining_results["ECO4 - post CIGA - #"] +
+            eco4_post_ciga_remaining_results["Estimated total - failed archetype check - #"] +
             eco4_post_ciga_remaining_results['Estimated total - failed CIGA'] +
             eco4_post_ciga_remaining_results["Expected cancellations - #"]
         )
@@ -4290,6 +4350,11 @@ def forecast_remaining_sales(loader):
             ("ECO4 Cancellations", "", "Of which expected cancellations - £", ""): eco4_post_ciga_remaining_results[
                 "Expected cancellations - £"
             ],
+            # Archetype check failures
+            ("ECO4 CIGA failures", "", "Estimated total - failed Archetype check - #", ""):
+                eco4_post_ciga_remaining_results['Estimated total - failed archetype check - #'],
+            ("ECO4 CIGA failures", "", "Estimated total - failed Archetype check - £", ""):
+                eco4_post_ciga_remaining_results['Estimated total - failed archetype check - £'],
             # CIGA failures
             ("ECO4 CIGA failures", "", "Estimated total - failed CIGA - #", ""): eco4_post_ciga_remaining_results[
                 'Estimated total - failed CIGA'
@@ -4324,7 +4389,7 @@ def forecast_remaining_sales(loader):
         }
 
         # Make sure nothing is forgotten due to duplicate multi-index keys
-        if len(to_append) != 47:
+        if len(to_append) != 49:
             raise ValueError("Something went wrong")
 
         results.append(to_append)

From a7e593ecd9289551d7ef47481ea3dff0c2a70592 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 9 Mar 2024 16:15:16 +0000
Subject: [PATCH 114/248] Added handling of archetype checks and corrected gbis
 calculations

---
 .../ha_15_32/ha_analysis_batch_3.py           | 65 ++++++++++++++-----
 1 file changed, 47 insertions(+), 18 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 9a959956..aca2ce43 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -4154,19 +4154,25 @@ def forecast_remaining_sales(loader):
         else:
             ha_gbis_sale_conversion = median_gbis_to_install
 
-        gbis_total = eligiblity_counts[
+        gbis_total_pre_cancellations = eligiblity_counts[
             eligiblity_counts["ECO Eligibility"] == "gbis"
             ]["count"].sum()
-        gbis_total = int(np.round(gbis_total * ha_gbis_sale_conversion))
-        gbis_total_revenue = int(gbis_total * gbis_rate)
 
-        gbis_remaining = eligiblity_counts_remaining[
+        gbis_total_pre_cancellations_revenue = gbis_total_pre_cancellations * gbis_rate
+        # gbis_total = int(np.round(gbis_total_pre_cancellations * ha_gbis_sale_conversion))
+        # gbis_total_revenue = int(gbis_total * gbis_rate)
+
+        gbis_remaining_pre_cancellations = eligiblity_counts_remaining[
             eligiblity_counts_remaining["ECO Eligibility"] == "gbis"
             ]["count"].sum()
-        gbis_remaining = int(np.round(gbis_remaining * ha_gbis_sale_conversion))
+        gbis_remaining_pre_cancellations_revenue = (
+            gbis_remaining_pre_cancellations * gbis_rate
+        )
+        # This is the gbis jobs we expect to sell
+        gbis_remaining = int(np.round(gbis_remaining_pre_cancellations * ha_gbis_sale_conversion))
         gbis_remaining_revenue = int(gbis_remaining * gbis_rate)
-
-        survey_list["installation_status"].value_counts()
+        # This is the number we expect to cancel
+        gbis_remaining_expected_cancellations = int(gbis_remaining_pre_cancellations - gbis_remaining) * gbis_rate
 
         # GBIS delta
         if original_warmfront_remaining_gbis == 0:
@@ -4179,9 +4185,10 @@ def forecast_remaining_sales(loader):
         # Current sales figures
         # For any sales surveys that are complete, that could still cancel, we apply a conversion rate
         eco4_actually_sold = 0
-        gbis_actually_sold = 0
         eco4_confirmed_cancellations = 0
         eco4_expected_cancellations = 0
+
+        gbis_actually_sold = 0
         gbis_confirmed_cancellations = 0
         gbis_expected_cancellations = 0
         if not survey_list.empty:
@@ -4284,17 +4291,30 @@ def forecast_remaining_sales(loader):
             raise ValueError("Something went wrong in pre_ciga_eco4_variance")
 
         # Check GBIS total variance
-        gbis_variance = (
-            gbis_total_revenue -
-            gbis_actually_sold -
-            gbis_confirmed_cancellations * gbis_rate -
-            gbis_expected_cancellations * gbis_rate -
-            gbis_remaining_revenue
+        # The total before cancellations should equal:
+        # The number of sold +
+        # The number of confirmed cancelled +
+        # The number of expected cancelled +
+        # The number of remaining
+        gbis_variance = gbis_total_pre_cancellations - (
+            gbis_actually_sold / gbis_rate +
+            gbis_confirmed_cancellations +
+            gbis_expected_cancellations +
+            gbis_remaining_pre_cancellations
         )
 
         if gbis_variance != 0:
             raise ValueError("Something went wrong in gbis_variance")
 
+        # We expect the remaining to equal expected sales + expected cancellations
+        gbis_variance_2 = gbis_remaining_pre_cancellations - (
+            gbis_remaining +
+            gbis_remaining_expected_cancellations
+        )
+
+        if gbis_variance_2 != 0:
+            raise ValueError("Something went wrong in gbis_variance")
+
         to_append = {
             ("", "", "", "HA Name"): ha_name,
             # ECO4 - original warmfront figures
@@ -4375,17 +4395,26 @@ def forecast_remaining_sales(loader):
                 "Estimated CIGA failures - £"
             ],
             # GBIS postcode list
-            ("GBIS Postcode list", "Warmfront post code list", "Total - #", "GBIS total"): gbis_total,
-            ("GBIS Postcode list", "Warmfront post code list", "Total - £", "GBIS total"): gbis_total_revenue,
+            ("GBIS Postcode list", "Warmfront post code list", "Total - #", "GBIS total"): gbis_total_pre_cancellations,
+            ("GBIS Postcode list", "Warmfront post code list", "Total - £", "GBIS total"):
+                gbis_total_pre_cancellations_revenue,
             ("GBIS Postcode list", "Warmfront post code list", "GBIS VARIANCE", "GBIS total"): gbis_variance,
             ("GBIS Postcode list", "Warmfront post code list", "Sold - £", "GBIS total"): gbis_actually_sold,
             ("GBIS Postcode list", "", "Confirmed cancellations - £", ""): gbis_confirmed_cancellations * gbis_rate,
             # This is for jobs that are in-progress and could still cancel
             ("GBIS Postcode list", "", "Unconfirmed cancellations - £", ""): gbis_expected_cancellations * gbis_rate,
-            ("GBIS Postcode list", "Warmfront post code list", "Remaining - #", "GBIS total"): gbis_remaining,
-            ("GBIS Postcode list", "Warmfront post code list", "Remaining - £", "GBIS total"): gbis_remaining_revenue,
+            ("GBIS Postcode list", "Warmfront post code list", "Remaining - #", "GBIS total"):
+                gbis_remaining_pre_cancellations,
+            ("GBIS Postcode list", "Warmfront post code list", "Remaining - £", "GBIS total"):
+                gbis_remaining_pre_cancellations_revenue,
             ("GBIS Postcode list", "", "Delta vs original estimate, remaining - %", ""):
                 gbis_delta_vs_original_estimate_remaining,
+            # Remaining GBIS revenue, split into expected sales and expected cancellations
+            (
+                "GBIS Postcode list", "Of which expected sales - £", "Remaining - £",
+                "GBIS total"): gbis_remaining_revenue,
+            ("GBIS Postcode list", "Of which expected cancellations -£", "Remaining - £", "GBIS total"):
+                gbis_remaining_expected_cancellations
         }
 
         # Make sure nothing is forgotten due to duplicate multi-index keys

From f9957a55d066a294e79efdf196b72e79d82689fb Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 9 Mar 2024 16:19:54 +0000
Subject: [PATCH 115/248] fixed bug in gbis variance 2?

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index aca2ce43..a25f98c6 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -4172,7 +4172,8 @@ def forecast_remaining_sales(loader):
         gbis_remaining = int(np.round(gbis_remaining_pre_cancellations * ha_gbis_sale_conversion))
         gbis_remaining_revenue = int(gbis_remaining * gbis_rate)
         # This is the number we expect to cancel
-        gbis_remaining_expected_cancellations = int(gbis_remaining_pre_cancellations - gbis_remaining) * gbis_rate
+        gbis_remaining_expected_cancellations = int(gbis_remaining_pre_cancellations - gbis_remaining)
+        gbis_remaining_expected_cancellations_revenue = gbis_remaining_expected_cancellations * gbis_rate
 
         # GBIS delta
         if original_warmfront_remaining_gbis == 0:
@@ -4313,7 +4314,7 @@ def forecast_remaining_sales(loader):
         )
 
         if gbis_variance_2 != 0:
-            raise ValueError("Something went wrong in gbis_variance")
+            raise ValueError("Something went wrong in gbis_variance2")
 
         to_append = {
             ("", "", "", "HA Name"): ha_name,
@@ -4414,7 +4415,7 @@ def forecast_remaining_sales(loader):
                 "GBIS Postcode list", "Of which expected sales - £", "Remaining - £",
                 "GBIS total"): gbis_remaining_revenue,
             ("GBIS Postcode list", "Of which expected cancellations -£", "Remaining - £", "GBIS total"):
-                gbis_remaining_expected_cancellations
+                gbis_remaining_expected_cancellations_revenue
         }
 
         # Make sure nothing is forgotten due to duplicate multi-index keys

From 1ccb2cdebdca9a2fc17f0b11ef431bac81309357 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 9 Mar 2024 16:22:28 +0000
Subject: [PATCH 116/248] updated number of expected to append

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index a25f98c6..7ddc9844 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -4419,7 +4419,7 @@ def forecast_remaining_sales(loader):
         }
 
         # Make sure nothing is forgotten due to duplicate multi-index keys
-        if len(to_append) != 49:
+        if len(to_append) != 51:
             raise ValueError("Something went wrong")
 
         results.append(to_append)

From 768a0385e3a2cf7fc29b86b827cfb43d914e4621 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 9 Mar 2024 17:02:33 +0000
Subject: [PATCH 117/248] ha35 data read

---
 .../ha_15_32/ha_analysis_batch_3.py           | 24 +++++++++++++++----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 7ddc9844..ea0078c2 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -276,6 +276,13 @@ class DataLoader:
                 asset_list["POST CODE"].astype(str).str.lower().str.strip()
             )
             asset_list["matching_postcode"] = asset_list["POST CODE"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA35":
+            asset_list["matching_address"] = asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address Line 2"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address Line 3"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address Line 4"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address Post Code"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["Address Post Code"].astype(str).str.lower().str.strip()
         elif ha_name == "HA38":
             asset_list["matching_address"] = asset_list["House_Number"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["Address_Line_1"].astype(str).str.lower().str.strip() + ", " + \
@@ -1648,6 +1655,13 @@ class DataLoader:
 
         return survey_list
 
+    @staticmethod
+    def correct_ha35_survey_list(survey_list):
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "BALLADIER WLAK", "BALLADIER WALK"
+        )
+        return survey_list
+
     @staticmethod
     def levenstein_match(matching_string, df):
         match_to = df["matching_address"].tolist()
@@ -4673,14 +4687,14 @@ def app():
 
     # Add in:
     priority_has = [
-        "HA1", "HA2", "HA6", "HA7", "HA12", "HA13", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39",
-        "HA41", "HA48", "HA50", "HA63", "HA107", "HA117"
+        "HA1", "HA2", "HA6", "HA7", "HA12", "HA13", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA35",
+        "HA39", "HA41", "HA48", "HA50", "HA63", "HA107", "HA117"
     ]
     # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come
-    # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE]
-    # 13 [WIP]
+    # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE], 13 [DONE]
+    # 35 [WIP]
     # Consider for ECO4:
-    # Consider for GBIS: 56, 35, 34
+    # Consider for GBIS: 56, 34
     # Ignore for now:
     # 38 [problematic, but no ECO4], 10 problematic (no eligibility), 20 has barely any in
     # Filter down the directories to only the priority HAs

From 29f2a2abf801e4c01ad89383b18eaac4ed97b0af Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 9 Mar 2024 17:09:43 +0000
Subject: [PATCH 118/248] HA35 done

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index ea0078c2..04ee343c 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -4691,8 +4691,9 @@ def app():
         "HA39", "HA41", "HA48", "HA50", "HA63", "HA107", "HA117"
     ]
     # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come
-    # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE], 13 [DONE]
-    # 35 [WIP]
+    # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE], 13 [DONE],
+    # 35 [DONE]
+    # 34 [WIP]
     # Consider for ECO4:
     # Consider for GBIS: 56, 34
     # Ignore for now:

From 6e4fc23ecc2036e14148b18611cb04aafde8084b Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 9 Mar 2024 18:12:12 +0000
Subject: [PATCH 119/248] fixed dupes for HA34

---
 .../ha_15_32/ha_analysis_batch_3.py           | 104 +++++++++++++++++-
 1 file changed, 98 insertions(+), 6 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 04ee343c..8784481b 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -276,6 +276,12 @@ class DataLoader:
                 asset_list["POST CODE"].astype(str).str.lower().str.strip()
             )
             asset_list["matching_postcode"] = asset_list["POST CODE"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA34":
+            asset_list["matching_address"] = (
+                asset_list[" Address"].astype(str).str.lower().str.strip() + ", " +
+                asset_list[" Postcode"].astype(str).str.lower().str.strip()
+            )
+            asset_list["matching_postcode"] = asset_list[" Postcode"].astype(str).str.lower().str.strip()
         elif ha_name == "HA35":
             asset_list["matching_address"] = asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["Address Line 2"].astype(str).str.lower().str.strip() + ", " + \
@@ -566,7 +572,8 @@ class DataLoader:
             eco3_list["eco3_list_row_id"] = [ha_name + "_Eco3_" + str(i) for i in range(0, len(eco3_list))]
 
             # Perform the eco3 merge
-            eco3_list = self.merge_eco3_to_assets(asset_list, eco3_list, ha_name)
+            if not eco3_list.empty:
+                eco3_list = self.merge_eco3_to_assets(asset_list, eco3_list, ha_name)
 
         if ha_name in ["HA25"]:
             # Accomodate ha25 unique structure
@@ -1657,9 +1664,94 @@ class DataLoader:
 
     @staticmethod
     def correct_ha35_survey_list(survey_list):
-        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
-            "BALLADIER WLAK", "BALLADIER WALK"
+        return survey_list
+
+    @staticmethod
+    def correct_ha34_survey_list(survey_list):
+        # Note in the asset list
+        survey_list = survey_list[
+            survey_list["Post Code"] != "L5 3SS"
+            ]
+
+        survey_list["Post Code"] = survey_list["Post Code"].str.replace(
+            "L177DR", "L17 7DR"
         )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "PENVALLEY CRESENT", "Penvalley Crescent"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "PENLINKEN DRIVE", "Penlinken Drive"
+        )
+
+        # There's no 32 Penlinken Drive in the asset sheet
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "Penlinken Drive") &
+              (survey_list["NO."] == 32))
+        ]
+
+        # There's no 30 Gwent Street in the asset sheet
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "GWENT ST") &
+              (survey_list["NO."] == 30))
+        ]
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "POULTON RD", "Poulton Road"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "ST PAULS RD", "St Pauls Road"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "BROAD LANE, KIRKBY", "BROAD LANE"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "BULLENS RD, KIRKBY", "Bullens Road"
+        )
+
+        # There's no 219 NORTH HILL ST in the asset sheet
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "NORTH HILL ST") &
+              (survey_list["NO."] == 219))
+        ]
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "CROSLAND RD, KIRKBY", "CROSLAND ROAD"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "PARK BROW DRIVE, KIRKBY", "Park Brow Drive"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "CELTIC TREET", "Celtic Street"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "BUCKLAND ROAD", "Buckland Street"
+        )
+
+        # duplicates
+        survey_list = survey_list.drop_duplicates(["Street / Block Name", "NO.", "Post Code"])
+
+        # This is a duplicate with wrong postcode
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "CLARIBEL STREET") &
+              (survey_list["NO."] == 7) &
+              (survey_list["Post Code"] == "L8 8AF"))
+        ]
+
+        survey_list["NO."] = np.where(
+            ((survey_list["NO."] == "187 A") &
+             (survey_list["Post Code"] == "L32 6QF")),
+            "187A",
+            survey_list["NO."]
+        )
+
         return survey_list
 
     @staticmethod
@@ -1685,7 +1777,7 @@ class DataLoader:
         survey_list = survey_list_correction_function(survey_list)
 
         missed_postcodes = []
-        if ha_name == "HA6":
+        if ha_name in ["HA6", "HA34"]:
             missed_postcodes = [
                 postcode.lower() for postcode in survey_list["Post Code"] if
                 postcode.lower() not in asset_list["matching_postcode"].values
@@ -4687,8 +4779,8 @@ def app():
 
     # Add in:
     priority_has = [
-        "HA1", "HA2", "HA6", "HA7", "HA12", "HA13", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA35",
-        "HA39", "HA41", "HA48", "HA50", "HA63", "HA107", "HA117"
+        "HA1", "HA2", "HA6", "HA7", "HA12", "HA13", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA34",
+        "HA35", "HA39", "HA41", "HA48", "HA50", "HA63", "HA107", "HA117"
     ]
     # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come
     # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE], 13 [DONE],

From 27fed2dce320a54a049df279fca5c3abd407275f Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 9 Mar 2024 18:25:22 +0000
Subject: [PATCH 120/248] temp removed HA34 due to issue

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 8784481b..d1f8d546 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -2270,6 +2270,11 @@ class DataLoader:
             "ECO4 AFFORDABLE WARMTH": "ECO4",
             "Affordable Warmth": "ECO4",
             "ECO4 GBIS (ECO+) JJC UNDER 73m² ": "GBIS",
+            "ECO4 PPS": "ECO4",
+            "AFFORDABLE WARMTH / REMEDIAL": "ECO4",
+            "AFF0RDALE WARMTH": "ECO4",
+            "ECO 4 RdSAP CL": "ECO4",
+            "Affordable Warmth (R) ": "ECO4"
         }
 
         # Since it seems like "subject to archetype check" has some failure conditions, for simplicity, we
@@ -4779,15 +4784,17 @@ def app():
 
     # Add in:
     priority_has = [
-        "HA1", "HA2", "HA6", "HA7", "HA12", "HA13", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA34",
+        "HA1", "HA2", "HA6", "HA7", "HA12", "HA13", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32",
+        # "HA34",
         "HA35", "HA39", "HA41", "HA48", "HA50", "HA63", "HA107", "HA117"
     ]
     # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come
     # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE], 13 [DONE],
     # 35 [DONE]
-    # 34 [WIP]
+    #  [WIP]
     # Consider for ECO4:
-    # Consider for GBIS: 56, 34
+    # Consider for GBIS: 56
+    # 34 [bug in the results so leaving out for the moment]
     # Ignore for now:
     # 38 [problematic, but no ECO4], 10 problematic (no eligibility), 20 has barely any in
     # Filter down the directories to only the priority HAs

From 28434f43c8fd9dac176fd68a1b4e20a79a128e9d Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 10 Mar 2024 13:55:44 +0000
Subject: [PATCH 121/248] ha56 wip

---
 .../ha_15_32/ha_analysis_batch_3.py           | 90 +++++++++++++++++--
 1 file changed, 83 insertions(+), 7 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index d1f8d546..064ff8f5 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -317,6 +317,12 @@ class DataLoader:
             asset_list["matching_address"] = asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["Post Code"].astype(str).str.lower().str.strip()
             asset_list["matching_postcode"] = asset_list["Post Code"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA56":
+            asset_list["matching_address"] = asset_list["Address 1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address 2"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address 3"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Post Code"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["Post Code"].astype(str).str.lower().str.strip()
         elif ha_name == "HA63":
             asset_list["matching_address"] = asset_list["Address1"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["POSTCODE"].astype(str).str.lower().str.strip()
@@ -639,6 +645,54 @@ class DataLoader:
 
         return asset_list
 
+    @staticmethod
+    def correct_ha56_asset_list(asset_list):
+        # CH1 4JR has already been surveyed, but it's listed in the asset list
+        # as a single row, when it's actually 32 units, so we just set this
+        # as ineligible
+        asset_list["ECO Eligibility"] = np.where(
+            asset_list["Post Code"] == "CH1 4JR",
+            "Not eligible",
+            asset_list["ECO Eligibility"]
+        )
+
+        # Same for CW8 3EU
+        asset_list["ECO Eligibility"] = np.where(
+            asset_list["Post Code"] == "CW8 3EU",
+            "Not eligible",
+            asset_list["ECO Eligibility"]
+        )
+
+        asset_list["ECO Eligibility"] = np.where(
+            asset_list["Post Code"] == "CW1 3HP",
+            "Not eligible",
+            asset_list["ECO Eligibility"]
+        )
+
+        asset_list["ECO Eligibility"] = np.where(
+            asset_list["Post Code"] == "WA4 2PH",
+            "Not eligible",
+            asset_list["ECO Eligibility"]
+        )
+
+        asset_list["ECO Eligibility"] = np.where(
+            asset_list["Post Code"] == "BD6 1QJ",
+            "Not eligible",
+            asset_list["ECO Eligibility"]
+        )
+
+        asset_list["ECO Eligibility"] = np.where(
+            asset_list["Post Code"] == "L39 1RS",
+            "Not eligible",
+            asset_list["ECO Eligibility"]
+        )
+
+        asset_list["ECO Eligibility"] = np.where(
+            asset_list["Post Code"] == "WA10 2DE",
+            "Not eligible",
+            asset_list["ECO Eligibility"]
+        )
+
     @staticmethod
     def correct_ha14_asset_list(asset_list):
 
@@ -1970,6 +2024,24 @@ class DataLoader:
 
         return eco3_list
 
+    @staticmethod
+    def correct_ha56_eco3_list(eco3_list):
+        eco3_list = eco3_list[~pd.isnull(eco3_list["Post Code"])]
+
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "Mount Pleasant, Crewe", "Mount Pleasant"
+        )
+
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "Dutton Close", "Dutton Way"
+        )
+
+        eco3_list["Post Code"] = eco3_list["Post Code"].str.replace(
+            "Ls63nl", "LS6 3NL"
+        )
+
+        return eco3_list
+
     def merge_eco3_to_assets(self, asset_list, eco3_list, ha_name):
 
         eco3_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_eco3_list")
@@ -1978,8 +2050,8 @@ class DataLoader:
         asset_list["matching_postcode_nospace"] = asset_list["matching_postcode"].str.replace(" ", "").str.lower()
         eco3_list["postcode_no_space"] = eco3_list["Post Code"].str.lower().str.replace(" ", "")
 
-        if ha_name == "HA25":
-            # 317 -> 259
+        if ha_name in ["HA25", "HA56"]:
+            # HA25: 317 -> 259
             missed_postcodes = {
                 postcode for postcode in eco3_list["postcode_no_space"] if
                 postcode not in asset_list["matching_postcode_nospace"].values
@@ -2060,6 +2132,7 @@ class DataLoader:
             raise ValueError(
                 f"Unmatched addresses for {ha_name} is not as expected, got {len(missed)} unmatched"
             )
+        missed_df = eco3_list[eco3_list["eco3_list_row_id"].isin(missed)]
 
         matching_lookup = pd.DataFrame(matching_lookup)
         # Check dupes as this will cause problems later on
@@ -3896,6 +3969,9 @@ def calculate_eco4_post_ciga(
 
 
 def forecast_remaining_sales(loader):
+    # TODO: Skip HA34 for the moment
+    loader.data = {k: v for k, v in loader.data.items() if k != "HA34"}
+
     # Assumptions:
     # We cap the ciga conversion rate at 75% because I expect future HAs to have a lower CIGA conversion rate
     # and I don't want the numbers to change too much, depenent on the CIGA conversation rate
@@ -4523,9 +4599,9 @@ def forecast_remaining_sales(loader):
                 gbis_delta_vs_original_estimate_remaining,
             # Expected cancellations
             (
-                "GBIS Postcode list", "Of which expected sales - £", "Remaining - £",
+                "GBIS Postcode list", "", "Of which expected sales - £ - £",
                 "GBIS total"): gbis_remaining_revenue,
-            ("GBIS Postcode list", "Of which expected cancellations -£", "Remaining - £", "GBIS total"):
+            ("GBIS Postcode list", "", "Of which expected cancellations -£", "GBIS total"):
                 gbis_remaining_expected_cancellations_revenue
         }
 
@@ -4786,14 +4862,14 @@ def app():
     priority_has = [
         "HA1", "HA2", "HA6", "HA7", "HA12", "HA13", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32",
         # "HA34",
-        "HA35", "HA39", "HA41", "HA48", "HA50", "HA63", "HA107", "HA117"
+        "HA35", "HA39", "HA41", "HA48", "HA50", "HA56", "HA63", "HA107", "HA117"
     ]
     # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come
     # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE], 13 [DONE],
     # 35 [DONE]
-    #  [WIP]
+    # 56 [WIP]
     # Consider for ECO4:
-    # Consider for GBIS: 56
+    # Consider for GBIS:
     # 34 [bug in the results so leaving out for the moment]
     # Ignore for now:
     # 38 [problematic, but no ECO4], 10 problematic (no eligibility), 20 has barely any in

From db7b6de87bfb13486a179cbdc547ae375cfc0c8d Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 10 Mar 2024 14:13:20 +0000
Subject: [PATCH 122/248] handle HA56 dupes

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 064ff8f5..62099386 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -189,6 +189,7 @@ class DataLoader:
         "HA25": 154,
         "HA41": 26,
         "HA50": 5,
+        "HA56": 320,
         "HA63": 0,
         "HA117": 4
     }
@@ -693,6 +694,8 @@ class DataLoader:
             asset_list["ECO Eligibility"]
         )
 
+        return asset_list
+
     @staticmethod
     def correct_ha14_asset_list(asset_list):
 
@@ -2040,6 +2043,14 @@ class DataLoader:
             "Ls63nl", "LS6 3NL"
         )
 
+        # Handle a duplicate
+        eco3_list = eco3_list[
+            ~((eco3_list["Street / Block Name"] == "Mount Pleasant") &
+              (eco3_list["Post Code"] == "CW1 3JF") &
+              (eco3_list["NO "] == 5) &
+              (eco3_list["INSTALL/ CANCELLATION DATE"] == "CANCELLED 20.5.2022"))
+        ]
+
         return eco3_list
 
     def merge_eco3_to_assets(self, asset_list, eco3_list, ha_name):
@@ -2128,15 +2139,16 @@ class DataLoader:
         # HA25 contains 119 missed entries. These are actually 24 unique postcodes, and the majority belong to 2
         # where many surveys were conducted on house numbers, not in the asset list
         # 154 missed, 2827 matched for HA 25
+        # For HA56, the number of missed is high at 320, however a big portion of these are due to the block being
+        # listed in the asset list, and individual units being in the survey list
         if len(missed) != self.UNMATCHED_ECO3[ha_name]:
             raise ValueError(
                 f"Unmatched addresses for {ha_name} is not as expected, got {len(missed)} unmatched"
             )
-        missed_df = eco3_list[eco3_list["eco3_list_row_id"].isin(missed)]
 
         matching_lookup = pd.DataFrame(matching_lookup)
         # Check dupes as this will cause problems later on
-        if matching_lookup["asset_list_row_id"].duplicated().any():
+        if matching_lookup["asset_list_row_id"].duplicated().sum():
             raise ValueError("Duplicated asset list row ids")
 
         # Merge onto eco3 list

From 8b3f4d3a520f9148195c6fbd55d3b1d7354d0ee1 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 10 Mar 2024 14:25:47 +0000
Subject: [PATCH 123/248] ha56 survey list matching

---
 .../ha_15_32/ha_analysis_batch_3.py           | 54 +++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 62099386..f9bf3856 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -694,6 +694,20 @@ class DataLoader:
             asset_list["ECO Eligibility"]
         )
 
+        # Already surveyed under ECO4
+        asset_list["ECO Eligibility"] = np.where(
+            asset_list["Post Code"] == "SK17 6NR",
+            "Not eligible",
+            asset_list["ECO Eligibility"]
+        )
+
+        asset_list["ECO Eligibility"] = np.where(
+            ~((asset_list["Post Code"] == "WA5 0EN") &
+              (asset_list["Address 1"] == "Block 17-26 Tavlin Avenue")),
+            "Not eligible",
+            asset_list["ECO Eligibility"]
+        )
+
         return asset_list
 
     @staticmethod
@@ -1811,6 +1825,29 @@ class DataLoader:
 
         return survey_list
 
+    @staticmethod
+    def correct_ha56_survey_list(survey_list):
+        # Not in asset list
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "Samual Street") &
+              (survey_list["NO."].isin([22, 24])) &
+              (survey_list["Post Code"] == "WA5 1BB"))
+        ]
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "STOURTON RD", "Stourton Road"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "BIRKIN RD", "Birkin Road"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "PORTLAND RD", "Portland Road"
+        )
+
+        return survey_list
+
     @staticmethod
     def levenstein_match(matching_string, df):
         match_to = df["matching_address"].tolist()
@@ -1843,6 +1880,10 @@ class DataLoader:
         if ha_name == "HA13":
             missed_postcodes = ["hp17 8le"]
 
+        if ha_name == "HA56":
+            # Multiple properties are listed as blocks, which is a problem for matching
+            missed_postcodes = ["sk17 6nr", "wa5 0en"]
+
         matching_lookup = []
         for _, row in tqdm(survey_list.iterrows(), total=len(survey_list)):
 
@@ -1890,6 +1931,19 @@ class DataLoader:
                 df = df[df["HouseNo"].astype(str).str.lower() == str(house_number)]
                 if df.shape[0] != 1:
                     df = df[df["matching_postcode"].str.lower().str.contains(row["Post Code"].lower().strip())]
+
+                    if df.empty:
+
+                        postcode_lower = row["Post Code"].lower()
+                        if postcode_lower in missed_postcodes:
+                            matching_lookup.append(
+                                {
+                                    "survey_list_row_id": row["survey_list_row_id"],
+                                    "asset_list_row_id": None,
+                                }
+                            )
+                            continue
+
                     if df.shape[0] != 1:
                         if "Town/Area" not in row.keys():
                             full_key = (str(row["NO."]).lower().strip() + row["Street / Block Name"].lower().strip() +

From 4a6711a1403a8661b467a0f7023151829e305822 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 10 Mar 2024 14:35:08 +0000
Subject: [PATCH 124/248] handling ha56 dupes

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index f9bf3856..0030af9d 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1846,6 +1846,13 @@ class DataLoader:
             "PORTLAND RD", "Portland Road"
         )
 
+        # We remove a row, because two rows match to a block listing
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "Tavlin Avenue") &
+              (survey_list["NO."] == 17) &
+              (survey_list["Post Code"] == "WA5 0EN"))
+        ]
+
         return survey_list
 
     @staticmethod

From ba65b6c8e37e5a44492c3342a05513d05d275ac4 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 10 Mar 2024 14:39:15 +0000
Subject: [PATCH 125/248] fixed bug in asset list cleaning

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 0030af9d..b1eda326 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -702,8 +702,8 @@ class DataLoader:
         )
 
         asset_list["ECO Eligibility"] = np.where(
-            ~((asset_list["Post Code"] == "WA5 0EN") &
-              (asset_list["Address 1"] == "Block 17-26 Tavlin Avenue")),
+            ((asset_list["Post Code"] == "WA5 0EN") &
+             (asset_list["Address 1"] == "Block 17-26 Tavlin Avenue")),
             "Not eligible",
             asset_list["ECO Eligibility"]
         )

From 5eb938bf54fbaaf52bb72e7c8972bad5e2d58a46 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 10 Mar 2024 15:40:02 +0000
Subject: [PATCH 126/248] ha18 done

---
 .../ha_15_32/ha_analysis_batch_3.py           | 28 +++++++++++++++++--
 1 file changed, 25 insertions(+), 3 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index b1eda326..676bd613 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -249,6 +249,20 @@ class DataLoader:
                 asset_list["Postcode"].astype(str).str.lower().str.strip()
             )
             asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA18":
+            asset_list["matching_address"] = (
+                asset_list["Address"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Post Code"].astype(str).str.lower().str.strip()
+            )
+            asset_list["matching_postcode"] = asset_list["Post Code"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA19":
+            asset_list["matching_address"] = (
+                asset_list["Address1"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Address2"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Address3"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Postcode"].astype(str).str.lower().str.strip()
+            )
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
         elif ha_name == "HA25":
             asset_list["matching_address"] = asset_list[
                 self.COLUMN_CONFIG[ha_name]["address"]
@@ -495,6 +509,8 @@ class DataLoader:
             return "CIGA checks"
         elif "CIGA check" in workbook.sheetnames:
             return "CIGA check"
+        elif "CIGA Check" in workbook.sheetnames:
+            return "CIGA Check"
         elif "CIGA requested" in workbook.sheetnames:
             return "CIGA requested"
         else:
@@ -1733,6 +1749,10 @@ class DataLoader:
 
         return survey_list
 
+    @staticmethod
+    def correct_ha18_survey_list(survey_list):
+        return survey_list
+
     @staticmethod
     def correct_ha35_survey_list(survey_list):
         return survey_list
@@ -2435,6 +2455,7 @@ class DataLoader:
             "eco4  (subject to ciga)": "eco4 (subject to ciga)",
             "eco4(subject to ciga)": "eco4 (subject to ciga)",
             "eco4 subject to ciga": "eco4 (subject to ciga)",
+            "eco4 (subject to archetype/ciga)": "eco4 (subject to ciga) (subject to archetype)",
         }
 
         ha_facts_and_figures = []
@@ -4933,14 +4954,15 @@ def app():
 
     # Add in:
     priority_has = [
-        "HA1", "HA2", "HA6", "HA7", "HA12", "HA13", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32",
+        "HA1", "HA2", "HA6", "HA7", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18",
+        "HA19", "HA24", "HA25", "HA28", "HA32",
         # "HA34",
         "HA35", "HA39", "HA41", "HA48", "HA50", "HA56", "HA63", "HA107", "HA117"
     ]
     # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come
     # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE], 13 [DONE],
-    # 35 [DONE]
-    # 56 [WIP]
+    # 35 [DONE], 56 [DONE], 19 [DONE]
+    #
     # Consider for ECO4:
     # Consider for GBIS:
     # 34 [bug in the results so leaving out for the moment]

From 5b39cf138df458b749d13fd100de011e6f3ac350 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 10 Mar 2024 15:52:33 +0000
Subject: [PATCH 127/248] ha9 data load

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 676bd613..88ab706b 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -173,6 +173,7 @@ class DataLoader:
     UNMATCHED_CIGA = {
         "HA2": 0,
         "HA6": 117,
+        "HA9": 0,
         "HA12": 6,
         "HA13": 119,
         "HA14": 3,
@@ -226,6 +227,14 @@ class DataLoader:
                                              asset_list["Address3"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["Postcode"].astype(str).str.lower().str.strip()
             asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA9":
+            asset_list["matching_address"] = asset_list["House Number"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address Line 2"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address Line 3"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address Line 4"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Postcode"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
         elif ha_name == "HA13":
             asset_list["matching_address"] = asset_list["Address 1"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["address 2"].astype(str).str.lower().str.strip() + ", " + \
@@ -430,7 +439,7 @@ class DataLoader:
         :return:
         """
 
-        if ha_name in ["HA107"]:
+        if ha_name == "HA107":
             asset_list["HouseNo"] = asset_list["House No"].copy()
         elif ha_name == "HA32":
             asset_list["HouseNo"] = asset_list["Dwelling num"].copy()
@@ -438,6 +447,8 @@ class DataLoader:
             asset_list["HouseNo"] = asset_list["House Number"].copy()
         elif ha_name == "HA38":
             asset_list["HouseNo"] = asset_list["House_Number"].copy()
+        elif ha_name == "HA9":
+            asset_list["HouseNo"] = asset_list["House Number"].copy()
         else:
             split_addresses = asset_list['matching_address'].str.split(',', expand=True)
             house_numbers = split_addresses[0].str.split(' ', expand=True)
@@ -4954,7 +4965,7 @@ def app():
 
     # Add in:
     priority_has = [
-        "HA1", "HA2", "HA6", "HA7", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18",
+        "HA1", "HA2", "HA6", "HA7", "HA9", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18",
         "HA19", "HA24", "HA25", "HA28", "HA32",
         # "HA34",
         "HA35", "HA39", "HA41", "HA48", "HA50", "HA56", "HA63", "HA107", "HA117"

From efbda5cece019d8518b770c0ace444c1179a1d6a Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 10 Mar 2024 16:09:08 +0000
Subject: [PATCH 128/248] ha27 complete

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 88ab706b..fba30f1f 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -280,6 +280,12 @@ class DataLoader:
             asset_list["matching_postcode"] = asset_list['matching_address'].apply(
                 lambda x: ' '.join(x.split()[-2:]) if pd.notnull(x) else x
             )
+        elif ha_name == "HA27":
+            asset_list["matching_address"] = (
+                asset_list[" Address"].astype(str).str.lower().str.strip() + ", " +
+                asset_list[" Postcode"].astype(str).str.lower().str.strip()
+            )
+            asset_list["matching_postcode"] = asset_list[" Postcode"].astype(str).str.lower().str.strip()
         elif ha_name == "HA28":
             asset_list["matching_address"] = (
                 asset_list["House Number"].astype(str).str.lower().str.strip() + ", " +
@@ -582,7 +588,7 @@ class DataLoader:
         # For HA1 and HA25, there is an exception in the structure of the data. We don't have any survey or ciga
         # lists, and so
         # we can return the asset list now
-        if ha_name in ["HA1"]:
+        if ha_name in ["HA1", "HA27"]:
             return asset_list, pd.DataFrame(), pd.DataFrame(), pd.DataFrame()
 
         # If we have ECO3 surveys, we need to match them, because any properties treated under ECO3 won't be
@@ -4966,13 +4972,13 @@ def app():
     # Add in:
     priority_has = [
         "HA1", "HA2", "HA6", "HA7", "HA9", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18",
-        "HA19", "HA24", "HA25", "HA28", "HA32",
+        "HA19", "HA24", "HA25", "HA27", "HA28", "HA32",
         # "HA34",
         "HA35", "HA39", "HA41", "HA48", "HA50", "HA56", "HA63", "HA107", "HA117"
     ]
     # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come
     # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE], 13 [DONE],
-    # 35 [DONE], 56 [DONE], 19 [DONE]
+    # 35 [DONE], 56 [DONE], 19 [DONE], 18 [DONE], 9 [DONE], 27 DONE
     #
     # Consider for ECO4:
     # Consider for GBIS:

From 22f3aca336abafc164439f00ddbdf34649f4f28a Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 10 Mar 2024 16:26:42 +0000
Subject: [PATCH 129/248] ha30 32% matched

---
 .../ha_15_32/ha_analysis_batch_3.py           | 29 +++++++++++++++++--
 1 file changed, 27 insertions(+), 2 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index fba30f1f..bdb0d0c4 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -164,6 +164,10 @@ class DataLoader:
             "address": "T1_Address",
             "postcode": "matching_postcode"
         },
+        "HA30": {
+            "address": "A_Address",
+            "postcode": "A_Postcode"
+        },
         "HA48": {
             "address": "Full Address",
             "postcode": "Postcode"
@@ -207,7 +211,7 @@ class DataLoader:
 
     def create_asset_list_matching_address(self, ha_name, asset_list):
 
-        if ha_name in ["HA1", "HA6", "HA12", "HA16", "HA24", "HA48"]:
+        if ha_name in ["HA1", "HA6", "HA12", "HA16", "HA24", "HA30", "HA48"]:
             asset_list["matching_address"] = asset_list[
                 self.COLUMN_CONFIG[ha_name]["address"]
             ].astype(str).str.lower().str.strip()
@@ -1892,6 +1896,27 @@ class DataLoader:
 
         return survey_list
 
+    @staticmethod
+    def correct_ha30_survey_list(survey_list):
+
+        survey_list = survey_list[~pd.isnull(survey_list["Post Code"])]
+
+        # Split on / and take the first half
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.split("/").str[0]
+
+        # Not in the asset list
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "Horsebridge Road") &
+              (survey_list["NO."] == 286))
+        ]
+
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "DUTTON WAY") &
+              (survey_list["NO."] == 9))
+        ]
+
+        return survey_list
+
     @staticmethod
     def levenstein_match(matching_string, df):
         match_to = df["matching_address"].tolist()
@@ -4972,7 +4997,7 @@ def app():
     # Add in:
     priority_has = [
         "HA1", "HA2", "HA6", "HA7", "HA9", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18",
-        "HA19", "HA24", "HA25", "HA27", "HA28", "HA32",
+        "HA19", "HA24", "HA25", "HA27", "HA28", "HA30", "HA32",
         # "HA34",
         "HA35", "HA39", "HA41", "HA48", "HA50", "HA56", "HA63", "HA107", "HA117"
     ]

From cd81c2b0b29a65b3fd3c59ec5dec7730afdd64ec Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 10 Mar 2024 16:45:59 +0000
Subject: [PATCH 130/248] done ha30 matching

---
 .../ha_15_32/ha_analysis_batch_3.py           | 68 +++++++++++++++++++
 1 file changed, 68 insertions(+)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index bdb0d0c4..71062b16 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1915,6 +1915,74 @@ class DataLoader:
               (survey_list["NO."] == 9))
         ]
 
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "PAYTHORNE CLOSE") &
+              (survey_list["NO."] == 10))
+        ]
+
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "MARCHWOOD ROAD") &
+              (survey_list["NO."] == 11))
+        ]
+
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "Otterburn Close") &
+              (survey_list["NO."] == 4))
+        ]
+
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "Blossom Court") &
+              (survey_list["NO."] == 5))
+        ]
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "St LUKES CLOSE , HUNTINGDON", "St. Lukes Close"
+        )
+
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "St. Lukes Close") &
+              (survey_list["NO."].isin([4, 7, 8])))
+        ]
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "ROMAN WAY , GODMANCHESTER , HUNTINGDON", "Roman Way"
+        )
+
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "Roman Way") &
+              (survey_list["NO."].isin([58])))
+        ]
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "HEADLANDS , FENSTANTON , HUNTINGDON", "Headlands Fenstanton"
+        )
+
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "Headlands Fenstanton") &
+              (survey_list["NO."].isin([126, 134])))
+        ]
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "WALLACE COURT , HUNTINGDON", "Wallace Court"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "CRICKETERS WAY , CHATTERIS", "Cricketers Way"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Jubilee Gardens", "Jubilee Green"
+        )
+
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "Harrow Road") &
+              (survey_list["NO."].isin([10])))
+        ]
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "ST LUKES CLOSE", "St. Lukes Close"
+        )
+
         return survey_list
 
     @staticmethod

From 2810316e22ffe4662ae40c2c3bb9bee2f6af6f83 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 10 Mar 2024 17:14:22 +0000
Subject: [PATCH 131/248] handled bug for HA30

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 71062b16..1ee40dde 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -2566,6 +2566,7 @@ class DataLoader:
             "eco4(subject to ciga)": "eco4 (subject to ciga)",
             "eco4 subject to ciga": "eco4 (subject to ciga)",
             "eco4 (subject to archetype/ciga)": "eco4 (subject to ciga) (subject to archetype)",
+            "eco4( subject to ciga/archetype)": "eco4 (subject to ciga) (subject to archetype)",
         }
 
         ha_facts_and_figures = []
@@ -2716,11 +2717,13 @@ class DataLoader:
 
                 asset_list = asset_list.merge(survey_list_to_merge, how='left', on="asset_list_row_id")
                 # Update the cases where properties have sold, but are missing a CIGA check
+                # If we don't have a CIGA list, we set the value to ECO4
+                set_to = "eco4 - passed ciga" if not ciga_list.empty else "eco4"
                 asset_list["ECO Eligibility"] = np.where(
                     (asset_list["ECO Eligibility"].str.contains("subject to ciga")) & (
                         asset_list["has_a_survey_record"] == True
                     ),
-                    "eco4 - passed ciga",
+                    set_to,
                     asset_list["ECO Eligibility"]
                 )
                 # Update the cases where a property has been marked as eligible for GBIS, but sold for ECO4
@@ -4122,7 +4125,6 @@ def calculate_eco4_post_ciga(
 
         eco4_expected_cancellations = eco4_no_ciga_needed_cancellations + eco4_ciga_needed_cancellations
     else:
-
         eco4_confirmed_ciga_failures = 0
         # Multiply by sale conversion
         eco4_confirmed = np.round(eco4_no_ciga_needed * ha_eco4_to_sale_rate)

From e15b977930c1b65ab39099c8c6a92d05039e96af Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 10 Mar 2024 17:25:07 +0000
Subject: [PATCH 132/248] fixed ha34, completed 30

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 1ee40dde..7d35386d 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -2550,7 +2550,8 @@ class DataLoader:
             "AFFORDABLE WARMTH / REMEDIAL": "ECO4",
             "AFF0RDALE WARMTH": "ECO4",
             "ECO 4 RdSAP CL": "ECO4",
-            "Affordable Warmth (R) ": "ECO4"
+            "Affordable Warmth (R) ": "ECO4",
+            "Affordable Warmth ": "ECO4"
         }
 
         # Since it seems like "subject to archetype check" has some failure conditions, for simplicity, we
@@ -4175,9 +4176,6 @@ def calculate_eco4_post_ciga(
 
 
 def forecast_remaining_sales(loader):
-    # TODO: Skip HA34 for the moment
-    loader.data = {k: v for k, v in loader.data.items() if k != "HA34"}
-
     # Assumptions:
     # We cap the ciga conversion rate at 75% because I expect future HAs to have a lower CIGA conversion rate
     # and I don't want the numbers to change too much, depenent on the CIGA conversation rate
@@ -5066,18 +5064,15 @@ def app():
 
     # Add in:
     priority_has = [
-        "HA1", "HA2", "HA6", "HA7", "HA9", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18",
-        "HA19", "HA24", "HA25", "HA27", "HA28", "HA30", "HA32",
-        # "HA34",
-        "HA35", "HA39", "HA41", "HA48", "HA50", "HA56", "HA63", "HA107", "HA117"
+        "HA1", "HA2", "HA6", "HA7", "HA9", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18", "HA19", "HA24", "HA25",
+        "HA27", "HA28", "HA30", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA50", "HA56", "HA63", "HA107", "HA117"
     ]
     # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come
     # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE], 13 [DONE],
-    # 35 [DONE], 56 [DONE], 19 [DONE], 18 [DONE], 9 [DONE], 27 DONE
+    # 35 [DONE], 56 [DONE], 19 [DONE], 18 [DONE], 9 [DONE], 27 [DONE], 34 [DONE], 30 [DONE]
     #
     # Consider for ECO4:
     # Consider for GBIS:
-    # 34 [bug in the results so leaving out for the moment]
     # Ignore for now:
     # 38 [problematic, but no ECO4], 10 problematic (no eligibility), 20 has barely any in
     # Filter down the directories to only the priority HAs

From 41c17aa1dafe9110c74d6969f2fa06e58d3f0cf8 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 10 Mar 2024 18:13:45 +0000
Subject: [PATCH 133/248] HA54 done

---
 .../ha_15_32/ha_analysis_batch_3.py           | 22 ++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 7d35386d..d556450b 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -168,9 +168,17 @@ class DataLoader:
             "address": "A_Address",
             "postcode": "A_Postcode"
         },
+        "HA31": {
+            "address": "A_Address",
+            "postcode": "matching_postcode"
+        },
         "HA48": {
             "address": "Full Address",
             "postcode": "Postcode"
+        },
+        "HA54": {
+            "address": "Postal Address",
+            "postcode": "matching_postcode"
         }
     }
 
@@ -211,7 +219,7 @@ class DataLoader:
 
     def create_asset_list_matching_address(self, ha_name, asset_list):
 
-        if ha_name in ["HA1", "HA6", "HA12", "HA16", "HA24", "HA30", "HA48"]:
+        if ha_name in ["HA1", "HA6", "HA12", "HA16", "HA24", "HA30", "HA31", "HA48", "HA54"]:
             asset_list["matching_address"] = asset_list[
                 self.COLUMN_CONFIG[ha_name]["address"]
             ].astype(str).str.lower().str.strip()
@@ -559,6 +567,12 @@ class DataLoader:
         if ha_name == "HA25":
             asset_sheet_colnames[11] = "matching_postcode"
 
+        if ha_name == "HA31":
+            asset_sheet_colnames[2] = "matching_postcode"
+
+        if ha_name == "HA54":
+            asset_sheet_colnames[10] = "matching_postcode"
+
         rows_data = []
 
         for row in asset_sheet.iter_rows(min_row=2, values_only=False):
@@ -2568,6 +2582,7 @@ class DataLoader:
             "eco4 subject to ciga": "eco4 (subject to ciga)",
             "eco4 (subject to archetype/ciga)": "eco4 (subject to ciga) (subject to archetype)",
             "eco4( subject to ciga/archetype)": "eco4 (subject to ciga) (subject to archetype)",
+            "eco4 (subject to ciga/ archetype)": "eco4 (subject to ciga) (subject to archetype)"
         }
 
         ha_facts_and_figures = []
@@ -5065,11 +5080,12 @@ def app():
     # Add in:
     priority_has = [
         "HA1", "HA2", "HA6", "HA7", "HA9", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18", "HA19", "HA24", "HA25",
-        "HA27", "HA28", "HA30", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA50", "HA56", "HA63", "HA107", "HA117"
+        "HA27", "HA28", "HA30", "HA31", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA50", "HA54", "HA56", "HA63",
+        "HA107", "HA117"
     ]
     # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come
     # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE], 13 [DONE],
-    # 35 [DONE], 56 [DONE], 19 [DONE], 18 [DONE], 9 [DONE], 27 [DONE], 34 [DONE], 30 [DONE]
+    # 35 [DONE], 56 [DONE], 19 [DONE], 18 [DONE], 9 [DONE], 27 [DONE], 34 [DONE], 30 [DONE], 31 [DONE], 54 [DONE]
     #
     # Consider for ECO4:
     # Consider for GBIS:

From 6a327629bf0ab5284b1b951cc98360597f30ce1f Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 12 Mar 2024 11:09:09 +0000
Subject: [PATCH 134/248] rough attempt to attribute surplus ciga dependent
 eco4 jobs

---
 .../ha_15_32/ha_analysis_batch_3.py           | 144 +++++++++++++-----
 1 file changed, 107 insertions(+), 37 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index d556450b..5ad1aa27 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -176,6 +176,10 @@ class DataLoader:
             "address": "Full Address",
             "postcode": "Postcode"
         },
+        "HA49": {
+            "address": "Property Address Full",
+            "postcode": "Property Postcode"
+        },
         "HA54": {
             "address": "Postal Address",
             "postcode": "matching_postcode"
@@ -219,7 +223,7 @@ class DataLoader:
 
     def create_asset_list_matching_address(self, ha_name, asset_list):
 
-        if ha_name in ["HA1", "HA6", "HA12", "HA16", "HA24", "HA30", "HA31", "HA48", "HA54"]:
+        if ha_name in ["HA1", "HA6", "HA12", "HA16", "HA24", "HA30", "HA31", "HA48", "HA49", "HA54"]:
             asset_list["matching_address"] = asset_list[
                 self.COLUMN_CONFIG[ha_name]["address"]
             ].astype(str).str.lower().str.strip()
@@ -382,6 +386,16 @@ class DataLoader:
                                              asset_list["Address2"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["PostCode"].astype(str).str.lower().str.strip()
             asset_list["matching_postcode"] = asset_list["PostCode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HAXX":
+            asset_list["matching_address"] = asset_list["Address"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["PostCode"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["PostCode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HAXXX":
+            asset_list["matching_address"] = (
+                asset_list["Combined Address"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Postcode"].astype(str).str.lower().str.strip()
+            )
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
         else:
             raise NotImplementedError("implement me")
 
@@ -467,6 +481,8 @@ class DataLoader:
             asset_list["HouseNo"] = asset_list["House_Number"].copy()
         elif ha_name == "HA9":
             asset_list["HouseNo"] = asset_list["House Number"].copy()
+        elif ha_name == "HAXXX":
+            asset_list["HouseNo"] = asset_list["Door Number"].copy()
         else:
             split_addresses = asset_list['matching_address'].str.split(',', expand=True)
             house_numbers = split_addresses[0].str.split(' ', expand=True)
@@ -1999,6 +2015,10 @@ class DataLoader:
 
         return survey_list
 
+    @staticmethod
+    def correct_ha49_survey_list(survey_list):
+        return survey_list
+
     @staticmethod
     def levenstein_match(matching_string, df):
         match_to = df["matching_address"].tolist()
@@ -5080,8 +5100,11 @@ def app():
     # Add in:
     priority_has = [
         "HA1", "HA2", "HA6", "HA7", "HA9", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18", "HA19", "HA24", "HA25",
-        "HA27", "HA28", "HA30", "HA31", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA50", "HA54", "HA56", "HA63",
-        "HA107", "HA117"
+        "HA27", "HA28", "HA30", "HA31", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA49", "HA50", "HA54", "HA56",
+        "HA63", "HA107", "HA117",
+
+        # New HAS
+        "HAXX", "HAXXX",
     ]
     # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come
     # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE], 13 [DONE],
@@ -5100,39 +5123,86 @@ def app():
 
     forecast_remaining_sales(loader)
 
-    # We load in the additional data required to perform the analysis
-    # cleaned = read_from_s3(
-    #     s3_file_name="cleaned_epc_data/cleaned.bson",
-    #     bucket_name="retrofit-data-dev"
-    # )
-    # cleaned = msgpack.unpackb(cleaned, raw=False)
-    # cleaned = patch_cleaned(cleaned)
-    #
-    # cleaning_data = read_dataframe_from_s3_parquet(
-    #     bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
-    # )
-    # created_at = datetime.now().isoformat()
-    #
-    # photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")
-    #
-    # outputs = get_epc_data(
-    #     loader=loader,
-    #     cleaned=cleaned,
-    #     cleaning_data=cleaning_data,
-    #     created_at=created_at,
-    #     photo_supply_lookup=photo_supply_lookup,
-    #     floor_area_decile_thresholds=floor_area_decile_thresholds,
-    #     pull_data=pull_data
-    # )
+    conversion_rate = 0.95
+    archetype_check_conversion = 0.7
+    res = []
+    for k, v in loader.data.items():
+        asset_list = v["asset_list"].copy()
+        agg = asset_list["ECO Eligibility"].value_counts()
+        # Skip HAs that have no "passed" eligibility category (i.e. no properties have passed CIGA)
+        if not any("passed" in x for x in agg.index):
+            continue
 
-    # import pickle
-    # with open("ha_analysis.pickle", "wb") as f:
-    #     pickle.dump({"outputs": outputs, "loader": loader}, f)
+        agg = pd.DataFrame(agg).reset_index()
 
-    # To read:
-    # import pickle
-    # with open("ha_analysis.pickle", "rb") as f:
-    #     outputs = pickle.load(f)["outputs"]
-    #
-    # with open("loader.pickle", "rb") as f:
-    #     loader = pickle.load(f)
+        passed_ciga = agg[agg["ECO Eligibility"] == "eco4 - passed ciga"]
+        passed_ciga = passed_ciga["count"].values[0] if not passed_ciga.empty else 0
+
+        failed_ciga = agg[agg["ECO Eligibility"] == "failed ciga"]
+        failed_ciga = failed_ciga["count"].values[0] if not failed_ciga.empty else 0
+
+        ciga_pass_rate = passed_ciga / (passed_ciga + failed_ciga) if (passed_ciga + failed_ciga) > 0 else 1
+
+        dormant_ciga = agg[
+            agg["ECO Eligibility"].str.contains("subject to ciga") &
+            ~agg["ECO Eligibility"].str.contains("subject to archetype")
+            ]
+
+        dormant_ciga = dormant_ciga['count'].values[0] if not dormant_ciga.empty else 0
+
+        dormant_ciga_archetype = agg[
+            agg["ECO Eligibility"].str.contains("subject to ciga") &
+            agg["ECO Eligibility"].str.contains("subject to archetype")
+            ]
+
+        dormant_ciga_archetype = dormant_ciga_archetype['count'].values[0] if not dormant_ciga_archetype.empty else 0
+
+        needing_check = dormant_ciga + dormant_ciga_archetype * archetype_check_conversion
+        needing_check = np.round(needing_check)
+
+        additional_jobs = (dormant_ciga * ciga_pass_rate * conversion_rate) + (
+            dormant_ciga_archetype * archetype_check_conversion * ciga_pass_rate * conversion_rate
+        )
+        additional_jobs = np.round(additional_jobs)
+
+        # We attempt to estimate the uplift and how much of that is attributed to surplus subject to ciga jobs
+        original_estimate = loader.december_figures[
+            loader.december_figures["HA Name"] == k
+            ]
+
+        original_estimate = original_estimate["ECO4"].values[0] if not original_estimate.empty else 0
+        base_eco_figures = agg[
+            agg["ECO Eligibility"].isin(["eco4", "eco4 - passed ciga"])
+        ]["count"].sum()
+        eco4_from_ciga = original_estimate - base_eco_figures
+        eco4_from_ciga = eco4_from_ciga if eco4_from_ciga > 0 else 0
+        surplus_from_dormant = additional_jobs - eco4_from_ciga
+        surplus_from_dormant = 0 if surplus_from_dormant < 0 else surplus_from_dormant
+
+        res.append(
+            {
+                "ha_name": k,
+                "additional_eco4": additional_jobs,
+                "needing_check": needing_check,
+                "surplus_from_dormant": surplus_from_dormant
+            }
+        )
+
+    res = pd.DataFrame(res)
+    # Drop the HAs that are not in the previous draft
+    # In the v2 draft, there are 12 HAs
+
+    v5_surplus = res[
+        ~res["ha_name"].isin(["HA9"])
+    ]["additional_eco4"].sum()
+    # 7212 properties
+    # This is not a perfect difference though, because of the variations in how the numbers are recorded in the November
+    # all HAs sheet. E.g for HA 107, there were 1239 properties identified. In the postcode list, there are 1255,
+    # however 531 are still needing a CIGA check. Therefore their original figures, in this case, included properties
+    # pre-CIGA
+
+    v5_surplus_from_dormant = res[
+        ~res["ha_name"].isin(["HA9"])
+    ]["surplus_from_dormant"].sum()
+    # 5539.0
+    # 9471690

From ddb5de50e550190c74cd5a2be767f2960352143a Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 14 Mar 2024 13:58:29 +0000
Subject: [PATCH 135/248] testing with another stupid effing method

---
 .idea/.gitignore                              |   2 +
 .../ha_15_32/ha_analysis_batch_3.py           | 230 +++++++++++++++++-
 .../epc_attributes/RoofAttributes.py          |  17 +-
 3 files changed, 241 insertions(+), 8 deletions(-)

diff --git a/.idea/.gitignore b/.idea/.gitignore
index 26d33521..8f00030d 100644
--- a/.idea/.gitignore
+++ b/.idea/.gitignore
@@ -1,3 +1,5 @@
 # Default ignored files
 /shelf/
 /workspace.xml
+# GitHub Copilot persisted chat sessions
+/copilot/chatSessions
diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 5ad1aa27..767e13c8 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -7,7 +7,9 @@ import msgpack
 from datetime import datetime
 import pandas as pd
 import numpy as np
-from utils.s3 import read_from_s3, read_dataframe_from_s3_parquet, save_pickle_to_s3, read_pickle_from_s3
+from utils.s3 import (
+    read_from_s3, read_dataframe_from_s3_parquet, save_pickle_to_s3, read_pickle_from_s3, save_dataframe_to_s3_parquet
+)
 from utils.logger import setup_logger
 from dotenv import load_dotenv
 from tqdm import tqdm
@@ -2860,8 +2862,8 @@ def get_property_type_and_built_form(property_meta, ha_name):
         property_type = PROPERTY_TYPE_LOOKUP[ha_name]["property_type"][property_meta["Dwelling type"]]
         built_form = property_meta["built_form"]
     elif ha_name == "HA7":
-        property_type = PROPERTY_TYPE_LOOKUP[ha_name]["property_type"][property_meta["Archetype"]]
-        built_form = PROPERTY_TYPE_LOOKUP[ha_name]["built_form"][property_meta["Property Type"]]
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name]["property_type"].get(property_meta["Archetype"])
+        built_form = PROPERTY_TYPE_LOOKUP[ha_name]["built_form"].get(property_meta["Property Type"])
     elif ha_name == "HA14":
         if property_meta["Asset Type Description"] == "Block - Repair":
             # We try and deduce if it's a flat or house, depending on if it has "room" or "flats" in the address
@@ -4429,6 +4431,12 @@ def forecast_remaining_sales(loader):
     for ha_name, input_data in loader.data.items():
         # Original warmfront figures - ECO4
         original_warmfront_estimates = december_figures[december_figures["HA Name"] == ha_name]
+        if original_warmfront_estimates.empty:
+            # No December figures for this HA: build a zero-filled placeholder row
+            original_warmfront_estimates = december_figures.head(1).copy()
+            for k in original_warmfront_estimates.columns:
+                original_warmfront_estimates[k] = 0
+            original_warmfront_estimates["HA Name"] = ha_name
 
         original_warmfront_eco4 = original_warmfront_estimates["ECO4"].values[0]
         original_warmfront_remaining_eco4 = original_warmfront_estimates["ECO4 remaining"].values[0]
@@ -4742,6 +4750,12 @@ def forecast_remaining_sales(loader):
         if gbis_variance_2 != 0:
             raise ValueError("Something went wrong in gbis_variance2")
 
+        # Update the GBIS sold, since Warmfront often sold more GBIS than expected
+        original_warmfront_gbis_revenue = original_warmfront_sold_gbis + original_warmfront_remaining_gbis_revenue
+        original_warmfront_gbis = (
+            original_warmfront_sold_gbis / gbis_rate + original_warmfront_remaining_gbis_revenue / gbis_rate
+        )
+
         to_append = {
             ("", "", "", "HA Name"): ha_name,
             # ECO4 - original warmfront figures
@@ -5077,6 +5091,216 @@ def forecast_remaining_sales(loader):
         results.to_csv(file, header=True, index=False)
 
 
+def fml_data_pull(loader):
+    has_bruh = ["HA7"]
+    from backend.SearchEpc import SearchEpc
+    epc_api_key = "a2Nvbm5rb3dsZXNzYXJAZ21haWwuY29tOjY5MGJiMWM0NmIyOGI5ZDUxYzAxMzQzYzNiZGNlZGJjZDNmODQwMzA="
+
+    for ha in has_bruh:
+        asset_list = loader.data[ha]["asset_list"].copy()
+        # properties found as eligibile
+        fml = asset_list[asset_list["ECO Eligibility"] != "not eligible"]
+
+        # For each property, search for the latest EPC
+        epc_data = []
+        for _, row in tqdm(fml.iterrows(), total=fml.shape[0]):
+            property_type, built_form = get_property_type_and_built_form(property_meta=row, ha_name=ha)
+            searcher = SearchEpc(
+                address1=row["HouseNo"],
+                postcode=row["matching_postcode"],
+                auth_token=epc_api_key,
+                os_api_key="",
+                property_type=property_type,
+                full_address=row["matching_address"],
+            )
+            searcher.ordnance_survey_client.property_type = property_type
+            searcher.ordnance_survey_client.built_form = built_form
+
+            searcher.find_property(skip_os=True)
+            if searcher.newest_epc is None:
+                continue
+
+            epc = {
+                "asset_list_row_id": row["asset_list_row_id"],
+                **searcher.newest_epc.copy()
+            }
+
+            epc_data.append(epc)
+
+        # Remove None entries
+        epc_data = [x for x in epc_data if x is not None]
+        # Save the data in S3 as a parquet
+        epc_data_df = pd.DataFrame(epc_data)
+        save_pickle_to_s3(
+            data=epc_data_df,
+            bucket_name="retrofit-datalake-dev",
+            s3_file_name=f"ha-analysis/revised/{ha}/epc_data.pickle"
+        )
+
+
+def extract_lower_bound(age_band):
+    if pd.isna(age_band):
+        return 1930
+    try:
+        return int(age_band.split(':')[1].split('-')[0].strip())
+    except (ValueError, IndexError):
+        return 1930
+
+
+def fml_analysis(loader):
+    from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
+    from etl.epc.DataProcessor import EPCDataProcessor
+    assumed_ciga_pass_rate = 0.731
+    has_bruh = ["HA7"]
+
+    results = []
+    for ha_name in has_bruh:
+
+        original_figures = loader.december_figures[
+            loader.december_figures["HA Name"] == ha_name
+            ].copy()
+        original_remaining = original_figures["ECO4 remaining"].values[0]
+
+        # Read in the epc data
+        asset_list = loader.data[ha_name]["asset_list"].copy()
+        # properties found as eligibile
+        fml = asset_list[asset_list["ECO Eligibility"] != "not eligible"]
+        epc_data = read_pickle_from_s3(
+            bucket_name="retrofit-datalake-dev",
+            s3_file_name=f"ha-analysis/revised/{ha_name}/epc_data.pickle"
+        )
+
+        fuck_this = fml.merge(
+            epc_data, how="left", on="asset_list_row_id"
+        )
+        if fuck_this.shape[0] != fml.shape[0]:
+            raise Exception("What the fuck bruv")
+
+        # Take just remaining
+        if not loader.data[ha_name]["survey_list"].empty:
+            raise NotImplementedError("TAKE JUST REMAINING IDIOT")
+
+        insulation_thicknesses = []
+        for _, x in fuck_this.iterrows():
+            if pd.isnull(x["roof-description"]):
+                continue
+            thickness = RoofAttributes(x["roof-description"]).process()["insulation_thickness"]
+            # If there is a + in the thickness, strip it out
+            thickness = str(thickness).replace("+", "")
+            insulation_thicknesses.append(
+                {'uprn': x["uprn"], "roof_insulation_thickness": thickness}
+            )
+        insulation_thicknesses = pd.DataFrame(insulation_thicknesses)
+
+        fuck_this = fuck_this.merge(insulation_thicknesses, how="left", on="uprn")
+        # clean roof insulation
+        fuck_this["roof_insulation_thickness"] = fuck_this["roof_insulation_thickness"].fillna("0")
+        fuck_this["roof_insulation_thickness"] = fuck_this[
+            "roof_insulation_thickness"
+        ].str.replace("below average", "50")
+        fuck_this["roof_insulation_thickness"] = fuck_this[
+            "roof_insulation_thickness"
+        ].str.replace("None", "0")
+        fuck_this["roof_insulation_thickness"] = fuck_this[
+            "roof_insulation_thickness"
+        ].str.replace("none", "0")
+        fuck_this["roof_insulation_thickness"] = fuck_this[
+            "roof_insulation_thickness"
+        ].str.replace("average", "150")
+
+        fuck_this["construction-age-band"] = fuck_this["construction-age-band"].apply(
+            lambda x: EPCDataProcessor.clean_construction_age_band(x)
+        )
+
+        fuck_this['age_lower_bound'] = fuck_this['construction-age-band'].apply(extract_lower_bound)
+
+        had_survey = fuck_this[pd.isnull(fuck_this["estimated"])]
+
+        # proportion with a survey:
+        proportion_with_survey = 100 * had_survey.shape[0] / fuck_this.shape[0]
+
+        # Let's look just at the ECO4 business
+        # For things that had a survey, take the properties that didn't need a CIGA check
+        no_ciga_check_needed = had_survey[
+            had_survey["ECO Eligibility"] == "eco4"
+            ]
+
+        no_ciga_check_needed_with_archetype = no_ciga_check_needed[
+            (no_ciga_check_needed["walls-description"].str.lower().str.contains("cavity") == True) &
+            (no_ciga_check_needed["roof-description"].str.lower().str.contains("pitched") == True) &
+            (no_ciga_check_needed["current-energy-efficiency"].astype(float) <= 80)
+            ]
+        if not no_ciga_check_needed_with_archetype.empty:
+            raise Exception("SORT ME OUT")
+
+        # Characterise no CIGA check needed
+
+        # TODO: WHAT ABOUT PASSED CIGA - don't need to apply the further deduction
+
+        ciga_check_needed = had_survey[
+            had_survey["ECO Eligibility"].str.contains("subject to ciga")
+        ]
+
+        # We take just the cavity walls
+        # UCL paper: https://discovery.ucl.ac.uk/id/eprint/10110371/
+        # This paper is based on London properties
+        # The proportion of EPCs with building characteristics errors are shown to
+        # differ between variables; floor and wall type errors occur in ~10-15% of EPCs,
+        # compared with ~5% for wall insulation and glazing performance
+
+        ciga_check_needed_with_archetype = ciga_check_needed[
+            (ciga_check_needed["walls-description"].str.lower().str.contains("cavity") == True) &
+            (ciga_check_needed["roof-description"].str.lower().str.contains("pitched") == True) &
+            (ciga_check_needed["current-energy-efficiency"].astype(float) <= 80)
+            ]
+
+        # We take properties that could feasibly be within install regions
+        ciga_check_needed_plausible = ciga_check_needed_with_archetype[
+            ciga_check_needed_with_archetype["roof_insulation_thickness"].astype(float) < 270
+            ]
+
+        if not loader.data[ha_name]["ciga_list"].empty:
+            raise NotImplementedError("SORT OUT THE CIGA BRUV")
+        else:
+            ha_ciga_pass_rate = assumed_ciga_pass_rate
+
+        ciga_check_expectation = np.round(ciga_check_needed_plausible.shape[0] * ha_ciga_pass_rate)
+        without_ciga_expectation = no_ciga_check_needed_with_archetype.shape[0]
+
+        # Need to add on the non-ciga
+        total_expectation = ciga_check_expectation + without_ciga_expectation
+
+        if proportion_with_survey < 100:
+            # We estimate the rest
+            without_survey_needing_ciga = fuck_this[
+                (pd.isnull(fuck_this["estimated"]) == False) &
+                (fuck_this["ECO Eligibility"].str.contains("subject to ciga") == True)
+                ]
+
+            # We apply the same conversion rate as the properties with a survey
+            without_survey_without_ciga_expected = np.round(
+                without_survey_needing_ciga.shape[0] * (ciga_check_expectation / ciga_check_needed.shape[0])
+            )
+
+            total_expectation += without_survey_without_ciga_expected
+
+            without_survey_without_ciga = fuck_this[
+                (pd.isnull(fuck_this["estimated"]) == False) & (fuck_this["ECO Eligibility"].isin(["eco4"]))
+                ]
+
+            if not without_survey_without_ciga.empty:
+                raise Exception("Estimate the rest!!")
+
+        results.append(
+            {
+                "HA Name": ha_name,
+                "Original ECO4 Estimate - Remaining": original_remaining,
+                "Proportion with a survey": proportion_with_survey,
+                "total_expectation": total_expectation
+            }
+        )
+
+
 def app():
     """
     This app contains the housin association analysis for HAs 1, 6, 14, 39 and 107.
diff --git a/etl/epc_clean/epc_attributes/RoofAttributes.py b/etl/epc_clean/epc_attributes/RoofAttributes.py
index 9d3b46b4..76f99f09 100644
--- a/etl/epc_clean/epc_attributes/RoofAttributes.py
+++ b/etl/epc_clean/epc_attributes/RoofAttributes.py
@@ -122,6 +122,13 @@ class RoofAttributes(Definitions):
         result["is_valid"] = "invalid" not in description
         description = description.replace("invalid", "")
 
+        # We handle an edge case where the description is "pitched, 150  loft insulation" and is missing the mm
+        if result["is_pitched"] or result["is_loft"]:
+            # Search for a regular expression that matches 150   insulation
+            match = re.search(r"(\d+\+?)\s*insulation", description)
+            if match:
+                result['insulation_thickness'] = match.group(1)
+
         # insulation thickness
         thickness_map = {
             "ceiling insulated": "average",
@@ -137,11 +144,11 @@ class RoofAttributes(Definitions):
                 # Remove the match from the description
                 # description = description.replace(key, "")
                 break
-        else:
-            # Extract insulation thickness in mm, if present
-            match = re.search(r'(\d+\+?)\s*mm', description)
-            if match:
-                result['insulation_thickness'] = match.group(1)
+
+        # Extract insulation thickness in mm, if present
+        match = re.search(r'(\d+\+?)\s*mm', description)
+        if match:
+            result['insulation_thickness'] = match.group(1)
 
         if "insulation_thickness" not in result:
             result['insulation_thickness'] = None

From bee07a253b8285a67c4cb78b9051e2b000de30c0 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 14 Mar 2024 16:10:55 +0000
Subject: [PATCH 136/248] new method wip

---
 .../ha_15_32/ha_analysis_batch_3.py           | 125 +++++++++++++++---
 1 file changed, 105 insertions(+), 20 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 767e13c8..9cadaf9f 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -92,6 +92,27 @@ PROPERTY_TYPE_LOOKUP = {
         'Flat Over Shop': {"property-type": "Flat", "built-form": "Mid-Terrace"},
         'Mid Terraced Town House': {"property-type": "House", "built-form": "Mid-Terrace"},
     },
+    "HA25": {
+        'Flat': 'Flat',
+        'Mid Terrace House': 'House',
+        'Semi Detached House': 'House',
+        'End Terrace House': 'House',
+        'House': 'House',
+        'Semi Detached Bung': 'Bungalow',
+        'Bungalow': 'Bungalow',
+        'End Terrace Bungalow': 'Bungalow',
+        'Maisonnette': 'Maisonette',
+        'Mid Terrace Bungalow': 'Bungalow',
+        'Bedspace': None,
+        'Detached House': 'House',
+        'Bedsit': 'Flat',
+        'Coach House': 'House',
+        'Detached Bungalow': 'Bungalow',
+        'Office Buildings': None,
+        'Guest Room': None,
+        'Mid Terrace Housekeeping ': 'House',
+        'End Terrace Housex': 'House'
+    },
     "HA39": {
         "Semi house": {"property_type": "House", "built_form": "Semi-Detached"},
         "1st floor flat": {"property_type": "Flat", "built_form": None},
@@ -2877,6 +2898,9 @@ def get_property_type_and_built_form(property_meta, ha_name):
                 property_meta["Asset Type Description"]
             ]
 
+        built_form = None
+    elif ha_name == "HA25":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name][property_meta["T1_AssetType"]]
         built_form = None
     elif ha_name == "HA16":
         config = PROPERTY_TYPE_LOOKUP[ha_name][property_meta["Type"]]
@@ -5092,7 +5116,8 @@ def forecast_remaining_sales(loader):
 
 
 def fml_data_pull(loader):
-    has_bruh = ["HA7"]
+    has_bruh = ["HA7", "HA14", "HA25", "HA39", "HA16"]
+    # DO
     from backend.SearchEpc import SearchEpc
     epc_api_key = "a2Nvbm5rb3dsZXNzYXJAZ21haWwuY29tOjY5MGJiMWM0NmIyOGI5ZDUxYzAxMzQzYzNiZGNlZGJjZDNmODQwMzA="
 
@@ -5104,7 +5129,7 @@ def fml_data_pull(loader):
         # For each property, search for the latest EPC
         epc_data = []
         for _, row in tqdm(fml.iterrows(), total=fml.shape[0]):
-            property_type, built_form = get_property_type_and_built_form(property_meta=row, ha_name=ha)
+            property_type, _ = get_property_type_and_built_form(property_meta=row, ha_name=ha)
             searcher = SearchEpc(
                 address1=row["HouseNo"],
                 postcode=row["matching_postcode"],
@@ -5113,8 +5138,9 @@ def fml_data_pull(loader):
                 property_type=property_type,
                 full_address=row["matching_address"],
             )
-            searcher.ordnance_survey_client.property_type = property_type
-            searcher.ordnance_survey_client.built_form = built_form
+            # Force the skipping of estimating the EPC
+            searcher.ordnance_survey_client.property_type = None
+            searcher.ordnance_survey_client.built_form = None
 
             searcher.find_property(skip_os=True)
             if searcher.newest_epc is None:
@@ -5147,11 +5173,32 @@ def extract_lower_bound(age_band):
         return 1930
 
 
+def classify_loft(x):
+    # high confidence
+    if float(x["roof_insulation_thickness"]) <= 100:
+        return "high"
+
+    if float(x["roof_insulation_thickness"]) <= 200:
+        return "medium"
+
+    if float(x["roof_insulation_thickness"]) <= 270 and x["epc_age"] >= 5 * 365:
+        return "medium"
+
+    return "unlikely"
+
+
 def fml_analysis(loader):
     from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
     from etl.epc.DataProcessor import EPCDataProcessor
+    from datetime import datetime
     assumed_ciga_pass_rate = 0.731
-    has_bruh = ["HA7"]
+    has_bruh = ["HA7", "HA14", "HA25", "HA39", "HA16"]
+
+    no_ciga_cavity_descriptions = [
+        "Cavity wall, as built, insulated (assumed)",
+        "Cavity wall, as built, no insulation (assumed)",
+        "Cavity wall, as built, partial insulation (assumed)"
+    ]
 
     results = []
     for ha_name in has_bruh:
@@ -5170,6 +5217,11 @@ def fml_analysis(loader):
             s3_file_name=f"ha-analysis/revised/{ha_name}/epc_data.pickle"
         )
 
+        # time from the inspection to now
+        epc_data["epc_age"] = (datetime.now() - pd.to_datetime(epc_data["inspection-date"])).dt.days
+        if "estimated" not in epc_data.columns:
+            epc_data["estimated"] = None
+
         fuck_this = fml.merge(
             epc_data, how="left", on="asset_list_row_id"
         )
@@ -5178,12 +5230,27 @@ def fml_analysis(loader):
 
         # Take just remaining
         if not loader.data[ha_name]["survey_list"].empty:
-            raise NotImplementedError("TAKE JUST REMAINING IDIOT")
+            survey_list = (
+                loader.data[ha_name]["survey_list"][
+                    ~pd.isnull(loader.data[ha_name]["survey_list"]["asset_list_row_id"])
+                ]
+            )
+            fuck_this = fuck_this.merge(
+                survey_list[["asset_list_row_id", "installation_status"]],
+                how="left",
+                on="asset_list_row_id"
+            )
+            # Anything that has an installation has gone to installation, and therefore is not remaining
+            fuck_this = fuck_this[pd.isnull(fuck_this["installation_status"])]
+            fuck_this = fuck_this.drop(columns=["installation_status"])
 
         insulation_thicknesses = []
         for _, x in fuck_this.iterrows():
             if pd.isnull(x["roof-description"]):
                 continue
+            if x["roof-description"] == "SAP05:Roof":
+                continue
+
             thickness = RoofAttributes(x["roof-description"]).process()["insulation_thickness"]
             # If there is a + in the thickness, strip it out
             thickness = str(thickness).replace("+", "")
@@ -5208,11 +5275,13 @@ def fml_analysis(loader):
             "roof_insulation_thickness"
         ].str.replace("average", "150")
 
-        fuck_this["construction-age-band"] = fuck_this["construction-age-band"].apply(
-            lambda x: EPCDataProcessor.clean_construction_age_band(x)
-        )
+        fuck_this["roof_classiciation"] = fuck_this.apply(lambda x: classify_loft(x), axis=1)
 
-        fuck_this['age_lower_bound'] = fuck_this['construction-age-band'].apply(extract_lower_bound)
+        # fuck_this["construction-age-band"] = fuck_this["construction-age-band"].apply(
+        #     lambda x: EPCDataProcessor.clean_construction_age_band(x)
+        # )
+        #
+        # fuck_this['age_lower_bound'] = fuck_this['construction-age-band'].apply(extract_lower_bound)
 
         had_survey = fuck_this[pd.isnull(fuck_this["estimated"])]
 
@@ -5225,9 +5294,23 @@ def fml_analysis(loader):
             had_survey["ECO Eligibility"] == "eco4"
             ]
 
+        # Walls:
+        # Cavity wall, as built, insulated (assumed)
+        # Cavity wall, as built, no insulation (assumed)
+        # Cavity wall, as built, partial insulation (assumed)
+
+        # Roof:
+        # Less than 100mm = high confidence
+        # Less than 270mm & EPC at least 5 years old = medium confidence
+        # Otherwise, low confidence
+
+        # SAP criteria is EPC C or below
+
+        # Pre is 54 or below
+
         no_ciga_check_needed_with_archetype = no_ciga_check_needed[
-            (no_ciga_check_needed["walls-description"].str.lower().str.contains("cavity") == True) &
-            (no_ciga_check_needed["roof-description"].str.lower().str.contains("pitched") == True) &
+            (no_ciga_check_needed["walls-description"].isin(no_ciga_cavity_descriptions)) &
+            (no_ciga_check_needed["roof_classiciation"].isin(["high", "medium"])) &
             (no_ciga_check_needed["current-energy-efficiency"].astype(float) <= 80)
             ]
         if not no_ciga_check_needed_with_archetype.empty:
@@ -5239,7 +5322,14 @@ def fml_analysis(loader):
 
         ciga_check_needed = had_survey[
             had_survey["ECO Eligibility"].str.contains("subject to ciga")
-        ]
+        ].copy()
+
+        ciga_check_passed = had_survey[
+            had_survey["ECO Eligibility"] == "eco4 - passed ciga"
+            ]
+
+        if not ciga_check_passed.empty:
+            raise Exception("SORT ME BRUV")
 
         # We take just the cavity walls
         # UCL paper: https://discovery.ucl.ac.uk/id/eprint/10110371/
@@ -5248,17 +5338,12 @@ def fml_analysis(loader):
         # differ between variables; floor and wall type errors occur in ~10-15% of EPCs,
         # compared with ~5% for wall insulation and glazing performance
 
-        ciga_check_needed_with_archetype = ciga_check_needed[
+        ciga_check_needed_plausible = ciga_check_needed[
             (ciga_check_needed["walls-description"].str.lower().str.contains("cavity") == True) &
-            (ciga_check_needed["roof-description"].str.lower().str.contains("pitched") == True) &
+            (ciga_check_needed["roof_classiciation"].isin(["high", "medium"])) &
             (ciga_check_needed["current-energy-efficiency"].astype(float) <= 80)
             ]
 
-        # We take properties that could feasibly be within install regions
-        ciga_check_needed_plausible = ciga_check_needed_with_archetype[
-            ciga_check_needed_with_archetype["roof_insulation_thickness"].astype(float) < 270
-            ]
-
         if not loader.data[ha_name]["ciga_list"].empty:
             raise NotImplementedError("SORT OUT THE CIGA BRUV")
         else:

From 9b255029b3f58d9f8653aaf1bbbd0cc43b024803 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 14 Mar 2024 17:36:09 +0000
Subject: [PATCH 137/248] fml fml

---
 .../ha_15_32/ha_analysis_batch_3.py           | 141 ++++++++++++------
 1 file changed, 96 insertions(+), 45 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 9cadaf9f..e1d7db4d 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -20,6 +20,9 @@ from backend.ml_models.api import ModelApi
 from etl.solar.SolarPhotoSupply import SolarPhotoSupply
 from recommendations.recommendation_utils import calculate_cavity_age
 from etl.epc.Record import EPCRecord
+from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
+from etl.epc.DataProcessor import EPCDataProcessor
+from datetime import datetime
 
 EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
 ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env"
@@ -5188,9 +5191,6 @@ def classify_loft(x):
 
 
 def fml_analysis(loader):
-    from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
-    from etl.epc.DataProcessor import EPCDataProcessor
-    from datetime import datetime
     assumed_ciga_pass_rate = 0.731
     has_bruh = ["HA7", "HA14", "HA25", "HA39", "HA16"]
 
@@ -5216,15 +5216,20 @@ def fml_analysis(loader):
             bucket_name="retrofit-datalake-dev",
             s3_file_name=f"ha-analysis/revised/{ha_name}/epc_data.pickle"
         )
+        # We make sure we don't have duplicated. We do a super basic drop duplicates because it shouldn't be a huge
+        # issue at this point
+        epc_data = epc_data.drop_duplicates("uprn")
 
         # time from the inspection to now
         epc_data["epc_age"] = (datetime.now() - pd.to_datetime(epc_data["inspection-date"])).dt.days
         if "estimated" not in epc_data.columns:
-            epc_data["estimated"] = None
+            # For all after HA7, we don't use estimated surveys
+            epc_data["estimated"] = False
 
         fuck_this = fml.merge(
             epc_data, how="left", on="asset_list_row_id"
         )
+        fuck_this["estimated"] = fuck_this["estimated"].fillna(True)
         if fuck_this.shape[0] != fml.shape[0]:
             raise Exception("What the fuck bruv")
 
@@ -5259,7 +5264,15 @@ def fml_analysis(loader):
             )
         insulation_thicknesses = pd.DataFrame(insulation_thicknesses)
 
+        before_merge_shape = fuck_this.shape[0]
         fuck_this = fuck_this.merge(insulation_thicknesses, how="left", on="uprn")
+
+        if fuck_this.shape[0] != before_merge_shape:
+            raise Exception("SOMETHING WENT WRONG")
+
+        if any(fuck_this["ECO Eligibility"].str.contains("subject to archetype")):
+            blah
+
         # clean roof insulation
         fuck_this["roof_insulation_thickness"] = fuck_this["roof_insulation_thickness"].fillna("0")
         fuck_this["roof_insulation_thickness"] = fuck_this[
@@ -5283,7 +5296,7 @@ def fml_analysis(loader):
         #
         # fuck_this['age_lower_bound'] = fuck_this['construction-age-band'].apply(extract_lower_bound)
 
-        had_survey = fuck_this[pd.isnull(fuck_this["estimated"])]
+        had_survey = fuck_this[fuck_this["estimated"] == False]
 
         # proportion with a survey:
         proportion_with_survey = 100 * had_survey.shape[0] / fuck_this.shape[0]
@@ -5294,27 +5307,11 @@ def fml_analysis(loader):
             had_survey["ECO Eligibility"] == "eco4"
             ]
 
-        # Walls:
-        # Cavity wall, as built, insulated (assumed)
-        # Cavity wall, as built, no insulation (assumed)
-        # Cavity wall, as built, partial insulation (assumed)
-
-        # Roof:
-        # Less than 100mm = high confidence
-        # Less than 270mm & EPC at least 5 years old = medium confidence
-        # Otherwise, low confidence
-
-        # SAP criteria is EPC C or below
-
-        # Pre is 54 or below
-
-        no_ciga_check_needed_with_archetype = no_ciga_check_needed[
+        no_ciga_check_needed_eligible = no_ciga_check_needed[
             (no_ciga_check_needed["walls-description"].isin(no_ciga_cavity_descriptions)) &
             (no_ciga_check_needed["roof_classiciation"].isin(["high", "medium"])) &
             (no_ciga_check_needed["current-energy-efficiency"].astype(float) <= 80)
             ]
-        if not no_ciga_check_needed_with_archetype.empty:
-            raise Exception("SORT ME OUT")
 
         # Characterise no CIGA check needed
 
@@ -5327,9 +5324,20 @@ def fml_analysis(loader):
         ciga_check_passed = had_survey[
             had_survey["ECO Eligibility"] == "eco4 - passed ciga"
             ]
+        # These should be treated the same as one that have passed their ciga checks, from a detection perspective
+        ciga_check_passed_eligible = ciga_check_passed[
+            (ciga_check_passed["walls-description"].str.lower().str.contains("cavity") == True) &
+            (ciga_check_passed["roof_classiciation"].isin(["high", "medium"])) &
+            (ciga_check_passed["current-energy-efficiency"].astype(float) <= 80)
+            ]
 
-        if not ciga_check_passed.empty:
-            raise Exception("SORT ME BRUV")
+        if not loader.data[ha_name]["ciga_list"].empty:
+
+            proportions = loader.data[ha_name]["ciga_list"]["Guarantee"].value_counts(normalize=True)
+            ha_ciga_pass_rate = proportions[proportions.index == "No"].values[0]
+
+        else:
+            ha_ciga_pass_rate = assumed_ciga_pass_rate
 
         # We take just the cavity walls
         # UCL paper: https://discovery.ucl.ac.uk/id/eprint/10110371/
@@ -5338,53 +5346,96 @@ def fml_analysis(loader):
         # differ between variables; floor and wall type errors occur in ~10-15% of EPCs,
         # compared with ~5% for wall insulation and glazing performance
 
-        ciga_check_needed_plausible = ciga_check_needed[
+        ciga_check_needed_eligible = ciga_check_needed[
             (ciga_check_needed["walls-description"].str.lower().str.contains("cavity") == True) &
             (ciga_check_needed["roof_classiciation"].isin(["high", "medium"])) &
             (ciga_check_needed["current-energy-efficiency"].astype(float) <= 80)
             ]
 
-        if not loader.data[ha_name]["ciga_list"].empty:
-            raise NotImplementedError("SORT OUT THE CIGA BRUV")
-        else:
-            ha_ciga_pass_rate = assumed_ciga_pass_rate
-
-        ciga_check_expectation = np.round(ciga_check_needed_plausible.shape[0] * ha_ciga_pass_rate)
-        without_ciga_expectation = no_ciga_check_needed_with_archetype.shape[0]
+        ciga_check_expectation = np.round(ciga_check_needed_eligible.shape[0] * ha_ciga_pass_rate)
+        without_ciga_expectation = no_ciga_check_needed_eligible.shape[0]
+        passed_ciga_expectation = ciga_check_passed_eligible.shape[0]
 
         # Need to add on the non-ciga
-        total_expectation = ciga_check_expectation + without_ciga_expectation
+        total_expectation = ciga_check_expectation + without_ciga_expectation + passed_ciga_expectation
 
         if proportion_with_survey < 100:
             # We estimate the rest
             without_survey_needing_ciga = fuck_this[
-                (pd.isnull(fuck_this["estimated"]) == False) &
+                (fuck_this["estimated"] == True) &
                 (fuck_this["ECO Eligibility"].str.contains("subject to ciga") == True)
                 ]
 
-            # We apply the same conversion rate as the properties with a survey
-            without_survey_without_ciga_expected = np.round(
-                without_survey_needing_ciga.shape[0] * (ciga_check_expectation / ciga_check_needed.shape[0])
-            )
+            if without_survey_needing_ciga.empty:
+                without_survey_without_ciga_expected = 0
+            else:
+                # We apply the same conversion rate as the properties with a survey
+                without_survey_without_ciga_expected = np.round(
+                    without_survey_needing_ciga.shape[0] * (ciga_check_expectation / ciga_check_needed.shape[0])
+                )
 
-            total_expectation += without_survey_without_ciga_expected
-
-            without_survey_without_ciga = fuck_this[
-                (pd.isnull(fuck_this["estimated"]) == False) & (fuck_this["ECO Eligibility"].isin(["eco4"]))
+            without_survey_passed_ciga = fuck_this[
+                (fuck_this["estimated"] == True) &
+                (fuck_this["ECO Eligibility"] == "eco4 - passed ciga")
                 ]
 
-            if not without_survey_without_ciga.empty:
-                raise Exception("Estimate the rest!!")
+            if without_survey_passed_ciga.empty:
+                without_survey_passed_ciga_expected = 0
+            else:
+                # We apply the same conversion rate as the properties with a survey
+                without_survey_passed_ciga_expected = np.round(
+                    without_survey_passed_ciga.shape[0] * (passed_ciga_expectation / ciga_check_passed.shape[0])
+                )
+
+            # Finally, no ciga needed
+            without_survey_eco4 = fuck_this[
+                (fuck_this["estimated"] == True) &
+                (fuck_this["ECO Eligibility"] == "eco4")
+                ]
+
+            if without_survey_eco4.empty:
+                without_survey_eco4_expected = 0
+            else:
+                # We apply the same conversion rate as the properties with a survey
+                without_survey_eco4_expected = np.round(
+                    without_survey_eco4.shape[0] * (without_ciga_expectation / no_ciga_check_needed.shape[0])
+                )
+
+            total_expectation = (
+                total_expectation +
+                without_survey_without_ciga_expected +
+                without_survey_passed_ciga_expected +
+                without_survey_eco4_expected
+            )
+
+        surveys = loader.data[ha_name]["survey_list"]
+        sold_now = 0
+        if not surveys.empty:
+            sold_now = surveys[
+                surveys["installation_status"].str.lower().str.contains("eco4")
+            ].shape[0]
+
+        sales_since_nov = sold_now - original_figures["No. of Tech surveys complete - Eco 4"].values[0]
 
         results.append(
             {
                 "HA Name": ha_name,
                 "Original ECO4 Estimate - Remaining": original_remaining,
+                "Of which sold": sales_since_nov,
+                "Of which ECO4 Eligible - Remaining": int(total_expectation),
                 "Proportion with a survey": proportion_with_survey,
-                "total_expectation": total_expectation
             }
         )
 
+    results_df = pd.DataFrame(results)
+
+    results_df["Delta vs November"] = 100 * (
+        results_df["Of which ECO4 Eligible - Remaining"] - results_df["Original ECO4 Estimate - Remaining"]
+    ) / results_df["Original ECO4 Estimate - Remaining"]
+
+    # TODO: Split into high and low confidence?
+    #
+
 
 def app():
     """

From 3b65a71793721d65fd8356c215813a13d384bc4d Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 14 Mar 2024 18:25:50 +0000
Subject: [PATCH 138/248] added in extra shit to output

---
 .../ha_15_32/ha_analysis_batch_3.py           | 47 ++++++++++++++++---
 1 file changed, 41 insertions(+), 6 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index e1d7db4d..53ce69e2 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -5200,6 +5200,22 @@ def fml_analysis(loader):
         "Cavity wall, as built, partial insulation (assumed)"
     ]
 
+    codes = [
+        "HA39", "HA14", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA7",
+        "HA16", "HA107", "HA25", "HA50", "HA41", "HA48", "HA2", "HA63", "HA12",
+        "HA117", "HA13", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27",
+        "HA30", "HA31", "HA54", "HAXX", "HA49", "HAXXX"
+    ]
+
+    values = [
+        706, 2161, 1053, 793, 0, 656, 1200, 1647, 4248, 2703, 1087, 1876, 2135,
+        1078, 775, 538, 518, 401, 466, 2627, 98, 1050, 524, 191, 538, 384, 204,
+        281, 422, 74, 313, 71, 6
+    ]
+
+    # Create a dictionary mapping
+    remaining_eligible_mapping = dict(zip(codes, values))
+
     results = []
     for ha_name in has_bruh:
 
@@ -5207,6 +5223,7 @@ def fml_analysis(loader):
             loader.december_figures["HA Name"] == ha_name
             ].copy()
         original_remaining = original_figures["ECO4 remaining"].values[0]
+        postcode_list_remaining = remaining_eligible_mapping[ha_name]
 
         # Read in the epc data
         asset_list = loader.data[ha_name]["asset_list"].copy()
@@ -5271,7 +5288,7 @@ def fml_analysis(loader):
             raise Exception("SOMETHING WENT WRONG")
 
         if any(fuck_this["ECO Eligibility"].str.contains("subject to archetype")):
-            blah
+            raise Exception("DO THE DAMN ARCHETYPE CHECK BRO")
 
         # clean roof insulation
         fuck_this["roof_insulation_thickness"] = fuck_this["roof_insulation_thickness"].fillna("0")
@@ -5313,6 +5330,13 @@ def fml_analysis(loader):
             (no_ciga_check_needed["current-energy-efficiency"].astype(float) <= 80)
             ]
 
+        # For anything not needing a CIGA check, some of it will be GBIS
+        no_ciga_check_needed_eligible_gbis = no_ciga_check_needed[
+            (no_ciga_check_needed["walls-description"].isin(no_ciga_cavity_descriptions)) &
+            (no_ciga_check_needed["current-energy-efficiency"].astype(float) <= 80) &
+            (~no_ciga_check_needed["asset_list_row_id"].isin(no_ciga_check_needed_eligible["asset_list_row_id"].values))
+            ]
+
         # Characterise no CIGA check needed
 
         # TODO: WHAT ABOUT PASSED CIGA - don't need to apply the further deduction
@@ -5359,6 +5383,8 @@ def fml_analysis(loader):
         # Need to add on the non-ciga
         total_expectation = ciga_check_expectation + without_ciga_expectation + passed_ciga_expectation
 
+        total_gbis_expectation = no_ciga_check_needed_eligible_gbis.shape[0]
+
         if proportion_with_survey < 100:
             # We estimate the rest
             without_survey_needing_ciga = fuck_this[
@@ -5395,12 +5421,17 @@ def fml_analysis(loader):
 
             if without_survey_eco4.empty:
                 without_survey_eco4_expected = 0
+                without_survey_gbis_expected = 0
             else:
                 # We apply the same conversion rate as the properties with a survey
                 without_survey_eco4_expected = np.round(
                     without_survey_eco4.shape[0] * (without_ciga_expectation / no_ciga_check_needed.shape[0])
                 )
 
+                without_survey_gbis_expected = np.round(
+                    without_survey_eco4.shape[0] * (total_gbis_expectation / no_ciga_check_needed.shape[0])
+                )
+
             total_expectation = (
                 total_expectation +
                 without_survey_without_ciga_expected +
@@ -5408,6 +5439,8 @@ def fml_analysis(loader):
                 without_survey_eco4_expected
             )
 
+            total_gbis_expectation = total_gbis_expectation + without_survey_gbis_expected
+
         surveys = loader.data[ha_name]["survey_list"]
         sold_now = 0
         if not surveys.empty:
@@ -5421,20 +5454,22 @@ def fml_analysis(loader):
             {
                 "HA Name": ha_name,
                 "Original ECO4 Estimate - Remaining": original_remaining,
+                "Postcode List - Remaining": postcode_list_remaining,
                 "Of which sold": sales_since_nov,
                 "Of which ECO4 Eligible - Remaining": int(total_expectation),
+                "Of which GBIS Eligibile - Remaining": int(total_gbis_expectation),
                 "Proportion with a survey": proportion_with_survey,
             }
         )
 
     results_df = pd.DataFrame(results)
 
-    results_df["Delta vs November"] = 100 * (
-        results_df["Of which ECO4 Eligible - Remaining"] - results_df["Original ECO4 Estimate - Remaining"]
-    ) / results_df["Original ECO4 Estimate - Remaining"]
+    # results_df["Delta vs November"] = 100 * (
+    #     results_df["Of which ECO4 Eligible - Remaining"] - results_df["Original ECO4 Estimate - Remaining"]
+    # ) / results_df["Original ECO4 Estimate - Remaining"]
 
-    # TODO: Split into high and low confidence?
-    #
+    # TODO: Add in estimated GBIS (for eco jobs, of which look like gbis)
+    # TODO: Change the left hand side number for our post CIGA estimates
 
 
 def app():

From 479a2b08c33e2911a5ae98c3d315903af04e4980 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 14 Mar 2024 19:02:33 +0000
Subject: [PATCH 139/248] Extend cavity wall descriptions; collect observed wall descriptions

---
 .../ha_15_32/ha_analysis_batch_3.py           | 22 +++++++++++++++++--
 etl/epc_clean/app.py                          |  3 +++
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 53ce69e2..9462642f 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -5119,7 +5119,9 @@ def forecast_remaining_sales(loader):
 
 
 def fml_data_pull(loader):
-    has_bruh = ["HA7", "HA14", "HA25", "HA39", "HA16"]
+    has_bruh = ["HA7", "HA14", "HA25", "HA39", "HA16",
+                # Do these
+                "HA1", "HA13", "HA50", "HA24"]
     # DO
     from backend.SearchEpc import SearchEpc
     epc_api_key = "a2Nvbm5rb3dsZXNzYXJAZ21haWwuY29tOjY5MGJiMWM0NmIyOGI5ZDUxYzAxMzQzYzNiZGNlZGJjZDNmODQwMzA="
@@ -5197,9 +5199,19 @@ def fml_analysis(loader):
     no_ciga_cavity_descriptions = [
         "Cavity wall, as built, insulated (assumed)",
         "Cavity wall, as built, no insulation (assumed)",
-        "Cavity wall, as built, partial insulation (assumed)"
+        "Cavity wall, as built, partial insulation (assumed)",
+        "Cavity wall, no insulation (assumed)",
+        "Cavity wall, partial insulation (assumed)",
+        "Cavity wall,",
+        "Cavity wall, insulated (assumed)",
+        "Cavity wall, no insulation (assumed)",
+        "Cavity wall, as built, insulated (assumed)",
+        "Cavity wall, partial insulation (assumed)",
     ]
 
+    # TODO: There will be some properties that are subject to CIGA that do not look like they ned a CIGA check! pass
+    #  them!
+
     codes = [
         "HA39", "HA14", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA7",
         "HA16", "HA107", "HA25", "HA50", "HA41", "HA48", "HA2", "HA63", "HA12",
@@ -5217,6 +5229,7 @@ def fml_analysis(loader):
     remaining_eligible_mapping = dict(zip(codes, values))
 
     results = []
+    wall_descriptions = []
     for ha_name in has_bruh:
 
         original_figures = loader.december_figures[
@@ -5236,6 +5249,7 @@ def fml_analysis(loader):
         # We make sure we don't have duplicated. We do a super basic drop duplicates because it shouldn't be a huge
         # issue at this point
         epc_data = epc_data.drop_duplicates("uprn")
+        wall_descriptions.extend(epc_data["walls-description"].unique().tolist())
 
         # time from the inspection to now
         epc_data["epc_age"] = (datetime.now() - pd.to_datetime(epc_data["inspection-date"])).dt.days
@@ -5464,6 +5478,10 @@ def fml_analysis(loader):
 
     results_df = pd.DataFrame(results)
 
+    wall_descriptions = list(set(wall_descriptions))
+    from pprint import pprint
+    pprint(wall_descriptions)
+
     # results_df["Delta vs November"] = 100 * (
     #     results_df["Of which ECO4 Eligible - Remaining"] - results_df["Original ECO4 Estimate - Remaining"]
     # ) / results_df["Original ECO4 Estimate - Remaining"]
diff --git a/etl/epc_clean/app.py b/etl/epc_clean/app.py
index 53c1a329..3f1a1a80 100644
--- a/etl/epc_clean/app.py
+++ b/etl/epc_clean/app.py
@@ -36,8 +36,11 @@ def app():
     cleaned_data = {}
     epc_directories = [entry for entry in EPC_DIRECTORY.iterdir() if entry.is_dir()]
 
+    WALLS = []
     for directory in tqdm(epc_directories):
         data = pd.read_csv(directory / "certificates.csv", low_memory=False)
+        z = data["WALLS_DESCRIPTION"].unique().tolist()
+        WALLS.extend(z)
         # Rename the columns to the same format as the api returns
         data.columns = [c.replace("_", "-").lower() for c in data.columns]
         # Take just date before the date threshold

From cc319ab91149f77dd04e691e6bc6b99bb9d39702 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 15 Mar 2024 10:09:26 +0000
Subject: [PATCH 140/248] new ha analysis wip

---
 .../ha_15_32/ha_analysis_batch_3.py            | 18 +++++-------------
 1 file changed, 5 insertions(+), 13 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 9462642f..a0b7e0bb 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -5210,7 +5210,7 @@ def fml_analysis(loader):
     ]
 
     # TODO: There will be some properties that are subject to CIGA that do not look like they ned a CIGA check! pass
-    #  them!
+    #  them! Non-invasices will have checked the wall though
 
     codes = [
         "HA39", "HA14", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA7",
@@ -5352,16 +5352,11 @@ def fml_analysis(loader):
             ]
 
         # Characterise no CIGA check needed
-
-        # TODO: WHAT ABOUT PASSED CIGA - don't need to apply the further deduction
-
         ciga_check_needed = had_survey[
             had_survey["ECO Eligibility"].str.contains("subject to ciga")
         ].copy()
 
-        ciga_check_passed = had_survey[
-            had_survey["ECO Eligibility"] == "eco4 - passed ciga"
-            ]
+        ciga_check_passed = had_survey[had_survey["ECO Eligibility"] == "eco4 - passed ciga"]
         # These should be treated the same as one that have passed their ciga checks, from a detection perspective
         ciga_check_passed_eligible = ciga_check_passed[
             (ciga_check_passed["walls-description"].str.lower().str.contains("cavity") == True) &
@@ -5469,18 +5464,15 @@ def fml_analysis(loader):
                 "HA Name": ha_name,
                 "Original ECO4 Estimate - Remaining": original_remaining,
                 "Postcode List - Remaining": postcode_list_remaining,
-                "Of which sold": sales_since_nov,
+                # "Of which sold": sales_since_nov,
                 "Of which ECO4 Eligible - Remaining": int(total_expectation),
                 "Of which GBIS Eligibile - Remaining": int(total_gbis_expectation),
-                "Proportion with a survey": proportion_with_survey,
+                # "Proportion with a survey": proportion_with_survey,
             }
         )
 
     results_df = pd.DataFrame(results)
-
-    wall_descriptions = list(set(wall_descriptions))
-    from pprint import pprint
-    pprint(wall_descriptions)
+    results_df.to_csv("analysis - revised.csv")
 
     # results_df["Delta vs November"] = 100 * (
     #     results_df["Of which ECO4 Eligible - Remaining"] - results_df["Original ECO4 Estimate - Remaining"]

From 12f780a08989e896235adf96e175d39240c3adbb Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 15 Mar 2024 16:54:48 +0000
Subject: [PATCH 141/248] setting up complete data pull

---
 .../ha_15_32/ha_analysis_batch_3.py           | 380 +++++++++++++++++-
 1 file changed, 369 insertions(+), 11 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index a0b7e0bb..902d48fd 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -42,6 +42,15 @@ PROPERTY_TYPE_LOOKUP = {
             'Detached Local Connect': 'Detached',
         }
     },
+    "HA2": {
+        'HOUSE': 'House',
+        'FLAT': 'Flat',
+        'SHELTERED': None,
+        'BUNGALOW': 'Bungalow',
+        'BED-SIT': None,
+        'MAISONETTE': "Maisonette",
+        'HOSTEL': None
+    },
     "HA6": {
         "property_type": {
             'HOUSE': "House",
@@ -69,6 +78,23 @@ PROPERTY_TYPE_LOOKUP = {
             "End Terraced": "End-Terrace",
         }
     },
+    "HA12": {
+        "House": "House",
+        "Flat": "Flat",
+        "Bungalow": "Bungalow",
+        "Maisonette": "Maisonette",
+        "Bedsit": None,
+    },
+    "HA13": {
+        'House': "House",
+        'Flat': "Flat",
+        'House MT': "House",
+        'House SD': "House",
+        'House ET': "House",
+        'Bungalow MT': "Bungalow",
+        'Bungalow ET': "Bungalow",
+        'ii': None,
+    },
     "HA14": {
         "property_type": {
             "House": "House",
@@ -77,6 +103,13 @@ PROPERTY_TYPE_LOOKUP = {
             "Maisonette": "Maisonette",
         }
     },
+    "HA15": {
+        'House': 'House',
+        'Flat': 'Flat',
+        'Bungalow': 'Bungalow',
+        'Maisonette': 'Maisonette',
+        'Flat over garage': 'Flat',
+    },
     "HA16": {
         'Semi Detached Bungalow': {"property-type": "Bungalow", "built-form": "Semi-Detached"},
         'Mid Terraced House': {"property-type": "House", "built-form": "Mid-Terrace"},
@@ -95,6 +128,30 @@ PROPERTY_TYPE_LOOKUP = {
         'Flat Over Shop': {"property-type": "Flat", "built-form": "Mid-Terrace"},
         'Mid Terraced Town House': {"property-type": "House", "built-form": "Mid-Terrace"},
     },
+    "HA18": {
+        "House": "House",
+        "Flat": "Flat",
+        "Bungalow": "Bungalow",
+        "Maisonette": "Maisonette",
+        "Bedsit": None,
+        "Shop": None,
+        "Hostel": None,
+        "Block": None,
+    },
+    "HA24": {
+        '01 HOUSE': 'House',
+        '02 FLAT': 'Flat',
+        '03 BUNGALOW': 'Bungalow',
+        '10 PBUNGALOW': 'Bungalow',
+        '01 HOUSE MID': 'House',
+        '13 SBUNGALOW': 'Bungalow',
+        '12 SBEDSIT': None,  # BEDSIT does not match the specified property types
+        '14 SFLAT': 'Flat',
+        '05 BEDSIT': None,
+        '04 MAISONETTE': 'Maisonette',
+        '11 PFLAT': 'Flat',
+        '09 PBEDSIT': None
+    },
     "HA25": {
         'Flat': 'Flat',
         'Mid Terrace House': 'House',
@@ -116,6 +173,77 @@ PROPERTY_TYPE_LOOKUP = {
         'Mid Terrace Housekeeping ': 'House',
         'End Terrace Housex': 'House'
     },
+    "HA28": {
+        'Flat': 'Flat',
+        'Semi detached house': 'House',
+        'Terraced house': 'House',
+        'Maisonette flat': 'Maisonette',
+        'Sheltered bedsit': None,
+        'APD flat': 'Flat',
+        'Bungalow terraced': 'Bungalow',
+        'Flat with partition': 'Flat',
+        'Bungalow semi detached': 'Bungalow',
+        'APD Bungalow': 'Bungalow',
+        'Sheltered flat': 'Flat',
+        'Bedsit Flat': 'Flat',
+        'Bedsit bungalow semi detached': 'Bungalow',
+        'Sheltered bungalow terraced': 'Bungalow',
+        'Sheltered bedsit disabled': None,
+        'Bedsit bungalow terraced': 'Bungalow',
+        'Sheltered bungalow semi detached': 'Bungalow',
+        'Sheltered warden flat': 'Flat',
+        'Bungalow detached': 'Bungalow',
+        'Block': None,  # Does not match the specified property types
+        'End Terraced House': 'House',
+        'Mid Terraced House': 'House',
+        '#N/A': None,  # Assuming this is an invalid or missing entry
+        0: None  # Assuming 0 is also an invalid or missing entry
+    },
+    "HA30": {
+        'House': 'House',
+        'Flat': 'Flat',
+        'Bungalow': 'Bungalow',
+        'House with Attached Garage': 'House',
+        'Bed Space': None,  # Assuming this does not fit the specified property types
+        'House with Garage': 'House',
+        'Bungalow with Wheelchair Access': 'Bungalow',
+        'Maisonette': 'Maisonette',
+        'Flat with Wheelchair Access': 'Flat',
+        'Bedsit': None,  # Assuming this does not fit the specified property types
+        'Flat w Wheelchair Access & Car Park': 'Flat',
+        'House with Wheelchair Access': 'House',
+        'Bungalow w Wheelchair Access & Car ': 'Bungalow'
+    },
+    "HA32": {
+        'Bungalow': 'Bungalow',
+        'Flat': 'Flat',
+        'Bungalow Disabled': 'Bungalow',  # "Disabled" properties categorized with their base type
+        'House': 'House',
+        'Dormer Bungalow': 'Bungalow',
+        'Pop-In': None,  # Does not fit the specified property types
+        'Flat Disabled': 'Flat',
+        'Laundry': None,  # Does not fit the specified property types
+        'Bedsit': None,  # Excluded from the given categories
+        'Shed': None,  # Does not fit the specified property types
+        'Store Room': None  # Does not fit the specified property types
+    },
+    "HA34": {
+        'Flat': 'Flat',
+        'House': 'House',
+        'Bungalow': 'Bungalow',
+        'Maisonette': 'Maisonette',
+        'ND': None,
+    },
+    "HA35": {
+        "Flat": "Flat",
+        "Maisonette": "Maisonette",
+        "House": "House",
+        "Bedsit": None,
+        "2 Bedroom Unknown": None,
+        "1 Bedroom Unknown": None,
+        "3 Bedroom Unknown": None,
+        "4 Bedroom Unknown": None,
+    },
     "HA39": {
         "Semi house": {"property_type": "House", "built_form": "Semi-Detached"},
         "1st floor flat": {"property_type": "Flat", "built_form": None},
@@ -140,6 +268,105 @@ PROPERTY_TYPE_LOOKUP = {
         "1st floor flat with study room": {"property_type": "Flat", "built_form": None},
         "2nd floor flat with study": {"property_type": "Flat", "built_form": None},
     },
+    "HA41": {
+        'Garage': None,
+        'House 1919-1945': 'House',
+        'House 1946-1964': 'House',
+        'Flats & Maisonettes post 1974': 'Flat',
+        'Non traditional houses': 'House',
+        'Sheltered': None,
+        'Flats & Maisonettes 1965-1974': 'Flat',
+        'House post 1974': 'House',
+        'Block': None,
+        'Flats & Maisonettes 1946-1964': 'Flat',
+        'House 1965-1974': 'House',
+        'Non traditional flats': 'Flat',
+        'Bungalow 1965-1974': 'Bungalow',
+        'PIMSS EMPTY': None,
+        'Bungalow post 1974': 'Bungalow',
+        'Bungalow 1946-1964': 'Bungalow',
+        'Flats & Maisonettes 1919-1945': 'Flat',
+        'House pre 1919': 'House',
+        'Flats & Maisonettes pre 1919': 'Flat',
+        'Bungalow 1919-1945': 'Bungalow',
+        'Office': None
+    },
+    "HA48": {
+        "House": "House",
+        "Flat": "Flat",
+        "Bungalow": "Bungalow",
+        "Maisonette": "Maisonette",
+        "Unit": None
+    },
+    "HA50": {
+        'House': 'House',
+        'Bungalow': 'Bungalow',
+        'Flat': 'Flat',
+        'House SD': 'House',
+        'House MT': 'House',
+        'House ET': 'House',
+        'Bungalow ET': 'Bungalow',
+        'House SD ': 'House',
+        'House. SD': 'House',
+        'Bungalow SD': 'Bungalow',
+        'Bungalow MT': 'Bungalow',
+        'Bungalow D': 'Bungalow',
+        'House D': 'House',
+        'House. MT': 'House',
+        'House ': 'House',
+        'House ET ': 'House',
+        ' ': None,
+        'Flat?': 'Flat',
+        'Bungalow ': 'Bungalow'
+    },
+    "HA56": {
+        'House Non Specific': 'House',
+        'HOUSE TERRACED': 'House',
+        'HOUSE - SEMI DETACHD': 'House',
+        'Bungalow': 'Bungalow',
+        'House - End Terraced': 'House',
+        'Block': None,
+        'Block with Communal': None,
+        'Bungalow - Terraced': 'Bungalow',
+        'Bungalow - Semi Dtch': 'Bungalow',
+        'Block House with rooms': None,
+        'Bungalow - End Terr': 'Bungalow',
+        'House - Mid Terraced': 'House',
+        'Bungalow - Detached': 'Bungalow',
+        'House - Detached': 'House',
+        'HOUSE THREE STOREY': 'House',
+        'Maisonette': 'Maisonette',
+        'Communal Block': None,
+        'Scheme': None
+    },
+    "HA63": {
+        'Flat': 'Flat',
+        'House - Semi detached': 'House',
+        'House - Detached': 'House',
+        'House - End Terrace': 'House',
+        'House - Mid Terrace': 'House',
+        'Bungalow - Semi detached': 'Bungalow',
+        'Bungalow': 'Bungalow',
+        'Bedsit': None,  # Considering as a non-specific residential category here
+        'Maisonette': 'Maisonette',
+        'Bungalow - End Terrace': 'Bungalow',
+        'Bungalow - Detached': 'Bungalow',
+        'Maisonette - Mid Terrace': 'Maisonette',
+        'Maisonette - End Terrace': 'Maisonette',
+        'Studio Flat': 'Flat',
+        'Maisonette - Detached': 'Maisonette',
+        'Bungalow - Mid Terrace': 'Bungalow',
+        'Bedsit - Mid Terrace': None,
+        'Bedsit - End Terrace': None,
+        'Amenity Block - Semi detached': None,  # Assuming non-residential
+        'Maisonette - Semi Detached': 'Maisonette',
+        'Amenity Block - Detached': None,  # Assuming non-residential
+        'Hostel': None,  # Typically not considered a standard residential property for this context
+        'Bungalow - Attached': 'Bungalow',
+        'Unknown': None,  # Not enough information to categorize
+        'Studio Flat - Mid Terrace': 'Flat',
+        'Chalet - Wheelchair': None  # Specialized type, not categorized here
+    },
     "HA107": {
         "property_type": {
             "HOUSE": "House",
@@ -160,6 +387,27 @@ PROPERTY_TYPE_LOOKUP = {
             "Detached": "Detached",
             "Detatched": "Detached",
         }
+    },
+    "HA117": {
+        "Flat": "Flat",
+        "House": "House",
+        "Bungalow": "Bungalow",
+        "Flat over garage/underpass": "Flat",
+    },
+    "HAXXX": {
+        'mid terraced house': 'House',
+        'semi detached house': 'House',
+        '1st fl 4 in a block': 'Flat',
+        'G/F 4 in a block': 'Flat',
+        'end terraced house': 'House',
+        '1st floor flat': 'Flat',
+        'G/F floor flat': 'Flat',
+        'semi detached bungalow': 'Bungalow',
+        '2nd floor flat': 'Flat',
+        'mid terrace bungalow': 'Bungalow',
+        'detached bungalow': 'Bungalow',
+        'end terrace bungalow': 'Bungalow',
+        'Staff accommodation': None  # Marked as None due to its special nature
     }
 }
 
@@ -2882,12 +3130,36 @@ def get_property_type_and_built_form(property_meta, ha_name):
             property_type = "Flat"
 
         built_form = PROPERTY_TYPE_LOOKUP[ha_name]["built_form"].get(property_meta["Property Type"], None)
+    elif ha_name == "HA2":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Dwelling Type"].strip())
+        built_form = None
     elif ha_name == "HA6":
         property_type = PROPERTY_TYPE_LOOKUP[ha_name]["property_type"][property_meta["Dwelling type"]]
         built_form = property_meta["built_form"]
     elif ha_name == "HA7":
         property_type = PROPERTY_TYPE_LOOKUP[ha_name]["property_type"].get(property_meta["Archetype"])
         built_form = PROPERTY_TYPE_LOOKUP[ha_name]["built_form"].get(property_meta["Property Type"])
+    elif ha_name == "HA9":
+        property_description = property_meta["Asset Type"].strip().lower()
+        if "house" in property_description:
+            return "House", None
+
+        if "flat" in property_description:
+            return "Flat", None
+
+        if "bungalow" in property_description:
+            return "Bungalow", None
+
+        if "maisonette" in property_description:
+            return "Maisonette", None
+
+        return None, None
+    elif ha_name == "HA12":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Asset_Type1"].strip())
+        built_form = None
+    elif ha_name == "HA13":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Type Cd"].strip())
+        built_form = None
     elif ha_name == "HA14":
         if property_meta["Asset Type Description"] == "Block - Repair":
             # We try and deduce if it's a flat or house, depending on if it has "room" or "flats" in the address
@@ -2902,15 +3174,60 @@ def get_property_type_and_built_form(property_meta, ha_name):
             ]
 
         built_form = None
-    elif ha_name == "HA25":
-        property_type = PROPERTY_TYPE_LOOKUP[ha_name][property_meta["T1_AssetType"]]
+    elif ha_name == "HA15":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip())
         built_form = None
     elif ha_name == "HA16":
         config = PROPERTY_TYPE_LOOKUP[ha_name][property_meta["Type"]]
         property_type = config.get("property-type")
         built_form = config.get("built-form")
-    elif ha_name == "HA39":
+    elif ha_name == "HA18":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Asset Type"].strip())
+        built_form = None
+    elif ha_name == "HA19":
+        property_type = property_meta["Dwelling Type"]
+        built_form = None
+    elif ha_name == "HA24":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip())
+        built_form = None
+    elif ha_name == "HA25":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name][property_meta["T1_AssetType"]]
+        built_form = None
+    elif ha_name == "HA27":
+        property_type = property_meta["Property Type"]
+        built_form = None
+    elif ha_name == "HA28":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name][property_meta["Property Type - Academy"]]
+        built_form = None
+    elif ha_name == "HA30":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name][property_meta["A_AssetType"]]
+        built_form = None
+    elif ha_name == "HA31":
+        property_description = property_meta["A_AssetType"].strip().lower()
+        if "house" in property_description:
+            return "House", None
 
+        if "flat" in property_description:
+            return "Flat", None
+
+        if "bungalow" in property_description:
+            return "Bungalow", None
+
+        if "maisonette" in property_description:
+            return "Maisonette", None
+
+        return None, None
+
+    elif ha_name == "HA32":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Dwelling type"].strip())
+        built_form = None
+    elif ha_name == "HA34":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip())
+        built_form = None
+    elif ha_name == "HA35":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type Grouping"].strip())
+        built_form = None
+    elif ha_name == "HA39":
         property_type_config = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["ConstructionStyle"], {})
         property_type = property_type_config.get("property_type", None)
         built_form = property_type_config.get("built_form", None)
@@ -2921,11 +3238,35 @@ def get_property_type_and_built_form(property_meta, ha_name):
                 property_type = "Flat"
             else:
                 property_type = "House"
+    elif ha_name == "HA41":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Archetype"].strip())
+        built_form = None
+    elif ha_name == "HA48":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip())
+        built_form = None
+    elif ha_name == "HA49":
+        property_type = property_meta["Property Class"].strip()
+        built_form = None
+    elif ha_name == "HA54":
+        property_type = property_meta["Property Type"]
+        built_form = None
+    elif ha_name == "HA56":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Dwelling Type Description"].strip())
+        built_form = None
+    elif ha_name == "HA63":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["PropertyType"].strip())
+        built_form = None
     elif ha_name == "HA107":
-
         property_type = property_meta.get("property_type", None)
         built_form = property_meta.get("built_form", None)
-
+    elif ha_name == "HA117":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip())
+        built_form = None
+    elif ha_name == "HAXX":
+        return property_meta["Property Type"].split(":")[0].strip(), None
+    elif ha_name == "HAXXX":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Unit Description"].strip())
+        built_form = None
     else:
         raise NotImplementedError("Implement me")
 
@@ -5119,9 +5460,16 @@ def forecast_remaining_sales(loader):
 
 
 def fml_data_pull(loader):
-    has_bruh = ["HA7", "HA14", "HA25", "HA39", "HA16",
-                # Do these
-                "HA1", "HA13", "HA50", "HA24"]
+    has_bruh = [
+        # "HA7", "HA14", "HA25", "HA39", "HA16", "HA28",
+        # Updated get_property_type_and_built_form, still needs running
+        "HA13", "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12",
+        "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49",
+        # todo
+    ]
+
+    # Can't pull from EPC database because it's based in Scotland
+    # "HAXXX", "HAXX"
     # DO
     from backend.SearchEpc import SearchEpc
     epc_api_key = "a2Nvbm5rb3dsZXNzYXJAZ21haWwuY29tOjY5MGJiMWM0NmIyOGI5ZDUxYzAxMzQzYzNiZGNlZGJjZDNmODQwMzA="
@@ -5134,14 +5482,24 @@ def fml_data_pull(loader):
         # For each property, search for the latest EPC
         epc_data = []
         for _, row in tqdm(fml.iterrows(), total=fml.shape[0]):
+
             property_type, _ = get_property_type_and_built_form(property_meta=row, ha_name=ha)
+
+            if ha == "HAXXX":
+                to_join = [str(x) for x in
+                           [row["Door Number"], row["Address Line 1"], row["Address Line 2"], row["Address Line 3"],
+                            row["Postcode"]] if x is not None]
+                full_address = ", ".join(to_join)
+            else:
+                full_address = row["matching_address"]
+
             searcher = SearchEpc(
-                address1=row["HouseNo"],
+                address1=str(row["HouseNo"]),
                 postcode=row["matching_postcode"],
                 auth_token=epc_api_key,
                 os_api_key="",
                 property_type=property_type,
-                full_address=row["matching_address"],
+                full_address=full_address,
             )
             # Force the skipping of estimating the EPC
             searcher.ordnance_survey_client.property_type = None
@@ -5194,7 +5552,7 @@ def classify_loft(x):
 
 def fml_analysis(loader):
     assumed_ciga_pass_rate = 0.731
-    has_bruh = ["HA7", "HA14", "HA25", "HA39", "HA16"]
+    has_bruh = ["HA7", "HA14", "HA25", "HA39", "HA16", "HA1"]
 
     no_ciga_cavity_descriptions = [
         "Cavity wall, as built, insulated (assumed)",

From 6423ab2fac732a905645260263ebc72149424712 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 15 Mar 2024 17:53:18 +0000
Subject: [PATCH 142/248] data pull pipeline ready

---
 backend/SearchEpc.py                          |  11 +-
 .../ha_15_32/ha_analysis_batch_3.py           | 100 ++++++++++--------
 2 files changed, 61 insertions(+), 50 deletions(-)

diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py
index 3d2df9fb..cc2ee4a9 100644
--- a/backend/SearchEpc.py
+++ b/backend/SearchEpc.py
@@ -147,6 +147,7 @@ class SearchEpc:
         uprn: [int, None] = None,
         size=None,
         property_type=None,
+        fast=False
     ):
         """
         Address lines 1 and postcode are mandatory fields. The other address lines are optional
@@ -187,6 +188,7 @@ class SearchEpc:
         self.size = size if size is not None else 25
 
         self.property_type = property_type
+        self.fast = fast
 
     @classmethod
     def get_house_number(cls, address: str) -> str | None:
@@ -365,9 +367,6 @@ class SearchEpc:
         # Finally, we identify the newest epc and the rest, and then return
         newest_epc, older_epcs = self.filter_newest_epc(list_of_epcs=rows)
 
-        # Retrieve postcode and address
-        address_epc, postcode_epc = self.format_address(newest_epc=newest_epc)
-
         # Ge the uprn from the newest record for this home
         uprns = {r["uprn"] for r in rows if r["uprn"]}
         # We can sometimes have no uprn for a property
@@ -384,6 +383,12 @@ class SearchEpc:
 
         uprn = uprns.pop() if uprns else None
 
+        if self.fast:
+            return newest_epc, [], {}, "", "", None
+
+        # Retrieve postcode and address
+        address_epc, postcode_epc = self.format_address(newest_epc=newest_epc)
+
         return newest_epc, older_epcs, full_sap_epc, address_epc, postcode_epc, uprn
 
     @staticmethod
diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 902d48fd..7db97733 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -5461,9 +5461,9 @@ def forecast_remaining_sales(loader):
 
 def fml_data_pull(loader):
     has_bruh = [
-        # "HA7", "HA14", "HA25", "HA39", "HA16", "HA28",
+        # "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13",
         # Updated get_property_type_and_built_form, still needs running
-        "HA13", "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12",
+        "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12",
         "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49",
         # todo
     ]
@@ -5474,57 +5474,63 @@ def fml_data_pull(loader):
     from backend.SearchEpc import SearchEpc
     epc_api_key = "a2Nvbm5rb3dsZXNzYXJAZ21haWwuY29tOjY5MGJiMWM0NmIyOGI5ZDUxYzAxMzQzYzNiZGNlZGJjZDNmODQwMzA="
 
+    failed_has = []
     for ha in has_bruh:
-        asset_list = loader.data[ha]["asset_list"].copy()
-        # properties found as eligibile
-        fml = asset_list[asset_list["ECO Eligibility"] != "not eligible"]
+        print(f"Pulling data for {ha}")
+        try:
+            asset_list = loader.data[ha]["asset_list"].copy()
+            # properties found as eligible
+            fml = asset_list[asset_list["ECO Eligibility"] != "not eligible"]
 
-        # For each property, search for the latest EPC
-        epc_data = []
-        for _, row in tqdm(fml.iterrows(), total=fml.shape[0]):
+            # For each property, search for the latest EPC
+            epc_data = []
+            for _, row in tqdm(fml.iterrows(), total=fml.shape[0]):
 
-            property_type, _ = get_property_type_and_built_form(property_meta=row, ha_name=ha)
+                property_type, _ = get_property_type_and_built_form(property_meta=row, ha_name=ha)
 
-            if ha == "HAXXX":
-                to_join = [str(x) for x in
-                           [row["Door Number"], row["Address Line 1"], row["Address Line 2"], row["Address Line 3"],
-                            row["Postcode"]] if x is not None]
-                full_address = ", ".join(to_join)
-            else:
-                full_address = row["matching_address"]
+                if ha == "HAXXX":
+                    to_join = [str(x) for x in
+                               [row["Door Number"], row["Address Line 1"], row["Address Line 2"], row["Address Line 3"],
+                                row["Postcode"]] if x is not None]
+                    full_address = ", ".join(to_join)
+                else:
+                    full_address = row["matching_address"]
 
-            searcher = SearchEpc(
-                address1=str(row["HouseNo"]),
-                postcode=row["matching_postcode"],
-                auth_token=epc_api_key,
-                os_api_key="",
-                property_type=property_type,
-                full_address=full_address,
+                searcher = SearchEpc(
+                    address1=str(row["HouseNo"]),
+                    postcode=row["matching_postcode"],
+                    auth_token=epc_api_key,
+                    os_api_key="",
+                    property_type=property_type,
+                    full_address=full_address,
+                    fast=True
+                )
+                # Force the skipping of estimating the EPC
+                searcher.ordnance_survey_client.property_type = None
+                searcher.ordnance_survey_client.built_form = None
+
+                searcher.find_property(skip_os=True)
+                if searcher.newest_epc is None:
+                    continue
+
+                epc = {
+                    "asset_list_row_id": row["asset_list_row_id"],
+                    **searcher.newest_epc.copy()
+                }
+
+                epc_data.append(epc)
+
+            # Remove None entries
+            epc_data = [x for x in epc_data if x is not None]
+            # Save the data in S3 as a pickle
+            epc_data_df = pd.DataFrame(epc_data)
+            save_pickle_to_s3(
+                data=epc_data_df,
+                bucket_name="retrofit-datalake-dev",
+                s3_file_name=f"ha-analysis/revised/{ha}/epc_data.pickle"
             )
-            # Force the skipping of estimating the EPC
-            searcher.ordnance_survey_client.property_type = None
-            searcher.ordnance_survey_client.built_form = None
-
-            searcher.find_property(skip_os=True)
-            if searcher.newest_epc is None:
-                continue
-
-            epc = {
-                "asset_list_row_id": row["asset_list_row_id"],
-                **searcher.newest_epc.copy()
-            }
-
-            epc_data.append(epc)
-
-        # Remove None entries
-        epc_data = [x for x in epc_data if x is not None]
-        # Save the data in S3 as a parquet
-        epc_data_df = pd.DataFrame(epc_data)
-        save_pickle_to_s3(
-            data=epc_data_df,
-            bucket_name="retrofit-datalake-dev",
-            s3_file_name=f"ha-analysis/revised/{ha}/epc_data.pickle"
-        )
+        except Exception as e:
+            failed_has.append(ha)
 
 
 def extract_lower_bound(age_band):

From 4e077053cd73b4e6cd27392440e4e179846f6f9a Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 16 Mar 2024 14:51:39 +0000
Subject: [PATCH 143/248] Adding gbis to output

---
 .../ha_15_32/ha_analysis_batch_3.py           | 92 +++++++++++++++----
 1 file changed, 74 insertions(+), 18 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 7db97733..0ca28927 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -3247,6 +3247,9 @@ def get_property_type_and_built_form(property_meta, ha_name):
     elif ha_name == "HA49":
         property_type = property_meta["Property Class"].strip()
         built_form = None
+    elif ha_name == "HA50":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip())
+        built_form = None
     elif ha_name == "HA54":
         property_type = property_meta["Property Type"]
         built_form = None
@@ -5685,12 +5688,6 @@ def fml_analysis(loader):
 
         fuck_this["roof_classiciation"] = fuck_this.apply(lambda x: classify_loft(x), axis=1)
 
-        # fuck_this["construction-age-band"] = fuck_this["construction-age-band"].apply(
-        #     lambda x: EPCDataProcessor.clean_construction_age_band(x)
-        # )
-        #
-        # fuck_this['age_lower_bound'] = fuck_this['construction-age-band'].apply(extract_lower_bound)
-
         had_survey = fuck_this[fuck_this["estimated"] == False]
 
         # proportion with a survey:
@@ -5716,10 +5713,6 @@ def fml_analysis(loader):
             ]
 
         # Characterise no CIGA check needed
-        ciga_check_needed = had_survey[
-            had_survey["ECO Eligibility"].str.contains("subject to ciga")
-        ].copy()
-
         ciga_check_passed = had_survey[had_survey["ECO Eligibility"] == "eco4 - passed ciga"]
         # These should be treated the same as one that have passed their ciga checks, from a detection perspective
         ciga_check_passed_eligible = ciga_check_passed[
@@ -5743,20 +5736,60 @@ def fml_analysis(loader):
         # differ between variables; floor and wall type errors occur in ~10-15% of EPCs,
         # compared with ~5% for wall insulation and glazing performance
 
+        ciga_check_needed = had_survey[
+            had_survey["ECO Eligibility"].str.contains("subject to ciga")
+        ].copy()
+
         ciga_check_needed_eligible = ciga_check_needed[
             (ciga_check_needed["walls-description"].str.lower().str.contains("cavity") == True) &
             (ciga_check_needed["roof_classiciation"].isin(["high", "medium"])) &
             (ciga_check_needed["current-energy-efficiency"].astype(float) <= 80)
             ]
 
+        # Finally, characterise gbis properties. Some of those identified as gbis actually look like ECO4 work;
+        # the remainder are then qualified as genuine gbis
+        gbis_identified = had_survey[
+            had_survey["ECO Eligibility"] == "gbis"
+            ].copy()
+
+        gbis_looks_like_eco4 = gbis_identified[
+            (gbis_identified["walls-description"].isin(no_ciga_cavity_descriptions)) &
+            (gbis_identified["roof_classiciation"].isin(["high", "medium"])) &
+            (gbis_identified["current-energy-efficiency"].astype(float) <= 80) &
+            (
+                (
+                    (gbis_identified["property-type"] == "House") &
+                    (gbis_identified["built-form"] != "Mid-Terrace")
+                ) | (
+                    (gbis_identified["property-type"] == "Bungalow") &
+                    (gbis_identified["built-form"].isin(["Detached"]))
+                )
+            )
+            ]
+
+        gbis_qualified = gbis_identified[
+            (gbis_identified["walls-description"].isin(no_ciga_cavity_descriptions)) &
+            (gbis_identified["current-energy-efficiency"].astype(float) <= 80) &
+            (~gbis_identified["asset_list_row_id"].isin(gbis_looks_like_eco4["asset_list_row_id"].values))
+            ]
+
         ciga_check_expectation = np.round(ciga_check_needed_eligible.shape[0] * ha_ciga_pass_rate)
         without_ciga_expectation = no_ciga_check_needed_eligible.shape[0]
         passed_ciga_expectation = ciga_check_passed_eligible.shape[0]
+        identified_as_gbis_looks_like_eco4 = gbis_looks_like_eco4.shape[0]
 
         # Need to add on the non-ciga
-        total_expectation = ciga_check_expectation + without_ciga_expectation + passed_ciga_expectation
+        total_eco4_expectation = (
+            ciga_check_expectation +
+            without_ciga_expectation +
+            passed_ciga_expectation +
+            identified_as_gbis_looks_like_eco4
+        )
 
-        total_gbis_expectation = no_ciga_check_needed_eligible_gbis.shape[0]
+        no_ciga_check_needed_actually_gbis = no_ciga_check_needed_eligible_gbis.shape[0]
+        gbis_qualified = gbis_qualified.shape[0]
+
+        total_gbis_expectation = no_ciga_check_needed_actually_gbis + gbis_qualified
 
         if proportion_with_survey < 100:
             # We estimate the rest
@@ -5805,14 +5838,38 @@ def fml_analysis(loader):
                     without_survey_eco4.shape[0] * (total_gbis_expectation / no_ciga_check_needed.shape[0])
                 )
 
-            total_expectation = (
-                total_expectation +
+            # And gbis
+            without_survey_gbis = fuck_this[
+                (fuck_this["estimated"] == True) &
+                (fuck_this["ECO Eligibility"] == "gbis")
+                ]
+
+            if without_survey_gbis.empty:
+                without_survey_identified_as_gbis_qualified = 0
+                without_survey_identified_as_gbis_eco4 = 0
+            else:
+                # We apply the same conversion rate as the properties with a survey
+                without_survey_identified_as_gbis_qualified = np.round(
+                    without_survey_gbis.shape[0] * (gbis_qualified / gbis_identified.shape[0])
+                )
+
+                without_survey_identified_as_gbis_eco4 = np.round(
+                    without_survey_eco4.shape[0] * (identified_as_gbis_looks_like_eco4 / gbis_identified.shape[0])
+                )
+
+            total_eco4_expectation = (
+                total_eco4_expectation +
                 without_survey_without_ciga_expected +
                 without_survey_passed_ciga_expected +
-                without_survey_eco4_expected
+                without_survey_eco4_expected +
+                without_survey_identified_as_gbis_eco4
             )
 
-            total_gbis_expectation = total_gbis_expectation + without_survey_gbis_expected
+            total_gbis_expectation = (
+                total_gbis_expectation +
+                without_survey_gbis_expected +
+                without_survey_identified_as_gbis_qualified
+            )
 
         surveys = loader.data[ha_name]["survey_list"]
         sold_now = 0
@@ -5829,9 +5886,8 @@ def fml_analysis(loader):
                 "Original ECO4 Estimate - Remaining": original_remaining,
                 "Postcode List - Remaining": postcode_list_remaining,
                 # "Of which sold": sales_since_nov,
-                "Of which ECO4 Eligible - Remaining": int(total_expectation),
+                "Of which ECO4 Eligible - Remaining": int(total_eco4_expectation),
                 "Of which GBIS Eligibile - Remaining": int(total_gbis_expectation),
-                # "Proportion with a survey": proportion_with_survey,
             }
         )
 

From a7ed3b84e560ea3e92517f8568bc7918e352d0e7 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 17 Mar 2024 14:12:49 +0000
Subject: [PATCH 144/248] Added HA8

---
 .../ha_15_32/ha_analysis_batch_3.py           | 98 ++++++++++++++++++-
 1 file changed, 93 insertions(+), 5 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 0ca28927..67139e40 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -517,6 +517,11 @@ class DataLoader:
                                              asset_list["Address3"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["Postcode"].astype(str).str.lower().str.strip()
             asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA8":
+            asset_list["matching_address"] = asset_list["AddressLine1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["AddressLine2"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Postcode"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
         elif ha_name == "HA9":
             asset_list["matching_address"] = asset_list["House Number"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \
@@ -2293,6 +2298,30 @@ class DataLoader:
     def correct_ha49_survey_list(survey_list):
         return survey_list
 
+    @staticmethod
+    def correct_ha8_survey_list(survey_list):
+        # Split on / and take the first half
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.split("/").str[0]
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "WESTONIA COURT HOUSE", "Westonia Court"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Hillesdon Avenue", "Hillesden Avenue"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Weston Street", "Western Street"
+        )
+
+        # Remove placeholder rows where postcode is missing
+        survey_list = survey_list[
+            ~pd.isnull(survey_list["Post Code"])
+        ]
+
+        return survey_list
+
     @staticmethod
     def levenstein_match(matching_string, df):
         match_to = df["matching_address"].tolist()
@@ -5464,7 +5493,7 @@ def forecast_remaining_sales(loader):
 
 def fml_data_pull(loader):
     has_bruh = [
-        # "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13",
+        "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13",
         # Updated get_property_type_and_built_form, still needs running
         "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12",
         "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49",
@@ -5561,7 +5590,13 @@ def classify_loft(x):
 
 def fml_analysis(loader):
     assumed_ciga_pass_rate = 0.731
-    has_bruh = ["HA7", "HA14", "HA25", "HA39", "HA16", "HA1"]
+    has_bruh = [
+        "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13",
+        # Updated get_property_type_and_built_form, still needs running
+        "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12",
+        "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49",
+        # todo
+    ]
 
     no_ciga_cavity_descriptions = [
         "Cavity wall, as built, insulated (assumed)",
@@ -5597,12 +5632,13 @@ def fml_analysis(loader):
 
     results = []
     wall_descriptions = []
-    for ha_name in has_bruh:
+    for ha_name in tqdm(has_bruh):
 
         original_figures = loader.december_figures[
             loader.december_figures["HA Name"] == ha_name
             ].copy()
         original_remaining = original_figures["ECO4 remaining"].values[0]
+        original_gbis_remaining = original_figures["GBIS remaining"].values[0]
         postcode_list_remaining = remaining_eligible_mapping[ha_name]
 
         # Read in the epc data
@@ -5669,7 +5705,54 @@ def fml_analysis(loader):
             raise Exception("SOMETHING WENT WRONG")
 
         if any(fuck_this["ECO Eligibility"].str.contains("subject to archetype")):
-            raise Exception("DO THE DAMN ARCHETYPE CHECK BRO")
+            # We perform the archetype test. If the property is a house, it needs to be detached, semi-detached
+            # or end terrace. If it's a bungalow, it must be detached
+            fuck_this["passes_archetype"] = None
+            fuck_this["passes_archetype"] = np.where(
+                (fuck_this["property-type"] == "House") &
+                (fuck_this["built-form"].isin(["Semi-Detached", "End-Terrace", "Detached"])),
+                True,
+                fuck_this["passes_archetype"]
+            )
+
+            fuck_this["passes_archetype"] = np.where(
+                (fuck_this["property-type"] == "Bungalow") &
+                (fuck_this["built-form"].isin(["Detached"])),
+                True,
+                fuck_this["passes_archetype"]
+            )
+
+            fuck_this["ECO Eligibility"] = np.where(
+                (fuck_this["ECO Eligibility"] == "eco4 (subject to ciga) (subject to archetype)") &
+                (fuck_this["passes_archetype"] == True),
+                "eco4 (subject to ciga)",
+                fuck_this["ECO Eligibility"]
+            )
+
+            # If failed the archetype check and needs a CIGA, it's not eligible
+            fuck_this["ECO Eligibility"] = np.where(
+                (fuck_this["ECO Eligibility"] == "eco4 (subject to ciga) (subject to archetype)") &
+                (fuck_this["passes_archetype"] != True),
+                "not eligible",
+                fuck_this["ECO Eligibility"]
+            )
+
+            fuck_this["ECO Eligibility"] = np.where(
+                (fuck_this["ECO Eligibility"] == "eco4 (subject to archetype)") &
+                (fuck_this["passes_archetype"] == True),
+                "eco4",
+                fuck_this["ECO Eligibility"]
+            )
+
+            fuck_this["ECO Eligibility"] = np.where(
+                (fuck_this["ECO Eligibility"] == "eco4 (subject to archetype)") &
+                (fuck_this["passes_archetype"] != True),
+                "gbis",
+                fuck_this["ECO Eligibility"]
+            )
+
+            if any(fuck_this["ECO Eligibility"].str.contains("subject to archetype")):
+                raise Exception("DO THE DAMN ARCHETYPE CHECK BRO")
 
         # clean roof insulation
         fuck_this["roof_insulation_thickness"] = fuck_this["roof_insulation_thickness"].fillna("0")
@@ -5685,6 +5768,9 @@ def fml_analysis(loader):
         fuck_this["roof_insulation_thickness"] = fuck_this[
             "roof_insulation_thickness"
         ].str.replace("average", "150")
+        fuck_this["roof_insulation_thickness"] = fuck_this[
+            "roof_insulation_thickness"
+        ].str.replace("above 150", "150")
 
         fuck_this["roof_classiciation"] = fuck_this.apply(lambda x: classify_loft(x), axis=1)
 
@@ -5884,6 +5970,7 @@ def fml_analysis(loader):
             {
                 "HA Name": ha_name,
                 "Original ECO4 Estimate - Remaining": original_remaining,
+                "Original GGBIS Estimate - Remaining": original_gbis_remaining,
                 "Postcode List - Remaining": postcode_list_remaining,
                 # "Of which sold": sales_since_nov,
                 "Of which ECO4 Eligible - Remaining": int(total_eco4_expectation),
@@ -5927,7 +6014,8 @@ def app():
         "HA1", "HA2", "HA6", "HA7", "HA9", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18", "HA19", "HA24", "HA25",
         "HA27", "HA28", "HA30", "HA31", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA49", "HA50", "HA54", "HA56",
         "HA63", "HA107", "HA117",
-
+        # Added as of March 17th
+        "HA8",
         # New HAS
         "HAXX", "HAXXX",
     ]

From 94ad06726320972b02db779b8f2e9440a0ea9c0e Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 17 Mar 2024 14:25:49 +0000
Subject: [PATCH 145/248] done ha11

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 67139e40..920ec1b6 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -530,6 +530,12 @@ class DataLoader:
                                              asset_list["Address Line 4"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["Postcode"].astype(str).str.lower().str.strip()
             asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA11":
+            asset_list["matching_address"] = asset_list["Address 1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address 2"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address 3"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Post Code"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["Post Code"].astype(str).str.lower().str.strip()
         elif ha_name == "HA13":
             asset_list["matching_address"] = asset_list["Address 1"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["address 2"].astype(str).str.lower().str.strip() + ", " + \
@@ -2322,6 +2328,15 @@ class DataLoader:
 
         return survey_list
 
+    @staticmethod
+    def correct_ha11_survey_list(survey_list):
+        # Remove 39 HOLLYWOOD WAY as it's not in the asset list
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "HOLLYWOOD WAY") &
+              (survey_list["NO."] == 39))
+        ]
+        return survey_list
+
     @staticmethod
     def levenstein_match(matching_string, df):
         match_to = df["matching_address"].tolist()
@@ -6015,7 +6030,7 @@ def app():
         "HA27", "HA28", "HA30", "HA31", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA49", "HA50", "HA54", "HA56",
         "HA63", "HA107", "HA117",
         # Added as of March 17th
-        "HA8",
+        "HA8", "HA11",
         # New HAS
         "HAXX", "HAXXX",
     ]

From 9bbcbc881f3f1c50ab8ec422c5b38f04e864e676 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 17 Mar 2024 14:42:24 +0000
Subject: [PATCH 146/248] Added ha21

---
 etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 920ec1b6..e9de4695 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -573,6 +573,12 @@ class DataLoader:
                 asset_list["Postcode"].astype(str).str.lower().str.strip()
             )
             asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA21":
+            asset_list["matching_address"] = (
+                asset_list["Address"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["PostCode"].astype(str).str.lower().str.strip()
+            )
+            asset_list["matching_postcode"] = asset_list["PostCode"].astype(str).str.lower().str.strip()
         elif ha_name == "HA25":
             asset_list["matching_address"] = asset_list[
                 self.COLUMN_CONFIG[ha_name]["address"]
@@ -6030,7 +6036,7 @@ def app():
         "HA27", "HA28", "HA30", "HA31", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA49", "HA50", "HA54", "HA56",
         "HA63", "HA107", "HA117",
         # Added as of March 17th
-        "HA8", "HA11",
+        "HA8", "HA11", "HA21",
         # New HAS
         "HAXX", "HAXXX",
     ]
@@ -6038,7 +6044,7 @@ def app():
     # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE], 13 [DONE],
     # 35 [DONE], 56 [DONE], 19 [DONE], 18 [DONE], 9 [DONE], 27 [DONE], 34 [DONE], 30 [DONE], 31 [DONE], 54 [DONE]
     #
-    # Consider for ECO4:
+    # Consider for ECO4: HA 70 - have to merge ECO3 list though, HA17 has LOTs of assets, but the asset list is a mess
     # Consider for GBIS:
     # Ignore for now:
     # 38 [problematic, but no ECO4], 10 problematic (no eligibility), 20 has barely any in

From 897d58eec2ecc1e51d4a46878918f6c019a2705c Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 18 Mar 2024 10:40:12 +0000
Subject: [PATCH 147/248] Added ha44

---
 .../ha_15_32/ha_analysis_batch_3.py           | 189 +++++++++++++++++-
 1 file changed, 178 insertions(+), 11 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index e9de4695..dc96d403 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -78,6 +78,29 @@ PROPERTY_TYPE_LOOKUP = {
             "End Terraced": "End-Terrace",
         }
     },
+    "HA8": {
+        "House": "House",
+        "Flat": "Flat",
+        "Bungalow": "Bungalow",
+        "Maisonette": "Maisonette",
+        "Bedsit": None,
+        "Room": None,
+        "Other": None,
+        "Commerical": None
+    },
+    "HA11": {
+        "Flat": "Flat",
+        "House": "House",
+        "Semi-Det House": "House",
+        "Bedsit": None,
+        "End-Terr House": "House",
+        "Mid-Terr House": "House",
+        "Bungalow": "Bungalow",
+        "Maisonette": "Maisonette",
+        "End Terr Flat": "Flat",
+        "Mid Terr Flat": "Flat",
+        "Detached Flat": "Flat",
+    },
     "HA12": {
         "House": "House",
         "Flat": "Flat",
@@ -244,6 +267,13 @@ PROPERTY_TYPE_LOOKUP = {
         "3 Bedroom Unknown": None,
         "4 Bedroom Unknown": None,
     },
+    "HA37": {
+        "FLT": "Flat",
+        "HSE": "House",
+        "BNW": "Bungalow",
+        "MAS": "Maisonette",
+        "HSL": None
+    },
     "HA39": {
         "Semi house": {"property_type": "House", "built_form": "Semi-Detached"},
         "1st floor flat": {"property_type": "Flat", "built_form": None},
@@ -291,6 +321,21 @@ PROPERTY_TYPE_LOOKUP = {
         'Bungalow 1919-1945': 'Bungalow',
         'Office': None
     },
+    "HA42": {
+        'Flat': 'Flat',
+        'House': 'House',
+        'Flat Basement': 'Flat',
+        'Room': None,
+        'Bedsit Flat': 'Flat',
+        'Maisonette': 'Maisonette',
+        'Scheme Office': None,
+        'Scheme Lounge': None,
+        'Bungalow': 'Bungalow',
+        'Garage': None,
+        'Scheme Sleep Room': None,
+        'Cluster': None,
+        'Scheme Room': None
+    },
     "HA48": {
         "House": "House",
         "Flat": "Flat",
@@ -626,6 +671,12 @@ class DataLoader:
                                              asset_list["Address Line 4"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["Address Post Code"].astype(str).str.lower().str.strip()
             asset_list["matching_postcode"] = asset_list["Address Post Code"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA37":
+            asset_list["matching_address"] = asset_list["ADDRESS LINE 1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["ADDRESS LINE 2"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["ADDRESS LINE 3"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["POSTCODE"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["POSTCODE"].astype(str).str.lower().str.strip()
         elif ha_name == "HA38":
             asset_list["matching_address"] = asset_list["House_Number"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["Address_Line_1"].astype(str).str.lower().str.strip() + ", " + \
@@ -650,6 +701,18 @@ class DataLoader:
                                              asset_list["AddressLine5"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["Postcode"].astype(str).str.lower().str.strip()
             asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA42":
+            asset_list["matching_address"] = asset_list["Dwelling Number"].astype(str).str.lower().str.strip() + " " + \
+                                             asset_list["Street"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Locality"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Town"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Postcode"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA44":
+            asset_list["matching_address"] = asset_list["Address 1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address 2"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Postal Code"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["Postal Code"].astype(str).str.lower().str.strip()
         elif ha_name == "HA50":
             asset_list["matching_address"] = asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["Post Code"].astype(str).str.lower().str.strip()
@@ -1177,6 +1240,66 @@ class DataLoader:
             asset_list["matching_address"]
         )
 
+        asset_list["HouseNo"] = np.where(
+            (asset_list["Address_Line_1"].isin(
+                [
+                    "10 SOUTH VIEW/FLAT C",
+                ]
+            )),
+            "10C",
+            asset_list["HouseNo"]
+        )
+
+        asset_list["matching_address"] = np.where(
+            (asset_list["Address_Line_1"].isin(
+                [
+                    "10 SOUTH VIEW/FLAT C",
+                ]
+            )),
+            "FLAT c, spennymoor, co. durham, dl16 7df, 10c, 10 south view",
+            asset_list["matching_address"]
+        )
+
+        asset_list["HouseNo"] = np.where(
+            (asset_list["Address_Line_1"].isin(
+                [
+                    "10 SOUTH VIEW/FLAT D",
+                ]
+            )),
+            "10D",
+            asset_list["HouseNo"]
+        )
+
+        asset_list["matching_address"] = np.where(
+            (asset_list["Address_Line_1"].isin(
+                [
+                    "10 SOUTH VIEW/FLAT D",
+                ]
+            )),
+            "FLAT d, spennymoor, co. durham, dl16 7df, 10d, 10 south view",
+            asset_list["matching_address"]
+        )
+
+        asset_list["HouseNo"] = np.where(
+            (asset_list["Address_Line_1"].isin(
+                [
+                    "10 SOUTH VIEW/FLAT E",
+                ]
+            )),
+            "10E",
+            asset_list["HouseNo"]
+        )
+
+        asset_list["matching_address"] = np.where(
+            (asset_list["Address_Line_1"].isin(
+                [
+                    "10 SOUTH VIEW/FLAT E",
+                ]
+            )),
+            'FLAT e, spennymoor, co. durham, dl16 7df, 10e, 10 south view',
+            asset_list["matching_address"]
+        )
+
         return asset_list
 
     @staticmethod
@@ -1730,6 +1853,13 @@ class DataLoader:
             survey_list["Street / Block Name"]
         )
 
+        survey_list["Post Code"] = np.where(
+            (survey_list["Street / Block Name"] == "BEECH ROAD") &
+            (survey_list["Post Code"] == "DH6 1JD"),
+            "DH6 1JB",
+            survey_list["Post Code"]
+        )
+
         return survey_list
 
     @staticmethod
@@ -2343,6 +2473,18 @@ class DataLoader:
         ]
         return survey_list
 
+    @staticmethod
+    def correct_ha42_survey_list(survey_list):
+        # original asset list has nothing in the street
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Turnstone Terrace", ""
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Pegasus place", ""
+        )
+        return survey_list
+
     @staticmethod
     def levenstein_match(matching_string, df):
         match_to = df["matching_address"].tolist()
@@ -2926,7 +3068,7 @@ class DataLoader:
             "eco4 subject to ciga": "eco4 (subject to ciga)",
             "eco4 (subject to archetype/ciga)": "eco4 (subject to ciga) (subject to archetype)",
             "eco4( subject to ciga/archetype)": "eco4 (subject to ciga) (subject to archetype)",
-            "eco4 (subject to ciga/ archetype)": "eco4 (subject to ciga) (subject to archetype)"
+            "eco4 (subject to ciga/ archetype)": "eco4 (subject to ciga) (subject to archetype)",
         }
 
         ha_facts_and_figures = []
@@ -3189,6 +3331,9 @@ def get_property_type_and_built_form(property_meta, ha_name):
     elif ha_name == "HA7":
         property_type = PROPERTY_TYPE_LOOKUP[ha_name]["property_type"].get(property_meta["Archetype"])
         built_form = PROPERTY_TYPE_LOOKUP[ha_name]["built_form"].get(property_meta["Property Type"])
+    elif ha_name == "HA8":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip())
+        built_form = None
     elif ha_name == "HA9":
         property_description = property_meta["Asset Type"].strip().lower()
         if "house" in property_description:
@@ -3204,6 +3349,9 @@ def get_property_type_and_built_form(property_meta, ha_name):
             return "Maisonette", None
 
         return None, None
+    elif ha_name == "HA11":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip())
+        built_form = None
     elif ha_name == "HA12":
         property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Asset_Type1"].strip())
         built_form = None
@@ -3237,6 +3385,21 @@ def get_property_type_and_built_form(property_meta, ha_name):
     elif ha_name == "HA19":
         property_type = property_meta["Dwelling Type"]
         built_form = None
+    elif ha_name == "HA21":
+        property_description = property_meta["Property Type"].strip().lower()
+        if "house" in property_description:
+            return "House", None
+
+        if "flat" in property_description:
+            return "Flat", None
+
+        if "bungalow" in property_description:
+            return "Bungalow", None
+
+        if "maisonette" in property_description:
+            return "Maisonette", None
+
+        return None, None
     elif ha_name == "HA24":
         property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip())
         built_form = None
@@ -3277,6 +3440,9 @@ def get_property_type_and_built_form(property_meta, ha_name):
     elif ha_name == "HA35":
         property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type Grouping"].strip())
         built_form = None
+    elif ha_name == "HA37":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["PROPERTY TYPE"].strip())
+        built_form = None
     elif ha_name == "HA39":
         property_type_config = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["ConstructionStyle"], {})
         property_type = property_type_config.get("property_type", None)
@@ -3291,6 +3457,9 @@ def get_property_type_and_built_form(property_meta, ha_name):
     elif ha_name == "HA41":
         property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Archetype"].strip())
         built_form = None
+    elif ha_name == "HA42":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Dwelling use/type"].strip())
+        built_form = None
     elif ha_name == "HA48":
         property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip())
         built_form = None
@@ -5515,10 +5684,9 @@ def forecast_remaining_sales(loader):
 def fml_data_pull(loader):
     has_bruh = [
         "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13",
-        # Updated get_property_type_and_built_form, still needs running
         "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12",
         "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49",
-        # todo
+        'HA8', 'HA11', 'HA21', 'HA37', 'HA42',
     ]
 
     # Can't pull from EPC database because it's based in Scotland
@@ -5613,10 +5781,9 @@ def fml_analysis(loader):
     assumed_ciga_pass_rate = 0.731
     has_bruh = [
         "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13",
-        # Updated get_property_type_and_built_form, still needs running
         "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12",
         "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49",
-        # todo
+        'HA8', 'HA11', 'HA21', 'HA37', 'HA42',
     ]
 
     no_ciga_cavity_descriptions = [
@@ -5639,7 +5806,7 @@ def fml_analysis(loader):
         "HA39", "HA14", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA7",
         "HA16", "HA107", "HA25", "HA50", "HA41", "HA48", "HA2", "HA63", "HA12",
         "HA117", "HA13", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27",
-        "HA30", "HA31", "HA54", "HAXX", "HA49", "HAXXX"
+        "HA30", "HA31", "HA54", "HAXX", "HA49", "HAXXX",
     ]
 
     values = [
@@ -5660,7 +5827,6 @@ def fml_analysis(loader):
             ].copy()
         original_remaining = original_figures["ECO4 remaining"].values[0]
         original_gbis_remaining = original_figures["GBIS remaining"].values[0]
-        postcode_list_remaining = remaining_eligible_mapping[ha_name]
 
         # Read in the epc data
         asset_list = loader.data[ha_name]["asset_list"].copy()
@@ -5992,10 +6158,10 @@ def fml_analysis(loader):
                 "HA Name": ha_name,
                 "Original ECO4 Estimate - Remaining": original_remaining,
                 "Original GGBIS Estimate - Remaining": original_gbis_remaining,
-                "Postcode List - Remaining": postcode_list_remaining,
+                # "Postcode List - Remaining": postcode_list_remaining,
                 # "Of which sold": sales_since_nov,
-                "Of which ECO4 Eligible - Remaining": int(total_eco4_expectation),
-                "Of which GBIS Eligibile - Remaining": int(total_gbis_expectation),
+                "EPC verified ECO4 Eligible - Remaining": int(total_eco4_expectation),
+                "EPC verified GBIS Eligibile - Remaining": int(total_gbis_expectation),
             }
         )
 
@@ -6036,7 +6202,8 @@ def app():
         "HA27", "HA28", "HA30", "HA31", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA49", "HA50", "HA54", "HA56",
         "HA63", "HA107", "HA117",
         # Added as of March 17th
-        "HA8", "HA11", "HA21",
+        "HA8", "HA11", "HA21", "HA37", "HA42",
+        "HA44",
         # New HAS
         "HAXX", "HAXXX",
     ]

From c58acadb730b6e6ab1ebb700b4669ab3cf171f5b Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 18 Mar 2024 12:19:15 +0000
Subject: [PATCH 148/248] HA51 eco3 matching

---
 .../ha_15_32/ha_analysis_batch_3.py           | 80 ++++++++++++++++---
 1 file changed, 71 insertions(+), 9 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index dc96d403..af9af514 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -491,6 +491,10 @@ class DataLoader:
             "address": "A_Address",
             "postcode": "matching_postcode"
         },
+        "HA45": {
+            "address": "Full postal address",
+            "postcode": "Postcode"
+        },
         "HA48": {
             "address": "Full Address",
             "postcode": "Postcode"
@@ -518,7 +522,8 @@ class DataLoader:
         "HA50": 4,
         "HA63": 15,
         "HA107": 51,
-        "HA48": 0
+        "HA48": 0,
+        "HA45": 0
     }
 
     UNMATCHED_ECO3 = {
@@ -527,7 +532,8 @@ class DataLoader:
         "HA50": 5,
         "HA56": 320,
         "HA63": 0,
-        "HA117": 4
+        "HA117": 4,
+        "HA51": 24
     }
 
     def __init__(self, directories, december_figures_filepath, use_cache, rebuild):
@@ -542,7 +548,7 @@ class DataLoader:
 
     def create_asset_list_matching_address(self, ha_name, asset_list):
 
-        if ha_name in ["HA1", "HA6", "HA12", "HA16", "HA24", "HA30", "HA31", "HA48", "HA49", "HA54"]:
+        if ha_name in ["HA1", "HA6", "HA12", "HA16", "HA24", "HA30", "HA31", "HA45", "HA48", "HA49", "HA54"]:
             asset_list["matching_address"] = asset_list[
                 self.COLUMN_CONFIG[ha_name]["address"]
             ].astype(str).str.lower().str.strip()
@@ -717,6 +723,18 @@ class DataLoader:
             asset_list["matching_address"] = asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["Post Code"].astype(str).str.lower().str.strip()
             asset_list["matching_postcode"] = asset_list["Post Code"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA51":
+            asset_list["matching_address"] = asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address Line 2"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address Line 3"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Postcode"].astype(str).str.lower().str.strip()
+            asset_list["matching_address"] = np.where(
+                asset_list["Block"].str.strip().str.len() > 0,
+                asset_list["Block"].astype(str).str.lower().str.strip() + ", " + \
+                asset_list["matching_address"],
+                asset_list["matching_address"]
+            )
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
         elif ha_name == "HA56":
             asset_list["matching_address"] = asset_list["Address 1"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["Address 2"].astype(str).str.lower().str.strip() + ", " + \
@@ -2485,6 +2503,13 @@ class DataLoader:
         )
         return survey_list
 
+    @staticmethod
+    def correct_ha45_survey_list(survey_list):
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Norwich Road", "Norwich Avenue"
+        )
+        return survey_list
+
     @staticmethod
     def levenstein_match(matching_string, df):
         match_to = df["matching_address"].tolist()
@@ -2744,6 +2769,38 @@ class DataLoader:
 
         return eco3_list
 
+    @staticmethod
+    def correct_ha51_eco3_list(eco3_list):
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "HASELEMERE AVENUE", "HASLEMERE AVENUE"
+        )
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "THORVILLE GROVE", "THORNVILLE GROVE"
+        )
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "MONTBRETA CLOSE", "MONTBRETIA CLOSE"
+        )
+        eco3_list["Post Code"] = np.where(
+            (eco3_list["Street / Block Name"] == "SYDENHAM ROAD") &
+            (eco3_list["Post Code"] == "CR0 2DW"),
+            "CR0 2ED",
+            eco3_list["Post Code"]
+        )
+        # Not in asset list
+        eco3_list = eco3_list[
+            ~((eco3_list["Street / Block Name"] == "WOODLEY LANE") &
+              (eco3_list["Post Code"] == "SM5 2RJ") &
+              (eco3_list["NO "] == "FLAT 3, 11"))
+        ]
+
+        eco3_list["NO "] = np.where(
+            (eco3_list["NO "] == "47 B"),
+            "47B",
+            eco3_list["NO "]
+        )
+
+        return eco3_list
+
     def merge_eco3_to_assets(self, asset_list, eco3_list, ha_name):
 
         eco3_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_eco3_list")
@@ -2752,7 +2809,7 @@ class DataLoader:
         asset_list["matching_postcode_nospace"] = asset_list["matching_postcode"].str.replace(" ", "").str.lower()
         eco3_list["postcode_no_space"] = eco3_list["Post Code"].str.lower().str.replace(" ", "")
 
-        if ha_name in ["HA25", "HA56"]:
+        if ha_name in ["HA25", "HA56", "HA51"]:
             # HA25: 317 -> 259
             missed_postcodes = {
                 postcode for postcode in eco3_list["postcode_no_space"] if
@@ -2774,7 +2831,7 @@ class DataLoader:
         matching_lookup = []
         missed = []
         for _, row in tqdm(eco3_list.iterrows(), total=len(eco3_list)):
-            # if row["eco3_list_row_id"] == "HA25_Eco3_5422":
+            # if row["eco3_list_row_id"] == "HA51_Eco3_22":
             #     raise Exception()
             postcode = row["postcode_no_space"]
 
@@ -2813,6 +2870,12 @@ class DataLoader:
                 missed.append(row["eco3_list_row_id"])
                 continue
 
+            if df.shape[0] > 1:
+                if "flat" in str(row["NO "]).lower():
+                    df = df[df["matching_address"].str.contains("flat")]
+                else:
+                    df = df[~df["matching_address"].str.contains("flat")]
+
             if df.shape[0] != 1:
                 print(row["Street / Block Name"])
                 print(house_number)
@@ -6200,10 +6263,9 @@ def app():
     priority_has = [
         "HA1", "HA2", "HA6", "HA7", "HA9", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18", "HA19", "HA24", "HA25",
         "HA27", "HA28", "HA30", "HA31", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA49", "HA50", "HA54", "HA56",
-        "HA63", "HA107", "HA117",
-        # Added as of March 17th
-        "HA8", "HA11", "HA21", "HA37", "HA42",
-        "HA44",
+        "HA63", "HA107", "HA117", "HA8", "HA11", "HA21", "HA37", "HA42",
+        # Added as of March 18th
+        "HA44", "HA45", "HA51",
         # New HAS
         "HAXX", "HAXXX",
     ]

From e7cd80eba0ef8f11c62506509b5a7d60c7a37ce7 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 18 Mar 2024 12:34:28 +0000
Subject: [PATCH 149/248] Added HA52

---
 .../ha_15_32/ha_analysis_batch_3.py           | 21 +++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index af9af514..056a4190 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -946,6 +946,17 @@ class DataLoader:
         else:
             return "ECO surveys"
 
+    @staticmethod
+    def correct_ha51_asset_list(asset_list):
+        # Correct this
+        asset_list["HouseNo"] = np.where(
+            asset_list["matching_address"].str.contains("61 wandle bank"),
+            asset_list["Block"].str.lower(),
+            asset_list["HouseNo"]
+        )
+
+        return asset_list
+
     def load_asset_list(self, filepath, ha_name):
         workbook = openpyxl.load_workbook(filepath)
         asset_sheetname = self.get_asset_sheetname(workbook)
@@ -2510,6 +2521,16 @@ class DataLoader:
         )
         return survey_list
 
+    @staticmethod
+    def correct_ha51_survey_list(survey_list):
+        survey_list = survey_list.rename(columns={"NO ": "NO."})
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Autum Close", "Autumn Close"
+        )
+
+        return survey_list
+
     @staticmethod
     def levenstein_match(matching_string, df):
         match_to = df["matching_address"].tolist()

From e6c9dd7074dfba12668b31651ec1a5d9eab6a27c Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 18 Mar 2024 12:55:37 +0000
Subject: [PATCH 150/248] Done HA52

---
 .../ha_15_32/ha_analysis_batch_3.py           | 37 +++++++++++++++++--
 1 file changed, 33 insertions(+), 4 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 056a4190..bdf15917 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -503,6 +503,10 @@ class DataLoader:
             "address": "Property Address Full",
             "postcode": "Property Postcode"
         },
+        "HA52": {
+            "address": "Postal Address",
+            "postcode": "POSTCODE"
+        },
         "HA54": {
             "address": "Postal Address",
             "postcode": "matching_postcode"
@@ -523,7 +527,8 @@ class DataLoader:
         "HA63": 15,
         "HA107": 51,
         "HA48": 0,
-        "HA45": 0
+        "HA45": 0,
+        "HA52": 5
     }
 
     UNMATCHED_ECO3 = {
@@ -548,7 +553,7 @@ class DataLoader:
 
     def create_asset_list_matching_address(self, ha_name, asset_list):
 
-        if ha_name in ["HA1", "HA6", "HA12", "HA16", "HA24", "HA30", "HA31", "HA45", "HA48", "HA49", "HA54"]:
+        if ha_name in ["HA1", "HA6", "HA12", "HA16", "HA24", "HA30", "HA31", "HA45", "HA48", "HA49", "HA52", "HA54"]:
             asset_list["matching_address"] = asset_list[
                 self.COLUMN_CONFIG[ha_name]["address"]
             ].astype(str).str.lower().str.strip()
@@ -2531,6 +2536,25 @@ class DataLoader:
 
         return survey_list
 
+    @staticmethod
+    def correct_ha52_survey_list(survey_list):
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Mardalle Avenue", "Mardale Avenue"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Ollerton  Close, Grappenhall", "Ollerton Close"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Bradshaw Road, Grappenhall", "Bradshaw Lane"
+        )
+
+        # Drop a bunch of dupes
+        survey_list = survey_list.drop_duplicates(["NO.", "Street / Block Name", "Post Code"])
+
+        return survey_list
+
     @staticmethod
     def levenstein_match(matching_string, df):
         match_to = df["matching_address"].tolist()
@@ -3165,7 +3189,12 @@ class DataLoader:
             asset_list_starting_size = asset_list.shape[0]
 
             # Change the column name if it's ECO eligibility
-            asset_list = asset_list.rename(columns={"ECO eligibility": "ECO Eligibility"})
+            asset_list = asset_list.rename(
+                columns={
+                    "ECO eligibility": "ECO Eligibility",
+                    "ECO Eligibilty": "ECO Eligibility",
+                },
+            )
             # Remove surplus whitespace from the ECO Eligibility column
             asset_list["ECO Eligibility"] = asset_list["ECO Eligibility"].str.strip()
             # Push to lower case
@@ -6286,7 +6315,7 @@ def app():
         "HA27", "HA28", "HA30", "HA31", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA49", "HA50", "HA54", "HA56",
         "HA63", "HA107", "HA117", "HA8", "HA11", "HA21", "HA37", "HA42",
         # Added as of March 18th
-        "HA44", "HA45", "HA51",
+        "HA44", "HA45", "HA51", "HA52",
         # New HAS
         "HAXX", "HAXXX",
     ]

From 92193d773dbd72aca67da82870d3f7da5a4acfe7 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 18 Mar 2024 13:21:57 +0000
Subject: [PATCH 151/248] fix facts and figures bug for ha51

---
 .../ha_15_32/ha_analysis_batch_3.py           | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index bdf15917..e40bb98b 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -3305,11 +3305,18 @@ class DataLoader:
                     )
                 else:
                     # We have some examples, e.g. HA28, where we do not have the installed or cancelled column
-                    survey_list["installation_status"] = np.where(
-                        survey_list['INSTALL/ CANCELLATION DATE'].str.lower().str.contains("cancelled"),
-                        "cancelled",
-                        "installed",
-                    )
+                    if 'INSTALL/ CANCELLATION DATE' in survey_list.columns:
+                        survey_list["installation_status"] = np.where(
+                            survey_list['INSTALL/ CANCELLATION DATE'].str.lower().str.contains("cancelled"),
+                            "cancelled",
+                            "installed",
+                        )
+                    else:
+                        survey_list["installation_status"] = np.where(
+                            survey_list['INSTALL / CANCELLATION DATE'].str.lower().str.contains("cancelled"),
+                            "cancelled",
+                            "installed",
+                        )
 
                 # Finally, for other cases, we set the status to "in progress"
                 survey_list["installation_status"] = survey_list["installation_status"].fillna("in progress")
@@ -5800,6 +5807,8 @@ def fml_data_pull(loader):
         "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12",
         "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49",
         'HA8', 'HA11', 'HA21', 'HA37', 'HA42',
+        # NEW - add property type
+        'HA44', 'HA45', 'HA51', 'HA52'
     ]
 
     # Can't pull from EPC database because it's based in Scotland

From 443aa585d0c3c35ae34718f0e8338ec48ba7ad3c Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 18 Mar 2024 15:40:52 +0000
Subject: [PATCH 152/248] Adding ha5

---
 .../ha_15_32/ha_analysis_batch_3.py           | 181 +++++++++++++++++-
 1 file changed, 171 insertions(+), 10 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index e40bb98b..009064c6 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -336,6 +336,16 @@ PROPERTY_TYPE_LOOKUP = {
         'Cluster': None,
         'Scheme Room': None
     },
+    "HA45": {
+        'Large block of flats': 'Flat',
+        'Small block of flats/dwelling converted in to flats': 'Flat',
+        'Semi-detached house': 'House',
+        'Mid-terraced house': 'House',
+        'End-terraced house': 'House',
+        'Block of flats': 'Flat',
+        'Detached house': 'House',
+        'Flat in mixed use building': 'Flat',
+    },
     "HA48": {
         "House": "House",
         "Flat": "Flat",
@@ -364,6 +374,30 @@ PROPERTY_TYPE_LOOKUP = {
         'Flat?': 'Flat',
         'Bungalow ': 'Bungalow'
     },
+    "HA51": {
+        'FLAT': 'Flat',
+        'HOUSE': 'House',
+        'MAISONETTE': 'Maisonette',
+        'BEDSIT': None,  # Considering as a non-specific residential category here
+        'BUNGALOW': 'Bungalow',
+    },
+    "HA52": {
+        'House - Mid Terrace': 'House',
+        'Flat - First Floor': 'Flat',
+        'Flat - Ground Floor': 'Flat',
+        'House - Semi-Detached': 'House',
+        'House - End Terrace': 'House',
+        'Flat - Second Floor': 'Flat',
+        'Bedsit': None,  # Considering as a non-specific residential category here
+        'Bungalow - Semi-Detached': 'Bungalow',
+        'Bungalow - Mid Terrace': 'Bungalow',
+        'Bungalow - End Terrace': 'Bungalow',
+        'House - Detached': 'House',
+        'Flat - Third Floor': 'Flat',
+        'House attached to flats': 'House',
+        'Flat - Fourth Floor': 'Flat',
+        'Bungalow - Detached': 'Bungalow'
+    },
     "HA56": {
         'House Non Specific': 'House',
         'HOUSE TERRACED': 'House',
@@ -463,6 +497,10 @@ class DataLoader:
             "address": "Address",
             "postcode": "Address - Postcode"
         },
+        "HA5": {
+            "address": "Address",
+            "postcode": "matching_postcode"
+        },
         "HA6": {
             "address": "propertyaddress",
             "postcode": "address"  # The 'address' column actually contains postcode
@@ -553,7 +591,9 @@ class DataLoader:
 
     def create_asset_list_matching_address(self, ha_name, asset_list):
 
-        if ha_name in ["HA1", "HA6", "HA12", "HA16", "HA24", "HA30", "HA31", "HA45", "HA48", "HA49", "HA52", "HA54"]:
+        if ha_name in [
+            "HA1", "HA5", "HA6", "HA12", "HA16", "HA24", "HA30", "HA31", "HA45", "HA48", "HA49", "HA52", "HA54"
+        ]:
             asset_list["matching_address"] = asset_list[
                 self.COLUMN_CONFIG[ha_name]["address"]
             ].astype(str).str.lower().str.strip()
@@ -750,6 +790,10 @@ class DataLoader:
             asset_list["matching_address"] = asset_list["Address1"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["POSTCODE"].astype(str).str.lower().str.strip()
             asset_list["matching_postcode"] = asset_list["POSTCODE"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA70":
+            asset_list["matching_address"] = asset_list["Address1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["POSTCODE"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["POSTCODE"].astype(str).str.lower().str.strip()
         elif ha_name == "HA107":
             # Create matching_address by concatenating House No, Street, Town, District, Postcode
             asset_list["matching_address"] = asset_list["House No"].astype(str).str.lower().str.strip() + ", " + \
@@ -962,9 +1006,100 @@ class DataLoader:
 
         return asset_list
 
+    def prepare_ha17(self, workbook):
+        blocks_sheet = workbook["Blocks List - Cavity Wall only"]
+        blocks_data = []
+        blocks_colnames = [cell.value for cell in blocks_sheet[2]]
+        for row in blocks_sheet.iter_rows(min_row=4, values_only=False):
+            row_data = [cell.value for cell in row]  # This will get you the cell values
+            blocks_data.append(row_data)
+
+        blocks_df = pd.DataFrame(blocks_data, columns=blocks_colnames)
+
+        blocks_df["matching_address"] = (
+            blocks_df["Block Name\n[as per Naming Convention procedure]"].astype(str).str.lower().str.strip() + ", " +
+            blocks_df["Block Street Name"].astype(str).str.lower().str.strip() + ", " +
+            blocks_df["Postcode"].astype(str).str.lower().str.strip()
+        )
+        blocks_df["matching_postcode"] = blocks_df["Postcode"].astype(str).str.lower().str.strip()
+        blocks_df["property_type"] = "Flat"
+
+        street_properties_sheet = workbook["Street Properties - Cavity Wall"]
+        street_properties_data = []
+        street_properties_colnames = [cell.value for cell in street_properties_sheet[2]]
+        for row in street_properties_sheet.iter_rows(min_row=3, values_only=False):
+            row_data = [cell.value for cell in row]  # This will get you the cell values
+            street_properties_data.append(row_data)
+
+        street_properties_df = pd.DataFrame(street_properties_data, columns=street_properties_colnames)
+
+        street_properties_df["matching_address"] = (
+            street_properties_df["Block Name\n[as per Naming Convention procedure]"].astype(
+                str).str.lower().str.strip() + ", " +
+            street_properties_df["Postcode"].astype(str).str.lower().str.strip()
+        )
+        street_properties_df["matching_postcode"] = street_properties_df["Postcode"].astype(str).str.lower().str.strip()
+        street_properties_df["property_type"] = street_properties_df[
+            "Block typology based on dwelling type\n[defined list]"
+        ]
+
+        asset_list_compressed = pd.concat(
+            [
+                blocks_df[["matching_address", "matching_postcode", "property_type", "ECO Eligibility"]],
+                street_properties_df[["matching_address", "matching_postcode", "property_type", "ECO Eligibility"]]
+            ],
+            axis=0
+        )
+        # We expand
+        range_pattern = r"(\d+)\s+to\s+(\d+)\s+(.*)"
+        asset_list = []
+        for _, row in tqdm(asset_list_compressed.iterrows(), total=len(asset_list_compressed)):
+            if row["ECO Eligibility"] == "Not Eligible":
+                asset_list.append(row.to_dict())
+                continue
+
+            # Detect a house number range
+            match = re.search(range_pattern, row["matching_address"])
+
+            if not match:
+                asset_list.append(row.to_dict())
+                continue
+
+            # Extracting the start and end of the range
+            start_number = int(match.group(1))
+            end_number = int(match.group(2))
+            rest_of_address = match.group(3)
+
+            # Generating the list of house numbers
+            house_numbers = list(range(start_number, end_number + 1))
+            data_to_extend = []
+            for house_number in house_numbers:
+                new_adress = f"{house_number} {rest_of_address}"
+
+                entry = row.to_dict().copy()
+                entry.update({"matching_address": new_adress})
+
+                data_to_extend.append(entry)
+
+            asset_list.extend(data_to_extend)
+
+        asset_list = pd.DataFrame(asset_list)
+
+        # Add in asset_list_row_id
+        asset_list["asset_list_row_id"] = ["HA17" + str(i) for i in range(0, len(asset_list))]
+
+        # Add on house number
+        asset_list = self.create_asset_list_house_no(ha_name="HA17", asset_list=asset_list)
+
+        return asset_list
+
     def load_asset_list(self, filepath, ha_name):
         workbook = openpyxl.load_workbook(filepath)
-        asset_sheetname = self.get_asset_sheetname(workbook)
+        if ha_name == "HA17":
+            asset_list = self.prepare_ha17(workbook)
+            return asset_list, pd.DataFrame(), pd.DataFrame(), pd.DataFrame()
+        else:
+            asset_sheetname = self.get_asset_sheetname(workbook)
 
         asset_sheet = workbook[asset_sheetname]
         asset_sheet_colnames = [cell.value for cell in asset_sheet[1]]
@@ -977,6 +1112,9 @@ class DataLoader:
         if ha_name == "HA54":
             asset_sheet_colnames[10] = "matching_postcode"
 
+        if ha_name == "HA5":
+            asset_sheet_colnames[2] = "matching_postcode"
+
         rows_data = []
 
         for row in asset_sheet.iter_rows(min_row=2, values_only=False):
@@ -2555,6 +2693,10 @@ class DataLoader:
 
         return survey_list
 
+    @staticmethod
+    def correct_ha5_survey_list(survey_list):
+        return survey_list
+
     @staticmethod
     def levenstein_match(matching_string, df):
         match_to = df["matching_address"].tolist()
@@ -3431,6 +3573,9 @@ class DataLoader:
 
 
 def get_property_type_and_built_form(property_meta, ha_name):
+    if ha_name in ["HA44"]:
+        return None, None
+
     if ha_name == "HA1":
         property_type = property_meta["Asset Type"]
         # We correct a small error
@@ -3499,6 +3644,8 @@ def get_property_type_and_built_form(property_meta, ha_name):
         config = PROPERTY_TYPE_LOOKUP[ha_name][property_meta["Type"]]
         property_type = config.get("property-type")
         built_form = config.get("built-form")
+    elif ha_name == "HA17":
+        return property_meta["property_type"], None
     elif ha_name == "HA18":
         property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Asset Type"].strip())
         built_form = None
@@ -3580,6 +3727,9 @@ def get_property_type_and_built_form(property_meta, ha_name):
     elif ha_name == "HA42":
         property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Dwelling use/type"].strip())
         built_form = None
+    elif ha_name == "HA45":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property type"].strip())
+        built_form = None
     elif ha_name == "HA48":
         property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip())
         built_form = None
@@ -3589,6 +3739,14 @@ def get_property_type_and_built_form(property_meta, ha_name):
     elif ha_name == "HA50":
         property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip())
         built_form = None
+    elif ha_name == "HA51":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Asset Type"].strip())
+        built_form = None
+    elif ha_name == "HA52":
+        if property_meta["Property Type"] is None:
+            return None, None
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip())
+        built_form = None
     elif ha_name == "HA54":
         property_type = property_meta["Property Type"]
         built_form = None
@@ -5806,9 +5964,9 @@ def fml_data_pull(loader):
         "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13",
         "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12",
         "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49",
-        'HA8', 'HA11', 'HA21', 'HA37', 'HA42',
+        'HA8', 'HA11', 'HA21', 'HA37', 'HA42', 'HA44', 'HA45', 'HA51', 'HA52',
         # NEW - add property type
-        'HA44', 'HA45', 'HA51', 'HA52'
+        "HA17"
     ]
 
     # Can't pull from EPC database because it's based in Scotland
@@ -5905,7 +6063,7 @@ def fml_analysis(loader):
         "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13",
         "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12",
         "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49",
-        'HA8', 'HA11', 'HA21', 'HA37', 'HA42',
+        'HA8', 'HA11', 'HA21', 'HA37', 'HA42', 'HA44', 'HA45', 'HA51', 'HA52'
     ]
 
     no_ciga_cavity_descriptions = [
@@ -6320,11 +6478,11 @@ def app():
 
     # Add in:
     priority_has = [
-        "HA1", "HA2", "HA6", "HA7", "HA9", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18", "HA19", "HA24", "HA25",
-        "HA27", "HA28", "HA30", "HA31", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA49", "HA50", "HA54", "HA56",
-        "HA63", "HA107", "HA117", "HA8", "HA11", "HA21", "HA37", "HA42",
+        "HA1", "HA2", "HA5", "HA6", "HA7", "HA9", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18", "HA19", "HA24",
+        "HA25", "HA27", "HA28", "HA30", "HA31", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA49", "HA50", "HA54",
+        "HA56", "HA63", "HA107", "HA117", "HA8", "HA11", "HA21", "HA37", "HA42",
         # Added as of March 18th
-        "HA44", "HA45", "HA51", "HA52",
+        "HA44", "HA45", "HA51", "HA52", "HA17",
         # New HAS
         "HAXX", "HAXXX",
     ]
@@ -6332,7 +6490,10 @@ def app():
     # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE], 13 [DONE],
     # 35 [DONE], 56 [DONE], 19 [DONE], 18 [DONE], 9 [DONE], 27 [DONE], 34 [DONE], 30 [DONE], 31 [DONE], 54 [DONE]
     #
-    # Consider for ECO4: HA 70 - have to merge ECO3 list though, HA17 has LOTs of assets, but the asset list is a mess
+    # Consider for ECO4:
+    # HA 70 - have to merge ECO3 list though,
+    # HA17 has LOTs of assets, but the asset list is a mess
+    # HA53 but has EPCs done
     # Consider for GBIS:
     # Ignore for now:
     # 38 [problematic, but no ECO4], 10 problematic (no eligibility), 20 has barely any in

From 6ccfff0411ee2af58d6f7dc47b98f2deb70eac5c Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 18 Mar 2024 16:14:11 +0000
Subject: [PATCH 153/248] Added ha20

---
 .../ha_15_32/ha_analysis_batch_3.py           | 50 +++++++++++++++++--
 1 file changed, 46 insertions(+), 4 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 009064c6..627fcede 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -566,7 +566,8 @@ class DataLoader:
         "HA107": 51,
         "HA48": 0,
         "HA45": 0,
-        "HA52": 5
+        "HA52": 5,
+        "HA20": 6
     }
 
     UNMATCHED_ECO3 = {
@@ -669,6 +670,17 @@ class DataLoader:
                 asset_list["Postcode"].astype(str).str.lower().str.strip()
             )
             asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA20":
+            asset_list["matching_address"] = (
+                asset_list["House Name"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Block"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Address Line 2"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Address Line 3"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Address Line 4"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Postcode"].astype(str).str.lower().str.strip()
+            )
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
         elif ha_name == "HA21":
             asset_list["matching_address"] = (
                 asset_list["Address"].astype(str).str.lower().str.strip() + ", " +
@@ -2697,6 +2709,35 @@ class DataLoader:
     def correct_ha5_survey_list(survey_list):
         return survey_list
 
+    @staticmethod
+    def correct_ha20_survey_list(survey_list):
+        # Not in the asset list
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Abbot Close", "ABBOTS CLOSE"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Downbarns Road", "DOWN BARNS ROAD"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Austin Lane", "AUSTINS LANE"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "South Park Way", "SOUTHPARK WAY"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "OAKLAND ROAD", "OAKWOOD ROAD"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "ACRE WAY/NORTHWOOD", "ACRE WAY"
+        )
+
+        return survey_list
+
     @staticmethod
     def levenstein_match(matching_string, df):
         match_to = df["matching_address"].tolist()
@@ -3301,7 +3342,8 @@ class DataLoader:
             "AFF0RDALE WARMTH": "ECO4",
             "ECO 4 RdSAP CL": "ECO4",
             "Affordable Warmth (R) ": "ECO4",
-            "Affordable Warmth ": "ECO4"
+            "Affordable Warmth ": "ECO4",
+            "ECO 4 AFFORDABLE WARMTH": "ECO4",
         }
 
         # Since it seems like "subject to archetype check" has some failure conditions, for simplicity, we
@@ -6478,11 +6520,11 @@ def app():
 
     # Add in:
     priority_has = [
-        "HA1", "HA2", "HA5", "HA6", "HA7", "HA9", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18", "HA19", "HA24",
+        "HA1", "HA2", "HA6", "HA7", "HA9", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18", "HA19", "HA24",
         "HA25", "HA27", "HA28", "HA30", "HA31", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA49", "HA50", "HA54",
         "HA56", "HA63", "HA107", "HA117", "HA8", "HA11", "HA21", "HA37", "HA42",
         # Added as of March 18th
-        "HA44", "HA45", "HA51", "HA52", "HA17",
+        "HA44", "HA45", "HA51", "HA52", "HA17", "HA5", "HA20",
         # New HAS
         "HAXX", "HAXXX",
     ]

From 3dd30445f92635df45b5da2a756650ca116f3855 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 18 Mar 2024 19:37:11 +0000
Subject: [PATCH 154/248] HA Analysis finalised

---
 .../ha_15_32/ha_analysis_batch_3.py           | 257 +++++++++++++++---
 1 file changed, 225 insertions(+), 32 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 627fcede..2f17ed73 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -51,6 +51,12 @@ PROPERTY_TYPE_LOOKUP = {
         'MAISONETTE': "Maisonette",
         'HOSTEL': None
     },
+    "HA5": {
+        "House": "House",
+        "Flat": "Flat",
+        "Bungalow": "Bungalow",
+        "Bedsit": None
+    },
     "HA6": {
         "property_type": {
             'HOUSE': "House",
@@ -161,6 +167,21 @@ PROPERTY_TYPE_LOOKUP = {
         "Hostel": None,
         "Block": None,
     },
+    "HA20": {
+        "House": "House",
+        "Flat": "Flat",
+        'Sheltered Flat': "Flat",
+        'Maisonette': 'Maisonette',
+        'Bungalow': 'Bungalow',
+        'House. SD': 'House',
+        'House. MT': 'House',
+        'House. ET': 'House',
+        'Sheltered Bungalow': 'Bungalow',
+        'Guest Accomodation': None,
+        'Sheltered House': 'House',
+        'House. MT ': 'House',
+        'House. D': 'House'
+    },
     "HA24": {
         '01 HOUSE': 'House',
         '02 FLAT': 'Flat',
@@ -3632,6 +3653,9 @@ def get_property_type_and_built_form(property_meta, ha_name):
     elif ha_name == "HA2":
         property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Dwelling Type"].strip())
         built_form = None
+    elif ha_name == "HA5":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Asset Type"].strip())
+        built_form = None
     elif ha_name == "HA6":
         property_type = PROPERTY_TYPE_LOOKUP[ha_name]["property_type"][property_meta["Dwelling type"]]
         built_form = property_meta["built_form"]
@@ -3694,6 +3718,9 @@ def get_property_type_and_built_form(property_meta, ha_name):
     elif ha_name == "HA19":
         property_type = property_meta["Dwelling Type"]
         built_form = None
+    elif ha_name == "HA20":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Asset Type"].strip())
+        built_form = None
     elif ha_name == "HA21":
         property_description = property_meta["Property Type"].strip().lower()
         if "house" in property_description:
@@ -5775,6 +5802,7 @@ def forecast_remaining_sales(loader):
         results.append(to_append)
 
     results = pd.DataFrame(results)
+    results.to_csv("pipeline_remaining_raw.csv")
 
     totals_row = {}
     for col in results.columns:
@@ -6006,9 +6034,7 @@ def fml_data_pull(loader):
         "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13",
         "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12",
         "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49",
-        'HA8', 'HA11', 'HA21', 'HA37', 'HA42', 'HA44', 'HA45', 'HA51', 'HA52',
-        # NEW - add property type
-        "HA17"
+        'HA8', 'HA11', 'HA21', 'HA37', 'HA42', 'HA44', 'HA45', 'HA51', 'HA52', "HA17", "HA5", "HA20",
     ]
 
     # Can't pull from EPC database because it's based in Scotland
@@ -6105,7 +6131,7 @@ def fml_analysis(loader):
         "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13",
         "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12",
         "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49",
-        'HA8', 'HA11', 'HA21', 'HA37', 'HA42', 'HA44', 'HA45', 'HA51', 'HA52'
+        'HA8', 'HA11', 'HA21', 'HA37', 'HA42', 'HA44', 'HA45', 'HA51', 'HA52', "HA17", "HA5", "HA20",
     ]
 
     no_ciga_cavity_descriptions = [
@@ -6124,22 +6150,6 @@ def fml_analysis(loader):
     # TODO: There will be some properties that are subject to CIGA that do not look like they ned a CIGA check! pass
     #  them! Non-invasices will have checked the wall though
 
-    codes = [
-        "HA39", "HA14", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA7",
-        "HA16", "HA107", "HA25", "HA50", "HA41", "HA48", "HA2", "HA63", "HA12",
-        "HA117", "HA13", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27",
-        "HA30", "HA31", "HA54", "HAXX", "HA49", "HAXXX",
-    ]
-
-    values = [
-        706, 2161, 1053, 793, 0, 656, 1200, 1647, 4248, 2703, 1087, 1876, 2135,
-        1078, 775, 538, 518, 401, 466, 2627, 98, 1050, 524, 191, 538, 384, 204,
-        281, 422, 74, 313, 71, 6
-    ]
-
-    # Create a dictionary mapping
-    remaining_eligible_mapping = dict(zip(codes, values))
-
     results = []
     wall_descriptions = []
     for ha_name in tqdm(has_bruh):
@@ -6397,9 +6407,13 @@ def fml_analysis(loader):
                 without_survey_without_ciga_expected = 0
             else:
                 # We apply the same conversion rate as the properties with a survey
-                without_survey_without_ciga_expected = np.round(
-                    without_survey_needing_ciga.shape[0] * (ciga_check_expectation / ciga_check_needed.shape[0])
-                )
+
+                if ciga_check_needed.shape[0] == 0 and ciga_check_expectation == 0:
+                    without_survey_without_ciga_expected = without_survey_needing_ciga.shape[0]
+                else:
+                    without_survey_without_ciga_expected = np.round(
+                        without_survey_needing_ciga.shape[0] * (ciga_check_expectation / ciga_check_needed.shape[0])
+                    )
 
             without_survey_passed_ciga = fuck_this[
                 (fuck_this["estimated"] == True) &
@@ -6466,15 +6480,6 @@ def fml_analysis(loader):
                 without_survey_identified_as_gbis_qualified
             )
 
-        surveys = loader.data[ha_name]["survey_list"]
-        sold_now = 0
-        if not surveys.empty:
-            sold_now = surveys[
-                surveys["installation_status"].str.lower().str.contains("eco4")
-            ].shape[0]
-
-        sales_since_nov = sold_now - original_figures["No. of Tech surveys complete - Eco 4"].values[0]
-
         results.append(
             {
                 "HA Name": ha_name,
@@ -6498,6 +6503,194 @@ def fml_analysis(loader):
     # TODO: Change the left hand side number for our post CIGA estimates
 
 
+def create_final_report():
+    """
+    This function will produce the final output for the HA analysis
+    :return:
+    """
+    epc_validated_results = pd.read_csv("analysis - revised.csv")
+    pipeline_results = pd.read_csv("pipeline_remaining_raw.csv")
+
+    ####################################
+    # Original Warmfront estimates
+    ####################################
+    # Create the volumes result
+    all_ha_summary_remaining = pipeline_results[
+        [
+            "('', '', '', 'HA Name')",
+            "('ECO4 original', '', 'Remaining - #', '')",
+            "('GBIS original', '', 'Remaining - #', '')",
+        ]
+    ].copy().rename(
+        columns={
+            "('', '', '', 'HA Name')": "HA Name",
+            "('ECO4 original', '', 'Remaining - #', '')": "# ECO4 remaining - All HA Summary",
+            "('GBIS original', '', 'Remaining - #', '')": "# GBIS remaining - All HA Summary",
+        }
+    )
+    all_ha_summary_remaining["# Total remaining - All HA Summary"] = (
+        all_ha_summary_remaining["# ECO4 remaining - All HA Summary"] +
+        all_ha_summary_remaining["# GBIS remaining - All HA Summary"]
+    )
+    all_ha_summary_remaining = all_ha_summary_remaining.sort_values("HA Name")
+
+    ####################################
+    # Postcode list - pre-CIGA
+    ####################################
+    postcode_list_pre_ciga_remaining = pipeline_results[
+        [
+            "('', '', '', 'HA Name')",
+            "('ECO4 pre-ciga', '', 'Remaining - #', '')",
+            "('GBIS Postcode list', 'Warmfront post code list', 'Remaining - #', 'GBIS total')",
+        ]
+    ].copy().rename(
+        columns={
+            "('', '', '', 'HA Name')": "HA Name",
+            "('ECO4 pre-ciga', '', 'Remaining - #', '')": "# ECO4 remaining - Postcode list (pre CIGA)",
+            "('GBIS Postcode list', 'Warmfront post code list', 'Remaining - #', 'GBIS total')": (
+                "# GBIS remaining - Postcode list (pre CIGA)"
+            ),
+        }
+    )
+
+    postcode_list_pre_ciga_remaining["# Total remaining - Postcode list (pre CIGA)"] = (
+        postcode_list_pre_ciga_remaining["# ECO4 remaining - Postcode list (pre CIGA)"] +
+        postcode_list_pre_ciga_remaining["# GBIS remaining - Postcode list (pre CIGA)"]
+    )
+    postcode_list_pre_ciga_remaining = postcode_list_pre_ciga_remaining.sort_values("HA Name")
+
+    ####################################
+    # Postcode list - post-CIGA
+    ####################################
+    postcode_list_post_ciga_remaining = pipeline_results[
+        [
+            "('', '', '', 'HA Name')",
+            "('ECO4 post-ciga', '', 'Estimated remaining eligible - #', '')",
+            "('GBIS Postcode list', 'Warmfront post code list', 'Remaining - #', 'GBIS total')",
+        ]
+    ].copy().rename(
+        columns={
+            "('', '', '', 'HA Name')": "HA Name",
+            "('ECO4 post-ciga', '', 'Estimated remaining eligible - #', '')":
+                "# ECO4 remaining - Postcode list (post CIGA)",
+            "('GBIS Postcode list', 'Warmfront post code list', 'Remaining - #', 'GBIS total')": (
+                "# GBIS remaining - Postcode list (post CIGA)"
+            ),
+        }
+    )
+
+    postcode_list_post_ciga_remaining["# Total remaining - Postcode list (post CIGA)"] = (
+        postcode_list_post_ciga_remaining["# ECO4 remaining - Postcode list (post CIGA)"] +
+        postcode_list_post_ciga_remaining["# GBIS remaining - Postcode list (post CIGA)"]
+    )
+    postcode_list_post_ciga_remaining = postcode_list_post_ciga_remaining.sort_values("HA Name")
+
+    ####################################
+    # From EPC Database
+    ####################################
+    from_epc_database = epc_validated_results[
+        [
+            "HA Name",
+            "EPC verified ECO4 Eligible - Remaining",
+            "EPC verified GBIS Eligibile - Remaining"
+        ]
+    ].copy().rename(
+        columns={
+            "EPC verified ECO4 Eligible - Remaining": "# ECO4 remaining - From EPC Database (post CIGA)",
+            "EPC verified GBIS Eligibile - Remaining": "# GBIS remaining - From EPC Database (post CIGA)",
+        }
+    )
+
+    from_epc_database["# Total remaining - From EPC Database (post CIGA)"] = (
+        from_epc_database["# ECO4 remaining - From EPC Database (post CIGA)"] +
+        from_epc_database["# GBIS remaining - From EPC Database (post CIGA)"]
+    )
+    from_epc_database = from_epc_database.sort_values("HA Name")
+
+    # Combine the datasets
+    volumes = all_ha_summary_remaining.merge(
+        postcode_list_pre_ciga_remaining, how="left", on="HA Name"
+    ).merge(
+        postcode_list_post_ciga_remaining, how="left", on="HA Name"
+    ).merge(
+        from_epc_database, how="inner", on="HA Name"
+    )
+
+    revenue = volumes.copy()
+    # Convert the ECO4 volumes to revenue
+    for col in [
+        '# ECO4 remaining - All HA Summary',
+        '# ECO4 remaining - Postcode list (pre CIGA)',
+        '# ECO4 remaining - Postcode list (post CIGA)',
+        '# ECO4 remaining - From EPC Database (post CIGA)'
+    ]:
+        revenue[col] = revenue[col] * 1710
+
+    # Convert the GBIS volumes to revenue
+    for col in [
+        '# GBIS remaining - All HA Summary',
+        '# GBIS remaining - Postcode list (pre CIGA)',
+        '# GBIS remaining - Postcode list (post CIGA)',
+        '# GBIS remaining - From EPC Database (post CIGA)'
+    ]:
+        revenue[col] = revenue[col] * 600
+
+    # Re-calculate the totals
+    revenue['# Total remaining - All HA Summary'] = (
+        revenue['# ECO4 remaining - All HA Summary'] + revenue['# GBIS remaining - All HA Summary']
+    )
+
+    revenue['# Total remaining - Postcode list (pre CIGA)'] = (
+        revenue['# ECO4 remaining - Postcode list (pre CIGA)'] + revenue['# GBIS remaining - Postcode list (pre CIGA)']
+    )
+
+    revenue['# Total remaining - Postcode list (post CIGA)'] = (
+        revenue['# ECO4 remaining - Postcode list (post CIGA)'] + revenue[
+        '# GBIS remaining - Postcode list (post CIGA)']
+    )
+
+    revenue['# Total remaining - From EPC Database (post CIGA)'] = (
+        revenue['# ECO4 remaining - From EPC Database (post CIGA)'] +
+        revenue['# GBIS remaining - From EPC Database (post CIGA)']
+    )
+
+    # Replace the # with £ in the columns
+    revnue_colnames = [col.replace("#", "£") for col in revenue.columns]
+    revenue.columns = revnue_colnames
+
+    # We check that each column gets smaller
+    decreasing_check1 = all(
+        volumes["# ECO4 remaining - Postcode list (pre CIGA)"] >= volumes[
+            '# ECO4 remaining - Postcode list (post CIGA)']
+    )
+    if not decreasing_check1:
+        raise ValueError("decreasing_check1 failed")
+
+    # Just HA32 and HA17 should fail this, and it's due to GBIS jobs looking like ECO4
+    decreasing_check2 = volumes[volumes["# ECO4 remaining - From EPC Database (post CIGA)"] > volumes[
+        "# ECO4 remaining - Postcode list (post CIGA)"]]
+
+    if set(decreasing_check2["HA Name"].tolist()) != {"HA17", "HA32"}:
+        raise ValueError("decreasing_check2 failed")
+
+    # Check for GBIS
+    decreasing_check3 = all(
+        volumes["# GBIS remaining - Postcode list (pre CIGA)"] >= volumes[
+            '# GBIS remaining - Postcode list (post CIGA)']
+    )
+
+    if not decreasing_check3:
+        raise ValueError("decreasing_check3 failed")
+
+    # Don't perform this - this happens for multiple
+    # decreasing_check4 = volumes[volumes["# GBIS remaining - From EPC Database (post CIGA)"] > volumes[
+    #     "# GBIS remaining - Postcode list (post CIGA)"]]
+
+    # Store final outputs
+    volumes.to_csv("HA Analysis Final - volumes.csv")
+    revenue.to_csv("HA Analysis Final - revenue.csv")
+
+
 def app():
     """
     This app contains the housin association analysis for HAs 1, 6, 14, 39 and 107.

From 724379a86d1bd9b79159f2f8f9e5d8abe9496f5f Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 26 Mar 2024 18:05:08 +0000
Subject: [PATCH 155/248] wrapping up ha analysis

---
 .../ha_15_32/ha_analysis_batch_3.py           | 170 ++++++++++--------
 1 file changed, 94 insertions(+), 76 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 2f17ed73..e414cd00 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -5366,6 +5366,7 @@ def forecast_remaining_sales(loader):
 
     results = []
     for ha_name, input_data in loader.data.items():
+
         # Original warmfront figures - ECO4
         original_warmfront_estimates = december_figures[december_figures["HA Name"] == ha_name]
         if original_warmfront_estimates.empty:
@@ -6032,7 +6033,7 @@ def forecast_remaining_sales(loader):
 def fml_data_pull(loader):
     has_bruh = [
         "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13",
-        "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12",
+        "HA50", "HA24", "HA15", "HA32", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12",
         "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49",
         'HA8', 'HA11', 'HA21', 'HA37', 'HA42', 'HA44', 'HA45', 'HA51', 'HA52', "HA17", "HA5", "HA20",
     ]
@@ -6129,7 +6130,7 @@ def fml_analysis(loader):
     assumed_ciga_pass_rate = 0.731
     has_bruh = [
         "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13",
-        "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12",
+        "HA50", "HA24", "HA15", "HA32", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12",
         "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49",
         'HA8', 'HA11', 'HA21', 'HA37', 'HA42', 'HA44', 'HA45', 'HA51', 'HA52', "HA17", "HA5", "HA20",
     ]
@@ -6738,89 +6739,106 @@ def app():
     loader = DataLoader(directories, december_figures_filepath, use_cache, rebuild_inputs)
     loader.load()
     loader.ha_facts_and_figures()
-
     forecast_remaining_sales(loader)
 
-    conversion_rate = 0.95
-    archetype_check_conversion = 0.7
-    res = []
-    for k, v in loader.data.items():
-        asset_list = v["asset_list"].copy()
-        agg = asset_list["ECO Eligibility"].value_counts()
-        # We find a case where there are properties that have passed CIGA
-        if not any("passed" in x for x in agg.index):
+    # gbis rate
+    # breakdowns = []
+    # for ha, data_assets in loader.data.items():
+    #     asset_list = data_assets["asset_list"].copy()
+    #     breakdown = asset_list["ECO Eligibility"].value_counts().to_dict()
+    #     breakdowns.append(breakdown)
+    # breakdowns = pd.DataFrame(breakdowns)
+    #
+    # installer = []
+    # for ha, data_assets in loader.data.items():
+    #     survey_list = data_assets["survey_list"]
+    #     if survey_list.empty:
+    #         continue
+    #     if "INSTALLER" not in survey_list.columns:
+    #         continue
+    #
+    #     installers = survey_list["INSTALLER"].value_counts().to_dict()
+    #     installers["ha_name"] = ha
+    #     installer.append(installers)
+    # installer = pd.DataFrame(installer)
+    # installer.drop(columns=["ha_name"]).sum().sum()
+
+    # Adhoc - for HA16, get the properties that still need a CIGA check
+    asset_list_ha16 = loader.data["HA16"]["asset_list"].copy()
+    ha_16_need_ciga = asset_list_ha16[
+        asset_list_ha16["ECO Eligibility"].str.contains("subject to ciga")
+    ]
+    completed_cigas = loader.data["HA16"]["ciga_list"].copy()
+    # Store the results
+    ha_16_need_ciga.to_csv("ha16_need_ciga.csv")
+    completed_cigas.to_csv("ha16_completed_cigas.csv")
+
+    # Adhoc - look at the current pipeline and identify how many dormant, CIGA dependent properties there are for
+    # live projects
+
+    # Read excel
+    orderbook_filepath = "local_data/ha_data/Warmfront HA client order book overview_20240129.xlsx"
+    orderbook_workbook = openpyxl.load_workbook(orderbook_filepath)
+    orderbook_sheet = orderbook_workbook["Contractual Info"]
+    orderbook_colnames = [cell.value for cell in orderbook_sheet[1]]
+
+    rows = []
+    for row in orderbook_sheet.iter_rows(min_row=2, values_only=False):
+        row_data = [cell.value for cell in row]  # This will get you the cell values
+        rows.append(row_data)
+
+    orderbook = pd.DataFrame(rows, columns=orderbook_colnames)
+    live_orderbook = orderbook[orderbook["Live, New, or Historic?"] == "LIVE"].copy()
+    live_orderbook['Redacted HA'] = live_orderbook['Redacted HA'].str.replace(" ", "")
+
+    dormant_properties = []
+    missed_has = []
+    for _, customer in live_orderbook.iterrows():
+        if customer['Redacted HA'] not in loader.data.keys():
+            missed_has.append(customer['Redacted HA'])
             continue
+        asset_list = loader.data[customer['Redacted HA']]["asset_list"].copy()
+        survey_list = loader.data[customer['Redacted HA']]["survey_list"].copy()
+        # Remove sold
+        if not survey_list.empty:
+            survey_list = survey_list[~pd.isnull(survey_list["asset_list_row_id"])]
+            asset_list = asset_list.merge(
+                survey_list[["asset_list_row_id", "installation_status"]],
+                how="left",
+                on="asset_list_row_id"
+            )
+            # Anything that has an installation has gone to installation, and therefore is not remaining
+            asset_list = asset_list[pd.isnull(asset_list["installation_status"])]
+            asset_list = asset_list.drop(columns=["installation_status"])
 
-        agg = pd.DataFrame(agg).reset_index()
-
-        passed_ciga = agg[agg["ECO Eligibility"] == "eco4 - passed ciga"]
-        passed_ciga = passed_ciga["count"].values[0] if not passed_ciga.empty else 0
-
-        failed_ciga = agg[agg["ECO Eligibility"] == "failed ciga"]
-        failed_ciga = failed_ciga["count"].values[0] if not failed_ciga.empty else 0
-
-        ciga_pass_rate = passed_ciga / (passed_ciga + failed_ciga) if (passed_ciga + failed_ciga) > 0 else 1
-
-        dormant_ciga = agg[
-            agg["ECO Eligibility"].str.contains("subject to ciga") &
-            ~agg["ECO Eligibility"].str.contains("subject to archetype")
+        # We pull out the properties that need a CIGA check
+        need_ciga = asset_list[asset_list["ECO Eligibility"] == "eco4 (subject to ciga)"]
+        need_archetype = asset_list[asset_list["ECO Eligibility"] == "eco4 (subject to archetype)"]
+        need_ciga_and_archetype = asset_list[
+            asset_list["ECO Eligibility"] == "eco4 (subject to ciga) (subject to archetype)"
             ]
 
-        dormant_ciga = dormant_ciga['count'].values[0] if not dormant_ciga.empty else 0
-
-        dormant_ciga_archetype = agg[
-            agg["ECO Eligibility"].str.contains("subject to ciga") &
-            agg["ECO Eligibility"].str.contains("subject to archetype")
-            ]
-
-        dormant_ciga_archetype = dormant_ciga_archetype['count'].values[0] if not dormant_ciga_archetype.empty else 0
-
-        needing_check = dormant_ciga + dormant_ciga_archetype * archetype_check_conversion
-        needing_check = np.round(needing_check)
-
-        additional_jobs = (dormant_ciga * ciga_pass_rate * conversion_rate) + (
-            dormant_ciga_archetype * archetype_check_conversion * ciga_pass_rate * conversion_rate
-        )
-        additional_jobs = np.round(additional_jobs)
-
-        # We attempt to estimate the uplift and how much of that is attributed to surplus subject to ciga jobs
-        original_estimate = loader.december_figures[
-            loader.december_figures["HA Name"] == k
-            ]
-
-        original_estimate = original_estimate["ECO4"].values[0] if not original_estimate.empty else 0
-        base_eco_figures = agg[
-            agg["ECO Eligibility"].isin(["eco4", "eco4 - passed ciga"])
-        ]["count"].sum()
-        eco4_from_ciga = original_estimate - base_eco_figures
-        eco4_from_ciga = eco4_from_ciga if eco4_from_ciga > 0 else 0
-        surplus_from_dormant = additional_jobs - eco4_from_ciga
-        surplus_from_dormant = 0 if surplus_from_dormant < 0 else surplus_from_dormant
-
-        res.append(
+        dormant_properties.append(
             {
-                "ha_name": k,
-                "additional_eco4": additional_jobs,
-                "needing_check": needing_check,
-                "surplus_from_dormant": surplus_from_dormant
+                "HA Name": customer['Redacted HA'],
+                "Need CIGA": need_ciga.shape[0],
+                "Need Archetype": need_archetype.shape[0],
+                "Need CIGA and Archetype": need_ciga_and_archetype.shape[0]
             }
         )
 
-    res = pd.DataFrame(res)
-    # Drop the HAs that are not in that pervious draft
-    # In the v2 draft, there are 12 HAs
+    dormant_properties = pd.DataFrame(dormant_properties)
+    totals = dormant_properties.sum()
+    totals["HA Name"] = "Total"
 
-    v5_surplus = res[
-        ~res["ha_name"].isin(["HA9"])
-    ]["additional_eco4"].sum()
-    # 7212 properties
-    # This is not a perfect difference though, because of the variations in how the numbers are recorded in the November
-    # all HAs sheet. E.g for HA 107, there were 1239 properties identified. In the postcode list, there are 1255,
-    # however 531 are still needing a CIGA check. Therefore their original figures, in this case, included properties
-    # pre-CIGA
+    dormant_properties = pd.concat([dormant_properties, totals.to_frame().T])
+    dormant_properties.to_csv("dormant_properties.csv")
 
-    v5_surplus_from_dormant = res[
-        ~res["ha_name"].isin(["HA9"])
-    ]["surplus_from_dormant"].sum()
-    # 5539.0
-    # 9471690
+    loader.december_figures["ECO4 remaining"].sum()
+    december_figures = loader.december_figures.copy()
+    december_figures["ECO4 remaining"] = np.where(
+        december_figures["ECO4 remaining"] < 0,
+        0,
+        december_figures["ECO4 remaining"]
+    )
+    december_figures["ECO4 remaining"].sum()

From ebb28236617abff1e3a5f91dd6b06b66a001a4d7 Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Wed, 27 Mar 2024 11:39:51 +0000
Subject: [PATCH 156/248] override scenario data to have average insulation
 thickness, change impact values

---
 etl/epc/generate_scenarios_data.py | 48 +++++++++++++++++++++++++-----
 1 file changed, 41 insertions(+), 7 deletions(-)

diff --git a/etl/epc/generate_scenarios_data.py b/etl/epc/generate_scenarios_data.py
index d5bece8b..f9f66034 100644
--- a/etl/epc/generate_scenarios_data.py
+++ b/etl/epc/generate_scenarios_data.py
@@ -54,9 +54,19 @@ scenario_properties = [
         "postcode": "NN1 5JY",
         "lmk-key": "1459796789102016070507274146560098",
         "measures": [
-            [["internal_wall_insulation"], "11", None, [0]],
-            [["external_wall_insulation"], "10", None, [0]],
-            [["solar", "windows"], "12-15", {"photo_supply_ending": 50}, [0, 1]],
+            [
+                ["internal_wall_insulation"],
+                "11",
+                {"walls_insulation_thickness_ending": "average"},
+                [0],
+            ],
+            [
+                ["external_wall_insulation"],
+                "10",
+                {"walls_insulation_thickness_ending": "average"},
+                [0],
+            ],
+            [["solar", "windows"], "15", {"photo_supply_ending": 50}, [0, 1]],
         ],
     },
     {
@@ -64,7 +74,12 @@ scenario_properties = [
         "postcode": "HP1 2HA",
         "lmk-key": "c14029235739827d5f627dc8aa9bb567d026b267e851e0db0001db24638667b1",
         "measures": [
-            [["cavity_wall_insulation", "loft_insulation"], "15", None, [0, 1]],
+            [
+                ["cavity_wall_insulation", "loft_insulation"],
+                "15",
+                {"walls_insulation_thickness_ending": "average"},
+                [0, 1],
+            ],
         ],
     },
     {
@@ -72,7 +87,12 @@ scenario_properties = [
         "postcode": "HP1 2HE",
         "lmk-key": "99296a6dda21314fef3a61cda59e441e9a2aacf115eb96f4a0fa85696bf7b117",
         "measures": [
-            [["cavity_wall_insulation", "loft_insulation"], "15", None, [0, 1]],
+            [
+                ["cavity_wall_insulation", "loft_insulation"],
+                "15",
+                {"walls_insulation_thickness_ending": "average"},
+                [0, 1],
+            ],
         ],
     },
     {
@@ -80,7 +100,12 @@ scenario_properties = [
         "postcode": "HP1 2AN",
         "lmk-key": "d1e0534be3a44c33003323b21d0e322e3daddc65b5ee71936f89c59ddab96b50",
         "measures": [
-            [["cavity_wall_insulation", "loft_insulation"], "15", None, [0, 1]],
+            [
+                ["cavity_wall_insulation", "loft_insulation"],
+                "15",
+                {"walls_insulation_thickness_ending": "average"},
+                [0, 1],
+            ],
         ],
     },
     {
@@ -88,11 +113,17 @@ scenario_properties = [
         "postcode": "HP1 2HX",
         "lmk-key": "1eae354db522a95188018d9cd0502ed8c609910b6c88f8797d3a25f59b11770a",
         "measures": [
-            [["cavity_wall_insulation", "loft_insulation"], "15", None, [0, 1]],
+            [
+                ["cavity_wall_insulation", "loft_insulation"],
+                "15",
+                {"walls_insulation_thickness_ending": "average"},
+                [0, 1],
+            ],
         ],
     },
 ]
 
+
 recommendations_scoring_data = []
 
 for scenario_property in scenario_properties:
@@ -217,6 +248,9 @@ for scenario_property in scenario_properties:
     recommendations_scoring_data.extend(scoring_list)
 
 recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data)
+recommendations_scoring_data["impact"] = recommendations_scoring_data["impact"].astype(
+    int
+)
 recommendations_scoring_data = recommendations_scoring_data.drop(
     columns=[
         "rdsap_change",

From dbeba4db43645ee999eb49f40c0359457ae0f703 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 27 Mar 2024 18:12:57 +0000
Subject: [PATCH 157/248] set up first basic asset list for gla demo

---
 etl/customers/gla_croydon_demo/asset_list.py  | 145 ++++++++++++++++++
 .../ha_15_32/ha_analysis_batch_3.py           | 109 ++++++++++---
 2 files changed, 232 insertions(+), 22 deletions(-)
 create mode 100644 etl/customers/gla_croydon_demo/asset_list.py

diff --git a/etl/customers/gla_croydon_demo/asset_list.py b/etl/customers/gla_croydon_demo/asset_list.py
new file mode 100644
index 00000000..526c34a0
--- /dev/null
+++ b/etl/customers/gla_croydon_demo/asset_list.py
@@ -0,0 +1,145 @@
+import pandas as pd
+from utils.s3 import save_csv_to_s3
+
+USER_ID = 8
+PORTFOLIO_ID = 67
+
+
+def app():
+    """
+    We shall define a small portfolio of properties, based in Croydon
+    :return:
+    """
+
+    # Firstly, read in the EPC data for Croydon
+    epc_data = pd.read_csv(
+        "local_data/all-domestic-certificates/domestic-E09000008-Croydon/certificates.csv",
+        low_memory=False
+    )
+
+    # Filter on entries where we have a UPRN
+    epc_data = epc_data[~pd.isnull(epc_data["UPRN"])]
+
+    # Get the newest EPC for each UPRN. We use LODGEMENT_DATE as a proxy for this
+    epc_data["LODGEMENT_DATE"] = pd.to_datetime(epc_data["LODGEMENT_DATE"])
+
+    epc_data = epc_data.sort_values("LODGEMENT_DATE", ascending=False).drop_duplicates("UPRN")
+
+    # Now filter on social properties
+    epc_data = epc_data[epc_data["TENURE"].isin(["rental (social)", "Rented (social)"])]
+    # There are 17337 properties with a registered EPC in Croydon
+    # Take below EPC C properties
+    epc_data = epc_data[epc_data["CURRENT_ENERGY_EFFICIENCY"].astype(int) < 69]
+    # 7994 properties are below EPC C (46%)
+
+    # 79% D, 19% E, 1% F, 0.2% G - it probably makes the most sense to focus on E and D properties
+    epc_data["CURRENT_ENERGY_RATING"].value_counts(normalize=True)
+
+    # For the purpose of the sample, take the properties have surveys done in the last 2 years
+    # This gives us 1023 remaining properties
+    two_years_ago = pd.Timestamp.now() - pd.DateOffset(days=int(2.5 * 365))
+    epc_data = epc_data[epc_data["LODGEMENT_DATE"] >= two_years_ago]
+
+    # Archetype 1: defined below:
+    # 1) House
+    # 2) Unfilled cavity
+    # 3) A roof that could be insulated (flat or pitched with no more than 50mm insulation)
+    # 4) EPC E
+    # Different buckets of properties
+    archetype_1_sample = epc_data[
+        epc_data["PROPERTY_TYPE"].isin(["House"]) &
+        (epc_data["CURRENT_ENERGY_RATING"] == "E") &
+        epc_data["WALLS_DESCRIPTION"].isin(["Cavity wall, as built, no insulation (assumed)"]) &
+        epc_data["ROOF_DESCRIPTION"].isin(
+            [
+                "Pitched, 12 mm loft insulation",
+                "Pitched, 0 mm loft insulation",
+                "Pitched, no insulation",
+                "Pitched, 50 mm loft insulation",
+                "Flat, no insulation (assumed)",
+                "Pitched, no insulation (assumed)"
+            ]
+        )
+        ]
+    archetype_1_sample_asset_list = archetype_1_sample[["UPRN", "ADDRESS1", "POSTCODE"]].copy()
+    archetype_1_sample_asset_list["ARCHETYPE"] = "Archetype 1"
+
+    # Archetype 2: defined below:
+    # 1) Flat
+    # 2) Unfilled cavity
+    # 3) Another property above
+    # 4) EPC E
+    archetype_2_sample = epc_data[
+        epc_data["PROPERTY_TYPE"].isin(["Flat"]) &
+        (epc_data["CURRENT_ENERGY_RATING"] == "E") &
+        epc_data["WALLS_DESCRIPTION"].isin(["Cavity wall, as built, no insulation (assumed)"]) &
+        epc_data["ROOF_DESCRIPTION"].isin(
+            [
+                "(another dwelling above)"
+            ]
+        )
+        ]
+    archetype_2_sample_asset_list = archetype_2_sample[["UPRN", "ADDRESS1", "POSTCODE"]].copy()
+    archetype_2_sample_asset_list["ARCHETYPE"] = "Archetype 2"
+
+    # Archetype 3: defined below:
+    # 1) EPC F
+    # 2) Solid brick wall
+    # 3) House
+    # 4) Pitched roof with no insulation
+    # Just 1 property (more expensive to retrofit)
+    archetype_3_sample = epc_data[
+        epc_data["PROPERTY_TYPE"].isin(["House"]) &
+        (epc_data["CURRENT_ENERGY_RATING"] == "F") &
+        epc_data["ROOF_DESCRIPTION"].isin(["Pitched, no insulation"])
+        ]
+    archetype_3_sample_asset_list = archetype_3_sample[["UPRN", "ADDRESS1", "POSTCODE"]].copy()
+    archetype_3_sample_asset_list["ARCHETYPE"] = "Archetype 3"
+
+    # Archetype 4: defined below:
+    # 1) Maisonette
+    # 2) Empty cavity
+    # 3) EPC E
+    # 14 properties here
+    archetype_4_sample = epc_data[
+        epc_data["PROPERTY_TYPE"].isin(["Maisonette"]) &
+        epc_data["WALLS_DESCRIPTION"].isin(["Cavity wall, as built, no insulation (assumed)"])
+        ]
+    archetype_4_sample_asset_list = archetype_4_sample[["UPRN", "ADDRESS1", "POSTCODE"]].copy()
+    archetype_4_sample_asset_list["ARCHETYPE"] = "Archetype 4"
+
+    asset_list = pd.concat(
+        [
+            archetype_1_sample_asset_list,
+            archetype_2_sample_asset_list,
+            archetype_3_sample_asset_list,
+            archetype_4_sample_asset_list
+        ]
+    )
+
+    asset_list = asset_list.rename(
+        columns={
+            "UPRN": "uprn",
+            "ADDRESS1": "address",
+            "POSTCODE": "postcode",
+            "ARCHETYPE": "archetype"
+        }
+    )
+
+    filename = f"{USER_ID}/{PORTFOLIO_ID}/inputs.csv"
+    save_csv_to_s3(
+        dataframe=asset_list,
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename
+    )
+
+    body = {
+        "portfolio_id": str(PORTFOLIO_ID),
+        "housing_type": "Social",
+        "goal": "Increase EPC",
+        "goal_value": "C",
+        "trigger_file_path": filename,
+        "budget": None,
+        "exclusions": ["floor_insulation"]
+    }
+    print(body)
diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index e414cd00..b4b82d0b 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -6692,6 +6692,92 @@ def create_final_report():
     revenue.to_csv("HA Analysis Final - revenue.csv")
 
 
+def identify_eco_works(loader):
+    # ha_names = [
+    #     "HA16",  # For Housing
+    #     "HA39",  # Rooftop
+    #     "HA41",  # Settle
+    #     "HA23",  # Lambeth
+    #     "HA14",  # EMH
+    #     "HA7",  # Believe
+    #     "HA102",  # Thrive
+    # ]
+
+    # Unitas, fairhive, acis, LHP
+    ha_names = [
+        "HA50",  # Unitas
+        "HA15",  # Fairhive
+        "HA107",  # ACIS
+        "HA24",  # LHP
+    ]
+    names = {
+        "HA50": "Unitas",
+        "HA15": "Fairhive",
+        "HA107": "ACIS",
+        "HA24": "LHP"
+    }
+
+    # gbis rate
+    breakdowns = []
+    # lists = {}
+    for ha, data_assets in loader.data.items():
+        if ha not in ha_names:
+            continue
+
+        asset_list = data_assets["asset_list"].copy()
+        survey_list = data_assets["survey_list"].copy()
+        # Remove things that have sold
+        if not survey_list.empty:
+            asset_list = asset_list.merge(
+                survey_list[["asset_list_row_id", "installation_status"]],
+                how="left",
+                on="asset_list_row_id"
+            )
+            # Anything that has an installation has gone to installation, and therefore is not remaining
+            asset_list = asset_list[pd.isnull(asset_list["installation_status"])]
+            asset_list = asset_list.drop(columns=["installation_status"])
+
+        # Needing a CIGA check
+        needs_cga = asset_list[
+            asset_list["ECO Eligibility"] == "eco4 (subject to ciga)"
+            ].copy()
+
+        eco4 = asset_list[
+            asset_list["ECO Eligibility"] == "eco4"
+            ].copy()
+
+        eco4_passed_ciga = asset_list[
+            asset_list["ECO Eligibility"] == "eco4 - passed ciga"
+            ].copy()
+
+        # lists[ha] = {
+        #     "needs_cga": needs_cga,
+        #     "eco4": eco4,
+        #     "eco4_passed_ciga": eco4_passed_ciga
+        # }
+
+        # Store the data
+        if not needs_cga.empty:
+            needs_cga.to_csv(f"local_data/{names[ha]} - needs ciga.csv")
+
+        if not eco4.empty:
+            eco4.to_csv(f"local_data/{names[ha]} - eco4.csv")
+
+        if not eco4_passed_ciga.empty:
+            eco4_passed_ciga.to_csv(f"local_data/{names[ha]} - eco4 passed ciga.csv")
+
+        summary = {
+            "HA Name": ha,
+            "n_needing_ciga": needs_cga.shape[0],
+            "eco4": eco4.shape[0],
+            "eco4_passed_ciga": eco4_passed_ciga.shape[0]
+        }
+
+        breakdowns.append(summary)
+    breakdowns = pd.DataFrame(breakdowns)
+    breakdowns = breakdowns.fillna(0)
+
+
 def app():
     """
     This app contains the housin association analysis for HAs 1, 6, 14, 39 and 107.
@@ -6739,29 +6825,8 @@ def app():
     loader = DataLoader(directories, december_figures_filepath, use_cache, rebuild_inputs)
     loader.load()
     loader.ha_facts_and_figures()
-    forecast_remaining_sales(loader)
 
-    # gbis rate
-    # breakdowns = []
-    # for ha, data_assets in loader.data.items():
-    #     asset_list = data_assets["asset_list"].copy()
-    #     breakdown = asset_list["ECO Eligibility"].value_counts().to_dict()
-    #     breakdowns.append(breakdown)
-    # breakdowns = pd.DataFrame(breakdowns)
-    #
-    # installer = []
-    # for ha, data_assets in loader.data.items():
-    #     survey_list = data_assets["survey_list"]
-    #     if survey_list.empty:
-    #         continue
-    #     if "INSTALLER" not in survey_list.columns:
-    #         continue
-    #
-    #     installers = survey_list["INSTALLER"].value_counts().to_dict()
-    #     installers["ha_name"] = ha
-    #     installer.append(installers)
-    # installer = pd.DataFrame(installer)
-    # installer.drop(columns=["ha_name"]).sum().sum()
+    forecast_remaining_sales(loader)
 
     # Adhoc - for HA16, get the properties that still need a CIGA check
     asset_list_ha16 = loader.data["HA16"]["asset_list"].copy()

From d34a4d4d963d349877d63a44753549186247a64d Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 28 Mar 2024 14:32:29 +0000
Subject: [PATCH 158/248] allowing passage of uprn to Searcher in api

---
 .idea/Model.iml             | 2 +-
 .idea/misc.xml              | 2 +-
 backend/app/plan/router.py  | 4 ++++
 backend/app/plan/schemas.py | 2 ++
 4 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/.idea/Model.iml b/.idea/Model.iml
index b0f9c00d..4413bb06 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 1122b380..6f308057 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 0b98cf2c..5456cdb6 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -91,10 +91,14 @@ async def trigger_plan(body: PlanTriggerRequest):
         input_properties = []
         for config in tqdm(plan_input):
             # We validate each record in the file. If the record is NOT valid, we need to handle this accordingly
+            uprn = config.get("uprn", None)
+            if uprn:
+                uprn = int(float(uprn))
 
             epc_searcher = SearchEpc(
                 address1=config["address"],
                 postcode=config["postcode"],
+                uprn=uprn,
                 auth_token=get_settings().EPC_AUTH_TOKEN,
                 os_api_key=get_settings().ORDNANCE_SURVEY_API_KEY
             )
diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py
index 9801375f..1e95fb2f 100644
--- a/backend/app/plan/schemas.py
+++ b/backend/app/plan/schemas.py
@@ -8,3 +8,5 @@ class PlanTriggerRequest(BaseModel):
     goal_value: str
     portfolio_id: int
     trigger_file_path: str
+    # optional exclusions list
+    exclusions: list[str] | None = None

From 91eb9c68f1600970541606fdae3869d19ee724cb Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 28 Mar 2024 14:49:19 +0000
Subject: [PATCH 159/248] Adding validation to PlanTriggerRequest

---
 backend/app/plan/schemas.py        | 47 +++++++++++++--
 recommendations/Recommendations.py | 94 +++++++++++++++++-------------
 2 files changed, 95 insertions(+), 46 deletions(-)

diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py
index 1e95fb2f..c13e754e 100644
--- a/backend/app/plan/schemas.py
+++ b/backend/app/plan/schemas.py
@@ -1,12 +1,51 @@
-from pydantic import BaseModel
+from pydantic import BaseModel, conlist, validator
+from typing import Optional
 
 
 class PlanTriggerRequest(BaseModel):
-    budget: float | None = None
+    budget: Optional[float] = None
     goal: str
     housing_type: str
     goal_value: str
     portfolio_id: int
     trigger_file_path: str
-    # optional exclusions list
-    exclusions: list[str] | None = None
+    exclusions: Optional[conlist(str, min_items=1)] = None
+
+    # Pre-defined list of possibilities for exclusions
+    _allowed_exclusions = {
+        "wall_insulation",
+        "ventilation",
+        "roof_insulation",
+        "floor_insulation",
+        "windows",
+        "fireplace",
+        "heating",
+        "hot_water",
+        "lighting",
+        "solar_pv"
+    }
+
+    _allowed_goals = {"Increase EPC"}
+
+    _allowed_housing_types = {"Social", "Private"}
+
+    # Validator to ensure exclusions are within the pre-defined possibilities
+    @validator('exclusions', each_item=True)
+    def check_exclusions(self, v):
+        if v not in self._allowed_exclusions:
+            raise ValueError(f"{v} is not an allowed exclusion")
+        return v
+
+    # Validator to ensure that the goal is within the pre-defined possibilities
+    @validator('goal')
+    def check_goal(self, v):
+        if v not in self._allowed_goals:
+            raise ValueError(f"{v} is not a valid goal")
+        return v
+
+    # Validator to ensure that the housing type is within the pre-defined possibilities
+    @validator('housing_type')
+    def check_housing_type(self, v):
+        if v not in self.allowed_housing_types:
+            raise ValueError(f"{v} is not a valid housing type")
+        return v
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 9f838e1c..d3436ef0 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -22,7 +22,8 @@ class Recommendations:
     def __init__(
         self,
         property_instance: Property,
-        materials: List
+        materials: List,
+        exclusions: List[str] = None,
     ):
         """
         :param property_instance: Instance of the Property class, for the home associated to property_id
@@ -31,6 +32,7 @@ class Recommendations:
 
         self.property_instance = property_instance
         self.materials = materials
+        self.exclusions = exclusions if exclusions else []
 
         self.floor_recommender = FloorRecommendations(property_instance=property_instance, materials=materials)
         self.wall_recomender = WallRecommendations(property_instance=property_instance, materials=materials)
@@ -58,67 +60,75 @@ class Recommendations:
         property_recommendations = []
         phase = 0
 
-        print("WALL RECOMMENDATIONS HAVE BEEN COMMENTED OUT TEMPORARILY - ADD ME BACK IN")
-        if portfolio_id != 66:
-            # Building Fabric
+        # Building Fabric
+        if "wall_insulation" not in self.exclusions:
             self.wall_recomender.recommend(phase=phase)
             if self.wall_recomender.recommendations:
                 property_recommendations.append(self.wall_recomender.recommendations)
                 phase += 1
 
-            # Ventilation recommendations
-            # We only produce a ventilation recommendation if the property is recommended to have wall or roof
-            # insulation
-            # We will not attribute a SAP impact to the ventilation recommendation, since we've seen that this has no
-            # real impact on the SAP score. Therefore, we don't need to include phasing for ventilation. If we have any
-            # wall or roof recommendations, we will ensure that ventilation is included in the simulation
+        # Ventilation recommendations
+        # We only produce a ventilation recommendation if the property is recommended to have wall or roof
+        # insulation
+        # We will not attribute a SAP impact to the ventilation recommendation, since we've seen that this has no
+        # real impact on the SAP score. Therefore, we don't need to include phasing for ventilation. If we have any
+        # wall or roof recommendations, we will ensure that ventilation is included in the simulation
+        if "ventilation" not in self.exclusions:
             if self.wall_recomender.recommendations or self.roof_recommender.recommendations:
                 self.ventilation_recomender.recommend()
                 if self.ventilation_recomender.recommendation:
                     property_recommendations.append(self.ventilation_recomender.recommendation)
 
-        self.roof_recommender.recommend(phase=phase)
-        if self.roof_recommender.recommendations:
-            property_recommendations.append(self.roof_recommender.recommendations)
-            phase += 1
+        if "roof_insulation" not in self.exclusions:
+            self.roof_recommender.recommend(phase=phase)
+            if self.roof_recommender.recommendations:
+                property_recommendations.append(self.roof_recommender.recommendations)
+                phase += 1
 
-        self.floor_recommender.recommend(phase=phase)
-        if self.floor_recommender.recommendations:
-            property_recommendations.append(self.floor_recommender.recommendations)
-            phase += 1
+        if "floor_insulation" not in self.exclusions:
+            self.floor_recommender.recommend(phase=phase)
+            if self.floor_recommender.recommendations:
+                property_recommendations.append(self.floor_recommender.recommendations)
+                phase += 1
 
-        self.windows_recommender.recommend(phase=phase)
-        if self.windows_recommender.recommendation:
-            property_recommendations.append(self.windows_recommender.recommendation)
-            phase += 1
+        if "windows" not in self.exclusions:
+            self.windows_recommender.recommend(phase=phase)
+            if self.windows_recommender.recommendation:
+                property_recommendations.append(self.windows_recommender.recommendation)
+                phase += 1
 
-        self.fireplace_recommender.recommend(phase=phase)
-        if self.fireplace_recommender.recommendation:
-            property_recommendations.append(self.fireplace_recommender.recommendation)
-            phase += 1
+        if "fireplace" not in self.exclusions:
+            self.fireplace_recommender.recommend(phase=phase)
+            if self.fireplace_recommender.recommendation:
+                property_recommendations.append(self.fireplace_recommender.recommendation)
+                phase += 1
 
         # Heating and Electical systems
-        self.heating_recommender.recommend(phase=phase)
-        if self.heating_recommender.recommendations:
-            property_recommendations.append(self.heating_recommender.recommendations)
-            phase += 1
+        if "heating" not in self.exclusions:
+            self.heating_recommender.recommend(phase=phase)
+            if self.heating_recommender.recommendations:
+                property_recommendations.append(self.heating_recommender.recommendations)
+                phase += 1
 
         # Hot water
-        self.hotwater_recommender.recommend(phase=phase)
-        if self.hotwater_recommender.recommendations:
-            property_recommendations.append(self.hotwater_recommender.recommendations)
-            phase += 1
+        if "hot_water" not in self.exclusions:
+            self.hotwater_recommender.recommend(phase=phase)
+            if self.hotwater_recommender.recommendations:
+                property_recommendations.append(self.hotwater_recommender.recommendations)
+                phase += 1
 
-        self.lighting_recommender.recommend(phase=phase)
-        if self.lighting_recommender.recommendation:
-            property_recommendations.append(self.lighting_recommender.recommendation)
-            phase += 1
+        if "lighting" not in self.exclusions:
+            self.lighting_recommender.recommend(phase=phase)
+            if self.lighting_recommender.recommendation:
+                property_recommendations.append(self.lighting_recommender.recommendation)
+                phase += 1
 
         # Renewables
-        self.solar_recommender.recommend(phase=phase)
-        if self.solar_recommender.recommendation:
-            property_recommendations.append(self.solar_recommender.recommendation)
-            phase += 1
+        if "solar_pv" not in self.exclusions:
+            self.solar_recommender.recommend(phase=phase)
+            if self.solar_recommender.recommendation:
+                property_recommendations.append(self.solar_recommender.recommendation)
+                phase += 1
 
         # We insert temporary ids into the recommendations which is important for the optimiser later
         property_recommendations = self.insert_temp_recommendation_id(property_recommendations)

From 22a3e21f523b79da4ec65fa12d8d901242c5cfb6 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 28 Mar 2024 14:52:24 +0000
Subject: [PATCH 160/248] update validation of PlanTriggerRequest to use cls
 rather than self

---
 backend/app/plan/router.py         |  4 +---
 backend/app/plan/schemas.py        | 12 ++++++------
 recommendations/Recommendations.py |  2 +-
 3 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 5456cdb6..e25c04a5 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -170,9 +170,7 @@ async def trigger_plan(body: PlanTriggerRequest):
             p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
 
             recommender = Recommendations(property_instance=p, materials=materials)
-            # TODO: portfolio id as an input is temp
-            print("DELETE PORTFOLIO ID AS AN INPUT!!")
-            property_recommendations, property_representative_recommendations = recommender.recommend(body.portfolio_id)
+            property_recommendations, property_representative_recommendations = recommender.recommend()
 
             if not property_recommendations:
                 continue
diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py
index c13e754e..b8a99704 100644
--- a/backend/app/plan/schemas.py
+++ b/backend/app/plan/schemas.py
@@ -31,21 +31,21 @@ class PlanTriggerRequest(BaseModel):
 
     # Validator to ensure exclusions are within the pre-defined possibilities
     @validator('exclusions', each_item=True)
-    def check_exclusions(self, v):
-        if v not in self._allowed_exclusions:
+    def check_exclusions(cls, v):
+        if v not in cls._allowed_exclusions:
             raise ValueError(f"{v} is not an allowed exclusion")
         return v
 
     # Validator to ensure that the goal is within the pre-defined possibilities
     @validator('goal')
-    def check_goal(self, v):
-        if v not in self._allowed_goals:
+    def check_goal(cls, v):
+        if v not in cls._allowed_goals:
             raise ValueError(f"{v} is not a valid goal")
         return v
 
     # Validator to ensure that the housing type is within the pre-defined possibilities
     @validator('housing_type')
-    def check_housing_type(self, v):
-        if v not in self.allowed_housing_types:
+    def check_housing_type(cls, v):
+        if v not in cls._allowed_housing_types:
             raise ValueError(f"{v} is not a valid housing type")
         return v
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index d3436ef0..b2e6d991 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -47,7 +47,7 @@ class Recommendations:
         self.heating_recommender = HeatingRecommender(property_instance=property_instance)
         self.hotwater_recommender = HotwaterRecommendations(property_instance=property_instance)
 
-    def recommend(self, portfolio_id):
+    def recommend(self):
 
         """
         This method runs the recommendations for the individual measures and then appends them to a list for output

From 8dbd69eef9140efdb3feab6933f195c762a2ba8c Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 28 Mar 2024 15:54:31 +0000
Subject: [PATCH 161/248] Updating router for chunked scoring

---
 backend/Property.py        |  2 +-
 backend/app/plan/router.py | 36 ++++++++++++++++++++++++++----------
 2 files changed, 27 insertions(+), 11 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index f86e33dc..d97ce8cf 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -233,7 +233,7 @@ class Property:
                 output["walls_thermal_transmittance_ending"] = recommendation["new_u_value"]
                 # Setting the insulation thickness here to above average should be tested further because we
                 # don't see a high volume of instances for this
-                output["walls_insulation_thickness_ending"] = "above average"
+                output["walls_insulation_thickness_ending"] = "average"
                 output["walls_energy_eff_ending"] = "Good"
 
                 # Note: often when the wall is insulatied, the internal/external insulation is not noted so we should
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index e25c04a5..bcbc4332 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -41,6 +41,7 @@ from backend.ml_models.Valuation import PropertyValuation
 logger = setup_logger()
 
 BATCH_SIZE = 5
+SCORING_BATCH_SIZE = 400
 
 
 def patch_epc(config, epc_records):
@@ -164,7 +165,7 @@ async def trigger_plan(body: PlanTriggerRequest):
         recommendations = {}
         recommendations_scoring_data = []
         representative_recommendations = {}
-        for p in input_properties:
+        for p in tqdm(input_properties):
 
             # Property recommendations
             p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
@@ -196,15 +197,30 @@ async def trigger_plan(body: PlanTriggerRequest):
 
         model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at)
 
-        all_predictions = model_api.predict_all(
-            df=recommendations_scoring_data,
-            bucket=get_settings().DATA_BUCKET,
-            prediction_buckets={
-                "sap_change_predictions": get_settings().SAP_PREDICTIONS_BUCKET,
-                "heat_demand_predictions": get_settings().HEAT_PREDICTIONS_BUCKET,
-                "carbon_change_predictions": get_settings().CARBON_PREDICTIONS_BUCKET
-            }
-        )
+        all_predictions = {
+            "sap_change_predictions": pd.DataFrame(),
+            "heat_demand_predictions": pd.DataFrame(),
+            "carbon_change_predictions": pd.DataFrame()
+        }
+        to_loop_over = range(0, recommendations_scoring_data.shape[0], SCORING_BATCH_SIZE)
+        for chunk in tqdm(to_loop_over, total=len(to_loop_over)):
+            predictions_dict = model_api.predict_all(
+                df=recommendations_scoring_data.iloc[chunk:chunk + SCORING_BATCH_SIZE],
+                bucket=get_settings().DATA_BUCKET,
+                prediction_buckets={
+                    "sap_change_predictions": get_settings().SAP_PREDICTIONS_BUCKET,
+                    "heat_demand_predictions": get_settings().HEAT_PREDICTIONS_BUCKET,
+                    "carbon_change_predictions": get_settings().CARBON_PREDICTIONS_BUCKET
+                }
+            )
+
+            # Append the predictions to the predictions dictionary
+            for key, scored in predictions_dict.items():
+                all_predictions[key] = pd.concat([all_predictions[key], scored])
+
+        # TODO: TEMP
+        # all_predictions["heat_demand_predictions"] = all_predictions["sap_change_predictions"].copy()
+        # all_predictions["carbon_change_predictions"] = all_predictions["sap_change_predictions"].copy()
 
         # Insert the predictions into the recommendations and run the optimiser
         # TODO: If a recommendation has a negative impact on SAP, we should remove it - this seems to have become a

From bd15ce65c2b05cdffe7304121d1fd8282fea55cb Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 28 Mar 2024 16:29:23 +0000
Subject: [PATCH 162/248] debugging optimisation with ventilation, when
 ventilation already exists

---
 backend/app/plan/router.py         | 16 +++++++++-------
 recommendations/Recommendations.py | 18 ++++++++++--------
 2 files changed, 19 insertions(+), 15 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index bcbc4332..a0d93190 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -170,7 +170,7 @@ async def trigger_plan(body: PlanTriggerRequest):
             # Property recommendations
             p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
 
-            recommender = Recommendations(property_instance=p, materials=materials)
+            recommender = Recommendations(property_instance=p, materials=materials, exclusions=body.exclusions)
             property_recommendations, property_representative_recommendations = recommender.recommend()
 
             if not property_recommendations:
@@ -196,6 +196,7 @@ async def trigger_plan(body: PlanTriggerRequest):
         )
 
         model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at)
+        # model_api.MODEL_PREFIXES = ["sap_change_predictions"]
 
         all_predictions = {
             "sap_change_predictions": pd.DataFrame(),
@@ -274,14 +275,15 @@ async def trigger_plan(body: PlanTriggerRequest):
             if any(x in [r["type"] for r in solution] for x in [
                 "internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation"
             ]):
-                ventilation_rec = [
-                    r for r in recommendations_with_impact if r[0]["type"] == "mechanical_ventilation"
-                ][0]
-
-                selected_recommendations = set(
-                    list(selected_recommendations) + [ventilation_rec[0]["recommendation_id"]]
+                ventilation_rec = next(
+                    (r[0] for r in recommendations_with_impact if r[0]["type"] == "mechanical_ventilation"),
+                    None
                 )
 
+                # If a matching recommendation was found, add its ID to the selected recommendations
+                if ventilation_rec:
+                    selected_recommendations.add(ventilation_rec["recommendation_id"])
+
             # We check if the selected recommendation is wall ventilation and if so, we make sure
             # mechanical ventilation is selected
 
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index b2e6d991..944fec7a 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -67,11 +67,19 @@ class Recommendations:
                 property_recommendations.append(self.wall_recomender.recommendations)
                 phase += 1
 
+        if "roof_insulation" not in self.exclusions:
+            self.roof_recommender.recommend(phase=phase)
+            if self.roof_recommender.recommendations:
+                property_recommendations.append(self.roof_recommender.recommendations)
+                phase += 1
+
         # Ventilation recommendations
         # We only produce a ventilation recommendation if the property is recommended to have wall or roof
         # insulation
-        # We will not attribute a SAP impact to the ventilation recommendation, since we've seen that this has no
-        # real impact on the SAP score. Therefore, we don't need to include phasing for ventilation. If we have any
+        # We will not attribute a SAP impact to the ventilation recommendation, since we've seen that this
+        # has no
+        # real impact on the SAP score. Therefore, we don't need to include phasing for ventilation. If we
+        # have any
         # wall or roof recommendations, we will ensure that ventilation is included in the simulation
         if "ventilation" not in self.exclusions:
             if self.wall_recomender.recommendations or self.roof_recommender.recommendations:
@@ -79,12 +87,6 @@ class Recommendations:
                 if self.ventilation_recomender.recommendation:
                     property_recommendations.append(self.ventilation_recomender.recommendation)
 
-        if "roof_insulation" not in self.exclusions:
-            self.roof_recommender.recommend(phase=phase)
-            if self.roof_recommender.recommendations:
-                property_recommendations.append(self.roof_recommender.recommendations)
-                phase += 1
-
         if "floor_insulation" not in self.exclusions:
             self.floor_recommender.recommend(phase=phase)
             if self.floor_recommender.recommendations:

From 72a4feb6af3967dc6ce00bb4df7d7d47c4772dc1 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 28 Mar 2024 17:18:08 +0000
Subject: [PATCH 163/248] minor tweak to asset list to make uprn int

---
 etl/customers/gla_croydon_demo/asset_list.py | 8 ++++++--
 etl/customers/gla_croydon_demo/slides.py     | 0
 2 files changed, 6 insertions(+), 2 deletions(-)
 create mode 100644 etl/customers/gla_croydon_demo/slides.py

diff --git a/etl/customers/gla_croydon_demo/asset_list.py b/etl/customers/gla_croydon_demo/asset_list.py
index 526c34a0..01220d0a 100644
--- a/etl/customers/gla_croydon_demo/asset_list.py
+++ b/etl/customers/gla_croydon_demo/asset_list.py
@@ -36,7 +36,7 @@ def app():
     epc_data["CURRENT_ENERGY_RATING"].value_counts(normalize=True)
 
     # For the purpose of the sample, take the properties have surveys done in the last 2 years
-    # This gives us 1023 remaining properties
+    # This gives us 1167 remaining properties
     two_years_ago = pd.Timestamp.now() - pd.DateOffset(days=int(2.5 * 365))
     epc_data = epc_data[epc_data["LODGEMENT_DATE"] >= two_years_ago]
 
@@ -45,7 +45,7 @@ def app():
     # 2) Unfilled cavity
     # 3) A roof that could be insulated (flat or pitched with no more than 50mm insulation)
     # 4) EPC E
-    # Different buckets of properties
+    # 12 properties
     archetype_1_sample = epc_data[
         epc_data["PROPERTY_TYPE"].isin(["House"]) &
         (epc_data["CURRENT_ENERGY_RATING"] == "E") &
@@ -69,6 +69,7 @@ def app():
     # 2) Unfilled cavity
     # 3) Another property above
     # 4) EPC E
+    # 14 properties here
     archetype_2_sample = epc_data[
         epc_data["PROPERTY_TYPE"].isin(["Flat"]) &
         (epc_data["CURRENT_ENERGY_RATING"] == "E") &
@@ -108,6 +109,7 @@ def app():
     archetype_4_sample_asset_list = archetype_4_sample[["UPRN", "ADDRESS1", "POSTCODE"]].copy()
     archetype_4_sample_asset_list["ARCHETYPE"] = "Archetype 4"
 
+    # 41 total properties
     asset_list = pd.concat(
         [
             archetype_1_sample_asset_list,
@@ -126,6 +128,8 @@ def app():
         }
     )
 
+    asset_list["uprn"] = asset_list["uprn"].astype(int)
+
     filename = f"{USER_ID}/{PORTFOLIO_ID}/inputs.csv"
     save_csv_to_s3(
         dataframe=asset_list,
diff --git a/etl/customers/gla_croydon_demo/slides.py b/etl/customers/gla_croydon_demo/slides.py
new file mode 100644
index 00000000..e69de29b

From 80fc7c821e0923918252edde9b90ab32a18cc765 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 28 Mar 2024 17:38:52 +0000
Subject: [PATCH 164/248] moved reading csv function

---
 backend/app/plan/router.py               |  7 ++--
 backend/app/utils.py                     | 21 -----------
 etl/customers/gla_croydon_demo/slides.py | 44 ++++++++++++++++++++++++
 utils/s3.py                              | 24 +++++++++++--
 4 files changed, 69 insertions(+), 27 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index a0d93190..2067d796 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -24,7 +24,7 @@ from backend.app.db.models.portfolio import rating_lookup
 from backend.app.dependencies import validate_token
 from backend.app.plan.schemas import PlanTriggerRequest
 from backend.app.plan.utils import get_cleaned
-from backend.app.utils import epc_to_sap_lower_bound, read_csv_from_s3, sap_to_epc
+from backend.app.utils import epc_to_sap_lower_bound, sap_to_epc
 
 from backend.ml_models.api import ModelApi
 from backend.Property import Property
@@ -35,7 +35,7 @@ from recommendations.optimiser.GainOptimiser import GainOptimiser
 from recommendations.optimiser.optimiser_functions import prepare_input_measures
 from recommendations.Recommendations import Recommendations
 from utils.logger import setup_logger
-from utils.s3 import read_dataframe_from_s3_parquet
+from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3
 from backend.ml_models.Valuation import PropertyValuation
 
 logger = setup_logger()
@@ -196,7 +196,7 @@ async def trigger_plan(body: PlanTriggerRequest):
         )
 
         model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at)
-        # model_api.MODEL_PREFIXES = ["sap_change_predictions"]
+        # model_api.MODEL_PREFIXES = ['sap_change_predictions', 'carbon_change_predictions']
 
         all_predictions = {
             "sap_change_predictions": pd.DataFrame(),
@@ -221,7 +221,6 @@ async def trigger_plan(body: PlanTriggerRequest):
 
         # TODO: TEMP
         # all_predictions["heat_demand_predictions"] = all_predictions["sap_change_predictions"].copy()
-        # all_predictions["carbon_change_predictions"] = all_predictions["sap_change_predictions"].copy()
 
         # Insert the predictions into the recommendations and run the optimiser
         # TODO: If a recommendation has a negative impact on SAP, we should remove it - this seems to have become a
diff --git a/backend/app/utils.py b/backend/app/utils.py
index ba5509e1..b3843206 100644
--- a/backend/app/utils.py
+++ b/backend/app/utils.py
@@ -1,6 +1,4 @@
 import boto3
-import csv
-from io import StringIO
 import string
 import secrets
 import logging
@@ -41,25 +39,6 @@ def setup_logger(log_file=None, level=logging.INFO, overwrite_handler=False):
     return logger
 
 
-def read_csv_from_s3(bucket_name, filepath):
-    s3 = boto3.client('s3')
-
-    # Get the object from s3
-    s3_object = s3.get_object(Bucket=bucket_name, Key=filepath)
-
-    # Read the CSV body from the s3 object
-    body = s3_object['Body'].read()
-
-    # Use StringIO to create a file-like object from the string
-    csv_data = StringIO(body.decode('utf-8'))
-
-    # Use csv library to read it into a list of dictionaries
-    reader = csv.DictReader(csv_data)
-    data = list(reader)
-
-    return data
-
-
 def generate_api_key():
     # Define the characters that will be used to generate the api key
     characters = string.ascii_letters + string.digits
diff --git a/etl/customers/gla_croydon_demo/slides.py b/etl/customers/gla_croydon_demo/slides.py
index e69de29b..5954f604 100644
--- a/etl/customers/gla_croydon_demo/slides.py
+++ b/etl/customers/gla_croydon_demo/slides.py
@@ -0,0 +1,44 @@
+"""
+This script contains the code to generate the data required to populate the slides
+We connect to the database and extract the data for the portfolio needed so it is recommended to use
+an environment akin to the backend to run this script
+"""
+import pandas as pd
+import numpy as np
+from backend.app.db.connection import db_engine
+from sqlalchemy.orm import sessionmaker
+from utils.s3 import read_csv_from_s3
+from etl.customers.slide_utils import (
+    plot_epc_distribution,
+    get_property_details_by_portfolio_id,
+    get_plan_by_portfolio_id,
+    get_properties_with_default_recommendations,
+    create_powerpoint,
+    create_recommendations_summary
+)
+
+USER_ID = 8
+PORTFOLIO_ID_1 = 67
+EPC_TARGET_1 = "C"
+SAP_TARGET_1 = 69
+CUSTOMER_KEY = "gla-demo"
+
+
+def app():
+    # Connect to database
+    session = sessionmaker(bind=db_engine)()
+
+    ########################################################################
+    # Get the data we need
+    ########################################################################
+
+    portfolio_id = PORTFOLIO_ID_1
+
+    # Get the asset list
+    asset_list = read_csv_from_s3(
+        "retrofit-plan-inputs-dev", f"{USER_ID}/{portfolio_id}/inputs.csv"
+    )
+
+    # Get the properties for the portfolio
+    properties = get_properties_with_default_recommendations(session, portfolio_id)
+    properties_df = pd.DataFrame(properties)
diff --git a/utils/s3.py b/utils/s3.py
index 8d36bdb3..fd5992ce 100644
--- a/utils/s3.py
+++ b/utils/s3.py
@@ -1,9 +1,10 @@
 import pickle
 import boto3
-from io import BytesIO, StringIO
-from botocore.exceptions import NoCredentialsError, PartialCredentialsError
+import csv
 import pandas as pd
+from io import BytesIO, StringIO
 from utils.logger import setup_logger
+from botocore.exceptions import NoCredentialsError, PartialCredentialsError
 
 logger = setup_logger()
 
@@ -224,3 +225,22 @@ def read_excel_from_s3(bucket_name, file_key, header_row):
     df.reset_index(drop=True, inplace=True)
 
     return df
+
+
+def read_csv_from_s3(bucket_name, filepath):
+    s3 = boto3.client('s3')
+
+    # Get the object from s3
+    s3_object = s3.get_object(Bucket=bucket_name, Key=filepath)
+
+    # Read the CSV body from the s3 object
+    body = s3_object['Body'].read()
+
+    # Use StringIO to create a file-like object from the string
+    csv_data = StringIO(body.decode('utf-8'))
+
+    # Use csv library to read it into a list of dictionaries
+    reader = csv.DictReader(csv_data)
+    data = list(reader)
+
+    return data

From 053218b3fd9ef7bec918baed43473f3d3485fa4e Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 2 Apr 2024 11:18:58 +0100
Subject: [PATCH 165/248] updated price cap figures

---
 backend/app/plan/router.py                   |  4 --
 backend/ml_models/AnnualBillSavings.py       | 10 ++---
 etl/customers/gla_croydon_demo/asset_list.py | 40 +++++++++++-------
 etl/customers/gla_croydon_demo/slides.py     | 43 ++++++++++++++++++++
 4 files changed, 73 insertions(+), 24 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 2067d796..50b8a837 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -196,7 +196,6 @@ async def trigger_plan(body: PlanTriggerRequest):
         )
 
         model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at)
-        # model_api.MODEL_PREFIXES = ['sap_change_predictions', 'carbon_change_predictions']
 
         all_predictions = {
             "sap_change_predictions": pd.DataFrame(),
@@ -219,9 +218,6 @@ async def trigger_plan(body: PlanTriggerRequest):
             for key, scored in predictions_dict.items():
                 all_predictions[key] = pd.concat([all_predictions[key], scored])
 
-        # TODO: TEMP
-        # all_predictions["heat_demand_predictions"] = all_predictions["sap_change_predictions"].copy()
-
         # Insert the predictions into the recommendations and run the optimiser
         # TODO: If a recommendation has a negative impact on SAP, we should remove it - this seems to have become a
         #       possibility with heating system
diff --git a/backend/ml_models/AnnualBillSavings.py b/backend/ml_models/AnnualBillSavings.py
index 99fae4db..4a433a7f 100644
--- a/backend/ml_models/AnnualBillSavings.py
+++ b/backend/ml_models/AnnualBillSavings.py
@@ -10,13 +10,13 @@ class AnnualBillSavings:
     AVERAGE_ELECTRICITY_CONSUMPTION = 2700
     AVERAGE_GAS_CONSUMPTION = 11500
 
-    # Latest price cap figures from Ofgem are for January 2024
-    # https://www.ofgem.gov.uk/publications/changes-energy-price-cap-1-january-2024
-    ELECTRICITY_PRICE_CAP = 0.29
-    GAS_PRICE_CAP = 0.07
+    # Latest price cap figures from Ofgem are for April 2024
+    # https://www.ofgem.gov.uk/publications/new-energy-price-cap-level-april-june-2024-starts-today
+    ELECTRICITY_PRICE_CAP = 0.245
+    GAS_PRICE_CAP = 0.0604
 
     # This is a weighted mean of the price caps, using the consumption figures above as weights
-    PRICE_FACTOR = 0.11183098591549295
+    PRICE_FACTOR = 0.09549999999999999
 
     EPC_BANDS = ["G", "F", "E", "D", "C", "B", "A"]
 
diff --git a/etl/customers/gla_croydon_demo/asset_list.py b/etl/customers/gla_croydon_demo/asset_list.py
index 01220d0a..a0475807 100644
--- a/etl/customers/gla_croydon_demo/asset_list.py
+++ b/etl/customers/gla_croydon_demo/asset_list.py
@@ -35,20 +35,20 @@ def app():
     # 79% D, 19% E, 1% F, 0.2% G - it probably makes the most sense to focus on E and D properties
     epc_data["CURRENT_ENERGY_RATING"].value_counts(normalize=True)
 
-    # For the purpose of the sample, take the properties have surveys done in the last 2 years
-    # This gives us 1167 remaining properties
-    two_years_ago = pd.Timestamp.now() - pd.DateOffset(days=int(2.5 * 365))
-    epc_data = epc_data[epc_data["LODGEMENT_DATE"] >= two_years_ago]
+    # For the purpose of the sample, take the properties that have had surveys done in the last 3 years
+    # This gives us 1351 remaining properties
+    three_years_ago = pd.Timestamp.now() - pd.DateOffset(days=int(3 * 365))
+    epc_data = epc_data[epc_data["LODGEMENT_DATE"] >= three_years_ago]
 
     # Archetype 1: defined below:
     # 1) House
     # 2) Unfilled cavity
     # 3) A roof that could be insulated (flat or pitched with no more than 50mm insulation)
-    # 4) EPC E
-    # 12 properties
+    # 4) EPC E or D
+    # 24 properties
     archetype_1_sample = epc_data[
         epc_data["PROPERTY_TYPE"].isin(["House"]) &
-        (epc_data["CURRENT_ENERGY_RATING"] == "E") &
+        (epc_data["CURRENT_ENERGY_RATING"].isin(["D", "E"])) &
         epc_data["WALLS_DESCRIPTION"].isin(["Cavity wall, as built, no insulation (assumed)"]) &
         epc_data["ROOF_DESCRIPTION"].isin(
             [
@@ -69,10 +69,10 @@ def app():
     # 2) Unfilled cavity
     # 3) Another property above
     # 4) EPC E
-    # 14 properties here
+    # 57 properties here
     archetype_2_sample = epc_data[
         epc_data["PROPERTY_TYPE"].isin(["Flat"]) &
-        (epc_data["CURRENT_ENERGY_RATING"] == "E") &
+        (epc_data["CURRENT_ENERGY_RATING"].isin(["E", "D"])) &
         epc_data["WALLS_DESCRIPTION"].isin(["Cavity wall, as built, no insulation (assumed)"]) &
         epc_data["ROOF_DESCRIPTION"].isin(
             [
@@ -88,11 +88,18 @@ def app():
     # 2) Solid brick wall
     # 3) House
     # 4) Pitched roof with no insulation
-    # Just 1 property (more expensive to retrofit)
+    # Just 7 properties (more expensive to retrofit)
     archetype_3_sample = epc_data[
         epc_data["PROPERTY_TYPE"].isin(["House"]) &
-        (epc_data["CURRENT_ENERGY_RATING"] == "F") &
-        epc_data["ROOF_DESCRIPTION"].isin(["Pitched, no insulation"])
+        (epc_data["CURRENT_ENERGY_RATING"].isin(["F", "G"])) &
+        epc_data["ROOF_DESCRIPTION"].isin(
+            [
+                "Pitched, no insulation",
+                "Pitched, limited insulation (assumed)",
+                "Pitched, 100 mm loft insulation",
+                "Pitched, no insulation (assumed)",
+            ]
+        )
         ]
     archetype_3_sample_asset_list = archetype_3_sample[["UPRN", "ADDRESS1", "POSTCODE"]].copy()
     archetype_3_sample_asset_list["ARCHETYPE"] = "Archetype 3"
@@ -101,15 +108,18 @@ def app():
     # 1) Maisonette
     # 2) Empty cavity
     # 3) EPC E
-    # 14 properties here
+    # 16 properties here
     archetype_4_sample = epc_data[
         epc_data["PROPERTY_TYPE"].isin(["Maisonette"]) &
-        epc_data["WALLS_DESCRIPTION"].isin(["Cavity wall, as built, no insulation (assumed)"])
+        epc_data["WALLS_DESCRIPTION"].isin(
+            ["Cavity wall, as built, no insulation (assumed)"]
+        )
         ]
+
     archetype_4_sample_asset_list = archetype_4_sample[["UPRN", "ADDRESS1", "POSTCODE"]].copy()
     archetype_4_sample_asset_list["ARCHETYPE"] = "Archetype 4"
 
-    # 41 total properties
+    # 104 total properties
     asset_list = pd.concat(
         [
             archetype_1_sample_asset_list,
diff --git a/etl/customers/gla_croydon_demo/slides.py b/etl/customers/gla_croydon_demo/slides.py
index 5954f604..ebca7dc3 100644
--- a/etl/customers/gla_croydon_demo/slides.py
+++ b/etl/customers/gla_croydon_demo/slides.py
@@ -38,7 +38,50 @@ def app():
     asset_list = read_csv_from_s3(
         "retrofit-plan-inputs-dev", f"{USER_ID}/{portfolio_id}/inputs.csv"
     )
+    asset_list = pd.DataFrame(asset_list)
 
     # Get the properties for the portfolio
     properties = get_properties_with_default_recommendations(session, portfolio_id)
     properties_df = pd.DataFrame(properties)
+
+    # We now pull the data for the property details
+    property_details = get_property_details_by_portfolio_id(session, portfolio_id)
+    property_details_df = pd.DataFrame(property_details)
+    # Merge on uprn
+    property_details_df = property_details_df.merge(
+        properties_df[["uprn", "id"]].rename(columns={"id": "property_id"}),
+        on="property_id"
+    )
+
+    plans = get_plan_by_portfolio_id(session, portfolio_id)
+    plans_df = pd.DataFrame(plans)
+
+    # Unnest the recommendations. Each recommendation is a list of dictionaries
+    recommendations_exploded = properties_df["recommendations"].explode().tolist()
+    recommendations_df = pd.DataFrame([r for r in recommendations_exploded if not pd.isnull(r)])
+    # Add uprn on
+    recommendations_df = recommendations_df.merge(
+        properties_df[["uprn", "id"]].rename(columns={"id": "property_id"}),
+        how="left",
+        on="property_id"
+    )
+
+    # Summary information by each archetype
+    archetype_1 = asset_list[asset_list["archetype"] == "Archetype 1"]
+
+    recommendations_arch_1_summary = create_recommendations_summary(
+        recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_1["uprn"].values)],
+        properties_df[properties_df["uprn"].astype(str).isin(archetype_1["uprn"].values)],
+        SAP_TARGET_1
+    )
+
+    # Take the mean, median and maximum of each value
+    arch_1_recommendation_means = recommendations_arch_1_summary.mean()
+
+    arch_1_property_details = property_details_df[
+        property_details_df["uprn"].astype(str).isin(archetype_1["uprn"].values)
+    ]
+
+    arch_1_property_details_means = arch_1_property_details.mean()
+
+    arch_1_recommendation_means["total_bill_savings"] / arch_1_property_details_means["adjusted_energy_consumption"]

From 08a657eb9f505a10608377eff1c0c10b76bd2f0a Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 3 Apr 2024 12:18:08 +0100
Subject: [PATCH 166/248] Adding costs for ttzc

---
 backend/ml_models/AnnualBillSavings.py       |  13 +++
 etl/customers/gla_croydon_demo/asset_list.py |  13 +++
 etl/customers/gla_croydon_demo/slides.py     | 100 ++++++++++++++---
 etl/customers/slide_utils.py                 |  22 +++-
 recommendations/Costs.py                     |  83 +++++++++++++-
 recommendations/HeatingControlRecommender.py | 108 +++++++++++++++++++
 recommendations/HeatingRecommender.py        |  17 +++
 7 files changed, 338 insertions(+), 18 deletions(-)

diff --git a/backend/ml_models/AnnualBillSavings.py b/backend/ml_models/AnnualBillSavings.py
index 4a433a7f..9be9d78a 100644
--- a/backend/ml_models/AnnualBillSavings.py
+++ b/backend/ml_models/AnnualBillSavings.py
@@ -18,6 +18,9 @@ class AnnualBillSavings:
     # This is a weighted mean of the price caps, using the consumption figures above as weights
     PRICE_FACTOR = 0.09549999999999999
 
+    # Daily standard charge, based on average across England, Scotland and Wales, and includes VAT
+    DAILY_STANDARD_CHARGE = 0.3143
+
     EPC_BANDS = ["G", "F", "E", "D", "C", "B", "A"]
 
     @classmethod
@@ -38,6 +41,16 @@ class AnnualBillSavings:
         """
         return cls.ELECTRICITY_PRICE_CAP * kwh
 
+    @classmethod
+    def calculate_annual_bill(cls, kwh):
+        """
+        This method will estimate the total annual bill for a property
+        :param kwh: The total kwh consumption
+        :return: An estimate for annual bill
+        """
+
+        return cls.PRICE_FACTOR * kwh + cls.DAILY_STANDARD_CHARGE * 365
+
     @classmethod
     def adjust_energy_to_metered(cls, epc_energy_consumption, current_epc_rating):
         """
diff --git a/etl/customers/gla_croydon_demo/asset_list.py b/etl/customers/gla_croydon_demo/asset_list.py
index a0475807..3a3f02a3 100644
--- a/etl/customers/gla_croydon_demo/asset_list.py
+++ b/etl/customers/gla_croydon_demo/asset_list.py
@@ -140,6 +140,19 @@ def app():
 
     asset_list["uprn"] = asset_list["uprn"].astype(int)
 
+    # We end up with some properties that are currently an EPC C, but we do not have this data in the download, so we
+    # manually remove
+    # 1) 3 Reid Close, CR5 3BL
+    # 2) Flat 6, Collier Court 2A, St. Peters Road CR0 1HD
+    asset_list = asset_list[
+        ~asset_list["uprn"].isin(
+            [
+                100020576460,
+                100020624352,
+            ]
+        )
+    ]
+
     filename = f"{USER_ID}/{PORTFOLIO_ID}/inputs.csv"
     save_csv_to_s3(
         dataframe=asset_list,
diff --git a/etl/customers/gla_croydon_demo/slides.py b/etl/customers/gla_croydon_demo/slides.py
index ebca7dc3..1d217226 100644
--- a/etl/customers/gla_croydon_demo/slides.py
+++ b/etl/customers/gla_croydon_demo/slides.py
@@ -16,11 +16,15 @@ from etl.customers.slide_utils import (
     create_powerpoint,
     create_recommendations_summary
 )
+from backend.ml_models.AnnualBillSavings import AnnualBillSavings
 
 USER_ID = 8
 PORTFOLIO_ID_1 = 67
+PORTFOLIO_ID_2 = 68
 EPC_TARGET_1 = "C"
+EPC_TARGET_2 = "A"
 SAP_TARGET_1 = 69
+SAP_TARGET_2 = 100
 CUSTOMER_KEY = "gla-demo"
 
 
@@ -32,11 +36,13 @@ def app():
     # Get the data we need
     ########################################################################
 
-    portfolio_id = PORTFOLIO_ID_1
+    # TODO: Update to portfolio desired
+    # portfolio_id = PORTFOLIO_ID_1
+    portfolio_id = PORTFOLIO_ID_2
 
     # Get the asset list
     asset_list = read_csv_from_s3(
-        "retrofit-plan-inputs-dev", f"{USER_ID}/{portfolio_id}/inputs.csv"
+        "retrofit-plan-inputs-dev", f"{USER_ID}/67/inputs.csv"
     )
     asset_list = pd.DataFrame(asset_list)
 
@@ -47,6 +53,10 @@ def app():
     # We now pull the data for the property details
     property_details = get_property_details_by_portfolio_id(session, portfolio_id)
     property_details_df = pd.DataFrame(property_details)
+    # We estimate bills based on the adjusted_energy_consumption
+    property_details_df["energy_bill"] = property_details_df["adjusted_energy_consumption"].apply(
+        lambda x: AnnualBillSavings.calculate_annual_bill(x)
+    )
     # Merge on uprn
     property_details_df = property_details_df.merge(
         properties_df[["uprn", "id"]].rename(columns={"id": "property_id"}),
@@ -66,22 +76,84 @@ def app():
         on="property_id"
     )
 
-    # Summary information by each archetype
-    archetype_1 = asset_list[asset_list["archetype"] == "Archetype 1"]
-
-    recommendations_arch_1_summary = create_recommendations_summary(
-        recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_1["uprn"].values)],
-        properties_df[properties_df["uprn"].astype(str).isin(archetype_1["uprn"].values)],
+    recommendations_summary = create_recommendations_summary(
+        recommendations_df,
+        properties_df,
+        property_details_df,
         SAP_TARGET_1
     )
 
-    # Take the mean, median and maximum of each value
-    arch_1_recommendation_means = recommendations_arch_1_summary.mean()
+    # Calculate % changes of energy, co2 and bills
+    recommendations_summary["carbon_percent_change"] = (
+        recommendations_summary["total_carbon"] / recommendations_summary["current_co2"]
+    )
 
-    arch_1_property_details = property_details_df[
-        property_details_df["uprn"].astype(str).isin(archetype_1["uprn"].values)
+    recommendations_summary["energy_percent_change"] = (
+        recommendations_summary["adjusted_heat_demand"] / recommendations_summary["current_energy"]
+    )
+
+    recommendations_summary["bills_percent_change"] = (
+        recommendations_summary["total_bill_savings"] / recommendations_summary["current_energy_bill"]
+    )
+
+    # Summary information by each archetype
+    ########################
+    # Archetype 1
+    ########################
+    archetype_1 = asset_list[asset_list["archetype"] == "Archetype 1"]
+    recommendations_arch_1_summary = recommendations_summary[
+        recommendations_summary["uprn"].astype(str).isin(archetype_1["uprn"].values)
     ]
 
-    arch_1_property_details_means = arch_1_property_details.mean()
+    # Take the minimum, maximum and mean of each value
+    arch_1_recommendation_min = recommendations_arch_1_summary.min()
+    arch_1_recommendation_max = recommendations_arch_1_summary.max()
+    arch_1_recommendation_means = recommendations_arch_1_summary.mean()
 
-    arch_1_recommendation_means["total_bill_savings"] / arch_1_property_details_means["adjusted_energy_consumption"]
+    ########################
+    # Archetype 2
+    ########################
+    archetype_2 = asset_list[asset_list["archetype"] == "Archetype 2"]
+    recommendations_arch_2_summary = recommendations_summary[
+        recommendations_summary["uprn"].astype(str).isin(archetype_2["uprn"].values)
+    ]
+
+    # Take the minimum, maximum and mean of each value
+    arch_2_recommendation_min = recommendations_arch_2_summary.min()
+    arch_2_recommendation_max = recommendations_arch_2_summary.max()
+    arch_2_recommendation_means = recommendations_arch_2_summary.mean().round(2)
+
+    ########################
+    # Archetype 3
+    ########################
+    archetype_3 = asset_list[asset_list["archetype"] == "Archetype 3"]
+    recommendations_arch_3_summary = recommendations_summary[
+        recommendations_summary["uprn"].astype(str).isin(archetype_3["uprn"].values)
+    ]
+
+    # Take the minimum, maximum and mean of each value
+    arch_3_recommendation_min = recommendations_arch_3_summary.min()
+    arch_3_recommendation_max = recommendations_arch_3_summary.max()
+    arch_3_recommendation_means = recommendations_arch_3_summary.mean()
+
+    ########################
+    # Archetype 4
+    ########################
+    archetype_4 = asset_list[asset_list["archetype"] == "Archetype 4"]
+    recommendations_arch_4_summary = recommendations_summary[
+        recommendations_summary["uprn"].astype(str).isin(archetype_4["uprn"].values)
+    ]
+
+    # Take the minimum, maximum and mean of each value
+    arch_4_recommendation_min = recommendations_arch_4_summary.min()
+    arch_4_recommendation_max = recommendations_arch_4_summary.max()
+    arch_4_recommendation_means = recommendations_arch_4_summary.mean()
+
+    property_details_df[
+        property_details_df["uprn"].astype(str).isin(archetype_4["uprn"].values)
+    ]["total_floor_area"].mean()
+
+    ########################
+    # Overview
+    ########################
+    overview_totals = recommendations_summary.sum()
diff --git a/etl/customers/slide_utils.py b/etl/customers/slide_utils.py
index d1efce47..9170ab17 100644
--- a/etl/customers/slide_utils.py
+++ b/etl/customers/slide_utils.py
@@ -246,7 +246,7 @@ def create_powerpoint(data, save_location):
     prs.save(save_location)
 
 
-def create_recommendations_summary(recommendations_df, properties_df, sap_target):
+def create_recommendations_summary(recommendations_df, properties_df, property_details_df, sap_target):
     # Aggregate the impact of the recommendations
     # We want:
     # Total number of sap points
@@ -259,13 +259,15 @@ def create_recommendations_summary(recommendations_df, properties_df, sap_target
         total_valuation_impact=("property_valuation_increase", "sum"),
         total_bill_savings=("energy_cost_savings", "sum"),
         total_cost=("estimated_cost", "sum"),
-        total_carbon=("co2_equivalent_savings", "sum")
+        total_carbon=("co2_equivalent_savings", "sum"),
+        adjusted_heat_demand=("adjusted_heat_demand", "sum")
     ).reset_index()
-    # Merge on current sap points
+    # Merge on current sap points, current CO2, current adjusted_heat_demand, current annual bill
     recommendations_summary = recommendations_summary.merge(
         properties_df[["id", "uprn", "current_sap_points"]].rename(columns={"id": "property_id"}), on="property_id",
         how="left"
     )
+
     recommendations_summary["expected_sap_points"] = (
         recommendations_summary["current_sap_points"] + recommendations_summary["total_sap_points"]
     )
@@ -274,4 +276,18 @@ def create_recommendations_summary(recommendations_df, properties_df, sap_target
     )
     recommendations_summary["sap_difference"] = sap_target - recommendations_summary["expected_sap_points"]
 
+    if property_details_df is not None:
+        recommendations_summary = recommendations_summary.merge(
+            property_details_df[["uprn", "co2_emissions", "adjusted_energy_consumption", "energy_bill"]].rename(
+                columns={
+                    "id": "property_id",
+                    "co2_emissions": "current_co2",
+                    "adjusted_energy_consumption": "current_energy",
+                    "energy_bill": "current_energy_bill"
+                }
+            ),
+            on="uprn",
+            how="left"
+        )
+
     return recommendations_summary
diff --git a/recommendations/Costs.py b/recommendations/Costs.py
index b2874f28..47844657 100644
--- a/recommendations/Costs.py
+++ b/recommendations/Costs.py
@@ -42,7 +42,22 @@ BATTERY_COST = 3500
 
 # This is based on https://www.checkatrade.com/blog/cost-guides/cost-smart-thermostat/
 SMART_APPLIANCE_THERMOSTAT_COST = 400
-PROGRAMMER_COST = 200
+PROGRAMMER_COST = 120
+ROOM_THERMOSTAT_COST = 150
+TRVS_COST = 35
+
+# Cost for TTZC
+# Smart thermostat based on checkatrade https://www.checkatrade.com/blog/cost-guides/cost-smart-thermostat/
+# Based on the Nest system
+TTZC_SMART_THERMOSTAT_COST = 205
+TTZC_SMART_THERMOSTAT_LABOUR_HOURS = 2
+TTZC_ELECTRICIAN_HOURLY_RATE = 45
+# Based on cost of a Nest temperature sensor
+TTZC_ROOM_TEMPERATURE_SENSOR_COST = 50
+TTZC_ROOM_TEMPERATURE_SENSOR_LABOUR_HOURS = 0.17  # (Assume ~ 10 mins install per sensor)
+# Based on an average cost of smart radiator valves
+TTZC_SMART_RADIATOR_VALUES = 50
+TTZC_SMART_RADIATOR_VALUES_LABOUR_HOURS = 0.37  # (Assume ~ 15-30 mins install per valve)
 
 
 class Costs:
@@ -998,3 +1013,69 @@ class Costs:
             "labour_hours": 0,
             "labour_days": 0,
         }
+
+    def roomstat_programmer_trvs(
+        self, number_heated_rooms, has_programmer, has_trvs, has_room_thermostat
+    ):
+        """
+
+        :return:
+        """
+
+        total_cost = 0
+        labour_hours = 0
+
+        if not has_programmer:
+            total_cost += PROGRAMMER_COST
+            labour_hours += 1
+
+        if not has_trvs:
+            total_cost += TRVS_COST * number_heated_rooms
+            labour_hours += 0.25 * number_heated_rooms
+
+        if not has_room_thermostat:
+            total_cost += ROOM_THERMOSTAT_COST
+            labour_hours += 0.5
+
+        subtotal_before_vat = total_cost / (1 + self.VAT_RATE)
+        vat = total_cost - subtotal_before_vat
+
+        return {
+            "total": total_cost,
+            "subtotal": subtotal_before_vat,
+            "vat": vat,
+            "labour_hours": labour_hours,
+            "labour_days": 1,
+        }
+
+    def time_and_temperature_zone_control(self, number_heated_rooms):
+
+        # The product costs are inclusive of VAT
+        product_costs = (
+            TTZC_SMART_THERMOSTAT_COST +
+            TTZC_ROOM_TEMPERATURE_SENSOR_COST * number_heated_rooms +
+            TTZC_SMART_RADIATOR_VALUES * number_heated_rooms
+        )
+        labour_hours = (
+            TTZC_SMART_THERMOSTAT_LABOUR_HOURS +
+            TTZC_ROOM_TEMPERATURE_SENSOR_LABOUR_HOURS * number_heated_rooms +
+            TTZC_SMART_RADIATOR_VALUES_LABOUR_HOURS * number_heated_rooms
+        )
+        labour_costs = TTZC_ELECTRICIAN_HOURLY_RATE * labour_hours
+        # Add contingency and preliminaries to the labour to account for the complexity of the job
+        labour_costs = labour_costs * (1 + self.CONTINGENCY + self.PRELIMINARIES)
+
+        vat = labour_costs * self.VAT_RATE
+
+        subtotal_before_vat = product_costs + labour_costs
+        total_cost = subtotal_before_vat + vat
+
+        labour_days = np.ceil(labour_hours / 8)
+
+        return {
+            "total": total_cost,
+            "subtotal": subtotal_before_vat,
+            "vat": vat,
+            "labour_hours": labour_hours,
+            "labour_days": labour_days,
+        }
diff --git a/recommendations/HeatingControlRecommender.py b/recommendations/HeatingControlRecommender.py
index 81597f61..99b41469 100644
--- a/recommendations/HeatingControlRecommender.py
+++ b/recommendations/HeatingControlRecommender.py
@@ -27,6 +27,14 @@ class HeatingControlRecommender:
             self.recommend_high_heat_retention_controls()
             return
 
+        if heating_description in ["Boiler and radiators, mains gas"]:
+            # We can recommend roomstat programmer trvs
+            self.recommend_roomstat_programmer_trvs()
+            # We can also recommend time and temperature zone controls
+            self.recommend_time_temperature_zone_controls()
+
+            return
+
     def recommend_room_heaters_electric_controls(self):
         """
         If the home has Room heaters, electric, we start by identifying potential heating controls that could
@@ -105,3 +113,103 @@ class HeatingControlRecommender:
 
         # We don't implement any other recommendations right now
         return
+
+    def recommend_roomstat_programmer_trvs(self):
+        """
+        If the home has a boiler and radiators, mains gas, we start by identifying potential heating controls that could
+        be upgraded, that would provide a practical impact.
+
+        The criteria for recommending an upgrade to heating controls are (one of these must be true)
+        1) There are no controls
+        2) No programmer
+        3) No room thermostat
+        4) No TRVs
+
+
+        :return:
+        """
+
+        # We check if we have the conditions to recommend this upgrade
+
+        needs_programmer = self.property.main_heating_controls["switch_system"] is None
+        needs_room_thermostat = self.property.main_heating_controls["thermostatic_control"] is None
+        needs_trvs = self.property.main_heating_controls["trvs"] is None
+
+        can_recommend = (
+            (self.property.main_heating_controls["no_control"] is not None) or
+            needs_programmer or
+            needs_room_thermostat or
+            needs_trvs
+        )
+
+        if not can_recommend:
+            return
+
+        ending_config = MainheatControlAttributes("Programmer, room thermostat and TRVS").process()
+        # We use this to determine how we should be updating the config
+        simulation_config = check_simulation_difference(
+            new_config=ending_config, old_config=self.property.main_heating_controls
+        )
+        # This upgrade will only take the heating controls to good energy efficiency
+        # If the current system is below good, we make it good
+        if self.property.data["mainheatc-energy-eff"] in ["Poor", "Very Poor", "Average"]:
+            simulation_config["mainheatc_energy_eff_ending"] = "Good"
+
+        has_programmer = not needs_programmer
+        has_room_thermostat = not needs_room_thermostat
+        has_trvs = not needs_trvs
+
+        self.recommendation.append(
+            {
+                "description": "upgrade heating controls to Room thermostat, programmer and TRVs",
+                **self.costs.roomstat_programmer_trvs(
+                    number_heated_rooms=int(self.property.data["number-heated-rooms"]),
+                    has_programmer=has_programmer,
+                    has_room_thermostat=has_room_thermostat,
+                    has_trvs=has_trvs
+                ),
+                "simulation_config": simulation_config
+            }
+        )
+
+        return
+
+    def recommend_time_temperature_zone_controls(self):
+        """
+        If the home has a boiler, we can recommend time and temperature zone controls. This is a more advanced
+        and more efficient control system than the standard controls that come with a boiler. However, it may come
+        with a higher cost and more involved usage
+        :return:
+        """
+
+        # We check if the efficiency of the current heating controls is good or below
+
+        # Conditions for installation are as follows:
+        # 1) The current heating controls are not time and temperature zone controls
+        # 2) The current heating controls are not already at 'Very Good' or above
+
+        if (
+            (self.property["thermostatic_control"] == "time and temperature zone control") or
+            (self.property.data["mainheatc-energy-eff"] in ["Very Good"])
+        ):
+            # No recommendation needed
+            return
+
+        ending_config = MainheatControlAttributes("Time and temperature zone control").process()
+
+        # We use this to determine how we should be updating the config
+        simulation_config = check_simulation_difference(
+            new_config=ending_config, old_config=self.property.main_heating_controls
+        )
+
+        # If the current system is below very good, we make it very good
+        if self.property.data["mainheatc-energy-eff"] in ["Poor", "Very Poor", "Average", "Good"]:
+            simulation_config["mainheatc_energy_eff_ending"] = "Very Good"
+
+        self.recommendation.append(
+            {
+                "description": "upgrade heating controls to Room thermostat, programmer and TRVs",
+                **self.costs.time_and_temperature_zone_control(),
+                "simulation_config": simulation_config
+            }
+        )
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 11ae3da6..6467bd2f 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -26,6 +26,11 @@ class HeatingRecommender:
             self.recommend_electric_storage_heaters(phase=phase, system_change=True, heating_controls_only=False)
             return
 
+        # if the property has mains heating with boiler and radiators, we recommend optimal heating controls
+        if self.property.main_heating["clean_description"] in ["Boiler and radiators, mains gas"]:
+            self.recommend_roomstat_programmer_trvs(phase=phase)
+            return
+
     @staticmethod
     def check_simulation_difference(old_config, new_config):
         """
@@ -182,3 +187,15 @@ class HeatingRecommender:
         )
 
         self.recommendations.extend(recommendations)
+
+    def recommend_roomstat_programmer_trvs(self, phase):
+        """
+
+        :param phase:
+        :return:
+        """
+        # We recommend the heating controls
+        controls_recommender = HeatingControlRecommender(self.property)
+        controls_recommender.recommend(heating_description="Boiler and radiators, mains gas")
+
+        controls_recommender.recommendation

From 45552f5e06d3b814729cc57b6ca4329d19a8c31e Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 3 Apr 2024 14:39:28 +0100
Subject: [PATCH 167/248] Added costing for boiler

---
 recommendations/Costs.py                     | 51 ++++++++++++
 recommendations/HeatingControlRecommender.py |  6 +-
 recommendations/HeatingRecommender.py        | 83 +++++++++++++++++++-
 recommendations/Recommendations.py           |  3 +
 4 files changed, 137 insertions(+), 6 deletions(-)

diff --git a/recommendations/Costs.py b/recommendations/Costs.py
index 47844657..e5ceb0c0 100644
--- a/recommendations/Costs.py
+++ b/recommendations/Costs.py
@@ -59,6 +59,26 @@ TTZC_ROOM_TEMPERATURE_SENSOR_LABOUR_HOURS = 0.17  # (Assume ~ 10 mins install pe
 TTZC_SMART_RADIATOR_VALUES = 50
 TTZC_SMART_RADIATOR_VALUES_LABOUR_HOURS = 0.37  # (Assume ~ 15-30 mins install per valve)
 
+# Low carbon combi boiler - median value based on £2200 - £3000 range
+LOW_CARBON_COMBI_BOILER = 2200
+
+# boiler prices based on
+# https://www.greenmatch.co.uk/boilers/30kw-boiler
+# https://www.greenmatch.co.uk/boilers/35kw-boiler
+# https://www.greenmatch.co.uk/boilers/40kw-boiler
+# These are exclusive of installation costs
+COMBI_BOILER_COSTS = {
+    "30kw": 1550,
+    "35kw": 1610,
+    "40kw": 1625
+}
+
+CONVENTIONAL_BOILER_COSTS = {
+    "30kw": 1117,
+    "35kw": 1546,
+    "40kw": 1776
+}
+
 
 class Costs:
     """
@@ -1079,3 +1099,34 @@ class Costs:
             "labour_hours": labour_hours,
             "labour_days": labour_days,
         }
+
+    def low_carbon_boiler(self, is_combi, size):
+        """
+        Estimate the installed cost of a boiler from its unit price (by type and size) plus two days of labour
+        :return:
+        """
+
+        unit_cost = COMBI_BOILER_COSTS[size] if is_combi else CONVENTIONAL_BOILER_COSTS[size]
+        # The unit cost is the cost without VAT
+        # We now need to estimate the cost of the works
+        labour_days = 2
+        labour_rate = 500
+
+        # Average cost of installation is 1 (maybe 2) days at £300 per day
+        # https://www.checkatrade.com/blog/cost-guides/new-boiler-cost/
+        # To be pessimistic, assume 2 days work and £500 day rate
+        labour_cost = labour_rate * self.labour_adjustment_factor * labour_days
+        # Add contingency and preliminaries
+        labour_cost = labour_cost * (1 + self.CONTINGENCY + self.PRELIMINARIES)
+        vat = labour_cost * self.VAT_RATE
+
+        subtotal_before_vat = unit_cost + labour_cost
+        total_cost = subtotal_before_vat + vat
+
+        return {
+            "total": total_cost,
+            "subtotal": subtotal_before_vat,
+            "vat": vat,
+            "labour_hours": labour_days * 8,
+            "labour_days": labour_days,
+        }
diff --git a/recommendations/HeatingControlRecommender.py b/recommendations/HeatingControlRecommender.py
index 99b41469..547ea497 100644
--- a/recommendations/HeatingControlRecommender.py
+++ b/recommendations/HeatingControlRecommender.py
@@ -189,7 +189,7 @@ class HeatingControlRecommender:
         # 2) The current heating controls are not already at 'Very Good' or above
 
         if (
-            (self.property["thermostatic_control"] == "time and temperature zone control") or
+            (self.property.main_heating_controls["thermostatic_control"] == "time and temperature zone control") or
             (self.property.data["mainheatc-energy-eff"] in ["Very Good"])
         ):
             # No recommendation needed
@@ -209,7 +209,9 @@ class HeatingControlRecommender:
         self.recommendation.append(
             {
                 "description": "upgrade heating controls to Room thermostat, programmer and TRVs",
-                **self.costs.time_and_temperature_zone_control(),
+                **self.costs.time_and_temperature_zone_control(
+                    number_heated_rooms=int(self.property.data["number-heated-rooms"])
+                ),
                 "simulation_config": simulation_config
             }
         )
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 6467bd2f..c7064274 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -28,7 +28,7 @@ class HeatingRecommender:
 
         # if the property has mains heating with boiler and radiators, we recommend optimal heating controls
         if self.property.main_heating["clean_description"] in ["Boiler and radiators, mains gas"]:
-            self.recommend_roomstat_programmer_trvs(phase=phase)
+            self.recommend_boiler_upgrades(phase=phase)
             return
 
     @staticmethod
@@ -188,14 +188,89 @@ class HeatingRecommender:
 
         self.recommendations.extend(recommendations)
 
-    def recommend_roomstat_programmer_trvs(self, phase):
-        """
+    @staticmethod
+    def estimate_boiler_size(property_type, built_form, floor_area, floor_height, num_heated_rooms):
+        # Step 1: Base size estimation based on property type (as a starting point)
+        base_size = {
+            'Flat': 25,
+            'House': 30,
+            'Maisonette': 28,
+            'Bungalow': 27
+        }
 
+        # Step 2: Calculate the volume of the property
+        volume = floor_area * floor_height
+
+        # Step 3: Adjust base size for built form (to account for heat retention)
+        form_adjustment = {
+            'Mid-Terrace': 0,
+            'End-Terrace': 2,
+            'Semi-Detached': 4,
+            'Detached': 6
+        }
+
+        # Step 4: Further adjust for the total volume and number of heated rooms
+        volume_adjustment = (volume / 100)  # Simplified adjustment factor for volume
+        rooms_adjustment = (num_heated_rooms - 5) * 0.5  # Assuming base case of 5 rooms
+
+        # Calculate the estimated boiler size
+        estimated_size = base_size[property_type] + form_adjustment[built_form] + volume_adjustment + rooms_adjustment
+
+        # Step 5: Align with available boiler sizes and ensure it does not exceed 40kW, as it's rare to need more
+        available_sizes = [30, 35, 40, 45, 50]
+        estimated_size = min(max(estimated_size, 30), 40)  # Ensure within 30kW to 40kW range
+
+        # Find the closest available size (in this case, rounding up or down to align with 30, 35 or 40)
+        closest_size = min(available_sizes, key=lambda x: abs(x - estimated_size))
+
+        return closest_size
+
+    def recommend_boiler_upgrades(self, phase):
+        """
+        This boiler recommendation will only recommend a like-for-like upgrade, since changing the system
+        is generally more expensive
         :param phase:
         :return:
         """
+
+        # We now recommend boiler upgrades, if applicable
+        if self.property.data["mainheat-energy-eff"] in ["Very Poor", "Poor", "Average"]:
+            boiler_size = self.estimate_boiler_size(
+                property_type=self.property.data["property-type"],
+                built_form=self.property.data["built-form"],
+                floor_area=self.property.floor_area,
+                floor_height=self.property.floor_height,
+                num_heated_rooms=self.property.data["number-heated-rooms"],
+            )
+
+            # If heating and hot water come from the mains, we need a combi boiler, otherwise we need a regular boiler
+            is_combi = self.property.hotwater["clean_description"] in ["From main system"]
+            if is_combi:
+                description = "Upgrade to a low carbon combi boiler"
+            else:
+                description = "Upgrade to a low carbon boiler"
+
+            self.recommendations.append(
+                {
+                    "phase": phase,
+                    "parts": [
+                        # TODO
+                    ],
+                    "type": "heating",
+                    "description": description,
+                    "starting_u_value": None,
+                    "new_u_value": None,
+                    "sap_points": None,
+                    **self.costs.low_carbon_boiler(is_combi=is_combi, size=f"{boiler_size}kw")
+                }
+            )
+
         # We recommend the heating controls
         controls_recommender = HeatingControlRecommender(self.property)
         controls_recommender.recommend(heating_description="Boiler and radiators, mains gas")
+        # We may have 2 recommendations from the heating controls
 
-        controls_recommender.recommendation
+        # The heating controls recommendation is distrinct from the boiler upgrade recommendation
+        # We insert phase into the recommendations for heating controls
+        for recommendation in controls_recommender.recommendation:
+            recommendation["phase"] = phase
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 944fec7a..d9a0a0fd 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -110,6 +110,9 @@ class Recommendations:
             self.heating_recommender.recommend(phase=phase)
             if self.heating_recommender.recommendations:
                 property_recommendations.append(self.heating_recommender.recommendations)
+                # We check if we have distinct heating and heating controls recommendations
+                # If so, we increment by 2 (one of the heating system, one for the heating controls)
+                # otherwise we incremenet by 1
                 phase += 1
 
         # Hot water

From 09bbeaecae8156faedf090a28bfe0bcae231f0d2 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 3 Apr 2024 14:57:11 +0100
Subject: [PATCH 168/248] incorporate heating and heating control
 recommendations

---
 recommendations/HeatingRecommender.py | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index c7064274..676a4b06 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -233,6 +233,8 @@ class HeatingRecommender:
         :return:
         """
 
+        recommendation_phase = phase
+
         # We now recommend boiler upgrades, if applicable
         if self.property.data["mainheat-energy-eff"] in ["Very Poor", "Poor", "Average"]:
             boiler_size = self.estimate_boiler_size(
@@ -252,7 +254,7 @@ class HeatingRecommender:
 
             self.recommendations.append(
                 {
-                    "phase": phase,
+                    "phase": recommendation_phase,
                     "parts": [
                         # TODO
                     ],
@@ -261,16 +263,21 @@ class HeatingRecommender:
                     "starting_u_value": None,
                     "new_u_value": None,
                     "sap_points": None,
+                    "simulation_config": {"mainheat_energy_eff_ending": "Good"},
                     **self.costs.low_carbon_boiler(is_combi=is_combi, size=f"{boiler_size}kw")
                 }
             )
 
+            # We increment the recommendation phase, in the case of us having heating control recommendations
+            recommendation_phase += 1
+
         # We recommend the heating controls
         controls_recommender = HeatingControlRecommender(self.property)
         controls_recommender.recommend(heating_description="Boiler and radiators, mains gas")
         # We may have 2 recommendations from the heating controls
 
-        # The heating controls recommendation is distrinct from the boiler upgrade recommendation
-        # We insert phase into the recommendations for heating controls
-        for recommendation in controls_recommender.recommendation:
-            recommendation["phase"] = phase
+        if controls_recommender.recommendation:
+            # The heating controls recommendation is distrinct from the boiler upgrade recommendation
+            # We insert phase into the recommendations for heating controls
+            for recommendation in controls_recommender.recommendation:
+                recommendation["phase"] = recommendation_phase

From 9130ad55fffc21858ca7061d26a2f6ecb8d66e3d Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 3 Apr 2024 14:59:42 +0100
Subject: [PATCH 169/248] Added missing controls to output

---
 recommendations/HeatingRecommender.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 676a4b06..9658aaa3 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -281,3 +281,5 @@ class HeatingRecommender:
             # We insert phase into the recommendations for heating controls
             for recommendation in controls_recommender.recommendation:
                 recommendation["phase"] = recommendation_phase
+
+        self.recommendations.extend(controls_recommender.recommendation)

From a9c2bf1b9c0be1192edbeb50ba01401d1e55578f Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 3 Apr 2024 15:06:44 +0100
Subject: [PATCH 170/248] added correct incrementing of phase

---
 recommendations/HeatingControlRecommender.py | 8 ++++++++
 recommendations/Recommendations.py           | 4 +++-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/recommendations/HeatingControlRecommender.py b/recommendations/HeatingControlRecommender.py
index 547ea497..e224f243 100644
--- a/recommendations/HeatingControlRecommender.py
+++ b/recommendations/HeatingControlRecommender.py
@@ -161,6 +161,7 @@ class HeatingControlRecommender:
 
         self.recommendation.append(
             {
+                "type": "heating_control",
                 "description": "upgrade heating controls to Room thermostat, programmer and TRVs",
                 **self.costs.roomstat_programmer_trvs(
                     number_heated_rooms=int(self.property.data["number-heated-rooms"]),
@@ -168,6 +169,9 @@ class HeatingControlRecommender:
                     has_room_thermostat=has_room_thermostat,
                     has_trvs=has_trvs
                 ),
+                "starting_u_value": None,
+                "new_u_value": None,
+                "sap_points": None,
                 "simulation_config": simulation_config
             }
         )
@@ -208,10 +212,14 @@ class HeatingControlRecommender:
 
         self.recommendation.append(
             {
+                "type": "heating_control",
                 "description": "upgrade heating controls to Room thermostat, programmer and TRVs",
                 **self.costs.time_and_temperature_zone_control(
                     number_heated_rooms=int(self.property.data["number-heated-rooms"])
                 ),
+                "starting_u_value": None,
+                "new_u_value": None,
+                "sap_points": None,
                 "simulation_config": simulation_config
             }
         )
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index d9a0a0fd..902023dc 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -113,7 +113,9 @@ class Recommendations:
                 # We check if we have distinct heating and heating controls recommendations
                 # If so, we increment by 2 (one of the heating system, one for the heating controls)
                 # otherwise we incremenet by 1
-                phase += 1
+                max_used_phase = max([rec["phase"] for rec in self.heating_recommender.recommendations])
+                amount_to_increment = max_used_phase - phase + 1
+                phase += amount_to_increment
 
         # Hot water
         if "hot_water" not in self.exclusions:

From 2234269ca62611c9f0285acc0f79491ce98cf277 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 3 Apr 2024 15:14:19 +0100
Subject: [PATCH 171/248] added simulation

---
 backend/Property.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index d97ce8cf..82108bbb 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -344,7 +344,7 @@ class Property:
                 else:
                     output["glazed_type_ending"] = "double glazing installed during or after 2002"
 
-            if recommendation["type"] in ["heating", "hot_water_tank_insulation"]:
+            if recommendation["type"] in ["heating", "hot_water_tank_insulation", "heating_control"]:
                 # We update the data, as defined in the recommendaton
 
                 simulation_config = recommendation["simulation_config"]
@@ -364,7 +364,8 @@ class Property:
                 "internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation",
                 "loft_insulation", "room_roof_insulation", "flat_roof_insulation",
                 "solid_floor_insulation", "suspended_floor_insulation", "exposed_floor_insulation",
-                "windows_glazing", "solar_pv", "heating", "hot_water_tank_insulation"
+                "windows_glazing", "solar_pv", "heating", "hot_water_tank_insulation",
+                "heating_control",
             ]:
                 raise NotImplementedError("Implement me, given type %s" % recommendation["type"])
 

From f2cec8de11305c7d763a712050f0da685001bd7f Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 3 Apr 2024 16:30:45 +0100
Subject: [PATCH 172/248] fixed description for ttaz

---
 recommendations/HeatingControlRecommender.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/recommendations/HeatingControlRecommender.py b/recommendations/HeatingControlRecommender.py
index e224f243..7010ad53 100644
--- a/recommendations/HeatingControlRecommender.py
+++ b/recommendations/HeatingControlRecommender.py
@@ -162,6 +162,7 @@ class HeatingControlRecommender:
         self.recommendation.append(
             {
                 "type": "heating_control",
+                "parts": [],
                 "description": "upgrade heating controls to Room thermostat, programmer and TRVs",
                 **self.costs.roomstat_programmer_trvs(
                     number_heated_rooms=int(self.property.data["number-heated-rooms"]),
@@ -213,7 +214,8 @@ class HeatingControlRecommender:
         self.recommendation.append(
             {
                 "type": "heating_control",
-                "description": "upgrade heating controls to Room thermostat, programmer and TRVs",
+                "parts": [],
+                "description": "Upgrade heating controls to Time and Temperature Zone Controls",
                 **self.costs.time_and_temperature_zone_control(
                     number_heated_rooms=int(self.property.data["number-heated-rooms"])
                 ),

From 519dc6cfcb31ce4093ae0e6cace03ba30920e5e7 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 3 Apr 2024 19:17:27 +0100
Subject: [PATCH 173/248] added off-gas property recommendations

---
 backend/app/plan/router.py                   |   1 +
 etl/customers/gla_croydon_demo/asset_list.py |  42 +++-
 etl/customers/gla_croydon_demo/slides.py     | 200 ++++++++++++++++++-
 recommendations/HeatingControlRecommender.py |   2 +-
 recommendations/HeatingRecommender.py        |  12 +-
 5 files changed, 247 insertions(+), 10 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 50b8a837..4868749d 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -389,6 +389,7 @@ async def trigger_plan(body: PlanTriggerRequest):
 
         # Commit final changes
         session.commit()
+
     except IntegrityError:
         logger.error("Database integrity error occurred", exc_info=True)
         session.rollback()
diff --git a/etl/customers/gla_croydon_demo/asset_list.py b/etl/customers/gla_croydon_demo/asset_list.py
index 3a3f02a3..52e9422c 100644
--- a/etl/customers/gla_croydon_demo/asset_list.py
+++ b/etl/customers/gla_croydon_demo/asset_list.py
@@ -4,6 +4,23 @@ from utils.s3 import save_csv_to_s3
 USER_ID = 8
 PORTFOLIO_ID = 67
 
+archetype_1_uprns = [100020604138, 200001188299, 100020578756, 200001187196, 200001192253, 100020581792, 200001188304,
+                     100020625813, 100020618060, 100020585305, 100020617489, 100020615039, 100020618076, 100020588913,
+                     200001187197, 100020671205, 100020576940, 100020619814, 100020576472, 100020618083]
+archetype_2_uprns = [100020698027, 10001007455, 100020653785, 10090383198, 100020665632, 100020620659, 100020615603,
+                     100020609610, 100020625597, 100020665656, 100020665640, 100020587905, 100020665630, 100020624351,
+                     100020625451, 100020624348, 100020666735, 100020653786, 100020576458, 100020657902, 100020624350,
+                     100020637405, 100020666734, 100020616325, 100020666716, 100020653783, 100020665645, 100020642337,
+                     100020665638, 100022904981, 100020688226, 100020630285, 100020626800, 100020665634, 100022907528,
+                     100020665652, 100020624347, 100020666721, 100020585002, 10014055968, 10001008257, 100020621438,
+                     100020576459, 100020665643, 100020665654, 100022917303]
+archetype_3_uprns = [100020577523, 100020616446, 100020605342, 100020594652, 100020585394, 100020601138, 100020597485,
+                     100020614883, 100020633162, 100020697787, 200001185785, 100020646842, 100020581449, 100020595611,
+                     100020641814, 100020575611, 100020652986, 100020654671, 100020647336, 100020610518, 100020607980,
+                     100020692380, 100020581690]
+archetype_4_uprns = [100020650603, 100020582907, 100020605116, 100020650607, 100020589325, 100020655500, 100020642537,
+                     200001187539, 100020631683, 100020610165, 100020596436, 100020598277, 100020660228]
+
 
 def app():
     """
@@ -84,14 +101,15 @@ def app():
     archetype_2_sample_asset_list["ARCHETYPE"] = "Archetype 2"
 
     # Archetype 3: defined below:
-    # 1) EPC F
+    # 1) EPC E or below
     # 2) Solid brick wall
     # 3) House
     # 4) Pitched roof with no insulation
     # Just 7 properties (more expensive to retrofit)
     archetype_3_sample = epc_data[
         epc_data["PROPERTY_TYPE"].isin(["House"]) &
-        (epc_data["CURRENT_ENERGY_RATING"].isin(["F", "G"])) &
+        (epc_data["CURRENT_ENERGY_RATING"].isin(["E", "F", "G"])) &
+        epc_data["WALLS_DESCRIPTION"].isin(["Solid brick, as built, no insulation (assumed)"]) &
         epc_data["ROOF_DESCRIPTION"].isin(
             [
                 "Pitched, no insulation",
@@ -119,7 +137,6 @@ def app():
     archetype_4_sample_asset_list = archetype_4_sample[["UPRN", "ADDRESS1", "POSTCODE"]].copy()
     archetype_4_sample_asset_list["ARCHETYPE"] = "Archetype 4"
 
-    # 104 total properties
     asset_list = pd.concat(
         [
             archetype_1_sample_asset_list,
@@ -152,6 +169,25 @@ def app():
             ]
         )
     ]
+    # We have slightly too many properties, so we take a random sample of each archetype
+    # achetype_1_size = 20
+    # achetype_2_size = 46
+    # achetype_3_size = 23
+    # achetype_4_size = 13
+    # archetype_1_uprns = asset_list[asset_list["archetype"] == "Archetype 1"]["uprn"].sample(
+    #     int(achetype_1_size)
+    # ).tolist()
+    # archetype_2_uprns = asset_list[asset_list["archetype"] == "Archetype 2"]["uprn"].sample(
+    #     int(achetype_2_size)
+    # ).tolist()
+    # archetype_3_uprns = asset_list[asset_list["archetype"] == "Archetype 3"]["uprn"].sample(
+    #     int(achetype_3_size)
+    # ).tolist()
+    # archetype_4_uprns = asset_list[asset_list["archetype"] == "Archetype 4"]["uprn"].sample(
+    #     int(achetype_4_size)
+    # ).tolist()
+    uprns_to_keep = archetype_1_uprns + archetype_2_uprns + archetype_3_uprns + archetype_4_uprns
+    asset_list = asset_list[asset_list["uprn"].isin(uprns_to_keep)]
 
     filename = f"{USER_ID}/{PORTFOLIO_ID}/inputs.csv"
     save_csv_to_s3(
diff --git a/etl/customers/gla_croydon_demo/slides.py b/etl/customers/gla_croydon_demo/slides.py
index 1d217226..e6c4b5b8 100644
--- a/etl/customers/gla_croydon_demo/slides.py
+++ b/etl/customers/gla_croydon_demo/slides.py
@@ -27,8 +27,24 @@ SAP_TARGET_1 = 69
 SAP_TARGET_2 = 100
 CUSTOMER_KEY = "gla-demo"
 
+# Sample UPRNS
+archetype_1_sample = ['100020618076', '100020619814', '100020581792', '100020671205', '100020585305', '100020606853',
+                      '100020625813', '100020618042', '200001188304', '200001187196', '100020603026', '100020604138',
+                      '100020615039', '200001188299', '100020618060', '200001192253']
 
-def app():
+archetype_2_sample = ['100020616325', '100020665634', '100020665654', '100020665638', '100020587936', '100020587905',
+                      '100020665645', '100020625597', '100022907528', '100020665630', '100020624348', '10001008257',
+                      '100020666735', '100020698027', '100020624351', '100020665656', '100020666716', '100020665632',
+                      '100020666715', '100020645639', '200001191309', '100020625451', '100020624347', '100020665658',
+                      '100020585002', '100022917303', '100020665650', '100020667737', '100020620659', '100022904981',
+                      '100020642337', '100020657902', '100020615603', '100020626800', '100020665647', '100020665643']
+
+archetype_3_sample = ['100020607980', '200001193193', '100020581690', '100020665611']
+archetype_4_sample = ['100020631683', '100020607667', '100020660228', '100020605116', '200001187539', '100020582907',
+                      '100020610165', '100020650607', '100020655500', '100020598277', '100020642537']
+
+
+def scenario_1():
     # Connect to database
     session = sessionmaker(bind=db_engine)()
 
@@ -36,9 +52,7 @@ def app():
     # Get the data we need
     ########################################################################
 
-    # TODO: Update to portfolio desired
-    # portfolio_id = PORTFOLIO_ID_1
-    portfolio_id = PORTFOLIO_ID_2
+    portfolio_id = PORTFOLIO_ID_1
 
     # Get the asset list
     asset_list = read_csv_from_s3(
@@ -157,3 +171,181 @@ def app():
     # Overview
     ########################
     overview_totals = recommendations_summary.sum()
+
+
+def make_sample():
+    # sample_proportion = 67 / 102
+    # Get the asset list
+    asset_list = read_csv_from_s3(
+        "retrofit-plan-inputs-dev", f"{USER_ID}/67/inputs.csv"
+    )
+    asset_list = pd.DataFrame(asset_list)
+
+    # From the asset list, we deduce how many properties we need
+    archetype_1_sample_size = 16
+    archetype_2_sample_size = 36
+    archetype_3_sample_size = 4
+    archetype_4_sample_size = 11
+
+    # We take the sample and we'll keep the uprns static
+    archetype_1_sample = asset_list[
+        asset_list["archetype"] == "Archetype 1"
+        ].sample(archetype_1_sample_size)["uprn"].to_list()
+
+    archetype_2_sample = asset_list[
+        asset_list["archetype"] == "Archetype 2"
+        ].sample(archetype_2_sample_size)["uprn"].to_list()
+
+    archetype_3_sample = asset_list[
+        asset_list["archetype"] == "Archetype 3"
+        ].sample(archetype_3_sample_size)["uprn"].to_list()
+
+    archetype_4_sample = asset_list[
+        asset_list["archetype"] == "Archetype 4"
+        ].sample(archetype_4_sample_size)["uprn"].to_list()
+
+
+def scenario_2():
+    # Connect to database
+    session = sessionmaker(bind=db_engine)()
+
+    ########################################################################
+    # Get the data we need
+    ########################################################################
+
+    portfolio_id = PORTFOLIO_ID_2
+
+    # Get the asset list
+    asset_list = read_csv_from_s3(
+        "retrofit-plan-inputs-dev", f"{USER_ID}/67/inputs.csv"
+    )
+    asset_list = pd.DataFrame(asset_list)
+
+    sample_uprns = archetype_1_sample + archetype_2_sample + archetype_3_sample + archetype_4_sample
+
+    # Filter on sample uprns
+    asset_list = asset_list[asset_list["uprn"].astype(str).isin(sample_uprns)]
+
+    # Get the properties for the portfolio
+    properties = get_properties_with_default_recommendations(session, portfolio_id)
+    properties_df = pd.DataFrame(properties)
+    properties_df = properties_df[properties_df["uprn"].astype(str).isin(sample_uprns)]
+
+    # We now pull the data for the property details
+    property_details = get_property_details_by_portfolio_id(session, portfolio_id)
+    property_details_df = pd.DataFrame(property_details)
+    property_details_df = property_details_df[property_details_df["property_id"].isin(properties_df["id"].values)]
+    # We estimate bills based on the adjusted_energy_consumption
+    property_details_df["energy_bill"] = property_details_df["adjusted_energy_consumption"].apply(
+        lambda x: AnnualBillSavings.calculate_annual_bill(x)
+    )
+    # Merge on uprn
+    property_details_df = property_details_df.merge(
+        properties_df[["uprn", "id"]].rename(columns={"id": "property_id"}),
+        on="property_id"
+    )
+
+    plans = get_plan_by_portfolio_id(session, portfolio_id)
+    plans_df = pd.DataFrame(plans)
+
+    # Unnest the recommendations. Each recommendation is a list of dictionaries
+    recommendations_exploded = properties_df["recommendations"].explode().tolist()
+    recommendations_df = pd.DataFrame([r for r in recommendations_exploded if not pd.isnull(r)])
+    # Add uprn on
+    recommendations_df = recommendations_df.merge(
+        properties_df[["uprn", "id"]].rename(columns={"id": "property_id"}),
+        how="left",
+        on="property_id"
+    )
+
+    recommendations_summary = create_recommendations_summary(
+        recommendations_df,
+        properties_df,
+        property_details_df,
+        SAP_TARGET_1
+    )
+
+    # Calculate % changes of energ, co2 and abs
+    recommendations_summary["carbon_percent_change"] = (
+        recommendations_summary["total_carbon"] / recommendations_summary["current_co2"]
+    )
+
+    recommendations_summary["energy_percent_change"] = (
+        recommendations_summary["adjusted_heat_demand"] / recommendations_summary["current_energy"]
+    )
+
+    recommendations_summary["bills_percent_change"] = (
+        recommendations_summary["total_bill_savings"] / recommendations_summary["current_energy_bill"]
+    )
+
+    ########################
+    # Overview
+    ########################
+    overview_totals = recommendations_summary.sum()
+    overview_means = recommendations_summary.mean()
+
+    ########################
+    # Measures
+    ########################
+    measures_count = recommendations_df.groupby("type")["id"].count().reset_index()
+
+    z = recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_3_sample)]
+
+    recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_3_sample)]["type"].value_counts()
+
+    # Summary information by each archetype
+    ########################
+    # Archetype 1
+    ########################
+    archetype_1 = asset_list[asset_list["archetype"] == "Archetype 1"]
+    recommendations_arch_1_summary = recommendations_summary[
+        recommendations_summary["uprn"].astype(str).isin(archetype_1["uprn"].values)
+    ]
+
+    # Take the mean, median and maximum of each value
+    arch_1_recommendation_min = recommendations_arch_1_summary.min()
+    arch_1_recommendation_max = recommendations_arch_1_summary.max()
+    arch_1_recommendation_means = recommendations_arch_1_summary.mean()
+
+    ########################
+    # Archetype 2
+    ########################
+    archetype_2 = asset_list[asset_list["archetype"] == "Archetype 2"]
+    recommendations_arch_2_summary = recommendations_summary[
+        recommendations_summary["uprn"].astype(str).isin(archetype_2["uprn"].values)
+    ]
+
+    # Take the mean, median and maximum of each value
+    arch_2_recommendation_min = recommendations_arch_2_summary.min()
+    arch_2_recommendation_max = recommendations_arch_2_summary.max()
+    arch_2_recommendation_means = recommendations_arch_2_summary.mean().round(2)
+
+    ########################
+    # Archetype 3
+    ########################
+    archetype_3 = asset_list[asset_list["archetype"] == "Archetype 3"]
+    recommendations_arch_3_summary = recommendations_summary[
+        recommendations_summary["uprn"].astype(str).isin(archetype_3["uprn"].values)
+    ]
+
+    # Take the mean, median and maximum of each value
+    arch_3_recommendation_min = recommendations_arch_3_summary.min()
+    arch_3_recommendation_max = recommendations_arch_3_summary.max()
+    arch_3_recommendation_means = recommendations_arch_3_summary.mean()
+
+    ########################
+    # Archetype 4
+    ########################
+    archetype_4 = asset_list[asset_list["archetype"] == "Archetype 4"]
+    recommendations_arch_4_summary = recommendations_summary[
+        recommendations_summary["uprn"].astype(str).isin(archetype_4["uprn"].values)
+    ]
+
+    # Take the mean, median and maximum of each value
+    arch_4_recommendation_min = recommendations_arch_4_summary.min()
+    arch_4_recommendation_max = recommendations_arch_4_summary.max()
+    arch_4_recommendation_means = recommendations_arch_4_summary.mean()
+
+    property_details_df[
+        property_details_df["uprn"].astype(str).isin(archetype_4["uprn"].values)
+    ]["total_floor_area"].mean()
diff --git a/recommendations/HeatingControlRecommender.py b/recommendations/HeatingControlRecommender.py
index 7010ad53..95b5e3b1 100644
--- a/recommendations/HeatingControlRecommender.py
+++ b/recommendations/HeatingControlRecommender.py
@@ -215,7 +215,7 @@ class HeatingControlRecommender:
             {
                 "type": "heating_control",
                 "parts": [],
-                "description": "Upgrade heating controls to Time and Temperature Zone Controls",
+                "description": "Upgrade heating controls to Smart Thermostats, room sensors and smart radiator valves",
                 **self.costs.time_and_temperature_zone_control(
                     number_heated_rooms=int(self.property.data["number-heated-rooms"])
                 ),
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 9658aaa3..8b20c0cd 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -19,9 +19,17 @@ class HeatingRecommender:
         self.recommendations = []
         # This first iteration of the recommender will provide very basic recommendation
         # We recommend heating controls based on the main heating system
-        if self.property.main_heating["clean_description"] in [
+
+        has_electric_heating_description = self.property.main_heating["clean_description"] in [
             "Room heaters, electric", "Electric storage heaters", "Electric storage heaters, radiators"
-        ]:
+        ]
+
+        no_heating_no_mains = (
+            self.property.main_heating["clean_description"] in ["No system present, electric heaters assumed"] and
+            not self.property.data["mains-gas-flag"]
+        )
+
+        if has_electric_heating_description or no_heating_no_mains:
             # Recommend high heat retention storage heaters
             self.recommend_electric_storage_heaters(phase=phase, system_change=True, heating_controls_only=False)
             return

From 47ebf866ee141c8ed91a7191b5bb75ef49246950 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 3 Apr 2024 20:02:37 +0100
Subject: [PATCH 174/248] fixed sample in slides

---
 backend/app/plan/router.py               |  1 -
 etl/customers/gla_croydon_demo/slides.py | 35 +++++++++++----------
 recommendations/HeatingRecommender.py    | 39 ++++++++++++++++++++++--
 3 files changed, 55 insertions(+), 20 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 4868749d..50b8a837 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -389,7 +389,6 @@ async def trigger_plan(body: PlanTriggerRequest):
 
         # Commit final changes
         session.commit()
-
     except IntegrityError:
         logger.error("Database integrity error occurred", exc_info=True)
         session.rollback()
diff --git a/etl/customers/gla_croydon_demo/slides.py b/etl/customers/gla_croydon_demo/slides.py
index e6c4b5b8..cbd1f7e4 100644
--- a/etl/customers/gla_croydon_demo/slides.py
+++ b/etl/customers/gla_croydon_demo/slides.py
@@ -28,20 +28,22 @@ SAP_TARGET_2 = 100
 CUSTOMER_KEY = "gla-demo"
 
 # Sample UPRNS
-archetype_1_sample = ['100020618076', '100020619814', '100020581792', '100020671205', '100020585305', '100020606853',
-                      '100020625813', '100020618042', '200001188304', '200001187196', '100020603026', '100020604138',
-                      '100020615039', '200001188299', '100020618060', '200001192253']
+archetype_1_sample = ['100020604138', '200001192253', '100020581792', '100020576940', '200001187196', '100020618060',
+                      '100020625813', '100020578756', '100020618076', '200001187197', '100020619814', '100020617489',
+                      '100020588913']
 
-archetype_2_sample = ['100020616325', '100020665634', '100020665654', '100020665638', '100020587936', '100020587905',
-                      '100020665645', '100020625597', '100022907528', '100020665630', '100020624348', '10001008257',
-                      '100020666735', '100020698027', '100020624351', '100020665656', '100020666716', '100020665632',
-                      '100020666715', '100020645639', '200001191309', '100020625451', '100020624347', '100020665658',
-                      '100020585002', '100022917303', '100020665650', '100020667737', '100020620659', '100022904981',
-                      '100020642337', '100020657902', '100020615603', '100020626800', '100020665647', '100020665643']
+archetype_2_sample = ['100020585002', '100020615603', '100020665652', '100020626800', '100020624347', '100020624348',
+                      '100020576459', '10001007455', '100020666716', '100020609610', '100020625451', '100020625597',
+                      '100020624351', '100020665634', '100020624350', '100020665640', '100020665632', '100022917303',
+                      '100020665656', '10014055968', '100020630285', '100020665638', '100020616325', '100020637405',
+                      '100020698027', '100020657902', '100020688226', '100020653786', '100020642337', '100020665643']
 
-archetype_3_sample = ['100020607980', '200001193193', '100020581690', '100020665611']
-archetype_4_sample = ['100020631683', '100020607667', '100020660228', '100020605116', '200001187539', '100020582907',
-                      '100020610165', '100020650607', '100020655500', '100020598277', '100020642537']
+archetype_3_sample = ['100020594652', '100020697787', '100020577523', '100020633162', '100020601138', '100020595611',
+                      '100020597485', '100020614883', '100020605342', '100020654671', '100020575611', '100020607980',
+                      '200001185785', '100020616446', '100020692380']
+
+archetype_4_sample = ['100020596436', '100020610165', '200001187539', '100020655500', '100020582907', '100020598277',
+                      '100020650607', '100020605116', '100020650603']
 
 
 def scenario_1():
@@ -182,10 +184,11 @@ def make_sample():
     asset_list = pd.DataFrame(asset_list)
 
     # From the asset list, we deduce how many properties we need
-    archetype_1_sample_size = 16
-    archetype_2_sample_size = 36
-    archetype_3_sample_size = 4
-    archetype_4_sample_size = 11
+    # Need to figure out the sizes
+    archetype_1_sample_size = 13
+    archetype_2_sample_size = 30
+    archetype_3_sample_size = 15
+    archetype_4_sample_size = 9
 
     # We take the sample and we'll keep the uprns static
     archetype_1_sample = asset_list[
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 8b20c0cd..9d2e99e3 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -4,6 +4,7 @@ from recommendations.Costs import Costs
 from recommendations.recommendation_utils import check_simulation_difference
 from backend.Property import Property
 from etl.epc_clean.epc_attributes.MainheatAttributes import MainHeatAttributes
+from etl.epc_clean.epc_attributes.HotWaterAttributes import HotWaterAttributes
 from recommendations.HeatingControlRecommender import HeatingControlRecommender
 
 
@@ -35,7 +36,14 @@ class HeatingRecommender:
             return
 
         # if the property has mains heating with boiler and radiators, we recommend optimal heating controls
-        if self.property.main_heating["clean_description"] in ["Boiler and radiators, mains gas"]:
+        has_boiler = self.property.main_heating["clean_description"] in ["Boiler and radiators, mains gas"]
+
+        # We also recommend a boiler when the property has no heating system but does have access to mains gas
+        no_heating_has_mains = self.property.main_heating["clean_description"] in [
+            'No system present, electric heaters assumed'
+        ] and self.property.data["mains-gas-flag"]
+
+        if has_boiler or no_heating_has_mains:
             self.recommend_boiler_upgrades(phase=phase)
             return
 
@@ -254,12 +262,37 @@ class HeatingRecommender:
             )
 
             # If heating and hot water come from the mains, we need a combi boiler, otherwise we need a regular boiler
-            is_combi = self.property.hotwater["clean_description"] in ["From main system"]
+            hotwater_from_mains = self.property.hotwater["clean_description"] in ["From main system"]
+            access_to_mains_no_system = self.property.main_heating["clean_description"] in [
+                'No system present, electric heaters assumed'
+            ] and self.property.data["mains-gas-flag"]
+            is_combi = hotwater_from_mains or access_to_mains_no_system
             if is_combi:
                 description = "Upgrade to a low carbon combi boiler"
             else:
                 description = "Upgrade to a low carbon boiler"
 
+            simulation_config = {"mainheat_energy_eff_ending": "Good"}
+            if access_to_mains_no_system:
+                # Installation of a boiler improves the hot water system so we need to reflect this in
+                # the outcome of the recommendation
+                heating_ending_config = MainHeatAttributes("Boiler and radiators, mains gas").process()
+                hotwater_ending_config = HotWaterAttributes("From main system").process()
+
+                heating_simulation_config = check_simulation_difference(
+                    new_config=heating_ending_config, old_config=self.property.main_heating
+                )
+                hotwater_simulation_config = check_simulation_difference(
+                    new_config=hotwater_ending_config, old_config=self.property.hotwater
+                )
+
+                simulation_config = {
+                    **simulation_config,
+                    **heating_simulation_config,
+                    **hotwater_simulation_config,
+                    "hot_water_energy_eff_ending": "Good"
+                }
+
             self.recommendations.append(
                 {
                     "phase": recommendation_phase,
@@ -271,7 +304,7 @@ class HeatingRecommender:
                     "starting_u_value": None,
                     "new_u_value": None,
                     "sap_points": None,
-                    "simulation_config": {"mainheat_energy_eff_ending": "Good"},
+                    "simulation_config": simulation_config,
                     **self.costs.low_carbon_boiler(is_combi=is_combi, size=f"{boiler_size}kw")
                 }
             )

From 93830f90bb785a3f7f17e77a1ef8285d4aed966e Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 4 Apr 2024 16:35:14 +0100
Subject: [PATCH 175/248] removed low carbon from boiler terminology

---
 backend/ml_models/AnnualBillSavings.py   |   6 +-
 etl/customers/gla_croydon_demo/slides.py | 424 ++++++++++++++++++++++-
 recommendations/HeatingRecommender.py    |  15 +-
 3 files changed, 431 insertions(+), 14 deletions(-)

diff --git a/backend/ml_models/AnnualBillSavings.py b/backend/ml_models/AnnualBillSavings.py
index 9be9d78a..99d67126 100644
--- a/backend/ml_models/AnnualBillSavings.py
+++ b/backend/ml_models/AnnualBillSavings.py
@@ -19,7 +19,8 @@ class AnnualBillSavings:
     PRICE_FACTOR = 0.09549999999999999
 
     # Daily standard charge, based on average across England, Scotland and Wales, and includes VAT
-    DAILY_STANDARD_CHARGE = 0.3143
+    DAILY_STANDARD_CHARGE_GAS = 0.3143
+    DAILY_STANDARD_CHARGE_ELECTRICITY = 0.601
 
     EPC_BANDS = ["G", "F", "E", "D", "C", "B", "A"]
 
@@ -45,11 +46,12 @@ class AnnualBillSavings:
     def calculate_annual_bill(cls, kwh):
         """
         This method will estimate the total annual bill for a property
+        It assumes both gas & electricity are used, so both daily standing charges apply for all 365 days
         :param kwh: The total kwh consumption
         :return: An estimate for annual bill
         """
 
-        return cls.PRICE_FACTOR * kwh + cls.DAILY_STANDARD_CHARGE * 365
+        return cls.PRICE_FACTOR * kwh + (cls.DAILY_STANDARD_CHARGE_GAS + cls.DAILY_STANDARD_CHARGE_ELECTRICITY) * 365
 
     @classmethod
     def adjust_energy_to_metered(cls, epc_energy_consumption, current_epc_rating):
diff --git a/etl/customers/gla_croydon_demo/slides.py b/etl/customers/gla_croydon_demo/slides.py
index cbd1f7e4..9f791bbd 100644
--- a/etl/customers/gla_croydon_demo/slides.py
+++ b/etl/customers/gla_croydon_demo/slides.py
@@ -112,6 +112,49 @@ def scenario_1():
         recommendations_summary["total_bill_savings"] / recommendations_summary["current_energy_bill"]
     )
 
+    ########################
+    # Overview
+    ########################
+    overview_totals = recommendations_summary.sum()
+    overview_means = recommendations_summary.mean()
+
+    ########################
+    # Measures
+    ########################
+    measures_count = recommendations_df.groupby("type")["id"].count().reset_index()
+    wall_insulation_measures = measures_count[
+        measures_count["type"].isin(["cavity_wall_insulation", "external_wall_insulation", "internal_wall_insulation"])
+    ]["id"].sum()
+    ventilation_measures = measures_count[
+        measures_count["type"].isin(["mechanical_ventilation"])
+    ]["id"].sum()
+    roof_insulation_measures = measures_count[
+        measures_count["type"].isin(["loft_insulation", "flat_roof_insulation"])
+    ]["id"].sum()
+    floor_insulation_measures = measures_count[
+        measures_count["type"].isin(["solid_floor_insulation", "suspended_floor_insulation"])
+    ]["id"].sum()
+    windows = measures_count[
+        measures_count["type"].isin(["windows_glazing"])
+    ]["id"].sum()
+    heating = measures_count[
+        measures_count["type"].isin(["heating"])
+    ]["id"].sum()
+    heating_controls = measures_count[
+        measures_count["type"].isin(["heating_control"])
+    ]["id"].sum()
+    solar = measures_count[
+        measures_count["type"].isin(["solar_pv"])
+    ]["id"].sum()
+    other = measures_count[
+        ~measures_count["type"].isin([
+            "cavity_wall_insulation", "external_wall_insulation", "internal_wall_insulation",
+            "loft_insulation", "flat_roof_insulation", "solid_floor_insulation",
+            "suspended_floor_insulation", "windows_glazing", "heating", "heating_control", "solar_pv",
+            "mechanical_ventilation"
+        ])
+    ]["id"].sum()
+
     # Summary information by each archetype
     ########################
     # Archetype 1
@@ -121,10 +164,54 @@ def scenario_1():
         recommendations_summary["uprn"].astype(str).isin(archetype_1["uprn"].values)
     ]
 
+    arch_1_property_details = property_details_df[
+        property_details_df["uprn"].astype(str).isin(archetype_1["uprn"].values)
+    ]
+    arch_1_property_details["co2_emissions"].sum() / property_details_df["co2_emissions"].sum()
+
     # Take the mean, median and maximum of each value
-    arch_1_recommendation_min = recommendations_arch_1_summary.min()
-    arch_1_recommendation_max = recommendations_arch_1_summary.max()
-    arch_1_recommendation_means = recommendations_arch_1_summary.mean()
+    cols_to_keep = ["total_cost", "total_carbon", "total_bill_savings", "total_sap_points", "adjusted_heat_demand",
+                    "energy_percent_change", "carbon_percent_change", "bills_percent_change"]
+    arch_1_recommendation_min = recommendations_arch_1_summary.min()[cols_to_keep]
+    arch_1_recommendation_max = recommendations_arch_1_summary.max()[cols_to_keep]
+    arch_1_recommendation_means = recommendations_arch_1_summary.mean()[cols_to_keep]
+    arch_1_totals = recommendations_arch_1_summary.sum()[cols_to_keep]
+
+    annual_total_co2 = recommendations_arch_1_summary["total_carbon"].sum()
+    annual_total_bills = recommendations_arch_1_summary["total_bill_savings"].sum()
+    annual_total_energy_savings = recommendations_arch_1_summary["adjusted_heat_demand"].sum()
+    archetype_measures = \
+        recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_1["uprn"].values)].groupby("type")[
+            "id"].count().reset_index()
+
+    cost_text = (f"{round(arch_1_recommendation_means['total_cost'], 2)}: "
+                 f"{arch_1_recommendation_min['total_cost']} - {arch_1_recommendation_max['total_cost']}")
+
+    sap_text = (f"{round(arch_1_recommendation_means['total_sap_points'], 2)}: "
+                f"{arch_1_recommendation_min['total_sap_points']} - {arch_1_recommendation_max['total_sap_points']}")
+
+    energy_text = (f"{round(arch_1_recommendation_means['adjusted_heat_demand'], 2)}: "
+                   f"{arch_1_recommendation_min['adjusted_heat_demand']} - "
+                   f"{arch_1_recommendation_max['adjusted_heat_demand']}")
+
+    energy_percent_text = (f"{round(arch_1_recommendation_means['energy_percent_change'], 2)}: "
+                           f"{arch_1_recommendation_min['energy_percent_change']} - "
+                           f"{arch_1_recommendation_max['energy_percent_change']}")
+
+    carbon_text = (f"{round(arch_1_recommendation_means['total_carbon'], 2)}: "
+                   f"{arch_1_recommendation_min['total_carbon']} - {arch_1_recommendation_max['total_carbon']}")
+
+    carbon_percent_text = (f"{round(arch_1_recommendation_means['carbon_percent_change'], 2)}: "
+                           f"{arch_1_recommendation_min['carbon_percent_change']} - "
+                           f"{arch_1_recommendation_max['carbon_percent_change']}")
+
+    bill_text = (f"{round(arch_1_recommendation_means['total_bill_savings'], 2)}: "
+                 f"{arch_1_recommendation_min['total_bill_savings']} - "
+                 f"{arch_1_recommendation_max['total_bill_savings']}")
+
+    bill_percent_text = (f"{round(arch_1_recommendation_means['bills_percent_change'], 2)}: "
+                         f"{arch_1_recommendation_min['bills_percent_change']} - "
+                         f"{arch_1_recommendation_max['bills_percent_change']}")
 
     ########################
     # Archetype 2
@@ -134,11 +221,53 @@ def scenario_1():
         recommendations_summary["uprn"].astype(str).isin(archetype_2["uprn"].values)
     ]
 
+    arch_2_property_details = property_details_df[
+        property_details_df["uprn"].astype(str).isin(archetype_2["uprn"].values)
+    ]
+    arch_2_property_details["co2_emissions"].sum() / property_details_df["co2_emissions"].sum()
+
     # Take the mean, median and maximum of each value
     arch_2_recommendation_min = recommendations_arch_2_summary.min()
     arch_2_recommendation_max = recommendations_arch_2_summary.max()
     arch_2_recommendation_means = recommendations_arch_2_summary.mean().round(2)
 
+    total_cost = recommendations_arch_2_summary["total_cost"].sum()
+    annual_total_co2 = recommendations_arch_2_summary["total_carbon"].sum()
+    annual_total_bills = recommendations_arch_2_summary["total_bill_savings"].sum()
+    annual_total_energy_savings = recommendations_arch_2_summary["adjusted_heat_demand"].sum()
+    archetype_measures = \
+        recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_2["uprn"].values)].groupby("type")[
+            "id"].count().reset_index()
+
+    cost_text = (f"{round(arch_2_recommendation_means['total_cost'], 2)}: "
+                 f"{arch_2_recommendation_min['total_cost']} - {arch_2_recommendation_max['total_cost']}")
+
+    sap_text = (f"{round(arch_2_recommendation_means['total_sap_points'], 2)}: "
+                f"{arch_2_recommendation_min['total_sap_points']} - {arch_2_recommendation_max['total_sap_points']}")
+
+    energy_text = (f"{round(arch_2_recommendation_means['adjusted_heat_demand'], 2)}: "
+                   f"{arch_2_recommendation_min['adjusted_heat_demand']} - "
+                   f"{arch_2_recommendation_max['adjusted_heat_demand']}")
+
+    energy_percent_text = (f"{round(arch_2_recommendation_means['energy_percent_change'], 2)}: "
+                           f"{arch_2_recommendation_min['energy_percent_change']} - "
+                           f"{arch_2_recommendation_max['energy_percent_change']}")
+
+    carbon_text = (f"{round(arch_2_recommendation_means['total_carbon'], 2)}: "
+                   f"{arch_2_recommendation_min['total_carbon']} - {arch_2_recommendation_max['total_carbon']}")
+
+    carbon_percent_text = (f"{round(arch_2_recommendation_means['carbon_percent_change'], 2)}: "
+                           f"{arch_2_recommendation_min['carbon_percent_change']} - "
+                           f"{arch_2_recommendation_max['carbon_percent_change']}")
+
+    bill_text = (f"{round(arch_2_recommendation_means['total_bill_savings'], 2)}: "
+                 f"{arch_2_recommendation_min['total_bill_savings']} - "
+                 f"{arch_2_recommendation_max['total_bill_savings']}")
+
+    bill_percent_text = (f"{round(arch_2_recommendation_means['bills_percent_change'], 2)}: "
+                         f"{arch_2_recommendation_min['bills_percent_change']} - "
+                         f"{arch_2_recommendation_max['bills_percent_change']}")
+
     ########################
     # Archetype 3
     ########################
@@ -147,11 +276,53 @@ def scenario_1():
         recommendations_summary["uprn"].astype(str).isin(archetype_3["uprn"].values)
     ]
 
+    arch_3_property_details = property_details_df[
+        property_details_df["uprn"].astype(str).isin(archetype_3["uprn"].values)
+    ]
+    arch_3_property_details["co2_emissions"].sum() / property_details_df["co2_emissions"].sum()
+
     # Take the mean, median and maximum of each value
     arch_3_recommendation_min = recommendations_arch_3_summary.min()
     arch_3_recommendation_max = recommendations_arch_3_summary.max()
     arch_3_recommendation_means = recommendations_arch_3_summary.mean()
 
+    total_cost = recommendations_arch_3_summary["total_cost"].sum()
+    annual_total_co2 = recommendations_arch_3_summary["total_carbon"].sum()
+    annual_total_bills = recommendations_arch_3_summary["total_bill_savings"].sum()
+    annual_total_energy_savings = recommendations_arch_3_summary["adjusted_heat_demand"].sum()
+    archetype_measures = \
+        recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_3["uprn"].values)].groupby("type")[
+            "id"].count().reset_index()
+
+    cost_text = (f"{round(arch_3_recommendation_means['total_cost'], 2)}: "
+                 f"{arch_3_recommendation_min['total_cost']} - {arch_3_recommendation_max['total_cost']}")
+
+    sap_text = (f"{round(arch_3_recommendation_means['total_sap_points'], 2)}: "
+                f"{arch_3_recommendation_min['total_sap_points']} - {arch_3_recommendation_max['total_sap_points']}")
+
+    energy_text = (f"{round(arch_3_recommendation_means['adjusted_heat_demand'], 2)}: "
+                   f"{arch_3_recommendation_min['adjusted_heat_demand']} - "
+                   f"{arch_3_recommendation_max['adjusted_heat_demand']}")
+
+    energy_percent_text = (f"{round(arch_3_recommendation_means['energy_percent_change'], 2)}: "
+                           f"{arch_3_recommendation_min['energy_percent_change']} - "
+                           f"{arch_3_recommendation_max['energy_percent_change']}")
+
+    carbon_text = (f"{round(arch_3_recommendation_means['total_carbon'], 2)}: "
+                   f"{arch_3_recommendation_min['total_carbon']} - {arch_3_recommendation_max['total_carbon']}")
+
+    carbon_percent_text = (f"{round(arch_3_recommendation_means['carbon_percent_change'], 2)}: "
+                           f"{arch_3_recommendation_min['carbon_percent_change']} - "
+                           f"{arch_3_recommendation_max['carbon_percent_change']}")
+
+    bill_text = (f"{round(arch_3_recommendation_means['total_bill_savings'], 2)}: "
+                 f"{arch_3_recommendation_min['total_bill_savings']} - "
+                 f"{arch_3_recommendation_max['total_bill_savings']}")
+
+    bill_percent_text = (f"{round(arch_3_recommendation_means['bills_percent_change'], 2)}: "
+                         f"{arch_3_recommendation_min['bills_percent_change']} - "
+                         f"{arch_3_recommendation_max['bills_percent_change']}")
+
     ########################
     # Archetype 4
     ########################
@@ -160,14 +331,52 @@ def scenario_1():
         recommendations_summary["uprn"].astype(str).isin(archetype_4["uprn"].values)
     ]
 
+    arch_4_property_details = property_details_df[
+        property_details_df["uprn"].astype(str).isin(archetype_4["uprn"].values)
+    ]
+    arch_4_property_details["co2_emissions"].sum() / property_details_df["co2_emissions"].sum()
+
     # Take the mean, median and maximum of each value
     arch_4_recommendation_min = recommendations_arch_4_summary.min()
     arch_4_recommendation_max = recommendations_arch_4_summary.max()
     arch_4_recommendation_means = recommendations_arch_4_summary.mean()
 
-    property_details_df[
-        property_details_df["uprn"].astype(str).isin(archetype_4["uprn"].values)
-    ]["total_floor_area"].mean()
+    total_cost = recommendations_arch_4_summary["total_cost"].sum()
+    annual_total_co2 = recommendations_arch_4_summary["total_carbon"].sum()
+    annual_total_bills = recommendations_arch_4_summary["total_bill_savings"].sum()
+    annual_total_energy_savings = recommendations_arch_4_summary["adjusted_heat_demand"].sum()
+    archetype_measures = \
+        recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_4["uprn"].values)].groupby("type")[
+            "id"].count().reset_index()
+
+    cost_text = (f"{round(arch_4_recommendation_means['total_cost'], 2)}: "
+                 f"{arch_4_recommendation_min['total_cost']} - {arch_4_recommendation_max['total_cost']}")
+
+    sap_text = (f"{round(arch_4_recommendation_means['total_sap_points'], 2)}: "
+                f"{arch_4_recommendation_min['total_sap_points']} - {arch_4_recommendation_max['total_sap_points']}")
+
+    energy_text = (f"{round(arch_4_recommendation_means['adjusted_heat_demand'], 2)}: "
+                   f"{arch_4_recommendation_min['adjusted_heat_demand']} - "
+                   f"{arch_4_recommendation_max['adjusted_heat_demand']}")
+
+    energy_percent_text = (f"{round(arch_4_recommendation_means['energy_percent_change'], 2)}: "
+                           f"{arch_4_recommendation_min['energy_percent_change']} - "
+                           f"{arch_4_recommendation_max['energy_percent_change']}")
+
+    carbon_text = (f"{round(arch_4_recommendation_means['total_carbon'], 2)}: "
+                   f"{arch_4_recommendation_min['total_carbon']} - {arch_4_recommendation_max['total_carbon']}")
+
+    carbon_percent_text = (f"{round(arch_4_recommendation_means['carbon_percent_change'], 2)}: "
+                           f"{arch_4_recommendation_min['carbon_percent_change']} - "
+                           f"{arch_4_recommendation_max['carbon_percent_change']}")
+
+    bill_text = (f"{round(arch_4_recommendation_means['total_bill_savings'], 2)}: "
+                 f"{arch_4_recommendation_min['total_bill_savings']} - "
+                 f"{arch_4_recommendation_max['total_bill_savings']}")
+
+    bill_percent_text = (f"{round(arch_4_recommendation_means['bills_percent_change'], 2)}: "
+                         f"{arch_4_recommendation_min['bills_percent_change']} - "
+                         f"{arch_4_recommendation_max['bills_percent_change']}")
 
     ########################
     # Overview
@@ -291,6 +500,38 @@ def scenario_2():
     # Measures
     ########################
     measures_count = recommendations_df.groupby("type")["id"].count().reset_index()
+    wall_insulation_measures = measures_count[
+        measures_count["type"].isin(["cavity_wall_insulation", "external_wall_insulation", "internal_wall_insulation"])
+    ]["id"].sum()
+    ventilation_measures = measures_count[
+        measures_count["type"].isin(["mechanical_ventilation"])
+    ]["id"].sum()
+    roof_insulation_measures = measures_count[
+        measures_count["type"].isin(["loft_insulation", "flat_roof_insulation"])
+    ]["id"].sum()
+    floor_insulation_measures = measures_count[
+        measures_count["type"].isin(["solid_floor_insulation", "suspended_floor_insulation"])
+    ]["id"].sum()
+    windows = measures_count[
+        measures_count["type"].isin(["windows_glazing"])
+    ]["id"].sum()
+    heating = measures_count[
+        measures_count["type"].isin(["heating"])
+    ]["id"].sum()
+    heating_controls = measures_count[
+        measures_count["type"].isin(["heating_control"])
+    ]["id"].sum()
+    solar = measures_count[
+        measures_count["type"].isin(["solar_pv"])
+    ]["id"].sum()
+    other = measures_count[
+        ~measures_count["type"].isin([
+            "cavity_wall_insulation", "external_wall_insulation", "internal_wall_insulation",
+            "loft_insulation", "flat_roof_insulation", "solid_floor_insulation",
+            "suspended_floor_insulation", "windows_glazing", "heating", "heating_control", "solar_pv",
+            "mechanical_ventilation"
+        ])
+    ]["id"].sum()
 
     z = recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_3_sample)]
 
@@ -305,11 +546,54 @@ def scenario_2():
         recommendations_summary["uprn"].astype(str).isin(archetype_1["uprn"].values)
     ]
 
+    arch_1_property_details = property_details_df[
+        property_details_df["uprn"].astype(str).isin(archetype_1["uprn"].values)
+    ]
+    arch_1_property_details["co2_emissions"].sum() / property_details_df["co2_emissions"].sum()
+
     # Take the mean, median and maximum of each value
     arch_1_recommendation_min = recommendations_arch_1_summary.min()
     arch_1_recommendation_max = recommendations_arch_1_summary.max()
     arch_1_recommendation_means = recommendations_arch_1_summary.mean()
 
+    arch_1_totals = recommendations_arch_1_summary.sum()
+
+    annual_total_co2 = recommendations_arch_1_summary["total_carbon"].sum()
+    annual_total_bills = recommendations_arch_1_summary["total_bill_savings"].sum()
+    annual_total_energy_savings = recommendations_arch_1_summary["adjusted_heat_demand"].sum()
+    archetype_measures = \
+        recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_1["uprn"].values)].groupby("type")[
+            "id"].count().reset_index()
+
+    cost_text = (f"{round(arch_1_recommendation_means['total_cost'], 2)}: "
+                 f"{arch_1_recommendation_min['total_cost']} - {arch_1_recommendation_max['total_cost']}")
+
+    sap_text = (f"{round(arch_1_recommendation_means['total_sap_points'], 2)}: "
+                f"{arch_1_recommendation_min['total_sap_points']} - {arch_1_recommendation_max['total_sap_points']}")
+
+    energy_text = (f"{round(arch_1_recommendation_means['adjusted_heat_demand'], 2)}: "
+                   f"{arch_1_recommendation_min['adjusted_heat_demand']} - "
+                   f"{arch_1_recommendation_max['adjusted_heat_demand']}")
+
+    energy_percent_text = (f"{round(arch_1_recommendation_means['energy_percent_change'], 2)}: "
+                           f"{arch_1_recommendation_min['energy_percent_change']} - "
+                           f"{arch_1_recommendation_max['energy_percent_change']}")
+
+    carbon_text = (f"{round(arch_1_recommendation_means['total_carbon'], 2)}: "
+                   f"{arch_1_recommendation_min['total_carbon']} - {arch_1_recommendation_max['total_carbon']}")
+
+    carbon_percent_text = (f"{round(arch_1_recommendation_means['carbon_percent_change'], 2)}: "
+                           f"{arch_1_recommendation_min['carbon_percent_change']} - "
+                           f"{arch_1_recommendation_max['carbon_percent_change']}")
+
+    bill_text = (f"{round(arch_1_recommendation_means['total_bill_savings'], 2)}: "
+                 f"{arch_1_recommendation_min['total_bill_savings']} - "
+                 f"{arch_1_recommendation_max['total_bill_savings']}")
+
+    bill_percent_text = (f"{round(arch_1_recommendation_means['bills_percent_change'], 2)}: "
+                         f"{arch_1_recommendation_min['bills_percent_change']} - "
+                         f"{arch_1_recommendation_max['bills_percent_change']}")
+
     ########################
     # Archetype 2
     ########################
@@ -318,11 +602,53 @@ def scenario_2():
         recommendations_summary["uprn"].astype(str).isin(archetype_2["uprn"].values)
     ]
 
+    arch_2_property_details = property_details_df[
+        property_details_df["uprn"].astype(str).isin(archetype_2["uprn"].values)
+    ]
+    arch_2_property_details["co2_emissions"].sum() / property_details_df["co2_emissions"].sum()
+
     # Take the mean, median and maximum of each value
     arch_2_recommendation_min = recommendations_arch_2_summary.min()
     arch_2_recommendation_max = recommendations_arch_2_summary.max()
     arch_2_recommendation_means = recommendations_arch_2_summary.mean().round(2)
 
+    total_cost = recommendations_arch_2_summary["total_cost"].sum()
+    annual_total_co2 = recommendations_arch_2_summary["total_carbon"].sum()
+    annual_total_bills = recommendations_arch_2_summary["total_bill_savings"].sum()
+    annual_total_energy_savings = recommendations_arch_2_summary["adjusted_heat_demand"].sum()
+    archetype_measures = \
+        recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_2["uprn"].values)].groupby("type")[
+            "id"].count().reset_index()
+
+    cost_text = (f"{round(arch_2_recommendation_means['total_cost'], 2)}: "
+                 f"{arch_2_recommendation_min['total_cost']} - {arch_2_recommendation_max['total_cost']}")
+
+    sap_text = (f"{round(arch_2_recommendation_means['total_sap_points'], 2)}: "
+                f"{arch_2_recommendation_min['total_sap_points']} - {arch_2_recommendation_max['total_sap_points']}")
+
+    energy_text = (f"{round(arch_2_recommendation_means['adjusted_heat_demand'], 2)}: "
+                   f"{arch_2_recommendation_min['adjusted_heat_demand']} - "
+                   f"{arch_2_recommendation_max['adjusted_heat_demand']}")
+
+    energy_percent_text = (f"{round(arch_2_recommendation_means['energy_percent_change'], 2)}: "
+                           f"{arch_2_recommendation_min['energy_percent_change']} - "
+                           f"{arch_2_recommendation_max['energy_percent_change']}")
+
+    carbon_text = (f"{round(arch_2_recommendation_means['total_carbon'], 2)}: "
+                   f"{arch_2_recommendation_min['total_carbon']} - {arch_2_recommendation_max['total_carbon']}")
+
+    carbon_percent_text = (f"{round(arch_2_recommendation_means['carbon_percent_change'], 2)}: "
+                           f"{arch_2_recommendation_min['carbon_percent_change']} - "
+                           f"{arch_2_recommendation_max['carbon_percent_change']}")
+
+    bill_text = (f"{round(arch_2_recommendation_means['total_bill_savings'], 2)}: "
+                 f"{arch_2_recommendation_min['total_bill_savings']} - "
+                 f"{arch_2_recommendation_max['total_bill_savings']}")
+
+    bill_percent_text = (f"{round(arch_2_recommendation_means['bills_percent_change'], 2)}: "
+                         f"{arch_2_recommendation_min['bills_percent_change']} - "
+                         f"{arch_2_recommendation_max['bills_percent_change']}")
+
     ########################
     # Archetype 3
     ########################
@@ -331,11 +657,53 @@ def scenario_2():
         recommendations_summary["uprn"].astype(str).isin(archetype_3["uprn"].values)
     ]
 
+    arch_3_property_details = property_details_df[
+        property_details_df["uprn"].astype(str).isin(archetype_3["uprn"].values)
+    ]
+    arch_3_property_details["co2_emissions"].sum() / property_details_df["co2_emissions"].sum()
+
     # Take the mean, median and maximum of each value
     arch_3_recommendation_min = recommendations_arch_3_summary.min()
     arch_3_recommendation_max = recommendations_arch_3_summary.max()
     arch_3_recommendation_means = recommendations_arch_3_summary.mean()
 
+    total_cost = recommendations_arch_3_summary["total_cost"].sum()
+    annual_total_co2 = recommendations_arch_3_summary["total_carbon"].sum()
+    annual_total_bills = recommendations_arch_3_summary["total_bill_savings"].sum()
+    annual_total_energy_savings = recommendations_arch_3_summary["adjusted_heat_demand"].sum()
+    archetype_measures = \
+        recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_3["uprn"].values)].groupby("type")[
+            "id"].count().reset_index()
+
+    cost_text = (f"{round(arch_3_recommendation_means['total_cost'], 2)}: "
+                 f"{arch_3_recommendation_min['total_cost']} - {arch_3_recommendation_max['total_cost']}")
+
+    sap_text = (f"{round(arch_3_recommendation_means['total_sap_points'], 2)}: "
+                f"{arch_3_recommendation_min['total_sap_points']} - {arch_3_recommendation_max['total_sap_points']}")
+
+    energy_text = (f"{round(arch_3_recommendation_means['adjusted_heat_demand'], 2)}: "
+                   f"{arch_3_recommendation_min['adjusted_heat_demand']} - "
+                   f"{arch_3_recommendation_max['adjusted_heat_demand']}")
+
+    energy_percent_text = (f"{round(arch_3_recommendation_means['energy_percent_change'], 2)}: "
+                           f"{arch_3_recommendation_min['energy_percent_change']} - "
+                           f"{arch_3_recommendation_max['energy_percent_change']}")
+
+    carbon_text = (f"{round(arch_3_recommendation_means['total_carbon'], 2)}: "
+                   f"{arch_3_recommendation_min['total_carbon']} - {arch_3_recommendation_max['total_carbon']}")
+
+    carbon_percent_text = (f"{round(arch_3_recommendation_means['carbon_percent_change'], 2)}: "
+                           f"{arch_3_recommendation_min['carbon_percent_change']} - "
+                           f"{arch_3_recommendation_max['carbon_percent_change']}")
+
+    bill_text = (f"{round(arch_3_recommendation_means['total_bill_savings'], 2)}: "
+                 f"{arch_3_recommendation_min['total_bill_savings']} - "
+                 f"{arch_3_recommendation_max['total_bill_savings']}")
+
+    bill_percent_text = (f"{round(arch_3_recommendation_means['bills_percent_change'], 2)}: "
+                         f"{arch_3_recommendation_min['bills_percent_change']} - "
+                         f"{arch_3_recommendation_max['bills_percent_change']}")
+
     ########################
     # Archetype 4
     ########################
@@ -344,11 +712,49 @@ def scenario_2():
         recommendations_summary["uprn"].astype(str).isin(archetype_4["uprn"].values)
     ]
 
+    arch_4_property_details = property_details_df[
+        property_details_df["uprn"].astype(str).isin(archetype_4["uprn"].values)
+    ]
+    arch_4_property_details["co2_emissions"].sum() / property_details_df["co2_emissions"].sum()
+
     # Take the mean, median and maximum of each value
     arch_4_recommendation_min = recommendations_arch_4_summary.min()
     arch_4_recommendation_max = recommendations_arch_4_summary.max()
     arch_4_recommendation_means = recommendations_arch_4_summary.mean()
 
-    property_details_df[
-        property_details_df["uprn"].astype(str).isin(archetype_4["uprn"].values)
-    ]["total_floor_area"].mean()
+    total_cost = recommendations_arch_4_summary["total_cost"].sum()
+    annual_total_co2 = recommendations_arch_4_summary["total_carbon"].sum()
+    annual_total_bills = recommendations_arch_4_summary["total_bill_savings"].sum()
+    annual_total_energy_savings = recommendations_arch_4_summary["adjusted_heat_demand"].sum()
+    archetype_measures = \
+        recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_4["uprn"].values)].groupby("type")[
+            "id"].count().reset_index()
+
+    cost_text = (f"{round(arch_4_recommendation_means['total_cost'], 2)}: "
+                 f"{arch_4_recommendation_min['total_cost']} - {arch_4_recommendation_max['total_cost']}")
+
+    sap_text = (f"{round(arch_4_recommendation_means['total_sap_points'], 2)}: "
+                f"{arch_4_recommendation_min['total_sap_points']} - {arch_4_recommendation_max['total_sap_points']}")
+
+    energy_text = (f"{round(arch_4_recommendation_means['adjusted_heat_demand'], 2)}: "
+                   f"{arch_4_recommendation_min['adjusted_heat_demand']} - "
+                   f"{arch_4_recommendation_max['adjusted_heat_demand']}")
+
+    energy_percent_text = (f"{round(arch_4_recommendation_means['energy_percent_change'], 2)}: "
+                           f"{arch_4_recommendation_min['energy_percent_change']} - "
+                           f"{arch_4_recommendation_max['energy_percent_change']}")
+
+    carbon_text = (f"{round(arch_4_recommendation_means['total_carbon'], 2)}: "
+                   f"{arch_4_recommendation_min['total_carbon']} - {arch_4_recommendation_max['total_carbon']}")
+
+    carbon_percent_text = (f"{round(arch_4_recommendation_means['carbon_percent_change'], 2)}: "
+                           f"{arch_4_recommendation_min['carbon_percent_change']} - "
+                           f"{arch_4_recommendation_max['carbon_percent_change']}")
+
+    bill_text = (f"{round(arch_4_recommendation_means['total_bill_savings'], 2)}: "
+                 f"{arch_4_recommendation_min['total_bill_savings']} - "
+                 f"{arch_4_recommendation_max['total_bill_savings']}")
+
+    bill_percent_text = (f"{round(arch_4_recommendation_means['bills_percent_change'], 2)}: "
+                         f"{arch_4_recommendation_min['bills_percent_change']} - "
+                         f"{arch_4_recommendation_max['bills_percent_change']}")
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 9d2e99e3..2c075820 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -186,9 +186,18 @@ class HeatingRecommender:
         # This upgrade will only take the heating system to average energy efficiency
         heating_simulation_config["mainheat_energy_eff_ending"] = "Average"
 
+        # If the property is off-gas and has no heating system in place, the number of heated rooms will actually
+        # be 0, so we use the number of rooms as the figure
+        number_heated_rooms = (
+            self.property.data["number-heated-rooms"] if self.property.data["number-heated-rooms"] > 0
+            else (
+                self.property.number_of_rooms - 1 if self.property.number_of_rooms > 1 else
+                self.property.number_of_rooms
+            )
+        )
         # Upgrade to electric storage heaters
         costs = self.costs.high_heat_electric_storage_heaters(
-            number_heated_rooms=self.property.data["number-heated-rooms"]
+            number_heated_rooms=number_heated_rooms
         )
         description = "Install high heat retention electric storage heaters"
 
@@ -268,9 +277,9 @@ class HeatingRecommender:
             ] and self.property.data["mains-gas-flag"]
             is_combi = hotwater_from_mains or access_to_mains_no_system
             if is_combi:
-                description = "Upgrade to a low carbon combi boiler"
+                description = "Upgrade to a new combi boiler"
             else:
-                description = "Upgrade to a low carbon boiler"
+                description = "Upgrade to a new boiler"
 
             simulation_config = {"mainheat_energy_eff_ending": "Good"}
             if access_to_mains_no_system:

From e182d7acd77aa9dfc56a03650c59ffb3d763aa36 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 5 Apr 2024 10:19:22 +0100
Subject: [PATCH 176/248] change calculation of energy savings to use adjusted
 heat demand, not heat demand

---
 backend/app/db/functions/portfolio_functions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/app/db/functions/portfolio_functions.py b/backend/app/db/functions/portfolio_functions.py
index a8a882bd..ead8280f 100644
--- a/backend/app/db/functions/portfolio_functions.py
+++ b/backend/app/db/functions/portfolio_functions.py
@@ -11,7 +11,7 @@ def aggregate_portfolio_recommendations(
         session.query(
             func.sum(Recommendation.estimated_cost).label("cost"),
             func.sum(Recommendation.total_work_hours).label("total_work_hours"),
-            func.sum(Recommendation.heat_demand).label("energy_savings"),
+            func.sum(Recommendation.adjusted_heat_demand).label("energy_savings"),
             func.sum(Recommendation.co2_equivalent_savings).label("co2_equivalent_savings"),
             func.sum(Recommendation.energy_cost_savings).label("energy_cost_savings"),
         )

From 02e72c569513b846cd1348caa17d20a786507c7b Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 5 Apr 2024 14:02:48 +0100
Subject: [PATCH 177/248] prevent hot water tank insulation recommendations
 when no heating system is in place

---
 recommendations/HotwaterRecommendations.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/recommendations/HotwaterRecommendations.py b/recommendations/HotwaterRecommendations.py
index 298671a2..667f5f69 100644
--- a/recommendations/HotwaterRecommendations.py
+++ b/recommendations/HotwaterRecommendations.py
@@ -22,8 +22,13 @@ class HotwaterRecommendations:
 
         # This first iteration of the recommender will provide very basic recommendation
         # We recommend heating controls based on the main heating system
-        if (self.property.hotwater["heater_type"] in ["electric immersion"]) & \
-            (self.property.data["hot-water-energy-eff"] == "Very Poor"):
+        # If there is not system present, we do not recommend anything, since we will have a separate recommendation
+        # suggesting system upgrades (e.g. boiler replacement)
+        if (
+            (self.property.hotwater["heater_type"] in ["electric immersion"]) &
+            (self.property.data["hot-water-energy-eff"] == "Very Poor") &
+            (self.property.hotwater["no_system_present"] is None)
+        ):
             self.recommend_tank_insulation(phase=phase)
             return
 

From 4134fdbb755f4a25e8162bfb851709372d0c5677 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 5 Apr 2024 15:00:24 +0100
Subject: [PATCH 178/248] Added pruning of solar panel options to prevent
 systems much too large or much too small

---
 recommendations/SolarPvRecommendations.py | 46 +++++++++++++++++++----
 1 file changed, 38 insertions(+), 8 deletions(-)

diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py
index 3a89b213..744351be 100644
--- a/recommendations/SolarPvRecommendations.py
+++ b/recommendations/SolarPvRecommendations.py
@@ -8,6 +8,9 @@ class SolarPvRecommendations:
     # Wattage per panel - this is based on the average wattage of a solar panel being between 250w and 420w
     SOLAR_PANEL_WATTAGE = 250
 
+    MAX_SYSTEM_WATTAGE = 4200
+    MIN_SYSTEM_WATTAGE = 2500
+
     def __init__(self, property_instance):
         """
         :param property_instance: Instance of the Property class, for the home associated to property_id
@@ -18,6 +21,19 @@ class SolarPvRecommendations:
 
         self.recommendation = []
 
+    @staticmethod
+    def trim_solar_wattage_options(scenarios_with_wattage):
+        # Initialize the list with the first element, assuming the list is not empty
+        trimmed_list = [scenarios_with_wattage[0]]
+
+        # Iterate over the list starting from the second element
+        for scenario in scenarios_with_wattage[1:]:
+            # Compare the second element (index 1) of the current tuple with the last tuple in the trimmed list
+            if scenario[1] > trimmed_list[-1][1]:
+                trimmed_list.append(scenario)
+
+        return trimmed_list
+
     def recommend(self, phase):
         """
         We check if a property is potentially suitable for solar PV based on the following criteria:
@@ -46,26 +62,40 @@ class SolarPvRecommendations:
             self.property.solar_pv_percentage - 0.1, self.property.solar_pv_percentage,
             self.property.solar_pv_percentage + 0.1
         ]
-        # We make sure we haven't gone too low or high
-        roof_coverage_scenarios = [v for v in roof_coverage_scenarios if 0 <= v <= 1]
+        # We make sure we haven't gone too low or high - we allow no more than 60% coverage
+        roof_coverage_scenarios = [v for v in roof_coverage_scenarios if 0 <= v <= 0.6]
+        # If we only have two scenarios, we add a coverage scenario 10% less than the smallest
+        if len(roof_coverage_scenarios) == 2:
+            roof_coverage_scenarios.insert(0, roof_coverage_scenarios[0] - 0.1)
         battery_scenarios = [False, True]
 
-        # I now produce the cross product of the scenarios
-        scenarios = [(roof, battery) for roof in roof_coverage_scenarios for battery in battery_scenarios]
-
-        for roof_coverage, has_battery in scenarios:
+        scenarios_with_wattage = []
+        for roof_coverage in roof_coverage_scenarios:
             # We now have a property which is potentially suitable for solar PV
             solar_pv_roof_area = self.property.get_solar_pv_roof_area(roof_coverage)
 
             number_solar_panels = np.floor(solar_pv_roof_area / self.SOLAR_PANEL_AREA)
             solar_panel_wattage = number_solar_panels * self.SOLAR_PANEL_WATTAGE
+            solar_panel_wattage = np.clip(
+                a=solar_panel_wattage, a_min=self.MIN_SYSTEM_WATTAGE, a_max=self.MAX_SYSTEM_WATTAGE
+            )
+            scenarios_with_wattage.append((roof_coverage, solar_panel_wattage))
 
+        # We trim the scenarios, so that we don't have duplicate wattages
+        scenarios_with_wattage = self.trim_solar_wattage_options(scenarios_with_wattage)
+
+        # Produce the cross product of the scenarios
+        scenarios = [
+            (roof, wattage, battery) for roof, wattage in scenarios_with_wattage for battery in battery_scenarios
+        ]
+        # The wattage for each roof coverage scenario has already been deduced above
+
+        for roof_coverage, solar_panel_wattage, has_battery in scenarios:
+            # We now have a property which is potentially suitable for solar PV
             roof_coverage_percent = round(roof_coverage * 100)
-
             # Given the wattage, we estimate the cost of the solar PV system. This is based on the MCS database
             # of solar PV installations
             cost_result = self.costs.solar_pv(wattage=solar_panel_wattage, has_battery=has_battery)
-
             kw = np.floor(solar_panel_wattage / 100) / 10
 
             if has_battery:

From ec6fc84911d1a8ac3689c9f07b866fda98086212 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 5 Apr 2024 15:14:55 +0100
Subject: [PATCH 179/248] updating solar panel logic

---
 recommendations/SolarPvRecommendations.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py
index 744351be..4cf1c1fc 100644
--- a/recommendations/SolarPvRecommendations.py
+++ b/recommendations/SolarPvRecommendations.py
@@ -8,8 +8,8 @@ class SolarPvRecommendations:
     # Wattage per panel - this is based on the average wattage of a solar panel being between 250w and 420w
     SOLAR_PANEL_WATTAGE = 250
 
-    MAX_SYSTEM_WATTAGE = 4200
-    MIN_SYSTEM_WATTAGE = 2500
+    MAX_SYSTEM_WATTAGE = 6000
+    MIN_SYSTEM_WATTAGE = 1000
 
     def __init__(self, property_instance):
         """
@@ -60,8 +60,9 @@ class SolarPvRecommendations:
         # 2) With and without battery
         roof_coverage_scenarios = [
             self.property.solar_pv_percentage - 0.1, self.property.solar_pv_percentage,
-            self.property.solar_pv_percentage + 0.1
         ]
+        if self.property.solar_pv_percentage <= 0.4:
+            roof_coverage_scenarios.append(self.property.solar_pv_percentage + 0.1)
         # We make sure we haven't gone too low or high - we allow no more than 60% coverage
         roof_coverage_scenarios = [v for v in roof_coverage_scenarios if 0 <= v <= 0.6]
         # If we only have two scenarios, we add a coverage scenario 10% less than the smallest
@@ -76,6 +77,10 @@ class SolarPvRecommendations:
 
             number_solar_panels = np.floor(solar_pv_roof_area / self.SOLAR_PANEL_AREA)
             solar_panel_wattage = number_solar_panels * self.SOLAR_PANEL_WATTAGE
+
+            if solar_panel_wattage < self.MIN_SYSTEM_WATTAGE:
+                continue
+
             solar_panel_wattage = np.clip(
                 a=solar_panel_wattage, a_min=self.MIN_SYSTEM_WATTAGE, a_max=self.MAX_SYSTEM_WATTAGE
             )

From 6258c347d68ecd1156387f9e2a532d099e2be2c3 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 5 Apr 2024 16:06:30 +0100
Subject: [PATCH 180/248] updating boiler recommendation to impact mains fuel
 and consider the impact on the main fuel

---
 etl/customers/gla_croydon_demo/asset_list.py |  4 ++
 recommendations/HeatingRecommender.py        | 58 +++++++++++++++-----
 2 files changed, 49 insertions(+), 13 deletions(-)

diff --git a/etl/customers/gla_croydon_demo/asset_list.py b/etl/customers/gla_croydon_demo/asset_list.py
index 52e9422c..777cba83 100644
--- a/etl/customers/gla_croydon_demo/asset_list.py
+++ b/etl/customers/gla_croydon_demo/asset_list.py
@@ -34,6 +34,10 @@ def app():
         low_memory=False
     )
 
+    z = epc_data.groupby(["MAINHEAT_DESCRIPTION", "MAINHEATCONT_DESCRIPTION", "MAIN_FUEL"]).size().reset_index(
+        name="count")
+    z = z[z["MAINHEAT_DESCRIPTION"] == "Boiler and radiators, mains gas"]
+
     # Filter on entries where we have a UPRN
     epc_data = epc_data[~pd.isnull(epc_data["UPRN"])]
 
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 2c075820..f602ecab 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -5,6 +5,7 @@ from recommendations.recommendation_utils import check_simulation_difference
 from backend.Property import Property
 from etl.epc_clean.epc_attributes.MainheatAttributes import MainHeatAttributes
 from etl.epc_clean.epc_attributes.HotWaterAttributes import HotWaterAttributes
+from etl.epc_clean.epc_attributes.MainFuelAttributes import MainFuelAttributes
 from recommendations.HeatingControlRecommender import HeatingControlRecommender
 
 
@@ -44,7 +45,7 @@ class HeatingRecommender:
         ] and self.property.data["mains-gas-flag"]
 
         if has_boiler or no_heating_has_mains:
-            self.recommend_boiler_upgrades(phase=phase)
+            self.recommend_boiler_upgrades(phase=phase, no_heating_has_mains=no_heating_has_mains)
             return
 
     @staticmethod
@@ -250,17 +251,20 @@ class HeatingRecommender:
 
         return closest_size
 
-    def recommend_boiler_upgrades(self, phase):
+    def recommend_boiler_upgrades(self, phase, no_heating_has_mains):
         """
         This boiler recommendation will only recommend a like-for-like upgrade, since changing the system
         is generally more expensive
         :param phase:
+        :param no_heating_has_mains: indicates if the property has no heating system, but has access to mains gas
         :return:
         """
 
         recommendation_phase = phase
 
         # We now recommend boiler upgrades, if applicable
+        simulation_config = {}
+        boiler_costs = {}
         if self.property.data["mainheat-energy-eff"] in ["Very Poor", "Poor", "Average"]:
             boiler_size = self.estimate_boiler_size(
                 property_type=self.property.data["property-type"],
@@ -272,21 +276,20 @@ class HeatingRecommender:
 
             # If heating and hot water come from the mains, we need a combi boiler, otherwise we need a regular boiler
             hotwater_from_mains = self.property.hotwater["clean_description"] in ["From main system"]
-            access_to_mains_no_system = self.property.main_heating["clean_description"] in [
-                'No system present, electric heaters assumed'
-            ] and self.property.data["mains-gas-flag"]
-            is_combi = hotwater_from_mains or access_to_mains_no_system
+
+            is_combi = hotwater_from_mains or no_heating_has_mains
             if is_combi:
                 description = "Upgrade to a new combi boiler"
             else:
                 description = "Upgrade to a new boiler"
 
             simulation_config = {"mainheat_energy_eff_ending": "Good"}
-            if access_to_mains_no_system:
+            if no_heating_has_mains:
                 # Installation of a boiler improves the hot water system so we need to reflect this in
                 # the outcome of the recommendation
                 heating_ending_config = MainHeatAttributes("Boiler and radiators, mains gas").process()
                 hotwater_ending_config = HotWaterAttributes("From main system").process()
+                fuel_ending_config = MainFuelAttributes("mains gas (not community)").process()
 
                 heating_simulation_config = check_simulation_difference(
                     new_config=heating_ending_config, old_config=self.property.main_heating
@@ -294,14 +297,20 @@ class HeatingRecommender:
                 hotwater_simulation_config = check_simulation_difference(
                     new_config=hotwater_ending_config, old_config=self.property.hotwater
                 )
+                fuel_simulation_config = check_simulation_difference(
+                    new_config=fuel_ending_config, old_config=self.property.main_fuel
+                )
 
                 simulation_config = {
                     **simulation_config,
                     **heating_simulation_config,
                     **hotwater_simulation_config,
+                    **fuel_simulation_config,
                     "hot_water_energy_eff_ending": "Good"
                 }
 
+            boiler_costs = self.costs.low_carbon_boiler(is_combi=is_combi, size=f"{boiler_size}kw")
+
             self.recommendations.append(
                 {
                     "phase": recommendation_phase,
@@ -314,22 +323,45 @@ class HeatingRecommender:
                     "new_u_value": None,
                     "sap_points": None,
                     "simulation_config": simulation_config,
-                    **self.costs.low_carbon_boiler(is_combi=is_combi, size=f"{boiler_size}kw")
+                    **boiler_costs
                 }
             )
 
-            # We increment the recommendation phase, in the case of us having heating control recommendations
-            recommendation_phase += 1
-
         # We recommend the heating controls
+        # If the property did not previously have a boiler, we combine the boiler and controls recommendations
         controls_recommender = HeatingControlRecommender(self.property)
         controls_recommender.recommend(heating_description="Boiler and radiators, mains gas")
         # We may have 2 recommendations from the heating controls
 
-        if controls_recommender.recommendation:
+        if not controls_recommender.recommendation:
+            return
+
+        if no_heating_has_mains:
+            # We combine the heating and controls recommendations
+            boiler_recommendation = self.recommendations[0].copy()
+            combined_recommendations = []
+            for controls_recommendation in controls_recommender.recommendation:
+                combined_recommendation = self.combine_heating_and_controls(
+                    controls_recommendations=[controls_recommendation],
+                    heating_simulation_config=simulation_config,
+                    costs=boiler_costs,
+                    description=boiler_recommendation["description"],
+                    phase=recommendation_phase,
+                    heating_controls_only=False,
+                    system_change=True
+                )
+                combined_recommendations.extend(combined_recommendation)
+
+            # Overwrite the existing boiler recommendation
+            self.recommendations = combined_recommendations
+        else:
+            # We increment the recommendation phase, since the heating controls are separate from the boiler upgrade
+            recommendation_phase += 1
             # The heating controls recommendation is distrinct from the boiler upgrade recommendation
             # We insert phase into the recommendations for heating controls
             for recommendation in controls_recommender.recommendation:
                 recommendation["phase"] = recommendation_phase
 
-        self.recommendations.extend(controls_recommender.recommendation)
+            self.recommendations.extend(controls_recommender.recommendation)
+
+        return

From 35a288fd7406c630fddde596360fa35e53d3fdd4 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 5 Apr 2024 16:47:15 +0100
Subject: [PATCH 181/248] Updating recommendations

---
 backend/Property.py                          | 5 -----
 etl/customers/gla_croydon_demo/asset_list.py | 3 +--
 recommendations/HeatingRecommender.py        | 7 ++++++-
 recommendations/HotwaterRecommendations.py   | 5 +++--
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index 19f15b02..d3dd8395 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -298,11 +298,6 @@ class Property:
                 if recommendation["type"] == "cavity_wall_insulation":
                     output["is_filled_cavity_ending"] = True
 
-                # TODO: perhaps detrimental
-                # When making a recommendation for the wall, we will also update the ventilation
-                # if output["mechanical_ventilation_ending"] == 'natural':
-                #     output["mechanical_ventilation_ending"] = 'mechanical, extract only'
-
             else:
                 if output["walls_thermal_transmittance_ending"] is None:
                     raise ValueError("We should not have a None value for the u value")
diff --git a/etl/customers/gla_croydon_demo/asset_list.py b/etl/customers/gla_croydon_demo/asset_list.py
index 777cba83..7dde8926 100644
--- a/etl/customers/gla_croydon_demo/asset_list.py
+++ b/etl/customers/gla_croydon_demo/asset_list.py
@@ -34,8 +34,7 @@ def app():
         low_memory=False
     )
 
-    z = epc_data.groupby(["MAINHEAT_DESCRIPTION", "MAINHEATCONT_DESCRIPTION", "MAIN_FUEL"]).size().reset_index(
-        name="count")
+    z = epc_data.groupby(["WALLS_DESCRIPTION", "WALLS_ENERGY_EFF"]).size().reset_index(name="count")
     z = z[z["MAINHEAT_DESCRIPTION"] == "Boiler and radiators, mains gas"]
 
     # Filter on entries where we have a UPRN
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index f602ecab..aec1f419 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -104,8 +104,13 @@ class HeatingRecommender:
                     **recommendation_simulation_config,
                     **controls_recommendations[0]["simulation_config"]
                 }
+                controls_description = controls_recommendations[0]['description']
+                # Make the first letter of the description lowercase
+                controls_description = (
+                    controls_description[0].lower() + controls_description[1:]
+                )
 
-                recommendation_description = f"{description} and {controls_recommendations[0]['description']}"
+                recommendation_description = f"{description} and {controls_description}"
 
             recommendation = {
                 "phase": phase,
diff --git a/recommendations/HotwaterRecommendations.py b/recommendations/HotwaterRecommendations.py
index 667f5f69..7f77597f 100644
--- a/recommendations/HotwaterRecommendations.py
+++ b/recommendations/HotwaterRecommendations.py
@@ -22,8 +22,9 @@ class HotwaterRecommendations:
 
         # This first iteration of the recommender will provide very basic recommendation
         # We recommend heating controls based on the main heating system
-        # If there is not system present, we do not recommend anything, since we will have a separate recommendation
-        # suggesting system upgrades (e.g. boiler replacement)
+
+        # If there is no system present, but access to the mains, we
+
         if (
             (self.property.hotwater["heater_type"] in ["electric immersion"]) &
             (self.property.data["hot-water-energy-eff"] == "Very Poor") &

From 0142e6fe5fcbcffc836bc139df48cf31e77545f1 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 8 Apr 2024 15:29:52 +0100
Subject: [PATCH 182/248] wip matching completed surveys back to the asset list

---
 .idea/Model.iml                               |  2 +-
 .idea/misc.xml                                |  2 +-
 .../ha_15_32/ha_analysis_batch_3.py           | 78 +++++++++++++++++++
 3 files changed, 80 insertions(+), 2 deletions(-)

diff --git a/.idea/Model.iml b/.idea/Model.iml
index 4413bb06..b0f9c00d 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 6f308057..1122b380 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index b4b82d0b..de2c0e6a 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -6907,3 +6907,81 @@ def app():
         december_figures["ECO4 remaining"]
     )
     december_figures["ECO4 remaining"].sum()
+
+    # Adhoc - for UNITAS, stripping out additional surveys that have been completed
+    unitas_data = loader.data["HA50"].copy()
+    unitas_asset_list = unitas_data["asset_list"].copy()
+    unitas_survey_sheet = unitas_data["survey_list"].copy()
+    # We remove the surveyed properties from the asset sheet
+    unitas_survey_sheet = unitas_survey_sheet[~pd.isnull(unitas_survey_sheet["asset_list_row_id"])]
+    unitas_asset_list = unitas_asset_list.merge(
+        unitas_survey_sheet[["asset_list_row_id", "installation_status"]],
+        how="left",
+        on="asset_list_row_id"
+    )
+    unitas_asset_list = unitas_asset_list[pd.isnull(unitas_asset_list["installation_status"])]
+    unitas_asset_list = unitas_asset_list.drop(columns=["installation_status"])
+
+    # We read in the data for the further completed surveys
+    unitas_phase_1_workbook = openpyxl.load_workbook(
+        "local_data/ha_data/UNITAS ( STOKE) MASTER ROLLING SHEET UPDATED 8.4.24 K - no password.xlsx"
+    )
+    phase_1_worksheet = unitas_phase_1_workbook["ECO 4 - PHASE 1"]
+    phase_2_worksheet = unitas_phase_1_workbook["ECO4 - PHASE 2"]
+    phase1_colnames = [cell.value for cell in phase_1_worksheet[1]]
+    phase_1_rows_data = []
+    for row in phase_1_worksheet.iter_rows(min_row=2, values_only=False):
+        row_data = [cell.value for cell in row]  # This will get you the cell values
+        phase_1_rows_data.append(row_data)
+
+    phase_1_surveys = pd.DataFrame(phase_1_rows_data, columns=phase1_colnames)
+
+    # Correct phase 1 surveys in the same fashion as the previous approach
+    phase_1_surveys = DataLoader.correct_ha50_survey_list(phase_1_surveys.copy())
+
+    # We check all phase 1 surveys are contained in the data we had before
+    additional = []
+    for _, row in tqdm(phase_1_surveys.iterrows(), total=len(phase_1_surveys)):
+        # We look for the entry in the old survey sheet:
+        # matched_uprn = unitas_survey_sheet[unitas_survey_sheet["EPR UPRN NUMBER"] == row["UPRN"]]
+        # if matched_uprn.shape[0] == 1:
+        #     continue
+
+        matched_1 = unitas_survey_sheet[
+            (unitas_survey_sheet["Post Code"] == row["Post Code"]) &
+            (unitas_survey_sheet["NO."] == row["NO."])
+            ]
+
+        if matched_1.shape[0] == 1:
+            continue
+
+        matched_2 = unitas_survey_sheet[
+            (unitas_survey_sheet["Street / Block Name"] == row["Street / Block Name"]) &
+            (unitas_survey_sheet["NO."] == row["NO."])
+            ]
+
+        if matched_2.shape[0] == 1:
+            continue
+
+        additional.append(row.to_dict())
+    additional = pd.DataFrame(additional)
+
+    phase_2_rows_data = []
+    for row in phase_2_worksheet.iter_rows(min_row=2, values_only=False):
+        row_data = [cell.value for cell in row]  # This will get you the cell values
+        phase_2_rows_data.append(row_data)
+
+    phase2_colnames = [cell.value for cell in phase_2_worksheet[1]]
+    phase_2_surveys = pd.DataFrame(phase_2_rows_data, columns=phase2_colnames)
+    # Drop all of the occurances of "OFFICE USE ONLY" columns
+    phase_2_surveys = phase_2_surveys.drop(columns=[c for c in phase_2_surveys.columns if "OFFICE USE ONLY" in c])
+    common_columns = list({c for c in phase_2_surveys.columns if c in additional.columns})
+    additional_filtered = additional[common_columns]
+
+    further_unitas_completed_surveys = pd.concat(
+        [phase_2_surveys, additional_filtered],
+        axis=0,
+        ignore_index=True
+    )
+
+    # We match these back to the asset list

From dc80313eca2119703e161c6a6ad1c9380f1cc886 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 9 Apr 2024 14:57:55 +0100
Subject: [PATCH 183/248] merging EPC data and survey outcomes to asset list

---
 .../ha_15_32/ha_analysis_batch_3.py           | 413 ++++++++++++++----
 1 file changed, 334 insertions(+), 79 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index de2c0e6a..35bb63fe 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -3459,7 +3459,7 @@ class DataLoader:
                     "not eligible",
                     asset_list["ECO Eligibility"]
                 )
-                asset_list = asset_list.drop(columns=["has_eco3"])
+                # asset_list = asset_list.drop(columns=["has_eco3"])
 
             # Report on sales
             sales_report = {}
@@ -6778,6 +6778,339 @@ def identify_eco_works(loader):
     breakdowns = breakdowns.fillna(0)
 
 
+def unitas_data_prep(loader):
+    #####
+    # Adhoc - for UNITAS, stripping out additional surveys that have been completed
+    unitas_data = loader.data["HA50"].copy()
+    unitas_asset_list = unitas_data["asset_list"].copy()
+    unitas_survey_sheet = unitas_data["survey_list"].copy()
+
+    # We remove the surveyed properties from the asset sheet
+    unitas_survey_sheet = unitas_survey_sheet[~pd.isnull(unitas_survey_sheet["asset_list_row_id"])]
+    unitas_asset_list = unitas_asset_list.merge(
+        unitas_survey_sheet[["asset_list_row_id", "installation_status"]],
+        how="left",
+        on="asset_list_row_id"
+    )
+    unitas_asset_list = unitas_asset_list[pd.isnull(unitas_asset_list["installation_status"])]
+    unitas_asset_list = unitas_asset_list.drop(columns=["installation_status"])
+
+    # We read in the data for the further completed surveys
+    unitas_phase_1_workbook = openpyxl.load_workbook(
+        "local_data/ha_data/UNITAS ( STOKE) MASTER ROLLING SHEET UPDATED 8.4.24 K - no password.xlsx"
+    )
+    phase_1_worksheet = unitas_phase_1_workbook["ECO 4 - PHASE 1"]
+    phase_2_worksheet = unitas_phase_1_workbook["ECO4 - PHASE 2"]
+    phase1_colnames = [cell.value for cell in phase_1_worksheet[1]]
+    phase_1_rows_data = []
+    for row in phase_1_worksheet.iter_rows(min_row=2, values_only=False):
+        row_data = [cell.value for cell in row]  # This will get you the cell values
+        phase_1_rows_data.append(row_data)
+
+    phase_1_surveys = pd.DataFrame(phase_1_rows_data, columns=phase1_colnames)
+
+    # Correct phase 1 surveys in the same fashion as the previous approach
+    phase_1_surveys = DataLoader.correct_ha50_survey_list(phase_1_surveys.copy())
+
+    # We check all phase 1 surveys are contained in the data we had before
+    additional = []
+    for _, row in tqdm(phase_1_surveys.iterrows(), total=len(phase_1_surveys)):
+        # We look for the entry in the old survey sheet:
+        # matched_uprn = unitas_survey_sheet[unitas_survey_sheet["EPR UPRN NUMBER"] == row["UPRN"]]
+        # if matched_uprn.shape[0] == 1:
+        #     continue
+
+        matched_1 = unitas_survey_sheet[
+            (unitas_survey_sheet["Post Code"] == row["Post Code"]) &
+            (unitas_survey_sheet["NO."] == row["NO."])
+            ]
+
+        if matched_1.shape[0] == 1:
+            continue
+
+        matched_2 = unitas_survey_sheet[
+            (unitas_survey_sheet["Street / Block Name"] == row["Street / Block Name"]) &
+            (unitas_survey_sheet["NO."] == row["NO."])
+            ]
+
+        if matched_2.shape[0] == 1:
+            continue
+
+        additional.append(row.to_dict())
+    additional = pd.DataFrame(additional)
+
+    phase_2_rows_data = []
+    for row in phase_2_worksheet.iter_rows(min_row=2, values_only=False):
+        row_data = [cell.value for cell in row]  # This will get you the cell values
+        phase_2_rows_data.append(row_data)
+
+    phase2_colnames = [cell.value for cell in phase_2_worksheet[1]]
+    phase_2_surveys = pd.DataFrame(phase_2_rows_data, columns=phase2_colnames)
+    # Drop all of the occurances of "OFFICE USE ONLY" columns
+    phase_2_surveys = phase_2_surveys.drop(columns=[c for c in phase_2_surveys.columns if "OFFICE USE ONLY" in c])
+    common_columns = list({c for c in phase_2_surveys.columns if c in additional.columns})
+    additional_filtered = additional[common_columns]
+
+    further_unitas_completed_surveys = pd.concat(
+        [phase_2_surveys, additional_filtered],
+        axis=0,
+        ignore_index=True
+    )
+
+    # Add a phase 2 key
+    further_unitas_completed_surveys["survey_list_row_id"] = [
+        "unitas_phase_2" + str(i) for i in further_unitas_completed_surveys.index
+    ]
+
+    not_in_asset_list = [
+        "unitas_phase_20", "unitas_phase_234", "unitas_phase_2163", "unitas_phase_2173", "unitas_phase_2374"
+    ]
+
+    additional_postcodes = ["st28bg"]
+
+    full_asset_list = unitas_data["asset_list"].copy()
+    full_asset_list["matching_postcode"] = full_asset_list["matching_postcode"].str.lower().str.replace(" ", "")
+    further_unitas_completed_surveys["Post Code"] = further_unitas_completed_surveys["Post Code"].str.replace(
+        "ST 5DT", "ST3 5DT"
+    )
+
+    # We match these back to the asset list
+    matching_lookup = []
+    for _, row in tqdm(further_unitas_completed_surveys.iterrows(), total=len(further_unitas_completed_surveys)):
+
+        if row["survey_list_row_id"] in not_in_asset_list:
+            continue
+
+        postcode_lower = row["Post Code"].lower().strip().replace(" ", "")
+        if postcode_lower in additional_postcodes:
+            continue
+
+        # Confirmed not in asset list
+        # Filter asset list on postcode
+        df = full_asset_list[
+            full_asset_list["matching_postcode"].str.contains(postcode_lower)
+        ]
+
+        df = df[df["HouseNo"] == str(row["NO."])]
+
+        if df.shape[0] != 1:
+            raise Exception("NOT FOUND")
+
+        matching_lookup.append(
+            {
+                "survey_list_row_id": row["survey_list_row_id"],
+                "asset_list_row_id": df["asset_list_row_id"].values[0],
+            }
+        )
+
+    matching_lookup = pd.DataFrame(matching_lookup)
+    matching_lookup["phase_2_surveyed"] = True
+
+    # We merge this onto the asset list and remove the rows
+    unitas_asset_list = unitas_asset_list.merge(
+        matching_lookup, how="left", on="asset_list_row_id"
+    )
+    # Drop rows where phase_2_surveyed is populated
+    unitas_asset_list = unitas_asset_list[
+        pd.isnull(unitas_asset_list["phase_2_surveyed"])
+    ]
+
+    # We add in the new CIGA submissions
+    unitas_round_2_ciga_workbook = openpyxl.load_workbook("local_data/ha_data/Unitas second round CIGA checks.xlsx")
+    ciga_round_2_worksheet = unitas_round_2_ciga_workbook["Worksheet"]
+    ciga_round_2_colnames = [cell.value for cell in ciga_round_2_worksheet[1]]
+    round_2_rows_data = []
+    for row in ciga_round_2_worksheet.iter_rows(min_row=2, values_only=False):
+        row_data = [cell.value for cell in row]  # This will get you the cell values
+        round_2_rows_data.append(row_data)
+
+    ciga_round_2 = pd.DataFrame(round_2_rows_data, columns=ciga_round_2_colnames)
+    # We merge the ciga sheet to the asset list
+    ciga_dependent_asset_list = unitas_asset_list[
+        unitas_asset_list["ECO Eligibility"].str.contains("subject to ciga")
+    ].copy()
+
+    # We merge the ciga sheet to the asset list
+    ciga_round_2_matched = ciga_dependent_asset_list.merge(
+        ciga_round_2, how="inner", on=["Address Line 1", "Post Code"]
+    )
+    # Filter on just the properties that had no guarantee
+    ciga_round_2_matched = ciga_round_2_matched[ciga_round_2_matched["Guarantee"] == "No"]
+
+    # ECO Eligibility
+    # not eligible              9227
+    # failed ciga               2711
+    # eco4 (subject to ciga)    2238
+    # eco4 - passed ciga         901
+    # gbis                       114
+    # eco4                        91
+
+    # We filter on the properties we're looking to re-survey
+    unitas_properties_to_survey = unitas_asset_list[
+        unitas_asset_list["ECO Eligibility"].isin(
+            [
+                "eco4 - passed ciga",
+                "eco4"
+            ]
+        )
+    ].copy()
+
+    unitas_properties_to_survey = pd.concat(
+        [
+            unitas_properties_to_survey,
+            ciga_round_2_matched[unitas_properties_to_survey.columns]
+        ]
+    )
+
+    epc_api_key = "a2Nvbm5rb3dsZXNzYXJAZ21haWwuY29tOjY5MGJiMWM0NmIyOGI5ZDUxYzAxMzQzYzNiZGNlZGJjZDNmODQwMzA="
+
+    # We now retrieve the latest EPC data
+    epc_data = []
+    for _, unitas_property in tqdm(unitas_properties_to_survey.iterrows(), total=len(unitas_properties_to_survey)):
+        property_type, _ = get_property_type_and_built_form(property_meta=unitas_property, ha_name="HA50")
+
+        full_address = unitas_property["matching_address"]
+
+        searcher = SearchEpc(
+            address1=str(unitas_property["HouseNo"]),
+            postcode=unitas_property["matching_postcode"],
+            auth_token=epc_api_key,
+            os_api_key="",
+            property_type=property_type,
+            full_address=full_address,
+            fast=True
+        )
+        # Force the skipping of estimating the EPC
+        searcher.ordnance_survey_client.property_type = None
+        searcher.ordnance_survey_client.built_form = None
+
+        searcher.find_property(skip_os=True)
+        if searcher.newest_epc is None:
+            continue
+
+        epc = {
+            "asset_list_row_id": unitas_property["asset_list_row_id"],
+            **searcher.newest_epc.copy()
+        }
+
+        epc_data.append(epc)
+
+    epc_df = pd.DataFrame(epc_data)
+    # Pull out just the columns we need
+    epc_df = epc_df[
+        [
+            "asset_list_row_id",
+            "address1", "postcode",
+            "current-energy-efficiency",
+            "current-energy-rating",
+            "inspection-date",
+            "transaction-type",
+            "built-form"
+        ]
+    ]
+
+    epc_df["EPC Rating"] = (
+        epc_df["current-energy-efficiency"].astype(str) +
+        epc_df["current-energy-rating"].astype(str)
+    )
+
+    # Merge onto the Unitas data:
+    unitas_properties_to_survey_full = unitas_properties_to_survey.merge(
+        epc_df[
+            [
+                "asset_list_row_id",
+                "EPC Rating",
+                "inspection-date",
+                "transaction-type",
+                "built-form"
+            ]
+        ],
+        how="left",
+        on="asset_list_row_id"
+    )
+
+    unitas_properties_to_survey_full["ECO Eligibility"] = unitas_properties_to_survey_full["ECO Eligibility"].replace(
+        "eco4 (subject to ciga)", "eco4 - passed ciga, phase 2 check"
+    )
+
+    for col in ["EPC Rating", "inspection-date", "transaction-type", "built-form"]:
+        unitas_properties_to_survey_full[col] = np.where(
+            pd.isnull(unitas_properties_to_survey_full[col]),
+            "No EPC found",
+            unitas_properties_to_survey_full[col]
+        )
+        unitas_properties_to_survey_full[col] = unitas_properties_to_survey_full[col].fillna(
+            "No EPC found"
+        )
+        unitas_properties_to_survey_full[col] = unitas_properties_to_survey_full[col].astype(str)
+
+    unitas_properties_to_survey_full = unitas_properties_to_survey_full.rename(
+        columns={
+            "inspection-date": "Last EPC Inspection Date",
+            "transaction-type": "Last EPC Reason",
+            "built-form": "Last EPC Built Form",
+        }
+    )
+
+    # We now match to the survey outcomes
+    unitas_survey_outcomes_workbook = openpyxl.load_workbook(
+        "local_data/ha_data/UNITAS - survey outcomes 26.03.2024.xlsx"
+    )
+    unitas_survey_outcomes_worksheet = unitas_survey_outcomes_workbook["OUTCOMES"]
+    unitas_outcomes_colnames = [cell.value for cell in unitas_survey_outcomes_worksheet[2]]
+    outcomes_rows_data = []
+    for row in unitas_survey_outcomes_worksheet.iter_rows(min_row=3, values_only=False):
+        row_data = [cell.value for cell in row]  # This will get you the cell values
+        outcomes_rows_data.append(row_data)
+
+    unitas_outcomes = pd.DataFrame(outcomes_rows_data, columns=unitas_outcomes_colnames)
+    unitas_outcomes = unitas_outcomes.rename(
+        columns={
+            "Notes                 (If 'no answer' under outcomes, have you checked around the property for access "
+            "issues where possible?)": "Notes"
+        }
+    )
+
+    unitas_outcomes["Postcode"].unique()
+    eg1 = unitas_properties_to_survey_full[
+        (unitas_properties_to_survey_full["Post Code"] == "ST6 6RF")
+    ]
+    eg1_outcomes = unitas_outcomes[
+        (unitas_outcomes["Postcode"] == "ST6 6RF")
+    ]
+
+    # Merge outcomes onto properties to survey. Will probably have to do algorithmically
+    full_asset_list["matching_postcode_nospace"] = full_asset_list["matching_postcode"].str.lower().str.replace(" ", "")
+    outcome_matching = []
+    for _, outcome in tqdm(unitas_outcomes.iterrows(), total=len(unitas_outcomes)):
+        # We search for the corresponding entry in the asset list
+        postcode_lower = outcome["Postcode"].lower().strip().replace(" ", "")
+
+        # Confirmed not in asset list
+        # Filter asset list on postcode
+        df = unitas_properties_to_survey_full[
+            unitas_properties_to_survey_full["matching_postcode_nospace"].str.contains(postcode_lower)
+        ]
+
+        df = df[df["HouseNo"] == str(outcome["No."])]
+        if df.empty:
+            continue
+
+        if df.shape[0] == 1:
+            outcome_matching.append(
+                {
+                    "asset_list_row_id": df["asset_list_row_id"].values[0],
+                    **outcome.to_dict()
+                }
+            )
+            continue
+
+        raise Exception("something went wrong")
+
+    # Store as an excel
+    unitas_properties_to_survey_full.to_excel("Unitas - phase 2 properties to Survey.xlsx")
+
+
 def app():
     """
     This app contains the housin association analysis for HAs 1, 6, 14, 39 and 107.
@@ -6907,81 +7240,3 @@ def app():
         december_figures["ECO4 remaining"]
     )
     december_figures["ECO4 remaining"].sum()
-
-    # Adhoc - for UNITAS, stripping out additional surveys that have been completed
-    unitas_data = loader.data["HA50"].copy()
-    unitas_asset_list = unitas_data["asset_list"].copy()
-    unitas_survey_sheet = unitas_data["survey_list"].copy()
-    # We remove the surveyed properties from the asset sheet
-    unitas_survey_sheet = unitas_survey_sheet[~pd.isnull(unitas_survey_sheet["asset_list_row_id"])]
-    unitas_asset_list = unitas_asset_list.merge(
-        unitas_survey_sheet[["asset_list_row_id", "installation_status"]],
-        how="left",
-        on="asset_list_row_id"
-    )
-    unitas_asset_list = unitas_asset_list[pd.isnull(unitas_asset_list["installation_status"])]
-    unitas_asset_list = unitas_asset_list.drop(columns=["installation_status"])
-
-    # We read in the data for the further completed surveys
-    unitas_phase_1_workbook = openpyxl.load_workbook(
-        "local_data/ha_data/UNITAS ( STOKE) MASTER ROLLING SHEET UPDATED 8.4.24 K - no password.xlsx"
-    )
-    phase_1_worksheet = unitas_phase_1_workbook["ECO 4 - PHASE 1"]
-    phase_2_worksheet = unitas_phase_1_workbook["ECO4 - PHASE 2"]
-    phase1_colnames = [cell.value for cell in phase_1_worksheet[1]]
-    phase_1_rows_data = []
-    for row in phase_1_worksheet.iter_rows(min_row=2, values_only=False):
-        row_data = [cell.value for cell in row]  # This will get you the cell values
-        phase_1_rows_data.append(row_data)
-
-    phase_1_surveys = pd.DataFrame(phase_1_rows_data, columns=phase1_colnames)
-
-    # Correct phase 1 surveys in the same fashion as the previous approach
-    phase_1_surveys = DataLoader.correct_ha50_survey_list(phase_1_surveys.copy())
-
-    # We check all phase 1 surveys are contained in the data we had before
-    additional = []
-    for _, row in tqdm(phase_1_surveys.iterrows(), total=len(phase_1_surveys)):
-        # We look for the entry in the old survey sheet:
-        # matched_uprn = unitas_survey_sheet[unitas_survey_sheet["EPR UPRN NUMBER"] == row["UPRN"]]
-        # if matched_uprn.shape[0] == 1:
-        #     continue
-
-        matched_1 = unitas_survey_sheet[
-            (unitas_survey_sheet["Post Code"] == row["Post Code"]) &
-            (unitas_survey_sheet["NO."] == row["NO."])
-            ]
-
-        if matched_1.shape[0] == 1:
-            continue
-
-        matched_2 = unitas_survey_sheet[
-            (unitas_survey_sheet["Street / Block Name"] == row["Street / Block Name"]) &
-            (unitas_survey_sheet["NO."] == row["NO."])
-            ]
-
-        if matched_2.shape[0] == 1:
-            continue
-
-        additional.append(row.to_dict())
-    additional = pd.DataFrame(additional)
-
-    phase_2_rows_data = []
-    for row in phase_2_worksheet.iter_rows(min_row=2, values_only=False):
-        row_data = [cell.value for cell in row]  # This will get you the cell values
-        phase_2_rows_data.append(row_data)
-
-    phase2_colnames = [cell.value for cell in phase_2_worksheet[1]]
-    phase_2_surveys = pd.DataFrame(phase_2_rows_data, columns=phase2_colnames)
-    # Drop all of the occurances of "OFFICE USE ONLY" columns
-    phase_2_surveys = phase_2_surveys.drop(columns=[c for c in phase_2_surveys.columns if "OFFICE USE ONLY" in c])
-    common_columns = list({c for c in phase_2_surveys.columns if c in additional.columns})
-    additional_filtered = additional[common_columns]
-
-    further_unitas_completed_surveys = pd.concat(
-        [phase_2_surveys, additional_filtered],
-        axis=0,
-        ignore_index=True
-    )
-
-    # We match these back to the asset list

From f0c4ca0143ee886ba84960b00e3f2700b6047429 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 10 Apr 2024 11:14:33 +0100
Subject: [PATCH 184/248] completed unitas

---
 .../ha_15_32/ha_analysis_batch_3.py           | 46 ++++++++++++++++++-
 1 file changed, 45 insertions(+), 1 deletion(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 35bb63fe..f99c7b1a 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -7106,9 +7106,53 @@ def unitas_data_prep(loader):
             continue
 
         raise Exception("something went wrong")
+    outcome_matching = pd.DataFrame(outcome_matching)
+
+    # We can have duplicate matches, so we format the "Date letters sent" column and retrieve the newest outcome
+    outcome_matching["Date letters sent"] = outcome_matching["Date letters sent"].str.lower()
+    outcome_matching["Extracted Date"] = outcome_matching["Date letters sent"].str.extract(
+        r'(?:w[./]c )(\d{2}\.\d{2}\.\d{4})')
+    outcome_matching["Extracted Date"] = pd.to_datetime(outcome_matching["Extracted Date"], format='%d.%m.%Y')
+    # We sort by asset_list_row_id and extracted date, and retrieve the newest
+    outcome_matching = outcome_matching.sort_values(["asset_list_row_id", "Extracted Date"], ascending=[True, False])
+
+    # Some properties will have multiple outcomes - for these, we re-format
+    outcome_matching_grouped = []
+    for asset_list_row_id, grouped_data in outcome_matching.groupby("asset_list_row_id"):
+        if grouped_data.shape[0] == 1:
+            outcome_matching_grouped.append(
+                {
+                    "Number of previous visits": 1,
+                    **grouped_data.to_dict("records")[0]
+                }
+            )
+            continue
+        if grouped_data.shape[0] == 2:
+            newest_visit = grouped_data.head(1)
+            oldest_visit = grouped_data.tail(1)[['Outcomes', 'Surveyor', 'Notes', 'Date letters sent']].add_suffix(
+                " second visit")
+            to_append = {
+                "Number of previous visits": 2,
+                **newest_visit.to_dict("records")[0],
+                **oldest_visit.to_dict("records")[0]
+            }
+            outcome_matching_grouped.append(to_append)
+        else:
+            raise Exception("something went wrong")
+
+    outcome_matching_grouped = pd.DataFrame(outcome_matching_grouped)
+
+    unitas_properties_to_survey_with_outcomes = unitas_properties_to_survey_full.merge(
+        outcome_matching_grouped, how="left", on="asset_list_row_id"
+    )
+    unitas_properties_to_survey_with_outcomes["Number of previous visits"] = (
+        unitas_properties_to_survey_with_outcomes["Number of previous visits"].fillna(0)
+    )
 
     # Store as an excel
-    unitas_properties_to_survey_full.to_excel("Unitas - phase 2 properties to Survey.xlsx")
+    unitas_properties_to_survey_with_outcomes.to_excel("Unitas - phase 2 properties to Survey.xlsx")
+
+    unitas_properties_to_survey_with_outcomes["Last EPC Built Form"].value_counts()
 
 
 def app():

From cf7627a8d7fa06df445faf7637e06eefd7f8764b Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 10 Apr 2024 12:04:17 +0100
Subject: [PATCH 185/248] started setting up asset list and gathering council
 tax bands

---
 etl/customers/immo/pilot/asset_list.py | 44 ++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)
 create mode 100644 etl/customers/immo/pilot/asset_list.py

diff --git a/etl/customers/immo/pilot/asset_list.py b/etl/customers/immo/pilot/asset_list.py
new file mode 100644
index 00000000..33f79729
--- /dev/null
+++ b/etl/customers/immo/pilot/asset_list.py
@@ -0,0 +1,44 @@
+import os
+
+import pandas as pd
+from tqdm import tqdm
+
+from dotenv import load_dotenv
+from utils.s3 import read_excel_from_s3
+from backend.SearchEpc import SearchEpc
+from epc_api.client import EpcClient
+from utils.s3 import save_csv_to_s3
+
+# Read in the .env file in backend
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+USER_ID = 8
+PORTFOLIO_ID = 70
+
+council_tax_bands = [
+    {'address': '8 Corporation Road', 'postcode': 'DY2 7PX', 'band': 'A'},
+    {'address': '21 Wells Road', 'postcode': 'DY5 3TB', 'band': 'A'},
+    {'address': '27 Milton Road', 'postcode': 'WV14 8HZ', 'band': 'A'},
+    {'address': '195 Ashenhurst Road', 'postcode': 'DY1 2JB', 'band': 'A'},
+    {'address': '53 Bromley', 'postcode': 'DY5 4PJ', 'band': 'A'},
+    {'address': '91 Osprey Drive', 'postcode': 'DY1 2JS', 'band': 'B'},
+    {'address': '47 Fairfield Road', 'postcode': 'DY8 5UJ', 'band': 'B'},
+    {'address': '150 Huntingtree Road', 'postcode': 'B63 4HP', 'band': 'C'},
+    {'address': '6 Beech Road', 'postcode': 'DY1 4BP', 'band': 'A'},
+    {'address': '5 Oaklands', 'postcode': 'B62 0JA', 'band': 'A'},
+]
+
+
+def app():
+    raw_asset_list = read_excel_from_s3(
+        bucket_name="retrofit-datalake-dev",
+        file_key="customers/Immo/IMMO Sample Assets_Dudley.xlsx",
+        header_row=0
+    )
+    raw_asset_list = raw_asset_list.drop(columns=["Unnamed: 0"])
+    # Extract address and postcode
+    raw_asset_list["address"] = raw_asset_list["Full Address"].str.split(",").str[0]
+    raw_asset_list["postcode"] = raw_asset_list["Full Address"].str.split(",").str[-1].str.strip()
+
+    raw_asset_list[["address", "postcode"]].to_dict("records")

From b791ecb054f0e5be39f91f78771f74ed80fe904d Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 10 Apr 2024 12:08:47 +0100
Subject: [PATCH 186/248] set up asset list

---
 etl/customers/immo/pilot/asset_list.py | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/etl/customers/immo/pilot/asset_list.py b/etl/customers/immo/pilot/asset_list.py
index 33f79729..269ffe00 100644
--- a/etl/customers/immo/pilot/asset_list.py
+++ b/etl/customers/immo/pilot/asset_list.py
@@ -41,4 +41,23 @@ def app():
     raw_asset_list["address"] = raw_asset_list["Full Address"].str.split(",").str[0]
     raw_asset_list["postcode"] = raw_asset_list["Full Address"].str.split(",").str[-1].str.strip()
 
-    raw_asset_list[["address", "postcode"]].to_dict("records")
+    council_tax_bands = pd.DataFrame(council_tax_bands)
+    asset_list = raw_asset_list.merge(council_tax_bands, how="left", on=["address", "postcode"])
+
+    # Store the data in s3
+    filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
+    save_csv_to_s3(
+        dataframe=asset_list,
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename
+    )
+
+    body = {
+        "portfolio_id": str(PORTFOLIO_ID),
+        "housing_type": "Private",
+        "goal": "Increase EPC",
+        "goal_value": "A",
+        "trigger_file_path": filename,
+        "budget": None,
+    }
+    print(body)

From 5079170a25066e4ed3ab96c7a5034f1ddce5ada2 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 10 Apr 2024 13:34:15 +0100
Subject: [PATCH 187/248] pulled valuations for immo pilot from Zoopla

---
 .idea/Model.iml                |  2 +-
 .idea/misc.xml                 |  2 +-
 backend/app/plan/router.py     | 10 ++++++++++
 backend/ml_models/Valuation.py | 11 +++++++++++
 4 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/.idea/Model.iml b/.idea/Model.iml
index b0f9c00d..4413bb06 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 1122b380..6f308057 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 50b8a837..c71533fa 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -142,6 +142,16 @@ async def trigger_plan(body: PlanTriggerRequest):
                 )
             )
 
+        z = []
+        for p in input_properties:
+            z.append(
+                {
+                    "uprn": p.uprn,
+                    "address": p.address,
+                    "postcode": p.postcode,
+                }
+            )
+
         if not input_properties:
             return Response(status_code=204)
 
diff --git a/backend/ml_models/Valuation.py b/backend/ml_models/Valuation.py
index 2bb7de32..251c016a 100644
--- a/backend/ml_models/Valuation.py
+++ b/backend/ml_models/Valuation.py
@@ -52,6 +52,17 @@ class PropertyValuation:
         10070056829: 76_000,
         10070056920: 76_000,
         10023345463: 76_000,
+        # IMMO Dudley Pilot - search by going to https://www.zoopla.co.uk/property/uprn/{uprn}/
+        90070461: 172_000,  # Based on Zoopla
+        90022227: 181_000,  # Based on Zoopla
+        90106884: 180_000,  # Based on Zoopla
+        90051858: 201_000,  # Based on Zoopla
+        90060989: 172_000,  # Based on Zoopla
+        90048026: 196_000,  # Based on Zoopla
+        90077535: 192_000,  # Based on Zoopla
+        90093693: 279_000,  # Based on Zoopla
+        90055152: 149_000,  # Based on Zoopla
+        90028499: 238_000,  # Based on Zoopla
     }
 
     # We base our valuation uplifts on a number of sources

From 5ac5cd7737a5b632258d130ea0e36057c25b0b6a Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 10 Apr 2024 14:02:19 +0100
Subject: [PATCH 188/248] fixing bug when setting phase for heating controls,
 without a recommendation

---
 backend/app/plan/router.py            | 10 ----------
 recommendations/HeatingRecommender.py |  7 ++++++-
 2 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index c71533fa..50b8a837 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -142,16 +142,6 @@ async def trigger_plan(body: PlanTriggerRequest):
                 )
             )
 
-        z = []
-        for p in input_properties:
-            z.append(
-                {
-                    "uprn": p.uprn,
-                    "address": p.address,
-                    "postcode": p.postcode,
-                }
-            )
-
         if not input_properties:
             return Response(status_code=204)
 
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index aec1f419..91730053 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -361,7 +361,12 @@ class HeatingRecommender:
             self.recommendations = combined_recommendations
         else:
             # We increment the recommendation phase, since the heating controls are separate from the boiler upgrade
-            recommendation_phase += 1
+            # but we only increment the phase if we have a heating recommendation
+            has_heating_recommendation = any(
+                recommendation["type"] == "heating" for recommendation in self.recommendations
+            )
+            if has_heating_recommendation:
+                recommendation_phase += 1
             # The heating controls recommendation is distrinct from the boiler upgrade recommendation
             # We insert phase into the recommendations for heating controls
             for recommendation in controls_recommender.recommendation:

From 4e4199345511c2aa8e838581cebe9e7c307c1475 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 10 Apr 2024 17:20:49 +0100
Subject: [PATCH 189/248] savings

---
 etl/customers/immo/pilot/asset_list.py           | 13 +------------
 recommendations/optimiser/optimiser_functions.py |  6 +-----
 2 files changed, 2 insertions(+), 17 deletions(-)

diff --git a/etl/customers/immo/pilot/asset_list.py b/etl/customers/immo/pilot/asset_list.py
index 269ffe00..7939a555 100644
--- a/etl/customers/immo/pilot/asset_list.py
+++ b/etl/customers/immo/pilot/asset_list.py
@@ -1,18 +1,7 @@
-import os
-
 import pandas as pd
-from tqdm import tqdm
-
-from dotenv import load_dotenv
 from utils.s3 import read_excel_from_s3
-from backend.SearchEpc import SearchEpc
-from epc_api.client import EpcClient
 from utils.s3 import save_csv_to_s3
 
-# Read in the .env file in backend
-load_dotenv(dotenv_path="backend/.env")
-EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
-
 USER_ID = 8
 PORTFOLIO_ID = 70
 
@@ -28,6 +17,7 @@ council_tax_bands = [
     {'address': '6 Beech Road', 'postcode': 'DY1 4BP', 'band': 'A'},
     {'address': '5 Oaklands', 'postcode': 'B62 0JA', 'band': 'A'},
 ]
+council_tax_bands = pd.DataFrame(council_tax_bands)
 
 
 def app():
@@ -41,7 +31,6 @@ def app():
     raw_asset_list["address"] = raw_asset_list["Full Address"].str.split(",").str[0]
     raw_asset_list["postcode"] = raw_asset_list["Full Address"].str.split(",").str[-1].str.strip()
 
-    council_tax_bands = pd.DataFrame(council_tax_bands)
     asset_list = raw_asset_list.merge(council_tax_bands, how="left", on=["address", "postcode"])
 
     # Store the data in s3
diff --git a/recommendations/optimiser/optimiser_functions.py b/recommendations/optimiser/optimiser_functions.py
index 27838d6e..9860c5ea 100644
--- a/recommendations/optimiser/optimiser_functions.py
+++ b/recommendations/optimiser/optimiser_functions.py
@@ -20,10 +20,6 @@ def prepare_input_measures(property_recommendations, goal, housing_type):
     if not goal_key:
         raise NotImplementedError("Not implemented this gain type - investigate me")
 
-    # We don't include suspended and solid floor insulation as possible measures in private housing, because
-    # of the need to decant the tenant
-    ignored_measures = ["suspended_floor_insulation", "solid_floor_insulation"] if housing_type == "Private" else []
-
     input_measures = []
     for recs in property_recommendations:
         input_measures.append(
@@ -34,7 +30,7 @@ def prepare_input_measures(property_recommendations, goal, housing_type):
                     "gain": rec[goal_key],
                     "type": rec["type"]
                 }
-                for rec in recs if rec["type"] not in ignored_measures
+                for rec in recs
             ]
         )
 

From 346b798c192e4c071640123379c021373d965543 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 10 Apr 2024 17:26:00 +0100
Subject: [PATCH 190/248] removed whitespace

---
 backend/app/plan/router.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 50b8a837..bbf9261b 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -75,7 +75,6 @@ async def trigger_plan(body: PlanTriggerRequest):
     logger.info("Connecting to db")
     session = sessionmaker(bind=db_engine)()
     created_at = datetime.now().isoformat()
-
     # TODO: We should store the trigger file path in the database with the plan so we can track the file that
     #       triggered the plan
 

From e0e60f8c9822aec63e1acb74bdb037a8a4840210 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 10 Apr 2024 17:26:23 +0100
Subject: [PATCH 191/248] added whitespace

---
 backend/app/plan/router.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index bbf9261b..4b4d45e7 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -75,6 +75,7 @@ async def trigger_plan(body: PlanTriggerRequest):
     logger.info("Connecting to db")
     session = sessionmaker(bind=db_engine)()
     created_at = datetime.now().isoformat()
+    
     # TODO: We should store the trigger file path in the database with the plan so we can track the file that
     #       triggered the plan
 

From 505fe0736becf7ad649d24ff68bf902825239b02 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 10 Apr 2024 18:46:51 +0100
Subject: [PATCH 192/248] Updating optimiser to only optimise solar
 recommendations that include the battery

---
 backend/app/plan/router.py                       |  7 ++-----
 recommendations/SolarPvRecommendations.py        |  3 ++-
 recommendations/optimiser/optimiser_functions.py | 12 +++++++-----
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 4b4d45e7..6f179c79 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -75,7 +75,7 @@ async def trigger_plan(body: PlanTriggerRequest):
     logger.info("Connecting to db")
     session = sessionmaker(bind=db_engine)()
     created_at = datetime.now().isoformat()
-    
+
     # TODO: We should store the trigger file path in the database with the plan so we can track the file that
     #       triggered the plan
 
@@ -242,7 +242,7 @@ async def trigger_plan(body: PlanTriggerRequest):
                 expected_adjusted_energy=expected_adjusted_energy
             )
 
-            input_measures = prepare_input_measures(recommendations_with_impact, body.goal, body.housing_type)
+            input_measures = prepare_input_measures(recommendations_with_impact, body.goal)
 
             current_sap_points = int(property_instance.data["current-energy-efficiency"])
             target_sap_points = epc_to_sap_lower_bound(body.goal_value)
@@ -279,9 +279,6 @@ async def trigger_plan(body: PlanTriggerRequest):
                 if ventilation_rec:
                     selected_recommendations.add(ventilation_rec["recommendation_id"])
 
-            # We check if the selected recommendation is wall ventilation and if so, we make sure
-            # mechanical ventilation is selected
-
             # We'll use the set of selected recommendations to filter the recommendations to upload
             final_recommendations = [
                 [
diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py
index 4cf1c1fc..f75003ce 100644
--- a/recommendations/SolarPvRecommendations.py
+++ b/recommendations/SolarPvRecommendations.py
@@ -122,6 +122,7 @@ class SolarPvRecommendations:
                     **cost_result,
                     # This is required for simulating the SAP impact. solar_pv_percentage is between 0 & 1 so we scale
                     # back up here
-                    "photo_supply": 100 * roof_coverage
+                    "photo_supply": 100 * roof_coverage,
+                    "has_battery": has_battery
                 }
             )
diff --git a/recommendations/optimiser/optimiser_functions.py b/recommendations/optimiser/optimiser_functions.py
index 9860c5ea..6159b930 100644
--- a/recommendations/optimiser/optimiser_functions.py
+++ b/recommendations/optimiser/optimiser_functions.py
@@ -1,17 +1,13 @@
-def prepare_input_measures(property_recommendations, goal, housing_type):
+def prepare_input_measures(property_recommendations, goal):
     """
     Basic function to convert recommendations_to_upload to a format that is
     suitable for the optimiser - large
     :param property_recommendations:   object containing the recommendations, created in the plan trigger api
     :param goal:    goal to be optimised for, should be one of the keys in gain_map. E.g. if the gain is SAP points,
                     the goal should reflect that desired gain
-    :param housing_type:    type of housing the recommendations are for - should be one of "Social" or "Private"
     :return:    Nested list of input measures
     """
 
-    if housing_type not in ["Social", "Private"]:
-        raise ValueError("Invalid housing type - investigate me")
-
     goal_map = {
         "Increase EPC": "sap_points"
     }
@@ -22,6 +18,12 @@ def prepare_input_measures(property_recommendations, goal, housing_type):
 
     input_measures = []
     for recs in property_recommendations:
+        if recs[0]["type"] == "solar_pv":
+            # if the recommendation is a solar recommendation without a battery, we exclude it from the optimisation.
+            # That will ensure that the optimiser only considers solar recommendations with batteries, so we don't
+            # under-report the potential cost
+            recs = [r for r in recs if recs["has_battery"]]
+
         input_measures.append(
             [
                 {

From f04b79d6800fce396fdbc5494b66f221d43a9826 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 10 Apr 2024 18:54:58 +0100
Subject: [PATCH 193/248] fixed bug with selecting battery solar recommendations

---
 recommendations/optimiser/optimiser_functions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/recommendations/optimiser/optimiser_functions.py b/recommendations/optimiser/optimiser_functions.py
index 6159b930..d6353eea 100644
--- a/recommendations/optimiser/optimiser_functions.py
+++ b/recommendations/optimiser/optimiser_functions.py
@@ -22,7 +22,7 @@ def prepare_input_measures(property_recommendations, goal):
             # if the recommendation is a solar recommendation without a battery, we exclude it from the optimisation.
             # That will ensure that the optimiser only considers solar recommendations with batteries, so we don't
             # under-report the potential cost
-            recs = [r for r in recs if recs["has_battery"]]
+            recs = [r for r in recs if r["has_battery"]]
 
         input_measures.append(
             [

From 43af0de04732ba737459a1f04ccb50950287c235 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 11 Apr 2024 10:30:36 +0100
Subject: [PATCH 194/248] Updated conditions under which we recommend loft insulation, so
 it is not recommended if the home has more than 200mm insulation in place
 already

---
 recommendations/RoofRecommendations.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/recommendations/RoofRecommendations.py b/recommendations/RoofRecommendations.py
index eb1c6c4f..8d6a91e7 100644
--- a/recommendations/RoofRecommendations.py
+++ b/recommendations/RoofRecommendations.py
@@ -20,8 +20,9 @@ class RoofRecommendations:
 
     DIMINISHING_RETURNS_U_VALUE = 0.14
 
-    # It is recommended that lofts should have at least 270mm of insulation
-    MINIMUM_LOFT_ISULATION_MM = 270
+    # It is recommended that lofts should have at least 270mm of insulation. If the property has more than 200mm of
+    # loft insulation in place already, we do not recommend anything for the moment
+    MINIMUM_LOFT_ISULATION_MM = 200
     # Flat roof should have at least 100mm of insulation
     MINIMUM_FLAT_ROOF_ISULATION_MM = 100
 
@@ -71,7 +72,7 @@ class RoofRecommendations:
         # Building regulations part L recommend installing at least 270mm of insulation, however generally we
         # experience diminishing returns in terms of SAP once we go beyond around 150mm of insulation
         # This only holds true for pitched roofs.
-        if (insulation_thickness >= self.MINIMUM_LOFT_ISULATION_MM) and self.property.roof["is_pitched"]:
+        if (insulation_thickness > self.MINIMUM_LOFT_ISULATION_MM) and self.property.roof["is_pitched"]:
             return
 
         if (insulation_thickness >= self.MINIMUM_FLAT_ROOF_ISULATION_MM) and self.property.roof["is_flat"]:

From db6fd58af4e89dcbdbecd436f2a9328ea6924521 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 11 Apr 2024 10:56:17 +0100
Subject: [PATCH 195/248] changing the logic we use to recommend a combi boiler

---
 backend/Property.py                   | 13 +++++++++-
 recommendations/HeatingRecommender.py | 36 ++++++++++++++++++---------
 2 files changed, 36 insertions(+), 13 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index d3dd8395..6f2e648d 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -55,7 +55,13 @@ class Property:
 
     DATA_ANOMALY_MATCHES = DATA_ANOMALY_MATCHES
 
-    def __init__(self, id, postcode, address, epc_record):
+    # Surplus information, that can be provided as optional inputs, by a customer
+    n_bathrooms = None
+    n_bedrooms = None
+
+    def __init__(
+        self, id, postcode, address, epc_record, **kwargs
+    ):
 
         self.epc_record = epc_record
 
@@ -133,6 +139,11 @@ class Property:
 
         self.recommendations_scoring_data = []
 
+    def parse_kwargs(self, kwargs):
+        # We extract the elements from kwargs that we recognise. Anything additional is ignored
+        self.n_bathrooms = kwargs.get("n_bathrooms", None)
+        self.n_bedrooms = kwargs.get("n_bedrooms", None)
+
     def create_base_difference_epc_record(self, cleaned_lookup: dict):
         """
         Creates a EPCDifferenceRecord object, which is used to store the difference between the current and
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 91730053..d4fe0a90 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -34,7 +34,6 @@ class HeatingRecommender:
         if has_electric_heating_description or no_heating_no_mains:
             # Recommend high heat retention storage heaters
             self.recommend_electric_storage_heaters(phase=phase, system_change=True, heating_controls_only=False)
-            return
 
         # if the property has mains heating with boiler and radiators, we recommend optimal heating controls
         has_boiler = self.property.main_heating["clean_description"] in ["Boiler and radiators, mains gas"]
@@ -44,9 +43,16 @@ class HeatingRecommender:
             'No system present, electric heaters assumed'
         ] and self.property.data["mains-gas-flag"]
 
-        if has_boiler or no_heating_has_mains:
-            self.recommend_boiler_upgrades(phase=phase, no_heating_has_mains=no_heating_has_mains)
-            return
+        # We also check if the property has electric heating, but it has access to the mains gas
+        electic_heating_has_mains = has_electric_heating_description and self.property.data["mains-gas-flag"]
+
+        if has_boiler or no_heating_has_mains or electic_heating_has_mains:
+            # This indicates that the home previously did not have a boiler in place and so would require
+            # an overhaul to the system
+            system_change = not has_boiler
+            self.recommend_boiler_upgrades(phase=phase, system_change=system_change)
+
+        return
 
     @staticmethod
     def check_simulation_difference(old_config, new_config):
@@ -256,12 +262,14 @@ class HeatingRecommender:
 
         return closest_size
 
-    def recommend_boiler_upgrades(self, phase, no_heating_has_mains):
+    def recommend_boiler_upgrades(self, phase, system_change):
         """
         This boiler recommendation will only recommend a like-for-like upgrade, since changing the system
         is generally more expensive
         :param phase:
-        :param no_heating_has_mains: indicaes if the property has no heating system, but has access to the mains gas
+        :param system_change: Indicates if the property would be undergoing a heating system change. This could be true
+                              if the home didn't have a heating system in place, or if the home had electric heating
+                              previously
         :return:
         """
 
@@ -279,17 +287,21 @@ class HeatingRecommender:
                 num_heated_rooms=self.property.data["number-heated-rooms"],
             )
 
-            # If heating and hot water come from the mains, we need a combi boiler, otherwise we need a regular boiler
-            hotwater_from_mains = self.property.hotwater["clean_description"] in ["From main system"]
-
-            is_combi = hotwater_from_mains or no_heating_has_mains
+            # We recommend a combi boiler under the following conditions
+            # 1) If there are 4 or fewer rooms (we don't use heqted rooms because none of the rooms could be
+            #    heated if there is no existing heating system).
+            # 2) There is more than 1 bathroom
+            is_combi = (
+                (self.property.data["number-heated-rooms"] <= 4) or
+                (self.property.n_bathrooms not in [None, 0, 1])
+            )
             if is_combi:
                 description = "Upgrade to a new combi boiler"
             else:
-                description = "Upgrade to a new boiler"
+                description = "Upgrade to a new gas condensing boiler"
 
             simulation_config = {"mainheat_energy_eff_ending": "Good"}
-            if no_heating_has_mains:
+            if system_change:
                 # Installation of a boiler improves the hot water system so we need to reflect this in
                 # the outcome of the recommendation
                 heating_ending_config = MainHeatAttributes("Boiler and radiators, mains gas").process()

From ac8cf271698788d4479626dae19f09a0027c79aa Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 11 Apr 2024 11:20:29 +0100
Subject: [PATCH 196/248] created extract kwargs to read bathrooms and bedrooms

---
 backend/Property.py        | 22 ++++++++++++++++++++++
 backend/app/plan/router.py |  1 +
 2 files changed, 23 insertions(+)

diff --git a/backend/Property.py b/backend/Property.py
index 6f2e648d..5fe9716e 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -139,6 +139,28 @@ class Property:
 
         self.recommendations_scoring_data = []
 
+    @classmethod
+    def extract_kwargs(cls, kwargs):
+        """
+        This method is to be used in the router, to extract the kwargs from the request and prevent any errors such as
+        non-integer values, or inputs that clash with the __init__ method of this class
+        :param kwargs:
+        :return:
+        """
+        n_bathrooms = kwargs.get("n_bathrooms", None)
+        if n_bathrooms is not None:
+            # We add on a small value to ensure that the number of bathrooms is rounded up, in case the value is 0.5
+            n_bathrooms = int(round(n_bathrooms + 1e-5))
+
+        n_bedrooms = kwargs.get("n_bedrooms", None)
+        if n_bedrooms is not None:
+            n_bedrooms = int(round(n_bedrooms + 1e-5))
+
+        return {
+            "n_bathrooms": n_bathrooms,
+            "n_bedrooms": n_bedrooms,
+        }
+
     def parse_kwargs(self, kwargs):
         # We extract the elements from kwargs that we recognise. Anything additional is ignored
         self.n_bathrooms = kwargs.get("n_bathrooms", None)
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 6f179c79..7dc11bb9 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -139,6 +139,7 @@ async def trigger_plan(body: PlanTriggerRequest):
                     address=epc_searcher.address_clean,
                     postcode=epc_searcher.postcode_clean,
                     epc_record=prepared_epc,
+                    **Property.extract_kwargs(config)
                 )
             )
 

From 2aa2e5947e6d29acf5c82962788a18ad9daf3351 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 11 Apr 2024 11:36:14 +0100
Subject: [PATCH 197/248] adding bedrooms and bathrooms to asset list for immo

---
 .idea/Model.iml                        | 2 +-
 .idea/misc.xml                         | 2 +-
 etl/customers/immo/pilot/asset_list.py | 8 ++++++++
 3 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/.idea/Model.iml b/.idea/Model.iml
index 4413bb06..b0f9c00d 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 6f308057..1122b380 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/etl/customers/immo/pilot/asset_list.py b/etl/customers/immo/pilot/asset_list.py
index 7939a555..9756e00b 100644
--- a/etl/customers/immo/pilot/asset_list.py
+++ b/etl/customers/immo/pilot/asset_list.py
@@ -33,6 +33,14 @@ def app():
 
     asset_list = raw_asset_list.merge(council_tax_bands, how="left", on=["address", "postcode"])
 
+    # We're provided with number of bathrooms and number of bedrooms.
+    asset_list = asset_list.rename(
+        columns={
+            "No. of Beds": "n_bedrooms",
+            "No. of WC's": "n_bathrooms"
+        }
+    )
+
     # Store the data in s3
     filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
     save_csv_to_s3(

From 606fd3a615e2188f78e2721aef9732e5d0d76328 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 11 Apr 2024 11:49:48 +0100
Subject: [PATCH 198/248] Adding parsing of kwargs to Property class

---
 .idea/Model.iml            |  2 +-
 .idea/misc.xml             |  2 +-
 backend/Property.py        |  6 ++++--
 backend/app/plan/router.py | 20 ++++++++++----------
 4 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/.idea/Model.iml b/.idea/Model.iml
index b0f9c00d..4413bb06 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 1122b380..6f308057 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/backend/Property.py b/backend/Property.py
index 5fe9716e..950c1ac9 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -139,6 +139,8 @@ class Property:
 
         self.recommendations_scoring_data = []
 
+        self.parse_kwargs(kwargs)
+
     @classmethod
     def extract_kwargs(cls, kwargs):
         """
@@ -150,11 +152,11 @@ class Property:
         n_bathrooms = kwargs.get("n_bathrooms", None)
         if n_bathrooms is not None:
             # We add on a small value to ensure that the number of bathrooms is rounded up, in case the value is 0.5
-            n_bathrooms = int(round(n_bathrooms + 1e-5))
+            n_bathrooms = int(round(float(n_bathrooms) + 1e-5))
 
         n_bedrooms = kwargs.get("n_bedrooms", None)
         if n_bedrooms is not None:
-            n_bedrooms = int(round(n_bedrooms + 1e-5))
+            n_bedrooms = int(round(float(n_bedrooms) + 1e-5))
 
         return {
             "n_bathrooms": n_bathrooms,
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 7dc11bb9..3cb2027d 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -109,16 +109,16 @@ async def trigger_plan(body: PlanTriggerRequest):
                 session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn
             )
             # if a new record was not created, we don't produduce recommendations
-            if not is_new:
-                continue
-
-            create_property_targets(
-                session,
-                property_id=property_id,
-                portfolio_id=body.portfolio_id,
-                epc_target=body.goal_value,
-                heat_demand_target=None
-            )
+            # if not is_new:
+            #     continue
+            #
+            # create_property_targets(
+            #     session,
+            #     property_id=property_id,
+            #     portfolio_id=body.portfolio_id,
+            #     epc_target=body.goal_value,
+            #     heat_demand_target=None
+            # )
 
             epc_records = {
                 'original_epc': epc_searcher.newest_epc.copy(),

From 69424149510c38f59d1d847cbcef740a287da23b Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 11 Apr 2024 15:40:50 +0100
Subject: [PATCH 199/248] Updating heating recommender to recommend heating
 controls, with the heating change

---
 backend/app/plan/router.py            | 21 ++++++++++-----------
 recommendations/HeatingRecommender.py |  6 +++---
 2 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 3cb2027d..4b91566e 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -108,17 +108,16 @@ async def trigger_plan(body: PlanTriggerRequest):
             property_id, is_new = create_property(
                 session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn
             )
-            # if a new record was not created, we don't produduce recommendations
-            # if not is_new:
-            #     continue
-            #
-            # create_property_targets(
-            #     session,
-            #     property_id=property_id,
-            #     portfolio_id=body.portfolio_id,
-            #     epc_target=body.goal_value,
-            #     heat_demand_target=None
-            # )
+            if not is_new:
+                continue
+
+            create_property_targets(
+                session,
+                property_id=property_id,
+                portfolio_id=body.portfolio_id,
+                epc_target=body.goal_value,
+                heat_demand_target=None
+            )
 
             epc_records = {
                 'original_epc': epc_searcher.newest_epc.copy(),
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index d4fe0a90..6e4b2230 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -48,7 +48,7 @@ class HeatingRecommender:
 
         if has_boiler or no_heating_has_mains or electic_heating_has_mains:
             # This indicates that the home previously did not have a boiler in place and so would require
-            # an overhaul to the system
+            # an overhaul to the system - right now, this is all reasons, apart from if there is an existing boiler
             system_change = not has_boiler
             self.recommend_boiler_upgrades(phase=phase, system_change=system_change)
 
@@ -353,8 +353,8 @@ class HeatingRecommender:
         if not controls_recommender.recommendation:
             return
 
-        if no_heating_has_mains:
-            # We combine the heating and controls recommendations
+        if system_change:
+            # We combine the heating and controls recommendations, in the case of a system change
             boiler_recommendation = self.recommendations[0].copy()
             combined_recommendations = []
             for controls_recommendation in controls_recommender.recommendation:

From 014d51c0605e853351b621fbeafdf8ca3b870cbf Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 11 Apr 2024 16:09:31 +0100
Subject: [PATCH 200/248] fixing the case where we recommend a boiler and new
 heating controls, as well as an improved electrical system

---
 recommendations/HeatingRecommender.py | 36 +++++++++++++--------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 6e4b2230..1813e5e8 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -278,6 +278,7 @@ class HeatingRecommender:
         # We now recommend boiler upgrades, if applicable
         simulation_config = {}
         boiler_costs = {}
+        boiler_recommendation = {}
         if self.property.data["mainheat-energy-eff"] in ["Very Poor", "Poor", "Average"]:
             boiler_size = self.estimate_boiler_size(
                 property_type=self.property.data["property-type"],
@@ -290,10 +291,12 @@ class HeatingRecommender:
             # We recommend a combi boiler under the following conditions
             # 1) If there are 4 or fewer rooms (we don't use heqted rooms because none of the rooms could be
             #    heated if there is no existing heating system).
-            # 2) There is more than 1 bathroom
+            # 2) There 1 or fewer bathrooms
+            # Otherwise, we recommend a gas condensing boiler, which will server a larger property, that has multiple
+            # bathrooms
             is_combi = (
                 (self.property.data["number-heated-rooms"] <= 4) or
-                (self.property.n_bathrooms not in [None, 0, 1])
+                (self.property.n_bathrooms in [None, 0, 1])
             )
             if is_combi:
                 description = "Upgrade to a new combi boiler"
@@ -328,21 +331,19 @@ class HeatingRecommender:
 
             boiler_costs = self.costs.low_carbon_boiler(is_combi=is_combi, size=f"{boiler_size}kw")
 
-            self.recommendations.append(
-                {
-                    "phase": recommendation_phase,
-                    "parts": [
-                        # TODO
-                    ],
-                    "type": "heating",
-                    "description": description,
-                    "starting_u_value": None,
-                    "new_u_value": None,
-                    "sap_points": None,
-                    "simulation_config": simulation_config,
-                    **boiler_costs
-                }
-            )
+            boiler_recommendation = {
+                "phase": recommendation_phase,
+                "parts": [
+                    # TODO
+                ],
+                "type": "heating",
+                "description": description,
+                "starting_u_value": None,
+                "new_u_value": None,
+                "sap_points": None,
+                "simulation_config": simulation_config,
+                **boiler_costs
+            }
 
         # We recommend the heating controls
         # If the property did not previously have a boiler, we combine
@@ -355,7 +356,6 @@ class HeatingRecommender:
 
         if system_change:
             # We combine the heating and controls recommendations, in the case of a system change
-            boiler_recommendation = self.recommendations[0].copy()
             combined_recommendations = []
             for controls_recommendation in controls_recommender.recommendation:
                 combined_recommendation = self.combine_heating_and_controls(

From 88f43bcc822b4550540c88e7363d920937563072 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 11 Apr 2024 16:49:44 +0100
Subject: [PATCH 201/248] fixed the combi boiler logic

---
 recommendations/HeatingControlRecommender.py | 3 ++-
 recommendations/HeatingRecommender.py        | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/recommendations/HeatingControlRecommender.py b/recommendations/HeatingControlRecommender.py
index 95b5e3b1..76eaba4f 100644
--- a/recommendations/HeatingControlRecommender.py
+++ b/recommendations/HeatingControlRecommender.py
@@ -215,7 +215,8 @@ class HeatingControlRecommender:
             {
                 "type": "heating_control",
                 "parts": [],
-                "description": "Upgrade heating controls to Smart Thermostats, room sensors and smart radiator valves",
+                "description": "Upgrade heating controls to Smart Thermostats, room sensors and smart radiator valves "
+                               "(time & temperature zone control)",
                 **self.costs.time_and_temperature_zone_control(
                     number_heated_rooms=int(self.property.data["number-heated-rooms"])
                 ),
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 1813e5e8..bd4d87a2 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -295,7 +295,7 @@ class HeatingRecommender:
             # Otherwise, we recommend a gas condensing boiler, which will server a larger property, that has multiple
             # bathrooms
             is_combi = (
-                (self.property.data["number-heated-rooms"] <= 4) or
+                (self.property.data["number-heated-rooms"] <= 4) and
                 (self.property.n_bathrooms in [None, 0, 1])
             )
             if is_combi:
@@ -370,7 +370,7 @@ class HeatingRecommender:
                 combined_recommendations.extend(combined_recommendation)
 
             # Overwrite the existing boiler recommendation
-            self.recommendations = combined_recommendations
+            self.recommendations.extend(combined_recommendations)
         else:
             # We increment the recommendation phase, since the heating controls are separate from the boiler upgrade
             # but we'll only upgrade if we have a heating recommendation

From 61584a6320bfd50bb4f18266a09cc1bb1e4e2ba1 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 11 Apr 2024 18:14:38 +0100
Subject: [PATCH 202/248] extend recommendations to cover portable electric
 heaters

---
 recommendations/Costs.py              | 18 ++++++++++++-
 recommendations/HeatingRecommender.py | 37 ++++++++++++++++++++++++---
 2 files changed, 50 insertions(+), 5 deletions(-)

diff --git a/recommendations/Costs.py b/recommendations/Costs.py
index e5ceb0c0..f4ac259b 100644
--- a/recommendations/Costs.py
+++ b/recommendations/Costs.py
@@ -79,6 +79,10 @@ CONVENTIONAL_BOILER_COSTS = {
     "40kw": 1776
 }
 
+# Assumes 3 hours to remove each heater (including re-decorating)
+ROOM_HEATER_REMOVAL_COST = 120
+ROOM_HEATER_REMOVAL_LABOUR_HOURS = 3
+
 
 class Costs:
     """
@@ -1100,7 +1104,7 @@ class Costs:
             "labour_days": labour_days,
         }
 
-    def low_carbon_boiler(self, is_combi, size):
+    def boiler(self, is_combi, size, exising_room_heaters, n_heated_rooms):
         """
         Based on a basic estimate of median value £2600 to install a low carbon combi boiler
         :return:
@@ -1118,6 +1122,18 @@ class Costs:
         labour_cost = labour_rate * self.labour_adjustment_factor * labour_days
         # Add contingency and preliminaries
         labour_cost = labour_cost * (1 + self.CONTINGENCY + self.PRELIMINARIES)
+
+        # if there are existing room heaters, we need to add the cost of removing them
+        if exising_room_heaters:
+            removal_cost = ROOM_HEATER_REMOVAL_COST * n_heated_rooms
+            removal_labour_hours = ROOM_HEATER_REMOVAL_LABOUR_HOURS * n_heated_rooms
+        else:
+            removal_cost = 0
+            removal_labour_hours = 0
+
+        labour_cost = labour_cost + removal_cost
+        labour_days = labour_days + (removal_labour_hours / 8)
+
         vat = labour_cost * self.VAT_RATE
 
         subtotal_before_vat = unit_cost + labour_cost
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index bd4d87a2..14509eea 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -43,14 +43,36 @@ class HeatingRecommender:
             'No system present, electric heaters assumed'
         ] and self.property.data["mains-gas-flag"]
 
+        has_gas_heaters = (
+            self.property.main_heating["clean_description"] in ["Room heaters, mains gas"] and
+            self.property.data["mains-gas-flag"]
+        )
+
         # We also check if the property has electric heating, but it has access to the mains gas
         electic_heating_has_mains = has_electric_heating_description and self.property.data["mains-gas-flag"]
 
-        if has_boiler or no_heating_has_mains or electic_heating_has_mains:
+        portable_heaters_has_mains = (
+            self.property.main_heating["clean_description"] in ["Portable electric heaters assumed for most rooms"] and
+            self.property.data["mains-gas-flag"]
+        )
+
+        if (
+            has_boiler or
+            no_heating_has_mains or
+            electic_heating_has_mains or
+            has_gas_heaters or
+            portable_heaters_has_mains
+        ):
             # This indicates that the home previously did not have a boiler in place and so would require
             # an overhaul to the system - right now, this is all reasons, apart from if there is an existing boiler
             system_change = not has_boiler
-            self.recommend_boiler_upgrades(phase=phase, system_change=system_change)
+            exising_room_heaters = self.property.main_heating["clean_description"] in [
+                "Room heaters, electric", "Room heaters, mains gas"
+            ]
+
+            self.recommend_boiler_upgrades(
+                phase=phase, system_change=system_change, exising_room_heaters=exising_room_heaters
+            )
 
         return
 
@@ -262,7 +284,7 @@ class HeatingRecommender:
 
         return closest_size
 
-    def recommend_boiler_upgrades(self, phase, system_change):
+    def recommend_boiler_upgrades(self, phase, system_change, exising_room_heaters):
         """
         This boiler recommendation will only recommend a like-for-like upgrade, since changing the system
         is generally more expensive
@@ -270,6 +292,8 @@ class HeatingRecommender:
         :param system_change: Indicates if the property would be undergoing a heating system change. This could be true
                               if the home didn't have a heating system in place, or if the home had electric heating
                               previously
+        :param exising_room_heaters: Indicates if the property had room heaters previously - if so, a boiler
+                                     recommendation will need to be accompanied by removal of the room heaters
         :return:
         """
 
@@ -329,7 +353,12 @@ class HeatingRecommender:
                     "hot_water_energy_eff_ending": "Good"
                 }
 
-            boiler_costs = self.costs.low_carbon_boiler(is_combi=is_combi, size=f"{boiler_size}kw")
+            boiler_costs = self.costs.boiler(
+                is_combi=is_combi,
+                size=f"{boiler_size}kw",
+                exising_room_heaters=exising_room_heaters,
+                n_heated_rooms=self.property.data["number-heated-rooms"]
+            )
 
             boiler_recommendation = {
                 "phase": recommendation_phase,

From 3ecd7a974276bb6f4296124c6acf7e55f280e574 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 11 Apr 2024 19:14:49 +0100
Subject: [PATCH 203/248] added simulation for secondary heating

---
 backend/Property.py                   |  6 ++-
 recommendations/Costs.py              | 45 ++++++++++++++++------
 recommendations/HeatingRecommender.py |  2 +-
 recommendations/Recommendations.py    |  8 ++++
 recommendations/SecondaryHeating.py   | 55 +++++++++++++++++++++++++++
 5 files changed, 102 insertions(+), 14 deletions(-)
 create mode 100644 recommendations/SecondaryHeating.py

diff --git a/backend/Property.py b/backend/Property.py
index 950c1ac9..0f5e7e77 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -456,7 +456,9 @@ class Property:
                         "double glazing installed during or after 2002"
                     )
 
-            if recommendation["type"] in ["heating", "hot_water_tank_insulation", "heating_control"]:
+            if recommendation["type"] in [
+                "heating", "hot_water_tank_insulation", "heating_control", "secondary_heating"
+            ]:
                 # We update the data, as defined in the recommendaton
 
                 simulation_config = recommendation["simulation_config"]
@@ -477,7 +479,7 @@ class Property:
                 "loft_insulation", "room_roof_insulation", "flat_roof_insulation",
                 "solid_floor_insulation", "suspended_floor_insulation", "exposed_floor_insulation",
                 "windows_glazing", "solar_pv", "heating", "hot_water_tank_insulation",
-                "heating_control",
+                "heating_control", "secondary_heating"
             ]:
                 raise NotImplementedError(
                     "Implement me, given type %s" % recommendation["type"]
diff --git a/recommendations/Costs.py b/recommendations/Costs.py
index f4ac259b..45c17102 100644
--- a/recommendations/Costs.py
+++ b/recommendations/Costs.py
@@ -1104,6 +1104,28 @@ class Costs:
             "labour_days": labour_days,
         }
 
+    def heater_removal(self, n_rooms):
+        """
+        Estimates the costs of removal of heaters, including the redecoration costs of the space behind the heater
+        :return:
+        """
+
+        removal_cost = ROOM_HEATER_REMOVAL_COST * n_rooms
+        removal_labour_hours = ROOM_HEATER_REMOVAL_LABOUR_HOURS * n_rooms
+
+        vat = removal_cost * self.VAT_RATE
+
+        subtotal_before_vat = removal_cost
+        total_cost = subtotal_before_vat + vat
+
+        return {
+            "total": total_cost,
+            "subtotal": subtotal_before_vat,
+            "vat": vat,
+            "labour_hours": removal_labour_hours,
+            "labour_days": np.ceil(removal_labour_hours / 8),
+        }
+
     def boiler(self, is_combi, size, exising_room_heaters, n_heated_rooms):
         """
         Based on a basic estimate of median value £2600 to install a low carbon combi boiler
@@ -1114,6 +1136,7 @@ class Costs:
         # The unit cost is the cost without VAT
         # We now need to estimate the cost of the works
         labour_days = 2
+        labour_hours = labour_days * 8
         labour_rate = 500
 
         # Average cost of installation is 1 (maybe 2days) at £300 per day
@@ -1123,26 +1146,26 @@ class Costs:
         # Add contingency and preliminaries
         labour_cost = labour_cost * (1 + self.CONTINGENCY + self.PRELIMINARIES)
 
-        # if there are existing room heaters, we need to add the cost of removing them
-        if exising_room_heaters:
-            removal_cost = ROOM_HEATER_REMOVAL_COST * n_heated_rooms
-            removal_labour_hours = ROOM_HEATER_REMOVAL_LABOUR_HOURS * n_heated_rooms
-        else:
-            removal_cost = 0
-            removal_labour_hours = 0
-
-        labour_cost = labour_cost + removal_cost
-        labour_days = labour_days + (removal_labour_hours / 8)
+        # labour_days = labour_days + (removal_labour_hours / 8)
 
         vat = labour_cost * self.VAT_RATE
 
         subtotal_before_vat = unit_cost + labour_cost
         total_cost = subtotal_before_vat + vat
 
+        # if there are existing room heaters, we need to add the cost of removing them
+        if exising_room_heaters:
+            removal_costing = self.heater_removal(n_rooms=n_heated_rooms)
+            # Add the totals to the existing totals
+            total_cost += removal_costing["total"]
+            subtotal_before_vat += removal_costing["subtotal"]
+            labour_hours += removal_costing["labour_hours"]
+            labour_days += removal_costing["labour_days"]
+
         return {
             "total": total_cost,
             "subtotal": subtotal_before_vat,
             "vat": vat,
-            "labour_hours": labour_days * 8,
+            "labour_hours": labour_hours,
             "labour_days": labour_days,
         }
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 14509eea..92457a27 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -319,7 +319,7 @@ class HeatingRecommender:
             # Otherwise, we recommend a gas condensing boiler, which will server a larger property, that has multiple
             # bathrooms
             is_combi = (
-                (self.property.data["number-heated-rooms"] <= 4) and
+                (self.property.number_of_rooms <= 4) and
                 (self.property.n_bathrooms in [None, 0, 1])
             )
             if is_combi:
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 902023dc..68fead16 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -11,6 +11,7 @@ from recommendations.SolarPvRecommendations import SolarPvRecommendations
 from recommendations.WindowsRecommendations import WindowsRecommendations
 from recommendations.HeatingRecommender import HeatingRecommender
 from recommendations.HotwaterRecommendations import HotwaterRecommendations
+from recommendations.SecondaryHeating import SecondaryHeating
 from backend.ml_models.AnnualBillSavings import AnnualBillSavings
 
 
@@ -46,6 +47,7 @@ class Recommendations:
         self.solar_recommender = SolarPvRecommendations(property_instance=property_instance)
         self.heating_recommender = HeatingRecommender(property_instance=property_instance)
         self.hotwater_recommender = HotwaterRecommendations(property_instance=property_instance)
+        self.secondary_heating_recommender = SecondaryHeating(property_instance=property_instance)
 
     def recommend(self):
 
@@ -130,6 +132,12 @@ class Recommendations:
                 property_recommendations.append(self.lighting_recommender.recommendation)
                 phase += 1
 
+        if "secondary_heating" not in self.exclusions:
+            self.secondary_heating_recommender.recommend(phase=phase)
+            if self.secondary_heating_recommender.recommendation:
+                property_recommendations.append(self.secondary_heating_recommender.recommendation)
+                phase += 1
+
         # Renewables
         if "solar_pv" not in self.exclusions:
             self.solar_recommender.recommend(phase=phase)
diff --git a/recommendations/SecondaryHeating.py b/recommendations/SecondaryHeating.py
new file mode 100644
index 00000000..f31c4c05
--- /dev/null
+++ b/recommendations/SecondaryHeating.py
@@ -0,0 +1,55 @@
+from recommendations.Costs import Costs
+from backend.Property import Property
+
+
+class SecondaryHeating:
+    """
+    This class recommends the removal of the secondary heating system for properties that have a primary heating
+    system.
+    """
+
+    # The list of existing heating systems that are accepted
+    ACCEPTED_MAINHEAT_DESCRIPTIONS = ["Boiler and radiators, mains gas"]
+    ACCEPTED_SECONDHEAT_DESCRIPTIONS = ["Room heaters, electric"]
+    # These are the heaters where works are required to remove them
+    FIXED_HEATER_DESCRIPTIONS = ["Room heaters, electric"]
+
+    def __init__(self, property_instance: Property):
+        self.property = property_instance
+        self.costs = Costs(self.property)
+
+        self.recommendation = []
+
+    def recommend(self, phase: int):
+        # Reset
+        self.recommendation = []
+
+        if self.property.main_heating["clean_description"] not in self.ACCEPTED_MAINHEAT_DESCRIPTIONS:
+            return
+
+        # TODO: We need to clean secondary data
+        if self.property.data['secondheat-description'] not in self.ACCEPTED_SECONDHEAT_DESCRIPTIONS:
+            return
+
+        if self.property.data['secondheat-description'] in self.FIXED_HEATER_DESCRIPTIONS:
+            # Fixed heaters have an associated removal cost; otherwise, there is no cost
+            n_rooms = self.property.data['number-heated-rooms']
+        else:
+            n_rooms = 0
+
+        costs = self.costs.heater_removal(n_rooms=n_rooms)
+        self.recommendation.append(
+            {
+                "phase": phase,
+                "parts": [],
+                "type": "secondary_heating",
+                "description": "Remove the secondary heating system",
+                "starting_u_value": None,
+                "new_u_value": None,
+                "sap_points": None,
+                **costs,
+                "simulation_config": {
+                    "secondheat_description_ending": "None"
+                }
+            }
+        )

From 0b75ec9210e7c7c097bf4e6b5d2d87cb273af6cd Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 12 Apr 2024 14:41:19 +0100
Subject: [PATCH 204/248] Added patches and overrides to immo asset list

---
 .idea/Model.iml                               |  2 +-
 .idea/misc.xml                                |  2 +-
 .../AirSourceHeatPumpEfficiency.py            | 78 +++++++++++++++++++
 etl/air_source_heat_pump/app.py               | 24 ++++++
 etl/customers/immo/pilot/asset_list.py        | 70 ++++++++++++++++-
 5 files changed, 172 insertions(+), 4 deletions(-)
 create mode 100644 etl/air_source_heat_pump/AirSourceHeatPumpEfficiency.py
 create mode 100644 etl/air_source_heat_pump/app.py

diff --git a/.idea/Model.iml b/.idea/Model.iml
index 4413bb06..b0f9c00d 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 6f308057..1122b380 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/etl/air_source_heat_pump/AirSourceHeatPumpEfficiency.py b/etl/air_source_heat_pump/AirSourceHeatPumpEfficiency.py
new file mode 100644
index 00000000..2ba82e77
--- /dev/null
+++ b/etl/air_source_heat_pump/AirSourceHeatPumpEfficiency.py
@@ -0,0 +1,78 @@
+import pandas as pd
+from tqdm import tqdm
+from utils.s3 import save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet
+from utils.logger import setup_logger
+from etl.epc.settings import EARLIEST_EPC_DATE
+
+logger = setup_logger()
+
+
+class AirSourceHeatPumpEfficiency:
+
+    def __init__(self, file_directories, cleaned_lookup):
+        """
+        :param file_directories: A list of directories where files are stored.
+        :param cleaned_lookup: A dictionary containing cleaned lookup data.
+        """
+        self.file_directories = file_directories
+        self.cleaned_lookup = cleaned_lookup
+
+        self.results = []
+
+    def create_dataset(self):
+        logger.info("Creating solar photo supply dataset")
+        for dir in tqdm(self.file_directories):
+            filepath = dir / "certificates.csv"
+            df = pd.read_csv(filepath, low_memory=False)
+            df = df[~pd.isnull(df["UPRN"])]
+            df["UPRN"] = df["UPRN"].astype(int).astype(str)
+            # Take entries after SAP12
+            df["LODGEMENT_DATE"] = pd.to_datetime(df["LODGEMENT_DATE"])
+            df = df[df["LODGEMENT_DATE"] > EARLIEST_EPC_DATE]
+
+            df = df[
+                ~df["TENURE"].isin(
+                    [
+                        "unknown",
+                        "Not defined - use in the case of a new dwelling for which the intended tenure in not known. "
+                        "It is not to be used for an existing dwelling"
+                    ]
+                )
+            ]
+
+            # Take entries that contain an air source heat pump
+            df = df[
+                df["MAINHEAT_DESCRIPTION"].str.contains("air source heat pump", case=False, na=False)
+            ]
+            # Get the columns we're interested in
+            df = df[
+                [
+                    "MAINHEAT_DESCRIPTION",
+                    "MAINHEAT_ENERGY_EFF",
+                    "MAINHEATCONT_DESCRIPTION",
+                    "MAINHEATC_ENERGY_EFF",
+                    "MAIN_FUEL",
+                    "HOTWATER_DESCRIPTION",
+                    "HOT_WATER_ENERGY_EFF",
+                    "MAINS_GAS_FLAG"
+                ]
+            ]
+
+            counts = df.groupby(
+                [
+                    "MAINHEAT_DESCRIPTION",
+                    "MAINHEAT_ENERGY_EFF",
+                    "MAINHEATCONT_DESCRIPTION",
+                    "MAINHEATC_ENERGY_EFF",
+                    "MAIN_FUEL",
+                    "HOTWATER_DESCRIPTION",
+                    "HOT_WATER_ENERGY_EFF",
+                    "MAINS_GAS_FLAG"
+                ]
+            ).size().reset_index(name="count")
+
+            # Drop rows that have a missing PROPERTY_TYPE, BUILT_FORM, CONSTRUCTION_AGE_BAND, TOTAL_FLOOR_AREA
+            for col in ["PROPERTY_TYPE", "BUILT_FORM", "CONSTRUCTION_AGE_BAND", "TOTAL_FLOOR_AREA"]:
+                df = df[~pd.isnull(df[col])]
+            # Take newest LODGEMENT_DATE per UPRN
+            df = df.sort_values(by="LODGEMENT_DATE", ascending=False).drop_duplicates(subset=["UPRN"])
diff --git a/etl/air_source_heat_pump/app.py b/etl/air_source_heat_pump/app.py
new file mode 100644
index 00000000..ac87b34b
--- /dev/null
+++ b/etl/air_source_heat_pump/app.py
@@ -0,0 +1,24 @@
+from pathlib import Path
+from backend.app.plan.utils import get_cleaned
+from etl.air_source_heat_pump.AirSourceHeatPumpEfficiency import AirSourceHeatPumpEfficiency
+
+DATA_DIRECTORY = Path(__file__).parent / "local_data" / "all-domestic-certificates"
+
+
+def app():
+    """
+    This code reads in the EPC dataset and looks at the efficiency values for heating systems that include air source
+    heat pumps. This dataset is then used to inform the recommendations for the air source heat pump, so we know
+    how to set the simulation
+    :return:
+    """
+
+    directories = [entry for entry in DATA_DIRECTORY.iterdir() if entry.is_dir()]
+    cleaned_lookup = get_cleaned()
+
+    ashp_data_client = AirSourceHeatPumpEfficiency(
+        file_directories=directories,
+        cleaned_lookup=cleaned_lookup
+    )
+
+    ashp_data_client.create_dataset()
diff --git a/etl/customers/immo/pilot/asset_list.py b/etl/customers/immo/pilot/asset_list.py
index 9756e00b..0da8f885 100644
--- a/etl/customers/immo/pilot/asset_list.py
+++ b/etl/customers/immo/pilot/asset_list.py
@@ -19,6 +19,40 @@ council_tax_bands = [
 ]
 council_tax_bands = pd.DataFrame(council_tax_bands)
 
+# This is information we need to override on the EPC itself, for instance if a new survey has been conducted and
+# that has not reached the API
+patches = [
+    {
+        'address': '6 Beech Road', 'postcode': 'DY1 4BP',
+        'walls-description': 'Mixed: Filled cavity and external insulated solid brick',
+        'walls-energy-eff': 'Good',
+        'roof-description': 'Pitched, 12 mm loft insulation',
+        'roof-energy-eff': 'Very Poor',
+        'windows-description': 'Fully double glazed',
+        'windows-energy-eff': 'Good',
+        'mainheat-description': 'Room heaters, electric',
+        'mainheat-energy-eff': 'Very Poor',
+        'mainheatcont-description': 'Appliance thermostats',
+        'mainheatc-energy-eff': 'Good',
+        'lighting-description': 'Low energy lighting in 25% of fixed outlets',
+        'lighting-energy-eff': 'Good',
+        'floor-description': 'Mixed: Solid no insulation and suspended no insulation',
+        'secondheat-description': 'None',
+        'current-energy-efficiency': '32',
+    }
+]
+
+# This is information that is found as a result of the non-invasives, that mean that certain measures
+# have been installed already. To reflect this in the front end, it is included in the recommendation, however
+# the cost is removed and instead, a message is presented saying that the measure is already installed.
+overrides = [
+    {
+        'address': '5 Oaklands',
+        'postcode': 'B62 0JA',
+        "overrides": ["windows_glazing"]
+    }
+]
+
 
 def app():
     raw_asset_list = read_excel_from_s3(
@@ -41,7 +75,7 @@ def app():
         }
     )
 
-    # Store the data in s3
+    # Store the asset list in s3
     filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
     save_csv_to_s3(
         dataframe=asset_list,
@@ -49,12 +83,44 @@ def app():
         file_name=filename
     )
 
+    # Store overrides in s3
+    overrides_filename = f"{USER_ID}/{PORTFOLIO_ID}/overrides.json"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(overrides),
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=overrides_filename
+    )
+
+    # Store patches in s3
+    patches_filename = f"{USER_ID}/{PORTFOLIO_ID}/patches.json"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(patches),
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=patches_filename
+    )
+
+    # EPC C portfolio
     body = {
         "portfolio_id": str(PORTFOLIO_ID),
         "housing_type": "Private",
         "goal": "Increase EPC",
-        "goal_value": "A",
+        "goal_value": "C",
         "trigger_file_path": filename,
+        "overrides_file_path": overrides_filename,
+        "patches_file_path": patches_filename,
+        "budget": None,
+    }
+    print(body)
+
+    # EPC B portfolio
+    body = {
+        "portfolio_id": str(PORTFOLIO_ID + 1),
+        "housing_type": "Private",
+        "goal": "Increase EPC",
+        "goal_value": "B",
+        "trigger_file_path": filename,
+        "overrides_file_path": overrides_filename,
+        "patches_file_path": patches_filename,
         "budget": None,
     }
     print(body)

From ab180f65225507c6d666516fd70259a7c0ec4ac5 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 12 Apr 2024 15:06:12 +0100
Subject: [PATCH 205/248] Added overrides and patches to router

---
 .idea/Model.iml                        |  2 +-
 .idea/misc.xml                         |  2 +-
 backend/Property.py                    |  6 ++++-
 backend/app/plan/router.py             | 34 ++++++++++++++++++--------
 backend/app/plan/schemas.py            |  2 ++
 etl/customers/immo/pilot/asset_list.py |  4 +--
 6 files changed, 35 insertions(+), 15 deletions(-)

diff --git a/.idea/Model.iml b/.idea/Model.iml
index b0f9c00d..4413bb06 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 1122b380..6f308057 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/backend/Property.py b/backend/Property.py
index 0f5e7e77..882e450c 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -60,7 +60,7 @@ class Property:
     n_bedrooms = None
 
     def __init__(
-        self, id, postcode, address, epc_record, **kwargs
+        self, id, postcode, address, epc_record, overrides=None, **kwargs
     ):
 
         self.epc_record = epc_record
@@ -74,6 +74,10 @@ class Property:
         }
         self.old_data = epc_record.get("old_data")
         self.property_dimensions = None
+        # This is a list of measures that have already been installed in the property, typically found as a result
+        # of the non-invasive surveys. We reflect that this has been installed in the recommendations, but remove the
+        # cost and instead, provide a message that the measure has already been installed
+        self.overrides = overrides
 
         self.uprn = epc_record.get("uprn")
         self.full_sap_epc = epc_record.get("full_sap_epc")
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 4b91566e..8d39c97f 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -44,20 +44,15 @@ BATCH_SIZE = 5
 SCORING_BATCH_SIZE = 400
 
 
-def patch_epc(config, epc_records):
+def patch_epc(patch, epc_records):
     """
     This utility function is useful to patch the epc data if we have data from the customer
     :return:
     """
 
-    number_habitable_rooms = config.get("number-habitable-rooms", None)
-    number_heated_rooms = config.get("number-heated-rooms", None)
-
-    if number_habitable_rooms is not None:
-        epc_records["original_epc"]["number-habitable-rooms"] = int(number_habitable_rooms)
-
-    if number_heated_rooms is not None:
-        epc_records["original_epc"]["number-heated-rooms"] = int(number_heated_rooms)
+    for patch_variable, patch_value in patch.items():
+        if patch_variable in epc_records["original_epc"]:
+            epc_records["original_epc"][patch_variable] = patch_value
 
     return epc_records
 
@@ -85,6 +80,17 @@ async def trigger_plan(body: PlanTriggerRequest):
         session.begin()
         logger.info("Getting the inputs")
         plan_input = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.trigger_file_path)
+        # If we have patches or overrides, we should read them in here
+        patches = []
+        if body.patches_file_path:
+            patches = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.patches_file_path)
+
+        overrides = []
+        if body.overrides_file_path:
+            overrides = read_csv_from_s3(
+                bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.overrides_file_path
+            )
+
         cleaning_data = read_dataframe_from_s3_parquet(
             bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet",
         )
@@ -124,7 +130,11 @@ async def trigger_plan(body: PlanTriggerRequest):
                 'full_sap_epc': epc_searcher.full_sap_epc.copy(),
                 'old_data': epc_searcher.older_epcs.copy(),
             }
-            epc_records = patch_epc(config, epc_records)
+
+            patch = next((
+                x for x in patches if (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
+            ), None)
+            epc_records = patch_epc(patch, epc_records)
 
             prepared_epc = EPCRecord(
                 epc_records=epc_records,
@@ -132,12 +142,16 @@ async def trigger_plan(body: PlanTriggerRequest):
                 cleaning_data=cleaning_data
             )
 
+            overrides = next((
+                x for x in overrides if (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
+            ), None)
             input_properties.append(
                 Property(
                     id=property_id,
                     address=epc_searcher.address_clean,
                     postcode=epc_searcher.postcode_clean,
                     epc_record=prepared_epc,
+                    overrides=overrides,
                     **Property.extract_kwargs(config)
                 )
             )
diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py
index b8a99704..ec49e41e 100644
--- a/backend/app/plan/schemas.py
+++ b/backend/app/plan/schemas.py
@@ -9,6 +9,8 @@ class PlanTriggerRequest(BaseModel):
     goal_value: str
     portfolio_id: int
     trigger_file_path: str
+    overrides_file_path: Optional[str] = None
+    patches_file_path: Optional[str] = None
     exclusions: Optional[conlist(str, min_items=1)] = None
 
     # Pre-defined list of possibilities for exclusions
diff --git a/etl/customers/immo/pilot/asset_list.py b/etl/customers/immo/pilot/asset_list.py
index 0da8f885..15681d42 100644
--- a/etl/customers/immo/pilot/asset_list.py
+++ b/etl/customers/immo/pilot/asset_list.py
@@ -24,7 +24,7 @@ council_tax_bands = pd.DataFrame(council_tax_bands)
 patches = [
     {
         'address': '6 Beech Road', 'postcode': 'DY1 4BP',
-        'walls-description': 'Mixed: Filled cavity and external insulated solid brick',
+        'walls-description': 'Cavity wall, filled cavity',
         'walls-energy-eff': 'Good',
         'roof-description': 'Pitched, 12 mm loft insulation',
         'roof-energy-eff': 'Very Poor',
@@ -36,7 +36,7 @@ patches = [
         'mainheatc-energy-eff': 'Good',
         'lighting-description': 'Low energy lighting in 25% of fixed outlets',
         'lighting-energy-eff': 'Good',
-        'floor-description': 'Mixed: Solid no insulation and suspended no insulation',
+        'floor-description': 'Solid, no insulation (assumed)',
         'secondheat-description': 'None',
         'current-energy-efficiency': '32',
     }

From 8e2d823693f53ad47a4fe857fd8f24d84c0c4ec1 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 12 Apr 2024 15:11:51 +0100
Subject: [PATCH 206/248] corrected parsing of overrides

---
 backend/Property.py                    | 4 ++--
 backend/app/plan/router.py             | 8 ++++----
 etl/customers/immo/pilot/asset_list.py | 3 +++
 3 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index 882e450c..3fac3667 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -60,7 +60,7 @@ class Property:
     n_bedrooms = None
 
     def __init__(
-        self, id, postcode, address, epc_record, overrides=None, **kwargs
+        self, id, postcode, address, epc_record, override=None, **kwargs
     ):
 
         self.epc_record = epc_record
@@ -77,7 +77,7 @@ class Property:
         # This is a list of measures that have already been installed in the property, typically found as a result
         # of the non-invasive surveys. We reflect that this has been installed in the recommendations, but remove the
         # cost and instead, provide a message that the measure has already been installed
-        self.overrides = overrides
+        self.override = override
 
         self.uprn = epc_record.get("uprn")
         self.full_sap_epc = epc_record.get("full_sap_epc")
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 8d39c97f..08ce0dcc 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -133,7 +133,7 @@ async def trigger_plan(body: PlanTriggerRequest):
 
             patch = next((
                 x for x in patches if (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
-            ), None)
+            ), {})
             epc_records = patch_epc(patch, epc_records)
 
             prepared_epc = EPCRecord(
@@ -142,16 +142,16 @@ async def trigger_plan(body: PlanTriggerRequest):
                 cleaning_data=cleaning_data
             )
 
-            overrides = next((
+            override = next((
                 x for x in overrides if (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
-            ), None)
+            ), {})
             input_properties.append(
                 Property(
                     id=property_id,
                     address=epc_searcher.address_clean,
                     postcode=epc_searcher.postcode_clean,
                     epc_record=prepared_epc,
-                    overrides=overrides,
+                    override=override,
                     **Property.extract_kwargs(config)
                 )
             )
diff --git a/etl/customers/immo/pilot/asset_list.py b/etl/customers/immo/pilot/asset_list.py
index 15681d42..07ebe884 100644
--- a/etl/customers/immo/pilot/asset_list.py
+++ b/etl/customers/immo/pilot/asset_list.py
@@ -39,6 +39,9 @@ patches = [
         'floor-description': 'Solid, no insulation (assumed)',
         'secondheat-description': 'None',
         'current-energy-efficiency': '32',
+        'energy-consumption-current': '491',
+        'co2-emissions-current': '5.0',
+        'potential-energy-efficiency': '87'
     }
 ]
 

From 0ede95cc4a7499ad0db1c6eda5ef6e012ab9f763 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 12 Apr 2024 15:25:08 +0100
Subject: [PATCH 207/248] added override to wall insulation

---
 backend/Property.py                     |  4 +++-
 recommendations/WallRecommendations.py  | 15 ++++++++++++++-
 recommendations/recommendation_utils.py | 12 ++++++++++++
 3 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index 3fac3667..d000be28 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -1,4 +1,5 @@
 import os
+import ast
 from itertools import groupby
 import pandas as pd
 
@@ -77,7 +78,8 @@ class Property:
         # This is a list of measures that have already been installed in the property, typically found as a result
         # of the non-invasive surveys. We reflect that this has been installed in the recommendations, but remove the
         # cost and instead, provide a message that the measure has already been installed
-        self.override = override
+
+        self.override = ast.literal_eval(override['overrides']) if override is not None else []
 
         self.uprn = epc_record.get("uprn")
         self.full_sap_epc = epc_record.get("full_sap_epc")
diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py
index 6b59c148..3acc17f0 100644
--- a/recommendations/WallRecommendations.py
+++ b/recommendations/WallRecommendations.py
@@ -8,7 +8,7 @@ from backend.Property import Property
 from BaseUtility import Definitions
 from recommendations.recommendation_utils import (
     r_value_per_mm_to_u_value, calculate_u_value_uplift, is_diminishing_returns, update_lowest_selected_u_value,
-    get_recommended_part, get_wall_u_value
+    get_recommended_part, get_wall_u_value, override_costs
 )
 from recommendations.config import PARTIALLY_FILLED_PERCENTAGE_ASSUMPTION
 from recommendations.Costs import Costs
@@ -221,6 +221,10 @@ class WallRecommendations(Definitions):
                     material=material.to_dict(),
                 )
 
+                is_override = "cavity_wall_insulation" in cost_result
+                if is_override:
+                    cost_result = override_costs(cost_result)
+
                 recommendations.append(
                     {
                         "phase": phase,
@@ -237,6 +241,7 @@ class WallRecommendations(Definitions):
                         "starting_u_value": u_value,
                         "new_u_value": new_u_value,
                         "sap_points": None,
+                        "is_override": is_override,
                         **cost_result
                     }
                 )
@@ -277,12 +282,19 @@ class WallRecommendations(Definitions):
                             material=material.to_dict(),
                             non_insulation_materials=non_insulation_materials
                         )
+                        is_override = "internal_wall_insulation" in cost_result
+                        if is_override:
+                            cost_result = override_costs(cost_result)
+
                     elif material["type"] == "external_wall_insulation":
                         cost_result = self.costs.external_wall_insulation(
                             wall_area=self.property.insulation_wall_area,
                             material=material.to_dict(),
                             non_insulation_materials=non_insulation_materials
                         )
+                        is_override = "external_wall_insulation" in cost_result
+                        if is_override:
+                            cost_result = override_costs(cost_result)
                     else:
                         raise ValueError("Invalid material type")
 
@@ -301,6 +313,7 @@ class WallRecommendations(Definitions):
                             "description": self._make_description(material),
                             "starting_u_value": u_value,
                             "new_u_value": new_u_value,
+                            "is_override": is_override,
                             "sap_points": None,
                             **cost_result
                         }
diff --git a/recommendations/recommendation_utils.py b/recommendations/recommendation_utils.py
index 0d5f9743..a3043c31 100644
--- a/recommendations/recommendation_utils.py
+++ b/recommendations/recommendation_utils.py
@@ -767,3 +767,15 @@ def check_simulation_difference(old_config, new_config):
     differences = {key + "_ending": new_config[key] for key in new_config if old_config[key] != new_config[key]}
 
     return differences
+
+
+def override_costs(costs):
+    """
+    If the measure is overridden, we want to make sure that the costs are zero. This function sets every value to zero
+    :param costs: Dictionary of costing, as returned by the Costs class. This dictionary is modified in place
+    :return: The same costs dictionary, with every value set to 0
+    """
+    for k in costs:
+        costs[k] = 0
+
+    return costs

From 1c5ccb2c8c46a613851dfaf153a16ee4242eaf0a Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 12 Apr 2024 15:26:11 +0100
Subject: [PATCH 208/248] added override to roof insulation

---
 recommendations/RoofRecommendations.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/recommendations/RoofRecommendations.py b/recommendations/RoofRecommendations.py
index 8d6a91e7..ed087228 100644
--- a/recommendations/RoofRecommendations.py
+++ b/recommendations/RoofRecommendations.py
@@ -5,7 +5,7 @@ from typing import List
 from datatypes.enums import QuantityUnits
 from recommendations.recommendation_utils import (
     get_roof_u_value, r_value_per_mm_to_u_value, calculate_u_value_uplift, is_diminishing_returns,
-    update_lowest_selected_u_value, get_recommended_part, convert_thickness_to_numeric
+    update_lowest_selected_u_value, get_recommended_part, convert_thickness_to_numeric, override_costs
 )
 from recommendations.Costs import Costs
 
@@ -207,12 +207,18 @@ class RoofRecommendations:
                             floor_area=self.property.insulation_floor_area,
                             material=material
                         )
+                        is_override = "loft_insulation" in cost_result
+                        if is_override:
+                            cost_result = override_costs(cost_result)
                     elif material["type"] == "flat_roof_insulation":
                         cost_result = self.costs.flat_roof_insulation(
                             floor_area=self.property.insulation_floor_area,
                             material=material,
                             non_insulation_materials=non_insulation_materials
                         )
+                        is_override = "flat_roof_insulation" in cost_result
+                        if is_override:
+                            cost_result = override_costs(cost_result)
                     else:
                         raise ValueError("Invalid material type")
 
@@ -232,6 +238,7 @@ class RoofRecommendations:
                             "starting_u_value": u_value,
                             "new_u_value": new_u_value,
                             "sap_points": None,
+                            "is_override": is_override,
                             **cost_result
                         }
                     )

From adcd31c8f4e69e92ff592a03103eb60f1c06617a Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 12 Apr 2024 15:27:58 +0100
Subject: [PATCH 209/248] correcting override in walls and roof

---
 recommendations/RoofRecommendations.py        | 4 ++--
 recommendations/VentilationRecommendations.py | 4 ++++
 recommendations/WallRecommendations.py        | 6 +++---
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/recommendations/RoofRecommendations.py b/recommendations/RoofRecommendations.py
index ed087228..5ba7e82e 100644
--- a/recommendations/RoofRecommendations.py
+++ b/recommendations/RoofRecommendations.py
@@ -207,7 +207,7 @@ class RoofRecommendations:
                             floor_area=self.property.insulation_floor_area,
                             material=material
                         )
-                        is_override = "loft_insulation" in cost_result
+                        is_override = "loft_insulation" in self.property.override
                         if is_override:
                             cost_result = override_costs(cost_result)
                     elif material["type"] == "flat_roof_insulation":
@@ -216,7 +216,7 @@ class RoofRecommendations:
                             material=material,
                             non_insulation_materials=non_insulation_materials
                         )
-                        is_override = "flat_roof_insulation" in cost_result
+                        is_override = "flat_roof_insulation" in self.property.override
                         if is_override:
                             cost_result = override_costs(cost_result)
                     else:
diff --git a/recommendations/VentilationRecommendations.py b/recommendations/VentilationRecommendations.py
index 1657b759..aa6299e0 100644
--- a/recommendations/VentilationRecommendations.py
+++ b/recommendations/VentilationRecommendations.py
@@ -56,6 +56,10 @@ class VentilationRecommendations(Definitions):
         part[0]["quantity"] = n_units
         part[0]["quantity_unit"] = "part"
 
+        is_override = "cavity_wall_insulation" in cost_result
+        if is_override:
+            cost_result = override_costs(cost_result)
+
         # We recommend installing two mechanical ventilation systems
         self.recommendation = [
             {
diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py
index 3acc17f0..471a62cb 100644
--- a/recommendations/WallRecommendations.py
+++ b/recommendations/WallRecommendations.py
@@ -221,7 +221,7 @@ class WallRecommendations(Definitions):
                     material=material.to_dict(),
                 )
 
-                is_override = "cavity_wall_insulation" in cost_result
+                is_override = "cavity_wall_insulation" in self.property.override
                 if is_override:
                     cost_result = override_costs(cost_result)
 
@@ -282,7 +282,7 @@ class WallRecommendations(Definitions):
                             material=material.to_dict(),
                             non_insulation_materials=non_insulation_materials
                         )
-                        is_override = "internal_wall_insulation" in cost_result
+                        is_override = "internal_wall_insulation" in self.property.override
                         if is_override:
                             cost_result = override_costs(cost_result)
 
@@ -292,7 +292,7 @@ class WallRecommendations(Definitions):
                             material=material.to_dict(),
                             non_insulation_materials=non_insulation_materials
                         )
-                        is_override = "external_wall_insulation" in cost_result
+                        is_override = "external_wall_insulation" in self.property.override
                         if is_override:
                             cost_result = override_costs(cost_result)
                     else:

From fadff714d2c3227eb835b94951ed09b25ff870c4 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 12 Apr 2024 15:29:41 +0100
Subject: [PATCH 210/248] add override to ventilation

---
 recommendations/VentilationRecommendations.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/recommendations/VentilationRecommendations.py b/recommendations/VentilationRecommendations.py
index aa6299e0..07f7cf1e 100644
--- a/recommendations/VentilationRecommendations.py
+++ b/recommendations/VentilationRecommendations.py
@@ -50,16 +50,16 @@ class VentilationRecommendations(Definitions):
 
         part = self.materials.copy()
 
-        estimated_cost = n_units * part[0]["cost"]
+        is_override = "cavity_wall_insulation" in self.property.override
+
+        estimated_cost = n_units * part[0]["cost"] if not is_override else 0
+        labour_hours = 4 * n_units if not is_override else 0
+        labour_days = 4 * n_units / 8.0 if not is_override else 0
 
         part[0]["total"] = estimated_cost
         part[0]["quantity"] = n_units
         part[0]["quantity_unit"] = "part"
 
-        is_override = "cavity_wall_insulation" in cost_result
-        if is_override:
-            cost_result = override_costs(cost_result)
-
         # We recommend installing two mechanical ventilation systems
         self.recommendation = [
             {
@@ -76,7 +76,7 @@ class VentilationRecommendations(Definitions):
                 "energy_cost_savings": 0,
                 "total": estimated_cost,
                 # We use a very simple and rough estimate of 4 hours per unit
-                "labour_hours": 4 * n_units,
-                "labour_days": 4 * n_units / 8.0  # Assume 8 hour day
+                "labour_hours": labour_hours,
+                "labour_days": labour_days  # Assume 8 hour day
             }
         ]

From 493db6c4a01dcf825fe49d77cfc8fcb974a7d1e1 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 12 Apr 2024 15:31:07 +0100
Subject: [PATCH 211/248] added floor insulation to override

---
 recommendations/FloorRecommendations.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/recommendations/FloorRecommendations.py b/recommendations/FloorRecommendations.py
index 713d5f92..1744a928 100644
--- a/recommendations/FloorRecommendations.py
+++ b/recommendations/FloorRecommendations.py
@@ -8,7 +8,7 @@ from datatypes.enums import QuantityUnits
 from backend.Property import Property
 from recommendations.recommendation_utils import (
     r_value_per_mm_to_u_value, calculate_u_value_uplift, is_diminishing_returns, update_lowest_selected_u_value,
-    get_recommended_part, get_floor_u_value
+    get_recommended_part, get_floor_u_value, override_costs
 )
 from recommendations.Costs import Costs
 
@@ -192,12 +192,22 @@ class FloorRecommendations(Definitions):
                             material=material.to_dict(),
                             non_insulation_materials=non_insulation_materials
                         )
+
+                        is_override = "suspended_floor_insulation" in self.property.override
+                        if is_override:
+                            cost_result = override_costs(cost_result)
+
                     elif material["type"] == "solid_floor_insulation":
                         cost_result = self.costs.solid_floor_insulation(
                             insulation_floor_area=self.property.insulation_floor_area,
                             material=material.to_dict(),
                             non_insulation_materials=non_insulation_materials
                         )
+
+                        is_override = "solid_floor_insulation" in self.property.override
+                        if is_override:
+                            cost_result = override_costs(cost_result)
+
                     else:
                         raise NotImplementedError("Implement me!")
 

From b052c9925f9064d2462442cccecac08bc511cc21 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 12 Apr 2024 15:41:52 +0100
Subject: [PATCH 212/248] Added heating, fireplace and windows overrides

---
 recommendations/FireplaceRecommendations.py |  4 ++-
 recommendations/FloorRecommendations.py     |  2 +-
 recommendations/HeatingRecommender.py       | 22 ++++++++++++-----
 recommendations/WindowsRecommendations.py   | 27 +++++++++++++--------
 4 files changed, 37 insertions(+), 18 deletions(-)

diff --git a/recommendations/FireplaceRecommendations.py b/recommendations/FireplaceRecommendations.py
index 5d620d49..c1114f31 100644
--- a/recommendations/FireplaceRecommendations.py
+++ b/recommendations/FireplaceRecommendations.py
@@ -32,7 +32,8 @@ class FireplaceRecommendations(Definitions):
         if number_open_fireplaces == 0:
             return
 
-        estimated_cost = number_open_fireplaces * self.COST_OF_WORK
+        is_override = "sealing_open_fireplace" in self.property.override
+        estimated_cost = number_open_fireplaces * self.COST_OF_WORK if not is_override else 0
 
         # We recommend installing two mechanical ventilation systems
         self.recommendation = [
@@ -44,6 +45,7 @@ class FireplaceRecommendations(Definitions):
                 "starting_u_value": None,
                 "new_u_value": None,
                 "sap_points": None,
+                "is_override": is_override,
                 "total": estimated_cost,
                 # Take a very basic estimate of 6 hours, multipled by the number of open fireplaces to seal
                 "labour_hours": 6 * number_open_fireplaces,
diff --git a/recommendations/FloorRecommendations.py b/recommendations/FloorRecommendations.py
index 1744a928..b7bd370c 100644
--- a/recommendations/FloorRecommendations.py
+++ b/recommendations/FloorRecommendations.py
@@ -207,7 +207,6 @@ class FloorRecommendations(Definitions):
                         is_override = "solid_floor_insulation" in self.property.override
                         if is_override:
                             cost_result = override_costs(cost_result)
-
                     else:
                         raise NotImplementedError("Implement me!")
 
@@ -227,6 +226,7 @@ class FloorRecommendations(Definitions):
                             "starting_u_value": u_value,
                             "new_u_value": new_u_value,
                             "sap_points": None,
+                            "is_override": is_override,
                             **cost_result
                         }
                     )
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 92457a27..27e4985a 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -1,7 +1,7 @@
 import pandas as pd
 
 from recommendations.Costs import Costs
-from recommendations.recommendation_utils import check_simulation_difference
+from recommendations.recommendation_utils import check_simulation_difference, override_costs
 from backend.Property import Property
 from etl.epc_clean.epc_attributes.MainheatAttributes import MainHeatAttributes
 from etl.epc_clean.epc_attributes.HotWaterAttributes import HotWaterAttributes
@@ -33,7 +33,7 @@ class HeatingRecommender:
 
         if has_electric_heating_description or no_heating_no_mains:
             # Recommend high heat retention storage heaters
-            self.recommend_electric_storage_heaters(phase=phase, system_change=True, heating_controls_only=False)
+            self.recommend_hhr_storage_heaters(phase=phase, system_change=True, heating_controls_only=False)
 
         # if the property has mains heating with boiler and radiators, we recommend optimal heating controls
         has_boiler = self.property.main_heating["clean_description"] in ["Boiler and radiators, mains gas"]
@@ -89,9 +89,8 @@ class HeatingRecommender:
 
         return differences
 
-    @staticmethod
     def combine_heating_and_controls(
-        controls_recommendations, heating_simulation_config, costs, description, phase, heating_controls_only,
+        self, controls_recommendations, heating_simulation_config, costs, description, phase, heating_controls_only,
         system_change
     ):
         """
@@ -140,6 +139,11 @@ class HeatingRecommender:
 
                 recommendation_description = f"{description} and {controls_description}"
 
+            is_override = "cavity_wall_insulation" in self.property.override
+            if is_override:
+                total_costs = override_costs(total_costs)
+                recommendation_description = "Heating system has already been upgraded, no further action needed."
+
             recommendation = {
                 "phase": phase,
                 "parts": [
@@ -150,6 +154,7 @@ class HeatingRecommender:
                 "starting_u_value": None,
                 "new_u_value": None,
                 "sap_points": None,
+                "is_override": is_override,
                 **total_costs,
                 "simulation_config": recommendation_simulation_config
             }
@@ -181,9 +186,8 @@ class HeatingRecommender:
 
         return output
 
-    def recommend_electric_storage_heaters(self, phase, system_change, heating_controls_only):
+    def recommend_hhr_storage_heaters(self, phase, system_change, heating_controls_only):
         """
-        We recommend electric storage heaters as an upgrade to the heating system.
         We will recommend upgrading to a high heat retention storage system, if the current system is not already
         high heat retention storage
 
@@ -360,6 +364,11 @@ class HeatingRecommender:
                 n_heated_rooms=self.property.data["number-heated-rooms"]
             )
 
+            is_override = "heating" in self.property.override
+            if is_override:
+                boiler_costs = override_costs(boiler_costs)
+                description = "Heating system has already been upgraded, no further action needed."
+
             boiler_recommendation = {
                 "phase": recommendation_phase,
                 "parts": [
@@ -370,6 +379,7 @@ class HeatingRecommender:
                 "starting_u_value": None,
                 "new_u_value": None,
                 "sap_points": None,
+                "is_override": is_override,
                 "simulation_config": simulation_config,
                 **boiler_costs
             }
diff --git a/recommendations/WindowsRecommendations.py b/recommendations/WindowsRecommendations.py
index d7404e3b..b2fe20a6 100644
--- a/recommendations/WindowsRecommendations.py
+++ b/recommendations/WindowsRecommendations.py
@@ -4,6 +4,7 @@ import numpy as np
 
 from backend.Property import Property
 from recommendations.Costs import Costs
+from recommendation_utils import override_costs
 
 
 class WindowsRecommendations:
@@ -70,18 +71,23 @@ class WindowsRecommendations:
             is_secondary_glazing=is_secondary_glazing
         )
 
-        glazing_type = "secondary glazing" if is_secondary_glazing else "double glazing"
-        if self.property.windows["glazing_coverage"] in ["partial", "most"]:
-            description = f"Install {glazing_type} to the remaining windows"
+        is_override = "windows_glazing" in self.property.override
+        if is_override:
+            cost_result = override_costs(cost_result)
+            description = "The property already has double glazing installed. No further action is required."
         else:
-            description = f"Install {glazing_type} to all windows"
+            glazing_type = "secondary glazing" if is_secondary_glazing else "double glazing"
+            if self.property.windows["glazing_coverage"] in ["partial", "most"]:
+                description = f"Install {glazing_type} to the remaining windows"
+            else:
+                description = f"Install {glazing_type} to all windows"
 
-        if self.property.is_listed:
-            description += ". Secondary glazing recommended due to listed building status"
-        elif self.property.is_heritage:
-            description += ". Secondary glazing recommended due to herigate building status"
-        elif self.property.in_conservation_area:
-            description += ". Secondary glazing recommended due to conservation area status"
+            if self.property.is_listed:
+                description += ". Secondary glazing recommended due to listed building status"
+            elif self.property.is_heritage:
+                description += ". Secondary glazing recommended due to herigate building status"
+            elif self.property.in_conservation_area:
+                description += ". Secondary glazing recommended due to conservation area status"
 
         self.recommendation = [
             {
@@ -92,6 +98,7 @@ class WindowsRecommendations:
                 "starting_u_value": None,
                 "new_u_value": None,
                 "sap_points": None,
+                "is_override": is_override,
                 **cost_result,
                 "is_secondary_glazing": is_secondary_glazing
             }

From 1ee115fa7e73f170d559a24026680677f89aaf5d Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 12 Apr 2024 15:48:44 +0100
Subject: [PATCH 213/248] Added overrides to hot water, lighting, secondary heating and solar PV

---
 recommendations/HotwaterRecommendations.py | 11 ++++++++++-
 recommendations/LightingRecommendations.py |  7 +++++++
 recommendations/SecondaryHeating.py        | 12 +++++++++++-
 recommendations/SolarPvRecommendations.py  |  6 ++++++
 4 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/recommendations/HotwaterRecommendations.py b/recommendations/HotwaterRecommendations.py
index 7f77597f..88cfa932 100644
--- a/recommendations/HotwaterRecommendations.py
+++ b/recommendations/HotwaterRecommendations.py
@@ -1,5 +1,6 @@
 from backend.Property import Property
 from recommendations.Costs import Costs
+from recommendations.recommendation_utils import override_costs
 
 
 class HotwaterRecommendations:
@@ -41,6 +42,13 @@ class HotwaterRecommendations:
 
         recommendation_cost = self.costs.hot_water_tank_insulation()
 
+        is_override = "hot_water_tank_insulation" in self.property.override
+        if is_override:
+            recommendation_cost = override_costs(recommendation_cost)
+            description = "Insulation tank has already been insulated, no further action required"
+        else:
+            description = "Insulate hot water tank"
+
         self.recommendations.append(
             {
                 "phase": phase,
@@ -48,10 +56,11 @@ class HotwaterRecommendations:
                     # TODO
                 ],
                 "type": "hot_water_tank_insulation",
-                "description": "Insulate the hot water tank with an insulation jacket",
+                "description": description,
                 "starting_u_value": None,
                 "new_u_value": None,
                 "sap_points": None,
+                "is_override": is_override,
                 **recommendation_cost,
                 "simulation_config": {"hot_water_energy_eff_ending": "Average"}
             }
diff --git a/recommendations/LightingRecommendations.py b/recommendations/LightingRecommendations.py
index 352c4d8a..9e4c8e43 100644
--- a/recommendations/LightingRecommendations.py
+++ b/recommendations/LightingRecommendations.py
@@ -1,6 +1,7 @@
 from backend.Property import Property
 from typing import List
 from recommendations.Costs import Costs
+from recommendations.recommendation_utils import override_costs
 
 
 class LightingRecommendations:
@@ -91,6 +92,11 @@ class LightingRecommendations:
 
         heat_demand_change, carbon_change = self.estimate_lighting_impact(number_non_lel_outlets)
 
+        is_override = "low_energy_lighting" in self.property.override
+        if is_override:
+            cost_result = override_costs(cost_result)
+            description = "Low energy lighting has already been installed, no further action required"
+
         self.recommendation = [
             {
                 "phase": phase,
@@ -99,6 +105,7 @@ class LightingRecommendations:
                 "description": description,
                 "starting_u_value": None,
                 "new_u_value": None,
+                "is_override": is_override,
                 # For SAP points, we use the fact that lighting is usually worth 2 points and we scale this to
                 # the proportion of lights that will be set to low energy
                 "sap_points": round(2 * (number_non_lel_outlets / number_lighting_outlets), 2),
diff --git a/recommendations/SecondaryHeating.py b/recommendations/SecondaryHeating.py
index f31c4c05..e426977e 100644
--- a/recommendations/SecondaryHeating.py
+++ b/recommendations/SecondaryHeating.py
@@ -1,4 +1,5 @@
 from recommendations.Costs import Costs
+from recommendations.recommendation_utils import override_costs
 from backend.Property import Property
 
 
@@ -38,15 +39,24 @@ class SecondaryHeating:
             n_rooms = 0
 
         costs = self.costs.heater_removal(n_rooms=n_rooms)
+
+        is_override = "secondary_heating" in self.property.override
+        if is_override:
+            costs = override_costs(costs)
+            description = "Secondary heating system has already been removed, no further action required"
+        else:
+            description = "Remove the secondary heating system"
+
         self.recommendation.append(
             {
                 "phase": phase,
                 "parts": [],
                 "type": "secondary_heating",
-                "description": "Remove the secondary heating system",
+                "description": description,
                 "starting_u_value": None,
                 "new_u_value": None,
                 "sap_points": None,
+                "is_override": is_override,
                 **costs,
                 "simulation_config": {
                     "secondheat_description_ending": "None"
diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py
index f75003ce..72fcdf4b 100644
--- a/recommendations/SolarPvRecommendations.py
+++ b/recommendations/SolarPvRecommendations.py
@@ -1,5 +1,6 @@
 import numpy as np
 from recommendations.Costs import Costs
+from recommendations.recommendation_utils import override_costs
 
 
 class SolarPvRecommendations:
@@ -110,6 +111,10 @@ class SolarPvRecommendations:
                 description = (f"Install a {kw} kilowatt-peak (kWp) solar photovoltaic (PV) p"
                                f"anel system on {round(roof_coverage_percent)}% the roof.")
 
+            is_override = "solar_pv" in self.property.override
+            if is_override:
+                cost_result = override_costs(cost_result)
+
             self.recommendation.append(
                 {
                     "phase": phase,
@@ -119,6 +124,7 @@ class SolarPvRecommendations:
                     "starting_u_value": None,
                     "new_u_value": None,
                     "sap_points": None,
+                    "is_override": is_override,
                     **cost_result,
                     # This is required for simulating the SAP impact. solar_pv_percentage is between 0 & 1 so we scale
                     # back up here

From 14a1f35fb16cbf1199afbd66ce50f598b5d7a10b Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 12 Apr 2024 16:27:26 +0100
Subject: [PATCH 214/248] amended system change costs for first time central
 heating

---
 recommendations/Costs.py              | 72 +++++++++++++++++++++++++--
 recommendations/HeatingRecommender.py |  9 +++-
 2 files changed, 77 insertions(+), 4 deletions(-)

diff --git a/recommendations/Costs.py b/recommendations/Costs.py
index 45c17102..0e67b352 100644
--- a/recommendations/Costs.py
+++ b/recommendations/Costs.py
@@ -83,6 +83,14 @@ CONVENTIONAL_BOILER_COSTS = {
 ROOM_HEATER_REMOVAL_COST = 120
 ROOM_HEATER_REMOVAL_LABOUR_HOURS = 3
 
+# This is a cost quoted by Jim for a system flush - the existing system will run more efficiently
+SYSTEM_FLUSH_COST = 250
+
+SINGLE_RADIATOR_COST = 150
+DOUBLE_RADIATOR_COST = 300
+FLUE_COST = 600
+PIPEWORK_COST = 750  # Min cost is £500
+
 
 class Costs:
     """
@@ -1126,9 +1134,45 @@ class Costs:
             "labour_days": np.ceil(removal_labour_hours / 8),
         }
 
-    def boiler(self, is_combi, size, exising_room_heaters, n_heated_rooms):
+    @staticmethod
+    def _estimate_n_radiators(number_habitable_rooms, total_floor_area, property_type, built_form):
+        # Base number of radiators: one per habitable room
+        base_radiators = number_habitable_rooms
+
+        # Additional radiators for non-habitable essential areas (e.g., kitchens, hallways)
+        additional_radiators = 3  # Initial assumption
+
+        # Adjust additional radiators based on property type
+        if property_type == 'Flat':
+            additional_radiators -= 1  # Flats may need fewer radiators due to less exposure
+        elif property_type in ['House', 'Bungalow', 'Maisonette']:
+            # Multiple floors in Maisonette may require additional heating points
+            additional_radiators += 2  # Houses and bungalows might need more due to greater exposure
+        else:
+            raise Exception("Invalid property type")
+
+        # Adjust total radiator needs based on built form
+        form_factor = {
+            'Mid-Terrace': 0.95,
+            'Semi-Detached': 1.05,
+            'Detached': 1.25,
+            'End-Terrace': 1.05
+        }
+
+        # Calculate total heating power needed and number of radiators based on standard output
+        total_heating_power_required = total_floor_area * 80  # Watts per square meter
+        radiator_output = 1000  # Average wattage per radiator
+        total_radiators_based_on_power = (total_heating_power_required / radiator_output) * form_factor[built_form]
+
+        # Final estimation taking the higher of calculated needs or base room count
+        estimated_radiators = max(total_radiators_based_on_power, base_radiators + additional_radiators)
+        return round(estimated_radiators)
+
+    def boiler(self, is_combi, size, exising_room_heaters, system_change, n_heated_rooms, n_rooms):
         """
         Based on a basic estimate of median value £2600 to install a low carbon combi boiler
+        First time central heating costs can also be found here:
+        https://www.checkatrade.com/blog/cost-guides/central-heating-installation-cost/
         :return:
         """
 
@@ -1137,11 +1181,11 @@ class Costs:
         # We now need to estimate the cost of the works
         labour_days = 2
         labour_hours = labour_days * 8
-        labour_rate = 500
+        labour_rate = 300
 
         # Average cost of installation is 1 (maybe 2days) at £300 per day
         # https://www.checkatrade.com/blog/cost-guides/new-boiler-cost/
-        # To be pessimistic, assume 2 days work and £500 day rate
+        # To be pessimistic, assume 2 days work
         labour_cost = labour_rate * self.labour_adjustment_factor * labour_days
         # Add contingency and preliminaries
         labour_cost = labour_cost * (1 + self.CONTINGENCY + self.PRELIMINARIES)
@@ -1161,6 +1205,28 @@ class Costs:
             subtotal_before_vat += removal_costing["subtotal"]
             labour_hours += removal_costing["labour_hours"]
             labour_days += removal_costing["labour_days"]
+            vat += removal_costing["vat"]
+
+        if system_change:
+            # We need the cost of radiators
+            n_radiators = self._estimate_n_radiators(
+                number_habitable_rooms=n_rooms,
+                total_floor_area=self.property.floor_area,
+                property_type=self.property.data["property-type"],
+                built_form=self.property.data["built-form"]
+            )
+
+            additionals_labour_cost = labour_rate * self.labour_adjustment_factor
+            radiator_cost = DOUBLE_RADIATOR_COST * n_radiators
+            system_change_cost = radiator_cost + FLUE_COST + PIPEWORK_COST + additionals_labour_cost
+            system_change_cost_before_vat = system_change_cost / (1 + self.VAT_RATE)
+            system_change_vat = system_change_cost - system_change_cost_before_vat
+            # We add an extra labour day for the system change
+            labour_days += 1
+            labour_hours += 8
+            total_cost += system_change_cost
+            subtotal_before_vat += system_change_cost_before_vat
+            vat += system_change_vat
 
         return {
             "total": total_cost,
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 27e4985a..d83b755e 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -18,6 +18,11 @@ class HeatingRecommender:
         self.recommendations = []
 
     def recommend(self, phase=0):
+
+        # TODO: We could have a system flush recommendation for an existing boiler, where there is no need to replace
+        #       the boiler, but instead flushing the system will make it run more efficiently. There is a cost for this
+        #       in the Costs class, stored as SYSTEM_FLUSH_COST
+
         self.recommendations = []
         # This first iteration of the recommender will provide very basic recommendation
         # We recommend heating controls based on the main heating system
@@ -361,7 +366,9 @@ class HeatingRecommender:
                 is_combi=is_combi,
                 size=f"{boiler_size}kw",
                 exising_room_heaters=exising_room_heaters,
-                n_heated_rooms=self.property.data["number-heated-rooms"]
+                system_change=system_change,
+                n_heated_rooms=self.property.data["number-heated-rooms"],
+                n_rooms=self.property.number_of_rooms
             )
 
             is_override = "heating" in self.property.override

From 94f9979f561c5a64acea1fc871c38a9d4868f8e0 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 12 Apr 2024 16:31:11 +0100
Subject: [PATCH 215/248] fixed override bug

---
 backend/Property.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/Property.py b/backend/Property.py
index d000be28..2892b86e 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -79,7 +79,7 @@ class Property:
         # of the non-invasive surveys. We reflect that this has been installed in the recommendations, but remove the
         # cost and instead, provide a message that the measure has already been installed
 
-        self.override = ast.literal_eval(override['overrides']) if override is not None else []
+        self.override = ast.literal_eval(override['overrides']) if override else []
 
         self.uprn = epc_record.get("uprn")
         self.full_sap_epc = epc_record.get("full_sap_epc")

From d8caacae97006638aed112e7c8682a0a23372690 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 12 Apr 2024 17:46:06 +0100
Subject: [PATCH 216/248] creating non-invasive survey results WIP

---
 .idea/Model.iml                           |   2 +-
 .idea/misc.xml                            |   2 +-
 etl/customers/immo/pilot/non_invasive.py  | 131 ++++++++++++++++++++++
 etl/customers/immo/pilot/requirements.txt |   1 +
 4 files changed, 134 insertions(+), 2 deletions(-)
 create mode 100644 etl/customers/immo/pilot/non_invasive.py
 create mode 100644 etl/customers/immo/pilot/requirements.txt

diff --git a/.idea/Model.iml b/.idea/Model.iml
index 4413bb06..b0f9c00d 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 6f308057..1122b380 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/etl/customers/immo/pilot/non_invasive.py b/etl/customers/immo/pilot/non_invasive.py
new file mode 100644
index 00000000..cb978059
--- /dev/null
+++ b/etl/customers/immo/pilot/non_invasive.py
@@ -0,0 +1,131 @@
+import extract_msg
+
+
+def parse_msg_body(text):
+    # Split the text into lines
+    lines = text.split('\r\n')
+
+    # Dictionary to hold the parsed data
+    data = {}
+
+    # Process each line
+    for line in lines:
+        # Remove all asterisks and extra whitespace
+        clean_line = line.replace('*', '').strip()
+
+        if clean_line:  # Ensure the line is not empty after cleaning
+            # Normalise a ' = ' separator to ': ' if present
+            if '=' in clean_line:
+                clean_line = clean_line.replace(' = ', ': ')
+
+            # Every non-empty line is stored under a sequential key (Info1, Info2, ...),
+            # whether or not it contained '='
+            data[f"Info{len(data) + 1}"] = clean_line
+
+    return data
+
+
+def app():
+    """
+    This code retrieves the results of the non-invasive surveys, to be stored in S3
+    :return:
+    """
+
+    # filepath = ("/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/5 Oaklands B62 "
+    #             "0JA/Immo - 5 Oaklands Halesowen B62 0JA.msg")
+    # filepath = ("/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/6 Beech Rd DY1 "
+    #             "4BP/IMMO - 6 Beech Road Dudley DY1 4BP.msg")
+    # filepath = (
+    #     "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/8 Corporation Rd DY2 "
+    #     "7PX/IMMO - 8 Corporation Road Dudley DY2 7PX.msg"
+    # )
+    # filepath = (
+    #     "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/21 Wells Rd DY5 3TB/"
+    #     "IMMO - 21 Wells Road Brierley Hill DY5 3TB.msg"
+    # )
+    filepath = (
+        "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/47 Fairfield Rd DY8 "
+        "5UJ/IMMO - 47 Fairfield Road Wordsley Stourbridge DY8 5UJ.msg"
+    )
+
+    with extract_msg.Message(filepath) as msg:
+        sender = msg.sender
+        recipients = msg.to
+        subject = msg.subject
+        body = msg.body
+        # If the msg has attachments, they can be extracted as well
+        attachments = msg.attachments
+
+    from pprint import pprint
+    pprint(parse_msg_body(body))
+
+    # We manually create the non-invasive notes for the pilot
+    non_invasive_notes = [
+        {
+            'address': '5 Oaklands',
+            'postcode': 'B62 0JA',
+            'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a '
+                               'CIGA check and extracting the cavity, replacing with bead insulation. '
+                               'There is a shared alleyway with the neighbour, that is a solid brick wall.',
+            'Wall Render': 'Partial render between top of ground floor window and bottom of 1st floor window',
+            'Existing solar PV': 'No existing solar',
+            'Orientation': 'Front house direction: North East, Back house direction: South West',
+            'Access to mains?': 'Property has access to the mains',
+        },
+        {
+            'address': '6 Beech Road',
+            'postcode': 'DY1 4BP',
+            'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'Wall Insulation': '1st floor is solid brick with external wall insulation. 2nd floor is cavity, '
+                               'retro drilled, containing loose fibre insulation. Consider getting a '
+                               'CIGA check and extracting the cavity, replacing with bead insulation.',
+            'Wall Render': None,
+            'Existing solar PV': 'No existing solar',
+            'Orientation': 'Side house direction: North East',
+            'Access to mains?': 'Property has access to the mains',
+        },
+        {
+            'address': '8 Corporation Road',
+            'postcode': 'DY2 7PX',
+            'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'Wall Insulation': "External wall insulation",
+            'Wall Render': "Render finish throughout",
+            'Existing solar PV': 'No existing solar',
+            'Orientation': 'Front house direction: North East, Back house direction: South West',
+            'Access to mains?': None,
+        },
+        {
+
+            'address': '21 Wells Road',
+            'postcode': 'DY5 3TB',
+            'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a '
+                               'CIGA check and extracting the cavity, replacing with bead insulation.',
+            'Wall Render': None,
+            'Existing solar PV': 'No existing solar',
+            'Orientation': 'Front house direction: East, Back house direction: West',
+            'Access to mains?': 'Property has access to the mains',
+        },
+        {
+            'address': '47 Fairfield Road',
+            'postcode': 'DY8 5UJ',
+            'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a '
+                               'CIGA check and extracting the cavity, replacing with bead insulation.',
+            'Wall Render': None,
+            'Existing solar PV': 'No existing solar',
+            'Orientation': 'Front house direction: East, Back house direction: West',
+            'Access to mains?': 'Property has access to the mains',
+        },
+        {
+            'address': None,
+            'postcode': None,
+            'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'Wall Insulation': None,
+            'Wall Render': None,
+            'Existing solar PV': None,
+            'Orientation': None,
+            'Access to mains?': None,
+        },
+    ]
diff --git a/etl/customers/immo/pilot/requirements.txt b/etl/customers/immo/pilot/requirements.txt
new file mode 100644
index 00000000..4673ab35
--- /dev/null
+++ b/etl/customers/immo/pilot/requirements.txt
@@ -0,0 +1 @@
+extract-msg

From a158f2353c0f84bb005924441166ef56a899f59c Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 13 Apr 2024 15:36:58 +0100
Subject: [PATCH 217/248] manually created non-invasive notes

---
 etl/customers/immo/pilot/non_invasive.py | 63 ++++++++++++++++++++----
 1 file changed, 54 insertions(+), 9 deletions(-)

diff --git a/etl/customers/immo/pilot/non_invasive.py b/etl/customers/immo/pilot/non_invasive.py
index cb978059..c2b8ea64 100644
--- a/etl/customers/immo/pilot/non_invasive.py
+++ b/etl/customers/immo/pilot/non_invasive.py
@@ -43,9 +43,17 @@ def app():
     #     "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/21 Wells Rd DY5 3TB/"
     #     "IMMO - 21 Wells Road Brierley Hill DY5 3TB.msg"
     # )
+    # filepath = (
+    #     "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/47 Fairfield Rd DY8 "
+    #     "5UJ/IMMO - 47 Fairfield Road Wordsley Stourbridge DY8 5UJ.msg"
+    # )
+    # filepath = (
+    #     "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/91 Osprey Drive DY1 "
+    #     "2JS/IMMO - 91 Osprey Drive Dudley DY1 2JS.msg"
+    # )
     filepath = (
-        "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/47 Fairfield Rd DY8 "
-        "5UJ/IMMO - 47 Fairfield Road Wordsley Stourbridge DY8 5UJ.msg"
+        "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/195 Ashenhurst Rd DY1 "
+        "2JB/IMMO - 195 Ashenhurst Road Dudley DY1 2JB.msg"
     )
 
     with extract_msg.Message(filepath) as msg:
@@ -119,13 +127,50 @@ def app():
             'Access to mains?': 'Property has access to the mains',
         },
         {
-            'address': None,
-            'postcode': None,
+            'address': '53 Bromley',
+            'postcode': 'DY5 4PJ',
             'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
-            'Wall Insulation': None,
-            'Wall Render': None,
-            'Existing solar PV': None,
-            'Orientation': None,
-            'Access to mains?': None,
+            'Wall Insulation': "Filled at build, partially filled - celotex/king board, 50mm cavity remaining - "
+                               "recommends a cavity wall fill",
+            "Roof": "Hipped roof",
+            'Existing solar PV': 'No existing solar',
+            'Orientation': "Front house direction: North, Back house direction: South, Side house direction: West",
+            'Access to mains?': 'Property has access to the mains',
+        },
+        {
+            'address': '91 Osprey Drive',
+            'postcode': 'DY1 2JS',
+            'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a '
+                               'CIGA check and extracting the cavity, replacing with bead insulation.',
+            'Wall Render': 'Tile hung front and rear of property',
+            'Existing solar PV': 'No existing solar',
+            'Orientation': 'Side house direction: East',
+            'Access to mains?': 'Property has access to the mains',
+        },
+        {
+            'address': '150 Huntingtree Road',
+            'postcode': 'B63 4HP',
+            'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'Heating': 'Electric (storage heaters)',
+            'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a '
+                               'CIGA check and extracting the cavity, replacing with bead insulation.',
+            "Roof": "Hipped roof",
+            'Existing solar PV': 'No existing solar',
+            'Orientation': "Front house direction: North West, Back house direction: South East, Side house direction: "
+                           "North East",
+        },
+        {
+            'address': '195 Ashenhurst Road',
+            'postcode': 'DY1 2JB',
+            'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a '
+                               'CIGA check and extracting the cavity, replacing with bead insulation.',
+            'Wall Render': "Solid render front and rear of property",
+            'Existing solar PV': 'No existing solar',
+            'Orientation': 'Front house direction: South, Back house direction: North',
+            'Access to mains?': 'Property has access to the mains',
         },
     ]
+
+    # TODO: Push the non-invasive results straight to the database from here

From 485c01cbd69cf8b562b2d53da0ae03915edf8d93 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 13 Apr 2024 16:14:01 +0100
Subject: [PATCH 218/248] Added uprns to non-invasive notes

---
 etl/customers/immo/pilot/non_invasive.py | 35 ++++++++++++++++++------
 1 file changed, 27 insertions(+), 8 deletions(-)

diff --git a/etl/customers/immo/pilot/non_invasive.py b/etl/customers/immo/pilot/non_invasive.py
index c2b8ea64..0a376388 100644
--- a/etl/customers/immo/pilot/non_invasive.py
+++ b/etl/customers/immo/pilot/non_invasive.py
@@ -51,18 +51,17 @@ def app():
     #     "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/91 Osprey Drive DY1 "
     #     "2JS/IMMO - 91 Osprey Drive Dudley DY1 2JS.msg"
     # )
+    # filepath = (
+    #     "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/195 Ashenhurst Rd DY1 "
+    #     "2JB/IMMO - 195 Ashenhurst Road Dudley DY1 2JB.msg"
+    # )
     filepath = (
-        "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/195 Ashenhurst Rd DY1 "
-        "2JB/IMMO - 195 Ashenhurst Road Dudley DY1 2JB.msg"
+        "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/27 Milton Rd DY1 2JB/IMMO "
+        "- 27 Milton Road Coseley Bilston WV14 8HZ.msg"
     )
 
     with extract_msg.Message(filepath) as msg:
-        sender = msg.sender
-        recipients = msg.to
-        subject = msg.subject
         body = msg.body
-        # If the msg has attachments, they can be extracted as well
-        attachments = msg.attachments
 
     from pprint import pprint
     pprint(parse_msg_body(body))
@@ -70,6 +69,7 @@ def app():
     # We manually create the non-invasive notes for the pilot
     non_invasive_notes = [
         {
+            'uprn': 90028499,
             'address': '5 Oaklands',
             'postcode': 'B62 0JA',
             'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
@@ -82,6 +82,7 @@ def app():
             'Access to mains?': 'Property has access to the mains',
         },
         {
+            'uprn': 90055152,
             'address': '6 Beech Road',
             'postcode': 'DY1 4BP',
             'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
@@ -94,6 +95,7 @@ def app():
             'Access to mains?': 'Property has access to the mains',
         },
         {
+            'uprn': 90070461,
             'address': '8 Corporation Road',
             'postcode': 'DY2 7PX',
             'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
@@ -104,7 +106,7 @@ def app():
             'Access to mains?': None,
         },
         {
-
+            'uprn': 90022227,
             'address': '21 Wells Road',
             'postcode': 'DY5 3TB',
             'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
@@ -116,6 +118,7 @@ def app():
             'Access to mains?': 'Property has access to the mains',
         },
         {
+            'uprn': 90077535,
             'address': '47 Fairfield Road',
             'postcode': 'DY8 5UJ',
             'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
@@ -127,6 +130,7 @@ def app():
             'Access to mains?': 'Property has access to the mains',
         },
         {
+            'uprn': 90060989,
             'address': '53 Bromley',
             'postcode': 'DY5 4PJ',
             'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
@@ -138,6 +142,7 @@ def app():
             'Access to mains?': 'Property has access to the mains',
         },
         {
+            'uprn': 90048026,
             'address': '91 Osprey Drive',
             'postcode': 'DY1 2JS',
             'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
@@ -149,6 +154,7 @@ def app():
             'Access to mains?': 'Property has access to the mains',
         },
         {
+            'uprn': 90093693,
             'address': '150 Huntingtree Road',
             'postcode': 'B63 4HP',
             'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
@@ -161,6 +167,7 @@ def app():
                            "North East",
         },
         {
+            'uprn': 90051858,
             'address': '195 Ashenhurst Road',
             'postcode': 'DY1 2JB',
             'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
@@ -171,6 +178,18 @@ def app():
             'Orientation': 'Front house direction: South, Back house direction: North',
             'Access to mains?': 'Property has access to the mains',
         },
+        {
+            'uprn': 90106884,
+            'address': '27 Milton Road',
+            'postcode': 'WV14 8HZ',
+            'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a '
+                               'CIGA check and extracting the cavity, replacing with bead insulation.',
+            'Wall Render': "Solid render front and rear of property",
+            'Existing solar PV': 'No existing solar',
+            'Orientation': 'Front house direction: South East, Back house direction: North West',
+            'Access to mains?': 'Property has access to the mains',
+        },
     ]
 
     # TODO: Push the non-invasive results straight to the database from here

From 65f83930d56290fc73846ca4c8626ac46e3cd7c6 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 13 Apr 2024 16:25:12 +0100
Subject: [PATCH 219/248] added is_override to storage of recommendation

---
 .../db/functions/recommendations_functions.py |  3 ++-
 .../app/db/models/non_intrusive_surveys.py    | 24 +++++++++++++++++++
 backend/app/db/models/recommendations.py      |  1 +
 3 files changed, 27 insertions(+), 1 deletion(-)
 create mode 100644 backend/app/db/models/non_intrusive_surveys.py

diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py
index 1426e339..43daec77 100644
--- a/backend/app/db/functions/recommendations_functions.py
+++ b/backend/app/db/functions/recommendations_functions.py
@@ -85,7 +85,8 @@ def upload_recommendations(session: Session, recommendations_to_upload, property
             "co2_equivalent_savings": rec["co2_equivalent_savings"],
             "total_work_hours": rec["labour_hours"],
             "energy_cost_savings": rec["energy_cost_savings"],
-            "labour_days": rec["labour_days"]
+            "labour_days": rec["labour_days"],
+            "is_override": rec["is_override"],
         }
         for rec in recommendations_to_upload
     ]
diff --git a/backend/app/db/models/non_intrusive_surveys.py b/backend/app/db/models/non_intrusive_surveys.py
new file mode 100644
index 00000000..c5f3734a
--- /dev/null
+++ b/backend/app/db/models/non_intrusive_surveys.py
@@ -0,0 +1,24 @@
+from sqlalchemy import Column, BigInteger, String, Float, Boolean, TIMESTAMP, ForeignKey, Enum, Integer
+from sqlalchemy.orm import declarative_base
+from sqlalchemy.sql import func
+from backend.app.db.models.portfolio import Portfolio, PropertyModel
+from backend.app.db.models.materials import Material
+from datatypes.enums import QuantityUnits
+
+Base = declarative_base()
+
+
+class NonIntrusiveSurvey(Base):
+    __tablename__ = 'non_intrusive_survey'
+
+    id = Column(BigInteger, primary_key=True, autoincrement=True)
+    uprn = Column(Integer, nullable=False)
+    survey_date = Column(TIMESTAMP, nullable=False)
+    surveyor = Column(String, nullable=False)
+
+
+class NonIntrusiveSurveyNotes(Base):
+    id = Column(BigInteger, primary_key=True, autoincrement=True)
+    survey_id = Column(BigInteger, ForeignKey('non_intrusive_survey.id'), nullable=False)
+    title = Column(String, nullable=False)
+    note = Column(String, nullable=False)
diff --git a/backend/app/db/models/recommendations.py b/backend/app/db/models/recommendations.py
index a492f2f2..be5ff30c 100644
--- a/backend/app/db/models/recommendations.py
+++ b/backend/app/db/models/recommendations.py
@@ -30,6 +30,7 @@ class Recommendation(Base):
     rental_yield_increase = Column(Float)
     total_work_hours = Column(Float)
     labour_days = Column(Float)
+    is_override = Column(Boolean, nullable=False, default=False)
 
 
 class RecommendationMaterials(Base):

From aaa279463eea2505b3d36ee46c26b33b17955e77 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 13 Apr 2024 16:37:28 +0100
Subject: [PATCH 220/248] Added is_override to heating controls

---
 .idea/Model.iml                              |  2 +-
 .idea/misc.xml                               |  2 +-
 recommendations/HeatingControlRecommender.py | 46 ++++++++++++++------
 3 files changed, 35 insertions(+), 15 deletions(-)

diff --git a/.idea/Model.iml b/.idea/Model.iml
index b0f9c00d..4413bb06 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 1122b380..6f308057 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/recommendations/HeatingControlRecommender.py b/recommendations/HeatingControlRecommender.py
index 76eaba4f..63218163 100644
--- a/recommendations/HeatingControlRecommender.py
+++ b/recommendations/HeatingControlRecommender.py
@@ -1,5 +1,5 @@
 from recommendations.Costs import Costs
-from recommendations.recommendation_utils import check_simulation_difference
+from recommendations.recommendation_utils import check_simulation_difference, override_costs
 from backend.Property import Property
 from etl.epc_clean.epc_attributes.MainheatControlAttributes import MainheatControlAttributes
 
@@ -159,20 +159,30 @@ class HeatingControlRecommender:
         has_room_thermostat = not needs_room_thermostat
         has_trvs = not needs_trvs
 
+        cost_result = self.costs.roomstat_programmer_trvs(
+            number_heated_rooms=int(self.property.data["number-heated-rooms"]),
+            has_programmer=has_programmer,
+            has_room_thermostat=has_room_thermostat,
+            has_trvs=has_trvs
+        )
+
+        description = "upgrade heating controls to Room thermostat, programmer and TRVs"
+
+        is_override = "heating_control" in self.property.override
+        if is_override:
+            cost_result = override_costs(cost_result)
+            description = "Heating controls have already been upgraded, no further action needed."
+
         self.recommendation.append(
             {
                 "type": "heating_control",
                 "parts": [],
-                "description": "upgrade heating controls to Room thermostat, programmer and TRVs",
-                **self.costs.roomstat_programmer_trvs(
-                    number_heated_rooms=int(self.property.data["number-heated-rooms"]),
-                    has_programmer=has_programmer,
-                    has_room_thermostat=has_room_thermostat,
-                    has_trvs=has_trvs
-                ),
+                "description": description,
+                **cost_result,
                 "starting_u_value": None,
                 "new_u_value": None,
                 "sap_points": None,
+                "is_override": is_override,
                 "simulation_config": simulation_config
             }
         )
@@ -211,18 +221,28 @@ class HeatingControlRecommender:
         if self.property.data["mainheatc-energy-eff"] in ["Poor", "Very Poor", "Average", "Good"]:
             simulation_config["mainheatc_energy_eff_ending"] = "Very Good"
 
+        cost_result = self.costs.time_and_temperature_zone_control(
+            number_heated_rooms=int(self.property.data["number-heated-rooms"])
+        )
+
+        description = ("Upgrade heating controls to Smart Thermostats, room sensors and smart radiator valves (time & "
+                       "temperature zone control)")
+
+        is_override = "heating_control" in self.property.override
+        if is_override:
+            cost_result = override_costs(cost_result)
+            description = "Heating controls have already been upgraded, no further action needed."
+
         self.recommendation.append(
             {
                 "type": "heating_control",
                 "parts": [],
-                "description": "Upgrade heating controls to Smart Thermostats, room sensors and smart radiator valves "
-                               "(time & temperature zone control)",
-                **self.costs.time_and_temperature_zone_control(
-                    number_heated_rooms=int(self.property.data["number-heated-rooms"])
-                ),
+                "description": description,
+                **cost_result,
                 "starting_u_value": None,
                 "new_u_value": None,
                 "sap_points": None,
+                "is_override": is_override,
                 "simulation_config": simulation_config
             }
         )

From 527291b4395eb8b5563f52fd8449faee569d6789 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 13 Apr 2024 16:40:13 +0100
Subject: [PATCH 221/248] Added is_override to mechanical ventilation
 recommendation

---
 recommendations/VentilationRecommendations.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/recommendations/VentilationRecommendations.py b/recommendations/VentilationRecommendations.py
index 07f7cf1e..7ffcda08 100644
--- a/recommendations/VentilationRecommendations.py
+++ b/recommendations/VentilationRecommendations.py
@@ -69,6 +69,7 @@ class VentilationRecommendations(Definitions):
                 "description": f"Install {n_units} {part[0]['description']} units",
                 "starting_u_value": None,
                 "new_u_value": None,
+                "is_override": is_override,
                 "sap_points": 0,
                 "heat_demand": 0,
                 "adjusted_heat_demand": 0,

From 34d6a075289b0c2d31d75a1bad8ea5c969f12fca Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 13 Apr 2024 17:07:42 +0100
Subject: [PATCH 222/248] Pushed non-intrusive survey results to db

---
 .../app/db/functions/non_intrusive_surveys.py | 50 ++++++++++
 .../app/db/models/non_intrusive_surveys.py    |  8 +-
 etl/customers/immo/pilot/non_invasive.py      | 99 +++++++++++--------
 3 files changed, 110 insertions(+), 47 deletions(-)
 create mode 100644 backend/app/db/functions/non_intrusive_surveys.py

diff --git a/backend/app/db/functions/non_intrusive_surveys.py b/backend/app/db/functions/non_intrusive_surveys.py
new file mode 100644
index 00000000..93348121
--- /dev/null
+++ b/backend/app/db/functions/non_intrusive_surveys.py
@@ -0,0 +1,50 @@
+from sqlalchemy.orm import Session
+from backend.app.db.models.non_intrusive_surveys import NonIntrusiveSurvey, NonIntrusiveSurveyNotes
+
+
+def upload_non_intrusive_survey_notes(session: Session, non_invasive_notes, batch_size=500):
+    """
+    Uploads a list of non-intrusive survey notes into the database in batches. Each dictionary in the list represents
+    one survey and its associated notes.
+
+    :param session: SQLAlchemy Session object through which all database transactions are handled.
+    :param non_invasive_notes: List of dictionaries where each dictionary contains survey details including 'uprn',
+                               'survey_date', 'surveyor', and other notes as key-value pairs.
+    :param batch_size: The size of each batch to be processed (default is 500).
+    :return: None
+    """
+
+    # Helper function to process each batch
+    def process_batch(batch):
+        surveys = []
+        notes = []
+
+        for note in batch:
+            survey = NonIntrusiveSurvey(
+                uprn=note['uprn'],
+                survey_date=note['survey_date'],
+                surveyor=note['surveyor']
+            )
+            surveys.append(survey)
+
+        session.add_all(surveys)
+        session.flush()  # Get IDs for surveys
+
+        for note, survey in zip(batch, surveys):
+            for key, value in note.items():
+                if key not in ['uprn', 'survey_date', 'surveyor']:
+                    notes.append(NonIntrusiveSurveyNotes(
+                        survey_id=survey.id,
+                        title=key,
+                        note=value
+                    ))
+
+        session.bulk_save_objects(notes)
+        session.commit()
+
+    # Split the data into batches and process each batch
+    total = len(non_invasive_notes)
+    for start in range(0, total, batch_size):
+        end = min(start + batch_size, total)
+        batch = non_invasive_notes[start:end]
+        process_batch(batch)
diff --git a/backend/app/db/models/non_intrusive_surveys.py b/backend/app/db/models/non_intrusive_surveys.py
index c5f3734a..bc2d8adc 100644
--- a/backend/app/db/models/non_intrusive_surveys.py
+++ b/backend/app/db/models/non_intrusive_surveys.py
@@ -1,9 +1,5 @@
-from sqlalchemy import Column, BigInteger, String, Float, Boolean, TIMESTAMP, ForeignKey, Enum, Integer
+from sqlalchemy import Column, BigInteger, String, TIMESTAMP, ForeignKey, Integer
 from sqlalchemy.orm import declarative_base
-from sqlalchemy.sql import func
-from backend.app.db.models.portfolio import Portfolio, PropertyModel
-from backend.app.db.models.materials import Material
-from datatypes.enums import QuantityUnits
 
 Base = declarative_base()
 
@@ -18,6 +14,8 @@ class NonIntrusiveSurvey(Base):
 
 
 class NonIntrusiveSurveyNotes(Base):
+    __tablename__ = 'non_intrusive_survey_notes'
+
     id = Column(BigInteger, primary_key=True, autoincrement=True)
     survey_id = Column(BigInteger, ForeignKey('non_intrusive_survey.id'), nullable=False)
     title = Column(String, nullable=False)
diff --git a/etl/customers/immo/pilot/non_invasive.py b/etl/customers/immo/pilot/non_invasive.py
index 0a376388..6dc22c62 100644
--- a/etl/customers/immo/pilot/non_invasive.py
+++ b/etl/customers/immo/pilot/non_invasive.py
@@ -1,4 +1,8 @@
-import extract_msg
+# import extract_msg
+from datetime import datetime
+from sqlalchemy.orm import sessionmaker
+from backend.app.db.connection import db_engine
+from backend.app.db.functions.non_intrusive_surveys import upload_non_intrusive_survey_notes
 
 
 def parse_msg_body(text):
@@ -55,24 +59,25 @@ def app():
     #     "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/195 Ashenhurst Rd DY1 "
     #     "2JB/IMMO - 195 Ashenhurst Road Dudley DY1 2JB.msg"
     # )
-    filepath = (
-        "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/27 Milton Rd DY1 2JB/IMMO "
-        "- 27 Milton Road Coseley Bilston WV14 8HZ.msg"
-    )
-
-    with extract_msg.Message(filepath) as msg:
-        body = msg.body
-
-    from pprint import pprint
-    pprint(parse_msg_body(body))
+    # filepath = (
+    #     "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/27 Milton Rd DY1 2JB/IMMO "
+    #     "- 27 Milton Road Coseley Bilston WV14 8HZ.msg"
+    # )
+    #
+    # with extract_msg.Message(filepath) as msg:
+    #     body = msg.body
+    #
+    # from pprint import pprint
+    # pprint(parse_msg_body(body))
 
     # We manually create the non-invasive notes for the pilot
     non_invasive_notes = [
         {
             'uprn': 90028499,
-            'address': '5 Oaklands',
-            'postcode': 'B62 0JA',
-            'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            # 'address': '5 Oaklands',
+            # 'postcode': 'B62 0JA',
+            'surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'survey_date': datetime.strptime('2024-04-11', '%Y-%m-%d'),
             'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a '
                                'CIGA check and extracting the cavity, replacing with bead insulation. '
                                'There is a shared alleyway with the neighbour, that is a solid brick wall.',
@@ -83,9 +88,10 @@ def app():
         },
         {
             'uprn': 90055152,
-            'address': '6 Beech Road',
-            'postcode': 'DY1 4BP',
-            'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            # 'address': '6 Beech Road',
+            # 'postcode': 'DY1 4BP',
+            'surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'survey_date': datetime.strptime('2024-04-11', '%Y-%m-%d'),
             'Wall Insulation': '1st floor is solid brick with external wall insulation. 2nd floor is cavity, '
                                'retro drilled, containing loose fibre insulation. Consider getting a '
                                'CIGA check and extracting the cavity, replacing with bead insulation.',
@@ -96,9 +102,10 @@ def app():
         },
         {
             'uprn': 90070461,
-            'address': '8 Corporation Road',
-            'postcode': 'DY2 7PX',
-            'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            # 'address': '8 Corporation Road',
+            # 'postcode': 'DY2 7PX',
+            'surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'survey_date': datetime.strptime('2024-04-11', '%Y-%m-%d'),
             'Wall Insulation': "External wall insulation",
             'Wall Render': "Render finish throughout",
             'Existing solar PV': 'No existing solar',
@@ -107,9 +114,10 @@ def app():
         },
         {
             'uprn': 90022227,
-            'address': '21 Wells Road',
-            'postcode': 'DY5 3TB',
-            'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            # 'address': '21 Wells Road',
+            # 'postcode': 'DY5 3TB',
+            'surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'survey_date': datetime.strptime('2024-04-11', '%Y-%m-%d'),
             'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a '
                                'CIGA check and extracting the cavity, replacing with bead insulation.',
             'Wall Render': None,
@@ -119,9 +127,10 @@ def app():
         },
         {
             'uprn': 90077535,
-            'address': '47 Fairfield Road',
-            'postcode': 'DY8 5UJ',
-            'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            # 'address': '47 Fairfield Road',
+            # 'postcode': 'DY8 5UJ',
+            'surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'survey_date': datetime.strptime('2024-04-11', '%Y-%m-%d'),
             'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a '
                                'CIGA check and extracting the cavity, replacing with bead insulation.',
             'Wall Render': None,
@@ -131,9 +140,10 @@ def app():
         },
         {
             'uprn': 90060989,
-            'address': '53 Bromley',
-            'postcode': 'DY5 4PJ',
-            'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            # 'address': '53 Bromley',
+            # 'postcode': 'DY5 4PJ',
+            'surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'survey_date': datetime.strptime('2024-04-11', '%Y-%m-%d'),
             'Wall Insulation': "Filled at build, partially filled - celotex/king board, 50mm cavity remaining - "
                                "recommends a cavity wall fill",
             "Roof": "Hipped roof",
@@ -143,9 +153,10 @@ def app():
         },
         {
             'uprn': 90048026,
-            'address': '91 Osprey Drive',
-            'postcode': 'DY1 2JS',
-            'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            # 'address': '91 Osprey Drive',
+            # 'postcode': 'DY1 2JS',
+            'surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'survey_date': datetime.strptime('2024-04-11', '%Y-%m-%d'),
             'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a '
                                'CIGA check and extracting the cavity, replacing with bead insulation.',
             'Wall Render': 'Tile hung front and rear of property',
@@ -155,9 +166,10 @@ def app():
         },
         {
             'uprn': 90093693,
-            'address': '150 Huntingtree Road',
-            'postcode': 'B63 4HP',
-            'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            # 'address': '150 Huntingtree Road',
+            # 'postcode': 'B63 4HP',
+            'surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'survey_date': datetime.strptime('2024-04-11', '%Y-%m-%d'),
             'Heating': 'Electric (storage heaters)',
             'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a '
                                'CIGA check and extracting the cavity, replacing with bead insulation.',
@@ -168,9 +180,10 @@ def app():
         },
         {
             'uprn': 90051858,
-            'address': '195 Ashenhurst Road',
-            'postcode': 'DY1 2JB',
-            'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            # 'address': '195 Ashenhurst Road',
+            # 'postcode': 'DY1 2JB',
+            'surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'survey_date': datetime.strptime('2024-04-11', '%Y-%m-%d'),
             'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a '
                                'CIGA check and extracting the cavity, replacing with bead insulation.',
             'Wall Render': "Solid render front and rear of property",
@@ -180,9 +193,10 @@ def app():
         },
         {
             'uprn': 90106884,
-            'address': '27 Milton Road',
-            'postcode': 'WV14 8HZ',
-            'Surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            # 'address': '27 Milton Road',
+            # 'postcode': 'WV14 8HZ',
+            'surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'survey_date': datetime.strptime('2024-04-11', '%Y-%m-%d'),
             'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a '
                                'CIGA check and extracting the cavity, replacing with bead insulation.',
             'Wall Render': "Solid render front and rear of property",
@@ -192,4 +206,5 @@ def app():
         },
     ]
 
-    # TODO: Push the non-invasive results straight to the database from here
+    session = sessionmaker(bind=db_engine)()
+    upload_non_intrusive_survey_notes(session=session, non_invasive_notes=non_invasive_notes, batch_size=500)

From 954fa9d32c5d30bd63098b74512b006b47bf3056 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 14 Apr 2024 14:57:13 +0100
Subject: [PATCH 223/248] changed is_override to already_installed in
 recommendations

---
 backend/Property.py                           |  4 ++--
 backend/app/plan/router.py                    | 17 +++++++++--------
 backend/app/plan/schemas.py                   |  2 +-
 recommendations/FireplaceRecommendations.py   |  6 +++---
 recommendations/FloorRecommendations.py       | 10 +++++-----
 recommendations/HeatingControlRecommender.py  | 12 ++++++------
 recommendations/HeatingRecommender.py         | 12 ++++++------
 recommendations/HotwaterRecommendations.py    |  6 +++---
 recommendations/LightingRecommendations.py    |  6 +++---
 recommendations/RoofRecommendations.py        | 10 +++++-----
 recommendations/SecondaryHeating.py           |  6 +++---
 recommendations/SolarPvRecommendations.py     |  6 +++---
 recommendations/VentilationRecommendations.py | 10 +++++-----
 recommendations/WallRecommendations.py        | 16 ++++++++--------
 recommendations/WindowsRecommendations.py     |  6 +++---
 15 files changed, 65 insertions(+), 64 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index 2892b86e..a8ed9129 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -61,7 +61,7 @@ class Property:
     n_bedrooms = None
 
     def __init__(
-        self, id, postcode, address, epc_record, override=None, **kwargs
+        self, id, postcode, address, epc_record, already_installed=None, **kwargs
     ):
 
         self.epc_record = epc_record
@@ -79,7 +79,7 @@ class Property:
         # of the non-invasive surveys. We reflect that this has been installed in the recommendations, but remove the
         # cost and instead, provide a message that the measure has already been installed
 
-        self.override = ast.literal_eval(override['overrides']) if override else []
+        self.already_installed = ast.literal_eval(already_installed['already_installed']) if already_installed else []
 
         self.uprn = epc_record.get("uprn")
         self.full_sap_epc = epc_record.get("full_sap_epc")
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 08ce0dcc..49e14872 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -74,7 +74,7 @@ async def trigger_plan(body: PlanTriggerRequest):
     # TODO: We should store the trigger file path in the database with the plan so we can track the file that
     #       triggered the plan
 
-    # TODO: Create the ability to congigure/switch off certain measures
+    # TODO: if the measure is already installed, it should actually be the very first phase
 
     try:
         session.begin()
@@ -85,10 +85,10 @@ async def trigger_plan(body: PlanTriggerRequest):
         if body.patches_file_path:
             patches = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.patches_file_path)
 
-        overrides = []
-        if body.overrides_file_path:
-            overrides = read_csv_from_s3(
-                bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.overrides_file_path
+        already_installed = []
+        if body.already_installed_file_path:
+            already_installed = read_csv_from_s3(
+                bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.already_installed_file_path
             )
 
         cleaning_data = read_dataframe_from_s3_parquet(
@@ -142,8 +142,9 @@ async def trigger_plan(body: PlanTriggerRequest):
                 cleaning_data=cleaning_data
             )
 
-            override = next((
-                x for x in overrides if (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
+            property_already_installed = next((
+                x for x in already_installed if
+                (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
             ), {})
             input_properties.append(
                 Property(
@@ -151,7 +152,7 @@ async def trigger_plan(body: PlanTriggerRequest):
                     address=epc_searcher.address_clean,
                     postcode=epc_searcher.postcode_clean,
                     epc_record=prepared_epc,
-                    override=override,
+                    already_installed=property_already_installed,
                     **Property.extract_kwargs(config)
                 )
             )
diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py
index ec49e41e..76eb49d2 100644
--- a/backend/app/plan/schemas.py
+++ b/backend/app/plan/schemas.py
@@ -9,7 +9,7 @@ class PlanTriggerRequest(BaseModel):
     goal_value: str
     portfolio_id: int
     trigger_file_path: str
-    overrides_file_path: Optional[str] = None
+    already_installed_file_path: Optional[str] = None
     patches_file_path: Optional[str] = None
     exclusions: Optional[conlist(str, min_items=1)] = None
 
diff --git a/recommendations/FireplaceRecommendations.py b/recommendations/FireplaceRecommendations.py
index c1114f31..601a8eb0 100644
--- a/recommendations/FireplaceRecommendations.py
+++ b/recommendations/FireplaceRecommendations.py
@@ -32,8 +32,8 @@ class FireplaceRecommendations(Definitions):
         if number_open_fireplaces == 0:
             return
 
-        is_override = "sealing_open_fireplace" in self.property.override
-        estimated_cost = number_open_fireplaces * self.COST_OF_WORK if not is_override else 0
+        already_installed = "sealing_open_fireplace" in self.property.already_installed
+        estimated_cost = number_open_fireplaces * self.COST_OF_WORK if not already_installed else 0
 
         # We recommend installing two mechanical ventilation systems
         self.recommendation = [
@@ -45,7 +45,7 @@ class FireplaceRecommendations(Definitions):
                 "starting_u_value": None,
                 "new_u_value": None,
                 "sap_points": None,
-                "is_override": is_override,
+                "already_installed": already_installed,
                 "total": estimated_cost,
                 # Take a very basic estimate of 6 hours, multipled by the number of open fireplaces to seal
                 "labour_hours": 6 * number_open_fireplaces,
diff --git a/recommendations/FloorRecommendations.py b/recommendations/FloorRecommendations.py
index b7bd370c..3f764d83 100644
--- a/recommendations/FloorRecommendations.py
+++ b/recommendations/FloorRecommendations.py
@@ -193,8 +193,8 @@ class FloorRecommendations(Definitions):
                             non_insulation_materials=non_insulation_materials
                         )
 
-                        is_override = "suspended_floor_insulation" in self.property.override
-                        if is_override:
+                        already_installed = "suspended_floor_insulation" in self.property.already_installed
+                        if already_installed:
                             cost_result = override_costs(cost_result)
 
                     elif material["type"] == "solid_floor_insulation":
@@ -204,8 +204,8 @@ class FloorRecommendations(Definitions):
                             non_insulation_materials=non_insulation_materials
                         )
 
-                        is_override = "solid_floor_insulation" in self.property.override
-                        if is_override:
+                        already_installed = "solid_floor_insulation" in self.property.already_installed
+                        if already_installed:
                             cost_result = override_costs(cost_result)
                     else:
                         raise NotImplementedError("Implement me!")
@@ -226,7 +226,7 @@ class FloorRecommendations(Definitions):
                             "starting_u_value": u_value,
                             "new_u_value": new_u_value,
                             "sap_points": None,
-                            "is_override": is_override,
+                            "already_installed": already_installed,
                             **cost_result
                         }
                     )
diff --git a/recommendations/HeatingControlRecommender.py b/recommendations/HeatingControlRecommender.py
index 63218163..d24ad811 100644
--- a/recommendations/HeatingControlRecommender.py
+++ b/recommendations/HeatingControlRecommender.py
@@ -168,8 +168,8 @@ class HeatingControlRecommender:
 
         description = "upgrade heating controls to Room thermostat, programmer and TRVs"
 
-        is_override = "heating_control" in self.property.override
-        if is_override:
+        already_installed = "heating_control" in self.property.already_installed
+        if already_installed:
             cost_result = override_costs(cost_result)
             description = "Heating controls have already been upgraded, no further action needed."
 
@@ -182,7 +182,7 @@ class HeatingControlRecommender:
                 "starting_u_value": None,
                 "new_u_value": None,
                 "sap_points": None,
-                "is_override": is_override,
+                "already_installed": already_installed,
                 "simulation_config": simulation_config
             }
         )
@@ -228,8 +228,8 @@ class HeatingControlRecommender:
         description = ("Upgrade heating controls to Smart Thermostats, room sensors and smart radiator valves (time & "
                        "temperature zone control)")
 
-        is_override = "heating_control" in self.property.override
-        if is_override:
+        already_installed = "heating_control" in self.property.already_installed
+        if already_installed:
             cost_result = override_costs(cost_result)
             description = "Heating controls have already been upgraded, no further action needed."
 
@@ -242,7 +242,7 @@ class HeatingControlRecommender:
                 "starting_u_value": None,
                 "new_u_value": None,
                 "sap_points": None,
-                "is_override": is_override,
+                "already_installed": already_installed,
                 "simulation_config": simulation_config
             }
         )
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index d83b755e..432dc6a6 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -144,8 +144,8 @@ class HeatingRecommender:
 
                 recommendation_description = f"{description} and {controls_description}"
 
-            is_override = "cavity_wall_insulation" in self.property.override
-            if is_override:
+            already_installed = "cavity_wall_insulation" in self.property.already_installed
+            if already_installed:
                 total_costs = override_costs(total_costs)
                 recommendation_description = "Heating system has already been upgraded, no further action needed."
 
@@ -159,7 +159,7 @@ class HeatingRecommender:
                 "starting_u_value": None,
                 "new_u_value": None,
                 "sap_points": None,
-                "is_override": is_override,
+                "already_installed": already_installed,
                 **total_costs,
                 "simulation_config": recommendation_simulation_config
             }
@@ -371,8 +371,8 @@ class HeatingRecommender:
                 n_rooms=self.property.number_of_rooms
             )
 
-            is_override = "heating" in self.property.override
-            if is_override:
+            already_installed = "heating" in self.property.already_installed
+            if already_installed:
                 boiler_costs = override_costs(boiler_costs)
                 description = "Heating system has already been upgraded, no further action needed."
 
@@ -386,7 +386,7 @@ class HeatingRecommender:
                 "starting_u_value": None,
                 "new_u_value": None,
                 "sap_points": None,
-                "is_override": is_override,
+                "already_installed": already_installed,
                 "simulation_config": simulation_config,
                 **boiler_costs
             }
diff --git a/recommendations/HotwaterRecommendations.py b/recommendations/HotwaterRecommendations.py
index 88cfa932..9c5c7045 100644
--- a/recommendations/HotwaterRecommendations.py
+++ b/recommendations/HotwaterRecommendations.py
@@ -42,8 +42,8 @@ class HotwaterRecommendations:
 
         recommendation_cost = self.costs.hot_water_tank_insulation()
 
-        is_override = "hot_water_tank_insulation" in self.property.override
-        if is_override:
+        already_installed = "hot_water_tank_insulation" in self.property.already_installed
+        if already_installed:
             recommendation_cost = override_costs(recommendation_cost)
             description = "Insulation tank has already been insulated, no further action required"
         else:
@@ -60,7 +60,7 @@ class HotwaterRecommendations:
                 "starting_u_value": None,
                 "new_u_value": None,
                 "sap_points": None,
-                "is_override": is_override,
+                "already_installed": already_installed,
                 **recommendation_cost,
                 "simulation_config": {"hot_water_energy_eff_ending": "Average"}
             }
diff --git a/recommendations/LightingRecommendations.py b/recommendations/LightingRecommendations.py
index 9e4c8e43..31720579 100644
--- a/recommendations/LightingRecommendations.py
+++ b/recommendations/LightingRecommendations.py
@@ -92,8 +92,8 @@ class LightingRecommendations:
 
         heat_demand_change, carbon_change = self.estimate_lighting_impact(number_non_lel_outlets)
 
-        is_override = "low_energy_lighting" in self.property.override
-        if is_override:
+        already_installed = "low_energy_lighting" in self.property.already_installed
+        if already_installed:
             cost_result = override_costs(cost_result)
             description = "Low energy lighting has already been installed, no further action required"
 
@@ -105,7 +105,7 @@ class LightingRecommendations:
                 "description": description,
                 "starting_u_value": None,
                 "new_u_value": None,
-                "is_override": is_override,
+                "already_installed": already_installed,
                 # For SAP points, we use the fact that lighting is usually worth 2 points and we scale this to
                 # the proportion of lights that will be set to low energy
                 "sap_points": round(2 * (number_non_lel_outlets / number_lighting_outlets), 2),
diff --git a/recommendations/RoofRecommendations.py b/recommendations/RoofRecommendations.py
index 5ba7e82e..dc5ee7db 100644
--- a/recommendations/RoofRecommendations.py
+++ b/recommendations/RoofRecommendations.py
@@ -207,8 +207,8 @@ class RoofRecommendations:
                             floor_area=self.property.insulation_floor_area,
                             material=material
                         )
-                        is_override = "loft_insulation" in self.property.override
-                        if is_override:
+                        already_installed = "loft_insulation" in self.property.already_installed
+                        if already_installed:
                             cost_result = override_costs(cost_result)
                     elif material["type"] == "flat_roof_insulation":
                         cost_result = self.costs.flat_roof_insulation(
@@ -216,8 +216,8 @@ class RoofRecommendations:
                             material=material,
                             non_insulation_materials=non_insulation_materials
                         )
-                        is_override = "flat_roof_insulation" in self.property.override
-                        if is_override:
+                        already_installed = "flat_roof_insulation" in self.property.already_installed
+                        if already_installed:
                             cost_result = override_costs(cost_result)
                     else:
                         raise ValueError("Invalid material type")
@@ -238,7 +238,7 @@ class RoofRecommendations:
                             "starting_u_value": u_value,
                             "new_u_value": new_u_value,
                             "sap_points": None,
-                            "is_override": is_override,
+                            "already_installed": already_installed,
                             **cost_result
                         }
                     )
diff --git a/recommendations/SecondaryHeating.py b/recommendations/SecondaryHeating.py
index e426977e..5d763510 100644
--- a/recommendations/SecondaryHeating.py
+++ b/recommendations/SecondaryHeating.py
@@ -40,8 +40,8 @@ class SecondaryHeating:
 
         costs = self.costs.heater_removal(n_rooms=n_rooms)
 
-        is_override = "secondary_heating" in self.property.override
-        if is_override:
+        already_installed = "secondary_heating" in self.property.already_installed
+        if already_installed:
             costs = override_costs(costs)
             description = "Secondary heating system has already been removed, no further action required"
         else:
@@ -56,7 +56,7 @@ class SecondaryHeating:
                 "starting_u_value": None,
                 "new_u_value": None,
                 "sap_points": None,
-                "is_override": is_override,
+                "already_installed": already_installed,
                 **costs,
                 "simulation_config": {
                     "secondheat_description_ending": "None"
diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py
index 72fcdf4b..58cf9735 100644
--- a/recommendations/SolarPvRecommendations.py
+++ b/recommendations/SolarPvRecommendations.py
@@ -111,8 +111,8 @@ class SolarPvRecommendations:
                 description = (f"Install a {kw} kilowatt-peak (kWp) solar photovoltaic (PV) p"
                                f"anel system on {round(roof_coverage_percent)}% the roof.")
 
-            is_override = "solar_pv" in self.property.override
-            if is_override:
+            already_installed = "solar_pv" in self.property.already_installed
+            if already_installed:
                 cost_result = override_costs(cost_result)
 
             self.recommendation.append(
@@ -124,7 +124,7 @@ class SolarPvRecommendations:
                     "starting_u_value": None,
                     "new_u_value": None,
                     "sap_points": None,
-                    "is_override": is_override,
+                    "already_installed": already_installed,
                     **cost_result,
                     # This is required for simulating the SAP impact. solar_pv_percentage is between 0 & 1 so we scale
                     # back up here
diff --git a/recommendations/VentilationRecommendations.py b/recommendations/VentilationRecommendations.py
index 7ffcda08..5b36bd9c 100644
--- a/recommendations/VentilationRecommendations.py
+++ b/recommendations/VentilationRecommendations.py
@@ -50,11 +50,11 @@ class VentilationRecommendations(Definitions):
 
         part = self.materials.copy()
 
-        is_override = "cavity_wall_insulation" in self.property.override
+        already_installed = "cavity_wall_insulation" in self.property.already_installed
 
-        estimated_cost = n_units * part[0]["cost"] if not is_override else 0
-        labour_hours = 4 * n_units if not is_override else 0
-        labour_days = 4 * n_units / 8.0 if not is_override else 0
+        estimated_cost = n_units * part[0]["cost"] if not already_installed else 0
+        labour_hours = 4 * n_units if not already_installed else 0
+        labour_days = 4 * n_units / 8.0 if not already_installed else 0
 
         part[0]["total"] = estimated_cost
         part[0]["quantity"] = n_units
@@ -69,7 +69,7 @@ class VentilationRecommendations(Definitions):
                 "description": f"Install {n_units} {part[0]['description']} units",
                 "starting_u_value": None,
                 "new_u_value": None,
-                "is_override": is_override,
+                "already_installed": already_installed,
                 "sap_points": 0,
                 "heat_demand": 0,
                 "adjusted_heat_demand": 0,
diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py
index 471a62cb..feb2620b 100644
--- a/recommendations/WallRecommendations.py
+++ b/recommendations/WallRecommendations.py
@@ -221,8 +221,8 @@ class WallRecommendations(Definitions):
                     material=material.to_dict(),
                 )
 
-                is_override = "cavity_wall_insulation" in self.property.override
-                if is_override:
+                already_installed = "cavity_wall_insulation" in self.property.already_installed
+                if already_installed:
                     cost_result = override_costs(cost_result)
 
                 recommendations.append(
@@ -241,7 +241,7 @@ class WallRecommendations(Definitions):
                         "starting_u_value": u_value,
                         "new_u_value": new_u_value,
                         "sap_points": None,
-                        "is_override": is_override,
+                        "already_installed": already_installed,
                         **cost_result
                     }
                 )
@@ -282,8 +282,8 @@ class WallRecommendations(Definitions):
                             material=material.to_dict(),
                             non_insulation_materials=non_insulation_materials
                         )
-                        is_override = "internal_wall_insulation" in self.property.override
-                        if is_override:
+                        already_installed = "internal_wall_insulation" in self.property.already_installed
+                        if already_installed:
                             cost_result = override_costs(cost_result)
 
                     elif material["type"] == "external_wall_insulation":
@@ -292,8 +292,8 @@ class WallRecommendations(Definitions):
                             material=material.to_dict(),
                             non_insulation_materials=non_insulation_materials
                         )
-                        is_override = "external_wall_insulation" in self.property.override
-                        if is_override:
+                        already_installed = "external_wall_insulation" in self.property.already_installed
+                        if already_installed:
                             cost_result = override_costs(cost_result)
                     else:
                         raise ValueError("Invalid material type")
@@ -313,7 +313,7 @@ class WallRecommendations(Definitions):
                             "description": self._make_description(material),
                             "starting_u_value": u_value,
                             "new_u_value": new_u_value,
-                            "is_override": is_override,
+                            "already_installed": already_installed,
                             "sap_points": None,
                             **cost_result
                         }
diff --git a/recommendations/WindowsRecommendations.py b/recommendations/WindowsRecommendations.py
index b2fe20a6..b7c2823a 100644
--- a/recommendations/WindowsRecommendations.py
+++ b/recommendations/WindowsRecommendations.py
@@ -71,8 +71,8 @@ class WindowsRecommendations:
             is_secondary_glazing=is_secondary_glazing
         )
 
-        is_override = "windows_glazing" in self.property.override
-        if is_override:
+        already_installed = "windows_glazing" in self.property.already_installed
+        if already_installed:
             cost_result = override_costs(cost_result)
             description = "The property already has double glazing installed. No further action is required."
         else:
@@ -98,7 +98,7 @@ class WindowsRecommendations:
                 "starting_u_value": None,
                 "new_u_value": None,
                 "sap_points": None,
-                "is_override": is_override,
+                "already_installed": already_installed,
                 **cost_result,
                 "is_secondary_glazing": is_secondary_glazing
             }

From c58389a26695d863d003a4cf2c9f26515f9898ea Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 14 Apr 2024 14:57:46 +0100
Subject: [PATCH 224/248] updated push to db

---
 backend/app/db/functions/recommendations_functions.py | 2 +-
 backend/app/db/models/recommendations.py              | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py
index 43daec77..b22ce92f 100644
--- a/backend/app/db/functions/recommendations_functions.py
+++ b/backend/app/db/functions/recommendations_functions.py
@@ -86,7 +86,7 @@ def upload_recommendations(session: Session, recommendations_to_upload, property
             "total_work_hours": rec["labour_hours"],
             "energy_cost_savings": rec["energy_cost_savings"],
             "labour_days": rec["labour_days"],
-            "is_override": rec["is_override"],
+            "already_installed": rec["already_installed"],
         }
         for rec in recommendations_to_upload
     ]
diff --git a/backend/app/db/models/recommendations.py b/backend/app/db/models/recommendations.py
index be5ff30c..186f87a8 100644
--- a/backend/app/db/models/recommendations.py
+++ b/backend/app/db/models/recommendations.py
@@ -30,7 +30,7 @@ class Recommendation(Base):
     rental_yield_increase = Column(Float)
     total_work_hours = Column(Float)
     labour_days = Column(Float)
-    is_override = Column(Boolean, nullable=False, default=False)
+    already_installed = Column(Boolean, nullable=False, default=False)
 
 
 class RecommendationMaterials(Base):

From f1e3bca9bff0c68ba9ce068c91a91268da794cb0 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 14 Apr 2024 14:59:30 +0100
Subject: [PATCH 225/248] updated asset list for immo to reference already
 installed

---
 etl/customers/immo/pilot/asset_list.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/etl/customers/immo/pilot/asset_list.py b/etl/customers/immo/pilot/asset_list.py
index 07ebe884..d8839924 100644
--- a/etl/customers/immo/pilot/asset_list.py
+++ b/etl/customers/immo/pilot/asset_list.py
@@ -48,7 +48,7 @@ patches = [
 # This is information that is found as a result of the non-invasives, that mean that certain measures
 # have been installed already. To reflect this in the front end, it is included in the recommendation, however
 # the cost is removed and instead, a message is presented saying that the measure is already installed.
-overrides = [
+already_installed = [
     {
         'address': '5 Oaklands',
         'postcode': 'B62 0JA',
@@ -87,11 +87,11 @@ def app():
     )
 
     # Store overrides in s3
-    overrides_filename = f"{USER_ID}/{PORTFOLIO_ID}/overrides.json"
+    already_installed_filename = f"{USER_ID}/{PORTFOLIO_ID}/already_installed.json"
     save_csv_to_s3(
-        dataframe=pd.DataFrame(overrides),
+        dataframe=pd.DataFrame(already_installed),
         bucket_name="retrofit-plan-inputs-dev",
-        file_name=overrides_filename
+        file_name=already_installed_filename
     )
 
     # Store patches in s3
@@ -109,7 +109,7 @@ def app():
         "goal": "Increase EPC",
         "goal_value": "C",
         "trigger_file_path": filename,
-        "overrides_file_path": overrides_filename,
+        "already_installed_file_path": already_installed_filename,
         "patches_file_path": patches_filename,
         "budget": None,
     }
@@ -122,7 +122,7 @@ def app():
         "goal": "Increase EPC",
         "goal_value": "B",
         "trigger_file_path": filename,
-        "overrides_file_path": overrides_filename,
+        "already_installed_file_path": already_installed_filename,
         "patches_file_path": patches_filename,
         "budget": None,
     }

From 046ac3dc39bc7c478a91fcaa58bddc30508c5166 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 14 Apr 2024 15:05:31 +0100
Subject: [PATCH 226/248] fixed bug in already installed

---
 etl/customers/immo/pilot/asset_list.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/etl/customers/immo/pilot/asset_list.py b/etl/customers/immo/pilot/asset_list.py
index d8839924..e587cc25 100644
--- a/etl/customers/immo/pilot/asset_list.py
+++ b/etl/customers/immo/pilot/asset_list.py
@@ -52,7 +52,7 @@ already_installed = [
     {
         'address': '5 Oaklands',
         'postcode': 'B62 0JA',
-        "overrides": ["windows_glazing"]
+        "already_installed": ["windows_glazing"]
     }
 ]
 

From 56bf3c121fbc0d4bb31a5e1b073b80daac7dba51 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 15 Apr 2024 13:31:46 +0100
Subject: [PATCH 227/248] Adding cdn to terraform

---
 infrastructure/terraform/main.tf              |  9 +++
 .../terraform/modules/cloudfront/main.tf      | 65 +++++++++++++++++++
 .../terraform/modules/cloudfront/variables.tf |  9 +++
 3 files changed, 83 insertions(+)
 create mode 100644 infrastructure/terraform/modules/cloudfront/main.tf
 create mode 100644 infrastructure/terraform/modules/cloudfront/variables.tf

diff --git a/infrastructure/terraform/main.tf b/infrastructure/terraform/main.tf
index d545cdf8..1d0562dd 100644
--- a/infrastructure/terraform/main.tf
+++ b/infrastructure/terraform/main.tf
@@ -181,4 +181,13 @@ module "lambda_carbon_prediction_ecr" {
 module "lambda_heat_prediction_ecr" {
   ecr_name = "lambda-heat-prediction-${var.stage}"
   source   = "./modules/ecr"
+}
+
+##############################################
+# CDN - Cloudfront
+##############################################
+module "cloudfront_distribution" {
+  source      = "./modules/cloudfront"
+  bucket_name = module.s3.bucket_name
+  stage       = var.stage
 }
\ No newline at end of file
diff --git a/infrastructure/terraform/modules/cloudfront/main.tf b/infrastructure/terraform/modules/cloudfront/main.tf
new file mode 100644
index 00000000..fbb88160
--- /dev/null
+++ b/infrastructure/terraform/modules/cloudfront/main.tf
@@ -0,0 +1,65 @@
+resource "aws_cloudfront_distribution" "s3_distribution" {
+  origin {
+    domain_name = "${aws_s3_bucket.bucket.bucket_regional_domain_name}"
+    origin_id   = "S3-${var.bucket_name}"
+
+    s3_origin_config {
+      origin_access_identity = aws_cloudfront_origin_access_identity.oai.cloudfront_access_identity_path
+    }
+  }
+
+  enabled = true
+
+  default_cache_behavior {
+    allowed_methods        = ["GET", "HEAD"]
+    cached_methods         = ["GET", "HEAD"]
+    target_origin_id       = "S3-${var.bucket_name}"
+    viewer_protocol_policy = "redirect-to-https"
+    compress               = true
+
+    forwarded_values {
+      query_string = false
+      cookies {
+        forward = "none"
+      }
+    }
+
+    min_ttl     = 0
+    default_ttl = 86400
+    max_ttl     = 31536000
+  }
+
+  price_class = "PriceClass_All"
+
+  restrictions {
+    geo_restriction {
+      restriction_type = "none"
+    }
+  }
+
+  viewer_certificate {
+    cloudfront_default_certificate = true
+  }
+}
+
+resource "aws_cloudfront_origin_access_identity" "oai" {
+  comment = "OAI for ${var.bucket_name}"
+}
+
+resource "aws_s3_bucket_policy" "bucket_policy" {
+  bucket = aws_s3_bucket.bucket.id
+
+  policy = jsonencode({
+    Version   = "2012-10-17"
+    Statement = [
+      {
+        Effect    = "Allow"
+        Principal = {
+          AWS = "arn:aws:iam::cloudfront:user/CloudFront Origin Access Identity ${aws_cloudfront_origin_access_identity.oai.id}"
+        }
+        Action   = "s3:GetObject"
+        Resource = "${aws_s3_bucket.bucket.arn}/*"
+      },
+    ]
+  })
+}
diff --git a/infrastructure/terraform/modules/cloudfront/variables.tf b/infrastructure/terraform/modules/cloudfront/variables.tf
new file mode 100644
index 00000000..433edc24
--- /dev/null
+++ b/infrastructure/terraform/modules/cloudfront/variables.tf
@@ -0,0 +1,9 @@
+variable "bucket_name" {
+  description = "The name of the bucket"
+  type        = string
+}
+
+variable "stage" {
+  description = "The deployment stage"
+  type        = string
+}

From ce546b56f7db4a88d82ee3f72148d2b4fe64f1c2 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 15 Apr 2024 13:38:23 +0100
Subject: [PATCH 228/248] passing additional data to cloudfront distribution

---
 infrastructure/terraform/main.tf                  |  9 ++++++---
 .../terraform/modules/cloudfront/main.tf          |  6 +++---
 .../terraform/modules/cloudfront/variables.tf     | 15 +++++++++++++++
 infrastructure/terraform/modules/s3/outputs.tf    | 12 ++++++++++++
 4 files changed, 36 insertions(+), 6 deletions(-)

diff --git a/infrastructure/terraform/main.tf b/infrastructure/terraform/main.tf
index 1d0562dd..fde25487 100644
--- a/infrastructure/terraform/main.tf
+++ b/infrastructure/terraform/main.tf
@@ -187,7 +187,10 @@ module "lambda_heat_prediction_ecr" {
 # CDN - Cloudfront
 ##############################################
 module "cloudfront_distribution" {
-  source      = "./modules/cloudfront"
-  bucket_name = module.s3.bucket_name
-  stage       = var.stage
+  source             = "./modules/cloudfront"
+  bucket_name        = module.s3.bucket_name
+  bucket_id          = module.s3.bucket_id
+  bucket_arn         = module.s3.bucket_arn
+  bucket_domain_name = module.s3.bucket_domain_name
+  stage              = var.stage
 }
\ No newline at end of file
diff --git a/infrastructure/terraform/modules/cloudfront/main.tf b/infrastructure/terraform/modules/cloudfront/main.tf
index fbb88160..281ff09f 100644
--- a/infrastructure/terraform/modules/cloudfront/main.tf
+++ b/infrastructure/terraform/modules/cloudfront/main.tf
@@ -1,6 +1,6 @@
 resource "aws_cloudfront_distribution" "s3_distribution" {
   origin {
-    domain_name = "${aws_s3_bucket.bucket.bucket_regional_domain_name}"
+    domain_name = var.bucket_domain_name
     origin_id   = "S3-${var.bucket_name}"
 
     s3_origin_config {
@@ -47,7 +47,7 @@ resource "aws_cloudfront_origin_access_identity" "oai" {
 }
 
 resource "aws_s3_bucket_policy" "bucket_policy" {
-  bucket = aws_s3_bucket.bucket.id
+  bucket = var.bucket_id
 
   policy = jsonencode({
     Version   = "2012-10-17"
@@ -58,7 +58,7 @@ resource "aws_s3_bucket_policy" "bucket_policy" {
           AWS = "arn:aws:iam::cloudfront:user/CloudFront Origin Access Identity ${aws_cloudfront_origin_access_identity.oai.id}"
         }
         Action   = "s3:GetObject"
-        Resource = "${aws_s3_bucket.bucket.arn}/*"
+        Resource = "${var.bucket_arn}/*"
       },
     ]
   })
diff --git a/infrastructure/terraform/modules/cloudfront/variables.tf b/infrastructure/terraform/modules/cloudfront/variables.tf
index 433edc24..88f770a8 100644
--- a/infrastructure/terraform/modules/cloudfront/variables.tf
+++ b/infrastructure/terraform/modules/cloudfront/variables.tf
@@ -7,3 +7,18 @@ variable "stage" {
   description = "The deployment stage"
   type        = string
 }
+
+variable "bucket_id" {
+  description = "The ID of the S3 bucket"
+  type        = string
+}
+
+variable "bucket_arn" {
+  description = "The ARN of the S3 bucket"
+  type        = string
+}
+
+variable "bucket_domain_name" {
+  description = "The regional domain name of the S3 bucket"
+  type        = string
+}
\ No newline at end of file
diff --git a/infrastructure/terraform/modules/s3/outputs.tf b/infrastructure/terraform/modules/s3/outputs.tf
index a5e7ddb4..7668dbc4 100644
--- a/infrastructure/terraform/modules/s3/outputs.tf
+++ b/infrastructure/terraform/modules/s3/outputs.tf
@@ -2,3 +2,15 @@ output "bucket_name" {
   description = "The name of the S3 bucket"
   value       = aws_s3_bucket.bucket.bucket
 }
+
+output "bucket_id" {
+  value = aws_s3_bucket.bucket.id
+}
+
+output "bucket_arn" {
+  value = aws_s3_bucket.bucket.arn
+}
+
+output "bucket_domain_name" {
+  value = aws_s3_bucket.bucket.bucket_regional_domain_name
+}
\ No newline at end of file

From e6f9416c8e4b3452f42c47044503c4fdcd68b7cf Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 15 Apr 2024 14:05:36 +0100
Subject: [PATCH 229/248] upgrade db instance version

---
 infrastructure/terraform/main.tf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/infrastructure/terraform/main.tf b/infrastructure/terraform/main.tf
index fde25487..55266e10 100644
--- a/infrastructure/terraform/main.tf
+++ b/infrastructure/terraform/main.tf
@@ -66,7 +66,7 @@ resource "aws_security_group" "allow_db" {
 resource "aws_db_instance" "default" {
   allocated_storage      = var.allocated_storage
   engine                 = "postgres"
-  engine_version         = "14.7"
+  engine_version         = "14.10"
   instance_class         = var.instance_class
   db_name                = var.database_name
   username               = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string)["db_assessment_model_username"]

From 6076eb4f24905ad026c7a0dca9eb3d15f7678a5b Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 15 Apr 2024 14:50:00 +0100
Subject: [PATCH 230/248] building photo upload app

---
 .idea/Model.iml                               |   2 +-
 .idea/misc.xml                                |   2 +-
 etl/non_invasive_surveys/photos/README.md     |  19 +++
 etl/non_invasive_surveys/photos/app.py        | 120 ++++++++++++++++++
 .../photos/requirements.txt                   |   3 +
 5 files changed, 144 insertions(+), 2 deletions(-)
 create mode 100644 etl/non_invasive_surveys/photos/README.md
 create mode 100644 etl/non_invasive_surveys/photos/app.py
 create mode 100644 etl/non_invasive_surveys/photos/requirements.txt

diff --git a/.idea/Model.iml b/.idea/Model.iml
index 4413bb06..c75af922 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="non_invasive_surveys-photos" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 6f308057..1f2c584d 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="non_invasive_surveys-photos" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/etl/non_invasive_surveys/photos/README.md b/etl/non_invasive_surveys/photos/README.md
new file mode 100644
index 00000000..9dbe951f
--- /dev/null
+++ b/etl/non_invasive_surveys/photos/README.md
@@ -0,0 +1,19 @@
+# Non-Intrusive Surveys - photo upload
+
+This folder contains the app that uploads photos taken during non-intrusive surveys. Photos are stored in folders named after the survey ID.
+
+## Getting started
+
+Install the required packages by running the following command:
+
+```bash
+pip install -r requirements.txt
+```
+
+## Usage
+
+The main application is found in the app.py file. To run the application, use the following command:
+
+```bash
+python app.py
+```
\ No newline at end of file
diff --git a/etl/non_invasive_surveys/photos/app.py b/etl/non_invasive_surveys/photos/app.py
new file mode 100644
index 00000000..1b6790f9
--- /dev/null
+++ b/etl/non_invasive_surveys/photos/app.py
@@ -0,0 +1,120 @@
+import boto3
+from PIL import Image
+from pathlib import Path
+from dotenv import load_dotenv
+
+# Inputs
+ENV_FILEPATH = "etl/non_invasive_surveys/photos/.env"
+PHOTO_DIRECTORY = "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data"
+FOLDER_UPRN_LOOKUP = {
+    "91 Osprey Drive DY1 2JS": 90048026,
+    "195 Ashenhurst Rd DY1 2JB": 90051858,
+    "6 Beech Rd DY1 4BP": 90055152,
+    "53 Bromley DY5 4PJ": 90060989,
+    "5 Oaklands B62 0JA": 90028499,
+    "47 Fairfield Rd DY8 5UJ": 90077535,
+    "150 Huntingtree Rd B63 4HP": 90093693,
+    "27 Milton Rd DY1 2JB": 90106884,
+    "21 Wells Rd DY5 3TB": 90022227,
+    "8 Corporation Rd DY2 7PX": 90070461
+}
+
+
+def list_subdirectories(directory_path):
+    """
+    List all subdirectories within a given directory.
+
+    :param directory_path: Path to the directory.
+    :return: A list of paths to the subdirectories.
+    """
+    directory = Path(directory_path)
+    subdirectories = [subdir for subdir in directory.iterdir() if subdir.is_dir()]
+    return subdirectories
+
+
+def list_files_in_directory(directory_path, file_extension=".jpg"):
+    """
+    List all files with a specific extension within a given directory and its subdirectories.
+
+    :param directory_path: Path to the directory to scan.
+    :param file_extension: File extension to filter by.
+    :return: A list of paths to the files.
+    """
+    # Convert the directory path to a Path object if it's not already one
+    directory = Path(directory_path) if not isinstance(directory_path, Path) else directory_path
+
+    # List all files of the specified type in the directory and subdirectories
+    file_list = [file for file in directory.rglob(f'*{file_extension}')]
+
+    return file_list
+
+
+def create_images(input_path):
+    # Load the image
+    with Image.open(input_path) as img:
+        # Create a thumbnail
+        thumbnail = img.copy()
+        thumbnail.thumbnail((128, 128), Image.ANTIALIAS)  # Resize to 128x128 (or any desired size)
+        thumbnail.save('thumbnail.jpg')
+
+        # Create a 1080p version
+        full_hd = img.copy()
+        full_hd.thumbnail((1920, 1080), Image.ANTIALIAS)  # Resize to 1080p
+        full_hd.save('1080p.jpg')
+
+    # Return paths to the processed images
+    return 'thumbnail.jpg', '1080p.jpg', input_path
+
+
+def upload_to_s3(bucket_name, file_path, object_name):
+    s3_client = boto3.client('s3')
+    s3_client.upload_file(file_path, bucket_name, object_name)
+    print(f"Uploaded {object_name} to S3 bucket {bucket_name}")
+
+
+def upload_photos_to_s3(bucket_name, photo_paths):
+    # Upload each photo
+    for path in photo_paths:
+        object_name = path.split('/')[-1]  # Assuming the path format is folder/filename
+        upload_to_s3(bucket_name, path, object_name)
+
+
+def generate_cdn_url(distribution_domain, object_name):
+    return f"https://{distribution_domain}/{object_name}"
+
+
+def process_and_upload_images(input_image_path, bucket_name, distribution_domain):
+    # Create images
+    thumbnail, full_hd, original = create_images(input_image_path)
+
+    # Upload images
+    upload_photos_to_s3(bucket_name, [thumbnail, full_hd, original])
+
+    # Generate CDN links
+    cdn_links = [generate_cdn_url(distribution_domain, path.split('/')[-1]) for path in [thumbnail, full_hd, original]]
+
+    return cdn_links
+
+
+def app():
+    """
+    This application is tasked with uploading the photos, recorded during the non-invasive surveys, to s3 and the
+    database.
+    To begin with, this app will simply read the files from the local machine, however we will come up with a more
+    efficient way to do this in the future.
+
+    :return:
+    """
+
+    # List all files in the directory using pathlib
+    property_directories = list_subdirectories(PHOTO_DIRECTORY)
+
+    # For each property, we want to list all of the photos in the directory
+    for property_dir in property_directories:
+        photo_files = list_files_in_directory(property_dir)
+
+        # We now want to convert each file, and upload it to s3
+        for photo_filepath in photo_files:
+            process_and_upload_images(
+                photo_filepath, "retrofit-datalake-dev", "cdn.retrofit.com"
+            )
diff --git a/etl/non_invasive_surveys/photos/requirements.txt b/etl/non_invasive_surveys/photos/requirements.txt
new file mode 100644
index 00000000..2199a0b4
--- /dev/null
+++ b/etl/non_invasive_surveys/photos/requirements.txt
@@ -0,0 +1,3 @@
+Pillow
+boto3
+python-dotenv
\ No newline at end of file

From d3a175468330774214e4c7225157dd4481cb60cd Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 15 Apr 2024 15:20:38 +0100
Subject: [PATCH 231/248] modifying photo upload code

---
 etl/non_invasive_surveys/photos/app.py | 43 ++++++++++++++++++++------
 1 file changed, 34 insertions(+), 9 deletions(-)

diff --git a/etl/non_invasive_surveys/photos/app.py b/etl/non_invasive_surveys/photos/app.py
index 1b6790f9..ffd993a6 100644
--- a/etl/non_invasive_surveys/photos/app.py
+++ b/etl/non_invasive_surveys/photos/app.py
@@ -1,4 +1,5 @@
 import boto3
+import os
 from PIL import Image
 from pathlib import Path
 from dotenv import load_dotenv
@@ -19,6 +20,10 @@ FOLDER_UPRN_LOOKUP = {
     "8 Corporation Rd DY2 7PX": 90070461
 }
 
+load_dotenv(ENV_FILEPATH)
+CLOUDFRONT_DISTRIBUTION_DOMAIN_NAME = os.getenv("CLOUDFRONT_DISTRIBUTION_DOMAIN_NAME", None)
+CDN_BUCKET_NAME = os.getenv("CDN_BUCKET_NAME", None)
+
 
 def list_subdirectories(directory_path):
     """
@@ -49,21 +54,33 @@ def list_files_in_directory(directory_path, file_extension=".jpg"):
     return file_list
 
 
-def create_images(input_path):
+def create_images(input_path, uprn):
+    # Need to create local directory if it doesn't exist
+    os.makedirs(f"non_invasive_photos/{uprn}", exist_ok=True)
+
     # Load the image
     with Image.open(input_path) as img:
+        # Define output paths
+        thumbnail_path = f"non_invasive_photos/{uprn}/thumbnail.jpg"
+        full_hd_path = f"non_invasive_photos/{uprn}/1080p.jpg"
+        webp_path = f"non_invasive_photos/{uprn}/webp.webp"  # Save as WebP format
+
         # Create a thumbnail
         thumbnail = img.copy()
-        thumbnail.thumbnail((128, 128), Image.ANTIALIAS)  # Resize to 128x128 (or any desired size)
-        thumbnail.save('thumbnail.jpg')
+        thumbnail.thumbnail((128, 128), Image.Resampling.LANCZOS)  # High-quality downsampling
+        thumbnail.save(thumbnail_path, 'JPEG', quality=85)  # Save as JPEG with quality setting
 
         # Create a 1080p version
         full_hd = img.copy()
-        full_hd.thumbnail((1920, 1080), Image.ANTIALIAS)  # Resize to 1080p
-        full_hd.save('1080p.jpg')
+        full_hd.thumbnail((1920, 1080), Image.Resampling.LANCZOS)
+        full_hd.save(full_hd_path, 'JPEG', quality=90)  # Slightly higher quality for larger image
+
+        # Convert to WebP for better compression
+        webp = img.copy()
+        webp.save(webp_path, 'WEBP', quality=90)
 
     # Return paths to the processed images
-    return 'thumbnail.jpg', '1080p.jpg', input_path
+    return thumbnail_path, full_hd_path, webp_path
 
 
 def upload_to_s3(bucket_name, file_path, object_name):
@@ -83,9 +100,9 @@ def generate_cdn_url(distribution_domain, object_name):
     return f"https://{distribution_domain}/{object_name}"
 
 
-def process_and_upload_images(input_image_path, bucket_name, distribution_domain):
+def process_and_upload_images(uprn, input_image_path, bucket_name, distribution_domain):
     # Create images
-    thumbnail, full_hd, original = create_images(input_image_path)
+    thumbnail, full_hd, original = create_images(str(uprn), input_image_path)
 
     # Upload images
     upload_photos_to_s3(bucket_name, [thumbnail, full_hd, original])
@@ -93,6 +110,10 @@ def process_and_upload_images(input_image_path, bucket_name, distribution_domain
     # Generate CDN links
     cdn_links = [generate_cdn_url(distribution_domain, path.split('/')[-1]) for path in [thumbnail, full_hd, original]]
 
+    # Delete local files
+    for path in [thumbnail, full_hd, original]:
+        os.remove(path)
+
     return cdn_links
 
 
@@ -112,9 +133,13 @@ def app():
     # For each property, we want to list all of the photos in the directory
     for property_dir in property_directories:
         photo_files = list_files_in_directory(property_dir)
+        uprn = FOLDER_UPRN_LOOKUP[property_dir.name]
 
         # We now want to convert each file, and upload it to s3
         for photo_filepath in photo_files:
             process_and_upload_images(
-                photo_filepath, "retrofit-datalake-dev", "cdn.retrofit.com"
+                uprn=uprn,
+                input_image_path=photo_filepath,
+                bucket_name=CDN_BUCKET_NAME,
+                distribution_domain=CLOUDFRONT_DISTRIBUTION_DOMAIN_NAME
             )

From 5d3440815d7616bf3af37ca68136a73d610f071a Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 15 Apr 2024 16:33:15 +0100
Subject: [PATCH 232/248] Pushing non-invasive photos to app wip

---
 .../photos/README.md                          |  0
 .../photos/app.py                             | 28 +++++++++++--------
 .../photos/requirements.txt                   |  0
 3 files changed, 16 insertions(+), 12 deletions(-)
 rename etl/{non_invasive_surveys => non_intrusive_surveys}/photos/README.md (100%)
 rename etl/{non_invasive_surveys => non_intrusive_surveys}/photos/app.py (84%)
 rename etl/{non_invasive_surveys => non_intrusive_surveys}/photos/requirements.txt (100%)

diff --git a/etl/non_invasive_surveys/photos/README.md b/etl/non_intrusive_surveys/photos/README.md
similarity index 100%
rename from etl/non_invasive_surveys/photos/README.md
rename to etl/non_intrusive_surveys/photos/README.md
diff --git a/etl/non_invasive_surveys/photos/app.py b/etl/non_intrusive_surveys/photos/app.py
similarity index 84%
rename from etl/non_invasive_surveys/photos/app.py
rename to etl/non_intrusive_surveys/photos/app.py
index ffd993a6..c531355b 100644
--- a/etl/non_invasive_surveys/photos/app.py
+++ b/etl/non_intrusive_surveys/photos/app.py
@@ -5,7 +5,7 @@ from pathlib import Path
 from dotenv import load_dotenv
 
 # Inputs
-ENV_FILEPATH = "etl/non_invasive_surveys/photos/.env"
+ENV_FILEPATH = "etl/non_intrusive_surveys/photos/.env"
 PHOTO_DIRECTORY = "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data"
 FOLDER_UPRN_LOOKUP = {
     "91 Osprey Drive DY1 2JS": 90048026,
@@ -55,25 +55,29 @@ def list_files_in_directory(directory_path, file_extension=".jpg"):
 
 
 def create_images(input_path, uprn):
+    # Define the base directory path
+    base_directory = f"non_intrusive_photos/{uprn}"
+    print(f"Creating directory: {base_directory}")  # Debug: print the directory to be created
+
     # Need to create local directory if it doesn't exist
-    os.makedirs(f"non_invasive_photos/{uprn}", exist_ok=True)
+    os.makedirs(base_directory, exist_ok=True)
+
+    # Define output paths
+    thumbnail_path = os.path.join(base_directory, "thumbnail.jpg")
+    full_hd_path = os.path.join(base_directory, "1080p.jpg")
+    webp_path = os.path.join(base_directory, "webp.webp")  # Save as WebP format
 
     # Load the image
     with Image.open(input_path) as img:
-        # Define output paths
-        thumbnail_path = f"non_invasive_photos/{uprn}/thumbnail.jpg"
-        full_hd_path = f"non_invasive_photos/{uprn}/1080p.jpg"
-        webp_path = f"non_invasive_photos/{uprn}/webp.webp"  # Save as WebP format
-
         # Create a thumbnail
         thumbnail = img.copy()
-        thumbnail.thumbnail((128, 128), Image.Resampling.LANCZOS)  # High-quality downsampling
-        thumbnail.save(thumbnail_path, 'JPEG', quality=85)  # Save as JPEG with quality setting
+        thumbnail.thumbnail((128, 128), Image.Resampling.LANCZOS)
+        thumbnail.save(thumbnail_path, 'JPEG', quality=85)
 
         # Create a 1080p version
         full_hd = img.copy()
         full_hd.thumbnail((1920, 1080), Image.Resampling.LANCZOS)
-        full_hd.save(full_hd_path, 'JPEG', quality=90)  # Slightly higher quality for larger image
+        full_hd.save(full_hd_path, 'JPEG', quality=90)
 
         # Convert to WebP for better compression
         webp = img.copy()
@@ -102,10 +106,10 @@ def generate_cdn_url(distribution_domain, object_name):
 
 def process_and_upload_images(uprn, input_image_path, bucket_name, distribution_domain):
     # Create images
-    thumbnail, full_hd, original = create_images(str(uprn), input_image_path)
+    thumbnail, full_hd, original = create_images(input_image_path, uprn=str(uprn))
 
     # Upload images
-    upload_photos_to_s3(bucket_name, [thumbnail, full_hd, original])
+    upload_photos_to_s3(bucket_name, photo_paths=[thumbnail, full_hd, original])
 
     # Generate CDN links
     cdn_links = [generate_cdn_url(distribution_domain, path.split('/')[-1]) for path in [thumbnail, full_hd, original]]
diff --git a/etl/non_invasive_surveys/photos/requirements.txt b/etl/non_intrusive_surveys/photos/requirements.txt
similarity index 100%
rename from etl/non_invasive_surveys/photos/requirements.txt
rename to etl/non_intrusive_surveys/photos/requirements.txt

From d6fa81939d6a0f7752728953250b3554995a5297 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 15 Apr 2024 23:41:24 +0100
Subject: [PATCH 233/248] creating new aggregations for front end

---
 .idea/Model.iml                               |   2 +-
 .idea/misc.xml                                |   2 +-
 backend/Property.py                           |   8 +-
 .../app/db/functions/portfolio_functions.py   |   3 +-
 backend/app/plan/router.py                    | 128 +++++++++++++++++-
 recommendations/Recommendations.py            |  11 +-
 6 files changed, 146 insertions(+), 8 deletions(-)

diff --git a/.idea/Model.iml b/.idea/Model.iml
index c75af922..4413bb06 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="non_invasive_surveys-photos" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 1f2c584d..6f308057 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="non_invasive_surveys-photos" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/backend/Property.py b/backend/Property.py
index a8ed9129..7b5a6bc3 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -142,6 +142,8 @@ class Property:
 
         self.current_adjusted_energy = None
         self.expected_adjusted_energy = None
+        self.current_energy_bill = None
+        self.expected_energy_bill = None
 
         self.recommendations_scoring_data = []
 
@@ -892,12 +894,16 @@ class Property:
 
         return component_data
 
-    def set_adjusted_energy(self, current_adjusted_energy, expected_adjusted_energy):
+    def set_adjusted_energy(
+        self, current_adjusted_energy, expected_adjusted_energy, current_energy_bill, expected_energy_bill
+    ):
         """
         Stores these values for usage later
         """
         self.current_adjusted_energy = current_adjusted_energy
         self.expected_adjusted_energy = expected_adjusted_energy
+        self.current_energy_bill = current_energy_bill
+        self.expected_energy_bill = expected_energy_bill
 
     def set_windows_count(self):
         """
diff --git a/backend/app/db/functions/portfolio_functions.py b/backend/app/db/functions/portfolio_functions.py
index ead8280f..69203368 100644
--- a/backend/app/db/functions/portfolio_functions.py
+++ b/backend/app/db/functions/portfolio_functions.py
@@ -4,7 +4,7 @@ from backend.app.db.models.portfolio import Portfolio
 
 
 def aggregate_portfolio_recommendations(
-    session, portfolio_id: int, total_valuation_increase: float, labour_days: float
+    session, portfolio_id: int, total_valuation_increase: float, labour_days: float, aggregated_data: dict
 ):
     # Aggregate multiple fields
     aggregates = (
@@ -27,6 +27,7 @@ def aggregate_portfolio_recommendations(
         "energy_savings": aggregates.energy_savings or 0,
         "co2_equivalent_savings": aggregates.co2_equivalent_savings or 0,
         "energy_cost_savings": aggregates.energy_cost_savings or 0,
+        **aggregated_data
     }
 
     # Get the portfolio and update the fields
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 49e14872..b8b2d5c8 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -1,3 +1,4 @@
+import json
 from datetime import datetime
 
 from tqdm import tqdm
@@ -57,6 +58,109 @@ def patch_epc(patch, epc_records):
     return epc_records
 
 
+def extract_portfolio_aggregation_data(
+    input_properties, total_valuation_increase, recommendations, new_epc_bands
+):
+    # We aggregate a number of metrics for the portfolio:
+    # 1) A breakdown of the number of properties in each EPC band
+    #    a) before retrofit
+    #    b) after retrofit
+    # 2) Number of units
+    # 3) Co2/unit
+    #    a) before retrofit
+    #    b) after retrofit
+    # 4) Energy bulls/unit
+    #    a) before retrofit
+    #    b) after retrofit
+    # 5) Average valuation improvement/unit
+    # 6) Total cost
+    # 7) Cost per unit
+    # 8) £ per CO2 saved
+    # 9) £ per SAP point
+
+    # We need to construct the underlyind data for this
+
+    # Helper function to reformat the EPC data
+    def reformat_epc_data(epc_counts):
+        # Define all possible EPC bands in the required order
+        epc_bands = ["G", "F", "E", "D", "C", "B", "A"]
+
+        # Create the formatted data list by checking each band in the order
+        formatted_data = []
+        for band in epc_bands:
+            # Get the count from the dictionary, defaulting to 0 if not present
+            count = epc_counts.get(band, 0)
+            # Append the formatted dictionary to the list
+            formatted_data.append({"name": band, band: count})
+
+        return formatted_data
+
+    n_units = len(input_properties)
+
+    agg_data = []
+    for p in input_properties:
+        # Get the recommendations for the property
+        property_recommendations = recommendations.get(p.id, [])
+        if not property_recommendations:
+            continue
+        # Get just the default recommendations
+        default_recommendations = [r for r in property_recommendations if r["default"]]
+
+        # We can now calculate multiple outputs based on default recommendations
+        carbon_savings = sum([r["co2_equivalent_savings"] for r in default_recommendations])
+
+        pre_retrofit_co2 = p.data["co2-emissions-current"]
+        post_retrofit_co2 = pre_retrofit_co2 - carbon_savings
+
+        pre_retrofit_energy_bill = p.current_energy_bill
+        post_retrofit_energy_bill = p.expected_energy_bill
+
+        cost = sum([r["total"] for r in default_recommendations])
+        sap_point_improvement = sum([r["sap_points"] for r in default_recommendations])
+
+        agg_data.append({
+            "pre_retrofit_epc": p.data["current-energy-rating"],
+            "post_retrofit_epc": new_epc_bands[p.id],
+            "pre_retrofit_co2": pre_retrofit_co2,
+            "post_retrofit_co2": post_retrofit_co2,
+            "pre_retrofit_energy_bill": pre_retrofit_energy_bill,
+            "post_retrofit_energy_bill": post_retrofit_energy_bill,
+            "cost": cost,
+            "sap_point_improvement": sap_point_improvement
+        })
+
+    agg_data = pd.DataFrame(agg_data)
+
+    n_units_to_retrofit = len(agg_data)
+
+    valuation_improvment_per_unit = total_valuation_increase / n_units_to_retrofit
+
+    total_carbon_saved = agg_data["pre_retrofit_co2"].sum() - agg_data["post_retrofit_co2"].sum()
+    total_sap_points = agg_data["sap_point_improvement"].sum()
+
+    aggregation_data = {
+        "epc_breakdown_pre_retrofit": json.dumps(
+            reformat_epc_data(agg_data["pre_retrofit_epc"].value_counts().to_dict())
+        ),
+        "epc_breakdown_post_retrofit": json.dumps(
+            reformat_epc_data(agg_data["post_retrofit_epc"].value_counts().to_dict())
+        ),
+        "number_of_properties": n_units,
+        "n_units_to_retrofit": n_units_to_retrofit,
+        "co2_per_unit_pre_retrofit": agg_data["pre_retrofit_co2"].mean(),
+        "co2_per_unit_post_retrofit": agg_data["post_retrofit_co2"].mean(),
+        "energy_bill_per_unit_pre_retrofit": agg_data["pre_retrofit_energy_bill"].mean(),
+        "energy_bill_per_unit_post_retrofit": agg_data["post_retrofit_energy_bill"].mean(),
+        "valuation_improvement_per_unit": valuation_improvment_per_unit,
+        "total_cost": agg_data["cost"].sum(),
+        "cost_per_unit": agg_data["cost"].mean(),
+        "cost_per_co2_saved": agg_data["cost"].sum() / total_carbon_saved,
+        "cost_per_sap_point": agg_data["cost"].sum() / total_sap_points
+    }
+
+    return aggregation_data
+
+
 router = APIRouter(
     prefix="/plan",
     tags=["plan"],
@@ -243,7 +347,13 @@ async def trigger_plan(body: PlanTriggerRequest):
 
             property_instance = [p for p in input_properties if p.id == property_id][0]
 
-            recommendations_with_impact, current_adjusted_energy, expected_adjusted_energy = (
+            (
+                recommendations_with_impact,
+                current_adjusted_energy,
+                expected_adjusted_energy,
+                current_energy_bill,
+                expected_energy_bill
+            ) = (
                 Recommendations.calculate_recommendation_impact(
                     property_instance=property_instance,
                     all_predictions=all_predictions,
@@ -254,7 +364,9 @@ async def trigger_plan(body: PlanTriggerRequest):
             # Store the resulting adjusted energy in the property instance
             property_instance.set_adjusted_energy(
                 current_adjusted_energy=current_adjusted_energy,
-                expected_adjusted_energy=expected_adjusted_energy
+                expected_adjusted_energy=expected_adjusted_energy,
+                current_energy_bill=current_energy_bill,
+                expected_energy_bill=expected_energy_bill
             )
 
             input_measures = prepare_input_measures(recommendations_with_impact, body.goal)
@@ -316,6 +428,7 @@ async def trigger_plan(body: PlanTriggerRequest):
         logger.info("Uploading recommendations to the database")
         property_valuation_increases = []
         session.commit()
+        new_epc_bands = {}
         for i in range(0, len(input_properties), BATCH_SIZE):
             try:
                 # Take a slice of the input_properties list to make a batch
@@ -327,6 +440,7 @@ async def trigger_plan(body: PlanTriggerRequest):
                     total_sap_points = sum([r["sap_points"] for r in default_recommendations])
                     new_sap_points = float(p.data["current-energy-efficiency"]) + total_sap_points
                     new_epc = sap_to_epc(new_sap_points)
+                    new_epc_bands[p.id] = new_epc
 
                     valuations = PropertyValuation.estimate(property_instance=p, target_epc=new_epc)
 
@@ -392,11 +506,19 @@ async def trigger_plan(body: PlanTriggerRequest):
             [sum(r["labour_days"] for r in rec_group if r["default"]) for p_id, rec_group in recommendations.items()]
         ))
 
+        aggregated_data = extract_portfolio_aggregation_data(
+            input_properties=input_properties,
+            total_valuation_increase=total_valuation_increase,
+            recommendations=recommendations,
+            new_epc_bands=new_epc_bands
+        )
+
         aggregate_portfolio_recommendations(
             session,
             portfolio_id=body.portfolio_id,
             total_valuation_increase=total_valuation_increase,
-            labour_days=labour_days
+            labour_days=labour_days,
+            aggregated_data=aggregated_data
         )
 
         # Commit final changes
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 68fead16..659b41a8 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -281,6 +281,9 @@ class Recommendations:
             current_adjusted_energy - expected_adjusted_energy
         )
 
+        current_energy_bill = AnnualBillSavings.calculate_annual_bill(current_adjusted_energy)
+        expected_energy_bill = AnnualBillSavings.calculate_annual_bill(expected_adjusted_energy)
+
         for recommendations_by_type in property_recommendations:
             for rec in recommendations_by_type:
 
@@ -355,4 +358,10 @@ class Recommendations:
                     rec["heat_demand"] is None) or (rec["energy_cost_savings"] is None):
                     raise ValueError("sap points, co2 or heat demand is missing")
 
-        return property_recommendations, current_adjusted_energy, expected_adjusted_energy
+        return (
+            property_recommendations,
+            current_adjusted_energy,
+            expected_adjusted_energy,
+            current_energy_bill,
+            expected_energy_bill
+        )

From cc6277c191dea07ce1a8a26b8083e1eebdd2887b Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 15 Apr 2024 23:52:10 +0100
Subject: [PATCH 234/248] extended outputs

---
 backend/app/plan/router.py | 29 ++++++++++++++++++++---------
 1 file changed, 20 insertions(+), 9 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index b8b2d5c8..f7a825db 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -115,6 +115,9 @@ def extract_portfolio_aggregation_data(
         pre_retrofit_energy_bill = p.current_energy_bill
         post_retrofit_energy_bill = p.expected_energy_bill
 
+        pre_retrofit_energy_consumption = p.current_adjusted_energy
+        post_retrofit_energy_consumption = p.expected_adjusted_energy
+
         cost = sum([r["total"] for r in default_recommendations])
         sap_point_improvement = sum([r["sap_points"] for r in default_recommendations])
 
@@ -125,6 +128,8 @@ def extract_portfolio_aggregation_data(
             "post_retrofit_co2": post_retrofit_co2,
             "pre_retrofit_energy_bill": pre_retrofit_energy_bill,
             "post_retrofit_energy_bill": post_retrofit_energy_bill,
+            "pre_retrofit_energy_consumption": pre_retrofit_energy_consumption,
+            "post_retrofit_energy_consumption": post_retrofit_energy_consumption,
             "cost": cost,
             "sap_point_improvement": sap_point_improvement
         })
@@ -138,6 +143,9 @@ def extract_portfolio_aggregation_data(
     total_carbon_saved = agg_data["pre_retrofit_co2"].sum() - agg_data["post_retrofit_co2"].sum()
     total_sap_points = agg_data["sap_point_improvement"].sum()
 
+    def format_money(amount):
+        return f"£{amount:,.0f}"
+
     aggregation_data = {
         "epc_breakdown_pre_retrofit": json.dumps(
             reformat_epc_data(agg_data["pre_retrofit_epc"].value_counts().to_dict())
@@ -147,15 +155,18 @@ def extract_portfolio_aggregation_data(
         ),
         "number_of_properties": n_units,
         "n_units_to_retrofit": n_units_to_retrofit,
-        "co2_per_unit_pre_retrofit": agg_data["pre_retrofit_co2"].mean(),
-        "co2_per_unit_post_retrofit": agg_data["post_retrofit_co2"].mean(),
-        "energy_bill_per_unit_pre_retrofit": agg_data["pre_retrofit_energy_bill"].mean(),
-        "energy_bill_per_unit_post_retrofit": agg_data["post_retrofit_energy_bill"].mean(),
-        "valuation_improvement_per_unit": valuation_improvment_per_unit,
-        "total_cost": agg_data["cost"].sum(),
-        "cost_per_unit": agg_data["cost"].mean(),
-        "cost_per_co2_saved": agg_data["cost"].sum() / total_carbon_saved,
-        "cost_per_sap_point": agg_data["cost"].sum() / total_sap_points
+        "co2_per_unit_pre_retrofit": str(round(agg_data["pre_retrofit_co2"].mean(), 2)) + "t",
+        "co2_per_unit_post_retrofit": str(round(agg_data["post_retrofit_co2"].mean(), 2)) + "t",
+        "energy_bill_per_unit_pre_retrofit": format_money(agg_data["pre_retrofit_energy_bill"].mean()),
+        "energy_bill_per_unit_post_retrofit": format_money(agg_data["post_retrofit_energy_bill"].mean()),
+        "energy_consumption_per_unit_pre_retrofit": str(
+            round(agg_data["pre_retrofit_energy_consumption"].mean())) + "kWh",
+        "energy_consumption_per_unit_post_retrofit": str(
+            round(agg_data["post_retrofit_energy_consumption"].mean())) + "kWh",
+        "valuation_improvement_per_unit": format_money(valuation_improvment_per_unit),
+        "cost_per_unit": format_money(agg_data["cost"].mean()),
+        "cost_per_co2_saved": format_money(agg_data["cost"].sum() / total_carbon_saved),
+        "cost_per_sap_point": format_money(agg_data["cost"].sum() / total_sap_points)
     }
 
     return aggregation_data

From 83d472a7108019fb7ea9f21c9196a5abba154ad0 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 16 Apr 2024 03:05:26 +0100
Subject: [PATCH 235/248] debugging

---
 backend/app/db/models/portfolio.py | 15 +++++++++++++++
 backend/app/plan/router.py         | 18 ++++++++++++------
 recommendations/Recommendations.py |  3 +++
 3 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/backend/app/db/models/portfolio.py b/backend/app/db/models/portfolio.py
index 830866e6..aa0146c0 100644
--- a/backend/app/db/models/portfolio.py
+++ b/backend/app/db/models/portfolio.py
@@ -45,6 +45,21 @@ class Portfolio(Base):
     labour_days = Column(Float)
     created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
     updated_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
+    # Aggregations for summary
+    epc_breakdown_pre_retrofit = Column(Text)
+    epc_breakdown_post_retrofit = Column(Text)
+    n_units_to_retrofit = Column(Integer)
+    co2_per_unit_pre_retrofit = Column(Text)
+    co2_per_unit_post_retrofit = Column(Text)
+    energy_bill_per_unit_pre_retrofit = Column(Text)
+    energy_bill_per_unit_post_retrofit = Column(Text)
+    energy_consumption_per_unit_pre_retrofit = Column(Text)
+    energy_consumption_per_unit_post_retrofit = Column(Text)
+    valuation_improvement_per_unit = Column(Text)
+    cost_per_unit = Column(Text)
+    cost_per_co2_saved = Column(Text)
+    cost_per_sap_point = Column(Text)
+    valuation_return_on_investment = Column(Text)
 
 
 class PropertyCreationStatus(enum.Enum):
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index f7a825db..661858b7 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -99,10 +99,9 @@ def extract_portfolio_aggregation_data(
 
     agg_data = []
     for p in input_properties:
-        # Get the recommendations for the property
+        # Get the recommendations for the property - we include all properties, even ones without recommendations
         property_recommendations = recommendations.get(p.id, [])
-        if not property_recommendations:
-            continue
+
         # Get just the default recommendations
         default_recommendations = [r for r in property_recommendations if r["default"]]
 
@@ -113,11 +112,16 @@ def extract_portfolio_aggregation_data(
         post_retrofit_co2 = pre_retrofit_co2 - carbon_savings
 
         pre_retrofit_energy_bill = p.current_energy_bill
-        post_retrofit_energy_bill = p.expected_energy_bill
+        post_retrofit_energy_bill = p.current_energy_bill - sum(
+            [r["energy_cost_savings"] for r in default_recommendations]
+        )
 
         pre_retrofit_energy_consumption = p.current_adjusted_energy
-        post_retrofit_energy_consumption = p.expected_adjusted_energy
+        post_retrofit_energy_consumption = p.current_adjusted_energy - sum(
+            [r["adjusted_heat_demand"] for r in default_recommendations]
+        )
 
+        # Add up energy savings
         cost = sum([r["total"] for r in default_recommendations])
         sap_point_improvement = sum([r["sap_points"] for r in default_recommendations])
 
@@ -166,7 +170,9 @@ def extract_portfolio_aggregation_data(
         "valuation_improvement_per_unit": format_money(valuation_improvment_per_unit),
         "cost_per_unit": format_money(agg_data["cost"].mean()),
         "cost_per_co2_saved": format_money(agg_data["cost"].sum() / total_carbon_saved),
-        "cost_per_sap_point": format_money(agg_data["cost"].sum() / total_sap_points)
+        "cost_per_sap_point": format_money(agg_data["cost"].sum() / total_sap_points),
+        "valuation_return_on_investment": str(round(total_valuation_increase / agg_data["cost"].sum(), 2))
+        # TODO: Could we add 10yr carbon credits value?
     }
 
     return aggregation_data
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 659b41a8..e626ecfa 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -272,6 +272,8 @@ class Recommendations:
             current_epc_rating=property_instance.data["current-energy-rating"],
         )
 
+        # TODO: This isn't quite right as this is based on EVERY possible measure, not just the ones that are
+        #       actually implemented
         expected_adjusted_energy = AnnualBillSavings.adjust_energy_to_metered(
             epc_energy_consumption=expected_heat_demand,
             current_epc_rating=property_instance.data["current-energy-rating"],
@@ -281,6 +283,7 @@ class Recommendations:
             current_adjusted_energy - expected_adjusted_energy
         )
 
+        # TODO: We should determine if the home is gas & electricity or just electricity
         current_energy_bill = AnnualBillSavings.calculate_annual_bill(current_adjusted_energy)
         expected_energy_bill = AnnualBillSavings.calculate_annual_bill(expected_adjusted_energy)
 

From 0f7e815379eacb6d76100a25186cd38e23d9b8c3 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 16 Apr 2024 11:18:36 +0100
Subject: [PATCH 236/248] updating text for valuation improvement

---
 backend/app/plan/router.py | 49 +++++++++++++++++++++++++++++++-------
 1 file changed, 41 insertions(+), 8 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 661858b7..45d87dd3 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -59,7 +59,7 @@ def patch_epc(patch, epc_records):
 
 
 def extract_portfolio_aggregation_data(
-    input_properties, total_valuation_increase, recommendations, new_epc_bands
+    input_properties, total_valuation_increase, recommendations, new_epc_bands, property_value_increase_ranges
 ):
     # We aggregate a number of metrics for the portfolio:
     # 1) A breakdown of the number of properties in each EPC band
@@ -69,7 +69,7 @@ def extract_portfolio_aggregation_data(
     # 3) Co2/unit
     #    a) before retrofit
     #    b) after retrofit
-    # 4) Energy bulls/unit
+    # 4) Energy bill/unit
     #    a) before retrofit
     #    b) after retrofit
     # 5) Average valuation improvement/unit
@@ -105,6 +105,8 @@ def extract_portfolio_aggregation_data(
         # Get just the default recommendations
         default_recommendations = [r for r in property_recommendations if r["default"]]
 
+        has_recommendations = len(default_recommendations) > 0
+
         # We can now calculate multiple outputs based on default recommendations
         carbon_savings = sum([r["co2_equivalent_savings"] for r in default_recommendations])
 
@@ -125,6 +127,15 @@ def extract_portfolio_aggregation_data(
         cost = sum([r["total"] for r in default_recommendations])
         sap_point_improvement = sum([r["sap_points"] for r in default_recommendations])
 
+        lower_bound_valuation_uplift = (
+            property_value_increase_ranges[p.id]["lower_bound_increased_value"] -
+            property_value_increase_ranges[p.id]["current_value"]
+        )
+        upper_bound_valuation_uplift = (
+            property_value_increase_ranges[p.id]["upper_bound_increased_value"] -
+            property_value_increase_ranges[p.id]["current_value"]
+        )
+
         agg_data.append({
             "pre_retrofit_epc": p.data["current-energy-rating"],
             "post_retrofit_epc": new_epc_bands[p.id],
@@ -135,14 +146,22 @@ def extract_portfolio_aggregation_data(
             "pre_retrofit_energy_consumption": pre_retrofit_energy_consumption,
             "post_retrofit_energy_consumption": post_retrofit_energy_consumption,
             "cost": cost,
-            "sap_point_improvement": sap_point_improvement
+            "sap_point_improvement": sap_point_improvement,
+            "lower_bound_valuation_uplift": lower_bound_valuation_uplift,
+            "upper_bound_valuation_uplift": upper_bound_valuation_uplift,
+            "has_recommendations": has_recommendations
         })
 
     agg_data = pd.DataFrame(agg_data)
 
-    n_units_to_retrofit = len(agg_data)
+    n_units_to_retrofit = agg_data["has_recommendations"].sum()
 
-    valuation_improvment_per_unit = total_valuation_increase / n_units_to_retrofit
+    valuation_improvement_lower_bound_per_unit = (
+        agg_data["lower_bound_valuation_uplift"].mean()
+    )
+    valuation_improvement_upper_bound_per_unit = (
+        agg_data["upper_bound_valuation_uplift"].mean()
+    )
 
     total_carbon_saved = agg_data["pre_retrofit_co2"].sum() - agg_data["post_retrofit_co2"].sum()
     total_sap_points = agg_data["sap_point_improvement"].sum()
@@ -150,6 +169,17 @@ def extract_portfolio_aggregation_data(
     def format_money(amount):
         return f"£{amount:,.0f}"
 
+    valuation_improvment_per_unit = format_money(
+        total_valuation_increase / n_units) + (f" ({format_money(valuation_improvement_lower_bound_per_unit)} - "
+                                               f"{format_money(valuation_improvement_upper_bound_per_unit)})")
+
+    valuation_return_on_investment = (
+        str(round(total_valuation_increase / agg_data["cost"].sum(), 2)) +
+        f" ("
+        f"{agg_data['lower_bound_valuation_uplift'].sum() / agg_data['cost'].sum():,.2f} - "
+        f"{agg_data['upper_bound_valuation_uplift'].sum() / agg_data['cost'].sum():,.2f})"
+    )
+
     aggregation_data = {
         "epc_breakdown_pre_retrofit": json.dumps(
             reformat_epc_data(agg_data["pre_retrofit_epc"].value_counts().to_dict())
@@ -167,11 +197,11 @@ def extract_portfolio_aggregation_data(
             round(agg_data["pre_retrofit_energy_consumption"].mean())) + "kWh",
         "energy_consumption_per_unit_post_retrofit": str(
             round(agg_data["post_retrofit_energy_consumption"].mean())) + "kWh",
-        "valuation_improvement_per_unit": format_money(valuation_improvment_per_unit),
+        "valuation_improvement_per_unit": valuation_improvment_per_unit,
         "cost_per_unit": format_money(agg_data["cost"].mean()),
         "cost_per_co2_saved": format_money(agg_data["cost"].sum() / total_carbon_saved),
         "cost_per_sap_point": format_money(agg_data["cost"].sum() / total_sap_points),
-        "valuation_return_on_investment": str(round(total_valuation_increase / agg_data["cost"].sum(), 2))
+        "valuation_return_on_investment": valuation_return_on_investment,
         # TODO: Could we add 10yr carbon credits value?
     }
 
@@ -446,6 +476,7 @@ async def trigger_plan(body: PlanTriggerRequest):
         property_valuation_increases = []
         session.commit()
         new_epc_bands = {}
+        property_value_increase_ranges = {}
         for i in range(0, len(input_properties), BATCH_SIZE):
             try:
                 # Take a slice of the input_properties list to make a batch
@@ -460,6 +491,7 @@ async def trigger_plan(body: PlanTriggerRequest):
                     new_epc_bands[p.id] = new_epc
 
                     valuations = PropertyValuation.estimate(property_instance=p, target_epc=new_epc)
+                    property_value_increase_ranges[p.id] = valuations
 
                     # Your existing operations
                     property_details_epc = p.get_property_details_epc(
@@ -527,7 +559,8 @@ async def trigger_plan(body: PlanTriggerRequest):
             input_properties=input_properties,
             total_valuation_increase=total_valuation_increase,
             recommendations=recommendations,
-            new_epc_bands=new_epc_bands
+            new_epc_bands=new_epc_bands,
+            property_value_increase_ranges=property_value_increase_ranges
         )
 
         aggregate_portfolio_recommendations(

From 02399667798370cab35608dc5edac17db7de1960 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 16 Apr 2024 11:32:15 +0100
Subject: [PATCH 237/248] setting up non-invasive recommendations

---
 etl/customers/immo/pilot/asset_list.py | 29 +++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/etl/customers/immo/pilot/asset_list.py b/etl/customers/immo/pilot/asset_list.py
index e587cc25..614fa8a0 100644
--- a/etl/customers/immo/pilot/asset_list.py
+++ b/etl/customers/immo/pilot/asset_list.py
@@ -21,6 +21,7 @@ council_tax_bands = pd.DataFrame(council_tax_bands)
 
 # This is information we need to override on the EPC itself, for instance if a new survey has been conducted and
 # that has not reached the API
+# For 53 Bromley, the non-invasives found the walls to be partially filled
 patches = [
     {
         'address': '6 Beech Road', 'postcode': 'DY1 4BP',
@@ -42,7 +43,11 @@ patches = [
         'energy-consumption-current': '491',
         'co2-emissions-current': '5.0',
         'potential-energy-efficiency': '87'
-    }
+    },
+    {
+        'address': '53 Bromley', 'postcode': 'DY5 4PJ',
+        'walls-description': 'Cavity wall, partial insulation',
+    },
 ]
 
 # This is information that is found as a result of the non-invasives, that mean that certain measures
@@ -56,6 +61,19 @@ already_installed = [
     }
 ]
 
+non_invasive_recommendations = [
+    {'address': '8 Corporation Road', 'postcode': 'DY2 7PX', 'recommendations': []},
+    {'address': '21 Wells Road', 'postcode': 'DY5 3TB', 'recommendations': ['cavity_extract_and_refill']},
+    {'address': '27 Milton Road', 'postcode': 'WV14 8HZ', 'recommendations': ['cavity_extract_and_refill']},
+    {'address': '195 Ashenhurst Road', 'postcode': 'DY1 2JB', 'recommendations': ['cavity_extract_and_refill']},
+    {'address': '53 Bromley', 'postcode': 'DY5 4PJ', 'recommendations': ['cavity_surveyed_as_filled_is_partial']},
+    {'address': '91 Osprey Drive', 'postcode': 'DY1 2JS', 'recommendations': ['cavity_extract_and_refill']},
+    {'address': '47 Fairfield Road', 'postcode': 'DY8 5UJ', 'recommendations': ['cavity_extract_and_refill']},
+    {'address': '150 Huntingtree Road', 'postcode': 'B63 4HP', 'recommendations': ['cavity_extract_and_refill']},
+    {'address': '6 Beech Road', 'postcode': 'DY1 4BP', 'recommendations': []},
+    {'address': '5 Oaklands', 'postcode': 'B62 0JA', 'recommendations': ['cavity_extract_and_refill']},
+]
+
 
 def app():
     raw_asset_list = read_excel_from_s3(
@@ -102,6 +120,14 @@ def app():
         file_name=patches_filename
     )
 
+    # Store non-invasive recommendations in S3
+    non_invasive_recommendations_filename = f"{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.json"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(non_invasive_recommendations),
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=non_invasive_recommendations_filename
+    )
+
     # EPC C portoflio
     body = {
         "portfolio_id": str(PORTFOLIO_ID),
@@ -111,6 +137,7 @@ def app():
         "trigger_file_path": filename,
         "already_installed_file_path": already_installed_filename,
         "patches_file_path": patches_filename,
+        "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
         "budget": None,
     }
     print(body)

From b3e7675488b7004cc98f171b8d78793188345148 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 16 Apr 2024 11:38:58 +0100
Subject: [PATCH 238/248] added non-invasive recommendations to property class

---
 backend/Property.py                    |  7 ++++++-
 backend/app/plan/router.py             | 13 +++++++++++++
 backend/app/plan/schemas.py            |  1 +
 etl/customers/immo/pilot/asset_list.py |  1 +
 4 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/backend/Property.py b/backend/Property.py
index 7b5a6bc3..2d1dbd5d 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -61,7 +61,8 @@ class Property:
     n_bedrooms = None
 
     def __init__(
-        self, id, postcode, address, epc_record, already_installed=None, **kwargs
+        self, id, postcode, address, epc_record, already_installed=None, property_non_invasive_recommendations=None,
+        **kwargs
     ):
 
         self.epc_record = epc_record
@@ -80,6 +81,10 @@ class Property:
         # cost and instead, provide a message that the measure has already been installed
 
         self.already_installed = ast.literal_eval(already_installed['already_installed']) if already_installed else []
+        self.non_invasive_recommendations = (
+            ast.literal_eval(property_non_invasive_recommendations['recommendations']) if
+            property_non_invasive_recommendations else []
+        )
 
         self.uprn = epc_record.get("uprn")
         self.full_sap_epc = epc_record.get("full_sap_epc")
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 45d87dd3..e5a2aa79 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -242,6 +242,12 @@ async def trigger_plan(body: PlanTriggerRequest):
                 bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.already_installed_file_path
             )
 
+        non_invasive_recommendations = []
+        if body.non_invasive_recommendations_file_path:
+            non_invasive_recommendations = read_csv_from_s3(
+                bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.non_invasive_recommendations_file_path
+            )
+
         cleaning_data = read_dataframe_from_s3_parquet(
             bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet",
         )
@@ -297,6 +303,12 @@ async def trigger_plan(body: PlanTriggerRequest):
                 x for x in already_installed if
                 (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
             ), {})
+
+            property_non_invasive_recommendations = next((
+                x for x in non_invasive_recommendations if
+                (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
+            ), {})
+
             input_properties.append(
                 Property(
                     id=property_id,
@@ -304,6 +316,7 @@ async def trigger_plan(body: PlanTriggerRequest):
                     postcode=epc_searcher.postcode_clean,
                     epc_record=prepared_epc,
                     already_installed=property_already_installed,
+                    non_invasive_recommendations=property_non_invasive_recommendations,
                     **Property.extract_kwargs(config)
                 )
             )
diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py
index 76eb49d2..59c0ebef 100644
--- a/backend/app/plan/schemas.py
+++ b/backend/app/plan/schemas.py
@@ -11,6 +11,7 @@ class PlanTriggerRequest(BaseModel):
     trigger_file_path: str
     already_installed_file_path: Optional[str] = None
     patches_file_path: Optional[str] = None
+    non_invasive_recommendations_file_path: Optional[str] = None
     exclusions: Optional[conlist(str, min_items=1)] = None
 
     # Pre-defined list of possibilities for exclusions
diff --git a/etl/customers/immo/pilot/asset_list.py b/etl/customers/immo/pilot/asset_list.py
index 614fa8a0..57fa5957 100644
--- a/etl/customers/immo/pilot/asset_list.py
+++ b/etl/customers/immo/pilot/asset_list.py
@@ -151,6 +151,7 @@ def app():
         "trigger_file_path": filename,
         "already_installed_file_path": already_installed_filename,
         "patches_file_path": patches_filename,
+        "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
         "budget": None,
     }
     print(body)

From 0c1fb0360fa1473d4123e3a41c3a82f65d9a3512 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 16 Apr 2024 11:50:02 +0100
Subject: [PATCH 239/248] fixed patching of partial cwi description

---
 backend/app/plan/router.py             | 2 ++
 etl/customers/immo/pilot/asset_list.py | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index e5a2aa79..7200d2ef 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -52,6 +52,8 @@ def patch_epc(patch, epc_records):
     """
 
     for patch_variable, patch_value in patch.items():
+        if patch_value == "":
+            continue
         if patch_variable in epc_records["original_epc"]:
             epc_records["original_epc"][patch_variable] = patch_value
 
diff --git a/etl/customers/immo/pilot/asset_list.py b/etl/customers/immo/pilot/asset_list.py
index 57fa5957..6329a2be 100644
--- a/etl/customers/immo/pilot/asset_list.py
+++ b/etl/customers/immo/pilot/asset_list.py
@@ -46,7 +46,7 @@ patches = [
     },
     {
         'address': '53 Bromley', 'postcode': 'DY5 4PJ',
-        'walls-description': 'Cavity wall, partial insulation',
+        'walls-description': 'Cavity wall, partial insulation (assumed)',
     },
 ]
 

From 4cf4d67ac91610d19e418aa33ae794a37c1be505 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 16 Apr 2024 13:21:14 +0100
Subject: [PATCH 240/248] Added cavity extraction and re-fill recommendation
 and costing

---
 backend/Property.py                    | 17 ++++++++++++++---
 backend/app/plan/router.py             | 14 ++++++++------
 recommendations/Costs.py               | 13 ++++++++++++-
 recommendations/Recommendations.py     | 19 ++++++++++++++-----
 recommendations/WallRecommendations.py | 17 +++++++++++++++--
 5 files changed, 63 insertions(+), 17 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index 2d1dbd5d..2e6cbbb6 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -61,7 +61,7 @@ class Property:
     n_bedrooms = None
 
     def __init__(
-        self, id, postcode, address, epc_record, already_installed=None, property_non_invasive_recommendations=None,
+        self, id, postcode, address, epc_record, already_installed=None, non_invasive_recommendations=None,
         **kwargs
     ):
 
@@ -82,8 +82,8 @@ class Property:
 
         self.already_installed = ast.literal_eval(already_installed['already_installed']) if already_installed else []
         self.non_invasive_recommendations = (
-            ast.literal_eval(property_non_invasive_recommendations['recommendations']) if
-            property_non_invasive_recommendations else []
+            ast.literal_eval(non_invasive_recommendations['recommendations']) if
+            non_invasive_recommendations else []
         )
 
         self.uprn = epc_record.get("uprn")
@@ -284,6 +284,7 @@ class Property:
                     recommendation_record=recommendation_record,
                     recommendations=previous_phase_representatives + [rec],
                     primary_recommendation_id=rec["recommendation_id"],
+                    non_invasive_recommendations=self.non_invasive_recommendations,
                 )
                 self.recommendations_scoring_data.append(scoring_dict)
 
@@ -293,6 +294,7 @@ class Property:
         recommendation_record,
         recommendations: list,
         primary_recommendation_id: int,
+        non_invasive_recommendations: list = None,
     ):
         """
         This function will iterate through a list of recommendations and apply a simulation for each recommendation
@@ -301,10 +303,12 @@ class Property:
         :param recommendation_record: The record of the property, which will be updated
         :param recommendations: The list of recommendations to apply
         :param primary_recommendation_id: The id of the primary recommendation, which is used to identify the record
+        :param non_invasive_recommendations: The list of non-invasive recommendations
         :return: The updated recommendation record
         """
 
         output = recommendation_record.copy()
+        non_invasive_recommendations = [] if non_invasive_recommendations is None else non_invasive_recommendations
 
         for col in [
             "walls_insulation_thickness",
@@ -323,6 +327,13 @@ class Property:
                 "external_wall_insulation",
                 "cavity_wall_insulation",
             ]:
+
+                # # If we have a non-invasive recommendation that the cavity wall is partially filled, we skip the
+                # # cavity wall insulation recommendation (since on the EPC, the property will look like how it did
+                # # before any works)
+                # if "cavity_surveyed_as_filled_is_partial" in non_invasive_recommendations:
+                #     continue
+
                 # The upgrade made here is to the u-value of the walls and the description of the
                 # insulation thickness
                 output["walls_thermal_transmittance_ending"] = recommendation[
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 7200d2ef..9854abe8 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -171,11 +171,13 @@ def extract_portfolio_aggregation_data(
     def format_money(amount):
         return f"£{amount:,.0f}"
 
-    valuation_improvment_per_unit = format_money(
-        total_valuation_increase / n_units) + (f" ({format_money(valuation_improvement_lower_bound_per_unit)} - "
-                                               f"{format_money(valuation_improvement_upper_bound_per_unit)})")
+    valuation_improvment_per_unit = str(
+        format_money(
+            total_valuation_increase / n_units) + (f" ({format_money(valuation_improvement_lower_bound_per_unit)} - "
+                                                   f"{format_money(valuation_improvement_upper_bound_per_unit)})")
+    )
 
-    valuation_return_on_investment = (
+    valuation_return_on_investment = str(
         str(round(total_valuation_increase / agg_data["cost"].sum(), 2)) +
         f" ("
         f"{agg_data['lower_bound_valuation_uplift'].sum() / agg_data['cost'].sum():,.2f} - "
@@ -189,8 +191,8 @@ def extract_portfolio_aggregation_data(
         "epc_breakdown_post_retrofit": json.dumps(
             reformat_epc_data(agg_data["post_retrofit_epc"].value_counts().to_dict())
         ),
-        "number_of_properties": n_units,
-        "n_units_to_retrofit": n_units_to_retrofit,
+        "number_of_properties": int(n_units),
+        "n_units_to_retrofit": int(n_units_to_retrofit),
         "co2_per_unit_pre_retrofit": str(round(agg_data["pre_retrofit_co2"].mean(), 2)) + "t",
         "co2_per_unit_post_retrofit": str(round(agg_data["post_retrofit_co2"].mean(), 2)) + "t",
         "energy_bill_per_unit_pre_retrofit": format_money(agg_data["pre_retrofit_energy_bill"].mean()),
diff --git a/recommendations/Costs.py b/recommendations/Costs.py
index 0e67b352..852bb11f 100644
--- a/recommendations/Costs.py
+++ b/recommendations/Costs.py
@@ -91,6 +91,10 @@ DOUBLE_RADIATOR_COST = 300
 FLUE_COST = 600
 PIPEWORK_COST = 750  # Min cost is £500
 
+# This is the cost per meter squared for cavity extraction
+# https://www.checkatrade.com/blog/cost-guides/cavity-wall-insulation-removal-cost/
+CAVITY_EXTRACTION_COST = 21.5
+
 
 class Costs:
     """
@@ -173,7 +177,7 @@ class Costs:
         if not self.labour_adjustment_factor:
             raise ValueError("Labour adjustment factor not found")
 
-    def cavity_wall_insulation(self, wall_area, material):
+    def cavity_wall_insulation(self, wall_area, material, is_extraction_and_refill=False):
         """
         Calculates the total cost for cavity wall insulation based on material and labor costs,
         including contingency, preliminaries, profit, and VAT.
@@ -208,6 +212,13 @@ class Costs:
         # Assume a team of 2
         labour_days = (labour_hours / 8) / 2
 
+        if is_extraction_and_refill:
+            # bump up the cost of the work
+            total_cost = total_cost + CAVITY_EXTRACTION_COST * wall_area
+            # Additional 2 days work
+            labour_hours = labour_hours + (2 * 8)
+            labour_days = labour_days + 2
+
         return {
             "total": total_cost,
             "subtotal": subtotal_before_vat,
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index e626ecfa..5960d7be 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -149,12 +149,14 @@ class Recommendations:
         property_recommendations = self.insert_temp_recommendation_id(property_recommendations)
 
         # We also need to create the representative recommendations for each recommendation type
-        property_representative_recommendations = self.create_representative_recommendations(property_recommendations)
+        property_representative_recommendations = self.create_representative_recommendations(
+            property_recommendations, non_invasive_recommendations=self.property_instance.non_invasive_recommendations
+        )
 
         return property_recommendations, property_representative_recommendations
 
     @staticmethod
-    def create_representative_recommendations(property_recommendations):
+    def create_representative_recommendations(property_recommendations, non_invasive_recommendations):
         """
         This method will create a representative recommendation for each recommendation type
         In order to create a representative recommendation, we choose the recommendation that has:
@@ -169,6 +171,13 @@ class Recommendations:
 
         for recommendations_by_type in property_recommendations:
 
+            # If the property was initially surveyed as filled, but the cavity was only partially filled, we don't
+            # want to include the cavity wall insulation recommendation in the defaults
+            # if (recommendations_by_type[0].get("type") == "cavity_wall_insulation") and (
+            #     "cavity_surveyed_as_filled_is_partial" in non_invasive_recommendations
+            # ):
+            #     continue
+
             if recommendations_by_type[0].get("type") == "mechanical_ventilation":
                 continue
 
@@ -238,13 +247,13 @@ class Recommendations:
 
         property_sap_predictions = all_predictions["sap_change_predictions"][
             all_predictions["sap_change_predictions"]["property_id"] == str(property_instance.id)
-            ]
+            ].copy()
         property_heat_predictions = all_predictions["heat_demand_predictions"][
             all_predictions["heat_demand_predictions"]["property_id"] == str(property_instance.id)
-            ]
+            ].copy()
         property_carbon_predictions = all_predictions["carbon_change_predictions"][
             all_predictions["carbon_change_predictions"]["property_id"] == str(property_instance.id)
-            ]
+            ].copy()
 
         property_recommendations = recommendations[property_instance.id].copy()
 
diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py
index feb2620b..20fc453c 100644
--- a/recommendations/WallRecommendations.py
+++ b/recommendations/WallRecommendations.py
@@ -113,7 +113,9 @@ class WallRecommendations(Definitions):
         insulation_thickness = self.property.walls["insulation_thickness"]
 
         # We check if the wall is already insulated and if so, we exit
-        if (insulation_thickness in ["average", "above average"]) or self.property.walls["is_filled_cavity"]:
+        if ((insulation_thickness in ["average", "above average"]) or self.property.walls["is_filled_cavity"]) and (
+            "cavity_extract_and_refill" not in self.property.non_invasive_recommendations
+        ):
             return
 
         if u_value:
@@ -216,15 +218,26 @@ class WallRecommendations(Definitions):
             if new_u_value <= self.BUILDING_REGULATIONS_PART_L_CAVITY_WALL_MAX_U_VALUE:
                 lowest_selected_u_value = update_lowest_selected_u_value(lowest_selected_u_value, new_u_value)
 
+                is_extraction_and_refill = "cavity_extract_and_refill" in self.property.non_invasive_recommendations
+
                 cost_result = self.costs.cavity_wall_insulation(
                     wall_area=self.property.insulation_wall_area,
                     material=material.to_dict(),
+                    is_extraction_and_refill=is_extraction_and_refill
                 )
 
                 already_installed = "cavity_wall_insulation" in self.property.already_installed
                 if already_installed:
                     cost_result = override_costs(cost_result)
 
+                if is_extraction_and_refill:
+                    description = f"Extract and refill cavity wall insulation with {material['description']}"
+                else:
+                    description = self._make_description(material)
+
+                # Floor the new u-value at 0.31, the best value our installers can achieve
+                new_u_value = max(0.31, new_u_value)
+
                 recommendations.append(
                     {
                         "phase": phase,
@@ -237,7 +250,7 @@ class WallRecommendations(Definitions):
                             )
                         ],
                         "type": "cavity_wall_insulation",
-                        "description": self._make_description(material),
+                        "description": description,
                         "starting_u_value": u_value,
                         "new_u_value": new_u_value,
                         "sap_points": None,

From e000c87cad98963e8c734a5cf8990a5a7b713217 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 18 Apr 2024 12:16:13 +0100
Subject: [PATCH 241/248] added patches for immo pilot 2

---
 etl/customers/immo/pilot/asset_list_2.py | 126 +++++++++++++++++++++++
 1 file changed, 126 insertions(+)
 create mode 100644 etl/customers/immo/pilot/asset_list_2.py

diff --git a/etl/customers/immo/pilot/asset_list_2.py b/etl/customers/immo/pilot/asset_list_2.py
new file mode 100644
index 00000000..f722a490
--- /dev/null
+++ b/etl/customers/immo/pilot/asset_list_2.py
@@ -0,0 +1,126 @@
+import pandas as pd
+from utils.s3 import read_excel_from_s3
+from utils.s3 import save_csv_to_s3
+
+USER_ID = 8
+PORTFOLIO_ID = 72
+
+# For
+patches = [
+    {
+        'address': '116 Parkes Hall Road',
+        'postcode': 'DY1 3RJ',
+        'walls-description': 'Cavity wall, filled cavity',
+        'walls-energy-eff': 'Average',
+        'roof-description': 'Pitched, 270 mm loft insulation',
+        'roof-energy-eff': 'Good',
+        'windows-description': 'Fully double glazed',
+        'windows-energy-eff': 'Good',
+        'mainheat-description': 'Boiler and radiators, mains gas',
+        'mainheat-energy-eff': 'Good',
+        'mainheatcont-description': 'Programmer, room thermostat and TRVs',
+        'mainheatc-energy-eff': 'Good',
+        'lighting-description': 'Low energy lighting in 27% of fixed outlets',
+        'lighting-energy-eff': 'Good',
+        'floor-description': 'Solid, no insulation (assumed)',
+        'secondheat-description': 'None',
+        'current-energy-efficiency': '73',
+        'current-energy-rating': 'C',
+        'energy-consumption-current': '184',
+        'co2-emissions-current': '2.4',
+        'potential-energy-efficiency': '88',
+        'total-floor-area': '73',
+        'construction-age-band': 'England and Wales: 1930-1949',
+        'property-type': 'House',
+        'built-form': 'Mid-Terrace',
+    }
+]
+
+# This is information that is found as a result of the non-invasives, that mean that certain measures
+# have been installed already. To reflect this in the front end, it is included in the recommendation, however
+# the cost is removed and instead, a message is presented saying that the measure is already installed.
+already_installed = []
+
+non_invasive_recommendations = []
+
+
+def app():
+    raw_asset_list = read_excel_from_s3(
+        bucket_name="retrofit-datalake-dev",
+        file_key="customers/Immo/Dudley Asset List - Hestia - pilot2.xlsx",
+        header_row=0
+    )
+
+    raw_asset_list = raw_asset_list[raw_asset_list["in_pilot"]].copy()
+
+    # Extract address and postcode
+    raw_asset_list["address"] = raw_asset_list["Full Address"].str.split(",").str[0]
+    raw_asset_list["postcode"] = raw_asset_list["Full Address"].str.split(",").str[-1].str.strip()
+
+    # We're provided with number of bathrooms and number of bedrooms.
+    asset_list = raw_asset_list.rename(
+        columns={
+            "No. of Beds": "n_bedrooms",
+            "No. of WC's": "n_bathrooms"
+        }
+    )
+
+    # Store the asset list in s3
+    filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
+    save_csv_to_s3(
+        dataframe=asset_list,
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename
+    )
+
+    # Store overrides in s3
+    already_installed_filename = f"{USER_ID}/{PORTFOLIO_ID}/already_installed.json"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(already_installed),
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=already_installed_filename
+    )
+
+    # Store patches in s3
+    patches_filename = f"{USER_ID}/{PORTFOLIO_ID}/patches.json"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(patches),
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=patches_filename
+    )
+
+    # Store non-invasive recommendations in S3
+    non_invasive_recommendations_filename = f"{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.json"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(non_invasive_recommendations),
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=non_invasive_recommendations_filename
+    )
+
+    # EPC C portfolio
+    body = {
+        "portfolio_id": str(PORTFOLIO_ID),
+        "housing_type": "Private",
+        "goal": "Increase EPC",
+        "goal_value": "C",
+        "trigger_file_path": filename,
+        "already_installed_file_path": already_installed_filename,
+        "patches_file_path": patches_filename,
+        "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
+        "budget": None,
+    }
+    print(body)
+
+    # EPC B portfolio
+    body = {
+        "portfolio_id": str(PORTFOLIO_ID + 1),
+        "housing_type": "Private",
+        "goal": "Increase EPC",
+        "goal_value": "B",
+        "trigger_file_path": filename,
+        "already_installed_file_path": already_installed_filename,
+        "patches_file_path": patches_filename,
+        "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
+        "budget": None,
+    }
+    print(body)

From acada27061d09f47ac76ecd2785c95eb39e741d3 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 18 Apr 2024 15:16:46 +0100
Subject: [PATCH 242/248] rounding up roof coverage %

---
 backend/SearchEpc.py                      |  9 +++++++--
 backend/app/plan/router.py                | 11 +++++++++--
 backend/ml_models/Valuation.py            |  8 ++++++++
 etl/customers/immo/pilot/asset_list_2.py  | 21 ++++++++++++++++++---
 etl/epc/Record.py                         |  2 +-
 recommendations/SolarPvRecommendations.py | 10 +++++++---
 6 files changed, 50 insertions(+), 11 deletions(-)

diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py
index cc2ee4a9..44178792 100644
--- a/backend/SearchEpc.py
+++ b/backend/SearchEpc.py
@@ -709,8 +709,13 @@ class SearchEpc:
                 self.full_sap_epc = {}
 
                 # Finally, set a standardised address 1 and postcode
-                self.address_clean = self.ordnance_survey_client.address_os
-                self.postcode_clean = self.ordnance_survey_client.postcode_os
+                self.address_clean = (
+                    self.ordnance_survey_client.address_os if self.ordnance_survey_client.address_os else self.address1
+                )
+                self.postcode_clean = (
+                    self.ordnance_survey_client.postcode_os if self.ordnance_survey_client.postcode_os else
+                    self.postcode
+                )
             return
 
         os_response = self.ordnance_survey_client.get_places_api()
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 9854abe8..a8464ee6 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -52,6 +52,10 @@ def patch_epc(patch, epc_records):
     """
 
     for patch_variable, patch_value in patch.items():
+
+        if patch_variable in ["address", "postcode"]:
+            continue
+
         if patch_value == "":
             continue
         if patch_variable in epc_records["original_epc"]:
@@ -268,9 +272,12 @@ async def trigger_plan(body: PlanTriggerRequest):
                 postcode=config["postcode"],
                 uprn=uprn,
                 auth_token=get_settings().EPC_AUTH_TOKEN,
-                os_api_key=get_settings().ORDNANCE_SURVEY_API_KEY
+                os_api_key=get_settings().ORDNANCE_SURVEY_API_KEY,
             )
-            epc_searcher.find_property()
+            epc_searcher.ordnance_survey_client.built_form = config.get("built_form", None)
+            epc_searcher.ordnance_survey_client.property_type = config.get("property_type", None)
+            # For the moment, our OS API access is unavailable, so we skip and interpolate
+            epc_searcher.find_property(skip_os=True)
             # Create a record in db
             property_id, is_new = create_property(
                 session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn
diff --git a/backend/ml_models/Valuation.py b/backend/ml_models/Valuation.py
index 251c016a..39ea5a98 100644
--- a/backend/ml_models/Valuation.py
+++ b/backend/ml_models/Valuation.py
@@ -63,6 +63,14 @@ class PropertyValuation:
         90093693: 279_000,  # Based on Zoopla
         90055152: 149_000,  # Based on Zoopla
         90028499: 238_000,  # Based on Zoopla
+        # IMMO Dudley Pilot 2- search by going to https://www.zoopla.co.uk/property/uprn/{uprn}/
+        90039318: 177_000,  # Based on Zoopla
+        90038384: 170_000,  # Based on Zoopla
+        90105380: 185_000,  # Based on Zoopla
+        90124001: 165_000,  # Based on Zoopla
+        90013980: 148_000,  # Based on Zoopla
+        90087154: 184_000,  # Based on Zoopla
+        90046817: 167_000,  # Based on Zoopla
     }
 
     # We base our valuation uplifts on a number of sources
diff --git a/etl/customers/immo/pilot/asset_list_2.py b/etl/customers/immo/pilot/asset_list_2.py
index f722a490..121e7a81 100644
--- a/etl/customers/immo/pilot/asset_list_2.py
+++ b/etl/customers/immo/pilot/asset_list_2.py
@@ -10,6 +10,7 @@ patches = [
     {
         'address': '116 Parkes Hall Road',
         'postcode': 'DY1 3RJ',
+        'uprn': '90046817',
         'walls-description': 'Cavity wall, filled cavity',
         'walls-energy-eff': 'Average',
         'roof-description': 'Pitched, 270 mm loft insulation',
@@ -21,7 +22,7 @@ patches = [
         'mainheatcont-description': 'Programmer, room thermostat and TRVs',
         'mainheatc-energy-eff': 'Good',
         'lighting-description': 'Low energy lighting in 27% of fixed outlets',
-        'lighting-energy-eff': 'Good',
+        'lighting-energy-eff': 'Average',
         'floor-description': 'Solid, no insulation (assumed)',
         'secondheat-description': 'None',
         'current-energy-efficiency': '73',
@@ -39,7 +40,11 @@ patches = [
 # This is information that is found as a result of the non-invasives, that mean that certain measures
 # have been installed already. To reflect this in the front end, it is included in the recommendation, however
 # the cost is removed and instead, a message is presented saying that the measure is already installed.
-already_installed = []
+already_installed = [
+    {
+        'address': '28 Sangwin Road', 'postcode': 'WV14 9EQ', "already_installed": ["loft_insulation"]
+    }
+]
 
 non_invasive_recommendations = []
 
@@ -58,13 +63,23 @@ def app():
     raw_asset_list["postcode"] = raw_asset_list["Full Address"].str.split(",").str[-1].str.strip()
 
     # We're provided with number of bathrooms and number of bedrooms.
+    # The UPRNs are not the official ones
     asset_list = raw_asset_list.rename(
         columns={
             "No. of Beds": "n_bedrooms",
-            "No. of WC's": "n_bathrooms"
+            "No. of WC's": "n_bathrooms",
+            'Property Type': 'property_type',
+            'Architype': 'built_form'
         }
     )
 
+    # Remap the values
+    asset_list["built_form"] = asset_list["built_form"].map({
+        "SEMI DETACHED": "Semi-Detached",
+        "MID TERRACE": "Mid-Terrace",
+        "END TERRACE": "End-Terrace",
+    })
+
     # Store the asset list in s3
     filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
     save_csv_to_s3(
diff --git a/etl/epc/Record.py b/etl/epc/Record.py
index e74330a2..9a965c6a 100644
--- a/etl/epc/Record.py
+++ b/etl/epc/Record.py
@@ -191,7 +191,7 @@ class EPCRecord:
         This method will clean the records using the data processor
         """
         epc_data_processor = EPCDataProcessor(
-            data=self.epc_record_as_dataframe("prepared_epc"),
+            data=self.epc_record_as_dataframe("prepared_epc").copy(),
             run_mode="newdata",
             cleaning_averages=self.cleaning_data,
         )
diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py
index 58cf9735..b44557ab 100644
--- a/recommendations/SolarPvRecommendations.py
+++ b/recommendations/SolarPvRecommendations.py
@@ -56,14 +56,18 @@ class SolarPvRecommendations:
         if not is_valid_property_type or not is_valid_roof_type or not has_no_existing_solar_pv:
             return
 
+        solar_pv_percentage = self.property.solar_pv_percentage
+        # We round up to the nearest 10%
+        solar_pv_percentage = np.ceil(solar_pv_percentage * 10) / 10
+
         # For the solar recommendations, we produce the following scenarios:
         # 1) Solar panels only, we present a high, medium and low coverage
         # 2) With and without battery
         roof_coverage_scenarios = [
-            self.property.solar_pv_percentage - 0.1, self.property.solar_pv_percentage,
+            solar_pv_percentage - 0.1, solar_pv_percentage,
         ]
-        if self.property.solar_pv_percentage <= 0.4:
-            roof_coverage_scenarios.append(self.property.solar_pv_percentage + 0.1)
+        if solar_pv_percentage <= 0.4:
+            roof_coverage_scenarios.append(solar_pv_percentage + 0.1)
         # We make sure we haven't gone too low or high - we allow no more than 60% coverage
         roof_coverage_scenarios = [v for v in roof_coverage_scenarios if 0 <= v <= 0.6]
         # If we only have two scenarios, we add a coverage scenario 10% less than the smallest

From db2586061598471f182fc338668618dfd4109a61 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 18 Apr 2024 16:01:41 +0100
Subject: [PATCH 243/248] Completed pilot 2

---
 etl/customers/immo/pilot/asset_list_2.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/etl/customers/immo/pilot/asset_list_2.py b/etl/customers/immo/pilot/asset_list_2.py
index 121e7a81..1b4fad9a 100644
--- a/etl/customers/immo/pilot/asset_list_2.py
+++ b/etl/customers/immo/pilot/asset_list_2.py
@@ -43,6 +43,15 @@ patches = [
 already_installed = [
     {
         'address': '28 Sangwin Road', 'postcode': 'WV14 9EQ', "already_installed": ["loft_insulation"]
+    },
+    {
+        'address': '51 Hillwood Road', 'postcode': 'B62 8NQ', "already_installed": ["loft_insulation"]
+    },
+    {
+        'address': '47 Watsons Close', 'postcode': 'DY2 7HL', "already_installed": ["loft_insulation"]
+    },
+    {
+        'address': '44 Hatfield Road', 'postcode': 'DY9 7LW', "already_installed": ["loft_insulation"]
     }
 ]
 

From 3593b7ae9ebd4245985a2dabc80446b23f00d84e Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 19 Apr 2024 13:54:04 +0100
Subject: [PATCH 244/248] Added boiler upgrade recommendation

---
 etl/customers/gla_croydon_demo/asset_list.py |  5 ++--
 recommendations/Costs.py                     | 12 ++------
 recommendations/HeatingRecommender.py        | 31 +++++++++-----------
 3 files changed, 20 insertions(+), 28 deletions(-)

diff --git a/etl/customers/gla_croydon_demo/asset_list.py b/etl/customers/gla_croydon_demo/asset_list.py
index 7dde8926..1655979b 100644
--- a/etl/customers/gla_croydon_demo/asset_list.py
+++ b/etl/customers/gla_croydon_demo/asset_list.py
@@ -34,8 +34,9 @@ def app():
         low_memory=False
     )
 
-    z = epc_data.groupby(["WALLS_DESCRIPTION", "WALLS_ENERGY_EFF"]).size().reset_index(name="count")
-    z = z[z["MAINHEAT_DESCRIPTION"] == "Boiler and radiators, mains gas"]
+    z = epc_data[epc_data["MAINHEAT_DESCRIPTION"] == "Boiler and radiators, mains gas"]
+    z["HOTWATER_DESCRIPTION"].value_counts()
+    z["MAIN_FUEL"].value_counts()
 
     # Filter on entries where we have a UPRN
     epc_data = epc_data[~pd.isnull(epc_data["UPRN"])]
diff --git a/recommendations/Costs.py b/recommendations/Costs.py
index 852bb11f..d7a8ad2f 100644
--- a/recommendations/Costs.py
+++ b/recommendations/Costs.py
@@ -67,18 +67,12 @@ LOW_CARBON_COMBI_BOILER = 2200
 # https://www.greenmatch.co.uk/boilers/35kw-boiler
 # https://www.greenmatch.co.uk/boilers/40kw-boiler
 # These are exclusive of installation costs
-COMBI_BOILER_COSTS = {
+CONDENSING_BOILER_COSTS = {
     "30kw": 1550,
     "35kw": 1610,
     "40kw": 1625
 }
 
-CONVENTIONAL_BOILER_COSTS = {
-    "30kw": 1117,
-    "35kw": 1546,
-    "40kw": 1776
-}
-
 # Assumes 3 hours to remove each heater (including re-decorating)
 ROOM_HEATER_REMOVAL_COST = 120
 ROOM_HEATER_REMOVAL_LABOUR_HOURS = 3
@@ -1179,7 +1173,7 @@ class Costs:
         estimated_radiators = max(total_radiators_based_on_power, base_radiators + additional_radiators)
         return round(estimated_radiators)
 
-    def boiler(self, is_combi, size, exising_room_heaters, system_change, n_heated_rooms, n_rooms):
+    def boiler(self, size, exising_room_heaters, system_change, n_heated_rooms, n_rooms):
         """
         Based on a basic estimate of median value £2600 to install a low carbon combi boiler
         First time central heating vosts can als be found here:
@@ -1187,7 +1181,7 @@ class Costs:
         :return:
         """
 
-        unit_cost = COMBI_BOILER_COSTS[size] if is_combi else CONVENTIONAL_BOILER_COSTS[size]
+        unit_cost = CONDENSING_BOILER_COSTS[size]
         # The unit cost is the cost without VAT
         # We now need to estimate the cost of the works
         labour_days = 2
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 432dc6a6..2423901a 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -312,7 +312,15 @@ class HeatingRecommender:
         simulation_config = {}
         boiler_costs = {}
         boiler_recommendation = {}
-        if self.property.data["mainheat-energy-eff"] in ["Very Poor", "Poor", "Average"]:
+
+        has_inefficient_space_heating = self.property.data["mainheat-energy-eff"] in ["Very Poor", "Poor", "Average"]
+
+        has_inefficient_mains_water = (
+            self.property.hotwater["clean_description"] in ["From main system"] and
+            self.property.data["hot-water-energy-eff"] in ["Very Poor", "Poor", "Average"]
+        )
+
+        if has_inefficient_space_heating or has_inefficient_mains_water:
             boiler_size = self.estimate_boiler_size(
                 property_type=self.property.data["property-type"],
                 built_form=self.property.data["built-form"],
@@ -321,22 +329,12 @@ class HeatingRecommender:
                 num_heated_rooms=self.property.data["number-heated-rooms"],
             )
 
-            # We recommend a combi boiler under the following conditions
-            # 1) If there are 4 or fewer rooms (we don't use heqted rooms because none of the rooms could be
-            #    heated if there is no existing heating system).
-            # 2) There 1 or fewer bathrooms
-            # Otherwise, we recommend a gas condensing boiler, which will server a larger property, that has multiple
-            # bathrooms
-            is_combi = (
-                (self.property.number_of_rooms <= 4) and
-                (self.property.n_bathrooms in [None, 0, 1])
-            )
-            if is_combi:
-                description = "Upgrade to a new combi boiler"
-            else:
-                description = "Upgrade to a new gas condensing boiler"
+            description = "Upgrade to a new condensing boiler"
 
-            simulation_config = {"mainheat_energy_eff_ending": "Good"}
+            simulation_config = {
+                "mainheat_energy_eff_ending": "Good",
+                "hot_water_energy_eff_ending": "Good"
+            }
             if system_change:
                 # Installation of a boiler improves the hot water system so we need to reflect this in
                 # the outcome of the recommendation
@@ -363,7 +361,6 @@ class HeatingRecommender:
                 }
 
             boiler_costs = self.costs.boiler(
-                is_combi=is_combi,
                 size=f"{boiler_size}kw",
                 exising_room_heaters=exising_room_heaters,
                 system_change=system_change,

From 391cb356ee12270aa9f5a4ffeff6a917f07ff05e Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 19 Apr 2024 14:07:47 +0100
Subject: [PATCH 245/248] debugging recommendation when we have independent
 boiler upgrade and heating controls

---
 recommendations/HeatingRecommender.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 2423901a..aa5cabdb 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -394,9 +394,13 @@ class HeatingRecommender:
         controls_recommender.recommend(heating_description="Boiler and radiators, mains gas")
         # We may have 2 recommendations from the heating controls
 
-        if not controls_recommender.recommendation:
+        if not controls_recommender.recommendation and not boiler_recommendation:
             return
 
+        if not system_change and len(boiler_recommendation):
+            # If there is not a system change, we add the boiler recommendation at point.
+            self.recommendations.append(boiler_recommendation)
+
         if system_change:
             # We combine the heating and controls recommendations, in the case of a system change
             combined_recommendations = []

From 8bd899bcba8739b3232ec254fa799ff8497efb0f Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 19 Apr 2024 16:43:13 +0100
Subject: [PATCH 246/248] debugging structure of heating recommendations

---
 backend/app/plan/router.py            | 1 +
 recommendations/HeatingRecommender.py | 8 ++++----
 recommendations/Recommendations.py    | 9 +++++++--
 3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index a8464ee6..06d1aadf 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -380,6 +380,7 @@ async def trigger_plan(body: PlanTriggerRequest):
 
         logger.info("Preparing data for scoring in sap change api")
         recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data)
+
         recommendations_scoring_data = recommendations_scoring_data.drop(
             columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
                      "carbon_ending"]
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index aa5cabdb..fe5cdd46 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -399,7 +399,7 @@ class HeatingRecommender:
 
         if not system_change and len(boiler_recommendation):
             # If there is not a system change, we add the boiler recommendation at point.
-            self.recommendations.append(boiler_recommendation)
+            self.recommendations.append([boiler_recommendation])
 
         if system_change:
             # We combine the heating and controls recommendations, in the case of a system change
@@ -417,12 +417,12 @@ class HeatingRecommender:
                 combined_recommendations.extend(combined_recommendation)
 
             # Overwrite the existing boiler recommendation
-            self.recommendations.extend(combined_recommendations)
+            self.recommendations.append(combined_recommendations)
         else:
             # We increment the recommendation phase, since the heating controls are separate from the boiler upgrade
             # but we'll only upgrade if we have a heating recommendation
             has_heating_recommendation = any(
-                recommendation["type"] == "heating" for recommendation in self.recommendations
+                rec["type"] == "heating" for recommendation in self.recommendations for rec in recommendation
             )
             if has_heating_recommendation:
                 recommendation_phase += 1
@@ -431,6 +431,6 @@ class HeatingRecommender:
             for recommendation in controls_recommender.recommendation:
                 recommendation["phase"] = recommendation_phase
 
-            self.recommendations.extend(controls_recommender.recommendation)
+            self.recommendations.append(controls_recommender.recommendation)
 
         return
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 5960d7be..aba75ad9 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -111,11 +111,16 @@ class Recommendations:
         if "heating" not in self.exclusions:
             self.heating_recommender.recommend(phase=phase)
             if self.heating_recommender.recommendations:
-                property_recommendations.append(self.heating_recommender.recommendations)
+                if len(self.heating_recommender.recommendations) == 1:
+                    property_recommendations.append(self.heating_recommender.recommendations)
+                else:
+                    property_recommendations.extend(self.heating_recommender.recommendations)
                 # We check if we have distinct heating and heating controls recommendations
                 # If so, we increment by 2 (one of the heating system, one for the heating controls)
                 # otherwise we incremenet by 1
-                max_used_phase = max([rec["phase"] for rec in self.heating_recommender.recommendations])
+                max_used_phase = max(
+                    [rec["phase"] for recs in self.heating_recommender.recommendations for rec in recs]
+                )
                 amount_to_increment = max_used_phase - phase + 1
                 phase += amount_to_increment
 

From 7bdf2147badefd9f43250ac0eedc933f6378b842 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 19 Apr 2024 18:38:16 +0100
Subject: [PATCH 247/248] restructured output of heating and heating control
 recommendations

---
 backend/app/plan/router.py            | 20 ++++++++++----------
 recommendations/HeatingRecommender.py | 16 +++++++++-------
 recommendations/Recommendations.py    | 19 +++++++++++++------
 3 files changed, 32 insertions(+), 23 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 06d1aadf..ebaf482d 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -282,16 +282,16 @@ async def trigger_plan(body: PlanTriggerRequest):
             property_id, is_new = create_property(
                 session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn
             )
-            if not is_new:
-                continue
-
-            create_property_targets(
-                session,
-                property_id=property_id,
-                portfolio_id=body.portfolio_id,
-                epc_target=body.goal_value,
-                heat_demand_target=None
-            )
+            # if not is_new:
+            #     continue
+            #
+            # create_property_targets(
+            #     session,
+            #     property_id=property_id,
+            #     portfolio_id=body.portfolio_id,
+            #     epc_target=body.goal_value,
+            #     heat_demand_target=None
+            # )
 
             epc_records = {
                 'original_epc': epc_searcher.newest_epc.copy(),
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index fe5cdd46..537125a1 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -15,7 +15,8 @@ class HeatingRecommender:
         self.property = property_instance
         self.costs = Costs(self.property)
 
-        self.recommendations = []
+        self.heating_recommendations = []
+        self.heating_control_recommendations = []
 
     def recommend(self, phase=0):
 
@@ -23,7 +24,8 @@ class HeatingRecommender:
         #       the boiler, but instead flushing the system will make it run more efficiently. There is a cost for this
         #       in the Costs class, stored as SYSTEM_FLUSH_COST
 
-        self.recommendations = []
+        self.heating_recommendations = []
+        self.heating_control_recommendations = []
         # This first iteration of the recommender will provide very basic recommendation
         # We recommend heating controls based on the main heating system
 
@@ -254,7 +256,7 @@ class HeatingRecommender:
             system_change=system_change
         )
 
-        self.recommendations.extend(recommendations)
+        self.heating_recommendations.extend(recommendations)
 
     @staticmethod
     def estimate_boiler_size(property_type, built_form, floor_area, floor_height, num_heated_rooms):
@@ -399,7 +401,7 @@ class HeatingRecommender:
 
         if not system_change and len(boiler_recommendation):
             # If there is not a system change, we add the boiler recommendation at point.
-            self.recommendations.append([boiler_recommendation])
+            self.heating_recommendations.extend([boiler_recommendation])
 
         if system_change:
             # We combine the heating and controls recommendations, in the case of a system change
@@ -417,12 +419,12 @@ class HeatingRecommender:
                 combined_recommendations.extend(combined_recommendation)
 
             # Overwrite the existing boiler recommendation
-            self.recommendations.append(combined_recommendations)
+            self.heating_recommendations.extend(combined_recommendations)
         else:
             # We increment the recommendation phase, since the heating controls are separate from the boiler upgrade
             # but we'll only upgrade if we have a heating recommendation
             has_heating_recommendation = any(
-                rec["type"] == "heating" for recommendation in self.recommendations for rec in recommendation
+                rec["type"] == "heating" for rec in self.heating_recommendations
             )
             if has_heating_recommendation:
                 recommendation_phase += 1
@@ -431,6 +433,6 @@ class HeatingRecommender:
             for recommendation in controls_recommender.recommendation:
                 recommendation["phase"] = recommendation_phase
 
-            self.recommendations.append(controls_recommender.recommendation)
+            self.heating_control_recommendations.extend(controls_recommender.recommendation)
 
         return
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index aba75ad9..06dc2d61 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -110,16 +110,23 @@ class Recommendations:
         # Heating and Electical systems
         if "heating" not in self.exclusions:
             self.heating_recommender.recommend(phase=phase)
-            if self.heating_recommender.recommendations:
-                if len(self.heating_recommender.recommendations) == 1:
-                    property_recommendations.append(self.heating_recommender.recommendations)
-                else:
-                    property_recommendations.extend(self.heating_recommender.recommendations)
+            if (
+                self.heating_recommender.heating_recommendations or
+                self.heating_recommender.heating_control_recommendations
+            ):
+                if self.heating_recommender.heating_recommendations:
+                    property_recommendations.append(self.heating_recommender.heating_recommendations)
+
+                if self.heating_recommender.heating_control_recommendations:
+                    property_recommendations.append(self.heating_recommender.heating_control_recommendations)
+
                 # We check if we have distinct heating and heating controls recommendations
                 # If so, we increment by 2 (one of the heating system, one for the heating controls)
                 # otherwise we incremenet by 1
                 max_used_phase = max(
-                    [rec["phase"] for recs in self.heating_recommender.recommendations for rec in recs]
+                    [rec["phase"] for rec in
+                     self.heating_recommender.heating_recommendations +
+                     self.heating_recommender.heating_control_recommendations]
                 )
                 amount_to_increment = max_used_phase - phase + 1
                 phase += amount_to_increment

From 5a879572f46fba68fc136f2d0681805119e60ccb Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 23 Apr 2024 15:34:29 +0100
Subject: [PATCH 248/248] final modifications for immo pilot

---
 etl/customers/immo/pilot/asset_list_2.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/etl/customers/immo/pilot/asset_list_2.py b/etl/customers/immo/pilot/asset_list_2.py
index 1b4fad9a..52260f57 100644
--- a/etl/customers/immo/pilot/asset_list_2.py
+++ b/etl/customers/immo/pilot/asset_list_2.py
@@ -51,7 +51,9 @@ already_installed = [
         'address': '47 Watsons Close', 'postcode': 'DY2 7HL', "already_installed": ["loft_insulation"]
     },
     {
-        'address': '44 Hatfield Road', 'postcode': 'DY9 7LW', "already_installed": ["loft_insulation"]
+        'address': '44 Hatfield Road',
+        'postcode': 'DY9 7LW',
+        "already_installed": ["loft_insulation", "cavity_wall_insulation"]
     }
 ]