diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py
index 611d0257..945b5e4e 100644
--- a/asset_list/AssetList.py
+++ b/asset_list/AssetList.py
@@ -2622,7 +2622,8 @@ class AssetList:
 
         # Add in deal and pipeline information
         programme_data["dealname"] = (
-            programme_data[self.STANDARD_FULL_ADDRESS] + " : " + programme_data["domna_product"]
+            programme_data[self.STANDARD_FULL_ADDRESS] + ", " +
+            programme_data[self.STANDARD_POSTCODE] + " : " + programme_data["domna_product"]
         )
         programme_data['Pipeline <DEAL pipeline>'] = hubspot_config.CRM_PIPELINE_NAME
         programme_data['Associations: Listing'] = "Property Owner"
@@ -2656,7 +2657,11 @@ class AssetList:
 
         # Ammend the property type and built form columns
         programme_data["hubspot_property_type"] = programme_data[self.STANDARD_PROPERTY_TYPE].copy()
-        programme_data["hubspot_built_form"] = programme_data[self.STANDARD_BUILT_FORM].copy()
+        # The built form column may be missing from the programme data; default to None in that case
+        if self.STANDARD_BUILT_FORM in programme_data.columns:
+            programme_data["hubspot_built_form"] = programme_data[self.STANDARD_BUILT_FORM].copy()
+        else:
+            programme_data["hubspot_built_form"] = None
 
         def _replace_property_description_data(programme_data, column_name):
             """
diff --git a/asset_list/abs_estimates.py b/asset_list/abs_estimates.py
index 58adcca6..0cd82dc6 100644
--- a/asset_list/abs_estimates.py
+++ b/asset_list/abs_estimates.py
@@ -13,10 +13,22 @@ from backend.app.utils import sap_to_epc
 load_dotenv(dotenv_path="backend/.env")
 EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
 
+# project = pd.read_excel(
+
+# )
+#
+# cavity = project[project["cavity_reason"].isin(
+#     ["EPC Shows Empty Cavity: SAP Rating 54 or less", "EPC Shows Empty Cavity: SAP Rating 55-68"]
+# )]
+
 asset_list = pd.read_excel(
-    "/Users/khalimconn-kowlessar/Documents/hestia/Instagroup Review/Livewest South-West - Standardised V2.xlsx",
-    sheet_name="Cavity Route (Insta Review)"
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Abri/Abs Rates/Desktop ABRI data - Standardised After "
+    "Programmes.xlsx",
+    sheet_name="Reviewed List"
 )
+asset_list = asset_list[asset_list["cavity_reason"].isin(
+    ["EPC Shows Empty Cavity: SAP Rating 54 or less", "EPC Shows Empty Cavity: SAP Rating 55-68"]
+)]
 
 abs_matrix = pd.read_csv(
     "/Users/khalimconn-kowlessar/Downloads/ECO4 Full Project Scores Matrix.csv"
@@ -30,7 +42,7 @@ pps_matrix.columns = [c.strip() for c in pps_matrix.columns]
 # We need to estimate the number of points the work will produce and the finishing band. For this, we assume 7 for
 # cavity and 15 for solar. We'll be more specific in the future, but for now, this is a good enough estimate.
 route = asset_list[["domna_address_1", "domna_postcode", "epc_os_uprn"]].rename(
-    columns={"domna_address_1": "address", "domna_postcode": "postcode", "epc_os_uprn": "upr"}
+    columns={"domna_address_1": "address", "domna_postcode": "postcode", "epc_os_uprn": "uprn"}
 )
 route["address"] = route["address"].astype(str)
 
@@ -42,120 +54,121 @@ asset_list_epc_client = AssetListEpcData(
 asset_list_epc_client.get_data()
 asset_list_epc_client.get_non_invasive_recommendations()
 
-solar_sap_points = []
-for r in asset_list_epc_client.non_invasive_recommendations:
-    if not r.get("recommendations"):
-        continue
-    solar_recommendations = [
-        x for x in r["recommendations"] if "solar_pv" in x["type"]
-    ]
-    if solar_recommendations:
-        solar_recommendations = solar_recommendations[0]
-    else:
-        continue
-
-    address = r["address"]
-    postcode = r["postcode"]
-
-    solar_sap_points.append(
-        {
-            "address": address,
-            "postcode": postcode,
-            "sap_points": solar_recommendations["sap_points"]
-        }
-    )
-
-solar_sap_points = pd.DataFrame(solar_sap_points)
-solar_sap_points.drop_duplicates(subset=["address", "postcode"], inplace=True)
-# Store the sap points in the cavity route to csv
-# cwi_sap_points.to_csv(
-#     "/Users/khalimconn-kowlessar/Documents/hestia/Instagroup Review/cwi_sap_points_livewest_sw.csv",
-#     index=False
-# )
-
-avg_solar_points_by_postcode = solar_sap_points.groupby(["postcode"]).agg({"sap_points": "mean"}).reset_index()
-avg_solar_points = solar_sap_points["sap_points"].median()
-asset_list["domna_address_1"] = asset_list["domna_address_1"].astype(str)
-asset_list = asset_list.merge(
-    solar_sap_points, how="left", left_on=["domna_address_1", "domna_postcode"], right_on=["address", "postcode"]
-).drop(
-    columns=["address", "postcode"]
-)
-
-# Fill the sap points with the average cwi points
-asset_list = asset_list.merge(
-    avg_solar_points_by_postcode.rename(columns={"postcode": "domna_postcode"}),
-    how="left", on=["domna_postcode"], suffixes=("", "_avg")
-)
-asset_list["sap_points"] = asset_list["sap_points"].fillna(asset_list["sap_points_avg"])
-asset_list.drop(columns=["sap_points_avg"], inplace=True)
-
-asset_list["sap_points"] = asset_list["sap_points"].fillna(avg_solar_points)
-asset_list["post_works_sap"] = asset_list["epc_sap_score_on_register"] + asset_list["sap_points"]
-asset_list["post_works_epc"] = asset_list["post_works_sap"].apply(lambda x: sap_to_epc(x))
-asset_list["starting_half_band"] = asset_list["epc_sap_score_on_register"].apply(lambda x: Funding.get_sap_band(x))
-asset_list["ending_half_band"] = asset_list["post_works_sap"].apply(lambda x: Funding.get_sap_band(x))
-asset_list["floor_area_band"] = asset_list["epc_total_floor_area"].apply(lambda x: Funding.get_floor_area_band(x))
-
-asset_list["ending_half_band"] = np.where(
-    (asset_list["post_works_epc"] == asset_list["epc_rating_on_register"]),
-    "Low_C",
-    asset_list["ending_half_band"]
-)
-# Realistically, we'll take the properties to a low C at worst
-asset_list["ending_half_band"] = np.where(
-    (asset_list["post_works_sap"] < 69),
-    "Low_C",
-    asset_list["ending_half_band"]
-)
-
-asset_list = asset_list.merge(
-    abs_matrix, how="left", left_on=["starting_half_band", "ending_half_band", "floor_area_band"],
-    right_on=['Starting Band', 'Finishing Band', 'Floor Area Segment', ]
-)
-asset_list = asset_list.drop(columns=['Starting Band', 'Finishing Band', 'Floor Area Segment'])
-
-asset_list = asset_list.rename(
-    columns={"Cost Savings": "funding_abs"}
-)
-
-print(asset_list["domna_property_id"].duplicated().sum())
-
-# Store this data
-asset_list.to_csv(
-    "/Users/khalimconn-kowlessar/Documents/hestia/Instagroup Review/livewest_sw_solar_abs_estimates-solar.csv",
-    index=False
-)
-
-# Cavity process!
-# cwi_sap_points = []
+# solar_sap_points = []
 # for r in asset_list_epc_client.non_invasive_recommendations:
 #     if not r.get("recommendations"):
 #         continue
-#     cwi_recommendations = [
-#         x for x in r["recommendations"] if "cavity_wall_insulation" in x["type"]
+#     solar_recommendations = [
+#         x for x in r["recommendations"] if "solar_pv" in x["type"]
 #     ]
-#     if cwi_recommendations:
-#         cwi_recommendations = cwi_recommendations[0]
+#     if solar_recommendations:
+#         solar_recommendations = solar_recommendations[0]
 #     else:
 #         continue
 #
 #     address = r["address"]
 #     postcode = r["postcode"]
 #
-#     cwi_sap_points.append(
+#     solar_sap_points.append(
 #         {
 #             "address": address,
 #             "postcode": postcode,
-#             "sap_points": cwi_recommendations["sap_points"]
+#             "sap_points": solar_recommendations["sap_points"]
 #         }
 #     )
 #
-# cwi_sap_points = pd.DataFrame(cwi_sap_points)
-# cwi_sap_points = pd.read_csv(
-#     "/Users/khalimconn-kowlessar/Documents/hestia/Instagroup Review/cwi_sap_points_livewest_sw.csv"
+# solar_sap_points = pd.DataFrame(solar_sap_points)
+# solar_sap_points.drop_duplicates(subset=["address", "postcode"], inplace=True)
+# # Store the sap points in the cavity route to csv
+# solar_sap_points.to_csv(
+#     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Abri/Abs Rates/cwi_sap_points.csv",
+#     index=False
 # )
-# cwi_sap_points.drop_duplicates(subset=["address", "postcode"], inplace=True)
+#
+# avg_solar_points_by_postcode = solar_sap_points.groupby(["postcode"]).agg({"sap_points": "mean"}).reset_index()
+# avg_solar_points = solar_sap_points["sap_points"].median()
+# asset_list["domna_address_1"] = asset_list["domna_address_1"].astype(str)
+# asset_list = asset_list.merge(
+#     solar_sap_points, how="left", left_on=["domna_address_1", "domna_postcode"], right_on=["address", "postcode"]
+# ).drop(
+#     columns=["address", "postcode"]
+# )
+#
+# # Fill the sap points with the average cwi points
+# asset_list = asset_list.merge(
+#     avg_solar_points_by_postcode.rename(columns={"postcode": "domna_postcode"}),
+#     how="left", on=["domna_postcode"], suffixes=("", "_avg")
+# )
+# asset_list["sap_points"] = asset_list["sap_points"].fillna(asset_list["sap_points_avg"])
+# asset_list.drop(columns=["sap_points_avg"], inplace=True)
+#
+# asset_list["sap_points"] = asset_list["sap_points"].fillna(avg_solar_points)
+# asset_list["post_works_sap"] = asset_list["epc_sap_score_on_register"] + asset_list["sap_points"]
+# asset_list["post_works_epc"] = asset_list["post_works_sap"].apply(lambda x: sap_to_epc(x))
+# asset_list["starting_half_band"] = asset_list["epc_sap_score_on_register"].apply(lambda x: Funding.get_sap_band(x))
+# asset_list["ending_half_band"] = asset_list["post_works_sap"].apply(lambda x: Funding.get_sap_band(x))
+# asset_list["floor_area_band"] = asset_list["epc_total_floor_area"].apply(lambda x: Funding.get_floor_area_band(x))
+#
+# asset_list["ending_half_band"] = np.where(
+#     (asset_list["post_works_epc"] == asset_list["epc_rating_on_register"]),
+#     "Low_C",
+#     asset_list["ending_half_band"]
+# )
+# # Realistically, we'll take the properties to a low C at worst
+# asset_list["ending_half_band"] = np.where(
+#     (asset_list["post_works_sap"] < 69),
+#     "Low_C",
+#     asset_list["ending_half_band"]
+# )
+#
+# asset_list = asset_list.merge(
+#     abs_matrix, how="left", left_on=["starting_half_band", "ending_half_band", "floor_area_band"],
+#     right_on=['Starting Band', 'Finishing Band', 'Floor Area Segment', ]
+# )
+# asset_list = asset_list.drop(columns=['Starting Band', 'Finishing Band', 'Floor Area Segment'])
+#
+# asset_list = asset_list.rename(
+#     columns={"Cost Savings": "funding_abs"}
+# )
+#
+# print(asset_list["domna_property_id"].duplicated().sum())
+#
+# # Store this data
+# asset_list.to_csv(
+#     "/Users/khalimconn-kowlessar/Documents/hestia/Instagroup Review/livewest_sw_solar_abs_estimates-solar.csv",
+#     index=False
+# )
+
+# Cavity process!
+cwi_sap_points = []
+for r in asset_list_epc_client.non_invasive_recommendations:
+    if not r.get("recommendations"):
+        continue
+    cwi_recommendations = [
+        x for x in r["recommendations"] if "cavity_wall_insulation" in x["type"]
+    ]
+    if cwi_recommendations:
+        cwi_recommendations = cwi_recommendations[0]
+    else:
+        continue
+
+    address = r["address"]
+    postcode = r["postcode"]
+
+    cwi_sap_points.append(
+        {
+            "address": address,
+            "postcode": postcode,
+            "type": cwi_recommendations["type"],
+            "sap_points": cwi_recommendations["sap_points"]
+        }
+    )
+
+cwi_sap_points = pd.DataFrame(cwi_sap_points)  # NOTE(review): discarded by the read_csv below - confirm intended
+cwi_sap_points = pd.read_csv(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Abri/Abs Rates/cwi_sap_points.csv",
+)
+cwi_sap_points.drop_duplicates(subset=["address", "postcode"], inplace=True)
 avg_cwi_points_by_postcode = cwi_sap_points.groupby(["postcode"]).agg({"sap_points": "mean"}).reset_index()
 avg_cwi_points = cwi_sap_points["sap_points"].median()
 asset_list = asset_list.merge(
@@ -186,13 +199,22 @@ asset_list["funding_scheme"] = np.where(
     "GBIS",
     "ECO4"
 )
+# Note - anything that is EPC E or below that doesn't go up to a C will be GBIS
+# To detect this, if the starting SAP score is 54 or below and the ending SAP score is 68 or below
+# we will assume it is GBIS
+asset_list["funding_scheme"] = np.where(
+    (asset_list["post_works_sap"] < 69) & (asset_list["epc_sap_score_on_register"] < 55),
+    "GBIS",
+    asset_list["funding_scheme"]
+)
+
 asset_list = asset_list.merge(
     abs_matrix, how="left", left_on=["starting_half_band", "ending_half_band", "floor_area_band"],
     right_on=['Starting Band', 'Finishing Band', 'Floor Area Segment', ]
 )
 asset_list = asset_list.drop(columns=['Starting Band', 'Finishing Band', 'Floor Area Segment'])
 
-# Using CWI solid 1.7 -> 0.3 rates
+# Using CWI 0.033 as the partial project score
 cwi_pps_matrix = pps_matrix[
     pps_matrix["Measure_Type"].isin(["CWI_0.033"])
 ]
@@ -220,10 +242,26 @@ asset_list["funding_abs"] = np.where(
     asset_list["Cost Savings"]
 )
 
-asset_list["domna_property_id"].duplicated().sum()
+from recommendations.recommendation_utils import (
+    estimate_external_wall_area,
+)
+
+# For some reason, estimated insulation wall area is missing
+asset_list["estimated_insulation_wall_area"] = asset_list.apply(
+    lambda x: estimate_external_wall_area(
+        num_floors=x["attribute_est_number_floors"],
+        floor_height=(
+            float(x["epc_floor_height"]) if
+            not pd.isnull(x["epc_floor_height"]) else 2.5
+        ),
+        perimeter=x["attribute_est_perimter"],
+        built_form=x["epc_archetype"]
+    ),
+    axis=1
+)
 
 # Store this data
 asset_list.to_csv(
-    "/Users/khalimconn-kowlessar/Documents/hestia/Instagroup Review/livewest_sw_abs_estimates.csv",
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Abri/Abs Rates/Abri CWI ABS Estimates.csv",
     index=False
 )
diff --git a/asset_list/app.py b/asset_list/app.py
index efc9cf44..37d9ae0d 100644
--- a/asset_list/app.py
+++ b/asset_list/app.py
@@ -60,11 +60,11 @@ def app():
     """
 
     # TODO: Delete me
-    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild"
-    data_filename = "Bromford Asset List.xlsx"
-    sheet_name = "Asset List"
-    postcode_column = 'PostCode'
-    fulladdress_column = "FullAddress"
+    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/NRLA/"
+    data_filename = "20250716 Asset List.xlsx"
+    sheet_name = "Sheet 1"
+    postcode_column = 'Postcode'
+    fulladdress_column = "Full Address"
     address1_column = None
     address1_method = "house_number_extraction"
     address_cols_to_concat = []
@@ -76,24 +76,93 @@ def app():
     landlord_wall_construction = None
     landlord_heating_system = None
     landlord_existing_pv = None
-    landlord_property_id = "Asset"
+    landlord_property_id = "Row ID"
     outcomes_filename = []
     outcomes_sheetname = []
     outcomes_postcode = []
     outcomes_houseno = []
     outcomes_address = []
-    outcomes_id = [None]
-    master_filepaths = [os.path.join("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/",
-                                     "Needs ID/SOLAR PV ONLY-Table 1.csv")]
+    outcomes_id = []
+    master_filepaths = []
     master_to_asset_list_filepath = None
     asset_list_header = 0
     landlord_block_reference = None
-    master_id_colnames = [None]
+    master_id_colnames = []
     landlord_roof_construction = None
     phase = False
     landlord_sap = None
     ecosurv_landlords = None
 
+    # Southend
+    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southend/July 2025 Programme"
+    data_filename = "SOUTHEND - RYAN.xlsx"
+    sheet_name = "July 2025 Surveys"
+    postcode_column = 'Postcode'
+    fulladdress_column = "Full postal address"
+    address1_column = None
+    address1_method = "house_number_extraction"
+    address_cols_to_concat = []
+    missing_postcodes_method = None
+    landlord_year_built = "Property age"
+    landlord_os_uprn = None
+    landlord_property_type = "Property type"
+    landlord_built_form = "Property type"
+    landlord_wall_construction = None
+    landlord_heating_system = None
+    landlord_existing_pv = None
+    landlord_property_id = "ID"
+    outcomes_filename = []
+    outcomes_sheetname = []
+    outcomes_postcode = []
+    outcomes_houseno = []
+    outcomes_address = []
+    outcomes_id = []
+    master_filepaths = []
+    master_to_asset_list_filepath = None
+    asset_list_header = 0
+    landlord_block_reference = None
+    master_id_colnames = []
+    landlord_roof_construction = None
+    phase = False
+    landlord_sap = None
+    ecosurv_landlords = None
+
+    # For Rooftop
+    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Rooftop"
+    data_filename = "Rooftop Asset List - July 2025.xlsx"
+    sheet_name = "Sheet1"
+    postcode_column = 'post_code'
+    fulladdress_column = None
+    address1_column = "add_1"
+    address1_method = None
+    address_cols_to_concat = [
+        "add_1", "add_2", "add_3", "add_4"
+    ]
+    missing_postcodes_method = None
+    landlord_year_built = "date_built"
+    landlord_os_uprn = None
+    landlord_property_type = "ConstructionStyle"
+    landlord_built_form = "ConstructionStyle"
+    landlord_wall_construction = None
+    landlord_heating_system = "Description"
+    landlord_existing_pv = None
+    landlord_property_id = "PropertyCode"
+    outcomes_filename = [os.path.join(data_folder, "Rooftop_Outcomes.xlsx")]
+    outcomes_sheetname = ["OUTCOMESs"]
+    outcomes_postcode = ["POSTCODE"]
+    outcomes_houseno = ["NO"]
+    outcomes_address = ["ADDRESS"]
+    outcomes_id = [None]
+    master_filepaths = [os.path.join(data_folder, "Master.csv")]
+    master_to_asset_list_filepath = None
+    asset_list_header = 1
+    landlord_block_reference = "bl_rec_ref"
+    master_id_colnames = [None]
+    landlord_roof_construction = None
+    phase = False
+    landlord_sap = None
+    ecosurv_landlords = "rooftop"
+
     # For Housing
     data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/For Housing/New Programme July 2025"
     data_filename = "FOR HOUSING Asset List (Combined).xlsx"
diff --git a/asset_list/hubspot/prepare_for_hubspot.py b/asset_list/hubspot/prepare_for_hubspot.py
index ba2a2d23..56ce37ed 100644
--- a/asset_list/hubspot/prepare_for_hubspot.py
+++ b/asset_list/hubspot/prepare_for_hubspot.py
@@ -45,13 +45,13 @@ def app():
 
     # inputs:
     reconcile_programme = True  # If True, the hubspot upload will include all properties with a project code
-    customer_domain = "https://calico.org.uk"
-    installer_name = "WARM FRONT"
+    customer_domain = "https://southend.gov.uk"
+    installer_name = "J & J CRUMP"
     asset_list_filepath = (
-        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Calico/Hubspot/07.04 CALICO - Final List - "
-        "Standardised.xlsx"
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southend/July 2025 Programme/SOUTHEND - RYAN - "
+        "Standardised 2.xlsx"
     )
-    asset_list_sheet_name = "Final Route March"
+    asset_list_sheet_name = "Standardised Asset List"
     asset_list_header = 0
 
     contact_details_filepath = None
@@ -107,7 +107,7 @@ def app():
         raise ValueError("FIX MEEE")
 
     if pd.isnull(asset_list.hubspot_data['Deal Stage <DEAL dealstage>']).any():
-        raise ValueError("Warning: Some rows have missing project codes. These will not be uploaded to HubSpot.")
+        raise ValueError("Warning: Some rows have missing deal stage. These will not be uploaded to HubSpot.")
 
     # Just store locally
     asset_list.hubspot_data.to_csv(output_filepath, index=False, encoding="utf-8-sig")
diff --git a/asset_list/mappings/built_form.py b/asset_list/mappings/built_form.py
index 4ebe016f..c9cd061f 100644
--- a/asset_list/mappings/built_form.py
+++ b/asset_list/mappings/built_form.py
@@ -384,6 +384,7 @@ BUILT_FORM_MAPPINGS = {
     'Cottage Flat': 'ground floor',
     'Maisonette Over Shop': 'mid-floor',
     'Medium Rise Flat': 'mid-floor',
-    'Maisonette Medium Rise': 'unknown'
+    'Maisonette Medium Rise': 'unknown',
+    'End-terraced house': 'end-terrace'
 
 }
diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py
index ef73f133..2a388b2f 100644
--- a/backend/app/plan/schemas.py
+++ b/backend/app/plan/schemas.py
@@ -102,3 +102,9 @@ class PlanTriggerRequest(BaseModel):
     # If true, before optimising the engine will select a slightly larger package, to account for the SAP 10 causing
     # scores to drop by a few points
     simulate_sap_10: Optional[bool] = False
+
+    # Optional fields describing the format of the asset list being used.
+    # NOTE: no trailing commas here - "= None," would make the default the tuple (None,) rather than None.
+    file_type: Optional[Literal["csv", "xlsx"]] = None
+    file_format: Optional[Literal["domna_asset_list"]] = None
+    sheet_name: Optional[str] = None
diff --git a/etl/customers/abri/abs_rates.py b/etl/customers/abri/abs_rates.py
new file mode 100644
index 00000000..f9f2f98e
--- /dev/null
+++ b/etl/customers/abri/abs_rates.py
@@ -0,0 +1,12 @@
+import pandas as pd
+
+project = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Abri/Abs Rates/Desktop ABRI data - Standardised After "
+    "Programmes.xlsx"
+)
+
+cavity = project[project["cavity_reason"].isin(
+    ["EPC Shows Empty Cavity: SAP Rating 54 or less", "EPC Shows Empty Cavity: SAP Rating 55-68"]
+)]
+
+# Pull the data
diff --git a/etl/customers/stonewater/Wave 3 Preparation.py b/etl/customers/stonewater/Wave 3 Preparation.py
deleted file mode 100644
index 95fe4fcd..00000000
--- a/etl/customers/stonewater/Wave 3 Preparation.py	
+++ /dev/null
@@ -1,4293 +0,0 @@
-import os
-from urllib import parse
-from fuzzywuzzy import fuzz
-
-import PyPDF2
-import re
-import pandas as pd
-import numpy as np
-from tqdm import tqdm
-from collections import Counter
-from scipy.optimize import linprog
-
-from SearchEpc import SearchEpc
-from utils.s3 import read_pickle_from_s3
-
-CUSTOMER_FOLDER_PATH = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater"
-SURVEY_FOLDERS = os.path.join(CUSTOMER_FOLDER_PATH, "StonewaterSurveys_{i}")
-NUM_FOLDERS = 15
-
-
-def sap_to_epc(sap_points: int | float):
-    """
-    Simple utility function to convert SAP points to EPC rating.
-    :param sap_points: numerical value of SAP points, typically between 0 and 100
-    :return:
-    """
-
-    if sap_points <= 0:
-        raise ValueError("SAP points should be above 0.")
-
-    if sap_points >= 92:
-        return "A"
-    elif sap_points >= 81:
-        return "B"
-    elif sap_points >= 69:
-        return "C"
-    elif sap_points >= 55:
-        return "D"
-    elif sap_points >= 39:
-        return "E"
-    elif sap_points >= 21:
-        return "F"
-    else:
-        return "G"
-
-
-def extract_wall_details_summary(text):
-    """
-    Extracts wall type, insulation, dry-lining, and thickness for each building part,
-    including any alternative wall details within the 7.0 Walls section of the summary PDF text.
-    """
-    # Define data structure to hold all building part wall entries
-    wall_data = []
-
-    # Locate the entire 7.0 Walls section
-    wall_section = re.search(r"7\.0 Walls:\n(.*?)\n8\.0 Roofs:", text, re.DOTALL).group(1)
-
-    # Define pattern to match each building part's wall entry within the section
-    building_part_pattern = re.compile(
-        r"(Main Property|1st Extension|2nd Extension|[\w\s]+)\n"  # Matches each building part label
-        r"Type\s+(.*?)\n"  # Matches main wall Type
-        r"Insulation\s+(.*?)\n"  # Matches main wall Insulation
-        r"(Dry-lining\s+(.*?)\n)?"  # Optional main wall Dry-lining
-        r"Wall Thickness Unknown\s+(.*?)\n"  # Matches main wall Thickness Unknown
-        r"Wall Thickness \[mm\]\s+(\d+)",  # Matches main wall Thickness
-        re.DOTALL
-    )
-
-    # Define pattern to capture alternative wall details, if present
-    alternative_wall_pattern = re.compile(
-        r"Alternative Wall Area.*?\n"  # Matches start of alternative wall section
-        r"Alternative Type\s+(.*?)\n"  # Matches alternative wall Type
-        r"Alternative Insulation\s+(.*?)\n"  # Matches alternative wall Insulation
-        r"(Alternative Dry-lining\s+(.*?)\n)?"  # Optional Alternative Dry-lining
-        r"Alternative Wall Thickness Unknown\s+(.*?)\n"  # Matches alternative wall Thickness Unknown
-        r"Alternative Wall Thickness\s+(\d+)",  # Matches alternative wall Thickness
-        re.DOTALL
-    )
-
-    # Find all building part entries within the 7.0 Walls section
-    for match in building_part_pattern.finditer(wall_section):
-        wall_label = match.group(1).strip()
-        main_wall_type = match.group(2).strip()
-        main_wall_insulation = match.group(3).strip()
-        main_wall_dry_lining = match.group(5).strip() if match.group(5) else "N/A"
-        main_wall_thickness_unknown = match.group(6).strip()
-        main_wall_thickness = int(match.group(7))
-
-        # Initialize dictionary for this wall entry
-        wall_entry = {
-            "Building Part": wall_label,
-            "Wall Type": main_wall_type,
-            "Wall Insulation": main_wall_insulation,
-            "Wall Dry-lining": main_wall_dry_lining,
-            "Wall Thickness Unknown": main_wall_thickness_unknown,
-            "Wall Thickness (mm)": main_wall_thickness,
-            "Alternative Wall Type": None,
-            "Alternative Wall Insulation": None,
-            "Alternative Wall Dry-lining": "N/A",
-            "Alternative Wall Thickness Unknown": None,
-            "Alternative Wall Thickness (mm)": None,
-        }
-
-        # Check if there's an alternative wall section following this wall entry
-        alt_match = alternative_wall_pattern.search(wall_section, match.end())
-        if alt_match:
-            wall_entry["Alternative Wall Type"] = alt_match.group(1).strip()
-            wall_entry["Alternative Wall Insulation"] = alt_match.group(2).strip()
-            wall_entry["Alternative Wall Dry-lining"] = alt_match.group(4).strip() if alt_match.group(4) else "N/A"
-            wall_entry["Alternative Wall Thickness Unknown"] = alt_match.group(5).strip()
-            wall_entry["Alternative Wall Thickness (mm)"] = int(alt_match.group(6))
-
-        # Append each building part as a dictionary in the wall_data list
-        wall_data.append(wall_entry)
-
-    return wall_data
-
-
-def extract_summary_report(pdf_path):
-    """
-    Extracts specific data from the provided PDF file.
-    Data includes:
-    - Current SAP rating
-    - Fuel Bill
-    - Address
-    """
-
-    data = {
-        "Address": None,
-        "Postcode": None,
-        "Current SAP Rating": None,
-        "Current EPC Band": None,
-        "Fuel Bill": None,
-        "Main Building Age Band": None,
-        "Number of Storeys": None,
-        "Window Age Description": None,
-        "Window Age Description Proportion (%)": None,
-        "Secondary Window Age Description": None,
-        "Secondary Window Age Description Proportion (%)": None,
-        "Number of Windows": None,
-        "Total Number of Doors": None,
-        "Number of Insulated Doors": None,
-        "Existing Primary Heating System": None,
-        "Existing Primary Heating PCDF Reference": None,
-        "Existing Primary Heating Controls": None,
-        "Existing Primary Heating % of Heat": None,
-        "Existing Secondary Heating System": None,
-        "Existing Secondary Heating PCDF Reference": None,
-        "Existing Secondary Heating Controls": None,
-        "Existing Secondary Heating % of Heat": None,
-        "Secondary Heating Code": None,
-        "Water Heating Code": None,
-        'Total Floor Area (m2)': None,
-        'Total Ground Floor Area (m2)': None,
-        'RIR Floor Area': None,
-        'Main Building Wall Area (m2)': None,
-        'First Extension Wall Area (m2)': None,
-        "Number of Light Fittings": None,
-        "Number of LEL Fittings": None,
-        "Number of fittings needing LEL": None,
-        "Main Roof Type": None,
-        "Main Roof Insulation": None,
-        "Main Roof Insulation Thickness": None,
-        "Main Wall Type": None,
-        "Main Wall Insulation": None,
-        "Main Wall Dry-lining": None,
-        "Main Wall Thickness": None,
-        "Main Building Alternative Wall Type": None,
-        "Main Building Alternative Wall Insulation": None,
-        "Main Building Alternative Wall Dry-lining": None,
-        "Main Building Alternative Wall Thickness": None,
-    }
-
-    with (open(pdf_path, "rb") as file):
-        reader = PyPDF2.PdfReader(file)
-        text = ""
-        for page in reader.pages:
-            text += page.extract_text()
-
-        # Extract Current SAP rating
-        sap_match = re.search(r"Current SAP rating:\s*([A-Z] \d+)", text)
-        data["Current SAP Rating"] = sap_match.group(1).split(" ")[1]
-
-        # Extract age
-        age_band_match = re.search(
-            r"3\.0 Date Built:\s*Main Property\s*[A-Z]?\s*(\d{4}-\d{4}|before \d{4}|\d{4} onwards)",
-            text
-        )
-        data["Main Building Age Band"] = age_band_match.group(1)
-
-        # Number of storeys
-        storeys_match = re.search(r"Number of Storeys:\s*(\d+)", text)
-        data["Number of Storeys"] = int(storeys_match.group(1))
-
-        # Extract Carbon Emissions
-        # carbon_match = re.search(r"Emissions \(t/year\):\s*([\d.]+)\s*tonnes", text)
-        # data["Carbon Emissions (t/year)"] = float(carbon_match.group(1))
-
-        # Extract Fuel Bill
-        fuel_bill_match = re.search(r"Fuel Bill:\s*£(\d+)", text)
-        data["Fuel Bill"] = f"£{fuel_bill_match.group(1)}"
-
-        # Extract individual address components
-        postcode = re.search(r"Postcode:\s*(.*?)\nRegion:", text)
-        # region = re.search(r"Region:\s*(.*?)\nHouse Name:", text)
-        house_name = re.search(r"House Name:\s*(.*?)\nHouse No:", text)
-        house_no = re.search(r"House No:\s*(.*?)\nStreet:", text)
-        street = re.search(r"Street:\s*(.*?)\nLocality:", text)
-        locality = re.search(r"Locality:\s*(.*?)\nTown:", text)
-        town = re.search(r"Town:\s*(.*?)\nCounty:", text)
-        county = re.search(r"County:\s*(.*?)\nProperty Tenure:", text)
-
-        # Clean extracted values and remove any prefixes
-        address_parts = [
-            house_no.group(1).strip() if house_no else "",
-            house_name.group(1).strip() if house_name else "",
-            street.group(1).strip() if street else "",
-            locality.group(1).strip() if locality else "",
-            town.group(1).strip() if town else "",
-            county.group(1).strip() if county else "",
-            postcode.group(1).strip() if postcode else ""
-        ]
-
-        # Join non-empty parts with a comma
-        data["Address"] = ", ".join([part for part in address_parts if part])
-        data["Postcode"] = postcode.group(1).strip()
-
-        windows_section = re.search(r"Windows\s*(.*?)\s*Draught Proofing", text, re.DOTALL)
-        windows_text = windows_section.group(1)
-        window_data = extract_window_age_description(windows_text)
-        data.update(window_data)
-
-        # Extract Total Number of Doors
-        total_doors_match = re.search(r"Total Number of Doors\s*(\d+)", text)
-        data["Total Number of Doors"] = int(total_doors_match.group(1))
-
-        # Extract Number of Insulated Doors
-        insulated_doors_match = re.search(r"Number of Insulated Doors\s*(\d+)", text)
-        data["Number of Insulated Doors"] = int(insulated_doors_match.group(1))
-
-        # Extract heating system
-        # Extract Primary Heating Data
-        # Extract Primary Heating Section
-        primary_heating_section1 = re.search(r"Main\s*Heating1\s*(.*?)\s*Main\s*Heating2", text, re.DOTALL)
-        primary_heating_section2 = re.search(r"Main\s*Heating1\s*(.*?)\s*Water\s*Heating", text, re.DOTALL)
-        primary_heating_section = primary_heating_section1 if primary_heating_section1 else primary_heating_section2
-
-        primary_text = primary_heating_section.group(1)
-
-        data["Existing Primary Heating System"] = re.search(r"Main Heating Code\s*(.*?)\n", primary_text).group(
-            1).strip()
-        data["Existing Primary Heating PCDF Reference"] = re.search(
-            r"PCDF boiler Reference\s*(\d+)", primary_text
-        ).group(1)
-        data["Existing Primary Heating Controls"] = re.search(
-            r"Main Heating Controls\s*(.*?)\n", primary_text
-        ).group(1).strip()
-        data["Existing Primary Heating % of Heat"] = int(
-            re.search(r"Percentage of Heat\s*(\d+)\s*%", primary_text).group(1)
-        )
-
-        # Extract Secondary Heating Section
-        secondary_heating_section = re.search(r"Main\s*Heating2\s*(.*?)\s*Water\s*Heating", text, re.DOTALL)
-
-        if secondary_heating_section is None:
-            data["Existing Secondary Heating System"] = ""
-            data["Existing Secondary Heating PCDF Reference"] = ""
-            data["Existing Secondary Heating Controls"] = ""
-            data["Existing Secondary Heating % of Heat"] = 0
-
-        else:
-            secondary_text = secondary_heating_section.group(1)
-
-            main_heating_code_match_secondary = re.search(
-                r"Main Heating Code\s*(.*?)(?=\n|Percentage of Heat)", secondary_text
-            )
-            data["Existing Secondary Heating System"] = main_heating_code_match_secondary.group(1).strip()
-            data["Existing Secondary Heating PCDF Reference"] = re.search(r"PCDF boiler Reference\s*(\d+)",
-                                                                          secondary_text).group(1)
-            second_heating_controls_match = re.search(r"Main Heating Controls\s*(.*?)\n", secondary_text)
-            data["Existing Secondary Heating Controls"] = (
-                second_heating_controls_match.group(1).strip() if second_heating_controls_match else ""
-            )
-            data["Existing Secondary Heating % of Heat"] = int(
-                re.search(r"Percentage of Heat\s*(\d+)\s*%", secondary_text).group(1)
-            )
-
-        # Extract Secondary Heating and Water Heating Codes
-        secondary_heating_code_match = re.search(r"Secondary Heating Code\s*(.*?)\n", text)
-        water_heating_code_match = re.search(r"Water Heating Code\s*(.*?)\n", text)
-
-        if data["Existing Secondary Heating System"] == "":
-            data["Secondary Heating Code"] = ""
-        else:
-            data["Secondary Heating Code"] = secondary_heating_code_match.group(
-                1).strip() if secondary_heating_code_match else ""
-
-        data["Water Heating Code"] = water_heating_code_match.group(1).strip()
-
-        dimensions = extract_building_parts_summary(text)
-        data.update(dimensions)
-
-        data["Number of Light Fittings"] = int(re.search(r"Total number of light fittings\s*(\d+)", text).group(1))
-        data["Number of LEL Fittings"] = int(re.search(r"Total number of L.E.L. fittings\s*(\d+)", text).group(1))
-        data["Number of fittings needing LEL"] = data["Number of Light Fittings"] - data["Number of LEL Fittings"]
-
-        extracted_roof_data = extract_roof_details_summary(text)
-        main_roof_data = [roof for roof in extracted_roof_data if "Main" in roof["Building Part"]][0]
-        data["Main Roof Type"] = main_roof_data["Roof Type"]
-        data["Main Roof Insulation"] = main_roof_data["Roof Insulation"]
-        data["Main Roof Insulation Thickness"] = main_roof_data["Roof Insulation Thickness"]
-
-        walls_data = extract_wall_details_summary(text)
-        # Get the main building wall data
-        main_building_walls = [wall for wall in walls_data if "Main" in wall["Building Part"]][0]
-        data["Main Wall Type"] = main_building_walls["Wall Type"]
-        data["Main Wall Insulation"] = main_building_walls["Wall Insulation"]
-        data["Main Wall Dry-lining"] = main_building_walls["Wall Dry-lining"]
-        data["Main Wall Thickness"] = main_building_walls["Wall Thickness (mm)"]
-        data["Main Building Alternative Wall Type"] = main_building_walls["Alternative Wall Type"]
-        data["Main Building Alternative Wall Insulation"] = main_building_walls["Alternative Wall Insulation"]
-        data["Main Building Alternative Wall Dry-lining"] = main_building_walls["Alternative Wall Dry-lining"]
-        data["Main Building Alternative Wall Thickness"] = main_building_walls["Alternative Wall Thickness (mm)"]
-
-    return data
-
-
-def extract_window_age_description(windows_text):
-    """
-    Extracts the most common window age description and its proportion.
-
-    Parameters:
-        windows_text (str): The text section containing window data.
-
-    Returns:
-        dict: A dictionary with the most common window age description and its proportion.
-    """
-    # Clean up windows_text by removing line breaks for better pattern matching
-    windows_text = windows_text.replace("\n", "")
-
-    # Define possible window age descriptions
-    window_descriptions = [
-        "Double post or during 2002",
-        "Double pre 2002",
-        "Double with unknown install date",
-        "Secondary glazing",
-        "Triple glazing",
-        "Single glazing",
-    ]
-
-    # Count occurrences of each description
-    description_counts = Counter()
-    for description in window_descriptions:
-        matches = re.findall(re.escape(description), windows_text)
-        description_counts[description] = len(matches)
-
-    if not description_counts or not sum(description_counts.values()):
-        raise ValueError("Failed to extract window data.")
-
-    # Determine the most common description and calculate its proportion
-    most_common_description, window_count = description_counts.most_common(1)[0]
-    window_proportion = window_count / sum(description_counts.values()) * 100
-
-    # Get the second most common and the proportion
-    if window_proportion == 100:
-        second_most_common_description = None
-        second_most_common_proportion = 0
-    else:
-        second_most_common_description, second_window_count = description_counts.most_common(2)[1]
-        second_most_common_proportion = second_window_count / sum(description_counts.values()) * 100
-
-    return {
-        "Window Age Description": most_common_description,
-        "Window Age Description Proportion (%)": window_proportion,
-        "Secondary Window Age Description": second_most_common_description,
-        "Secondary Window Age Description Proportion (%)": second_most_common_proportion,
-        "Number of Windows": sum(description_counts.values())
-    }
-
-
-def extract_building_parts_epr(text):
-    """
-    Extracts building parts and associated dimensions from the provided PDF text.
-    Each building part (main and extensions) includes floor area, room height, perimeter, and party wall length.
-    Handles cases where 'Room(s) in Roof area' appears within the part_name with only the Floor Area information.
-    """
-    data = []
-
-    # Pattern to locate each "Building part" section
-    building_part_pattern = re.compile(
-        r"Construction details: Building part: (.*?)\nFloor Area \[m2\] Room Height \[m\] Perimeter \[m\] Party "
-        r"Wall Length \[m\]\n(.*?)(?=Construction details|Data inputs|$)",
-        re.DOTALL
-    )
-
-    # Extract each building part
-    for match in building_part_pattern.finditer(text):
-        part_name = match.group(1).strip()
-        floor_data = match.group(2)
-
-        # Check for "Room(s) in Roof area" within the part_name
-        room_in_roof_match = re.search(r"Room\(s\) in Roof area:\s*([\d.]+)", part_name)
-        if room_in_roof_match:
-            # Extract Room in Roof area and add it as a separate entry
-            floor_area = float(room_in_roof_match.group(1))
-            # Clean up part name to exclude "Room(s) in Roof area" from the building part name
-            cleaned_part_name = re.sub(r" - built in.*|Room\(s\) in Roof area:.*", "", part_name).strip()
-            data.append({
-                "Building Part": cleaned_part_name,
-                "Floor Level": "Room in Roof",
-                "Floor Area (m2)": floor_area,
-                "Room Height (m)": None,  # Placeholder for missing data
-                "Perimeter (m)": None,  # Placeholder for missing data
-                "Party Wall Length (m)": None  # Placeholder for missing data
-            })
-        else:
-            # Clean up part name to keep only the descriptor (e.g., "Main" or "1st Extension")
-            cleaned_part_name = re.sub(r" - built in.*", "", part_name).strip()
-
-        # Pattern to match each floor's measurements in standard cases
-        floor_pattern = re.compile(
-            r"(Lowest floor|First floor|Second floor)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)"
-        )
-
-        # Extract floor details for each building part
-        for floor_match in floor_pattern.finditer(floor_data):
-            floor_level = floor_match.group(1)
-            floor_area = float(floor_match.group(2))
-            room_height = float(floor_match.group(3))
-            perimeter = float(floor_match.group(4))
-            party_wall_length = float(floor_match.group(5))
-
-            # Append to data
-            data.append({
-                "Building Part": cleaned_part_name,
-                "Floor Level": floor_level,
-                "Floor Area (m2)": floor_area,
-                "Room Height (m)": room_height,
-                "Perimeter (m)": perimeter,
-                "Party Wall Length (m)": party_wall_length
-            })
-
-    # Aggregated data calculation
-    main_building = [part for part in data if "Main" in part["Building Part"]]
-    first_extension = [part for part in data if "1st Extension" in part["Building Part"]]
-    dimensions = {
-        "Total Floor Area (m2)": sum([part["Floor Area (m2)"] for part in data]),
-        "Total Ground Floor Area (m2)": sum(
-            [part["Floor Area (m2)"] for part in data if "Lowest floor" in part["Floor Level"]]
-        ),
-        "RIR Floor Area": sum(
-            [part["Floor Area (m2)"] for part in data if "Room in Roof" in part["Floor Level"]]
-        ),
-        "Main Building Wall Area (m2)": sum(
-            [x["Perimeter (m)"] * x["Room Height (m)"] for x in main_building if
-             x["Perimeter (m)"] and x["Room Height (m)"]]
-        ),
-        "First Extension Wall Area (m2)": sum(
-            [x["Perimeter (m)"] * x["Room Height (m)"] for x in first_extension if
-             x["Perimeter (m)"] and x["Room Height (m)"]]
-        ) if first_extension else 0,
-    }
-
-    return dimensions
-
-
-def extract_building_parts_summary(text):
-    """
-    Extracts building parts and associated dimensions from the summary report PDF.
-    This includes Main Property, multiple extensions if they exist, and Room in Roof areas.
-    """
-    data = []
-
-    # Locate the Dimensions section
-    dimensions_section = re.search(
-        r"Dimensions:\s*Dimension type: Internal\n(.*?)\n5\.0 Conservatory:", text, re.DOTALL
-    )
-    if not dimensions_section:
-        dimensions_section = re.search(
-            r"Dimensions:\s*Dimension type: External\n(.*?)\n5\.0 Conservatory:", text, re.DOTALL
-        )
-        if not dimensions_section:
-            raise ValueError("Failed to locate dimensions section in the text.")
-
-    dimensions_text = dimensions_section.group(1)
-
-    # Pattern to extract each building part, starting from Main Property and including extensions
-    building_part_pattern = re.compile(
-        r"(Main Property|\d+(?:st|nd|rd|th) Extension)\s*"
-        r"(.*?)(?=\d+(?:st|nd|rd|th) Extension|5\.0 Conservatory|$)",
-        re.DOTALL
-    )
-
-    # Loop through each building part match, including Main Property and extensions
-    for match in building_part_pattern.finditer(dimensions_text):
-        part_name = match.group(1)
-        floor_data = match.group(2)
-
-        # Pattern to extract floor details: Floor Level, Floor Area, Room Height, Perimeter, Party Wall Length
-        floor_pattern = re.compile(
-            r"(1st Floor|Lowest Floor|Second floor):\s*([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)"
-        )
-
-        # Extract data for each floor within the building part
-        for floor_match in floor_pattern.finditer(floor_data):
-            floor_level = floor_match.group(1)
-            floor_area = float(floor_match.group(2))
-            room_height = float(floor_match.group(3))
-            perimeter = float(floor_match.group(4))
-            party_wall_length = float(floor_match.group(5))
-
-            # Append to data list
-            data.append({
-                "Building Part": part_name,
-                "Floor Level": floor_level,
-                "Floor Area (m2)": floor_area,
-                "Room Height (m)": room_height,
-                "Perimeter (m)": perimeter,
-                "Party Wall Length (m)": party_wall_length
-            })
-
-        # Check specifically for "Room(s) in Roof" entries, which only have Floor Area
-        room_in_roof_pattern = re.compile(r"Room\(s\) in Roof:\s*([\d.]+)")
-        room_in_roof_match = room_in_roof_pattern.search(floor_data)
-        if room_in_roof_match:
-            floor_area = float(room_in_roof_match.group(1))
-            data.append({
-                "Building Part": part_name,
-                "Floor Level": "Room in Roof",
-                "Floor Area (m2)": floor_area,
-                "Room Height (m)": None,  # Placeholder for missing data
-                "Perimeter (m)": None,  # Placeholder for missing data
-                "Party Wall Length (m)": None  # Placeholder for missing data
-            })
-
-    # Calculate aggregated dimensions
-    main_property = [part for part in data if "Main Property" in part["Building Part"]]
-    first_extensions = [part for part in data if "1st Extension" in part["Building Part"]]
-    dimensions = {
-        "Total Floor Area (m2)": sum([part["Floor Area (m2)"] for part in data]),
-        "Total Ground Floor Area (m2)": sum(
-            [part["Floor Area (m2)"] for part in data if "Lowest Floor" in part["Floor Level"]]
-        ),
-        "RIR Floor Area": sum(
-            [part["Floor Area (m2)"] for part in data if "Room in Roof" in part["Floor Level"]]
-        ),
-        "Main Building Wall Area (m2)": sum([x["Perimeter (m)"] * x["Room Height (m)"] for x in main_property if
-                                             x["Perimeter (m)"] and x["Room Height (m)"]]),
-        "First Extension Wall Area (m2)": sum(
-            [x["Perimeter (m)"] * x["Room Height (m)"] for x in first_extensions if
-             x["Perimeter (m)"] and x["Room Height (m)"]]
-        ),
-    }
-
-    return dimensions
-
-
-def extract_roof_details_epr(text):
-    """
-    Extracts roof type, insulation, and insulation thickness for each building part
-    in the provided EPR PDF text.
-    """
-    # Define data structure to hold results
-    roof_data = []
-
-    # Locate each building part section
-    building_part_pattern = re.compile(
-        r"Construction details: Building part: (.*?)\n(.*?)(?=Conservatory|Construction details|$)",
-        re.DOTALL
-    )
-
-    # Extract each building part's data, including roof details
-    for match in building_part_pattern.finditer(text):
-        part_name = match.group(1).strip()
-
-        # Clean up the building part name
-        cleaned_part_name = re.sub(r" - built in.*|Room\(s\) in Roof area:.*", "", part_name).strip()
-
-        part_details = match.group(2)
-
-        # Extract Roof Type, Roof Insulation, and Roof Insulation Thickness
-        roof_type_match = re.search(r"Roof Type:\s*(.*?)(?=\n|$)", part_details)
-        roof_insulation_match = re.search(r"Roof Insulation:\s*(.*?)(?=\n|$)", part_details)
-        roof_insulation_thickness_match = re.search(r"Roof Insulation Thickness:\s*(.*?)(?=\n|$)", part_details)
-
-        # Store results for this building part
-        roof_data.append({
-            "Building Part": cleaned_part_name,
-            "Roof Type": roof_type_match.group(1).strip() if roof_type_match else None,
-            "Roof Insulation": roof_insulation_match.group(1).strip() if roof_insulation_match else None,
-            "Roof Insulation Thickness": roof_insulation_thickness_match.group(
-                1).strip() if roof_insulation_thickness_match else None,
-        })
-
-    return roof_data
-
-
-def extract_roof_details_summary(text):
-    """
-    Extracts roof type, insulation, and insulation thickness for each building part
-    in the 8.0 Roofs section of the summary report.
-    """
-    # Define data structure to hold results
-    roof_data = []
-
-    # Locate the entire 8.0 Roofs section
-    roof_section_match = re.search(r"8\.0 Roofs:\n(.*?)(?=\n9\.0 Floors:|$)", text, re.DOTALL)
-    if not roof_section_match:
-        return roof_data  # Return empty if no roof section is found
-
-    # Extract the roof section and append "9.0 Floors:" as the boundary
-    roof_section = roof_section_match.group(1).strip() + "\n9.0 Floors:"
-
-    # Define pattern to match each building part's roof entry
-    building_part_pattern = re.compile(
-        r"(Main Property|1st Extension|2nd Extension|[\w\s]+)\n"  # Matches each building part label
-        r"Type\s+(.*?)(?=\n(?:Insulation|9\.0 Floors:|[A-Z]))"  # Matches Roof Type until the next field, label, or end
-        r"(?:\nInsulation\s+(.*?)(?=\n(?:Insulation Thickness|9\.0 Floors:|[A-Z])))?"  # Optional Insulation
-        r"(?:\nInsulation Thickness\s+(.*?)(?=\n(?:9\.0 Floors:|[A-Z])))?",  # Optional Insulation Thickness
-        re.DOTALL
-    )
-
-    # Extract each building part's data
-    for match in building_part_pattern.finditer(roof_section):
-        part_name = match.group(1).strip()  # Building part label
-        roof_type = match.group(2).strip()  # Roof Type
-        roof_insulation = match.group(3).strip() if match.group(3) else None  # Optional Insulation
-        roof_insulation_thickness = match.group(4).strip() if match.group(4) else None  # Optional Thickness
-
-        # Cleaning to handle annoying cases when it comes out like this:
-        # 'A Another dwelling above\n1st Extension'
-        if roof_type.startswith("A Another dwelling above"):
-            roof_type = "A Another dwelling above"
-
-        # Store results for this building part
-        roof_data.append({
-            "Building Part": part_name,
-            "Roof Type": roof_type,
-            "Roof Insulation": roof_insulation,
-            "Roof Insulation Thickness": roof_insulation_thickness,
-        })
-
-    return roof_data
-
-
-def extract_wall_details_epr(text):
-    """
-    Extracts wall type, insulation, dry-lining, and thickness for each building part
-    in the provided EPR PDF text.
-    """
-    # Define data structure to hold results
-    wall_data = []
-
-    # Locate each building part section
-    building_part_pattern = re.compile(
-        r"Construction details: Building part: (.*?)\n(.*?)(?=Conservatory|Construction details|$)",
-        re.DOTALL
-    )
-
-    # Extract each building part's data, including wall details
-    for match in building_part_pattern.finditer(text):
-        part_name = match.group(1).strip()
-
-        # Clean up the building part name
-        cleaned_part_name = re.sub(r" - built in.*|Room\(s\) in Roof area:.*", "", part_name).strip()
-
-        part_details = match.group(2)
-
-        # Extract Wall Type, Wall Insulation, Wall Dry-lining, and Wall Thickness
-        wall_type_match = re.search(r"Wall Type:\s*(.*?)(?=\n|$)", part_details)
-        wall_insulation_match = re.search(r"Wall Insulation:\s*(.*?)(?=\n|$)", part_details)
-        wall_drylining_match = re.search(r"Wall Dry-lining:\s*(.*?)(?=\n|$)", part_details)
-        wall_thickness_match = re.search(r"Wall Thickness:\s*(\d+)(?=\n|$)", part_details)
-
-        # Extract Alternative Wall information if available
-        alt_wall_type_match = re.search(r"Alternative Wall Type:\s*(.*?)(?=\n|$)", part_details)
-        alt_wall_insulation_match = re.search(r"Alternative Wall Insulation:\s*(.*?)(?=\n|$)", part_details)
-        alt_wall_drylining_match = re.search(r"Alternative Wall Dry-lining:\s*(.*?)(?=\n|$)", part_details)
-        alt_wall_thickness_match = re.search(r"Alternative Wall Thickness:\s*(\d+)(?=\n|$)", part_details)
-
-        # Store results for this building part
-        wall_data.append({
-            "Building Part": cleaned_part_name,
-            "Wall Type": wall_type_match.group(1).strip() if wall_type_match else None,
-            "Wall Insulation": wall_insulation_match.group(1).strip() if wall_insulation_match else None,
-            "Wall Dry-lining": wall_drylining_match.group(1).strip() if wall_drylining_match else None,
-            "Wall Thickness": int(wall_thickness_match.group(1)) if wall_thickness_match else None,
-            "Alternative Wall Type": alt_wall_type_match.group(1).strip() if alt_wall_type_match else None,
-            "Alternative Wall Insulation": alt_wall_insulation_match.group(
-                1).strip() if alt_wall_insulation_match else None,
-            "Alternative Wall Dry-lining": alt_wall_drylining_match.group(
-                1).strip() if alt_wall_drylining_match else None,
-            "Alternative Wall Thickness": int(alt_wall_thickness_match.group(1)) if alt_wall_thickness_match else None,
-        })
-
-    return wall_data
-
-
-def extract_epr(pdf_path):
-    """
-    Extracts specific data from an Energy Report (EPR) PDF file.
-    """
-
-    data = {
-        "Address": None,
-        "Postcode": None,
-        "Current SAP Rating": None,
-        "Current EPC Band": None,
-        "Primary Energy Use (kWh/yr)": None,
-        "Primary Energy Use Intensity (kWh/m2/yr)": None,
-        "Number of Storeys": None,
-        "Main Building Age Band": None,
-        "Fuel Bill": None,
-        "Window Age Description": None,
-        "Window Age Description Proportion (%)": None,
-        "Secondary Window Age Description": None,
-        "Secondary Window Age Description Proportion (%)": None,
-        "Number of Windows": None,
-        "Total Number of Doors": None,
-        "Number of Insulated Doors": None,
-        "Existing Primary Heating System": None,
-        "Existing Primary Heating PCDF Reference": None,
-        "Existing Primary Heating Controls": None,
-        "Existing Primary Heating % of Heat": None,
-        "Existing Secondary Heating System": None,
-        "Existing Secondary Heating PCDF Reference": None,
-        "Existing Secondary Heating Controls": None,
-        "Existing Secondary Heating % of Heat": None,
-        "Secondary Heating Code": None,
-        "Water Heating Code": None,
-        'Total Floor Area (m2)': None,
-        'Total Ground Floor Area (m2)': None,
-        'RIR Floor Area': None,
-        'Main Building Wall Area (m2)': None,
-        'First Extension Wall Area (m2)': None,
-        "Number of Light Fittings": None,
-        "Number of LEL Fittings": None,
-        "Number of fittings needing LEL": None,
-        "Main Roof Type": None,
-        "Main Roof Insulation": None,
-        "Main Roof Insulation Thickness": None,
-        "Main Wall Type": None,
-        "Main Wall Insulation": None,
-        "Main Wall Dry-lining": None,
-        "Main Wall Thickness": None,
-        "Main Building Alternative Wall Type": None,
-        "Main Building Alternative Wall Insulation": None,
-        "Main Building Alternative Wall Dry-lining": None,
-        "Main Building Alternative Wall Thickness": None,
-        "Main Fuel": None
-    }
-
-    with open(pdf_path, "rb") as file:
-        reader = PyPDF2.PdfReader(file)
-        text = ""
-        for page in reader.pages:
-            text += page.extract_text()
-
-        # Extract Address
-        address_match = re.search(r"ENERGY REPORT\nDwelling Address\s*(.*?)\s*\nReference", text, re.DOTALL)
-        data["Address"] = address_match.group(1).strip()
-        data["Postcode"] = data["Address"].split(",")[-1].strip()
-
-        # Extract Current and Potential SAP ratings
-        sap_match = re.search(r"GG \(1-20\)\s*(\d{1,2})\s*(\d{1,2})", text)
-        if sap_match is None:
-            # Handles the older format of the elmhurst EPR
-            # The text will look something like this:
-            # Least energy efficient - higher running costsD 61 - we extract D 61
-            sap_match = re.search(
-                r"(?P<current_epc>[A-G])\s(?P<current_sap>\d{1,3})(?P<potential_epc>[A-G])\s(?P<potential_sap>\d{1,3})",
-                text)
-            data["Current EPC Band"] = sap_match.group("current_epc")
-            data["Current SAP Rating"] = int(sap_match.group("current_sap"))
-        else:
-            current_sap, _ = int(sap_match.group(1)), int(sap_match.group(2))
-            data["Current SAP Rating"] = current_sap
-
-        # Extract the primary energy use intensity
-        additional_rating_match = re.search(r"Additional ratings for your home\s*([\d.]+)", text)
-        if additional_rating_match:
-            data["Primary Energy Use Intensity (kWh/m2/yr)"] = float(additional_rating_match.group(1))
-        else:
-            # Handles the older format of the Elmhurst EPR
-            primary_energy_match = re.search(r"actual consumption\.\n(?P<primary_energy>\d+)", text)
-            data["Primary Energy Use (kWh/yr)"] = int(primary_energy_match.group("primary_energy"))
-            # We calculate the primary energy use intensity by dividing by floor area
-            floor_area = re.search(r"Total Floor Area\s(?P<floor_area>\d+)\s?m2", text).group("floor_area")
-            data["Primary Energy Use Intensity (kWh/m2/yr)"] = data["Primary Energy Use (kWh/yr)"] / int(floor_area)
-
-        # Extract age band
-        age_band_match = re.search(
-            r"Building part:\s*Main\s*-\s*built in\s*(?:[A-Z]\s*)?(\d{4}-\d{4}|before \d{4}|\d{4} onwards)",
-            text
-        )
-
-        data["Main Building Age Band"] = age_band_match.group(1)
-
-        # Extract Number of Storeys
-        storeys_match = re.search(r"Number of Storeys:\s*(\d+)", text)
-        data["Number of Storeys"] = int(storeys_match.group(1))
-
-        # Extract Fuel Bill
-        fuel_bill_match = re.search(r"TOTAL\s*£(\d+)", text)
-        data["Fuel Bill"] = f"£{fuel_bill_match.group(1)}"
-
-        # Extract Total Number of Doors
-        total_doors_match = re.search(r"Total Doors:\s*(\d+)", text)
-        data["Total Number of Doors"] = int(total_doors_match.group(1))
-
-        # Extract Number of Insulated Doors
-        insulated_doors_match = re.search(r"Insulated Doors:\s*(\d+)", text)
-        data["Number of Insulated Doors"] = int(insulated_doors_match.group(1))
-
-        # Extract Primary Heating Section (Main Heating 1)
-        primary_heating_section1 = re.search(r"Main\s*Heating\s*1\s*(.*?)\s*Main\s*Heating\s*2", text, re.DOTALL)
-        # We may not have a secondary heating
-        primary_heating_section2 = re.search(r"Main\s*Heating\s*1\s*(.*?)\s*Secondary\s*Heating", text, re.DOTALL)
-        primary_heating_section = primary_heating_section1 if primary_heating_section1 else primary_heating_section2
-        primary_text = primary_heating_section.group(1)
-
-        data["Existing Primary Heating System"] = re.search(
-            r"Main Heating Code\s*(.*?)\n", primary_text
-        ).group(1).strip()
-        data["Existing Primary Heating PCDF Reference"] = re.search(
-            r"PCDF boiler Reference\s*(\d+)", primary_text
-        ).group(1)
-        data["Existing Primary Heating Controls"] = re.search(
-            r"Main Heating Controls\s*(.*?)\n", primary_text
-        ).group(1).strip()
-        data["Existing Primary Heating % of Heat"] = int(
-            re.search(r"Percentage of Heat\s*(\d+)\s*%?", primary_text).group(1)
-        )
-
-        # Extract Secondary Heating Section (Main Heating 2)
-        secondary_heating_section = re.search(r"Main\s*Heating\s*2\s*(.*?)\s*Secondary Heating", text, re.DOTALL)
-        if secondary_heating_section is None:
-            data["Existing Secondary Heating System"] = ""
-            data["Existing Secondary Heating PCDF Reference"] = ""
-            data["Existing Secondary Heating Controls"] = ""
-            data["Existing Secondary Heating % of Heat"] = 0
-
-        else:
-            secondary_text = secondary_heating_section.group(1)
-
-            main_heating_code_match_secondary = re.search(
-                r"Main Heating Code\s*(.*?)(?=\n|Percentage of Heat)", secondary_text
-            )
-            data["Existing Secondary Heating System"] = main_heating_code_match_secondary.group(1).strip()
-
-            data["Existing Secondary Heating PCDF Reference"] = re.search(
-                r"PCDF boiler Reference\s*(\d+)", secondary_text
-            ).group(1)
-
-            if data["Existing Secondary Heating System"] == "":
-                data["Existing Secondary Heating Controls"] = ""
-            else:
-                # Might not have heating controls on 2nd system
-                secondary_controls_match = re.search(r"Main Heating Controls\s*(.*?)\n", secondary_text)
-                data["Existing Secondary Heating Controls"] = (
-                    secondary_controls_match.group(1).strip() if secondary_controls_match else ""
-                )
-            data["Existing Secondary Heating % of Heat"] = int(
-                re.search(r"Percentage of Heat\s*(\d+)\s*%?", secondary_text).group(1)
-            )
-
-        # Extract Secondary Heating and Water Heating Codes
-        secondary_heating_code_match = re.search(r"Secondary Heating Code\s*(.*?)\n", text)
-        water_heating_code_match = re.search(r"Water Heating Code\s*(.*?)\n", text)
-
-        if data["Existing Secondary Heating System"] == "":
-            data["Secondary Heating Code"] = ""
-        else:
-            data["Secondary Heating Code"] = secondary_heating_code_match.group(
-                1).strip() if secondary_heating_code_match else ""
-        data["Water Heating Code"] = water_heating_code_match.group(1).strip()
-
-        # Extract Windows information
-        windows_section = re.search(r"Windows\s*(.*?)\s*Draught Proofing", text, re.DOTALL)
-        if windows_section:
-            windows_text = windows_section.group(1)
-            window_data = extract_window_age_description(windows_text)
-            data.update(window_data)
-
-        building_parts = extract_building_parts_epr(text)
-        data.update(building_parts)
-
-        # Get number of lighting outlets and number of fittings needing LEL
-        lighting_fittings_match = re.search(r"Total number of light fittings\s*(\d+)", text)
-        data["Number of Light Fittings"] = int(lighting_fittings_match.group(1))
-        lel_fittings_match = re.search(r"Total number of L.E.L. fittings\s*(\d+)", text)
-        data["Number of LEL Fittings"] = int(lel_fittings_match.group(1))
-        data["Number of fittings needing LEL"] = data["Number of Light Fittings"] - data["Number of LEL Fittings"]
-
-        roof_details = extract_roof_details_epr(text)
-        # Get from the main building
-        main_roof_details = [r for r in roof_details if "Main" in r["Building Part"]]
-        data["Main Roof Type"] = main_roof_details[0]["Roof Type"]
-        data["Main Roof Insulation"] = main_roof_details[0]["Roof Insulation"]
-        data["Main Roof Insulation Thickness"] = main_roof_details[0]["Roof Insulation Thickness"]
-
-        wall_details = extract_wall_details_epr(text)
-        main_wall_details = [w for w in wall_details if "Main" in w["Building Part"]][0]
-        data["Main Wall Type"] = main_wall_details["Wall Type"]
-        data["Main Wall Insulation"] = main_wall_details["Wall Insulation"]
-        data["Main Wall Dry-lining"] = main_wall_details["Wall Dry-lining"]
-        data["Main Wall Thickness"] = main_wall_details["Wall Thickness"]
-        data["Main Building Alternative Wall Type"] = main_wall_details["Alternative Wall Type"]
-        data["Main Building Alternative Wall Insulation"] = main_wall_details["Alternative Wall Insulation"]
-        data["Main Building Alternative Wall Dry-lining"] = main_wall_details["Alternative Wall Dry-lining"]
-        data["Main Building Alternative Wall Thickness"] = main_wall_details["Alternative Wall Thickness"]
-
-    return data
-
-
-def detect_report_type(pdf_path, pdf_file):
-    """
-    Classify a PDF report from its first page, its page count and its filename.
-    :param pdf_path: String path to the PDF file
-    :param pdf_file: String name of the PDF file (checked for the word "summary")
-    :return: "epr", "short_form_epr", "summary", "condition", or None when nothing matches
-    """
-    with open(pdf_path, "rb") as handle:
-        # Irregular PDFs make this raise warnings like "Multiple definitions in dictionary at byte 0x1ab
-        # for key /Filter". A library like fitz could possibly handle these better.
-        pdf = PyPDF2.PdfReader(handle)
-        first_page_text = pdf.pages[0].extract_text() if pdf.pages else ""
-        page_count = len(pdf.pages)
-
-        if is_energy_report(first_page_text):
-            # A full EPR has more than 3 pages; anything shorter is the shortened version,
-            # which isn't massively useful
-            return "epr" if page_count > 3 else "short_form_epr"
-        if "summary" in pdf_file.lower() or is_summary_report(first_page_text):
-            return "summary"
-        if is_condition_report(first_page_text):
-            return "condition"
-
-    return None
-
-
-def extract_retrofit_pdfs(data_folder_path):
-    """
-    Looks for an EPR and/or a summary report among the PDFs directly inside a folder and extracts one of them.
-    The EPR takes priority when both are present.
-    :param data_folder_path: String path to the folder to scan (sub-folders are not searched)
-    :return: Output of extract_epr or extract_summary_report, or None if neither report type is found
-    """
-    epr_path = None
-    summary_path = None
-
-    for entry in os.listdir(data_folder_path):
-        if not entry.endswith(".pdf"):
-            continue
-
-        candidate = os.path.join(data_folder_path, entry)
-        kind = detect_report_type(candidate, entry)
-
-        if kind == "epr":
-            epr_path = candidate
-        elif kind == "summary":
-            summary_path = candidate
-
-        # Once both an EPR and a summary have been located there is no need to open more PDFs
-        if epr_path and summary_path:
-            break
-
-    if epr_path:
-        return extract_epr(epr_path)
-    if summary_path:
-        return extract_summary_report(summary_path)
-
-    # No relevant PDF in this folder
-    return None
-
-
-def is_energy_report(text):
-    """
-    Checks whether the text extracted from a PDF page marks it as an Energy Report.
-    Returns True only when the text begins with the upper-case heading 'ENERGY REPORT'.
-    """
-    heading = "ENERGY REPORT"
-    return text.startswith(heading)
-
-
-def is_summary_report(text):
-    """
-    Checks whether the text extracted from a PDF page marks it as a Summary Report.
-    Returns True only when the text begins with 'Summary Information'.
-    """
-    heading = "Summary Information"
-    return text.startswith(heading)
-
-
-def detect_and_parse_report(pdf_path, pdf_file):
-    """
-    Works out which kind of report a PDF is from its first page or filename, then runs the matching extractor.
-    :param pdf_path: String path to the PDF file
-    :param pdf_file: String name of the PDF file
-    :return: Output of extract_epr or extract_summary_report; None for a condition report
-    :raises NotImplementedError: if the PDF matches none of the known report types
-    """
-    with open(pdf_path, "rb") as handle:
-        pdf = PyPDF2.PdfReader(handle)
-        first_page_text = pdf.pages[0].extract_text() if pdf.pages else ""
-
-        # Energy Report
-        if is_energy_report(first_page_text):
-            return extract_epr(pdf_path)
-
-        # Summary Report, recognised either by filename or by the first-page heading
-        if "summary" in pdf_file.lower() or is_summary_report(first_page_text):
-            return extract_summary_report(pdf_path)
-
-        # Condition reports are recognised but nothing is extracted from them
-        if is_condition_report(first_page_text):
-            return None
-
-        raise NotImplementedError("Implement me")
-
-
-def is_condition_report(text):
-    """
-    Checks whether the text extracted from a PDF page marks it as a Condition Report, i.e. whether it
-    begins with one of the two known condition report headings.
-    """
-    known_headings = (
-        "OsmosisACDNEWPAS2035ConditionReport",
-        "OsmosisACDPAS2035ConditionReport",
-    )
-    return text.startswith(known_headings)
-
-
-def main():
-    """
-    This code prepares the data for the Warm Homes: Social Housing Fund Wave 3, for Stonewater.
-
-    Steps, in order:
-    1. Walk the StonewaterSurveys_<i> folders under CUSTOMER_FOLDER_PATH and extract EPR/summary data per survey
-    2. Derive primary energy use and the EPC band, and drop known duplicate surveys
-    3. Match each surveyed row of the retrofit packages board to a survey folder
-    4. Merge on the package measures, window fitted/renewed dates, primary energy figures and 'Org. ref.'
-    5. Write the costed packages workbook and the cost sheet, then call create_proposed_wave_3_bid
-    :return: None; the outputs are Excel files written under CUSTOMER_FOLDER_PATH
-    """
-    # List only directories in the specified FILE_PATH
-    survey_folders = []
-
-    # Loop over each survey folder and list its contents
-    for i in range(1, NUM_FOLDERS + 1):
-        folder_path = os.path.join(CUSTOMER_FOLDER_PATH, f"StonewaterSurveys_{i}")
-        if os.path.isdir(folder_path):  # Check if folder exists
-            folder_contents = [os.path.join(f"StonewaterSurveys_{i}", file) for file in os.listdir(folder_path)]
-            survey_folders.extend(folder_contents)  # Append contents to the master list
-
-    # Get rid of .DS_Store files
-    survey_folders = [folder for folder in survey_folders if not folder.endswith(".DS_Store")]
-
-    # One dict per survey folder that yielded an EPR or summary report
-    extracted_data = []
-    for survey_folder in tqdm(survey_folders):
-        survey_folder_path = os.path.join(CUSTOMER_FOLDER_PATH, survey_folder)
-
-        # List the folders inside of the survey folder
-        survey_subfolders = [name for name in os.listdir(survey_folder_path)
-                             if os.path.isdir(os.path.join(survey_folder_path, name))]
-
-        # Check if there's a "retrofit assessment" folder
-        retrofit_folder = next((name for name in survey_subfolders if "retrofit assessment" in name.lower()), None)
-
-        # Alternative folder names used for the same content; only used when there is no retrofit assessment folder
-        ra_folder = next(
-            (name for name in survey_subfolders if "ra coordinator info" in name.lower() or "ra info" in name.lower()),
-            None
-        )
-
-        # If retrofit assessment folder exists, check if it has content
-        if retrofit_folder or ra_folder:
-            if retrofit_folder:
-                retrofit_folder_path = os.path.join(survey_folder_path, retrofit_folder)
-            else:
-                retrofit_folder_path = os.path.join(survey_folder_path, ra_folder)
-
-            # Check if everything inside is a sub-folder and the number of folders is 2
-            items = [item for item in os.listdir(retrofit_folder_path) if item != '.DS_Store']
-            all_folders = [os.path.isdir(os.path.join(retrofit_folder_path, item)) for item in items]
-            if all(all_folders) and len(all_folders) == 2 and "Property Pics" in items:
-                # Get the folder that isn't Property Pics
-                retrofit_folder_path = os.path.join(
-                    retrofit_folder_path, [item for item in items if item != "Property Pics"][0]
-                )
-
-            if os.listdir(retrofit_folder_path):  # If not empty
-                summary_data = extract_retrofit_pdfs(data_folder_path=retrofit_folder_path)
-                if summary_data:
-                    summary_data = {
-                        "survey_folder": survey_folder,
-                        **summary_data,
-                    }
-                    extracted_data.append(summary_data)
-                    # Data found in the retrofit folder, so skip the fallback search below
-                    continue
-            else:
-                # Then we have an empty Retrofit Assessment folder
-                continue
-
-        # Reached when there is no retrofit/RA folder, or when it had content but no EPR/summary was extracted
-        # from it. In that case, check the files directly inside survey_folder
-
-        summary_data = extract_retrofit_pdfs(data_folder_path=survey_folder_path)
-        if not summary_data:
-            # Nothing at the top level: if there is exactly one sub-folder, look inside that instead
-            if len(survey_subfolders) == 1:
-                survey_folder_path = os.path.join(survey_folder_path, survey_subfolders[0])
-                summary_data = extract_retrofit_pdfs(data_folder_path=survey_folder_path)
-
-        if summary_data:
-            summary_data = {
-                "survey_folder": survey_folder,
-                **summary_data,
-            }
-            extracted_data.append(summary_data)
-
-    extracted_data = pd.DataFrame(extracted_data)
-
-    extracted_data["Primary Energy Use (kWh/yr)"] = (
-        extracted_data["Primary Energy Use Intensity (kWh/m2/yr)"] * extracted_data["Total Floor Area (m2)"]
-    )
-    # NOTE(review): astype(int) raises if any row has a missing SAP rating - confirm every extraction populates it
-    extracted_data["Current SAP Rating"] = extracted_data["Current SAP Rating"].astype(int)
-    extracted_data["Current EPC Band"] = extracted_data["Current SAP Rating"].apply(sap_to_epc)
-
-    # Remove some definite duplicates
-    # First collect every row whose Address appears more than once
-    dupes = extracted_data[extracted_data["Address"].duplicated()]["Address"]
-    dupes = extracted_data[extracted_data["Address"].isin(dupes)]
-    dupes = dupes.sort_values("Address")
-    # Get all of the folders that end with ROSS
-    to_drop = dupes[dupes["survey_folder"].str.endswith("ROSS")]["survey_folder"].unique().tolist()
-
-    # Drop three explicitly named folders plus every duplicated-address folder ending in ROSS
-    extracted_data = extracted_data[
-        ~extracted_data["survey_folder"].isin(
-            [
-                "StonewaterSurveys_10/4 Beech Road, LUTON, LU1 1DP ROSS",
-                "StonewaterSurveys_2/135 Runley Road, LUTON, LU1 1TX ROSS",
-                "StonewaterSurveys_13/7 Saxon Road, LUTON, LU3 1JR ROSS"
-            ] + to_drop
-        )
-    ]
-
-    # We now merge on the coordinator data so that against each property, we can map the measures
-    # TODO: Get the pre & post primary energy numbers
-    # TODO: Make sure the numbers are going down
-
-    retrofit_packages_board = pd.read_excel(
-        os.path.join(
-            CUSTOMER_FOLDER_PATH,
-            "Stonewater_SHDF_3_0_Board_work_in_progress_-_Operations_1732034933 Final 19.11.24.xlsx"
-        ),
-        header=4
-    )
-    retrofit_packages_board = retrofit_packages_board[~pd.isnull(retrofit_packages_board["Name"])]
-    # Take just the rows that have been surveyed
-    retrofit_packages_board = retrofit_packages_board[
-        retrofit_packages_board["RA"].isin(["Invoiced", "Completed"])
-    ]
-    # populated_primary_energy = retrofit_packages_board[
-    #     ~pd.isnull(retrofit_packages_board['BASE Primary energy (13a-272)'])
-    # ]
-    #
-    # z = populated_primary_energy[
-    #     populated_primary_energy['POST Primary energy (13a - 272)'] > populated_primary_energy[
-    #         'BASE Primary energy (13a-272)']
-    #     ]
-    #
-    # all(populated_primary_energy['POST Primary energy (13a - 272)'] <= populated_primary_energy[
-    #     'BASE Primary energy (13a-272)'])
-
-    # Replace \n with ""
-    extracted_data["Postcode"] = extracted_data["Postcode"].str.replace("\n", "")
-
-    # Manual overrides: board "Name" -> exact survey_folder to use, bypassing the postcode/name matching below
-    manual_filters = {
-        "Flat 21 Walmer Street": "StonewaterSurveys_14/91-1-Flat 21 Walmer Street-HR4 9JD",
-        "6 Cornewall Close": "StonewaterSurveys_14/aa 6, Cornewall Close, Moccas, HEREFORD, HR2 9LG",
-        "2 Bromyard Road": "StonewaterSurveys_4/192-9-2 Bromyard Road-WR15 8BZ",
-        'Flat 18, 1 Raglan Court': "StonewaterSurveys_13/60-3-18 Raglan Court, 1 Raglan Court-MK41 8QT",
-        '14 Raglan Court, 1 Devizes Avenue': 'StonewaterSurveys_12/55-3-14 Raglan Court, Devizes Avenue-MK41 8QT',
-        '19 South Road': 'StonewaterSurveys_4/19 The Oaks, South Road, SMETHWICK, B67 7BY',
-        'Flat 12 Pelican Lane': 'StonewaterSurveys_1/121-3-Flat 12 Lynton Court, Pelican Lane-RG14 1NN',
-        'Flat C, 44 St Leonards Avenue': 'StonewaterSurveys_11/427-2-44c St. Leonards Avenue-MK42 0RB',
-        '16 The Crescent, Kington': 'StonewaterSurveys_9/360-3-16 The Crescent-HR5 3AS',
-        '2 School Lane, Leominster': 'StonewaterSurveys_5/224-1-2 School Lane-HR6 8AA',
-        '14 South Road': 'StonewaterSurveys_2/14 The Oaks, South Road, SMETHWICK, B67 7BY',
-        '1 Groves Street': 'StonewaterSurveys_4/19-5-1 Groves Street-SN2 2BW',
-        # '2 Sorrell Place': '',
-        # '72 St Ives Road': '',
-        # '1 The Close, Burton Gardens': '',
-        # '102 Cheaton Close': '',
-        # 'Flat 16 Spring Gardens': '',
-        # '4 Apple Close': '',
-        # '25 Folly Lane': '',
-        '2 Calshot Walk': 'StonewaterSurveys_3/156-3-2 Calshot Walk-MK41 8QS',
-        '21 Constitution Hill': 'StonewaterSurveys_1/112-11-21 Constitution Hill-BH14 0PX',
-        '22 Constitution Hill': 'StonewaterSurveys_4/185-8-22 Constitution Hill-BH14 0PX',
-        '2 Marches Cottages, School Lane, Leominster': 'StonewaterSurveys_5/224-1-2 School Lane-HR6 8AA',
-        '26, Copthorn House, Brighton Road': 'StonewaterSurveys_15/133-1-26 Brighton Road-KT20 6BQ',
-        '4, Old St Marys, Ripley Lane': "StonewaterSurveys_15/433-3-4 Ripley Lane-KT24 6JG",
-        '1 Nelson House, Short Street': 'StonewaterSurveys_15/89-2-1 Short Street-GU11 1HX',
-        "18 Nelson House, Short Street": 'StonewaterSurveys_15/25-3- 18 Short Street- GU11 1HX',
-        '3 Nelson House, Short Street': 'StonewaterSurveys_2/138-1-3 Short Street-GU11 1HX',
-        '16, Copthorn House, Brighton Road': 'StonewaterSurveys_13/78-3-16 Brighton Road-KT20 6BQ',
-        '20 Nelson House, Short Street': 'StonewaterSurveys_15/89-1-20 Short Street-GU11 1HX',
-        '7 Croft Street': 'StonewaterSurveys_8/333-2-7 Croft Street-HR6 8LA'
-    }
-
-    # We now match this retrofit packages board to the extracted data
-    matching_lookup = []
-    for _, home in tqdm(retrofit_packages_board.iterrows(), total=len(retrofit_packages_board)):
-
-        # Handle the case that has the wrong postcode in the asset data
-        if home["Name"] in manual_filters:
-            filtered = extracted_data[extracted_data["survey_folder"] == manual_filters[home["Name"]]].copy()
-        else:
-            filtered = extracted_data[extracted_data["Postcode"].str.lower() == home["Postcode"].lower()].copy()
-
-            # We check that home["Name"] is contained in the survey_folder, after removing punctuation and spaces
-            # NOTE(review): if home["Name"] is a plain str (as the .lstrip() call suggests), then
-            # home["Name"].replace(r"[^\w\s]", "") is the built-in str.replace, which replaces that literal text
-            # rather than applying it as a regex, so punctuation in the name is not stripped. Whether the pandas
-            # .str.replace calls treat the pattern as a regex depends on the installed pandas version's default
-            # for `regex` - confirm this matches the intent.
-            to_filter = filtered["survey_folder"].str.replace(r"[^\w\s]", "").str.contains(
-                home["Name"].replace(r"[^\w\s]", "").replace("Flat", "").lstrip(), case=False
-            )
-            # Second attempt when nothing matched: strip commas and full stops, and keep "Flat" in the name
-            if to_filter.sum() == 0:
-                to_filter = filtered["survey_folder"].str.replace(r"[^\w\s]", "").str.replace(",", "").str.replace(".",
-                                                                                                                   "").str.contains(
-                    home["Name"].replace(r"[^\w\s]", "").replace(",", ""), case=False
-                )
-            filtered = filtered[to_filter]
-
-        # No survey found for this home; it is picked up by the missing_ids check after the loop
-        if filtered.empty:
-            continue
-
-        if filtered.shape[0] == 1:
-            matching_lookup.append(
-                {
-                    "survey_folder": filtered["survey_folder"].values[0],
-                    "Address ID": home["Address ID"],
-                    "Name": home["Name"]
-                }
-            )
-            continue
-
-        # More than one candidate remains, so narrow down further
-        # home["Name"] should be contained in the survey_folder
-        # NOTE(review): str.contains treats home["Name"] as a regular expression by default, so names containing
-        # regex metacharacters may not match literally - confirm
-        filtered = filtered[filtered["survey_folder"].str.contains(home["Name"], case=False)]
-        # We have an edge case where some properties have two outputs in Sharepoint
-        # The next two cases deliberately raise so that the ambiguity gets resolved by hand
-        if home["Name"] == "197 Granby Court" and home["Postcode"] == "MK1 1NQ":
-            raise Exception("Fix me1")
-            # filtered = filtered[filtered["survey_folder"] == "113-1-197 Granby Court-MK1 1NQ"]
-
-        if home["Name"] == '1 Cluny Way' and home["Postcode"] == 'SG15 6ZB':
-            raise Exception("Fix me2")
-            # filtered = filtered[filtered["survey_folder"] == "12-1-1 Cluny Way-SG15 6ZB"]
-
-        if home["Name"] == '2 Bromyard Road' and home["Postcode"] == 'WR15 8BZ':
-            filtered = filtered[filtered["survey_folder"] == "StonewaterSurveys_4/192-9-2 Bromyard Road-WR15 8BZ"]
-
-        if filtered.empty:
-            continue
-        if filtered.shape[0] != 1:
-            raise Exception("something went wrong")
-
-        matching_lookup.append(
-            {
-                "survey_folder": filtered["survey_folder"].values[0],
-                "Address ID": home["Address ID"],
-                "Name": home["Name"]
-            }
-        )
-
-    matching_lookup = pd.DataFrame(matching_lookup)
-    # Find Address IDs that are in the packages board but not in the matching lookup
-    missing_ids = set(retrofit_packages_board["Address ID"]) - set(matching_lookup["Address ID"])
-    missing_ids = list(missing_ids)
-    if missing_ids:
-        # We check that the missing ids have no data yet
-        # missed = retrofit_packages_board[retrofit_packages_board["Address ID"].isin(missing_ids)]
-        # missed[["Name", "Postcode", "Archetype ID", "Arch. Group Rank"]].to_csv(
-        #     CUSTOMER_FOLDER_PATH + "/missed_debugging.csv")
-
-        # Exactly one unmatched home is tolerated; any other non-zero count aborts the run
-        if len(missing_ids) != 1:
-            raise Exception("Unacceptable number of missings")
-
-    if matching_lookup["Address ID"].duplicated().sum():
-        raise Exception("Duplicate Address IDs")
-
-    if matching_lookup["survey_folder"].duplicated().sum():
-        raise Exception("Duplicate survey folders")
-
-    # Columns of the packages board that describe the measures in each package
-    measure_columns = [
-        'Main Wall Insulation',
-        'Secondary Wall Insulation',
-        'Loft insulation',
-        'Flat Roof',
-        'Room in Roof',
-        'Window Upgrade',
-        'Door Upgrade',
-        'Ventilation',
-        'Main Heating',
-        'Water Heating',
-        'Heating Controls',
-        'Solar PV',
-        'Other measures'
-    ]
-
-    # We should end up with a 1:1 mapping between the Osm. ID and the survey folder
-    stonewater_data = extracted_data.merge(matching_lookup, on="survey_folder", how="inner").merge(
-        retrofit_packages_board[
-            [
-                "Name",
-                "RA",
-                "Address ID",
-                "Archetype ID",
-                "Arch. Group Rank",
-                "Actual SAP Band",
-                "Actual SAP Rating",
-                "Modelled SAP Band",
-                "Modelled SAP Rating",
-                "Package Ref",
-            ] + measure_columns
-            ],
-        on=["Address ID", "Name"],
-        how="left"
-    )
-
-    if stonewater_data["Address ID"].duplicated().sum():
-        raise Exception("Duplicate Address IDs")
-    # Create a section for costs
-    # These columns are created empty here; nothing in this function fills them in
-    for measure in measure_columns:
-        stonewater_data[f"Cost of {measure}"] = None
-
-    stonewater_data["Total Cost of Measures"] = None
-    stonewater_data["Contingency Cost"] = None
-    stonewater_data["Total Cost of Measures inc Contingency"] = None
-
-    # We've appended the recommended packages and modelled SAP ratings to the data
-    # We also want to append the windows data
-    windows_data = pd.read_excel(
-        os.path.join(
-            CUSTOMER_FOLDER_PATH,
-            "Window data included AP Copy Stonewater SHDF_3_0_Board Triage Master Filtered 26.07.24.xlsx"
-        ),
-        header=12
-    )
-
-    # Drop rows where the header text is repeated in the data, and rows with no Address ID
-    windows_data = windows_data[windows_data["Address ID"] != "Address ID"]
-    windows_data = windows_data[~pd.isnull(windows_data["Address ID"])]
-
-    # We get a lookup id of Osm.ID and when the windows were fitted
-    windows_data = windows_data[
-        ["Address ID", "Window attributes - Fitted/renewed date",
-         "Parent Asset Window attributes - Fitted/renewed date"]
-    ]
-    # Convert to string for the moment
-    windows_data["Parent Asset Window attributes - Fitted/renewed date"] = windows_data[
-        "Parent Asset Window attributes - Fitted/renewed date"
-    ].astype(str)
-    # Create a single date column
-    # The property's own date is used when present, otherwise the parent asset's date
-    windows_data["Fitted/renewed date"] = np.where(
-        pd.notnull(windows_data["Window attributes - Fitted/renewed date"]),
-        windows_data["Window attributes - Fitted/renewed date"],
-        windows_data["Parent Asset Window attributes - Fitted/renewed date"]
-    )
-    # Convert to a date
-    windows_data["Fitted/renewed date"] = pd.to_datetime(windows_data["Fitted/renewed date"])
-    # Calculate the number of years since something was done on the windows
-    windows_data["Years since fitted/renewed"] = (pd.Timestamp.now() - windows_data[
-        "Fitted/renewed date"]).dt.days / 365
-
-    stonewater_data["Package Includes Windows"] = ~pd.isnull(stonewater_data["Window Upgrade"])
-    # presumably cast to float so the merge key dtype matches stonewater_data["Address ID"] - TODO confirm
-    windows_data["Address ID"] = windows_data["Address ID"].astype(float)
-    stonewater_data = stonewater_data.merge(windows_data, on="Address ID", how="left")
-    stonewater_data = stonewater_data.sort_values("Archetype ID", ascending=True)
-
-    if stonewater_data["Address ID"].duplicated().sum():
-        raise Exception("Duplicate Address IDs")
-
-    # Date columns are converted to strings before the data is written to Excel
-    for c in [
-        'Window attributes - Fitted/renewed date',
-        'Parent Asset Window attributes - Fitted/renewed date',
-        'Fitted/renewed date'
-    ]:
-        stonewater_data[c] = stonewater_data[c].astype(str)
-
-    # Fill the primary energy numbers from the excel
-    stonewater_data = stonewater_data.merge(
-        retrofit_packages_board[
-            [
-                "Name", "Address ID", "BASE Primary energy (13a-272)", "POST Primary energy (13a - 272)"
-            ]
-        ],
-        on=["Address ID", "Name"],
-        how="left"
-    )
-    # Only rows with no extracted primary energy figure take the board's BASE value
-    stonewater_data["Primary Energy Use (kWh/yr)"] = np.where(
-        pd.isnull(stonewater_data["Primary Energy Use (kWh/yr)"]),
-        stonewater_data["BASE Primary energy (13a-272)"],
-        stonewater_data["Primary Energy Use (kWh/yr)"]
-    )
-    # The BASE column is dropped once used; the POST column stays in the output
-    stonewater_data = stonewater_data.drop(columns=["BASE Primary energy (13a-272)"])
-
-    # Add on organisation reference
-    # NOTE(review): this is a hard-coded absolute path, unlike the other inputs which are built from
-    # CUSTOMER_FOLDER_PATH - confirm whether it should use CUSTOMER_FOLDER_PATH as well
-    original_archetypes = pd.read_excel(
-        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater SHDF_3_0_Board Triage 22.05.24 "
-        "- Archetyped V3.1.xlsx",
-        header=4
-    )
-    original_archetypes = original_archetypes[~pd.isnull(original_archetypes["Address ID"])]
-    original_archetypes = original_archetypes[original_archetypes["Address ID"] != "Address ID"]
-    original_archetypes["Address ID"] = original_archetypes["Address ID"].astype(int)
-
-    stonewater_data = stonewater_data.merge(
-        original_archetypes[["Address ID", 'Org. ref.']],
-        on="Address ID",
-        how="left"
-    )
-
-    # Save this data to excel
-    stonewater_data.to_excel(CUSTOMER_FOLDER_PATH + "/Stonewater - costed retrofit packages V4.xlsx", index=False)
-
-    # Unit cost per measure; a cost of None presumably means the measure has not been priced yet - TODO confirm
-    cost_sheet = [
-        {
-            "measure": "EWI 0.30 w.m2.K", "cost": 298.35, "unit": "m2"
-        },
-        {
-            "measure": "CWI RdSAP Default", "cost": 14.21, "unit": "m2"
-        },
-        {
-            "measure": "Poss Extract CWI & Refill (issues identified)", "cost": 14.21 + 25, "unit": "m2"
-        },
-        {
-            "measure": "IWI 0.30 w.m2.K", "cost": 244.80, "unit": "m2"
-        },
-        {
-            "measure": "EWI/IWI 0.3", "cost": (298.35 + 244.8) / 2, "unit": "m2"
-        },
-        {
-            "measure": "Loft Insulation 0.11 w.m2.K", "cost": 16.07, "unit": "m2"
-        },
-        {
-            "measure": "Flat Roof 0.11 w.m2.K", "cost": 195, "unit": "m2"
-        },
-        {
-            "measure": "DG Window 1.30 w.m2.K", "cost": 1140, "unit": "each"
-        },
-        {
-            "measure": "Secondary 2.40", "cost": 974, "unit": "each"
-        },
-        {
-            "measure": "Ins. Door 1.30 w.m2.K", "cost": None, "unit": "each"
-        },
-        {
-            "measure": "Ins. Door 1.40 w.m2.K", "cost": None, "unit": "each"
-        },
-        {
-            "measure": "DMEV", "cost": 900, "unit": "each"
-        },
-        {
-            "measure": "ASHP Vaillant 102607 5kw", "cost": None, "unit": "each"
-        },
-        {
-            "measure": "HHRSH Quantum 150", "cost": None, "unit": "each"
-        },
-        {
-            "measure": "Dual Stat Tank 210lt 50mm Foam", "cost": None, "unit": "each"
-        },
-        {
-            "measure": "Dual Stat Tank 160lt 50mm Foam", "cost": None, "unit": "each"
-        },
-        {
-            "measure": "Dual Stat Tank 110lt 50mm Foam", "cost": None, "unit": "each"
-        },
-        {
-            "measure": "Smart Thermostat", "cost": 1200, "unit": "each"
-        },
-        {
-            "measure": "TRV's", "cost": 350, "unit": "each"
-        },
-        {
-            "measure": "Solar PV - 3.0kwp", "cost": 4365.0, "unit": "each"
-        },
-        {
-            "measure": "Solar PV - 1.5kwp", "cost": 3881, "unit": "each"
-        },
-        {
-            "measure": "LEL", "cost": 35, "unit": "per bulb"
-        },
-        {
-            "measure": "Roof 0.16 - Walls 0.30", "cost": 180, "unit": "floor area m2"
-        },
-        {
-            "measure": "Roof 0.16 - Walls 0.16", "cost": 180, "unit": "floor area m2"
-        },
-    ]
-    cost_sheet = pd.DataFrame(cost_sheet)
-
-    # Save cost sheet - ideally this will be used as a secondary sheet for Stonewater
-    cost_sheet.to_excel(CUSTOMER_FOLDER_PATH + "/Stonewater - cost sheet.xlsx", index=False)
-
-    # stonewater_data[~pd.isnull(stonewater_data["Room in Roof"])]["survey_folder"].values
-
-    # NOTE(review): the costed packages workbook read here has a different filename from the "V4" workbook written
-    # above - presumably a manually costed copy; confirm it exists before running
-    create_proposed_wave_3_bid(
-        costed_packages_filepath=os.path.join(
-            CUSTOMER_FOLDER_PATH, "Stonewater - Costed Retrofit Packages 20241030 (WIP) Single Model V3.xlsx"
-        ),
-        archetypes_sheet_filepath=os.path.join(
-            CUSTOMER_FOLDER_PATH, "Stonewater SHDF_3_0_Board Triage 22.05.24 - Archetyped V3.1.xlsx"
-        )
-    )
-
-
-def create_proposed_wave_3_bid(costed_packages_filepath, archetypes_sheet_filepath):
-    # We read in the costed packages
-    costed_packages = pd.read_excel(costed_packages_filepath, header=13, sheet_name="Modelled Packages")
-    costed_packages = costed_packages[~pd.isnull(costed_packages["Address"])]
-
-    archetypes_to_cost = costed_packages[
-        [
-            "Name", "Address ID", "Archetype ID", "Current SAP Rating", "Current EPC Band", "Modelled SAP Band",
-            "Modelled SAP Rating", "Package Ref", 'Total Cost of Measures', 'Contingency Cost',
-            'Total Cost of Measures inc Contingency', 'Main Roof Type', 'Main Roof Insulation',
-            'Main Roof Insulation Thickness', 'Existing Primary Heating System',
-            'Existing Primary Heating PCDF Reference'
-        ]
-    ].copy()
-
-    # Combine 'Main Roof Type', 'Main Roof Insulation', 'Main Roof Insulation Thickness', separating by colons!
-    archetypes_to_cost['Surveyed Main Roof'] = (
-        archetypes_to_cost['Main Roof Type'] + ': ' + archetypes_to_cost['Main Roof Insulation'] + ': ' +
-        archetypes_to_cost['Main Roof Insulation Thickness'].astype(str)
-    )
-
-    # Combine the heating systems, separating by colons!
-    archetypes_to_cost['Surveyed Main Heating'] = (
-        archetypes_to_cost['Existing Primary Heating System'] + ': code - ' + archetypes_to_cost[
-        'Existing Primary Heating PCDF Reference'].astype(str)
-    )
-
-    archetypes_to_cost = archetypes_to_cost.drop(
-        columns=['Main Roof Type', 'Main Roof Insulation', 'Main Roof Insulation Thickness',
-                 'Existing Primary Heating System',
-                 'Existing Primary Heating PCDF Reference'])
-
-    # We take properties that are EPC D and below (59% of units)
-    archetypes_to_cost = archetypes_to_cost[archetypes_to_cost["Current EPC Band"].isin(["D", "E", "F", "G"])]
-
-    archetypes_to_cost["Has been modelled"] = ~pd.isnull(archetypes_to_cost["Modelled SAP Band"])
-
-    # These are the Archetypes that will likely be suitable for Wave 3
-    archetypes_sheet = pd.read_excel(archetypes_sheet_filepath, header=4)
-    archetypes_sheet = archetypes_sheet[~pd.isnull(archetypes_sheet["Address ID"])]
-    archetypes_sheet = archetypes_sheet[archetypes_sheet["Address ID"] != "Address ID"]
-    archetypes_sheet["Address ID"] = archetypes_sheet["Address ID"].astype(int)
-
-    # We merge the property details onto the costed archetypes
-    archetypes_to_cost = archetypes_to_cost.merge(
-        archetypes_sheet[["Address ID", "Property Type", "Wall Type", "Roof Type", "Heating"]],
-        on="Address ID",
-        how="left"
-    )
-
-    proposed_sample = archetypes_sheet[
-        archetypes_sheet["Archetype ID"].astype(str).isin(archetypes_to_cost["Archetype ID"].astype(int).astype(str))
-    ]
-
-    not_proposed = archetypes_sheet[
-        ~archetypes_sheet["Archetype ID"].astype(str).isin(archetypes_to_cost["Archetype ID"].astype(int).astype(str))
-    ]
-
-    # archetypes_without_survey = []
-    # for p in list(set(not_proposed)):
-    #     filtered = costed_packages[costed_packages["Archetype ID"].astype(int).astype(str) == p]
-    #     if filtered.empty:
-    #         archetypes_without_survey.append(p)
-
-    # Can we propose anything about archetypes that were not surveyed?
-
-    proposed_sample = proposed_sample[
-        [
-            "Name", "Postcode", "UPRN", "UDPRN", "Address ID", "Osm. ID", "Archetype ID",
-            "Property Type", "Wall Type", "Roof Type", "Heating"
-        ]
-    ]
-
-    # We classify into high and low confidence
-
-    archetypes_to_cost["Surveyed Main Roof"] = archetypes_to_cost["Surveyed Main Roof"].fillna("")
-
-    match_classification = []
-    for _, home in tqdm(proposed_sample.iterrows(), total=len(proposed_sample)):
-
-        surveyed = archetypes_to_cost[archetypes_to_cost["Archetype ID"] == home["Archetype ID"]].copy()
-        surveyed["Package Ref"] = surveyed["Package Ref"].astype(str)
-
-        package = " or ".join(sorted([x for x in surveyed["Package Ref"].unique() if x.strip()]))
-        package = package.replace("\n", "")
-
-        surveyed_roofs = " or ".join(sorted([x for x in surveyed["Surveyed Main Roof"].unique() if x.strip()]))
-        surveyed_roofs = surveyed_roofs.replace("\n", "")
-
-        surveyed_heating = " or ".join(sorted([x for x in surveyed["Surveyed Main Heating"].unique() if x.strip()]))
-        surveyed_heating = surveyed_heating.replace("\n", "")
-
-        # We now check if we have a perfect match
-        surveyed = surveyed[
-            (surveyed["Property Type"] == home["Property Type"]) &
-            (surveyed["Wall Type"] == home["Wall Type"]) &
-            (surveyed["Roof Type"] == home["Roof Type"]) &
-            (surveyed["Heating"] == home["Heating"])
-            ]
-
-        if surveyed.empty:
-            if package == "2B2A":
-                raise Exception("Fix me")
-            match_classification.append(
-                {
-                    "Address ID": home["Address ID"],
-                    "Match to Surveyed": "Approximate",
-                    "Proposed Package Ref": package,
-                    "Surveyed Archetype Roofs": surveyed_roofs,
-                    "Surveyed Archetype Heating": surveyed_heating
-                }
-            )
-            continue
-        # Re-do
-        package = " or ".join(sorted([x for x in surveyed["Package Ref"].unique() if x.strip()]))
-        package = package.replace("\n", "")
-        surveyed_roofs = " or ".join(sorted([x for x in surveyed["Surveyed Main Roof"].unique() if x.strip()]))
-        surveyed_roofs = surveyed_roofs.replace("\n", "")
-        surveyed_heating = " or ".join(sorted([x for x in surveyed["Surveyed Main Heating"].unique() if x.strip()]))
-        surveyed_heating = surveyed_heating.replace("\n", "")
-
-        match_classification.append(
-            {
-                "Address ID": home["Address ID"],
-                "Match to Surveyed": "Exact",
-                "Proposed Package Ref": package,
-                "Surveyed Archetype Roofs": surveyed_roofs,
-                "Surveyed Archetype Heating": surveyed_heating
-            }
-        )
-
-    match_classification = pd.DataFrame(match_classification)
-
-    proposed_sample = proposed_sample.merge(
-        match_classification,
-        on="Address ID",
-        how="left",
-    )
-
-    # Merge on the cost per archetype
-    cost_per_archetype = (
-        archetypes_to_cost.groupby("Archetype ID")[['Total Cost of Measures inc Contingency']].mean().reset_index()
-    )
-    proposed_sample = proposed_sample.merge(
-        cost_per_archetype,
-        on="Archetype ID",
-        how="left"
-    )
-
-    # We add on a boolean to indicate if a property from that archetype has been modelled
-    proposed_sample = proposed_sample.merge(
-        archetypes_to_cost.groupby("Archetype ID")[["Has been modelled"]].any().reset_index(),
-        on="Archetype ID",
-        how="left"
-    )
-
-    proposed_sample["Total Cost of Measures inc Contingency"] = np.where(
-        ~proposed_sample["Has been modelled"],
-        None, proposed_sample["Total Cost of Measures inc Contingency"]
-    )
-
-    proposed_sample = proposed_sample.sort_values("Archetype ID", ascending=True)
-
-    # Save excel
-    proposed_sample.to_excel(CUSTOMER_FOLDER_PATH + "/Stonewater - Proposed Wave 3 Bid V2 (WIP).xlsx", index=False)
-
-    # For each postcode that's in the bid, we also summarise the number of units in the bid and number left out
-    proposed_sample_postcodes = proposed_sample["Postcode"].unique()
-
-    postcode_summary = []
-    for postcode in proposed_sample_postcodes:
-        in_proposal = proposed_sample[proposed_sample["Postcode"] == postcode]
-        not_in_proposal = not_proposed[not_proposed["Postcode"] == postcode]
-        postcode_summary.append(
-            {
-                "Postcode": postcode,
-                "Number of properties in Proposal": len(in_proposal),
-                "Number of properties not in Proposal": len(not_in_proposal)
-            }
-        )
-    postcode_summary = pd.DataFrame(postcode_summary)
-    postcode_summary = postcode_summary.sort_values(
-        "Number of properties not in Proposal",
-        ascending=False).reset_index(drop=True)
-
-    postcode_summary.to_excel(
-        CUSTOMER_FOLDER_PATH + "/Stonewater - Proposed Wave 3 Bid Postcode Summary.xlsx", index=False
-    )
-
-
-def find_remaining_surveys():
-    """
-    Work out which surveyed properties do not yet have a costed retrofit package.
-
-    Reads the survey board workbook and the "Modelled Packages" sheet of the costed packages workbook, keeps the
-    surveyed rows whose "Address ID" does not appear among the costed rows, and writes their id, Name and Postcode
-    to needed_surveys.csv, so I know what needs to be downloaded from Sharepoint.
-
-    :return: None
-    :raises AssertionError: if the needed and costed row counts do not add up to the surveyed row count
-    """
-
-    surveyed = pd.read_excel(
-        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater"
-        "/Stonewater_SHDF_3_0_Board_work_in_progress_- 07.11.24.xlsx",
-        header=4
-    )
-
-    costed = pd.read_excel(
-        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater - Costed Retrofit Packages "
-        "20241030 (WIP) MR Review v1.xlsx",
-        header=13,
-        sheet_name="Modelled Packages"
-    )
-    # Drop rows that carry no Address ID
-    costed = costed[~pd.isnull(costed["Address ID"])]
-
-    # Take an explicit copy: a column is added below, and assigning onto a filtered slice is chained assignment
-    # (SettingWithCopyWarning)
-    needed = surveyed[~surveyed["Address ID"].isin(costed["Address ID"])].copy()
-
-    # Sortable identifier built from the archetype and the rank within the archetype group
-    needed["id"] = needed["Archetype ID"].astype(str) + "-" + needed["Arch. Group Rank"].astype(str)
-    needed = needed.sort_values("id", ascending=True)
-    needed[["id", "Name", "Postcode"]].to_csv(
-        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/needed_surveys.csv"
-    )
-
-    # Sanity check on the row counts. Raised explicitly rather than with a bare `assert`, so the check still runs
-    # when Python is started with -O; the exception type is unchanged
-    if needed.shape[0] + costed.shape[0] != surveyed.shape[0]:
-        raise AssertionError(
-            f"needed ({needed.shape[0]}) + costed ({costed.shape[0]}) rows != surveyed ({surveyed.shape[0]}) rows"
-        )
-
-
-def append_stonewater_id():
-    """
-    Complete an adhoc request from Stonewater to add their organisation reference onto the model.
-
-    Reads the "Modelled Packages" sheet of the bid packages workbook and the archetyped triage workbook, left-joins
-    the "Org. ref." column onto the modelled rows by "Address ID", and writes the result to "Stonewater IDs.xlsx".
-
-    :return: None
-    :raises ValueError: if any modelled row ends up without an "Org. ref." after the join
-    """
-
-    model_proposed_sample = pd.read_excel(
-        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater - Bid Packages WIP 13.11.24.xlsx",
-        sheet_name="Modelled Packages",
-        header=13
-    )
-    # Copy after filtering: the Address ID column is re-assigned below, and writing onto a filtered slice is
-    # chained assignment (SettingWithCopyWarning)
-    model_proposed_sample = model_proposed_sample[~pd.isnull(model_proposed_sample["Address ID"])].copy()
-    model_proposed_sample["Address ID"] = model_proposed_sample["Address ID"].astype(int)
-
-    original_archetypes = pd.read_excel(
-        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater SHDF_3_0_Board Triage 22.05.24 "
-        "- Archetyped V3.1.xlsx",
-        header=4
-    )
-    original_archetypes = original_archetypes[~pd.isnull(original_archetypes["Address ID"])]
-    # Drop rows whose Address ID cell holds the literal header text, then copy before the dtype conversion
-    original_archetypes = original_archetypes[original_archetypes["Address ID"] != "Address ID"].copy()
-    original_archetypes["Address ID"] = original_archetypes["Address ID"].astype(int)
-
-    matched = model_proposed_sample.merge(
-        original_archetypes[["Address ID", 'Org. ref.']],
-        on="Address ID",
-        how="left"
-    )
-
-    # Every modelled property must have found its organisation reference
-    if pd.isnull(matched["Org. ref."]).sum():
-        raise ValueError("Something went wrong")
-
-    # Save as an Excel workbook
-    matched.to_excel(
-        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater IDs.xlsx",
-        sheet_name="Proposed Wave 3 Sample",
-        index=False
-    )
-
-
-def propsed_wave_3_sample():
-    """
-    Stonewater want to ensure that, when selecting properties for wave 3, they choose properties such that
-    most of the properties within a geographical area are treatable within the bid.
-    Namely, if we take a geographical area (which could be a postal region), they want most, and ideally all, of the
-    properties within that geographical area to be included within the bid.
-    :return:
-    """
-
-    asset_list = pd.read_excel(
-        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater SHDF_3_0_Board Triage 22.05.24 "
-        "- Archetyped V3.1.xlsx",
-        header=4
-    )
-
-    # TODO: We drop 7 properties missing
-    # UPRN
-    asset_list = asset_list[~asset_list["Archetype ID"].isin(["MISSING UPRN"])]
-    # Clean address ids
-    asset_list = asset_list[~pd.isnull(asset_list["Address ID"])]
-    asset_list = asset_list[asset_list["Address ID"] != "Address ID"]
-    asset_list["Address ID"] = asset_list["Address ID"].astype(int)
-
-    asset_list["Street name"] = np.where(
-        pd.isnull(asset_list["Street name"]),
-        asset_list["Postcode"],
-        asset_list["Street name"]
-    )
-
-    # Create the postal region, taking the first part of the postcode
-    asset_list["Postal Region"] = asset_list["Postcode"].str.split(" ").str[0]
-    asset_list["Street and Region"] = asset_list["Street name"] + " " + asset_list["Postal Region"]
-    unique_postal_regions = asset_list["Postal Region"].unique()
-
-    # Keep just the columns we need
-    asset_list = asset_list[
-        ["UPRN", "Address ID", 'Org. ref.', "Archetype ID", "Postal Region", "Name", "Postcode", "Street and Region",
-         "Property Type", "Wall Type", "Roof Type", "Heating"]
-    ]
-
-    survey_results = pd.read_excel(
-        os.path.join(CUSTOMER_FOLDER_PATH, "Stonewater - Bid Packages WIP 14.11.19 V2.xlsx"),
-        header=13,
-        sheet_name="Modelled Packages"
-    )
-
-    survey_results = survey_results[
-        [
-            "Address ID", "Archetype ID", "Current SAP Rating", "Current EPC Band", "Postcode",
-            "Main Roof Type", "Main Roof Insulation", "Main Roof Insulation Thickness",
-            "Existing Primary Heating System",
-            "Package Ref",
-            "Main Wall Type", "Main Wall Insulation Type", "Main Wall Thickness",
-            "Main Building Alternative Wall Type", "Main Building Alternative Wall Insulation",
-            "Main Building Alternative Wall Thickness"
-        ]
-    ].rename(
-        columns={
-            "Existing Primary Heating System": "Survey: Primary Heating System"
-        }
-    )
-
-    survey_results["Postal Region"] = survey_results["Postcode"].str.split(" ").str[0]
-    # Concatenate from the wall information
-    survey_results["Survey: Main Wall Type"] = survey_results["Main Wall Type"].astype(str) + ": " + survey_results[
-        "Main Wall Insulation Type"].astype(str)
-    # Alternative wall
-    survey_results["Survey: Main Alternative Wall"] = (
-        survey_results["Main Building Alternative Wall Type"].astype(str) + ": " + survey_results[
-        "Main Building Alternative Wall Insulation"].astype(str)
-    )
-    # Roof information
-    survey_results["Survey: Main Roof Type"] = survey_results["Main Roof Type"].astype(str) + ": " + survey_results[
-        "Main Roof Insulation"].astype(str) + ": " + survey_results["Main Roof Insulation Thickness"].astype(str)
-
-    # Drop the individual columns:
-    survey_results = survey_results.drop(
-        columns=[
-            "Main Roof Type", "Main Roof Insulation", "Main Roof Insulation Thickness",
-            "Main Wall Type", "Main Wall Insulation Type",
-            "Main Building Alternative Wall Type", "Main Building Alternative Wall Insulation"
-        ]
-    )
-
-    survey_results_with_original_features = survey_results.merge(
-        asset_list[["UPRN", "Address ID", "Property Type", "Wall Type", "Roof Type", "Heating"]],
-        on="Address ID",
-        how="left"
-    )
-
-    if survey_results_with_original_features.shape[0] != survey_results.shape[0]:
-        raise ValueError("Something went wrong")
-
-    # Against properties that have NO package ref, we assign a package ref
-    properties_with_packages = survey_results_with_original_features[
-        ~pd.isnull(survey_results_with_original_features["Package Ref"])
-    ]
-
-    properties_without_packages = survey_results_with_original_features[
-        (survey_results_with_original_features["Current SAP Rating"] < 69) & pd.isnull(
-            survey_results_with_original_features["Package Ref"]
-        )
-        ]
-
-    # Change this to a lookup
-    package_ratings = pd.DataFrame([
-        {
-            "1A": 1,
-            "1B": 2,
-            "2A": 3,
-            "2B": 4,
-            "3A": 5,
-            "3B": 6,
-            4: 7
-        }
-    ])
-    package_ratings = pd.melt(package_ratings, var_name="Package Ref", value_name="Rank")
-
-    mapped_package_refs = []
-    for _, property in tqdm(properties_without_packages.iterrows(), total=len(properties_without_packages)):
-        # Same archetype?
-        matches = properties_with_packages[properties_with_packages["Archetype ID"] == property["Archetype ID"]]
-
-        if matches.empty:
-            # Similar property
-            matches = properties_with_packages[
-                (properties_with_packages["Property Type"].str.split(":").str[0] ==
-                 property["Property Type"].split(":")[0]) &
-                (properties_with_packages["Wall Type"] == property["Wall Type"]) &
-                (properties_with_packages["Roof Type"].str.split(":").str[0] == property["Roof Type"].split(":")[0]) &
-                (properties_with_packages["Heating"].str.split(":").str[0] == property["Heating"].split(":")[0])
-                ]
-        if matches.empty:
-            matches = properties_with_packages[
-                (properties_with_packages["Property Type"].str.split(":").str[0] ==
-                 property["Property Type"].split(":")[0]) &
-                (properties_with_packages["Wall Type"].str.split(":").str[0] == property["Wall Type"].split(":")[0]) &
-                (properties_with_packages["Roof Type"].str.split(":").str[0] == property["Roof Type"].split(":")[0]) &
-                (properties_with_packages["Heating"].str.split(":").str[0] == property["Heating"].split(":")[0])
-                ]
-        if matches.empty:
-            raise Exception("Implement me")
-        if matches.shape[0] > 1:
-            # Take the package with the highest rank
-            matches = matches.merge(
-                package_ratings,
-                on="Package Ref",
-                how="left"
-            ).sort_values("Rank", ascending=False).head(1)
-
-        mapped_package_refs.append(
-            {
-                "Address ID": property["Address ID"],
-                "Matched Package Ref": matches["Package Ref"].values[0]
-            }
-        )
-
-    mapped_package_refs = pd.DataFrame(mapped_package_refs)
-
-    survey_results = survey_results.merge(
-        mapped_package_refs,
-        on="Address ID",
-        how="left"
-    )
-    survey_results["Package Ref"] = np.where(
-        pd.notnull(survey_results["Matched Package Ref"]),
-        survey_results["Matched Package Ref"],
-        survey_results["Package Ref"]
-    )
-    survey_results = survey_results.drop(columns=["Matched Package Ref"])
-
-    # Do the same with survey_results_with_original_features
-    survey_results_with_original_features = survey_results_with_original_features.merge(
-        mapped_package_refs,
-        on="Address ID",
-        how="left"
-    )
-    survey_results_with_original_features["Package Ref"] = np.where(
-        pd.notnull(survey_results_with_original_features["Matched Package Ref"]),
-        survey_results_with_original_features["Matched Package Ref"],
-        survey_results_with_original_features["Package Ref"]
-    )
-    survey_results_with_original_features = survey_results_with_original_features.drop(columns=["Matched Package Ref"])
-
-    # Save the data for reference
-    # mapped_package_refs = mapped_package_refs.merge(
-    #     asset_list[["Name", "Postcode", "Address ID", "Org. ref."]],
-    #     on="Address ID",
-    #     how="left"
-    # )
-    # mapped_package_refs.to_csv(os.path.join(CUSTOMER_FOLDER_PATH, "mapped_package_refs.csv"), index=False)
-
-    # We get longitude & Latitude
-    archetyping_spatial_features = read_pickle_from_s3(
-        bucket_name="retrofit-data-dev", s3_file_name="scustomers/Stonewater/clustering/spatial_data_to_uprn.pkl",
-    )
-    archetyping_spatial_features = pd.concat(archetyping_spatial_features)
-    archetyping_spatial_features = archetyping_spatial_features[["UPRN", 'LATITUDE', 'LONGITUDE']].rename(
-        columns={"LATITUDE": "latitude", "LONGITUDE": "longitude"}
-    )
-    # Merge them onto both datasets
-    asset_list = asset_list.merge(
-        archetyping_spatial_features, how="left", on="UPRN"
-    )
-    if pd.isnull(asset_list["longitude"]).sum():
-        raise ValueError("Something went wrong")
-
-    survey_results_with_original_features = survey_results_with_original_features.merge(
-        archetyping_spatial_features, how="left", on="UPRN"
-    )
-    if pd.isnull(survey_results_with_original_features["longitude"]).sum():
-        raise ValueError("Something went wrong")
-
-    def haversine(lat1, lon1, lat2, lon2):
-        """
-        Great-circle distance between two points using the haversine formula.
-
-        Coordinates are given in degrees. The second point may be passed as numpy arrays, in which case an array
-        of distances is returned.
-        :return: distance in metres
-        """
-        earth_radius_m = 6371000
-
-        # Work in radians from here on
-        phi1, lam1, phi2, lam2 = (np.radians(angle) for angle in (lat1, lon1, lat2, lon2))
-
-        delta_phi = phi2 - phi1
-        delta_lam = lam2 - lam1
-
-        hav = np.sin(delta_phi / 2.0) ** 2 + np.cos(phi1) * np.cos(phi2) * np.sin(delta_lam / 2.0) ** 2
-        central_angle = 2 * np.arctan2(np.sqrt(hav), np.sqrt(1 - hav))
-        return earth_radius_m * central_angle
-
-    # Tier definitions
-    # Tier 1: We have a property in the same postal region and same archetype that was surveyed and is below EPC D
-    # Tier 2: We have a property in the same archetype that was surveyed and is below EPC D
-    #
-
-    def match_property_to_surveyed(property, survey_results_with_original_features):
-        """
-        Select the surveyed rows that best resemble ``property``, trying three filters from strictest to loosest.
-
-        "Prefix" below means the text before the first ":" of the column value.
-          1. same Postal Region, same Property Type, same Wall / Roof / Heating prefix
-          2. same Postal Region, same Property Type / Wall / Roof / Heating prefix
-          3. any region: same Property Type and Wall Type, same Roof / Heating prefix
-
-        :return: a copy of the rows matched by the first filter that finds any; if none does, the (empty) result
-                 of the third filter
-        """
-        candidates = survey_results_with_original_features
-
-        def same_value(column):
-            # Whole-value equality
-            return candidates[column] == property[column]
-
-        def same_prefix(column):
-            # Equality on the text before the first ":"
-            return candidates[column].str.split(":").str[0] == property[column].split(":")[0]
-
-        tiers = (
-            (
-                (same_value, "Postal Region"), (same_value, "Property Type"), (same_prefix, "Wall Type"),
-                (same_prefix, "Roof Type"), (same_prefix, "Heating"),
-            ),
-            (
-                (same_value, "Postal Region"), (same_prefix, "Property Type"), (same_prefix, "Wall Type"),
-                (same_prefix, "Roof Type"), (same_prefix, "Heating"),
-            ),
-            (
-                (same_value, "Property Type"), (same_value, "Wall Type"),
-                (same_prefix, "Roof Type"), (same_prefix, "Heating"),
-            ),
-        )
-
-        surveyed = None
-        for criteria in tiers:
-            # Conditions are built lazily and and-ed left to right, tier by tier
-            mask = None
-            for build_condition, column in criteria:
-                condition = build_condition(column)
-                mask = condition if mask is None else mask & condition
-            surveyed = candidates[mask].copy()
-            if not surveyed.empty:
-                break
-
-        return surveyed
-
-    def fill_survey_columns(region_assets, suffix):
-        """
-        Back-fill the survey columns of ``region_assets`` from their ``<column><suffix>`` counterparts.
-
-        A value is only taken from the suffixed column where the base column is null and the suffixed one is not.
-        The frame is modified in place and also returned.
-        """
-        fillable_columns = (
-            'Current EPC Band', 'Current SAP Rating',
-            'Survey: Main Wall Type', 'Survey: Main Alternative Wall',
-            'Survey: Main Roof Type', 'Survey: Primary Heating System',
-            'Survey: Matching Address ID', 'Distance to Closest Match (m)',
-            "Package Ref",
-        )
-        for name in fillable_columns:
-            current = region_assets[name]
-            fallback = region_assets[name + suffix]
-            take_fallback = pd.isnull(current) & pd.notnull(fallback)
-            region_assets[name] = np.where(take_fallback, fallback, current)
-        return region_assets
-
-    survey_attribute_columns = [
-        "Survey: Main Wall Type", 'Survey: Main Alternative Wall', 'Survey: Main Roof Type',
-        'Survey: Primary Heating System'
-    ]
-
-    survey_results["Survey: Matching Address ID"] = survey_results["Address ID"].copy()
-
-    results = []
-    for region in tqdm(unique_postal_regions):
-        # Take all of the properties in that region
-        region_assets = asset_list[asset_list["Postal Region"] == region].copy()
-
-        # We have a tier 1 match if the property itself was surveyed
-        exact_surveyed = survey_results[
-            survey_results["Address ID"].isin(region_assets["Address ID"])
-        ]
-
-        region_assets = region_assets.merge(
-            exact_surveyed[
-                ["Address ID", "Current EPC Band", "Current SAP Rating"] + survey_attribute_columns + [
-                    "Survey: Matching Address ID", "Package Ref"
-                ]
-                ],
-            on="Address ID",
-            how="left"
-        )
-        region_assets['Distance to Closest Match (m)'] = None
-        region_assets["Distance to Closest Match (m)"] = np.where(
-            ~pd.isnull(region_assets["Current EPC Band"]),
-            0,
-            region_assets["Distance to Closest Match (m)"]
-        )
-
-        # Label the tier 1 properties
-        region_assets["Confidence Tier"] = None
-        region_assets["Confidence Tier"] = np.where(
-            region_assets["Current EPC Band"].isin(["D", "E", "F", "G"]),
-            "1 - property was surveyed", region_assets["Confidence Tier"]
-        )
-
-        region_assets["Confidence Tier"] = np.where(
-            region_assets["Current EPC Band"].isin(["C", "B", "A"]),
-            "5 - property was surveyed", region_assets["Confidence Tier"]
-        )
-
-        archetype_ids = region_assets[
-            pd.isnull(region_assets["Confidence Tier"])
-        ]["Archetype ID"].unique()
-        # We get the properties that have been surveyed
-
-        region_surveyed = []
-        for arch_id in archetype_ids:
-            for _, property in region_assets[region_assets["Archetype ID"] == arch_id].iterrows():
-                archetype_data = survey_results_with_original_features[
-                    survey_results["Archetype ID"] == arch_id
-                    ].copy()
-                if archetype_data.empty:
-                    continue
-
-                match_type = "2 - same archetype"
-                if any(archetype_data["Postal Region"] == property["Postal Region"]):
-                    match_type = "1 - same archetype, same postal region"
-                    archetype_data = archetype_data[
-                        archetype_data["Postal Region"] == property["Postal Region"]
-                        ]
-
-                if archetype_data.shape[0] > 1:
-                    # Look for an exact match, or as close as possible
-                    archetype_data_filtered = match_property_to_surveyed(property, archetype_data)
-                    if not archetype_data_filtered.empty:
-                        archetype_data = archetype_data_filtered
-
-                archetype_data["distance_meters"] = haversine(
-                    lat1=property.latitude, lon1=property.longitude,
-                    lat2=archetype_data["latitude"].values, lon2=archetype_data["longitude"].values
-                )
-                expected_sap = np.average(
-                    archetype_data["Current SAP Rating"], weights=1 / (archetype_data["distance_meters"] + 1)
-                )
-                expected_epc = sap_to_epc(expected_sap)
-
-                archetype_data = archetype_data.sort_values("distance_meters", ascending=True)
-
-                # We take the features of the closest matching property
-                closest_match = archetype_data.iloc[0]
-
-                # Set the package ref
-                if expected_epc in ["C", "B", "A"]:
-                    package_ref = None
-                else:
-                    package_ref = archetype_data["Package Ref"].dropna().values[0]
-
-                region_surveyed.append(
-                    {
-                        "Archetype ID": arch_id,
-                        "Address ID": property["Address ID"],
-                        "Current EPC Band": expected_epc,
-                        "Current SAP Rating": expected_sap,
-                        'Survey: Main Wall Type': closest_match["Survey: Main Wall Type"],
-                        'Survey: Main Alternative Wall': closest_match["Survey: Main Alternative Wall"],
-                        'Survey: Main Roof Type': closest_match["Survey: Main Roof Type"],
-                        'Survey: Primary Heating System': closest_match["Survey: Primary Heating System"],
-                        "Survey: Matching Address ID": closest_match["Address ID"],
-                        'Distance to Closest Match (m)': closest_match["distance_meters"],
-                        "Package Ref": package_ref,
-                        "Match Type": match_type
-                    }
-                )
-        region_surveyed = pd.DataFrame(region_surveyed)
-
-        if region_surveyed.empty:
-            region_surveyed = pd.DataFrame(
-                columns=[
-                    "Archetype ID", "Address ID", "Current EPC Band", "Current SAP Rating",
-                    'Survey: Main Wall Type', 'Survey: Main Alternative Wall', 'Survey: Main Roof Type',
-                    'Survey: Primary Heating System', "Survey: Matching Address ID", 'Distance to Closest Match (m)',
-                    "Match Type", "Package Ref"
-                ]
-            )
-
-        starting_shape = region_assets.shape[0]
-        region_assets = region_assets.merge(
-            region_surveyed,
-            on=["Archetype ID", "Address ID"],
-            how="left",
-            suffixes=("", "_method1")
-        )
-        if region_assets.shape[0] != starting_shape:
-            raise ValueError("Something went wrong")
-
-        # Label the tier 1 properties
-        region_assets["Confidence Tier"] = np.where(
-            region_assets["Current EPC Band_method1"].isin(["D", "E", "F", "G"]) &
-            pd.isnull(region_assets["Confidence Tier"]) & ~pd.isnull(region_assets["Match Type"]),
-            region_assets["Match Type"], region_assets["Confidence Tier"]
-        )
-
-        # Handle EPC C
-        region_assets["Confidence Tier"] = np.where(
-            region_assets["Current EPC Band_method1"].isin(["C", "B", "F", "G"]) &
-            pd.isnull(region_assets["Confidence Tier"]),
-            "5 - EPC C or above", region_assets["Confidence Tier"]
-        )
-
-        region_assets = fill_survey_columns(region_assets, suffix="_method1")
-
-        method_1_columns = [c for c in region_assets.columns if c.endswith("_method1")]
-        region_assets = region_assets.drop(columns=method_1_columns + ["Match Type"])
-
-        missed_addressids = region_assets[pd.isnull(region_assets["Confidence Tier"])]["Address ID"].unique().tolist()
-
-        if not missed_addressids:
-            results.append(region_assets)
-            continue
-
-            # This means that this archetype was never surveyed and so we need to find a sufficiently similar property
-        final_missed_matches = []
-        for a_id in missed_addressids:
-
-            match_type = "3 - compared to similar properties"
-
-            property = asset_list[asset_list["Address ID"] == a_id].squeeze()
-
-            surveyed = match_property_to_surveyed(property, survey_results_with_original_features)
-
-            if surveyed.empty:
-                match_type = "3 - compared to similar properties, relaxed"
-                # In this case, we do one additional check where we filter on everything the same apart from heating,
-                # where we do a slightly more rough match
-                surveyed = survey_results_with_original_features[
-                    (
-                        survey_results_with_original_features["Property Type"].str.split(":").str[0] ==
-                        property["Property Type"].split(":")[0]
-                    ) &
-                    (
-                        survey_results_with_original_features["Wall Type"].str.split(":").str[0] ==
-                        property["Wall Type"].split(":")[0]
-                    ) &
-                    (
-                        survey_results_with_original_features["Roof Type"].str.split(":").str[0] ==
-                        property["Roof Type"].split(":")[0]
-                    )
-                    ].copy()
-
-                if surveyed.empty:
-                    if property["Property Type"].split(":")[0] in ["House", "Bungalow", "Maisonette"]:
-                        filter_property_types = ["House", "Bungalow", ]
-                    else:
-                        filter_property_types = ["Flat"]
-                    surveyed = survey_results_with_original_features[
-                        (
-                            survey_results_with_original_features["Property Type"].str.split(":").str[0].isin(
-                                filter_property_types
-                            )
-                        ) &
-                        (
-                            survey_results_with_original_features["Wall Type"].str.split(":").str[0] ==
-                            property["Wall Type"].split(":")[0]
-                        ) &
-                        (
-                            survey_results_with_original_features["Roof Type"].str.split(":").str[0] ==
-                            property["Roof Type"].split(":")[0]
-                        )
-                        ].copy()
-
-                if "Electric" in property["Heating"]:
-                    # Take other electric heating systems
-                    surveyed = surveyed[surveyed["Heating"].str.contains("Electric")]
-                elif property["Heating"] in [
-                    "Community Heating Systems: Community boilers only (RdSAP)",
-                    "Community Heating Systems: Community CHP and boilers (RdSAP)"
-                ]:
-                    # Take other community heating systems
-                    surveyed = surveyed[surveyed["Heating"].str.contains("Community")]
-                elif property["Heating"] == 'Heat Pump: (from database)':
-                    # Take other heat pumps
-                    surveyed = surveyed[surveyed["Heating"].str.contains("Heat Pump")]
-                elif property["Heating"] == "Solid fuel room heaters: Open fire in grate":
-                    # Take other properties with room heaters
-                    surveyed = surveyed[surveyed["Heating"].str.contains("room heaters")]
-                elif "Boiler" in property["Heating"]:
-                    # Take other properties with boilers
-                    surveyed = surveyed[surveyed["Heating"].str.contains("Boiler")]
-                else:
-                    raise Exception("Fix me")
-
-            if surveyed.empty:
-                final_missed_matches.append(
-                    {
-                        "Address ID": a_id,
-                        "Confidence Tier": "4 - no similar property, needs survey to confirm",
-                        "Current EPC Band": "Needs Survey",
-                        "Current SAP Rating": "Needs Survey",
-                        'Survey: Main Wall Type': "Not Surveyed",
-                        "Survey: Main Alternative Wall": "Not Surveyed",
-                        "Survey: Main Roof Type": "Not Surveyed",
-                        "Survey: Primary Heating System": "Not Surveyed",
-                        "Survey: Matching Address ID": "Not Surveyed",
-                        'Distance to Closest Match (m)': 9999999,
-                        "Package Ref": "Not Surveyed",
-                    }
-                )
-                continue
-
-            # Calculate distance
-            surveyed["distance_meters"] = haversine(
-                lat1=property["latitude"], lon1=property["longitude"],
-                lat2=surveyed["latitude"].values, lon2=surveyed["longitude"].values
-            )
-            surveyed = surveyed.sort_values("distance_meters", ascending=True)
-
-            # Check if we have a postcode match check if surveyed postcode is the same as the property postcode
-            if any(surveyed["Postcode"] == property["Postcode"]):
-                surveyed = surveyed[surveyed["Postcode"] == property["Postcode"]]
-
-            if any(surveyed["Postal Region"] == property["Postal Region"]):
-                surveyed = surveyed[surveyed["Postal Region"] == property["Postal Region"]]
-
-            # Take the 3 nearest
-            surveyed = surveyed.head(3)
-
-            # perform a weighted mean of SAP rating - the closer the better
-            expected_sap = np.average(
-                surveyed["Current SAP Rating"], weights=1 / (surveyed["distance_meters"] + 1)
-            )
-            expected_epc = sap_to_epc(expected_sap)
-
-            if expected_epc in ["C", "B", "A"]:
-                match_type = "5 - EPC C or above"
-
-            closest_match = surveyed.iloc[0]
-
-            # The closest property may be an EPC C, we we take the package ref from the property that's the nearest
-            # with non-NA package ref
-            if expected_epc in ["C", "B", "A"]:
-                package_ref = None
-            else:
-                package_ref = surveyed["Package Ref"].dropna().values[0]
-
-            final_missed_matches.append(
-                {
-                    "Address ID": a_id,
-                    "Confidence Tier": match_type,
-                    "Current EPC Band": expected_epc,
-                    "Current SAP Rating": expected_sap,
-                    'Survey: Main Wall Type': closest_match["Survey: Main Wall Type"],
-                    "Survey: Main Alternative Wall": closest_match["Survey: Main Alternative Wall"],
-                    "Survey: Main Roof Type": closest_match["Survey: Main Roof Type"],
-                    "Survey: Primary Heating System": closest_match["Survey: Primary Heating System"],
-                    "Survey: Matching Address ID": closest_match["Address ID"],
-                    'Distance to Closest Match (m)': closest_match["distance_meters"],
-                    "Package Ref": package_ref
-                }
-            )
-            continue
-
-        final_missed_matches = pd.DataFrame(final_missed_matches)
-
-        region_assets = region_assets.merge(
-            final_missed_matches,
-            on="Address ID",
-            how="left",
-            suffixes=("", "_method3")
-        )
-
-        region_assets["Confidence Tier"] = region_assets["Confidence Tier"].fillna(
-            region_assets["Confidence Tier_method3"]
-        )
-
-        region_assets = fill_survey_columns(region_assets, suffix="_method3")
-
-        method_3_columns = [c for c in region_assets.columns if c.endswith("_method3")]
-        region_assets = region_assets.drop(columns=method_3_columns)
-
-        if pd.isnull(region_assets["Current EPC Band"]).sum():
-            raise Exception("Something went wrong")
-
-        results.append(region_assets)
-
-    results = pd.concat(results)
-
-    if (pd.isnull(results["Package Ref"]) & (~results["Current EPC Band"].isin(["A", "B", "C"]))).sum():
-        raise ValueError("Missing Package Refs")
-
-    # Check if there are missings in current epc band, current sap rating or any of the survey attributes
-    for c in (
-        [
-            "Current EPC Band", "Current SAP Rating", "Survey: Matching Address ID", 'Distance to Closest Match (m)'] +
-        survey_attribute_columns
-    ):
-        if pd.isnull(results[c]).sum():
-            raise Exception("Something went wrong")
-
-    gain_columns = sorted([x for x in results["Confidence Tier"].unique() if "1 - " in x or "2 - " in x or "3 - " in x])
-    loss_columns = sorted([x for x in results["Confidence Tier"].unique() if "4 - " in x or "5 - " in x])
-
-    def optimise(gain, loss, max_loss=250):
-
-        # Define the coefficients for the objective function (negative because we maximize Gain)
-        c = -gain
-
-        # Define constraints
-        A = [loss]  # Only 1 constraint for now, total Loss
-        b = [max_loss]  # Maximum total Loss allowed
-
-        # Bounds for each variable (select or not select each row, 0 <= x <= 1)
-        bounds = [(0, 1) for _ in gain]
-
-        # Solve the problem using linprog with HiGHS solver
-        result = linprog(c, A_ub=A, b_ub=b, bounds=bounds, method='highs')
-        if not result.success:
-            raise Exception("Optimization failed")
-
-        selected_rows = result.x.round().astype(int)  # Rounded to 0 or 1
-        optimal_gain = -result.fun
-
-        return selected_rows, optimal_gain
-
-    street_summary = results.pivot_table(
-        index='Street and Region',
-        columns='Confidence Tier',
-        aggfunc='size',
-        fill_value=0
-    ).reset_index()
-
-    street_summary["Gain"] = street_summary[gain_columns].sum(axis=1)
-    street_summary["Loss"] = street_summary[loss_columns].sum(axis=1)
-
-    selected_rows, _ = optimise(
-        gain=street_summary["Gain"].values,
-        loss=street_summary["Loss"].values,
-        max_loss=250
-    )
-
-    street_summary["Selected"] = selected_rows == 1
-    print(street_summary[street_summary["Selected"]][["Gain", "Loss"]].sum())
-
-    selected_streets = street_summary[
-        street_summary["Selected"]
-    ]
-
-    totals = selected_streets[["Gain", "Loss"]].sum()
-
-    bid_size = totals.sum()
-    print("Bid Size:", bid_size)
-    total_epc_d_or_below = totals["Gain"]
-    print("Total EPC D or below:", total_epc_d_or_below)
-    total_epc_c = totals["Loss"]
-    print("Total EPC C or above:", total_epc_c)
-    # Total needing a survey
-    total_needing_survey = selected_streets[
-        "4 - no similar property, needs survey to confirm"
-    ].sum()
-    print("Total needing survey:", total_needing_survey)
-
-    # Label final outputs
-    # We create a summary of packages by street
-    results["Package Ref"] = results["Package Ref"].fillna("EPC C - No Package")
-    results["Package Ref"] = results["Package Ref"].astype(str)
-    results["Package Ref"] = np.where(
-        results["Package Ref"] == "4.0", "4", results["Package Ref"]
-    )
-    package_summary = results.pivot_table(
-        index='Street and Region',
-        columns='Package Ref',
-        aggfunc='size',
-        fill_value=0
-    ).reset_index()
-
-    assert sum([v for k, v in package_summary.sum().items() if k != "Street and Region"]) == results.shape[0]
-
-    street_bid_structure = street_summary.merge(
-        package_summary, how="left", on="Street and Region"
-    )
-    street_bid_structure = street_bid_structure.sort_values("Gain", ascending=False)
-
-    individual_units_programme = results.copy()
-    individual_units_programme["Unit in Programme"] = individual_units_programme["Street and Region"].isin(
-        street_bid_structure[street_bid_structure["Selected"]]["Street and Region"].values
-    )
-
-    # Merge on Stonewaters ID
-    asset_list_ids = pd.read_excel(
-        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater SHDF_3_0_Board Triage 22.05.24 "
-        "- Archetyped V3.1.xlsx",
-        header=4
-    )[["Address ID", "Org. ref."]]
-    # Clean address ids
-    asset_list_ids = asset_list_ids[~pd.isnull(asset_list_ids["Address ID"])]
-    asset_list_ids = asset_list_ids[asset_list_ids["Address ID"] != "Address ID"]
-    asset_list_ids["Address ID"] = asset_list_ids["Address ID"].astype(int)
-
-    individual_units_programme = individual_units_programme.merge(
-        asset_list_ids.rename(
-            columns={"Org. ref.": "Survey: Org. ref.", "Address ID": "Survey: Matching Address ID"}
-        ),
-        how="left",
-        on="Survey: Matching Address ID"
-    )
-
-    individual_units_programme["Survey: Org. ref."] = np.where(
-        (individual_units_programme["Survey: Matching Address ID"] == "Not Surveyed"),
-        "Not Surveyed",
-        individual_units_programme["Survey: Org. ref."]
-    )
-
-    if pd.isnull(individual_units_programme["Survey: Org. ref."]).sum() or pd.isnull(
-        individual_units_programme["Org. ref."]).sum():
-        raise ValueError("something went wrong")
-
-    for col in ["Survey: Main Roof Type", "Survey: Main Wall Type", "Survey: Main Alternative Wall"]:
-        individual_units_programme[col] = (
-            individual_units_programme[col]
-            .str.replace(r': nan(?=$|:)', '', regex=True)  # Remove ': nan' at the end or before another ':'
-            .str.replace(r':\s+:', ': ', regex=True)  # Replace occurrences of ': :' with ': '
-            .str.replace(r'\s+', ' ', regex=True)  # Replace multiple spaces with a single space
-            .str.strip()  # Strip leading/trailing spaces
-        )
-
-    # Any EPC C properties that have been included should be flagged as potential low carbon heating
-    selected_epc_c = individual_units_programme[
-        (individual_units_programme["Current EPC Band"].isin(["C", "B", "A", "Needs Survey"])) &
-        (individual_units_programme["Unit in Programme"])
-        ]
-
-    flat_wall_map = {
-        "CA Cavity: F Filled Cavity": False,
-        "CA Cavity: A As Built": True,
-        "SO Solid Brick: A As Built": True,
-        "Not Surveyed": False
-    }
-
-    heating_map = {
-        "BGW Post 98 Combi condens. with auto ign.": False,
-        "BGB Post 98 Regular condens. with auto ign.": False,
-        "SEK High heat retention storage heaters": False,
-        "SEB Modern slimline storage heaters": True,
-        "Not Surveyed": False
-    }
-
-    infill_data = []
-    for _, epc_c_property in selected_epc_c.iterrows():
-        if epc_c_property["Property Type"].split(":")[0] == "Flat":
-            # Look for a wall insulation measure
-            infill = flat_wall_map[epc_c_property["Survey: Main Wall Type"]]
-            infill_data.append(
-                {
-                    "Address ID": epc_c_property["Address ID"],
-                    "Street and Region": epc_c_property["Street and Region"],
-                    "Possible Flat Infill?": infill
-                }
-            )
-            continue
-
-        infill = heating_map[epc_c_property["Survey: Primary Heating System"]]
-        infill_data.append(
-            {
-                "Address ID": epc_c_property["Address ID"],
-                "Street and Region": epc_c_property["Street and Region"],
-                "Low Carbon Heating Infill?": infill
-            }
-        )
-    infill_data = pd.DataFrame(infill_data)
-
-    individual_units_programme = individual_units_programme.merge(
-        infill_data[["Address ID", 'Possible Flat Infill?', 'Low Carbon Heating Infill?']],
-        how="left", on="Address ID"
-    )
-
-    for c in ['Possible Flat Infill?', 'Low Carbon Heating Infill?']:
-        individual_units_programme[c] = individual_units_programme[c].fillna(False)
-
-    infill_by_street = infill_data.pivot_table(
-        index='Street and Region',
-        values=['Possible Flat Infill?', 'Low Carbon Heating Infill?'],
-        aggfunc='sum',
-        fill_value=0
-    ).reset_index()
-
-    street_bid_structure = street_bid_structure.merge(
-        infill_by_street, how="left", on="Street and Region"
-    )
-
-    for c in ['Low Carbon Heating Infill?', 'Possible Flat Infill?']:
-        street_bid_structure[c] = street_bid_structure[c].fillna(0)
-
-    master_sheet = pd.read_csv(
-        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Osmosis Reviewed - Parity Download 18.7 - "
-        "master "
-        "sheet.csv",
-        encoding='latin1'
-    )
-    master_sheet = master_sheet[["Address ID", "Main Fuel"]]
-
-    individual_units_programme = individual_units_programme.merge(
-        master_sheet, how="left", on="Address ID"
-    )
-
-    street_bid_structure.to_csv(
-        os.path.join(CUSTOMER_FOLDER_PATH, "Street Bid Structure V2.csv"), index=False
-    )
-
-    individual_units_programme.to_csv(
-        os.path.join(CUSTOMER_FOLDER_PATH, "Individual units - programme V2.csv"), index=False
-    )
-
-    survey_results = pd.read_excel(
-        os.path.join(CUSTOMER_FOLDER_PATH, "Stonewater - Bid Packages WIP 14.11.19 V2.xlsx"),
-        header=13,
-        sheet_name="Modelled Packages"
-    )
-
-    indivual_units = pd.read_csv(
-        os.path.join(CUSTOMER_FOLDER_PATH, "Individual units - programme V2.csv")
-    )
-
-    u_aids = survey_results["Archetype ID"].astype(str).unique()
-    units_in_bid = indivual_units[indivual_units['Unit in Programme']]["Archetype ID"].astype(str).values
-
-    len({v for v in units_in_bid if str(v) in u_aids})
-    len(list(set(units_in_bid)))
-
-
-def identify_incorrect_packages():
-    """
-    Due to limitations in the data collected during survey, we have some properties that do not have suitable packages
-    assigned. This function will identify those properties, which can be flagged for Stonewater's review
-    """
-
-    units_with_assigned_packages = pd.read_excel(
-        os.path.join(CUSTOMER_FOLDER_PATH, "Stonewater - Bid Packages WIP 14.11.20 V2.xlsx"),
-        header=2,
-        sheet_name="Individual Units Programme"
-    )
-
-    # This sheet contains information on the heating systems for properties, so we can flag any units that have
-    # been labelled as being electric but are actually gas
-    heating_survey_data = pd.read_excel(
-        os.path.join(CUSTOMER_FOLDER_PATH, "STOCKBOOK December 2024 data (5).xlsx"),
-        header=0,
-        sheet_name="Export"
-    )
-
-    units_with_assigned_packages = units_with_assigned_packages.merge(
-        heating_survey_data[["Asset Reference", "Heating Type"]], how="left",
-        left_on="Org. ref.", right_on="Asset Reference"
-    )
-
-    # Check the different heating types
-    units_with_assigned_packages["Gas properties: different to Parity"] = (
-        (
-            units_with_assigned_packages["Heating Type"].isin(["Gas", "Communal Gas"])
-        ) & (
-            units_with_assigned_packages["Heating"].isin(
-                [
-                    "Heat Pump: Electric Heat "
-                    "pumps: Air source heat pump "
-                    "with flow temperature <= 35°C",
-                    "Electric Storage Systems: Fan "
-                    "storage heaters",
-                    "Electric (direct acting) room "
-                    "heaters: Panel, convector or "
-                    "radiant heaters"
-                ]
-            )
-        )
-    )
-
-    units_with_assigned_packages["Electric properties: different to Parity"] = (
-        (units_with_assigned_packages["Heating Type"] == "Electric") & (
-        units_with_assigned_packages["Heating"].isin(
-            [
-                "Boiler: A rated Regular Boiler",
-                "Boiler: F rated Combi",
-                "No Heating",
-                "Boiler: A rated CPSU",
-                "Boiler: G rated Regular Boiler"
-            ]
-        )
-    )
-    )
-
-    units_with_assigned_packages["Ground Source properties: different to Parity"] = (
-        (units_with_assigned_packages["Heating Type"] == "Ground Source") & (
-        units_with_assigned_packages["Heating"].isin(
-            [
-                "Heat Pump: Electric Heat pumps: Air source heat pump with flow temperature <= 35°C",
-                "Electric Storage Systems: Fan storage heaters",
-                "Electric Storage Systems: High heat retention storage heaters"
-            ]
-        )
-    )
-    )
-
-    units_with_assigned_packages["LPG properties: different to Parity"] = (
-        (units_with_assigned_packages["Heating Type"] == "Lpg") & (
-        units_with_assigned_packages["Main Fuel"].isin(
-            [
-                "Gas: Mains Gas", "Solid Fuel: Wood Logs, Gas: Mains Gas"
-            ]
-        )
-    )
-    )
-
-    units_with_assigned_packages["Solid Fuel properties: different to Parity"] = (
-        (units_with_assigned_packages["Heating Type"] == "Solid Fuel") & (
-        units_with_assigned_packages["Main Fuel"].isin(
-            [
-                "Gas: Mains Gas"
-            ]
-        )
-    )
-    )
-
-    # The next check is to identify properties with specific features that are not condusive to specific packages. E.g.
-    # Solar PV packages for properties that have another dwelling above
-    # Label properties that have been matched to a package, during coordination, that includes Solar PV and has
-    # a property with a dwelling above
-    units_with_assigned_packages["Invalid Roof Type for Solar - coordination to be reviewed"] = (
-        (units_with_assigned_packages["Package Ref"].isin(["3A", "3B", "4", 4])) & (
-        units_with_assigned_packages["Survey: Main Roof Type"].str.contains("A Another dwelling above")
-    )
-    )
-
-    # Label properties that have a dwelling above in the Parity data, and weren't surveyed, but have been assigned
-    # a package that includes solar PV
-    units_with_assigned_packages["Invalid Roof Type for Solar - coordination to be reviewed"] = (
-        (units_with_assigned_packages["Package Ref"].isin(["3A", "3B", "4", 4])) & (
-        units_with_assigned_packages["Survey: Main Roof Type"].str.contains("A Another dwelling above")
-    )
-    )
-
-    # We now iterate through postcodes and find anomalous properties based on the partiy data and survey data
-    fields_to_check = [
-        'Wall Type Category',
-        # 'Roof Type Category',  - not very interesting
-        'Heating',
-        'Main Fuel',
-        'Survey: Main Wall Type',
-        # 'Survey: Main Roof Type',
-        'Survey: Primary Heating System'
-    ]
-
-    units_with_assigned_packages['Wall Type Category'] = units_with_assigned_packages['Wall Type'].str.replace(
-        r'\s*\(.*?\)', '', regex=True
-    )
-
-    # Create roof type category by splitting in colon and taking the first part
-    units_with_assigned_packages['Roof Type Category'] = units_with_assigned_packages['Roof Type'].str.split(':').str[0]
-
-    units_with_assigned_packages["Street, Region and Postcode"] = (
-        units_with_assigned_packages["Street and Region"] + ", " + units_with_assigned_packages["Postcode"]
-    )
-
-    def check_mixed_types(row):
-        # Count distinct primary types with non-zero values
-        primary_types_present = set()
-        for col in field_counts.columns:
-            if ':' in col:
-                primary_type = col.split(':')[0]
-                if row[col] > 0:  # Non-zero count means this type is present
-                    primary_types_present.add(primary_type)
-        return len(primary_types_present) > 1  # True if more than one primary type
-
-    aggregated_results = {}
-    for field in fields_to_check:
-        # Group by postcode and count occurrences of each unique value
-        field_counts = (
-            units_with_assigned_packages.groupby(['Street, Region and Postcode', field])
-            .size()
-            .unstack(fill_value=0)
-            .reset_index()
-        )
-
-        # Calculate dominant value and percentage before modifying the DataFrame
-        dominant_value = field_counts.iloc[:, 1:].idxmax(axis=1)
-        dominant_percentage = (
-            (field_counts.iloc[:, 1:].max(axis=1) / field_counts.iloc[:, 1:].sum(axis=1)) * 100
-        )
-        number_of_properties = field_counts.iloc[:, 1:].sum(axis=1)
-
-        # Add these as new columns after computation
-        field_counts['Dominant Value'] = dominant_value
-        field_counts['% Dominant'] = dominant_percentage
-        field_counts['Number of Properties'] = number_of_properties
-        field_counts['Mixed Type'] = field_counts.apply(check_mixed_types, axis=1)
-
-        # Store the result in the dictionary
-        aggregated_results[field] = field_counts
-
-    # Let's fetch the EPC data
-    # Read in the existing EPC data we stored
-    import json
-    from utils.s3 import read_from_s3, read_pickle_from_s3
-    def read_epc_data():
-        epc_data = json.loads(
-            read_from_s3(
-                bucket_name="retrofit-data-dev",
-                s3_file_name="customers/Stonewater/clustering/epc_data.json"
-            )
-        )
-        epc_data = pd.DataFrame(epc_data)
-
-        epc_data["uprn"] = np.where(
-            epc_data["internal_id"] == 1091,
-            83143766,
-            epc_data["uprn"]
-        )
-        epc_data_batch_2 = read_pickle_from_s3(
-            s3_file_name="customers/Stonewater/clustering/epc_data_batch_2.pkl",
-            bucket_name="retrofit-data-dev"
-        )
-        epc_data_batch_2 = pd.DataFrame(epc_data_batch_2)
-
-        complete_epcs = pd.concat([epc_data, epc_data_batch_2])
-
-        return complete_epcs
-
-    epc_data = read_epc_data()
-    # Get just the fields we want from the EPC: Uprn, Wall, Roof, Heating, Fuel, SAP Score, EPC Band, Date of EPC
-    epc_data_to_append = epc_data[
-        [
-            "uprn", "walls-description", "roof-description", "mainheat-description", "main-fuel",
-            "current-energy-efficiency", "current-energy-rating", "lodgement-date",
-            "estimated"
-        ]
-    ].rename(
-        columns={
-            "uprn": "UPRN",
-            "walls-description": "EPC: Wall Type",
-            "roof-description": "EPC: Roof Type",
-            "mainheat-description": "EPC: Heating",
-            "mainfuel": "EPC: Main Fuel",
-            "current-energy-efficiency": "EPC: SAP Score",
-            "current-energy-rating": "EPC: EPC Band",
-            "lodgement-date": "EPC: Date of EPC",
-            "estimated": "EPC Estimated based on Nearby Properties"
-        }
-    )
-    # Take non-estimated EPCs?
-    # epc_data_to_append = epc_data_to_append[epc_data_to_append["EPC Estimated based on Nearby Properties"] != True]
-    # Take the newest EPC per UPRN, based on lodgement date
-    epc_data_to_append = epc_data_to_append.sort_values("EPC: Date of EPC", ascending=False).drop_duplicates("UPRN")
-
-    epc_data_to_append["EPC: Date of EPC"] = pd.to_datetime(epc_data_to_append["EPC: Date of EPC"])
-    # Years since the EPC was lodged
-    epc_data_to_append["Years since EPC"] = (pd.Timestamp.now() - epc_data_to_append["EPC: Date of EPC"]).dt.days / 365
-    epc_data_to_append = epc_data_to_append[epc_data_to_append["UPRN"] != ""]
-    epc_data_to_append["UPRN"] = epc_data_to_append["UPRN"].astype(int)
-
-    units_with_assigned_packages = units_with_assigned_packages.merge(
-        epc_data_to_append, how="left", on="UPRN",
-    )
-
-    # Read in the wave 2.1 data
-    wave_2_data = pd.read_excel(
-        os.path.join(
-            CUSTOMER_FOLDER_PATH, "Stonewater 2.1 SAP Pre & Post.xlsx"
-        ),
-        header=3
-    )
-    # Remove any where the work is outstanding
-    wave_2_data = wave_2_data[wave_2_data["Retrofit Assessment"] == "Completed"]
-    wave_2_data = wave_2_data[~pd.isnull(wave_2_data["Package Approved (Client)"])]
-    wave_2_data["house_number"] = wave_2_data["Name"].apply(lambda x: SearchEpc.get_house_number(x, ""))
-
-    # Filter postcodes in the units_with_assigned_packages, to find overlapping postcodes
-    related_to_wave_2 = units_with_assigned_packages[
-        units_with_assigned_packages["Postcode"].isin(
-            wave_2_data["Post Code"].values
-        ) & (
-            ~units_with_assigned_packages["Confidence Tier"].isin(
-                [
-                    "1 - same archetype, same postal region", "1 - property was surveyed"
-                ]
-            )
-        )
-        ]
-
-    wave2_matches = []
-    for _, home in related_to_wave_2.iterrows():
-        # Get the related homes
-        assigned_wave_2_packages = wave_2_data[
-            wave_2_data["Post Code"] == home["Postcode"]
-            ]
-
-        if assigned_wave_2_packages.shape[0] != 1:
-            # In this case, we get the closest match based on door number
-            hn = SearchEpc.get_house_number(home["Name"], home["Postcode"])
-
-            assigned_wave_2_packages = assigned_wave_2_packages[
-                abs(assigned_wave_2_packages["house_number"].astype(int) - int(hn)) == min(
-                    abs(assigned_wave_2_packages["house_number"].astype(int) - int(hn)))
-                ]
-
-        wave2_matches.append(
-            {
-                "UPRN": home["UPRN"],
-                "2.1 matched address": assigned_wave_2_packages["Name"].values[0],
-                "2.1 matched address: Package Ref": assigned_wave_2_packages["Package Approved (Client)"].values[0],
-                "2.1 matched address: Wall Insulation": assigned_wave_2_packages["Wall Insulation"].values[0],
-                "2.1 matched address: Loft Insulation": assigned_wave_2_packages["Loft Insulation"].values[0],
-                "2.1 matched address: Ventilation": assigned_wave_2_packages["Ventilation"].values[0],
-                "2.1 matched address: Windows": assigned_wave_2_packages["Windwos Upgrade"].values[0]
-            }
-        )
-
-    # Store each results to CSV
-    for field, df in aggregated_results.items():
-        df.to_csv(
-            os.path.join(CUSTOMER_FOLDER_PATH, f"{field} - aggregated results.csv"), index=False
-        )
-
-    # Store units_with_assigned_packages
-    units_with_assigned_packages.to_csv(
-        os.path.join(CUSTOMER_FOLDER_PATH, "Units with assigned packages - with flags.csv"), index=False
-    )
-
-
-def extract_sharepoint_url(x):
-    if pd.isnull(x):
-        return ""
-    return "/".join(parse.urlparse(
-        x.split(" - http")[1]
-    ).path.replace("%20", " ").split("/")[-2:])
-
-
-def revised_model():
-    """
-    This function implements the revised model for Stonewater, where we are looking at new priority postcodes
-    This work was undertaken in January 2021.
-    """
-
-    # 1) Create the new list of properties
-    new_priority_postcodes = pd.read_excel(
-        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Jan 2025 Project/Updated 2025 to 2030 "
-        "priority list.xlsx"
-    )
-
-    original_archetypes = pd.read_excel(
-        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater SHDF_3_0_Board Triage 22.05.24 "
-        "- Archetyped V3.1.xlsx",
-        header=4
-    )
-    original_archetypes = original_archetypes[~pd.isnull(original_archetypes["Address ID"])]
-    original_archetypes = original_archetypes[original_archetypes["Address ID"] != "Address ID"]
-    original_archetypes["Address ID"] = original_archetypes["Address ID"].astype(int)
-    original_archetypes["UPRN"] = original_archetypes["UPRN"].astype("Int64").astype(str)
-
-    wave_21_folder_name = "Wave 2.1 Surveys - 2"
-
-    # Check if we have all of the addresses
-    missed = original_archetypes[
-        ~original_archetypes["Address ID"].isin(new_priority_postcodes["Address ID"].values)
-    ]["Archetype ID"].unique()
-
-    assert set(missed) == {'NOT PRIORITY POSTCODE', 'IN WAVE 2.1', 'EPC C OR ABOVE'}
-
-    original_archetypes = original_archetypes[
-        ["Address ID", "Archetype ID", "Archetype Group Rank", "UPRN"]
-    ]
-
-    # Merge these archetypes on to the new priority postcodes
-    new_priority_postcodes = new_priority_postcodes.merge(
-        original_archetypes, how="left", on="Address ID"
-    )
-
-    # Basic check, should have no rows with missing Archetype ID, where
-    assert float(new_priority_postcodes[pd.isnull(new_priority_postcodes["Archetype ID"])]["Address ID"].isin(
-        original_archetypes["Address ID"]
-    ).sum()) == 0
-
-    # We pull together the survey data sheet
-    survey_folders = []
-
-    # Loop over each survey folder and list its contents
-    for i in range(1, NUM_FOLDERS + 1):
-        folder_path = os.path.join(CUSTOMER_FOLDER_PATH, f"StonewaterSurveys_{i}")
-        if os.path.isdir(folder_path):  # Check if folder exists
-            folder_contents = [os.path.join(f"StonewaterSurveys_{i}", file) for file in os.listdir(folder_path)]
-            survey_folders.extend(folder_contents)  # Append contents to the master list
-
-    wave_21_folders = [
-        "1. Herefordshire",
-        "2. Bedfordshire",
-        "3. Wiltshire",
-        "4. Bournemouth",
-        "5. Coventry",
-        "6. West Sussex",
-        "7. Dorset",
-        "8. Cambridgeshire",
-        "9. Guildford",
-        "10. Little Island",
-        "11. CCS Dorset"
-    ]
-
-    for wave_2_1_folder in wave_21_folders:
-        folder_path = os.path.join(CUSTOMER_FOLDER_PATH, wave_21_folder_name, wave_2_1_folder)
-        if os.path.isdir(folder_path):  # Check if folder exists
-            folder_contents = [os.path.join(wave_21_folder_name, wave_2_1_folder, file) for file in
-                               os.listdir(folder_path)]
-            survey_folders.extend(folder_contents)  # Append contents to the master list
-
-    # We now do a large pull of all of the data
-    extracted_data = []
-    mtp_extracted_data = []  # Additional data to extract from the medium term plans
-    for survey_folder in tqdm(survey_folders):
-        survey_folder_path = os.path.join(CUSTOMER_FOLDER_PATH, survey_folder)
-
-        # Check that the survey folder is actually a folder
-        if not os.path.isdir(survey_folder_path):
-            continue
-
-        # List the folders inside of the survey folder
-        survey_subfolders = [
-            name for name in os.listdir(survey_folder_path)
-            if os.path.isdir(os.path.join(survey_folder_path, name))
-        ]
-
-        # Check if there's a "retrofit assessment" folder
-        retrofit_folder = next((name for name in survey_subfolders if "retrofit assessment" in name.lower()), None)
-
-        ra_folder = next(
-            (name for name in survey_subfolders if "ra coordinator info" in name.lower() or "ra info" in name.lower()),
-            None
-        )
-
-        mtp_folder = next(
-            (name for name in survey_subfolders if "mid-term" in name.lower() or "mtp" in name.lower()),
-            None
-        )
-        if mtp_folder:
-            # We have a mid term plan:
-            mtp_folder_path = os.path.join(survey_folder_path, mtp_folder)
-            # Get the contents - files and not folder
-            mtp_contents = [
-                os.path.join(mtp_folder, file) for file in os.listdir(mtp_folder_path)
-                if ".DS_Store" not in file and not os.path.isdir(os.path.join(mtp_folder_path, mtp_folder, file))
-            ]
-
-            has_v1 = [
-                f for f in mtp_contents if "v1" in f.lower() or "/ss" in f.lower()
-            ]
-
-            if has_v1:
-                # Then we go one level deeper
-                mtp_contents = [
-                    os.path.join(has_v1[0], f) for f in
-                    os.listdir(os.path.join(survey_folder_path, has_v1[0]))
-                ]
-
-            # We check the the IMA
-            for file_name in mtp_contents:
-
-                filepath = os.path.join(survey_folder_path, file_name)
-                # We expect a pdf so try and parse it
-                try:
-                    with open(filepath, "rb") as file:
-                        reader = PyPDF2.PdfReader(file)
-                        # Just the first page
-                        text = reader.pages[0].extract_text()
-
-                except Exception as e:
-                    continue
-
-                # We check if this is an IMA
-                ima_heading_search = re.search(
-                    r"Improvement measure\s+Capital Cost\s+Lifetime of\s*\n\s*measureFuel saving\s*Lifetime fuel", text
-                )
-
-                is_ima = bool(ima_heading_search)
-                if not is_ima:
-                    continue
-
-                # Otherwise, extract: RIR, PV
-                pv_search = re.search(r"PV \(\d+Kwp\)", text)
-                has_pv = bool(pv_search)
-                pv_system = pv_search.group(0) if has_pv else None
-
-                # We perform a second search for PV:
-                if pv_search is None:
-                    pv_search = re.search("solar pv", text.lower())
-                    has_pv = bool(pv_search)
-                    pv_system = "Solar PV" if has_pv else None
-
-                rir_search = re.search(r"RIR \(\d+(\.\d+)?\)", text)
-                has_rir = bool(rir_search)
-                rir_spec = rir_search.group(0) if has_rir else None
-
-                mtp_extracted_data.append({
-                    "survey_folder": survey_folder,
-                    "has_pv": has_pv,
-                    "PV System": pv_system,
-                    "RIR Specification": rir_spec,
-                    "has_rir": has_rir
-                })
-                continue
-
-        # If retrofit assessment folder exists, check if it has content
-        if retrofit_folder or ra_folder:
-            if retrofit_folder:
-                retrofit_folder_path = os.path.join(survey_folder_path, retrofit_folder)
-            else:
-                retrofit_folder_path = os.path.join(survey_folder_path, ra_folder)
-
-            # Check if everything inside is a sub-folder and the number of folders is 2
-            items = [item for item in os.listdir(retrofit_folder_path) if item != '.DS_Store']
-            all_folders = [os.path.isdir(os.path.join(retrofit_folder_path, item)) for item in items]
-            if all(all_folders) and len(all_folders) == 2 and "Property Pics" in items:
-                # Get the folder that isn't Property Pics
-                retrofit_folder_path = os.path.join(
-                    retrofit_folder_path, [item for item in items if item != "Property Pics"][0]
-                )
-
-            if os.listdir(retrofit_folder_path):  # If not empty
-                summary_data = extract_retrofit_pdfs(data_folder_path=retrofit_folder_path)
-                if summary_data:
-                    summary_data = {
-                        "survey_folder": survey_folder,
-                        **summary_data,
-                    }
-                    extracted_data.append(summary_data)
-                    continue
-            else:
-                # Then we have an empty Retrofit Assessment folder
-                continue
-
-        # If no retrofit folder or it was empty, check files in survey_folder
-        summary_data = extract_retrofit_pdfs(data_folder_path=survey_folder_path)
-        if not summary_data:
-            if len(survey_subfolders) == 1:
-                survey_folder_path = os.path.join(survey_folder_path, survey_subfolders[0])
-                summary_data = extract_retrofit_pdfs(data_folder_path=survey_folder_path)
-
-        if summary_data:
-            summary_data = {
-                "survey_folder": survey_folder,
-                **summary_data,
-            }
-            extracted_data.append(summary_data)
-
-    retrofit_assessment_data = pd.DataFrame(extracted_data)
-    mtp_df = pd.DataFrame(mtp_extracted_data)
-
-    # Save
-    # retrofit_assessment_data.to_csv(
-    #     os.path.join(CUSTOMER_FOLDER_PATH, "Jan 2025 Project/Retrofit Assessment Data Sheet 5.csv"), index=False
-    # )
-    # mtp_df.to_csv(
-    #     os.path.join(CUSTOMER_FOLDER_PATH, "Jan 2025 Project/MTP Data Sheet 5.csv"), index=False
-    # )
-    retrofit_assessment_data = pd.read_csv(
-        os.path.join(CUSTOMER_FOLDER_PATH, "Jan 2025 Project/Retrofit Assessment Data Sheet 5.csv"),
-    )
-    mtp_df = pd.read_csv(
-        os.path.join(CUSTOMER_FOLDER_PATH, "Jan 2025 Project/MTP Data Sheet 5.csv"),
-    )
-
-    # There are a few duplicates we just manually drop
-    mtp_df = mtp_df.drop_duplicates()
-    mtp_df = mtp_df[
-        ~((
-              mtp_df["survey_folder"] == "Wave 2.1 Surveys - 2/1. Herefordshire/(043) Manor Fields 27"
-          ) & (~mtp_df["has_pv"]))
-    ]
-
-    mtp_df = mtp_df[
-        ~((
-              mtp_df["survey_folder"] == "Wave 2.1 Surveys - 2/2. Bedfordshire/(147) Gilpin Close 5"
-          ) & (~mtp_df["has_pv"]))
-    ]
-
-    # Remove some definite duplicates
-    dupes = retrofit_assessment_data[retrofit_assessment_data["Address"].duplicated()]["Address"]
-    dupes = retrofit_assessment_data[retrofit_assessment_data["Address"].isin(dupes)]
-    dupes = dupes.sort_values("Address")
-    # Get all of the folders that end with ROSS
-    to_drop = dupes[dupes["survey_folder"].str.endswith("ROSS")]["survey_folder"].unique().tolist()
-
-    # Replace \n with ""
-    retrofit_assessment_data["Postcode"] = retrofit_assessment_data["Postcode"].str.replace("\n", "")
-
-    retrofit_assessment_data = retrofit_assessment_data[
-        ~retrofit_assessment_data["survey_folder"].isin(
-            [
-                "StonewaterSurveys_10/4 Beech Road, LUTON, LU1 1DP ROSS",
-                "StonewaterSurveys_2/135 Runley Road, LUTON, LU1 1TX ROSS",
-                "StonewaterSurveys_13/7 Saxon Road, LUTON, LU3 1JR ROSS"
-            ] + to_drop
-        )
-    ]
-
-    retrofit_assessments_data_columns = [
-        'Current SAP Rating', 'Current EPC Band', 'Primary Energy Use (kWh/yr)',
-        'Primary Energy Use Intensity (kWh/m2/yr)', 'Number of Storeys',
-        'Fuel Bill', 'Window Age Description',
-        'Window Age Description Proportion (%)',
-        'Secondary Window Age Description',
-        'Secondary Window Age Description Proportion (%)', 'Number of Windows',
-        'Total Number of Doors', 'Number of Insulated Doors',
-        'Existing Primary Heating System',
-        'Existing Primary Heating PCDF Reference',
-        'Existing Primary Heating Controls',
-        'Existing Primary Heating % of Heat',
-        'Existing Secondary Heating System',
-        'Existing Secondary Heating PCDF Reference',
-        'Existing Secondary Heating Controls',
-        'Existing Secondary Heating % of Heat', 'Secondary Heating Code',
-        'Water Heating Code', 'Total Floor Area (m2)',
-        'Total Ground Floor Area (m2)', 'RIR Floor Area',
-        'Main Building Wall Area (m2)', 'First Extension Wall Area (m2)',
-        'Number of Light Fittings', 'Number of LEL Fittings',
-        'Number of fittings needing LEL', 'Main Roof Type',
-        'Main Roof Insulation', 'Main Roof Insulation Thickness',
-        'Main Wall Type', 'Main Wall Insulation', 'Main Wall Dry-lining',
-        'Main Wall Thickness', 'Main Building Alternative Wall Type',
-        'Main Building Alternative Wall Insulation',
-        'Main Building Alternative Wall Dry-lining',
-        'Main Building Alternative Wall Thickness',
-        'Main Fuel',
-        'Main Building Age Band',
-    ]
-    # For the columns in retrofit_assessments_data_columns, prefix all of them with Survey:
-    retrofit_assessments_data_columns_prefixed = ["Survey: " + x for x in retrofit_assessments_data_columns]
-    rename_dict = dict(zip(retrofit_assessments_data_columns, retrofit_assessments_data_columns_prefixed))
-    retrofit_assessment_data = retrofit_assessment_data.rename(columns=rename_dict)
-    retrofit_assessment_data["Survey: Current EPC Band"] = (
-        retrofit_assessment_data["Survey: Current SAP Rating"].apply(lambda x: sap_to_epc(x))
-    )
-
-    # We can read in the data as needed
-
-    # Next Step: Read in the coordinated measures and match to the extracted data
-    ############################################################
-    # CCS
-    #############################################################
-    ccs_coordination_sheet = pd.read_excel(
-        os.path.join(
-            CUSTOMER_FOLDER_PATH,
-            "Jan 2025 Project",
-            "CCS_Installation_Compliance_-_Stonewater_SHDF_2_1_1738228227.xlsx"
-        ),
-        header=4
-    )
-    ccs_postcodes = pd.read_excel(
-        os.path.join(
-            CUSTOMER_FOLDER_PATH, "Jan 2025 Project", "CCS_Installation_Compliance_CCS.xlsx"
-        ),
-        header=4
-    )
-    ccs_coordination_sheet = ccs_postcodes[['Name', 'Post Code', 'Asset ID', 'Asset ID.1']].merge(
-        ccs_coordination_sheet, how="left", on="Name"
-    )
-    ccs_coordination_sheet = ccs_coordination_sheet[~pd.isnull(ccs_coordination_sheet["Name"])]
-    ccs_coordination_sheet["contractor"] = "CCS"
-    # We split ccs into two sections - the first being
-    ccs_coordination_removed_from_programme = ccs_coordination_sheet.tail(21)
-    ccs_coordination_sheet = ccs_coordination_sheet.head(87)
-    ccs_coordination = pd.concat([ccs_coordination_removed_from_programme, ccs_coordination_sheet])
-
-    ccs_coordination["folder_path"] = ccs_coordination["Sharepoint Link"].apply(lambda x: extract_sharepoint_url(x))
-
-    ############################################################
-    # WATES
-    #############################################################
-    wates_coordination_sheet = pd.read_excel(
-        os.path.join(
-            CUSTOMER_FOLDER_PATH, "Jan 2025 Project", "Stonewater_SAP_Installation_Compliance_1738229226.xlsx"
-        ),
-        header=4
-    )
-    wates_postcodes = pd.read_excel(
-        os.path.join(
-            CUSTOMER_FOLDER_PATH, "Jan 2025 Project", "Stonewater_SAP_Installation_Compliance_Vinci-Wates.xlsx"
-        ),
-        header=4
-    )
-    wates_postcodes = wates_postcodes[~pd.isnull(wates_postcodes["Post Code"])]
-    wates_coordination_sheet = wates_coordination_sheet.merge(
-        wates_postcodes[['Name', 'Post Code', 'Asset ID']].drop_duplicates(), how="left", on="Name"
-    )
-
-    wates_coordination_sheet["contractor"] = "Wates"
-    # Break into the different sites:
-    # Wiltshire
-    wates_coordination_sheet_wiltshere = wates_coordination_sheet.head(267)
-    wates_coordination_sheet_herefordshire = wates_coordination_sheet.iloc[271:332, :]
-    wates_coordination_sheet_coventry = wates_coordination_sheet.iloc[336:409, :]
-    wates_coordination_sheet_bedfordshire = wates_coordination_sheet.iloc[413:520, :]
-    wates_coordination_sheet_bournemouth = wates_coordination_sheet.iloc[524:567, :]
-    wates_coordination_sheet_cambridgeshire = wates_coordination_sheet.iloc[571:581, :]
-    wates_coordination_sheet_removed_from_programme = wates_coordination_sheet.iloc[586:926, :]
-    wates_coordination_sheet_abeyance = wates_coordination_sheet.iloc[930:972, :]
-
-    wates_coordination = pd.concat(
-        [
-            wates_coordination_sheet_wiltshere,
-            wates_coordination_sheet_herefordshire,
-            wates_coordination_sheet_coventry,
-            wates_coordination_sheet_bedfordshire,
-            wates_coordination_sheet_bournemouth,
-            wates_coordination_sheet_cambridgeshire,
-            wates_coordination_sheet_removed_from_programme,
-            wates_coordination_sheet_abeyance
-        ]
-    )
-    # We correct the Asset ID for 34 Kempster Close
-    wates_coordination["Asset ID"] = np.where(
-        wates_coordination["Name"] == "34 Kempster Close",
-        "12005",
-        wates_coordination["Asset ID"]
-    )
-
-    # We fill the missing ids
-    missing_lookup = {
-        "4  Sydnall Fields": 31231,
-        "12  Sydnall Fields": 31239,
-        "12  Athena Gardens": 28061,
-        "49  Banner Lane": 41189,
-        "4  Jonathan Road": 41232,
-        "8  Jonathan Road": 41236,
-        "1  Jonathan Road": 41229,
-        "96  Taunton Way": 31417,
-        "94  Taunton Way": 31418,
-        "1  Lady Lane": 29430,
-        "10  Jonathan Road": 41283,
-        "21  Jonathan Road": 41246,
-        "12  Ashcroft Close": 26399
-    }
-    for name, asset_id in missing_lookup.items():
-        wates_coordination["Asset ID"] = np.where(
-            wates_coordination["Name"] == name,
-            asset_id,
-            wates_coordination["Asset ID"]
-        )
-
-    wates_coordination = wates_coordination[~pd.isnull(wates_coordination["Asset ID"])]
-
-    wates_coordination["folder_path"] = wates_coordination["Sharepoint Folder"].apply(
-        lambda x: extract_sharepoint_url(x)
-    )
-
-    ############################################################
-    # NEW 450 COORDINATED RETROFIT ASSESSMENTS
-    #############################################################
-    features = pd.read_csv(
-        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Osmosis Reviewed - Parity Download 18.7 - "
-        "master sheet.csv",
-        encoding='latin1'
-    )
-    features["Address ID"] = features["Address ID"].astype(str).astype(int)
-    features_to_merge = features[["Address ID", "Organisation Reference"]]
-
-    retrofit_packages_board = pd.read_excel(
-        os.path.join(
-            CUSTOMER_FOLDER_PATH,
-            "Stonewater_SHDF_3_0_Board_work_in_progress_-_Operations_1732034933 Final 19.11.24.xlsx"
-        ),
-        header=4
-    )
-    retrofit_packages_board = retrofit_packages_board[~pd.isnull(retrofit_packages_board["Name"])]
-    # Take just the rows that have been surveyed
-    retrofit_packages_board = retrofit_packages_board[
-        retrofit_packages_board["RA"].isin(["Invoiced", "Completed"])
-    ]
-
-    retrofit_packages_board = retrofit_packages_board.merge(
-        features_to_merge, how="left", on="Address ID"
-    )
-
-    manual_filters = {
-        "Flat 21 Walmer Street": "StonewaterSurveys_14/91-1-Flat 21 Walmer Street-HR4 9JD",
-        "6 Cornewall Close": "StonewaterSurveys_14/aa 6, Cornewall Close, Moccas, HEREFORD, HR2 9LG",
-        "2 Bromyard Road": "StonewaterSurveys_4/192-9-2 Bromyard Road-WR15 8BZ",
-        'Flat 18, 1 Raglan Court': "StonewaterSurveys_13/60-3-18 Raglan Court, 1 Raglan Court-MK41 8QT",
-        '14 Raglan Court, 1 Devizes Avenue': 'StonewaterSurveys_12/55-3-14 Raglan Court, Devizes Avenue-MK41 8QT',
-        '19 South Road': 'StonewaterSurveys_4/19 The Oaks, South Road, SMETHWICK, B67 7BY',
-        'Flat 12 Pelican Lane': 'StonewaterSurveys_1/121-3-Flat 12 Lynton Court, Pelican Lane-RG14 1NN',
-        'Flat C, 44 St Leonards Avenue': 'StonewaterSurveys_11/427-2-44c St. Leonards Avenue-MK42 0RB',
-        '16 The Crescent, Kington': 'StonewaterSurveys_9/360-3-16 The Crescent-HR5 3AS',
-        '2 School Lane, Leominster': 'StonewaterSurveys_5/224-1-2 School Lane-HR6 8AA',
-        '14 South Road': 'StonewaterSurveys_2/14 The Oaks, South Road, SMETHWICK, B67 7BY',
-        '1 Groves Street': 'StonewaterSurveys_4/19-5-1 Groves Street-SN2 2BW',
-        '2 Calshot Walk': 'StonewaterSurveys_3/156-3-2 Calshot Walk-MK41 8QS',
-        '21 Constitution Hill': 'StonewaterSurveys_1/112-11-21 Constitution Hill-BH14 0PX',
-        '22 Constitution Hill': 'StonewaterSurveys_4/185-8-22 Constitution Hill-BH14 0PX',
-        '2 Marches Cottages, School Lane, Leominster': 'StonewaterSurveys_5/224-1-2 School Lane-HR6 8AA',
-        '26, Copthorn House, Brighton Road': 'StonewaterSurveys_15/133-1-26 Brighton Road-KT20 6BQ',
-        '4, Old St Marys, Ripley Lane': "StonewaterSurveys_15/433-3-4 Ripley Lane-KT24 6JG",
-        '1 Nelson House, Short Street': 'StonewaterSurveys_15/89-2-1 Short Street-GU11 1HX',
-        "18 Nelson House, Short Street": 'StonewaterSurveys_15/25-3- 18 Short Street- GU11 1HX',
-        '3 Nelson House, Short Street': 'StonewaterSurveys_2/138-1-3 Short Street-GU11 1HX',
-        '16, Copthorn House, Brighton Road': 'StonewaterSurveys_13/78-3-16 Brighton Road-KT20 6BQ',
-        '20 Nelson House, Short Street': 'StonewaterSurveys_15/89-1-20 Short Street-GU11 1HX',
-        '7 Croft Street': 'StonewaterSurveys_8/333-2-7 Croft Street-HR6 8LA'
-    }
-
-    # We now match this retrofit packages board to the extracted data
-    matching_lookup = []
-    for _, home in tqdm(retrofit_packages_board.iterrows(), total=len(retrofit_packages_board)):
-
-        # Handle the case that has the wrong postcode in the asset data
-        if home["Name"] in manual_filters:
-            filtered = retrofit_assessment_data[
-                retrofit_assessment_data["survey_folder"] == manual_filters[home["Name"]]
-                ].copy()
-        else:
-            filtered = retrofit_assessment_data[
-                retrofit_assessment_data["Postcode"].str.lower() == home["Postcode"].lower()
-                ].copy()
-
-            # We check that home["Name"] is contained in the survey_folder, after removing punctuation and spaces
-            to_filter = filtered["survey_folder"].str.replace(r"[^\w\s]", "").str.contains(
-                home["Name"].replace(r"[^\w\s]", "").replace("Flat", "").lstrip(), case=False
-            )
-            if to_filter.sum() == 0:
-                to_filter = filtered["survey_folder"].str.replace(r"[^\w\s]", "").str.replace(",", "").str.replace(".",
-                                                                                                                   "").str.contains(
-                    home["Name"].replace(r"[^\w\s]", "").replace(",", ""), case=False
-                )
-            filtered = filtered[to_filter]
-
-        if filtered.empty:
-            continue
-
-        if filtered.shape[0] == 1:
-            matching_lookup.append(
-                {
-                    "survey_folder": filtered["survey_folder"].values[0],
-                    "Address ID": home["Address ID"],
-                    "Name": home["Name"]
-                }
-            )
-            continue
-
-        # home["Name"] should be contained in the survey_folder
-        filtered = filtered[filtered["survey_folder"].str.contains(home["Name"], case=False)]
-        # We have an edge case wher some properties have two outputs in Sharepoint
-        if home["Name"] == "197 Granby Court" and home["Postcode"] == "MK1 1NQ":
-            raise Exception("Fix me1")
-            # filtered = filtered[filtered["survey_folder"] == "113-1-197 Granby Court-MK1 1NQ"]
-
-        if home["Name"] == '1 Cluny Way' and home["Postcode"] == 'SG15 6ZB':
-            raise Exception("Fix me2")
-            # filtered = filtered[filtered["survey_folder"] == "12-1-1 Cluny Way-SG15 6ZB"]
-
-        if home["Name"] == '2 Bromyard Road' and home["Postcode"] == 'WR15 8BZ':
-            filtered = filtered[filtered["survey_folder"] == "StonewaterSurveys_4/192-9-2 Bromyard Road-WR15 8BZ"]
-
-        if filtered.empty:
-            continue
-        if filtered.shape[0] != 1:
-            raise Exception("something went wrong")
-
-        matching_lookup.append(
-            {
-                "survey_folder": filtered["survey_folder"].values[0],
-                "Address ID": home["Address ID"],
-                "Name": home["Name"]
-            }
-        )
-    matching_lookup = pd.DataFrame(matching_lookup)
-
-    ccs_coordination = ccs_coordination.rename(
-        columns={"Post Code": "Postcode"}
-    )
-    ccs_coordination = ccs_coordination[~pd.isnull(ccs_coordination["Postcode"])]
-    ccs_coordination = ccs_coordination[ccs_coordination["Retrofit Assessment"] != "Outstanding"]
-
-    ccs_manual_filters = {
-        "35 Kittiwake Close": f"{wave_21_folder_name}/11. CCS Dorset/Kittiwake Close 35"
-    }
-    ccs_matching_lookup = []
-    for _, home in tqdm(ccs_coordination.iterrows(), total=len(ccs_coordination)):
-
-        # Handle the case that has the wrong postcode in the asset data
-        if home["Name"] in ccs_manual_filters:
-            filtered = retrofit_assessment_data[
-                retrofit_assessment_data["survey_folder"] == ccs_manual_filters[home["Name"]]
-                ].copy()
-        else:
-            filtered = retrofit_assessment_data[
-                retrofit_assessment_data["Postcode"].str.lower() == home["Postcode"].lower()
-                ].copy()
-
-            # We check that home["Name"] is contained in the survey_folder, after removing punctuation and spaces
-            to_filter = filtered["survey_folder"].str.replace(r"[^\w\s]", "").str.contains(
-                home["Name"].replace(r"[^\w\s]", "").replace("Flat", "").lstrip(), case=False
-            )
-            if to_filter.sum() == 0:
-                to_filter = (
-                    filtered["survey_folder"].
-                    str.replace(r"[^\w\s]", "").
-                    str.replace(",", "").
-                    str.replace(".", "").
-                    str.contains(
-                        home["Name"].replace(r"[^\w\s]", "").replace(",", ""), case=False
-                    )
-                )
-            if to_filter.sum() == 0:
-                to_filter = (
-                    filtered["Address"].str.replace("  ,", "").str.split(",").str[0:2].str.join("").str.lower() ==
-                    home["Name"].lower()
-                )
-            if to_filter.sum() == 0:
-                to_filter = (
-                    filtered["Address"].str.replace("  ,", "").str.split(",").str[0:1].str.join("").str.lower() ==
-                    home["Name"].lower()
-                )
-            if to_filter.sum() == 0:
-                # Do a fuzzy match on the name
-                # Find the best filter
-                to_filter = filtered["Address"].str.replace("  ,", "").str.split(",").str[0:2].str.join("").apply(
-                    lambda x: fuzz.partial_ratio(home["Name"], x) > 93
-                )
-            if to_filter.sum() == 0:
-                # We also some cases where the name of the survey folder is like "Colville Road 7" and the
-                # property name is actually 7 Colville Road, so we try taking the final part of the address,
-                # splitting on space, and adding it to the front
-                def reformat_survey_folder(x):  # Move the last space-separated word of the folder's final path segment to the front
-                    filename = x.split("/")[-1]  # keep only the text after the last "/" in the survey folder path
-                    parts = filename.split(" ")  # split on single spaces; consecutive spaces produce empty-string parts
-                    return " ".join(parts[-1:] + parts[:-1])  # rotate last part to the front, e.g. "Colville Road 7" -> "7 Colville Road"
-
-                to_filter = (
-                    filtered["survey_folder"].apply(lambda x: reformat_survey_folder(x)).str.lower() ==
-                    home["Name"].lower()
-                )
-
-            if to_filter.sum() == 0:
-                raise Exception("Error")
-            filtered = filtered[to_filter]
-
-        if filtered.empty:
-            continue
-
-        if filtered.shape[0] == 1:
-            ccs_matching_lookup.append(
-                {
-                    "survey_folder": filtered["survey_folder"].values[0],
-                    "Asset ID.1": home["Asset ID.1"],
-                    "Name": home["Name"]
-                }
-            )
-            continue
-
-        raise Exception("No match")
-
-    ccs_matching_lookup = pd.DataFrame(ccs_matching_lookup)
-    # We get a match for all records
-    assert ccs_matching_lookup.shape[0] == ccs_coordination.shape[0]
-    assert not pd.isnull(ccs_matching_lookup["Asset ID.1"]).sum()
-    assert not ccs_matching_lookup["Asset ID.1"].duplicated().sum()
-
-    # We do the same for Wates
-    wates_coordination = wates_coordination.rename(
-        columns={"Post Code": "Postcode"}
-    )
-    wates_coordination = wates_coordination[
-        wates_coordination["Retrofit Assessment"].isin(["Completed"])
-    ]
-    wates_coordination = wates_coordination[
-        ~pd.isnull(wates_coordination["Postcode"])
-    ]
-
-    wates_manual_filters = {
-        "24 Rabley Wood View": f"{wave_21_folder_name}/3. Wiltshire/24-25 Rabley Wood View",
-        "14 Edencroft": f"{wave_21_folder_name}/3. Wiltshire/14 Edencroft",
-        "Flat 31 Rabley Wood View": f"{wave_21_folder_name}/3. Wiltshire/Flat 31  Rabley Wood View",
-        'Flat 13, Manor Fields': f'{wave_21_folder_name}/1. Herefordshire/(038) Manor Fields Flat 13',
-        "4 Kittys Lane": f"{wave_21_folder_name}/1. Herefordshire/(005) Kittys Lane 4",
-        '1 Jephson Court': f'{wave_21_folder_name}/5. Coventry/Jesphson Court 1',
-        '2 Jephson Court': f'{wave_21_folder_name}/5. Coventry/Jesphson Court 2',
-    }
-    wates_matching_lookup = []
-    # Examples to skip when we cannot get the data
-    wates_to_skip = [
-        "66 Abbatt Close",  # File type is unusual, couldn't extract the data
-        "Flat 69 Goddard Road",  # Doesn't exist
-        "19 Garth House",  # # File type is unusual, couldn't extract the data
-        '5 Gilpin Close',  # No properly formatted EPR
-        '49 The Hide, Netherfield',  # TODO: TEMP HERE
-        '19 Chanders Rd',
-        '5 Chanders Rd',
-        '23 Chanders Rd',
-        '3 Chanders Rd',
-        '1 Orchard Close',
-    ]
-    wates_coordination = wates_coordination[~wates_coordination["Name"].isin(wates_to_skip)]
-
-    for _, home in tqdm(wates_coordination.iterrows(), total=len(wates_coordination)):
-
-        # Search the folder
-        filtered = retrofit_assessment_data[
-            retrofit_assessment_data["survey_folder"].str.contains(home["folder_path"], regex=False)
-        ]
-        if len(filtered) == 1:
-            wates_matching_lookup.append(
-                {
-                    "survey_folder": filtered["survey_folder"].values[0],
-                    "Asset ID": home["Asset ID"],
-                    "Name": home["Name"]
-                }
-            )
-            continue
-
-        if home["Name"] in wates_to_skip:
-            continue
-
-        # Handle the case that has the wrong postcode in the asset data
-        if home["Name"] in wates_manual_filters:
-            filtered = retrofit_assessment_data[
-                retrofit_assessment_data["survey_folder"] == wates_manual_filters[home["Name"]]
-                ].copy()
-        else:
-            filtered = retrofit_assessment_data[
-                retrofit_assessment_data["Postcode"].str.lower() == home["Postcode"].lower()
-                ].copy()
-
-            # We check that home["Name"] is contained in the survey_folder, after removing punctuation and spaces
-            to_filter = filtered["survey_folder"].str.replace(r"[^\w\s]", "").str.contains(
-                home["Name"].replace(r"[^\w\s]", "").replace("Flat", "").lstrip(), case=False
-            )
-
-            if to_filter.sum() > 1:
-                to_filter = (
-                    filtered["survey_folder"].str.replace(r"[^\w\s]", "").str.split("/").str[-1].str.lower() ==
-                    home["Name"].replace(r"[^\w\s]", "").lstrip().lower()
-                )
-
-            if to_filter.sum() == 0:
-                to_filter = (
-                    filtered["survey_folder"].
-                    str.replace(r"[^\w\s]", "").
-                    str.replace(",", "").
-                    str.replace(".", "").
-                    str.contains(
-                        home["Name"].replace(r"[^\w\s]", "").replace(",", ""), case=False
-                    )
-                )
-            if to_filter.sum() == 0:
-                to_filter = (
-                    filtered["Address"].str.replace("  ,", "").str.split(",").str[0:2].str.join("").str.lower() ==
-                    home["Name"].lower()
-                )
-            if to_filter.sum() == 0:
-                to_filter = (
-                    filtered["Address"].str.replace("  ,", "").str.split(",").str[0:1].str.join("").str.lower() ==
-                    home["Name"].lower()
-                )
-            if to_filter.sum() == 0:
-                # Do a fuzzy match on the name
-                # Find the best filter
-                to_filter = filtered["Address"].str.replace("  ,", "").str.split(",").str[0:2].str.join("").apply(
-                    lambda x: fuzz.partial_ratio(home["Name"], x) > 93
-                )
-            if to_filter.sum() == 0:
-                # We also some cases where the name of the survey folder is like "Colville Road 7" and the
-                # property name is actually 7 Colville Road, so we try taking the final part of the address,
-                # splitting on space, and adding it to the front
-                def reformat_survey_folder(x):  # Move the last space-separated word of the folder's final path segment to the front
-                    filename = x.split("/")[-1]  # keep only the text after the last "/" in the survey folder path
-                    parts = filename.split(" ")  # split on single spaces; consecutive spaces produce empty-string parts
-                    return " ".join(parts[-1:] + parts[:-1])  # rotate last part to the front, e.g. "Colville Road 7" -> "7 Colville Road"
-
-                to_filter = (
-                    filtered["survey_folder"].apply(lambda x: reformat_survey_folder(x)).str.lower() ==
-                    home["Name"].lower()
-                )
-
-            if to_filter.sum() == 0:
-                raise Exception("Error")
-            filtered = filtered[to_filter]
-
-        if filtered.empty:
-            continue
-
-        if filtered.shape[0] == 1:
-            wates_matching_lookup.append(
-                {
-                    "survey_folder": filtered["survey_folder"].values[0],
-                    "Asset ID": home["Asset ID"],
-                    "Name": home["Name"]
-                }
-            )
-            continue
-
-        raise Exception("No match")
-    wates_matching_lookup = pd.DataFrame(wates_matching_lookup)
-
-    # We get a match for all records
-    assert wates_matching_lookup.shape[0] == wates_coordination.shape[0]
-    assert not pd.isnull(wates_matching_lookup["Asset ID"]).sum()
-    assert not wates_matching_lookup["Asset ID"].duplicated().sum()
-
-    # Merge lookup tables onto the coordination sheets
-    wates_coordination = wates_coordination.merge(
-        wates_matching_lookup, how="left", on="Name"
-    )
-    missed_asset_id = wates_coordination[pd.isnull(wates_coordination["Asset ID_x"])]
-    if not missed_asset_id.empty:
-        raise Exception("Missing Asset ID")
-
-    if wates_coordination["Asset ID_x"].duplicated().sum():
-        raise Exception("Duplicated IDs in wates")
-
-    # We merge the mpt data on to the wates coordination
-    wates_coordination = wates_coordination.merge(
-        mtp_df, how="left", on="survey_folder"
-    )
-
-    ccs_coordination = ccs_coordination.merge(
-        ccs_matching_lookup, how="left", on="Name"
-    )
-    ccs_coordination = ccs_coordination.merge(
-        mtp_df, how="left", on="survey_folder"
-    )
-
-    retrofit_packages_board = retrofit_packages_board.merge(
-        matching_lookup, how="left", on="Name"
-    )
-
-    # We now map the retrofit assessment data to the coordinated packages
-    wates_coordination = wates_coordination.merge(
-        retrofit_assessment_data.drop(columns=["Postcode"]), how="left", on="survey_folder"
-    )
-    ccs_coordination = ccs_coordination.merge(
-        retrofit_assessment_data.drop(columns=["Postcode"]), how="left", on="survey_folder"
-    )
-    retrofit_packages_board = retrofit_packages_board.merge(
-        retrofit_assessment_data.drop(columns=["Postcode"]), how="left", on="survey_folder"
-    )
-
-    # We have 4 properties in the Wates coordination board, that we want to remove from the retrofit packages board
-    to_remove = wates_coordination[
-        wates_coordination["Asset ID_x"].astype(int).isin(retrofit_packages_board["Organisation Reference"])
-    ]
-    assert to_remove.shape[0] == 4
-    # Remove them from the wates board
-    wates_coordination = wates_coordination[
-        ~wates_coordination["Asset ID_x"].astype(int).isin(retrofit_packages_board["Organisation Reference"])
-    ]
-
-    # We combine this into a singular board
-    coordinated_packages = pd.concat(
-        [
-            retrofit_packages_board[
-                [
-                    "Name", "Postcode", 'Actual SAP Band', 'Actual SAP Rating',
-                    'Modelled SAP Band', 'Modelled SAP Rating', 'Package Ref',
-                    'Main Wall Insulation', 'Secondary Wall Insulation', 'Loft insulation',
-                    'Flat Roof', 'Room in Roof', 'Window Upgrade', 'Door Upgrade',
-                    'Ventilation', 'Main Heating', 'Water Heating', 'Heating Controls',
-                    'Solar PV', 'Other measures', 'Organisation Reference',
-                ] + retrofit_assessments_data_columns_prefixed
-                ],
-            ccs_coordination[
-                [
-                    # We don't have secondary wall insulation, Flat Roof, RIR, Heating Controls,
-                    # Solar PV
-                    "Name", "Postcode", 'SAP Band Pre', 'SAP Rating Pre', 'SAP Rating Install Package',
-                    'SAP Band Install Package', 'Package Approved (Client)',
-                    'Wall Insulation', 'Loft Insulation', 'Windows Upgrade', 'Ext. Doors Upgrade',
-                    'Ventilation', 'Heating', 'Other Measures', 'PV System',
-                    "Asset ID.1_y",
-                ] + retrofit_assessments_data_columns_prefixed
-                ].rename(
-                columns={
-                    "SAP Band Pre": "Actual SAP Band",
-                    "SAP Rating Pre": "Actual SAP Rating",
-                    'SAP Rating Install Package': 'Modelled SAP Band',
-                    'SAP Band Install Package': 'Modelled SAP Rating',
-                    'Package Approved (Client)': 'Package Ref',
-                    'Wall Insulation': 'Main Wall Insulation',
-                    'Loft Insulation': 'Loft insulation',
-                    'Windows Upgrade': 'Window Upgrade',
-                    'Ext. Doors Upgrade': 'Door Upgrade',
-                    'Heating': 'Main Heating',
-                    'Other Measures': 'Other measures',
-                    'Asset ID.1_y': 'Organisation Reference',
-                    "PV System": "Solar PV",
-                }
-            ),
-            wates_coordination[
-                [
-                    "Name", "Postcode", 'SAP Band Pre', 'SAP Rating Pre', 'SAP Rating Install Package',
-                    'SAP Band Install Package', 'Package Approved (Client)',
-                    'Wall Insulation', 'Loft Insulation', 'Windows Upgrade', 'Ext. Doors Upgrade',
-                    'Ventilation', 'Heating', 'Other Measures', 'Asset ID_x', "PV System"
-                ] + retrofit_assessments_data_columns_prefixed
-                ].rename(
-                columns={
-                    "SAP Band Pre": "Actual SAP Band",
-                    "SAP Rating Pre": "Actual SAP Rating",
-                    'SAP Rating Install Package': 'Modelled SAP Band',
-                    'SAP Band Install Package': 'Modelled SAP Rating',
-                    'Package Approved (Client)': 'Package Ref',
-                    'Wall Insulation': 'Main Wall Insulation',
-                    'Loft Insulation': 'Loft insulation',
-                    'Windows Upgrade': 'Window Upgrade',
-                    'Ext. Doors Upgrade': 'Door Upgrade',
-                    'Heating': 'Main Heating',
-                    'Other Measures': 'Other measures',
-                    'Asset ID_x': 'Organisation Reference',
-                    "PV System": "Solar PV",
-                }
-            )
-        ]
-    )
-
-    coordinated_packages["Organisation Reference"] = coordinated_packages["Organisation Reference"].astype(int)
-    assert not coordinated_packages["Organisation Reference"].duplicated().sum()
-
-    # Merge the property features on
-    coordinated_packages = coordinated_packages.merge(
-        features[["Organisation Reference", "Walls", "Roofs", "Heating", "Main Fuel", "Age", "Property Type"]],
-        how="left",
-        on="Organisation Reference"
-    )
-
-    coordinated_packages = coordinated_packages[~pd.isnull(coordinated_packages["Survey: Current EPC Band"])]
-    coordinated_packages = coordinated_packages[~pd.isnull(coordinated_packages["Survey: Current SAP Rating"])]
-
-    # We need the features pertaining to these priority postcodes
-
-    def find_nearest_matching_property(coordinated_packages, home):
-        filter_levels = [
-            (["Postcode", "Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age"], 2),
-            (["Postal Region", "Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age"], 3),
-            (["Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age"], 4),
-            (["Property Type", "Walls", "Roof Simple", "Heating", "Main Fuel", "Age"], 5),
-            (["Primary Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age"], 6),
-            (["Primary Property Type", "Walls", "Roof Simple", "Heating", "Main Fuel", "Age"], 7),
-        ]
-
-        max_confidence = max([confidence for (_, confidence) in filter_levels])
-
-        for i, (filters, match_confidence) in enumerate(filter_levels):
-            match = coordinated_packages.copy()
-
-            for col in filters:
-                match = match[match[col] == home[col]]
-
-            if not match.empty:
-                return match, match_confidence
-
-        # Finally, we search for a property in the same Archetype
-        match = coordinated_packages[coordinated_packages["Archetype ID"] == home["Archetype ID"]]
-        if not match.empty:
-            return match, max_confidence + 1
-
-        return None, None  # No match found
-
-    coordinated_packages["Postal Region"] = coordinated_packages["Postcode"].str.split(" ").str[0].str.strip()
-    new_priority_postcodes["Postal Region"] = new_priority_postcodes["Postcode"].str.split(" ").str[0].str.strip()
-
-    coordinated_packages["Roof Simple"] = coordinated_packages["Roofs"].str.split(":").str[0].str.strip()
-    new_priority_postcodes["Roof Simple"] = new_priority_postcodes["Roofs"].str.split(":").str[0].str.strip()
-
-    coordinated_packages["Primary Property Type"] = coordinated_packages["Property Type"].str.split(":").str[0]
-    new_priority_postcodes["Primary Property Type"] = new_priority_postcodes["Property Type"].str.split(":").str[0]
-
-    coordinated_packages = coordinated_packages.merge(
-        new_priority_postcodes[["Organisation Reference", "Archetype ID"]],
-        how="left",
-        on="Organisation Reference"
-    )
-
-    # For every property in the priority postcodes data, we look for a most appropriate matching property
-    no_match = []
-    matches = []
-    for _, home in tqdm(new_priority_postcodes.iterrows(), total=len(new_priority_postcodes)):
-        # We check if the property was surveyed
-        survey_result = coordinated_packages[
-            coordinated_packages["Organisation Reference"] == home["Organisation Reference"]
-            ]
-        if not survey_result.empty:
-            to_extend = [
-                {
-                    "Organisation Reference": home["Organisation Reference"],
-                    "Best Match Organisation Reference": m,
-                    "match_confidence": 1,
-                    "Was Surveyed": True
-                } for m in survey_result["Organisation Reference"].values
-            ]
-            matches.extend(to_extend)
-            continue
-
-        closest_match, match_confidence = find_nearest_matching_property(coordinated_packages, home)
-        if closest_match is None:
-            no_match.append(home["Organisation Reference"])
-            continue
-
-        to_extend = [
-            {
-                "Organisation Reference": home["Organisation Reference"],
-                "Best Match Organisation Reference": m,
-                "match_confidence": match_confidence,
-                "Was Surveyed": False
-            } for m in closest_match["Organisation Reference"].values
-        ]
-        matches.extend(to_extend)
-
-    no_match_summary = new_priority_postcodes[
-        new_priority_postcodes["Organisation Reference"].isin(
-            no_match
-        )
-    ].groupby(["Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age"])[
-        "Organisation Reference"].count().reset_index()
-
-    no_match_summary = no_match_summary.sort_values("Organisation Reference", ascending=False)
-
-    # len(no_match)
-    # 8764, 5607, 5646, 5071
-    # no_match_summary.shape
-    # (3953, 6), (2948, 6), (2969, 7), (2575, 7)
-
-    matches_df = pd.DataFrame(matches)
-
-    matches_df = matches_df.merge(
-        coordinated_packages[["Organisation Reference", "Survey: Current EPC Band", "Survey: Current SAP Rating"]],
-        left_on="Best Match Organisation Reference", right_on="Organisation Reference",
-        suffixes=("", " - Closest Match")
-    )
-
-    measures_columns = [
-        'Main Wall Insulation', 'Secondary Wall Insulation', 'Loft insulation',
-        'Flat Roof', 'Room in Roof', 'Window Upgrade', 'Door Upgrade',
-        'Ventilation', 'Main Heating', 'Water Heating', 'Heating Controls',
-        'Solar PV', 'Other measures'
-    ]
-
-    # We want to aggregate the matches, when we have multiple
-    aggregated_matches_df = []
-    for org_ref, mapped_matches in matches_df.groupby("Organisation Reference"):
-
-        measures = coordinated_packages[
-            (
-                coordinated_packages["Organisation Reference"].isin(
-                    mapped_matches['Best Match Organisation Reference'].values
-                )
-            )
-        ][measures_columns]
-
-        if mapped_matches.shape[0] == 1:
-            # Get the measures for this property
-            measures = measures.squeeze()
-
-            aggregated_matches_df.append(
-                {
-                    "Organisation Reference": org_ref,
-                    "Number of matches": 1,
-                    "Proportion": 100,
-                    "Estimated SAP Rating": mapped_matches["Survey: Current SAP Rating"].values[0],
-                    "Estimated EPC Rating": mapped_matches["Survey: Current EPC Band"].values[0],
-                    "Was Surveyed": mapped_matches["Was Surveyed"].values[0],
-                    **measures
-                }
-            )
-            continue
-
-        # We need to aggregate the matches, since we have multiple
-        average_rating = mapped_matches["Survey: Current SAP Rating"].mean()
-        number_of_matches = mapped_matches.shape[0]
-        average_epc_rating = sap_to_epc(average_rating)
-        # proportion is the number of properties that have this EPC rating
-        proportion_with_this_epc = int(
-            mapped_matches[mapped_matches["Survey: Current EPC Band"] == average_epc_rating].shape[
-                0] / number_of_matches * 100
-        )
-
-        measures_aggregated = {}
-        for m in measures_columns:
-            if any(~pd.isnull(measures[m])):
-                # Check if we have 2 unique values
-                vals = measures[~pd.isnull(measures[m])][m].unique()
-                if len(vals) > 1:
-                    measures_aggregated[m] = ", ".join(vals)
-                else:
-                    measures_aggregated[m] = vals[0]
-
-        aggregated_matches_df.append(
-            {
-                "Organisation Reference": org_ref,
-                "Number of matches": number_of_matches,
-                "Proportion": proportion_with_this_epc,
-                "Estimated SAP Rating": average_rating,
-                "Estimated EPC Rating": average_epc_rating,
-                "Was Surveyed": False,
-                **measures_aggregated
-            }
-        )
-
-    aggregated_matches_df = pd.DataFrame(aggregated_matches_df)
-
-    mapped_priority_list = new_priority_postcodes.merge(
-        aggregated_matches_df, on="Organisation Reference", how="left"
-    )
-
-    mapped_priority_list["address1"] = mapped_priority_list["Address"].str.split(",").str[0]
-
-    # If we have a leading number like 01, 02, 03, 04, 05, 06, 07, 08, 09, we remove the leading 0
-
-    def remove_leading_zero(address):
-        return re.sub(r"^0([1-9]) ", r"\1 ", address)
-
-    mapped_priority_list["address1"] = mapped_priority_list["address1"].apply(remove_leading_zero)
-    mapped_priority_list["address1"] = np.where(
-        mapped_priority_list["Organisation Reference"] == 37004,
-        "8 Mason Road",
-        mapped_priority_list["address1"]
-    )
-    mapped_priority_list["address1"] = np.where(
-        mapped_priority_list["Organisation Reference"] == 37003,
-        "9 Mason Road",
-        mapped_priority_list["address1"]
-    )
-
-    mapped_priority_list = mapped_priority_list.rename(
-        columns={"UPRN": "uprn"}
-    )
-    mapped_priority_list["row_id"] = mapped_priority_list["Organisation Reference"]
-
-    # Flag where 2 out of the three columns have consensus
-    mapped_priority_list["2 of 3 Data Sources Have Consensus on EPC"] = (
-        (mapped_priority_list["SAP Band"] == mapped_priority_list["EPC Band"]) |
-        (mapped_priority_list["SAP Band"] == mapped_priority_list["Estimated EPC Rating"]) |
-        (mapped_priority_list["EPC Band"] == mapped_priority_list["Estimated EPC Rating"])
-    )
-
-    # Let's get the newest EPC data for these properties
-    # We merge on UPRN, when we have it
-    # from etl.route_march_data_pull.app import get_data
-    # epc_data, errors, nodata = get_data(
-    #     asset_list=mapped_priority_list,
-    #     fulladdress_column="Address",
-    #     address1_column="address1",
-    #     postcode_column="Postcode",
-    #     manual_uprn_map={},
-    #     epc_api_only=True
-    # )
-    #
-    # epc_df = pd.DataFrame(epc_data)
-    # epc_df.to_csv(
-    #     os.path.join(CUSTOMER_FOLDER_PATH, "Jan 2025 Project", "full_epc_data.csv"), index=False
-    # )
-    epc_df = pd.read_csv(os.path.join(CUSTOMER_FOLDER_PATH, "Jan 2025 Project", "full_epc_data.csv"))
-    epc_df = epc_df.rename(columns={"row_id": "Organisation Reference"})
-
-    # We now package up the data
-
-    # Sheet 1 is the base coordination data
-    output_coordination_sheet = coordinated_packages[
-        [
-            "Name", "Postcode", 'Organisation Reference', 'Package Ref',
-            'Main Wall Insulation', 'Secondary Wall Insulation', 'Loft insulation',
-            'Flat Roof', 'Room in Roof', 'Window Upgrade', 'Door Upgrade',
-            'Ventilation', 'Main Heating', 'Water Heating', 'Heating Controls',
-            'Solar PV', 'Other measures',
-            'Survey: Current SAP Rating',
-            'Survey: Current EPC Band',
-            'Survey: Primary Energy Use (kWh/yr)',
-            'Survey: Primary Energy Use Intensity (kWh/m2/yr)',
-            'Survey: Number of Storeys', 'Survey: Fuel Bill',
-            'Survey: Window Age Description',
-            'Survey: Window Age Description Proportion (%)',
-            'Survey: Secondary Window Age Description',
-            'Survey: Secondary Window Age Description Proportion (%)',
-            'Survey: Number of Windows', 'Survey: Total Number of Doors',
-            'Survey: Number of Insulated Doors',
-            'Survey: Existing Primary Heating System',
-            'Survey: Existing Primary Heating PCDF Reference',
-            'Survey: Existing Primary Heating Controls',
-            'Survey: Existing Primary Heating % of Heat',
-            'Survey: Existing Secondary Heating System',
-            'Survey: Existing Secondary Heating PCDF Reference',
-            'Survey: Existing Secondary Heating Controls',
-            'Survey: Existing Secondary Heating % of Heat',
-            'Survey: Secondary Heating Code', 'Survey: Water Heating Code',
-            'Survey: Total Floor Area (m2)', 'Survey: Total Ground Floor Area (m2)',
-            'Survey: RIR Floor Area', 'Survey: Main Building Wall Area (m2)',
-            'Survey: First Extension Wall Area (m2)',
-            'Survey: Number of Light Fittings', 'Survey: Number of LEL Fittings',
-            'Survey: Number of fittings needing LEL', 'Survey: Main Roof Type',
-            'Survey: Main Roof Insulation',
-            'Survey: Main Roof Insulation Thickness', 'Survey: Main Wall Type',
-            'Survey: Main Wall Insulation', 'Survey: Main Wall Dry-lining',
-            'Survey: Main Wall Thickness',
-            'Survey: Main Building Alternative Wall Type',
-            'Survey: Main Building Alternative Wall Insulation',
-            'Survey: Main Building Alternative Wall Dry-lining',
-            'Survey: Main Building Alternative Wall Thickness',
-            'Survey: Main Fuel',
-            'Survey: Main Building Age Band',
-            'Walls', 'Roofs', 'Heating', 'Main Fuel', 'Age', 'Property Type'
-        ]
-    ].rename(
-        columns={
-            'Walls': "Parity - Walls",
-            'Roofs': "Parity - Roof",
-            'Heating': "Parity - Heating",
-            'Main Fuel': "Parity - Fuel",
-            'Age': "Parity - Age Band",
-            'Property Type': "Parity - Property Type"
-        }
-    )
-
-    # Sheet 2 is the lookup table which maps the properties to their closest match
-    # We need to bring in the parity attributes between the mapped properties so we can see side-by-side
-    mapped_lookup = matches_df[
-        [
-            'Organisation Reference',
-            'Best Match Organisation Reference',
-            'Survey: Current EPC Band',
-            'Survey: Current SAP Rating',
-            "Was Surveyed",
-            "match_confidence",
-        ]
-    ].rename(
-        columns={
-            'Best Match Organisation Reference': "Best Match - Organisation Reference",
-            "Survey: Current EPC Band": "Best Match - Survey: Current EPC Band",
-            'Survey: Current SAP Rating': "Best Match - Survey: Current SAp Rating"
-        }
-    ).merge(
-        features[["Organisation Reference", "Walls", "Roofs", "Heating", "Main Fuel", "Age", "Property Type",
-                  "Total Floor Area"]],
-        how="left",
-        on="Organisation Reference"
-    ).merge(
-        features[["Organisation Reference", "Walls", "Roofs", "Heating", "Main Fuel", "Age", "Property Type",
-                  "Total Floor Area"]].rename(
-            columns={
-                "Organisation Reference": "Best Match - Organisation Reference",
-                "Walls": "Best Match - Walls",
-                "Roofs": "Best Match - Roof",
-                "Heating": "Best Match - Heating",
-                "Main Fuel": "Best Match - Main Fuel",
-                "Age": "Best Match - Age",
-                "Property Type": "Best Match - Property Type",
-                "Total Floor Area": "Best Match - Total Floor Area"
-            }
-        ),
-        how="left",
-        on="Best Match - Organisation Reference"
-    ).merge(
-        coordinated_packages[
-            [
-                "Organisation Reference", 'Survey: Main Wall Type', 'Survey: Main Wall Insulation',
-                'Survey: Main Roof Type', 'Survey: Main Roof Insulation', 'Survey: Main Roof Insulation Thickness',
-                'Survey: Existing Primary Heating System', 'Survey: Main Building Age Band',
-                'Survey: Main Building Wall Area (m2)', 'Survey: Total Floor Area (m2)',
-                'Survey: Main Building Age Band',
-            ]
-        ].rename(
-            columns={
-                "Organisation Reference": "Best Match - Organisation Reference",
-                'Survey: Main Wall Type': 'Best Match - Survey: Main Wall Type',
-                'Survey: Main Wall Insulation': 'Best Match - Survey: Main Wall Insulation',
-                'Survey: Main Roof Type': 'Best Match - Survey: Main Roof Type',
-                'Survey: Main Roof Insulation': 'Best Match - Survey: Main Roof Insulation',
-                'Survey: Main Roof Insulation Thickness': 'Best Match - Survey: Main Roof Insulation Thickness',
-                'Survey: Existing Primary Heating System': 'Best Match - Survey: Existing Primary Heating System',
-            }
-        ),
-        how="left",
-        on="Best Match - Organisation Reference"
-    )
-
-    # Finally, we have the property, against the mapped home with the estimate SAP scores and the EPC data
-    worksheet = mapped_priority_list[
-        [
-            'Organisation Reference', 'Address', 'Postcode', 'Address ID', 'uprn', 'Archetype ID',
-            'SAP', 'SAP Band', "Property Type", "Walls", "Roofs", 'Glazing',
-            'Heating', 'Main Fuel', 'Hot Water', 'Number of matches', 'Proportion',
-            'Estimated SAP Rating', 'Estimated EPC Rating', "Was Surveyed",
-            'Main Wall Insulation',
-            'Secondary Wall Insulation', 'Loft insulation', 'Flat Roof',
-            'Room in Roof', 'Window Upgrade', 'Door Upgrade', 'Ventilation',
-            'Main Heating', 'Water Heating', 'Heating Controls', 'Solar PV',
-            'Other measures', "2 of 3 Data Sources Have Consensus on EPC"
-        ]
-    ].rename(
-        columns={
-            "SAP": "Parity - SAP Rating",
-            "SAP Band": "Parity - EPC Rating",
-            "Property Type": "Parity - Property Type",
-            "Walls": "Parity - Walls",
-            "Roofs": "Parity - Roofs",
-            'Glazing': "Parity - Glazing",
-            'Heating': 'Parity - Heating',
-            'Main Fuel': 'Parity - Main Fuel',
-            'Hot Water': 'Parity - Hot Water',
-            'Proportion': 'Proportion of matched properties with same EPC rating',
-        }
-    ).merge(
-        epc_df[
-            [
-                "Organisation Reference",
-                "uprn",
-                "current-energy-efficiency",
-                "current-energy-rating",
-                "lodgement-date",
-                "construction-age-band",
-                "walls-description",
-                "roof-description",
-                "mainheat-description",
-                "windows-description",
-                "hotwater-description",
-                "main-fuel",
-                "total-floor-area",
-            ]
-        ].rename(
-            columns={
-                "uprn": "Last EPC - uprn",
-                "current-energy-efficiency": "Last EPC - SAP Score",
-                "current-energy-rating": "Last EPC - EPC Rating",
-                "lodgement-date": "Last EPC - Date Lodged",
-                "construction-age-band": "Last EPC - Age Band",
-                "walls-description": "Last EPC - Walls",
-                "roof-description": "Last EPC - Roof",
-                "mainheat-description": "Last EPC - Heating",
-                "windows-description": "Last EPC - Windows",
-                "hotwater-description": "Last EPC - Hot Water",
-                "main-fuel": "Last EPC - Main Fuel",
-                "total-floor-area": "Last EPC - Total Floor Area"
-            }
-        ),
-        how="left",
-        on='Organisation Reference'
-    )
-
-    worksheet["Years Since Last EPC"] = pd.Timestamp.now().year - pd.to_datetime(
-        worksheet["Last EPC - Date Lodged"]).dt.year
-
-    worksheet["Last EPC - uprn"] = worksheet["Last EPC - uprn"].astype("Int64").astype(str)
-
-    worksheet["uprn"] = np.where(
-        pd.isnull(worksheet["uprn"]) & pd.notnull(worksheet["Last EPC - uprn"]),
-        worksheet["Last EPC - uprn"],
-        worksheet["uprn"]
-    )
-
-    worksheet["uprn"] = worksheet["uprn"].replace("<NA>", "")
-
-    worksheet = worksheet.drop(columns=["Last EPC - uprn"])
-
-    # Save to Excel with multiple sheets
-    excel_path = os.path.join(CUSTOMER_FOLDER_PATH, "Jan 2025 Project", "13022025 Stonewater Priority List.xlsx")
-    with pd.ExcelWriter(excel_path, engine="xlsxwriter") as writer:
-        worksheet.to_excel(writer, sheet_name="Worksheet", index=False, header=True)
-        mapped_lookup.to_excel(writer, sheet_name="Lookup Table", index=False, header=True)
-        output_coordination_sheet.to_excel(writer, sheet_name="Coordination", index=False, header=True)
-
-# if __name__ == "__main__":
-#     main()
diff --git a/etl/find_my_epc/RetrieveFindMyEpc.py b/etl/find_my_epc/RetrieveFindMyEpc.py
index 216a14de..766de840 100644
--- a/etl/find_my_epc/RetrieveFindMyEpc.py
+++ b/etl/find_my_epc/RetrieveFindMyEpc.py
@@ -681,7 +681,9 @@ class RetrieveFindMyEpc:
             ],
             "High heat retention storage heaters and dual rate meter": [
                 "high_heat_retention_storage_heater"
-            ]
+            ],
+            "Increase loft insulation to 250mm": ["loft_insulation"],
+            "Solar photovoltaics panels, 25% of roof area": ["solar_pv"],
         }
 
         survey = True