From 97eaf948c5030ad079d7336741849f3bec3104e9 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 16 May 2025 15:51:05 +0100
Subject: [PATCH 01/14] fixing test

---
 recommendations/tests/test_lighting_recommendations.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/recommendations/tests/test_lighting_recommendations.py b/recommendations/tests/test_lighting_recommendations.py
index dbb621e7..5fb914a8 100644
--- a/recommendations/tests/test_lighting_recommendations.py
+++ b/recommendations/tests/test_lighting_recommendations.py
@@ -49,6 +49,6 @@ class TestLightingRecommendations:
                                                                                                     'lighting in all '
                                                                                                     'fixed outlets',
                                                                             'low-energy-lighting': 100},
-             'total': 240.24, 'subtotal': 200.20000000000002, 'vat': 40.040000000000006, 'contingency': 14.3,
-             'preliminaries': 14.3, 'material': 80.0, 'profit': 28.6, 'labour_hours': 3.2, 'labour_days': 0.4,
-             'labour_cost': 63.0, 'survey': False}]
+             'total': 188.76000000000002, 'subtotal': 157.3, 'vat': 31.460000000000004, 'contingency': 14.3,
+             'material': 80.0, 'labour_hours': 3.2, 'labour_days': 0.4, 'labour_cost': 63.0, 'survey': False}
+        ]

From c0cf848db2676f93fd0e458c327de7554370e2af Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 20 May 2025 15:59:38 +0100
Subject: [PATCH 02/14] re-building thrive's programme

---
 .idea/Model.iml                            |   2 +-
 .idea/misc.xml                             |   2 +-
 asset_list/AssetList.py                    |  48 ++-
 asset_list/app.py                          |  52 +--
 asset_list/mappings/heating_systems.py     |   2 +
 asset_list/mappings/roof.py                |   9 +-
 etl/customers/thrive/Programme Analysis.py | 373 +++++++++++++++++++--
 7 files changed, 411 insertions(+), 77 deletions(-)

diff --git a/.idea/Model.iml b/.idea/Model.iml
index c6561970..09f2e496 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Fastapi-backend" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="AssetList" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
 </module>
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 50cad4ca..fb10c6b0 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Fastapi-backend" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="AssetList" project-jdk-type="Python SDK" />
   <component name="PyCharmProfessionalAdvertiser">
     <option name="shown" value="true" />
   </component>
diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py
index 4b7a11ec..199b175c 100644
--- a/asset_list/AssetList.py
+++ b/asset_list/AssetList.py
@@ -2507,7 +2507,7 @@ class AssetList:
         else:
             raise NotImplementedError("Invalid date in outcomes - implement me")
 
-        notes_col = "Notes" if "Notes" in outcomes.columns else "Notes / Outcomes"
+        notes_col = "Notes" if "Notes" in self.outcomes.columns else "Notes / Outcomes"
 
         lookup = lookup.merge(
             self.outcomes[["row_id", "Outcome", notes_col, date_col]], how="left", on="row_id"
@@ -2576,6 +2576,7 @@ class AssetList:
     def flag_survey_master(
         self,
         master_filepaths,
+        master_id_colnames,
         master_to_asset_list_filepath=None
     ):
         # TODO: This probably needs further expansion
@@ -2591,7 +2592,7 @@ class AssetList:
         logger.info("Getting masters and merging onto asset list")
         master_surveyed = []
         unmatched_submissions = []
-        for filepath in master_filepaths:
+        for idx, filepath in enumerate(master_filepaths):
             master_data = pd.read_csv(filepath)
             # Strip columns
             master_data.columns = [c.strip() for c in master_data.columns]
@@ -2618,22 +2619,6 @@ class AssetList:
                 "SUBMISSION DATE" if "SUBMISSION DATE" in master_data.columns else "SUBMISSION DATE TO INSTALLERS"
             )
 
-            # if "UPRN" in master_data.columns:
-            #     # We just need to check if any were cancelled
-            #     master_to_append = master_data[
-            #         ["UPRN", install_col, submission_col]
-            #     ].rename(
-            #         columns={
-            #             "UPRN": self.STANDARD_LANDLORD_PROPERTY_ID,
-            #             install_col: "survey_status",
-            #             submission_col: "submission_date"
-            #         }
-            #     )
-            #     master_to_append["cancelled"] = master_to_append["survey_status"].str.lower().str.contains("cancel")
-            #
-            #     master_surveyed.append(master_to_append)
-            #     continue
-
             master_data["row_id"] = master_data.index
 
             self.standardised_asset_list["house_no"] = self.standardised_asset_list.apply(
@@ -2656,8 +2641,6 @@ class AssetList:
             )
             measure_mix_col = "MEASURE COMBO"
 
-            # Otherwise, we need to match algorithmically
-            has_property_id = "UPRN" in master_data.columns
             logger.info("Matching master data to asset list")
             matched = []
             unmatched = []
@@ -2670,13 +2653,22 @@ class AssetList:
                 if pd.isnull(row[postcode_col]):
                     continue
 
-                # if has_property_id:
-                #     submission_uprn = row["UPRN"]
-                #
-                # if not pd.isnull(submission_uprn):
-                #     df = self.standardised_asset_list[
-                #         self.standardised_asset_list[self.STANDARD_LANDLORD_PROPERTY_ID] == submission_uprn
-                #         ]
+                if master_id_colnames[idx] is not None:
+                    # Filter the standardised asset list on this
+                    df = self.standardised_asset_list[
+                        self.standardised_asset_list[self.STANDARD_LANDLORD_PROPERTY_ID] == row[master_id_colnames[idx]]
+                        ]
+                    if df.shape[0] == 1:
+                        matched.append(
+                            {
+                                "row_id": row["row_id"],
+                                "original_house_no": original_house_no,
+                                "original_street": original_street,
+                                "original_postcode": original_postcode,
+                                self.STANDARD_LANDLORD_PROPERTY_ID: df[self.STANDARD_LANDLORD_PROPERTY_ID].values[0],
+                            }
+                        )
+                        continue
 
                 postcode_no_space = row[postcode_col].strip().replace(" ", "").lower()
 
@@ -2721,6 +2713,7 @@ class AssetList:
                             self.STANDARD_LANDLORD_PROPERTY_ID: df[self.STANDARD_LANDLORD_PROPERTY_ID].values[0],
                         }
                     )
+                    continue
 
                 if house_no in df["house_no"].values:
                     df = df[df["house_no"] == house_no]
@@ -2793,6 +2786,7 @@ class AssetList:
                 }
             )
             master_to_append["cancelled"] = master_to_append["survey_status"].str.lower().str.contains("cancel")
+            master_to_append["installed"] = master_to_append["survey_status"].str.lower().str.contains("installed")
             master_surveyed.append(master_to_append)
             unmatched_df = master_data[
                 master_data["row_id"].isin(unmatched)
diff --git a/asset_list/app.py b/asset_list/app.py
index bb898c09..3441e5de 100644
--- a/asset_list/app.py
+++ b/asset_list/app.py
@@ -62,36 +62,42 @@ def app():
     Property UPRN
     """
 
-    # Thurrock
-    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thurrock"
-    data_filename = "THURROCK COUNCIL - For analysis.xlsx"
-    sheet_name = "Assets"
-    postcode_column = 'Postcode'
-    fulladdress_column = "Full Address"
-    address1_column = None
-    address1_method = "house_number_extraction"
+    # Thrive - reconciliation
+    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation"
+    data_filename = "Thrive Asset List - Complete - Updated May 2025.xlsx"
+    sheet_name = "Sheet1"
+    postcode_column = 'postcode'
+    fulladdress_column = "full_address"
+    address1_column = "address_line_1"
+    address1_method = None
     address_cols_to_concat = []
     missing_postcodes_method = None
-    landlord_year_built = "Construction Date"
+    landlord_year_built = "age_band_calculated"
     landlord_os_uprn = None
-    landlord_property_type = "Property Type"
-    landlord_built_form = "Property Subtype"
+    landlord_property_type = "property_type"
+    landlord_built_form = "build_form"
     landlord_wall_construction = None
-    landlord_roof_construction = None
-    landlord_heating_system = "Main Heating Type"
+    landlord_roof_construction = "assumed_loft_insulation_thickness_updated"
+    landlord_heating_system = "heating_type_updated"
     landlord_existing_pv = None
-    landlord_property_id = "Property Reference"
-    landlord_sap = None
-    outcomes_filename = []
-    outcomes_sheetname = []
-    outcomes_postcode = []
-    outcomes_houseno = []
-    outcomes_id = []
-    outcomes_address = []
-    master_filepaths = []
+    landlord_property_id = "thrive_property_id"
+    landlord_sap = "sap_rating_updated"
+    outcomes_filename = [
+        os.path.join(data_folder, "Thrive - Outcomes - April 24-March25 - Corrected.xlsx")
+    ]
+    outcomes_sheetname = ["Sheet1"]
+    outcomes_postcode = ["postcode"]
+    outcomes_houseno = ["No."]
+    outcomes_id = ["thrive_property_id"]
+    outcomes_address = ["address"]
+    master_filepaths = [
+        os.path.join(data_folder, "Thrive Submissions ECO3 - with IDS.csv"),
+        os.path.join(data_folder, "Thrive Submissions ECO4 - with IDS.csv"),
+    ]
     master_to_asset_list_filepath = None
+    master_id_colnames = ["thrive_property_id", "thrive_property_id"]
     phase = False
-    ecosurv_landlords = None
+    ecosurv_landlords = "thrive"
 
     # Medway
     data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Medway"
diff --git a/asset_list/mappings/heating_systems.py b/asset_list/mappings/heating_systems.py
index 92f59f2c..daef01bb 100644
--- a/asset_list/mappings/heating_systems.py
+++ b/asset_list/mappings/heating_systems.py
@@ -292,4 +292,6 @@ HEATING_MAPPINGS = {
     'Communal Heating': 'communal heating',
     'No Data': 'unknown',
     'Boiler System': 'gas condensing boiler',
+    'Storage heating': 'electric storage heaters',
+    'Storage heating (HHRSH)': 'high heat retention storage heaters'
 }
diff --git a/asset_list/mappings/roof.py b/asset_list/mappings/roof.py
index 03d6f9af..3b447829 100644
--- a/asset_list/mappings/roof.py
+++ b/asset_list/mappings/roof.py
@@ -6,7 +6,7 @@ STANDARD_ROOF_CONSTRUCTIONS = {
     "pitched unknown access to loft",
     "piched unknown insulation",
     "pitched insulated",
-    "pitched less than 100mm insulation"
+    "pitched less than 100mm insulation",
     "another dwelling above",
     "flat unknown insulation",
     "unknown insulated",
@@ -38,4 +38,11 @@ ROOF_CONSTRUCTION_MAPPINGS = {
     '200mm': 'pitched insulated',
     '0-49mm': 'pitched less than 100mm insulation',
     '50mm': 'pitched less than 100mm insulation',
+    '': 'unknown',
+    'NR': 'unknown',
+    'Non-joist': 'unknown',
+    '25mm': 'pitched less than 100mm insulation',
+    '400mm+': 'pitched insulated',
+    '12mm': 'pitched less than 100mm insulation'
+
 }
diff --git a/etl/customers/thrive/Programme Analysis.py b/etl/customers/thrive/Programme Analysis.py
index 521cfd30..2d6a0d69 100644
--- a/etl/customers/thrive/Programme Analysis.py	
+++ b/etl/customers/thrive/Programme Analysis.py	
@@ -8,6 +8,8 @@ address the following concerns:
 """
 
 import pandas as pd
+from tqdm import tqdm
+from backend.SearchEpc import SearchEpc
 
 # This is Thrive's list of properties and when they should have been surveyed
 thrive_tracker = pd.read_excel(
@@ -51,27 +53,10 @@ original_columns = {
 }
 
 original_asset_list = original_asset_list[original_columns.keys()].rename(columns=original_columns)
-original_asset_list["Data Source"] = "Thrive Tracker"
+original_asset_list["Data Source"] = "Original Asset List"
+original_asset_list = original_asset_list.drop_duplicates()
 
 # We append on the missed properties, with the information we have
-# 'Unnamed: 0', 'Thrive Notes', 'Priority', 'UPRN', 'Short Address', '#',
-#        'Adress Line 1', 'Postcode', 'Property Type', 'Build Form',
-#        'Build year', 'Assumed mm ', 'SAP', 'Name', 'Primary Number',
-#        'Secondary Number', 'Email', 'Thrive use: Tenancy Number',
-#        'Special Requirements ', 'CIGA', 'Date CIGA check received',
-#        'Proposed Progamme', 'New Proposed Programme',
-#        'Missing from Route March?', 'Date Letters Sent (w.c)', 'Work Type',
-#        'Warmfront Survey Notes', 'Notes Reply (Thrive)', 'Loft Insulation',
-#        'Cost for Vents', 'Cavity Depth', 'Cavity Condition',
-#        'Date Submitted to installer', 'PRRN Number',
-#        'Loft insulation required? (Thrive)', 'Date booked ',
-#        'Completed\n(yes/no)', 'Date Completed',
-#        'Vents installed?\n(number and location)',
-#        'Loft Top Up\n(amount of insulation) ', 'CIGA Warranty Provided ',
-#        'Notes', 'Works Number', 'CW KGI Uploaded', 'Keystone Fan Added',
-#        'SA Cavity Condition Updated', 'SA Loft & Energy Updated',
-#        'PRRN Submitted '
-
 missed_properties["Full Address"] = (
     missed_properties["#"].astype(str) + ", " +
     missed_properties["Adress Line 1"].astype(str) + ", " +
@@ -94,6 +79,19 @@ missed_properties["WFT Findings"] = "Property Not Inspected"
 missed_properties["ECO Eligibility"] = "Property Not Inspected"
 missed_properties["Data Source"] = "Thrive Tracker"
 
+# We identify the duplicated ids in original_asset_list and pull out their rows, sorted by id
+dupe_ids = original_asset_list[original_asset_list["thrive_property_id"].duplicated()]["thrive_property_id"].unique()
+dupes = original_asset_list[
+    original_asset_list["thrive_property_id"].isin(dupe_ids)
+].copy()
+dupes = dupes.sort_values("thrive_property_id")
+
+original_asset_list = original_asset_list.rename(
+    columns={
+        "detailed_property_type": "build_form"
+    }
+)
+
 master_list = pd.concat([missed_properties, original_asset_list], ignore_index=True)
 
 # We were provided with a data update for a sample of properties. We update the data with this information
@@ -103,12 +101,339 @@ data_update = pd.read_excel(
     header=0
 )
 
-new_properties = data_update[~data_update["UPRN"].isin(master_list["thrive_property_id"].astype(str).values)]
+new_properties = data_update[~data_update["UPRN"].isin(master_list["thrive_property_id"].astype(str).values)].copy()
+new_properties["Full Address"] = (
+    new_properties["#"].astype(str) + ", " +
+    new_properties["Adress Line 1"].astype(str) + ", " +
+    new_properties["Postcode"].astype(str)
+)
+new_properties = new_properties[missed_columns.keys()].rename(columns=missed_columns)
+new_properties["WFT Findings"] = "Property Not Inspected"
+new_properties["ECO Eligibility"] = "Property Not Inspected"
+new_properties["Data Source"] = "13.05.2025 Data Update"
+
+master_list = pd.concat([new_properties, master_list])
+
+# We append any new data on heating system, heating type, and insulation type, based on the data update
+master_list = master_list.merge(
+    data_update[["UPRN", "Heating Type", "Assumed mm ", "SAP"]].rename(
+        columns={
+            "Heating Type": "heating_type_updated",
+            "Assumed mm ": "assumed_loft_insulation_thickness_updated",
+            "SAP": "sap_rating_updated"
+        }
+    ),
+    how="left",
+    left_on="thrive_property_id",
+    right_on="UPRN"
+)
+
+# We fill the missings
+master_list["heating_type_updated"] = master_list["heating_type_updated"].fillna(master_list["heating_type"])
+master_list["assumed_loft_insulation_thickness_updated"] = master_list[
+    "assumed_loft_insulation_thickness_updated"
+].fillna(master_list["assumed_loft_insulation_thickness"])
+master_list["sap_rating_updated"] = master_list["sap_rating_updated"].fillna(master_list["sap_rating"])
+
+assert not master_list["thrive_property_id"].duplicated().sum(), "Duplicate thrive_property_id found in master_list"
+
+master_list["Address in tracker"] = master_list["thrive_property_id"].astype(str).isin(
+    thrive_tracker["UPRN"].astype(str).values
+)
+
+# Store the asset list - call it master asset list updated May 2025
+master_list = master_list.drop(columns=["UPRN"])
+master_list["thrive_property_id"] = master_list["thrive_property_id"].astype(str)
+# master_list.to_excel(
+#     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - "
+#     "Complete - Updated May 2025.xlsx",
+# )
+
+master_list["house_number_TEMP"] = master_list.apply(
+    lambda x: SearchEpc.get_house_number(address=x["full_address"], postcode=x["postcode"]),
+    axis=1
+)
+
+# We add in the status of the property
+# TODO: Add the status of the property from the Thrive tracker
+outcomes = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive - Outcomes - April "
+    "24-March25.xlsx",
+    header=0
+)
+outcomes["row_id"] = outcomes.index
+
+# We have two ids which have the same phone number, but different UPRN, so we don't match to the tracker for these
+tracker_for_matching = thrive_tracker[
+    ~thrive_tracker["UPRN"].isin(['OAKGRE0065ABBLDW1', 'OAKGRE0066ABBLDW1', 'JACKET0102ABBLDW1', 'BELLCL0008BEDMDW1'])
+].copy()
+tracker_for_matching["Full Address"] = (
+    tracker_for_matching["#"].astype(str) + ", " +
+    tracker_for_matching["Adress Line 1"].astype(str) + ", " +
+    tracker_for_matching["Postcode"].astype(str)
+)
+
+outcomes_id_lookup = []
+for _, x in tqdm(outcomes.iterrows(), total=len(outcomes)):
+
+    hn = str(x["No."])
+    address = x["Address"]
+    postcode = x["Postcode"]
+    contact_no = str(x["Contact No"]) if not pd.isnull(x["Contact No"]) else str(x["Contact No.1"])
+    contact_no = None if contact_no == "nan" else contact_no
+
+    if address == "292 Micklefield Road":
+        hn = "292"
+
+    if (address == "Micklefield Road") & (hn == "302"):
+        hn = "292"
+
+    if (address == "103a Norfolk Road Rickmansworth Hertfordshire WD3 1JY"):
+        hn = "103a"
+
+    if (address == "105a Norfolk Road Rickmansworth Hertfordshire WD3 1JY"):
+        hn = "105a"
+
+    if (address == "107a Norfolk Road Rickmansworth Hertfordshire WD3 1JY"):
+        hn = "107a"
+
+    #
+    # # We match this to the tracker
+    # m1 = tracker_for_matching[tracker_for_matching["Primary Number"].astype(str) == contact_no]
+    # # Many of the phone numbers don't have a leading zero in the tracker so we add them
+    # if (m1.shape[0] != 1) and not pd.isnull(contact_no):
+    #     m1 = tracker_for_matching[tracker_for_matching["Primary Number"].astype(str) == contact_no.lstrip("0")]
+    # 
+    # if m1.shape[0] > 1:
+    #     raise ValueError(
+    #         f"Error for {hn} - {address} - {postcode} - {contact_no} in the tracker"
+    #     )
+
+    # if m1.empty:
+    m1 = tracker_for_matching[
+        (tracker_for_matching["#"].astype(str) == hn) &
+        (tracker_for_matching["Postcode"] == postcode)
+        ]
+
+    if m1.empty:
+        # Some properties aren't in the tracker, we match to the master list
+        m1 = master_list[
+            (master_list["house_number_TEMP"].astype(str) == hn) &
+            (master_list["postcode"] == postcode)
+            ]
+        outcomes_id_lookup.append(
+            {
+                "row_id": x["row_id"],
+                "thrive_property_id": m1["thrive_property_id"].values[0],
+                "address": m1["full_address"].values[0],
+                "postcode": m1["postcode"].values[0],
+            }
+        )
+        continue
+
+    if m1.shape[0] != 1:
+        raise ValueError(
+            f"Error for {hn} - {address} - {postcode} - {contact_no} in the tracker"
+        )
+
+    # We add the status to the master list
+    outcomes_id_lookup.append(
+        {
+            "row_id": x["row_id"],
+            "thrive_property_id": m1["UPRN"].values[0],
+            "address": m1["Full Address"].values[0],
+            "postcode": m1["Postcode"].values[0],
+        }
+    )
+
+outcomes_id_lookup = pd.DataFrame(outcomes_id_lookup)
+outcomes = outcomes.merge(
+    outcomes_id_lookup,
+    how="left",
+    left_on="row_id",
+    right_on="row_id"
+)
+
+outcomes = outcomes.drop(columns=["row_id"])
+outcomes = outcomes.rename(
+    columns={
+        "Outcomes": "Outcome",
+        "Notes                                                                                         (If 'no "
+        "answer' under outcomes, have you checked around the property for access issues where possible?)": "Notes",
+    }
+)
 
 
+# Store the corrected outcomes
+# outcomes.to_excel(
+#     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive - Outcomes -
+#     April 24-March25 - Corrected.xlsx",
+#     index=False
+# )
 
-data_update = = data_update[["UPRN", ""]]
 
-# TODO: Flag the Thrive priorities and create a separate project code for these
-# TODO: Add the general project code
-# TODO: Add the thrive
\ No newline at end of file
+def parse_date(value):
+    """Parse a date cell into a Timestamp; return NaT when it is blank or cannot be parsed."""
+    if isinstance(value, str):  # blank (NaN) and datetime cells have no .strip(), so only clean strings
+        value = value.strip().lower().replace('w.c', '').replace('w/c', '').strip()  # drop any 'w.c' / 'w/c' marker
+    try:
+        return pd.to_datetime(value, dayfirst=True, errors='coerce')
+    except Exception:
+        return pd.NaT
+
+
+outcomes['Parsed Date'] = outcomes['Date letters sent'].apply(parse_date)
+
+# Next step - match the submissions master to the asset list. We will append on the UPRN
+eco3_submissions = pd.read_csv(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions "
+    "ECO3.csv",
+    header=0
+)
+eco3_submissions["row_id"] = eco3_submissions.index
+
+eco4_submissions = pd.read_csv(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions "
+    "ECO4.csv",
+    header=0
+)
+eco4_submissions["row_id"] = eco4_submissions.index
+
+# List of properties never on the asset list
+not_on_master = [
+    "7+FOXGROVE PATH+WD19 6YL", "9+FOXGROVE PATH+WD19 6YL", "11+FOXGROVE PATH+WD19 6YL",
+    "20+LINCOLN DRIVE+WD19 7BA", "22+LINCOLN DRIVE+WD19 7BA", "24+LINCOLN DRIVE+WD19 7BA",
+    "26+LINCOLN DRIVE+WD19 7BA", "1+Ryman Court, Stag Lane+WD3 5HN", "6+Ryman Court, Stag Lane+WD3 5HN",
+    "9+Ryman Court, Stag Lane+WD3 5HN", "10+Ryman Court, Stag Lane+WD3 5HN", "11+Ryman Court, Stag Lane+WD3 5HN",
+    "12+Ryman Court, Stag Lane+WD3 5HN", "14+Ryman Court, Stag Lane+WD3 5HN", "15+Ryman Court, Stag Lane+WD3 5HN",
+    "20+Ryman Court, Stag Lane+WD3 5HN", "21+Ryman Court, Stag Lane+WD3 5HN", "22+Ryman Court, Stag Lane+WD3 5HN",
+    "25+Ryman Court, Stag Lane+WD3 5HN", "26+Ryman Court, Stag Lane+WD3 5HN", "31+Ryman Court, Stag Lane+WD3 5HN",
+    "33+Ryman Court, Stag Lane+WD3 5HN", "34+Ryman Court, Stag Lane+WD3 5HN",
+    '37+Ryman Court, Stag Lane+WD3 5HN', '38+Ryman Court, Stag Lane+WD3 5HN', '39+Ryman Court, Stag Lane+WD3 5HN',
+    '41+Ryman Court, Stag Lane+WD3 5HN', '43+Ryman Court, Stag Lane+WD3 5HN', '45+Ryman Court, Stag Lane+WD3 5HN',
+    '46+Ryman Court, Stag Lane+WD3 5HN', '48+Ryman Court, Stag Lane+WD3 5HN', '49+Ryman Court, Stag Lane+WD3 5HN',
+    '50+Ryman Court, Stag Lane+WD3 5HN', '52+Ryman Court, Stag Lane+WD3 5HN'
+]
+
+eco3_remap = {
+    "19+OAKHILL ROAD+WD5 8RE": ('19', 'OAKHILL ROAD', 'WD3 9RE'),
+    "29+OAKHILL ROAD+WD5 8RE": ('29', 'OAKHILL ROAD', 'WD3 9RE'),
+    "31+OAKHILL ROAD+WD5 8RE": ('31', 'OAKHILL ROAD', 'WD3 9RE'),
+    "44+OAKHILL ROAD+WD5 8RE": ('44', 'OAKHILL ROAD', 'WD3 9RF'),
+    "64+OAKHILL ROAD+WD4 8RF": ('64', 'OAKHILL ROAD', 'WD3 9RF'),
+    "11+LANCASTER WAY+WD3 PRE": ('11', 'LANCASTER WAY', 'WD5 0PQ'),
+    "16+LANCASTER WAY+WD3 PRE": ('16', 'LANCASTER WAY', 'WD5 0PQ'),
+    "58+TALBOT ROAD +WD31HE": ('58', 'TALBOT ROAD', 'WD3 1HE'),
+    "10+PEARTREE COURT/WELWYN GARDEN CITY+AL73XN": ('10', 'PEARTREE COURT/WELWYN GARDEN CITY', 'AL7 3XN'),
+    "25+GOBLINS GREEN/WELWYN GARDEN CITY+AL73ST": ('25', 'GOBLINS GREEN/WELWYN GARDEN CITY', 'AL7 3ST'),
+    "32+GOBLINS GREEN/WELWYN GARDEN CITY+AL73ST": ('32', 'GOBLINS GREEN/WELWYN GARDEN CITY', 'AL7 3ST'),
+    "94+BAKER ST/POTTERS BAR+EN62EP": ('94', 'BAKER ST/POTTERS BAR', 'EN6 2EP'),
+    '33+Tudor Way+WD3JA': ('33', 'Tudor Way', 'WD3 8JA'),
+    '120+Hazlewood lane +WD5 0HF': ('120', 'Hazlewood lane', 'WD5 0HE'),
+    '35+Rosehill gardens +WD5 0HE': ('35', 'Rosehill gardens', 'WD5 0HF'),
+    '18+Rosehill gardens +WD5 0HE': ('18', 'Rosehill gardens', 'WD5 0HF'),
+    '34+Rosehill gardens +WD5 0HE': ('34', 'Rosehill gardens', 'WD5 0HF'),
+    '58+Rosehill gardens +WD5 0HE': ('58', 'Rosehill gardens', 'WD5 0HF'),
+    '48+Rosehill gardens +WD5 0HE': ('48', 'Rosehill gardens', 'WD5 0HF'),
+    '45+Rosehill gardens +WD5 0HE': ('45', 'Rosehill gardens', 'WD5 0HF'),
+    '6+Rosehill gardens +WD5 0HE': ('6', 'Rosehill gardens', 'WD5 0HF'),
+    '2+Rosehill gardens +WD5 0HE': ('2', 'Rosehill gardens', 'WD5 0HF'),
+    '29+Rosehill gardens +WD5 0HE': ('29', 'Rosehill gardens', 'WD5 0HF'),
+    '61+GOLDEN DELL+AL8 4EE': ('61', 'GOLDEN DELL', 'AL7 4EE'),
+    '2O+EDINBURGH AVENUE+WD3 8LB': ('20', 'EDINBURGH AVENUE', 'WD3 8LB'),
+}
+
+eco3_lookup = []
+for _, row in tqdm(eco3_submissions.iterrows(), total=len(eco3_submissions)):
+    hn = row["NO "]
+    pc = row["Post Code"]
+    street = row["Street / Block Name"]
+    key = f"{hn}+{street}+{pc}"
+    if key in not_on_master:
+        continue
+
+    if key in eco3_remap:
+        # eco3_remap holds the corrected house number / street / postcode for this submission key
+        hn, street, pc = eco3_remap[key]
+
+    # Match on house number and postcode only; compare case-insensitively, as the ECO4 loop does
+    m1 = master_list[
+        (master_list["house_number_TEMP"].astype(str) == str(hn)) &
+        (master_list["postcode"].str.lower() == str(pc).lower())
+        ]
+
+    if m1.shape[0] != 1:
+        raise ValueError(
+            f"Expected exactly one master list match for {key}, found {m1.shape[0]}"
+        )
+
+    eco3_lookup.append(
+        {
+            "row_id": row["row_id"],
+            "thrive_property_id": m1["thrive_property_id"].values[0],
+            "submission_house_number": row["NO "],
+            "submission_address1": row["Street / Block Name"],
+            "submission_postcode": row["Post Code"],
+        }
+    )
+
+eco4_lookup = []
+for _, row in tqdm(eco4_submissions.iterrows(), total=len(eco4_submissions)):
+    hn = row["NO."]
+    pc = row["Post Code"]
+    street = row["Street / Block Name"]
+    key = f"{hn}+{street}+{pc}"
+    if key in not_on_master:
+        continue
+
+    if key in eco3_remap:
+        # eco3_remap holds the corrected house number / street / postcode for this submission key
+        hn, street, pc = eco3_remap[key]
+
+    # Match on house number and postcode only; str() keeps a blank postcode from raising AttributeError
+    m1 = master_list[
+        (master_list["house_number_TEMP"].astype(str) == str(hn)) &
+        (master_list["postcode"].str.lower() == str(pc).lower())
+        ]
+
+    if m1.shape[0] != 1:
+        raise ValueError(
+            f"Expected exactly one master list match for {key}, found {m1.shape[0]}"
+        )
+
+    eco4_lookup.append(
+        {
+            "row_id": row["row_id"],
+            "thrive_property_id": m1["thrive_property_id"].values[0],
+            "submission_house_number": row["NO."],
+            "submission_address1": row["Street / Block Name"],
+            "submission_postcode": row["Post Code"],
+        }
+    )
+
+# We match the lookups back to the submission sheets
+eco3_lookup = pd.DataFrame(eco3_lookup)
+eco3_submissions = eco3_submissions.merge(
+    eco3_lookup,
+    how="left",
+    on="row_id",
+)
+
+eco4_lookup = pd.DataFrame(eco4_lookup)
+eco4_submissions = eco4_submissions.merge(
+    eco4_lookup,
+    how="left",
+    on="row_id",
+)
+
+# Store
+eco3_submissions.to_csv(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions "
+    "ECO3 - with IDS.csv",
+    index=False
+)
+eco4_submissions.to_csv(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions "
+    "ECO4 - with IDS.csv",
+    index=False
+)

From 2e041bfe75384f65a437dc55d585d04366dff1ff Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 21 May 2025 11:55:10 +0100
Subject: [PATCH 03/14] set up hubspot status

---
 asset_list/AssetList.py      | 193 +++++++++++++++++++++++++++++++----
 asset_list/app.py            |  82 +++++----------
 asset_list/hubspot/config.py |  28 +++++
 3 files changed, 231 insertions(+), 72 deletions(-)
 create mode 100644 asset_list/hubspot/config.py

diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py
index 199b175c..e68ee6dd 100644
--- a/asset_list/AssetList.py
+++ b/asset_list/AssetList.py
@@ -4,8 +4,8 @@ import re
 import tiktoken
 from pprint import pprint
 from datetime import datetime
+import asset_list.hubspot.config as hubspot_config
 
-from numpy.ma.core import masked_not_equal
 from openai import OpenAI
 import numpy as np
 import pandas as pd
@@ -292,6 +292,13 @@ class AssetList:
         "Any further surveyor notes", 'Surveyors Name'
     ]
 
+    NON_INTRUSIVES_NEW_FORMAT_COLNAMES = [
+        "Has the property been re-walled?", "Is the property tile hung?", "Does the property have a render?",
+        "Does the property have cladding?", "Gable Wall Obstructions",
+        "Does the property have foliage that needs removal?",
+        "Potential unsafe environment", "Date of Inspection"
+    ]
+
     NON_INTRUSIVES_ELIGIBILITY_COLUMN = "Eligibility (Red/Yellow/Green)"
 
     OLD_FORMAT_NON_INTRUSIVE_COLNAMES = ['WFT Findings', 'ECO Eligibility']
@@ -400,6 +407,10 @@ class AssetList:
 
         self.non_intrusives_eligibility = "Eligibility (Red/Yellow/Green)" in self.raw_asset_list.columns
 
+        self.new_format_non_insturives_present = (
+            "Has the property been re-walled?" in self.standardised_asset_list.columns
+        )
+
         # Names of columns
         self.landlord_property_id = landlord_property_id
         self.address1_colname = address1_colname
@@ -687,6 +698,9 @@ class AssetList:
         if self.non_intrusives_eligibility:
             non_intrusive_columns.append(self.NON_INTRUSIVES_ELIGIBILITY_COLUMN)
 
+        if self.new_format_non_insturives_present:
+            non_intrusive_columns += self.NON_INTRUSIVES_NEW_FORMAT_COLNAMES
+
         if self.old_format_non_intrusives_present:
             # We check if we have the ECO Eligibility column, which we might not have
             non_intrusive_columns = [
@@ -931,6 +945,23 @@ class AssetList:
             self.standardised_asset_list[self.STANDARD_LANDLORD_PROPERTY_ID].astype(str)
         )
 
+        # Clean up the standard SAP column, which can be problematic
+        if self.landlord_sap is not None:
+            self.standardised_asset_list[self.STANDARD_SAP] = (
+                self.standardised_asset_list[self.STANDARD_SAP]
+                .astype(str)
+                .str.replace('\xa0', ' ', regex=False)
+                .str.strip()
+            )
+            self.standardised_asset_list[self.STANDARD_SAP] = np.where(
+                self.standardised_asset_list[self.STANDARD_SAP] == "",
+                None,
+                self.standardised_asset_list[self.STANDARD_SAP]
+            )
+            self.standardised_asset_list[self.STANDARD_SAP] = (
+                self.standardised_asset_list[self.STANDARD_SAP].astype(float)
+            )
+
     def merge_data(self, df: pd.DataFrame):
         """
         Used to insert data into the standardised asset list, based on the domna property id
@@ -1864,7 +1895,7 @@ class AssetList:
             for col in ["cavity_reason", "solar_reason"]:
                 self.standardised_asset_list[col] = np.where(
                     (
-                        (~pd.isnull(self.standardised_asset_list["submission_date"]))
+                        (~pd.isnull(self.standardised_asset_list["submission_status"]))
                     ),
                     None,
                     self.standardised_asset_list[col]
@@ -1874,7 +1905,7 @@ class AssetList:
             for col in ["cavity_reason", "solar_reason"]:
                 self.standardised_asset_list[col] = np.where(
                     (
-                        (~pd.isnull(self.standardised_asset_list["ecosurv_reference"]))
+                        (~pd.isnull(self.standardised_asset_list["ecosurv_status"]))
                     ),
                     None,
                     self.standardised_asset_list[col]
@@ -1911,6 +1942,42 @@ class AssetList:
                     self.outcomes[self.DOMNA_PROPERTY_ID].isin(identified_work)
                 ]
 
+    def label_property_status(self):
+        """
+        This function is designed to be run after identify_worktypes() has been run, and will create a "hubspot_status"
+        column, which will note where each property is (to be surveyed, surveyed, installed), using the stages we
+        recognise within hubspot
+        :return:
+        """
+
+        # For anything that is ready to go, that gets set to ready to be scheduled
+        self.standardised_asset_list["hubspot_status"] = np.where(
+            ~pd.isnull(self.standardised_asset_list["cavity_reason"]) |
+            ~pd.isnull(self.standardised_asset_list["solar_reason"]),
+            hubspot_config.HubspotProcessStatus.READY_TO_BE_SCHEDULED.label,
+            None
+        )
+
+        # we step through the process of flagging completed surveys
+
+        # We utilise submissions, ecosurv and outcomes to define the hubspot status
+        # We'll take the maximum of these three columns, based on the enum integer value
+        label_to_enum = {e.label: e for e in hubspot_config.HubspotProcessStatus}
+
+        def get_max_status_from_columns(row):
+            status_candidates = []
+            for col in ["submission_status", "ecosurv_install_status", "outcome_status"]:
+                label = row.get(col)
+                if label in label_to_enum:
+                    status_candidates.append(label_to_enum[label])
+            if not status_candidates:
+                return row["hubspot_status"]  # fallback to existing status if no updates
+            return max(status_candidates).label
+
+        self.standardised_asset_list["hubspot_status"] = self.standardised_asset_list.apply(
+            get_max_status_from_columns, axis=1
+        )
+
     def flat_analysis(self):
 
         # We need to deduce the building name - we strip out the house number
@@ -2331,6 +2398,52 @@ class AssetList:
             # It doesn't matter too much which record we take
             matched = matched.drop_duplicates(subset=[self.STANDARD_LANDLORD_PROPERTY_ID])
 
+        # We merge on the status of the property
+        matched = matched.merge(
+            self.ecosurv[["Reference", "Status", "Lead Status", "Tags"]].rename(
+                columns={
+                    "Reference": "ecosurv_reference",
+                    "status": "ecosurv_status",
+                    "Lead Status": "ecosurv_lead_status",
+                    "Tags": "ecosurv_tags"
+                }
+            ), how="left", on="ecosurv_reference"
+        )
+
+        matched["ecosurv_install_status"] = None
+
+        # This mapping is ordered by process order, where lodgment is the final step so if we have an indication
+        # that the property is ready for lodgement, we set the status to that. We then proceed through the other
+        # statuses where the penultimate status is install complete
+        mapping = {
+            "Cancelled": hubspot_config.HubspotProcessStatus.INSTALLER_CANCELLED_FINALIZED,
+            "TrustMark: Lodged": hubspot_config.HubspotProcessStatus.LODGEMENT_COMPLETE,
+            "Retrofit: Complete": hubspot_config.HubspotProcessStatus.INSTALL_COMPLETE,
+            "Retrofit: Awaiting TrustMark": hubspot_config.HubspotProcessStatus.INSTALL_COMPLETE,
+            "Retrofit: Awaiting post checks": hubspot_config.HubspotProcessStatus.INSTALL_COMPLETE,
+            "Installer Notification Sent": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER,
+            "Submitted to RC": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER,
+            "COONEY": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER,
+            "Signed off for install": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER,
+            "Retrofit: Signed off for install": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER,
+            "Audit": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER,
+            "Accepted": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER,
+            "Sold": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER
+        }
+
+        def get_max_status(tag_str):
+            if pd.isna(tag_str):
+                return None
+            matched_statuses = []
+            for tag, status in mapping.items():
+                if tag in tag_str:
+                    matched_statuses.append(status)
+            if not matched_statuses:
+                return None
+            return max(matched_statuses).label
+
+        matched["ecosurv_install_status"] = matched["ecosurv_tags"].apply(get_max_status)
+
         self.standardised_asset_list = self.standardised_asset_list.merge(
             matched,
             how="left",
@@ -2380,7 +2493,7 @@ class AssetList:
                 # Perform the remap
                 outcomes["Outcome"] = outcomes["Notes / Outcomes"].map(remap_dictionary)
 
-            outcomes["Outcome"] = outcomes["Outcome"].str.lower()
+            outcomes["Outcome"] = outcomes["Outcome"].str.lower().str.strip()
 
             logger.info("Matching outcomes to asset list")
             # Merge the outcomes onto the asset list - we check we're able to match sufficiently well
@@ -2542,12 +2655,13 @@ class AssetList:
             apply(get_latest_note).
             reset_index(drop=True)
         )
-        latest_note = latest_note[["domna_property_id", notes_col]]
+        latest_note = latest_note[["domna_property_id", notes_col, "Outcome"]].rename(
+            columns={"Notes": "latest_outcome_note", "Outcome": "latest_outcome"}
+        )
 
         pivot_df = lookup.groupby(["domna_property_id", "Outcome"]).size().unstack(fill_value=0).reset_index()
-        pivot_df = pivot_df.merge(
-            visit_counts, how="left", on="domna_property_id"
-        )
+        pivot_df = pivot_df.merge(visit_counts, how="left", on="domna_property_id")
+        pivot_df = pivot_df.merge(latest_note, how="left", on="domna_property_id")
 
         # We want the latest note
 
@@ -2558,15 +2672,32 @@ class AssetList:
         self.outcomes["matched_to_asset_list"] = self.outcomes["row_id"].isin(lookup["row_id"].values)
         self.outcomes = self.outcomes.merge(lookup[["row_id", "domna_property_id"]], how="left", on="row_id")
 
+        # We flag the outcome status, based on the outcome
+        pivot_df["outcome_status"] = None
+
+        if "surveyed" in pivot_df.columns:
+            pivot_df["outcome_status"] = np.where(
+                pivot_df["surveyed"] > 0, hubspot_config.HubspotProcessStatus.SURVEYED_COMPLETED_SIGNED_OFF.label,
+                pivot_df["outcome_status"]
+            )
+
+        if "installer refusal" in pivot_df.columns:
+            pivot_df["outcome_status"] = np.where(
+                pivot_df["installer refusal"] > 0, hubspot_config.HubspotProcessStatus.NOT_VIABLE.label,
+                pivot_df["outcome_status"]
+            )
+
+        pivot_df["outcome_status"] = np.where(
+            pivot_df["latest_outcome"].isin(["see notes"]) &
+            (pivot_df["outcome_status"] != hubspot_config.HubspotProcessStatus.SURVEYED_COMPLETED_SIGNED_OFF.label),
+            hubspot_config.HubspotProcessStatus.SURVEYED_NO_ACCESS_NEEDS_SIGN_OFF.label,
+            pivot_df["outcome_status"]
+        )
+
         # We merge out pivoted outcomes onto the asset list
         self.standardised_asset_list = self.standardised_asset_list.merge(
             pivot_df, how="left", left_on=self.DOMNA_PROPERTY_ID, right_on="domna_property_id"
         )
-        # Merge the latest note
-        self.standardised_asset_list = self.standardised_asset_list.merge(
-            latest_note.rename(columns={notes_col: "Latest Route March Note"}),
-            how="left", left_on=self.DOMNA_PROPERTY_ID, right_on="domna_property_id"
-        )
 
         if self.standardised_asset_list[self.DOMNA_PROPERTY_ID].duplicated().sum():
             raise ValueError("Duplicates appreared - something went wrong")
@@ -2640,6 +2771,7 @@ class AssetList:
                 master_data.columns else "PROPERTY TYPE As per table emailed"
             )
             measure_mix_col = "MEASURE COMBO"
+            installer_notes_col = "INSTALLERS NOTES ; REASONS FOR CANCELLATIONS"
 
             logger.info("Matching master data to asset list")
             matched = []
@@ -2774,19 +2906,30 @@ class AssetList:
             self.standardised_asset_list = self.standardised_asset_list.drop(columns="house_no")
 
             # We match the "UPRN" which is the landlords ID, onto the master sheet
+
+            if measure_mix_col not in master_data.columns:
+                master_data[measure_mix_col] = "Measure mix not recorded"
+
             matched = pd.DataFrame(matched)
-            master_to_append = master_data[[scheme_col, "row_id", install_col, submission_col, measure_mix_col]].merge(
+            master_to_append = master_data[
+                [scheme_col, "row_id", install_col, submission_col, measure_mix_col, installer_notes_col]
+            ].merge(
                 matched, how="left", on="row_id"
             ).rename(
                 columns={
                     scheme_col: "funding_scheme",
                     measure_mix_col: "measure_mix",
                     install_col: "survey_status",
-                    submission_col: "submission_date"
+                    submission_col: "submission_date",
+                    installer_notes_col: "submission_installer_notes"
                 }
             )
-            master_to_append["cancelled"] = master_to_append["survey_status"].str.lower().str.contains("cancel")
-            master_to_append["installed"] = master_to_append["survey_status"].str.lower().str.contains("installed")
+            master_to_append["submission_cancelled"] = (
+                master_to_append["survey_status"].str.lower().str.contains("cancel")
+            )
+            master_to_append["submission_installed"] = (
+                master_to_append["survey_status"].str.lower().str.contains("installed")
+            )
             master_surveyed.append(master_to_append)
             unmatched_df = master_data[
                 master_data["row_id"].isin(unmatched)
@@ -2822,7 +2965,21 @@ class AssetList:
         ].astype(str)
 
         # We de-dupe crudely on landlord property id
-        self.master_surveyed = master_surveyed.drop_duplicates(subset=[self.STANDARD_LANDLORD_PROPERTY_ID])
+        self.master_surveyed = master_surveyed.drop_duplicates(subset=[self.STANDARD_LANDLORD_PROPERTY_ID]).copy()
+
+        # We now add the submission status, based on the hubspot stages
+        self.master_surveyed["submission_status"] = hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER.label
+        self.master_surveyed["submission_status"] = np.where(
+            self.master_surveyed["submission_cancelled"] == True,
+            hubspot_config.HubspotProcessStatus.INSTALLER_CANCELLED_FINALIZED.label,
+            self.master_surveyed["submission_status"]
+        )
+
+        self.master_surveyed["submission_status"] = np.where(
+            self.master_surveyed["submission_installed"] == True,
+            hubspot_config.HubspotProcessStatus.INSTALL_COMPLETE.label,
+            self.master_surveyed["submission_status"]
+        )
 
         self.standardised_asset_list = self.standardised_asset_list.merge(
             self.master_surveyed, how="left", on=self.STANDARD_LANDLORD_PROPERTY_ID
diff --git a/asset_list/app.py b/asset_list/app.py
index 3441e5de..31c404e5 100644
--- a/asset_list/app.py
+++ b/asset_list/app.py
@@ -99,66 +99,36 @@ def app():
     phase = False
     ecosurv_landlords = "thrive"
 
-    # Medway
-    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Medway"
-    data_filename = "MEDWAY Asset List.xlsx"
-    sheet_name = "Asset list"
+    # Torus
+    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Torus/Phase 2"
+    data_filename = "Torus Property Asset List - INSPECTIONS.xlsx"
+    sheet_name = "TORUS"
     postcode_column = 'Postcode'
     fulladdress_column = None
-    address1_column = "House Number"
+    address1_column = "AddressLine1"
     address1_method = None
-    address_cols_to_concat = ["House Number", "Street 1"]
+    address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
     missing_postcodes_method = None
-    landlord_year_built = "Year Built"
-    landlord_os_uprn = None
-    landlord_property_type = "Property Type - Academy"
-    landlord_built_form = "Property Type - Academy"
-    landlord_wall_construction = None
-    landlord_roof_construction = None
-    landlord_heating_system = None
-    landlord_existing_pv = None
-    landlord_property_id = "Row ID"
-    landlord_sap = None
-    outcomes_filename = []
-    outcomes_sheetname = []
-    outcomes_postcode = []
-    outcomes_houseno = []
-    outcomes_id = []
-    outcomes_address = []
-    master_filepaths = []
-    master_to_asset_list_filepath = None
-    phase = False
-    ecosurv_landlords = None
-
-    # MHS
-    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS"
-    data_filename = "MHS HOMES (Full Asset List) - for programme build.xlsx"
-    sheet_name = "Sheet1"
-    postcode_column = 'Postcode'
-    fulladdress_column = "FullAddress"
-    address1_column = None
-    address1_method = "house_number_extraction"
-    address_cols_to_concat = []
-    missing_postcodes_method = None
-    landlord_year_built = "BuiltInYear"
-    landlord_os_uprn = None
-    landlord_property_type = "AssetType"
-    landlord_built_form = "PropertyType"
-    landlord_wall_construction = None
-    landlord_roof_construction = None
-    landlord_heating_system = None
-    landlord_existing_pv = None
+    landlord_year_built = "Property Age"
+    landlord_os_uprn = "NatUPRN"
+    landlord_property_type = "Property Type"
+    landlord_built_form = "Built Form"
+    landlord_wall_construction = "Wall Construction"
+    landlord_roof_construction = "Roof Construction"
+    landlord_heating_system = "Space Heating Source"
+    landlord_existing_pv = "Low Carbon Technology (Solar PV)"
     landlord_property_id = "UPRN"
-    landlord_sap = None
-    outcomes_filename = []
-    outcomes_sheetname = []
-    outcomes_postcode = []
-    outcomes_houseno = []
-    outcomes_id = []
-    outcomes_address = []
+    landlord_sap = "SAP Score"
+    outcomes_filename = None
+    outcomes_sheetname = None
+    outcomes_postcode = None
+    outcomes_houseno = None
+    outcomes_id = None
+    outcomes_address = None
     master_filepaths = []
     master_to_asset_list_filepath = None
-    phase = False
+    master_id_colnames = []
+    phase = True
     ecosurv_landlords = None
 
     # Southern Midlands
@@ -300,7 +270,8 @@ def app():
 
     asset_list.flag_survey_master(
         master_filepaths=master_filepaths,
-        master_to_asset_list_filepath=master_to_asset_list_filepath
+        master_to_asset_list_filepath=master_to_asset_list_filepath,
+        master_id_colnames=master_id_colnames,
     )
 
     asset_list.flag_ecosurv(ecosurv_landlords)
@@ -505,6 +476,9 @@ def app():
 
     pprint(asset_list.work_type_figures)
 
+    # We now flag the status of the property
+    asset_list.label_property_status()
+
     asset_list.flat_analysis()
 
     asset_list.load_contact_details(
diff --git a/asset_list/hubspot/config.py b/asset_list/hubspot/config.py
new file mode 100644
index 00000000..180bf0e0
--- /dev/null
+++ b/asset_list/hubspot/config.py
@@ -0,0 +1,28 @@
+from enum import IntEnum
+
+
+class HubspotProcessStatus(IntEnum):
+    def __new__(cls, value, label):
+        obj = int.__new__(cls, value)
+        obj._value_ = value
+        obj.label = label
+        return obj
+
+    # the numerical values of this enum aren't important, but they define the order of operations
+
+    # This is the first stage, where a survey is ready to go
+    READY_TO_BE_SCHEDULED = 1, "READY TO BE SCHEDULED"
+    # The property didn't get access and needs sign off
+    SURVEYED_NO_ACCESS_NEEDS_SIGN_OFF = 2, "SURVEYED - NO ACCESS - NEED SIGN OFF"
+    # The survey has been completed. We don't have any update as to whether the property has been installed
+    SURVEYED_COMPLETED_SIGNED_OFF = 3, "SURVEYED - COMPLETED - SIGNED OFF"
+    # The property turned out to be ineligibile
+    NOT_VIABLE = 4, "NOT VIABLE"
+    # The property is with the installer. This will likely be the default for historic programmes
+    SUBMITTED_TO_INSTALLER = 5, "SUBMITTED TO INSTALLER"
+    # The property has been installed
+    INSTALL_COMPLETE = 6, "INSTALL COMPLETE"
+    # The install has completed and lodgement is complete
+    LODGEMENT_COMPLETE = 7, "LODGEMENT COMPLETE"
+    # The property has been cancelled
+    INSTALLER_CANCELLED_FINALIZED = 8, "INSTALLER CANCELLED - FINALIZED"

From 1e0fbb111dc401e7cb5697a6285d0d89e9483d91 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 23 May 2025 10:23:38 +0100
Subject: [PATCH 04/14] don't fetch from find my epc website when the property
 doesn't have an epc

---
 .idea/Model.iml                           |   2 +-
 .idea/misc.xml                            |   2 +-
 asset_list/AssetList.py                   |  82 ++++++++--------
 asset_list/app.py                         |   8 +-
 asset_list/hubspot/prepare_for_hubspot.py |  18 ++++
 backend/engine/engine.py                  |   2 +-
 etl/customers/thrive/Project codes.py     | 108 ++++++++++++++++++++++
 7 files changed, 180 insertions(+), 42 deletions(-)
 create mode 100644 asset_list/hubspot/prepare_for_hubspot.py
 create mode 100644 etl/customers/thrive/Project codes.py

diff --git a/.idea/Model.iml b/.idea/Model.iml
index 09f2e496..c6561970 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="AssetList" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Fastapi-backend" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
 </module>
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
index fb10c6b0..50cad4ca 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="AssetList" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Fastapi-backend" project-jdk-type="Python SDK" />
   <component name="PyCharmProfessionalAdvertiser">
     <option name="shown" value="true" />
   </component>
diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py
index e68ee6dd..fea0f59e 100644
--- a/asset_list/AssetList.py
+++ b/asset_list/AssetList.py
@@ -279,6 +279,7 @@ class AssetList:
     STANDARD_HEATING_SYSTEM = "landlord_heating_system"
     STANDARD_EXISTING_PV = "landlord_existing_pv"
     STANDARD_SAP = "landlord_sap_rating"
+    STANDARD_BLOCK_REFERENCE = "landlord_block_reference"
 
     DOMNA_PROPERTY_ID = "domna_property_id"
 
@@ -369,6 +370,7 @@ class AssetList:
         landlord_heating_system=None,
         landlord_existing_pv=None,
         landlord_sap=None,
+        landlord_block_reference=None,
         phase=False,
         header=0
     ):
@@ -382,7 +384,7 @@ class AssetList:
         self.standardised_asset_list = self.raw_asset_list.copy()
         # Will be used to store aggregated figures against the various work types
         self.work_type_figures = {}
-        self.flat_data = None
+        self.block_analysis_df = None
         self.duplicated_addresses = None
         self.contact_details = None
         self.contact_detail_fields = None
@@ -425,6 +427,7 @@ class AssetList:
         self.landlord_heating_system = landlord_heating_system
         self.landlord_existing_pv = landlord_existing_pv
         self.landlord_sap = landlord_sap
+        self.landlord_block_reference = landlord_block_reference
 
         # parameters for cleaning
         self.full_address_cols_to_concat = full_address_cols_to_concat
@@ -671,6 +674,7 @@ class AssetList:
             self.landlord_heating_system,
             self.landlord_existing_pv,
             self.landlord_sap,
+            self.landlord_block_reference,
         ]
         # Keep just non-null variables (e.g landlord may not provide uprn
         self.keep_variables = [v for v in variables if v is not None]
@@ -688,6 +692,7 @@ class AssetList:
             self.landlord_heating_system: self.STANDARD_HEATING_SYSTEM,
             self.landlord_existing_pv: self.STANDARD_EXISTING_PV,
             self.landlord_sap: self.STANDARD_SAP,
+            self.landlord_block_reference: self.STANDARD_BLOCK_REFERENCE
         }
         self.rename_map = {k: v for k, v in self.rename_map.items() if k is not None}
 
@@ -1905,7 +1910,7 @@ class AssetList:
             for col in ["cavity_reason", "solar_reason"]:
                 self.standardised_asset_list[col] = np.where(
                     (
-                        (~pd.isnull(self.standardised_asset_list["ecosurv_status"]))
+                        (~pd.isnull(self.standardised_asset_list["ecosurv_install_status"]))
                     ),
                     None,
                     self.standardised_asset_list[col]
@@ -1978,42 +1983,42 @@ class AssetList:
             get_max_status_from_columns, axis=1
         )
 
-    def flat_analysis(self):
+    def block_analysis(self):
 
-        # We need to deduce the building name - we strip out the house number
+        if self.landlord_block_reference is None:
+            # This information is not available
+            return
 
-        # We want to deduce if flats have 50% of the properties below C75
-        # We group by postcode and property type
-        grouped = self.standardised_asset_list.groupby(
-            [self.STANDARD_POSTCODE, self.STANDARD_PROPERTY_TYPE]
-        )
+        # Reverse mapping: label -> enum
+        LABEL_TO_ENUM = {e.label: e for e in hubspot_config.HubspotProcessStatus}
 
-        flat_data = []
-        for _, group in grouped:
-            if "flat" in group[self.STANDARD_PROPERTY_TYPE].values:
-                num_flats = group[self.STANDARD_PROPERTY_TYPE].shape[0]
-                num_below_c75 = group[
-                    self.EPC_API_DATA_NAMES["current-energy-efficiency"]
-                ].lt(self.FILLED_CAVITY_SAP_THRESHOLD).sum()
-                # Check if any flats are below C69
-                num_flats_below_c69 = group[
-                    self.EPC_API_DATA_NAMES["current-energy-efficiency"]
-                ].lt(69).sum()
+        # Threshold status - anything that is at this stage or beyond is considered surveyed
+        threshold = hubspot_config.HubspotProcessStatus.SURVEYED_COMPLETED_SIGNED_OFF.value
 
-                flat_data.append(
-                    {
-                        "Postcode": group[self.STANDARD_POSTCODE].iloc[0],
-                        "Property Type": "Flat",
-                        "Number of Flats with EPC": num_flats,
-                        "Number of Flats below C75": num_below_c75,
-                        "Proportion of Flat EPCs below C75": round(100 * num_below_c75 / num_flats),
-                        "Number of Flats Below C69": num_flats_below_c69,
-                    }
-                )
+        block_analysis = []
+        for block_reference, group in self.standardised_asset_list.groupby(self.STANDARD_BLOCK_REFERENCE):
+            cavity_breakdown = group["cavity_reason"].fillna("No Eligibility").value_counts(normalize=True) * 100
 
-        flat_data = pd.DataFrame(flat_data)
+            if all(cavity_breakdown.index == "No Eligibility"):
+                continue
 
-        self.flat_data = flat_data
+            works = group["hubspot_status"]
+            above_threshold = works.map(LABEL_TO_ENUM.get).dropna()
+            count_above = (above_threshold >= threshold).sum()
+            proportion = count_above / len(works)
+
+            block_analysis.append(
+                {
+                    "Block Reference": block_reference,
+                    "Proportion of properties suryeyed": proportion,
+                    **cavity_breakdown.to_dict(),
+                }
+            )
+
+        block_analysis = pd.DataFrame(block_analysis)
+        block_analysis = block_analysis.fillna(0)
+
+        self.block_analysis_df = block_analysis
 
     @staticmethod
     def split_full_name(x):
@@ -2403,14 +2408,15 @@ class AssetList:
             self.ecosurv[["Reference", "Status", "Lead Status", "Tags"]].rename(
                 columns={
                     "Reference": "ecosurv_reference",
-                    "status": "ecosurv_status",
+                    "Status": "ecosurv_status",
                     "Lead Status": "ecosurv_lead_status",
-                    "Tags": "ecosurv_tags"
+                    "Tags": "ecosurv_tags",
+                    "Installer": "ecosurv_installer"
                 }
             ), how="left", on="ecosurv_reference"
         )
 
-        matched["ecosurv_install_status"] = None
+        matched["ecosurv_install_status"] = hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER
 
         # This mapping is ordered by process order, where lodgment is the final step so if we have an indication
         # that the property is ready for lodgement, we set the status to that. We then proceed through the other
@@ -2772,6 +2778,7 @@ class AssetList:
             )
             measure_mix_col = "MEASURE COMBO"
             installer_notes_col = "INSTALLERS NOTES ; REASONS FOR CANCELLATIONS"
+            installer_col = "INSTALLER"
 
             logger.info("Matching master data to asset list")
             matched = []
@@ -2912,7 +2919,7 @@ class AssetList:
 
             matched = pd.DataFrame(matched)
             master_to_append = master_data[
-                [scheme_col, "row_id", install_col, submission_col, measure_mix_col, installer_notes_col]
+                [scheme_col, "row_id", install_col, submission_col, measure_mix_col, installer_notes_col, installer_col]
             ].merge(
                 matched, how="left", on="row_id"
             ).rename(
@@ -2921,7 +2928,8 @@ class AssetList:
                     measure_mix_col: "measure_mix",
                     install_col: "survey_status",
                     submission_col: "submission_date",
-                    installer_notes_col: "submission_installer_notes"
+                    installer_notes_col: "submission_installer_notes",
+                    installer_col: "submission_installer"
                 }
             )
             master_to_append["submission_cancelled"] = (
diff --git a/asset_list/app.py b/asset_list/app.py
index 31c404e5..41623880 100644
--- a/asset_list/app.py
+++ b/asset_list/app.py
@@ -82,6 +82,7 @@ def app():
     landlord_existing_pv = None
     landlord_property_id = "thrive_property_id"
     landlord_sap = "sap_rating_updated"
+    landlord_block_reference = "block_reference"
     outcomes_filename = [
         os.path.join(data_folder, "Thrive - Outcomes - April 24-March25 - Corrected.xlsx")
     ]
@@ -119,6 +120,7 @@ def app():
     landlord_existing_pv = "Low Carbon Technology (Solar PV)"
     landlord_property_id = "UPRN"
     landlord_sap = "SAP Score"
+    landlord_block_reference = None
     outcomes_filename = None
     outcomes_sheetname = None
     outcomes_postcode = None
@@ -209,6 +211,7 @@ def app():
         landlord_heating_system=landlord_heating_system,
         landlord_existing_pv=landlord_existing_pv,
         landlord_sap=landlord_sap,
+        landlord_block_reference=landlord_block_reference,
         phase=phase
     )
     asset_list.init_standardise()
@@ -479,7 +482,7 @@ def app():
     # We now flag the status of the property
     asset_list.label_property_status()
 
-    asset_list.flat_analysis()
+    asset_list.block_analysis()
 
     asset_list.load_contact_details(
         local_filepath=os.path.join(data_folder, "Full property list wth D&V report V look up 12.2.25.xlsx"),
@@ -526,7 +529,8 @@ def app():
 
     with pd.ExcelWriter(filename) as writer:
         asset_list.standardised_asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False)
-        asset_list.flat_data.to_excel(writer, sheet_name="Flat Data", index=False)
+        if asset_list.block_analysis_df is not None:
+            asset_list.block_analysis_df.to_excel(writer, sheet_name="Block Analysis", index=False)
         # If we have outcomes, we add a tab with the outcomes
         if not asset_list.outcomes_for_output.empty:
             asset_list.outcomes_for_output.to_excel(writer, sheet_name="Outcomes", index=False)
diff --git a/asset_list/hubspot/prepare_for_hubspot.py b/asset_list/hubspot/prepare_for_hubspot.py
new file mode 100644
index 00000000..302d2673
--- /dev/null
+++ b/asset_list/hubspot/prepare_for_hubspot.py
@@ -0,0 +1,18 @@
+import pandas as pd
+
+
+def app():
+    """
+    TODO: Operations may have removed some cavity_reason/solar_reason values from the standardised asset list after
+          review. So, we will need to update the hubspot status for these entries and set them to None, if they
+          were previously being set to ready for scheduling. We don't want to just filter on rows where
+          cavity_reason and solar_reason are populated, as if we want to include historical surveys, this will remove
+          them
+
+    :return:
+    """
+
+    filepath = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive "
+                "Programme - reconciled.xlsx")
+
+    standardised_asset_list = pd.read_excel(filepath, sheet_name="Standardised Asset List")
diff --git a/backend/engine/engine.py b/backend/engine/engine.py
index 58c3dc8e..5316fd03 100644
--- a/backend/engine/engine.py
+++ b/backend/engine/engine.py
@@ -507,7 +507,7 @@ async def model_engine(body: PlanTriggerRequest):
             )
 
             # if we have a remote assment data type, we pull the additional data and include it
-            if body.event_type == "remote_assessment":
+            if (body.event_type == "remote_assessment") and not (epc_searcher.newest_epc["estimated"]):
                 logger.info("Retrieving find my epc data")
                 try:
                     property_non_invasive_recommendations, patch = RetrieveFindMyEpc.get_from_epc(
diff --git a/etl/customers/thrive/Project codes.py b/etl/customers/thrive/Project codes.py
new file mode 100644
index 00000000..6235ebed
--- /dev/null
+++ b/etl/customers/thrive/Project codes.py	
@@ -0,0 +1,108 @@
+"""
+This script will take the standardised asset list and append the project codes.
+We also review the existing install status, in case anything is wrong
+"""
+import pandas as pd
+import numpy as np
+
+standardised_asset_list = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - "
+    "Complete - Updated May 2025 - Standardised.xlsx",
+    sheet_name="Standardised Asset List",
+)
+
+project_code_allocations = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Master Tracker (Thrive - "
+    "Warmfront).xlsx",
+    sheet_name="Master Tracker",
+    header=1
+)
+
+programme_codes = project_code_allocations[
+    ["UPRN", "Proposed Progamme", "New Proposed Programme", "Warmfront Survey Notes", ]
+].copy()
+programme_codes["programme_reference"] = programme_codes["New Proposed Programme"].copy()
+programme_codes["programme_reference"] = np.where(
+    pd.isnull(programme_codes["programme_reference"]),
+    programme_codes["Proposed Progamme"],
+    programme_codes["programme_reference"]
+)
+
+PROJECT_CODE_MAP = {
+    'Phase 2': "THRIVE-002",
+    'Phase 3': "THRIVE-003",
+    'Phase 4': "THRIVE-004",
+    'Phase 5': "THRIVE-005",
+    'Phase 6': "THRIVE-006",
+    'Phase 7': "THRIVE-007",
+    'Phase 8': "THRIVE-008",
+    'Phase 9': "THRIVE-009",
+    'Phase 10': "THRIVE-010",
+    "Week1": "THRIVE-WEEK-001",
+    "Week2": "THRIVE-WEEK-002",
+    "Week4": "THRIVE-WEEK-004",
+    "Week7": "THRIVE-WEEK-007",
+}
+programme_codes["project_code"] = programme_codes["programme_reference"].map(PROJECT_CODE_MAP)
+
+thrive_notes = project_code_allocations[["UPRN", "Thrive Notes", "Priority", "Notes Reply (Thrive)"]].copy()
+
+standardised_asset_list = standardised_asset_list.merge(
+    programme_codes[["UPRN", "project_code", "programme_reference"]],
+    how="left",
+    left_on="landlord_property_id",
+    right_on="UPRN",
+).merge(
+    thrive_notes[["UPRN", "Thrive Notes", "Priority", "Notes Reply (Thrive)"]],
+    how="left",
+    on="UPRN",
+)
+
+standardised_asset_list = standardised_asset_list.drop(columns=["UPRN"])
+
+# We fill the project code for historical completions
+standardised_asset_list["project_code"] = np.where(
+    pd.isnull(standardised_asset_list["project_code"]) & (
+        standardised_asset_list["hubspot_status"] != "READY TO BE SCHEDULED"
+    ) & (
+        ~pd.isnull(standardised_asset_list["hubspot_status"])
+    ),
+    "THRIVE-HISTORICAL",
+    standardised_asset_list["project_code"]
+)
+
+# Store as an excel
+filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Programme - "
+            "reconciled.xlsx")
+# Store the reconciled asset list together with the supporting tabs carried over from the standardised workbook
+# Other tabs:
+block_analysis = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - "
+    "Complete - Updated May 2025 - Standardised.xlsx",
+    sheet_name="Block Analysis",
+)
+outcomes = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - "
+    "Complete - Updated May 2025 - Standardised.xlsx",
+    sheet_name="Outcomes",
+)
+unmatched_submissions = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - "
+    "Complete - Updated May 2025 - Standardised.xlsx",
+    sheet_name="Unmatched Submissions",
+)
+unmatched_ecosurv = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - "
+    "Complete - Updated May 2025 - Standardised.xlsx",
+    sheet_name="Unmatched Ecosurv",
+)
+
+with pd.ExcelWriter(filename) as writer:
+    standardised_asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False)
+    block_analysis.to_excel(writer, sheet_name="Block Analysis", index=False)
+    # If we have outcomes, we add a tab with the outcomes
+    outcomes.to_excel(writer, sheet_name="Outcomes", index=False)
+
+    unmatched_submissions.to_excel(writer, sheet_name="Unmatched Submissions", index=False)
+
+    unmatched_ecosurv.to_excel(writer, sheet_name="Unmatched Ecosurv", index=False)

From d2a74d5612439e3732553eb133129c2aaad9f5bc Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 23 May 2025 10:48:08 +0100
Subject: [PATCH 05/14] adding in a placeholder method which will assume that
 properties without an EPC, are going to be older properties

---
 backend/SearchEpc.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py
index 0010191a..1ee1f950 100644
--- a/backend/SearchEpc.py
+++ b/backend/SearchEpc.py
@@ -702,6 +702,18 @@ class SearchEpc:
             exclude_old=exclude_old
         )
 
+        # Check if it's a new build EPC. A property that doesn't have an EPC is not going to be a new build
+        # so, when any older age bands are present, we keep only those rows; otherwise the data is left untouched
+        # TODO - this is experimental
+        newer_age_bands = [
+            "England and Wales: 1996-2002", "England and Wales: 2003-2006", "England and Wales: 2007-2011",
+            "England and Wales: 2012 onwards"
+        ]
+
+        is_older_age_band = ~epc_data["construction-age-band"].isin(newer_age_bands)
+        if is_older_age_band.any():
+            epc_data = epc_data[is_older_age_band].copy()
+
         # If we have missing lodgment date, we fill it with inspection-date
         epc_data["lodgement-datetime"] = epc_data["lodgement-datetime"].fillna(epc_data["inspection-date"])
         # If we still have missing dates, we set it to the mean of the non NA dates

From 94dcd9c00acdfd2b7db34c6ab9e79a6061233e09 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 3 Jun 2025 18:41:01 +0100
Subject: [PATCH 06/14] working on hubspot upload

---
 .idea/Model.iml                           |   2 +-
 .idea/misc.xml                            |   2 +-
 asset_list/AssetList.py                   | 274 +++++++++++++++-------
 asset_list/hubspot/config.py              |  43 ++++
 asset_list/hubspot/prepare_for_hubspot.py |  39 ++-
 backend/Funding.py                        | 120 ++++++++++
 etl/customers/cambridge/surveys.py        |  24 ++
 etl/customers/places_for_people/abs.py    | 199 ++++++++++++++++
 etl/customers/thrive/Project codes.py     |  34 ++-
 9 files changed, 635 insertions(+), 102 deletions(-)
 create mode 100644 etl/customers/cambridge/surveys.py
 create mode 100644 etl/customers/places_for_people/abs.py

diff --git a/.idea/Model.iml b/.idea/Model.iml
index c6561970..09f2e496 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Fastapi-backend" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="AssetList" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
 </module>
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 50cad4ca..fb10c6b0 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Fastapi-backend" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="AssetList" project-jdk-type="Python SDK" />
   <component name="PyCharmProfessionalAdvertiser">
     <option name="shown" value="true" />
   </component>
diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py
index fea0f59e..3f5ef7ff 100644
--- a/asset_list/AssetList.py
+++ b/asset_list/AssetList.py
@@ -350,6 +350,34 @@ class AssetList:
         "cavity wall, as built, partial insulation",
     ]
 
+    # Work type prefixes:
+    # Empties
+    EMPTY_CAVITY_NON_INTRUSIVE = "Non-Intrusive Data Shows Empty Cavity"
+    EPC_EMPTY_INSPECTIONS_RETRO_DRILLED = "EPC Shows Empty Cavity, inspections show retro drilled"
+    EPC_EMPTY_INSPECTIONS_FILLED = "EPC Shows Empty Cavity, inspections show filled or other"
+    EPC_EMPTY_INSPECTIONS_FILLED_AT_BUILD = "EPC Shows Empty Cavity, inspections show filled at build"
+    EPC_EMPTY_INSPECTIONS_NON_CAVITY = "EPC Shows Empty Cavity, inspections show non-cavity build"
+    EPC_EMPTY = "EPC Shows Empty Cavity"
+    LANDLORD_EMPTY_INSPECTIONS_OTHER = ("Landlord Data Shows Empty Cavity, EPC & Inspections Shows Filled or "
+                                        "Non-cavity")
+    # Extraction
+    EXTRACTION_NON_INTRUSIVE = "Non-Intrusive Data Shows Cavity Extraction"
+
+    # Solar
+    SOLAR_ELIGIBLE = "Solar Eligible"
+    SOLAR_ELIGIBLE_SOLID_WALL_UNINSULATED = "Solar Eligible, Solid Wall Uninsulated, EPC E or Below"
+    SOLAR_ELIGIBLE_NEEDS_HEATING_UPGRADE = "Solar Eligible, Needs Heating Upgrade"
+
+    CRM_PRODUCTS = {
+        "Empty Cavity - ECO4": {"id": 82733738177, "unit_price": 1000, "name": "Empty Cavity & Loft - ECO4"},
+        "Extract & Fill - ECO4": {"id": 100307905778, "unit_price": 500, "name": "Extract & Fill - ECO4"},
+        "Solar PV - ECO4": {"id": 82623589564, "unit_price": 1608, "name": "Solar PV - ECO4"},
+        "Solar PV + HHRSH - ECO4": {"id": 155529972924, "unit_price": 1608, "name": "Solar PV + HHRSH - ECO4"},
+        "Solar PV + Heating Upgrade - ECO4": {
+            "id": 109265426665, "unit_price": 1608, "name": "Solar PV + Heating Upgrade - ECO4"
+        },
+    }
+
     def __init__(
         self,
         local_filepath,
@@ -1719,10 +1747,10 @@ class AssetList:
         self.standardised_asset_list["cavity_reason"] = None
 
         empty_cavity_map = {
-            "non_intrusive_indicates_empty_cavity": "Non-Intrusive Data Shows Empty Cavity: ",
-            "non_intrusive_indicates_empty_cavity_has_solar": "Non-Intrusive Data Shows Empty Cavity - property "
+            "non_intrusive_indicates_empty_cavity": self.EMPTY_CAVITY_NON_INTRUSIVE + ": ",
+            "non_intrusive_indicates_empty_cavity_has_solar": f"{self.EMPTY_CAVITY_NON_INTRUSIVE} - property "
                                                               "already has solar: ",
-            "non_intrusive_indicates_empty_cavity_no_year_filter": f"Non-Intrusive Data Shows Empty Cavity, "
+            "non_intrusive_indicates_empty_cavity_no_year_filter": f"{self.EMPTY_CAVITY_NON_INTRUSIVE}, "
                                                                    f"built after {self.EMPTY_CAVITY_YEAR_THRESHOLD}: ",
 
         }
@@ -1747,7 +1775,7 @@ class AssetList:
                     )) &
                     pd.isnull(self.standardised_asset_list["cavity_reason"])
                 ),
-                "EPC Shows Empty Cavity, inspections show retro drilled: " + self.standardised_asset_list[
+                f"{self.EPC_EMPTY_INSPECTIONS_RETRO_DRILLED}: " + self.standardised_asset_list[
                     "SAP Category"],
                 self.standardised_asset_list["cavity_reason"]
             )
@@ -1759,7 +1787,7 @@ class AssetList:
                     self.standardised_asset_list['non_intrusive_indicates_cavity_extraction'] &
                     pd.isnull(self.standardised_asset_list["cavity_reason"])
                 ),
-                "EPC Shows Empty Cavity, inspections show filled or other: " + self.standardised_asset_list[
+                f"{self.EPC_EMPTY_INSPECTIONS_FILLED}: " + self.standardised_asset_list[
                     "SAP Category"],
                 self.standardised_asset_list["cavity_reason"]
             )
@@ -1771,7 +1799,7 @@ class AssetList:
                     (self.standardised_asset_list['non-intrusives: Insulated'] == "RETRO DRILLED") &
                     pd.isnull(self.standardised_asset_list["cavity_reason"])
                 ),
-                "EPC Shows Empty Cavity, inspections show retro drilled: " + self.standardised_asset_list[
+                f"{self.EPC_EMPTY_INSPECTIONS_RETRO_DRILLED}: " + self.standardised_asset_list[
                     "SAP Category"],
                 self.standardised_asset_list["cavity_reason"]
             )
@@ -1783,8 +1811,7 @@ class AssetList:
                     (self.standardised_asset_list['non-intrusives: Insulated'] == "FILLED AT BUILD") &
                     pd.isnull(self.standardised_asset_list["cavity_reason"])
                 ),
-                "EPC Shows Empty Cavity, inspections show filled at build: " + self.standardised_asset_list[
-                    "SAP Category"],
+                f"{self.EPC_EMPTY_INSPECTIONS_FILLED_AT_BUILD}: " + self.standardised_asset_list["SAP Category"],
                 self.standardised_asset_list["cavity_reason"]
             )
         else:
@@ -1794,7 +1821,7 @@ class AssetList:
                     ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] &
                     pd.isnull(self.standardised_asset_list["cavity_reason"])
                 ),
-                "EPC Shows Empty Cavity: " + self.standardised_asset_list["SAP Category"],
+                f"{self.EPC_EMPTY}: " + self.standardised_asset_list["SAP Category"],
                 self.standardised_asset_list["cavity_reason"]
             )
 
@@ -1804,10 +1831,12 @@ class AssetList:
                 ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] &
                 pd.isnull(self.standardised_asset_list["cavity_reason"])
             ),
-            "EPC Shows Empty Cavity, inspections show non-cavity build: " + self.standardised_asset_list[
-                "SAP Category"],
+            f"{self.EPC_EMPTY_INSPECTIONS_NON_CAVITY}: " + self.standardised_asset_list["SAP Category"],
             self.standardised_asset_list["cavity_reason"]
         )
+
+        # Work type prefixes
+
         # Landlord data: The landlord's data indicates that the wall is an uninsulated cavity wall, but EPC and
         # inspections show filled
         self.standardised_asset_list["cavity_reason"] = np.where(
@@ -1817,7 +1846,7 @@ class AssetList:
                 ~self.standardised_asset_list["epc_indicates_empty_cavity"] &
                 pd.isnull(self.standardised_asset_list["cavity_reason"])
             ),
-            "Landlord Data Shows Empty Cavity, EPC & Inspections Shows Filled or Non-cavity: " +
+            f"{self.LANDLORD_EMPTY_INSPECTIONS_OTHER}: " +
             self.standardised_asset_list["SAP Category"],
             self.standardised_asset_list["cavity_reason"]
         )
@@ -1828,7 +1857,7 @@ class AssetList:
                 self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] &
                 pd.isnull(self.standardised_asset_list["cavity_reason"])
             ),
-            "Non-Intrusive Data Shows Cavity Extraction: " + self.standardised_asset_list["SAP Category"],
+            f"{self.EXTRACTION_NON_INTRUSIVE}: " + self.standardised_asset_list["SAP Category"],
             self.standardised_asset_list["cavity_reason"]
         )
 
@@ -1837,7 +1866,7 @@ class AssetList:
                 self.standardised_asset_list["non_intrusive_indicates_cavity_extraction_no_year_filter"] &
                 pd.isnull(self.standardised_asset_list["cavity_reason"])
             ),
-            f"Non-Intrusive Data Shows Cavity Extraction, built after {self.EMPTY_CAVITY_YEAR_THRESHOLD}: " +
+            f"{self.EXTRACTION_NON_INTRUSIVE}, built after {self.EMPTY_CAVITY_YEAR_THRESHOLD}: " +
             self.standardised_asset_list["SAP Category"],
             self.standardised_asset_list["cavity_reason"]
         )
@@ -1850,11 +1879,9 @@ class AssetList:
         # Map of variables and fill values for the solar_reason variable
         # ordering of this map is important, where we flag our prioritised work types first
         solar_reason_map = {
-            "solar_eligible": "Solar Eligible: ",
-            "solar_eligible_solid_wall_uninsulated": "Solar Eligible, Solid Wall Uninsulated, EPC E or Below: ",
-            "solar_eligible_needs_heating_upgrade": (
-                "Solar Eligible, Needs Heating Upgrade: "
-            )
+            "solar_eligible": f"{self.SOLAR_ELIGIBLE}: ",
+            "solar_eligible_solid_wall_uninsulated": f"{self.SOLAR_ELIGIBLE_SOLID_WALL_UNINSULATED}: ",
+            "solar_eligible_needs_heating_upgrade": f"{self.SOLAR_ELIGIBLE_NEEDS_HEATING_UPGRADE}: "
         }
 
         for variable, reason in solar_reason_map.items():
@@ -2079,68 +2106,97 @@ class AssetList:
                 *contact_details[fullname_column].apply(self.split_full_name)
             )
         else:
-            raise NotImplementedError("Implement me")
+            contact_details["title"] = None
 
         self.contact_details = contact_details
 
-    def prepare_for_crm(self, company_domain, crm_pipeline_name, first_dealstage, assigned_surveyors):
+    @classmethod
+    def load_standardised_asset_list(cls, filepath):
+        """
+        Alternate constructor: builds an instance from the "Standardised Asset List" sheet of the given workbook
+        :param filepath: path of the workbook, passed to the constructor as local_filepath
+        :return: the newly constructed instance
+        """
+        # Address, postcode and property id use the class's STANDARD_* column names; all landlord_* arguments are None
+        instance = cls(
+            local_filepath=filepath,
+            sheet_name="Standardised Asset List",
+            address1_colname=cls.STANDARD_ADDRESS_1,
+            postcode_colname=cls.STANDARD_POSTCODE,
+            full_address_colname=cls.STANDARD_FULL_ADDRESS,
+            landlord_property_id=cls.STANDARD_LANDLORD_PROPERTY_ID,
+            full_address_cols_to_concat=[],
+            missing_postcodes_method=None,
+            address1_extraction_method=None,
+            landlord_year_built=None,
+            landlord_uprn=None,
+            landlord_property_type=None,
+            landlord_built_form=None,
+            landlord_wall_construction=None,
+            landlord_roof_construction=None,
+            landlord_heating_system=None,
+            landlord_existing_pv=None,
+            landlord_sap=None,
+            landlord_block_reference=None,
+            phase=False,
+            header=0
+        )
+        return instance
+
+    def prepare_for_crm(self, company_domain, installer_name):
         """
         This function prepares the data for upload into Hubspot
         :return:
         """
-        # This is a placeholder for now
-
         # This maps the opportunities as we reference them, to the product data as stored in Hubspot
-        product_lookup_table = {
-            "Non-Intrusive Data Showed Cavity Extraction": {
-                "name": "Extract & Fill - ECO4", "id": 100307905778, "unit_price": 500
-            },
-            "Non-Intrusive Data Showed Empty Cavity": {
-                "name": "Empty Cavity & Loft - ECO4", "id": 82733738177, "unit_price": 1000
-            },
-            "Non-Intrusive Data Showed Empty Cavity but all SAP scores allowed": {
-                "name": "Empty Cavity & Loft - ECO4", "id": 82733738177, "unit_price": 1000
-            },
-            "Non-Intrusive Data Showed Cavity Extraction but all SAP scores allowed": {
-                "name": "Extract & Fill - ECO4", "id": 100307905778, "unit_price": 500
-            },
-            "EPC Data Showed Empty Cavity": {
-                "name": "Empty Cavity & Loft - ECO4", "id": 82733738177, "unit_price": 1000
-            },
-            "Solid Floor, Insulated, No Solar": {
-                "name": "Solar PV - ECO4", "id": 82623589564, "unit_price": 1608
-            },
-            "Solid Floor, Insulated, Needs Loft": {
-                "name": "Solar PV - ECO4", "id": 82623589564, "unit_price": 1608
-            },
-            "Other Floor, Insulated, No Solar": {
-                "name": "Solar PV - ECO4", "id": 82623589564, "unit_price": 1608
-            },
-            "Other Floor, Insulated, Needs Loft": {
-                "name": "Solar PV - ECO4", "id": 82623589564, "unit_price": 1608
-            }
+
+        prefixes_to_products = {
+            # Empty
+            self.EMPTY_CAVITY_NON_INTRUSIVE: self.CRM_PRODUCTS["Empty Cavity - ECO4"],
+            self.EPC_EMPTY_INSPECTIONS_RETRO_DRILLED: self.CRM_PRODUCTS["Empty Cavity - ECO4"],
+            self.EPC_EMPTY_INSPECTIONS_FILLED: self.CRM_PRODUCTS["Empty Cavity - ECO4"],
+            self.EPC_EMPTY_INSPECTIONS_FILLED_AT_BUILD: self.CRM_PRODUCTS["Empty Cavity - ECO4"],
+            self.EPC_EMPTY_INSPECTIONS_NON_CAVITY: self.CRM_PRODUCTS["Empty Cavity - ECO4"],
+            self.EPC_EMPTY: self.CRM_PRODUCTS["Empty Cavity - ECO4"],
+            self.LANDLORD_EMPTY_INSPECTIONS_OTHER: self.CRM_PRODUCTS["Empty Cavity - ECO4"],
+            # Extraction
+            self.EXTRACTION_NON_INTRUSIVE: self.CRM_PRODUCTS["Extract & Fill - ECO4"],
+            # Solar
+            self.SOLAR_ELIGIBLE: self.CRM_PRODUCTS["Solar PV - ECO4"],
+            self.SOLAR_ELIGIBLE_SOLID_WALL_UNINSULATED: self.CRM_PRODUCTS["Solar PV - ECO4"],
+            self.SOLAR_ELIGIBLE_NEEDS_HEATING_UPGRADE: self.CRM_PRODUCTS["Solar PV + Heating Upgrade - ECO4"],
         }
+
         # We check if all products are covered in the lookup table
-        cavity_products = self.standardised_asset_list["cavity_reason"].unique()
-        solar_products = self.standardised_asset_list["solar_reason"].unique()
-        # Check if there any options not in out lookup table
-        if (
-            any(x for x in cavity_products if x not in product_lookup_table) or
-            any(x for x in solar_products if x not in product_lookup_table)
-        ):
-            raise ValueError("We have products not referenced in the lookup table - check this")
+        cavity_products = self.standardised_asset_list["cavity_reason"].unique().tolist()
+        solar_products = self.standardised_asset_list["solar_reason"].unique().tolist()
+
+        product_map = {}
+        for identified_product in cavity_products + solar_products:
+            if pd.isnull(identified_product):
+                continue
+
+            # Prefixes overlap (e.g. "Solar Eligible" vs "Solar Eligible, Needs Heating Upgrade"), so we take the
+            # longest matching prefix instead of relying on dictionary order. Reasons with no match are left out
+            # of the map, so the lookups below fall back to their default and the missing-product check can raise
+            matching_prefixes = [p for p in prefixes_to_products if identified_product.startswith(p)]
+            if matching_prefixes:
+                product_map[identified_product] = prefixes_to_products[max(matching_prefixes, key=len)]
+
+        # For each cavity and solar product, we iterate through the prefixes and map to the products
+
+        # # Check if there any options not in out lookup table
+        # if (
+        #     any(x for x in cavity_products if x not in product_lookup_table) or
+        #     any(x for x in solar_products if x not in product_lookup_table)
+        # ):
+        #     raise ValueError("We have products not referenced in the lookup table - check this")
 
         programme_data = self.standardised_asset_list.copy()
-
-        # Exclusions - these are properties we won't treat for the moment
-        product_exclusions = [
-            "Other Floor, Insulated, No Solar",
-            "Other Floor, Insulated, Needs Loft"
-        ]
-        if product_exclusions:
-            logger.warning("Excluding products: %s", product_exclusions)
-
-        programme_data = programme_data[programme_data["solar_reason"].isin(product_exclusions) == False]
+        # We take rows that have a surveyor and a date for the survey
+        programme_data = programme_data[
+            ~pd.isnull(programme_data["survey_week"]) & ~pd.isnull(programme_data["surveyor"])
+            ]
 
         # Merge on the contact details
         programme_data = programme_data.merge(
@@ -2153,26 +2209,34 @@ class AssetList:
         programme_data["Company Domain Name <COMPANY domain>"] = company_domain
         # Append the product data onto the programme data
         programme_data["cavity_product"] = programme_data["cavity_reason"].map(
-            lambda x: product_lookup_table.get(x, {"name": None})["name"]
+            lambda x: product_map.get(x, {"name": None})["name"]
         )
         programme_data["solar_product"] = programme_data["solar_reason"].map(
-            lambda x: product_lookup_table.get(x, {"name": None})["name"]
+            lambda x: product_map.get(x, {"name": None})["name"]
         )
 
-        programme_data["domna_product"] = programme_data["solar_reason"].copy()
+        # We check if we have any missings
+        cavity_missing = pd.isnull(programme_data[~pd.isnull(programme_data["cavity_reason"])]["cavity_product"]).sum()
+        solar_missing = pd.isnull(programme_data[~pd.isnull(programme_data["solar_reason"])]["solar_product"]).sum()
+
+        if cavity_missing > 0 or solar_missing > 0:
+            raise ValueError(
+                f"We have {cavity_missing} cavity products and {solar_missing} solar products that are not "
+                "mapped to a product in the lookup table. Please check the mapping."
+            )
+
+        programme_data["domna_product"] = programme_data["solar_product"].copy()
         programme_data["domna_product"] = np.where(
             pd.isnull(programme_data["domna_product"]),
-            programme_data["solar_product"],
+            programme_data["cavity_product"],
             programme_data["domna_product"]
         )
         # We filter just on rows where we have a product
-        programme_data = programme_data[
-            ~pd.isnull(programme_data["domna_product"])
-        ]
+        programme_data = programme_data[~pd.isnull(programme_data["domna_product"])]
         programme_data = programme_data.drop(columns=["solar_product", "cavity_product"])
 
         product_df = (
-            pd.DataFrame(product_lookup_table).T[["name", "id", "unit_price"]]
+            pd.DataFrame(self.CRM_PRODUCTS).T[["name", "id", "unit_price"]]
             .reset_index()
             .rename(
                 columns={
@@ -2194,21 +2258,27 @@ class AssetList:
         )
 
         # Add in deal and pipeline information
-        programme_data["dealname"] = programme_data[self.STANDARD_FULL_ADDRESS] + " : " + programme_data[
-            "domna_product"]
-        programme_data['Pipeline <DEAL pipeline>'] = crm_pipeline_name
-        programme_data['Deal Stage <DEAL dealstage>'] = first_dealstage
+        programme_data["dealname"] = (
+            programme_data[self.STANDARD_FULL_ADDRESS] + " : " + programme_data["domna_product"]
+        )
+        programme_data['Pipeline <DEAL pipeline>'] = hubspot_config.CRM_PIPELINE_NAME
+        programme_data['Deal Stage <DEAL dealstage>'] = hubspot_config.CRM_PIPELINE_FIRST_STAGE_NAME
         programme_data['Associations: Listing'] = "Property Owner"
 
-        programme_data = programme_data.merge(
-            assigned_surveyors.rename(
-                columns={self.landlord_property_id: self.STANDARD_LANDLORD_PROPERTY_ID}
-            ), how="left", on=self.STANDARD_LANDLORD_PROPERTY_ID
+        # programme_data = programme_data.merge(
+        #     assigned_surveyors.rename(
+        #         columns={self.landlord_property_id: self.STANDARD_LANDLORD_PROPERTY_ID}
+        #     ), how="left", on=self.STANDARD_LANDLORD_PROPERTY_ID
+        # )
+
+        # Add in some columns if we have them
+        date_of_inspections = (
+            "Non-Intrusives: Date of Inspection" if
+            "Non-Intrusives: Date of Inspection" in programme_data.columns else None
         )
 
         # This maps the hubspot schema to the template. Anything that is not covered in this will be flagged
         schema_mappings = {
-            'Name <LISTING hs_name>': self.DOMNA_PROPERTY_ID,  # TODO: Maybe change this?
             'Company Domain Name <COMPANY domain>': 'Company Domain Name <COMPANY domain>',
             'Email <CONTACT email>': (
                 self.contact_detail_fields["email"] if self.contact_detail_fields["email"] else None
@@ -2227,9 +2297,10 @@ class AssetList:
             'Address 2 <LISTING hs_address_2>': None,  # TODO: Don't have this for the moment
             'Postcode <LISTING hs_zip>': self.STANDARD_POSTCODE,
             'Property Type <LISTING property_type>': self.STANDARD_PROPERTY_TYPE,
-            'Property Sub Type <LISTING property_sub_type>': None,  # TODO: Don't have this for the moment
+            'Property Sub Type <LISTING property_sub_type>': self.STANDARD_BUILT_FORM,
             'Bedroom(s) <LISTING hs_bedrooms>': None,  # TODO: Don't have this for the moment
             'Domna Property ID <LISTING domna_property_id>': self.DOMNA_PROPERTY_ID,
+            # We populate this with the column that we have
             'National UPRN <LISTING national_uprn>': (
                 self.STANDARD_UPRN if self.STANDARD_UPRN is not None else self.EPC_API_DATA_NAMES["uprn"]
             ),
@@ -2239,8 +2310,7 @@ class AssetList:
             'Year Built <LISTING hs_year_built>': self.STANDARD_YEAR_BUILT,
             'Boiler Make <LISTING boiler_make>': None,  # TODO: Don't have this for the moment
             'Boiler Model <LISTING boiler_model>': None,  # TODO: Don't have this for the moment
-            'Non-Intrusives: Date Checked <LISTING non_intrusives__date_checked>': None,
-            # TODO: Don't have this for the moment
+            'Non-Intrusives: Date Checked <LISTING non_intrusives__date_checked>': date_of_inspections,
             'Non-Intrusives: Wall Type <LISTING non_intrusives__wall_type>': (
                 "non-intrusives: Construction" if self.non_intrusives_present else None
             ),
@@ -2283,16 +2353,22 @@ class AssetList:
             'Last EPC: Age Band <LISTING last_epc__age_band>': self.EPC_API_DATA_NAMES["construction-age-band"],
             'Deal Stage <DEAL dealstage>': 'Deal Stage <DEAL dealstage>',
             'Pipeline <DEAL pipeline>': 'Pipeline <DEAL pipeline>',
-            'Expected Commencement Date <DEAL expected_commencement_date>': None,  # TODO: Need to set this,
+            'Expected Commencement Date <DEAL expected_commencement_date>': "survey_week",
             'Deal Name <DEAL dealname>': "dealname",  # Need to create this,
             'Product ID <LINE_ITEM hs_product_id>': 'Product ID <LINE_ITEM hs_product_id>',
             'Name <LINE_ITEM name>': 'Name <LINE_ITEM name>',
             'Unit price <LINE_ITEM price>': 'Unit price <LINE_ITEM price>',
             'Quantity <LINE_ITEM quantity>': 'Quantity <LINE_ITEM quantity>',
             'Deal Owner': 'surveyor_email',
-            'Amount <DEAL amount>': 'Unit price <LINE_ITEM price>',
+            'Project Code <DEAL project_code>': 'project_code',
+            'Associations: Listing': 'Associations: Listing',
         }
 
+        # We sometimes miss columns if the landlord never provided them, so we create them as empty columns
+        missed_mapping_cols = [c for c in schema_mappings.values() if c not in programme_data.columns if c is not None]
+        for c in missed_mapping_cols:
+            programme_data[c] = None
+
         # We now create the finalised dataset to be uploaded into Hubspot
         variables_required = list(schema_mappings.values())
         variables_required = [v for v in variables_required if v is not None]
@@ -2307,6 +2383,22 @@ class AssetList:
             columns={v: k for k, v in schema_mappings.items() if v is not None}
         )
 
+        programme_data['Installer <DEAL installer>'] = installer_name
+        programme_data['Name <LISTING hs_name>'] = (
+            programme_data['Address 1 <LISTING hs_address_1>'] + " ," + programme_data['Postcode <LISTING hs_zip>']
+        )
+        # The listing owner email is the same as the surveyor email (deal owner), so they can see the listing
+        programme_data['Listing Owner Email <LISTING hubspot_owner_id>'] = programme_data['Deal Owner']
+        programme_data['Amount <DEAL amount>'] = 0
+
+        # We make sure we have all of the columns that we need
+        missed_columns = [c for c in hubspot_config.CRM_UPLOAD_COLUMNS if c not in programme_data.columns]
+        if missed_columns:
+            raise ValueError(
+                f"We have the following columns that are not in the programme data: {missed_columns}. "
+                "Please check the mapping and ensure all required columns are present."
+            )
+
         self.hubspot_data = programme_data
 
     def flag_ecosurv(self, ecosurv_landlords=None, landlords_to_ignore=None):
diff --git a/asset_list/hubspot/config.py b/asset_list/hubspot/config.py
index 180bf0e0..6e16279a 100644
--- a/asset_list/hubspot/config.py
+++ b/asset_list/hubspot/config.py
@@ -1,5 +1,8 @@
 from enum import IntEnum
 
+CRM_PIPELINE_NAME = 'Operations - Housing Associations'
+CRM_PIPELINE_FIRST_STAGE_NAME = 'READY TO BE SCHEDULED'
+
 
 class HubspotProcessStatus(IntEnum):
     def __new__(cls, value, label):
@@ -26,3 +29,43 @@ class HubspotProcessStatus(IntEnum):
     LODGEMENT_COMPLETE = 7, "LODGEMENT COMPLETE"
     # The property has been cancelled
     INSTALLER_CANCELLED_FINALIZED = 8, "INSTALLER CANCELLED - FINALIZED"
+
+
+CRM_UPLOAD_COLUMNS = [
+    'Name <LISTING hs_name>', 'Associations: Listing', 'Company Domain Name <COMPANY domain>',
+    'Email <CONTACT email>', 'First Name <CONTACT firstname>', 'Last Name <CONTACT lastname>',
+    'Phone <CONTACT phone>', 'Listing Owner Email <LISTING hubspot_owner_id>',
+    'Full Address <LISTING full_address>', 'Address 1 <LISTING hs_address_1>',
+    'Address 2 <LISTING hs_address_2>', 'Postcode <LISTING hs_zip>',
+    'Property Type <LISTING property_type>', 'Property Sub Type <LISTING property_sub_type>',
+    'Bedroom(s) <LISTING hs_bedrooms>', 'Domna Property ID <LISTING domna_property_id>',
+    'National UPRN <LISTING national_uprn>', 'Owner Property ID <LISTING owner_property_id>',
+    'Wall Construction <LISTING wall_construction>', 'Heating System <LISTING heating_system>',
+    'Year Built <LISTING hs_year_built>', 'Boiler Make <LISTING boiler_make>',
+    'Boiler Model <LISTING boiler_model>',
+    'Non-Intrusives: Date Checked <LISTING non_intrusives__date_checked>',
+    'Non-Intrusives: Wall Type <LISTING non_intrusives__wall_type>',
+    'Non-intrusives: Insulation <LISTING non_intrusives__insulation>',
+    'Non-intrusives: Insulation Material <LISTING non_intrusives__insulation_material>',
+    'Non-Intrusives: CIGA Check Required <LISTING non_intrusives__ciga_check_required>',
+    'Non-Intrusives: PV Access Issues <LISTING non_intrusives__access_issues>',
+    'Non-Intrusives: Roof Orientation <LISTING non_intrusives__roof_orientation>',
+    'Non-Intrusives: Surveyor Notes <LISTING non_intrusives__surveyor_notes>',
+    'Non-Intrusives: Surveyor Name <LISTING non_intrusives__surveyor_name>',
+    'CIGA: Date Requested <LISTING ciga__date_requested>',
+    'CIGA: Cavity Guarantee Found <LISTING ciga__cavity_guarantee_found>',
+    'Last EPC: Is Estimated <LISTING last_epc__is_estimated>',
+    'Last EPC: EPC Rating <LISTING last_epc__epc_rating>',
+    'Last EPC: SAP Rating <LISTING last_epc__sap_rating>',
+    'Last EPC: Main Heating Description <LISTING last_epc__main_heating_description>',
+    'Last EPC: Heating Controls <LISTING last_epc__heating_controls>',
+    'Last EPC: Lodgement Date <LISTING last_epc__lodgement_date>',
+    'Last EPC: Floor Area <LISTING last_epc__floor_area>', 'Last EPC: Wall <LISTING last_epc__wall>',
+    'Last EPC: Roof <LISTING last_epc__roof>', 'Last EPC: Floor <LISTING last_epc__floor>',
+    'Last EPC: Room Height <LISTING last_epc__room_height>',
+    'Last EPC: Age Band <LISTING last_epc__age_band>', 'Deal Stage <DEAL dealstage>',
+    'Pipeline <DEAL pipeline>', 'Expected Commencement Date <DEAL expected_commencement_date>',
+    'Deal Name <DEAL dealname>', 'Project Code <DEAL project_code>',
+    'Product ID <LINE_ITEM hs_product_id>', 'Name <LINE_ITEM name>', 'Unit price <LINE_ITEM price>',
+    'Quantity <LINE_ITEM quantity>', 'Deal Owner', 'Amount <DEAL amount>', 'Installer <DEAL installer>'
+]
diff --git a/asset_list/hubspot/prepare_for_hubspot.py b/asset_list/hubspot/prepare_for_hubspot.py
index 302d2673..8ed654f3 100644
--- a/asset_list/hubspot/prepare_for_hubspot.py
+++ b/asset_list/hubspot/prepare_for_hubspot.py
@@ -1,4 +1,5 @@
 import pandas as pd
+from asset_list.AssetList import AssetList
 
 
 def app():
@@ -9,10 +10,42 @@ def app():
           cavity_reason and solar_reason are populated, as if we want to include historical surveys, this will remove
           them
 
+
+    TODO: If we wish to upload deals in batches
+
     :return:
     """
 
-    filepath = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive "
-                "Programme - reconciled.xlsx")
+    # inputs:
+    customer_domain = "https://thrivehomes.org.uk"
+    asset_list_filepath = (
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Testing Hubspot Upload/Hubspot Upload - "
+        "Sample.xlsx"
+    )
+    contact_details_filepath = (
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Testing Hubspot Upload/Sample contact "
+        "details.xlsx"
+    )
+    contacts_sheet_name = "Sheet1"
+    contacts_landlord_property_id = "landlord_property_id"
+    contacts_phone_number_column = "phone_number"
+    contacts_email_column = "email"
+    contacts_fullname_column = "fullname"
+    contacts_firstname_column = "firstname"
+    contacts_lastname_column = "lastname"
 
-    standardised_asset_list = pd.read_excel(filepath, sheet_name="Standardised Asset List")
+    asset_list = AssetList.load_standardised_asset_list(asset_list_filepath)
+    asset_list.load_contact_details(
+        local_filepath=contact_details_filepath,
+        sheet_name=contacts_sheet_name,
+        landlord_property_id=contacts_landlord_property_id,
+        phone_number_column=contacts_phone_number_column,
+        email_column=contacts_email_column,
+        fullname_column=contacts_fullname_column,
+        firstname_column=contacts_firstname_column,
+        lastname_column=contacts_lastname_column
+    )
+
+    asset_list.prepare_for_crm(
+        company_domain=customer_domain
+    )
diff --git a/backend/Funding.py b/backend/Funding.py
index f5f85b9f..78440eac 100644
--- a/backend/Funding.py
+++ b/backend/Funding.py
@@ -411,3 +411,123 @@ class Funding:
         self.gbis()
         # self.eco4()
         self.whlg()
+
+
+class Funding2:
+    """
+    New class to handle funding calculation
+    """
+
+    def __init__(self, tenure: HousingType):
+        self.tenure = tenure
+
+    @staticmethod
+    def get_sap_band(sap_score_number):
+        bands = [
+            ("High_A", 96, float("inf")),
+            ("Low_A", 92, 96),
+            ("High_B", 86, 92),
+            ("Low_B", 81, 86),
+            ("High_C", 74.5, 81),
+            ("Low_C", 69, 74.5),
+            ("High_D", 61.5, 69),
+            ("Low_D", 55, 61.5),
+            ("High_E", 46.5, 55),
+            ("Low_E", 39, 46.5),
+            ("High_F", 29.5, 39),
+            ("Low_F", 21, 29.5),
+            ("High_G", 10.5, 21),
+            ("Low_G", 1, 10.5),
+        ]
+
+        for band, lower, upper in bands:
+            if lower <= sap_score_number < upper:
+                return band
+
+        return None
+
+    def eco4_prs_eligibility(
+        self, starting_sap: int, measures: List, mainheat_description: str, heating_control_description: str
+    ):
+        """
+        Handles the eligibility criteria for private rental properties under eco (help to heat group)
+        :param starting_sap: The SAP score the property starts at - must be 54 or below (EPC E - G)
+        :param measures: The measures in the package, e.g. "solar_pv" or "air_source_heat_pump"
+        :param mainheat_description: Description of the main heating system, matched case-insensitively
+        :param heating_control_description: Description of the heating controls, matched case-insensitively
+        :return: True if the EPC requirement is met and the package has a qualifying measure, otherwise False
+        """
+        # Help to heat group
+        # 1) EPC E - G
+        # 2) Must receive one of SWI, FTCH, renewable heating or DHC
+        # 3) Tenant must be on benefits
+        # We don't consider the tenant being on benefits - we just notify the end user that this is a requirement
+        meets_epc = starting_sap <= 54
+        has_solid_wall = "internal_wall_insulation" in measures or "external_wall_insulation" in measures
+
+        # We check if the property has a heating system that means solar pv counts as a renewable heating system.
+        # Both descriptions are lower-cased so the checks do not depend on the casing of the incoming data
+        mainheat = mainheat_description.lower()
+        has_eligible_electric_heating = any(x in mainheat for x in [
+            "air source heat pump", "ground source heat pump", "boiler and radiators, electric"
+        ]) or (
+            "electric storage heaters" in mainheat and
+            heating_control_description.lower() == "controls for high heat retention storage heaters"
+        )
+
+        # Counts as renewable heating
+        solar_renewable_heating = has_eligible_electric_heating and "solar_pv" in measures
+        # Is a renewable heating
+        ashp = "air_source_heat_pump" in measures
+        return bool(meets_epc and (solar_renewable_heating or ashp or has_solid_wall))
+
+    def check_funding(
+        self, measures: List,
+        starting_sap: int,
+        ending_sap: int,
+        mainheat_description: str,
+        heating_control_description: str
+    ):
+        """
+        Given a list of measures, this function will check if the package of measures is fundable
+        :param measures: The measures in the package, e.g. "solar_pv" or "air_source_heat_pump"
+        :param starting_sap: The SAP score the property starts at
+        :param ending_sap: The SAP score the property ends at with the package of measures
+        :param mainheat_description: Main heating description, passed to the ECO4 private rental check
+        :param heating_control_description: Heating controls description, passed to the ECO4 private rental check
+        :raises NotImplementedError: Always, for now - no funding outcome is returned by this function yet
+        """
+        starting_band = self.get_sap_band(starting_sap)
+        ending_band = self.get_sap_band(ending_sap)
+
+        # For ECO4 eligibility, the property needs to end at a C if it starts at a D or E, otherwise should end at a
+        # D. We compare the SAP scores here, as the bands above are labels (e.g. "High_F") and cannot be ordered
+        if starting_sap <= 38 and ending_sap >= 55:
+            # F or G should get to D
+            raise NotImplementedError("Implement F or G to D eligibility")
+
+        ########################
+        # Private
+        ########################
+        # 1) ECO4
+        # 2) GBIS
+
+        if self.tenure == "Private":
+            is_eligible = self.eco4_prs_eligibility(
+                starting_sap=starting_sap,
+                measures=measures,
+                mainheat_description=mainheat_description,
+                heating_control_description=heating_control_description
+            )
+            pass
+
+        ########################
+        # Social
+        ########################
+        # 1) ECO4
+        # 2) GBIS
+
+        if self.tenure == "Social":
+            pass
+
+        raise NotImplementedError("Only implemented for Private or Social housing")
diff --git a/etl/customers/cambridge/surveys.py b/etl/customers/cambridge/surveys.py
new file mode 100644
index 00000000..2aa52d6f
--- /dev/null
+++ b/etl/customers/cambridge/surveys.py
@@ -0,0 +1,24 @@
+import pandas as pd
+from backend.ml_models.Valuation import PropertyValuation
+from backend.app.utils import sap_to_epc
+
+# Read in the survey data
+surveys = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Cambridge/Survey Data.xlsx",
+    sheet_name="Survey data",
+)
+
+increases = []
+for _, x in surveys.iterrows():
+    current_epc = sap_to_epc(x["Pre SAP"])
+    target_epc = sap_to_epc(x["Scenario 1 Post SAP"])
+    current_value = x["Valuation"]
+
+    val = PropertyValuation.estimate_valuation_improvement(
+        current_value,
+        current_epc,
+        target_epc,
+        total_cost=None
+    )
+    avg_increase = val["average_increase"]
+    increases.append(round(avg_increase))
diff --git a/etl/customers/places_for_people/abs.py b/etl/customers/places_for_people/abs.py
new file mode 100644
index 00000000..aa85a93f
--- /dev/null
+++ b/etl/customers/places_for_people/abs.py
@@ -0,0 +1,199 @@
+"""
+This script is to calculate the ABS for the Places for People projects (London, East, North East, North West)
+"""
+
+import os
+import pandas as pd
+
+# London
+pfp_london_cav = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs "
+    "rates/PFP_areas_surrounding_london_reviewed_standardised_15052025.xlsx",
+    sheet_name="Cav Route",
+    header=1
+)
+pfp_london_cav = pfp_london_cav.rename(columns={"Route": "Route March"})
+pfp_london_pv = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs "
+    "rates/PFP_areas_surrounding_london_reviewed_standardised_15052025.xlsx",
+    sheet_name="PV Route",
+    header=1
+)
+pfp_london_pv = pfp_london_pv.rename(columns={"Route": "Route March"})
+pfp_london_cav["location"] = "London"
+pfp_london_pv["location"] = "London"
+# East
+pfp_east_cav = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs "
+    "rates/PFP_east_reviewed_standarised_15052025.xlsx",
+    sheet_name="Cav Route",
+    header=1
+)
+pfp_east_cav = pfp_east_cav.rename(columns={"Route": "Route March"})
+pfp_east_pv = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs "
+    "rates/PFP_east_reviewed_standarised_15052025.xlsx",
+    sheet_name="PV Route",
+    header=1
+)
+pfp_east_pv = pfp_east_pv.rename(columns={"Route": "Route March"})
+pfp_east_cav["location"] = "East"
+pfp_east_pv["location"] = "East"
+# North east
+pfp_north_east_cav = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs "
+    "rates/PFP_north_east_reviewed_standardised_15052025.xlsx",
+    sheet_name="Cav Route",
+    header=1
+)
+pfp_north_east_cav = pfp_north_east_cav.rename(columns={"Route": "Route March"})
+pfp_north_east_pv = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs "
+    "rates/PFP_north_east_reviewed_standardised_15052025.xlsx",
+    sheet_name="PV Route",
+    header=1
+)
+pfp_north_east_pv = pfp_north_east_pv.rename(columns={"Route": "Route March"})
+pfp_north_east_cav["location"] = "North East"
+pfp_north_east_pv["location"] = "North East"
+# North West
+pfp_north_west_cav = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs "
+    "rates/PFP_north_west_reviewed_standardised_15052025.xlsx",
+    sheet_name="Cav Route",
+    header=1
+)
+pfp_north_west_cav = pfp_north_west_cav.rename(columns={"Route": "Route March"})
+pfp_north_west_pv = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs "
+    "rates/PFP_north_west_reviewed_standardised_15052025.xlsx",
+    sheet_name="PV Route",
+    header=1
+)
+pfp_north_west_pv = pfp_north_west_pv.rename(columns={"Route": "Route March"})
+pfp_north_west_cav["location"] = "North West"
+pfp_north_west_pv["location"] = "North West"
+
+cav_route = pd.concat(
+    [
+        pfp_london_cav,
+        pfp_east_cav,
+        pfp_north_east_cav,
+        pfp_north_west_cav
+    ]
+)
+solar_route = pd.concat(
+    [
+        pfp_london_pv,
+        pfp_east_pv,
+        pfp_north_east_pv,
+        pfp_north_west_pv
+    ]
+)
+
+
+def get_band(sap_score_number):
+    bands = [
+        ("High_A", 96, float("inf")),
+        ("Low_A", 92, 96),
+        ("High_B", 86, 92),
+        ("Low_B", 81, 86),
+        ("High_C", 74.5, 81),
+        ("Low_C", 69, 74.5),
+        ("High_D", 61.5, 69),
+        ("Low_D", 55, 61.5),
+        ("High_E", 46.5, 55),
+        ("Low_E", 39, 46.5),
+        ("High_F", 29.5, 39),
+        ("Low_F", 21, 29.5),
+        ("High_G", 10.5, 21),
+        ("Low_G", 1, 10.5),
+    ]
+
+    for band, lower, upper in bands:
+        if lower <= sap_score_number < upper:
+            return band
+
+    return None
+
+
+def classify_floor_area(floor_area):
+    if floor_area <= 72:
+        return "0-72"
+
+    if floor_area <= 97:
+        return "73-97"
+
+    if floor_area <= 199:
+        return "98-199"
+
+    return "200+"
+
+
+# We classify the abs bounds
+solar_route["starting_abs_band"] = solar_route["epc_sap_score_on_register"].apply(get_band)
+solar_route["ending_abs_band_scenario1"] = "High_C"
+solar_route["ending_abs_band_scenario2"] = "Low_B"
+solar_route["epc_total_floor_area"] = solar_route["epc_total_floor_area"].fillna(90)
+solar_route["floor_area_band"] = solar_route["epc_total_floor_area"].apply(classify_floor_area)
+
+# We classify the abs bounds
+cav_route["epc_sap_score_on_register"] = cav_route["epc_sap_score_on_register"].fillna(68)
+cav_route["starting_abs_band"] = cav_route["epc_sap_score_on_register"].apply(get_band)
+cav_route["floor_area_band"] = cav_route["epc_total_floor_area"].apply(classify_floor_area)
+cav_route["ending_abs_band"] = "Low_C"
+
+abs_matrix = pd.read_csv(
+    "/Users/khalimconn-kowlessar/Downloads/ECO4 Full Project Scores Matrix.csv"
+)
+
+cav_route = cav_route.merge(
+    abs_matrix.rename(columns={"Cost Savings": "ABS Rate"}),
+    how="left",
+    left_on=["starting_abs_band", "ending_abs_band", "floor_area_band"],
+    right_on=["Starting Band", "Finishing Band", "Floor Area Segment"],
+)
+solar_route = solar_route.merge(
+    abs_matrix.rename(columns={"Cost Savings": "ABS Rate"}),
+    how="left",
+    left_on=["starting_abs_band", "ending_abs_band_scenario1", "floor_area_band"],
+    right_on=["Starting Band", "Finishing Band", "Floor Area Segment"],
+)
+cav_route["ABS Rate"] = cav_route["ABS Rate"].fillna(0)
+solar_route["ABS Rate"] = solar_route["ABS Rate"].fillna(0)
+
+cav_abs_agg = (
+    cav_route.groupby("Route March").agg(
+        {
+            "ABS Rate": "sum",
+            "landlord_property_id": "count",
+        }
+    ).reset_index()
+)
+cav_abs_agg["Week Number"] = cav_abs_agg["Route March"].str.extract(r"(\d+)").astype(int)
+cav_abs_agg = cav_abs_agg.sort_values("Week Number", ascending=True)
+cav_abs_agg = cav_abs_agg.rename(columns={"landlord_property_id": "Number of Properties"})
+
+solar_abs_agg = (
+    solar_route.groupby("Route March").agg(
+        {
+            "ABS Rate": "sum",
+            "landlord_property_id": "count",
+        }
+    ).reset_index()
+)
+solar_abs_agg["Week Number"] = solar_abs_agg["Route March"].str.extract(r"(\d+)").astype(int)
+solar_abs_agg = solar_abs_agg.rename(columns={"landlord_property_id": "Number of Properties"})
+solar_abs_agg = solar_abs_agg.sort_values("Week Number", ascending=True)
+
+# We store the data
+# Store as an excel
+filename = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs rates/pfp programme rates.xlsx"
+# Store the data in four tabs: the aggregated ABS per route (solar, cavity) and then the property-level data
+
+with pd.ExcelWriter(filename) as writer:
+    solar_abs_agg.to_excel(writer, sheet_name="Solar ABS", index=False)
+    cav_abs_agg.to_excel(writer, sheet_name="Cav ABS", index=False)
+
+    cav_route.to_excel(writer, sheet_name="Cavity data", index=False)
+    solar_route.to_excel(writer, sheet_name="Solar data", index=False)
diff --git a/etl/customers/thrive/Project codes.py b/etl/customers/thrive/Project codes.py
index 6235ebed..01a15497 100644
--- a/etl/customers/thrive/Project codes.py	
+++ b/etl/customers/thrive/Project codes.py	
@@ -38,10 +38,10 @@ PROJECT_CODE_MAP = {
     'Phase 8': "THRIVE-008",
     'Phase 9': "THRIVE-009",
     'Phase 10': "THRIVE-010",
-    "Week1": "THRIVE-WEEK-001",
-    "Week2": "THRIVE-WEEK-002",
-    "Week4": "THRIVE-WEEK-004",
-    "Week7": "THRIVE-WEEK-007",
+    "Week 1": "THRIVE-WEEK-001",
+    "Week 2": "THRIVE-WEEK-002",
+    "Week 4": "THRIVE-WEEK-004",
+    "Week 7": "THRIVE-WEEK-007",
 }
 programme_codes["project_code"] = programme_codes["programme_reference"].map(PROJECT_CODE_MAP)
 
@@ -102,7 +102,29 @@ with pd.ExcelWriter(filename) as writer:
     block_analysis.to_excel(writer, sheet_name="Block Analysis", index=False)
     # If we have outcomes, we add a tab with the outcomes
     outcomes.to_excel(writer, sheet_name="Outcomes", index=False)
-
     unmatched_submissions.to_excel(writer, sheet_name="Unmatched Submissions", index=False)
-
     unmatched_ecosurv.to_excel(writer, sheet_name="Unmatched Ecosurv", index=False)
+
+# A check, just comparing against the master tracker to make sure I have all of the installs
+asset_list = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - "
+    "Complete - Updated May 2025 - Standardised.xlsx",
+    sheet_name="Standardised Asset List",
+)
+
+master_tracker = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Master Tracker (Thrive - "
+    "Warmfront).xlsx",
+    sheet_name="Master Tracker",
+    header=1
+)
+
+df = asset_list[["landlord_property_id", "hubspot_status"]].merge(
+    master_tracker[~pd.isnull(master_tracker['Date Completed'])][["UPRN", "Date Completed"]],
+    how="inner",
+    left_on="landlord_property_id",
+    right_on="UPRN"
+)
+
+df["hubspot_status"].value_counts()
+df[df["hubspot_status"] == "SUBMITTED TO INSTALLER"]

From 1a49740bb0fb9778fcd663d1ebd6e0ab01d7b5c6 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 5 Jun 2025 17:54:55 +0100
Subject: [PATCH 07/14] finished hubspot upload code for Thrive

---
 asset_list/AssetList.py                   | 213 +++++++++++++++-------
 asset_list/hubspot/config.py              |  16 +-
 asset_list/hubspot/prepare_for_hubspot.py |  26 ++-
 etl/customers/l_and_g/risk_matrix.py      |   1 +
 4 files changed, 186 insertions(+), 70 deletions(-)

diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py
index 3f5ef7ff..ef125110 100644
--- a/asset_list/AssetList.py
+++ b/asset_list/AssetList.py
@@ -368,14 +368,19 @@ class AssetList:
     SOLAR_ELIGIBLE_SOLID_WALL_UNINSULATED = "Solar Eligible, Solid Wall Uninsulated, EPC E or Below"
     SOLAR_ELIGIBLE_NEEDS_HEATING_UPGRADE = "Solar Eligible, Needs Heating Upgrade"
 
+    CRM_HISTORICAL_CAVITY_PRODUCT = {
+        "id": 156989182176, "unit_price": 0, "name": "Historical ECO Cavity"
+    }
+
     CRM_PRODUCTS = {
-        "Empty Cavity - ECO4": {"id": 82733738177, "unit_price": 1000, "name": "Empty Cavity & Loft - ECO4"},
+        "Empty Cavity - ECO4": {"id": 82733738177, "unit_price": 1000, "name": "Empty Cavity - ECO4"},
         "Extract & Fill - ECO4": {"id": 100307905778, "unit_price": 500, "name": "Extract & Fill - ECO4"},
         "Solar PV - ECO4": {"id": 82623589564, "unit_price": 1608, "name": "Solar PV - ECO4"},
         "Solar PV + HHRSH - ECO4": {"id": 155529972924, "unit_price": 1608, "name": "Solar PV + HHRSH - ECO4"},
         "Solar PV + Heating Upgrade - ECO4": {
             "id": 109265426665, "unit_price": 1608, "name": "Solar PV + Heating Upgrade - ECO4"
         },
+        "Historical ECO Cavity": CRM_HISTORICAL_CAVITY_PRODUCT
     }
 
     def __init__(
@@ -2128,27 +2133,33 @@ class AssetList:
             full_address_cols_to_concat=[],
             missing_postcodes_method=None,
             address1_extraction_method=None,
-            landlord_year_built=None,
-            landlord_uprn=None,
-            landlord_property_type=None,
-            landlord_built_form=None,
-            landlord_wall_construction=None,
-            landlord_roof_construction=None,
-            landlord_heating_system=None,
-            landlord_existing_pv=None,
-            landlord_sap=None,
-            landlord_block_reference=None,
+            landlord_year_built=cls.STANDARD_YEAR_BUILT,
+            landlord_uprn=cls.STANDARD_UPRN,
+            landlord_property_type=cls.STANDARD_PROPERTY_TYPE,
+            landlord_built_form=cls.STANDARD_BUILT_FORM,
+            landlord_wall_construction=cls.STANDARD_WALL_CONSTRUCTION,
+            landlord_roof_construction=cls.STANDARD_ROOF_CONSTRUCTION,
+            landlord_heating_system=cls.STANDARD_HEATING_SYSTEM,
+            landlord_existing_pv=cls.STANDARD_EXISTING_PV,
+            landlord_sap=cls.STANDARD_SAP,
+            landlord_block_reference=cls.STANDARD_BLOCK_REFERENCE,
             phase=False,
             header=0
         )
         return instance
 
-    def prepare_for_crm(self, company_domain, installer_name):
+    def prepare_for_crm(self, company_domain, installer_name, reconcile_programme=False):
         """
         This function prepares the data for upload into Hubspot
+        :param company_domain: The company domain name to be used in the CRM
+        :param installer_name: The name of the installer to be used in the CRM
+        :param reconcile_programme: If True, will include all properties with a project code, regardless of status
+        :raises ValueError: If the installer name is not valid or if there are missing products
         :return:
         """
         # This maps the opportunities as we reference them, to the product data as stored in Hubspot
+        if not hubspot_config.Installer.is_valid_value(installer_name):
+            raise ValueError(f"Installer name {installer_name} is not valid. Please check the installer name.")
 
         prefixes_to_products = {
             # Empty
@@ -2185,18 +2196,37 @@ class AssetList:
 
         # For each cavity and solar product, we iterate through the prexies and map to the products
 
-        # # Check if there any options not in out lookup table
-        # if (
-        #     any(x for x in cavity_products if x not in product_lookup_table) or
-        #     any(x for x in solar_products if x not in product_lookup_table)
-        # ):
-        #     raise ValueError("We have products not referenced in the lookup table - check this")
-
         programme_data = self.standardised_asset_list.copy()
+        # Format the two date columns
+        programme_data["survey_week"] = pd.to_datetime(programme_data["survey_week"], errors="coerce")
+        programme_data[self.EPC_API_DATA_NAMES["inspection-date"]] = pd.to_datetime(
+            programme_data[self.EPC_API_DATA_NAMES["inspection-date"]],
+            errors="coerce"
+        )
+        # Convert to dd/mm/yyyy format
+        programme_data["survey_week"] = programme_data["survey_week"].dt.strftime("%d/%m/%Y")
+        programme_data[self.EPC_API_DATA_NAMES["inspection-date"]] = (
+            programme_data[self.EPC_API_DATA_NAMES["inspection-date"]].dt.strftime("%d/%m/%Y")
+        )
+
         # We take rows that have a survyor and a date for the survey
-        programme_data = programme_data[
-            ~pd.isnull(programme_data["survey_week"]) & ~pd.isnull(programme_data["surveyor"])
-            ]
+        # We include properties under 3 circumstances:
+        # 1) The hubspot status is ready to be scheduled and there is an assigned surveyor and week for survey
+        # 2) The hubspot status is something else, meaning this has been included in an existing programme
+        # 3) reconcile programme is true, and therefore all properties with a project code will be included
+
+        if reconcile_programme:
+            programme_data = programme_data[~pd.isnull(programme_data["project_code"])]
+        else:
+            ready_to_be_scheduled = (
+                (
+                    programme_data["hubspot_status"] == hubspot_config.HubspotProcessStatus.READY_TO_BE_SCHEDULED.label
+                ) & (~pd.isnull(programme_data["survey_week"]) & ~pd.isnull(programme_data["surveyor"]))
+            )
+            completed_works = (
+                programme_data["hubspot_status"] != hubspot_config.HubspotProcessStatus.READY_TO_BE_SCHEDULED.label
+            )
+            programme_data = programme_data[ready_to_be_scheduled | completed_works]
 
         # Merge on the contact details
         programme_data = programme_data.merge(
@@ -2232,8 +2262,16 @@ class AssetList:
             programme_data["domna_product"]
         )
         # We filter just on rows where we have a product
-        programme_data = programme_data[~pd.isnull(programme_data["domna_product"])]
-        programme_data = programme_data.drop(columns=["solar_product", "cavity_product"])
+        if reconcile_programme:
+            # We include historical works, which will include historical cavity so we set these as extraction (as
+            # this is the main work mix)
+            programme_data["domna_product"] = programme_data["domna_product"].fillna(
+                self.CRM_HISTORICAL_CAVITY_PRODUCT["name"]
+            )
+        else:
+
+            programme_data = programme_data[~pd.isnull(programme_data["domna_product"])]
+            programme_data = programme_data.drop(columns=["solar_product", "cavity_product"])
 
         product_df = (
             pd.DataFrame(self.CRM_PRODUCTS).T[["name", "id", "unit_price"]]
@@ -2251,25 +2289,24 @@ class AssetList:
         product_df['Quantity <LINE_ITEM quantity>'] = 1
 
         # Append on the product data
-        programme_data = programme_data.merge(
-            product_df,
-            how="left",
-            on="domna_product",
-        )
+        programme_data = programme_data.merge(product_df, how="left", on="domna_product")
 
         # Add in deal and pipeline information
         programme_data["dealname"] = (
             programme_data[self.STANDARD_FULL_ADDRESS] + " : " + programme_data["domna_product"]
         )
         programme_data['Pipeline <DEAL pipeline>'] = hubspot_config.CRM_PIPELINE_NAME
-        programme_data['Deal Stage <DEAL dealstage>'] = hubspot_config.CRM_PIPELINE_FIRST_STAGE_NAME
         programme_data['Associations: Listing'] = "Property Owner"
 
-        # programme_data = programme_data.merge(
-        #     assigned_surveyors.rename(
-        #         columns={self.landlord_property_id: self.STANDARD_LANDLORD_PROPERTY_ID}
-        #     ), how="left", on=self.STANDARD_LANDLORD_PROPERTY_ID
-        # )
+        # We determine which column we should use for the UPRN
+        if self.STANDARD_UPRN not in programme_data.columns:
+            uprn_column = self.EPC_API_DATA_NAMES["uprn"]
+        else:
+            # Use the value that has the most coverage
+            uprn_column = "hubspot_uprn"
+            programme_data[uprn_column] = programme_data[self.STANDARD_UPRN].fillna(
+                programme_data[self.EPC_API_DATA_NAMES["uprn"]]
+            )
 
         # Add in some columns if we have them
         date_of_inspections = (
@@ -2277,6 +2314,67 @@ class AssetList:
             "Non-Intrusives: Date of Inspection" in programme_data.columns else None
         )
 
+        # Amend the property type and built form columns
+        programme_data["hubspot_property_type"] = programme_data[self.STANDARD_PROPERTY_TYPE].copy()
+        programme_data["hubspot_built_form"] = programme_data[self.STANDARD_BUILT_FORM].copy()
+
+        def _replace_property_description_data(programme_data, column_name):
+            """
+            Helper function to replace property type or built form data with a specified value.
+            """
+
+            if column_name == "hubspot_property_type":
+                valid_values = ["house", "bungalow", "flat", "maisonette"]
+                epc_fill_col = "property-type"
+            elif column_name == "hubspot_built_form":
+                valid_values = ["detached", "semi-detached", "mid-terrace", "end-terrace"]
+                epc_fill_col = "built-form"
+            else:
+                raise ValueError(f"Invalid column name: {column_name}. Must be 'hubspot_property_type' or "
+                                 f"'hubspot_built_form'.")
+
+            # Any value that is not in valid_values for this column is set to None
+            programme_data[column_name] = np.where(
+                ~programme_data[column_name].isin(valid_values),
+                None,
+                programme_data[column_name]
+            )
+            # We fill the gaps with the matching EPC column (property type or built form)
+            programme_data[column_name] = np.where(
+                pd.isnull(programme_data[column_name]),
+                programme_data[self.EPC_API_DATA_NAMES[epc_fill_col]],
+                programme_data[column_name]
+            )
+
+            programme_data[column_name] = programme_data[column_name].fillna("unknown")
+
+            return programme_data
+
+        # Clean up the property type and built form columns
+        programme_data = _replace_property_description_data(programme_data, "hubspot_property_type")
+        programme_data = _replace_property_description_data(programme_data, "hubspot_built_form")
+
+        # We accommodate the old vs new inspections format
+        if "non-intrusives: WFT Findings" in programme_data.columns:
+            # We have the old format - we only have notes
+            non_intrusives_surveyor_notes = "non-intrusives: WFT Findings"
+            non_intrusives_construction = None
+            non_intrusives_insulated = None
+            non_intrusives_insulation_material = None
+            non_intrusives_ciga_check_required = None
+            non_intrusives_pv_access = None
+            non_intrusives_roof_orientation = None
+            non_intrusives_surveyor_name = None
+        else:
+            non_intrusives_surveyor_notes = 'non-intrusives: Any further surveyor notes'
+            non_intrusives_construction = "non-intrusives: Construction"
+            non_intrusives_insulated = "non-intrusives: Insulated"
+            non_intrusives_insulation_material = "non-intrusives: Material"
+            non_intrusives_ciga_check_required = 'non-intrusives: CIGA Check Required'
+            non_intrusives_pv_access = 'non-intrusives: PV, ACCESS ISSUE, SEE NOTES'
+            non_intrusives_roof_orientation = 'non-intrusives: OFF GAS - ROOF ORIENTATION'
+            non_intrusives_surveyor_name = 'non-intrusives: Surveyors Name'
+
         # This maps the hubspot schema to the template. Anything that is not covered in this will be flagged
         schema_mappings = {
             'Company Domain Name <COMPANY domain>': 'Company Domain Name <COMPANY domain>',
@@ -2296,14 +2394,12 @@ class AssetList:
             'Address 1 <LISTING hs_address_1>': self.STANDARD_ADDRESS_1,
             'Address 2 <LISTING hs_address_2>': None,  # TODO: Don't have this for the moment
             'Postcode <LISTING hs_zip>': self.STANDARD_POSTCODE,
-            'Property Type <LISTING property_type>': self.STANDARD_PROPERTY_TYPE,
-            'Property Sub Type <LISTING property_sub_type>': self.STANDARD_BUILT_FORM,
+            'Property Type <LISTING property_type>': "hubspot_property_type",
+            'Property Sub Type <LISTING property_sub_type>': "hubspot_built_form",
             'Bedroom(s) <LISTING hs_bedrooms>': None,  # TODO: Don't have this for the moment
             'Domna Property ID <LISTING domna_property_id>': self.DOMNA_PROPERTY_ID,
             # We populate this with the column that we have
-            'National UPRN <LISTING national_uprn>': (
-                self.STANDARD_UPRN if self.STANDARD_UPRN is not None else self.EPC_API_DATA_NAMES["uprn"]
-            ),
+            'National UPRN <LISTING national_uprn>': uprn_column,
             'Owner Property ID <LISTING owner_property_id>': self.STANDARD_LANDLORD_PROPERTY_ID,
             'Wall Construction <LISTING wall_construction>': self.STANDARD_WALL_CONSTRUCTION,
             'Heating System <LISTING heating_system>': self.STANDARD_HEATING_SYSTEM,
@@ -2311,30 +2407,17 @@ class AssetList:
             'Boiler Make <LISTING boiler_make>': None,  # TODO: Don't have this for the moment
             'Boiler Model <LISTING boiler_model>': None,  # TODO: Don't have this for the moment
             'Non-Intrusives: Date Checked <LISTING non_intrusives__date_checked>': date_of_inspections,
-            'Non-Intrusives: Wall Type <LISTING non_intrusives__wall_type>': (
-                "non-intrusives: Construction" if self.non_intrusives_present else None
-            ),
-            'Non-intrusives: Insulation <LISTING non_intrusives__insulation>': (
-                "non-intrusives: Insulated" if self.non_intrusives_present else None
-            ),
-            'Non-intrusives: Insulation Material <LISTING non_intrusives__insulation_material>': (
-                "non-intrusives: Material" if self.non_intrusives_present else None
-            ),
-            'Non-Intrusives: CIGA Check Required <LISTING non_intrusives__ciga_check_required>': (
-                'non-intrusives: CIGA Check Required' if self.non_intrusives_present else None
-            ),
-            'Non-Intrusives: PV Access Issues <LISTING non_intrusives__access_issues>': (
-                'non-intrusives: PV, ACCESS ISSUE, SEE NOTES' if self.non_intrusives_present else None
-            ),
-            'Non-Intrusives: Roof Orientation <LISTING non_intrusives__roof_orientation>': (
-                'non-intrusives: OFF GAS - ROOF ORIENTATION' if self.non_intrusives_present else None
-            ),
-            'Non-Intrusives: Surveyor Notes <LISTING non_intrusives__surveyor_notes>': (
-                'non-intrusives: Any further surveyor notes' if self.non_intrusives_present else None
-            ),
-            'Non-Intrusives: Surveyor Name <LISTING non_intrusives__surveyor_name>': (
-                'non-intrusives: Surveyors Name' if self.non_intrusives_present else None
-            ),
+            'Non-Intrusives: Wall Type <LISTING non_intrusives__wall_type>': non_intrusives_construction,
+            'Non-intrusives: Insulation <LISTING non_intrusives__insulation>': non_intrusives_insulated,
+            'Non-intrusives: Insulation Material <LISTING non_intrusives__insulation_material>':
+                non_intrusives_insulation_material,
+            'Non-Intrusives: CIGA Check Required <LISTING non_intrusives__ciga_check_required>':
+                non_intrusives_ciga_check_required,
+            'Non-Intrusives: PV Access Issues <LISTING non_intrusives__access_issues>': non_intrusives_pv_access,
+            'Non-Intrusives: Roof Orientation <LISTING non_intrusives__roof_orientation>':
+                non_intrusives_roof_orientation,
+            'Non-Intrusives: Surveyor Notes <LISTING non_intrusives__surveyor_notes>': non_intrusives_surveyor_notes,
+            'Non-Intrusives: Surveyor Name <LISTING non_intrusives__surveyor_name>': non_intrusives_surveyor_name,
             'CIGA: Date Requested <LISTING ciga__date_requested>': None,  # TODO: Don't have this for the moment
             'CIGA: Cavity Guarantee Found <LISTING ciga__cavity_guarantee_found>': None,
             'Last EPC: Is Estimated <LISTING last_epc__is_estimated>': self.EPC_API_DATA_NAMES["estimated"],
@@ -2351,7 +2434,6 @@ class AssetList:
             'Last EPC: Floor <LISTING last_epc__floor>': self.EPC_API_DATA_NAMES["floor-description"],
             'Last EPC: Room Height <LISTING last_epc__room_height>': self.EPC_API_DATA_NAMES["floor-height"],
             'Last EPC: Age Band <LISTING last_epc__age_band>': self.EPC_API_DATA_NAMES["construction-age-band"],
-            'Deal Stage <DEAL dealstage>': 'Deal Stage <DEAL dealstage>',
             'Pipeline <DEAL pipeline>': 'Pipeline <DEAL pipeline>',
             'Expected Commencement Date <DEAL expected_commencement_date>': "survey_week",
             'Deal Name <DEAL dealname>': "dealname",  # Need to create this,
@@ -2362,6 +2444,7 @@ class AssetList:
             'Deal Owner': 'surveyor_email',
             'Project Code <DEAL project_code>': 'project_code',
             'Associations: Listing': 'Associations: Listing',
+            'Deal Stage <DEAL dealstage>': "hubspot_status",
         }
 
         # We sometimes columns if the landlord never provided them
diff --git a/asset_list/hubspot/config.py b/asset_list/hubspot/config.py
index 6e16279a..01540b7b 100644
--- a/asset_list/hubspot/config.py
+++ b/asset_list/hubspot/config.py
@@ -1,7 +1,6 @@
-from enum import IntEnum
+from enum import IntEnum, Enum
 
 CRM_PIPELINE_NAME = 'Operations - Housing Associations'
-CRM_PIPELINE_FIRST_STAGE_NAME = 'READY TO BE SCHEDULED'
 
 
 class HubspotProcessStatus(IntEnum):
@@ -31,6 +30,19 @@ class HubspotProcessStatus(IntEnum):
     INSTALLER_CANCELLED_FINALIZED = 8, "INSTALLER CANCELLED - FINALIZED"
 
 
+class Installer(Enum):
+    SCIS = "SCIS"
+    JJ_CRUMP = "J & J CRUMP"
+    SGEC = "SGEC"
+
+    @classmethod
+    def is_valid_value(cls, value):
+        """
+        Check if the value is a valid installer.
+        """
+        return value in cls._value2member_map_
+
+
 CRM_UPLOAD_COLUMNS = [
     'Name <LISTING hs_name>', 'Associations: Listing', 'Company Domain Name <COMPANY domain>',
     'Email <CONTACT email>', 'First Name <CONTACT firstname>', 'Last Name <CONTACT lastname>',
diff --git a/asset_list/hubspot/prepare_for_hubspot.py b/asset_list/hubspot/prepare_for_hubspot.py
index 8ed654f3..ee3bc65d 100644
--- a/asset_list/hubspot/prepare_for_hubspot.py
+++ b/asset_list/hubspot/prepare_for_hubspot.py
@@ -1,3 +1,4 @@
+import os
 import pandas as pd
 from asset_list.AssetList import AssetList
 
@@ -17,10 +18,12 @@ def app():
     """
 
     # inputs:
+    reconcile_programme = True  # If True, the hubspot upload will include all properties with a project code
     customer_domain = "https://thrivehomes.org.uk"
+    installer_name = "J & J CRUMP"
     asset_list_filepath = (
-        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Testing Hubspot Upload/Hubspot Upload - "
-        "Sample.xlsx"
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Testing Hubspot Upload/Thrive Programme - "
+        "Hubspot Upload 3.xlsx"
     )
     contact_details_filepath = (
         "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Testing Hubspot Upload/Sample contact "
@@ -47,5 +50,22 @@ def app():
     )
 
     asset_list.prepare_for_crm(
-        company_domain=customer_domain
+        company_domain=customer_domain,
+        installer_name=installer_name,
+        reconcile_programme=reconcile_programme
     )
+
+    # Get the filepath and the filename. Append hubspot upload to the filename. We also change the file type to csv
+    directory, filename = os.path.split(asset_list_filepath)
+    name, ext = os.path.splitext(filename)
+    output_filename = f"{name} - Hubspot Upload.csv"
+    output_filepath = os.path.join(directory, output_filename)
+
+    if pd.isnull(asset_list.hubspot_data['Project Code <DEAL project_code>']).sum():
+        raise ValueError("FIX MEEE")
+
+    if pd.isnull(asset_list.hubspot_data['Deal Stage <DEAL dealstage>']).any():
+        raise ValueError("Warning: Some rows have missing project codes. These will not be uploaded to HubSpot.")
+
+    # Just store locally
+    asset_list.hubspot_data.to_csv(output_filepath, index=False, encoding="utf-8-sig")
diff --git a/etl/customers/l_and_g/risk_matrix.py b/etl/customers/l_and_g/risk_matrix.py
index c800117e..8f5451fc 100644
--- a/etl/customers/l_and_g/risk_matrix.py
+++ b/etl/customers/l_and_g/risk_matrix.py
@@ -81,6 +81,7 @@ def app():
     # We need to calculate the costs
     cost_data = []
     for _, row in epr_data.iterrows():
+
         epc = row["EPC"][0]
         sap = int(row["EPC"][1:])
 

From dd2a04f05e698b549815b5ab62219cd953c29f60 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 5 Jun 2025 18:14:14 +0100
Subject: [PATCH 08/14] deleted old hubspot data from asset list app

---
 asset_list/app.py | 42 ------------------------------------------
 1 file changed, 42 deletions(-)

diff --git a/asset_list/app.py b/asset_list/app.py
index 41623880..5e62bbe1 100644
--- a/asset_list/app.py
+++ b/asset_list/app.py
@@ -484,45 +484,6 @@ def app():
 
     asset_list.block_analysis()
 
-    asset_list.load_contact_details(
-        local_filepath=os.path.join(data_folder, "Full property list wth D&V report V look up 12.2.25.xlsx"),
-        sheet_name="Report 1",
-        landlord_property_id=asset_list.landlord_property_id,
-        phone_number_column='Property Current Tel. Number',
-        fullname_column='Proeprty Current Occupant',
-        firstname_column=None,
-        lastname_column=None,
-        email_column=None,  # TODO - we need this
-    )
-
-    # Convert to a format suitable for CRM
-    # TODO: TEMP
-    assigned_surveyors = pd.DataFrame(
-        [
-            {
-                asset_list.landlord_property_id: "02610001",
-                "week_commencing": "10/10/2025",
-                "surveyor_name": "Khalim Conn-Kowlessar",
-                "surveyor_email": "khalim@domna.homes",
-            }
-        ]
-    )
-
-    # TODO: Sort the output by postcode
-
-    company_domain = "ealing.gov.uk"
-    crm_pipeline_name = "Survey Management"
-    first_dealstage = "READY TO BEGIN SCHEDULING"
-    # TODO - temp, upload to either SharePoint or AWS
-
-    asset_list.prepare_for_crm(
-        assigned_surveyors=assigned_surveyors,
-        company_domain=company_domain,
-        crm_pipeline_name=crm_pipeline_name,
-        first_dealstage=first_dealstage
-    )
-    hubspot_data = asset_list.hubspot_data
-
     # Store as an excel
     filename = os.path.join(data_folder, ".".join(data_filename.split(".")[:-1])) + " - Standardised.xlsx"
     # Store the data in two tabs. One for the asset list with the EPC data and the second with the flat data
@@ -543,6 +504,3 @@ def app():
 
         if not asset_list.ecosurv_no_match.empty:
             asset_list.ecosurv_no_match.to_excel(writer, sheet_name="Unmatched Ecosurv", index=False)
-
-    # Store the Hubspot export as a csv
-    hubspot_data.to_csv(os.path.join(data_folder, "Hubspot Export.csv"), index=False)

From d8b0662422c61ebd620a59b8e7667ad28c457dfc Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 8 Jun 2025 16:15:23 +0100
Subject: [PATCH 09/14] adding project codes for blocks

---
 asset_list/AssetList.py                |  92 +++++++++++----
 asset_list/app.py                      | 152 ++++++++++---------------
 asset_list/mappings/built_form.py      |  29 +++++
 asset_list/mappings/exising_pv.py      |   3 +-
 asset_list/mappings/heating_systems.py |  34 +++++-
 asset_list/mappings/property_type.py   |   5 +-
 asset_list/mappings/roof.py            |   9 +-
 asset_list/mappings/walls.py           |  21 +++-
 8 files changed, 226 insertions(+), 119 deletions(-)

diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py
index ef125110..78c589db 100644
--- a/asset_list/AssetList.py
+++ b/asset_list/AssetList.py
@@ -526,6 +526,23 @@ class AssetList:
                 self.standardised_asset_list["Archetype"].copy()
             )
 
+        self.prefixes_to_products = {
+            # Empty
+            self.EMPTY_CAVITY_NON_INTRUSIVE: self.CRM_PRODUCTS["Empty Cavity - ECO4"],
+            self.EPC_EMPTY_INSPECTIONS_RETRO_DRILLED: self.CRM_PRODUCTS["Empty Cavity - ECO4"],
+            self.EPC_EMPTY_INSPECTIONS_FILLED: self.CRM_PRODUCTS["Empty Cavity - ECO4"],
+            self.EPC_EMPTY_INSPECTIONS_FILLED_AT_BUILD: self.CRM_PRODUCTS["Empty Cavity - ECO4"],
+            self.EPC_EMPTY_INSPECTIONS_NON_CAVITY: self.CRM_PRODUCTS["Empty Cavity - ECO4"],
+            self.EPC_EMPTY: self.CRM_PRODUCTS["Empty Cavity - ECO4"],
+            self.LANDLORD_EMPTY_INSPECTIONS_OTHER: self.CRM_PRODUCTS["Empty Cavity - ECO4"],
+            # Extraction
+            self.EXTRACTION_NON_INTRUSIVE: self.CRM_PRODUCTS["Extract & Fill - ECO4"],
+            # Solar
+            self.SOLAR_ELIGIBLE: self.CRM_PRODUCTS["Solar PV - ECO4"],
+            self.SOLAR_ELIGIBLE_SOLID_WALL_UNINSULATED: self.CRM_PRODUCTS["Solar PV - ECO4"],
+            self.SOLAR_ELIGIBLE_NEEDS_HEATING_UPGRADE: self.CRM_PRODUCTS["Solar PV + Heating Upgrade - ECO4"],
+        }
+
     def _extract_address1(self, asset_list, full_address_col, postcode_col, method="first_two_words"):
 
         if method not in self.ADDRESS_1_CLEANING_METHODS:
@@ -1752,7 +1769,7 @@ class AssetList:
         self.standardised_asset_list["cavity_reason"] = None
 
         empty_cavity_map = {
-            "non_intrusive_indicates_empty_cavity": self.EMPTY_CAVITY_NON_INTRUSIVE_PREFIX + ": ",
+            "non_intrusive_indicates_empty_cavity": self.EMPTY_CAVITY_NON_INTRUSIVE + ": ",
             "non_intrusive_indicates_empty_cavity_has_solar": f"{self.EMPTY_CAVITY_NON_INTRUSIVE} - property "
                                                               "already has solar: ",
             "non_intrusive_indicates_empty_cavity_no_year_filter": f"{self.EMPTY_CAVITY_NON_INTRUSIVE}, "
@@ -1780,7 +1797,7 @@ class AssetList:
                     )) &
                     pd.isnull(self.standardised_asset_list["cavity_reason"])
                 ),
-                f"{EPC_EMPTY_INSPECTIONS_RETRO_DRILLED}: " + self.standardised_asset_list[
+                f"{self.EPC_EMPTY_INSPECTIONS_RETRO_DRILLED}: " + self.standardised_asset_list[
                     "SAP Category"],
                 self.standardised_asset_list["cavity_reason"]
             )
@@ -1979,6 +1996,22 @@ class AssetList:
                     self.outcomes[self.DOMNA_PROPERTY_ID].isin(identified_work)
                 ]
 
+        # Finally, direct operations feedback has suggested that if a property is a flat that has a SAP rating of
+        # 76 or above, we should exclude it because it's likely not going to be eligible for anything
+        self.standardised_asset_list["cavity_reason"] = np.where(
+            (self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "flat") &
+            (self.standardised_asset_list["SAP Category"] == "SAP Rating 76 or more"),
+            None,
+            self.standardised_asset_list["cavity_reason"]
+        )
+        # Split cavity_reason on the colon and check if the first part is equal to one of the two options above
+        # that indicates empties
+        self.standardised_asset_list["identified_empty_cavity"] = (
+            self.standardised_asset_list["cavity_reason"].str.split(":").str[0].isin(
+                [self.EMPTY_CAVITY_NON_INTRUSIVE, self.EPC_EMPTY]
+            )
+        )
+
     def label_property_status(self):
         """
         This function is designed to be run after identify_worktypes() has been run, and will create a "property_status"
@@ -2015,6 +2048,28 @@ class AssetList:
             get_max_status_from_columns, axis=1
         )
 
+        self.standardised_asset_list["project_code"] = None
+        # if we have any blocks, where work is eligible, we flag them now
+        if self.landlord_block_reference is not None:
+            # For blocks that have a 50% allocation, we create project codes
+            self.block_analysis()
+            # find any block refs with at least 50% empties
+            viable_empty_blocks = self.block_analysis_df[
+                self.block_analysis_df['Percentage of Empties'] >= 0.50
+                ]
+
+            if not viable_empty_blocks.empty:
+                project_code_lookup = viable_empty_blocks[["Block Reference"]].copy()
+                self.standardised_asset_list = self.standardised_asset_list.merge(
+                    project_code_lookup, how="left", left_on=self.STANDARD_BLOCK_REFERENCE, right_on="Block Reference"
+                )
+                self.standardised_asset_list["project_code"] = np.where(
+                    ~pd.isnull(self.standardised_asset_list["Block Reference"]),
+                    self.standardised_asset_list["Block Reference"],
+                    self.standardised_asset_list["project_code"]
+                )
+                self.standardised_asset_list = self.standardised_asset_list.drop(columns=["Block Reference"])
+
     def block_analysis(self):
 
         if self.landlord_block_reference is None:
@@ -2024,7 +2079,7 @@ class AssetList:
         # Reverse mapping: label -> enum
         LABEL_TO_ENUM = {e.label: e for e in hubspot_config.HubspotProcessStatus}
 
-        # Threshold status - anythign that is at this stage or beyond is considered surveyed
+        # Threshold status - anything that is at this stage or beyond is considered surveyed
         threshold = hubspot_config.HubspotProcessStatus.SURVEYED_COMPLETED_SIGNED_OFF.value
 
         block_analysis = []
@@ -2034,15 +2089,21 @@ class AssetList:
             if all(cavity_breakdown.index == "No Eligibility"):
                 continue
 
+            # We check the % of empty vs not empty as right now, we're focused on empty
+            n_empties = ((group["identified_empty_cavity"] == True) & (~pd.isnull(group["cavity_reason"]))).sum()
+
             works = group["hubspot_status"]
             above_threshold = works.map(LABEL_TO_ENUM.get).dropna()
             count_above = (above_threshold >= threshold).sum()
-            proportion = count_above / len(works)
+            proportion_surveyed = count_above / len(works)
+            proportion_empty = n_empties / len(works)
+            # We auto-populate any blocks that have greater than 50% proportion empty
 
             block_analysis.append(
                 {
                     "Block Reference": block_reference,
-                    "Proportion of properties suryeyed": proportion,
+                    "Proportion of properties suryeyed": proportion_surveyed,
+                    "Percentage of Empties": proportion_empty,
                     **cavity_breakdown.to_dict(),
                 }
             )
@@ -2050,6 +2111,8 @@ class AssetList:
         block_analysis = pd.DataFrame(block_analysis)
         block_analysis = block_analysis.fillna(0)
 
+        # We flag which properties are eligible for works. We need at least 50%
+
         self.block_analysis_df = block_analysis
 
     @staticmethod
@@ -2161,23 +2224,6 @@ class AssetList:
         if not hubspot_config.Installer.is_valid_value(installer_name):
             raise ValueError(f"Installer name {installer_name} is not valid. Please check the installer name.")
 
-        prefixes_to_products = {
-            # Empty
-            self.EMPTY_CAVITY_NON_INTRUSIVE: self.CRM_PRODUCTS["Empty Cavity - ECO4"],
-            self.EPC_EMPTY_INSPECTIONS_RETRO_DRILLED: self.CRM_PRODUCTS["Empty Cavity - ECO4"],
-            self.EPC_EMPTY_INSPECTIONS_FILLED: self.CRM_PRODUCTS["Empty Cavity - ECO4"],
-            self.EPC_EMPTY_INSPECTIONS_FILLED_AT_BUILD: self.CRM_PRODUCTS["Empty Cavity - ECO4"],
-            self.EPC_EMPTY_INSPECTIONS_NON_CAVITY: self.CRM_PRODUCTS["Empty Cavity - ECO4"],
-            self.EPC_EMPTY: self.CRM_PRODUCTS["Empty Cavity - ECO4"],
-            self.LANDLORD_EMPTY_INSPECTIONS_OTHER: self.CRM_PRODUCTS["Empty Cavity - ECO4"],
-            # Extraction
-            self.EXTRACTION_NON_INTRUSIVE: self.CRM_PRODUCTS["Extract & Fill - ECO4"],
-            # Solar
-            self.SOLAR_ELIGIBLE: self.CRM_PRODUCTS["Solar PV - ECO4"],
-            self.SOLAR_ELIGIBLE_SOLID_WALL_UNINSULATED: self.CRM_PRODUCTS["Solar PV - ECO4"],
-            self.SOLAR_ELIGIBLE_NEEDS_HEATING_UPGRADE: self.CRM_PRODUCTS["Solar PV + Heating Upgrade - ECO4"],
-        }
-
         # We check if all products are covered in the lookup table
         cavity_products = self.standardised_asset_list["cavity_reason"].unique().tolist()
         solar_products = self.standardised_asset_list["solar_reason"].unique().tolist()
@@ -2188,7 +2234,7 @@ class AssetList:
                 continue
 
             matched_product = None
-            for product_prefix, crm_product in prefixes_to_products.items():
+            for product_prefix, crm_product in self.prefixes_to_products.items():
                 if identified_product.startswith(product_prefix):
                     matched_product = crm_product
 
diff --git a/asset_list/app.py b/asset_list/app.py
index 5e62bbe1..881334b5 100644
--- a/asset_list/app.py
+++ b/asset_list/app.py
@@ -62,77 +62,77 @@ def app():
     Property UPRN
     """
 
-    # Thrive - reconciliation
-    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation"
-    data_filename = "Thrive Asset List - Complete - Updated May 2025.xlsx"
-    sheet_name = "Sheet1"
-    postcode_column = 'postcode'
-    fulladdress_column = "full_address"
-    address1_column = "address_line_1"
-    address1_method = None
+    # Stori
+    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Storicymru"
+    data_filename = "Asset list - for analysis.xlsx"
+    sheet_name = "SAP and Costs Calculations"
+    postcode_column = 'Postcode'
+    fulladdress_column = "Address1"
+    address1_column = None
+    address1_method = "house_number_extraction"
     address_cols_to_concat = []
     missing_postcodes_method = None
-    landlord_year_built = "age_band_calculated"
+    landlord_year_built = "Age"
     landlord_os_uprn = None
-    landlord_property_type = "property_type"
-    landlord_built_form = "build_form"
-    landlord_wall_construction = None
-    landlord_roof_construction = "assumed_loft_insulation_thickness_updated"
-    landlord_heating_system = "heating_type_updated"
-    landlord_existing_pv = None
-    landlord_property_id = "thrive_property_id"
-    landlord_sap = "sap_rating_updated"
-    landlord_block_reference = "block_reference"
-    outcomes_filename = [
-        os.path.join(data_folder, "Thrive - Outcomes - April 24-March25 - Corrected.xlsx")
-    ]
-    outcomes_sheetname = ["Sheet1"]
-    outcomes_postcode = ["postcode"]
-    outcomes_houseno = ["No."]
-    outcomes_id = ["thrive_property_id"]
-    outcomes_address = ["address"]
-    master_filepaths = [
-        os.path.join(data_folder, "Thrive Submissions ECO3 - with IDS.csv"),
-        os.path.join(data_folder, "Thrive Submissions ECO4 - with IDS.csv"),
-    ]
-    master_to_asset_list_filepath = None
-    master_id_colnames = ["thrive_property_id", "thrive_property_id"]
-    phase = False
-    ecosurv_landlords = "thrive"
-
-    # Torus
-    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Torus/Phase 2"
-    data_filename = "Torus Property Asset List - INSPECTIONS.xlsx"
-    sheet_name = "TORUS"
-    postcode_column = 'Postcode'
-    fulladdress_column = None
-    address1_column = "AddressLine1"
-    address1_method = None
-    address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
-    missing_postcodes_method = None
-    landlord_year_built = "Property Age"
-    landlord_os_uprn = "NatUPRN"
-    landlord_property_type = "Property Type"
-    landlord_built_form = "Built Form"
-    landlord_wall_construction = "Wall Construction"
-    landlord_roof_construction = "Roof Construction"
-    landlord_heating_system = "Space Heating Source"
-    landlord_existing_pv = "Low Carbon Technology (Solar PV)"
+    landlord_property_type = "TYPE"
+    landlord_built_form = "AGE / DETACHMENT"
+    landlord_wall_construction = "WALL"
+    landlord_roof_construction = "LOFT INSULATION"
+    landlord_heating_system = "BOILER"
+    landlord_existing_pv = "SOLAR PV"
     landlord_property_id = "UPRN"
-    landlord_sap = "SAP Score"
+    landlord_sap = "Current SAP Rating"
     landlord_block_reference = None
-    outcomes_filename = None
-    outcomes_sheetname = None
-    outcomes_postcode = None
-    outcomes_houseno = None
-    outcomes_id = None
-    outcomes_address = None
+    outcomes_filename = []
+    outcomes_sheetname = []
+    outcomes_postcode = []
+    outcomes_houseno = []
+    outcomes_id = []
+    outcomes_address = []
     master_filepaths = []
     master_to_asset_list_filepath = None
     master_id_colnames = []
-    phase = True
+    phase = False
     ecosurv_landlords = None
 
+    # Thrive - reconciliation
+    # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation"
+    # data_filename = "Thrive Asset List - Complete - Updated May 2025.xlsx"
+    # sheet_name = "Sheet1"
+    # postcode_column = 'postcode'
+    # fulladdress_column = "full_address"
+    # address1_column = "address_line_1"
+    # address1_method = None
+    # address_cols_to_concat = []
+    # missing_postcodes_method = None
+    # landlord_year_built = "age_band_calculated"
+    # landlord_os_uprn = None
+    # landlord_property_type = "property_type"
+    # landlord_built_form = "build_form"
+    # landlord_wall_construction = None
+    # landlord_roof_construction = "assumed_loft_insulation_thickness_updated"
+    # landlord_heating_system = "heating_type_updated"
+    # landlord_existing_pv = None
+    # landlord_property_id = "thrive_property_id"
+    # landlord_sap = "sap_rating_updated"
+    # landlord_block_reference = "block_reference"
+    # outcomes_filename = [
+    #     os.path.join(data_folder, "Thrive - Outcomes - April 24-March25 - Corrected.xlsx")
+    # ]
+    # outcomes_sheetname = ["Sheet1"]
+    # outcomes_postcode = ["postcode"]
+    # outcomes_houseno = ["No."]
+    # outcomes_id = ["thrive_property_id"]
+    # outcomes_address = ["address"]
+    # master_filepaths = [
+    #     os.path.join(data_folder, "Thrive Submissions ECO3 - with IDS.csv"),
+    #     os.path.join(data_folder, "Thrive Submissions ECO4 - with IDS.csv"),
+    # ]
+    # master_to_asset_list_filepath = None
+    # master_id_colnames = ["thrive_property_id", "thrive_property_id"]
+    # phase = False
+    # ecosurv_landlords = "thrive"
+
     # Southern Midlands
     # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southern/Midlands Properties - Apr 2025"
     # data_filename = "Southern Housing Midlands Property List - combined.xlsx"
@@ -160,34 +160,6 @@ def app():
     # master_filepaths = []
     # master_to_asset_list_filepath = None
 
-    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/North-West"
-    data_filename = "Places for People NORTH WEST - INSPECTIONS MASTER - UPDATE.xlsx"
-    sheet_name = "CHECKED"
-    postcode_column = 'Postcode'
-    fulladdress_column = None
-    address1_column = "AddressLine1"
-    address1_method = None
-    address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
-    missing_postcodes_method = None
-    landlord_year_built = None
-    landlord_os_uprn = None
-    landlord_property_type = "Archetype (PFP)"
-    landlord_built_form = "Archetype (PFP)"
-    landlord_wall_construction = None
-    landlord_roof_construction = None
-    landlord_heating_system = None
-    landlord_existing_pv = None
-    landlord_property_id = "Uprn"
-    outcomes_filename = None
-    outcomes_sheetname = None
-    outcomes_postcode = None
-    outcomes_houseno = None
-    outcomes_id = None
-    master_filepaths = []
-    master_to_asset_list_filepath = None
-    landlord_sap = None
-    phase = None
-
     # Maps addresses to uprn in problematic cases
     manual_uprn_map = {}
 
@@ -482,8 +454,6 @@ def app():
     # We now flag the status of the property
     asset_list.label_property_status()
 
-    asset_list.block_analysis()
-
     # Store as an excel
     filename = os.path.join(data_folder, ".".join(data_filename.split(".")[:-1])) + " - Standardised.xlsx"
     # Store the data in two tabs. One for the asset list with the EPC data and the second with the flat data
diff --git a/asset_list/mappings/built_form.py b/asset_list/mappings/built_form.py
index 116c3203..45e45c54 100644
--- a/asset_list/mappings/built_form.py
+++ b/asset_list/mappings/built_form.py
@@ -331,4 +331,33 @@ BUILT_FORM_MAPPINGS = {
     'Low Rise': 'low rise',
     'Upper Floor': 'top-floor',
     'High Rise': 'high rise',
+
+    '2012 ONWARDS DETACHED': 'detached',
+    '1950-66 END TERRACE': 'end-terrace',
+    '1976-82 MID TERRACED': 'mid-terrace',
+    '1950-66 MID TERRACE': 'mid-terrace',
+    '1991-95  DETACHED': 'detached',
+    '1976-82 END TERRACED': 'end-terrace',
+    '1967-75 DETACHED': 'detached',
+    'PRE 1900 DETACHED': 'detached',
+    'PRE 1900 MID TERRACE': 'mid-terrace',
+    '1900 DET': 'detached',
+    '1967-75 MID TERR': 'mid-terrace',
+    '1930-49 SEMI DET': 'semi-detached',
+    '1900-29 SEMI DET': 'semi-detached',
+    '1900-29 MID TERR': 'mid-terrace',
+    '1983- 90 MID TERR': 'mid-terrace',
+    '1976-82 MID TERR': 'mid-terrace',
+    '1983-90 END TERR': 'end-terrace',
+    '1991-95 SEMI DET': 'semi-detached',
+    '1983-90 SEMI DET': 'semi-detached',
+    '1991-95  MID TERR': 'mid-terrace',
+    '1950-66 SEMI DET': 'semi-detached',
+    '1900 MID TERR': 'mid-terrace',
+    '1967-75 SEMI DET': 'semi-detached',
+    '1983- 90 SEMI DET': 'semi-detached',
+    '1983-90 MID TERR': 'mid-terrace',
+    '1976-82 SEMI DET': 'semi-detached',
+    'PRE 1900 MID TERR': 'mid-terrace'
+
 }
diff --git a/asset_list/mappings/exising_pv.py b/asset_list/mappings/exising_pv.py
index 51f5f922..e67fafb4 100644
--- a/asset_list/mappings/exising_pv.py
+++ b/asset_list/mappings/exising_pv.py
@@ -16,5 +16,6 @@ EXISTING_PV_MAPPINGS = {
     'PV: 25% roof area, PV: 3.6kWp array': 'already has PV',
     'PV: 10% roof area, PV: 2kWp array': 'already has PV',
     'PV: 50% roof area': 'already has PV',
-    'Solar PV': 'already has PV'
+    'Solar PV': 'already has PV',
+    'SOLAR PV': 'already has PV'
 }
diff --git a/asset_list/mappings/heating_systems.py b/asset_list/mappings/heating_systems.py
index daef01bb..d2959873 100644
--- a/asset_list/mappings/heating_systems.py
+++ b/asset_list/mappings/heating_systems.py
@@ -293,5 +293,37 @@ HEATING_MAPPINGS = {
     'No Data': 'unknown',
     'Boiler System': 'gas condensing boiler',
     'Storage heating': 'electric storage heaters',
-    'Storage heating (HHRSH)': 'high heat retention storage heaters'
+    'Storage heating (HHRSH)': 'high heat retention storage heaters',
+
+    'ELECTRIC BOILER': 'electric boiler',
+    'STORAGE HEATERS': 'electric storage heaters',
+    'GREENSTAR 24I JUNIOR': 'gas combi boiler',
+    'generic cond combi post98': 'gas condensing combi',
+    'SAP TABLE REG COND +98 NO PICTURE OF BOILER': 'gas condensing boiler',
+    'ECO TEC PRO 28 H COMBI A': 'gas combi boiler',
+    'GREENSTAR 25I ErP': 'gas combi boiler',
+    'IDEAL LOGIC MAX COMBI C30': 'gas combi boiler',
+    'ECO TEC PRO 28 (286/5-3)': 'gas combi boiler',
+    'IDEAL LOGIC HEAT 30': 'gas boiler, radiators',
+    'WORCESTER 240': 'gas boiler, radiators',
+    'ECO TEC PRO 24 (246/5-3)': 'gas combi boiler',
+    'ECO TEC PRO 28 (OLD)': 'gas combi boiler',
+    'LOGIC COMBI2 C30': 'gas combi boiler',
+    'GREENSTAR 28I JUNIOR': 'gas combi boiler',
+    'WORCESTER 24i': 'gas combi boiler',
+    'GREENSTAR 30I ErP': 'gas combi boiler',
+    '25 CDI': 'gas combi boiler',
+    'GREENSTAR 28CDI COMPACT ErP': 'gas combi boiler',
+    'GREENSTAR 24 RI': 'gas boiler, radiators',
+    'BAXI COMBI 105 HE': 'gas combi boiler',
+    'ECO TEC PRO 28 (OLD TYPE)': 'gas combi boiler',
+    'WORCESTER 28 SI ll RSF': 'gas combi boiler',
+    'GREENSTAR 30SI COMPACT ErP': 'gas combi boiler',
+    'SAP TABLE REG COND +98 NO PICTURE OF CYLINDER': 'gas condensing boiler',
+    'WORCESTER 24 SI ll RSF': 'gas combi boiler',
+    'GREENSTAR 4000': 'gas combi boiler',
+    'GREENSTAR 24i JUNIOR': 'gas combi boiler',
+    'ECO TEC PRO 24 (OLD TYPE)': 'gas combi boiler',
+    'GREENSTAR 30SI COMPACT': 'gas combi boiler',
+    'BAXI DUO TEC 28 COMBI ErP': 'gas combi boiler'
 }
diff --git a/asset_list/mappings/property_type.py b/asset_list/mappings/property_type.py
index b705d6ef..1a61c3eb 100644
--- a/asset_list/mappings/property_type.py
+++ b/asset_list/mappings/property_type.py
@@ -252,5 +252,8 @@ PROPERTY_MAPPING = {
     'Bedsit bungalow semi detached': 'bedsit',
     'Bedsit Flat': 'bedsit',
     'Semi detached house': 'house',
-    'Unit': 'unknown'
+    'Unit': 'unknown',
+    'HOUSE (3 STOREY)': 'house',
+    'FLAT GROUND FLOOR': 'flat',
+    'FLAT TOP FLOOR': 'flat'
 }
diff --git a/asset_list/mappings/roof.py b/asset_list/mappings/roof.py
index 3b447829..13359ded 100644
--- a/asset_list/mappings/roof.py
+++ b/asset_list/mappings/roof.py
@@ -43,6 +43,13 @@ ROOF_CONSTRUCTION_MAPPINGS = {
     'Non-joist': 'unknown',
     '25mm': 'pitched less than 100mm insulation',
     '400mm+': 'pitched insulated',
-    '12mm': 'pitched less than 100mm insulation'
+    '12mm': 'pitched less than 100mm insulation',
 
+    '150MM': 'pitched insulated',
+    '200MM': 'pitched insulated',
+    '250MM': 'pitched insulated',
+    '100MM': 'pitched less than 100mm insulation',
+    'U/K': 'unknown',
+    'U/K - 250MM RIR FLAT CEILING': 'flat unknown insulation',
+    'U/K - 200MM RIR FLAT CEILING': 'flat unknown insulation'
 }
diff --git a/asset_list/mappings/walls.py b/asset_list/mappings/walls.py
index 5e32531f..5baabe6f 100644
--- a/asset_list/mappings/walls.py
+++ b/asset_list/mappings/walls.py
@@ -224,5 +224,24 @@ WALL_CONSTRUCTION_MAPPINGS = {
     'Traditional Cavity Brickwork': 'cavity unknown insulation',
     'System build (undefined)': 'system built',
     'Non Trad Wimpey': 'system built',
-    'Non Trad Wates': 'system built'
+    'Non Trad Wates': 'system built',
+
+    'CAVITY FILLED 270MM': 'filled cavity',
+    'CAVITY FILLED  270MM': 'filled cavity',
+    'CAVITY FILLED  250MM': 'filled cavity',
+    'CAVITY FILLED  260MM': 'filled cavity',
+    'CAVITY FILLED 260MM': 'filled cavity',
+    'SOLID A/B 220MM': 'solid brick unknown insulation',
+    'CAVITY A/B 300MM': "uninsulated cavity",
+    'CAVITY A/B 250MM': "uninsulated cavity",
+    'CAVITY A/B  260MM': "uninsulated cavity",
+    'CAVITY A/B  270MM': "uninsulated cavity",
+    'SOLID BRICK/CAVITY EXT': 'solid brick unknown insulation',
+    'CAVITY EWI': 'filled cavity',
+    'SANDSTONE/CAVITY EXT': 'sandstone or limestone',
+    'SYSTEM BUILD 100MM EWI': 'system built',
+    'CAVITY A/B 260MM': "uninsulated cavity",
+    'CAVITY A/B 270MM': "uninsulated cavity",
+    'CAVITY A/B  250MM': "uninsulated cavity"
+
 }

From 02f423f60c28e359e48357762f9d7503d44a451b Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 8 Jun 2025 16:46:42 +0100
Subject: [PATCH 10/14] if a property was flagged for a heating upgrade based
 on epc but the landlord's data indicates it has a boiler, it's removed

---
 asset_list/AssetList.py | 19 ++++++++++++++-----
 asset_list/app.py       | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 50 insertions(+), 5 deletions(-)

diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py
index 78c589db..4c729245 100644
--- a/asset_list/AssetList.py
+++ b/asset_list/AssetList.py
@@ -1512,13 +1512,22 @@ class AssetList:
             )
         )
 
+        # If the landlord has given us the heating system, we default to that on heating upgrades. Because of the
+        # poor heating in place, if the EPC indicates that this property had a low efficiency heating system but the
+        # landlord data suggests otherwise (e.g. there's a gas boiler), we default to what the landlord has told us
         self.standardised_asset_list["solar_epc_data_indicates_requires_heating_upgrade"] = (
-            self.standardised_asset_list[self.EPC_API_DATA_NAMES["mainheat-description"]].str.lower().str.contains(
-                "electric storage heaters|room heaters"
+            (
+                self.standardised_asset_list[self.EPC_API_DATA_NAMES["mainheat-description"]].str.lower().str.contains(
+                    "electric storage heaters|room heaters"
+                ) & (
+                    self.standardised_asset_list[
+                        self.EPC_API_DATA_NAMES["mainheatcont-description"]
+                    ] != "Controls for high heat retention storage heaters"
+                )
             ) & (
-                self.standardised_asset_list[
-                    self.EPC_API_DATA_NAMES["mainheatcont-description"]
-                ] != "Controls for high heat retention storage heaters"
+                ~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].isin(
+                    ["district heating", "communal heating", "communal gas boiler"]
+                ) & ~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].str.contains("gas ")
             )
         )
 
diff --git a/asset_list/app.py b/asset_list/app.py
index 881334b5..3c9176ca 100644
--- a/asset_list/app.py
+++ b/asset_list/app.py
@@ -95,6 +95,42 @@ def app():
     phase = False
     ecosurv_landlords = None
 
+    # For ACIS - programme re-build
+    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/ACIS/ACIS Full Programme Review March 2025"
+    data_filename = "ACIS asset list.xlsx"
+    sheet_name = "Assets"
+    address1_column = "House No"
+    postcode_column = "Postcode"
+    landlord_property_id = "UPRN"
+    fulladdress_column = None
+    address_cols_to_concat = ["House No", "Street", "Town"]
+    missing_postcodes_method = None
+    address1_method = None
+    landlord_year_built = "YEAR BUILT"
+    landlord_os_uprn = None
+    landlord_property_type = "Property type"
+    landlord_built_form = None
+    landlord_wall_construction = "Wall Constuction"
+    landlord_roof_construction = None
+    landlord_sap = None
+    landlord_heating_system = "Heating"
+    landlord_existing_pv = None
+    outcomes_filename = [os.path.join(data_folder, "ACIS Group - 25.11.2024 - outcomes.xlsx")]
+    outcomes_sheetname = ["Feedback"]
+    outcomes_postcode = ["Postcode"]
+    outcomes_address = ["Address"]
+    outcomes_houseno = ["No"]
+    outcomes_id = [None]
+    master_filepaths = [
+        os.path.join(data_folder, "ECO 3 -Table 1.csv"),
+        os.path.join(data_folder, "ECO 4 -Table 1.csv"),
+    ]
+    master_id_colnames = [None, None]
+    master_to_asset_list_filepath = None
+    phase = False
+    ecosurv_landlords = None
+    landlord_block_reference = None
+
     # Thrive - reconciliation
     # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation"
     # data_filename = "Thrive Asset List - Complete - Updated May 2025.xlsx"

From c22179f1a5d12728e7a50ce9ab78543ce23b968a Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 17 Jun 2025 15:59:07 +0100
Subject: [PATCH 11/14] updating code for calico

---
 asset_list/AssetList.py                       | 274 ++++++++++++++----
 asset_list/app.py                             | 209 ++++++++-----
 asset_list/hubspot/config.py                  |   4 +-
 asset_list/hubspot/prepare_for_hubspot.py     |  22 +-
 asset_list/mappings/heating_systems.py        |   5 +-
 asset_list/mappings/property_type.py          |  13 +-
 asset_list/mappings/walls.py                  |   9 +-
 .../Futures Housing/validation_surveys.py     | 167 +++++++++++
 8 files changed, 572 insertions(+), 131 deletions(-)
 create mode 100644 etl/customers/Futures Housing/validation_surveys.py

diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py
index 4c729245..62016239 100644
--- a/asset_list/AssetList.py
+++ b/asset_list/AssetList.py
@@ -29,6 +29,7 @@ from recommendations.recommendation_utils import (
 )
 
 from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
+from etl.epc_clean.epc_attributes.WallAttributes import WallAttributes
 
 logger = setup_logger()
 
@@ -428,6 +429,7 @@ class AssetList:
         self.unmatched_submissions = pd.DataFrame()
         self.ecosurv = None
         self.ecosurv_no_match = pd.DataFrame()
+        self.geographical_areas = pd.DataFrame()
 
         # When this is True, we intend to break the programme into multiple phases. We may need to review
         # how this is structured in the future, as depending on how we get future data, we may need to
@@ -443,7 +445,7 @@ class AssetList:
         self.non_intrusives_eligibility = "Eligibility (Red/Yellow/Green)" in self.raw_asset_list.columns
 
         self.new_format_non_insturives_present = (
-            "Has the property been re-walled?" in self.standardised_asset_list.columns
+            "Has the property been re-walled?" in self.raw_asset_list.columns
         )
 
         # Names of columns
@@ -989,7 +991,7 @@ class AssetList:
                 self.STANDARD_YEAR_BUILT,
                 self.STANDARD_WALL_CONSTRUCTION,
                 self.STANDARD_HEATING_SYSTEM,
-                self.STANDARD_EXISTING_PV
+                self.STANDARD_BLOCK_REFERENCE,
             ] if v not in self.standardised_asset_list.columns
         ]
         for v in missing_variables:
@@ -1016,6 +1018,12 @@ class AssetList:
             self.standardised_asset_list[self.STANDARD_SAP] = (
                 self.standardised_asset_list[self.STANDARD_SAP].astype(float)
             )
+            # If it's zero, we set it to None
+            self.standardised_asset_list[self.STANDARD_SAP] = np.where(
+                self.standardised_asset_list[self.STANDARD_SAP] == 0,
+                None,
+                self.standardised_asset_list[self.STANDARD_SAP]
+            )
 
     def merge_data(self, df: pd.DataFrame):
         """
@@ -1233,7 +1241,7 @@ class AssetList:
             processed_age_band, how="left"
         )
 
-    def identify_worktypes(self, cleaned):
+    def identify_worktypes(self):
 
         if self.landlord_sap is not None:
             # We add a SAP category for all work type identification
@@ -1596,19 +1604,9 @@ class AssetList:
         else:
             self.standardised_asset_list["solar_non_intrusives_walls_insulated"] = False
 
-        # We merge on the u-value for average thermal transmittance
-        walls_uvalue_data = pd.DataFrame(cleaned["walls-description"])
-        walls_uvalue_data = walls_uvalue_data[
-            ~pd.isnull(walls_uvalue_data["thermal_transmittance"])
-        ][["original_description", "thermal_transmittance"]].rename(
-            columns={
-                "original_description": self.EPC_API_DATA_NAMES["walls-description"],
-                "thermal_transmittance": "walls_u_value"
-            }
-        )
-        self.standardised_asset_list = self.standardised_asset_list.merge(
-            walls_uvalue_data, how="left", on=self.EPC_API_DATA_NAMES["walls-description"]
-        )
+        self.standardised_asset_list["walls_u_value"] = self.standardised_asset_list[
+            self.EPC_API_DATA_NAMES["walls-description"]
+        ].apply(lambda x: WallAttributes(x).process()["thermal_transmittance"] if not pd.isnull(x) else None)
 
         self.standardised_asset_list["solar_epc_walls_insulated"] = (
             (
@@ -1621,16 +1619,20 @@ class AssetList:
             )
         )
 
-        # We merge on the u-value for average thermal transmittance
-        roof_data = pd.DataFrame(cleaned["roof-description"])[
-            ["original_description", "thermal_transmittance", "is_pitched", "is_loft"]
-        ].rename(
-            columns={
-                "original_description": self.EPC_API_DATA_NAMES["roof-description"],
-                "thermal_transmittance": "roof_u_value",
-            }
-        )
-
+        roof_data = []
+        for desc in self.standardised_asset_list[
+            self.EPC_API_DATA_NAMES["roof-description"]
+        ].unique():
+            if pd.isnull(desc):
+                continue
+            roof_data.append(
+                {
+                    self.EPC_API_DATA_NAMES["roof-description"]: desc,
+                    **RoofAttributes(desc).process()
+                }
+            )
+        roof_data = pd.DataFrame(roof_data)
+        roof_data = roof_data.rename(columns={"thermal_transmittance": "roof_u_value"})
         self.standardised_asset_list = self.standardised_asset_list.merge(
             roof_data, how="left", on=self.EPC_API_DATA_NAMES["roof-description"]
         )
@@ -1723,6 +1725,8 @@ class AssetList:
             self.standardised_asset_list["solar_epc_loft_needs_topup"]
         )
 
+        z = self.standardised_asset_list[self.standardised_asset_list["landlord_property_id"] == "DW150120029"]
+
         self.standardised_asset_list["solar_eligible"] = (
             # Property isn't a flag
             not_a_flat &
@@ -1964,7 +1968,8 @@ class AssetList:
                     self.standardised_asset_list[col]
                 )
 
-        if self.ecosurv is not None:
+        if self.ecosurv is not None and "ecosurv_install_status" in self.standardised_asset_list.columns:
+            # If we didn't match anything to ecosurv, the ecosurv_install_status won't exist
             for col in ["cavity_reason", "solar_reason"]:
                 self.standardised_asset_list[col] = np.where(
                     (
@@ -2021,6 +2026,20 @@ class AssetList:
             )
         )
 
+    def fill_landlord_block_reference(self, has_blocks_of_flats):
+        if not has_blocks_of_flats:
+            return
+
+        # If we have blocks of flats, we fill the landlord_block_reference field with address 1 + postcode
+        self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE] = np.where(
+            (self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "block of flats") & (
+                pd.isnull(self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE])
+            ),
+            self.standardised_asset_list[self.STANDARD_ADDRESS_1] + " " +
+            self.standardised_asset_list[self.STANDARD_POSTCODE],
+            self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE]
+        )
+
     def label_property_status(self):
         """
         This function is designed to be run after identify_worktypes() has been run, and will create a "property_status"
@@ -2059,7 +2078,89 @@ class AssetList:
 
         self.standardised_asset_list["project_code"] = None
         # if we have any blocks, where work is eligible, we flag them now
-        if self.landlord_block_reference is not None:
+        # These blocks may be referenced via the landlord_block_reference field, or by property types being
+        # blocks of flats
+        has_landlord_block_reference = self.landlord_block_reference is not None
+        has_blocks_of_flats = (self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "block of flats").sum()
+
+        if has_landlord_block_reference or has_blocks_of_flats:
+
+            # If we have blocks of flats without a landlord block reference, we create one
+            self.fill_landlord_block_reference(has_blocks_of_flats)
+
+            self.split_blocks(has_blocks_of_flats)
+
+            def split_blocks(self, has_blocks_of_flats):
+                """
+                Where we have a single row that is a block of flats, we split this into multiple rows,
+                one for each unit. The data that we have will be copied across rows
+                :param self:
+                :param has_blocks_of_flats:
+                :return:
+                """
+
+                blocks = self.standardised_asset_list[
+                    self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "block of flats"
+                    ].copy()
+
+                RANGE_RE = re.compile(r'\b(\d+[A-Za-z]?)\s*[-–]\s*(\d+[A-Za-z]?)\b')
+                NUM_RE = re.compile(r'\b\d+[A-Za-z]?\b')  # captures 12, 12A, etc.
+
+                expanded_rows = []
+                for _, row in blocks.iterrows():
+                    addr = str(row[self.STANDARD_ADDRESS_1])
+
+                    # 1 ─ Range  (e.g. 1-7)
+                    m_range = RANGE_RE.search(addr)
+                    if m_range:
+                        start, end = m_range.groups()
+                        start, end = int(re.match(r'\d+', start)[0]), int(re.match(r'\d+', end)[0])
+                        if start > end or (end - start) > 100:
+                            raise ValueError(f"Suspicious range '{addr}'")
+                        for n in range(start, end + 1):
+                            new = row.copy()
+                            new_addr = RANGE_RE.sub(str(n), addr, count=1)
+                            new[self.STANDARD_ADDRESS_1] = new_addr
+                            new[self.DOMNA_PROPERTY_ID] = f"{row[self.DOMNA_PROPERTY_ID]}-{new_addr}"
+                            expanded_rows.append(new)
+                        continue
+
+                    # 2 ─ Explicit list  (e.g. 1, 2, 5 Block)
+                    nums = NUM_RE.findall(addr)
+                    if len(nums) > 1 and ',' in addr:
+                        for n in nums:
+                            new = row.copy()
+                            new_addr = re.sub(NUM_RE, n, addr, count=1)  # replace the first number only
+                            new[self.STANDARD_ADDRESS_1] = new_addr
+                            new[self.DOMNA_PROPERTY_ID] = f"{row[self.DOMNA_PROPERTY_ID]}-{new_addr}"
+                            expanded_rows.append(new)
+                        continue
+
+                    # 3 ─ Single number → treat as individual dwelling
+                    if len(nums) == 1:
+                        expanded_rows.append(row)
+                        continue
+
+                    # 4 ─ No numbers → keep as-is
+                    if not nums:
+                        expanded_rows.append(row)
+                        continue
+
+                    # Anything else with digits is unrecognised
+                    raise NotImplementedError(f"Unhandled block format: '{addr}'")
+
+                expanded_blocks = pd.DataFrame(expanded_rows)
+
+                # We drop the blocks from the standardised asset list and append on the expanded blocks
+                self.standardised_asset_list = self.standardised_asset_list[
+                    self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] != "block of flats"
+                    ]
+
+                self.standardised_asset_list = pd.concat(
+                    [self.standardised_asset_list, expanded_blocks],
+                    ignore_index=True
+                )
+
             # For blocks that have a 50% allocation, we create project codes
             self.block_analysis()
             # find any block refs with more than 50% emptires
@@ -2079,11 +2180,39 @@ class AssetList:
                 )
                 self.standardised_asset_list = self.standardised_asset_list.drop(columns=["Block Reference"])
 
-    def block_analysis(self):
+    def analyse_geographies(self):
+        cavity_programme = (
+            self.standardised_asset_list[["domna_postcode", "cavity_reason"]]
+            .groupby(["domna_postcode"])["cavity_reason"]
+            .count()
+            .reset_index()
+        )
+        solar_programme = (
+            self.standardised_asset_list[["domna_postcode", "solar_reason"]]
+            .groupby(["domna_postcode"])["solar_reason"]
+            .count()
+            .reset_index()
+        )
+        postcodes = (
+            self.standardised_asset_list[["domna_postcode", "landlord_property_id"]]
+            .groupby("domna_postcode")["landlord_property_id"]
+            .count()
+            .reset_index()
+            .rename(columns={"landlord_property_id": "n_properties"})
+        )
+        geographical_areas = postcodes.merge(cavity_programme, how="left", on="domna_postcode").merge(
+            solar_programme, how="left", on="domna_postcode"
+        ).fillna(0)
+        geographical_areas["coverage"] = (
+            (
+                geographical_areas["solar_reason"] + geographical_areas["cavity_reason"]
+            ) / geographical_areas["n_properties"] * 100
+        )
 
-        if self.landlord_block_reference is None:
-            # This information is not available
-            return
+        geographical_areas = geographical_areas.sort_values("coverage", ascending=False)
+        self.geographical_areas = geographical_areas
+
+    def block_analysis(self):
 
         # Reverse mapping: label -> enum
         LABEL_TO_ENUM = {e.label: e for e in hubspot_config.HubspotProcessStatus}
@@ -2146,6 +2275,8 @@ class AssetList:
         sheet_name,
         landlord_property_id,
         phone_number_column=None,
+        secondary_phone_number_column=None,
+        secondary_contact_full_name=None,
         email_column=None,
         fullname_column=None,
         firstname_column=None,
@@ -2155,6 +2286,8 @@ class AssetList:
         self.contact_detail_fields = {
             "landlord_property_id": landlord_property_id,
             "phone_number": phone_number_column,
+            "secondary_phone_number": secondary_phone_number_column,
+            "secondary_contact_full_name": secondary_contact_full_name,
             "email": email_column,
             "fullname": fullname_column,
             "firstname": firstname_column,
@@ -2162,7 +2295,8 @@ class AssetList:
         }
 
         details_colnames = [
-            phone_number_column, email_column, fullname_column, firstname_column, lastname_column
+            phone_number_column, secondary_phone_number_column, email_column, fullname_column, firstname_column,
+            lastname_column
         ]
         # We'll fill them
         none_details = [x for x in details_colnames if x is None]
@@ -2188,7 +2322,7 @@ class AssetList:
         self.contact_details = contact_details
 
     @classmethod
-    def load_standardised_asset_list(cls, filepath):
+    def load_standardised_asset_list(cls, filepath, sheet_name, header):
         """
         This function is designed to load the standardised asset list from a file
         :return:
@@ -2197,7 +2331,7 @@ class AssetList:
         # instantiate the class
         instance = cls(
             local_filepath=filepath,
-            sheet_name="Standardised Asset List",
+            sheet_name=sheet_name,
             address1_colname=cls.STANDARD_ADDRESS_1,
             postcode_colname=cls.STANDARD_POSTCODE,
             full_address_colname=cls.STANDARD_FULL_ADDRESS,
@@ -2216,7 +2350,7 @@ class AssetList:
             landlord_sap=cls.STANDARD_SAP,
             landlord_block_reference=cls.STANDARD_BLOCK_REFERENCE,
             phase=False,
-            header=0
+            header=header
         )
         return instance
 
@@ -2252,6 +2386,10 @@ class AssetList:
         # For each cavity and solar product, we iterate through the prexies and map to the products
 
         programme_data = self.standardised_asset_list.copy()
+        programme_data["domna_full_address"] = (
+            programme_data["domna_full_address"].str.replace(";", ", ", regex=False).str.replace("  ", "")
+        )
+
         # Format the two date columns
         programme_data["survey_week"] = pd.to_datetime(programme_data["survey_week"], errors="coerce")
         programme_data[self.EPC_API_DATA_NAMES["inspection-date"]] = pd.to_datetime(
@@ -2324,8 +2462,11 @@ class AssetList:
                 self.CRM_HISTORICAL_CAVITY_PRODUCT["name"]
             )
         else:
-
-            programme_data = programme_data[~pd.isnull(programme_data["domna_product"])]
+            programme_data = programme_data[
+                ~pd.isnull(programme_data["domna_product"]) &
+                ~pd.isnull(programme_data["surveyor"]) &
+                ~pd.isnull(programme_data["survey_week"])
+                ]
             programme_data = programme_data.drop(columns=["solar_product", "cavity_product"])
 
         product_df = (
@@ -2356,6 +2497,10 @@ class AssetList:
         # We determine which column we should use for the UPRN
         if self.STANDARD_UPRN not in programme_data.columns:
             uprn_column = self.EPC_API_DATA_NAMES["uprn"]
+            # If we're working from the EPC, we don't have this information if the EPC is estimated
+            programme_data[uprn_column] = np.where(
+                programme_data["estimated"] == True, None, programme_data[uprn_column]
+            )
         else:
             # Use the value that has the most coverage
             uprn_column = "hubspot_uprn"
@@ -2445,6 +2590,14 @@ class AssetList:
             'Phone <CONTACT phone>': (
                 self.contact_detail_fields["phone_number"] if self.contact_detail_fields["phone_number"] else None
             ),  # TODO: Review
+            'Secondary Phone <CONTACT secondary_phone_number>': (
+                self.contact_detail_fields["secondary_phone_number"] if
+                self.contact_detail_fields["secondary_phone_number"] else None
+            ),
+            "Secondary Contact Full Name <CONTACT secondary_contact_full_name>": (
+                self.contact_detail_fields["secondary_contact_full_name"] if
+                self.contact_detail_fields["secondary_contact_full_name"] else None
+            ),
             'Full Address <LISTING full_address>': self.STANDARD_FULL_ADDRESS,
             'Address 1 <LISTING hs_address_1>': self.STANDARD_ADDRESS_1,
             'Address 2 <LISTING hs_address_2>': None,  # TODO: Don't have this for the moment
@@ -2496,7 +2649,7 @@ class AssetList:
             'Name <LINE_ITEM name>': 'Name <LINE_ITEM name>',
             'Unit price <LINE_ITEM price>': 'Unit price <LINE_ITEM price>',
             'Quantity <LINE_ITEM quantity>': 'Quantity <LINE_ITEM quantity>',
-            'Deal Owner': 'surveyor_email',
+            'Deal Owner': 'surveyor',
             'Project Code <DEAL project_code>': 'project_code',
             'Associations: Listing': 'Associations: Listing',
             'Deal Stage <DEAL dealstage>': "hubspot_status",
@@ -2523,11 +2676,12 @@ class AssetList:
 
         programme_data['Installer <DEAL installer>'] = installer_name
         programme_data['Name <LISTING hs_name>'] = (
-            programme_data['Address 1 <LISTING hs_address_1>'] + " ," + programme_data['Postcode <LISTING hs_zip>']
+            programme_data['Full Address <LISTING full_address>'] + " ," + programme_data['Postcode <LISTING hs_zip>']
         )
         # The listing owner email is the same as the surveyor email (deal owner), so they can see the listing
         programme_data['Listing Owner Email <LISTING hubspot_owner_id>'] = programme_data['Deal Owner']
         programme_data['Amount <DEAL amount>'] = 0
+        programme_data["Deal Owner"] = programme_data["Deal Owner"].str.lower()
 
         # We make sure we have all of the columns that we need
         missed_columns = [c for c in hubspot_config.CRM_UPLOAD_COLUMNS if c not in programme_data.columns]
@@ -2626,7 +2780,10 @@ class AssetList:
         logger.info("Matched %s properties to ecosurv data", len(matched))
         logger.info("%s properties in Ecosurv remain unmatched", len(unmatched))
 
-        # We now match
+        if not matched:
+            return
+
+            # We now match
         matched = pd.DataFrame(matched)
         # We'll possibly have duplicates here, where properties have been sold twice. Ww de-dupe
         if matched[self.STANDARD_LANDLORD_PROPERTY_ID].duplicated().sum():
@@ -2995,20 +3152,32 @@ class AssetList:
                 axis=1
             )
 
-            scheme_col = (
-                "AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION" if
-                "AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION" in master_data.columns else "AFFORDABLE WARMTH"
-            )
+            if "AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION" in master_data.columns:
+                scheme_col = "AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION"
+            elif "AFFORDABLE WARMTH" in master_data.columns:
+                scheme_col = "AFFORDABLE WARMTH"
+            else:
+                scheme_col = "OFFICE USE ONLY"
+
             postcode_col = "POSTCODE" if "POSTCODE" in master_data.columns else "Post Code"
-            house_no_col = 'NO.' if 'NO.' in master_data.columns else "NO"
-            property_type_col = (
-                "PROPERTY TYPE      As per table emailed" if
-                "PROPERTY TYPE      As per table emailed" in
-                master_data.columns else "PROPERTY TYPE As per table emailed"
-            )
+            if 'NO.' in master_data.columns:
+                house_no_col = 'NO.'
+            elif "NO" in master_data.columns:
+                house_no_col = 'NO'
+            else:
+                house_no_col = "NUMBER"
+
+            if "PROPERTY TYPE      As per table emailed" in master_data.columns:
+                property_type_col = "PROPERTY TYPE      As per table emailed"
+            elif "PROPERTY TYPE As per table emailed" in master_data.columns:
+                property_type_col = "PROPERTY TYPE As per table emailed"
+            else:
+                property_type_col = "PROPERTY TYPE (SEE DEEMED SCORES SHEET) Eg. 3W_Flat_1 (As per Matrix)"
+
             measure_mix_col = "MEASURE COMBO"
             installer_notes_col = "INSTALLERS NOTES ; REASONS FOR CANCELLATIONS"
             installer_col = "INSTALLER"
+            town_colname = "TOWN" if "TOWN" in master_data.columns else 'Town/Area'
 
             logger.info("Matching master data to asset list")
             matched = []
@@ -3098,7 +3267,8 @@ class AssetList:
                             df = df[
                                 df[self.STANDARD_FULL_ADDRESS].str.lower().apply(
                                     lambda x: process.extractOne(
-                                        " ".join([row[house_no_col], row["Street / Block Name"], row["TOWN"]]).lower(),
+                                        " ".join(
+                                            [row[house_no_col], row["Street / Block Name"], row[town_colname]]).lower(),
                                         x
                                     )[1]
                                 ) > 90
diff --git a/asset_list/app.py b/asset_list/app.py
index 3c9176ca..08164c19 100644
--- a/asset_list/app.py
+++ b/asset_list/app.py
@@ -2,8 +2,6 @@ import os
 import json
 import pandas as pd
 from pprint import pprint
-import msgpack
-from utils.s3 import read_from_s3
 from asset_list.AssetList import AssetList
 from asset_list.mappings.property_type import PROPERTY_MAPPING
 from asset_list.mappings.built_form import BUILT_FORM_MAPPINGS
@@ -62,27 +60,28 @@ def app():
     Property UPRN
     """
 
-    # Stori
-    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Storicymru"
-    data_filename = "Asset list - for analysis.xlsx"
-    sheet_name = "SAP and Costs Calculations"
+    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Calico"
+    data_filename = "07.04 CALICO - Final List.xlsx"
+    asset_list_header = 2
+    sheet_name = "Final List"
     postcode_column = 'Postcode'
-    fulladdress_column = "Address1"
-    address1_column = None
-    address1_method = "house_number_extraction"
-    address_cols_to_concat = []
+    fulladdress_column = None
+    address1_column = "Property Number / Name"
+    address1_method = None
+    address_cols_to_concat = [
+        "Property Number / Name",
+        "Street",
+        "Town"
+    ]
     missing_postcodes_method = None
-    landlord_year_built = "Age"
+    landlord_year_built = "NROSH Estimated Build Date"
     landlord_os_uprn = None
-    landlord_property_type = "TYPE"
-    landlord_built_form = "AGE / DETACHMENT"
-    landlord_wall_construction = "WALL"
-    landlord_roof_construction = "LOFT INSULATION"
-    landlord_heating_system = "BOILER"
-    landlord_existing_pv = "SOLAR PV"
-    landlord_property_id = "UPRN"
-    landlord_sap = "Current SAP Rating"
-    landlord_block_reference = None
+    landlord_property_type = "Asset Type"
+    landlord_built_form = None
+    landlord_wall_construction = "Wall Type"
+    landlord_heating_system = "Boiler Type"
+    landlord_existing_pv = None
+    landlord_property_id = "Asset Reference"
     outcomes_filename = []
     outcomes_sheetname = []
     outcomes_postcode = []
@@ -90,46 +89,126 @@ def app():
     outcomes_id = []
     outcomes_address = []
     master_filepaths = []
-    master_to_asset_list_filepath = None
     master_id_colnames = []
-    phase = False
+    master_to_asset_list_filepath = None
+    landlord_roof_construction = None
+    landlord_block_reference = None
+    landlord_sap = "Current Efficiency Rating - Score"
+    phase = None
     ecosurv_landlords = None
 
-    # For ACIS - programme re-build
-    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/ACIS/ACIS Full Programme Review March 2025"
-    data_filename = "ACIS asset list.xlsx"
-    sheet_name = "Assets"
-    address1_column = "House No"
-    postcode_column = "Postcode"
-    landlord_property_id = "UPRN"
-    fulladdress_column = None
-    address_cols_to_concat = ["House No", "Street", "Town"]
-    missing_postcodes_method = None
-    address1_method = None
-    landlord_year_built = "YEAR BUILT"
-    landlord_os_uprn = None
-    landlord_property_type = "Property type"
-    landlord_built_form = None
-    landlord_wall_construction = "Wall Constuction"
-    landlord_roof_construction = None
-    landlord_sap = None
-    landlord_heating_system = "Heating"
-    landlord_existing_pv = None
-    outcomes_filename = [os.path.join(data_folder, "ACIS Group - 25.11.2024 - outcomes.xlsx")]
-    outcomes_sheetname = ["Feedback"]
-    outcomes_postcode = ["Postcode"]
-    outcomes_address = ["Address"]
-    outcomes_houseno = ["No"]
-    outcomes_id = [None]
-    master_filepaths = [
-        os.path.join(data_folder, "ECO 3 -Table 1.csv"),
-        os.path.join(data_folder, "ECO 4 -Table 1.csv"),
-    ]
-    master_id_colnames = [None, None]
-    master_to_asset_list_filepath = None
-    phase = False
-    ecosurv_landlords = None
-    landlord_block_reference = None
+    # data_folder = (
+    #     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/2018 Asset
+    #     List"
+    # )
+    # data_filename = "LIVEWEST STOCK - 23rd October 2018.xlsx"
+    # sheet_name = "Assets"
+    # postcode_column = 'Postcode'
+    # fulladdress_column = "Address"
+    # address1_column = None
+    # address1_method = "house_number_extraction"
+    # address_cols_to_concat = []
+    # missing_postcodes_method = None
+    # landlord_year_built = "Build Year"
+    # landlord_os_uprn = None
+    # landlord_property_type = "Property Archetype"
+    # landlord_built_form = None
+    # landlord_wall_construction = None
+    # landlord_heating_system = "Heating Fuel Type"
+    # landlord_existing_pv = None
+    # landlord_property_id = "Uprn - DO NOT DELETE"
+    # outcomes_filename = [
+    #     os.path.join(data_folder, "RT - LiveWest.xlsx")
+    # ]
+    # outcomes_sheetname = ["Feedback"]
+    # outcomes_postcode = ["Poscode"]
+    # outcomes_houseno = ["No."]
+    # outcomes_id = ["UPRN"]
+    # outcomes_address = ["Address"]
+    # master_filepaths = [
+    #     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/Rolling
+    #     Master "
+    #     "- redacted for analysis/CAVITY-Table 1.csv"
+    # ]
+    # master_id_colnames = [None]
+    # master_to_asset_list_filepath = None
+    # landlord_roof_construction = None
+    # landlord_block_reference = None
+    # landlord_sap = None
+    # phase = None
+    # ecosurv_landlords = "livewest|live west"
+
+    # data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March "
+    #                "2025/Livewest Asset List (Original) - csv")
+    # data_filename = "Report-Table 1.csv"
+    # sheet_name = None
+    # postcode_column = 'Postcode'
+    # fulladdress_column = "T1_Address"
+    # address1_column = None
+    # address1_method = "house_number_extraction"
+    # address_cols_to_concat = []
+    # missing_postcodes_method = None
+    # landlord_year_built = "Build Yr"
+    # landlord_os_uprn = None
+    # landlord_property_type = "T1_AssetType"
+    # landlord_built_form = "T1_AssetType"
+    # landlord_wall_construction = "Wall Type Cavity"
+    # landlord_heating_system = "Heating Fuel"
+    # landlord_existing_pv = None
+    # landlord_property_id = "T1_UPRN"
+    # outcomes_filename = [
+    #     os.path.join(data_folder, "RT - LiveWest.xlsx")
+    # ]
+    # outcomes_address = ["Address"]
+    # outcomes_sheetname = ["Feedback"]
+    # outcomes_postcode = ["Poscode"]
+    # outcomes_houseno = ["No."]
+    # outcomes_id = ["UPRN"]
+    # master_filepaths = [
+    #     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/Rolling
+    #     Master "
+    #     "- redacted for analysis/CAVITY-Table 1.csv"
+    # ]
+    # master_id_colnames = [None]
+    # master_to_asset_list_filepath = None
+    # landlord_roof_construction = None
+    # landlord_block_reference = None
+    # landlord_sap = None
+    # phase = None
+    # ecosurv_landlords = "livewest|live west"
+
+    # Stori
+    # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Storicymru"
+    # data_filename = "Asset list - for analysis.xlsx"
+    # sheet_name = "SAP and Costs Calculations"
+    # postcode_column = 'Postcode'
+    # fulladdress_column = "Address1"
+    # address1_column = None
+    # address1_method = "house_number_extraction"
+    # address_cols_to_concat = []
+    # missing_postcodes_method = None
+    # landlord_year_built = "Age"
+    # landlord_os_uprn = None
+    # landlord_property_type = "TYPE"
+    # landlord_built_form = "AGE / DETACHMENT"
+    # landlord_wall_construction = "WALL"
+    # landlord_roof_construction = "LOFT INSULATION"
+    # landlord_heating_system = "BOILER"
+    # landlord_existing_pv = "SOLAR PV"
+    # landlord_property_id = "UPRN"
+    # landlord_sap = "Current SAP Rating"
+    # landlord_block_reference = None
+    # outcomes_filename = []
+    # outcomes_sheetname = []
+    # outcomes_postcode = []
+    # outcomes_houseno = []
+    # outcomes_id = []
+    # outcomes_address = []
+    # master_filepaths = []
+    # master_to_asset_list_filepath = None
+    # master_id_colnames = []
+    # phase = False
+    # ecosurv_landlords = None
 
     # Thrive - reconciliation
     # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation"
@@ -201,7 +280,7 @@ def app():
 
     asset_list = AssetList(
         local_filepath=os.path.join(data_folder, data_filename),
-        header=0,
+        header=asset_list_header,
         sheet_name=sheet_name,
         address1_colname=address1_column,
         postcode_colname=postcode_column,
@@ -294,7 +373,7 @@ def app():
     epc_api_only = False
     force_retrieve_data = False
     skip = None  # Used to skip already completed chunks
-    chunk_size = 5000
+    chunk_size = 2000
     filename = "Chunk {i}.csv"
     download_folder = os.path.join(data_folder, "Chunks")
     if not os.path.exists(download_folder):
@@ -477,18 +556,13 @@ def app():
 
     asset_list.extract_attributes()
 
-    cleaned = read_from_s3(
-        s3_file_name="cleaned_epc_data/cleaned.bson",
-        bucket_name="retrofit-data-dev"
-    )
-    cleaned = msgpack.unpackb(cleaned, raw=False)
-
-    asset_list.identify_worktypes(cleaned)
+    asset_list.identify_worktypes()
 
     pprint(asset_list.work_type_figures)
 
     # We now flag the status of the property
     asset_list.label_property_status()
+    asset_list.analyse_geographies()
 
     # Store as an excel
     filename = os.path.join(data_folder, ".".join(data_filename.split(".")[:-1])) + " - Standardised.xlsx"
@@ -510,3 +584,6 @@ def app():
 
         if not asset_list.ecosurv_no_match.empty:
             asset_list.ecosurv_no_match.to_excel(writer, sheet_name="Unmatched Ecosurv", index=False)
+
+        if not asset_list.geographical_areas.empty:
+            asset_list.geographical_areas.to_excel(writer, sheet_name="Geographical Areas", index=False)
diff --git a/asset_list/hubspot/config.py b/asset_list/hubspot/config.py
index 01540b7b..5110fb5f 100644
--- a/asset_list/hubspot/config.py
+++ b/asset_list/hubspot/config.py
@@ -46,7 +46,9 @@ class Installer(Enum):
 CRM_UPLOAD_COLUMNS = [
     'Name <LISTING hs_name>', 'Associations: Listing', 'Company Domain Name <COMPANY domain>',
     'Email <CONTACT email>', 'First Name <CONTACT firstname>', 'Last Name <CONTACT lastname>',
-    'Phone <CONTACT phone>', 'Listing Owner Email <LISTING hubspot_owner_id>',
+    'Phone <CONTACT phone>', 'Secondary Phone <CONTACT secondary_phone_number>',
+    'Secondary Contact Full Name <CONTACT secondary_contact_full_name>',
+    'Listing Owner Email <LISTING hubspot_owner_id>',
     'Full Address <LISTING full_address>', 'Address 1 <LISTING hs_address_1>',
     'Address 2 <LISTING hs_address_2>', 'Postcode <LISTING hs_zip>',
     'Property Type <LISTING property_type>', 'Property Sub Type <LISTING property_sub_type>',
diff --git a/asset_list/hubspot/prepare_for_hubspot.py b/asset_list/hubspot/prepare_for_hubspot.py
index ee3bc65d..9ffe24ca 100644
--- a/asset_list/hubspot/prepare_for_hubspot.py
+++ b/asset_list/hubspot/prepare_for_hubspot.py
@@ -18,31 +18,39 @@ def app():
     """
 
     # inputs:
-    reconcile_programme = True  # If True, the hubspot upload will include all properties with a project code
-    customer_domain = "https://thrivehomes.org.uk"
+    reconcile_programme = False  # If True, the hubspot upload will include all properties with a project code
+    customer_domain = "https://sandwell.gov.uk"
     installer_name = "J & J CRUMP"
     asset_list_filepath = (
-        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Testing Hubspot Upload/Thrive Programme - "
-        "Hubspot Upload 3.xlsx"
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Sandwell/Hubspot/Sandwell BC - Full Asset List MAIN - "
+        "Standardised.xlsx"
     )
+    asset_list_sheet_name = "Proposed Program"
+    asset_list_header = 1
+
     contact_details_filepath = (
-        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Testing Hubspot Upload/Sample contact "
-        "details.xlsx"
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Sandwell/Hubspot/Sandwell Contact Details.xlsx"
     )
     contacts_sheet_name = "Sheet1"
     contacts_landlord_property_id = "landlord_property_id"
     contacts_phone_number_column = "phone_number"
+    contacts_secondary_phone_number_column = "secondary_phone_number"
+    contacts_secondary_contact_full_name = "secondary_contact_full_name"
     contacts_email_column = "email"
     contacts_fullname_column = "fullname"
     contacts_firstname_column = "firstname"
     contacts_lastname_column = "lastname"
 
-    asset_list = AssetList.load_standardised_asset_list(asset_list_filepath)
+    asset_list = AssetList.load_standardised_asset_list(
+        asset_list_filepath, asset_list_sheet_name, asset_list_header
+    )
     asset_list.load_contact_details(
         local_filepath=contact_details_filepath,
         sheet_name=contacts_sheet_name,
         landlord_property_id=contacts_landlord_property_id,
         phone_number_column=contacts_phone_number_column,
+        secondary_phone_number_column=contacts_secondary_phone_number_column,
+        secondary_contact_full_name=contacts_secondary_contact_full_name,
         email_column=contacts_email_column,
         fullname_column=contacts_fullname_column,
         firstname_column=contacts_firstname_column,
diff --git a/asset_list/mappings/heating_systems.py b/asset_list/mappings/heating_systems.py
index d2959873..1a46c429 100644
--- a/asset_list/mappings/heating_systems.py
+++ b/asset_list/mappings/heating_systems.py
@@ -27,7 +27,7 @@ STANDARD_HEATING_SYSTEMS = {
     "electric ceiling",
     "electric underfloor",
     "no heating",
-    "non-electric underfloor"
+    "non-electric underfloor",
 }
 
 HEATING_MAPPINGS = {
@@ -325,5 +325,6 @@ HEATING_MAPPINGS = {
     'GREENSTAR 24i JUNIOR': 'gas combi boiler',
     'ECO TEC PRO 24 (OLD TYPE)': 'gas combi boiler',
     'GREENSTAR 30SI COMPACT': 'gas combi boiler',
-    'BAXI DUO TEC 28 COMBI ErP': 'gas combi boiler'
+    'BAXI DUO TEC 28 COMBI ErP': 'gas combi boiler',
+    'Not applicable for this asset type': 'unknown'
 }
diff --git a/asset_list/mappings/property_type.py b/asset_list/mappings/property_type.py
index 1a61c3eb..bdb6580e 100644
--- a/asset_list/mappings/property_type.py
+++ b/asset_list/mappings/property_type.py
@@ -255,5 +255,16 @@ PROPERTY_MAPPING = {
     'Unit': 'unknown',
     'HOUSE (3 STOREY)': 'house',
     'FLAT GROUND FLOOR': 'flat',
-    'FLAT TOP FLOOR': 'flat'
+    'FLAT TOP FLOOR': 'flat',
+
+    'SHARED HOUSE': 'house',
+    'MAISONETTE': 'maisonette',
+    'DIRECT ACCESS HOSTEL': 'other',
+    'Day centre': 'other',
+    'Care home': 'other',
+    'BLOCK (Communal)': 'block of flats',
+    'SHOP': 'other',
+    'Office Block': 'other',
+    'BLOCK (Non-Communal)': 'block of flats',
+    'Refuge': 'other'
 }
diff --git a/asset_list/mappings/walls.py b/asset_list/mappings/walls.py
index 5baabe6f..8be8575a 100644
--- a/asset_list/mappings/walls.py
+++ b/asset_list/mappings/walls.py
@@ -242,6 +242,11 @@ WALL_CONSTRUCTION_MAPPINGS = {
     'SYSTEM BUILD 100MM EWI': 'system built',
     'CAVITY A/B 260MM': "uninsulated cavity",
     'CAVITY A/B 270MM': "uninsulated cavity",
-    'CAVITY A/B  250MM': "uninsulated cavity"
-
+    'CAVITY A/B  250MM': "uninsulated cavity",
+    'System': 'system built',
+    'Sandstone/Limestone': 'sandstone or limestone',
+    'No Fines': 'system built',
+    'Granite/Whinstone': 'granite or whinstone',
+    'Not applicable to this asset type': 'unknown',
+    'Steel Frame': 'system built'
 }
diff --git a/etl/customers/Futures Housing/validation_surveys.py b/etl/customers/Futures Housing/validation_surveys.py
new file mode 100644
index 00000000..1f8e6cfa
--- /dev/null
+++ b/etl/customers/Futures Housing/validation_surveys.py	
@@ -0,0 +1,167 @@
+import pandas as pd
+
+
+def get_band(sap_score_number):
+    bands = [
+        ("High_A", 96, float("inf")),
+        ("Low_A", 92, 96),
+        ("High_B", 86, 92),
+        ("Low_B", 81, 86),
+        ("High_C", 74.5, 81),
+        ("Low_C", 69, 74.5),
+        ("High_D", 61.5, 69),
+        ("Low_D", 55, 61.5),
+        ("High_E", 46.5, 55),
+        ("Low_E", 39, 46.5),
+        ("High_F", 29.5, 39),
+        ("Low_F", 21, 29.5),
+        ("High_G", 10.5, 21),
+        ("Low_G", 1, 10.5),
+    ]
+
+    for band, lower, upper in bands:
+        if lower <= sap_score_number < upper:
+            return band
+
+    return None
+
+
+def classify_floor_area(floor_area):
+    if floor_area <= 72:
+        return "0-72"
+
+    if floor_area <= 97:
+        return "73-97"
+
+    if floor_area <= 199:
+        return "98-199"
+
+    return "200+"
+
+
+asset_list = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Wates - Futures Housing/ECO 4 Wates - Standardised.xlsx",
+    sheet_name="Standardised Asset List"
+)
+
+asset_list["starting_sap_band"] = asset_list["epc_sap_score_on_register"].apply(get_band)
+asset_list["floor_area_band"] = asset_list["epc_total_floor_area"].apply(classify_floor_area)
+
+# Objective:
+# We need to get a reasonable estimate for the cost of works for properties that are EPC D or below
+#
+# Therefore:
+# 1) We know that some properties that are currently EPC C may* qualify for ECO4 funding. Right now, we aren't trying
+# to determine which EPC C properties or above will qualify, just how much works will cost for properties that do
+# qualify
+# 2) We cannot survey everything, so before we undertake too much risk we should produce some costings for each of the
+# archetypes
+#
+# Driving Factors:
+# 1) Floor area band & starting SAP band - this will determine how much funding is produced
+# 2) Heating system - this will determine if the property needs a heating upgrade or not
+
+
+archetypes = asset_list[asset_list["epc_sap_score_on_register"] <= 68].groupby(
+    ["floor_area_band", "starting_sap_band", "landlord_heating_system"]
+)["landlord_property_id"].nunique().reset_index()
+archetypes = archetypes.rename(columns={"landlord_property_id": "n_properties"})
+archetypes = archetypes.sort_values("n_properties", ascending=False)
+archetypes["running_total"] = archetypes["n_properties"].cumsum()
+archetypes["cumulative_percentage"] = archetypes["running_total"] / archetypes["n_properties"].sum() * 100
+
+archetypes["is_electric"] = archetypes["landlord_heating_system"] != "boiler - other fuel"
+archetypes["needs_heating_upgrade"] = archetypes["landlord_heating_system"].isin(
+    ["boiler - other fuel", "electric storage heaters"]
+)
+archetypes = archetypes.reset_index(drop=True)
+
+# Right now, they don't want to treat the oil properties so we'll exclude them for the moment
+electric_heated_archetypes = (
+    archetypes[archetypes["landlord_heating_system"] != "boiler - other fuel"].copy().reset_index(drop=True)
+)
+electric_heated_archetypes["running_total"] = electric_heated_archetypes["n_properties"].cumsum()
+electric_heated_archetypes["cumulative_percentage"] = (
+    electric_heated_archetypes["running_total"] / electric_heated_archetypes["n_properties"].sum() * 100
+)
+
+archetypes["archetype_id"] = archetypes.index  # must exist before the merge and the oil copy below
+
+# The main properties that need validation surveys are properties that require a heating upgrade
+electric_heated_archetypes = electric_heated_archetypes[electric_heated_archetypes["needs_heating_upgrade"]]
+electric_heated_archetypes = electric_heated_archetypes.merge(
+    archetypes[["starting_sap_band", "floor_area_band", "landlord_heating_system", "archetype_id"]],
+    how="left", on=["starting_sap_band", "floor_area_band", "landlord_heating_system"]
+)
+
+oil_archetypes = archetypes[
+    archetypes["landlord_heating_system"] == "boiler - other fuel"
+    ].copy().reset_index(drop=True)
+
+asset_list = asset_list.merge(
+    archetypes[["starting_sap_band", "floor_area_band", "landlord_heating_system", "archetype_id"]],
+    how="left", on=["starting_sap_band", "floor_area_band", "landlord_heating_system"]
+)
+
+properties_for_verification = asset_list[
+    asset_list["archetype_id"].isin(electric_heated_archetypes["archetype_id"].values)
+].copy()
+properties_for_verification["postal_region"] = properties_for_verification["domna_postcode"].str.split(" ").str[
+    0].str.strip()
+
+properties_for_verification["epc_age"] = (
+    pd.Timestamp.now() - pd.to_datetime(properties_for_verification["epc_inspection_date"])
+).dt.days
+
+# We also survey 2 oil heater properties, so we take the 2 most prevalent archetypes
+archetypes_for_survey = pd.concat(
+    [electric_heated_archetypes, oil_archetypes.head(2)]
+)
+
+# Take the property with the oldest EPC, by region. Prioritise estimated properties
+sample = []
+for _, config in archetypes_for_survey.iterrows():
+    properties = asset_list[
+        (asset_list["archetype_id"] == config["archetype_id"]) &
+        (asset_list["floor_area_band"] == config["floor_area_band"]) &
+        (asset_list["starting_sap_band"] == config["starting_sap_band"])
+        ]
+
+    if pd.isnull(properties["epc_inspection_date"]).sum():
+        sample_property = properties[pd.isnull(properties["epc_inspection_date"])].head(1).to_dict("records")
+    else:
+        # Take the property with the oldest EPC
+        sample_property = properties.sort_values("epc_inspection_date", ascending=True).head(1).to_dict("records")
+
+    sample.extend(sample_property)
+
+sample = pd.DataFrame(sample)
+
+sample = sample[
+    [
+        "landlord_property_id", "epc_inspection_date", "epc_sap_score_on_register", "starting_sap_band",
+        "floor_area_band", "landlord_heating_system", "domna_postcode", "domna_full_address", "archetype_id"
+    ]
+]
+
+archetypes = asset_list[["landlord_property_id", "archetype_id"]].copy()
+archetypes["archetype_id"] = archetypes["archetype_id"].astype(str)
+
+filename = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Wates - Futures Housing/archetypes.xlsx"
+# Store the data in two tabs. One mapping each property to its archetype and the second with the survey sample
+
+with pd.ExcelWriter(filename) as writer:
+    archetypes.to_excel(writer, sheet_name="Archetypes", index=False)
+    sample.to_excel(writer, sheet_name="Survey Sample", index=False)
+
+# We store this
+
+# Questions:
+# 1) If futures are considering changing properties that have oil heating systems, we could include them and
+# we have 39 total archetypes. Otherwise, we have 25 archetypes
+# 2) Can futures provide us with any information on the model of air source heat pumps and associated controls they're
+# using
+
+# Recommendations:
+# 1) If they are willing to upgrade the heating systems of the oil properties, surveying 18 properties will cover
+#

From 383a4852e207b2879fd5de21316a1a8fda9ceb3f Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 19 Jun 2025 08:15:01 +0100
Subject: [PATCH 12/14] created block splitting code for calico asset list

---
 asset_list/AssetList.py | 197 ++++++++++++++++++++++++----------------
 1 file changed, 120 insertions(+), 77 deletions(-)

diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py
index 62016239..acca0c58 100644
--- a/asset_list/AssetList.py
+++ b/asset_list/AssetList.py
@@ -1025,6 +1025,15 @@ class AssetList:
                 self.standardised_asset_list[self.STANDARD_SAP]
             )
 
+        has_blocks_of_flats = (self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "block of flats").sum()
+
+        # Perform block splitting, ahead of fetching the EPC data
+        # If we have blocks of flats, without a landlord block reference, we create this
+        self.fill_landlord_block_reference(has_blocks_of_flats)
+
+        # If we have blocks of flats, we split these out into individual units.
+        self.split_blocks()
+
     def merge_data(self, df: pd.DataFrame):
         """
         Used to insert data into the standardised asset list, based on the domna property id
@@ -1270,6 +1279,12 @@ class AssetList:
                 )
             )
 
+            self.standardised_asset_list["SAP Category"] = np.where(
+                pd.isnull(self.standardised_asset_list[self.STANDARD_SAP]),
+                "SAP Unknown",
+                self.standardised_asset_list["SAP Category"]
+            )
+
         else:
             # We add a SAP category for all work type identification
             # We break into 4 categories (54 or less, 55-68, 69-74, 75 or more)
@@ -1290,6 +1305,11 @@ class AssetList:
                     ),
                 )
             )
+            self.standardised_asset_list["SAP Category"] = np.where(
+                pd.isnull(self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]]),
+                "SAP Unknown",
+                self.standardised_asset_list["SAP Category"]
+            )
 
         # Before we being, we identify if a property has solar already as we use this
         # for identifying cavity jobs
@@ -2040,6 +2060,100 @@ class AssetList:
             self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE]
         )
 
+    def split_blocks(self):
+        """
+        Where we have a single row that is a block of flats, we split this into multiple rows,
+        one for each unit. The data that we have will be copied across rows
+        :return:
+        """
+
+        blocks = self.standardised_asset_list[
+            self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "block of flats"
+            ].copy()
+
+        if blocks.empty:
+            return
+
+        RANGE_RE = re.compile(r'\b(\d+[A-Za-z]?)\s*[-–]\s*(\d+[A-Za-z]?)\b')
+        NUM_RE = re.compile(r'\b\d+[A-Za-z]?\b')  # captures 12, 12A, etc.
+
+        expanded_rows = []
+        for _, row in blocks.iterrows():
+            addr = str(row[self.STANDARD_ADDRESS_1])
+
+            # 1 ─ Range  (e.g. 1-7)
+            m_range = RANGE_RE.search(addr)
+            if m_range:
+                start, end = m_range.groups()
+                start, end = int(re.match(r'\d+', start)[0]), int(re.match(r'\d+', end)[0])
+                if start > end or (end - start) > 100:
+                    raise ValueError(f"Suspicious range '{addr}'")
+                for n in range(start, end + 1):
+                    new = row.copy()
+                    new_addr = RANGE_RE.sub(str(n), addr, count=1)
+                    original_full_address = new[self.STANDARD_FULL_ADDRESS]
+                    new_full_address = original_full_address.replace(addr, new_addr)
+                    new[self.STANDARD_ADDRESS_1] = new_addr
+                    new[self.STANDARD_FULL_ADDRESS] = new_full_address
+                    new[self.STANDARD_PROPERTY_TYPE] = "flat"
+                    # Keep a record of the previous address 1
+                    new["block_address1"] = addr
+                    new["block_full_address"] = original_full_address
+                    new["is_expended_block"] = True
+                    # We update the full address
+
+                    new[self.DOMNA_PROPERTY_ID] = f"{row[self.DOMNA_PROPERTY_ID]}-{new_addr}"
+                    expanded_rows.append(new)
+                continue
+
+            # 2 ─ Explicit list  (e.g. 1, 2, 5 Block)
+            nums = NUM_RE.findall(addr)
+            if len(nums) > 1 and ',' in addr:
+                for n in nums:
+                    new = row.copy()
+                    new_addr = re.sub(NUM_RE, n, addr, count=1)  # replace the first number only
+                    new[self.STANDARD_ADDRESS_1] = new_addr
+                    new[self.DOMNA_PROPERTY_ID] = f"{row[self.DOMNA_PROPERTY_ID]}-{new_addr}"
+                    expanded_rows.append(new)
+                continue
+
+            # 3 ─ Single number or no number, treat as individual dwelling
+            if (len(nums) == 1) or not nums:
+                expanded_rows.append(row)
+                continue
+
+            # Anything else with digits is unrecognised
+            raise NotImplementedError(f"Unhandled block format: '{addr}'")
+
+        expanded_blocks = pd.DataFrame(expanded_rows)
+
+        # We drop the blocks from the standardised asset list and append on the expanded blocks
+        self.standardised_asset_list = self.standardised_asset_list[
+            self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] != "block of flats"
+            ]
+
+        self.standardised_asset_list = pd.concat(
+            [self.standardised_asset_list, expanded_blocks],
+            ignore_index=True
+        )
+
+        # As a final clean up, for any blocks that are size 1, we don't include a project code
+        sizes = (
+            expanded_blocks
+            .groupby(self.STANDARD_BLOCK_REFERENCE)[self.DOMNA_PROPERTY_ID]
+            .nunique()
+            .reset_index()
+        )
+        size_1 = sizes[sizes[self.DOMNA_PROPERTY_ID] <= 1]
+        # Clear the block reference for the size 1 blocks in the standardised asset list
+        self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE] = np.where(
+            self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE].isin(
+                size_1[self.STANDARD_BLOCK_REFERENCE].values
+            ),
+            None,
+            self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE]
+        )
+
     def label_property_status(self):
         """
         This function is designed to be run after identify_worktypes() has been run, and will create a "property_status"
@@ -2081,85 +2195,14 @@ class AssetList:
         # These blocks may be refecence via the landlord_block_reference field, or by property types being
         # blocks of flats
         has_landlord_block_reference = self.landlord_block_reference is not None
-        has_blocks_of_flats = (self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "block of flats").sum()
 
-        if has_landlord_block_reference or has_blocks_of_flats:
+        if has_landlord_block_reference:
 
-            # If we blocks of flats, without a landlord block reference, we create this
-            self.fill_landlord_block_reference(has_blocks_of_flats)
-
-            self.split_blocks(has_blocks_of_flats)
-
-            def split_blocks(self, has_blocks_of_flats):
-                """
-                Where we have a single row that is a block of flats, we split this into multiple rows,
-                one for each unit. The data that we have will be copied across rows
-                :param self:
-                :param has_blocks_of_flats:
-                :return:
-                """
-
-                blocks = self.standardised_asset_list[
-                    self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "block of flats"
-                    ].copy()
-
-                RANGE_RE = re.compile(r'\b(\d+[A-Za-z]?)\s*[-–]\s*(\d+[A-Za-z]?)\b')
-                NUM_RE = re.compile(r'\b\d+[A-Za-z]?\b')  # captures 12, 12A, etc.
-
-                expanded_rows = []
-                for _, row in blocks.iterrows():
-                    addr = str(row[self.STANDARD_ADDRESS_1])
-
-                    # 1 ─ Range  (e.g. 1-7)
-                    m_range = RANGE_RE.search(addr)
-                    if m_range:
-                        start, end = m_range.groups()
-                        start, end = int(re.match(r'\d+', start)[0]), int(re.match(r'\d+', end)[0])
-                        if start > end or (end - start) > 100:
-                            raise ValueError(f"Suspicious range '{addr}'")
-                        for n in range(start, end + 1):
-                            new = row.copy()
-                            new_addr = RANGE_RE.sub(str(n), addr, count=1)
-                            new[self.STANDARD_ADDRESS_1] = new_addr
-                            new[self.DOMNA_PROPERTY_ID] = f"{row[self.DOMNA_PROPERTY_ID]}-{new_addr}"
-                            expanded_rows.append(new)
-                        continue
-
-                    # 2 ─ Explicit list  (e.g. 1, 2, 5 Block)
-                    nums = NUM_RE.findall(addr)
-                    if len(nums) > 1 and ',' in addr:
-                        for n in nums:
-                            new = row.copy()
-                            new_addr = re.sub(NUM_RE, n, addr, count=1)  # replace the first number only
-                            new[self.STANDARD_ADDRESS_1] = new_addr
-                            new[self.DOMNA_PROPERTY_ID] = f"{row[self.DOMNA_PROPERTY_ID]}-{new_addr}"
-                            expanded_rows.append(new)
-                        continue
-
-                    # 3 ─ Single number → treat as individual dwelling
-                    if len(nums) == 1:
-                        expanded_rows.append(row)
-                        continue
-
-                    # 4 ─ No numbers → keep as-is
-                    if not nums:
-                        expanded_rows.append(row)
-                        continue
-
-                    # Anything else with digits is unrecognised
-                    raise NotImplementedError(f"Unhandled block format: '{addr}'")
-
-                expanded_blocks = pd.DataFrame(expanded_rows)
-
-                # We drop the blocks from the standardised asset list and append on the expanded blocks
-                self.standardised_asset_list = self.standardised_asset_list[
-                    self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] != "block of flats"
-                    ]
-
-                self.standardised_asset_list = pd.concat(
-                    [self.standardised_asset_list, expanded_blocks],
-                    ignore_index=True
-                )
+            # # If we blocks of flats, without a landlord block reference, we create this
+            # self.fill_landlord_block_reference(has_blocks_of_flats)
+            #
+            # # If we have blocks of flats, we split these out into individual units
+            # self.split_blocks()
 
             # For blocks that have a 50% allocation, we create project codes
             self.block_analysis()

From 980f439f49e2a1504099cd9204e79cbba328e951 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 19 Jun 2025 22:48:25 +0100
Subject: [PATCH 13/14] debugging calico epc search to handle the strict blocks

---
 asset_list/AssetList.py                   | 94 ++++++++++++++++-------
 asset_list/app.py                         |  4 -
 asset_list/hubspot/prepare_for_hubspot.py | 14 ++--
 asset_list/utils.py                       |  8 +-
 backend/Funding.py                        | 93 ++++++++++++++++++----
 backend/SearchEpc.py                      | 27 ++++++-
 backend/tests/test_funding.py             | 52 +++++++++++++
 7 files changed, 236 insertions(+), 56 deletions(-)
 create mode 100644 backend/tests/test_funding.py

diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py
index acca0c58..130d1242 100644
--- a/asset_list/AssetList.py
+++ b/asset_list/AssetList.py
@@ -298,7 +298,7 @@ class AssetList:
         "Has the property been re-walled?", "Is the property tile hung?", "Does the property have a render?",
         "Does the property have cladding?", "Gable Wall Obstructions",
         "Does the property have foliage that needs removal?",
-        "Potential unsafe environment", "Date of Inspection"
+        "Potential unsafe environment", "Date of Inspection", "Borescoped?"
     ]
 
     NON_INTRUSIVES_ELIGIBILITY_COLUMN = "Eligibility (Red/Yellow/Green)"
@@ -354,6 +354,7 @@ class AssetList:
     # Work type prefixes:
     # Empties
     EMPTY_CAVITY_NON_INTRUSIVE = "Non-Intrusive Data Shows Empty Cavity"
+    EMPTY_CAVITY_NON_INTRUSIVE_YEAR = 'Non-Intrusive Data Shows Empty Cavity, built after 2002'
     EPC_EMPTY_INSPECTIONS_RETRO_DRILLED = "EPC Shows Empty Cavity, inspections show retro drilled"
     EPC_EMPTY_INSPECTIONS_FILLED = "EPC Shows Empty Cavity, inspections show filled or other"
     EPC_EMPTY_INSPECTIONS_FILLED_AT_BUILD = "EPC Shows Empty Cavity, inspections show filled at build"
@@ -1280,7 +1281,8 @@ class AssetList:
             )
 
             self.standardised_asset_list["SAP Category"] = np.where(
-                pd.isnull(self.standardised_asset_list[self.STANDARD_SAP]),
+                pd.isnull(self.standardised_asset_list[self.STANDARD_SAP]) &
+                pd.isnull(self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]]),
                 "SAP Unknown",
                 self.standardised_asset_list["SAP Category"]
             )
@@ -1745,8 +1747,6 @@ class AssetList:
             self.standardised_asset_list["solar_epc_loft_needs_topup"]
         )
 
-        z = self.standardised_asset_list[self.standardised_asset_list["landlord_property_id"] == "DW150120029"]
-
         self.standardised_asset_list["solar_eligible"] = (
             # Property isn't a flag
             not_a_flat &
@@ -2035,14 +2035,15 @@ class AssetList:
         self.standardised_asset_list["cavity_reason"] = np.where(
             (self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "flat") &
             (self.standardised_asset_list["SAP Category"] == "SAP Rating 76 or more"),
-            None,
+            self.standardised_asset_list["cavity_reason"] + " - (unlikely to quality)",
             self.standardised_asset_list["cavity_reason"]
         )
+
         # Split cavity_reason on the colon and check if the first part is equal to one of the two options above
         # that indicates empties
         self.standardised_asset_list["identified_empty_cavity"] = (
             self.standardised_asset_list["cavity_reason"].str.split(":").str[0].isin(
-                [self.EMPTY_CAVITY_NON_INTRUSIVE, self.EPC_EMPTY]
+                [self.EMPTY_CAVITY_NON_INTRUSIVE, self.EMPTY_CAVITY_NON_INTRUSIVE_YEAR, self.EPC_EMPTY]
             )
         )
 
@@ -2078,6 +2079,7 @@ class AssetList:
         NUM_RE = re.compile(r'\b\d+[A-Za-z]?\b')  # captures 12, 12A, etc.
 
         expanded_rows = []
+
         for _, row in blocks.iterrows():
             addr = str(row[self.STANDARD_ADDRESS_1])
 
@@ -2194,16 +2196,9 @@ class AssetList:
         # if we have any blocks, where work is eligible, we flag them now
         # These blocks may be refecence via the landlord_block_reference field, or by property types being
         # blocks of flats
-        has_landlord_block_reference = self.landlord_block_reference is not None
+        has_landlord_block_reference = sum(~pd.isnull(self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE]))
 
         if has_landlord_block_reference:
-
-            # # If we blocks of flats, without a landlord block reference, we create this
-            # self.fill_landlord_block_reference(has_blocks_of_flats)
-            #
-            # # If we have blocks of flats, we split these out into individual units
-            # self.split_blocks()
-
             # For blocks that have a 50% allocation, we create project codes
             self.block_analysis()
             # find any block refs with more than 50% emptires
@@ -2265,13 +2260,18 @@ class AssetList:
 
         block_analysis = []
         for block_reference, group in self.standardised_asset_list.groupby(self.STANDARD_BLOCK_REFERENCE):
+
             cavity_breakdown = group["cavity_reason"].fillna("No Eligibility").value_counts(normalize=True) * 100
 
             if all(cavity_breakdown.index == "No Eligibility"):
                 continue
 
             # We check the % of empty vs not empty as right now, we're focused on empty
-            n_empties = ((group["identified_empty_cavity"] == True) & (~pd.isnull(group["cavity_reason"]))).sum()
+            n_empties = (
+                (group["identified_empty_cavity"] == True) &
+                (~pd.isnull(group["cavity_reason"])) &
+                (~group["cavity_reason"].str.contains("(unlikely to quality)", case=False, na=False, regex=False))
+            ).sum()
 
             works = group["hubspot_status"]
             above_threshold = works.map(LABEL_TO_ENUM.get).dropna()
@@ -2293,6 +2293,36 @@ class AssetList:
         block_analysis = block_analysis.fillna(0)
 
         # We flag which properties are eligible for works. We need at least 50%
+        block_analysis["Eligible for Works"] = (
+            block_analysis["Percentage of Empties"] >= 0.50
+        )
+        block_analysis = block_analysis.sort_values("Percentage of Empties", ascending=False)
+
+        # For properties that are NOT eligible, we should update the cavity reason
+        ineligible_blocks = block_analysis[
+            ~block_analysis["Eligible for Works"]
+        ]["Block Reference"].values
+
+        eligible_blocks = block_analysis[
+            block_analysis["Eligible for Works"]
+        ]["Block Reference"].values
+
+        self.standardised_asset_list["cavity_reason"] = np.where(
+            self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE].isin(ineligible_blocks),
+            self.standardised_asset_list["cavity_reason"] + " (Flat in block with less than 50% eligible)",
+            self.standardised_asset_list["cavity_reason"]
+        )
+
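+        # If the property is in a block of flats that is eligible, but the property itself is not eligible, we flag this
+        # The criteria is:
+        # - The property should be in a block of flats
+        # NOTE(review): the filter below checks block membership only, not the property's own eligibility - confirm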
+        self.standardised_asset_list["cavity_reason"] = np.where(
+            self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE].isin(eligible_blocks),
+            self.standardised_asset_list["cavity_reason"]
+            + " " + "(Flat in block with more than 50% eligible, but not eligible itself)",
+            self.standardised_asset_list["cavity_reason"]
+        )
 
         self.block_analysis_df = block_analysis
 
@@ -2434,13 +2464,13 @@ class AssetList:
         )
 
         # Format the two date columns
-        programme_data["survey_week"] = pd.to_datetime(programme_data["survey_week"], errors="coerce")
+        programme_data["survey_date"] = pd.to_datetime(programme_data["survey_date"], errors="coerce")
         programme_data[self.EPC_API_DATA_NAMES["inspection-date"]] = pd.to_datetime(
             programme_data[self.EPC_API_DATA_NAMES["inspection-date"]],
             errors="coerce"
         )
         # Convert to dd/mm/yyyy format
-        programme_data["survey_week"] = programme_data["survey_week"].dt.strftime("%d/%m/%Y")
+        programme_data["survey_date"] = programme_data["survey_date"].dt.strftime("%d/%m/%Y")
         programme_data[self.EPC_API_DATA_NAMES["inspection-date"]] = (
             programme_data[self.EPC_API_DATA_NAMES["inspection-date"]].dt.strftime("%d/%m/%Y")
         )
@@ -2457,12 +2487,14 @@ class AssetList:
             ready_to_be_scheduled = (
                 (
                     programme_data["hubspot_status"] == hubspot_config.HubspotProcessStatus.READY_TO_BE_SCHEDULED.label
-                ) & (~pd.isnull(programme_data["survey_week"]) & ~pd.isnull(programme_data["surveyor"]))
+                ) & (~pd.isnull(programme_data["survey_date"]))
             )
-            completed_works = (
-                programme_data["hubspot_status"] != hubspot_config.HubspotProcessStatus.READY_TO_BE_SCHEDULED.label
-            )
-            programme_data = programme_data[ready_to_be_scheduled | completed_works]
+            # completed_works = (
+            #     (programme_data["hubspot_status"] !=
+            #     hubspot_config.HubspotProcessStatus.READY_TO_BE_SCHEDULED.label) &
+            #     (~pd.isnull(programme_data["hubspot_status"]))
+            # )
+            programme_data = programme_data[ready_to_be_scheduled]
 
         # Merge on the contact details
         programme_data = programme_data.merge(
@@ -2505,11 +2537,13 @@ class AssetList:
                 self.CRM_HISTORICAL_CAVITY_PRODUCT["name"]
             )
         else:
+            # We shouldn't have any missing products
             programme_data = programme_data[
-                ~pd.isnull(programme_data["domna_product"]) &
-                ~pd.isnull(programme_data["surveyor"]) &
-                ~pd.isnull(programme_data["survey_week"])
-                ]
+                ~pd.isnull(programme_data["survey_date"])
+            ]
+
+            if pd.isnull(programme_data["domna_product"]).sum():
+                raise ValueError("Missing products")
             programme_data = programme_data.drop(columns=["solar_product", "cavity_product"])
 
         product_df = (
@@ -2686,7 +2720,7 @@ class AssetList:
             'Last EPC: Room Height <LISTING last_epc__room_height>': self.EPC_API_DATA_NAMES["floor-height"],
             'Last EPC: Age Band <LISTING last_epc__age_band>': self.EPC_API_DATA_NAMES["construction-age-band"],
             'Pipeline <DEAL pipeline>': 'Pipeline <DEAL pipeline>',
-            'Expected Commencement Date <DEAL expected_commencement_date>': "survey_week",
+            'Expected Commencement Date <DEAL expected_commencement_date>': "survey_date",
             'Deal Name <DEAL dealname>': "dealname",  # Need to create this,
             'Product ID <LINE_ITEM hs_product_id>': 'Product ID <LINE_ITEM hs_product_id>',
             'Name <LINE_ITEM name>': 'Name <LINE_ITEM name>',
@@ -2724,7 +2758,11 @@ class AssetList:
         # The listing owner email is the same as the surveyor email (deal owner), so they can see the listing
         programme_data['Listing Owner Email <LISTING hubspot_owner_id>'] = programme_data['Deal Owner']
         programme_data['Amount <DEAL amount>'] = 0
-        programme_data["Deal Owner"] = programme_data["Deal Owner"].str.lower()
+        programme_data["Deal Owner"] = np.where(
+            ~pd.isnull(programme_data["Deal Owner"]),
+            programme_data["Deal Owner"].astype(str).str.lower(),
+            programme_data["Deal Owner"]
+        )
 
         # We make sure we have all of the columns that we need
         missed_columns = [c for c in hubspot_config.CRM_UPLOAD_COLUMNS if c not in programme_data.columns]
diff --git a/asset_list/app.py b/asset_list/app.py
index 08164c19..8158becc 100644
--- a/asset_list/app.py
+++ b/asset_list/app.py
@@ -553,13 +553,9 @@ def app():
     )
 
     asset_list.merge_data(epc_df)
-
     asset_list.extract_attributes()
-
     asset_list.identify_worktypes()
 
-    pprint(asset_list.work_type_figures)
-
     # We now flag the status of the property
     asset_list.label_property_status()
     asset_list.analyse_geographies()
diff --git a/asset_list/hubspot/prepare_for_hubspot.py b/asset_list/hubspot/prepare_for_hubspot.py
index 9ffe24ca..0d0abcb2 100644
--- a/asset_list/hubspot/prepare_for_hubspot.py
+++ b/asset_list/hubspot/prepare_for_hubspot.py
@@ -19,19 +19,19 @@ def app():
 
     # inputs:
     reconcile_programme = False  # If True, the hubspot upload will include all properties with a project code
-    customer_domain = "https://sandwell.gov.uk"
+    customer_domain = "https://livewest.co.uk"
     installer_name = "J & J CRUMP"
     asset_list_filepath = (
-        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Sandwell/Hubspot/Sandwell BC - Full Asset List MAIN - "
-        "Standardised.xlsx"
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Hubspot/Livewest South-West - Standardised "
+        "V2.xlsx"
     )
-    asset_list_sheet_name = "Proposed Program"
-    asset_list_header = 1
+    asset_list_sheet_name = "Standardised Asset List"
+    asset_list_header = 0
 
     contact_details_filepath = (
-        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Sandwell/Hubspot/Sandwell Contact Details.xlsx"
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Hubspot/23.06 Livewest Contacts.xlsx"
     )
-    contacts_sheet_name = "Sheet1"
+    contacts_sheet_name = "Contact Information"
     contacts_landlord_property_id = "landlord_property_id"
     contacts_phone_number_column = "phone_number"
     contacts_secondary_phone_number_column = "secondary_phone_number"
diff --git a/asset_list/utils.py b/asset_list/utils.py
index ff9db3f8..61dcf8ea 100644
--- a/asset_list/utils.py
+++ b/asset_list/utils.py
@@ -79,7 +79,13 @@ def get_data(
                 uprn=uprn
             )
             # Force the skipping of estimating the EPC
-            searcher.ordnance_survey_client.property_type = None
+            # We check if the property was split
+            if home["is_expended_block"]:
+                searcher.ordnance_survey_client.property_type = "Flat"
+                searcher.property_type = "Flat"
+                searcher.set_strict_property_type_search()
+            else:
+                searcher.ordnance_survey_client.property_type = None
             searcher.ordnance_survey_client.built_form = None
 
             searcher.find_property(skip_os=True)
diff --git a/backend/Funding.py b/backend/Funding.py
index 78440eac..49d2d293 100644
--- a/backend/Funding.py
+++ b/backend/Funding.py
@@ -5,7 +5,7 @@ from typing import List
 from backend.app.plan.schemas import HousingType
 
 
-class Funding:
+class FundingOld:
     """
     Given a property, this class identifies if the home is possibly eligible for funding under
     the various funding schemes. It will also calculate the expected amount of funding available
@@ -413,13 +413,32 @@ class Funding:
         self.whlg()
 
 
-class Funding2:
+class Funding:
     """
     New class to handle funding calculation
     """
 
-    def __init__(self, tenure: HousingType):
+    def __init__(
+        self,
+        tenure: HousingType,
+        social_cavity_abs_rate: float,
+        social_solid_abs_rate: float,
+        private_cavity_abs_rate: float,
+        private_solid_abs_rate: float,
+        project_scores_matrix,
+        whlg_eligible_postcodes
+    ):
         self.tenure = tenure
+        self.social_cavity_abs_rate = social_cavity_abs_rate
+        self.social_solid_abs_rate = social_solid_abs_rate
+        self.private_cavity_abs_rate = private_cavity_abs_rate
+        self.private_solid_abs_rate = private_solid_abs_rate
+
+        self.starting_sap_band = None
+        self.ending_sap_band = None
+        self.floor_area_band = None
+        self.project_scores_matrix = project_scores_matrix
+        self.whlg_eligible_postcodes = whlg_eligible_postcodes
 
     @staticmethod
     def get_sap_band(sap_score_number):
@@ -446,8 +465,22 @@ class Funding2:
 
         return None
 
+    @staticmethod
+    def get_floor_area_band(floor_area):
+        if floor_area <= 72:
+            return "0-72"
+
+        if floor_area <= 97:
+            return "73-97"
+
+        if floor_area <= 199:
+            return "98-199"
+
+        return "200"
+
+    @staticmethod
     def eco4_prs_eligibility(
-        self, starting_sap: int, measures: List, mainheat_description: str, heating_control_description: str
+        starting_sap: int, measures: List, mainheat_description: str, heating_control_description: str
     ):
         """
         Handles the eligibility criteria for private rental properties under eco
@@ -481,31 +514,53 @@ class Funding2:
 
         return False
 
+    def calculate_full_project_abs(self):
+        """Return the matrix "Cost Savings" for the current floor-area/SAP bands; ValueError if no row matches."""
+        # Filter the project scores matrix
+        data = self.project_scores_matrix[
+            (self.project_scores_matrix["Floor Area Segment"] == self.floor_area_band) &
+            (self.project_scores_matrix["Starting Band"] == self.starting_sap_band) &
+            (self.project_scores_matrix["Finishing Band"] == self.ending_sap_band)
+            ]
+
+        if data.empty:
+            raise ValueError("Missing abs rate, check the project scores matrix")
+
+        return data["Cost Savings"].values[0]
+
     def check_funding(
         self, measures: List,
         starting_sap: int,
         ending_sap: int,
+        floor_area: float,
         mainheat_description: str,
-        heating_control_description: str
+        heating_control_description: str,
+        is_cavity: bool
     ):
         """
         Given a list of measures, this function will check if the package of measures is fundable
         :param measures:
         :param starting_sap:
         :param ending_sap:
+        :param floor_area:
+        :param mainheat_description:
+        :param heating_control_description:
+        :param is_cavity: Indicates if the property has cavity wall insulation
         :return:
         """
 
-        starting_band = self.get_sap_band(starting_sap)
-        ending_band = self.get_sap_band(ending_sap)
+        # If it's an E or D, should get to an EPC C
+        if starting_sap >= 55 and ending_sap < 69:
+            raise NotImplementedError("This property doesn't have sufficient SAP movement")
 
-        # For ECO4 eligibility, the property needs to end at a C if it starts at a D or E, otherwise should end at a
-        # D
-
-        if starting_band <= 38 & ending_band >= 55:
+        if starting_sap <= 38 and ending_sap <= 55:
             # F or G should get to D
             raise NotImplementedError("Implement F or G to D eligibility")
 
+        self.starting_sap_band = self.get_sap_band(starting_sap)
+        self.ending_sap_band = self.get_sap_band(ending_sap)
+        self.floor_area_band = self.get_floor_area_band(floor_area)
+
         ########################
         # Private
         ########################
@@ -513,13 +568,25 @@ class Funding2:
         # 2) GBIS
 
         if self.tenure == "Private":
-            is_eligible = self.eco4_prs_eligibility(
+            is_eco4_eligible = self.eco4_prs_eligibility(
                 starting_sap=starting_sap,
                 measures=measures,
                 mainheat_description=mainheat_description,
                 heating_control_description=heating_control_description
             )
-            pass
+
+            # Need to implement
+            # 1) Package has to include an insulation measure
+            # 2) We should use the funding for the measure that has the largest partial project score
+            is_gbis_eligible = ()
+
+            if not is_eco4_eligible:
+                return
+            eco4_abs = self.calculate_full_project_abs()
+            # We estimate rates now
+            eco4_funding = (
+                eco4_abs * self.private_cavity_abs_rate if is_cavity else eco4_abs * self.private_solid_abs_rate
+            )
 
         ########################
         # Social
diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py
index 1ee1f950..16dd8f04 100644
--- a/backend/SearchEpc.py
+++ b/backend/SearchEpc.py
@@ -160,6 +160,9 @@ class SearchEpc:
         """
         Address lines 1 and postcode are mandatory fields. The other address lines are optional
         but can be used to find the epc for the home, if address1 and postcode are insufficient
+
+        If you wish to run a strict property type search, please run set_strict_property_type_search()
+
         :param address1: string, propery's address line 1
         :param postcode: string, propery's postcode
         :param full_address: string, optional parameter, the full address of the property
@@ -189,6 +192,7 @@ class SearchEpc:
         self.older_epcs = None
         self.full_sap_epc = None
         self.metadata = None
+        self.strict_property_type_search = False
 
         # These are the address and postcode values, which we store in the database
         self.address_clean = None
@@ -199,6 +203,14 @@ class SearchEpc:
         self.property_type = property_type
         self.fast = fast
 
+    def set_strict_property_type_search(self):
+        """
+        This method sets the strict property type search flag to True. When this flag is set, the search will
+        only return results that match the specified property type.
+        :return:
+        """
+        self.strict_property_type_search = True
+
     @staticmethod
     def get_house_number(address: str, postcode=None) -> str | None:
         """
@@ -315,6 +327,8 @@ class SearchEpc:
             address_params["address"] = self.address1
         if self.postcode:
             address_params["postcode"] = self.postcode
+        if self.strict_property_type_search and self.property_type:
+            address_params["property-type"] = self.property_type.lower()
 
         # We attempt the search with uprn params
 
@@ -365,11 +379,16 @@ class SearchEpc:
 
         unique_property_types = {r["property-type"] for r in rows}
 
+        is_just_a_house = (len(unique_property_types) == 1) & (
+            ("House" in unique_property_types) | ("Bungalow" in unique_property_types)
+        )
+
         # We allow for variation in property type across flats/maisonettes
         # If we know that we have a flat/maisonette, we allow for both property types
-        if property_type in ["Flat", "Maisonette"]:
-            if ((len(uprns) == 1) and ((len(unique_property_types) == 1)
-            ) or unique_property_types == {"Flat", "Maisonette"}):
+        # Make sure we have not JUST a house, or not JUST a flat/maisonette
+        if property_type in ["Flat", "Maisonette"] and not is_just_a_house:
+            if (((len(uprns) == 1) and ((len(unique_property_types) == 1)
+            ) or unique_property_types == {"Flat", "Maisonette"})):
                 return rows
 
         if property_type is not None:
@@ -424,6 +443,8 @@ class SearchEpc:
 
             return rows
 
+        raise ValueError("property type and address cannot both be None, at least one must be provided")
+
     @staticmethod
     def format_address(newest_epc):
         """
diff --git a/backend/tests/test_funding.py b/backend/tests/test_funding.py
new file mode 100644
index 00000000..311ab589
--- /dev/null
+++ b/backend/tests/test_funding.py
@@ -0,0 +1,52 @@
+import pytest
+import pandas as pd
+from utils.s3 import read_csv_from_s3
+from backend.Funding import Funding
+
+
+def get_funding_data():
+    """
+    This function retrieves the eco project scores matrix and the warm homes local grant funding data
+    :return:
+    """
+    project_scores_matrix = read_csv_from_s3(
+        bucket_name="retrofit-data-dev",
+        filepath="funding/ECO4 Full Project Scores Matrix.csv",
+    )
+    project_scores_matrix = pd.DataFrame(project_scores_matrix)
+    project_scores_matrix.columns = ['Floor Area Segment', 'Starting Band', 'Finishing Band', 'Cost Savings']
+    project_scores_matrix["Cost Savings"] = project_scores_matrix["Cost Savings"].astype(float)
+
+    whlg_eligible_postcodes = read_csv_from_s3(
+        bucket_name="retrofit-data-dev",
+        filepath="funding/whlg eligible postcodes.csv",
+    )
+    whlg_eligible_postcodes = pd.DataFrame(whlg_eligible_postcodes)
+
+    return project_scores_matrix, whlg_eligible_postcodes
+
+
+class TestFunding:
+
+    def test_prs(self):
+        eco_project_scores_matrix, whlg_eligible_postcodes = get_funding_data()
+        funding = Funding(
+            project_scores_matrix=eco_project_scores_matrix,
+            whlg_eligible_postcodes=whlg_eligible_postcodes,
+            social_cavity_abs_rate=13.5,
+            social_solid_abs_rate=17,
+            private_cavity_abs_rate=13.5,
+            private_solid_abs_rate=17,
+            tenure="Private",
+        )
+
+        measures_1 = ["internal_wall_insulation", "solar_pv"]
+        funding.check_funding(
+            measures=measures_1,
+            starting_sap=54,
+            ending_sap=69,
+            floor_area=73,
+            mainheat_description="Boiler and radiators, mains gas",
+            heating_control_description="Programmer, room thermostat and TRVs",
+            is_cavity=True
+        )

From 127773a19d1400188e67b5a6b797722d597bca78 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 22 Jun 2025 15:34:21 +0100
Subject: [PATCH 14/14] cleaning up hubspot prepare code

---
 asset_list/AssetList.py                   |  2 +
 asset_list/app.py                         | 96 +++++++++++++++--------
 asset_list/hubspot/prepare_for_hubspot.py | 26 ++++--
 asset_list/mappings/walls.py              |  4 +-
 asset_list/utils.py                       |  2 +-
 5 files changed, 90 insertions(+), 40 deletions(-)

diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py
index 130d1242..ad3087c3 100644
--- a/asset_list/AssetList.py
+++ b/asset_list/AssetList.py
@@ -2442,7 +2442,9 @@ class AssetList:
 
         # We check if all products are covered in the lookup table
         cavity_products = self.standardised_asset_list["cavity_reason"].unique().tolist()
+        cavity_products = [x for x in cavity_products if not pd.isnull(x)]
         solar_products = self.standardised_asset_list["solar_reason"].unique().tolist()
+        solar_products = [x for x in solar_products if not pd.isnull(x)]
 
         product_map = {}
         for identified_product in cavity_products + solar_products:
diff --git a/asset_list/app.py b/asset_list/app.py
index 8158becc..7c0023ce 100644
--- a/asset_list/app.py
+++ b/asset_list/app.py
@@ -60,42 +60,76 @@ def app():
     Property UPRN
     """
 
-    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Calico"
-    data_filename = "07.04 CALICO - Final List.xlsx"
-    asset_list_header = 2
-    sheet_name = "Final List"
+    # NCHA
+    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/NCHA"
+    data_filename = "Energy Information MASTER June 2025.xlsx"
+    sheet_name = "Data"
     postcode_column = 'Postcode'
-    fulladdress_column = None
-    address1_column = "Property Number / Name"
-    address1_method = None
-    address_cols_to_concat = [
-        "Property Number / Name",
-        "Street",
-        "Town"
-    ]
+    fulladdress_column = "Address"
+    address1_column = None
+    address1_method = "house_number_extraction"
+    address_cols_to_concat = []
     missing_postcodes_method = None
-    landlord_year_built = "NROSH Estimated Build Date"
+    landlord_year_built = "Build Date (HAR10)"
     landlord_os_uprn = None
-    landlord_property_type = "Asset Type"
-    landlord_built_form = None
-    landlord_wall_construction = "Wall Type"
-    landlord_heating_system = "Boiler Type"
-    landlord_existing_pv = None
-    landlord_property_id = "Asset Reference"
-    outcomes_filename = []
-    outcomes_sheetname = []
-    outcomes_postcode = []
-    outcomes_houseno = []
-    outcomes_id = []
-    outcomes_address = []
-    master_filepaths = []
-    master_id_colnames = []
-    master_to_asset_list_filepath = None
+    landlord_property_type = "Property Type (HAR10)"
+    landlord_built_form = "Build Form (EPC)"
+    landlord_wall_construction = "Wall Description"
     landlord_roof_construction = None
-    landlord_block_reference = None
-    landlord_sap = "Current Efficiency Rating - Score"
-    phase = None
+    landlord_heating_system = "HEAT Code"
+    landlord_existing_pv = None
+    landlord_property_id = "Place ref"
+    landlord_sap = "EPC SAP"
+    outcomes_filename = None
+    outcomes_sheetname = None
+    outcomes_postcode = None
+    outcomes_houseno = None
+    outcomes_id = None
+    outcomes_address = None
+    master_filepaths = []
+    master_to_asset_list_filepath = None
+    phase = False
     ecosurv_landlords = None
+    asset_list_header = 0
+    landlord_block_reference = None
+    master_id_colnames = []
+
+    # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Calico"
+    # data_filename = "07.04 CALICO - Final List.xlsx"
+    # asset_list_header = 2
+    # sheet_name = "Final List"
+    # postcode_column = 'Postcode'
+    # fulladdress_column = None
+    # address1_column = "Property Number / Name"
+    # address1_method = None
+    # address_cols_to_concat = [
+    #     "Property Number / Name",
+    #     "Street",
+    #     "Town"
+    # ]
+    # missing_postcodes_method = None
+    # landlord_year_built = "NROSH Estimated Build Date"
+    # landlord_os_uprn = None
+    # landlord_property_type = "Asset Type"
+    # landlord_built_form = None
+    # landlord_wall_construction = "Wall Type"
+    # landlord_heating_system = "Boiler Type"
+    # landlord_existing_pv = None
+    # landlord_property_id = "Asset Reference"
+    # outcomes_filename = []
+    # outcomes_sheetname = []
+    # outcomes_postcode = []
+    # outcomes_houseno = []
+    # outcomes_id = []
+    # outcomes_address = []
+    # master_filepaths = []
+    # master_id_colnames = []
+    # master_to_asset_list_filepath = None
+    # landlord_roof_construction = None
+    # landlord_block_reference = None
+    # landlord_sap = "Current Efficiency Rating - Score"
+    # phase = None
+    # ecosurv_landlords = None
 
     # data_folder = (
     #     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/2018 Asset
diff --git a/asset_list/hubspot/prepare_for_hubspot.py b/asset_list/hubspot/prepare_for_hubspot.py
index 0d0abcb2..eed6d7e7 100644
--- a/asset_list/hubspot/prepare_for_hubspot.py
+++ b/asset_list/hubspot/prepare_for_hubspot.py
@@ -19,19 +19,19 @@ def app():
 
     # inputs:
     reconcile_programme = False  # If True, the hubspot upload will include all properties with a project code
-    customer_domain = "https://livewest.co.uk"
+    customer_domain = "https://sandwell.gov.uk"
     installer_name = "J & J CRUMP"
     asset_list_filepath = (
-        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Hubspot/Livewest South-West - Standardised "
-        "V2.xlsx"
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Sandwell/Hubspot/Sandwell BC - Full Asset List MAIN - "
+        "Standardised.xlsx"
     )
-    asset_list_sheet_name = "Standardised Asset List"
-    asset_list_header = 0
+    asset_list_sheet_name = "Proposed Program"
+    asset_list_header = 1
 
     contact_details_filepath = (
-        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Hubspot/23.06 Livewest Contacts.xlsx"
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Sandwell/Hubspot/Sandwell Contact Details.xlsx"
     )
-    contacts_sheet_name = "Contact Information"
+    contacts_sheet_name = "Sheet1"
     contacts_landlord_property_id = "landlord_property_id"
     contacts_phone_number_column = "phone_number"
     contacts_secondary_phone_number_column = "secondary_phone_number"
@@ -41,6 +41,10 @@ def app():
     contacts_firstname_column = "firstname"
     contacts_lastname_column = "lastname"
 
+    existing_programme_filepath = (
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Sandwell/Hubspot/property-status.csv"
+    )
+
     asset_list = AssetList.load_standardised_asset_list(
         asset_list_filepath, asset_list_sheet_name, asset_list_header
     )
@@ -63,6 +67,14 @@ def app():
         reconcile_programme=reconcile_programme
     )
 
+    # Drop rows whose Domna Property ID already appears in the existing programme export
+    existing_programme = pd.read_csv(existing_programme_filepath, encoding="utf-8-sig")
+    asset_list.hubspot_data = asset_list.hubspot_data[
+        ~asset_list.hubspot_data["Domna Property ID <LISTING domna_property_id>"].isin(
+            existing_programme['Domna Property ID'].values
+        )
+    ]
+
     # Get the filepath and the filename. Append hubspot upload to the filename. We also change the file type to csv
     directory, filename = os.path.split(asset_list_filepath)
     name, ext = os.path.splitext(filename)
diff --git a/asset_list/mappings/walls.py b/asset_list/mappings/walls.py
index 8be8575a..2e0a332f 100644
--- a/asset_list/mappings/walls.py
+++ b/asset_list/mappings/walls.py
@@ -248,5 +248,7 @@ WALL_CONSTRUCTION_MAPPINGS = {
     'No Fines': 'system built',
     'Granite/Whinstone': 'granite or whinstone',
     'Not applicable to this asset type': 'unknown',
-    'Steel Frame': 'system built'
+    'Steel Frame': 'system built',
+    'Solid Wall As Built': 'uninsulated solid brick',
+    'Solid As Built': 'uninsulated solid brick'
 }
diff --git a/asset_list/utils.py b/asset_list/utils.py
index 61dcf8ea..1678b8e9 100644
--- a/asset_list/utils.py
+++ b/asset_list/utils.py
@@ -80,7 +80,7 @@ def get_data(
             )
             # Force the skipping of estimating the EPC
             # We check if the property was split
-            if home["is_expended_block"]:
+            if home.get("is_expended_block"):
                 searcher.ordnance_survey_client.property_type = "Flat"
                 searcher.property_type = "Flat"
                 searcher.set_strict_property_type_search()