From c0cf848db2676f93fd0e458c327de7554370e2af Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 20 May 2025 15:59:38 +0100
Subject: [PATCH] re-building thrive's programme

---
 .idea/Model.iml                            |   2 +-
 .idea/misc.xml                             |   2 +-
 asset_list/AssetList.py                    |  48 ++-
 asset_list/app.py                          |  52 +--
 asset_list/mappings/heating_systems.py     |   2 +
 asset_list/mappings/roof.py                |   9 +-
 etl/customers/thrive/Programme Analysis.py | 373 +++++++++++++++++++--
 7 files changed, 411 insertions(+), 77 deletions(-)
diff --git a/.idea/Model.iml b/.idea/Model.iml
index c6561970..09f2e496 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Fastapi-backend" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="AssetList" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
 </module>
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 50cad4ca..fb10c6b0 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Fastapi-backend" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="AssetList" project-jdk-type="Python SDK" />
   <component name="PyCharmProfessionalAdvertiser">
     <option name="shown" value="true" />
   </component>
diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py
index 4b7a11ec..199b175c 100644
--- a/asset_list/AssetList.py
+++ b/asset_list/AssetList.py
@@ -2507,7 +2507,7 @@ class AssetList:
         else:
             raise NotImplementedError("Invalid date in outcomes - implement me")
 
-        notes_col = "Notes" if "Notes" in outcomes.columns else "Notes / Outcomes"
+        notes_col = "Notes" if "Notes" in self.outcomes.columns else "Notes / Outcomes"
 
         lookup = lookup.merge(
             self.outcomes[["row_id", "Outcome", notes_col, date_col]], how="left", on="row_id"
@@ -2576,6 +2576,7 @@ class AssetList:
     def flag_survey_master(
         self,
         master_filepaths,
+        master_id_colnames,
         master_to_asset_list_filepath=None
     ):
         # TODO: This probably needs further expansion
@@ -2591,7 +2592,7 @@ class AssetList:
         logger.info("Getting masters and merging onto asset list")
         master_surveyed = []
         unmatched_submissions = []
-        for filepath in master_filepaths:
+        for idx, filepath in enumerate(master_filepaths):
             master_data = pd.read_csv(filepath)
             # Strip columns
             master_data.columns = [c.strip() for c in master_data.columns]
@@ -2618,22 +2619,6 @@ class AssetList:
                 "SUBMISSION DATE" if "SUBMISSION DATE" in master_data.columns else "SUBMISSION DATE TO INSTALLERS"
             )
 
-            # if "UPRN" in master_data.columns:
-            #     # We just need to check if any were cancelled
-            #     master_to_append = master_data[
-            #         ["UPRN", install_col, submission_col]
-            #     ].rename(
-            #         columns={
-            #             "UPRN": self.STANDARD_LANDLORD_PROPERTY_ID,
-            #             install_col: "survey_status",
-            #             submission_col: "submission_date"
-            #         }
-            #     )
-            #     master_to_append["cancelled"] = master_to_append["survey_status"].str.lower().str.contains("cancel")
-            #
-            #     master_surveyed.append(master_to_append)
-            #     continue
-
             master_data["row_id"] = master_data.index
 
             self.standardised_asset_list["house_no"] = self.standardised_asset_list.apply(
@@ -2656,8 +2641,6 @@ class AssetList:
             )
             measure_mix_col = "MEASURE COMBO"
 
-            # Otherwise, we need to match algorithmically
-            has_property_id = "UPRN" in master_data.columns
             logger.info("Matching master data to asset list")
             matched = []
             unmatched = []
@@ -2670,13 +2653,22 @@ class AssetList:
                 if pd.isnull(row[postcode_col]):
                     continue
 
-                # if has_property_id:
-                #     submission_uprn = row["UPRN"]
-                #
-                # if not pd.isnull(submission_uprn):
-                #     df = self.standardised_asset_list[
-                #         self.standardised_asset_list[self.STANDARD_LANDLORD_PROPERTY_ID] == submission_uprn
-                #         ]
+                if master_id_colnames[idx] is not None:
+                    # Filter the standardised asset list on this
+                    df = self.standardised_asset_list[
+                        self.standardised_asset_list[self.STANDARD_LANDLORD_PROPERTY_ID] == row[master_id_colnames[idx]]
+                        ]
+                    if df.shape[0] == 1:
+                        matched.append(
+                            {
+                                "row_id": row["row_id"],
+                                "original_house_no": original_house_no,
+                                "original_street": original_street,
+                                "original_postcode": original_postcode,
+                                self.STANDARD_LANDLORD_PROPERTY_ID: df[self.STANDARD_LANDLORD_PROPERTY_ID].values[0],
+                            }
+                        )
+                        continue
 
                 postcode_no_space = row[postcode_col].strip().replace(" ", "").lower()
 
@@ -2721,6 +2713,7 @@ class AssetList:
                             self.STANDARD_LANDLORD_PROPERTY_ID: df[self.STANDARD_LANDLORD_PROPERTY_ID].values[0],
                         }
                     )
+                    continue
 
                 if house_no in df["house_no"].values:
                     df = df[df["house_no"] == house_no]
@@ -2793,6 +2786,7 @@ class AssetList:
                 }
             )
             master_to_append["cancelled"] = master_to_append["survey_status"].str.lower().str.contains("cancel")
+            master_to_append["installed"] = master_to_append["survey_status"].str.lower().str.contains("installed")
             master_surveyed.append(master_to_append)
             unmatched_df = master_data[
                 master_data["row_id"].isin(unmatched)
diff --git a/asset_list/app.py b/asset_list/app.py
index bb898c09..3441e5de 100644
--- a/asset_list/app.py
+++ b/asset_list/app.py
@@ -62,36 +62,42 @@ def app():
     Property UPRN
     """
 
-    # Thurrock
-    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thurrock"
-    data_filename = "THURROCK COUNCIL - For analysis.xlsx"
-    sheet_name = "Assets"
-    postcode_column = 'Postcode'
-    fulladdress_column = "Full Address"
-    address1_column = None
-    address1_method = "house_number_extraction"
+    # Thrive - reconciliation
+    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation"
+    data_filename = "Thrive Asset List - Complete - Updated May 2025.xlsx"
+    sheet_name = "Sheet1"
+    postcode_column = 'postcode'
+    fulladdress_column = "full_address"
+    address1_column = "address_line_1"
+    address1_method = None
     address_cols_to_concat = []
     missing_postcodes_method = None
-    landlord_year_built = "Construction Date"
+    landlord_year_built = "age_band_calculated"
     landlord_os_uprn = None
-    landlord_property_type = "Property Type"
-    landlord_built_form = "Property Subtype"
+    landlord_property_type = "property_type"
+    landlord_built_form = "build_form"
     landlord_wall_construction = None
-    landlord_roof_construction = None
-    landlord_heating_system = "Main Heating Type"
+    landlord_roof_construction = "assumed_loft_insulation_thickness_updated"
+    landlord_heating_system = "heating_type_updated"
     landlord_existing_pv = None
-    landlord_property_id = "Property Reference"
-    landlord_sap = None
-    outcomes_filename = []
-    outcomes_sheetname = []
-    outcomes_postcode = []
-    outcomes_houseno = []
-    outcomes_id = []
-    outcomes_address = []
-    master_filepaths = []
+    landlord_property_id = "thrive_property_id"
+    landlord_sap = "sap_rating_updated"
+    outcomes_filename = [
+        os.path.join(data_folder, "Thrive - Outcomes - April 24-March25 - Corrected.xlsx")
+    ]
+    outcomes_sheetname = ["Sheet1"]
+    outcomes_postcode = ["postcode"]
+    outcomes_houseno = ["No."]
+    outcomes_id = ["thrive_property_id"]
+    outcomes_address = ["address"]
+    master_filepaths = [
+        os.path.join(data_folder, "Thrive Submissions ECO3 - with IDS.csv"),
+        os.path.join(data_folder, "Thrive Submissions ECO4 - with IDS.csv"),
+    ]
     master_to_asset_list_filepath = None
+    master_id_colnames = ["thrive_property_id", "thrive_property_id"]
     phase = False
-    ecosurv_landlords = None
+    ecosurv_landlords = "thrive"
 
     # Medway
     data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Medway"
diff --git a/asset_list/mappings/heating_systems.py b/asset_list/mappings/heating_systems.py
index 92f59f2c..daef01bb 100644
--- a/asset_list/mappings/heating_systems.py
+++ b/asset_list/mappings/heating_systems.py
@@ -292,4 +292,6 @@ HEATING_MAPPINGS = {
     'Communal Heating': 'communal heating',
     'No Data': 'unknown',
     'Boiler System': 'gas condensing boiler',
+    'Storage heating': 'electric storage heaters',
+    'Storage heating (HHRSH)': 'high heat retention storage heaters'
 }
diff --git a/asset_list/mappings/roof.py b/asset_list/mappings/roof.py
index 03d6f9af..3b447829 100644
--- a/asset_list/mappings/roof.py
+++ b/asset_list/mappings/roof.py
@@ -6,7 +6,7 @@ STANDARD_ROOF_CONSTRUCTIONS = {
     "pitched unknown access to loft",
     "piched unknown insulation",
     "pitched insulated",
-    "pitched less than 100mm insulation"
+    "pitched less than 100mm insulation",
     "another dwelling above",
     "flat unknown insulation",
     "unknown insulated",
@@ -38,4 +38,11 @@ ROOF_CONSTRUCTION_MAPPINGS = {
     '200mm': 'pitched insulated',
     '0-49mm': 'pitched less than 100mm insulation',
     '50mm': 'pitched less than 100mm insulation',
+    '': 'unknown',
+    'NR': 'unknown',
+    'Non-joist': 'unknown',
+    '25mm': 'pitched less than 100mm insulation',
+    '400mm+': 'pitched insulated',
+    '12mm': 'pitched less than 100mm insulation'
+
 }
diff --git a/etl/customers/thrive/Programme Analysis.py b/etl/customers/thrive/Programme Analysis.py
index 521cfd30..2d6a0d69 100644
--- a/etl/customers/thrive/Programme Analysis.py	
+++ b/etl/customers/thrive/Programme Analysis.py	
@@ -8,6 +8,8 @@ address the following concerns:
 """
 
 import pandas as pd
+from tqdm import tqdm
+from backend.SearchEpc import SearchEpc
 
 # This is Thrive's list of properties and when they should have been surveyed
 thrive_tracker = pd.read_excel(
@@ -51,27 +53,10 @@ original_columns = {
 }
 
 original_asset_list = original_asset_list[original_columns.keys()].rename(columns=original_columns)
-original_asset_list["Data Source"] = "Thrive Tracker"
+original_asset_list["Data Source"] = "Original Asset List"
+original_asset_list = original_asset_list.drop_duplicates()
 
 # We append on the missed properties, with the information we have
-# 'Unnamed: 0', 'Thrive Notes', 'Priority', 'UPRN', 'Short Address', '#',
-#        'Adress Line 1', 'Postcode', 'Property Type', 'Build Form',
-#        'Build year', 'Assumed mm ', 'SAP', 'Name', 'Primary Number',
-#        'Secondary Number', 'Email', 'Thrive use: Tenancy Number',
-#        'Special Requirements ', 'CIGA', 'Date CIGA check received',
-#        'Proposed Progamme', 'New Proposed Programme',
-#        'Missing from Route March?', 'Date Letters Sent (w.c)', 'Work Type',
-#        'Warmfront Survey Notes', 'Notes Reply (Thrive)', 'Loft Insulation',
-#        'Cost for Vents', 'Cavity Depth', 'Cavity Condition',
-#        'Date Submitted to installer', 'PRRN Number',
-#        'Loft insulation required? (Thrive)', 'Date booked ',
-#        'Completed\n(yes/no)', 'Date Completed',
-#        'Vents installed?\n(number and location)',
-#        'Loft Top Up\n(amount of insulation) ', 'CIGA Warranty Provided ',
-#        'Notes', 'Works Number', 'CW KGI Uploaded', 'Keystone Fan Added',
-#        'SA Cavity Condition Updated', 'SA Loft & Energy Updated',
-#        'PRRN Submitted '
-
 missed_properties["Full Address"] = (
     missed_properties["#"].astype(str) + ", " +
     missed_properties["Adress Line 1"].astype(str) + ", " +
@@ -94,6 +79,19 @@ missed_properties["WFT Findings"] = "Property Not Inspected"
 missed_properties["ECO Eligibility"] = "Property Not Inspected"
 missed_properties["Data Source"] = "Thrive Tracker"
 
+# We find the duplicated ids in original_asset_list
+dupe_ids = original_asset_list[original_asset_list["thrive_property_id"].duplicated()]["thrive_property_id"].unique()
+dupes = original_asset_list[
+    original_asset_list["thrive_property_id"].isin(dupe_ids)
+].copy()
+dupes = dupes.sort_values("thrive_property_id")
+
+original_asset_list = original_asset_list.rename(
+    columns={
+        "detailed_property_type": "build_form"
+    }
+)
+
 master_list = pd.concat([missed_properties, original_asset_list], ignore_index=True)
 
 # We were provided with a data update for a sample of properties. We update the data with this information
@@ -103,12 +101,339 @@ data_update = pd.read_excel(
     header=0
 )
 
-new_properties = data_update[~data_update["UPRN"].isin(master_list["thrive_property_id"].astype(str).values)]
+new_properties = data_update[~data_update["UPRN"].isin(master_list["thrive_property_id"].astype(str).values)].copy()
+new_properties["Full Address"] = (
+    new_properties["#"].astype(str) + ", " +
+    new_properties["Adress Line 1"].astype(str) + ", " +
+    new_properties["Postcode"].astype(str)
+)
+new_properties = new_properties[missed_columns.keys()].rename(columns=missed_columns)
+new_properties["WFT Findings"] = "Property Not Inspected"
+new_properties["ECO Eligibility"] = "Property Not Inspected"
+new_properties["Data Source"] = "13.05.2025 Data Update"
+
+master_list = pd.concat([new_properties, master_list])
+
+# We append any new data on heating type, assumed loft insulation thickness, and SAP rating from the data update
+master_list = master_list.merge(
+    data_update[["UPRN", "Heating Type", "Assumed mm ", "SAP"]].rename(
+        columns={
+            "Heating Type": "heating_type_updated",
+            "Assumed mm ": "assumed_loft_insulation_thickness_updated",
+            "SAP": "sap_rating_updated"
+        }
+    ),
+    how="left",
+    left_on="thrive_property_id",
+    right_on="UPRN"
+)
+
+# We fill the missings
+master_list["heating_type_updated"] = master_list["heating_type_updated"].fillna(master_list["heating_type"])
+master_list["assumed_loft_insulation_thickness_updated"] = master_list[
+    "assumed_loft_insulation_thickness_updated"
+].fillna(master_list["assumed_loft_insulation_thickness"])
+master_list["sap_rating_updated"] = master_list["sap_rating_updated"].fillna(master_list["sap_rating"])
+
+assert not master_list["thrive_property_id"].duplicated().sum(), "Duplicate thrive_property_id found in master_list"
+
+master_list["Address in tracker"] = master_list["thrive_property_id"].astype(str).isin(
+    thrive_tracker["UPRN"].astype(str).values
+)
+
+# Store the asset list - call it the master asset list, updated May 2025
+master_list = master_list.drop(columns=["UPRN"])
+master_list["thrive_property_id"] = master_list["thrive_property_id"].astype(str)
+# master_list.to_excel(
+#     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - "
+#     "Complete - Updated May 2025.xlsx",
+# )
+
+master_list["house_number_TEMP"] = master_list.apply(
+    lambda x: SearchEpc.get_house_number(address=x["full_address"], postcode=x["postcode"]),
+    axis=1
+)
+
+# We add in the status of the property
+# TODO: Add the status of the property from the Thrive tracker
+outcomes = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive - Outcomes - April "
+    "24-March25.xlsx",
+    header=0
+)
+outcomes["row_id"] = outcomes.index
+
+# We have two ids which have the same phone number, but different UPRN, so we don't match to the tracker for these
+tracker_for_matching = thrive_tracker[
+    ~thrive_tracker["UPRN"].isin(['OAKGRE0065ABBLDW1', 'OAKGRE0066ABBLDW1', 'JACKET0102ABBLDW1', 'BELLCL0008BEDMDW1'])
+].copy()
+tracker_for_matching["Full Address"] = (
+    tracker_for_matching["#"].astype(str) + ", " +
+    tracker_for_matching["Adress Line 1"].astype(str) + ", " +
+    tracker_for_matching["Postcode"].astype(str)
+)
+
+outcomes_id_lookup = []
+for _, x in tqdm(outcomes.iterrows(), total=len(outcomes)):
+
+    hn = str(x["No."])
+    address = x["Address"]
+    postcode = x["Postcode"]
+    contact_no = str(x["Contact No"]) if not pd.isnull(x["Contact No"]) else str(x["Contact No.1"])
+    contact_no = None if contact_no == "nan" else contact_no
+
+    if address == "292 Micklefield Road":
+        hn = "292"
+
+    if (address == "Micklefield Road") & (hn == "302"):
+        hn = "292"
+
+    if (address == "103a Norfolk Road Rickmansworth Hertfordshire WD3 1JY"):
+        hn = "103a"
+
+    if (address == "105a Norfolk Road Rickmansworth Hertfordshire WD3 1JY"):
+        hn = "105a"
+
+    if (address == "107a Norfolk Road Rickmansworth Hertfordshire WD3 1JY"):
+        hn = "107a"
+
+    #
+    # # We match this to the tracker
+    # m1 = tracker_for_matching[tracker_for_matching["Primary Number"].astype(str) == contact_no]
+    # # Many of the phone numbers don't have a leading zero in the tracker so we add them
+    # if (m1.shape[0] != 1) and not pd.isnull(contact_no):
+    #     m1 = tracker_for_matching[tracker_for_matching["Primary Number"].astype(str) == contact_no.lstrip("0")]
+    # 
+    # if m1.shape[0] > 1:
+    #     raise ValueError(
+    #         f"Error for {hn} - {address} - {postcode} - {contact_no} in the tracker"
+    #     )
+
+    # if m1.empty:
+    m1 = tracker_for_matching[
+        (tracker_for_matching["#"].astype(str) == hn) &
+        (tracker_for_matching["Postcode"] == postcode)
+        ]
+
+    if m1.empty:
+        # Some properties aren't in the tracker, we match to the master list
+        m1 = master_list[
+            (master_list["house_number_TEMP"].astype(str) == hn) &
+            (master_list["postcode"] == postcode)
+            ]
+        outcomes_id_lookup.append(
+            {
+                "row_id": x["row_id"],
+                "thrive_property_id": m1["thrive_property_id"].values[0],
+                "address": m1["full_address"].values[0],
+                "postcode": m1["postcode"].values[0],
+            }
+        )
+        continue
+
+    if m1.shape[0] != 1:
+        raise ValueError(
+            f"Error for {hn} - {address} - {postcode} - {contact_no} in the tracker"
+        )
+
+    # We record the tracker UPRN and address matched to this outcomes row
+    outcomes_id_lookup.append(
+        {
+            "row_id": x["row_id"],
+            "thrive_property_id": m1["UPRN"].values[0],
+            "address": m1["Full Address"].values[0],
+            "postcode": m1["Postcode"].values[0],
+        }
+    )
+
+outcomes_id_lookup = pd.DataFrame(outcomes_id_lookup)
+outcomes = outcomes.merge(
+    outcomes_id_lookup,
+    how="left",
+    left_on="row_id",
+    right_on="row_id"
+)
+
+outcomes = outcomes.drop(columns=["row_id"])
+outcomes = outcomes.rename(
+    columns={
+        "Outcomes": "Outcome",
+        "Notes                                                                                         (If 'no "
+        "answer' under outcomes, have you checked around the property for access issues where possible?)": "Notes",
+    }
+)
 
 
+# Store the corrected outcomes
+# outcomes.to_excel(
+#     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive - Outcomes -
+#     April 24-March25 - Corrected.xlsx",
+#     index=False
+# )
 
-data_update = = data_update[["UPRN", ""]]
 
-# TODO: Flag the Thrive priorities and create a separate project code for these
-# TODO: Add the general project code
-# TODO: Add the thrive
\ No newline at end of file
+def parse_date(value):
+    # Strip any 'W.C' or 'w/c' prefix and clean whitespace
+    value = value.strip().lower().replace('w.c', '').replace('w/c', '').strip()
+    try:
+        # Try parsing the date with dayfirst=True
+        return pd.to_datetime(value, dayfirst=True, errors='coerce')
+    except Exception:
+        return pd.NaT
+
+
+outcomes['Parsed Date'] = outcomes['Date letters sent'].apply(parse_date)
+
+# Next step - match the submissions master to the asset list. We will append on the UPRN
+eco3_submissions = pd.read_csv(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions "
+    "ECO3.csv",
+    header=0
+)
+eco3_submissions["row_id"] = eco3_submissions.index
+
+eco4_submissions = pd.read_csv(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions "
+    "ECO4.csv",
+    header=0
+)
+eco4_submissions["row_id"] = eco4_submissions.index
+
+# List of properties never on the asset list
+not_on_master = [
+    "7+FOXGROVE PATH+WD19 6YL", "9+FOXGROVE PATH+WD19 6YL", "11+FOXGROVE PATH+WD19 6YL",
+    "20+LINCOLN DRIVE+WD19 7BA", "22+LINCOLN DRIVE+WD19 7BA", "24+LINCOLN DRIVE+WD19 7BA",
+    "26+LINCOLN DRIVE+WD19 7BA", "1+Ryman Court, Stag Lane+WD3 5HN", "6+Ryman Court, Stag Lane+WD3 5HN",
+    "9+Ryman Court, Stag Lane+WD3 5HN", "10+Ryman Court, Stag Lane+WD3 5HN", "11+Ryman Court, Stag Lane+WD3 5HN",
+    "12+Ryman Court, Stag Lane+WD3 5HN", "14+Ryman Court, Stag Lane+WD3 5HN", "15+Ryman Court, Stag Lane+WD3 5HN",
+    "20+Ryman Court, Stag Lane+WD3 5HN", "21+Ryman Court, Stag Lane+WD3 5HN", "22+Ryman Court, Stag Lane+WD3 5HN",
+    "25+Ryman Court, Stag Lane+WD3 5HN", "26+Ryman Court, Stag Lane+WD3 5HN", "31+Ryman Court, Stag Lane+WD3 5HN",
+    "33+Ryman Court, Stag Lane+WD3 5HN", "34+Ryman Court, Stag Lane+WD3 5HN",
+    '37+Ryman Court, Stag Lane+WD3 5HN', '38+Ryman Court, Stag Lane+WD3 5HN', '39+Ryman Court, Stag Lane+WD3 5HN',
+    '41+Ryman Court, Stag Lane+WD3 5HN', '43+Ryman Court, Stag Lane+WD3 5HN', '45+Ryman Court, Stag Lane+WD3 5HN',
+    '46+Ryman Court, Stag Lane+WD3 5HN', '48+Ryman Court, Stag Lane+WD3 5HN', '49+Ryman Court, Stag Lane+WD3 5HN',
+    '50+Ryman Court, Stag Lane+WD3 5HN', '52+Ryman Court, Stag Lane+WD3 5HN'
+]
+
+eco3_remap = {
+    "19+OAKHILL ROAD+WD5 8RE": ('19', 'OAKHILL ROAD', 'WD3 9RE'),
+    "29+OAKHILL ROAD+WD5 8RE": ('29', 'OAKHILL ROAD', 'WD3 9RE'),
+    "31+OAKHILL ROAD+WD5 8RE": ('31', 'OAKHILL ROAD', 'WD3 9RE'),
+    "44+OAKHILL ROAD+WD5 8RE": ('44', 'OAKHILL ROAD', 'WD3 9RF'),
+    "64+OAKHILL ROAD+WD4 8RF": ('64', 'OAKHILL ROAD', 'WD3 9RF'),
+    "11+LANCASTER WAY+WD3 PRE": ('11', 'LANCASTER WAY', 'WD5 0PQ'),
+    "16+LANCASTER WAY+WD3 PRE": ('16', 'LANCASTER WAY', 'WD5 0PQ'),
+    "58+TALBOT ROAD +WD31HE": ('58', 'TALBOT ROAD', 'WD3 1HE'),
+    "10+PEARTREE COURT/WELWYN GARDEN CITY+AL73XN": ('10', 'PEARTREE COURT/WELWYN GARDEN CITY', 'AL7 3XN'),
+    "25+GOBLINS GREEN/WELWYN GARDEN CITY+AL73ST": ('25', 'GOBLINS GREEN/WELWYN GARDEN CITY', 'AL7 3ST'),
+    "32+GOBLINS GREEN/WELWYN GARDEN CITY+AL73ST": ('32', 'GOBLINS GREEN/WELWYN GARDEN CITY', 'AL7 3ST'),
+    "94+BAKER ST/POTTERS BAR+EN62EP": ('94', 'BAKER ST/POTTERS BAR', 'EN6 2EP'),
+    '33+Tudor Way+WD3JA': ('33', 'Tudor Way', 'WD3 8JA'),
+    '120+Hazlewood lane +WD5 0HF': ('120', 'Hazlewood lane', 'WD5 0HE'),
+    '35+Rosehill gardens +WD5 0HE': ('35', 'Rosehill gardens', 'WD5 0HF'),
+    '18+Rosehill gardens +WD5 0HE': ('18', 'Rosehill gardens', 'WD5 0HF'),
+    '34+Rosehill gardens +WD5 0HE': ('34', 'Rosehill gardens', 'WD5 0HF'),
+    '58+Rosehill gardens +WD5 0HE': ('58', 'Rosehill gardens', 'WD5 0HF'),
+    '48+Rosehill gardens +WD5 0HE': ('48', 'Rosehill gardens', 'WD5 0HF'),
+    '45+Rosehill gardens +WD5 0HE': ('45', 'Rosehill gardens', 'WD5 0HF'),
+    '6+Rosehill gardens +WD5 0HE': ('6', 'Rosehill gardens', 'WD5 0HF'),
+    '2+Rosehill gardens +WD5 0HE': ('2', 'Rosehill gardens', 'WD5 0HF'),
+    '29+Rosehill gardens +WD5 0HE': ('29', 'Rosehill gardens', 'WD5 0HF'),
+    '61+GOLDEN DELL+AL8 4EE': ('61', 'GOLDEN DELL', 'AL7 4EE'),
+    '2O+EDINBURGH AVENUE+WD3 8LB': ('20', 'EDINBURGH AVENUE', 'WD3 8LB'),
+}
+
+eco3_lookup = []
+for _, row in tqdm(eco3_submissions.iterrows(), total=len(eco3_submissions)):
+    hn = row["NO "]
+    pc = row["Post Code"]
+    street = row["Street / Block Name"]
+    key = f"{hn}+{street}+{pc}"
+    if key in not_on_master:
+        continue
+
+    if key in eco3_remap:
+        hn, street, pc = eco3_remap[key]
+        # The submission's details differ from the asset list (presumably), so we use the remapped values
+
+    # We filter the asset list, because it's hard to know how accurate this is
+    m1 = master_list[
+        (master_list["house_number_TEMP"].astype(str) == hn) &
+        (master_list["postcode"] == pc)
+        ]
+
+    if m1.shape[0] != 1:
+        raise ValueError(
+            f"Error for {key} in the tracker"
+        )
+
+    eco3_lookup.append(
+        {
+            "row_id": row["row_id"],
+            "thrive_property_id": m1["thrive_property_id"].values[0],
+            "submission_house_number": row["NO "],
+            "submission_address1": row["Street / Block Name"],
+            "submission_postcode": row["Post Code"],
+        }
+    )
+
+eco4_lookup = []
+for _, row in tqdm(eco4_submissions.iterrows(), total=len(eco4_submissions)):
+    hn = row["NO."]
+    pc = row["Post Code"]
+    street = row["Street / Block Name"]
+    key = f"{hn}+{street}+{pc}"
+    if key in not_on_master:
+        continue
+
+    if key in eco3_remap:
+        hn, street, pc = eco3_remap[key]
+        # The ECO3 remap is reused here: the remapped values replace the submission's own details
+
+    # We filter the asset list, because it's hard to know how accurate this is
+    m1 = master_list[
+        (master_list["house_number_TEMP"].astype(str) == hn) &
+        (master_list["postcode"].str.lower() == pc.lower())
+        ]
+
+    if m1.shape[0] != 1:
+        raise ValueError(
+            f"Error for {key} in the tracker"
+        )
+
+    eco4_lookup.append(
+        {
+            "row_id": row["row_id"],
+            "thrive_property_id": m1["thrive_property_id"].values[0],
+            "submission_house_number": row["NO."],
+            "submission_address1": row["Street / Block Name"],
+            "submission_postcode": row["Post Code"],
+        }
+    )
+
+# We match the lookups back to the submission sheets
+eco3_lookup = pd.DataFrame(eco3_lookup)
+eco3_submissions = eco3_submissions.merge(
+    eco3_lookup,
+    how="left",
+    on="row_id",
+)
+
+eco4_lookup = pd.DataFrame(eco4_lookup)
+eco4_submissions = eco4_submissions.merge(
+    eco4_lookup,
+    how="left",
+    on="row_id",
+)
+
+# Store
+eco3_submissions.to_csv(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions "
+    "ECO3 - with IDS.csv",
+    index=False
+)
+eco4_submissions.to_csv(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions "
+    "ECO4 - with IDS.csv",
+    index=False
+)