diff --git a/.idea/Model.iml b/.idea/Model.iml
index c6561970..09f2e496 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
-
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 50cad4ca..fb10c6b0 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
-
+
diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py
index 4b7a11ec..199b175c 100644
--- a/asset_list/AssetList.py
+++ b/asset_list/AssetList.py
@@ -2507,7 +2507,7 @@ class AssetList:
else:
raise NotImplementedError("Invalid date in outcomes - implement me")
- notes_col = "Notes" if "Notes" in outcomes.columns else "Notes / Outcomes"
+ notes_col = "Notes" if "Notes" in self.outcomes.columns else "Notes / Outcomes"
lookup = lookup.merge(
self.outcomes[["row_id", "Outcome", notes_col, date_col]], how="left", on="row_id"
@@ -2576,6 +2576,7 @@ class AssetList:
def flag_survey_master(
self,
master_filepaths,
+ master_id_colnames,
master_to_asset_list_filepath=None
):
# TODO: This probably needs further expansion
@@ -2591,7 +2592,7 @@ class AssetList:
logger.info("Getting masters and merging onto asset list")
master_surveyed = []
unmatched_submissions = []
- for filepath in master_filepaths:
+ for idx, filepath in enumerate(master_filepaths):
master_data = pd.read_csv(filepath)
# Strip columns
master_data.columns = [c.strip() for c in master_data.columns]
@@ -2618,22 +2619,6 @@ class AssetList:
"SUBMISSION DATE" if "SUBMISSION DATE" in master_data.columns else "SUBMISSION DATE TO INSTALLERS"
)
- # if "UPRN" in master_data.columns:
- # # We just need to check if any were cancelled
- # master_to_append = master_data[
- # ["UPRN", install_col, submission_col]
- # ].rename(
- # columns={
- # "UPRN": self.STANDARD_LANDLORD_PROPERTY_ID,
- # install_col: "survey_status",
- # submission_col: "submission_date"
- # }
- # )
- # master_to_append["cancelled"] = master_to_append["survey_status"].str.lower().str.contains("cancel")
- #
- # master_surveyed.append(master_to_append)
- # continue
-
master_data["row_id"] = master_data.index
self.standardised_asset_list["house_no"] = self.standardised_asset_list.apply(
@@ -2656,8 +2641,6 @@ class AssetList:
)
measure_mix_col = "MEASURE COMBO"
- # Otherwise, we need to match algorithmically
- has_property_id = "UPRN" in master_data.columns
logger.info("Matching master data to asset list")
matched = []
unmatched = []
@@ -2670,13 +2653,22 @@ class AssetList:
if pd.isnull(row[postcode_col]):
continue
- # if has_property_id:
- # submission_uprn = row["UPRN"]
- #
- # if not pd.isnull(submission_uprn):
- # df = self.standardised_asset_list[
- # self.standardised_asset_list[self.STANDARD_LANDLORD_PROPERTY_ID] == submission_uprn
- # ]
+ if master_id_colnames[idx] is not None:
+ # Filter the standardised asset list on this
+ df = self.standardised_asset_list[
+ self.standardised_asset_list[self.STANDARD_LANDLORD_PROPERTY_ID] == row[master_id_colnames[idx]]
+ ]
+ if df.shape[0] == 1:
+ matched.append(
+ {
+ "row_id": row["row_id"],
+ "original_house_no": original_house_no,
+ "original_street": original_street,
+ "original_postcode": original_postcode,
+ self.STANDARD_LANDLORD_PROPERTY_ID: df[self.STANDARD_LANDLORD_PROPERTY_ID].values[0],
+ }
+ )
+ continue
postcode_no_space = row[postcode_col].strip().replace(" ", "").lower()
@@ -2721,6 +2713,7 @@ class AssetList:
self.STANDARD_LANDLORD_PROPERTY_ID: df[self.STANDARD_LANDLORD_PROPERTY_ID].values[0],
}
)
+ continue
if house_no in df["house_no"].values:
df = df[df["house_no"] == house_no]
@@ -2793,6 +2786,7 @@ class AssetList:
}
)
master_to_append["cancelled"] = master_to_append["survey_status"].str.lower().str.contains("cancel")
+ master_to_append["installed"] = master_to_append["survey_status"].str.lower().str.contains("installed")
master_surveyed.append(master_to_append)
unmatched_df = master_data[
master_data["row_id"].isin(unmatched)
diff --git a/asset_list/app.py b/asset_list/app.py
index bb898c09..3441e5de 100644
--- a/asset_list/app.py
+++ b/asset_list/app.py
@@ -62,36 +62,42 @@ def app():
Property UPRN
"""
- # Thurrock
- data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thurrock"
- data_filename = "THURROCK COUNCIL - For analysis.xlsx"
- sheet_name = "Assets"
- postcode_column = 'Postcode'
- fulladdress_column = "Full Address"
- address1_column = None
- address1_method = "house_number_extraction"
+ # Thrive - reconciliation
+ data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation"
+ data_filename = "Thrive Asset List - Complete - Updated May 2025.xlsx"
+ sheet_name = "Sheet1"
+ postcode_column = 'postcode'
+ fulladdress_column = "full_address"
+ address1_column = "address_line_1"
+ address1_method = None
address_cols_to_concat = []
missing_postcodes_method = None
- landlord_year_built = "Construction Date"
+ landlord_year_built = "age_band_calculated"
landlord_os_uprn = None
- landlord_property_type = "Property Type"
- landlord_built_form = "Property Subtype"
+ landlord_property_type = "property_type"
+ landlord_built_form = "build_form"
landlord_wall_construction = None
- landlord_roof_construction = None
- landlord_heating_system = "Main Heating Type"
+ landlord_roof_construction = "assumed_loft_insulation_thickness_updated"
+ landlord_heating_system = "heating_type_updated"
landlord_existing_pv = None
- landlord_property_id = "Property Reference"
- landlord_sap = None
- outcomes_filename = []
- outcomes_sheetname = []
- outcomes_postcode = []
- outcomes_houseno = []
- outcomes_id = []
- outcomes_address = []
- master_filepaths = []
+ landlord_property_id = "thrive_property_id"
+ landlord_sap = "sap_rating_updated"
+ outcomes_filename = [
+ os.path.join(data_folder, "Thrive - Outcomes - April 24-March25 - Corrected.xlsx")
+ ]
+ outcomes_sheetname = ["Sheet1"]
+ outcomes_postcode = ["postcode"]
+ outcomes_houseno = ["No."]
+ outcomes_id = ["thrive_property_id"]
+ outcomes_address = ["address"]
+ master_filepaths = [
+ os.path.join(data_folder, "Thrive Submissions ECO3 - with IDS.csv"),
+ os.path.join(data_folder, "Thrive Submissions ECO4 - with IDS.csv"),
+ ]
master_to_asset_list_filepath = None
+ master_id_colnames = ["thrive_property_id", "thrive_property_id"]
phase = False
- ecosurv_landlords = None
+ ecosurv_landlords = "thrive"
# Medway
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Medway"
diff --git a/asset_list/mappings/heating_systems.py b/asset_list/mappings/heating_systems.py
index 92f59f2c..daef01bb 100644
--- a/asset_list/mappings/heating_systems.py
+++ b/asset_list/mappings/heating_systems.py
@@ -292,4 +292,6 @@ HEATING_MAPPINGS = {
'Communal Heating': 'communal heating',
'No Data': 'unknown',
'Boiler System': 'gas condensing boiler',
+ 'Storage heating': 'electric storage heaters',
+ 'Storage heating (HHRSH)': 'high heat retention storage heaters'
}
diff --git a/asset_list/mappings/roof.py b/asset_list/mappings/roof.py
index 03d6f9af..3b447829 100644
--- a/asset_list/mappings/roof.py
+++ b/asset_list/mappings/roof.py
@@ -6,7 +6,7 @@ STANDARD_ROOF_CONSTRUCTIONS = {
"pitched unknown access to loft",
"piched unknown insulation",
"pitched insulated",
- "pitched less than 100mm insulation"
+ "pitched less than 100mm insulation",
"another dwelling above",
"flat unknown insulation",
"unknown insulated",
@@ -38,4 +38,11 @@ ROOF_CONSTRUCTION_MAPPINGS = {
'200mm': 'pitched insulated',
'0-49mm': 'pitched less than 100mm insulation',
'50mm': 'pitched less than 100mm insulation',
+ '': 'unknown',
+ 'NR': 'unknown',
+ 'Non-joist': 'unknown',
+ '25mm': 'pitched less than 100mm insulation',
+ '400mm+': 'pitched insulated',
+ '12mm': 'pitched less than 100mm insulation'
+
}
diff --git a/etl/customers/thrive/Programme Analysis.py b/etl/customers/thrive/Programme Analysis.py
index 521cfd30..2d6a0d69 100644
--- a/etl/customers/thrive/Programme Analysis.py
+++ b/etl/customers/thrive/Programme Analysis.py
@@ -8,6 +8,8 @@ address the following concerns:
"""
import pandas as pd
+from tqdm import tqdm
+from backend.SearchEpc import SearchEpc
# This is Thrive's list of properties and when they should have been surveyed
thrive_tracker = pd.read_excel(
@@ -51,27 +53,10 @@ original_columns = {
}
original_asset_list = original_asset_list[original_columns.keys()].rename(columns=original_columns)
-original_asset_list["Data Source"] = "Thrive Tracker"
+original_asset_list["Data Source"] = "Original Asset List"
+original_asset_list = original_asset_list.drop_duplicates()
# We append on the missed properties, with the information we have
-# 'Unnamed: 0', 'Thrive Notes', 'Priority', 'UPRN', 'Short Address', '#',
-# 'Adress Line 1', 'Postcode', 'Property Type', 'Build Form',
-# 'Build year', 'Assumed mm ', 'SAP', 'Name', 'Primary Number',
-# 'Secondary Number', 'Email', 'Thrive use: Tenancy Number',
-# 'Special Requirements ', 'CIGA', 'Date CIGA check received',
-# 'Proposed Progamme', 'New Proposed Programme',
-# 'Missing from Route March?', 'Date Letters Sent (w.c)', 'Work Type',
-# 'Warmfront Survey Notes', 'Notes Reply (Thrive)', 'Loft Insulation',
-# 'Cost for Vents', 'Cavity Depth', 'Cavity Condition',
-# 'Date Submitted to installer', 'PRRN Number',
-# 'Loft insulation required? (Thrive)', 'Date booked ',
-# 'Completed\n(yes/no)', 'Date Completed',
-# 'Vents installed?\n(number and location)',
-# 'Loft Top Up\n(amount of insulation) ', 'CIGA Warranty Provided ',
-# 'Notes', 'Works Number', 'CW KGI Uploaded', 'Keystone Fan Added',
-# 'SA Cavity Condition Updated', 'SA Loft & Energy Updated',
-# 'PRRN Submitted '
-
missed_properties["Full Address"] = (
missed_properties["#"].astype(str) + ", " +
missed_properties["Adress Line 1"].astype(str) + ", " +
@@ -94,6 +79,19 @@ missed_properties["WFT Findings"] = "Property Not Inspected"
missed_properties["ECO Eligibility"] = "Property Not Inspected"
missed_properties["Data Source"] = "Thrive Tracker"
+# We identify duplicated ids in original_asset_list
+dupe_ids = original_asset_list[original_asset_list["thrive_property_id"].duplicated()]["thrive_property_id"].unique()
+dupes = original_asset_list[
+ original_asset_list["thrive_property_id"].isin(dupe_ids)
+].copy()
+dupes = dupes.sort_values("thrive_property_id")
+
+original_asset_list = original_asset_list.rename(
+ columns={
+ "detailed_property_type": "build_form"
+ }
+)
+
master_list = pd.concat([missed_properties, original_asset_list], ignore_index=True)
# We were provided with a data update for a sample of properties. We update the data with this information
@@ -103,12 +101,339 @@ data_update = pd.read_excel(
header=0
)
-new_properties = data_update[~data_update["UPRN"].isin(master_list["thrive_property_id"].astype(str).values)]
+new_properties = data_update[~data_update["UPRN"].isin(master_list["thrive_property_id"].astype(str).values)].copy()
+new_properties["Full Address"] = (
+ new_properties["#"].astype(str) + ", " +
+ new_properties["Adress Line 1"].astype(str) + ", " +
+ new_properties["Postcode"].astype(str)
+)
+new_properties = new_properties[missed_columns.keys()].rename(columns=missed_columns)
+new_properties["WFT Findings"] = "Property Not Inspected"
+new_properties["ECO Eligibility"] = "Property Not Inspected"
+new_properties["Data Source"] = "13.05.2025 Data Update"
+
+master_list = pd.concat([new_properties, master_list])
+
+# We append any new data on heating system, heating type, and insulation type, based on the data update
+master_list = master_list.merge(
+ data_update[["UPRN", "Heating Type", "Assumed mm ", "SAP"]].rename(
+ columns={
+ "Heating Type": "heating_type_updated",
+ "Assumed mm ": "assumed_loft_insulation_thickness_updated",
+ "SAP": "sap_rating_updated"
+ }
+ ),
+ how="left",
+ left_on="thrive_property_id",
+ right_on="UPRN"
+)
+
+# We fill the missings
+master_list["heating_type_updated"] = master_list["heating_type_updated"].fillna(master_list["heating_type"])
+master_list["assumed_loft_insulation_thickness_updated"] = master_list[
+ "assumed_loft_insulation_thickness_updated"
+].fillna(master_list["assumed_loft_insulation_thickness"])
+master_list["sap_rating_updated"] = master_list["sap_rating_updated"].fillna(master_list["sap_rating"])
+
+assert not master_list["thrive_property_id"].duplicated().sum(), "Duplicate thrive_property_id found in master_list"
+
+master_list["Address in tracker"] = master_list["thrive_property_id"].astype(str).isin(
+ thrive_tracker["UPRN"].astype(str).values
+)
+
+# Store the asset list - call it master asset list updated May 2025
+master_list = master_list.drop(columns=["UPRN"])
+master_list["thrive_property_id"] = master_list["thrive_property_id"].astype(str)
+# master_list.to_excel(
+# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - "
+# "Complete - Updated May 2025.xlsx",
+# )
+
+master_list["house_number_TEMP"] = master_list.apply(
+ lambda x: SearchEpc.get_house_number(address=x["full_address"], postcode=x["postcode"]),
+ axis=1
+)
+
+# We add in the status of the property
+# TODO: Add the status of the property from the Thrive tracker
+outcomes = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive - Outcomes - April "
+ "24-March25.xlsx",
+ header=0
+)
+outcomes["row_id"] = outcomes.index
+
+# We have ids which have the same phone number, but different UPRN, so we don't match to the tracker for these
+tracker_for_matching = thrive_tracker[
+ ~thrive_tracker["UPRN"].isin(['OAKGRE0065ABBLDW1', 'OAKGRE0066ABBLDW1', 'JACKET0102ABBLDW1', 'BELLCL0008BEDMDW1'])
+].copy()
+tracker_for_matching["Full Address"] = (
+ tracker_for_matching["#"].astype(str) + ", " +
+ tracker_for_matching["Adress Line 1"].astype(str) + ", " +
+ tracker_for_matching["Postcode"].astype(str)
+)
+
+outcomes_id_lookup = []
+for _, x in tqdm(outcomes.iterrows(), total=len(outcomes)):
+
+ hn = str(x["No."])
+ address = x["Address"]
+ postcode = x["Postcode"]
+ contact_no = str(x["Contact No"]) if not pd.isnull(x["Contact No"]) else str(x["Contact No.1"])
+ contact_no = None if contact_no == "nan" else contact_no
+
+ if address == "292 Micklefield Road":
+ hn = "292"
+
+ if (address == "Micklefield Road") & (hn == "302"):
+ hn = "292"
+
+ if (address == "103a Norfolk Road Rickmansworth Hertfordshire WD3 1JY"):
+ hn = "103a"
+
+ if (address == "105a Norfolk Road Rickmansworth Hertfordshire WD3 1JY"):
+ hn = "105a"
+
+ if (address == "107a Norfolk Road Rickmansworth Hertfordshire WD3 1JY"):
+ hn = "107a"
+
+ #
+ # # We match this to the tracker
+ # m1 = tracker_for_matching[tracker_for_matching["Primary Number"].astype(str) == contact_no]
+ # # Many of the phone numbers don't have a leading zero in the tracker so we add them
+ # if (m1.shape[0] != 1) and not pd.isnull(contact_no):
+ # m1 = tracker_for_matching[tracker_for_matching["Primary Number"].astype(str) == contact_no.lstrip("0")]
+ #
+ # if m1.shape[0] > 1:
+ # raise ValueError(
+ # f"Error for {hn} - {address} - {postcode} - {contact_no} in the tracker"
+ # )
+
+ # if m1.empty:
+ m1 = tracker_for_matching[
+ (tracker_for_matching["#"].astype(str) == hn) &
+ (tracker_for_matching["Postcode"] == postcode)
+ ]
+
+ if m1.empty:
+ # Some properties aren't in the tracker, we match to the master list
+ m1 = master_list[
+ (master_list["house_number_TEMP"].astype(str) == hn) &
+ (master_list["postcode"] == postcode)
+ ]
+ outcomes_id_lookup.append(
+ {
+ "row_id": x["row_id"],
+ "thrive_property_id": m1["thrive_property_id"].values[0],
+ "address": m1["full_address"].values[0],
+ "postcode": m1["postcode"].values[0],
+ }
+ )
+ continue
+
+ if m1.shape[0] != 1:
+ raise ValueError(
+ f"Error for {hn} - {address} - {postcode} - {contact_no} in the tracker"
+ )
+
+ # We add the status to the master list
+ outcomes_id_lookup.append(
+ {
+ "row_id": x["row_id"],
+ "thrive_property_id": m1["UPRN"].values[0],
+ "address": m1["Full Address"].values[0],
+ "postcode": m1["Postcode"].values[0],
+ }
+ )
+
+outcomes_id_lookup = pd.DataFrame(outcomes_id_lookup)
+outcomes = outcomes.merge(
+ outcomes_id_lookup,
+ how="left",
+ left_on="row_id",
+ right_on="row_id"
+)
+
+outcomes = outcomes.drop(columns=["row_id"])
+outcomes = outcomes.rename(
+ columns={
+ "Outcomes": "Outcome",
+ "Notes (If 'no "
+ "answer' under outcomes, have you checked around the property for access issues where possible?)": "Notes",
+ }
+)
+# Store the corrected outcomes
+# outcomes.to_excel(
+# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive - Outcomes -
+# April 24-March25 - Corrected.xlsx",
+# index=False
+# )
-data_update = = data_update[["UPRN", ""]]
-# TODO: Flag the Thrive priorities and create a separate project code for these
-# TODO: Add the general project code
-# TODO: Add the thrive
\ No newline at end of file
+def parse_date(value):
+    """Parse a single 'Date letters sent' cell into a timestamp.
+
+    Args:
+        value: The raw cell. Strings may carry a 'W.C' / 'w/c' marker, which is
+            removed (case-insensitively) before parsing. Non-string cells are
+            accepted so the function can be used with ``Series.apply`` on a
+            column that mixes text, already-parsed dates and blanks.
+
+    Returns:
+        The parsed ``pd.Timestamp`` (interpreted day-first), or ``pd.NaT`` when
+        the cell is missing or cannot be parsed.
+    """
+    if isinstance(value, str):
+        # Strip any 'W.C' or 'w/c' marker and clean whitespace
+        cleaned = value.strip().lower().replace('w.c', '').replace('w/c', '').strip()
+    elif pd.isna(value):
+        # Blank cells arrive as NaN/None and have no .strip(); calling it used
+        # to raise AttributeError before the try block was ever reached.
+        return pd.NaT
+    else:
+        # Anything else (e.g. an already-parsed datetime) is handed to pandas as-is
+        cleaned = value
+
+    try:
+        # Parse with dayfirst=True; errors='coerce' maps unparseable text to NaT
+        return pd.to_datetime(cleaned, dayfirst=True, errors='coerce')
+    except (ValueError, TypeError, OverflowError):
+        # Unsupported cell types (e.g. datetime.time) fall back to NaT
+        return pd.NaT
+
+
+outcomes['Parsed Date'] = outcomes['Date letters sent'].apply(parse_date)
+
+# Next step - match the submissions master to the asset list. We will append on the UPRN
+eco3_submissions = pd.read_csv(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions "
+ "ECO3.csv",
+ header=0
+)
+eco3_submissions["row_id"] = eco3_submissions.index
+
+eco4_submissions = pd.read_csv(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions "
+ "ECO4.csv",
+ header=0
+)
+eco4_submissions["row_id"] = eco4_submissions.index
+
+# List of properties never on the asset list
+not_on_master = [
+ "7+FOXGROVE PATH+WD19 6YL", "9+FOXGROVE PATH+WD19 6YL", "11+FOXGROVE PATH+WD19 6YL",
+ "20+LINCOLN DRIVE+WD19 7BA", "22+LINCOLN DRIVE+WD19 7BA", "24+LINCOLN DRIVE+WD19 7BA",
+ "26+LINCOLN DRIVE+WD19 7BA", "1+Ryman Court, Stag Lane+WD3 5HN", "6+Ryman Court, Stag Lane+WD3 5HN",
+ "9+Ryman Court, Stag Lane+WD3 5HN", "10+Ryman Court, Stag Lane+WD3 5HN", "11+Ryman Court, Stag Lane+WD3 5HN",
+ "12+Ryman Court, Stag Lane+WD3 5HN", "14+Ryman Court, Stag Lane+WD3 5HN", "15+Ryman Court, Stag Lane+WD3 5HN",
+ "20+Ryman Court, Stag Lane+WD3 5HN", "21+Ryman Court, Stag Lane+WD3 5HN", "22+Ryman Court, Stag Lane+WD3 5HN",
+ "25+Ryman Court, Stag Lane+WD3 5HN", "26+Ryman Court, Stag Lane+WD3 5HN", "31+Ryman Court, Stag Lane+WD3 5HN",
+ "33+Ryman Court, Stag Lane+WD3 5HN", "34+Ryman Court, Stag Lane+WD3 5HN",
+ '37+Ryman Court, Stag Lane+WD3 5HN', '38+Ryman Court, Stag Lane+WD3 5HN', '39+Ryman Court, Stag Lane+WD3 5HN',
+ '41+Ryman Court, Stag Lane+WD3 5HN', '43+Ryman Court, Stag Lane+WD3 5HN', '45+Ryman Court, Stag Lane+WD3 5HN',
+ '46+Ryman Court, Stag Lane+WD3 5HN', '48+Ryman Court, Stag Lane+WD3 5HN', '49+Ryman Court, Stag Lane+WD3 5HN',
+ '50+Ryman Court, Stag Lane+WD3 5HN', '52+Ryman Court, Stag Lane+WD3 5HN'
+]
+
+eco3_remap = {
+ "19+OAKHILL ROAD+WD5 8RE": ('19', 'OAKHILL ROAD', 'WD3 9RE'),
+ "29+OAKHILL ROAD+WD5 8RE": ('29', 'OAKHILL ROAD', 'WD3 9RE'),
+ "31+OAKHILL ROAD+WD5 8RE": ('31', 'OAKHILL ROAD', 'WD3 9RE'),
+ "44+OAKHILL ROAD+WD5 8RE": ('44', 'OAKHILL ROAD', 'WD3 9RF'),
+ "64+OAKHILL ROAD+WD4 8RF": ('64', 'OAKHILL ROAD', 'WD3 9RF'),
+ "11+LANCASTER WAY+WD3 PRE": ('11', 'LANCASTER WAY', 'WD5 0PQ'),
+ "16+LANCASTER WAY+WD3 PRE": ('16', 'LANCASTER WAY', 'WD5 0PQ'),
+ "58+TALBOT ROAD +WD31HE": ('58', 'TALBOT ROAD', 'WD3 1HE'),
+ "10+PEARTREE COURT/WELWYN GARDEN CITY+AL73XN": ('10', 'PEARTREE COURT/WELWYN GARDEN CITY', 'AL7 3XN'),
+ "25+GOBLINS GREEN/WELWYN GARDEN CITY+AL73ST": ('25', 'GOBLINS GREEN/WELWYN GARDEN CITY', 'AL7 3ST'),
+ "32+GOBLINS GREEN/WELWYN GARDEN CITY+AL73ST": ('32', 'GOBLINS GREEN/WELWYN GARDEN CITY', 'AL7 3ST'),
+ "94+BAKER ST/POTTERS BAR+EN62EP": ('94', 'BAKER ST/POTTERS BAR', 'EN6 2EP'),
+ '33+Tudor Way+WD3JA': ('33', 'Tudor Way', 'WD3 8JA'),
+ '120+Hazlewood lane +WD5 0HF': ('120', 'Hazlewood lane', 'WD5 0HE'),
+ '35+Rosehill gardens +WD5 0HE': ('35', 'Rosehill gardens', 'WD5 0HF'),
+ '18+Rosehill gardens +WD5 0HE': ('18', 'Rosehill gardens', 'WD5 0HF'),
+ '34+Rosehill gardens +WD5 0HE': ('34', 'Rosehill gardens', 'WD5 0HF'),
+ '58+Rosehill gardens +WD5 0HE': ('58', 'Rosehill gardens', 'WD5 0HF'),
+ '48+Rosehill gardens +WD5 0HE': ('48', 'Rosehill gardens', 'WD5 0HF'),
+ '45+Rosehill gardens +WD5 0HE': ('45', 'Rosehill gardens', 'WD5 0HF'),
+ '6+Rosehill gardens +WD5 0HE': ('6', 'Rosehill gardens', 'WD5 0HF'),
+ '2+Rosehill gardens +WD5 0HE': ('2', 'Rosehill gardens', 'WD5 0HF'),
+ '29+Rosehill gardens +WD5 0HE': ('29', 'Rosehill gardens', 'WD5 0HF'),
+ '61+GOLDEN DELL+AL8 4EE': ('61', 'GOLDEN DELL', 'AL7 4EE'),
+ '2O+EDINBURGH AVENUE+WD3 8LB': ('20', 'EDINBURGH AVENUE', 'WD3 8LB'),
+}
+
+eco3_lookup = []
+for _, row in tqdm(eco3_submissions.iterrows(), total=len(eco3_submissions)):
+ hn = row["NO "]
+ pc = row["Post Code"]
+ street = row["Street / Block Name"]
+ key = f"{hn}+{street}+{pc}"
+ if key in not_on_master:
+ continue
+
+ if key in eco3_remap:
+ hn, street, pc = eco3_remap[key]
+ # The postcode is different to the asset list
+
+ # We filter the asset list, because it's hard to know how accurate this is
+ m1 = master_list[
+ (master_list["house_number_TEMP"].astype(str) == hn) &
+ (master_list["postcode"] == pc)
+ ]
+
+ if m1.shape[0] != 1:
+ raise ValueError(
+ f"Error for {key} in the tracker"
+ )
+
+ eco3_lookup.append(
+ {
+ "row_id": row["row_id"],
+ "thrive_property_id": m1["thrive_property_id"].values[0],
+ "submission_house_number": row["NO "],
+ "submission_address1": row["Street / Block Name"],
+ "submission_postcode": row["Post Code"],
+ }
+ )
+
+eco4_lookup = []
+for _, row in tqdm(eco4_submissions.iterrows(), total=len(eco4_submissions)):
+ hn = row["NO."]
+ pc = row["Post Code"]
+ street = row["Street / Block Name"]
+ key = f"{hn}+{street}+{pc}"
+ if key in not_on_master:
+ continue
+
+ if key in eco3_remap:
+ hn, street, pc = eco3_remap[key]
+ # The postcode is different to the asset list
+
+ # We filter the asset list, because it's hard to know how accurate this is
+ m1 = master_list[
+ (master_list["house_number_TEMP"].astype(str) == hn) &
+ (master_list["postcode"].str.lower() == pc.lower())
+ ]
+
+ if m1.shape[0] != 1:
+ raise ValueError(
+ f"Error for {key} in the tracker"
+ )
+
+ eco4_lookup.append(
+ {
+ "row_id": row["row_id"],
+ "thrive_property_id": m1["thrive_property_id"].values[0],
+ "submission_house_number": row["NO."],
+ "submission_address1": row["Street / Block Name"],
+ "submission_postcode": row["Post Code"],
+ }
+ )
+
+# We match the lookups back to the submission sheets
+eco3_lookup = pd.DataFrame(eco3_lookup)
+eco3_submissions = eco3_submissions.merge(
+ eco3_lookup,
+ how="left",
+ on="row_id",
+)
+
+eco4_lookup = pd.DataFrame(eco4_lookup)
+eco4_submissions = eco4_submissions.merge(
+ eco4_lookup,
+ how="left",
+ on="row_id",
+)
+
+# Store
+eco3_submissions.to_csv(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions "
+ "ECO3 - with IDS.csv",
+ index=False
+)
+eco4_submissions.to_csv(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions "
+ "ECO4 - with IDS.csv",
+ index=False
+)