From c0cf848db2676f93fd0e458c327de7554370e2af Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 20 May 2025 15:59:38 +0100 Subject: [PATCH] re-building thrive's programme --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- asset_list/AssetList.py | 48 ++- asset_list/app.py | 52 +-- asset_list/mappings/heating_systems.py | 2 + asset_list/mappings/roof.py | 9 +- etl/customers/thrive/Programme Analysis.py | 373 +++++++++++++++++++-- 7 files changed, 411 insertions(+), 77 deletions(-) diff --git a/.idea/Model.iml b/.idea/Model.iml index c6561970..09f2e496 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml index 50cad4ca..fb10c6b0 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py index 4b7a11ec..199b175c 100644 --- a/asset_list/AssetList.py +++ b/asset_list/AssetList.py @@ -2507,7 +2507,7 @@ class AssetList: else: raise NotImplementedError("Invalid date in outcomes - implement me") - notes_col = "Notes" if "Notes" in outcomes.columns else "Notes / Outcomes" + notes_col = "Notes" if "Notes" in self.outcomes.columns else "Notes / Outcomes" lookup = lookup.merge( self.outcomes[["row_id", "Outcome", notes_col, date_col]], how="left", on="row_id" @@ -2576,6 +2576,7 @@ class AssetList: def flag_survey_master( self, master_filepaths, + master_id_colnames, master_to_asset_list_filepath=None ): # TODO: This probably needs further expansion @@ -2591,7 +2592,7 @@ class AssetList: logger.info("Getting masters and merging onto asset list") master_surveyed = [] unmatched_submissions = [] - for filepath in master_filepaths: + for idx, filepath in enumerate(master_filepaths): master_data = pd.read_csv(filepath) # Strip columns master_data.columns = [c.strip() for c in master_data.columns] @@ -2618,22 +2619,6 @@ class AssetList: "SUBMISSION DATE" if "SUBMISSION DATE" in master_data.columns else "SUBMISSION DATE TO INSTALLERS" ) - # if "UPRN" in master_data.columns: - # # We just need to check if any were cancelled - # master_to_append = master_data[ - # ["UPRN", install_col, submission_col] - # ].rename( - # columns={ - # "UPRN": self.STANDARD_LANDLORD_PROPERTY_ID, - # install_col: "survey_status", - # submission_col: "submission_date" - # } - # ) - # master_to_append["cancelled"] = master_to_append["survey_status"].str.lower().str.contains("cancel") - # - # master_surveyed.append(master_to_append) - # continue - master_data["row_id"] = master_data.index self.standardised_asset_list["house_no"] = self.standardised_asset_list.apply( @@ -2656,8 +2641,6 @@ class AssetList: ) measure_mix_col = "MEASURE COMBO" - # Otherwise, we need to match algorithmically - has_property_id = "UPRN" in master_data.columns logger.info("Matching master data to asset list") matched = [] unmatched = [] @@ -2670,13 +2653,22 @@ class AssetList: if pd.isnull(row[postcode_col]): continue - # if has_property_id: - # submission_uprn = row["UPRN"] - # - # if not pd.isnull(submission_uprn): - # df = self.standardised_asset_list[ - # self.standardised_asset_list[self.STANDARD_LANDLORD_PROPERTY_ID] == submission_uprn - # ] + if master_id_colnames[idx] is not None: + # Filter the standardised asset list on this + df = self.standardised_asset_list[ + self.standardised_asset_list[self.STANDARD_LANDLORD_PROPERTY_ID] == row[master_id_colnames[idx]] + ] + if df.shape[0] == 1: + matched.append( + { + "row_id": row["row_id"], + "original_house_no": original_house_no, + "original_street": original_street, + "original_postcode": original_postcode, + self.STANDARD_LANDLORD_PROPERTY_ID: df[self.STANDARD_LANDLORD_PROPERTY_ID].values[0], + } + ) + continue postcode_no_space = row[postcode_col].strip().replace(" ", "").lower() @@ -2721,6 +2713,7 @@ class AssetList: self.STANDARD_LANDLORD_PROPERTY_ID: df[self.STANDARD_LANDLORD_PROPERTY_ID].values[0], } ) + continue if house_no in df["house_no"].values: df = df[df["house_no"] == house_no] @@ -2793,6 +2786,7 @@ class AssetList: } ) master_to_append["cancelled"] = master_to_append["survey_status"].str.lower().str.contains("cancel") + master_to_append["installed"] = master_to_append["survey_status"].str.lower().str.contains("installed") master_surveyed.append(master_to_append) unmatched_df = master_data[ master_data["row_id"].isin(unmatched) diff --git a/asset_list/app.py b/asset_list/app.py index bb898c09..3441e5de 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -62,36 +62,42 @@ def app(): Property UPRN """ - # Thurrock - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thurrock" - data_filename = "THURROCK COUNCIL - For analysis.xlsx" - sheet_name = "Assets" - postcode_column = 'Postcode' - fulladdress_column = "Full Address" - address1_column = None - address1_method = "house_number_extraction" + # Thrive - reconciliation + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation" + data_filename = "Thrive Asset List - Complete - Updated May 2025.xlsx" + sheet_name = "Sheet1" + postcode_column = 'postcode' + fulladdress_column = "full_address" + address1_column = "address_line_1" + address1_method = None address_cols_to_concat = [] missing_postcodes_method = None - landlord_year_built = "Construction Date" + landlord_year_built = "age_band_calculated" landlord_os_uprn = None - landlord_property_type = "Property Type" - landlord_built_form = "Property Subtype" + landlord_property_type = "property_type" + landlord_built_form = "build_form" landlord_wall_construction = None - landlord_roof_construction = None - landlord_heating_system = "Main Heating Type" + landlord_roof_construction = "assumed_loft_insulation_thickness_updated" + landlord_heating_system = "heating_type_updated" landlord_existing_pv = None - landlord_property_id = "Property Reference" - landlord_sap = None - outcomes_filename = [] - outcomes_sheetname = [] - outcomes_postcode = [] - outcomes_houseno = [] - outcomes_id = [] - outcomes_address = [] - master_filepaths = [] + landlord_property_id = "thrive_property_id" + landlord_sap = "sap_rating_updated" + outcomes_filename = [ + os.path.join(data_folder, "Thrive - Outcomes - April 24-March25 - Corrected.xlsx") + ] + outcomes_sheetname = ["Sheet1"] + outcomes_postcode = ["postcode"] + outcomes_houseno = ["No."] + outcomes_id = ["thrive_property_id"] + outcomes_address = ["address"] + master_filepaths = [ + os.path.join(data_folder, "Thrive Submissions ECO3 - with IDS.csv"), + os.path.join(data_folder, "Thrive Submissions ECO4 - with IDS.csv"), + ] master_to_asset_list_filepath = None + master_id_colnames = ["thrive_property_id", "thrive_property_id"] phase = False - ecosurv_landlords = None + ecosurv_landlords = "thrive" # Medway data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Medway" diff --git a/asset_list/mappings/heating_systems.py b/asset_list/mappings/heating_systems.py index 92f59f2c..daef01bb 100644 --- a/asset_list/mappings/heating_systems.py +++ b/asset_list/mappings/heating_systems.py @@ -292,4 +292,6 @@ HEATING_MAPPINGS = { 'Communal Heating': 'communal heating', 'No Data': 'unknown', 'Boiler System': 'gas condensing boiler', + 'Storage heating': 'electric storage heaters', + 'Storage heating (HHRSH)': 'high heat retention storage heaters' } diff --git a/asset_list/mappings/roof.py b/asset_list/mappings/roof.py index 03d6f9af..3b447829 100644 --- a/asset_list/mappings/roof.py +++ b/asset_list/mappings/roof.py @@ -6,7 +6,7 @@ STANDARD_ROOF_CONSTRUCTIONS = { "pitched unknown access to loft", "piched unknown insulation", "pitched insulated", - "pitched less than 100mm insulation" + "pitched less than 100mm insulation", "another dwelling above", "flat unknown insulation", "unknown insulated", @@ -38,4 +38,11 @@ ROOF_CONSTRUCTION_MAPPINGS = { '200mm': 'pitched insulated', '0-49mm': 'pitched less than 100mm insulation', '50mm': 'pitched less than 100mm insulation', + '': 'unknown', + 'NR': 'unknown', + 'Non-joist': 'unknown', + '25mm': 'pitched less than 100mm insulation', + '400mm+': 'pitched insulated', + '12mm': 'pitched less than 100mm insulation' + } diff --git a/etl/customers/thrive/Programme Analysis.py b/etl/customers/thrive/Programme Analysis.py index 521cfd30..2d6a0d69 100644 --- a/etl/customers/thrive/Programme Analysis.py +++ b/etl/customers/thrive/Programme Analysis.py @@ -8,6 +8,8 @@ address the following concerns: """ import pandas as pd +from tqdm import tqdm +from backend.SearchEpc import SearchEpc # This is Thrive's list of properties and when they should have been surveyed thrive_tracker = pd.read_excel( @@ -51,27 +53,10 @@ original_columns = { } original_asset_list = original_asset_list[original_columns.keys()].rename(columns=original_columns) -original_asset_list["Data Source"] = "Thrive Tracker" +original_asset_list["Data Source"] = "Original Asset List" +original_asset_list = original_asset_list.drop_duplicates() # We append on the missed properties, with the information we have -# 'Unnamed: 0', 'Thrive Notes', 'Priority', 'UPRN', 'Short Address', '#', -# 'Adress Line 1', 'Postcode', 'Property Type', 'Build Form', -# 'Build year', 'Assumed mm ', 'SAP', 'Name', 'Primary Number', -# 'Secondary Number', 'Email', 'Thrive use: Tenancy Number', -# 'Special Requirements ', 'CIGA', 'Date CIGA check received', -# 'Proposed Progamme', 'New Proposed Programme', -# 'Missing from Route March?', 'Date Letters Sent (w.c)', 'Work Type', -# 'Warmfront Survey Notes', 'Notes Reply (Thrive)', 'Loft Insulation', -# 'Cost for Vents', 'Cavity Depth', 'Cavity Condition', -# 'Date Submitted to installer', 'PRRN Number', -# 'Loft insulation required? (Thrive)', 'Date booked ', -# 'Completed\n(yes/no)', 'Date Completed', -# 'Vents installed?\n(number and location)', -# 'Loft Top Up\n(amount of insulation) ', 'CIGA Warranty Provided ', -# 'Notes', 'Works Number', 'CW KGI Uploaded', 'Keystone Fan Added', -# 'SA Cavity Condition Updated', 'SA Loft & Energy Updated', -# 'PRRN Submitted ' - missed_properties["Full Address"] = ( missed_properties["#"].astype(str) + ", " + missed_properties["Adress Line 1"].astype(str) + ", " + @@ -94,6 +79,19 @@ missed_properties["WFT Findings"] = "Property Not Inspected" missed_properties["ECO Eligibility"] = "Property Not Inspected" missed_properties["Data Source"] = "Thrive Tracker" +# We de-dupe ides in original_asset_list +dupe_ids = original_asset_list[original_asset_list["thrive_property_id"].duplicated()]["thrive_property_id"].unique() +dupes = original_asset_list[ + original_asset_list["thrive_property_id"].isin(dupe_ids) +].copy() +dupes = dupes.sort_values("thrive_property_id") + +original_asset_list = original_asset_list.rename( + columns={ + "detailed_property_type": "build_form" + } +) + master_list = pd.concat([missed_properties, original_asset_list], ignore_index=True) # We were provided with a data update for a sample of properties. We update the data with this information @@ -103,12 +101,339 @@ data_update = pd.read_excel( header=0 ) -new_properties = data_update[~data_update["UPRN"].isin(master_list["thrive_property_id"].astype(str).values)] +new_properties = data_update[~data_update["UPRN"].isin(master_list["thrive_property_id"].astype(str).values)].copy() +new_properties["Full Address"] = ( + new_properties["#"].astype(str) + ", " + + new_properties["Adress Line 1"].astype(str) + ", " + + new_properties["Postcode"].astype(str) +) +new_properties = new_properties[missed_columns.keys()].rename(columns=missed_columns) +new_properties["WFT Findings"] = "Property Not Inspected" +new_properties["ECO Eligibility"] = "Property Not Inspected" +new_properties["Data Source"] = "13.05.2025 Data Update" + +master_list = pd.concat([new_properties, master_list]) + +# We append any new data on heating system, heating type, and insulation type, based on the data update +master_list = master_list.merge( + data_update[["UPRN", "Heating Type", "Assumed mm ", "SAP"]].rename( + columns={ + "Heating Type": "heating_type_updated", + "Assumed mm ": "assumed_loft_insulation_thickness_updated", + "SAP": "sap_rating_updated" + } + ), + how="left", + left_on="thrive_property_id", + right_on="UPRN" +) + +# We fill the missings +master_list["heating_type_updated"] = master_list["heating_type_updated"].fillna(master_list["heating_type"]) +master_list["assumed_loft_insulation_thickness_updated"] = master_list[ + "assumed_loft_insulation_thickness_updated" +].fillna(master_list["assumed_loft_insulation_thickness"]) +master_list["sap_rating_updated"] = master_list["sap_rating_updated"].fillna(master_list["sap_rating"]) + +assert not master_list["thrive_property_id"].duplicated().sum(), "Duplicate thrive_property_id found in master_list" + +master_list["Address in tracker"] = master_list["thrive_property_id"].astype(str).isin( + thrive_tracker["UPRN"].astype(str).values +) + +# Those the asset list - call it master asset list updated May2025 +master_list = master_list.drop(columns=["UPRN"]) +master_list["thrive_property_id"] = master_list["thrive_property_id"].astype(str) +# master_list.to_excel( +# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - " +# "Complete - Updated May 2025.xlsx", +# ) + +master_list["house_number_TEMP"] = master_list.apply( + lambda x: SearchEpc.get_house_number(address=x["full_address"], postcode=x["postcode"]), + axis=1 +) + +# We add in the status of the property +# TODO: Add the status of the property from the Thrive tracker +outcomes = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive - Outcomes - April " + "24-March25.xlsx", + header=0 +) +outcomes["row_id"] = outcomes.index + +# We have two ids which have the same phohe. nymber, but different UPRN, so we don't match to the tracker for these +tracker_for_matching = thrive_tracker[ + ~thrive_tracker["UPRN"].isin(['OAKGRE0065ABBLDW1', 'OAKGRE0066ABBLDW1', 'JACKET0102ABBLDW1', 'BELLCL0008BEDMDW1']) +].copy() +tracker_for_matching["Full Address"] = ( + tracker_for_matching["#"].astype(str) + ", " + + tracker_for_matching["Adress Line 1"].astype(str) + ", " + + tracker_for_matching["Postcode"].astype(str) +) + +outcomes_id_lookup = [] +for _, x in tqdm(outcomes.iterrows(), total=len(outcomes)): + + hn = str(x["No."]) + address = x["Address"] + postcode = x["Postcode"] + contact_no = str(x["Contact No"]) if not pd.isnull(x["Contact No"]) else str(x["Contact No.1"]) + contact_no = None if contact_no == "nan" else contact_no + + if address == "292 Micklefield Road": + hn = "292" + + if (address == "Micklefield Road") & (hn == "302"): + hn = "292" + + if (address == "103a Norfolk Road Rickmansworth Hertfordshire WD3 1JY"): + hn = "103a" + + if (address == "105a Norfolk Road Rickmansworth Hertfordshire WD3 1JY"): + hn = "105a" + + if (address == "107a Norfolk Road Rickmansworth Hertfordshire WD3 1JY"): + hn = "107a" + + # + # # We match this to the tracker + # m1 = tracker_for_matching[tracker_for_matching["Primary Number"].astype(str) == contact_no] + # # Many of the phone numbers don't have a leading zero in the tracker so we add them + # if (m1.shape[0] != 1) and not pd.isnull(contact_no): + # m1 = tracker_for_matching[tracker_for_matching["Primary Number"].astype(str) == contact_no.lstrip("0")] + # + # if m1.shape[0] > 1: + # raise ValueError( + # f"Error for {hn} - {address} - {postcode} - {contact_no} in the tracker" + # ) + + # if m1.empty: + m1 = tracker_for_matching[ + (tracker_for_matching["#"].astype(str) == hn) & + (tracker_for_matching["Postcode"] == postcode) + ] + + if m1.empty: + # Some properties aren't in the tracker, we match to the master list + m1 = master_list[ + (master_list["house_number_TEMP"].astype(str) == hn) & + (master_list["postcode"] == postcode) + ] + outcomes_id_lookup.append( + { + "row_id": x["row_id"], + "thrive_property_id": m1["thrive_property_id"].values[0], + "address": m1["full_address"].values[0], + "postcode": m1["postcode"].values[0], + } + ) + continue + + if m1.shape[0] != 1: + raise ValueError( + f"Error for {hn} - {address} - {postcode} - {contact_no} in the tracker" + ) + + # We add the status to the master list + outcomes_id_lookup.append( + { + "row_id": x["row_id"], + "thrive_property_id": m1["UPRN"].values[0], + "address": m1["Full Address"].values[0], + "postcode": m1["Postcode"].values[0], + } + ) + +outcomes_id_lookup = pd.DataFrame(outcomes_id_lookup) +outcomes = outcomes.merge( + outcomes_id_lookup, + how="left", + left_on="row_id", + right_on="row_id" +) + +outcomes = outcomes.drop(columns=["row_id"]) +outcomes = outcomes.rename( + columns={ + "Outcomes": "Outcome", + "Notes (If 'no " + "answer' under outcomes, have you checked around the property for access issues where possible?)": "Notes", + } +) +# Store the corrected outcomes +# outcomes.to_excel( +# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive - Outcomes - +# April 24-March25 - Corrected.xlsx", +# index=False +# ) -data_update = = data_update[["UPRN", ""]] -# TODO: Flag the Thrive priorities and create a separate project code for these -# TODO: Add the general project code -# TODO: Add the thrive \ No newline at end of file +def parse_date(value): + # Strip any 'W.C' or 'w/c' prefix and clean whitespace + value = value.strip().lower().replace('w.c', '').replace('w/c', '').strip() + try: + # Try parsing the date with dayfirst=True + return pd.to_datetime(value, dayfirst=True, errors='coerce') + except Exception: + return pd.NaT + + +outcomes['Parsed Date'] = outcomes['Date letters sent'].apply(parse_date) + +# Next step - match the submissions master to the asset list. We will append on the UPRN +eco3_submissions = pd.read_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions " + "ECO3.csv", + header=0 +) +eco3_submissions["row_id"] = eco3_submissions.index + +eco4_submissions = pd.read_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions " + "ECO4.csv", + header=0 +) +eco4_submissions["row_id"] = eco4_submissions.index + +# List of properties never on the asset list +not_on_master = [ + "7+FOXGROVE PATH+WD19 6YL", "9+FOXGROVE PATH+WD19 6YL", "11+FOXGROVE PATH+WD19 6YL", + "20+LINCOLN DRIVE+WD19 7BA", "22+LINCOLN DRIVE+WD19 7BA", "24+LINCOLN DRIVE+WD19 7BA", + "26+LINCOLN DRIVE+WD19 7BA", "1+Ryman Court, Stag Lane+WD3 5HN", "6+Ryman Court, Stag Lane+WD3 5HN", + "9+Ryman Court, Stag Lane+WD3 5HN", "10+Ryman Court, Stag Lane+WD3 5HN", "11+Ryman Court, Stag Lane+WD3 5HN", + "12+Ryman Court, Stag Lane+WD3 5HN", "14+Ryman Court, Stag Lane+WD3 5HN", "15+Ryman Court, Stag Lane+WD3 5HN", + "20+Ryman Court, Stag Lane+WD3 5HN", "21+Ryman Court, Stag Lane+WD3 5HN", "22+Ryman Court, Stag Lane+WD3 5HN", + "25+Ryman Court, Stag Lane+WD3 5HN", "26+Ryman Court, Stag Lane+WD3 5HN", "31+Ryman Court, Stag Lane+WD3 5HN", + "33+Ryman Court, Stag Lane+WD3 5HN", "34+Ryman Court, Stag Lane+WD3 5HN", + '37+Ryman Court, Stag Lane+WD3 5HN', '38+Ryman Court, Stag Lane+WD3 5HN', '39+Ryman Court, Stag Lane+WD3 5HN', + '41+Ryman Court, Stag Lane+WD3 5HN', '43+Ryman Court, Stag Lane+WD3 5HN', '45+Ryman Court, Stag Lane+WD3 5HN', + '46+Ryman Court, Stag Lane+WD3 5HN', '48+Ryman Court, Stag Lane+WD3 5HN', '49+Ryman Court, Stag Lane+WD3 5HN', + '50+Ryman Court, Stag Lane+WD3 5HN', '52+Ryman Court, Stag Lane+WD3 5HN' +] + +eco3_remap = { + "19+OAKHILL ROAD+WD5 8RE": ('19', 'OAKHILL ROAD', 'WD3 9RE'), + "29+OAKHILL ROAD+WD5 8RE": ('29', 'OAKHILL ROAD', 'WD3 9RE'), + "31+OAKHILL ROAD+WD5 8RE": ('31', 'OAKHILL ROAD', 'WD3 9RE'), + "44+OAKHILL ROAD+WD5 8RE": ('44', 'OAKHILL ROAD', 'WD3 9RF'), + "64+OAKHILL ROAD+WD4 8RF": ('64', 'OAKHILL ROAD', 'WD3 9RF'), + "11+LANCASTER WAY+WD3 PRE": ('11', 'LANCASTER WAY', 'WD5 0PQ'), + "16+LANCASTER WAY+WD3 PRE": ('16', 'LANCASTER WAY', 'WD5 0PQ'), + "58+TALBOT ROAD +WD31HE": ('58', 'TALBOT ROAD', 'WD3 1HE'), + "10+PEARTREE COURT/WELWYN GARDEN CITY+AL73XN": ('10', 'PEARTREE COURT/WELWYN GARDEN CITY', 'AL7 3XN'), + "25+GOBLINS GREEN/WELWYN GARDEN CITY+AL73ST": ('25', 'GOBLINS GREEN/WELWYN GARDEN CITY', 'AL7 3ST'), + "32+GOBLINS GREEN/WELWYN GARDEN CITY+AL73ST": ('32', 'GOBLINS GREEN/WELWYN GARDEN CITY', 'AL7 3ST'), + "94+BAKER ST/POTTERS BAR+EN62EP": ('94', 'BAKER ST/POTTERS BAR', 'EN6 2EP'), + '33+Tudor Way+WD3JA': ('33', 'Tudor Way', 'WD3 8JA'), + '120+Hazlewood lane +WD5 0HF': ('120', 'Hazlewood lane', 'WD5 0HE'), + '35+Rosehill gardens +WD5 0HE': ('35', 'Rosehill gardens', 'WD5 0HF'), + '18+Rosehill gardens +WD5 0HE': ('18', 'Rosehill gardens', 'WD5 0HF'), + '34+Rosehill gardens +WD5 0HE': ('34', 'Rosehill gardens', 'WD5 0HF'), + '58+Rosehill gardens +WD5 0HE': ('58', 'Rosehill gardens', 'WD5 0HF'), + '48+Rosehill gardens +WD5 0HE': ('48', 'Rosehill gardens', 'WD5 0HF'), + '45+Rosehill gardens +WD5 0HE': ('45', 'Rosehill gardens', 'WD5 0HF'), + '6+Rosehill gardens +WD5 0HE': ('6', 'Rosehill gardens', 'WD5 0HF'), + '2+Rosehill gardens +WD5 0HE': ('2', 'Rosehill gardens', 'WD5 0HF'), + '29+Rosehill gardens +WD5 0HE': ('29', 'Rosehill gardens', 'WD5 0HF'), + '61+GOLDEN DELL+AL8 4EE': ('61', 'GOLDEN DELL', 'AL7 4EE'), + '2O+EDINBURGH AVENUE+WD3 8LB': ('20', 'EDINBURGH AVENUE', 'WD3 8LB'), +} + +eco3_lookup = [] +for _, row in tqdm(eco3_submissions.iterrows(), total=len(eco3_submissions)): + hn = row["NO "] + pc = row["Post Code"] + street = row["Street / Block Name"] + key = f"{hn}+{street}+{pc}" + if key in not_on_master: + continue + + if key in eco3_remap: + hn, street, pc = eco3_remap[key] + # The postcode is different to the asse + + # We filter the asset list, because it's hard to know how accurate this is + m1 = master_list[ + (master_list["house_number_TEMP"].astype(str) == hn) & + (master_list["postcode"] == pc) + ] + + if m1.shape[0] != 1: + raise ValueError( + f"Error for {key} in the tracker" + ) + + eco3_lookup.append( + { + "row_id": row["row_id"], + "thrive_property_id": m1["thrive_property_id"].values[0], + "submission_house_number": row["NO "], + "submission_address1": row["Street / Block Name"], + "submission_postcode": row["Post Code"], + } + ) + +eco4_lookup = [] +for _, row in tqdm(eco4_submissions.iterrows(), total=len(eco4_submissions)): + hn = row["NO."] + pc = row["Post Code"] + street = row["Street / Block Name"] + key = f"{hn}+{street}+{pc}" + if key in not_on_master: + continue + + if key in eco3_remap: + hn, street, pc = eco3_remap[key] + # The postcode is different to the asse + + # We filter the asset list, because it's hard to know how accurate this is + m1 = master_list[ + (master_list["house_number_TEMP"].astype(str) == hn) & + (master_list["postcode"].str.lower() == pc.lower()) + ] + + if m1.shape[0] != 1: + raise ValueError( + f"Error for {key} in the tracker" + ) + + eco4_lookup.append( + { + "row_id": row["row_id"], + "thrive_property_id": m1["thrive_property_id"].values[0], + "submission_house_number": row["NO."], + "submission_address1": row["Street / Block Name"], + "submission_postcode": row["Post Code"], + } + ) + +# We match the lookups back to the submission sheets +eco3_lookup = pd.DataFrame(eco3_lookup) +eco3_submissions = eco3_submissions.merge( + eco3_lookup, + how="left", + on="row_id", +) + +eco4_lookup = pd.DataFrame(eco4_lookup) +eco4_submissions = eco4_submissions.merge( + eco4_lookup, + how="left", + on="row_id", +) + +# Store +eco3_submissions.to_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions " + "ECO3 - with IDS.csv", + index=False +) +eco4_submissions.to_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions " + "ECO4 - with IDS.csv", + index=False +)