""" The Thrive programme has not been completed to specification. This script re-builds the programme and attempts to address the following concerns: - Which properties have been surveyed - Of the properties that have been surveyed, what has been installed - Which properties have been visited """ import pandas as pd from tqdm import tqdm from backend.SearchEpc import SearchEpc # This is Thrive's list of properties and when they should have been surveyed thrive_tracker = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - " "Standardised.xlsx", sheet_name="Tracker", header=2 ) original_asset_list = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List.xlsx", header=0 ) # Find properties that are on the thrive tracker that missed_properties = thrive_tracker[ ~thrive_tracker["UPRN"].astype(str).isin(original_asset_list["Client ref 1"].astype(str).values) ].copy() # We produce the complete list, with all of the columns we need, for data standardisation original_columns = { "Client ref 1": "thrive_property_id", "Address": "full_address", "Address Line 1": "address_line_1", "Address Line 2": "address_line_2", "Address Line 3": "address_line_3", "Address Line 4": "address_line_4", "County": "county", "Postcode": "postcode", "Block Name": "block_reference", "Construction Year": "construction_year", "Age band (calculated)": "age_band_calculated", "Property type": "property_type", "Client property type 1": "detailed_property_type", "Client property type 2": "detailed_property_type_2", "bed count": "number_of_bedrooms", "Heating Type": "heating_type", "WFT Findings": "WFT Findings", "ECO Eligibility": "ECO Eligibility", } original_asset_list = original_asset_list[original_columns.keys()].rename(columns=original_columns) original_asset_list["Data Source"] = "Original Asset List" original_asset_list = original_asset_list.drop_duplicates() # We append on the missed properties, with the information we have missed_properties["Full Address"] = ( missed_properties["#"].astype(str) + ", " + missed_properties["Adress Line 1"].astype(str) + ", " + missed_properties["Postcode"].astype(str) ) missed_columns = { "UPRN": "thrive_property_id", "Full Address": "full_address", "Short Address": "address_line_1", "Postcode": "postcode", "Property Type": "property_type", "Build Form": "build_form", "Build year": "age_band_calculated", "Assumed mm ": "assumed_loft_insulation_thickness", "SAP": "sap_rating", } missed_properties = missed_properties[missed_columns.keys()].rename(columns=missed_columns) missed_properties["WFT Findings"] = "Property Not Inspected" missed_properties["ECO Eligibility"] = "Property Not Inspected" missed_properties["Data Source"] = "Thrive Tracker" # We de-dupe ides in original_asset_list dupe_ids = original_asset_list[original_asset_list["thrive_property_id"].duplicated()]["thrive_property_id"].unique() dupes = original_asset_list[ original_asset_list["thrive_property_id"].isin(dupe_ids) ].copy() dupes = dupes.sort_values("thrive_property_id") original_asset_list = original_asset_list.rename( columns={ "detailed_property_type": "build_form" } ) master_list = pd.concat([missed_properties, original_asset_list], ignore_index=True) # We were provided with a data update for a sample of properties. We update the data with this information data_update = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Property List " "13_05.xlsx", header=0 ) new_properties = data_update[~data_update["UPRN"].isin(master_list["thrive_property_id"].astype(str).values)].copy() new_properties["Full Address"] = ( new_properties["#"].astype(str) + ", " + new_properties["Adress Line 1"].astype(str) + ", " + new_properties["Postcode"].astype(str) ) new_properties = new_properties[missed_columns.keys()].rename(columns=missed_columns) new_properties["WFT Findings"] = "Property Not Inspected" new_properties["ECO Eligibility"] = "Property Not Inspected" new_properties["Data Source"] = "13.05.2025 Data Update" master_list = pd.concat([new_properties, master_list]) # We append any new data on heating system, heating type, and insulation type, based on the data update master_list = master_list.merge( data_update[["UPRN", "Heating Type", "Assumed mm ", "SAP"]].rename( columns={ "Heating Type": "heating_type_updated", "Assumed mm ": "assumed_loft_insulation_thickness_updated", "SAP": "sap_rating_updated" } ), how="left", left_on="thrive_property_id", right_on="UPRN" ) # We fill the missings master_list["heating_type_updated"] = master_list["heating_type_updated"].fillna(master_list["heating_type"]) master_list["assumed_loft_insulation_thickness_updated"] = master_list[ "assumed_loft_insulation_thickness_updated" ].fillna(master_list["assumed_loft_insulation_thickness"]) master_list["sap_rating_updated"] = master_list["sap_rating_updated"].fillna(master_list["sap_rating"]) assert not master_list["thrive_property_id"].duplicated().sum(), "Duplicate thrive_property_id found in master_list" master_list["Address in tracker"] = master_list["thrive_property_id"].astype(str).isin( thrive_tracker["UPRN"].astype(str).values ) # Those the asset list - call it master asset list updated May2025 master_list = master_list.drop(columns=["UPRN"]) master_list["thrive_property_id"] = master_list["thrive_property_id"].astype(str) # master_list.to_excel( # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - " # "Complete - Updated May 2025.xlsx", # ) master_list["house_number_TEMP"] = master_list.apply( lambda x: SearchEpc.get_house_number(address=x["full_address"], postcode=x["postcode"]), axis=1 ) # We add in the status of the property # TODO: Add the status of the property from the Thrive tracker outcomes = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive - Outcomes - April " "24-March25.xlsx", header=0 ) outcomes["row_id"] = outcomes.index # We have two ids which have the same phohe. nymber, but different UPRN, so we don't match to the tracker for these tracker_for_matching = thrive_tracker[ ~thrive_tracker["UPRN"].isin(['OAKGRE0065ABBLDW1', 'OAKGRE0066ABBLDW1', 'JACKET0102ABBLDW1', 'BELLCL0008BEDMDW1']) ].copy() tracker_for_matching["Full Address"] = ( tracker_for_matching["#"].astype(str) + ", " + tracker_for_matching["Adress Line 1"].astype(str) + ", " + tracker_for_matching["Postcode"].astype(str) ) outcomes_id_lookup = [] for _, x in tqdm(outcomes.iterrows(), total=len(outcomes)): hn = str(x["No."]) address = x["Address"] postcode = x["Postcode"] contact_no = str(x["Contact No"]) if not pd.isnull(x["Contact No"]) else str(x["Contact No.1"]) contact_no = None if contact_no == "nan" else contact_no if address == "292 Micklefield Road": hn = "292" if (address == "Micklefield Road") & (hn == "302"): hn = "292" if (address == "103a Norfolk Road Rickmansworth Hertfordshire WD3 1JY"): hn = "103a" if (address == "105a Norfolk Road Rickmansworth Hertfordshire WD3 1JY"): hn = "105a" if (address == "107a Norfolk Road Rickmansworth Hertfordshire WD3 1JY"): hn = "107a" # # # We match this to the tracker # m1 = tracker_for_matching[tracker_for_matching["Primary Number"].astype(str) == contact_no] # # Many of the phone numbers don't have a leading zero in the tracker so we add them # if (m1.shape[0] != 1) and not pd.isnull(contact_no): # m1 = tracker_for_matching[tracker_for_matching["Primary Number"].astype(str) == contact_no.lstrip("0")] # # if m1.shape[0] > 1: # raise ValueError( # f"Error for {hn} - {address} - {postcode} - {contact_no} in the tracker" # ) # if m1.empty: m1 = tracker_for_matching[ (tracker_for_matching["#"].astype(str) == hn) & (tracker_for_matching["Postcode"] == postcode) ] if m1.empty: # Some properties aren't in the tracker, we match to the master list m1 = master_list[ (master_list["house_number_TEMP"].astype(str) == hn) & (master_list["postcode"] == postcode) ] outcomes_id_lookup.append( { "row_id": x["row_id"], "thrive_property_id": m1["thrive_property_id"].values[0], "address": m1["full_address"].values[0], "postcode": m1["postcode"].values[0], } ) continue if m1.shape[0] != 1: raise ValueError( f"Error for {hn} - {address} - {postcode} - {contact_no} in the tracker" ) # We add the status to the master list outcomes_id_lookup.append( { "row_id": x["row_id"], "thrive_property_id": m1["UPRN"].values[0], "address": m1["Full Address"].values[0], "postcode": m1["Postcode"].values[0], } ) outcomes_id_lookup = pd.DataFrame(outcomes_id_lookup) outcomes = outcomes.merge( outcomes_id_lookup, how="left", left_on="row_id", right_on="row_id" ) outcomes = outcomes.drop(columns=["row_id"]) outcomes = outcomes.rename( columns={ "Outcomes": "Outcome", "Notes (If 'no " "answer' under outcomes, have you checked around the property for access issues where possible?)": "Notes", } ) # Store the corrected outcomes # outcomes.to_excel( # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive - Outcomes - # April 24-March25 - Corrected.xlsx", # index=False # ) def parse_date(value): # Strip any 'W.C' or 'w/c' prefix and clean whitespace value = value.strip().lower().replace('w.c', '').replace('w/c', '').strip() try: # Try parsing the date with dayfirst=True return pd.to_datetime(value, dayfirst=True, errors='coerce') except Exception: return pd.NaT outcomes['Parsed Date'] = outcomes['Date letters sent'].apply(parse_date) # Next step - match the submissions master to the asset list. We will append on the UPRN eco3_submissions = pd.read_csv( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions " "ECO3.csv", header=0 ) eco3_submissions["row_id"] = eco3_submissions.index eco4_submissions = pd.read_csv( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions " "ECO4.csv", header=0 ) eco4_submissions["row_id"] = eco4_submissions.index # List of properties never on the asset list not_on_master = [ "7+FOXGROVE PATH+WD19 6YL", "9+FOXGROVE PATH+WD19 6YL", "11+FOXGROVE PATH+WD19 6YL", "20+LINCOLN DRIVE+WD19 7BA", "22+LINCOLN DRIVE+WD19 7BA", "24+LINCOLN DRIVE+WD19 7BA", "26+LINCOLN DRIVE+WD19 7BA", "1+Ryman Court, Stag Lane+WD3 5HN", "6+Ryman Court, Stag Lane+WD3 5HN", "9+Ryman Court, Stag Lane+WD3 5HN", "10+Ryman Court, Stag Lane+WD3 5HN", "11+Ryman Court, Stag Lane+WD3 5HN", "12+Ryman Court, Stag Lane+WD3 5HN", "14+Ryman Court, Stag Lane+WD3 5HN", "15+Ryman Court, Stag Lane+WD3 5HN", "20+Ryman Court, Stag Lane+WD3 5HN", "21+Ryman Court, Stag Lane+WD3 5HN", "22+Ryman Court, Stag Lane+WD3 5HN", "25+Ryman Court, Stag Lane+WD3 5HN", "26+Ryman Court, Stag Lane+WD3 5HN", "31+Ryman Court, Stag Lane+WD3 5HN", "33+Ryman Court, Stag Lane+WD3 5HN", "34+Ryman Court, Stag Lane+WD3 5HN", '37+Ryman Court, Stag Lane+WD3 5HN', '38+Ryman Court, Stag Lane+WD3 5HN', '39+Ryman Court, Stag Lane+WD3 5HN', '41+Ryman Court, Stag Lane+WD3 5HN', '43+Ryman Court, Stag Lane+WD3 5HN', '45+Ryman Court, Stag Lane+WD3 5HN', '46+Ryman Court, Stag Lane+WD3 5HN', '48+Ryman Court, Stag Lane+WD3 5HN', '49+Ryman Court, Stag Lane+WD3 5HN', '50+Ryman Court, Stag Lane+WD3 5HN', '52+Ryman Court, Stag Lane+WD3 5HN' ] eco3_remap = { "19+OAKHILL ROAD+WD5 8RE": ('19', 'OAKHILL ROAD', 'WD3 9RE'), "29+OAKHILL ROAD+WD5 8RE": ('29', 'OAKHILL ROAD', 'WD3 9RE'), "31+OAKHILL ROAD+WD5 8RE": ('31', 'OAKHILL ROAD', 'WD3 9RE'), "44+OAKHILL ROAD+WD5 8RE": ('44', 'OAKHILL ROAD', 'WD3 9RF'), "64+OAKHILL ROAD+WD4 8RF": ('64', 'OAKHILL ROAD', 'WD3 9RF'), "11+LANCASTER WAY+WD3 PRE": ('11', 'LANCASTER WAY', 'WD5 0PQ'), "16+LANCASTER WAY+WD3 PRE": ('16', 'LANCASTER WAY', 'WD5 0PQ'), "58+TALBOT ROAD +WD31HE": ('58', 'TALBOT ROAD', 'WD3 1HE'), "10+PEARTREE COURT/WELWYN GARDEN CITY+AL73XN": ('10', 'PEARTREE COURT/WELWYN GARDEN CITY', 'AL7 3XN'), "25+GOBLINS GREEN/WELWYN GARDEN CITY+AL73ST": ('25', 'GOBLINS GREEN/WELWYN GARDEN CITY', 'AL7 3ST'), "32+GOBLINS GREEN/WELWYN GARDEN CITY+AL73ST": ('32', 'GOBLINS GREEN/WELWYN GARDEN CITY', 'AL7 3ST'), "94+BAKER ST/POTTERS BAR+EN62EP": ('94', 'BAKER ST/POTTERS BAR', 'EN6 2EP'), '33+Tudor Way+WD3JA': ('33', 'Tudor Way', 'WD3 8JA'), '120+Hazlewood lane +WD5 0HF': ('120', 'Hazlewood lane', 'WD5 0HE'), '35+Rosehill gardens +WD5 0HE': ('35', 'Rosehill gardens', 'WD5 0HF'), '18+Rosehill gardens +WD5 0HE': ('18', 'Rosehill gardens', 'WD5 0HF'), '34+Rosehill gardens +WD5 0HE': ('34', 'Rosehill gardens', 'WD5 0HF'), '58+Rosehill gardens +WD5 0HE': ('58', 'Rosehill gardens', 'WD5 0HF'), '48+Rosehill gardens +WD5 0HE': ('48', 'Rosehill gardens', 'WD5 0HF'), '45+Rosehill gardens +WD5 0HE': ('45', 'Rosehill gardens', 'WD5 0HF'), '6+Rosehill gardens +WD5 0HE': ('6', 'Rosehill gardens', 'WD5 0HF'), '2+Rosehill gardens +WD5 0HE': ('2', 'Rosehill gardens', 'WD5 0HF'), '29+Rosehill gardens +WD5 0HE': ('29', 'Rosehill gardens', 'WD5 0HF'), '61+GOLDEN DELL+AL8 4EE': ('61', 'GOLDEN DELL', 'AL7 4EE'), '2O+EDINBURGH AVENUE+WD3 8LB': ('20', 'EDINBURGH AVENUE', 'WD3 8LB'), } eco3_lookup = [] for _, row in tqdm(eco3_submissions.iterrows(), total=len(eco3_submissions)): hn = row["NO "] pc = row["Post Code"] street = row["Street / Block Name"] key = f"{hn}+{street}+{pc}" if key in not_on_master: continue if key in eco3_remap: hn, street, pc = eco3_remap[key] # The postcode is different to the asse # We filter the asset list, because it's hard to know how accurate this is m1 = master_list[ (master_list["house_number_TEMP"].astype(str) == hn) & (master_list["postcode"] == pc) ] if m1.shape[0] != 1: raise ValueError( f"Error for {key} in the tracker" ) eco3_lookup.append( { "row_id": row["row_id"], "thrive_property_id": m1["thrive_property_id"].values[0], "submission_house_number": row["NO "], "submission_address1": row["Street / Block Name"], "submission_postcode": row["Post Code"], } ) eco4_lookup = [] for _, row in tqdm(eco4_submissions.iterrows(), total=len(eco4_submissions)): hn = row["NO."] pc = row["Post Code"] street = row["Street / Block Name"] key = f"{hn}+{street}+{pc}" if key in not_on_master: continue if key in eco3_remap: hn, street, pc = eco3_remap[key] # The postcode is different to the asse # We filter the asset list, because it's hard to know how accurate this is m1 = master_list[ (master_list["house_number_TEMP"].astype(str) == hn) & (master_list["postcode"].str.lower() == pc.lower()) ] if m1.shape[0] != 1: raise ValueError( f"Error for {key} in the tracker" ) eco4_lookup.append( { "row_id": row["row_id"], "thrive_property_id": m1["thrive_property_id"].values[0], "submission_house_number": row["NO."], "submission_address1": row["Street / Block Name"], "submission_postcode": row["Post Code"], } ) # We match the lookups back to the submission sheets eco3_lookup = pd.DataFrame(eco3_lookup) eco3_submissions = eco3_submissions.merge( eco3_lookup, how="left", on="row_id", ) eco4_lookup = pd.DataFrame(eco4_lookup) eco4_submissions = eco4_submissions.merge( eco4_lookup, how="left", on="row_id", ) # Store eco3_submissions.to_csv( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions " "ECO3 - with IDS.csv", index=False ) eco4_submissions.to_csv( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions " "ECO4 - with IDS.csv", index=False )