From 97eaf948c5030ad079d7336741849f3bec3104e9 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 16 May 2025 15:51:05 +0100 Subject: [PATCH 01/14] fixing test --- recommendations/tests/test_lighting_recommendations.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/recommendations/tests/test_lighting_recommendations.py b/recommendations/tests/test_lighting_recommendations.py index dbb621e7..5fb914a8 100644 --- a/recommendations/tests/test_lighting_recommendations.py +++ b/recommendations/tests/test_lighting_recommendations.py @@ -49,6 +49,6 @@ class TestLightingRecommendations: 'lighting in all ' 'fixed outlets', 'low-energy-lighting': 100}, - 'total': 240.24, 'subtotal': 200.20000000000002, 'vat': 40.040000000000006, 'contingency': 14.3, - 'preliminaries': 14.3, 'material': 80.0, 'profit': 28.6, 'labour_hours': 3.2, 'labour_days': 0.4, - 'labour_cost': 63.0, 'survey': False}] + 'total': 188.76000000000002, 'subtotal': 157.3, 'vat': 31.460000000000004, 'contingency': 14.3, + 'material': 80.0, 'labour_hours': 3.2, 'labour_days': 0.4, 'labour_cost': 63.0, 'survey': False} + ] From c0cf848db2676f93fd0e458c327de7554370e2af Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 20 May 2025 15:59:38 +0100 Subject: [PATCH 02/14] re-building thrive's programme --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- asset_list/AssetList.py | 48 ++- asset_list/app.py | 52 +-- asset_list/mappings/heating_systems.py | 2 + asset_list/mappings/roof.py | 9 +- etl/customers/thrive/Programme Analysis.py | 373 +++++++++++++++++++-- 7 files changed, 411 insertions(+), 77 deletions(-) diff --git a/.idea/Model.iml b/.idea/Model.iml index c6561970..09f2e496 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml index 50cad4ca..fb10c6b0 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py index 4b7a11ec..199b175c 100644 --- a/asset_list/AssetList.py +++ b/asset_list/AssetList.py @@ -2507,7 +2507,7 @@ class AssetList: else: raise NotImplementedError("Invalid date in outcomes - implement me") - notes_col = "Notes" if "Notes" in outcomes.columns else "Notes / Outcomes" + notes_col = "Notes" if "Notes" in self.outcomes.columns else "Notes / Outcomes" lookup = lookup.merge( self.outcomes[["row_id", "Outcome", notes_col, date_col]], how="left", on="row_id" @@ -2576,6 +2576,7 @@ class AssetList: def flag_survey_master( self, master_filepaths, + master_id_colnames, master_to_asset_list_filepath=None ): # TODO: This probably needs further expansion @@ -2591,7 +2592,7 @@ class AssetList: logger.info("Getting masters and merging onto asset list") master_surveyed = [] unmatched_submissions = [] - for filepath in master_filepaths: + for idx, filepath in enumerate(master_filepaths): master_data = pd.read_csv(filepath) # Strip columns master_data.columns = [c.strip() for c in master_data.columns] @@ -2618,22 +2619,6 @@ class AssetList: "SUBMISSION DATE" if "SUBMISSION DATE" in master_data.columns else "SUBMISSION DATE TO INSTALLERS" ) - # if "UPRN" in master_data.columns: - # # We just need to check if any were cancelled - # master_to_append = master_data[ - # ["UPRN", install_col, submission_col] - # ].rename( - # columns={ - # "UPRN": self.STANDARD_LANDLORD_PROPERTY_ID, - # install_col: "survey_status", - # submission_col: "submission_date" - # } - # ) - # master_to_append["cancelled"] = master_to_append["survey_status"].str.lower().str.contains("cancel") - # - # master_surveyed.append(master_to_append) - # continue - master_data["row_id"] = master_data.index self.standardised_asset_list["house_no"] = self.standardised_asset_list.apply( @@ -2656,8 +2641,6 @@ class AssetList: ) measure_mix_col = "MEASURE COMBO" - # Otherwise, we need to match algorithmically - has_property_id = "UPRN" in master_data.columns logger.info("Matching master data to asset list") matched = [] unmatched = [] @@ -2670,13 +2653,22 @@ class AssetList: if pd.isnull(row[postcode_col]): continue - # if has_property_id: - # submission_uprn = row["UPRN"] - # - # if not pd.isnull(submission_uprn): - # df = self.standardised_asset_list[ - # self.standardised_asset_list[self.STANDARD_LANDLORD_PROPERTY_ID] == submission_uprn - # ] + if master_id_colnames[idx] is not None: + # Filter the standardised asset list on this + df = self.standardised_asset_list[ + self.standardised_asset_list[self.STANDARD_LANDLORD_PROPERTY_ID] == row[master_id_colnames[idx]] + ] + if df.shape[0] == 1: + matched.append( + { + "row_id": row["row_id"], + "original_house_no": original_house_no, + "original_street": original_street, + "original_postcode": original_postcode, + self.STANDARD_LANDLORD_PROPERTY_ID: df[self.STANDARD_LANDLORD_PROPERTY_ID].values[0], + } + ) + continue postcode_no_space = row[postcode_col].strip().replace(" ", "").lower() @@ -2721,6 +2713,7 @@ class AssetList: self.STANDARD_LANDLORD_PROPERTY_ID: df[self.STANDARD_LANDLORD_PROPERTY_ID].values[0], } ) + continue if house_no in df["house_no"].values: df = df[df["house_no"] == house_no] @@ -2793,6 +2786,7 @@ class AssetList: } ) master_to_append["cancelled"] = master_to_append["survey_status"].str.lower().str.contains("cancel") + master_to_append["installed"] = master_to_append["survey_status"].str.lower().str.contains("installed") master_surveyed.append(master_to_append) unmatched_df = master_data[ master_data["row_id"].isin(unmatched) diff --git a/asset_list/app.py b/asset_list/app.py index bb898c09..3441e5de 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -62,36 +62,42 @@ def app(): Property UPRN """ - # Thurrock - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thurrock" - data_filename = "THURROCK COUNCIL - For analysis.xlsx" - sheet_name = "Assets" - postcode_column = 'Postcode' - fulladdress_column = "Full Address" - address1_column = None - address1_method = "house_number_extraction" + # Thrive - reconciliation + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation" + data_filename = "Thrive Asset List - Complete - Updated May 2025.xlsx" + sheet_name = "Sheet1" + postcode_column = 'postcode' + fulladdress_column = "full_address" + address1_column = "address_line_1" + address1_method = None address_cols_to_concat = [] missing_postcodes_method = None - landlord_year_built = "Construction Date" + landlord_year_built = "age_band_calculated" landlord_os_uprn = None - landlord_property_type = "Property Type" - landlord_built_form = "Property Subtype" + landlord_property_type = "property_type" + landlord_built_form = "build_form" landlord_wall_construction = None - landlord_roof_construction = None - landlord_heating_system = "Main Heating Type" + landlord_roof_construction = "assumed_loft_insulation_thickness_updated" + landlord_heating_system = "heating_type_updated" landlord_existing_pv = None - landlord_property_id = "Property Reference" - landlord_sap = None - outcomes_filename = [] - outcomes_sheetname = [] - outcomes_postcode = [] - outcomes_houseno = [] - outcomes_id = [] - outcomes_address = [] - master_filepaths = [] + landlord_property_id = "thrive_property_id" + landlord_sap = "sap_rating_updated" + outcomes_filename = [ + os.path.join(data_folder, "Thrive - Outcomes - April 24-March25 - Corrected.xlsx") + ] + outcomes_sheetname = ["Sheet1"] + outcomes_postcode = ["postcode"] + outcomes_houseno = ["No."] + outcomes_id = ["thrive_property_id"] + outcomes_address = ["address"] + master_filepaths = [ + os.path.join(data_folder, "Thrive Submissions ECO3 - with IDS.csv"), + os.path.join(data_folder, "Thrive Submissions ECO4 - with IDS.csv"), + ] master_to_asset_list_filepath = None + master_id_colnames = ["thrive_property_id", "thrive_property_id"] phase = False - ecosurv_landlords = None + ecosurv_landlords = "thrive" # Medway data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Medway" diff --git a/asset_list/mappings/heating_systems.py b/asset_list/mappings/heating_systems.py index 92f59f2c..daef01bb 100644 --- a/asset_list/mappings/heating_systems.py +++ b/asset_list/mappings/heating_systems.py @@ -292,4 +292,6 @@ HEATING_MAPPINGS = { 'Communal Heating': 'communal heating', 'No Data': 'unknown', 'Boiler System': 'gas condensing boiler', + 'Storage heating': 'electric storage heaters', + 'Storage heating (HHRSH)': 'high heat retention storage heaters' } diff --git a/asset_list/mappings/roof.py b/asset_list/mappings/roof.py index 03d6f9af..3b447829 100644 --- a/asset_list/mappings/roof.py +++ b/asset_list/mappings/roof.py @@ -6,7 +6,7 @@ STANDARD_ROOF_CONSTRUCTIONS = { "pitched unknown access to loft", "piched unknown insulation", "pitched insulated", - "pitched less than 100mm insulation" + "pitched less than 100mm insulation", "another dwelling above", "flat unknown insulation", "unknown insulated", @@ -38,4 +38,11 @@ ROOF_CONSTRUCTION_MAPPINGS = { '200mm': 'pitched insulated', '0-49mm': 'pitched less than 100mm insulation', '50mm': 'pitched less than 100mm insulation', + '': 'unknown', + 'NR': 'unknown', + 'Non-joist': 'unknown', + '25mm': 'pitched less than 100mm insulation', + '400mm+': 'pitched insulated', + '12mm': 'pitched less than 100mm insulation' + } diff --git a/etl/customers/thrive/Programme Analysis.py b/etl/customers/thrive/Programme Analysis.py index 521cfd30..2d6a0d69 100644 --- a/etl/customers/thrive/Programme Analysis.py +++ b/etl/customers/thrive/Programme Analysis.py @@ -8,6 +8,8 @@ address the following concerns: """ import pandas as pd +from tqdm import tqdm +from backend.SearchEpc import SearchEpc # This is Thrive's list of properties and when they should have been surveyed thrive_tracker = pd.read_excel( @@ -51,27 +53,10 @@ original_columns = { } original_asset_list = original_asset_list[original_columns.keys()].rename(columns=original_columns) -original_asset_list["Data Source"] = "Thrive Tracker" +original_asset_list["Data Source"] = "Original Asset List" +original_asset_list = original_asset_list.drop_duplicates() # We append on the missed properties, with the information we have -# 'Unnamed: 0', 'Thrive Notes', 'Priority', 'UPRN', 'Short Address', '#', -# 'Adress Line 1', 'Postcode', 'Property Type', 'Build Form', -# 'Build year', 'Assumed mm ', 'SAP', 'Name', 'Primary Number', -# 'Secondary Number', 'Email', 'Thrive use: Tenancy Number', -# 'Special Requirements ', 'CIGA', 'Date CIGA check received', -# 'Proposed Progamme', 'New Proposed Programme', -# 'Missing from Route March?', 'Date Letters Sent (w.c)', 'Work Type', -# 'Warmfront Survey Notes', 'Notes Reply (Thrive)', 'Loft Insulation', -# 'Cost for Vents', 'Cavity Depth', 'Cavity Condition', -# 'Date Submitted to installer', 'PRRN Number', -# 'Loft insulation required? (Thrive)', 'Date booked ', -# 'Completed\n(yes/no)', 'Date Completed', -# 'Vents installed?\n(number and location)', -# 'Loft Top Up\n(amount of insulation) ', 'CIGA Warranty Provided ', -# 'Notes', 'Works Number', 'CW KGI Uploaded', 'Keystone Fan Added', -# 'SA Cavity Condition Updated', 'SA Loft & Energy Updated', -# 'PRRN Submitted ' - missed_properties["Full Address"] = ( missed_properties["#"].astype(str) + ", " + missed_properties["Adress Line 1"].astype(str) + ", " + @@ -94,6 +79,19 @@ missed_properties["WFT Findings"] = "Property Not Inspected" missed_properties["ECO Eligibility"] = "Property Not Inspected" missed_properties["Data Source"] = "Thrive Tracker" +# We de-dupe ides in original_asset_list +dupe_ids = original_asset_list[original_asset_list["thrive_property_id"].duplicated()]["thrive_property_id"].unique() +dupes = original_asset_list[ + original_asset_list["thrive_property_id"].isin(dupe_ids) +].copy() +dupes = dupes.sort_values("thrive_property_id") + +original_asset_list = original_asset_list.rename( + columns={ + "detailed_property_type": "build_form" + } +) + master_list = pd.concat([missed_properties, original_asset_list], ignore_index=True) # We were provided with a data update for a sample of properties. We update the data with this information @@ -103,12 +101,339 @@ data_update = pd.read_excel( header=0 ) -new_properties = data_update[~data_update["UPRN"].isin(master_list["thrive_property_id"].astype(str).values)] +new_properties = data_update[~data_update["UPRN"].isin(master_list["thrive_property_id"].astype(str).values)].copy() +new_properties["Full Address"] = ( + new_properties["#"].astype(str) + ", " + + new_properties["Adress Line 1"].astype(str) + ", " + + new_properties["Postcode"].astype(str) +) +new_properties = new_properties[missed_columns.keys()].rename(columns=missed_columns) +new_properties["WFT Findings"] = "Property Not Inspected" +new_properties["ECO Eligibility"] = "Property Not Inspected" +new_properties["Data Source"] = "13.05.2025 Data Update" + +master_list = pd.concat([new_properties, master_list]) + +# We append any new data on heating system, heating type, and insulation type, based on the data update +master_list = master_list.merge( + data_update[["UPRN", "Heating Type", "Assumed mm ", "SAP"]].rename( + columns={ + "Heating Type": "heating_type_updated", + "Assumed mm ": "assumed_loft_insulation_thickness_updated", + "SAP": "sap_rating_updated" + } + ), + how="left", + left_on="thrive_property_id", + right_on="UPRN" +) + +# We fill the missings +master_list["heating_type_updated"] = master_list["heating_type_updated"].fillna(master_list["heating_type"]) +master_list["assumed_loft_insulation_thickness_updated"] = master_list[ + "assumed_loft_insulation_thickness_updated" +].fillna(master_list["assumed_loft_insulation_thickness"]) +master_list["sap_rating_updated"] = master_list["sap_rating_updated"].fillna(master_list["sap_rating"]) + +assert not master_list["thrive_property_id"].duplicated().sum(), "Duplicate thrive_property_id found in master_list" + +master_list["Address in tracker"] = master_list["thrive_property_id"].astype(str).isin( + thrive_tracker["UPRN"].astype(str).values +) + +# Those the asset list - call it master asset list updated May2025 +master_list = master_list.drop(columns=["UPRN"]) +master_list["thrive_property_id"] = master_list["thrive_property_id"].astype(str) +# master_list.to_excel( +# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - " +# "Complete - Updated May 2025.xlsx", +# ) + +master_list["house_number_TEMP"] = master_list.apply( + lambda x: SearchEpc.get_house_number(address=x["full_address"], postcode=x["postcode"]), + axis=1 +) + +# We add in the status of the property +# TODO: Add the status of the property from the Thrive tracker +outcomes = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive - Outcomes - April " + "24-March25.xlsx", + header=0 +) +outcomes["row_id"] = outcomes.index + +# We have two ids which have the same phohe. nymber, but different UPRN, so we don't match to the tracker for these +tracker_for_matching = thrive_tracker[ + ~thrive_tracker["UPRN"].isin(['OAKGRE0065ABBLDW1', 'OAKGRE0066ABBLDW1', 'JACKET0102ABBLDW1', 'BELLCL0008BEDMDW1']) +].copy() +tracker_for_matching["Full Address"] = ( + tracker_for_matching["#"].astype(str) + ", " + + tracker_for_matching["Adress Line 1"].astype(str) + ", " + + tracker_for_matching["Postcode"].astype(str) +) + +outcomes_id_lookup = [] +for _, x in tqdm(outcomes.iterrows(), total=len(outcomes)): + + hn = str(x["No."]) + address = x["Address"] + postcode = x["Postcode"] + contact_no = str(x["Contact No"]) if not pd.isnull(x["Contact No"]) else str(x["Contact No.1"]) + contact_no = None if contact_no == "nan" else contact_no + + if address == "292 Micklefield Road": + hn = "292" + + if (address == "Micklefield Road") & (hn == "302"): + hn = "292" + + if (address == "103a Norfolk Road Rickmansworth Hertfordshire WD3 1JY"): + hn = "103a" + + if (address == "105a Norfolk Road Rickmansworth Hertfordshire WD3 1JY"): + hn = "105a" + + if (address == "107a Norfolk Road Rickmansworth Hertfordshire WD3 1JY"): + hn = "107a" + + # + # # We match this to the tracker + # m1 = tracker_for_matching[tracker_for_matching["Primary Number"].astype(str) == contact_no] + # # Many of the phone numbers don't have a leading zero in the tracker so we add them + # if (m1.shape[0] != 1) and not pd.isnull(contact_no): + # m1 = tracker_for_matching[tracker_for_matching["Primary Number"].astype(str) == contact_no.lstrip("0")] + # + # if m1.shape[0] > 1: + # raise ValueError( + # f"Error for {hn} - {address} - {postcode} - {contact_no} in the tracker" + # ) + + # if m1.empty: + m1 = tracker_for_matching[ + (tracker_for_matching["#"].astype(str) == hn) & + (tracker_for_matching["Postcode"] == postcode) + ] + + if m1.empty: + # Some properties aren't in the tracker, we match to the master list + m1 = master_list[ + (master_list["house_number_TEMP"].astype(str) == hn) & + (master_list["postcode"] == postcode) + ] + outcomes_id_lookup.append( + { + "row_id": x["row_id"], + "thrive_property_id": m1["thrive_property_id"].values[0], + "address": m1["full_address"].values[0], + "postcode": m1["postcode"].values[0], + } + ) + continue + + if m1.shape[0] != 1: + raise ValueError( + f"Error for {hn} - {address} - {postcode} - {contact_no} in the tracker" + ) + + # We add the status to the master list + outcomes_id_lookup.append( + { + "row_id": x["row_id"], + "thrive_property_id": m1["UPRN"].values[0], + "address": m1["Full Address"].values[0], + "postcode": m1["Postcode"].values[0], + } + ) + +outcomes_id_lookup = pd.DataFrame(outcomes_id_lookup) +outcomes = outcomes.merge( + outcomes_id_lookup, + how="left", + left_on="row_id", + right_on="row_id" +) + +outcomes = outcomes.drop(columns=["row_id"]) +outcomes = outcomes.rename( + columns={ + "Outcomes": "Outcome", + "Notes (If 'no " + "answer' under outcomes, have you checked around the property for access issues where possible?)": "Notes", + } +) +# Store the corrected outcomes +# outcomes.to_excel( +# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive - Outcomes - +# April 24-March25 - Corrected.xlsx", +# index=False +# ) -data_update = = data_update[["UPRN", ""]] -# TODO: Flag the Thrive priorities and create a separate project code for these -# TODO: Add the general project code -# TODO: Add the thrive \ No newline at end of file +def parse_date(value): + # Strip any 'W.C' or 'w/c' prefix and clean whitespace + value = value.strip().lower().replace('w.c', '').replace('w/c', '').strip() + try: + # Try parsing the date with dayfirst=True + return pd.to_datetime(value, dayfirst=True, errors='coerce') + except Exception: + return pd.NaT + + +outcomes['Parsed Date'] = outcomes['Date letters sent'].apply(parse_date) + +# Next step - match the submissions master to the asset list. We will append on the UPRN +eco3_submissions = pd.read_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions " + "ECO3.csv", + header=0 +) +eco3_submissions["row_id"] = eco3_submissions.index + +eco4_submissions = pd.read_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions " + "ECO4.csv", + header=0 +) +eco4_submissions["row_id"] = eco4_submissions.index + +# List of properties never on the asset list +not_on_master = [ + "7+FOXGROVE PATH+WD19 6YL", "9+FOXGROVE PATH+WD19 6YL", "11+FOXGROVE PATH+WD19 6YL", + "20+LINCOLN DRIVE+WD19 7BA", "22+LINCOLN DRIVE+WD19 7BA", "24+LINCOLN DRIVE+WD19 7BA", + "26+LINCOLN DRIVE+WD19 7BA", "1+Ryman Court, Stag Lane+WD3 5HN", "6+Ryman Court, Stag Lane+WD3 5HN", + "9+Ryman Court, Stag Lane+WD3 5HN", "10+Ryman Court, Stag Lane+WD3 5HN", "11+Ryman Court, Stag Lane+WD3 5HN", + "12+Ryman Court, Stag Lane+WD3 5HN", "14+Ryman Court, Stag Lane+WD3 5HN", "15+Ryman Court, Stag Lane+WD3 5HN", + "20+Ryman Court, Stag Lane+WD3 5HN", "21+Ryman Court, Stag Lane+WD3 5HN", "22+Ryman Court, Stag Lane+WD3 5HN", + "25+Ryman Court, Stag Lane+WD3 5HN", "26+Ryman Court, Stag Lane+WD3 5HN", "31+Ryman Court, Stag Lane+WD3 5HN", + "33+Ryman Court, Stag Lane+WD3 5HN", "34+Ryman Court, Stag Lane+WD3 5HN", + '37+Ryman Court, Stag Lane+WD3 5HN', '38+Ryman Court, Stag Lane+WD3 5HN', '39+Ryman Court, Stag Lane+WD3 5HN', + '41+Ryman Court, Stag Lane+WD3 5HN', '43+Ryman Court, Stag Lane+WD3 5HN', '45+Ryman Court, Stag Lane+WD3 5HN', + '46+Ryman Court, Stag Lane+WD3 5HN', '48+Ryman Court, Stag Lane+WD3 5HN', '49+Ryman Court, Stag Lane+WD3 5HN', + '50+Ryman Court, Stag Lane+WD3 5HN', '52+Ryman Court, Stag Lane+WD3 5HN' +] + +eco3_remap = { + "19+OAKHILL ROAD+WD5 8RE": ('19', 'OAKHILL ROAD', 'WD3 9RE'), + "29+OAKHILL ROAD+WD5 8RE": ('29', 'OAKHILL ROAD', 'WD3 9RE'), + "31+OAKHILL ROAD+WD5 8RE": ('31', 'OAKHILL ROAD', 'WD3 9RE'), + "44+OAKHILL ROAD+WD5 8RE": ('44', 'OAKHILL ROAD', 'WD3 9RF'), + "64+OAKHILL ROAD+WD4 8RF": ('64', 'OAKHILL ROAD', 'WD3 9RF'), + "11+LANCASTER WAY+WD3 PRE": ('11', 'LANCASTER WAY', 'WD5 0PQ'), + "16+LANCASTER WAY+WD3 PRE": ('16', 'LANCASTER WAY', 'WD5 0PQ'), + "58+TALBOT ROAD +WD31HE": ('58', 'TALBOT ROAD', 'WD3 1HE'), + "10+PEARTREE COURT/WELWYN GARDEN CITY+AL73XN": ('10', 'PEARTREE COURT/WELWYN GARDEN CITY', 'AL7 3XN'), + "25+GOBLINS GREEN/WELWYN GARDEN CITY+AL73ST": ('25', 'GOBLINS GREEN/WELWYN GARDEN CITY', 'AL7 3ST'), + "32+GOBLINS GREEN/WELWYN GARDEN CITY+AL73ST": ('32', 'GOBLINS GREEN/WELWYN GARDEN CITY', 'AL7 3ST'), + "94+BAKER ST/POTTERS BAR+EN62EP": ('94', 'BAKER ST/POTTERS BAR', 'EN6 2EP'), + '33+Tudor Way+WD3JA': ('33', 'Tudor Way', 'WD3 8JA'), + '120+Hazlewood lane +WD5 0HF': ('120', 'Hazlewood lane', 'WD5 0HE'), + '35+Rosehill gardens +WD5 0HE': ('35', 'Rosehill gardens', 'WD5 0HF'), + '18+Rosehill gardens +WD5 0HE': ('18', 'Rosehill gardens', 'WD5 0HF'), + '34+Rosehill gardens +WD5 0HE': ('34', 'Rosehill gardens', 'WD5 0HF'), + '58+Rosehill gardens +WD5 0HE': ('58', 'Rosehill gardens', 'WD5 0HF'), + '48+Rosehill gardens +WD5 0HE': ('48', 'Rosehill gardens', 'WD5 0HF'), + '45+Rosehill gardens +WD5 0HE': ('45', 'Rosehill gardens', 'WD5 0HF'), + '6+Rosehill gardens +WD5 0HE': ('6', 'Rosehill gardens', 'WD5 0HF'), + '2+Rosehill gardens +WD5 0HE': ('2', 'Rosehill gardens', 'WD5 0HF'), + '29+Rosehill gardens +WD5 0HE': ('29', 'Rosehill gardens', 'WD5 0HF'), + '61+GOLDEN DELL+AL8 4EE': ('61', 'GOLDEN DELL', 'AL7 4EE'), + '2O+EDINBURGH AVENUE+WD3 8LB': ('20', 'EDINBURGH AVENUE', 'WD3 8LB'), +} + +eco3_lookup = [] +for _, row in tqdm(eco3_submissions.iterrows(), total=len(eco3_submissions)): + hn = row["NO "] + pc = row["Post Code"] + street = row["Street / Block Name"] + key = f"{hn}+{street}+{pc}" + if key in not_on_master: + continue + + if key in eco3_remap: + hn, street, pc = eco3_remap[key] + # The postcode is different to the asse + + # We filter the asset list, because it's hard to know how accurate this is + m1 = master_list[ + (master_list["house_number_TEMP"].astype(str) == hn) & + (master_list["postcode"] == pc) + ] + + if m1.shape[0] != 1: + raise ValueError( + f"Error for {key} in the tracker" + ) + + eco3_lookup.append( + { + "row_id": row["row_id"], + "thrive_property_id": m1["thrive_property_id"].values[0], + "submission_house_number": row["NO "], + "submission_address1": row["Street / Block Name"], + "submission_postcode": row["Post Code"], + } + ) + +eco4_lookup = [] +for _, row in tqdm(eco4_submissions.iterrows(), total=len(eco4_submissions)): + hn = row["NO."] + pc = row["Post Code"] + street = row["Street / Block Name"] + key = f"{hn}+{street}+{pc}" + if key in not_on_master: + continue + + if key in eco3_remap: + hn, street, pc = eco3_remap[key] + # The postcode is different to the asse + + # We filter the asset list, because it's hard to know how accurate this is + m1 = master_list[ + (master_list["house_number_TEMP"].astype(str) == hn) & + (master_list["postcode"].str.lower() == pc.lower()) + ] + + if m1.shape[0] != 1: + raise ValueError( + f"Error for {key} in the tracker" + ) + + eco4_lookup.append( + { + "row_id": row["row_id"], + "thrive_property_id": m1["thrive_property_id"].values[0], + "submission_house_number": row["NO."], + "submission_address1": row["Street / Block Name"], + "submission_postcode": row["Post Code"], + } + ) + +# We match the lookups back to the submission sheets +eco3_lookup = pd.DataFrame(eco3_lookup) +eco3_submissions = eco3_submissions.merge( + eco3_lookup, + how="left", + on="row_id", +) + +eco4_lookup = pd.DataFrame(eco4_lookup) +eco4_submissions = eco4_submissions.merge( + eco4_lookup, + how="left", + on="row_id", +) + +# Store +eco3_submissions.to_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions " + "ECO3 - with IDS.csv", + index=False +) +eco4_submissions.to_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions " + "ECO4 - with IDS.csv", + index=False +) From 2e041bfe75384f65a437dc55d585d04366dff1ff Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 21 May 2025 11:55:10 +0100 Subject: [PATCH 03/14] set up hubspot status --- asset_list/AssetList.py | 193 +++++++++++++++++++++++++++++++---- asset_list/app.py | 82 +++++---------- asset_list/hubspot/config.py | 28 +++++ 3 files changed, 231 insertions(+), 72 deletions(-) create mode 100644 asset_list/hubspot/config.py diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py index 199b175c..e68ee6dd 100644 --- a/asset_list/AssetList.py +++ b/asset_list/AssetList.py @@ -4,8 +4,8 @@ import re import tiktoken from pprint import pprint from datetime import datetime +import asset_list.hubspot.config as hubspot_config -from numpy.ma.core import masked_not_equal from openai import OpenAI import numpy as np import pandas as pd @@ -292,6 +292,13 @@ class AssetList: "Any further surveyor notes", 'Surveyors Name' ] + NON_INTRUSIVES_NEW_FORMAT_COLNAMES = [ + "Has the property been re-walled?", "Is the property tile hung?", "Does the property have a render?", + "Does the property have cladding?", "Gable Wall Obstructions", + "Does the property have foliage that needs removal?", + "Potential unsafe environment", "Date of Inspection" + ] + NON_INTRUSIVES_ELIGIBILITY_COLUMN = "Eligibility (Red/Yellow/Green)" OLD_FORMAT_NON_INTRUSIVE_COLNAMES = ['WFT Findings', 'ECO Eligibility'] @@ -400,6 +407,10 @@ class AssetList: self.non_intrusives_eligibility = "Eligibility (Red/Yellow/Green)" in self.raw_asset_list.columns + self.new_format_non_insturives_present = ( + "Has the property been re-walled?" in self.standardised_asset_list.columns + ) + # Names of columns self.landlord_property_id = landlord_property_id self.address1_colname = address1_colname @@ -687,6 +698,9 @@ class AssetList: if self.non_intrusives_eligibility: non_intrusive_columns.append(self.NON_INTRUSIVES_ELIGIBILITY_COLUMN) + if self.new_format_non_insturives_present: + non_intrusive_columns += self.NON_INTRUSIVES_NEW_FORMAT_COLNAMES + if self.old_format_non_intrusives_present: # We check if we have the ECO Eligibility column, which we might not have non_intrusive_columns = [ @@ -931,6 +945,23 @@ class AssetList: self.standardised_asset_list[self.STANDARD_LANDLORD_PROPERTY_ID].astype(str) ) + # CLean up the standard SAP column, that can be problematic + if self.landlord_sap is not None: + self.standardised_asset_list[self.STANDARD_SAP] = ( + self.standardised_asset_list[self.STANDARD_SAP] + .astype(str) + .str.replace('\xa0', ' ', regex=False) + .str.strip() + ) + self.standardised_asset_list[self.STANDARD_SAP] = np.where( + self.standardised_asset_list[self.STANDARD_SAP] == "", + None, + self.standardised_asset_list[self.STANDARD_SAP] + ) + self.standardised_asset_list[self.STANDARD_SAP] = ( + self.standardised_asset_list[self.STANDARD_SAP].astype(float) + ) + def merge_data(self, df: pd.DataFrame): """ Used to insert data into the standardised asset list, based on the domna property id @@ -1864,7 +1895,7 @@ class AssetList: for col in ["cavity_reason", "solar_reason"]: self.standardised_asset_list[col] = np.where( ( - (~pd.isnull(self.standardised_asset_list["submission_date"])) + (~pd.isnull(self.standardised_asset_list["submission_status"])) ), None, self.standardised_asset_list[col] @@ -1874,7 +1905,7 @@ class AssetList: for col in ["cavity_reason", "solar_reason"]: self.standardised_asset_list[col] = np.where( ( - (~pd.isnull(self.standardised_asset_list["ecosurv_reference"])) + (~pd.isnull(self.standardised_asset_list["ecosurv_status"])) ), None, self.standardised_asset_list[col] @@ -1911,6 +1942,42 @@ class AssetList: self.outcomes[self.DOMNA_PROPERTY_ID].isin(identified_work) ] + def label_property_status(self): + """ + This function is designed to be run after identify_worktypes() has been run, and will create a "property_status" + column, which will note where each property is (to be surveyed, surveyed, installed), using the stages we + recognise within hubspot + :return: + """ + + # For anything that is ready to go, that gets set to ready to be scheduled + self.standardised_asset_list["hubspot_status"] = np.where( + ~pd.isnull(self.standardised_asset_list["cavity_reason"]) | + ~pd.isnull(self.standardised_asset_list["solar_reason"]), + hubspot_config.HubspotProcessStatus.READY_TO_BE_SCHEDULED.label, + None + ) + + # we step through the process of flagging completed surveys + + # We utilise submissions, ecosurv and outcomes to define the hubspot status + # We'll take the maximum of these three columns, based on the enum integer value + label_to_enum = {e.label: e for e in hubspot_config.HubspotProcessStatus} + + def get_max_status_from_columns(row): + status_candidates = [] + for col in ["submission_status", "ecosurv_install_status", "outcome_status"]: + label = row.get(col) + if label in label_to_enum: + status_candidates.append(label_to_enum[label]) + if not status_candidates: + return row["hubspot_status"] # fallback to existing status if no updates + return max(status_candidates).label + + self.standardised_asset_list["hubspot_status"] = self.standardised_asset_list.apply( + get_max_status_from_columns, axis=1 + ) + def flat_analysis(self): # We need to deduce the building name - we strip out the house number @@ -2331,6 +2398,52 @@ class AssetList: # It doesn't matter too much which record we take matched = matched.drop_duplicates(subset=[self.STANDARD_LANDLORD_PROPERTY_ID]) + # We merge on the status of the property + matched = matched.merge( + self.ecosurv[["Reference", "Status", "Lead Status", "Tags"]].rename( + columns={ + "Reference": "ecosurv_reference", + "status": "ecosurv_status", + "Lead Status": "ecosurv_lead_status", + "Tags": "ecosurv_tags" + } + ), how="left", on="ecosurv_reference" + ) + + matched["ecosurv_install_status"] = None + + # This mapping is ordered by process order, where lodgment is the final step so if we have an indication + # that the property is ready for lodgement, we set the status to that. We then proceed through the other + # statuses where the penultimate status is install complete + mapping = { + "Cancelled": hubspot_config.HubspotProcessStatus.INSTALLER_CANCELLED_FINALIZED, + "TrustMark: Lodged": hubspot_config.HubspotProcessStatus.LODGEMENT_COMPLETE, + "Retrofit: Complete": hubspot_config.HubspotProcessStatus.INSTALL_COMPLETE, + "Retrofit: Awaiting TrustMark": hubspot_config.HubspotProcessStatus.INSTALL_COMPLETE, + "Retrofit: Awaiting post checks": hubspot_config.HubspotProcessStatus.INSTALL_COMPLETE, + "Installer Notification Sent": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER, + "Submitted to RC": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER, + "COONEY": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER, + "Signed off for install": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER, + "Retrofit: Signed off for install": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER, + "Audit": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER, + "Accepted": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER, + "Sold": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER + } + + def get_max_status(tag_str): + if pd.isna(tag_str): + return None + matched_statuses = [] + for tag, status in mapping.items(): + if tag in tag_str: + matched_statuses.append(status) + if not matched_statuses: + return None + return max(matched_statuses).label + + matched["ecosurv_install_status"] = matched["ecosurv_tags"].apply(get_max_status) + self.standardised_asset_list = self.standardised_asset_list.merge( matched, how="left", @@ -2380,7 +2493,7 @@ class AssetList: # Perform the remap outcomes["Outcome"] = outcomes["Notes / Outcomes"].map(remap_dictionary) - outcomes["Outcome"] = outcomes["Outcome"].str.lower() + outcomes["Outcome"] = outcomes["Outcome"].str.lower().str.strip() logger.info("Matching outcomes to asset list") # Merge the outcomes onto the asset list - we check we're able to match sufficiently well @@ -2542,12 +2655,13 @@ class AssetList: apply(get_latest_note). reset_index(drop=True) ) - latest_note = latest_note[["domna_property_id", notes_col]] + latest_note = latest_note[["domna_property_id", notes_col, "Outcome"]].rename( + columns={"Notes": "latest_outcome_note", "Outcome": "latest_outcome"} + ) pivot_df = lookup.groupby(["domna_property_id", "Outcome"]).size().unstack(fill_value=0).reset_index() - pivot_df = pivot_df.merge( - visit_counts, how="left", on="domna_property_id" - ) + pivot_df = pivot_df.merge(visit_counts, how="left", on="domna_property_id") + pivot_df = pivot_df.merge(latest_note, how="left", on="domna_property_id") # We want the latest note @@ -2558,15 +2672,32 @@ class AssetList: self.outcomes["matched_to_asset_list"] = self.outcomes["row_id"].isin(lookup["row_id"].values) self.outcomes = self.outcomes.merge(lookup[["row_id", "domna_property_id"]], how="left", on="row_id") + # We flag the outcome status, based on the outcome + pivot_df["outcome_status"] = None + + if "surveyed" in pivot_df.columns: + pivot_df["outcome_status"] = np.where( + pivot_df["surveyed"] > 0, hubspot_config.HubspotProcessStatus.SURVEYED_COMPLETED_SIGNED_OFF.label, + pivot_df["outcome_status"] + ) + + if "installer refusal" in pivot_df.columns: + pivot_df["outcome_status"] = np.where( + pivot_df["installer refusal"] > 0, hubspot_config.HubspotProcessStatus.NOT_VIABLE.label, + pivot_df["outcome_status"] + ) + + pivot_df["outcome_status"] = np.where( + pivot_df["latest_outcome"].isin(["see notes"]) & + (pivot_df["outcome_status"] != hubspot_config.HubspotProcessStatus.SURVEYED_COMPLETED_SIGNED_OFF.label), + hubspot_config.HubspotProcessStatus.SURVEYED_NO_ACCESS_NEEDS_SIGN_OFF.label, + pivot_df["outcome_status"] + ) + # We merge out pivoted outcomes onto the asset list self.standardised_asset_list = self.standardised_asset_list.merge( pivot_df, how="left", left_on=self.DOMNA_PROPERTY_ID, right_on="domna_property_id" ) - # Merge the latest note - self.standardised_asset_list = self.standardised_asset_list.merge( - latest_note.rename(columns={notes_col: "Latest Route March Note"}), - how="left", left_on=self.DOMNA_PROPERTY_ID, right_on="domna_property_id" - ) if self.standardised_asset_list[self.DOMNA_PROPERTY_ID].duplicated().sum(): raise ValueError("Duplicates appreared - something went wrong") @@ -2640,6 +2771,7 @@ class AssetList: master_data.columns else "PROPERTY TYPE As per table emailed" ) measure_mix_col = "MEASURE COMBO" + installer_notes_col = "INSTALLERS NOTES ; REASONS FOR CANCELLATIONS" logger.info("Matching master data to asset list") matched = [] @@ -2774,19 +2906,30 @@ class AssetList: self.standardised_asset_list = self.standardised_asset_list.drop(columns="house_no") # We match the "UPRN" which is the landlords ID, onto the master sheet + + if measure_mix_col not in master_data.columns: + master_data[measure_mix_col] = "Measure mix not recorded" + matched = pd.DataFrame(matched) - master_to_append = master_data[[scheme_col, "row_id", install_col, submission_col, measure_mix_col]].merge( + master_to_append = master_data[ + [scheme_col, "row_id", install_col, submission_col, measure_mix_col, installer_notes_col] + ].merge( matched, how="left", on="row_id" ).rename( columns={ scheme_col: "funding_scheme", measure_mix_col: "measure_mix", install_col: "survey_status", - submission_col: "submission_date" + submission_col: "submission_date", + installer_notes_col: "submission_installer_notes" } ) - master_to_append["cancelled"] = master_to_append["survey_status"].str.lower().str.contains("cancel") - master_to_append["installed"] = master_to_append["survey_status"].str.lower().str.contains("installed") + master_to_append["submission_cancelled"] = ( + master_to_append["survey_status"].str.lower().str.contains("cancel") + ) + master_to_append["submission_installed"] = ( + master_to_append["survey_status"].str.lower().str.contains("installed") + ) master_surveyed.append(master_to_append) unmatched_df = master_data[ master_data["row_id"].isin(unmatched) @@ -2822,7 +2965,21 @@ class AssetList: ].astype(str) # We de-dupe crudely on landlord property id - self.master_surveyed = master_surveyed.drop_duplicates(subset=[self.STANDARD_LANDLORD_PROPERTY_ID]) + self.master_surveyed = master_surveyed.drop_duplicates(subset=[self.STANDARD_LANDLORD_PROPERTY_ID]).copy() + + # We now add the submission status, based on the hubspot stages + self.master_surveyed["submission_status"] = hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER.label + self.master_surveyed["submission_status"] = np.where( + self.master_surveyed["submission_cancelled"] == True, + hubspot_config.HubspotProcessStatus.INSTALLER_CANCELLED_FINALIZED.label, + self.master_surveyed["submission_status"] + ) + + self.master_surveyed["submission_status"] = np.where( + self.master_surveyed["submission_installed"] == True, + hubspot_config.HubspotProcessStatus.INSTALL_COMPLETE.label, + self.master_surveyed["submission_status"] + ) self.standardised_asset_list = self.standardised_asset_list.merge( self.master_surveyed, how="left", on=self.STANDARD_LANDLORD_PROPERTY_ID diff --git a/asset_list/app.py b/asset_list/app.py index 3441e5de..31c404e5 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -99,66 +99,36 @@ def app(): phase = False ecosurv_landlords = "thrive" - # Medway - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Medway" - data_filename = "MEDWAY Asset List.xlsx" - sheet_name = "Asset list" + # Torus + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Torus/Phase 2" + data_filename = "Torus Property Asset List - INSPECTIONS.xlsx" + sheet_name = "TORUS" postcode_column = 'Postcode' fulladdress_column = None - address1_column = "House Number" + address1_column = "AddressLine1" address1_method = None - address_cols_to_concat = ["House Number", "Street 1"] + address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"] missing_postcodes_method = None - landlord_year_built = "Year Built" - landlord_os_uprn = None - landlord_property_type = "Property Type - Academy" - landlord_built_form = "Property Type - Academy" - landlord_wall_construction = None - landlord_roof_construction = None - landlord_heating_system = None - landlord_existing_pv = None - landlord_property_id = "Row ID" - landlord_sap = None - outcomes_filename = [] - outcomes_sheetname = [] - outcomes_postcode = [] - outcomes_houseno = [] - outcomes_id = [] - outcomes_address = [] - master_filepaths = [] - master_to_asset_list_filepath = None - phase = False - ecosurv_landlords = None - - # MHS - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS" - data_filename = "MHS HOMES (Full Asset List) - for programme build.xlsx" - sheet_name = "Sheet1" - postcode_column = 'Postcode' - fulladdress_column = "FullAddress" - address1_column = None - address1_method = "house_number_extraction" - address_cols_to_concat = [] - missing_postcodes_method = None - landlord_year_built = "BuiltInYear" - landlord_os_uprn = None - landlord_property_type = "AssetType" - landlord_built_form = "PropertyType" - landlord_wall_construction = None - landlord_roof_construction = None - landlord_heating_system = None - landlord_existing_pv = None + landlord_year_built = "Property Age" + landlord_os_uprn = "NatUPRN" + landlord_property_type = "Property Type" + landlord_built_form = "Built Form" + landlord_wall_construction = "Wall Construction" + landlord_roof_construction = "Roof Construction" + landlord_heating_system = "Space Heating Source" + landlord_existing_pv = "Low Carbon Technology (Solar PV)" landlord_property_id = "UPRN" - landlord_sap = None - outcomes_filename = [] - outcomes_sheetname = [] - outcomes_postcode = [] - outcomes_houseno = [] - outcomes_id = [] - outcomes_address = [] + landlord_sap = "SAP Score" + outcomes_filename = None + outcomes_sheetname = None + outcomes_postcode = None + outcomes_houseno = None + outcomes_id = None + outcomes_address = None master_filepaths = [] master_to_asset_list_filepath = None - phase = False + master_id_colnames = [] + phase = True ecosurv_landlords = None # Southern Midlands @@ -300,7 +270,8 @@ def app(): asset_list.flag_survey_master( master_filepaths=master_filepaths, - master_to_asset_list_filepath=master_to_asset_list_filepath + master_to_asset_list_filepath=master_to_asset_list_filepath, + master_id_colnames=master_id_colnames, ) asset_list.flag_ecosurv(ecosurv_landlords) @@ -505,6 +476,9 @@ def app(): pprint(asset_list.work_type_figures) + # We now flag the status of the property + asset_list.label_property_status() + asset_list.flat_analysis() asset_list.load_contact_details( diff --git a/asset_list/hubspot/config.py b/asset_list/hubspot/config.py new file mode 100644 index 00000000..180bf0e0 --- /dev/null +++ b/asset_list/hubspot/config.py @@ -0,0 +1,28 @@ +from enum import IntEnum + + +class HubspotProcessStatus(IntEnum): + def __new__(cls, value, label): + obj = int.__new__(cls, value) + obj._value_ = value + obj.label = label + return obj + + # the numerical values of this enum aren't important, but they define the order of operations + + # This is the first stage, where a survey is ready to go + READY_TO_BE_SCHEDULED = 1, "READY TO BE SCHEDULED" + # The property didn't get access and needs sign off + SURVEYED_NO_ACCESS_NEEDS_SIGN_OFF = 2, "SURVEYED - NO ACCESS - NEED SIGN OFF" + # The survey has been completed. We don't have any update as to whether the property has been installed + SURVEYED_COMPLETED_SIGNED_OFF = 3, "SURVEYED - COMPLETED - SIGNED OFF" + # The property turned out to be ineligibile + NOT_VIABLE = 4, "NOT VIABLE" + # The property is with the installer. This will likely be the default for historic programmes + SUBMITTED_TO_INSTALLER = 5, "SUBMITTED TO INSTALLER" + # The property has been installed + INSTALL_COMPLETE = 6, "INSTALL COMPLETE" + # The install has complete and lodgement is complete + LODGEMENT_COMPLETE = 7, "LODGEMENT COMPLETE" + # The property has been cancelled + INSTALLER_CANCELLED_FINALIZED = 8, "INSTALLER CANCELLED - FINALIZED" From 1e0fbb111dc401e7cb5697a6285d0d89e9483d91 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 23 May 2025 10:23:38 +0100 Subject: [PATCH 04/14] don't fetch from find my epc website when the property doesnt have an epc --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- asset_list/AssetList.py | 82 ++++++++-------- asset_list/app.py | 8 +- asset_list/hubspot/prepare_for_hubspot.py | 18 ++++ backend/engine/engine.py | 2 +- etl/customers/thrive/Project codes.py | 108 ++++++++++++++++++++++ 7 files changed, 180 insertions(+), 42 deletions(-) create mode 100644 asset_list/hubspot/prepare_for_hubspot.py create mode 100644 etl/customers/thrive/Project codes.py diff --git a/.idea/Model.iml b/.idea/Model.iml index 09f2e496..c6561970 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml index fb10c6b0..50cad4ca 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py index e68ee6dd..fea0f59e 100644 --- a/asset_list/AssetList.py +++ b/asset_list/AssetList.py @@ -279,6 +279,7 @@ class AssetList: STANDARD_HEATING_SYSTEM = "landlord_heating_system" STANDARD_EXISTING_PV = "landlord_existing_pv" STANDARD_SAP = "landlord_sap_rating" + STANDARD_BLOCK_REFERENCE = "landlord_block_reference" DOMNA_PROPERTY_ID = "domna_property_id" @@ -369,6 +370,7 @@ class AssetList: landlord_heating_system=None, landlord_existing_pv=None, landlord_sap=None, + landlord_block_reference=None, phase=False, header=0 ): @@ -382,7 +384,7 @@ class AssetList: self.standardised_asset_list = self.raw_asset_list.copy() # Will be used to store aggregated figures against the various work types self.work_type_figures = {} - self.flat_data = None + self.block_analysis_df = None self.duplicated_addresses = None self.contact_details = None self.contact_detail_fields = None @@ -425,6 +427,7 @@ class AssetList: self.landlord_heating_system = landlord_heating_system self.landlord_existing_pv = landlord_existing_pv self.landlord_sap = landlord_sap + self.landlord_block_reference = landlord_block_reference # parameters for cleaning self.full_address_cols_to_concat = full_address_cols_to_concat @@ -671,6 +674,7 @@ class AssetList: self.landlord_heating_system, self.landlord_existing_pv, self.landlord_sap, + self.landlord_block_reference, ] # Keep just non-null variables (e.g landlord may not provide uprn self.keep_variables = [v for v in variables if v is not None] @@ -688,6 +692,7 @@ class AssetList: self.landlord_heating_system: self.STANDARD_HEATING_SYSTEM, self.landlord_existing_pv: self.STANDARD_EXISTING_PV, self.landlord_sap: self.STANDARD_SAP, + self.landlord_block_reference: self.STANDARD_BLOCK_REFERENCE } self.rename_map = {k: v for k, v in self.rename_map.items() if k is not None} @@ -1905,7 +1910,7 @@ class AssetList: for col in ["cavity_reason", "solar_reason"]: self.standardised_asset_list[col] = np.where( ( - (~pd.isnull(self.standardised_asset_list["ecosurv_status"])) + (~pd.isnull(self.standardised_asset_list["ecosurv_install_status"])) ), None, self.standardised_asset_list[col] @@ -1978,42 +1983,42 @@ class AssetList: get_max_status_from_columns, axis=1 ) - def flat_analysis(self): + def block_analysis(self): - # We need to deduce the building name - we strip out the house number + if self.landlord_block_reference is None: + # This information is not available + return - # We want to deduce if flats have 50% of the properties below C75 - # We group by postcode and property type - grouped = self.standardised_asset_list.groupby( - [self.STANDARD_POSTCODE, self.STANDARD_PROPERTY_TYPE] - ) + # Reverse mapping: label -> enum + LABEL_TO_ENUM = {e.label: e for e in hubspot_config.HubspotProcessStatus} - flat_data = [] - for _, group in grouped: - if "flat" in group[self.STANDARD_PROPERTY_TYPE].values: - num_flats = group[self.STANDARD_PROPERTY_TYPE].shape[0] - num_below_c75 = group[ - self.EPC_API_DATA_NAMES["current-energy-efficiency"] - ].lt(self.FILLED_CAVITY_SAP_THRESHOLD).sum() - # Check if any flats are below C69 - num_flats_below_c69 = group[ - self.EPC_API_DATA_NAMES["current-energy-efficiency"] - ].lt(69).sum() + # Threshold status - anythign that is at this stage or beyond is considered surveyed + threshold = hubspot_config.HubspotProcessStatus.SURVEYED_COMPLETED_SIGNED_OFF.value - flat_data.append( - { - "Postcode": group[self.STANDARD_POSTCODE].iloc[0], - "Property Type": "Flat", - "Number of Flats with EPC": num_flats, - "Number of Flats below C75": num_below_c75, - "Proportion of Flat EPCs below C75": round(100 * num_below_c75 / num_flats), - "Number of Flats Below C69": num_flats_below_c69, - } - ) + block_analysis = [] + for block_reference, group in self.standardised_asset_list.groupby(self.STANDARD_BLOCK_REFERENCE): + cavity_breakdown = group["cavity_reason"].fillna("No Eligibility").value_counts(normalize=True) * 100 - flat_data = pd.DataFrame(flat_data) + if all(cavity_breakdown.index == "No Eligibility"): + continue - self.flat_data = flat_data + works = group["hubspot_status"] + above_threshold = works.map(LABEL_TO_ENUM.get).dropna() + count_above = (above_threshold >= threshold).sum() + proportion = count_above / len(works) + + block_analysis.append( + { + "Block Reference": block_reference, + "Proportion of properties suryeyed": proportion, + **cavity_breakdown.to_dict(), + } + ) + + block_analysis = pd.DataFrame(block_analysis) + block_analysis = block_analysis.fillna(0) + + self.block_analysis_df = block_analysis @staticmethod def split_full_name(x): @@ -2403,14 +2408,15 @@ class AssetList: self.ecosurv[["Reference", "Status", "Lead Status", "Tags"]].rename( columns={ "Reference": "ecosurv_reference", - "status": "ecosurv_status", + "Status": "ecosurv_status", "Lead Status": "ecosurv_lead_status", - "Tags": "ecosurv_tags" + "Tags": "ecosurv_tags", + "Installer": "ecosurv_installer" } ), how="left", on="ecosurv_reference" ) - matched["ecosurv_install_status"] = None + matched["ecosurv_install_status"] = hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER # This mapping is ordered by process order, where lodgment is the final step so if we have an indication # that the property is ready for lodgement, we set the status to that. We then proceed through the other @@ -2772,6 +2778,7 @@ class AssetList: ) measure_mix_col = "MEASURE COMBO" installer_notes_col = "INSTALLERS NOTES ; REASONS FOR CANCELLATIONS" + installer_col = "INSTALLER" logger.info("Matching master data to asset list") matched = [] @@ -2912,7 +2919,7 @@ class AssetList: matched = pd.DataFrame(matched) master_to_append = master_data[ - [scheme_col, "row_id", install_col, submission_col, measure_mix_col, installer_notes_col] + [scheme_col, "row_id", install_col, submission_col, measure_mix_col, installer_notes_col, installer_col] ].merge( matched, how="left", on="row_id" ).rename( @@ -2921,7 +2928,8 @@ class AssetList: measure_mix_col: "measure_mix", install_col: "survey_status", submission_col: "submission_date", - installer_notes_col: "submission_installer_notes" + installer_notes_col: "submission_installer_notes", + installer_col: "submission_installer" } ) master_to_append["submission_cancelled"] = ( diff --git a/asset_list/app.py b/asset_list/app.py index 31c404e5..41623880 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -82,6 +82,7 @@ def app(): landlord_existing_pv = None landlord_property_id = "thrive_property_id" landlord_sap = "sap_rating_updated" + landlord_block_reference = "block_reference" outcomes_filename = [ os.path.join(data_folder, "Thrive - Outcomes - April 24-March25 - Corrected.xlsx") ] @@ -119,6 +120,7 @@ def app(): landlord_existing_pv = "Low Carbon Technology (Solar PV)" landlord_property_id = "UPRN" landlord_sap = "SAP Score" + landlord_block_reference = None outcomes_filename = None outcomes_sheetname = None outcomes_postcode = None @@ -209,6 +211,7 @@ def app(): landlord_heating_system=landlord_heating_system, landlord_existing_pv=landlord_existing_pv, landlord_sap=landlord_sap, + landlord_block_reference=landlord_block_reference, phase=phase ) asset_list.init_standardise() @@ -479,7 +482,7 @@ def app(): # We now flag the status of the property asset_list.label_property_status() - asset_list.flat_analysis() + asset_list.block_analysis() asset_list.load_contact_details( local_filepath=os.path.join(data_folder, "Full property list wth D&V report V look up 12.2.25.xlsx"), @@ -526,7 +529,8 @@ def app(): with pd.ExcelWriter(filename) as writer: asset_list.standardised_asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False) - asset_list.flat_data.to_excel(writer, sheet_name="Flat Data", index=False) + if asset_list.block_analysis_df is not None: + asset_list.block_analysis_df.to_excel(writer, sheet_name="Block Analysis", index=False) # If we have outcomes, we add a tab with the outcomes if not asset_list.outcomes_for_output.empty: asset_list.outcomes_for_output.to_excel(writer, sheet_name="Outcomes", index=False) diff --git a/asset_list/hubspot/prepare_for_hubspot.py b/asset_list/hubspot/prepare_for_hubspot.py new file mode 100644 index 00000000..302d2673 --- /dev/null +++ b/asset_list/hubspot/prepare_for_hubspot.py @@ -0,0 +1,18 @@ +import pandas as pd + + +def app(): + """ + TODO: Operations may have removed some cavity_reason/solar_reason values from the standardised asset list after + review. So, we will need to update the hubspot status for these entries and set them to None, if they + were previously being set to ready for scheduling. We don't want to just filter on rows where + cavity_reason and solar_reason are populated, as if we want to include historical surveys, this will remove + them + + :return: + """ + + filepath = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive " + "Programme - reconciled.xlsx") + + standardised_asset_list = pd.read_excel(filepath, sheet_name="Standardised Asset List") diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 58c3dc8e..5316fd03 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -507,7 +507,7 @@ async def model_engine(body: PlanTriggerRequest): ) # if we have a remote assment data type, we pull the additional data and include it - if body.event_type == "remote_assessment": + if (body.event_type == "remote_assessment") and not (epc_searcher.newest_epc["estimated"]): logger.info("Retrieving find my epc data") try: property_non_invasive_recommendations, patch = RetrieveFindMyEpc.get_from_epc( diff --git a/etl/customers/thrive/Project codes.py b/etl/customers/thrive/Project codes.py new file mode 100644 index 00000000..6235ebed --- /dev/null +++ b/etl/customers/thrive/Project codes.py @@ -0,0 +1,108 @@ +""" +THis script will take the standardised asset list and append on the project codes. +We also, review the existing install status, in case anything is wrong +""" +import pandas as pd +import numpy as np + +standardised_asset_list = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - " + "Complete - Updated May 2025 - Standardised.xlsx", + sheet_name="Standardised Asset List", +) + +project_code_allocations = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Master Tracker (Thrive - " + "Warmfront).xlsx", + sheet_name="Master Tracker", + header=1 +) + +programme_codes = project_code_allocations[ + ["UPRN", "Proposed Progamme", "New Proposed Programme", "Warmfront Survey Notes", ] +].copy() +programme_codes["programme_reference"] = programme_codes["New Proposed Programme"].copy() +programme_codes["programme_reference"] = np.where( + pd.isnull(programme_codes["programme_reference"]), + programme_codes["Proposed Progamme"], + programme_codes["programme_reference"] +) + +PROJECT_CODE_MAP = { + 'Phase 2': "THRIVE-002", + 'Phase 3': "THRIVE-003", + 'Phase 4': "THRIVE-004", + 'Phase 5': "THRIVE-005", + 'Phase 6': "THRIVE-006", + 'Phase 7': "THRIVE-007", + 'Phase 8': "THRIVE-008", + 'Phase 9': "THRIVE-009", + 'Phase 10': "THRIVE-010", + "Week1": "THRIVE-WEEK-001", + "Week2": "THRIVE-WEEK-002", + "Week4": "THRIVE-WEEK-004", + "Week7": "THRIVE-WEEK-007", +} +programme_codes["project_code"] = programme_codes["programme_reference"].map(PROJECT_CODE_MAP) + +thrive_notes = project_code_allocations[["UPRN", "Thrive Notes", "Priority", "Notes Reply (Thrive)"]].copy() + +standardised_asset_list = standardised_asset_list.merge( + programme_codes[["UPRN", "project_code", "programme_reference"]], + how="left", + left_on="landlord_property_id", + right_on="UPRN", +).merge( + thrive_notes[["UPRN", "Thrive Notes", "Priority", "Notes Reply (Thrive)"]], + how="left", + on="UPRN", +) + +standardised_asset_list = standardised_asset_list.drop(columns=["UPRN"]) + +# We fill the project code for historical completions +standardised_asset_list["project_code"] = np.where( + pd.isnull(standardised_asset_list["project_code"]) & ( + standardised_asset_list["hubspot_status"] != "READY TO BE SCHEDULED" + ) & ( + ~pd.isnull(standardised_asset_list["hubspot_status"]) + ), + "THRIVE-HISTORICAL", + standardised_asset_list["project_code"] +) + +# Store as an excel +filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Programme - " + "reconciled.xlsx") +# Store the data in two tabs. One for the asset list with the EPC data and the second with the flat data +# Other tabs: +block_analysis = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - " + "Complete - Updated May 2025 - Standardised.xlsx", + sheet_name="Block Analysis", +) +outcomes = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - " + "Complete - Updated May 2025 - Standardised.xlsx", + sheet_name="Outcomes", +) +unmatched_submissions = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - " + "Complete - Updated May 2025 - Standardised.xlsx", + sheet_name="Unmatched Submissions", +) +unmatched_ecosurv = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - " + "Complete - Updated May 2025 - Standardised.xlsx", + sheet_name="Unmatched Ecosurv", +) + +with pd.ExcelWriter(filename) as writer: + standardised_asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False) + block_analysis.to_excel(writer, sheet_name="Block Analysis", index=False) + # If we have outcomes, we add a tab with the outcomes + outcomes.to_excel(writer, sheet_name="Outcomes", index=False) + + unmatched_submissions.to_excel(writer, sheet_name="Unmatched Submissions", index=False) + + unmatched_ecosurv.to_excel(writer, sheet_name="Unmatched Ecosurv", index=False) From d2a74d5612439e3732553eb133129c2aaad9f5bc Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 23 May 2025 10:48:08 +0100 Subject: [PATCH 05/14] adding in a placeholder method which will assume that properties without an EPC, are going to be older properties --- backend/SearchEpc.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py index 0010191a..1ee1f950 100644 --- a/backend/SearchEpc.py +++ b/backend/SearchEpc.py @@ -702,6 +702,18 @@ class SearchEpc: exclude_old=exclude_old ) + # Check if it's a new build EPC. A property that doesn't have an EPC is not going to be a new build + # so we avoid comparing it to new builds + # TODO - this is experimental + newer_age_bands = [ + "England and Wales: 1996-2002", "England and Wales: 2003-2006", "England and Wales: 2007-2011", + "England and Wales: 2012 onwards" + ] + + if (~epc_data["construction-age-band"].isin(newer_age_bands)).sum(): + # We have some older age bands, so we need to filter them out + epc_data = epc_data[~epc_data["construction-age-band"].isin(newer_age_bands)].copy() + # If we have missing lodgment date, we fill it with inspection-date epc_data["lodgement-datetime"] = epc_data["lodgement-datetime"].fillna(epc_data["inspection-date"]) # If we still have missing dates, we set it to the mean of the non NA dates From 94dcd9c00acdfd2b7db34c6ab9e79a6061233e09 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 3 Jun 2025 18:41:01 +0100 Subject: [PATCH 06/14] working on hubspot upload --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- asset_list/AssetList.py | 274 +++++++++++++++------- asset_list/hubspot/config.py | 43 ++++ asset_list/hubspot/prepare_for_hubspot.py | 39 ++- backend/Funding.py | 120 ++++++++++ etl/customers/cambridge/surveys.py | 24 ++ etl/customers/places_for_people/abs.py | 199 ++++++++++++++++ etl/customers/thrive/Project codes.py | 34 ++- 9 files changed, 635 insertions(+), 102 deletions(-) create mode 100644 etl/customers/cambridge/surveys.py create mode 100644 etl/customers/places_for_people/abs.py diff --git a/.idea/Model.iml b/.idea/Model.iml index c6561970..09f2e496 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml index 50cad4ca..fb10c6b0 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py index fea0f59e..3f5ef7ff 100644 --- a/asset_list/AssetList.py +++ b/asset_list/AssetList.py @@ -350,6 +350,34 @@ class AssetList: "cavity wall, as built, partial insulation", ] + # Work type prefixes: + # Empties + EMPTY_CAVITY_NON_INTRUSIVE = "Non-Intrusive Data Shows Empty Cavity" + EPC_EMPTY_INSPECTIONS_RETRO_DRILLED = "EPC Shows Empty Cavity, inspections show retro drilled" + EPC_EMPTY_INSPECTIONS_FILLED = "EPC Shows Empty Cavity, inspections show filled or other" + EPC_EMPTY_INSPECTIONS_FILLED_AT_BUILD = "EPC Shows Empty Cavity, inspections show filled at build" + EPC_EMPTY_INSPECTIONS_NON_CAVITY = "EPC Shows Empty Cavity, inspections show non-cavity build" + EPC_EMPTY = "EPC Shows Empty Cavity" + LANDLORD_EMPTY_INSPECTIONS_OTHER = ("Landlord Data Shows Empty Cavity, EPC & Inspections Shows Filled or " + "Non-cavity") + # Extraction + EXTRACTION_NON_INTRUSIVE = "Non-Intrusive Data Shows Cavity Extraction" + + # Solar + SOLAR_ELIGIBLE = "Solar Eligible" + SOLAR_ELIGIBLE_SOLID_WALL_UNINSULATED = "Solar Eligible, Solid Wall Uninsulated, EPC E or Below" + SOLAR_ELIGIBLE_NEEDS_HEATING_UPGRADE = "Solar Eligible, Needs Heating Upgrade" + + CRM_PRODUCTS = { + "Empty Cavity - ECO4": {"id": 82733738177, "unit_price": 1000, "name": "Empty Cavity & Loft - ECO4"}, + "Extract & Fill - ECO4": {"id": 100307905778, "unit_price": 500, "name": "Extract & Fill - ECO4"}, + "Solar PV - ECO4": {"id": 82623589564, "unit_price": 1608, "name": "Solar PV - ECO4"}, + "Solar PV + HHRSH - ECO4": {"id": 155529972924, "unit_price": 1608, "name": "Solar PV + HHRSH - ECO4"}, + "Solar PV + Heating Upgrade - ECO4": { + "id": 109265426665, "unit_price": 1608, "name": "Solar PV + Heating Upgrade - ECO4" + }, + } + def __init__( self, local_filepath, @@ -1719,10 +1747,10 @@ class AssetList: self.standardised_asset_list["cavity_reason"] = None empty_cavity_map = { - "non_intrusive_indicates_empty_cavity": "Non-Intrusive Data Shows Empty Cavity: ", - "non_intrusive_indicates_empty_cavity_has_solar": "Non-Intrusive Data Shows Empty Cavity - property " + "non_intrusive_indicates_empty_cavity": self.EMPTY_CAVITY_NON_INTRUSIVE_PREFIX + ": ", + "non_intrusive_indicates_empty_cavity_has_solar": f"{self.EMPTY_CAVITY_NON_INTRUSIVE} - property " "already has solar: ", - "non_intrusive_indicates_empty_cavity_no_year_filter": f"Non-Intrusive Data Shows Empty Cavity, " + "non_intrusive_indicates_empty_cavity_no_year_filter": f"{self.EMPTY_CAVITY_NON_INTRUSIVE}, " f"built after {self.EMPTY_CAVITY_YEAR_THRESHOLD}: ", } @@ -1747,7 +1775,7 @@ class AssetList: )) & pd.isnull(self.standardised_asset_list["cavity_reason"]) ), - "EPC Shows Empty Cavity, inspections show retro drilled: " + self.standardised_asset_list[ + f"{EPC_EMPTY_INSPECTIONS_RETRO_DRILLED}: " + self.standardised_asset_list[ "SAP Category"], self.standardised_asset_list["cavity_reason"] ) @@ -1759,7 +1787,7 @@ class AssetList: self.standardised_asset_list['non_intrusive_indicates_cavity_extraction'] & pd.isnull(self.standardised_asset_list["cavity_reason"]) ), - "EPC Shows Empty Cavity, inspections show filled or other: " + self.standardised_asset_list[ + f"{self.EPC_EMPTY_INSPECTIONS_FILLED}: " + self.standardised_asset_list[ "SAP Category"], self.standardised_asset_list["cavity_reason"] ) @@ -1771,7 +1799,7 @@ class AssetList: (self.standardised_asset_list['non-intrusives: Insulated'] == "RETRO DRILLED") & pd.isnull(self.standardised_asset_list["cavity_reason"]) ), - "EPC Shows Empty Cavity, inspections show retro drilled: " + self.standardised_asset_list[ + f"{self.EPC_EMPTY_INSPECTIONS_RETRO_DRILLED}: " + self.standardised_asset_list[ "SAP Category"], self.standardised_asset_list["cavity_reason"] ) @@ -1783,8 +1811,7 @@ class AssetList: (self.standardised_asset_list['non-intrusives: Insulated'] == "FILLED AT BUILD") & pd.isnull(self.standardised_asset_list["cavity_reason"]) ), - "EPC Shows Empty Cavity, inspections show filled at build: " + self.standardised_asset_list[ - "SAP Category"], + f"{self.EPC_EMPTY_INSPECTIONS_FILLED_AT_BUILD}: " + self.standardised_asset_list["SAP Category"], self.standardised_asset_list["cavity_reason"] ) else: @@ -1794,7 +1821,7 @@ class AssetList: ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] & pd.isnull(self.standardised_asset_list["cavity_reason"]) ), - "EPC Shows Empty Cavity: " + self.standardised_asset_list["SAP Category"], + f"{self.EPC_EMPTY}: " + self.standardised_asset_list["SAP Category"], self.standardised_asset_list["cavity_reason"] ) @@ -1804,10 +1831,12 @@ class AssetList: ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] & pd.isnull(self.standardised_asset_list["cavity_reason"]) ), - "EPC Shows Empty Cavity, inspections show non-cavity build: " + self.standardised_asset_list[ - "SAP Category"], + f"{self.EPC_EMPTY_INSPECTIONS_NON_CAVITY}: " + self.standardised_asset_list["SAP Category"], self.standardised_asset_list["cavity_reason"] ) + + # Work type prefixes + # Landlord data: The landlord's data indicates that the wall is an uninsulated cavity wall, but EPC and # inspections show filled self.standardised_asset_list["cavity_reason"] = np.where( @@ -1817,7 +1846,7 @@ class AssetList: ~self.standardised_asset_list["epc_indicates_empty_cavity"] & pd.isnull(self.standardised_asset_list["cavity_reason"]) ), - "Landlord Data Shows Empty Cavity, EPC & Inspections Shows Filled or Non-cavity: " + + f"{self.LANDLORD_EMPTY_INSPECTIONS_OTHER}: " + self.standardised_asset_list["SAP Category"], self.standardised_asset_list["cavity_reason"] ) @@ -1828,7 +1857,7 @@ class AssetList: self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] & pd.isnull(self.standardised_asset_list["cavity_reason"]) ), - "Non-Intrusive Data Shows Cavity Extraction: " + self.standardised_asset_list["SAP Category"], + f"{self.EXTRACTION_NON_INTRUSIVE}: " + self.standardised_asset_list["SAP Category"], self.standardised_asset_list["cavity_reason"] ) @@ -1837,7 +1866,7 @@ class AssetList: self.standardised_asset_list["non_intrusive_indicates_cavity_extraction_no_year_filter"] & pd.isnull(self.standardised_asset_list["cavity_reason"]) ), - f"Non-Intrusive Data Shows Cavity Extraction, built after {self.EMPTY_CAVITY_YEAR_THRESHOLD}: " + + f"{self.EXTRACTION_NON_INTRUSIVE}, built after {self.EMPTY_CAVITY_YEAR_THRESHOLD}: " + self.standardised_asset_list["SAP Category"], self.standardised_asset_list["cavity_reason"] ) @@ -1850,11 +1879,9 @@ class AssetList: # Map of variables and fill values for the solar_reason variable # ordering of this map is important, where we flag our prioritised work types first solar_reason_map = { - "solar_eligible": "Solar Eligible: ", - "solar_eligible_solid_wall_uninsulated": "Solar Eligible, Solid Wall Uninsulated, EPC E or Below: ", - "solar_eligible_needs_heating_upgrade": ( - "Solar Eligible, Needs Heating Upgrade: " - ) + "solar_eligible": f"{self.SOLAR_ELIGIBLE}: ", + "solar_eligible_solid_wall_uninsulated": f"{self.SOLAR_ELIGIBLE_SOLID_WALL_UNINSULATED}: ", + "solar_eligible_needs_heating_upgrade": f"{self.SOLAR_ELIGIBLE_NEEDS_HEATING_UPGRADE}: " } for variable, reason in solar_reason_map.items(): @@ -2079,68 +2106,97 @@ class AssetList: *contact_details[fullname_column].apply(self.split_full_name) ) else: - raise NotImplementedError("Implement me") + contact_details["title"] = None self.contact_details = contact_details - def prepare_for_crm(self, company_domain, crm_pipeline_name, first_dealstage, assigned_surveyors): + @classmethod + def load_standardised_asset_list(cls, filepath): + """ + This function is designed to load the standardised asset list from a file + :return: + """ + # This is a placeholder for now + # instantiate the class + instance = cls( + local_filepath=filepath, + sheet_name="Standardised Asset List", + address1_colname=cls.STANDARD_ADDRESS_1, + postcode_colname=cls.STANDARD_POSTCODE, + full_address_colname=cls.STANDARD_FULL_ADDRESS, + landlord_property_id=cls.STANDARD_LANDLORD_PROPERTY_ID, + full_address_cols_to_concat=[], + missing_postcodes_method=None, + address1_extraction_method=None, + landlord_year_built=None, + landlord_uprn=None, + landlord_property_type=None, + landlord_built_form=None, + landlord_wall_construction=None, + landlord_roof_construction=None, + landlord_heating_system=None, + landlord_existing_pv=None, + landlord_sap=None, + landlord_block_reference=None, + phase=False, + header=0 + ) + return instance + + def prepare_for_crm(self, company_domain, installer_name): """ This function prepares the data for upload into Hubspot :return: """ - # This is a placeholder for now - # This maps the opportunities as we reference them, to the product data as stored in Hubspot - product_lookup_table = { - "Non-Intrusive Data Showed Cavity Extraction": { - "name": "Extract & Fill - ECO4", "id": 100307905778, "unit_price": 500 - }, - "Non-Intrusive Data Showed Empty Cavity": { - "name": "Empty Cavity & Loft - ECO4", "id": 82733738177, "unit_price": 1000 - }, - "Non-Intrusive Data Showed Empty Cavity but all SAP scores allowed": { - "name": "Empty Cavity & Loft - ECO4", "id": 82733738177, "unit_price": 1000 - }, - "Non-Intrusive Data Showed Cavity Extraction but all SAP scores allowed": { - "name": "Extract & Fill - ECO4", "id": 100307905778, "unit_price": 500 - }, - "EPC Data Showed Empty Cavity": { - "name": "Empty Cavity & Loft - ECO4", "id": 82733738177, "unit_price": 1000 - }, - "Solid Floor, Insulated, No Solar": { - "name": "Solar PV - ECO4", "id": 82623589564, "unit_price": 1608 - }, - "Solid Floor, Insulated, Needs Loft": { - "name": "Solar PV - ECO4", "id": 82623589564, "unit_price": 1608 - }, - "Other Floor, Insulated, No Solar": { - "name": "Solar PV - ECO4", "id": 82623589564, "unit_price": 1608 - }, - "Other Floor, Insulated, Needs Loft": { - "name": "Solar PV - ECO4", "id": 82623589564, "unit_price": 1608 - } + + prefixes_to_products = { + # Empty + self.EMPTY_CAVITY_NON_INTRUSIVE: self.CRM_PRODUCTS["Empty Cavity - ECO4"], + self.EPC_EMPTY_INSPECTIONS_RETRO_DRILLED: self.CRM_PRODUCTS["Empty Cavity - ECO4"], + self.EPC_EMPTY_INSPECTIONS_FILLED: self.CRM_PRODUCTS["Empty Cavity - ECO4"], + self.EPC_EMPTY_INSPECTIONS_FILLED_AT_BUILD: self.CRM_PRODUCTS["Empty Cavity - ECO4"], + self.EPC_EMPTY_INSPECTIONS_NON_CAVITY: self.CRM_PRODUCTS["Empty Cavity - ECO4"], + self.EPC_EMPTY: self.CRM_PRODUCTS["Empty Cavity - ECO4"], + self.LANDLORD_EMPTY_INSPECTIONS_OTHER: self.CRM_PRODUCTS["Empty Cavity - ECO4"], + # Extraction + self.EXTRACTION_NON_INTRUSIVE: self.CRM_PRODUCTS["Extract & Fill - ECO4"], + # Solar + self.SOLAR_ELIGIBLE: self.CRM_PRODUCTS["Solar PV - ECO4"], + self.SOLAR_ELIGIBLE_SOLID_WALL_UNINSULATED: self.CRM_PRODUCTS["Solar PV - ECO4"], + self.SOLAR_ELIGIBLE_NEEDS_HEATING_UPGRADE: self.CRM_PRODUCTS["Solar PV + Heating Upgrade - ECO4"], } + # We check if all products are covered in the lookup table - cavity_products = self.standardised_asset_list["cavity_reason"].unique() - solar_products = self.standardised_asset_list["solar_reason"].unique() - # Check if there any options not in out lookup table - if ( - any(x for x in cavity_products if x not in product_lookup_table) or - any(x for x in solar_products if x not in product_lookup_table) - ): - raise ValueError("We have products not referenced in the lookup table - check this") + cavity_products = self.standardised_asset_list["cavity_reason"].unique().tolist() + solar_products = self.standardised_asset_list["solar_reason"].unique().tolist() + + product_map = {} + for identified_product in cavity_products + solar_products: + if pd.isnull(identified_product): + continue + + matched_product = None + for product_prefix, crm_product in prefixes_to_products.items(): + if identified_product.startswith(product_prefix): + matched_product = crm_product + + product_map[identified_product] = matched_product + + # For each cavity and solar product, we iterate through the prexies and map to the products + + # # Check if there any options not in out lookup table + # if ( + # any(x for x in cavity_products if x not in product_lookup_table) or + # any(x for x in solar_products if x not in product_lookup_table) + # ): + # raise ValueError("We have products not referenced in the lookup table - check this") programme_data = self.standardised_asset_list.copy() - - # Exclusions - these are properties we won't treat for the moment - product_exclusions = [ - "Other Floor, Insulated, No Solar", - "Other Floor, Insulated, Needs Loft" - ] - if product_exclusions: - logger.warning("Excluding products: %s", product_exclusions) - - programme_data = programme_data[programme_data["solar_reason"].isin(product_exclusions) == False] + # We take rows that have a survyor and a date for the survey + programme_data = programme_data[ + ~pd.isnull(programme_data["survey_week"]) & ~pd.isnull(programme_data["surveyor"]) + ] # Merge on the contact details programme_data = programme_data.merge( @@ -2153,26 +2209,34 @@ class AssetList: programme_data["Company Domain Name "] = company_domain # Append the product data onto the programme data programme_data["cavity_product"] = programme_data["cavity_reason"].map( - lambda x: product_lookup_table.get(x, {"name": None})["name"] + lambda x: product_map.get(x, {"name": None})["name"] ) programme_data["solar_product"] = programme_data["solar_reason"].map( - lambda x: product_lookup_table.get(x, {"name": None})["name"] + lambda x: product_map.get(x, {"name": None})["name"] ) - programme_data["domna_product"] = programme_data["solar_reason"].copy() + # We check if we have any missings + cavity_missing = pd.isnull(programme_data[~pd.isnull(programme_data["cavity_reason"])]["cavity_product"]).sum() + solar_missing = pd.isnull(programme_data[~pd.isnull(programme_data["solar_reason"])]["solar_product"]).sum() + + if cavity_missing > 0 or solar_missing > 0: + raise ValueError( + f"We have {cavity_missing} cavity products and {solar_missing} solar products that are not " + "mapped to a product in the lookup table. Please check the mapping." + ) + + programme_data["domna_product"] = programme_data["solar_product"].copy() programme_data["domna_product"] = np.where( pd.isnull(programme_data["domna_product"]), - programme_data["solar_product"], + programme_data["cavity_product"], programme_data["domna_product"] ) # We filter just on rows where we have a product - programme_data = programme_data[ - ~pd.isnull(programme_data["domna_product"]) - ] + programme_data = programme_data[~pd.isnull(programme_data["domna_product"])] programme_data = programme_data.drop(columns=["solar_product", "cavity_product"]) product_df = ( - pd.DataFrame(product_lookup_table).T[["name", "id", "unit_price"]] + pd.DataFrame(self.CRM_PRODUCTS).T[["name", "id", "unit_price"]] .reset_index() .rename( columns={ @@ -2194,21 +2258,27 @@ class AssetList: ) # Add in deal and pipeline information - programme_data["dealname"] = programme_data[self.STANDARD_FULL_ADDRESS] + " : " + programme_data[ - "domna_product"] - programme_data['Pipeline '] = crm_pipeline_name - programme_data['Deal Stage '] = first_dealstage + programme_data["dealname"] = ( + programme_data[self.STANDARD_FULL_ADDRESS] + " : " + programme_data["domna_product"] + ) + programme_data['Pipeline '] = hubspot_config.CRM_PIPELINE_NAME + programme_data['Deal Stage '] = hubspot_config.CRM_PIPELINE_FIRST_STAGE_NAME programme_data['Associations: Listing'] = "Property Owner" - programme_data = programme_data.merge( - assigned_surveyors.rename( - columns={self.landlord_property_id: self.STANDARD_LANDLORD_PROPERTY_ID} - ), how="left", on=self.STANDARD_LANDLORD_PROPERTY_ID + # programme_data = programme_data.merge( + # assigned_surveyors.rename( + # columns={self.landlord_property_id: self.STANDARD_LANDLORD_PROPERTY_ID} + # ), how="left", on=self.STANDARD_LANDLORD_PROPERTY_ID + # ) + + # Add in some columns if we have them + date_of_inspections = ( + "Non-Intrusives: Date of Inspection" if + "Non-Intrusives: Date of Inspection" in programme_data.columns else None ) # This maps the hubspot schema to the template. Anything that is not covered in this will be flagged schema_mappings = { - 'Name ': self.DOMNA_PROPERTY_ID, # TODO: Maybe change this? 'Company Domain Name ': 'Company Domain Name ', 'Email ': ( self.contact_detail_fields["email"] if self.contact_detail_fields["email"] else None @@ -2227,9 +2297,10 @@ class AssetList: 'Address 2 ': None, # TODO: Don't have this for the moment 'Postcode ': self.STANDARD_POSTCODE, 'Property Type ': self.STANDARD_PROPERTY_TYPE, - 'Property Sub Type ': None, # TODO: Don't have this for the moment + 'Property Sub Type ': self.STANDARD_BUILT_FORM, 'Bedroom(s) ': None, # TODO: Don't have this for the moment 'Domna Property ID ': self.DOMNA_PROPERTY_ID, + # We populate this with the column that we have 'National UPRN ': ( self.STANDARD_UPRN if self.STANDARD_UPRN is not None else self.EPC_API_DATA_NAMES["uprn"] ), @@ -2239,8 +2310,7 @@ class AssetList: 'Year Built ': self.STANDARD_YEAR_BUILT, 'Boiler Make ': None, # TODO: Don't have this for the moment 'Boiler Model ': None, # TODO: Don't have this for the moment - 'Non-Intrusives: Date Checked ': None, - # TODO: Don't have this for the moment + 'Non-Intrusives: Date Checked ': date_of_inspections, 'Non-Intrusives: Wall Type ': ( "non-intrusives: Construction" if self.non_intrusives_present else None ), @@ -2283,16 +2353,22 @@ class AssetList: 'Last EPC: Age Band ': self.EPC_API_DATA_NAMES["construction-age-band"], 'Deal Stage ': 'Deal Stage ', 'Pipeline ': 'Pipeline ', - 'Expected Commencement Date ': None, # TODO: Need to set this, + 'Expected Commencement Date ': "survey_week", 'Deal Name ': "dealname", # Need to create this, 'Product ID ': 'Product ID ', 'Name ': 'Name ', 'Unit price ': 'Unit price ', 'Quantity ': 'Quantity ', 'Deal Owner': 'surveyor_email', - 'Amount ': 'Unit price ', + 'Project Code ': 'project_code', + 'Associations: Listing': 'Associations: Listing', } + # We sometimes columns if the landlord never provided them + missed_mapping_cols = [c for c in schema_mappings.values() if c not in programme_data.columns if c is not None] + for c in missed_mapping_cols: + programme_data[c] = None + # We now create the finalised dataset to be uploaded into Hubspot variables_required = list(schema_mappings.values()) variables_required = [v for v in variables_required if v is not None] @@ -2307,6 +2383,22 @@ class AssetList: columns={v: k for k, v in schema_mappings.items() if v is not None} ) + programme_data['Installer '] = installer_name + programme_data['Name '] = ( + programme_data['Address 1 '] + " ," + programme_data['Postcode '] + ) + # The listing owner email is the same as the surveyor email (deal owner), so they can see the listing + programme_data['Listing Owner Email '] = programme_data['Deal Owner'] + programme_data['Amount '] = 0 + + # We make sure we have all of the columns that we need + missed_columns = [c for c in hubspot_config.CRM_UPLOAD_COLUMNS if c not in programme_data.columns] + if missed_columns: + raise ValueError( + f"We have the following columns that are not in the programme data: {missed_columns}. " + "Please check the mapping and ensure all required columns are present." + ) + self.hubspot_data = programme_data def flag_ecosurv(self, ecosurv_landlords=None, landlords_to_ignore=None): diff --git a/asset_list/hubspot/config.py b/asset_list/hubspot/config.py index 180bf0e0..6e16279a 100644 --- a/asset_list/hubspot/config.py +++ b/asset_list/hubspot/config.py @@ -1,5 +1,8 @@ from enum import IntEnum +CRM_PIPELINE_NAME = 'Operations - Housing Associations' +CRM_PIPELINE_FIRST_STAGE_NAME = 'READY TO BE SCHEDULED' + class HubspotProcessStatus(IntEnum): def __new__(cls, value, label): @@ -26,3 +29,43 @@ class HubspotProcessStatus(IntEnum): LODGEMENT_COMPLETE = 7, "LODGEMENT COMPLETE" # The property has been cancelled INSTALLER_CANCELLED_FINALIZED = 8, "INSTALLER CANCELLED - FINALIZED" + + +CRM_UPLOAD_COLUMNS = [ + 'Name ', 'Associations: Listing', 'Company Domain Name ', + 'Email ', 'First Name ', 'Last Name ', + 'Phone ', 'Listing Owner Email ', + 'Full Address ', 'Address 1 ', + 'Address 2 ', 'Postcode ', + 'Property Type ', 'Property Sub Type ', + 'Bedroom(s) ', 'Domna Property ID ', + 'National UPRN ', 'Owner Property ID ', + 'Wall Construction ', 'Heating System ', + 'Year Built ', 'Boiler Make ', + 'Boiler Model ', + 'Non-Intrusives: Date Checked ', + 'Non-Intrusives: Wall Type ', + 'Non-intrusives: Insulation ', + 'Non-intrusives: Insulation Material ', + 'Non-Intrusives: CIGA Check Required ', + 'Non-Intrusives: PV Access Issues ', + 'Non-Intrusives: Roof Orientation ', + 'Non-Intrusives: Surveyor Notes ', + 'Non-Intrusives: Surveyor Name ', + 'CIGA: Date Requested ', + 'CIGA: Cavity Guarantee Found ', + 'Last EPC: Is Estimated ', + 'Last EPC: EPC Rating ', + 'Last EPC: SAP Rating ', + 'Last EPC: Main Heating Description ', + 'Last EPC: Heating Controls ', + 'Last EPC: Lodgement Date ', + 'Last EPC: Floor Area ', 'Last EPC: Wall ', + 'Last EPC: Roof ', 'Last EPC: Floor ', + 'Last EPC: Room Height ', + 'Last EPC: Age Band ', 'Deal Stage ', + 'Pipeline ', 'Expected Commencement Date ', + 'Deal Name ', 'Project Code ', + 'Product ID ', 'Name ', 'Unit price ', + 'Quantity ', 'Deal Owner', 'Amount ', 'Installer ' +] diff --git a/asset_list/hubspot/prepare_for_hubspot.py b/asset_list/hubspot/prepare_for_hubspot.py index 302d2673..8ed654f3 100644 --- a/asset_list/hubspot/prepare_for_hubspot.py +++ b/asset_list/hubspot/prepare_for_hubspot.py @@ -1,4 +1,5 @@ import pandas as pd +from asset_list.AssetList import AssetList def app(): @@ -9,10 +10,42 @@ def app(): cavity_reason and solar_reason are populated, as if we want to include historical surveys, this will remove them + + TODO: If we wish to upload deals in batches + :return: """ - filepath = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive " - "Programme - reconciled.xlsx") + # inputs: + customer_domain = "https://thrivehomes.org.uk" + asset_list_filepath = ( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Testing Hubspot Upload/Hubspot Upload - " + "Sample.xlsx" + ) + contact_details_filepath = ( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Testing Hubspot Upload/Sample contact " + "details.xlsx" + ) + contacts_sheet_name = "Sheet1" + contacts_landlord_property_id = "landlord_property_id" + contacts_phone_number_column = "phone_number" + contacts_email_column = "email" + contacts_fullname_column = "fullname" + contacts_firstname_column = "firstname" + contacts_lastname_column = "lastname" - standardised_asset_list = pd.read_excel(filepath, sheet_name="Standardised Asset List") + asset_list = AssetList.load_standardised_asset_list(asset_list_filepath) + asset_list.load_contact_details( + local_filepath=contact_details_filepath, + sheet_name=contacts_sheet_name, + landlord_property_id=contacts_landlord_property_id, + phone_number_column=contacts_phone_number_column, + email_column=contacts_email_column, + fullname_column=contacts_fullname_column, + firstname_column=contacts_firstname_column, + lastname_column=contacts_lastname_column + ) + + asset_list.prepare_for_crm( + company_domain=customer_domain + ) diff --git a/backend/Funding.py b/backend/Funding.py index f5f85b9f..78440eac 100644 --- a/backend/Funding.py +++ b/backend/Funding.py @@ -411,3 +411,123 @@ class Funding: self.gbis() # self.eco4() self.whlg() + + +class Funding2: + """ + New class to handle funding calculation + """ + + def __init__(self, tenure: HousingType): + self.tenure = tenure + + @staticmethod + def get_sap_band(sap_score_number): + bands = [ + ("High_A", 96, float("inf")), + ("Low_A", 92, 96), + ("High_B", 86, 92), + ("Low_B", 81, 86), + ("High_C", 74.5, 81), + ("Low_C", 69, 74.5), + ("High_D", 61.5, 69), + ("Low_D", 55, 61.5), + ("High_E", 46.5, 55), + ("Low_E", 39, 46.5), + ("High_F", 29.5, 39), + ("Low_F", 21, 29.5), + ("High_G", 10.5, 21), + ("Low_G", 1, 10.5), + ] + + for band, lower, upper in bands: + if lower <= sap_score_number < upper: + return band + + return None + + def eco4_prs_eligibility( + self, starting_sap: int, measures: List, mainheat_description: str, heating_control_description: str + ): + """ + Handles the eligibility criteria for private rental properties under eco + :return: + """ + + # Help to heat group + # 1) EPC E - G + # 2) Must receive one of SWI, FTCH, renewable heating or DHC + # 3) Tenant must be on benefits + + # We don't consider the tenant being on benefits - we just notify the end user that this is a requirement + + meets_epc = starting_sap <= 54 + has_solid_wall = "internal_wall_insulation" in measures or "external_wall_insulation" in measures + # We check if the property has a heating system that means solar pv counts as a renewable heating system + + has_eligible_electric_heating = any(x in mainheat_description for x in [ + "air source heat pump", "ground source heat pump", "boiler and radiators, electric" + ]) | (("electric storage heaters" in mainheat_description) and + (heating_control_description.lower() == "controls for high heat retention storage heaters") + ) + + # Counts as renewable heating + solar_renweable_heating = has_eligible_electric_heating & ("solar_pv" in measures) + # Is a renewable heating + ashp = "air_source_heat_pump" in measures + + if meets_epc & (solar_renweable_heating or ashp or has_solid_wall): + return True + + return False + + def check_funding( + self, measures: List, + starting_sap: int, + ending_sap: int, + mainheat_description: str, + heating_control_description: str + ): + """ + Given a list of measures, this function will check if the package of measures is fundable + :param measures: + :param starting_sap: + :param ending_sap: + :return: + """ + + starting_band = self.get_sap_band(starting_sap) + ending_band = self.get_sap_band(ending_sap) + + # For ECO4 eligibility, the property needs to end at a C if it starts at a D or E, otherwise should end at a + # D + + if starting_band <= 38 & ending_band >= 55: + # F or G should get to D + raise NotImplementedError("Implement F or G to D eligibility") + + ######################## + # Private + ######################## + # 1) ECO4 + # 2) GBIS + + if self.tenure == "Private": + is_eligible = self.eco4_prs_eligibility( + starting_sap=starting_sap, + measures=measures, + mainheat_description=mainheat_description, + heating_control_description=heating_control_description + ) + pass + + ######################## + # Social + ######################## + # 1) ECO4 + # 2) GBIS + + if self.tenure == "Social": + pass + + raise NotImplementedError("Only implemented for Private or Social housing") diff --git a/etl/customers/cambridge/surveys.py b/etl/customers/cambridge/surveys.py new file mode 100644 index 00000000..2aa52d6f --- /dev/null +++ b/etl/customers/cambridge/surveys.py @@ -0,0 +1,24 @@ +import pandas as pd +from backend.ml_models.Valuation import PropertyValuation +from backend.app.utils import sap_to_epc + +# Read in the survey data +surveys = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Cambridge/Survey Data.xlsx", + sheet_name="Survey data", +) + +increases = [] +for _, x in surveys.iterrows(): + current_epc = sap_to_epc(x["Pre SAP"]) + target_epc = sap_to_epc(x["Scenario 1 Post SAP"]) + current_value = x["Valuation"] + + val = PropertyValuation.estimate_valuation_improvement( + current_value, + current_epc, + target_epc, + total_cost=None + ) + avg_increase = val["average_increase"] + increases.append(round(avg_increase)) diff --git a/etl/customers/places_for_people/abs.py b/etl/customers/places_for_people/abs.py new file mode 100644 index 00000000..aa85a93f --- /dev/null +++ b/etl/customers/places_for_people/abs.py @@ -0,0 +1,199 @@ +""" +This script is to calculate the ABS for the Places for People London project +""" + +import os +import pandas as pd + +# London +pfp_london_cav = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs " + "rates/PFP_areas_surrounding_london_reviewed_standardised_15052025.xlsx", + sheet_name="Cav Route", + header=1 +) +pfp_london_cav = pfp_london_cav.rename(columns={"Route": "Route March"}) +pfp_london_pv = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs " + "rates/PFP_areas_surrounding_london_reviewed_standardised_15052025.xlsx", + sheet_name="PV Route", + header=1 +) +pfp_london_pv = pfp_london_pv.rename(columns={"Route": "Route March"}) +pfp_london_cav["location"] = "London" +pfp_london_pv["location"] = "London" +# East +pfp_east_cav = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs " + "rates/PFP_east_reviewed_standarised_15052025.xlsx", + sheet_name="Cav Route", + header=1 +) +pfp_east_cav = pfp_east_cav.rename(columns={"Route": "Route March"}) +pfp_east_pv = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs " + "rates/PFP_east_reviewed_standarised_15052025.xlsx", + sheet_name="PV Route", + header=1 +) +pfp_east_pv = pfp_east_pv.rename(columns={"Route": "Route March"}) +pfp_east_cav["location"] = "East" +pfp_east_pv["location"] = "East" +# North east +pfp_north_east_cav = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs " + "rates/PFP_north_east_reviewed_standardised_15052025.xlsx", + sheet_name="Cav Route", + header=1 +) +pfp_north_east_cav = pfp_north_east_cav.rename(columns={"Route": "Route March"}) +pfp_north_east_pv = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs " + "rates/PFP_north_east_reviewed_standardised_15052025.xlsx", + sheet_name="PV Route", + header=1 +) +pfp_north_east_pv = pfp_north_east_pv.rename(columns={"Route": "Route March"}) +pfp_north_east_cav["location"] = "North East" +pfp_north_east_pv["location"] = "North East" +# North West +pfp_north_west_cav = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs " + "rates/PFP_north_west_reviewed_standardised_15052025.xlsx", + sheet_name="Cav Route", + header=1 +) +pfp_north_west_cav = pfp_north_west_cav.rename(columns={"Route": "Route March"}) +pfp_north_west_pv = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs " + "rates/PFP_north_west_reviewed_standardised_15052025.xlsx", + sheet_name="PV Route", + header=1 +) +pfp_north_west_pv = pfp_north_west_pv.rename(columns={"Route": "Route March"}) +pfp_north_west_cav["location"] = "North West" +pfp_north_west_pv["location"] = "North West" + +cav_route = pd.concat( + [ + pfp_london_cav, + pfp_east_cav, + pfp_north_east_cav, + pfp_north_west_cav + ] +) +solar_route = pd.concat( + [ + pfp_london_pv, + pfp_east_pv, + pfp_north_east_pv, + pfp_north_west_pv + ] +) + + +def get_band(sap_score_number): + bands = [ + ("High_A", 96, float("inf")), + ("Low_A", 92, 96), + ("High_B", 86, 92), + ("Low_B", 81, 86), + ("High_C", 74.5, 81), + ("Low_C", 69, 74.5), + ("High_D", 61.5, 69), + ("Low_D", 55, 61.5), + ("High_E", 46.5, 55), + ("Low_E", 39, 46.5), + ("High_F", 29.5, 39), + ("Low_F", 21, 29.5), + ("High_G", 10.5, 21), + ("Low_G", 1, 10.5), + ] + + for band, lower, upper in bands: + if lower <= sap_score_number < upper: + return band + + return None + + +def classify_floor_area(floor_area): + if floor_area <= 72: + return "0-72" + + if floor_area <= 97: + return "73-97" + + if floor_area <= 199: + return "98-199" + + return "200+" + + +# We classify the abs bounds +solar_route["starting_abs_band"] = solar_route["epc_sap_score_on_register"].apply(get_band) +solar_route["ending_abs_band_scenario1"] = "High_C" +solar_route["ending_abs_band_scenario2"] = "Low_B" +solar_route["epc_total_floor_area"] = solar_route["epc_total_floor_area"].fillna(90) +solar_route["floor_area_band"] = solar_route["epc_total_floor_area"].apply(classify_floor_area) + +# We classify the abs bounds +cav_route["epc_sap_score_on_register"] = cav_route["epc_sap_score_on_register"].fillna(68) +cav_route["starting_abs_band"] = cav_route["epc_sap_score_on_register"].apply(get_band) +cav_route["floor_area_band"] = cav_route["epc_total_floor_area"].apply(classify_floor_area) +cav_route["ending_abs_band"] = "Low_C" + +abs_matrix = pd.read_csv( + "/Users/khalimconn-kowlessar/Downloads/ECO4 Full Project Scores Matrix.csv" +) + +cav_route = cav_route.merge( + abs_matrix.rename(columns={"Cost Savings": "ABS Rate"}), + how="left", + left_on=["starting_abs_band", "ending_abs_band", "floor_area_band"], + right_on=["Starting Band", "Finishing Band", "Floor Area Segment"], +) +solar_route = solar_route.merge( + abs_matrix.rename(columns={"Cost Savings": "ABS Rate"}), + how="left", + left_on=["starting_abs_band", "ending_abs_band_scenario1", "floor_area_band"], + right_on=["Starting Band", "Finishing Band", "Floor Area Segment"], +) +cav_route["ABS Rate"] = cav_route["ABS Rate"].fillna(0) +solar_route["ABS Rate"] = solar_route["ABS Rate"].fillna(0) + +cav_abs_agg = ( + cav_route.groupby("Route March").agg( + { + "ABS Rate": "sum", + "landlord_property_id": "count", + } + ).reset_index() +) +cav_abs_agg["Week Number"] = cav_abs_agg["Route March"].str.extract(r"(\d+)").astype(int) +cav_abs_agg = cav_abs_agg.sort_values("Week Number", ascending=True) +cav_abs_agg = cav_abs_agg.rename(columns={"landlord_property_id": "Number of Properties"}) + +solar_abs_agg = ( + solar_route.groupby("Route March").agg( + { + "ABS Rate": "sum", + "landlord_property_id": "count", + } + ).reset_index() +) +solar_abs_agg["Week Number"] = solar_abs_agg["Route March"].str.extract(r"(\d+)").astype(int) +solar_abs_agg = solar_abs_agg.rename(columns={"landlord_property_id": "Number of Properties"}) +solar_abs_agg = solar_abs_agg.sort_values("Week Number", ascending=True) + +# We store the data +# Store as an excel +filename = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs rates/pfp programme rates.xlsx" +# Store the data in two tabs. One for the asset list with the EPC data and the second with the flat data + +with pd.ExcelWriter(filename) as writer: + solar_abs_agg.to_excel(writer, sheet_name="Solar ABS", index=False) + cav_abs_agg.to_excel(writer, sheet_name="Cav ABS", index=False) + + cav_route.to_excel(writer, sheet_name="Cavity data", index=False) + solar_route.to_excel(writer, sheet_name="Solar data", index=False) diff --git a/etl/customers/thrive/Project codes.py b/etl/customers/thrive/Project codes.py index 6235ebed..01a15497 100644 --- a/etl/customers/thrive/Project codes.py +++ b/etl/customers/thrive/Project codes.py @@ -38,10 +38,10 @@ PROJECT_CODE_MAP = { 'Phase 8': "THRIVE-008", 'Phase 9': "THRIVE-009", 'Phase 10': "THRIVE-010", - "Week1": "THRIVE-WEEK-001", - "Week2": "THRIVE-WEEK-002", - "Week4": "THRIVE-WEEK-004", - "Week7": "THRIVE-WEEK-007", + "Week 1": "THRIVE-WEEK-001", + "Week 2": "THRIVE-WEEK-002", + "Week 4": "THRIVE-WEEK-004", + "Week 7": "THRIVE-WEEK-007", } programme_codes["project_code"] = programme_codes["programme_reference"].map(PROJECT_CODE_MAP) @@ -102,7 +102,29 @@ with pd.ExcelWriter(filename) as writer: block_analysis.to_excel(writer, sheet_name="Block Analysis", index=False) # If we have outcomes, we add a tab with the outcomes outcomes.to_excel(writer, sheet_name="Outcomes", index=False) - unmatched_submissions.to_excel(writer, sheet_name="Unmatched Submissions", index=False) - unmatched_ecosurv.to_excel(writer, sheet_name="Unmatched Ecosurv", index=False) + +# A check, just comparing against the master tracker to make sure I have all of the installs +asset_list = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - " + "Complete - Updated May 2025 - Standardised.xlsx", + sheet_name="Standardised Asset List", +) + +master_tracker = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Master Tracker (Thrive - " + "Warmfront).xlsx", + sheet_name="Master Tracker", + header=1 +) + +df = asset_list[["landlord_property_id", "hubspot_status"]].merge( + master_tracker[~pd.isnull(master_tracker['Date Completed'])][["UPRN", "Date Completed"]], + how="inner", + left_on="landlord_property_id", + right_on="UPRN" +) + +df["hubspot_status"].value_counts() +df[df["hubspot_status"] == "SUBMITTED TO INSTALLER"] From 1a49740bb0fb9778fcd663d1ebd6e0ab01d7b5c6 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 5 Jun 2025 17:54:55 +0100 Subject: [PATCH 07/14] finished hubspot upload code for Thrive --- asset_list/AssetList.py | 213 +++++++++++++++------- asset_list/hubspot/config.py | 16 +- asset_list/hubspot/prepare_for_hubspot.py | 26 ++- etl/customers/l_and_g/risk_matrix.py | 1 + 4 files changed, 186 insertions(+), 70 deletions(-) diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py index 3f5ef7ff..ef125110 100644 --- a/asset_list/AssetList.py +++ b/asset_list/AssetList.py @@ -368,14 +368,19 @@ class AssetList: SOLAR_ELIGIBLE_SOLID_WALL_UNINSULATED = "Solar Eligible, Solid Wall Uninsulated, EPC E or Below" SOLAR_ELIGIBLE_NEEDS_HEATING_UPGRADE = "Solar Eligible, Needs Heating Upgrade" + CRM_HISTORICAL_CAVITY_PRODUCT = { + "id": 156989182176, "unit_price": 0, "name": "Historical ECO Cavity" + } + CRM_PRODUCTS = { - "Empty Cavity - ECO4": {"id": 82733738177, "unit_price": 1000, "name": "Empty Cavity & Loft - ECO4"}, + "Empty Cavity - ECO4": {"id": 82733738177, "unit_price": 1000, "name": "Empty Cavity - ECO4"}, "Extract & Fill - ECO4": {"id": 100307905778, "unit_price": 500, "name": "Extract & Fill - ECO4"}, "Solar PV - ECO4": {"id": 82623589564, "unit_price": 1608, "name": "Solar PV - ECO4"}, "Solar PV + HHRSH - ECO4": {"id": 155529972924, "unit_price": 1608, "name": "Solar PV + HHRSH - ECO4"}, "Solar PV + Heating Upgrade - ECO4": { "id": 109265426665, "unit_price": 1608, "name": "Solar PV + Heating Upgrade - ECO4" }, + "Historical ECO Cavity": CRM_HISTORICAL_CAVITY_PRODUCT } def __init__( @@ -2128,27 +2133,33 @@ class AssetList: full_address_cols_to_concat=[], missing_postcodes_method=None, address1_extraction_method=None, - landlord_year_built=None, - landlord_uprn=None, - landlord_property_type=None, - landlord_built_form=None, - landlord_wall_construction=None, - landlord_roof_construction=None, - landlord_heating_system=None, - landlord_existing_pv=None, - landlord_sap=None, - landlord_block_reference=None, + landlord_year_built=cls.STANDARD_YEAR_BUILT, + landlord_uprn=cls.STANDARD_UPRN, + landlord_property_type=cls.STANDARD_PROPERTY_TYPE, + landlord_built_form=cls.STANDARD_BUILT_FORM, + landlord_wall_construction=cls.STANDARD_WALL_CONSTRUCTION, + landlord_roof_construction=cls.STANDARD_ROOF_CONSTRUCTION, + landlord_heating_system=cls.STANDARD_HEATING_SYSTEM, + landlord_existing_pv=cls.STANDARD_EXISTING_PV, + landlord_sap=cls.STANDARD_SAP, + landlord_block_reference=cls.STANDARD_BLOCK_REFERENCE, phase=False, header=0 ) return instance - def prepare_for_crm(self, company_domain, installer_name): + def prepare_for_crm(self, company_domain, installer_name, reconcile_programme=False): """ This function prepares the data for upload into Hubspot + :param company_domain: The company domain name to be used in the CRM + :param installer_name: The name of the installer to be used in the CRM + :param reconcile_programme: If True, will include all properties with a project code, regardless of status + :raises ValueError: If the installer name is not valid or if there are missing products :return: """ # This maps the opportunities as we reference them, to the product data as stored in Hubspot + if not hubspot_config.Installer.is_valid_value(installer_name): + raise ValueError(f"Installer name {installer_name} is not valid. Please check the installer name.") prefixes_to_products = { # Empty @@ -2185,18 +2196,37 @@ class AssetList: # For each cavity and solar product, we iterate through the prexies and map to the products - # # Check if there any options not in out lookup table - # if ( - # any(x for x in cavity_products if x not in product_lookup_table) or - # any(x for x in solar_products if x not in product_lookup_table) - # ): - # raise ValueError("We have products not referenced in the lookup table - check this") - programme_data = self.standardised_asset_list.copy() + # Format the two date columns + programme_data["survey_week"] = pd.to_datetime(programme_data["survey_week"], errors="coerce") + programme_data[self.EPC_API_DATA_NAMES["inspection-date"]] = pd.to_datetime( + programme_data[self.EPC_API_DATA_NAMES["inspection-date"]], + errors="coerce" + ) + # Convert to dd/mm/yyyy format + programme_data["survey_week"] = programme_data["survey_week"].dt.strftime("%d/%m/%Y") + programme_data[self.EPC_API_DATA_NAMES["inspection-date"]] = ( + programme_data[self.EPC_API_DATA_NAMES["inspection-date"]].dt.strftime("%d/%m/%Y") + ) + # We take rows that have a survyor and a date for the survey - programme_data = programme_data[ - ~pd.isnull(programme_data["survey_week"]) & ~pd.isnull(programme_data["surveyor"]) - ] + # We include properties under 2 circumstances: + # 1) The hubspot status is ready to be scheduled and there is an assigned surveyor and week for survey + # 2) The hubspot status is something else, meaning this has been included in an existing programme + # 3) reconcile programme is true, and therefore all proeprties with a project code will be included + + if reconcile_programme: + programme_data = programme_data[~pd.isnull(programme_data["project_code"])] + else: + ready_to_be_scheduled = ( + ( + programme_data["hubspot_status"] == hubspot_config.HubspotProcessStatus.READY_TO_BE_SCHEDULED.label + ) & (~pd.isnull(programme_data["survey_week"]) & ~pd.isnull(programme_data["surveyor"])) + ) + completed_works = ( + programme_data["hubspot_status"] != hubspot_config.HubspotProcessStatus.READY_TO_BE_SCHEDULED.label + ) + programme_data = programme_data[ready_to_be_scheduled | completed_works] # Merge on the contact details programme_data = programme_data.merge( @@ -2232,8 +2262,16 @@ class AssetList: programme_data["domna_product"] ) # We filter just on rows where we have a product - programme_data = programme_data[~pd.isnull(programme_data["domna_product"])] - programme_data = programme_data.drop(columns=["solar_product", "cavity_product"]) + if reconcile_programme: + # We include historical works, which will include hisorical cavity so we set these as extraction (as + # this is the main work mix) + programme_data["domna_product"] = programme_data["domna_product"].fillna( + self.CRM_HISTORICAL_CAVITY_PRODUCT["name"] + ) + else: + + programme_data = programme_data[~pd.isnull(programme_data["domna_product"])] + programme_data = programme_data.drop(columns=["solar_product", "cavity_product"]) product_df = ( pd.DataFrame(self.CRM_PRODUCTS).T[["name", "id", "unit_price"]] @@ -2251,25 +2289,24 @@ class AssetList: product_df['Quantity '] = 1 # Append on the product data - programme_data = programme_data.merge( - product_df, - how="left", - on="domna_product", - ) + programme_data = programme_data.merge(product_df, how="left", on="domna_product") # Add in deal and pipeline information programme_data["dealname"] = ( programme_data[self.STANDARD_FULL_ADDRESS] + " : " + programme_data["domna_product"] ) programme_data['Pipeline '] = hubspot_config.CRM_PIPELINE_NAME - programme_data['Deal Stage '] = hubspot_config.CRM_PIPELINE_FIRST_STAGE_NAME programme_data['Associations: Listing'] = "Property Owner" - # programme_data = programme_data.merge( - # assigned_surveyors.rename( - # columns={self.landlord_property_id: self.STANDARD_LANDLORD_PROPERTY_ID} - # ), how="left", on=self.STANDARD_LANDLORD_PROPERTY_ID - # ) + # We determine which column we should use for the UPRN + if self.STANDARD_UPRN not in programme_data.columns: + uprn_column = self.EPC_API_DATA_NAMES["uprn"] + else: + # Use the value that has the most coverage + uprn_column = "hubspot_uprn" + programme_data[uprn_column] = programme_data[self.STANDARD_UPRN].fillna( + programme_data[self.EPC_API_DATA_NAMES["uprn"]] + ) # Add in some columns if we have them date_of_inspections = ( @@ -2277,6 +2314,67 @@ class AssetList: "Non-Intrusives: Date of Inspection" in programme_data.columns else None ) + # Ammend the property type and built form columns + programme_data["hubspot_property_type"] = programme_data[self.STANDARD_PROPERTY_TYPE].copy() + programme_data["hubspot_built_form"] = programme_data[self.STANDARD_BUILT_FORM].copy() + + def _replace_property_description_data(programme_data, column_name): + """ + Helper function to replace property type or built form data with a specified value. + """ + + if column_name == "hubspot_property_type": + valid_values = ["house", "bungalow", "flat", "maisonette"] + epc_fill_col = "property-type" + elif column_name == "hubspot_built_form": + valid_values = ["detached", "semi-detached", "mid-terrace", "end-terrace"] + epc_fill_col = "built-form" + else: + raise ValueError(f"Invalid column name: {column_name}. Must be 'hubspot_property_type' or " + f"'hubspot_built_form'.") + + # Any vakue that is not house, bungalow, flat or maisonette is set to None + programme_data[column_name] = np.where( + ~programme_data[column_name].isin(valid_values), + None, + programme_data[column_name] + ) + # We fill with the EPC property type + programme_data[column_name] = np.where( + pd.isnull(programme_data[column_name]), + programme_data[self.EPC_API_DATA_NAMES[epc_fill_col]], + programme_data[column_name] + ) + + programme_data[column_name] = programme_data[column_name].fillna("unknown") + + return programme_data + + # Clean up the property type and built form columns + programme_data = _replace_property_description_data(programme_data, "hubspot_property_type") + programme_data = _replace_property_description_data(programme_data, "hubspot_built_form") + + # We accomodate the old vs new inspections format + if "non-intrusives: WFT Findings" in programme_data.columns: + # We have the old format - we only have notes + non_intrusives_surveyor_notes = "non-intrusives: WFT Findings" + non_intrusives_construction = None + non_intrusives_insulated = None + non_intrusives_insulation_material = None + non_intrusives_ciga_check_required = None + non_intrusives_pv_access = None + non_intrusives_roof_orientation = None + non_intrusives_surveyor_name = None + else: + non_intrusives_surveyor_notes = 'non-intrusives: Any further surveyor notes' + non_intrusives_construction = "non-intrusives: Construction" + non_intrusives_insulated = "non-intrusives: Insulated" + non_intrusives_insulation_material = "non-intrusives: Material" + non_intrusives_ciga_check_required = 'non-intrusives: CIGA Check Required' + non_intrusives_pv_access = 'non-intrusives: PV, ACCESS ISSUE, SEE NOTES' + non_intrusives_roof_orientation = 'non-intrusives: OFF GAS - ROOF ORIENTATION' + non_intrusives_surveyor_name = 'non-intrusives: Surveyors Name' + # This maps the hubspot schema to the template. Anything that is not covered in this will be flagged schema_mappings = { 'Company Domain Name ': 'Company Domain Name ', @@ -2296,14 +2394,12 @@ class AssetList: 'Address 1 ': self.STANDARD_ADDRESS_1, 'Address 2 ': None, # TODO: Don't have this for the moment 'Postcode ': self.STANDARD_POSTCODE, - 'Property Type ': self.STANDARD_PROPERTY_TYPE, - 'Property Sub Type ': self.STANDARD_BUILT_FORM, + 'Property Type ': "hubspot_property_type", + 'Property Sub Type ': "hubspot_built_form", 'Bedroom(s) ': None, # TODO: Don't have this for the moment 'Domna Property ID ': self.DOMNA_PROPERTY_ID, # We populate this with the column that we have - 'National UPRN ': ( - self.STANDARD_UPRN if self.STANDARD_UPRN is not None else self.EPC_API_DATA_NAMES["uprn"] - ), + 'National UPRN ': uprn_column, 'Owner Property ID ': self.STANDARD_LANDLORD_PROPERTY_ID, 'Wall Construction ': self.STANDARD_WALL_CONSTRUCTION, 'Heating System ': self.STANDARD_HEATING_SYSTEM, @@ -2311,30 +2407,17 @@ class AssetList: 'Boiler Make ': None, # TODO: Don't have this for the moment 'Boiler Model ': None, # TODO: Don't have this for the moment 'Non-Intrusives: Date Checked ': date_of_inspections, - 'Non-Intrusives: Wall Type ': ( - "non-intrusives: Construction" if self.non_intrusives_present else None - ), - 'Non-intrusives: Insulation ': ( - "non-intrusives: Insulated" if self.non_intrusives_present else None - ), - 'Non-intrusives: Insulation Material ': ( - "non-intrusives: Material" if self.non_intrusives_present else None - ), - 'Non-Intrusives: CIGA Check Required ': ( - 'non-intrusives: CIGA Check Required' if self.non_intrusives_present else None - ), - 'Non-Intrusives: PV Access Issues ': ( - 'non-intrusives: PV, ACCESS ISSUE, SEE NOTES' if self.non_intrusives_present else None - ), - 'Non-Intrusives: Roof Orientation ': ( - 'non-intrusives: OFF GAS - ROOF ORIENTATION' if self.non_intrusives_present else None - ), - 'Non-Intrusives: Surveyor Notes ': ( - 'non-intrusives: Any further surveyor notes' if self.non_intrusives_present else None - ), - 'Non-Intrusives: Surveyor Name ': ( - 'non-intrusives: Surveyors Name' if self.non_intrusives_present else None - ), + 'Non-Intrusives: Wall Type ': non_intrusives_construction, + 'Non-intrusives: Insulation ': non_intrusives_insulated, + 'Non-intrusives: Insulation Material ': + non_intrusives_insulation_material, + 'Non-Intrusives: CIGA Check Required ': + non_intrusives_ciga_check_required, + 'Non-Intrusives: PV Access Issues ': non_intrusives_pv_access, + 'Non-Intrusives: Roof Orientation ': + non_intrusives_roof_orientation, + 'Non-Intrusives: Surveyor Notes ': non_intrusives_surveyor_notes, + 'Non-Intrusives: Surveyor Name ': non_intrusives_surveyor_name, 'CIGA: Date Requested ': None, # TODO: Don't have this for the moment 'CIGA: Cavity Guarantee Found ': None, 'Last EPC: Is Estimated ': self.EPC_API_DATA_NAMES["estimated"], @@ -2351,7 +2434,6 @@ class AssetList: 'Last EPC: Floor ': self.EPC_API_DATA_NAMES["floor-description"], 'Last EPC: Room Height ': self.EPC_API_DATA_NAMES["floor-height"], 'Last EPC: Age Band ': self.EPC_API_DATA_NAMES["construction-age-band"], - 'Deal Stage ': 'Deal Stage ', 'Pipeline ': 'Pipeline ', 'Expected Commencement Date ': "survey_week", 'Deal Name ': "dealname", # Need to create this, @@ -2362,6 +2444,7 @@ class AssetList: 'Deal Owner': 'surveyor_email', 'Project Code ': 'project_code', 'Associations: Listing': 'Associations: Listing', + 'Deal Stage ': "hubspot_status", } # We sometimes columns if the landlord never provided them diff --git a/asset_list/hubspot/config.py b/asset_list/hubspot/config.py index 6e16279a..01540b7b 100644 --- a/asset_list/hubspot/config.py +++ b/asset_list/hubspot/config.py @@ -1,7 +1,6 @@ -from enum import IntEnum +from enum import IntEnum, Enum CRM_PIPELINE_NAME = 'Operations - Housing Associations' -CRM_PIPELINE_FIRST_STAGE_NAME = 'READY TO BE SCHEDULED' class HubspotProcessStatus(IntEnum): @@ -31,6 +30,19 @@ class HubspotProcessStatus(IntEnum): INSTALLER_CANCELLED_FINALIZED = 8, "INSTALLER CANCELLED - FINALIZED" +class Installer(Enum): + SCIS = "SCIS" + JJ_CRUMP = "J & J CRUMP" + SGEC = "SGEC" + + @classmethod + def is_valid_value(cls, value): + """ + Check if the value is a valid installer. + """ + return value in cls._value2member_map_ + + CRM_UPLOAD_COLUMNS = [ 'Name ', 'Associations: Listing', 'Company Domain Name ', 'Email ', 'First Name ', 'Last Name ', diff --git a/asset_list/hubspot/prepare_for_hubspot.py b/asset_list/hubspot/prepare_for_hubspot.py index 8ed654f3..ee3bc65d 100644 --- a/asset_list/hubspot/prepare_for_hubspot.py +++ b/asset_list/hubspot/prepare_for_hubspot.py @@ -1,3 +1,4 @@ +import os import pandas as pd from asset_list.AssetList import AssetList @@ -17,10 +18,12 @@ def app(): """ # inputs: + reconcile_programme = True # If True, the hubspot upload will include all properties with a project code customer_domain = "https://thrivehomes.org.uk" + installer_name = "J & J CRUMP" asset_list_filepath = ( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Testing Hubspot Upload/Hubspot Upload - " - "Sample.xlsx" + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Testing Hubspot Upload/Thrive Programme - " + "Hubspot Upload 3.xlsx" ) contact_details_filepath = ( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Testing Hubspot Upload/Sample contact " @@ -47,5 +50,22 @@ def app(): ) asset_list.prepare_for_crm( - company_domain=customer_domain + company_domain=customer_domain, + installer_name=installer_name, + reconcile_programme=reconcile_programme ) + + # Get the filepath and the filename. Append hubspot upload to the filename. We also change the file type to csv + directory, filename = os.path.split(asset_list_filepath) + name, ext = os.path.splitext(filename) + output_filename = f"{name} - Hubspot Upload.csv" + output_filepath = os.path.join(directory, output_filename) + + if pd.isnull(asset_list.hubspot_data['Project Code ']).sum(): + raise ValueError("FIX MEEE") + + if pd.isnull(asset_list.hubspot_data['Deal Stage ']).any(): + raise ValueError("Warning: Some rows have missing project codes. These will not be uploaded to HubSpot.") + + # Just store locally + asset_list.hubspot_data.to_csv(output_filepath, index=False, encoding="utf-8-sig") diff --git a/etl/customers/l_and_g/risk_matrix.py b/etl/customers/l_and_g/risk_matrix.py index c800117e..8f5451fc 100644 --- a/etl/customers/l_and_g/risk_matrix.py +++ b/etl/customers/l_and_g/risk_matrix.py @@ -81,6 +81,7 @@ def app(): # We need to calculate the costs cost_data = [] for _, row in epr_data.iterrows(): + epc = row["EPC"][0] sap = int(row["EPC"][1:]) From dd2a04f05e698b549815b5ab62219cd953c29f60 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 5 Jun 2025 18:14:14 +0100 Subject: [PATCH 08/14] deleted old hubspot data from asset list app --- asset_list/app.py | 42 ------------------------------------------ 1 file changed, 42 deletions(-) diff --git a/asset_list/app.py b/asset_list/app.py index 41623880..5e62bbe1 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -484,45 +484,6 @@ def app(): asset_list.block_analysis() - asset_list.load_contact_details( - local_filepath=os.path.join(data_folder, "Full property list wth D&V report V look up 12.2.25.xlsx"), - sheet_name="Report 1", - landlord_property_id=asset_list.landlord_property_id, - phone_number_column='Property Current Tel. Number', - fullname_column='Proeprty Current Occupant', - firstname_column=None, - lastname_column=None, - email_column=None, # TODO - we need this - ) - - # Convert to a format suitable for CRM - # TODO: TEMP - assigned_surveyors = pd.DataFrame( - [ - { - asset_list.landlord_property_id: "02610001", - "week_commencing": "10/10/2025", - "surveyor_name": "Khalim Conn-Kowlessar", - "surveyor_email": "khalim@domna.homes", - } - ] - ) - - # TODO: Sort the output by postcode - - company_domain = "ealing.gov.uk" - crm_pipeline_name = "Survey Management" - first_dealstage = "READY TO BEGIN SCHEDULING" - # TODO - temp, upload to either SharePoint or AWS - - asset_list.prepare_for_crm( - assigned_surveyors=assigned_surveyors, - company_domain=company_domain, - crm_pipeline_name=crm_pipeline_name, - first_dealstage=first_dealstage - ) - hubspot_data = asset_list.hubspot_data - # Store as an excel filename = os.path.join(data_folder, ".".join(data_filename.split(".")[:-1])) + " - Standardised.xlsx" # Store the data in two tabs. One for the asset list with the EPC data and the second with the flat data @@ -543,6 +504,3 @@ def app(): if not asset_list.ecosurv_no_match.empty: asset_list.ecosurv_no_match.to_excel(writer, sheet_name="Unmatched Ecosurv", index=False) - - # Store the Hubspot export as a csv - hubspot_data.to_csv(os.path.join(data_folder, "Hubspot Export.csv"), index=False) From d8b0662422c61ebd620a59b8e7667ad28c457dfc Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 8 Jun 2025 16:15:23 +0100 Subject: [PATCH 09/14] adding project codes for blocks --- asset_list/AssetList.py | 92 +++++++++++---- asset_list/app.py | 152 ++++++++++--------------- asset_list/mappings/built_form.py | 29 +++++ asset_list/mappings/exising_pv.py | 3 +- asset_list/mappings/heating_systems.py | 34 +++++- asset_list/mappings/property_type.py | 5 +- asset_list/mappings/roof.py | 9 +- asset_list/mappings/walls.py | 21 +++- 8 files changed, 226 insertions(+), 119 deletions(-) diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py index ef125110..78c589db 100644 --- a/asset_list/AssetList.py +++ b/asset_list/AssetList.py @@ -526,6 +526,23 @@ class AssetList: self.standardised_asset_list["Archetype"].copy() ) + self.prefixes_to_products = { + # Empty + self.EMPTY_CAVITY_NON_INTRUSIVE: self.CRM_PRODUCTS["Empty Cavity - ECO4"], + self.EPC_EMPTY_INSPECTIONS_RETRO_DRILLED: self.CRM_PRODUCTS["Empty Cavity - ECO4"], + self.EPC_EMPTY_INSPECTIONS_FILLED: self.CRM_PRODUCTS["Empty Cavity - ECO4"], + self.EPC_EMPTY_INSPECTIONS_FILLED_AT_BUILD: self.CRM_PRODUCTS["Empty Cavity - ECO4"], + self.EPC_EMPTY_INSPECTIONS_NON_CAVITY: self.CRM_PRODUCTS["Empty Cavity - ECO4"], + self.EPC_EMPTY: self.CRM_PRODUCTS["Empty Cavity - ECO4"], + self.LANDLORD_EMPTY_INSPECTIONS_OTHER: self.CRM_PRODUCTS["Empty Cavity - ECO4"], + # Extraction + self.EXTRACTION_NON_INTRUSIVE: self.CRM_PRODUCTS["Extract & Fill - ECO4"], + # Solar + self.SOLAR_ELIGIBLE: self.CRM_PRODUCTS["Solar PV - ECO4"], + self.SOLAR_ELIGIBLE_SOLID_WALL_UNINSULATED: self.CRM_PRODUCTS["Solar PV - ECO4"], + self.SOLAR_ELIGIBLE_NEEDS_HEATING_UPGRADE: self.CRM_PRODUCTS["Solar PV + Heating Upgrade - ECO4"], + } + def _extract_address1(self, asset_list, full_address_col, postcode_col, method="first_two_words"): if method not in self.ADDRESS_1_CLEANING_METHODS: @@ -1752,7 +1769,7 @@ class AssetList: self.standardised_asset_list["cavity_reason"] = None empty_cavity_map = { - "non_intrusive_indicates_empty_cavity": self.EMPTY_CAVITY_NON_INTRUSIVE_PREFIX + ": ", + "non_intrusive_indicates_empty_cavity": self.EMPTY_CAVITY_NON_INTRUSIVE + ": ", "non_intrusive_indicates_empty_cavity_has_solar": f"{self.EMPTY_CAVITY_NON_INTRUSIVE} - property " "already has solar: ", "non_intrusive_indicates_empty_cavity_no_year_filter": f"{self.EMPTY_CAVITY_NON_INTRUSIVE}, " @@ -1780,7 +1797,7 @@ class AssetList: )) & pd.isnull(self.standardised_asset_list["cavity_reason"]) ), - f"{EPC_EMPTY_INSPECTIONS_RETRO_DRILLED}: " + self.standardised_asset_list[ + f"{self.EPC_EMPTY_INSPECTIONS_RETRO_DRILLED}: " + self.standardised_asset_list[ "SAP Category"], self.standardised_asset_list["cavity_reason"] ) @@ -1979,6 +1996,22 @@ class AssetList: self.outcomes[self.DOMNA_PROPERTY_ID].isin(identified_work) ] + # Finally, direct operations feedback has suggested that if a property is a flat that has a SAP rating of + # 76 or above, we should exclude it because it's likely not going to be eligible for anyting + self.standardised_asset_list["cavity_reason"] = np.where( + (self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "flat") & + (self.standardised_asset_list["SAP Category"] == "SAP Rating 76 or more"), + None, + self.standardised_asset_list["cavity_reason"] + ) + # Split cavity_reason on the colon and check if the first part is equal to one of the two options above + # that indicates empties + self.standardised_asset_list["identified_empty_cavity"] = ( + self.standardised_asset_list["cavity_reason"].str.split(":").str[0].isin( + [self.EMPTY_CAVITY_NON_INTRUSIVE, self.EPC_EMPTY] + ) + ) + def label_property_status(self): """ This function is designed to be run after identify_worktypes() has been run, and will create a "property_status" @@ -2015,6 +2048,28 @@ class AssetList: get_max_status_from_columns, axis=1 ) + self.standardised_asset_list["project_code"] = None + # if we have any blocks, where work is eligible, we flag them now + if self.landlord_block_reference is not None: + # For blocks that have a 50% allocation, we create project codes + self.block_analysis() + # find any block refs with more than 50% emptires + viable_empty_blocks = self.block_analysis_df[ + self.block_analysis_df['Percentage of Empties'] >= 0.50 + ] + + if not viable_empty_blocks.empty: + project_code_lookup = viable_empty_blocks[["Block Reference"]].copy() + self.standardised_asset_list = self.standardised_asset_list.merge( + project_code_lookup, how="left", left_on=self.STANDARD_BLOCK_REFERENCE, right_on="Block Reference" + ) + self.standardised_asset_list["project_code"] = np.where( + ~pd.isnull(self.standardised_asset_list["Block Reference"]), + self.standardised_asset_list["Block Reference"], + self.standardised_asset_list["project_code"] + ) + self.standardised_asset_list = self.standardised_asset_list.drop(columns=["Block Reference"]) + def block_analysis(self): if self.landlord_block_reference is None: @@ -2024,7 +2079,7 @@ class AssetList: # Reverse mapping: label -> enum LABEL_TO_ENUM = {e.label: e for e in hubspot_config.HubspotProcessStatus} - # Threshold status - anythign that is at this stage or beyond is considered surveyed + # Threshold status - anything that is at this stage or beyond is considered surveyed threshold = hubspot_config.HubspotProcessStatus.SURVEYED_COMPLETED_SIGNED_OFF.value block_analysis = [] @@ -2034,15 +2089,21 @@ class AssetList: if all(cavity_breakdown.index == "No Eligibility"): continue + # We check the % of empty vs not empty as right now, we're focused on empty + n_empties = ((group["identified_empty_cavity"] == True) & (~pd.isnull(group["cavity_reason"]))).sum() + works = group["hubspot_status"] above_threshold = works.map(LABEL_TO_ENUM.get).dropna() count_above = (above_threshold >= threshold).sum() - proportion = count_above / len(works) + proportion_surveyed = count_above / len(works) + proportion_empty = n_empties / len(works) + # We auto-populate any blocks that have greater than 50% proportion empty block_analysis.append( { "Block Reference": block_reference, - "Proportion of properties suryeyed": proportion, + "Proportion of properties suryeyed": proportion_surveyed, + "Percentage of Empties": proportion_empty, **cavity_breakdown.to_dict(), } ) @@ -2050,6 +2111,8 @@ class AssetList: block_analysis = pd.DataFrame(block_analysis) block_analysis = block_analysis.fillna(0) + # We flag which properties are eligible for works. We need at least 50% + self.block_analysis_df = block_analysis @staticmethod @@ -2161,23 +2224,6 @@ class AssetList: if not hubspot_config.Installer.is_valid_value(installer_name): raise ValueError(f"Installer name {installer_name} is not valid. Please check the installer name.") - prefixes_to_products = { - # Empty - self.EMPTY_CAVITY_NON_INTRUSIVE: self.CRM_PRODUCTS["Empty Cavity - ECO4"], - self.EPC_EMPTY_INSPECTIONS_RETRO_DRILLED: self.CRM_PRODUCTS["Empty Cavity - ECO4"], - self.EPC_EMPTY_INSPECTIONS_FILLED: self.CRM_PRODUCTS["Empty Cavity - ECO4"], - self.EPC_EMPTY_INSPECTIONS_FILLED_AT_BUILD: self.CRM_PRODUCTS["Empty Cavity - ECO4"], - self.EPC_EMPTY_INSPECTIONS_NON_CAVITY: self.CRM_PRODUCTS["Empty Cavity - ECO4"], - self.EPC_EMPTY: self.CRM_PRODUCTS["Empty Cavity - ECO4"], - self.LANDLORD_EMPTY_INSPECTIONS_OTHER: self.CRM_PRODUCTS["Empty Cavity - ECO4"], - # Extraction - self.EXTRACTION_NON_INTRUSIVE: self.CRM_PRODUCTS["Extract & Fill - ECO4"], - # Solar - self.SOLAR_ELIGIBLE: self.CRM_PRODUCTS["Solar PV - ECO4"], - self.SOLAR_ELIGIBLE_SOLID_WALL_UNINSULATED: self.CRM_PRODUCTS["Solar PV - ECO4"], - self.SOLAR_ELIGIBLE_NEEDS_HEATING_UPGRADE: self.CRM_PRODUCTS["Solar PV + Heating Upgrade - ECO4"], - } - # We check if all products are covered in the lookup table cavity_products = self.standardised_asset_list["cavity_reason"].unique().tolist() solar_products = self.standardised_asset_list["solar_reason"].unique().tolist() @@ -2188,7 +2234,7 @@ class AssetList: continue matched_product = None - for product_prefix, crm_product in prefixes_to_products.items(): + for product_prefix, crm_product in self.prefixes_to_products.items(): if identified_product.startswith(product_prefix): matched_product = crm_product diff --git a/asset_list/app.py b/asset_list/app.py index 5e62bbe1..881334b5 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -62,77 +62,77 @@ def app(): Property UPRN """ - # Thrive - reconciliation - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation" - data_filename = "Thrive Asset List - Complete - Updated May 2025.xlsx" - sheet_name = "Sheet1" - postcode_column = 'postcode' - fulladdress_column = "full_address" - address1_column = "address_line_1" - address1_method = None + # Stori + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Storicymru" + data_filename = "Asset list - for analysis.xlsx" + sheet_name = "SAP and Costs Calculations" + postcode_column = 'Postcode' + fulladdress_column = "Address1" + address1_column = None + address1_method = "house_number_extraction" address_cols_to_concat = [] missing_postcodes_method = None - landlord_year_built = "age_band_calculated" + landlord_year_built = "Age" landlord_os_uprn = None - landlord_property_type = "property_type" - landlord_built_form = "build_form" - landlord_wall_construction = None - landlord_roof_construction = "assumed_loft_insulation_thickness_updated" - landlord_heating_system = "heating_type_updated" - landlord_existing_pv = None - landlord_property_id = "thrive_property_id" - landlord_sap = "sap_rating_updated" - landlord_block_reference = "block_reference" - outcomes_filename = [ - os.path.join(data_folder, "Thrive - Outcomes - April 24-March25 - Corrected.xlsx") - ] - outcomes_sheetname = ["Sheet1"] - outcomes_postcode = ["postcode"] - outcomes_houseno = ["No."] - outcomes_id = ["thrive_property_id"] - outcomes_address = ["address"] - master_filepaths = [ - os.path.join(data_folder, "Thrive Submissions ECO3 - with IDS.csv"), - os.path.join(data_folder, "Thrive Submissions ECO4 - with IDS.csv"), - ] - master_to_asset_list_filepath = None - master_id_colnames = ["thrive_property_id", "thrive_property_id"] - phase = False - ecosurv_landlords = "thrive" - - # Torus - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Torus/Phase 2" - data_filename = "Torus Property Asset List - INSPECTIONS.xlsx" - sheet_name = "TORUS" - postcode_column = 'Postcode' - fulladdress_column = None - address1_column = "AddressLine1" - address1_method = None - address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"] - missing_postcodes_method = None - landlord_year_built = "Property Age" - landlord_os_uprn = "NatUPRN" - landlord_property_type = "Property Type" - landlord_built_form = "Built Form" - landlord_wall_construction = "Wall Construction" - landlord_roof_construction = "Roof Construction" - landlord_heating_system = "Space Heating Source" - landlord_existing_pv = "Low Carbon Technology (Solar PV)" + landlord_property_type = "TYPE" + landlord_built_form = "AGE / DETACHMENT" + landlord_wall_construction = "WALL" + landlord_roof_construction = "LOFT INSULATION" + landlord_heating_system = "BOILER" + landlord_existing_pv = "SOLAR PV" landlord_property_id = "UPRN" - landlord_sap = "SAP Score" + landlord_sap = "Current SAP Rating" landlord_block_reference = None - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - outcomes_id = None - outcomes_address = None + outcomes_filename = [] + outcomes_sheetname = [] + outcomes_postcode = [] + outcomes_houseno = [] + outcomes_id = [] + outcomes_address = [] master_filepaths = [] master_to_asset_list_filepath = None master_id_colnames = [] - phase = True + phase = False ecosurv_landlords = None + # Thrive - reconciliation + # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation" + # data_filename = "Thrive Asset List - Complete - Updated May 2025.xlsx" + # sheet_name = "Sheet1" + # postcode_column = 'postcode' + # fulladdress_column = "full_address" + # address1_column = "address_line_1" + # address1_method = None + # address_cols_to_concat = [] + # missing_postcodes_method = None + # landlord_year_built = "age_band_calculated" + # landlord_os_uprn = None + # landlord_property_type = "property_type" + # landlord_built_form = "build_form" + # landlord_wall_construction = None + # landlord_roof_construction = "assumed_loft_insulation_thickness_updated" + # landlord_heating_system = "heating_type_updated" + # landlord_existing_pv = None + # landlord_property_id = "thrive_property_id" + # landlord_sap = "sap_rating_updated" + # landlord_block_reference = "block_reference" + # outcomes_filename = [ + # os.path.join(data_folder, "Thrive - Outcomes - April 24-March25 - Corrected.xlsx") + # ] + # outcomes_sheetname = ["Sheet1"] + # outcomes_postcode = ["postcode"] + # outcomes_houseno = ["No."] + # outcomes_id = ["thrive_property_id"] + # outcomes_address = ["address"] + # master_filepaths = [ + # os.path.join(data_folder, "Thrive Submissions ECO3 - with IDS.csv"), + # os.path.join(data_folder, "Thrive Submissions ECO4 - with IDS.csv"), + # ] + # master_to_asset_list_filepath = None + # master_id_colnames = ["thrive_property_id", "thrive_property_id"] + # phase = False + # ecosurv_landlords = "thrive" + # Southern Midlands # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southern/Midlands Properties - Apr 2025" # data_filename = "Southern Housing Midlands Property List - combined.xlsx" @@ -160,34 +160,6 @@ def app(): # master_filepaths = [] # master_to_asset_list_filepath = None - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/North-West" - data_filename = "Places for People NORTH WEST - INSPECTIONS MASTER - UPDATE.xlsx" - sheet_name = "CHECKED" - postcode_column = 'Postcode' - fulladdress_column = None - address1_column = "AddressLine1" - address1_method = None - address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"] - missing_postcodes_method = None - landlord_year_built = None - landlord_os_uprn = None - landlord_property_type = "Archetype (PFP)" - landlord_built_form = "Archetype (PFP)" - landlord_wall_construction = None - landlord_roof_construction = None - landlord_heating_system = None - landlord_existing_pv = None - landlord_property_id = "Uprn" - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - outcomes_id = None - master_filepaths = [] - master_to_asset_list_filepath = None - landlord_sap = None - phase = None - # Maps addresses to uprn in problematic cases manual_uprn_map = {} @@ -482,8 +454,6 @@ def app(): # We now flag the status of the property asset_list.label_property_status() - asset_list.block_analysis() - # Store as an excel filename = os.path.join(data_folder, ".".join(data_filename.split(".")[:-1])) + " - Standardised.xlsx" # Store the data in two tabs. One for the asset list with the EPC data and the second with the flat data diff --git a/asset_list/mappings/built_form.py b/asset_list/mappings/built_form.py index 116c3203..45e45c54 100644 --- a/asset_list/mappings/built_form.py +++ b/asset_list/mappings/built_form.py @@ -331,4 +331,33 @@ BUILT_FORM_MAPPINGS = { 'Low Rise': 'low rise', 'Upper Floor': 'top-floor', 'High Rise': 'high rise', + + '2012 ONWARDS DETACHED': 'detached', + '1950-66 END TERRACE': 'end-terrace', + '1976-82 MID TERRACED': 'mid-terrace', + '1950-66 MID TERRACE': 'mid-terrace', + '1991-95 DETACHED': 'detached', + '1976-82 END TERRACED': 'end-terrace', + '1967-75 DETACHED': 'detached', + 'PRE 1900 DETACHED': 'detached', + 'PRE 1900 MID TERRACE': 'mid-terrace', + '1900 DET': 'detached', + '1967-75 MID TERR': 'mid-terrace', + '1930-49 SEMI DET': 'semi-detached', + '1900-29 SEMI DET': 'semi-detached', + '1900-29 MID TERR': 'mid-terrace', + '1983- 90 MID TERR': 'mid-terrace', + '1976-82 MID TERR': 'mid-terrace', + '1983-90 END TERR': 'end-terrace', + '1991-95 SEMI DET': 'semi-detached', + '1983-90 SEMI DET': 'semi-detached', + '1991-95 MID TERR': 'mid-terrace', + '1950-66 SEMI DET': 'semi-detached', + '1900 MID TERR': 'mid-terrace', + '1967-75 SEMI DET': 'semi-detached', + '1983- 90 SEMI DET': 'semi-detached', + '1983-90 MID TERR': 'mid-terrace', + '1976-82 SEMI DET': 'semi-detached', + 'PRE 1900 MID TERR': 'mid-terrace' + } diff --git a/asset_list/mappings/exising_pv.py b/asset_list/mappings/exising_pv.py index 51f5f922..e67fafb4 100644 --- a/asset_list/mappings/exising_pv.py +++ b/asset_list/mappings/exising_pv.py @@ -16,5 +16,6 @@ EXISTING_PV_MAPPINGS = { 'PV: 25% roof area, PV: 3.6kWp array': 'already has PV', 'PV: 10% roof area, PV: 2kWp array': 'already has PV', 'PV: 50% roof area': 'already has PV', - 'Solar PV': 'already has PV' + 'Solar PV': 'already has PV', + 'SOLAR PV': 'already has PV' } diff --git a/asset_list/mappings/heating_systems.py b/asset_list/mappings/heating_systems.py index daef01bb..d2959873 100644 --- a/asset_list/mappings/heating_systems.py +++ b/asset_list/mappings/heating_systems.py @@ -293,5 +293,37 @@ HEATING_MAPPINGS = { 'No Data': 'unknown', 'Boiler System': 'gas condensing boiler', 'Storage heating': 'electric storage heaters', - 'Storage heating (HHRSH)': 'high heat retention storage heaters' + 'Storage heating (HHRSH)': 'high heat retention storage heaters', + + 'ELECTRIC BOILER': 'electric boiler', + 'STORAGE HEATERS': 'electric storage heaters', + 'GREENSTAR 24I JUNIOR': 'gas combi boiler', + 'generic cond combi post98': 'gas condensing combi', + 'SAP TABLE REG COND +98 NO PICTURE OF BOILER': 'gas condensing boiler', + 'ECO TEC PRO 28 H COMBI A': 'gas combi boiler', + 'GREENSTAR 25I ErP': 'gas combi boiler', + 'IDEAL LOGIC MAX COMBI C30': 'gas combi boiler', + 'ECO TEC PRO 28 (286/5-3)': 'gas combi boiler', + 'IDEAL LOGIC HEAT 30': 'gas boiler, radiators', + 'WORCESTER 240': 'gas boiler, radiators', + 'ECO TEC PRO 24 (246/5-3)': 'gas combi boiler', + 'ECO TEC PRO 28 (OLD)': 'gas combi boiler', + 'LOGIC COMBI2 C30': 'gas combi boiler', + 'GREENSTAR 28I JUNIOR': 'gas combi boiler', + 'WORCESTER 24i': 'gas combi boiler', + 'GREENSTAR 30I ErP': 'gas combi boiler', + '25 CDI': 'gas combi boiler', + 'GREENSTAR 28CDI COMPACT ErP': 'gas combi boiler', + 'GREENSTAR 24 RI': 'gas boiler, radiators', + 'BAXI COMBI 105 HE': 'gas combi boiler', + 'ECO TEC PRO 28 (OLD TYPE)': 'gas combi boiler', + 'WORCESTER 28 SI ll RSF': 'gas combi boiler', + 'GREENSTAR 30SI COMPACT ErP': 'gas combi boiler', + 'SAP TABLE REG COND +98 NO PICTURE OF CYLINDER': 'gas condensing boiler', + 'WORCESTER 24 SI ll RSF': 'gas combi boiler', + 'GREENSTAR 4000': 'gas combi boiler', + 'GREENSTAR 24i JUNIOR': 'gas combi boiler', + 'ECO TEC PRO 24 (OLD TYPE)': 'gas combi boiler', + 'GREENSTAR 30SI COMPACT': 'gas combi boiler', + 'BAXI DUO TEC 28 COMBI ErP': 'gas combi boiler' } diff --git a/asset_list/mappings/property_type.py b/asset_list/mappings/property_type.py index b705d6ef..1a61c3eb 100644 --- a/asset_list/mappings/property_type.py +++ b/asset_list/mappings/property_type.py @@ -252,5 +252,8 @@ PROPERTY_MAPPING = { 'Bedsit bungalow semi detached': 'bedsit', 'Bedsit Flat': 'bedsit', 'Semi detached house': 'house', - 'Unit': 'unknown' + 'Unit': 'unknown', + 'HOUSE (3 STOREY)': 'house', + 'FLAT GROUND FLOOR': 'flat', + 'FLAT TOP FLOOR': 'flat' } diff --git a/asset_list/mappings/roof.py b/asset_list/mappings/roof.py index 3b447829..13359ded 100644 --- a/asset_list/mappings/roof.py +++ b/asset_list/mappings/roof.py @@ -43,6 +43,13 @@ ROOF_CONSTRUCTION_MAPPINGS = { 'Non-joist': 'unknown', '25mm': 'pitched less than 100mm insulation', '400mm+': 'pitched insulated', - '12mm': 'pitched less than 100mm insulation' + '12mm': 'pitched less than 100mm insulation', + '150MM': 'pitched insulated', + '200MM': 'pitched insulated', + '250MM': 'pitched insulated', + '100MM': 'pitched less than 100mm insulation', + 'U/K': 'unknown', + 'U/K - 250MM RIR FLAT CEILING': 'flat unknown insulation', + 'U/K - 200MM RIR FLAT CEILING': 'flat unknown insulation' } diff --git a/asset_list/mappings/walls.py b/asset_list/mappings/walls.py index 5e32531f..5baabe6f 100644 --- a/asset_list/mappings/walls.py +++ b/asset_list/mappings/walls.py @@ -224,5 +224,24 @@ WALL_CONSTRUCTION_MAPPINGS = { 'Traditional Cavity Brickwork': 'cavity unknown insulation', 'System build (undefined)': 'system built', 'Non Trad Wimpey': 'system built', - 'Non Trad Wates': 'system built' + 'Non Trad Wates': 'system built', + + 'CAVITY FILLED 270MM': 'filled cavity', + 'CAVITY FILLED 270MM': 'filled cavity', + 'CAVITY FILLED 250MM': 'filled cavity', + 'CAVITY FILLED 260MM': 'filled cavity', + 'CAVITY FILLED 260MM': 'filled cavity', + 'SOLID A/B 220MM': 'solid brick unknown insulation', + 'CAVITY A/B 300MM': "uninsulated cavity", + 'CAVITY A/B 250MM': "uninsulated cavity", + 'CAVITY A/B 260MM': "uninsulated cavity", + 'CAVITY A/B 270MM': "uninsulated cavity", + 'SOLID BRICK/CAVITY EXT': 'solid brick unknown insulation', + 'CAVITY EWI': 'filled cavity', + 'SANDSTONE/CAVITY EXT': 'sandstone or limestone', + 'SYSTEM BUILD 100MM EWI': 'system built', + 'CAVITY A/B 260MM': "uninsulated cavity", + 'CAVITY A/B 270MM': "uninsulated cavity", + 'CAVITY A/B 250MM': "uninsulated cavity" + } From 02f423f60c28e359e48357762f9d7503d44a451b Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 8 Jun 2025 16:46:42 +0100 Subject: [PATCH 10/14] if a property was flagged for a heating upgrade based on epc but the landlord's data indicates it has a boiler, it's removed --- asset_list/AssetList.py | 19 ++++++++++++++----- asset_list/app.py | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 5 deletions(-) diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py index 78c589db..4c729245 100644 --- a/asset_list/AssetList.py +++ b/asset_list/AssetList.py @@ -1512,13 +1512,22 @@ class AssetList: ) ) + # If the landlord has given us the heating system, we default to that on heating upgrades. Because of the + # poor heating in place, if the EPC indicates that this property had a low efficiency heating system but the + # landlord data suggests otherwise (e.g. there's a gas boiler), we default to what the landlord has told us self.standardised_asset_list["solar_epc_data_indicates_requires_heating_upgrade"] = ( - self.standardised_asset_list[self.EPC_API_DATA_NAMES["mainheat-description"]].str.lower().str.contains( - "electric storage heaters|room heaters" + ( + self.standardised_asset_list[self.EPC_API_DATA_NAMES["mainheat-description"]].str.lower().str.contains( + "electric storage heaters|room heaters" + ) & ( + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["mainheatcont-description"] + ] != "Controls for high heat retention storage heaters" + ) ) & ( - self.standardised_asset_list[ - self.EPC_API_DATA_NAMES["mainheatcont-description"] - ] != "Controls for high heat retention storage heaters" + ~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].isin( + ["district heating", "communal heating", "communal gas boiler"] + ) & ~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].str.contains("gas ") ) ) diff --git a/asset_list/app.py b/asset_list/app.py index 881334b5..3c9176ca 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -95,6 +95,42 @@ def app(): phase = False ecosurv_landlords = None + # For ACIS - programme re-build + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/ACIS/ACIS Full Programme Review March 2025" + data_filename = "ACIS asset list.xlsx" + sheet_name = "Assets" + address1_column = "House No" + postcode_column = "Postcode" + landlord_property_id = "UPRN" + fulladdress_column = None + address_cols_to_concat = ["House No", "Street", "Town"] + missing_postcodes_method = None + address1_method = None + landlord_year_built = "YEAR BUILT" + landlord_os_uprn = None + landlord_property_type = "Property type" + landlord_built_form = None + landlord_wall_construction = "Wall Constuction" + landlord_roof_construction = None + landlord_sap = None + landlord_heating_system = "Heating" + landlord_existing_pv = None + outcomes_filename = [os.path.join(data_folder, "ACIS Group - 25.11.2024 - outcomes.xlsx")] + outcomes_sheetname = ["Feedback"] + outcomes_postcode = ["Postcode"] + outcomes_address = ["Address"] + outcomes_houseno = ["No"] + outcomes_id = [None] + master_filepaths = [ + os.path.join(data_folder, "ECO 3 -Table 1.csv"), + os.path.join(data_folder, "ECO 4 -Table 1.csv"), + ] + master_id_colnames = [None, None] + master_to_asset_list_filepath = None + phase = False + ecosurv_landlords = None + landlord_block_reference = None + # Thrive - reconciliation # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation" # data_filename = "Thrive Asset List - Complete - Updated May 2025.xlsx" From c22179f1a5d12728e7a50ce9ab78543ce23b968a Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 17 Jun 2025 15:59:07 +0100 Subject: [PATCH 11/14] updating code for calico --- asset_list/AssetList.py | 274 ++++++++++++++---- asset_list/app.py | 209 ++++++++----- asset_list/hubspot/config.py | 4 +- asset_list/hubspot/prepare_for_hubspot.py | 22 +- asset_list/mappings/heating_systems.py | 5 +- asset_list/mappings/property_type.py | 13 +- asset_list/mappings/walls.py | 9 +- .../Futures Housing/validation_surveys.py | 167 +++++++++++ 8 files changed, 572 insertions(+), 131 deletions(-) create mode 100644 etl/customers/Futures Housing/validation_surveys.py diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py index 4c729245..62016239 100644 --- a/asset_list/AssetList.py +++ b/asset_list/AssetList.py @@ -29,6 +29,7 @@ from recommendations.recommendation_utils import ( ) from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes +from etl.epc_clean.epc_attributes.WallAttributes import WallAttributes logger = setup_logger() @@ -428,6 +429,7 @@ class AssetList: self.unmatched_submissions = pd.DataFrame() self.ecosurv = None self.ecosurv_no_match = pd.DataFrame() + self.geographical_areas = pd.DataFrame() # When this is True, we intend to break the programme into multiple phases. We may need to review # how this is structured in the future, as depending on how we get future data, we may need to @@ -443,7 +445,7 @@ class AssetList: self.non_intrusives_eligibility = "Eligibility (Red/Yellow/Green)" in self.raw_asset_list.columns self.new_format_non_insturives_present = ( - "Has the property been re-walled?" in self.standardised_asset_list.columns + "Has the property been re-walled?" in self.raw_asset_list.columns ) # Names of columns @@ -989,7 +991,7 @@ class AssetList: self.STANDARD_YEAR_BUILT, self.STANDARD_WALL_CONSTRUCTION, self.STANDARD_HEATING_SYSTEM, - self.STANDARD_EXISTING_PV + self.STANDARD_BLOCK_REFERENCE, ] if v not in self.standardised_asset_list.columns ] for v in missing_variables: @@ -1016,6 +1018,12 @@ class AssetList: self.standardised_asset_list[self.STANDARD_SAP] = ( self.standardised_asset_list[self.STANDARD_SAP].astype(float) ) + # If it's zero, we set it to None + self.standardised_asset_list[self.STANDARD_SAP] = np.where( + self.standardised_asset_list[self.STANDARD_SAP] == 0, + None, + self.standardised_asset_list[self.STANDARD_SAP] + ) def merge_data(self, df: pd.DataFrame): """ @@ -1233,7 +1241,7 @@ class AssetList: processed_age_band, how="left" ) - def identify_worktypes(self, cleaned): + def identify_worktypes(self): if self.landlord_sap is not None: # We add a SAP category for all work type identification @@ -1596,19 +1604,9 @@ class AssetList: else: self.standardised_asset_list["solar_non_intrusives_walls_insulated"] = False - # We merge on the u-value for average thermal transmittance - walls_uvalue_data = pd.DataFrame(cleaned["walls-description"]) - walls_uvalue_data = walls_uvalue_data[ - ~pd.isnull(walls_uvalue_data["thermal_transmittance"]) - ][["original_description", "thermal_transmittance"]].rename( - columns={ - "original_description": self.EPC_API_DATA_NAMES["walls-description"], - "thermal_transmittance": "walls_u_value" - } - ) - self.standardised_asset_list = self.standardised_asset_list.merge( - walls_uvalue_data, how="left", on=self.EPC_API_DATA_NAMES["walls-description"] - ) + self.standardised_asset_list["walls_u_value"] = self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["walls-description"] + ].apply(lambda x: WallAttributes(x).process()["thermal_transmittance"] if not pd.isnull(x) else None) self.standardised_asset_list["solar_epc_walls_insulated"] = ( ( @@ -1621,16 +1619,20 @@ class AssetList: ) ) - # We merge on the u-value for average thermal transmittance - roof_data = pd.DataFrame(cleaned["roof-description"])[ - ["original_description", "thermal_transmittance", "is_pitched", "is_loft"] - ].rename( - columns={ - "original_description": self.EPC_API_DATA_NAMES["roof-description"], - "thermal_transmittance": "roof_u_value", - } - ) - + roof_data = [] + for desc in self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["roof-description"] + ].unique(): + if pd.isnull(desc): + continue + roof_data.append( + { + self.EPC_API_DATA_NAMES["roof-description"]: desc, + **RoofAttributes(desc).process() + } + ) + roof_data = pd.DataFrame(roof_data) + roof_data = roof_data.rename(columns={"thermal_transmittance": "roof_u_value"}) self.standardised_asset_list = self.standardised_asset_list.merge( roof_data, how="left", on=self.EPC_API_DATA_NAMES["roof-description"] ) @@ -1723,6 +1725,8 @@ class AssetList: self.standardised_asset_list["solar_epc_loft_needs_topup"] ) + z = self.standardised_asset_list[self.standardised_asset_list["landlord_property_id"] == "DW150120029"] + self.standardised_asset_list["solar_eligible"] = ( # Property isn't a flag not_a_flat & @@ -1964,7 +1968,8 @@ class AssetList: self.standardised_asset_list[col] ) - if self.ecosurv is not None: + if self.ecosurv is not None and "ecosurv_install_status" in self.standardised_asset_list.columns: + # If we didn't match anything to ecosurv, the ecosurv_install_status won't exist for col in ["cavity_reason", "solar_reason"]: self.standardised_asset_list[col] = np.where( ( @@ -2021,6 +2026,20 @@ class AssetList: ) ) + def fill_landlord_block_reference(self, has_blocks_of_flats): + if not has_blocks_of_flats: + return + + # If we have blocks of flats, we fill the landlord_block_reference field with address 1 + postcode + self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE] = np.where( + (self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "block of flats") & ( + pd.isnull(self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE]) + ), + self.standardised_asset_list[self.STANDARD_ADDRESS_1] + " " + + self.standardised_asset_list[self.STANDARD_POSTCODE], + self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE] + ) + def label_property_status(self): """ This function is designed to be run after identify_worktypes() has been run, and will create a "property_status" @@ -2059,7 +2078,89 @@ class AssetList: self.standardised_asset_list["project_code"] = None # if we have any blocks, where work is eligible, we flag them now - if self.landlord_block_reference is not None: + # These blocks may be refecence via the landlord_block_reference field, or by property types being + # blocks of flats + has_landlord_block_reference = self.landlord_block_reference is not None + has_blocks_of_flats = (self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "block of flats").sum() + + if has_landlord_block_reference or has_blocks_of_flats: + + # If we blocks of flats, without a landlord block reference, we create this + self.fill_landlord_block_reference(has_blocks_of_flats) + + self.split_blocks(has_blocks_of_flats) + + def split_blocks(self, has_blocks_of_flats): + """ + Where we have a single row that is a block of flats, we split this into multiple rows, + one for each unit. The data that we have will be copied across rows + :param self: + :param has_blocks_of_flats: + :return: + """ + + blocks = self.standardised_asset_list[ + self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "block of flats" + ].copy() + + RANGE_RE = re.compile(r'\b(\d+[A-Za-z]?)\s*[-–]\s*(\d+[A-Za-z]?)\b') + NUM_RE = re.compile(r'\b\d+[A-Za-z]?\b') # captures 12, 12A, etc. + + expanded_rows = [] + for _, row in blocks.iterrows(): + addr = str(row[self.STANDARD_ADDRESS_1]) + + # 1 ─ Range (e.g. 1-7) + m_range = RANGE_RE.search(addr) + if m_range: + start, end = m_range.groups() + start, end = int(re.match(r'\d+', start)[0]), int(re.match(r'\d+', end)[0]) + if start > end or (end - start) > 100: + raise ValueError(f"Suspicious range '{addr}'") + for n in range(start, end + 1): + new = row.copy() + new_addr = RANGE_RE.sub(str(n), addr, count=1) + new[self.STANDARD_ADDRESS_1] = new_addr + new[self.DOMNA_PROPERTY_ID] = f"{row[self.DOMNA_PROPERTY_ID]}-{new_addr}" + expanded_rows.append(new) + continue + + # 2 ─ Explicit list (e.g. 1, 2, 5 Block) + nums = NUM_RE.findall(addr) + if len(nums) > 1 and ',' in addr: + for n in nums: + new = row.copy() + new_addr = re.sub(NUM_RE, n, addr, count=1) # replace the first number only + new[self.STANDARD_ADDRESS_1] = new_addr + new[self.DOMNA_PROPERTY_ID] = f"{row[self.DOMNA_PROPERTY_ID]}-{new_addr}" + expanded_rows.append(new) + continue + + # 3 ─ Single number → treat as individual dwelling + if len(nums) == 1: + expanded_rows.append(row) + continue + + # 4 ─ No numbers → keep as-is + if not nums: + expanded_rows.append(row) + continue + + # Anything else with digits is unrecognised + raise NotImplementedError(f"Unhandled block format: '{addr}'") + + expanded_blocks = pd.DataFrame(expanded_rows) + + # We drop the blocks from the standardised asset list and append on the expanded blocks + self.standardised_asset_list = self.standardised_asset_list[ + self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] != "block of flats" + ] + + self.standardised_asset_list = pd.concat( + [self.standardised_asset_list, expanded_blocks], + ignore_index=True + ) + # For blocks that have a 50% allocation, we create project codes self.block_analysis() # find any block refs with more than 50% emptires @@ -2079,11 +2180,39 @@ class AssetList: ) self.standardised_asset_list = self.standardised_asset_list.drop(columns=["Block Reference"]) - def block_analysis(self): + def analyse_geographies(self): + cavity_programme = ( + self.standardised_asset_list[["domna_postcode", "cavity_reason"]] + .groupby(["domna_postcode"])["cavity_reason"] + .count() + .reset_index() + ) + solar_programme = ( + self.standardised_asset_list[["domna_postcode", "solar_reason"]] + .groupby(["domna_postcode"])["solar_reason"] + .count() + .reset_index() + ) + postcodes = ( + self.standardised_asset_list[["domna_postcode", "landlord_property_id"]] + .groupby("domna_postcode")["landlord_property_id"] + .count() + .reset_index() + .rename(columns={"landlord_property_id": "n_properties"}) + ) + geographical_areas = postcodes.merge(cavity_programme, how="left", on="domna_postcode").merge( + solar_programme, how="left", on="domna_postcode" + ).fillna(0) + geographical_areas["coverage"] = ( + ( + geographical_areas["solar_reason"] + geographical_areas["cavity_reason"] + ) / geographical_areas["n_properties"] * 100 + ) - if self.landlord_block_reference is None: - # This information is not available - return + geographical_areas = geographical_areas.sort_values("coverage", ascending=False) + self.geographical_areas = geographical_areas + + def block_analysis(self): # Reverse mapping: label -> enum LABEL_TO_ENUM = {e.label: e for e in hubspot_config.HubspotProcessStatus} @@ -2146,6 +2275,8 @@ class AssetList: sheet_name, landlord_property_id, phone_number_column=None, + secondary_phone_number_column=None, + secondary_contact_full_name=None, email_column=None, fullname_column=None, firstname_column=None, @@ -2155,6 +2286,8 @@ class AssetList: self.contact_detail_fields = { "landlord_property_id": landlord_property_id, "phone_number": phone_number_column, + "secondary_phone_number": secondary_phone_number_column, + "secondary_contact_full_name": secondary_contact_full_name, "email": email_column, "fullname": fullname_column, "firstname": firstname_column, @@ -2162,7 +2295,8 @@ class AssetList: } details_colnames = [ - phone_number_column, email_column, fullname_column, firstname_column, lastname_column + phone_number_column, secondary_phone_number_column, email_column, fullname_column, firstname_column, + lastname_column ] # We'll fill them none_details = [x for x in details_colnames if x is None] @@ -2188,7 +2322,7 @@ class AssetList: self.contact_details = contact_details @classmethod - def load_standardised_asset_list(cls, filepath): + def load_standardised_asset_list(cls, filepath, sheet_name, header): """ This function is designed to load the standardised asset list from a file :return: @@ -2197,7 +2331,7 @@ class AssetList: # instantiate the class instance = cls( local_filepath=filepath, - sheet_name="Standardised Asset List", + sheet_name=sheet_name, address1_colname=cls.STANDARD_ADDRESS_1, postcode_colname=cls.STANDARD_POSTCODE, full_address_colname=cls.STANDARD_FULL_ADDRESS, @@ -2216,7 +2350,7 @@ class AssetList: landlord_sap=cls.STANDARD_SAP, landlord_block_reference=cls.STANDARD_BLOCK_REFERENCE, phase=False, - header=0 + header=header ) return instance @@ -2252,6 +2386,10 @@ class AssetList: # For each cavity and solar product, we iterate through the prexies and map to the products programme_data = self.standardised_asset_list.copy() + programme_data["domna_full_address"] = ( + programme_data["domna_full_address"].str.replace(";", ", ", regex=False).str.replace(" ", "") + ) + # Format the two date columns programme_data["survey_week"] = pd.to_datetime(programme_data["survey_week"], errors="coerce") programme_data[self.EPC_API_DATA_NAMES["inspection-date"]] = pd.to_datetime( @@ -2324,8 +2462,11 @@ class AssetList: self.CRM_HISTORICAL_CAVITY_PRODUCT["name"] ) else: - - programme_data = programme_data[~pd.isnull(programme_data["domna_product"])] + programme_data = programme_data[ + ~pd.isnull(programme_data["domna_product"]) & + ~pd.isnull(programme_data["surveyor"]) & + ~pd.isnull(programme_data["survey_week"]) + ] programme_data = programme_data.drop(columns=["solar_product", "cavity_product"]) product_df = ( @@ -2356,6 +2497,10 @@ class AssetList: # We determine which column we should use for the UPRN if self.STANDARD_UPRN not in programme_data.columns: uprn_column = self.EPC_API_DATA_NAMES["uprn"] + # If we're working form the EPC, we don't have this information if the EPC is estimated + programme_data[uprn_column] = np.where( + programme_data["estimated"] == True, None, programme_data[uprn_column] + ) else: # Use the value that has the most coverage uprn_column = "hubspot_uprn" @@ -2445,6 +2590,14 @@ class AssetList: 'Phone ': ( self.contact_detail_fields["phone_number"] if self.contact_detail_fields["phone_number"] else None ), # TODO: Review + 'Secondary Phone ': ( + self.contact_detail_fields["secondary_phone_number"] if + self.contact_detail_fields["secondary_phone_number"] else None + ), + "Secondary Contact Full Name ": ( + self.contact_detail_fields["secondary_contact_full_name"] if + self.contact_detail_fields["secondary_contact_full_name"] else None + ), 'Full Address ': self.STANDARD_FULL_ADDRESS, 'Address 1 ': self.STANDARD_ADDRESS_1, 'Address 2 ': None, # TODO: Don't have this for the moment @@ -2496,7 +2649,7 @@ class AssetList: 'Name ': 'Name ', 'Unit price ': 'Unit price ', 'Quantity ': 'Quantity ', - 'Deal Owner': 'surveyor_email', + 'Deal Owner': 'surveyor', 'Project Code ': 'project_code', 'Associations: Listing': 'Associations: Listing', 'Deal Stage ': "hubspot_status", @@ -2523,11 +2676,12 @@ class AssetList: programme_data['Installer '] = installer_name programme_data['Name '] = ( - programme_data['Address 1 '] + " ," + programme_data['Postcode '] + programme_data['Full Address '] + " ," + programme_data['Postcode '] ) # The listing owner email is the same as the surveyor email (deal owner), so they can see the listing programme_data['Listing Owner Email '] = programme_data['Deal Owner'] programme_data['Amount '] = 0 + programme_data["Deal Owner"] = programme_data["Deal Owner"].str.lower() # We make sure we have all of the columns that we need missed_columns = [c for c in hubspot_config.CRM_UPLOAD_COLUMNS if c not in programme_data.columns] @@ -2626,7 +2780,10 @@ class AssetList: logger.info("Matched %s properties to ecosurv data", len(matched)) logger.info("%s properties in Ecosurv remain unmatched", len(unmatched)) - # We now match + if not matched: + return + + # We now match matched = pd.DataFrame(matched) # We'll possibly have duplicates here, where properties have been sold twice. Ww de-dupe if matched[self.STANDARD_LANDLORD_PROPERTY_ID].duplicated().sum(): @@ -2995,20 +3152,32 @@ class AssetList: axis=1 ) - scheme_col = ( - "AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION" if - "AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION" in master_data.columns else "AFFORDABLE WARMTH" - ) + if "AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION" in master_data.columns: + scheme_col = "AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION" + elif "AFFORDABLE WARMTH" in master_data.columns: + scheme_col = "AFFORDABLE WARMTH" + else: + scheme_col = "OFFICE USE ONLY" + postcode_col = "POSTCODE" if "POSTCODE" in master_data.columns else "Post Code" - house_no_col = 'NO.' if 'NO.' in master_data.columns else "NO" - property_type_col = ( - "PROPERTY TYPE As per table emailed" if - "PROPERTY TYPE As per table emailed" in - master_data.columns else "PROPERTY TYPE As per table emailed" - ) + if 'NO.' in master_data.columns: + house_no_col = 'NO.' + elif "NO" in master_data.columns: + house_no_col = 'NO' + else: + house_no_col = "NUMBER" + + if "PROPERTY TYPE As per table emailed" in master_data.columns: + property_type_col = "PROPERTY TYPE As per table emailed" + elif "PROPERTY TYPE As per table emailed" in master_data.columns: + property_type_col = "PROPERTY TYPE As per table emailed" + else: + property_type_col = "PROPERTY TYPE (SEE DEEMED SCORES SHEET) Eg. 3W_Flat_1 (As per Matrix)" + measure_mix_col = "MEASURE COMBO" installer_notes_col = "INSTALLERS NOTES ; REASONS FOR CANCELLATIONS" installer_col = "INSTALLER" + town_colname = "TOWN" if "TOWN" in master_data.columns else 'Town/Area' logger.info("Matching master data to asset list") matched = [] @@ -3098,7 +3267,8 @@ class AssetList: df = df[ df[self.STANDARD_FULL_ADDRESS].str.lower().apply( lambda x: process.extractOne( - " ".join([row[house_no_col], row["Street / Block Name"], row["TOWN"]]).lower(), + " ".join( + [row[house_no_col], row["Street / Block Name"], row[town_colname]]).lower(), x )[1] ) > 90 diff --git a/asset_list/app.py b/asset_list/app.py index 3c9176ca..08164c19 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -2,8 +2,6 @@ import os import json import pandas as pd from pprint import pprint -import msgpack -from utils.s3 import read_from_s3 from asset_list.AssetList import AssetList from asset_list.mappings.property_type import PROPERTY_MAPPING from asset_list.mappings.built_form import BUILT_FORM_MAPPINGS @@ -62,27 +60,28 @@ def app(): Property UPRN """ - # Stori - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Storicymru" - data_filename = "Asset list - for analysis.xlsx" - sheet_name = "SAP and Costs Calculations" + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Calico" + data_filename = "07.04 CALICO - Final List.xlsx" + asset_list_header = 2 + sheet_name = "Final List" postcode_column = 'Postcode' - fulladdress_column = "Address1" - address1_column = None - address1_method = "house_number_extraction" - address_cols_to_concat = [] + fulladdress_column = None + address1_column = "Property Number / Name" + address1_method = None + address_cols_to_concat = [ + "Property Number / Name", + "Street", + "Town" + ] missing_postcodes_method = None - landlord_year_built = "Age" + landlord_year_built = "NROSH Estimated Build Date" landlord_os_uprn = None - landlord_property_type = "TYPE" - landlord_built_form = "AGE / DETACHMENT" - landlord_wall_construction = "WALL" - landlord_roof_construction = "LOFT INSULATION" - landlord_heating_system = "BOILER" - landlord_existing_pv = "SOLAR PV" - landlord_property_id = "UPRN" - landlord_sap = "Current SAP Rating" - landlord_block_reference = None + landlord_property_type = "Asset Type" + landlord_built_form = None + landlord_wall_construction = "Wall Type" + landlord_heating_system = "Boiler Type" + landlord_existing_pv = None + landlord_property_id = "Asset Reference" outcomes_filename = [] outcomes_sheetname = [] outcomes_postcode = [] @@ -90,46 +89,126 @@ def app(): outcomes_id = [] outcomes_address = [] master_filepaths = [] - master_to_asset_list_filepath = None master_id_colnames = [] - phase = False + master_to_asset_list_filepath = None + landlord_roof_construction = None + landlord_block_reference = None + landlord_sap = "Current Efficiency Rating - Score" + phase = None ecosurv_landlords = None - # For ACIS - programme re-build - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/ACIS/ACIS Full Programme Review March 2025" - data_filename = "ACIS asset list.xlsx" - sheet_name = "Assets" - address1_column = "House No" - postcode_column = "Postcode" - landlord_property_id = "UPRN" - fulladdress_column = None - address_cols_to_concat = ["House No", "Street", "Town"] - missing_postcodes_method = None - address1_method = None - landlord_year_built = "YEAR BUILT" - landlord_os_uprn = None - landlord_property_type = "Property type" - landlord_built_form = None - landlord_wall_construction = "Wall Constuction" - landlord_roof_construction = None - landlord_sap = None - landlord_heating_system = "Heating" - landlord_existing_pv = None - outcomes_filename = [os.path.join(data_folder, "ACIS Group - 25.11.2024 - outcomes.xlsx")] - outcomes_sheetname = ["Feedback"] - outcomes_postcode = ["Postcode"] - outcomes_address = ["Address"] - outcomes_houseno = ["No"] - outcomes_id = [None] - master_filepaths = [ - os.path.join(data_folder, "ECO 3 -Table 1.csv"), - os.path.join(data_folder, "ECO 4 -Table 1.csv"), - ] - master_id_colnames = [None, None] - master_to_asset_list_filepath = None - phase = False - ecosurv_landlords = None - landlord_block_reference = None + # data_folder = ( + # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/2018 Asset + # List" + # ) + # data_filename = "LIVEWEST STOCK - 23rd October 2018.xlsx" + # sheet_name = "Assets" + # postcode_column = 'Postcode' + # fulladdress_column = "Address" + # address1_column = None + # address1_method = "house_number_extraction" + # address_cols_to_concat = [] + # missing_postcodes_method = None + # landlord_year_built = "Build Year" + # landlord_os_uprn = None + # landlord_property_type = "Property Archetype" + # landlord_built_form = None + # landlord_wall_construction = None + # landlord_heating_system = "Heating Fuel Type" + # landlord_existing_pv = None + # landlord_property_id = "Uprn - DO NOT DELETE" + # outcomes_filename = [ + # os.path.join(data_folder, "RT - LiveWest.xlsx") + # ] + # outcomes_sheetname = ["Feedback"] + # outcomes_postcode = ["Poscode"] + # outcomes_houseno = ["No."] + # outcomes_id = ["UPRN"] + # outcomes_address = ["Address"] + # master_filepaths = [ + # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/Rolling + # Master " + # "- redacted for analysis/CAVITY-Table 1.csv" + # ] + # master_id_colnames = [None] + # master_to_asset_list_filepath = None + # landlord_roof_construction = None + # landlord_block_reference = None + # landlord_sap = None + # phase = None + # ecosurv_landlords = "livewest|live west" + + # data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March " + # "2025/Livewest Asset List (Original) - csv") + # data_filename = "Report-Table 1.csv" + # sheet_name = None + # postcode_column = 'Postcode' + # fulladdress_column = "T1_Address" + # address1_column = None + # address1_method = "house_number_extraction" + # address_cols_to_concat = [] + # missing_postcodes_method = None + # landlord_year_built = "Build Yr" + # landlord_os_uprn = None + # landlord_property_type = "T1_AssetType" + # landlord_built_form = "T1_AssetType" + # landlord_wall_construction = "Wall Type Cavity" + # landlord_heating_system = "Heating Fuel" + # landlord_existing_pv = None + # landlord_property_id = "T1_UPRN" + # outcomes_filename = [ + # os.path.join(data_folder, "RT - LiveWest.xlsx") + # ] + # outcomes_address = ["Address"] + # outcomes_sheetname = ["Feedback"] + # outcomes_postcode = ["Poscode"] + # outcomes_houseno = ["No."] + # outcomes_id = ["UPRN"] + # master_filepaths = [ + # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/Rolling + # Master " + # "- redacted for analysis/CAVITY-Table 1.csv" + # ] + # master_id_colnames = [None] + # master_to_asset_list_filepath = None + # landlord_roof_construction = None + # landlord_block_reference = None + # landlord_sap = None + # phase = None + # ecosurv_landlords = "livewest|live west" + + # Stori + # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Storicymru" + # data_filename = "Asset list - for analysis.xlsx" + # sheet_name = "SAP and Costs Calculations" + # postcode_column = 'Postcode' + # fulladdress_column = "Address1" + # address1_column = None + # address1_method = "house_number_extraction" + # address_cols_to_concat = [] + # missing_postcodes_method = None + # landlord_year_built = "Age" + # landlord_os_uprn = None + # landlord_property_type = "TYPE" + # landlord_built_form = "AGE / DETACHMENT" + # landlord_wall_construction = "WALL" + # landlord_roof_construction = "LOFT INSULATION" + # landlord_heating_system = "BOILER" + # landlord_existing_pv = "SOLAR PV" + # landlord_property_id = "UPRN" + # landlord_sap = "Current SAP Rating" + # landlord_block_reference = None + # outcomes_filename = [] + # outcomes_sheetname = [] + # outcomes_postcode = [] + # outcomes_houseno = [] + # outcomes_id = [] + # outcomes_address = [] + # master_filepaths = [] + # master_to_asset_list_filepath = None + # master_id_colnames = [] + # phase = False + # ecosurv_landlords = None # Thrive - reconciliation # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation" @@ -201,7 +280,7 @@ def app(): asset_list = AssetList( local_filepath=os.path.join(data_folder, data_filename), - header=0, + header=asset_list_header, sheet_name=sheet_name, address1_colname=address1_column, postcode_colname=postcode_column, @@ -294,7 +373,7 @@ def app(): epc_api_only = False force_retrieve_data = False skip = None # Used to skip already completed chunks - chunk_size = 5000 + chunk_size = 2000 filename = "Chunk {i}.csv" download_folder = os.path.join(data_folder, "Chunks") if not os.path.exists(download_folder): @@ -477,18 +556,13 @@ def app(): asset_list.extract_attributes() - cleaned = read_from_s3( - s3_file_name="cleaned_epc_data/cleaned.bson", - bucket_name="retrofit-data-dev" - ) - cleaned = msgpack.unpackb(cleaned, raw=False) - - asset_list.identify_worktypes(cleaned) + asset_list.identify_worktypes() pprint(asset_list.work_type_figures) # We now flag the status of the property asset_list.label_property_status() + asset_list.analyse_geographies() # Store as an excel filename = os.path.join(data_folder, ".".join(data_filename.split(".")[:-1])) + " - Standardised.xlsx" @@ -510,3 +584,6 @@ def app(): if not asset_list.ecosurv_no_match.empty: asset_list.ecosurv_no_match.to_excel(writer, sheet_name="Unmatched Ecosurv", index=False) + + if not asset_list.geographical_areas.empty: + asset_list.geographical_areas.to_excel(writer, sheet_name="Geographical Areas", index=False) diff --git a/asset_list/hubspot/config.py b/asset_list/hubspot/config.py index 01540b7b..5110fb5f 100644 --- a/asset_list/hubspot/config.py +++ b/asset_list/hubspot/config.py @@ -46,7 +46,9 @@ class Installer(Enum): CRM_UPLOAD_COLUMNS = [ 'Name ', 'Associations: Listing', 'Company Domain Name ', 'Email ', 'First Name ', 'Last Name ', - 'Phone ', 'Listing Owner Email ', + 'Phone ', 'Secondary Phone ', + 'Secondary Contact Full Name ', + 'Listing Owner Email ', 'Full Address ', 'Address 1 ', 'Address 2 ', 'Postcode ', 'Property Type ', 'Property Sub Type ', diff --git a/asset_list/hubspot/prepare_for_hubspot.py b/asset_list/hubspot/prepare_for_hubspot.py index ee3bc65d..9ffe24ca 100644 --- a/asset_list/hubspot/prepare_for_hubspot.py +++ b/asset_list/hubspot/prepare_for_hubspot.py @@ -18,31 +18,39 @@ def app(): """ # inputs: - reconcile_programme = True # If True, the hubspot upload will include all properties with a project code - customer_domain = "https://thrivehomes.org.uk" + reconcile_programme = False # If True, the hubspot upload will include all properties with a project code + customer_domain = "https://sandwell.gov.uk" installer_name = "J & J CRUMP" asset_list_filepath = ( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Testing Hubspot Upload/Thrive Programme - " - "Hubspot Upload 3.xlsx" + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Sandwell/Hubspot/Sandwell BC - Full Asset List MAIN - " + "Standardised.xlsx" ) + asset_list_sheet_name = "Proposed Program" + asset_list_header = 1 + contact_details_filepath = ( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Testing Hubspot Upload/Sample contact " - "details.xlsx" + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Sandwell/Hubspot/Sandwell Contact Details.xlsx" ) contacts_sheet_name = "Sheet1" contacts_landlord_property_id = "landlord_property_id" contacts_phone_number_column = "phone_number" + contacts_secondary_phone_number_column = "secondary_phone_number" + contacts_secondary_contact_full_name = "secondary_contact_full_name" contacts_email_column = "email" contacts_fullname_column = "fullname" contacts_firstname_column = "firstname" contacts_lastname_column = "lastname" - asset_list = AssetList.load_standardised_asset_list(asset_list_filepath) + asset_list = AssetList.load_standardised_asset_list( + asset_list_filepath, asset_list_sheet_name, asset_list_header + ) asset_list.load_contact_details( local_filepath=contact_details_filepath, sheet_name=contacts_sheet_name, landlord_property_id=contacts_landlord_property_id, phone_number_column=contacts_phone_number_column, + secondary_phone_number_column=contacts_secondary_phone_number_column, + secondary_contact_full_name=contacts_secondary_contact_full_name, email_column=contacts_email_column, fullname_column=contacts_fullname_column, firstname_column=contacts_firstname_column, diff --git a/asset_list/mappings/heating_systems.py b/asset_list/mappings/heating_systems.py index d2959873..1a46c429 100644 --- a/asset_list/mappings/heating_systems.py +++ b/asset_list/mappings/heating_systems.py @@ -27,7 +27,7 @@ STANDARD_HEATING_SYSTEMS = { "electric ceiling", "electric underfloor", "no heating", - "non-electric underfloor" + "non-electric underfloor", } HEATING_MAPPINGS = { @@ -325,5 +325,6 @@ HEATING_MAPPINGS = { 'GREENSTAR 24i JUNIOR': 'gas combi boiler', 'ECO TEC PRO 24 (OLD TYPE)': 'gas combi boiler', 'GREENSTAR 30SI COMPACT': 'gas combi boiler', - 'BAXI DUO TEC 28 COMBI ErP': 'gas combi boiler' + 'BAXI DUO TEC 28 COMBI ErP': 'gas combi boiler', + 'Not applicable for this asset type': 'unknown' } diff --git a/asset_list/mappings/property_type.py b/asset_list/mappings/property_type.py index 1a61c3eb..bdb6580e 100644 --- a/asset_list/mappings/property_type.py +++ b/asset_list/mappings/property_type.py @@ -255,5 +255,16 @@ PROPERTY_MAPPING = { 'Unit': 'unknown', 'HOUSE (3 STOREY)': 'house', 'FLAT GROUND FLOOR': 'flat', - 'FLAT TOP FLOOR': 'flat' + 'FLAT TOP FLOOR': 'flat', + + 'SHARED HOUSE': 'house', + 'MAISONETTE': 'maisonette', + 'DIRECT ACCESS HOSTEL': 'other', + 'Day centre': 'other', + 'Care home': 'other', + 'BLOCK (Communal)': 'block of flats', + 'SHOP': 'other', + 'Office Block': 'other', + 'BLOCK (Non-Communal)': 'block of flats', + 'Refuge': 'other' } diff --git a/asset_list/mappings/walls.py b/asset_list/mappings/walls.py index 5baabe6f..8be8575a 100644 --- a/asset_list/mappings/walls.py +++ b/asset_list/mappings/walls.py @@ -242,6 +242,11 @@ WALL_CONSTRUCTION_MAPPINGS = { 'SYSTEM BUILD 100MM EWI': 'system built', 'CAVITY A/B 260MM': "uninsulated cavity", 'CAVITY A/B 270MM': "uninsulated cavity", - 'CAVITY A/B 250MM': "uninsulated cavity" - + 'CAVITY A/B 250MM': "uninsulated cavity", + 'System': 'system built', + 'Sandstone/Limestone': 'sandstone or limestone', + 'No Fines': 'system built', + 'Granite/Whinstone': 'granite or whinstone', + 'Not applicable to this asset type': 'unknown', + 'Steel Frame': 'system built' } diff --git a/etl/customers/Futures Housing/validation_surveys.py b/etl/customers/Futures Housing/validation_surveys.py new file mode 100644 index 00000000..1f8e6cfa --- /dev/null +++ b/etl/customers/Futures Housing/validation_surveys.py @@ -0,0 +1,167 @@ +import pandas as pd + + +def get_band(sap_score_number): + bands = [ + ("High_A", 96, float("inf")), + ("Low_A", 92, 96), + ("High_B", 86, 92), + ("Low_B", 81, 86), + ("High_C", 74.5, 81), + ("Low_C", 69, 74.5), + ("High_D", 61.5, 69), + ("Low_D", 55, 61.5), + ("High_E", 46.5, 55), + ("Low_E", 39, 46.5), + ("High_F", 29.5, 39), + ("Low_F", 21, 29.5), + ("High_G", 10.5, 21), + ("Low_G", 1, 10.5), + ] + + for band, lower, upper in bands: + if lower <= sap_score_number < upper: + return band + + return None + + +def classify_floor_area(floor_area): + if floor_area <= 72: + return "0-72" + + if floor_area <= 97: + return "73-97" + + if floor_area <= 199: + return "98-199" + + return "200+" + + +asset_list = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Wates - Futures Housing/ECO 4 Wates - Standardised.xlsx", + sheet_name="Standardised Asset List" +) + +asset_list["starting_sap_band"] = asset_list["epc_sap_score_on_register"].apply(get_band) +asset_list["floor_area_band"] = asset_list["epc_total_floor_area"].apply(classify_floor_area) + +# Objective: +# We need to get a reasonable estimate for the cost of works for properties that are EPC D or below +# +# Therefore: +# 1) We know that some properties that are currently EPC C may* qualify for ECO4 funding. Right now, we aren't trying +# to determine which EPC C properties or above will qualify, just how much works will cost for properties that do +# qualify +# 2) We cannot survey everything, so before we undetake too much risk we should produce some costings for each of the +# archetypes +# +# Driving Factors: +# 1) Floor area band & starting SAP band - this will determine how much funding is produced +# 2) Heating system - this will determine if the property needs a heating upgrade or not + + +archetypes = asset_list[asset_list["epc_sap_score_on_register"] <= 68].groupby( + ["floor_area_band", "starting_sap_band", "landlord_heating_system"] +)["landlord_property_id"].nunique().reset_index() +archetypes = archetypes.rename(columns={"landlord_property_id": "n_properties"}) +archetypes = archetypes.sort_values("n_properties", ascending=False) +archetypes["running_total"] = archetypes["n_properties"].cumsum() +archetypes["cumulative_percentage"] = archetypes["running_total"] / archetypes["n_properties"].sum() * 100 + +archetypes["is_electric"] = archetypes["landlord_heating_system"] != "boiler - other fuel" +archetypes["needs_heating_upgrade"] = archetypes["landlord_heating_system"].isin( + ["boiler - other fuel", "electric storage heaters"] +) +archetypes = archetypes.reset_index(drop=True) + +# Right now, they don't want to treat the oil properties so we'll exclude them for the moment +electric_heated_archetypes = ( + archetypes[archetypes["landlord_heating_system"] != "boiler - other fuel"].copy().reset_index(drop=True) +) +electric_heated_archetypes["running_total"] = electric_heated_archetypes["n_properties"].cumsum() +electric_heated_archetypes["cumulative_percentage"] = ( + electric_heated_archetypes["running_total"] / electric_heated_archetypes["n_properties"].sum() * 100 +) + +# The main properties that need validation surveys are properties that require a heating upgrade +electric_heated_archetypes = electric_heated_archetypes[electric_heated_archetypes["needs_heating_upgrade"]] +electric_heated_archetypes = electric_heated_archetypes.merge( + archetypes[["starting_sap_band", "floor_area_band", "landlord_heating_system", "archetype_id"]], + how="left", on=["starting_sap_band", "floor_area_band", "landlord_heating_system"] +) + +oil_archetypes = archetypes[ + archetypes["landlord_heating_system"] == "boiler - other fuel" + ].copy().reset_index(drop=True) + +archetypes["archetype_id"] = archetypes.index + +asset_list = asset_list.merge( + archetypes[["starting_sap_band", "floor_area_band", "landlord_heating_system", "archetype_id"]], + how="left", on=["starting_sap_band", "floor_area_band", "landlord_heating_system"] +) + +properties_for_verification = asset_list[ + asset_list["archetype_id"].isin(electric_heated_archetypes["archetype_id"].values) +].copy() +properties_for_verification["postal_region"] = properties_for_verification["domna_postcode"].str.split(" ").str[ + 0].str.strip() + +properties_for_verification["epc_age"] = ( + pd.Timestamp.now() - pd.to_datetime(properties_for_verification["epc_inspection_date"]) +).dt.days + +# We also survey 2 oil heater properties, so we take the 2 most prevelant archetypes +archetypes_for_survey = pd.concat( + [electric_heated_archetypes, oil_archetypes.head(2)] +) + +# Take the property with the oldest EPC, by region. Prioritise estimated properties +sample = [] +for _, config in archetypes_for_survey.iterrows(): + properties = asset_list[ + (asset_list["archetype_id"] == config["archetype_id"]) & + (asset_list["floor_area_band"] == config["floor_area_band"]) & + (asset_list["starting_sap_band"] == config["starting_sap_band"]) + ] + + if pd.isnull(properties["epc_inspection_date"]).sum(): + sample_property = properties[pd.isnull(properties["epc_inspection_date"])].head(1).to_dict("records") + else: + # Take the property with the oldest EPC + sample_property = properties.sort_values("epc_inspection_date", ascending=True).head(1).to_dict("records") + + sample.extend(sample_property) + +sample = pd.DataFrame(sample) + +sample = sample[ + [ + "landlord_property_id", "epc_inspection_date", "epc_sap_score_on_register", "starting_sap_band", + "floor_area_band", "landlord_heating_system", "domna_postcode", "domna_full_address", "archetype_id" + ] +] + +archetypes = asset_list[["landlord_property_id", "archetype_id"]].copy() +archetypes["archetype_id"] = archetypes["archetype_id"].astype(str) + +filename = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Wates - Futures Housing/archetypes.xlsx" +# Store the data in two tabs. One for the asset list with the EPC data and the second with the flat data + +with pd.ExcelWriter(filename) as writer: + archetypes.to_excel(writer, sheet_name="Archetypes", index=False) + sample.to_excel(writer, sheet_name="Survey Sample", index=False) + +# We store this + +# Questions: +# 1) If futures are considering changing properties that have oil heating systems, we could include them and +# we have 39 total archetypes. Otherwise, we have 25 archetypes +# 2) Can futures provide us with any information on the model of air source heat pumps and associated controls they're +# using + +# Recommendations: +# 1) If they are willing to upgrade the heating systems of the oil properties, surveying 18 properties will cover +# From 383a4852e207b2879fd5de21316a1a8fda9ceb3f Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 19 Jun 2025 08:15:01 +0100 Subject: [PATCH 12/14] created block splitting code for calico asset list --- asset_list/AssetList.py | 197 ++++++++++++++++++++++++---------------- 1 file changed, 120 insertions(+), 77 deletions(-) diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py index 62016239..acca0c58 100644 --- a/asset_list/AssetList.py +++ b/asset_list/AssetList.py @@ -1025,6 +1025,15 @@ class AssetList: self.standardised_asset_list[self.STANDARD_SAP] ) + has_blocks_of_flats = (self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "block of flats").sum() + + # Perform block splitting, ahead of fetching the EPC data + # If we blocks of flats, without a landlord block reference, we create this + self.fill_landlord_block_reference(has_blocks_of_flats) + + # If we have blocks of flats, we split these out into individual units. + self.split_blocks() + def merge_data(self, df: pd.DataFrame): """ Used to insert data into the standardised asset list, based on the domna property id @@ -1270,6 +1279,12 @@ class AssetList: ) ) + self.standardised_asset_list["SAP Category"] = np.where( + pd.isnull(self.standardised_asset_list[self.STANDARD_SAP]), + "SAP Unknown", + self.standardised_asset_list["SAP Category"] + ) + else: # We add a SAP category for all work type identification # We break into 4 categories (54 or less, 55-68, 69-74, 75 or more) @@ -1290,6 +1305,11 @@ class AssetList: ), ) ) + self.standardised_asset_list["SAP Category"] = np.where( + pd.isnull(self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]]), + "SAP Unknown", + self.standardised_asset_list["SAP Category"] + ) # Before we being, we identify if a property has solar already as we use this # for identifying cavity jobs @@ -2040,6 +2060,100 @@ class AssetList: self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE] ) + def split_blocks(self): + """ + Where we have a single row that is a block of flats, we split this into multiple rows, + one for each unit. The data that we have will be copied across rows + :return: + """ + + blocks = self.standardised_asset_list[ + self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "block of flats" + ].copy() + + if blocks.empty: + return + + RANGE_RE = re.compile(r'\b(\d+[A-Za-z]?)\s*[-–]\s*(\d+[A-Za-z]?)\b') + NUM_RE = re.compile(r'\b\d+[A-Za-z]?\b') # captures 12, 12A, etc. + + expanded_rows = [] + for _, row in blocks.iterrows(): + addr = str(row[self.STANDARD_ADDRESS_1]) + + # 1 ─ Range (e.g. 1-7) + m_range = RANGE_RE.search(addr) + if m_range: + start, end = m_range.groups() + start, end = int(re.match(r'\d+', start)[0]), int(re.match(r'\d+', end)[0]) + if start > end or (end - start) > 100: + raise ValueError(f"Suspicious range '{addr}'") + for n in range(start, end + 1): + new = row.copy() + new_addr = RANGE_RE.sub(str(n), addr, count=1) + original_full_address = new[self.STANDARD_FULL_ADDRESS] + new_full_address = original_full_address.replace(addr, new_addr) + new[self.STANDARD_ADDRESS_1] = new_addr + new[self.STANDARD_FULL_ADDRESS] = new_full_address + new[self.STANDARD_PROPERTY_TYPE] = "flat" + # Keep a record of the previous address 1 + new["block_address1"] = addr + new["block_full_address"] = original_full_address + new["is_expended_block"] = True + # We update the full address + + new[self.DOMNA_PROPERTY_ID] = f"{row[self.DOMNA_PROPERTY_ID]}-{new_addr}" + expanded_rows.append(new) + continue + + # 2 ─ Explicit list (e.g. 1, 2, 5 Block) + nums = NUM_RE.findall(addr) + if len(nums) > 1 and ',' in addr: + for n in nums: + new = row.copy() + new_addr = re.sub(NUM_RE, n, addr, count=1) # replace the first number only + new[self.STANDARD_ADDRESS_1] = new_addr + new[self.DOMNA_PROPERTY_ID] = f"{row[self.DOMNA_PROPERTY_ID]}-{new_addr}" + expanded_rows.append(new) + continue + + # 3 ─ Single number or no number, treat as individual dwelling + if (len(nums) == 1) or not nums: + expanded_rows.append(row) + continue + + # Anything else with digits is unrecognised + raise NotImplementedError(f"Unhandled block format: '{addr}'") + + expanded_blocks = pd.DataFrame(expanded_rows) + + # We drop the blocks from the standardised asset list and append on the expanded blocks + self.standardised_asset_list = self.standardised_asset_list[ + self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] != "block of flats" + ] + + self.standardised_asset_list = pd.concat( + [self.standardised_asset_list, expanded_blocks], + ignore_index=True + ) + + # As a final clean up, for any blocks that are size 1, we don't includr a project code + sizes = ( + expanded_blocks + .groupby(self.STANDARD_BLOCK_REFERENCE)[self.DOMNA_PROPERTY_ID] + .nunique() + .reset_index() + ) + size_1 = sizes[sizes[self.DOMNA_PROPERTY_ID] <= 1] + # Remove the size 1 blocks from the standardised asset list + self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE] = np.where( + self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE].isin( + size_1[self.STANDARD_BLOCK_REFERENCE].values + ), + None, + self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE] + ) + def label_property_status(self): """ This function is designed to be run after identify_worktypes() has been run, and will create a "property_status" @@ -2081,85 +2195,14 @@ class AssetList: # These blocks may be refecence via the landlord_block_reference field, or by property types being # blocks of flats has_landlord_block_reference = self.landlord_block_reference is not None - has_blocks_of_flats = (self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "block of flats").sum() - if has_landlord_block_reference or has_blocks_of_flats: + if has_landlord_block_reference: - # If we blocks of flats, without a landlord block reference, we create this - self.fill_landlord_block_reference(has_blocks_of_flats) - - self.split_blocks(has_blocks_of_flats) - - def split_blocks(self, has_blocks_of_flats): - """ - Where we have a single row that is a block of flats, we split this into multiple rows, - one for each unit. The data that we have will be copied across rows - :param self: - :param has_blocks_of_flats: - :return: - """ - - blocks = self.standardised_asset_list[ - self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "block of flats" - ].copy() - - RANGE_RE = re.compile(r'\b(\d+[A-Za-z]?)\s*[-–]\s*(\d+[A-Za-z]?)\b') - NUM_RE = re.compile(r'\b\d+[A-Za-z]?\b') # captures 12, 12A, etc. - - expanded_rows = [] - for _, row in blocks.iterrows(): - addr = str(row[self.STANDARD_ADDRESS_1]) - - # 1 ─ Range (e.g. 1-7) - m_range = RANGE_RE.search(addr) - if m_range: - start, end = m_range.groups() - start, end = int(re.match(r'\d+', start)[0]), int(re.match(r'\d+', end)[0]) - if start > end or (end - start) > 100: - raise ValueError(f"Suspicious range '{addr}'") - for n in range(start, end + 1): - new = row.copy() - new_addr = RANGE_RE.sub(str(n), addr, count=1) - new[self.STANDARD_ADDRESS_1] = new_addr - new[self.DOMNA_PROPERTY_ID] = f"{row[self.DOMNA_PROPERTY_ID]}-{new_addr}" - expanded_rows.append(new) - continue - - # 2 ─ Explicit list (e.g. 1, 2, 5 Block) - nums = NUM_RE.findall(addr) - if len(nums) > 1 and ',' in addr: - for n in nums: - new = row.copy() - new_addr = re.sub(NUM_RE, n, addr, count=1) # replace the first number only - new[self.STANDARD_ADDRESS_1] = new_addr - new[self.DOMNA_PROPERTY_ID] = f"{row[self.DOMNA_PROPERTY_ID]}-{new_addr}" - expanded_rows.append(new) - continue - - # 3 ─ Single number → treat as individual dwelling - if len(nums) == 1: - expanded_rows.append(row) - continue - - # 4 ─ No numbers → keep as-is - if not nums: - expanded_rows.append(row) - continue - - # Anything else with digits is unrecognised - raise NotImplementedError(f"Unhandled block format: '{addr}'") - - expanded_blocks = pd.DataFrame(expanded_rows) - - # We drop the blocks from the standardised asset list and append on the expanded blocks - self.standardised_asset_list = self.standardised_asset_list[ - self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] != "block of flats" - ] - - self.standardised_asset_list = pd.concat( - [self.standardised_asset_list, expanded_blocks], - ignore_index=True - ) + # # If we blocks of flats, without a landlord block reference, we create this + # self.fill_landlord_block_reference(has_blocks_of_flats) + # + # # If we have blocks of flats, we split these out into individual units + # self.split_blocks() # For blocks that have a 50% allocation, we create project codes self.block_analysis() From 980f439f49e2a1504099cd9204e79cbba328e951 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 19 Jun 2025 22:48:25 +0100 Subject: [PATCH 13/14] debugging calico epc search to handle the strict blocks --- asset_list/AssetList.py | 94 ++++++++++++++++------- asset_list/app.py | 4 - asset_list/hubspot/prepare_for_hubspot.py | 14 ++-- asset_list/utils.py | 8 +- backend/Funding.py | 93 ++++++++++++++++++---- backend/SearchEpc.py | 27 ++++++- backend/tests/test_funding.py | 52 +++++++++++++ 7 files changed, 236 insertions(+), 56 deletions(-) create mode 100644 backend/tests/test_funding.py diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py index acca0c58..130d1242 100644 --- a/asset_list/AssetList.py +++ b/asset_list/AssetList.py @@ -298,7 +298,7 @@ class AssetList: "Has the property been re-walled?", "Is the property tile hung?", "Does the property have a render?", "Does the property have cladding?", "Gable Wall Obstructions", "Does the property have foliage that needs removal?", - "Potential unsafe environment", "Date of Inspection" + "Potential unsafe environment", "Date of Inspection", "Borescoped?" ] NON_INTRUSIVES_ELIGIBILITY_COLUMN = "Eligibility (Red/Yellow/Green)" @@ -354,6 +354,7 @@ class AssetList: # Work type prefixes: # Empties EMPTY_CAVITY_NON_INTRUSIVE = "Non-Intrusive Data Shows Empty Cavity" + EMPTY_CAVITY_NON_INTRUSIVE_YEAR = 'Non-Intrusive Data Shows Empty Cavity, built after 2002' EPC_EMPTY_INSPECTIONS_RETRO_DRILLED = "EPC Shows Empty Cavity, inspections show retro drilled" EPC_EMPTY_INSPECTIONS_FILLED = "EPC Shows Empty Cavity, inspections show filled or other" EPC_EMPTY_INSPECTIONS_FILLED_AT_BUILD = "EPC Shows Empty Cavity, inspections show filled at build" @@ -1280,7 +1281,8 @@ class AssetList: ) self.standardised_asset_list["SAP Category"] = np.where( - pd.isnull(self.standardised_asset_list[self.STANDARD_SAP]), + pd.isnull(self.standardised_asset_list[self.STANDARD_SAP]) & + pd.isnull(self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]]), "SAP Unknown", self.standardised_asset_list["SAP Category"] ) @@ -1745,8 +1747,6 @@ class AssetList: self.standardised_asset_list["solar_epc_loft_needs_topup"] ) - z = self.standardised_asset_list[self.standardised_asset_list["landlord_property_id"] == "DW150120029"] - self.standardised_asset_list["solar_eligible"] = ( # Property isn't a flag not_a_flat & @@ -2035,14 +2035,15 @@ class AssetList: self.standardised_asset_list["cavity_reason"] = np.where( (self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "flat") & (self.standardised_asset_list["SAP Category"] == "SAP Rating 76 or more"), - None, + self.standardised_asset_list["cavity_reason"] + " - (unlikely to quality)", self.standardised_asset_list["cavity_reason"] ) + # Split cavity_reason on the colon and check if the first part is equal to one of the two options above # that indicates empties self.standardised_asset_list["identified_empty_cavity"] = ( self.standardised_asset_list["cavity_reason"].str.split(":").str[0].isin( - [self.EMPTY_CAVITY_NON_INTRUSIVE, self.EPC_EMPTY] + [self.EMPTY_CAVITY_NON_INTRUSIVE, self.EMPTY_CAVITY_NON_INTRUSIVE_YEAR, self.EPC_EMPTY] ) ) @@ -2078,6 +2079,7 @@ class AssetList: NUM_RE = re.compile(r'\b\d+[A-Za-z]?\b') # captures 12, 12A, etc. expanded_rows = [] + for _, row in blocks.iterrows(): addr = str(row[self.STANDARD_ADDRESS_1]) @@ -2194,16 +2196,9 @@ class AssetList: # if we have any blocks, where work is eligible, we flag them now # These blocks may be refecence via the landlord_block_reference field, or by property types being # blocks of flats - has_landlord_block_reference = self.landlord_block_reference is not None + has_landlord_block_reference = sum(~pd.isnull(self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE])) if has_landlord_block_reference: - - # # If we blocks of flats, without a landlord block reference, we create this - # self.fill_landlord_block_reference(has_blocks_of_flats) - # - # # If we have blocks of flats, we split these out into individual units - # self.split_blocks() - # For blocks that have a 50% allocation, we create project codes self.block_analysis() # find any block refs with more than 50% emptires @@ -2265,13 +2260,18 @@ class AssetList: block_analysis = [] for block_reference, group in self.standardised_asset_list.groupby(self.STANDARD_BLOCK_REFERENCE): + cavity_breakdown = group["cavity_reason"].fillna("No Eligibility").value_counts(normalize=True) * 100 if all(cavity_breakdown.index == "No Eligibility"): continue # We check the % of empty vs not empty as right now, we're focused on empty - n_empties = ((group["identified_empty_cavity"] == True) & (~pd.isnull(group["cavity_reason"]))).sum() + n_empties = ( + (group["identified_empty_cavity"] == True) & + (~pd.isnull(group["cavity_reason"])) & + (~group["cavity_reason"].str.contains("(unlikely to quality)", case=False, na=False, regex=False)) + ).sum() works = group["hubspot_status"] above_threshold = works.map(LABEL_TO_ENUM.get).dropna() @@ -2293,6 +2293,36 @@ class AssetList: block_analysis = block_analysis.fillna(0) # We flag which properties are eligible for works. We need at least 50% + block_analysis["Eligible for Works"] = ( + block_analysis["Percentage of Empties"] >= 0.50 + ) + block_analysis = block_analysis.sort_values("Percentage of Empties", ascending=False) + + # For properties that are NOT eligible, we should update the cavity reason + ineligible_blocks = block_analysis[ + ~block_analysis["Eligible for Works"] + ]["Block Reference"].values + + eligible_blocks = block_analysis[ + block_analysis["Eligible for Works"] + ]["Block Reference"].values + + self.standardised_asset_list["cavity_reason"] = np.where( + self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE].isin(ineligible_blocks), + self.standardised_asset_list["cavity_reason"] + " (Flat in block with less than 50% eligible)", + self.standardised_asset_list["cavity_reason"] + ) + + # if the property is in a block of flats that eligible, but the property itself is not eligible, we flag this + # The criteria is: + # =The property should be in a block of flats + + self.standardised_asset_list["cavity_reason"] = np.where( + self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE].isin(eligible_blocks), + self.standardised_asset_list["cavity_reason"] + + " " + "(Flat in block with more than 50% eligible, but not eligible itself)", + self.standardised_asset_list["cavity_reason"] + ) self.block_analysis_df = block_analysis @@ -2434,13 +2464,13 @@ class AssetList: ) # Format the two date columns - programme_data["survey_week"] = pd.to_datetime(programme_data["survey_week"], errors="coerce") + programme_data["survey_date"] = pd.to_datetime(programme_data["survey_date"], errors="coerce") programme_data[self.EPC_API_DATA_NAMES["inspection-date"]] = pd.to_datetime( programme_data[self.EPC_API_DATA_NAMES["inspection-date"]], errors="coerce" ) # Convert to dd/mm/yyyy format - programme_data["survey_week"] = programme_data["survey_week"].dt.strftime("%d/%m/%Y") + programme_data["survey_date"] = programme_data["survey_date"].dt.strftime("%d/%m/%Y") programme_data[self.EPC_API_DATA_NAMES["inspection-date"]] = ( programme_data[self.EPC_API_DATA_NAMES["inspection-date"]].dt.strftime("%d/%m/%Y") ) @@ -2457,12 +2487,14 @@ class AssetList: ready_to_be_scheduled = ( ( programme_data["hubspot_status"] == hubspot_config.HubspotProcessStatus.READY_TO_BE_SCHEDULED.label - ) & (~pd.isnull(programme_data["survey_week"]) & ~pd.isnull(programme_data["surveyor"])) + ) & (~pd.isnull(programme_data["survey_date"])) ) - completed_works = ( - programme_data["hubspot_status"] != hubspot_config.HubspotProcessStatus.READY_TO_BE_SCHEDULED.label - ) - programme_data = programme_data[ready_to_be_scheduled | completed_works] + # completed_works = ( + # (programme_data["hubspot_status"] != + # hubspot_config.HubspotProcessStatus.READY_TO_BE_SCHEDULED.label) & + # (~pd.isnull(programme_data["hubspot_status"])) + # ) + programme_data = programme_data[ready_to_be_scheduled] # Merge on the contact details programme_data = programme_data.merge( @@ -2505,11 +2537,13 @@ class AssetList: self.CRM_HISTORICAL_CAVITY_PRODUCT["name"] ) else: + # We shouldn't have any missing products programme_data = programme_data[ - ~pd.isnull(programme_data["domna_product"]) & - ~pd.isnull(programme_data["surveyor"]) & - ~pd.isnull(programme_data["survey_week"]) - ] + ~pd.isnull(programme_data["survey_date"]) + ] + + if pd.isnull(programme_data["domna_product"]).sum(): + raise ValueError("Missing products") programme_data = programme_data.drop(columns=["solar_product", "cavity_product"]) product_df = ( @@ -2686,7 +2720,7 @@ class AssetList: 'Last EPC: Room Height ': self.EPC_API_DATA_NAMES["floor-height"], 'Last EPC: Age Band ': self.EPC_API_DATA_NAMES["construction-age-band"], 'Pipeline ': 'Pipeline ', - 'Expected Commencement Date ': "survey_week", + 'Expected Commencement Date ': "survey_date", 'Deal Name ': "dealname", # Need to create this, 'Product ID ': 'Product ID ', 'Name ': 'Name ', @@ -2724,7 +2758,11 @@ class AssetList: # The listing owner email is the same as the surveyor email (deal owner), so they can see the listing programme_data['Listing Owner Email '] = programme_data['Deal Owner'] programme_data['Amount '] = 0 - programme_data["Deal Owner"] = programme_data["Deal Owner"].str.lower() + programme_data["Deal Owner"] = np.where( + ~pd.isnull(programme_data["Deal Owner"]), + programme_data["Deal Owner"].astype(str).str.lower(), + programme_data["Deal Owner"] + ) # We make sure we have all of the columns that we need missed_columns = [c for c in hubspot_config.CRM_UPLOAD_COLUMNS if c not in programme_data.columns] diff --git a/asset_list/app.py b/asset_list/app.py index 08164c19..8158becc 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -553,13 +553,9 @@ def app(): ) asset_list.merge_data(epc_df) - asset_list.extract_attributes() - asset_list.identify_worktypes() - pprint(asset_list.work_type_figures) - # We now flag the status of the property asset_list.label_property_status() asset_list.analyse_geographies() diff --git a/asset_list/hubspot/prepare_for_hubspot.py b/asset_list/hubspot/prepare_for_hubspot.py index 9ffe24ca..0d0abcb2 100644 --- a/asset_list/hubspot/prepare_for_hubspot.py +++ b/asset_list/hubspot/prepare_for_hubspot.py @@ -19,19 +19,19 @@ def app(): # inputs: reconcile_programme = False # If True, the hubspot upload will include all properties with a project code - customer_domain = "https://sandwell.gov.uk" + customer_domain = "https://livewest.co.uk" installer_name = "J & J CRUMP" asset_list_filepath = ( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Sandwell/Hubspot/Sandwell BC - Full Asset List MAIN - " - "Standardised.xlsx" + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Hubspot/Livewest South-West - Standardised " + "V2.xlsx" ) - asset_list_sheet_name = "Proposed Program" - asset_list_header = 1 + asset_list_sheet_name = "Standardised Asset List" + asset_list_header = 0 contact_details_filepath = ( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Sandwell/Hubspot/Sandwell Contact Details.xlsx" + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Hubspot/23.06 Livewest Contacts.xlsx" ) - contacts_sheet_name = "Sheet1" + contacts_sheet_name = "Contact Information" contacts_landlord_property_id = "landlord_property_id" contacts_phone_number_column = "phone_number" contacts_secondary_phone_number_column = "secondary_phone_number" diff --git a/asset_list/utils.py b/asset_list/utils.py index ff9db3f8..61dcf8ea 100644 --- a/asset_list/utils.py +++ b/asset_list/utils.py @@ -79,7 +79,13 @@ def get_data( uprn=uprn ) # Force the skipping of estimating the EPC - searcher.ordnance_survey_client.property_type = None + # We check if the property was split + if home["is_expended_block"]: + searcher.ordnance_survey_client.property_type = "Flat" + searcher.property_type = "Flat" + searcher.set_strict_property_type_search() + else: + searcher.ordnance_survey_client.property_type = None searcher.ordnance_survey_client.built_form = None searcher.find_property(skip_os=True) diff --git a/backend/Funding.py b/backend/Funding.py index 78440eac..49d2d293 100644 --- a/backend/Funding.py +++ b/backend/Funding.py @@ -5,7 +5,7 @@ from typing import List from backend.app.plan.schemas import HousingType -class Funding: +class FundingOld: """ Given a property, this class identifies if the home is possibly eligible for funding under the various funding schemes. It will also calculate the expected amount of funding available @@ -413,13 +413,32 @@ class Funding: self.whlg() -class Funding2: +class Funding: """ New class to handle funding calculation """ - def __init__(self, tenure: HousingType): + def __init__( + self, + tenure: HousingType, + social_cavity_abs_rate: float, + social_solid_abs_rate: float, + private_cavity_abs_rate: float, + private_solid_abs_rate: float, + project_scores_matrix, + whlg_eligible_postcodes + ): self.tenure = tenure + self.social_cavity_abs_rate = social_cavity_abs_rate + self.social_solid_abs_rate = social_solid_abs_rate + self.private_cavity_abs_rate = private_cavity_abs_rate + self.private_solid_abs_rate = private_solid_abs_rate + + self.starting_sap_band = None + self.ending_sap_band = None + self.floor_area_band = None + self.project_scores_matrix = project_scores_matrix + self.whlg_eligible_postcodes = whlg_eligible_postcodes @staticmethod def get_sap_band(sap_score_number): @@ -446,8 +465,22 @@ class Funding2: return None + @staticmethod + def get_floor_area_band(floor_area): + if floor_area <= 72: + return "0-72" + + if floor_area <= 97: + return "73-97" + + if floor_area <= 199: + return "98-199" + + return "200" + + @staticmethod def eco4_prs_eligibility( - self, starting_sap: int, measures: List, mainheat_description: str, heating_control_description: str + starting_sap: int, measures: List, mainheat_description: str, heating_control_description: str ): """ Handles the eligibility criteria for private rental properties under eco @@ -481,31 +514,53 @@ class Funding2: return False + def calculate_full_project_abs(self): + + # Filter the project scores matrix + data = self.project_scores_matrix[ + (self.project_scores_matrix["Floor Area Segment"] == self.floor_area_band) & + (self.project_scores_matrix["Starting Band"] == self.starting_sap_band) & + (self.project_scores_matrix["Finishing Band"] == self.ending_sap_band) + ] + + if data.emtpy: + raise ValueError("Missing abs rate, check the project scores matrix") + + return data["Cost Savings"].values[0] + def check_funding( self, measures: List, starting_sap: int, ending_sap: int, + floor_area: float, mainheat_description: str, - heating_control_description: str + heating_control_description: str, + is_cavity: bool ): """ Given a list of measures, this function will check if the package of measures is fundable :param measures: :param starting_sap: :param ending_sap: + :param floor_area: + :param mainheat_description: + :param heating_control_description: + :param is_cavity: Indicates if the property has cavity wall insulation :return: """ - starting_band = self.get_sap_band(starting_sap) - ending_band = self.get_sap_band(ending_sap) + # If it's an E or D, should get to an EPC C + if starting_sap >= 55 and ending_sap < 69: + raise NotImplementedError("This property doesn't have sufficient SAP movement") - # For ECO4 eligibility, the property needs to end at a C if it starts at a D or E, otherwise should end at a - # D - - if starting_band <= 38 & ending_band >= 55: + if starting_sap <= 38 & ending_sap <= 55: # F or G should get to D raise NotImplementedError("Implement F or G to D eligibility") + self.starting_sap_band = self.get_sap_band(starting_sap) + self.ending_sap_band = self.get_sap_band(ending_sap) + self.floor_area_band = self.get_floor_area_band(floor_area) + ######################## # Private ######################## @@ -513,13 +568,25 @@ class Funding2: # 2) GBIS if self.tenure == "Private": - is_eligible = self.eco4_prs_eligibility( + is_eco4_eligible = self.eco4_prs_eligibility( starting_sap=starting_sap, measures=measures, mainheat_description=mainheat_description, heating_control_description=heating_control_description ) - pass + + # Need to implement + # 1) Package has to include an insulation measure + # 2) We should use the funding for the measure that has the largest partial project score + is_gbis_eligible = () + + if not is_eco4_eligible: + return + eco4_abs = self.calculate_full_project_abs() + # We estimate rates now + eco4_funding = ( + eco4_abs * self.private_cavity_abs_rate if is_cavity else eco4_abs & self.private_solid_abs_rate + ) ######################## # Social diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py index 1ee1f950..16dd8f04 100644 --- a/backend/SearchEpc.py +++ b/backend/SearchEpc.py @@ -160,6 +160,9 @@ class SearchEpc: """ Address lines 1 and postcode are mandatory fields. The other address lines are optional but can be used to find the epc for the home, if address1 and postcode are insufficient + + If you wish to run a strict property type search, please run set_strict_property_type_search() + :param address1: string, propery's address line 1 :param postcode: string, propery's postcode :param full_address: string, optional parameter, the full address of the property @@ -189,6 +192,7 @@ class SearchEpc: self.older_epcs = None self.full_sap_epc = None self.metadata = None + self.strict_property_type_search = False # These are the address and postcode values, which we store in the database self.address_clean = None @@ -199,6 +203,14 @@ class SearchEpc: self.property_type = property_type self.fast = fast + def set_strict_property_type_search(self): + """ + This method sets the strict property type search flag to True. When this flag is set, the search will + only return results that match the specified property type. + :return: + """ + self.strict_property_type_search = True + @staticmethod def get_house_number(address: str, postcode=None) -> str | None: """ @@ -315,6 +327,8 @@ class SearchEpc: address_params["address"] = self.address1 if self.postcode: address_params["postcode"] = self.postcode + if self.strict_property_type_search and self.property_type: + address_params["property-type"] = self.property_type.lower() # We attempt the search with uprn params @@ -365,11 +379,16 @@ class SearchEpc: unique_property_types = {r["property-type"] for r in rows} + is_just_a_house = (len(unique_property_types) == 1) & ( + ("House" in unique_property_types) | ("Bungalow" in unique_property_types) + ) + # We allow for variation in property type across flats/maisonettes # If we know that we have a flat/maisonette, we allow for both property types - if property_type in ["Flat", "Maisonette"]: - if ((len(uprns) == 1) and ((len(unique_property_types) == 1) - ) or unique_property_types == {"Flat", "Maisonette"}): + # Make sure we have not JUST a house, or not JUST a flat/maisonette + if property_type in ["Flat", "Maisonette"] and not is_just_a_house: + if (((len(uprns) == 1) and ((len(unique_property_types) == 1) + ) or unique_property_types == {"Flat", "Maisonette"})): return rows if property_type is not None: @@ -424,6 +443,8 @@ class SearchEpc: return rows + raise ValueError("property type and address cannot both be None, at least one must be provided") + @staticmethod def format_address(newest_epc): """ diff --git a/backend/tests/test_funding.py b/backend/tests/test_funding.py new file mode 100644 index 00000000..311ab589 --- /dev/null +++ b/backend/tests/test_funding.py @@ -0,0 +1,52 @@ +import pytest +import pandas as pd +from utils.s3 import read_csv_from_s3 +from backend.Funding import Funding + + +def get_funding_data(): + """ + This function retrieves the eco project scores matrix and the warm homes local grant funding data + :return: + """ + project_scores_matrix = read_csv_from_s3( + bucket_name="retrofit-data-dev", + filepath="funding/ECO4 Full Project Scores Matrix.csv", + ) + project_scores_matrix = pd.DataFrame(project_scores_matrix) + project_scores_matrix.columns = ['Floor Area Segment', 'Starting Band', 'Finishing Band', 'Cost Savings'] + project_scores_matrix["Cost Savings"] = project_scores_matrix["Cost Savings"].astype(float) + + whlg_eligible_postcodes = read_csv_from_s3( + bucket_name="retrofit-data-dev", + filepath="funding/whlg eligible postcodes.csv", + ) + whlg_eligible_postcodes = pd.DataFrame(whlg_eligible_postcodes) + + return project_scores_matrix, whlg_eligible_postcodes + + +class TestFunding: + + def test_prs(self): + eco_project_scores_matrix, whlg_eligible_postcodes = get_funding_data() + funding = Funding( + project_scores_matrix=eco_project_scores_matrix, + whlg_eligible_postcodes=whlg_eligible_postcodes, + social_cavity_abs_rate=13.5, + social_solid_abs_rate=17, + private_cavity_abs_rate=13.5, + private_solid_abs_rate=17, + tenure="Private", + ) + + measures_1 = ["internal_wall_insulation", "solar_pv"] + funding.check_funding( + measures=measures_1, + starting_sap=54, + ending_sap=69, + floor_area=73, + mainheat_description="Boiler and radiators, mains gas", + heating_control_description="Programmer, room thermostat and TRVs", + is_cavity=True + ) From 127773a19d1400188e67b5a6b797722d597bca78 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 22 Jun 2025 15:34:21 +0100 Subject: [PATCH 14/14] cleaning up hubspot prepare code --- asset_list/AssetList.py | 2 + asset_list/app.py | 96 +++++++++++++++-------- asset_list/hubspot/prepare_for_hubspot.py | 26 ++++-- asset_list/mappings/walls.py | 4 +- asset_list/utils.py | 2 +- 5 files changed, 90 insertions(+), 40 deletions(-) diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py index 130d1242..ad3087c3 100644 --- a/asset_list/AssetList.py +++ b/asset_list/AssetList.py @@ -2442,7 +2442,9 @@ class AssetList: # We check if all products are covered in the lookup table cavity_products = self.standardised_asset_list["cavity_reason"].unique().tolist() + cavity_products = [x for x in cavity_products if not pd.isnull(x)] solar_products = self.standardised_asset_list["solar_reason"].unique().tolist() + solar_products = [x for x in solar_products if not pd.isnull(x)] product_map = {} for identified_product in cavity_products + solar_products: diff --git a/asset_list/app.py b/asset_list/app.py index 8158becc..7c0023ce 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -60,42 +60,76 @@ def app(): Property UPRN """ - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Calico" - data_filename = "07.04 CALICO - Final List.xlsx" - asset_list_header = 2 - sheet_name = "Final List" + # NCHA + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/NCHA" + data_filename = "Energy Information MASTER June 2025.xlsx" + sheet_name = "Data" postcode_column = 'Postcode' - fulladdress_column = None - address1_column = "Property Number / Name" - address1_method = None - address_cols_to_concat = [ - "Property Number / Name", - "Street", - "Town" - ] + fulladdress_column = "Address" + address1_column = None + address1_method = "house_number_extraction" + address_cols_to_concat = [] missing_postcodes_method = None - landlord_year_built = "NROSH Estimated Build Date" + landlord_year_built = "Build Date (HAR10)" landlord_os_uprn = None - landlord_property_type = "Asset Type" - landlord_built_form = None - landlord_wall_construction = "Wall Type" - landlord_heating_system = "Boiler Type" - landlord_existing_pv = None - landlord_property_id = "Asset Reference" - outcomes_filename = [] - outcomes_sheetname = [] - outcomes_postcode = [] - outcomes_houseno = [] - outcomes_id = [] - outcomes_address = [] - master_filepaths = [] - master_id_colnames = [] - master_to_asset_list_filepath = None + landlord_property_type = "Property Type (HAR10)" + landlord_built_form = "Build Form (EPC)" + landlord_wall_construction = "Wall Description" landlord_roof_construction = None - landlord_block_reference = None - landlord_sap = "Current Efficiency Rating - Score" - phase = None + landlord_heating_system = "HEAT Code" + landlord_existing_pv = None + landlord_property_id = "Place ref" + landlord_sap = "EPC SAP" + outcomes_filename = None + outcomes_sheetname = None + outcomes_postcode = None + outcomes_houseno = None + outcomes_id = None + outcomes_address = None + master_filepaths = [] + master_to_asset_list_filepath = None + phase = False ecosurv_landlords = None + asset_list_header = 0 + landlord_block_reference = None + master_id_colnames = [] + + # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Calico" + # data_filename = "07.04 CALICO - Final List.xlsx" + # asset_list_header = 2 + # sheet_name = "Final List" + # postcode_column = 'Postcode' + # fulladdress_column = None + # address1_column = "Property Number / Name" + # address1_method = None + # address_cols_to_concat = [ + # "Property Number / Name", + # "Street", + # "Town" + # ] + # missing_postcodes_method = None + # landlord_year_built = "NROSH Estimated Build Date" + # landlord_os_uprn = None + # landlord_property_type = "Asset Type" + # landlord_built_form = None + # landlord_wall_construction = "Wall Type" + # landlord_heating_system = "Boiler Type" + # landlord_existing_pv = None + # landlord_property_id = "Asset Reference" + # outcomes_filename = [] + # outcomes_sheetname = [] + # outcomes_postcode = [] + # outcomes_houseno = [] + # outcomes_id = [] + # outcomes_address = [] + # master_filepaths = [] + # master_id_colnames = [] + # master_to_asset_list_filepath = None + # landlord_roof_construction = None + # landlord_block_reference = None + # landlord_sap = "Current Efficiency Rating - Score" + # phase = None + # ecosurv_landlords = None # data_folder = ( # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/2018 Asset diff --git a/asset_list/hubspot/prepare_for_hubspot.py b/asset_list/hubspot/prepare_for_hubspot.py index 0d0abcb2..eed6d7e7 100644 --- a/asset_list/hubspot/prepare_for_hubspot.py +++ b/asset_list/hubspot/prepare_for_hubspot.py @@ -19,19 +19,19 @@ def app(): # inputs: reconcile_programme = False # If True, the hubspot upload will include all properties with a project code - customer_domain = "https://livewest.co.uk" + customer_domain = "https://sandwell.gov.uk" installer_name = "J & J CRUMP" asset_list_filepath = ( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Hubspot/Livewest South-West - Standardised " - "V2.xlsx" + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Sandwell/Hubspot/Sandwell BC - Full Asset List MAIN - " + "Standardised.xlsx" ) - asset_list_sheet_name = "Standardised Asset List" - asset_list_header = 0 + asset_list_sheet_name = "Proposed Program" + asset_list_header = 1 contact_details_filepath = ( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Hubspot/23.06 Livewest Contacts.xlsx" + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Sandwell/Hubspot/Sandwell Contact Details.xlsx" ) - contacts_sheet_name = "Contact Information" + contacts_sheet_name = "Sheet1" contacts_landlord_property_id = "landlord_property_id" contacts_phone_number_column = "phone_number" contacts_secondary_phone_number_column = "secondary_phone_number" @@ -41,6 +41,10 @@ def app(): contacts_firstname_column = "firstname" contacts_lastname_column = "lastname" + existing_programme_filepath = ( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Sandwell/Hubspot/property-status.csv" + ) + asset_list = AssetList.load_standardised_asset_list( asset_list_filepath, asset_list_sheet_name, asset_list_header ) @@ -63,6 +67,14 @@ def app(): reconcile_programme=reconcile_programme ) + # Remove the existing programme + existing_programme = pd.read_csv(existing_programme_filepath, encoding="utf-8-sig") + asset_list.hubspot_data = asset_list.hubspot_data[ + ~asset_list.hubspot_data["Domna Property ID "].isin( + existing_programme['Domna Property ID'].values + ) + ] + # Get the filepath and the filename. Append hubspot upload to the filename. We also change the file type to csv directory, filename = os.path.split(asset_list_filepath) name, ext = os.path.splitext(filename) diff --git a/asset_list/mappings/walls.py b/asset_list/mappings/walls.py index 8be8575a..2e0a332f 100644 --- a/asset_list/mappings/walls.py +++ b/asset_list/mappings/walls.py @@ -248,5 +248,7 @@ WALL_CONSTRUCTION_MAPPINGS = { 'No Fines': 'system built', 'Granite/Whinstone': 'granite or whinstone', 'Not applicable to this asset type': 'unknown', - 'Steel Frame': 'system built' + 'Steel Frame': 'system built', + 'Solid Wall As Built': 'uninsulated solid brick', + 'Solid As Built': 'uninsulated solid brick' } diff --git a/asset_list/utils.py b/asset_list/utils.py index 61dcf8ea..1678b8e9 100644 --- a/asset_list/utils.py +++ b/asset_list/utils.py @@ -80,7 +80,7 @@ def get_data( ) # Force the skipping of estimating the EPC # We check if the property was split - if home["is_expended_block"]: + if home.get("is_expended_block"): searcher.ordnance_survey_client.property_type = "Flat" searcher.property_type = "Flat" searcher.set_strict_property_type_search()