diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py index 302b2f2f..66cdf575 100644 --- a/asset_list/AssetList.py +++ b/asset_list/AssetList.py @@ -301,6 +301,14 @@ class AssetList: "Potential unsafe environment", "Date of Inspection", "Borescoped?" ] + # Another version of non-intrusives: + NON_INTRUSIVES_NEW_FORMAT_COLNAMES_V2 = [ + 'Archetype', 'Archetype 2', 'Construction', 'Insulated', 'Material', 'Boroscoped?', + 'CIGA Check Required', 'ROOF ORIENTATION', 'TILE HUNG', 'RENDERED', + 'CLADDING', 'ACCESS ISSUES', 'FURTHER SURVEYOR NOTES', 'DATE', + 'NAME OF SURVEYOR' + ] + NON_INTRUSIVES_ELIGIBILITY_COLUMN = "Eligibility (Red/Yellow/Green)" OLD_FORMAT_NON_INTRUSIVE_COLNAMES = ['WFT Findings', 'ECO Eligibility'] @@ -449,6 +457,8 @@ class AssetList: "Has the property been re-walled?" in self.raw_asset_list.columns ) + self.new_format_non_insturives_present_v2 = 'TILE HUNG' in self.raw_asset_list.columns + # Names of columns self.landlord_property_id = landlord_property_id self.address1_colname = address1_colname @@ -750,7 +760,7 @@ class AssetList: self.rename_map = {k: v for k, v in self.rename_map.items() if k is not None} non_intrusive_columns = [] - if self.non_intrusives_present: + if self.non_intrusives_present and not self.new_format_non_insturives_present_v2: non_intrusive_columns = self.NON_INTRUSIVES_COLNAMES if self.non_intrusives_eligibility: @@ -759,6 +769,9 @@ class AssetList: if self.new_format_non_insturives_present: non_intrusive_columns += self.NON_INTRUSIVES_NEW_FORMAT_COLNAMES + if self.new_format_non_insturives_present_v2: + non_intrusive_columns += self.NON_INTRUSIVES_NEW_FORMAT_COLNAMES_V2 + if self.old_format_non_intrusives_present: # We check if we have the ECO Eligibility column, which we might not have non_intrusive_columns = [ @@ -1308,9 +1321,15 @@ class AssetList: # Before we being, we identify if a property has solar already as we use this # for identifying cavity jobs if self.non_intrusives_present: - 
existing_solar_non_intrusives_check = ( - self.standardised_asset_list["non-intrusives: PV, ACCESS ISSUE, SEE NOTES"] == "SOLAR PV ON ROOF" - ) + + if self.new_format_non_insturives_present_v2: + existing_solar_non_intrusives_check = ( + self.standardised_asset_list["non-intrusives: ROOF ORIENTATION"] == "ALREADY HAS SOLAR PV" + ) + else: + existing_solar_non_intrusives_check = ( + self.standardised_asset_list["non-intrusives: PV, ACCESS ISSUE, SEE NOTES"] == "SOLAR PV ON ROOF" + ) elif self.old_format_non_intrusives_present: existing_solar_non_intrusives_check = ( self.standardised_asset_list["non-intrusives: WFT Findings"].str.lower().str.strip().isin( @@ -1549,7 +1568,7 @@ class AssetList: ) & ( ~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].isin( ["district heating", "communal heating", "communal gas boiler"] - ) & ~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].str.contains("gas ") + ) & ~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].astype(str).str.contains("gas ") ) ) @@ -2081,15 +2100,40 @@ class AssetList: for _, row in blocks.iterrows(): addr = str(row[self.STANDARD_ADDRESS_1]) + full_addr = row[self.STANDARD_FULL_ADDRESS] + + # We also look for terms like "Odd", "even", "all" in the address to indicate if it should be just + # the odds, evens or all of the numbers + has_odd = ( + "(odd)" in addr.lower() or + "(odd)" in full_addr.lower() or + "(odds)" in addr.lower() or + "(odds)" in full_addr.lower() + ) + has_even = ( + "(even)" in addr.lower() or + "(even)" in full_addr.lower() or + "(evens)" in addr.lower() or + "(evens)" in full_addr.lower() + ) # 1 ─ Range (e.g. 
1-7) m_range = RANGE_RE.search(addr) if m_range: + start, end = m_range.groups() start, end = int(re.match(r'\d+', start)[0]), int(re.match(r'\d+', end)[0]) if start > end or (end - start) > 100: raise ValueError(f"Suspicious range '{addr}'") - for n in range(start, end + 1): + + # We define the looping range on whether we have odd, even or all numbers + house_number_range = range(start, end + 1) + if has_odd: + house_number_range = [x for x in house_number_range if x % 2 != 0] + if has_even: + house_number_range = [x for x in house_number_range if x % 2 == 0] + + for n in house_number_range: new = row.copy() new_addr = RANGE_RE.sub(str(n), addr, count=1) original_full_address = new[self.STANDARD_FULL_ADDRESS] @@ -2107,9 +2151,9 @@ class AssetList: expanded_rows.append(new) continue - # 2 ─ Explicit list (e.g. 1, 2, 5 Block) + # 2 ─ Explicit list (e.g. 1, 2, 5 Block) or split by an ampersand (e.g. 1 & 2 Block) nums = NUM_RE.findall(addr) - if len(nums) > 1 and ',' in addr: + if len(nums) > 1 and (',' in addr or '&' in addr): for n in nums: new = row.copy() new_addr = re.sub(NUM_RE, n, addr, count=1) # replace the first number only @@ -2319,7 +2363,7 @@ class AssetList: self.standardised_asset_list["cavity_reason"] = np.where( self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE].isin(eligible_blocks), self.standardised_asset_list["cavity_reason"] - + " " + "(Flat in block with more than 50% eligible, but not eligible itself)", + + " " + "(Flat in block with more than 50% eligible)", self.standardised_asset_list["cavity_reason"] ) @@ -2490,10 +2534,14 @@ class AssetList: if reconcile_programme: programme_data = programme_data[~pd.isnull(programme_data["project_code"])] else: + + if programme_data["hubspot_status"].nunique() > 1: + logger.info("Multiple hubspot_status found - are you sure you don't want to reconcile the programme?") + ready_to_be_scheduled = ( ( programme_data["hubspot_status"] == hubspot_config.HubspotProcessStatus.READY_TO_BE_SCHEDULED.label 
- ) & (~pd.isnull(programme_data["survey_date"])) + ) ) # completed_works = ( # (programme_data["hubspot_status"] != @@ -2544,13 +2592,13 @@ class AssetList: ) else: # We shouldn't have any missing products - programme_data = programme_data[ - ~pd.isnull(programme_data["survey_date"]) - ] + # programme_data = programme_data[ + # ~pd.isnull(programme_data["survey_date"]) + # ] if pd.isnull(programme_data["domna_product"]).sum(): raise ValueError("Missing products") - programme_data = programme_data.drop(columns=["solar_product", "cavity_product"]) + programme_data = programme_data.drop(columns=["solar_product", "cavity_product"]) product_df = ( pd.DataFrame(self.CRM_PRODUCTS).T[["name", "id", "unit_price"]] @@ -2591,6 +2639,13 @@ class AssetList: programme_data[self.EPC_API_DATA_NAMES["uprn"]] ) + # Remove any negative URPSN which are not valid + programme_data[uprn_column] = np.where( + programme_data["estimated"].isin([1, True]), + None, + programme_data[uprn_column] + ) + # Add in some columns if we have them date_of_inspections = ( "Non-Intrusives: Date of Inspection" if @@ -2757,6 +2812,7 @@ class AssetList: columns={v: k for k, v in schema_mappings.items() if v is not None} ) + programme_data['Postcode '] = programme_data['Postcode '].copy() programme_data['Installer '] = installer_name programme_data['Name '] = ( programme_data['Full Address '] + " ," + programme_data['Postcode '] @@ -3225,6 +3281,8 @@ class AssetList: install_col = 'INSTALL/ CANCELLATION DATE' elif "INSTALL/CANCELLATION DATE" in master_data.columns: install_col = "INSTALL/CANCELLATION DATE" + elif 'Measure 1 Install Date' in master_data.columns: + install_col = 'Measure 1 Install Date' else: raise ValueError("No install or cancellation date") @@ -3264,6 +3322,8 @@ class AssetList: property_type_col = "PROPERTY TYPE As per table emailed" elif "PROPERTY TYPE As per table emailed" in master_data.columns: property_type_col = "PROPERTY TYPE As per table emailed" + elif "PROPERTY TYPE" in 
master_data.columns: + property_type_col = "PROPERTY TYPE" else: property_type_col = "PROPERTY TYPE (SEE DEEMED SCORES SHEET) Eg. 3W_Flat_1 (As per Matrix)" diff --git a/asset_list/app.py b/asset_list/app.py index 63ccf9cf..93739b8b 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -59,47 +59,151 @@ def app(): Property UPRN """ - # Southern - Jan list - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southern/New Programme/Jan 2025 List" - data_filename = "SOUTHERN ASSETS January 2025 Additions Query 21.03.2025.xlsx" - sheet_name = "Jan 2025 additions" - postcode_column = 'Post Code' - fulladdress_column = None - address1_column = "NO." - address1_method = None - address_cols_to_concat = ["NO.", "Street / Block Name", "Town/Area"] + # Pickering and Ferens + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Pickering & Ferens" + data_filename = "SAP 9 vs SAP 10 Sava Intelligent Energy - Property List (190625).xlsx" + sheet_name = "Sava Intelligent Energy - Prope" + postcode_column = 'Postcode' + fulladdress_column = 'Address' + address1_column = None + address1_method = "house_number_extraction" + address_cols_to_concat = [] missing_postcodes_method = None landlord_year_built = None landlord_os_uprn = None - landlord_property_type = "Archetype" # Using the inspections property type - landlord_built_form = "Archetype" + landlord_property_type = "Property Type" # Using the inspections property type + landlord_built_form = "Archetype 2" landlord_wall_construction = None landlord_roof_construction = None landlord_heating_system = None landlord_existing_pv = None - landlord_property_id = "SH Property Reference" - landlord_sap = None - outcomes_filename = [ - os.path.join(data_folder, "RT - Southern Housing Group - JJC.xlsx"), - os.path.join(data_folder, "RT - SOUTHERN OUTCOMES - SCIS Merged.xlsx"), - ] - outcomes_sheetname = ["Feedback", "Collated"] - outcomes_postcode = ["Poscode", "Postcode"] - outcomes_houseno = 
["No.", "No"] - outcomes_id = ["UPRNs", None] - outcomes_address = ["Address", "Address"] + landlord_property_id = "UPRN" + landlord_sap = "SAP Rating (RdSAP 10)" + outcomes_filename = [] + outcomes_sheetname = [] + outcomes_postcode = [] + outcomes_houseno = [] + outcomes_id = [] + outcomes_address = [] master_filepaths = [ - os.path.join(data_folder, "southern_submissions/CAVITY'S - DECEMBER 2018-Table 1.csv"), - os.path.join(data_folder, "southern_submissions/CAVITY'S 2019-Table 1.csv"), - os.path.join(data_folder, "southern_submissions/CAVITY'S ECO4-Table 1.csv"), - os.path.join(data_folder, "southern_submissions/LOFT'S-Table 1.csv"), + os.path.join(data_folder, "PICKERING & FERENS ROLLING MASTER SHEET HEDGEFUND - 26.7.24 - K.csv"), + os.path.join(data_folder, "PICKERING & FERENS NEW MASTER GBIS UPDATED 21.8.24 - M - For Analysis.csv"), ] master_to_asset_list_filepath = None phase = False - ecosurv_landlords = "southern" + ecosurv_landlords = "pickering" asset_list_header = 0 landlord_block_reference = None - master_id_colnames = [None, None, None, None] + master_id_colnames = [None, None] + + # Colchester + # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester" + # data_filename = "Warmfront data- Colchester Borough Homes (Complete).xlsx" + # sheet_name = "Sheet1" + # postcode_column = 'Full Address.1' + # fulladdress_column = "Full Address" + # address1_column = None + # address1_method = "first_word" + # address_cols_to_concat = [] + # missing_postcodes_method = None + # landlord_year_built = "Build Date" + # landlord_os_uprn = None + # landlord_property_type = "Property Type" + # landlord_wall_construction = "Wallinsul" + # landlord_heating_system = "HeatSorc" + # landlord_existing_pv = None + # landlord_property_id = "Property Reference" + # outcomes_filename = [] + # outcomes_sheetname = [] + # outcomes_postcode = [] + # outcomes_houseno = [] + # outcomes_id = [] + # outcomes_address = [] + # master_filepaths = [] + # 
master_to_asset_list_filepath = None + # asset_list_header = 0 + # landlord_built_form = None + # landlord_roof_construction = None + # landlord_sap = None + # landlord_block_reference = None + # phase = False + # ecosurv_landlords = None + # master_id_colnames = [] + + # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot" + # data_filename = "EalingFlats.xlsx" + # sheet_name = "Sheet1" + # postcode_column = 'Postcode' + # fulladdress_column = "Address" + # address1_column = None + # address1_method = "house_number_extraction" + # address_cols_to_concat = [] + # missing_postcodes_method = None + # landlord_year_built = None + # landlord_os_uprn = None + # landlord_property_type = None # Using the inspections property type + # landlord_built_form = None + # landlord_wall_construction = None + # landlord_roof_construction = None + # landlord_heating_system = None + # landlord_existing_pv = None + # landlord_property_id = "Property ref" + # landlord_sap = None + # outcomes_filename = [] + # outcomes_sheetname = [] + # outcomes_postcode = [] + # outcomes_houseno = [] + # outcomes_id = [] + # outcomes_address = [] + # master_filepaths = [] + # master_to_asset_list_filepath = None + # phase = False + # ecosurv_landlords = None + # asset_list_header = 0 + # landlord_block_reference = "Block Ref" + # master_id_colnames = [] + + # Southern - Jan list + # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southern/New Programme/Jan 2025 List" + # data_filename = "SOUTHERN ASSETS January 2025 Additions Query 21.03.2025.xlsx" + # sheet_name = "Jan 2025 additions" + # postcode_column = 'Post Code' + # fulladdress_column = None + # address1_column = "NO." 
+ # address1_method = None + # address_cols_to_concat = ["NO.", "Street / Block Name", "Town/Area"] + # missing_postcodes_method = None + # landlord_year_built = None + # landlord_os_uprn = None + # landlord_property_type = None # Using the inspections property type + # landlord_built_form = None + # landlord_wall_construction = None + # landlord_roof_construction = None + # landlord_heating_system = None + # landlord_existing_pv = None + # landlord_property_id = "SH Property Reference" + # landlord_sap = None + # outcomes_filename = [ + # os.path.join(data_folder, "RT - Southern Housing Group - JJC.xlsx"), + # os.path.join(data_folder, "RT - SOUTHERN OUTCOMES - SCIS Merged.xlsx"), + # ] + # outcomes_sheetname = ["Feedback", "Collated"] + # outcomes_postcode = ["Poscode", "Postcode"] + # outcomes_houseno = ["No.", "No"] + # outcomes_id = ["UPRNs", None] + # outcomes_address = ["Address", "Address"] + # master_filepaths = [ + # os.path.join(data_folder, "southern_submissions/CAVITY'S - DECEMBER 2018-Table 1.csv"), + # os.path.join(data_folder, "southern_submissions/CAVITY'S 2019-Table 1.csv"), + # os.path.join(data_folder, "southern_submissions/CAVITY'S ECO4-Table 1.csv"), + # os.path.join(data_folder, "southern_submissions/LOFT'S-Table 1.csv"), + # ] + # master_to_asset_list_filepath = None + # phase = False + # ecosurv_landlords = "southern" + # asset_list_header = 0 + # landlord_block_reference = None + # master_id_colnames = [None, None, None, None] # NCHA # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/NCHA" diff --git a/asset_list/hubspot/config.py b/asset_list/hubspot/config.py index 5110fb5f..23ff900a 100644 --- a/asset_list/hubspot/config.py +++ b/asset_list/hubspot/config.py @@ -17,7 +17,7 @@ class HubspotProcessStatus(IntEnum): # The property didn't get access and needs sign off SURVEYED_NO_ACCESS_NEEDS_SIGN_OFF = 2, "SURVEYED - NO ACCESS - NEED SIGN OFF" # The survey has been completed. 
We don't have any update as to whether the property has been installed - SURVEYED_COMPLETED_SIGNED_OFF = 3, "SURVEYED - COMPLETED - SIGNED OFF" + SURVEYED_COMPLETED_SIGNED_OFF = 3, "SURVEYED - AUTOMATED SIGNED OFF" # The property turned out to be ineligibile NOT_VIABLE = 4, "NOT VIABLE" # The property is with the installer. This will likely be the default for historic programmes @@ -79,7 +79,7 @@ CRM_UPLOAD_COLUMNS = [ 'Last EPC: Room Height ', 'Last EPC: Age Band ', 'Deal Stage ', 'Pipeline ', 'Expected Commencement Date ', - 'Deal Name ', 'Project Code ', + 'Deal Name ', 'Project Code ', 'Postcode ', 'Product ID ', 'Name ', 'Unit price ', 'Quantity ', 'Deal Owner', 'Amount ', 'Installer ' ] diff --git a/asset_list/hubspot/prepare_for_hubspot.py b/asset_list/hubspot/prepare_for_hubspot.py index 6c8d9499..b12f4c04 100644 --- a/asset_list/hubspot/prepare_for_hubspot.py +++ b/asset_list/hubspot/prepare_for_hubspot.py @@ -2,6 +2,32 @@ import os import pandas as pd from asset_list.AssetList import AssetList +import re + + +def normalize_uk_phone(number: str | float | int) -> str | None: + if pd.isna(number): + return None + + number = str(number) + number = re.sub(r"[^\d+]", "", number) + + # Handle common short inputs: add '0' if likely missing + if re.match(r"^7\d{8,9}$", number) or re.match(r"^1\d{8,9}$", number): + number = "0" + number + + # Convert to international format + if number.startswith("0"): + number = "+44" + number[1:] + elif number.startswith("0044"): + number = "+" + number[2:] + + # Must be +44 followed by 10 digits (some area codes may vary) + if re.match(r"^\+44\d{9,10}$", number): + return number + + return None + def app(): """ @@ -18,27 +44,26 @@ def app(): """ # inputs: - reconcile_programme = False # If True, the hubspot upload will include all properties with a project code - customer_domain = "https://medway.gov.uk" - installer_name = "SGEC" + reconcile_programme = True # If True, the hubspot upload will include all properties with a 
project code + customer_domain = "https://ealing.gov.uk" + installer_name = "SCIS" asset_list_filepath = ( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Medway/Hubspot/Reviewed programme - 2025-05-27.xlsx" + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot/20250707 Ealing Flats - Prepared " + "programme.xlsx" ) - asset_list_sheet_name = "Finalised Route" + asset_list_sheet_name = "Standardised Asset List" asset_list_header = 0 - contact_details_filepath = ( - None - ) - contacts_sheet_name = "Sheet1" - contacts_landlord_property_id = "landlord_property_id" + contact_details_filepath = None + contacts_sheet_name = "Sheet 1" + contacts_landlord_property_id = "UPRN" contacts_phone_number_column = "phone_number" contacts_secondary_phone_number_column = "secondary_phone_number" contacts_secondary_contact_full_name = "secondary_contact_full_name" contacts_email_column = "email" contacts_fullname_column = "fullname" - contacts_firstname_column = "firstname" - contacts_lastname_column = "lastname" + contacts_firstname_column = "First Name" + contacts_lastname_column = "Last Name" existing_programme_filepath = None @@ -65,12 +90,12 @@ def app(): ) # Remove the existing programme - existing_programme = pd.read_csv(existing_programme_filepath, encoding="utf-8-sig") - asset_list.hubspot_data = asset_list.hubspot_data[ - ~asset_list.hubspot_data["Domna Property ID "].isin( - existing_programme['Domna Property ID'].values - ) - ] + # existing_programme = pd.read_csv(existing_programme_filepath, encoding="utf-8-sig") + # asset_list.hubspot_data = asset_list.hubspot_data[ + # ~asset_list.hubspot_data["Domna Property ID "].isin( + # existing_programme['Domna Property ID'].values + # ) + # ] # Get the filepath and the filename. Append hubspot upload to the filename. 
We also change the file type to csv directory, filename = os.path.split(asset_list_filepath) @@ -86,3 +111,66 @@ def app(): # Just store locally asset_list.hubspot_data.to_csv(output_filepath, index=False, encoding="utf-8-sig") + + # # TODO: Set this up separately, but we associate multiple contacts to the same deal + # contact_details = pd.read_csv( + # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot " + # "Upload/Hubspot/contact " + # "details.csv" + # ) + # + # # contacts_phone_number_column = "phone_number" + # # contacts_secondary_phone_number_column = "secondary_phone_number" + # # contacts_secondary_contact_full_name = "secondary_contact_full_name" + # # contacts_email_column = "email" + # # contacts_fullname_column = "fullname" + # # contacts_firstname_column = "First Name" + # # contacts_lastname_column = "Last Name" + # contact_details["phone_number"] = contact_details["Mobile Phone"].copy() + # # If phone number is NaN, we will use the landline number + # contact_details["phone_number"] = contact_details["phone_number"].fillna(contact_details["Landline"]) + # contact_details["secondary_phone_number"] = contact_details["Landline"].copy() + # # If secondary phone number is the same as primary, we remove it + # import numpy as np + # contact_details["secondary_phone_number"] = np.where( + # contact_details["secondary_phone_number"] == contact_details["phone_number"], + # np.nan, + # contact_details["secondary_phone_number"] + # ) + # contact_details = contact_details[ + # ['Property Reference Number (Main Address) (Property)', "Email Address", "phone_number", + # "secondary_phone_number", "First Name", "Last Name"]].copy().rename( + # columns={"Property Reference Number (Main Address) (Property)": "landlord_proprty_id"} + # ) + # contact_details["fullname"] = contact_details["First Name"] + " " + contact_details["Last Name"] + # # Format the phone numbers + # + # contact_details["phone_number"] = 
contact_details["phone_number"].astype(int).astype(str).apply( + # normalize_uk_phone) + # contact_details["secondary_phone_number"] = contact_details["secondary_phone_number"].astype("Int64").astype( + # str).apply( + # normalize_uk_phone) + # + # # Add in the Hubspot deal data + # hubspot_data = pd.read_csv( + # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/Hubspot/" + # "property-status.csv", + # encoding="utf-8-sig" + # ) + # # Merge on contact details + # contact_details = hubspot_data[["Landlord Property ID", "Deal ID"]].merge( + # contact_details, + # how="left", + # right_on="landlord_proprty_id", + # left_on="Landlord Property ID" + # ) + # + # contact_details = contact_details.drop(columns=["landlord_proprty_id"]) + # + # # Store as csv + # contact_details.to_csv( + # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar " + # "Programme Hubspot Upload/Hubspot/" + # "contact_details.csv", + # index=False, encoding="utf-8-sig" + # ) diff --git a/asset_list/mappings/built_form.py b/asset_list/mappings/built_form.py index 5c4a55b6..0245b71d 100644 --- a/asset_list/mappings/built_form.py +++ b/asset_list/mappings/built_form.py @@ -3,7 +3,7 @@ import numpy as np STANDARD_BUILT_FORMS = { "unknown", # Houses - "end-terrace", "semi-detached", "detached", "mid-terrace", + "end-terrace", "semi-detached", "detached", "mid-terrace", "enclosed mid-terrace", "enclosed end-terrace", # Flats "ground floor", "mid-floor", "top-floor", "basement", "low rise", "high rise", } @@ -359,5 +359,12 @@ BUILT_FORM_MAPPINGS = { '1983-90 MID TERR': 'mid-terrace', '1976-82 SEMI DET': 'semi-detached', 'PRE 1900 MID TERR': 'mid-terrace', - None: 'unknown' + None: 'unknown', + + 'SEMI-DETACHED': 'semi-detached', + 'DETACHED': 'detached', + 'MID TERRACE': 'mid-terrace', + 'END TERRACE': 'end-terrace', + 'ENCLOSED MID': 'enclosed mid-terrace' + } diff --git a/asset_list/utils.py b/asset_list/utils.py index 9affc326..fe2b7d14 
100644 --- a/asset_list/utils.py +++ b/asset_list/utils.py @@ -37,7 +37,9 @@ def get_data( "mid-terrace": "Mid-Terrace", "end-terrace": "End-Terrace", "semi-detached": "Semi-Detached", - "detached": "Detached" + "detached": "Detached", + "enclosed end-terrace": "End-Terrace", + "enclosed mid-terrace": "Mid-Terrace", } epc_data = [] @@ -101,7 +103,6 @@ def get_data( else: # Try splitting on space add1 = full_address.split(" ")[0].strip() - else: add1 = str(house_number) searcher = SearchEpc( diff --git a/etl/customers/Colchester/July 2025 Finalised Route.py b/etl/customers/Colchester/July 2025 Finalised Route.py new file mode 100644 index 00000000..f3ecf2d9 --- /dev/null +++ b/etl/customers/Colchester/July 2025 Finalised Route.py @@ -0,0 +1,54 @@ +import pandas as pd + +comments_df = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester/July Finalised " + "Project/CBH_RetroTeamList_amended_25-06-05.xlsx", +) + +cavity_route = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester/July Finalised " + "Project/20250708 Colchester Borough Homes- Standardised.xlsx", + sheet_name="July 2025 Route - Cavity" +) + +solar_route = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester/July Finalised " + "Project/20250708 Colchester Borough Homes- Standardised.xlsx", + sheet_name="July 2025 Route - Solar" +) + +# Merge on the comments +comments = comments_df[ + ["URPN", 'Unnamed: 6', 'SHDF Live', 'SHDF Removed', 'SHDF Reserve', '25-26 List (138 to EPC)'] +].copy() + +cavity_route = cavity_route.merge( + comments, left_on="landlord_property_id", right_on="URPN", how="left" +) +solar_route = solar_route.merge( + comments, left_on="landlord_property_id", right_on="URPN", how="left" +) + +# Get properties that are not on either route +not_on_routes = comments_df[ + ~comments_df["URPN"].isin(cavity_route["landlord_property_id"]) & + ~comments_df["URPN"].isin(solar_route["landlord_property_id"]) + 
"""One-off clean-up for the Bromford solar PV programme.

Steps, in order:

1. Match every row of the Bromford contact list to a row of the standardised
   asset list (by address + postcode) and store the contact list with the
   matched ``landlord_property_id``.
2. Join the "Master Sheet Solar PV installs" data and build the route, i.e.
   the asset-list rows whose ``landlord_property_id`` equals a "Legacy UPRN".
3. Collate tenant contact details from the three contact exports and derive,
   per property, a best contact row and a primary/secondary contact summary.

All inputs and outputs are hard-coded local files under ``BASE_DIR``.
"""
from pathlib import Path

import numpy as np
import pandas as pd
from tqdm import tqdm

from backend.SearchEpc import SearchEpc

BASE_DIR = Path(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload"
)
CONTACT_LIST_WITH_ID_CSV = (
    BASE_DIR / "Bromford - Solar PV address list - second wave KLD - PP with landlord_property_id.csv"
)

REF_COL = "Property Reference Number (Main Address) (Property)"

# Addresses that could not be matched automatically, resolved by hand.
# Keys are compared verbatim (including trailing spaces) against the raw
# "Address 1: Street 1" value.
MANUAL_PROPERTY_IDS = {
    "1 The Beck": 40692,
    "3 The Beck ": 40693,
    "2 Orchard Close ": 7924,
    "3 Croxall Road": 40650,
    "4 Ward Road ": 33175,
}


def _first_phone(row):
    """Return the row's mobile number, else its landline, else ``None``.

    ``a or b`` cannot be used here: NaN is truthy, so a missing mobile would
    be returned instead of falling back to the landline.
    """
    for col in ("Mobile Phone", "Landline"):
        if pd.notna(row[col]):
            return row[col]
    return None


def _best_contact_row(group):
    """Pick one contact row for a property: prefer rows that have an email,
    then rows that have any phone number, then take the first remaining row."""
    if group["Email Address"].notna().any():
        group = group[group["Email Address"].notna()]
    has_phone = group["Landline"].notna() | group["Mobile Phone"].notna()
    if has_phone.any():
        group = group[has_phone]
    return group.iloc[0]


def _primary_and_secondary(prop_id, group):
    """Summarise a property's contacts as a primary and optional secondary.

    Only rows that have an email AND at least one phone number are
    considered; rows with both phone numbers rank first. The secondary is the
    next-ranked row whose number (in the column used for the primary's phone)
    differs from the primary's. Returns ``None`` when no row qualifies.
    """
    has_phone = group["Landline"].notna() | group["Mobile Phone"].notna()
    candidates = group[group["Email Address"].notna() & has_phone]
    if candidates.empty:
        return None

    score = (
        candidates["Landline"].notna().astype(int)
        + candidates["Mobile Phone"].notna().astype(int)
    )
    # assign() works on a copy (no SettingWithCopyWarning); a stable sort
    # keeps the incoming order among equally scored rows.
    candidates = candidates.assign(contact_score=score).sort_values(
        "contact_score", ascending=False, kind="stable"
    )

    primary = candidates.iloc[0]
    phone_col = "Mobile Phone" if pd.notna(primary["Mobile Phone"]) else "Landline"
    others = candidates.iloc[1:]
    others = others[others[phone_col] != primary[phone_col]]
    secondary = others.iloc[0] if not others.empty else None

    return {
        "Property ID": prop_id,
        "Primary Email": primary["Email Address"],
        "Primary Phone": _first_phone(primary),
        "Secondary Email": secondary["Email Address"] if secondary is not None else None,
        "Secondary Phone": _first_phone(secondary) if secondary is not None else None,
    }


# ---------------------------------------------------------------------------
# 1. Match the contact list to the standardised asset list
# ---------------------------------------------------------------------------
contact_list = pd.read_csv(
    BASE_DIR / "Bromford - Solar PV address list - second wave KLD - PP.csv"
)
contact_list["house_no"] = contact_list.apply(lambda x: SearchEpc.get_house_number(
    address=str(x["Address 1: Street 1"]).strip(),
    postcode=str(x["Postal Code"]).strip(),
), axis=1)

asset_list = pd.read_excel(
    BASE_DIR / "asset_list - Standardised (1).xlsx",
    sheet_name="Standardised Asset List"
)

# Loop-invariant views of the asset list, computed once.
full_address_no_commas = asset_list["domna_full_address"].str.replace(",", "", regex=False)
asset_address_1 = asset_list["domna_address_1"].astype(str)
asset_postcode = asset_list["domna_postcode"]

lookup = []
unmatched_uprns = []
for _, x in tqdm(contact_list.iterrows(), total=len(contact_list)):

    raw_street = x["Address 1: Street 1"]
    if raw_street in MANUAL_PROPERTY_IDS:
        lookup.append(
            {
                "UPRN": x["UPRN"],
                "landlord_property_id": MANUAL_PROPERTY_IDS[raw_street],
            }
        )
        continue

    street = str(raw_street).strip()
    # Literal (non-regex) matching: addresses may contain regex
    # metacharacters; na=False keeps the masks strictly boolean.
    postcode_match = asset_postcode.str.contains(
        str(x["Postal Code"]).strip(), regex=False, na=False
    )

    # Attempt 1: the street line appears somewhere in the full address.
    df = asset_list[
        full_address_no_commas.str.contains(street, regex=False, na=False) & postcode_match
    ]

    # Attempt 2: the full address equals the street line exactly. The
    # parentheses are required: `&` binds tighter than `==`.
    if df.shape[0] != 1:
        df = asset_list[(full_address_no_commas == street) & postcode_match]

    # Attempt 3: same house number within the postcode.
    if df.shape[0] != 1:
        df = asset_list[(asset_address_1 == str(x["house_no"])) & postcode_match]

    if df.shape[0] != 1:
        unmatched_uprns.append(x["UPRN"])
        continue

    lookup.append(
        {
            "UPRN": x["UPRN"],
            "landlord_property_id": df["landlord_property_id"].values[0],
        }
    )

lookup = pd.DataFrame(lookup)

contact_list = contact_list.merge(lookup, how="left", on="UPRN")
# Store
contact_list.to_csv(CONTACT_LIST_WITH_ID_CSV, index=False)

# I manually completed the lookup for the missed ones. We now read it back in
# and pull in the properties for the standardised asset list.
# NOTE(review): `contacts_complete` is read but never used below - the
# in-memory `contact_list` is used instead. Confirm which one was intended.
contacts_complete = pd.read_csv(CONTACT_LIST_WITH_ID_CSV)

# ---------------------------------------------------------------------------
# 2. Build the route from the master sheet
# ---------------------------------------------------------------------------
new_data = pd.read_excel(
    BASE_DIR / "Master Sheet Solar PV installs.xlsx",
    sheet_name="Sheet1"
)

contact_list = contact_list.merge(
    new_data,
    how="left",
    left_on="UPRN",
    right_on="CE UPRN"
)

# Compare property IDs as strings on both sides; mixing int and str IDs makes
# isin() silently match nothing.
contact_list["Legacy UPRN"] = contact_list["Legacy UPRN"].astype("Int64").astype(str)
route = asset_list[
    asset_list["landlord_property_id"].astype(str).isin(contact_list["Legacy UPRN"])
].copy()
route["landlord_property_id"] = route["landlord_property_id"].astype(str)

# Add the new heating data
route2 = contact_list.merge(
    route,
    how="left",
    right_on="landlord_property_id",
    left_on="Legacy UPRN"
)

# Contact-list rows whose Legacy UPRN is not in the asset list (kept for
# inspection only; it is not written to disk).
missed = contact_list[~contact_list["Legacy UPRN"].isin(route["landlord_property_id"])]

# Store the route
route2.to_csv(BASE_DIR / "route.csv", index=False)

# ---------------------------------------------------------------------------
# 3. Collate contact details
# ---------------------------------------------------------------------------
contact_details_filepath = (
    BASE_DIR / "Hubspot"
    / "Bromford - Solar PV address list - second wave KLD - PP with landlord_property_id.xlsx"
)

contacts_filenames = [
    BASE_DIR / "contact details" / "FAO Paul Contact Details-Table 1.csv",
    BASE_DIR / "contact details" / "Green Contact Details-Table 1.csv",
    BASE_DIR / "contact details" / "Main Contact Details-Table 1.csv",
]

merge_to = pd.read_excel(contact_details_filepath)
wanted_refs = merge_to["UPRN"].astype(str)

lookup = []
for fn in contacts_filenames:
    df = pd.read_csv(fn, encoding="utf-8-sig")
    # Keep only the contacts of properties on the programme
    details = df[df[REF_COL].astype(str).isin(wanted_refs)][[
        REF_COL, "Landline", "Mobile Phone", "Email Address",
        "First Name", "Last Name"
    ]]

    lookup.append(details)

lookup = pd.concat(lookup)

# Drop entries where landline, mobile and email are all NaN
lookup = lookup.dropna(subset=["Landline", "Mobile Phone", "Email Address"], how="all")
lookup = lookup.drop_duplicates(["Landline", "Mobile Phone", "Email Address"])
# Sort by property, then email, landline, mobile (missing values sort last)
lookup = lookup.sort_values(
    [REF_COL, "Email Address", "Landline", "Mobile Phone"],
    ascending=[True, True, True, True]
)

# Store
lookup.to_csv(BASE_DIR / "Hubspot" / "contact details.csv", index=False)

# One row per property: prefer an entry with an email, then one with a phone.
lookup2 = pd.DataFrame([_best_contact_row(group) for _, group in lookup.groupby(REF_COL)])

# Primary / secondary contact per property. We use mobile as the primary
# phone number and fall back to the landline.
results = [
    summary
    for prop_id, group in lookup.groupby(REF_COL)
    if (summary := _primary_and_secondary(prop_id, group)) is not None
]
final_df = pd.DataFrame(results)

# TODO: Get into the standardised asset list format
# TODO: Add the deal postcode to Hubspot
# TODO: Upload the deal postcode
surveyed +asset_list2 = asset_list.merge( + project_data[["Property ref", "hubspot_status", "project_code"]], + how="left", + right_on="Property ref", + left_on="incorrect_landlord_property_id", + suffixes=("", "_project") +) +asset_list2["hubspot_status"] = np.where( + ~pd.isna(asset_list2["hubspot_status_project"]), + asset_list2["hubspot_status_project"], + asset_list2["hubspot_status"] +) +asset_list2["project_code"] = np.where( + ~pd.isna(asset_list2["project_code"]), + asset_list2["project_code"], + asset_list2["landlord_property_id"] +) + +asset_list2 = asset_list2.drop(columns=["hubspot_status_project", "project_code_project"]) +asset_list2["cavity_reason"] = np.where( + pd.isnull(asset_list2["cavity_reason"]), + "Non-Intrusive Data Shows Empty Cavity: SAP Rating 55-68", + asset_list2["cavity_reason"] +) +asset_list2["solar_reason"] = None + +# Read in block analysis and geographical areas from standardised asset list +block_analysis_df = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot/20250707 Ealing Flats - Standardised.xlsx", + sheet_name="Block Analysis" +) +geographical_areas = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot/20250707 Ealing Flats - Standardised.xlsx", + sheet_name="Geographical Areas" +) + +# Update the new standardised asset list +filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot/20250707 Ealing Flats - Prepared " + "programme.xlsx") +with pd.ExcelWriter(filename) as writer: + asset_list2.to_excel(writer, sheet_name="Standardised Asset List", index=False) + block_analysis_df.to_excel(writer, sheet_name="Block Analysis", index=False) + geographical_areas.to_excel(writer, sheet_name="Geographical Areas", index=False) diff --git a/etl/customers/mhs/new_programme.py b/etl/customers/mhs/new_programme.py new file mode 100644 index 00000000..6f1caafe --- /dev/null +++ b/etl/customers/mhs/new_programme.py @@ -0,0 +1,116 @@ +# 
+import pandas as pd + +asset_list = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/New programme/12052025 MHS Standardised Asset List - " + "programme.xlsx", + sheet_name="Standardised Asset List" +) + +new_cavity_programme = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/New programme/12052025 MHS Standardised Asset List - " + "programme.xlsx", + sheet_name="New Cavity Programme" +) + +new_cavity_pilot = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/New programme/12052025 MHS Standardised Asset List - " + "programme.xlsx", + sheet_name="Empty Cavity Pilot" +) + +new_solar_programme = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/New programme/12052025 MHS Standardised Asset List - " + "programme.xlsx", + sheet_name="New Solar Programme" +) + +in_fill_properties_houses = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/New programme/Domna updated programme list May 2025 (" + "1).xlsx", + sheet_name="Houses and Bungalows" +) +in_fill_properties_flats = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/New programme/Domna updated programme list May 2025 (" + "1).xlsx", + sheet_name="Flats and Maistonettes" +) +# Q1) What are these properties? Do we have them on our list already? +# All of the houses are already in the asset list +in_fill_properties_houses["is_in_asset_list"] = in_fill_properties_houses["UPRN"].isin( + asset_list["landlord_property_id"].values +) +# All of the flats are already in the asset list +in_fill_properties_flats["is_in_asset_list"] = in_fill_properties_flats["UPRN"].isin( + asset_list["landlord_property_id"].values +) + +# Q2) Which properties are excluded from the new programme? 
# Stack the houses and flats in-fill lists into one frame
in_fill_properties = pd.concat(
    [in_fill_properties_houses, in_fill_properties_flats], ignore_index=True, sort=False
)

# Merge on the data
in_fill_properties = in_fill_properties.merge(
    asset_list,
    left_on="UPRN",
    right_on="landlord_property_id",
    how="left"
)

# How many properties are in the new programme?
in_fill_properties["in_new_cavity_programme"] = in_fill_properties["UPRN"].isin(
    new_cavity_programme["landlord_property_id"]
)
in_fill_properties["in_new_solar_programme"] = in_fill_properties["UPRN"].isin(
    new_solar_programme["landlord_property_id"]
)
in_fill_properties["in_new_cavity_pilot"] = in_fill_properties["UPRN"].isin(
    new_cavity_pilot["landlord_property_id"]
)

# Properties that appear in none of the three new programme sheets
in_any_new_programme = (
    in_fill_properties["in_new_cavity_programme"]
    | in_fill_properties["in_new_solar_programme"]
    | in_fill_properties["in_new_cavity_pilot"]
)
not_in_new_programme = in_fill_properties[~in_any_new_programme].copy()

# Why? (exploratory: the counts are only visible when run interactively)
not_in_new_programme["cavity_reason"].value_counts()
not_in_new_programme["solar_reason"].value_counts()

# Properties with neither a cavity reason nor a solar reason
no_reason_at_all = not_in_new_programme["cavity_reason"].isna() & not_in_new_programme["solar_reason"].isna()
not_identified_for_anything = not_in_new_programme[no_reason_at_all]

# Flag the potential re-inspections which is 994 properties though any extractions we need to consider the HA funding
# the extraction
not_in_new_programme["funded_extractions"] = not_in_new_programme["cavity_reason"].isin(
    [
        "Non-Intrusive Data Shows Cavity Extraction: SAP Rating 69-75",
        "Non-Intrusive Data Shows Cavity Extraction: SAP Rating 55-68",
        "Non-Intrusive Data Shows Cavity Extraction: SAP Rating 76 or more",
        "Non-Intrusive Data Shows Cavity Extraction: SAP Rating 54 or less",
        "EPC Shows Empty Cavity, inspections show non-cavity build: SAP Rating 76 or more",
        "EPC Shows Empty Cavity, inspections show non-cavity build: SAP Rating 54 or less",
        "EPC Shows Empty Cavity, inspections show retro drilled: SAP Rating 54 or less",
        "EPC Shows Empty Cavity, inspections show retro drilled: SAP Rating 76 or more",
    ]
)

# "excluded" marks the properties with no reason at all. It used to be computed by
# checking the unidentified subset's ids against themselves and relying on index
# alignment, which produced True/NaN rather than a boolean flag. The mask above selects
# the same rows and yields True/False.
not_in_new_programme["excluded"] = no_reason_at_all

not_in_new_programme[
    not_in_new_programme["funded_extractions"]
].to_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/New programme/funded_extractions.csv",
    index=False
)

not_in_new_programme[
    not_in_new_programme["excluded"]
].to_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/New programme/excluded_properties.csv",
    index=False
)

# ---------------------------------------------------------------------------------------
# Patch header (kept verbatim):
# diff --git a/etl/customers/thrive/Make Insepctions route.py b/etl/customers/thrive/Make Insepctions route.py
# new file mode 100644
# index 00000000..ec4f620b
# --- /dev/null
# +++ b/etl/customers/thrive/Make Insepctions route.py
# @@ -0,0 +1,40 @@
# ---------------------------------------------------------------------------------------
"""
This script will pull in properties, in neighbouring areas, that have been flagged for CWI
"""
import pandas as pd

thrive_workbook = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/July 2025 Inspections/Thrive Programme - "
    "reconciled.xlsx"
)

asset_list = pd.read_excel(thrive_workbook, sheet_name="Standardised Asset List")

cavity_areas = pd.read_excel(thrive_workbook, sheet_name="Cavity Areas")

# NOTE(review): read but not used in the visible script - confirm whether properties that
# are already on the July inspections sheet should also be filtered out below.
existing_inspections_sheet = pd.read_excel(thrive_workbook, sheet_name="July 2025 Inspections")

empties = pd.read_excel(thrive_workbook, sheet_name="Cavity properties - for review")

# Keep asset-list properties whose postcode is in a cavity area ...
cavity_inspections = asset_list[
    asset_list["domna_postcode"].isin(cavity_areas["domna_postcode"])
]
# ... and drop those already listed on the "Cavity properties - for review" sheet
cavity_inspections = cavity_inspections[
    ~cavity_inspections["landlord_property_id"].isin(empties["landlord_property_id"])
]

cavity_inspections.to_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/July 2025 Inspections/cavity_inspections.csv",
    index=False
)

# ---------------------------------------------------------------------------------------
# Partial patch hunk for a class that lies outside this view; reproduced verbatim as
# comments and not edited:
# diff --git a/etl/find_my_epc/RetrieveFindMyEpc.py b/etl/find_my_epc/RetrieveFindMyEpc.py
# index 3fd7918f..7439200f 100644
# --- a/etl/find_my_epc/RetrieveFindMyEpc.py
# +++ b/etl/find_my_epc/RetrieveFindMyEpc.py
# @@ -674,7 +674,8 @@ class RetrieveFindMyEpc:
#  ],
#  'Heating controls (programmer room thermostat and thermostatic radiator valves)': [
#  "roomstat_programmer_trvs", "time_temperature_zone_control"
# - ]
# + ],
# + "Internal wall insulation": ["internal_wall_insulation"],
#  }
#  survey = True
# ---------------------------------------------------------------------------------------