debugging asset list prep to account for new template

This commit is contained in:
Khalim Conn-Kowlessar 2025-07-09 17:41:21 +01:00
parent 7c3d8caea4
commit c169260219
12 changed files with 902 additions and 67 deletions

View file

@ -301,6 +301,14 @@ class AssetList:
"Potential unsafe environment", "Date of Inspection", "Borescoped?"
]
# Another version of non-intrusives:
NON_INTRUSIVES_NEW_FORMAT_COLNAMES_V2 = [
'Archetype', 'Archetype 2', 'Construction', 'Insulated', 'Material', 'Boroscoped?',
'CIGA Check Required', 'ROOF ORIENTATION', 'TILE HUNG', 'RENDERED',
'CLADDING', 'ACCESS ISSUES', 'FURTHER SURVEYOR NOTES', 'DATE',
'NAME OF SURVEYOR'
]
NON_INTRUSIVES_ELIGIBILITY_COLUMN = "Eligibility (Red/Yellow/Green)"
OLD_FORMAT_NON_INTRUSIVE_COLNAMES = ['WFT Findings', 'ECO Eligibility']
@ -449,6 +457,8 @@ class AssetList:
"Has the property been re-walled?" in self.raw_asset_list.columns
)
self.new_format_non_insturives_present_v2 = 'TILE HUNG' in self.raw_asset_list.columns
# Names of columns
self.landlord_property_id = landlord_property_id
self.address1_colname = address1_colname
@ -750,7 +760,7 @@ class AssetList:
self.rename_map = {k: v for k, v in self.rename_map.items() if k is not None}
non_intrusive_columns = []
if self.non_intrusives_present:
if self.non_intrusives_present and not self.new_format_non_insturives_present_v2:
non_intrusive_columns = self.NON_INTRUSIVES_COLNAMES
if self.non_intrusives_eligibility:
@ -759,6 +769,9 @@ class AssetList:
if self.new_format_non_insturives_present:
non_intrusive_columns += self.NON_INTRUSIVES_NEW_FORMAT_COLNAMES
if self.new_format_non_insturives_present_v2:
non_intrusive_columns += self.NON_INTRUSIVES_NEW_FORMAT_COLNAMES_V2
if self.old_format_non_intrusives_present:
# We check if we have the ECO Eligibility column, which we might not have
non_intrusive_columns = [
@ -1308,9 +1321,15 @@ class AssetList:
# Before we begin, we identify if a property has solar already as we use this
# for identifying cavity jobs
if self.non_intrusives_present:
existing_solar_non_intrusives_check = (
self.standardised_asset_list["non-intrusives: PV, ACCESS ISSUE, SEE NOTES"] == "SOLAR PV ON ROOF"
)
if self.new_format_non_insturives_present_v2:
existing_solar_non_intrusives_check = (
self.standardised_asset_list["non-intrusives: ROOF ORIENTATION"] == "ALREADY HAS SOLAR PV"
)
else:
existing_solar_non_intrusives_check = (
self.standardised_asset_list["non-intrusives: PV, ACCESS ISSUE, SEE NOTES"] == "SOLAR PV ON ROOF"
)
elif self.old_format_non_intrusives_present:
existing_solar_non_intrusives_check = (
self.standardised_asset_list["non-intrusives: WFT Findings"].str.lower().str.strip().isin(
@ -1549,7 +1568,7 @@ class AssetList:
) & (
~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].isin(
["district heating", "communal heating", "communal gas boiler"]
) & ~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].str.contains("gas ")
) & ~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].astype(str).str.contains("gas ")
)
)
@ -2081,15 +2100,40 @@ class AssetList:
for _, row in blocks.iterrows():
addr = str(row[self.STANDARD_ADDRESS_1])
full_addr = row[self.STANDARD_FULL_ADDRESS]
# We also look for terms like "Odd", "even", "all" in the address to indicate if it should be just
# the odds, evens or all of the numbers
has_odd = (
"(odd)" in addr.lower() or
"(odd)" in full_addr.lower() or
"(odds)" in addr.lower() or
"(odds)" in full_addr.lower()
)
has_even = (
"(even)" in addr.lower() or
"(even)" in full_addr.lower() or
"(evens)" in addr.lower() or
"(evens)" in full_addr.lower()
)
# 1 ─ Range (e.g. 1-7)
m_range = RANGE_RE.search(addr)
if m_range:
start, end = m_range.groups()
start, end = int(re.match(r'\d+', start)[0]), int(re.match(r'\d+', end)[0])
if start > end or (end - start) > 100:
raise ValueError(f"Suspicious range '{addr}'")
for n in range(start, end + 1):
# We define the looping range on whether we have odd, even or all numbers
house_number_range = range(start, end + 1)
if has_odd:
house_number_range = [x for x in house_number_range if x % 2 != 0]
if has_even:
house_number_range = [x for x in house_number_range if x % 2 == 0]
for n in house_number_range:
new = row.copy()
new_addr = RANGE_RE.sub(str(n), addr, count=1)
original_full_address = new[self.STANDARD_FULL_ADDRESS]
@ -2107,9 +2151,9 @@ class AssetList:
expanded_rows.append(new)
continue
# 2 ─ Explicit list (e.g. 1, 2, 5 Block)
# 2 ─ Explicit list (e.g. 1, 2, 5 Block) or split by an ampersand (e.g. 1 & 2 Block)
nums = NUM_RE.findall(addr)
if len(nums) > 1 and ',' in addr:
if len(nums) > 1 and (',' in addr or '&' in addr):
for n in nums:
new = row.copy()
new_addr = re.sub(NUM_RE, n, addr, count=1) # replace the first number only
@ -2319,7 +2363,7 @@ class AssetList:
self.standardised_asset_list["cavity_reason"] = np.where(
self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE].isin(eligible_blocks),
self.standardised_asset_list["cavity_reason"]
+ " " + "(Flat in block with more than 50% eligible, but not eligible itself)",
+ " " + "(Flat in block with more than 50% eligible)",
self.standardised_asset_list["cavity_reason"]
)
@ -2490,10 +2534,14 @@ class AssetList:
if reconcile_programme:
programme_data = programme_data[~pd.isnull(programme_data["project_code"])]
else:
if programme_data["hubspot_status"].nunique() > 1:
logger.info("Multiple hubspot_status found - are you sure you don't want to reconcile the programme?")
ready_to_be_scheduled = (
(
programme_data["hubspot_status"] == hubspot_config.HubspotProcessStatus.READY_TO_BE_SCHEDULED.label
) & (~pd.isnull(programme_data["survey_date"]))
)
)
# completed_works = (
# (programme_data["hubspot_status"] !=
@ -2544,13 +2592,13 @@ class AssetList:
)
else:
# We shouldn't have any missing products
programme_data = programme_data[
~pd.isnull(programme_data["survey_date"])
]
# programme_data = programme_data[
# ~pd.isnull(programme_data["survey_date"])
# ]
if pd.isnull(programme_data["domna_product"]).sum():
raise ValueError("Missing products")
programme_data = programme_data.drop(columns=["solar_product", "cavity_product"])
programme_data = programme_data.drop(columns=["solar_product", "cavity_product"])
product_df = (
pd.DataFrame(self.CRM_PRODUCTS).T[["name", "id", "unit_price"]]
@ -2591,6 +2639,13 @@ class AssetList:
programme_data[self.EPC_API_DATA_NAMES["uprn"]]
)
# Blank out the UPRN for rows flagged as estimated (presumably these carry the negative placeholder UPRNs, which are not valid)
programme_data[uprn_column] = np.where(
programme_data["estimated"].isin([1, True]),
None,
programme_data[uprn_column]
)
# Add in some columns if we have them
date_of_inspections = (
"Non-Intrusives: Date of Inspection" if
@ -2757,6 +2812,7 @@ class AssetList:
columns={v: k for k, v in schema_mappings.items() if v is not None}
)
programme_data['Postcode <DEAL postcode>'] = programme_data['Postcode <LISTING hs_zip>'].copy()
programme_data['Installer <DEAL installer>'] = installer_name
programme_data['Name <LISTING hs_name>'] = (
programme_data['Full Address <LISTING full_address>'] + " ," + programme_data['Postcode <LISTING hs_zip>']
@ -3225,6 +3281,8 @@ class AssetList:
install_col = 'INSTALL/ CANCELLATION DATE'
elif "INSTALL/CANCELLATION DATE" in master_data.columns:
install_col = "INSTALL/CANCELLATION DATE"
elif 'Measure 1 Install Date' in master_data.columns:
install_col = 'Measure 1 Install Date'
else:
raise ValueError("No install or cancellation date")
@ -3264,6 +3322,8 @@ class AssetList:
property_type_col = "PROPERTY TYPE As per table emailed"
elif "PROPERTY TYPE As per table emailed" in master_data.columns:
property_type_col = "PROPERTY TYPE As per table emailed"
elif "PROPERTY TYPE" in master_data.columns:
property_type_col = "PROPERTY TYPE"
else:
property_type_col = "PROPERTY TYPE (SEE DEEMED SCORES SHEET) Eg. 3W_Flat_1 (As per Matrix)"

View file

@ -59,47 +59,151 @@ def app():
Property UPRN
"""
# Southern - Jan list
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southern/New Programme/Jan 2025 List"
data_filename = "SOUTHERN ASSETS January 2025 Additions Query 21.03.2025.xlsx"
sheet_name = "Jan 2025 additions"
postcode_column = 'Post Code'
fulladdress_column = None
address1_column = "NO."
address1_method = None
address_cols_to_concat = ["NO.", "Street / Block Name", "Town/Area"]
# Pickering and Ferens
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Pickering & Ferens"
data_filename = "SAP 9 vs SAP 10 Sava Intelligent Energy - Property List (190625).xlsx"
sheet_name = "Sava Intelligent Energy - Prope"
postcode_column = 'Postcode'
fulladdress_column = 'Address'
address1_column = None
address1_method = "house_number_extraction"
address_cols_to_concat = []
missing_postcodes_method = None
landlord_year_built = None
landlord_os_uprn = None
landlord_property_type = "Archetype" # Using the inspections property type
landlord_built_form = "Archetype"
landlord_property_type = "Property Type" # Using the inspections property type
landlord_built_form = "Archetype 2"
landlord_wall_construction = None
landlord_roof_construction = None
landlord_heating_system = None
landlord_existing_pv = None
landlord_property_id = "SH Property Reference"
landlord_sap = None
outcomes_filename = [
os.path.join(data_folder, "RT - Southern Housing Group - JJC.xlsx"),
os.path.join(data_folder, "RT - SOUTHERN OUTCOMES - SCIS Merged.xlsx"),
]
outcomes_sheetname = ["Feedback", "Collated"]
outcomes_postcode = ["Poscode", "Postcode"]
outcomes_houseno = ["No.", "No"]
outcomes_id = ["UPRNs", None]
outcomes_address = ["Address", "Address"]
landlord_property_id = "UPRN"
landlord_sap = "SAP Rating (RdSAP 10)"
outcomes_filename = []
outcomes_sheetname = []
outcomes_postcode = []
outcomes_houseno = []
outcomes_id = []
outcomes_address = []
master_filepaths = [
os.path.join(data_folder, "southern_submissions/CAVITY'S - DECEMBER 2018-Table 1.csv"),
os.path.join(data_folder, "southern_submissions/CAVITY'S 2019-Table 1.csv"),
os.path.join(data_folder, "southern_submissions/CAVITY'S ECO4-Table 1.csv"),
os.path.join(data_folder, "southern_submissions/LOFT'S-Table 1.csv"),
os.path.join(data_folder, "PICKERING & FERENS ROLLING MASTER SHEET HEDGEFUND - 26.7.24 - K.csv"),
os.path.join(data_folder, "PICKERING & FERENS NEW MASTER GBIS UPDATED 21.8.24 - M - For Analysis.csv"),
]
master_to_asset_list_filepath = None
phase = False
ecosurv_landlords = "southern"
ecosurv_landlords = "pickering"
asset_list_header = 0
landlord_block_reference = None
master_id_colnames = [None, None, None, None]
master_id_colnames = [None, None]
# Colchester
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester"
# data_filename = "Warmfront data- Colchester Borough Homes (Complete).xlsx"
# sheet_name = "Sheet1"
# postcode_column = 'Full Address.1'
# fulladdress_column = "Full Address"
# address1_column = None
# address1_method = "first_word"
# address_cols_to_concat = []
# missing_postcodes_method = None
# landlord_year_built = "Build Date"
# landlord_os_uprn = None
# landlord_property_type = "Property Type"
# landlord_wall_construction = "Wallinsul"
# landlord_heating_system = "HeatSorc"
# landlord_existing_pv = None
# landlord_property_id = "Property Reference"
# outcomes_filename = []
# outcomes_sheetname = []
# outcomes_postcode = []
# outcomes_houseno = []
# outcomes_id = []
# outcomes_address = []
# master_filepaths = []
# master_to_asset_list_filepath = None
# asset_list_header = 0
# landlord_built_form = None
# landlord_roof_construction = None
# landlord_sap = None
# landlord_block_reference = None
# phase = False
# ecosurv_landlords = None
# master_id_colnames = []
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot"
# data_filename = "EalingFlats.xlsx"
# sheet_name = "Sheet1"
# postcode_column = 'Postcode'
# fulladdress_column = "Address"
# address1_column = None
# address1_method = "house_number_extraction"
# address_cols_to_concat = []
# missing_postcodes_method = None
# landlord_year_built = None
# landlord_os_uprn = None
# landlord_property_type = None # Using the inspections property type
# landlord_built_form = None
# landlord_wall_construction = None
# landlord_roof_construction = None
# landlord_heating_system = None
# landlord_existing_pv = None
# landlord_property_id = "Property ref"
# landlord_sap = None
# outcomes_filename = []
# outcomes_sheetname = []
# outcomes_postcode = []
# outcomes_houseno = []
# outcomes_id = []
# outcomes_address = []
# master_filepaths = []
# master_to_asset_list_filepath = None
# phase = False
# ecosurv_landlords = None
# asset_list_header = 0
# landlord_block_reference = "Block Ref"
# master_id_colnames = []
# Southern - Jan list
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southern/New Programme/Jan 2025 List"
# data_filename = "SOUTHERN ASSETS January 2025 Additions Query 21.03.2025.xlsx"
# sheet_name = "Jan 2025 additions"
# postcode_column = 'Post Code'
# fulladdress_column = None
# address1_column = "NO."
# address1_method = None
# address_cols_to_concat = ["NO.", "Street / Block Name", "Town/Area"]
# missing_postcodes_method = None
# landlord_year_built = None
# landlord_os_uprn = None
# landlord_property_type = None # Using the inspections property type
# landlord_built_form = None
# landlord_wall_construction = None
# landlord_roof_construction = None
# landlord_heating_system = None
# landlord_existing_pv = None
# landlord_property_id = "SH Property Reference"
# landlord_sap = None
# outcomes_filename = [
# os.path.join(data_folder, "RT - Southern Housing Group - JJC.xlsx"),
# os.path.join(data_folder, "RT - SOUTHERN OUTCOMES - SCIS Merged.xlsx"),
# ]
# outcomes_sheetname = ["Feedback", "Collated"]
# outcomes_postcode = ["Poscode", "Postcode"]
# outcomes_houseno = ["No.", "No"]
# outcomes_id = ["UPRNs", None]
# outcomes_address = ["Address", "Address"]
# master_filepaths = [
# os.path.join(data_folder, "southern_submissions/CAVITY'S - DECEMBER 2018-Table 1.csv"),
# os.path.join(data_folder, "southern_submissions/CAVITY'S 2019-Table 1.csv"),
# os.path.join(data_folder, "southern_submissions/CAVITY'S ECO4-Table 1.csv"),
# os.path.join(data_folder, "southern_submissions/LOFT'S-Table 1.csv"),
# ]
# master_to_asset_list_filepath = None
# phase = False
# ecosurv_landlords = "southern"
# asset_list_header = 0
# landlord_block_reference = None
# master_id_colnames = [None, None, None, None]
# NCHA
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/NCHA"

View file

@ -17,7 +17,7 @@ class HubspotProcessStatus(IntEnum):
# The property didn't get access and needs sign off
SURVEYED_NO_ACCESS_NEEDS_SIGN_OFF = 2, "SURVEYED - NO ACCESS - NEED SIGN OFF"
# The survey has been completed. We don't have any update as to whether the property has been installed
SURVEYED_COMPLETED_SIGNED_OFF = 3, "SURVEYED - COMPLETED - SIGNED OFF"
SURVEYED_COMPLETED_SIGNED_OFF = 3, "SURVEYED - AUTOMATED SIGNED OFF"
# The property turned out to be ineligible
NOT_VIABLE = 4, "NOT VIABLE"
# The property is with the installer. This will likely be the default for historic programmes
@ -79,7 +79,7 @@ CRM_UPLOAD_COLUMNS = [
'Last EPC: Room Height <LISTING last_epc__room_height>',
'Last EPC: Age Band <LISTING last_epc__age_band>', 'Deal Stage <DEAL dealstage>',
'Pipeline <DEAL pipeline>', 'Expected Commencement Date <DEAL expected_commencement_date>',
'Deal Name <DEAL dealname>', 'Project Code <DEAL project_code>',
'Deal Name <DEAL dealname>', 'Project Code <DEAL project_code>', 'Postcode <DEAL postcode>',
'Product ID <LINE_ITEM hs_product_id>', 'Name <LINE_ITEM name>', 'Unit price <LINE_ITEM price>',
'Quantity <LINE_ITEM quantity>', 'Deal Owner', 'Amount <DEAL amount>', 'Installer <DEAL installer>'
]

View file

@ -2,6 +2,32 @@ import os
import pandas as pd
from asset_list.AssetList import AssetList
import re
def normalize_uk_phone(number: str | float | int) -> str | None:
if pd.isna(number):
return None
number = str(number)
number = re.sub(r"[^\d+]", "", number)
# Handle common short inputs: add '0' if likely missing
if re.match(r"^7\d{8,9}$", number) or re.match(r"^1\d{8,9}$", number):
number = "0" + number
# Convert to international format
if number.startswith("0"):
number = "+44" + number[1:]
elif number.startswith("0044"):
number = "+" + number[2:]
# Must be +44 followed by 10 digits (some area codes may vary)
if re.match(r"^\+44\d{9,10}$", number):
return number
return None
def app():
"""
@ -18,27 +44,26 @@ def app():
"""
# inputs:
reconcile_programme = False # If True, the hubspot upload will include all properties with a project code
customer_domain = "https://medway.gov.uk"
installer_name = "SGEC"
reconcile_programme = True # If True, the hubspot upload will include all properties with a project code
customer_domain = "https://ealing.gov.uk"
installer_name = "SCIS"
asset_list_filepath = (
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Medway/Hubspot/Reviewed programme - 2025-05-27.xlsx"
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot/20250707 Ealing Flats - Prepared "
"programme.xlsx"
)
asset_list_sheet_name = "Finalised Route"
asset_list_sheet_name = "Standardised Asset List"
asset_list_header = 0
contact_details_filepath = (
None
)
contacts_sheet_name = "Sheet1"
contacts_landlord_property_id = "landlord_property_id"
contact_details_filepath = None
contacts_sheet_name = "Sheet 1"
contacts_landlord_property_id = "UPRN"
contacts_phone_number_column = "phone_number"
contacts_secondary_phone_number_column = "secondary_phone_number"
contacts_secondary_contact_full_name = "secondary_contact_full_name"
contacts_email_column = "email"
contacts_fullname_column = "fullname"
contacts_firstname_column = "firstname"
contacts_lastname_column = "lastname"
contacts_firstname_column = "First Name"
contacts_lastname_column = "Last Name"
existing_programme_filepath = None
@ -65,12 +90,12 @@ def app():
)
# Remove the existing programme
existing_programme = pd.read_csv(existing_programme_filepath, encoding="utf-8-sig")
asset_list.hubspot_data = asset_list.hubspot_data[
~asset_list.hubspot_data["Domna Property ID <LISTING domna_property_id>"].isin(
existing_programme['Domna Property ID'].values
)
]
# existing_programme = pd.read_csv(existing_programme_filepath, encoding="utf-8-sig")
# asset_list.hubspot_data = asset_list.hubspot_data[
# ~asset_list.hubspot_data["Domna Property ID <LISTING domna_property_id>"].isin(
# existing_programme['Domna Property ID'].values
# )
# ]
# Get the filepath and the filename. Append hubspot upload to the filename. We also change the file type to csv
directory, filename = os.path.split(asset_list_filepath)
@ -86,3 +111,66 @@ def app():
# Just store locally
asset_list.hubspot_data.to_csv(output_filepath, index=False, encoding="utf-8-sig")
# # TODO: Set this up separately, but we associate multiple contacts to the same deal
# contact_details = pd.read_csv(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot "
# "Upload/Hubspot/contact "
# "details.csv"
# )
#
# # contacts_phone_number_column = "phone_number"
# # contacts_secondary_phone_number_column = "secondary_phone_number"
# # contacts_secondary_contact_full_name = "secondary_contact_full_name"
# # contacts_email_column = "email"
# # contacts_fullname_column = "fullname"
# # contacts_firstname_column = "First Name"
# # contacts_lastname_column = "Last Name"
# contact_details["phone_number"] = contact_details["Mobile Phone"].copy()
# # If phone number is NaN, we will use the landline number
# contact_details["phone_number"] = contact_details["phone_number"].fillna(contact_details["Landline"])
# contact_details["secondary_phone_number"] = contact_details["Landline"].copy()
# # If secondary phone number is the same as primary, we remove it
# import numpy as np
# contact_details["secondary_phone_number"] = np.where(
# contact_details["secondary_phone_number"] == contact_details["phone_number"],
# np.nan,
# contact_details["secondary_phone_number"]
# )
# contact_details = contact_details[
# ['Property Reference Number (Main Address) (Property)', "Email Address", "phone_number",
# "secondary_phone_number", "First Name", "Last Name"]].copy().rename(
# columns={"Property Reference Number (Main Address) (Property)": "landlord_proprty_id"}
# )
# contact_details["fullname"] = contact_details["First Name"] + " " + contact_details["Last Name"]
# # Format the phone numbers
#
# contact_details["phone_number"] = contact_details["phone_number"].astype(int).astype(str).apply(
# normalize_uk_phone)
# contact_details["secondary_phone_number"] = contact_details["secondary_phone_number"].astype("Int64").astype(
# str).apply(
# normalize_uk_phone)
#
# # Add in the Hubspot deal data
# hubspot_data = pd.read_csv(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/Hubspot/"
# "property-status.csv",
# encoding="utf-8-sig"
# )
# # Merge on contact details
# contact_details = hubspot_data[["Landlord Property ID", "Deal ID"]].merge(
# contact_details,
# how="left",
# right_on="landlord_proprty_id",
# left_on="Landlord Property ID"
# )
#
# contact_details = contact_details.drop(columns=["landlord_proprty_id"])
#
# # Store as csv
# contact_details.to_csv(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar "
# "Programme Hubspot Upload/Hubspot/"
# "contact_details.csv",
# index=False, encoding="utf-8-sig"
# )

View file

@ -3,7 +3,7 @@ import numpy as np
STANDARD_BUILT_FORMS = {
"unknown",
# Houses
"end-terrace", "semi-detached", "detached", "mid-terrace",
"end-terrace", "semi-detached", "detached", "mid-terrace", "enclosed mid-terrace", "enclosed end-terrace",
# Flats
"ground floor", "mid-floor", "top-floor", "basement", "low rise", "high rise",
}
@ -359,5 +359,12 @@ BUILT_FORM_MAPPINGS = {
'1983-90 MID TERR': 'mid-terrace',
'1976-82 SEMI DET': 'semi-detached',
'PRE 1900 MID TERR': 'mid-terrace',
None: 'unknown'
None: 'unknown',
'SEMI-DETACHED': 'semi-detached',
'DETACHED': 'detached',
'MID TERRACE': 'mid-terrace',
'END TERRACE': 'end-terrace',
'ENCLOSED MID': 'enclosed mid-terrace'
}

View file

@ -37,7 +37,9 @@ def get_data(
"mid-terrace": "Mid-Terrace",
"end-terrace": "End-Terrace",
"semi-detached": "Semi-Detached",
"detached": "Detached"
"detached": "Detached",
"enclosed end-terrace": "End-Terrace",
"enclosed mid-terrace": "Mid-Terrace",
}
epc_data = []
@ -101,7 +103,6 @@ def get_data(
else:
# Try splitting on space
add1 = full_address.split(" ")[0].strip()
else:
add1 = str(house_number)
searcher = SearchEpc(

View file

@ -0,0 +1,54 @@
import pandas as pd
# Landlord workbook holding the comment columns, keyed by the "URPN" column
comments_df = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester/July Finalised "
    "Project/CBH_RetroTeamList_amended_25-06-05.xlsx",
)

# Both routes live in the same standardised workbook, one sheet per route
standardised_workbook = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester/July Finalised "
    "Project/20250708 Colchester Borough Homes- Standardised.xlsx"
)
cavity_route, solar_route = (
    pd.read_excel(standardised_workbook, sheet_name=route_sheet)
    for route_sheet in ("July 2025 Route - Cavity", "July 2025 Route - Solar")
)

# Attach the comment columns to each route
comment_columns = ["URPN", 'Unnamed: 6', 'SHDF Live', 'SHDF Removed', 'SHDF Reserve', '25-26 List (138 to EPC)']
comments = comments_df[comment_columns].copy()
cavity_route = cavity_route.merge(comments, how="left", left_on="landlord_property_id", right_on="URPN")
solar_route = solar_route.merge(comments, how="left", left_on="landlord_property_id", right_on="URPN")

# Properties in the comments workbook that appear on neither route
on_any_route = (
    comments_df["URPN"].isin(cavity_route["landlord_property_id"]) |
    comments_df["URPN"].isin(solar_route["landlord_property_id"])
)
not_on_routes = comments_df[~on_any_route]

# Write out the unmatched properties first, then the two annotated routes
not_on_routes.to_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester/July Finalised "
    "Project/Properties not on routes.xlsx",
    index=False
)
cavity_route.to_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester/July Finalised "
    "Project/Cavity Route.xlsx",
    index=False
)
solar_route.to_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester/July Finalised "
    "Project/Solar Route.xlsx",
    index=False
)

View file

@ -0,0 +1,289 @@
import pandas as pd
from tqdm import tqdm
from backend.SearchEpc import SearchEpc
import numpy as np
contact_list = pd.read_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/Bromford - Solar "
"PV address list - second wave KLD - PP.csv"
)
contact_list["house_no"] = contact_list.apply(lambda x: SearchEpc.get_house_number(
address=str(x["Address 1: Street 1"]).strip(),
postcode=str(x["Postal Code"]).strip(),
), axis=1)
asset_list = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/asset_list - "
"Standardised (1).xlsx",
sheet_name="Standardised Asset List"
)
lookup = []
missed = []
# Contacts whose address cannot be matched automatically. Keys are the exact raw "Address 1: Street 1"
# values (including any trailing space present in the data); values are the landlord_property_id to assign.
manual_property_ids = {
    '1 The Beck': 40692,
    '3 The Beck ': 40693,
    '2 Orchard Close ': 7924,
    '3 Croxall Road': 40650,
    '4 Ward Road ': 33175,
}

# The comma-stripped full address does not depend on the contact, so compute it once rather than per row
full_address_no_commas = asset_list["domna_full_address"].str.replace(",", "")

for _, x in tqdm(contact_list.iterrows(), total=len(contact_list)):
    raw_street = x["Address 1: Street 1"]
    if raw_street in manual_property_ids:
        lookup.append(
            {
                "UPRN": x["UPRN"],
                "landlord_property_id": manual_property_ids[raw_street],
            }
        )
        continue

    street = raw_street.strip()
    # na=False treats asset rows with a missing postcode/address as non-matches in every attempt
    postcode_match = asset_list["domna_postcode"].str.contains(x["Postal Code"].strip(), na=False)

    # Attempt 1: the contact's street appears somewhere in the full address
    df = asset_list[full_address_no_commas.str.contains(street, na=False) & postcode_match]

    if df.shape[0] != 1:
        # Attempt 2: the full address equals the contact's street exactly. The comparison must be
        # parenthesised: `&` binds tighter than `==`, so without the brackets Python evaluates
        # `street & <Series>` first and raises a TypeError.
        df = asset_list[(full_address_no_commas == street) & postcode_match]

    if df.shape[0] != 1:
        # Attempt 3: fall back to matching on the extracted house number
        df = asset_list[(asset_list["domna_address_1"].astype(str) == str(x["house_no"])) & postcode_match]

    if df.shape[0] != 1:
        # Still ambiguous or unmatched: record it for manual completion
        missed.append(x["UPRN"])
        continue

    lookup.append(
        {
            "UPRN": x["UPRN"],
            "landlord_property_id": df["landlord_property_id"].values[0],
        }
    )
lookup = pd.DataFrame(lookup)
contact_list = contact_list.merge(lookup, how="left", on="UPRN")
# Store
contact_list.to_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/Bromford - Solar "
"PV address list - second wave KLD - PP with landlord_property_id.csv",
index=False
)
# I manually completed the lookup for the missed ones. We now read it back in and pull in the properties for the
# standardised asset list
contacts_complete = pd.read_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/Bromford - Solar "
"PV address list - second wave KLD - PP with landlord_property_id.csv"
)
new_data = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/Master Sheet "
"Solar PV installs.xlsx",
sheet_name="Sheet1"
)
contact_list = contact_list.merge(
new_data,
how="left",
left_on="UPRN",
right_on="CE UPRN"
)
route = asset_list[
asset_list["landlord_property_id"].isin(contact_list["Legacy UPRN"].astype("Int64").astype(str))
].copy()
# Add the new heating data
contact_list["Legacy UPRN"] = contact_list["Legacy UPRN"].astype("Int64").astype(str)
route2 = contact_list.merge(
route,
how="left",
right_on="landlord_property_id",
left_on="Legacy UPRN"
)
# Because I did a data pull, we can fill the other bits of information.
# "Legacy UPRN" has already been cast to strings above, so the ids are compared as strings too: comparing
# strings against ints never matches and would flag every contact as missed.
missed = contact_list[~contact_list["Legacy UPRN"].isin(route["landlord_property_id"].astype(str))]
# Store both the route and missed
route2.to_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/route.csv",
index=False
)
# Add on phone number
contact_details_filepath = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme "
"Hubspot Upload/Hubspot/Bromford - Solar PV address list - second wave KLD - PP with "
"landlord_property_id.xlsx")
contacts_filenames = [
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/contact "
"details/FAO Paul Contact Details-Table 1.csv",
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/contact "
"details/Green Contact Details-Table 1.csv",
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/contact "
"details/Main Contact Details-Table 1.csv",
]
merge_to = pd.read_excel(contact_details_filepath)
lookup = []
for fn in contacts_filenames:
df = pd.read_csv(fn, encoding="utf-8-sig")
# Merge on phone
details = df[
df["Property Reference Number (Main Address) (Property)"].isin(merge_to["UPRN"].astype(str))
][[
"Property Reference Number (Main Address) (Property)", "Landline", "Mobile Phone", "Email Address",
"First Name", "Last Name"
]]
lookup.append(details)
lookup = pd.concat(lookup)
# Drop entries where landline, mobile and email are all NaN
lookup = lookup.dropna(subset=["Landline", "Mobile Phone", "Email Address"], how="all")
lookup = lookup.drop_duplicates(["Landline", "Mobile Phone", "Email Address"])
# Sort so email is first, then landline, then mobile
lookup = lookup.sort_values(
["Property Reference Number (Main Address) (Property)", "Email Address", "Landline", "Mobile Phone"],
ascending=[True, True, True, True]
)
# Store
lookup.to_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/Hubspot/contact "
"details.csv",
index=False
)
lookup2 = []
for _, x in lookup.groupby("Property Reference Number (Main Address) (Property)"):
# If any entries have an email, we keep only those
if x["Email Address"].notna().any():
x = x[x["Email Address"].notna()]
# We then take the entry with a phone number
if x["Landline"].notna().any() or x["Mobile Phone"].notna().any():
x = x[x["Landline"].notna() | x["Mobile Phone"].notna()]
# Take the first entry
x = x.iloc[0]
lookup2.append(x)
lookup2 = pd.DataFrame(lookup2)
import pandas as pd
# Sample structure based on your columns
columns = ['Property Reference Number (Main Address) (Property)', 'Landline', 'Mobile Phone', 'Email Address']
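# Simulating example input DataFrame
# NOTE(review): the next statement replaces the `lookup` built from the contact files above with an empty
# placeholder, so the grouping loop below never runs on real data.
# In practice, you would use: lookup = pd.read_csv(...) or similar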
lookup = pd.DataFrame(columns=columns)
# Grouping and transforming
results = []
for prop_id, group in lookup.groupby("Property Reference Number (Main Address) (Property)"):
# Filter rows with any contact information
filtered = group[
group["Email Address"].notna() &
(group["Landline"].notna() | group["Mobile Phone"].notna())
]
if filtered.empty:
continue
# Sort by presence of phone numbers (prioritize those with both)
filtered["contact_score"] = (
filtered["Landline"].notna().astype(int) +
filtered["Mobile Phone"].notna().astype(int)
)
filtered = filtered.sort_values("contact_score", ascending=False)
primary = filtered.iloc[0]
# Make sure secondary is not the same as primary
if not pd.isnull(primary["Mobile Phone"]):
secondary = filtered[
(filtered["Mobile Phone"] != primary["Mobile Phone"])
]
elif not pd.isnull(primary["Landline"]):
secondary = filtered[
(filtered["Landline"] != primary["Landline"])
]
else:
raise Exception("Look at me")
secondary = filtered.iloc[1] if len(filtered) > 1 else None
results.append({
"Property ID": prop_id,
"Primary Email": primary["Email Address"],
"Primary Phone": primary["Mobile Phone"] or primary["Landline"],
"Secondary Email": secondary["Email Address"] if secondary is not None else None,
"Secondary Phone": secondary["Mobile Phone"] or secondary["Landline"] if secondary is not None else None,
})
final_df = pd.DataFrame(results)
import ace_tools as tools;
tools.display_dataframe_to_user(name="Cleaned Contact Lookup", dataframe=final_df)
# Intended next step (not implemented in the lines below): set up primary and secondary
# phone numbers, using mobile as the primary.
# Collapse duplicates to one row per property reference: order the rows by the reference
# and keep the last row for each reference value.
# NOTE(review): the sort key is the property reference only, so this step does not by
# itself prioritise rows that have an email - confirm that is acceptable.
property_ref_column = "Property Reference Number (Main Address) (Property)"
lookup_by_reference = lookup.sort_values(property_ref_column)
lookup2 = lookup_by_reference.drop_duplicates(property_ref_column, keep="last")
# TODO: Get into the standardised asset list format
# TODO: Add the deal postcode to Hubspot
# TODO: Upload the deal postcode

View file

@ -0,0 +1,75 @@
import numpy as np
import pandas as pd
from asset_list.hubspot.config import HubspotProcessStatus
# Prepare the Ealing flats programme workbook: carry survey statuses from the completion
# tracker onto the standardised asset list and write the result to a new workbook.

# Input workbooks
COMPLETION_TRACKER_PATH = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot/Ealing Flats Completion Tracker JW "
    "170625.xlsx"
)
STANDARDISED_WORKBOOK_PATH = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot/20250707 Ealing Flats - Standardised.xlsx"
)

project_data = pd.read_excel(COMPLETION_TRACKER_PATH, sheet_name="All_Flats")

# Derive a Hubspot status from the tracker: only "Submitted" rows get one, and the value
# depends on the PAS version recorded against the row. Everything else stays None.
project_data["hubspot_status"] = None
submitted_under_2023 = (project_data["Status"] == "Submitted") & (project_data["PAS"] == "PAS2023")
project_data["hubspot_status"] = np.where(
    submitted_under_2023,
    HubspotProcessStatus.SURVEYED_COMPLETED_SIGNED_OFF.label,
    project_data["hubspot_status"],
)
submitted_under_2019 = (project_data["Status"] == "Submitted") & (project_data["PAS"] == "PAS2019")
project_data["hubspot_status"] = np.where(
    submitted_under_2019,
    "SURVEYED UNDER 2019 - NEEDS RE-SURVEY",
    project_data["hubspot_status"],
)

# One project code per block
project_data["project_code"] = "EALING-FLATS-" + project_data["Block Ref"].astype(str)

asset_list = pd.read_excel(STANDARDISED_WORKBOOK_PATH, sheet_name="Standardised Asset List")

# Cast every join key to str so both sides of the merge compare as text
for id_column in ("landlord_property_id", "incorrect_landlord_property_id"):
    asset_list[id_column] = asset_list[id_column].astype(str)
project_data["Property ref"] = project_data["Property ref"].astype(str)

# Bring the tracker's status/project code onto the asset list. The tracker reference is
# matched against `incorrect_landlord_property_id`; tracker columns that clash with
# asset-list columns receive the "_project" suffix.
tracker_columns = project_data[["Property ref", "hubspot_status", "project_code"]]
asset_list2 = pd.merge(
    asset_list,
    tracker_columns,
    how="left",
    left_on="incorrect_landlord_property_id",
    right_on="Property ref",
    suffixes=("", "_project"),
)

# Where the tracker supplied a status (property already surveyed), it overrides the
# asset list's own status
tracker_status = asset_list2["hubspot_status_project"]
asset_list2["hubspot_status"] = np.where(
    tracker_status.notna(),
    tracker_status,
    asset_list2["hubspot_status"],
)

# Fill missing project codes with the landlord property id.
# NOTE(review): the tracker's `project_code_project` is merged in but dropped below
# without being used, unlike `hubspot_status_project` - confirm that is intended.
existing_project_code = asset_list2["project_code"]
asset_list2["project_code"] = np.where(
    existing_project_code.notna(),
    existing_project_code,
    asset_list2["landlord_property_id"],
)
asset_list2 = asset_list2.drop(columns=["hubspot_status_project", "project_code_project"])

# Default cavity reason for rows that have none; solar reason is blanked for every row
current_cavity_reason = asset_list2["cavity_reason"]
asset_list2["cavity_reason"] = np.where(
    current_cavity_reason.isnull(),
    "Non-Intrusive Data Shows Empty Cavity: SAP Rating 55-68",
    current_cavity_reason,
)
asset_list2["solar_reason"] = None

# Read in block analysis and geographical areas from standardised asset list
block_analysis_df = pd.read_excel(STANDARDISED_WORKBOOK_PATH, sheet_name="Block Analysis")
geographical_areas = pd.read_excel(STANDARDISED_WORKBOOK_PATH, sheet_name="Geographical Areas")

# Write the updated asset list plus the two pass-through sheets to the new workbook
filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot/20250707 Ealing Flats - Prepared "
            "programme.xlsx")
output_sheets = {
    "Standardised Asset List": asset_list2,
    "Block Analysis": block_analysis_df,
    "Geographical Areas": geographical_areas,
}
with pd.ExcelWriter(filename) as writer:
    for output_sheet_name, output_frame in output_sheets.items():
        output_frame.to_excel(writer, sheet_name=output_sheet_name, index=False)

View file

@ -0,0 +1,116 @@
#
import pandas as pd
# Compare MHS's "in-fill" property list against our standardised asset list and the new
# programme sheets, then export (a) properties needing a funded extraction and
# (b) properties that were not identified for any measure.

PROGRAMME_WORKBOOK = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/New programme/12052025 MHS Standardised Asset List - "
    "programme.xlsx"
)
IN_FILL_WORKBOOK = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/New programme/Domna updated programme list May 2025 ("
    "1).xlsx"
)

# Passing a list of sheet names makes pandas open each workbook once and return a
# {sheet name: DataFrame} dict, instead of re-reading the file for every sheet.
programme_sheets = pd.read_excel(
    PROGRAMME_WORKBOOK,
    sheet_name=[
        "Standardised Asset List",
        "New Cavity Programme",
        "Empty Cavity Pilot",
        "New Solar Programme",
    ],
)
asset_list = programme_sheets["Standardised Asset List"]
new_cavity_programme = programme_sheets["New Cavity Programme"]
new_cavity_pilot = programme_sheets["Empty Cavity Pilot"]
new_solar_programme = programme_sheets["New Solar Programme"]

in_fill_sheets = pd.read_excel(
    IN_FILL_WORKBOOK,
    sheet_name=["Houses and Bungalows", "Flats and Maistonettes"],
)
in_fill_properties_houses = in_fill_sheets["Houses and Bungalows"]
in_fill_properties_flats = in_fill_sheets["Flats and Maistonettes"]

# Q1) What are these properties? Do we have them on our list already?
# NOTE(review): UPRN is compared directly with landlord_property_id; this assumes both
# columns are read with the same dtype - confirm.
# All of the houses are already in the asset list
in_fill_properties_houses["is_in_asset_list"] = in_fill_properties_houses["UPRN"].isin(
    asset_list["landlord_property_id"].values
)
# All of the flats are already in the asset list
in_fill_properties_flats["is_in_asset_list"] = in_fill_properties_flats["UPRN"].isin(
    asset_list["landlord_property_id"].values
)

# Q2) Which properties are excluded from the new programme?
in_fill_properties = pd.concat(
    [in_fill_properties_houses, in_fill_properties_flats], ignore_index=True, sort=False
)

# Merge on the asset-list data (left join keeps every in-fill property)
in_fill_properties = in_fill_properties.merge(
    asset_list,
    left_on="UPRN",
    right_on="landlord_property_id",
    how="left"
)

# How many properties are in the new programme?
in_fill_properties["in_new_cavity_programme"] = in_fill_properties["UPRN"].isin(
    new_cavity_programme["landlord_property_id"].values
)
in_fill_properties["in_new_solar_programme"] = in_fill_properties["UPRN"].isin(
    new_solar_programme["landlord_property_id"].values
)
in_fill_properties["in_new_cavity_pilot"] = in_fill_properties["UPRN"].isin(
    new_cavity_pilot["landlord_property_id"].values
)

# Properties that appear in none of the three new-programme sheets
not_in_new_programme = in_fill_properties[
    (~in_fill_properties["in_new_cavity_programme"] & ~in_fill_properties["in_new_solar_programme"] & ~
     in_fill_properties["in_new_cavity_pilot"])
].copy()

# Why? (bare expressions: the counts are only visible when run interactively)
not_in_new_programme["cavity_reason"].value_counts()
not_in_new_programme["solar_reason"].value_counts()

# Rows with neither a cavity reason nor a solar reason were not identified for anything
not_identified_mask = (
    not_in_new_programme["cavity_reason"].isna()
    & not_in_new_programme["solar_reason"].isna()
)
not_identified_for_anything = not_in_new_programme[not_identified_mask]

# Flag the potential re-inspections which is 994 properties though any extractions we need to consider the HA funding
# the extraction
not_in_new_programme["funded_extractions"] = not_in_new_programme["cavity_reason"].isin(
    [
        "Non-Intrusive Data Shows Cavity Extraction: SAP Rating 69-75",
        "Non-Intrusive Data Shows Cavity Extraction: SAP Rating 55-68",
        "Non-Intrusive Data Shows Cavity Extraction: SAP Rating 76 or more",
        "Non-Intrusive Data Shows Cavity Extraction: SAP Rating 54 or less",
        "EPC Shows Empty Cavity, inspections show non-cavity build: SAP Rating 76 or more",
        "EPC Shows Empty Cavity, inspections show non-cavity build: SAP Rating 54 or less",
        "EPC Shows Empty Cavity, inspections show retro drilled: SAP Rating 54 or less",
        "EPC Shows Empty Cavity, inspections show retro drilled: SAP Rating 76 or more",
    ]
)

# Use the boolean mask directly. The previous version tested the unidentified subset
# against itself (always True) and relied on index alignment to leave NaN elsewhere,
# which produced a True/NaN object column instead of a proper True/False flag. The same
# rows are flagged; non-excluded rows now carry False rather than a blank.
not_in_new_programme["excluded"] = not_identified_mask

not_in_new_programme[
    not_in_new_programme["funded_extractions"]
].to_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/New programme/funded_extractions.csv",
    index=False
)
not_in_new_programme[
    not_in_new_programme["excluded"]
].to_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/New programme/excluded_properties.csv",
    index=False
)

View file

@ -0,0 +1,40 @@
"""
This script will pull in properties, in neighbouring areas, that have been flagged for CWI
"""
import pandas as pd
# All four input sheets live in the same reconciled programme workbook
THRIVE_WORKBOOK = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/July 2025 Inspections/Thrive Programme - "
    "reconciled.xlsx"
)

asset_list = pd.read_excel(THRIVE_WORKBOOK, sheet_name="Standardised Asset List")
cavity_areas = pd.read_excel(THRIVE_WORKBOOK, sheet_name="Cavity Areas")
# NOTE(review): this sheet is loaded but not used in the selection below - confirm
# whether existing inspections were meant to be excluded as well.
existing_inspections_sheet = pd.read_excel(THRIVE_WORKBOOK, sheet_name="July 2025 Inspections")
empties = pd.read_excel(THRIVE_WORKBOOK, sheet_name="Cavity properties - for review")

# Select asset-list rows whose postcode is one of the cavity areas, leaving out any
# property already listed on the "Cavity properties - for review" sheet
in_cavity_area = asset_list["domna_postcode"].isin(cavity_areas["domna_postcode"].values)
already_under_review = asset_list["landlord_property_id"].isin(empties["landlord_property_id"].values)
cavity_inspections = asset_list[in_cavity_area & ~already_under_review]

cavity_inspections_path = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/July 2025 Inspections/cavity_inspections.csv"
)
cavity_inspections.to_csv(cavity_inspections_path, index=False)

View file

@ -674,7 +674,8 @@ class RetrieveFindMyEpc:
],
'Heating controls (programmer room thermostat and thermostatic radiator valves)': [
"roomstat_programmer_trvs", "time_temperature_zone_control"
]
],
"Internal wall insulation": ["internal_wall_insulation"],
}
survey = True