mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
debugging asset list prep to account for new template
This commit is contained in:
parent
7c3d8caea4
commit
c169260219
12 changed files with 902 additions and 67 deletions
|
|
@ -301,6 +301,14 @@ class AssetList:
|
|||
"Potential unsafe environment", "Date of Inspection", "Borescoped?"
|
||||
]
|
||||
|
||||
# Another version of non-intrusives:
|
||||
NON_INTRUSIVES_NEW_FORMAT_COLNAMES_V2 = [
|
||||
'Archetype', 'Archetype 2', 'Construction', 'Insulated', 'Material', 'Boroscoped?',
|
||||
'CIGA Check Required', 'ROOF ORIENTATION', 'TILE HUNG', 'RENDERED',
|
||||
'CLADDING', 'ACCESS ISSUES', 'FURTHER SURVEYOR NOTES', 'DATE',
|
||||
'NAME OF SURVEYOR'
|
||||
]
|
||||
|
||||
NON_INTRUSIVES_ELIGIBILITY_COLUMN = "Eligibility (Red/Yellow/Green)"
|
||||
|
||||
OLD_FORMAT_NON_INTRUSIVE_COLNAMES = ['WFT Findings', 'ECO Eligibility']
|
||||
|
|
@ -449,6 +457,8 @@ class AssetList:
|
|||
"Has the property been re-walled?" in self.raw_asset_list.columns
|
||||
)
|
||||
|
||||
self.new_format_non_insturives_present_v2 = 'TILE HUNG' in self.raw_asset_list.columns
|
||||
|
||||
# Names of columns
|
||||
self.landlord_property_id = landlord_property_id
|
||||
self.address1_colname = address1_colname
|
||||
|
|
@ -750,7 +760,7 @@ class AssetList:
|
|||
self.rename_map = {k: v for k, v in self.rename_map.items() if k is not None}
|
||||
|
||||
non_intrusive_columns = []
|
||||
if self.non_intrusives_present:
|
||||
if self.non_intrusives_present and not self.new_format_non_insturives_present_v2:
|
||||
non_intrusive_columns = self.NON_INTRUSIVES_COLNAMES
|
||||
|
||||
if self.non_intrusives_eligibility:
|
||||
|
|
@ -759,6 +769,9 @@ class AssetList:
|
|||
if self.new_format_non_insturives_present:
|
||||
non_intrusive_columns += self.NON_INTRUSIVES_NEW_FORMAT_COLNAMES
|
||||
|
||||
if self.new_format_non_insturives_present_v2:
|
||||
non_intrusive_columns += self.NON_INTRUSIVES_NEW_FORMAT_COLNAMES_V2
|
||||
|
||||
if self.old_format_non_intrusives_present:
|
||||
# We check if we have the ECO Eligibility column, which we might not have
|
||||
non_intrusive_columns = [
|
||||
|
|
@ -1308,9 +1321,15 @@ class AssetList:
|
|||
# Before we begin, we identify if a property has solar already as we use this
|
||||
# for identifying cavity jobs
|
||||
if self.non_intrusives_present:
|
||||
existing_solar_non_intrusives_check = (
|
||||
self.standardised_asset_list["non-intrusives: PV, ACCESS ISSUE, SEE NOTES"] == "SOLAR PV ON ROOF"
|
||||
)
|
||||
|
||||
if self.new_format_non_insturives_present_v2:
|
||||
existing_solar_non_intrusives_check = (
|
||||
self.standardised_asset_list["non-intrusives: ROOF ORIENTATION"] == "ALREADY HAS SOLAR PV"
|
||||
)
|
||||
else:
|
||||
existing_solar_non_intrusives_check = (
|
||||
self.standardised_asset_list["non-intrusives: PV, ACCESS ISSUE, SEE NOTES"] == "SOLAR PV ON ROOF"
|
||||
)
|
||||
elif self.old_format_non_intrusives_present:
|
||||
existing_solar_non_intrusives_check = (
|
||||
self.standardised_asset_list["non-intrusives: WFT Findings"].str.lower().str.strip().isin(
|
||||
|
|
@ -1549,7 +1568,7 @@ class AssetList:
|
|||
) & (
|
||||
~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].isin(
|
||||
["district heating", "communal heating", "communal gas boiler"]
|
||||
) & ~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].str.contains("gas ")
|
||||
) & ~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].astype(str).str.contains("gas ")
|
||||
)
|
||||
)
|
||||
|
||||
|
|
@ -2081,15 +2100,40 @@ class AssetList:
|
|||
|
||||
for _, row in blocks.iterrows():
|
||||
addr = str(row[self.STANDARD_ADDRESS_1])
|
||||
full_addr = row[self.STANDARD_FULL_ADDRESS]
|
||||
|
||||
# We also look for terms like "Odd", "even", "all" in the address to indicate if it should be just
|
||||
# the odds, evens or all of the numbers
|
||||
has_odd = (
|
||||
"(odd)" in addr.lower() or
|
||||
"(odd)" in full_addr.lower() or
|
||||
"(odds)" in addr.lower() or
|
||||
"(odds)" in full_addr.lower()
|
||||
)
|
||||
has_even = (
|
||||
"(even)" in addr.lower() or
|
||||
"(even)" in full_addr.lower() or
|
||||
"(evens)" in addr.lower() or
|
||||
"(evens)" in full_addr.lower()
|
||||
)
|
||||
|
||||
# 1 ─ Range (e.g. 1-7)
|
||||
m_range = RANGE_RE.search(addr)
|
||||
if m_range:
|
||||
|
||||
start, end = m_range.groups()
|
||||
start, end = int(re.match(r'\d+', start)[0]), int(re.match(r'\d+', end)[0])
|
||||
if start > end or (end - start) > 100:
|
||||
raise ValueError(f"Suspicious range '{addr}'")
|
||||
for n in range(start, end + 1):
|
||||
|
||||
# We define the looping range on whether we have odd, even or all numbers
|
||||
house_number_range = range(start, end + 1)
|
||||
if has_odd:
|
||||
house_number_range = [x for x in house_number_range if x % 2 != 0]
|
||||
if has_even:
|
||||
house_number_range = [x for x in house_number_range if x % 2 == 0]
|
||||
|
||||
for n in house_number_range:
|
||||
new = row.copy()
|
||||
new_addr = RANGE_RE.sub(str(n), addr, count=1)
|
||||
original_full_address = new[self.STANDARD_FULL_ADDRESS]
|
||||
|
|
@ -2107,9 +2151,9 @@ class AssetList:
|
|||
expanded_rows.append(new)
|
||||
continue
|
||||
|
||||
# 2 ─ Explicit list (e.g. 1, 2, 5 Block)
|
||||
# 2 ─ Explicit list (e.g. 1, 2, 5 Block) or split by an ampersand (e.g. 1 & 2 Block)
|
||||
nums = NUM_RE.findall(addr)
|
||||
if len(nums) > 1 and ',' in addr:
|
||||
if len(nums) > 1 and (',' in addr or '&' in addr):
|
||||
for n in nums:
|
||||
new = row.copy()
|
||||
new_addr = re.sub(NUM_RE, n, addr, count=1) # replace the first number only
|
||||
|
|
@ -2319,7 +2363,7 @@ class AssetList:
|
|||
self.standardised_asset_list["cavity_reason"] = np.where(
|
||||
self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE].isin(eligible_blocks),
|
||||
self.standardised_asset_list["cavity_reason"]
|
||||
+ " " + "(Flat in block with more than 50% eligible, but not eligible itself)",
|
||||
+ " " + "(Flat in block with more than 50% eligible)",
|
||||
self.standardised_asset_list["cavity_reason"]
|
||||
)
|
||||
|
||||
|
|
@ -2490,10 +2534,14 @@ class AssetList:
|
|||
if reconcile_programme:
|
||||
programme_data = programme_data[~pd.isnull(programme_data["project_code"])]
|
||||
else:
|
||||
|
||||
if programme_data["hubspot_status"].nunique() > 1:
|
||||
logger.info("Multiple hubspot_status found - are you sure you don't want to reconcile the programme?")
|
||||
|
||||
ready_to_be_scheduled = (
|
||||
(
|
||||
programme_data["hubspot_status"] == hubspot_config.HubspotProcessStatus.READY_TO_BE_SCHEDULED.label
|
||||
) & (~pd.isnull(programme_data["survey_date"]))
|
||||
)
|
||||
)
|
||||
# completed_works = (
|
||||
# (programme_data["hubspot_status"] !=
|
||||
|
|
@ -2544,13 +2592,13 @@ class AssetList:
|
|||
)
|
||||
else:
|
||||
# We shouldn't have any missing products
|
||||
programme_data = programme_data[
|
||||
~pd.isnull(programme_data["survey_date"])
|
||||
]
|
||||
# programme_data = programme_data[
|
||||
# ~pd.isnull(programme_data["survey_date"])
|
||||
# ]
|
||||
|
||||
if pd.isnull(programme_data["domna_product"]).sum():
|
||||
raise ValueError("Missing products")
|
||||
programme_data = programme_data.drop(columns=["solar_product", "cavity_product"])
|
||||
programme_data = programme_data.drop(columns=["solar_product", "cavity_product"])
|
||||
|
||||
product_df = (
|
||||
pd.DataFrame(self.CRM_PRODUCTS).T[["name", "id", "unit_price"]]
|
||||
|
|
@ -2591,6 +2639,13 @@ class AssetList:
|
|||
programme_data[self.EPC_API_DATA_NAMES["uprn"]]
|
||||
)
|
||||
|
||||
# Remove any negative UPRNs, which are not valid
|
||||
programme_data[uprn_column] = np.where(
|
||||
programme_data["estimated"].isin([1, True]),
|
||||
None,
|
||||
programme_data[uprn_column]
|
||||
)
|
||||
|
||||
# Add in some columns if we have them
|
||||
date_of_inspections = (
|
||||
"Non-Intrusives: Date of Inspection" if
|
||||
|
|
@ -2757,6 +2812,7 @@ class AssetList:
|
|||
columns={v: k for k, v in schema_mappings.items() if v is not None}
|
||||
)
|
||||
|
||||
programme_data['Postcode <DEAL postcode>'] = programme_data['Postcode <LISTING hs_zip>'].copy()
|
||||
programme_data['Installer <DEAL installer>'] = installer_name
|
||||
programme_data['Name <LISTING hs_name>'] = (
|
||||
programme_data['Full Address <LISTING full_address>'] + " ," + programme_data['Postcode <LISTING hs_zip>']
|
||||
|
|
@ -3225,6 +3281,8 @@ class AssetList:
|
|||
install_col = 'INSTALL/ CANCELLATION DATE'
|
||||
elif "INSTALL/CANCELLATION DATE" in master_data.columns:
|
||||
install_col = "INSTALL/CANCELLATION DATE"
|
||||
elif 'Measure 1 Install Date' in master_data.columns:
|
||||
install_col = 'Measure 1 Install Date'
|
||||
else:
|
||||
raise ValueError("No install or cancellation date")
|
||||
|
||||
|
|
@ -3264,6 +3322,8 @@ class AssetList:
|
|||
property_type_col = "PROPERTY TYPE As per table emailed"
|
||||
elif "PROPERTY TYPE As per table emailed" in master_data.columns:
|
||||
property_type_col = "PROPERTY TYPE As per table emailed"
|
||||
elif "PROPERTY TYPE" in master_data.columns:
|
||||
property_type_col = "PROPERTY TYPE"
|
||||
else:
|
||||
property_type_col = "PROPERTY TYPE (SEE DEEMED SCORES SHEET) Eg. 3W_Flat_1 (As per Matrix)"
|
||||
|
||||
|
|
|
|||
|
|
@ -59,47 +59,151 @@ def app():
|
|||
Property UPRN
|
||||
"""
|
||||
|
||||
# Southern - Jan list
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southern/New Programme/Jan 2025 List"
|
||||
data_filename = "SOUTHERN ASSETS January 2025 Additions Query 21.03.2025.xlsx"
|
||||
sheet_name = "Jan 2025 additions"
|
||||
postcode_column = 'Post Code'
|
||||
fulladdress_column = None
|
||||
address1_column = "NO."
|
||||
address1_method = None
|
||||
address_cols_to_concat = ["NO.", "Street / Block Name", "Town/Area"]
|
||||
# Pickering and Ferens
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Pickering & Ferens"
|
||||
data_filename = "SAP 9 vs SAP 10 Sava Intelligent Energy - Property List (190625).xlsx"
|
||||
sheet_name = "Sava Intelligent Energy - Prope"
|
||||
postcode_column = 'Postcode'
|
||||
fulladdress_column = 'Address'
|
||||
address1_column = None
|
||||
address1_method = "house_number_extraction"
|
||||
address_cols_to_concat = []
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = None
|
||||
landlord_os_uprn = None
|
||||
landlord_property_type = "Archetype" # Using the inspections property type
|
||||
landlord_built_form = "Archetype"
|
||||
landlord_property_type = "Property Type" # Using the inspections property type
|
||||
landlord_built_form = "Archetype 2"
|
||||
landlord_wall_construction = None
|
||||
landlord_roof_construction = None
|
||||
landlord_heating_system = None
|
||||
landlord_existing_pv = None
|
||||
landlord_property_id = "SH Property Reference"
|
||||
landlord_sap = None
|
||||
outcomes_filename = [
|
||||
os.path.join(data_folder, "RT - Southern Housing Group - JJC.xlsx"),
|
||||
os.path.join(data_folder, "RT - SOUTHERN OUTCOMES - SCIS Merged.xlsx"),
|
||||
]
|
||||
outcomes_sheetname = ["Feedback", "Collated"]
|
||||
outcomes_postcode = ["Poscode", "Postcode"]
|
||||
outcomes_houseno = ["No.", "No"]
|
||||
outcomes_id = ["UPRNs", None]
|
||||
outcomes_address = ["Address", "Address"]
|
||||
landlord_property_id = "UPRN"
|
||||
landlord_sap = "SAP Rating (RdSAP 10)"
|
||||
outcomes_filename = []
|
||||
outcomes_sheetname = []
|
||||
outcomes_postcode = []
|
||||
outcomes_houseno = []
|
||||
outcomes_id = []
|
||||
outcomes_address = []
|
||||
master_filepaths = [
|
||||
os.path.join(data_folder, "southern_submissions/CAVITY'S - DECEMBER 2018-Table 1.csv"),
|
||||
os.path.join(data_folder, "southern_submissions/CAVITY'S 2019-Table 1.csv"),
|
||||
os.path.join(data_folder, "southern_submissions/CAVITY'S ECO4-Table 1.csv"),
|
||||
os.path.join(data_folder, "southern_submissions/LOFT'S-Table 1.csv"),
|
||||
os.path.join(data_folder, "PICKERING & FERENS ROLLING MASTER SHEET HEDGEFUND - 26.7.24 - K.csv"),
|
||||
os.path.join(data_folder, "PICKERING & FERENS NEW MASTER GBIS UPDATED 21.8.24 - M - For Analysis.csv"),
|
||||
]
|
||||
master_to_asset_list_filepath = None
|
||||
phase = False
|
||||
ecosurv_landlords = "southern"
|
||||
ecosurv_landlords = "pickering"
|
||||
asset_list_header = 0
|
||||
landlord_block_reference = None
|
||||
master_id_colnames = [None, None, None, None]
|
||||
master_id_colnames = [None, None]
|
||||
|
||||
# Colchester
|
||||
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester"
|
||||
# data_filename = "Warmfront data- Colchester Borough Homes (Complete).xlsx"
|
||||
# sheet_name = "Sheet1"
|
||||
# postcode_column = 'Full Address.1'
|
||||
# fulladdress_column = "Full Address"
|
||||
# address1_column = None
|
||||
# address1_method = "first_word"
|
||||
# address_cols_to_concat = []
|
||||
# missing_postcodes_method = None
|
||||
# landlord_year_built = "Build Date"
|
||||
# landlord_os_uprn = None
|
||||
# landlord_property_type = "Property Type"
|
||||
# landlord_wall_construction = "Wallinsul"
|
||||
# landlord_heating_system = "HeatSorc"
|
||||
# landlord_existing_pv = None
|
||||
# landlord_property_id = "Property Reference"
|
||||
# outcomes_filename = []
|
||||
# outcomes_sheetname = []
|
||||
# outcomes_postcode = []
|
||||
# outcomes_houseno = []
|
||||
# outcomes_id = []
|
||||
# outcomes_address = []
|
||||
# master_filepaths = []
|
||||
# master_to_asset_list_filepath = None
|
||||
# asset_list_header = 0
|
||||
# landlord_built_form = None
|
||||
# landlord_roof_construction = None
|
||||
# landlord_sap = None
|
||||
# landlord_block_reference = None
|
||||
# phase = False
|
||||
# ecosurv_landlords = None
|
||||
# master_id_colnames = []
|
||||
|
||||
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot"
|
||||
# data_filename = "EalingFlats.xlsx"
|
||||
# sheet_name = "Sheet1"
|
||||
# postcode_column = 'Postcode'
|
||||
# fulladdress_column = "Address"
|
||||
# address1_column = None
|
||||
# address1_method = "house_number_extraction"
|
||||
# address_cols_to_concat = []
|
||||
# missing_postcodes_method = None
|
||||
# landlord_year_built = None
|
||||
# landlord_os_uprn = None
|
||||
# landlord_property_type = None # Using the inspections property type
|
||||
# landlord_built_form = None
|
||||
# landlord_wall_construction = None
|
||||
# landlord_roof_construction = None
|
||||
# landlord_heating_system = None
|
||||
# landlord_existing_pv = None
|
||||
# landlord_property_id = "Property ref"
|
||||
# landlord_sap = None
|
||||
# outcomes_filename = []
|
||||
# outcomes_sheetname = []
|
||||
# outcomes_postcode = []
|
||||
# outcomes_houseno = []
|
||||
# outcomes_id = []
|
||||
# outcomes_address = []
|
||||
# master_filepaths = []
|
||||
# master_to_asset_list_filepath = None
|
||||
# phase = False
|
||||
# ecosurv_landlords = None
|
||||
# asset_list_header = 0
|
||||
# landlord_block_reference = "Block Ref"
|
||||
# master_id_colnames = []
|
||||
|
||||
# Southern - Jan list
|
||||
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southern/New Programme/Jan 2025 List"
|
||||
# data_filename = "SOUTHERN ASSETS January 2025 Additions Query 21.03.2025.xlsx"
|
||||
# sheet_name = "Jan 2025 additions"
|
||||
# postcode_column = 'Post Code'
|
||||
# fulladdress_column = None
|
||||
# address1_column = "NO."
|
||||
# address1_method = None
|
||||
# address_cols_to_concat = ["NO.", "Street / Block Name", "Town/Area"]
|
||||
# missing_postcodes_method = None
|
||||
# landlord_year_built = None
|
||||
# landlord_os_uprn = None
|
||||
# landlord_property_type = None # Using the inspections property type
|
||||
# landlord_built_form = None
|
||||
# landlord_wall_construction = None
|
||||
# landlord_roof_construction = None
|
||||
# landlord_heating_system = None
|
||||
# landlord_existing_pv = None
|
||||
# landlord_property_id = "SH Property Reference"
|
||||
# landlord_sap = None
|
||||
# outcomes_filename = [
|
||||
# os.path.join(data_folder, "RT - Southern Housing Group - JJC.xlsx"),
|
||||
# os.path.join(data_folder, "RT - SOUTHERN OUTCOMES - SCIS Merged.xlsx"),
|
||||
# ]
|
||||
# outcomes_sheetname = ["Feedback", "Collated"]
|
||||
# outcomes_postcode = ["Poscode", "Postcode"]
|
||||
# outcomes_houseno = ["No.", "No"]
|
||||
# outcomes_id = ["UPRNs", None]
|
||||
# outcomes_address = ["Address", "Address"]
|
||||
# master_filepaths = [
|
||||
# os.path.join(data_folder, "southern_submissions/CAVITY'S - DECEMBER 2018-Table 1.csv"),
|
||||
# os.path.join(data_folder, "southern_submissions/CAVITY'S 2019-Table 1.csv"),
|
||||
# os.path.join(data_folder, "southern_submissions/CAVITY'S ECO4-Table 1.csv"),
|
||||
# os.path.join(data_folder, "southern_submissions/LOFT'S-Table 1.csv"),
|
||||
# ]
|
||||
# master_to_asset_list_filepath = None
|
||||
# phase = False
|
||||
# ecosurv_landlords = "southern"
|
||||
# asset_list_header = 0
|
||||
# landlord_block_reference = None
|
||||
# master_id_colnames = [None, None, None, None]
|
||||
|
||||
# NCHA
|
||||
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/NCHA"
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ class HubspotProcessStatus(IntEnum):
|
|||
# The property didn't get access and needs sign off
|
||||
SURVEYED_NO_ACCESS_NEEDS_SIGN_OFF = 2, "SURVEYED - NO ACCESS - NEED SIGN OFF"
|
||||
# The survey has been completed. We don't have any update as to whether the property has been installed
|
||||
SURVEYED_COMPLETED_SIGNED_OFF = 3, "SURVEYED - COMPLETED - SIGNED OFF"
|
||||
SURVEYED_COMPLETED_SIGNED_OFF = 3, "SURVEYED - AUTOMATED SIGNED OFF"
|
||||
# The property turned out to be ineligible
|
||||
NOT_VIABLE = 4, "NOT VIABLE"
|
||||
# The property is with the installer. This will likely be the default for historic programmes
|
||||
|
|
@ -79,7 +79,7 @@ CRM_UPLOAD_COLUMNS = [
|
|||
'Last EPC: Room Height <LISTING last_epc__room_height>',
|
||||
'Last EPC: Age Band <LISTING last_epc__age_band>', 'Deal Stage <DEAL dealstage>',
|
||||
'Pipeline <DEAL pipeline>', 'Expected Commencement Date <DEAL expected_commencement_date>',
|
||||
'Deal Name <DEAL dealname>', 'Project Code <DEAL project_code>',
|
||||
'Deal Name <DEAL dealname>', 'Project Code <DEAL project_code>', 'Postcode <DEAL postcode>',
|
||||
'Product ID <LINE_ITEM hs_product_id>', 'Name <LINE_ITEM name>', 'Unit price <LINE_ITEM price>',
|
||||
'Quantity <LINE_ITEM quantity>', 'Deal Owner', 'Amount <DEAL amount>', 'Installer <DEAL installer>'
|
||||
]
|
||||
|
|
|
|||
|
|
@ -2,6 +2,32 @@ import os
|
|||
import pandas as pd
|
||||
from asset_list.AssetList import AssetList
|
||||
|
||||
import re
|
||||
|
||||
|
||||
def normalize_uk_phone(number: str | float | int) -> str | None:
|
||||
if pd.isna(number):
|
||||
return None
|
||||
|
||||
number = str(number)
|
||||
number = re.sub(r"[^\d+]", "", number)
|
||||
|
||||
# Handle common short inputs: add '0' if likely missing
|
||||
if re.match(r"^7\d{8,9}$", number) or re.match(r"^1\d{8,9}$", number):
|
||||
number = "0" + number
|
||||
|
||||
# Convert to international format
|
||||
if number.startswith("0"):
|
||||
number = "+44" + number[1:]
|
||||
elif number.startswith("0044"):
|
||||
number = "+" + number[2:]
|
||||
|
||||
# Must be +44 followed by 10 digits (some area codes may vary)
|
||||
if re.match(r"^\+44\d{9,10}$", number):
|
||||
return number
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def app():
|
||||
"""
|
||||
|
|
@ -18,27 +44,26 @@ def app():
|
|||
"""
|
||||
|
||||
# inputs:
|
||||
reconcile_programme = False # If True, the hubspot upload will include all properties with a project code
|
||||
customer_domain = "https://medway.gov.uk"
|
||||
installer_name = "SGEC"
|
||||
reconcile_programme = True # If True, the hubspot upload will include all properties with a project code
|
||||
customer_domain = "https://ealing.gov.uk"
|
||||
installer_name = "SCIS"
|
||||
asset_list_filepath = (
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Medway/Hubspot/Reviewed programme - 2025-05-27.xlsx"
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot/20250707 Ealing Flats - Prepared "
|
||||
"programme.xlsx"
|
||||
)
|
||||
asset_list_sheet_name = "Finalised Route"
|
||||
asset_list_sheet_name = "Standardised Asset List"
|
||||
asset_list_header = 0
|
||||
|
||||
contact_details_filepath = (
|
||||
None
|
||||
)
|
||||
contacts_sheet_name = "Sheet1"
|
||||
contacts_landlord_property_id = "landlord_property_id"
|
||||
contact_details_filepath = None
|
||||
contacts_sheet_name = "Sheet 1"
|
||||
contacts_landlord_property_id = "UPRN"
|
||||
contacts_phone_number_column = "phone_number"
|
||||
contacts_secondary_phone_number_column = "secondary_phone_number"
|
||||
contacts_secondary_contact_full_name = "secondary_contact_full_name"
|
||||
contacts_email_column = "email"
|
||||
contacts_fullname_column = "fullname"
|
||||
contacts_firstname_column = "firstname"
|
||||
contacts_lastname_column = "lastname"
|
||||
contacts_firstname_column = "First Name"
|
||||
contacts_lastname_column = "Last Name"
|
||||
|
||||
existing_programme_filepath = None
|
||||
|
||||
|
|
@ -65,12 +90,12 @@ def app():
|
|||
)
|
||||
|
||||
# Remove the existing programme
|
||||
existing_programme = pd.read_csv(existing_programme_filepath, encoding="utf-8-sig")
|
||||
asset_list.hubspot_data = asset_list.hubspot_data[
|
||||
~asset_list.hubspot_data["Domna Property ID <LISTING domna_property_id>"].isin(
|
||||
existing_programme['Domna Property ID'].values
|
||||
)
|
||||
]
|
||||
# existing_programme = pd.read_csv(existing_programme_filepath, encoding="utf-8-sig")
|
||||
# asset_list.hubspot_data = asset_list.hubspot_data[
|
||||
# ~asset_list.hubspot_data["Domna Property ID <LISTING domna_property_id>"].isin(
|
||||
# existing_programme['Domna Property ID'].values
|
||||
# )
|
||||
# ]
|
||||
|
||||
# Get the filepath and the filename. Append hubspot upload to the filename. We also change the file type to csv
|
||||
directory, filename = os.path.split(asset_list_filepath)
|
||||
|
|
@ -86,3 +111,66 @@ def app():
|
|||
|
||||
# Just store locally
|
||||
asset_list.hubspot_data.to_csv(output_filepath, index=False, encoding="utf-8-sig")
|
||||
|
||||
# # TODO: Set this up separately, but we associate multiple contacts to the same deal
|
||||
# contact_details = pd.read_csv(
|
||||
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot "
|
||||
# "Upload/Hubspot/contact "
|
||||
# "details.csv"
|
||||
# )
|
||||
#
|
||||
# # contacts_phone_number_column = "phone_number"
|
||||
# # contacts_secondary_phone_number_column = "secondary_phone_number"
|
||||
# # contacts_secondary_contact_full_name = "secondary_contact_full_name"
|
||||
# # contacts_email_column = "email"
|
||||
# # contacts_fullname_column = "fullname"
|
||||
# # contacts_firstname_column = "First Name"
|
||||
# # contacts_lastname_column = "Last Name"
|
||||
# contact_details["phone_number"] = contact_details["Mobile Phone"].copy()
|
||||
# # If phone number is NaN, we will use the landline number
|
||||
# contact_details["phone_number"] = contact_details["phone_number"].fillna(contact_details["Landline"])
|
||||
# contact_details["secondary_phone_number"] = contact_details["Landline"].copy()
|
||||
# # If secondary phone number is the same as primary, we remove it
|
||||
# import numpy as np
|
||||
# contact_details["secondary_phone_number"] = np.where(
|
||||
# contact_details["secondary_phone_number"] == contact_details["phone_number"],
|
||||
# np.nan,
|
||||
# contact_details["secondary_phone_number"]
|
||||
# )
|
||||
# contact_details = contact_details[
|
||||
# ['Property Reference Number (Main Address) (Property)', "Email Address", "phone_number",
|
||||
# "secondary_phone_number", "First Name", "Last Name"]].copy().rename(
|
||||
# columns={"Property Reference Number (Main Address) (Property)": "landlord_proprty_id"}
|
||||
# )
|
||||
# contact_details["fullname"] = contact_details["First Name"] + " " + contact_details["Last Name"]
|
||||
# # Format the phone numbers
|
||||
#
|
||||
# contact_details["phone_number"] = contact_details["phone_number"].astype(int).astype(str).apply(
|
||||
# normalize_uk_phone)
|
||||
# contact_details["secondary_phone_number"] = contact_details["secondary_phone_number"].astype("Int64").astype(
|
||||
# str).apply(
|
||||
# normalize_uk_phone)
|
||||
#
|
||||
# # Add in the Hubspot deal data
|
||||
# hubspot_data = pd.read_csv(
|
||||
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/Hubspot/"
|
||||
# "property-status.csv",
|
||||
# encoding="utf-8-sig"
|
||||
# )
|
||||
# # Merge on contact details
|
||||
# contact_details = hubspot_data[["Landlord Property ID", "Deal ID"]].merge(
|
||||
# contact_details,
|
||||
# how="left",
|
||||
# right_on="landlord_proprty_id",
|
||||
# left_on="Landlord Property ID"
|
||||
# )
|
||||
#
|
||||
# contact_details = contact_details.drop(columns=["landlord_proprty_id"])
|
||||
#
|
||||
# # Store as csv
|
||||
# contact_details.to_csv(
|
||||
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar "
|
||||
# "Programme Hubspot Upload/Hubspot/"
|
||||
# "contact_details.csv",
|
||||
# index=False, encoding="utf-8-sig"
|
||||
# )
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ import numpy as np
|
|||
STANDARD_BUILT_FORMS = {
|
||||
"unknown",
|
||||
# Houses
|
||||
"end-terrace", "semi-detached", "detached", "mid-terrace",
|
||||
"end-terrace", "semi-detached", "detached", "mid-terrace", "enclosed mid-terrace", "enclosed end-terrace",
|
||||
# Flats
|
||||
"ground floor", "mid-floor", "top-floor", "basement", "low rise", "high rise",
|
||||
}
|
||||
|
|
@ -359,5 +359,12 @@ BUILT_FORM_MAPPINGS = {
|
|||
'1983-90 MID TERR': 'mid-terrace',
|
||||
'1976-82 SEMI DET': 'semi-detached',
|
||||
'PRE 1900 MID TERR': 'mid-terrace',
|
||||
None: 'unknown'
|
||||
None: 'unknown',
|
||||
|
||||
'SEMI-DETACHED': 'semi-detached',
|
||||
'DETACHED': 'detached',
|
||||
'MID TERRACE': 'mid-terrace',
|
||||
'END TERRACE': 'end-terrace',
|
||||
'ENCLOSED MID': 'enclosed mid-terrace'
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -37,7 +37,9 @@ def get_data(
|
|||
"mid-terrace": "Mid-Terrace",
|
||||
"end-terrace": "End-Terrace",
|
||||
"semi-detached": "Semi-Detached",
|
||||
"detached": "Detached"
|
||||
"detached": "Detached",
|
||||
"enclosed end-terrace": "End-Terrace",
|
||||
"enclosed mid-terrace": "Mid-Terrace",
|
||||
}
|
||||
|
||||
epc_data = []
|
||||
|
|
@ -101,7 +103,6 @@ def get_data(
|
|||
else:
|
||||
# Try splitting on space
|
||||
add1 = full_address.split(" ")[0].strip()
|
||||
|
||||
else:
|
||||
add1 = str(house_number)
|
||||
searcher = SearchEpc(
|
||||
|
|
|
|||
54
etl/customers/Colchester/July 2025 Finalised Route.py
Normal file
54
etl/customers/Colchester/July 2025 Finalised Route.py
Normal file
|
|
@ -0,0 +1,54 @@
|
|||
import pandas as pd

# Folder that holds every workbook this script reads and writes.
PROJECT_DIR = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester/July Finalised "
    "Project"
)
# Workbook containing both the cavity and the solar route sheets.
STANDARDISED_WORKBOOK = f"{PROJECT_DIR}/20250708 Colchester Borough Homes- Standardised.xlsx"
# Columns taken from the comments workbook and attached to each route.
# NOTE(review): "URPN" is the header as spelled in the workbook - presumably a
# typo for UPRN in the source data; confirm before renaming.
COMMENT_COLUMNS = [
    "URPN", 'Unnamed: 6', 'SHDF Live', 'SHDF Removed', 'SHDF Reserve', '25-26 List (138 to EPC)'
]


def _attach_comments(route, comment_rows):
    """Left-join the comment columns onto a route by landlord property id."""
    return route.merge(
        comment_rows, left_on="landlord_property_id", right_on="URPN", how="left"
    )


# Load the comments list and the two routes
comments_df = pd.read_excel(f"{PROJECT_DIR}/CBH_RetroTeamList_amended_25-06-05.xlsx")
cavity_route = pd.read_excel(STANDARDISED_WORKBOOK, sheet_name="July 2025 Route - Cavity")
solar_route = pd.read_excel(STANDARDISED_WORKBOOK, sheet_name="July 2025 Route - Solar")

# Merge the comments onto each route
comments = comments_df[COMMENT_COLUMNS].copy()
cavity_route = _attach_comments(cavity_route, comments)
solar_route = _attach_comments(solar_route, comments)

# Properties in the comments list that appear on neither route
on_cavity_route = comments_df["URPN"].isin(cavity_route["landlord_property_id"])
on_solar_route = comments_df["URPN"].isin(solar_route["landlord_property_id"])
not_on_routes = comments_df[~(on_cavity_route | on_solar_route)]

# Store the leftovers first, then the two routes
for frame, workbook_name in (
    (not_on_routes, "Properties not on routes.xlsx"),
    (cavity_route, "Cavity Route.xlsx"),
    (solar_route, "Solar Route.xlsx"),
):
    frame.to_excel(f"{PROJECT_DIR}/{workbook_name}", index=False)
|
||||
289
etl/customers/bromford/solar_pv_cleanup.py
Normal file
289
etl/customers/bromford/solar_pv_cleanup.py
Normal file
|
|
@ -0,0 +1,289 @@
|
|||
import pandas as pd
|
||||
from tqdm import tqdm
|
||||
from backend.SearchEpc import SearchEpc
|
||||
import numpy as np
|
||||
|
||||
# Step 1: match every row of the Solar PV contact list to a landlord_property_id in the standardised asset list,
# then write the contact list back out with that id attached.
contact_list = pd.read_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/Bromford - Solar "
    "PV address list - second wave KLD - PP.csv"
)
contact_list["house_no"] = contact_list.apply(
    lambda x: SearchEpc.get_house_number(
        address=str(x["Address 1: Street 1"]).strip(),
        postcode=str(x["Postal Code"]).strip(),
    ),
    axis=1,
)

asset_list = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/asset_list - "
    "Standardised (1).xlsx",
    sheet_name="Standardised Asset List"
)

# Hand-resolved addresses. Keys are compared against the RAW "Address 1: Street 1" value, so the trailing spaces
# are significant and must stay exactly as they are.
manual_matches = {
    '1 The Beck': 40692,
    '3 The Beck ': 40693,
    '2 Orchard Close ': 7924,
    '3 Croxall Road': 40650,
    '4 Ward Road ': 33175,
}

# Loop-invariant views of the asset list, computed once instead of once per contact.
full_address_no_commas = asset_list["domna_full_address"].str.replace(",", "", regex=False)
address_1_as_text = asset_list["domna_address_1"].astype(str)

lookup = []
missed = []
for _, x in tqdm(contact_list.iterrows(), total=len(contact_list)):
    raw_street = x["Address 1: Street 1"]

    if raw_street in manual_matches:
        lookup.append({"UPRN": x["UPRN"], "landlord_property_id": manual_matches[raw_street]})
        continue

    # Without both an address and a postcode there is nothing to match on.
    if pd.isnull(raw_street) or pd.isnull(x["Postal Code"]):
        missed.append(x["UPRN"])
        continue

    street = str(raw_street).strip()
    postcode = str(x["Postal Code"]).strip()

    # Literal (non-regex) matching so characters such as "." or "(" in an address are not interpreted as a
    # pattern; na=False keeps the mask strictly boolean when the asset list has blank cells.
    postcode_matches = asset_list["domna_postcode"].str.contains(postcode, regex=False, na=False)

    # Attempt 1: the street appears somewhere inside the full address.
    df = asset_list[full_address_no_commas.str.contains(street, regex=False, na=False) & postcode_matches]

    if df.shape[0] != 1:
        # Attempt 2: the full address is exactly the street. The comparison is parenthesised because ``&`` binds
        # tighter than ``==``.
        df = asset_list[(full_address_no_commas == street) & postcode_matches]

    if df.shape[0] != 1:
        # Attempt 3: fall back to the extracted house number within the postcode.
        df = asset_list[(address_1_as_text == str(x["house_no"])) & postcode_matches]

    if df.shape[0] != 1:
        # Still ambiguous or unmatched: record it for manual completion.
        missed.append(x["UPRN"])
        continue

    lookup.append(
        {
            "UPRN": x["UPRN"],
            "landlord_property_id": df["landlord_property_id"].values[0],
        }
    )

# Explicit columns so the merge below still works when nothing was matched.
lookup = pd.DataFrame(lookup, columns=["UPRN", "landlord_property_id"])

contact_list = contact_list.merge(lookup, how="left", on="UPRN")

# Store
contact_list.to_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/Bromford - Solar "
    "PV address list - second wave KLD - PP with landlord_property_id.csv",
    index=False
)
|
||||
|
||||
# I manually completed the lookup for the missed ones. We now read it back in and pull in the properties from the
# standardised asset list.
# NOTE(review): ``contacts_complete`` is read here but nothing below uses it - the merges keep working from
# ``contact_list``. Confirm whether the manually completed file was meant to replace ``contact_list``.
contacts_complete = pd.read_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/Bromford - Solar "
    "PV address list - second wave KLD - PP with landlord_property_id.csv"
)

new_data = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/Master Sheet "
    "Solar PV installs.xlsx",
    sheet_name="Sheet1"
)

contact_list = contact_list.merge(
    new_data,
    how="left",
    left_on="UPRN",
    right_on="CE UPRN"
)

# Compare ids as text on BOTH sides. Previously "Legacy UPRN" was text while the asset ids were compared raw in one
# place and as int in another, so the ``missed`` filter could never match.
# NOTE(review): assumes landlord_property_id renders as the same text as the Int64 "Legacy UPRN" (i.e. it is not
# stored as a float such as 40692.0) - confirm against the workbook.
contact_list["Legacy UPRN"] = contact_list["Legacy UPRN"].astype("Int64").astype(str)
asset_ids_as_text = asset_list["landlord_property_id"].astype(str)

route = asset_list[asset_ids_as_text.isin(contact_list["Legacy UPRN"])].copy()
route["landlord_property_id"] = route["landlord_property_id"].astype(str)

# Add the new heating data
route2 = contact_list.merge(
    route,
    how="left",
    right_on="landlord_property_id",
    left_on="Legacy UPRN"
)

# Contacts whose Legacy UPRN has no counterpart in the asset list.
missed = contact_list[~contact_list["Legacy UPRN"].isin(route["landlord_property_id"])]

# Store the route (``missed`` is only kept in memory; it is not written out here).
route2.to_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/route.csv",
    index=False
)
|
||||
|
||||
# Collect phone / email / name details for the properties in the HubSpot upload sheet.
upload_dir = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/"

contact_details_filepath = (
    upload_dir + "Hubspot/Bromford - Solar PV address list - second wave KLD - PP with landlord_property_id.xlsx"
)

contacts_filenames = [
    upload_dir + "contact details/" + export_name
    for export_name in (
        "FAO Paul Contact Details-Table 1.csv",
        "Green Contact Details-Table 1.csv",
        "Main Contact Details-Table 1.csv",
    )
]

merge_to = pd.read_excel(contact_details_filepath)

property_ref = "Property Reference Number (Main Address) (Property)"
contact_fields = ["Landline", "Mobile Phone", "Email Address"]
# The upload sheet's UPRNs as text, which is what the CSV property references are compared against.
upload_uprns = merge_to["UPRN"].astype(str)


def _contacts_in_upload(csv_path):
    """Read one contact-details export and keep the rows whose property reference is in the upload sheet."""
    export = pd.read_csv(csv_path, encoding="utf-8-sig")
    in_upload = export[property_ref].isin(upload_uprns)
    return export.loc[in_upload, [property_ref, *contact_fields, "First Name", "Last Name"]]


lookup = (
    pd.concat([_contacts_in_upload(csv_path) for csv_path in contacts_filenames])
    # Drop entries where landline, mobile and email are all NaN
    .dropna(subset=contact_fields, how="all")
    # NOTE(review): de-duplicates on the contact fields only, so two properties sharing identical contact details
    # keep a single row - confirm that is intended.
    .drop_duplicates(contact_fields)
    # Order within each property by email, then landline, then mobile (missing values sort last)
    .sort_values(
        [property_ref, "Email Address", "Landline", "Mobile Phone"],
        ascending=[True, True, True, True],
    )
)

# Store
lookup.to_csv(upload_dir + "Hubspot/contact details.csv", index=False)
|
||||
|
||||
def _best_contact(entries):
    """Pick one contact row for a property: prefer rows with an email, then rows with any phone number."""
    has_email = entries["Email Address"].notna()
    if has_email.any():
        entries = entries[has_email]

    has_phone = entries["Landline"].notna() | entries["Mobile Phone"].notna()
    if has_phone.any():
        entries = entries[has_phone]

    # Whatever survived the two preferences, take the first entry.
    return entries.iloc[0]


lookup2 = pd.DataFrame(
    [
        _best_contact(property_entries)
        for _, property_entries in lookup.groupby("Property Reference Number (Main Address) (Property)")
    ]
)
|
||||
|
||||
# Build one primary and (where available) one secondary contact per property from the ``lookup`` table built above.
# This works on the real ``lookup``; it is no longer replaced by an empty placeholder frame, and the result is left
# in ``final_df`` rather than being handed to ``ace_tools``, which nothing else in this script depends on.
property_ref_col = "Property Reference Number (Main Address) (Property)"
result_columns = ["Property ID", "Primary Email", "Primary Phone", "Secondary Email", "Secondary Phone"]


def _first_present(*values):
    """Return the first value that is not null, or None when all are null.

    ``a or b`` cannot be used for this because NaN is truthy.
    """
    for value in values:
        if not pd.isnull(value):
            return value
    return None


results = []

for prop_id, group in lookup.groupby(property_ref_col):
    # Only rows with an email AND at least one phone number are considered.
    # ``.copy()`` so the score column below is written to an independent frame, not a slice of ``lookup``.
    filtered = group[
        group["Email Address"].notna() &
        (group["Landline"].notna() | group["Mobile Phone"].notna())
    ].copy()

    if filtered.empty:
        continue

    # Sort by presence of phone numbers (prioritise those with both)
    filtered["contact_score"] = (
        filtered["Landline"].notna().astype(int) +
        filtered["Mobile Phone"].notna().astype(int)
    )
    filtered = filtered.sort_values("contact_score", ascending=False)

    primary = filtered.iloc[0]

    # The secondary contact must not repeat the primary's number. The filter above guarantees the primary has a
    # mobile or a landline, so these two branches are exhaustive.
    if not pd.isnull(primary["Mobile Phone"]):
        candidates = filtered[filtered["Mobile Phone"] != primary["Mobile Phone"]]
    else:
        candidates = filtered[filtered["Landline"] != primary["Landline"]]

    secondary = candidates.iloc[0] if len(candidates) > 0 else None

    results.append({
        "Property ID": prop_id,
        "Primary Email": primary["Email Address"],
        # We use mobile as the primary number and fall back to the landline.
        "Primary Phone": _first_present(primary["Mobile Phone"], primary["Landline"]),
        "Secondary Email": secondary["Email Address"] if secondary is not None else None,
        "Secondary Phone": (
            _first_present(secondary["Mobile Phone"], secondary["Landline"]) if secondary is not None else None
        ),
    })

# Explicit columns so the frame has a stable schema even when no property qualified.
final_df = pd.DataFrame(results, columns=result_columns)
|
||||
|
||||
# We set up primary and secondary phone numbers. We use mobile as the primary

# We have duplicates; keep one row per property, prioritising entries that have an email. Sorting with
# ``na_position="last"`` puts rows with an email first within each property, and ``keep="first"`` retains that row.
# (Sorting by property only and keeping the last row did not prefer rows with an email.)
lookup2 = lookup.sort_values(
    ["Property Reference Number (Main Address) (Property)", "Email Address"],
    na_position="last",
).drop_duplicates(
    "Property Reference Number (Main Address) (Property)", keep="first"
)

# TODO: Get into the standardised asset list format
# TODO: Add the deal postcode to Hubspot
# TODO: Upload the deal postcode
|
||||
75
etl/customers/ealing/prepare_for_hubspot.py
Normal file
75
etl/customers/ealing/prepare_for_hubspot.py
Normal file
|
|
@ -0,0 +1,75 @@
|
|||
import numpy as np
|
||||
import pandas as pd
|
||||
from asset_list.hubspot.config import HubspotProcessStatus
|
||||
|
||||
# Completion tracker for the Ealing flats; one row per flat.
project_data = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot/Ealing Flats Completion Tracker JW "
    "170625.xlsx",
    sheet_name="All_Flats"
)

# Derive a HubSpot status for submitted surveys, depending on the PAS version; every other row keeps None.
project_data["hubspot_status"] = None
is_submitted = project_data["Status"] == "Submitted"
project_data.loc[
    is_submitted & (project_data["PAS"] == "PAS2023"), "hubspot_status"
] = HubspotProcessStatus.SURVEYED_COMPLETED_SIGNED_OFF.label
project_data.loc[
    is_submitted & (project_data["PAS"] == "PAS2019"), "hubspot_status"
] = "SURVEYED UNDER 2019 - NEEDS RE-SURVEY"

# Project code is the block reference with a fixed prefix.
project_data["project_code"] = "EALING-FLATS-" + project_data["Block Ref"].astype(str)
|
||||
|
||||
asset_list = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot/20250707 Ealing Flats - Standardised.xlsx",
    sheet_name="Standardised Asset List"
)

# Join keys are compared as text on both sides.
for id_column in ("landlord_property_id", "incorrect_landlord_property_id"):
    asset_list[id_column] = asset_list[id_column].astype(str)
project_data["Property ref"] = project_data["Property ref"].astype(str)

# We need to update the status of properties that have already been surveyed. The tracker's columns come in with a
# "_project" suffix because the asset list already has columns of the same name.
asset_list2 = asset_list.merge(
    project_data[["Property ref", "hubspot_status", "project_code"]],
    how="left",
    right_on="Property ref",
    left_on="incorrect_landlord_property_id",
    suffixes=("", "_project")
)

# Tracker status wins where there is one; otherwise keep the asset list's status.
asset_list2["hubspot_status"] = np.where(
    ~pd.isna(asset_list2["hubspot_status_project"]),
    asset_list2["hubspot_status_project"],
    asset_list2["hubspot_status"]
)

# Same precedence for the project code: tracker code, then the asset list's own code, then the landlord property
# id. Previously the tracker's code was merged in and then dropped without ever being used.
asset_list2["project_code"] = np.where(
    ~pd.isna(asset_list2["project_code_project"]),
    asset_list2["project_code_project"],
    np.where(
        ~pd.isna(asset_list2["project_code"]),
        asset_list2["project_code"],
        asset_list2["landlord_property_id"]
    )
)

asset_list2 = asset_list2.drop(columns=["hubspot_status_project", "project_code_project"])

# Properties without a cavity reason get this default; the solar reason is cleared for every row.
asset_list2["cavity_reason"] = np.where(
    pd.isnull(asset_list2["cavity_reason"]),
    "Non-Intrusive Data Shows Empty Cavity: SAP Rating 55-68",
    asset_list2["cavity_reason"]
)
asset_list2["solar_reason"] = None
|
||||
|
||||
# Carry the block analysis and geographical areas sheets over from the standardised asset list workbook
standardised_workbook = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot/20250707 Ealing Flats - Standardised.xlsx"
)
carried_over_sheets = pd.read_excel(
    standardised_workbook,
    sheet_name=["Block Analysis", "Geographical Areas"],
)
block_analysis_df = carried_over_sheets["Block Analysis"]
geographical_areas = carried_over_sheets["Geographical Areas"]

# Write the updated asset list together with the two carried-over sheets into the prepared programme workbook
filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot/20250707 Ealing Flats - Prepared "
            "programme.xlsx")
output_sheets = (
    ("Standardised Asset List", asset_list2),
    ("Block Analysis", block_analysis_df),
    ("Geographical Areas", geographical_areas),
)
with pd.ExcelWriter(filename) as writer:
    for output_sheet_name, output_frame in output_sheets:
        output_frame.to_excel(writer, sheet_name=output_sheet_name, index=False)
|
||||
116
etl/customers/mhs/new_programme.py
Normal file
116
etl/customers/mhs/new_programme.py
Normal file
|
|
@ -0,0 +1,116 @@
|
|||
#
|
||||
import pandas as pd
|
||||
|
||||
# The standardised asset list and the three programme sheets all live in the same workbook.
programme_workbook = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/New programme/12052025 MHS Standardised Asset List - "
    "programme.xlsx"
)
programme_sheets = pd.read_excel(
    programme_workbook,
    sheet_name=[
        "Standardised Asset List",
        "New Cavity Programme",
        "Empty Cavity Pilot",
        "New Solar Programme",
    ],
)
asset_list = programme_sheets["Standardised Asset List"]
new_cavity_programme = programme_sheets["New Cavity Programme"]
new_cavity_pilot = programme_sheets["Empty Cavity Pilot"]
new_solar_programme = programme_sheets["New Solar Programme"]

# The in-fill properties arrive in a second workbook, split by property type.
in_fill_workbook = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/New programme/Domna updated programme list May 2025 ("
    "1).xlsx"
)
in_fill_sheets = pd.read_excel(
    in_fill_workbook,
    sheet_name=["Houses and Bungalows", "Flats and Maistonettes"],
)
in_fill_properties_houses = in_fill_sheets["Houses and Bungalows"]
in_fill_properties_flats = in_fill_sheets["Flats and Maistonettes"]
|
||||
# Q1) What are these properties? Do we have them on our list already?
# Flag, on each in-fill sheet, whether the UPRN is a landlord_property_id in the asset list.
asset_list_ids = asset_list["landlord_property_id"].values
for in_fill_sheet in (in_fill_properties_houses, in_fill_properties_flats):
    in_fill_sheet["is_in_asset_list"] = in_fill_sheet["UPRN"].isin(asset_list_ids)

# Q2) Which properties are excluded from the new programme?
# Stack houses and flats, then pull the asset list data onto every in-fill property.
in_fill_properties = pd.concat(
    [in_fill_properties_houses, in_fill_properties_flats], ignore_index=True, sort=False
).merge(
    asset_list,
    left_on="UPRN",
    right_on="landlord_property_id",
    how="left",
)

# How many properties are in the new programme? One membership flag per programme sheet.
programme_by_flag = {
    "in_new_cavity_programme": new_cavity_programme,
    "in_new_solar_programme": new_solar_programme,
    "in_new_cavity_pilot": new_cavity_pilot,
}
for flag_column, programme in programme_by_flag.items():
    in_fill_properties[flag_column] = in_fill_properties["UPRN"].isin(programme["landlord_property_id"].values)

# Properties that are on none of the three programme sheets.
in_any_programme = in_fill_properties[list(programme_by_flag)].any(axis=1)
not_in_new_programme = in_fill_properties[~in_any_programme].copy()
|
||||
|
||||
# Why? (These two expressions only display anything in an interactive session; they do not change any data.)
not_in_new_programme["cavity_reason"].value_counts()
not_in_new_programme["solar_reason"].value_counts()

# Properties with neither a cavity reason nor a solar reason.
has_no_reason = (
    pd.isnull(not_in_new_programme["cavity_reason"]) &
    pd.isnull(not_in_new_programme["solar_reason"])
)
not_identified_for_anything = not_in_new_programme[has_no_reason]

# Flag the potential re-inspections which is 994 properties though any extractions we need to consider the HA funding
# the extraction
funded_extraction_reasons = [
    "Non-Intrusive Data Shows Cavity Extraction: SAP Rating 69-75",
    "Non-Intrusive Data Shows Cavity Extraction: SAP Rating 55-68",
    "Non-Intrusive Data Shows Cavity Extraction: SAP Rating 76 or more",
    "Non-Intrusive Data Shows Cavity Extraction: SAP Rating 54 or less",
    "EPC Shows Empty Cavity, inspections show non-cavity build: SAP Rating 76 or more",
    "EPC Shows Empty Cavity, inspections show non-cavity build: SAP Rating 54 or less",
    "EPC Shows Empty Cavity, inspections show retro drilled: SAP Rating 54 or less",
    "EPC Shows Empty Cavity, inspections show retro drilled: SAP Rating 76 or more",
]
not_in_new_programme["funded_extractions"] = not_in_new_programme["cavity_reason"].isin(funded_extraction_reasons)

# A property is excluded when it was not identified for anything. Use the boolean mask directly so the column is a
# proper True/False flag for every row; the previous expression tested ``not_identified_for_anything`` against
# itself and left every other row as NaN.
not_in_new_programme["excluded"] = has_no_reason

not_in_new_programme[
    not_in_new_programme["funded_extractions"]
].to_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/New programme/funded_extractions.csv",
    index=False
)

not_in_new_programme[
    not_in_new_programme["excluded"]
].to_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/New programme/excluded_properties.csv",
    index=False
)
|
||||
40
etl/customers/thrive/Make Insepctions route.py
Normal file
40
etl/customers/thrive/Make Insepctions route.py
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
"""
|
||||
This script will pull in properties, in neighbouring areas, that have been flagged for CWI
|
||||
"""
|
||||
import pandas as pd
|
||||
|
||||
# Every input is a sheet of the same reconciled programme workbook.
reconciled_workbook = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/July 2025 Inspections/Thrive Programme - "
    "reconciled.xlsx"
)
reconciled_sheets = pd.read_excel(
    reconciled_workbook,
    sheet_name=[
        "Standardised Asset List",
        "Cavity Areas",
        "July 2025 Inspections",
        "Cavity properties - for review",
    ],
)
asset_list = reconciled_sheets["Standardised Asset List"]
cavity_areas = reconciled_sheets["Cavity Areas"]
# Loaded but not used by the filtering below.
existing_inspections_sheet = reconciled_sheets["July 2025 Inspections"]
empties = reconciled_sheets["Cavity properties - for review"]

# Keep properties whose postcode is listed in the cavity areas sheet, leaving out those already listed on the
# "Cavity properties - for review" sheet.
in_cavity_area = asset_list["domna_postcode"].isin(cavity_areas["domna_postcode"].values)
already_for_review = asset_list["landlord_property_id"].isin(empties["landlord_property_id"].values)
cavity_inspections = asset_list[in_cavity_area & ~already_for_review]

cavity_inspections.to_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/July 2025 Inspections/cavity_inspections.csv",
    index=False
)
|
||||
|
|
@ -674,7 +674,8 @@ class RetrieveFindMyEpc:
|
|||
],
|
||||
'Heating controls (programmer room thermostat and thermostatic radiator valves)': [
|
||||
"roomstat_programmer_trvs", "time_temperature_zone_control"
|
||||
]
|
||||
],
|
||||
"Internal wall insulation": ["internal_wall_insulation"],
|
||||
}
|
||||
|
||||
survey = True
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue