tidying up stonewater work

This commit is contained in:
Khalim Conn-Kowlessar 2025-02-03 12:54:57 +00:00
parent f6d8688698
commit 01a5077c17

View file

@ -1,4 +1,6 @@
import os
from pyexpat import features
import PyPDF2
import re
import pandas as pd
@ -1704,7 +1706,6 @@ def append_stonewater_id():
)
model_proposed_sample = model_proposed_sample[~pd.isnull(model_proposed_sample["Address ID"])]
model_proposed_sample["Address ID"] = model_proposed_sample["Address ID"].astype(int)
z = model_proposed_sample["Archetype ID"].drop_duplicates().sort_values()
original_archetypes = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater SHDF_3_0_Board Triage 22.05.24 "
@ -2942,7 +2943,6 @@ def revised_model():
"""
# 1) Create the new list of properties
new_priority_postcodes = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Jan 2025 Project/Updated 2025 to 2030 "
"priority list.xlsx"
@ -3188,7 +3188,13 @@ def revised_model():
wates_coordination_sheet_abeyance
]
)
# We correct the Asset ID for 34 Kempster Close
wates_coordination["Asset ID"] = np.where(
wates_coordination["Name"] == "34 Kempster Close",
"12005",
wates_coordination["Asset ID"]
)
wates_coordination["folder_path"] = wates_coordination["Sharepoint Folder"].apply(
lambda x: extract_sharepoint_url(x)
)
@ -3198,6 +3204,14 @@ def revised_model():
############################################################
# NEW 450 COORDINATED RETROFIT ASSESSMENTS
#############################################################
features = pd.read_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Osmosis Reviewed - Parity Download 18.7 - "
"master sheet.csv",
encoding='latin1'
)
features["Address ID"] = features["Address ID"].astype(str).astype(int)
features_to_merge = features[["Address ID", "Organisation Reference"]]
retrofit_packages_board = pd.read_excel(
os.path.join(
CUSTOMER_FOLDER_PATH,
@ -3211,6 +3225,10 @@ def revised_model():
retrofit_packages_board["RA"].isin(["Invoiced", "Completed"])
]
retrofit_packages_board = retrofit_packages_board.merge(
features_to_merge, how="left", on="Address ID"
)
manual_filters = {
"Flat 21 Walmer Street": "StonewaterSurveys_14/91-1-Flat 21 Walmer Street-HR4 9JD",
"6 Cornewall Close": "StonewaterSurveys_14/aa 6, Cornewall Close, Moccas, HEREFORD, HR2 9LG",
@ -3527,6 +3545,206 @@ def revised_model():
continue
raise Exception("No match")
wates_matching_lookup = pd.DataFrame(wates_matching_lookup)
# Merge lookup tables onto the coordination sheets
wates_coordination = wates_coordination.merge(
wates_matching_lookup, how="left", on="Name"
)
missed_asset_id = wates_coordination[pd.isnull(wates_coordination["Asset ID_x"])]
if not missed_asset_id.empty:
# We fill the missing ids
missing_lookup = {
"4 Sydnall Fields": 31231,
"12 Sydnall Fields": 31239,
"12 Athena Gardens": 28061,
"49 Banner Lane": 41189,
"4 Jonathan Road": 41232,
"8 Jonathan Road": 41236,
"1 Jonathan Road": 41229,
"96 Taunton Way": 31417,
"94 Taunton Way": 31418,
"1 Lady Lane": 29430,
"10 Jonathan Road": 41283,
"21 Jonathan Road": 41246,
"12 Ashcroft Close": 26399
}
for name, asset_id in missing_lookup.items():
wates_coordination["Asset ID_x"] = np.where(
wates_coordination["Name"] == name,
asset_id,
wates_coordination["Asset ID_x"]
)
ccs_coordination = ccs_coordination.merge(
ccs_matching_lookup, how="left", on="Name"
)
retrofit_packages_board = retrofit_packages_board.merge(
matching_lookup, how="left", on="Name"
)
# We combine this into a singular board
coordinated_packages = pd.concat(
[
retrofit_packages_board[
[
"Name", "Postcode", 'Actual SAP Band', 'Actual SAP Rating',
'Modelled SAP Band', 'Modelled SAP Rating', 'Package Ref',
'Main Wall Insulation', 'Secondary Wall Insulation', 'Loft insulation',
'Flat Roof', 'Room in Roof', 'Window Upgrade', 'Door Upgrade',
'Ventilation', 'Main Heating', 'Water Heating', 'Heating Controls',
'Solar PV', 'Other measures', 'Organisation Reference',
]
],
ccs_coordination[
[
# We don't have secondary wall insulation, Flat Roof, RIR, Heating Controls,
# Solar PV
"Name", "Postcode", 'SAP Band Pre', 'SAP Rating Pre', 'SAP Rating Install Package',
'SAP Band Install Package', 'Package Approved (Client)',
'Wall Insulation', 'Loft Insulation', 'Windows Upgrade', 'Ext. Doors Upgrade',
'Ventilation', 'Heating', 'Other Measures', "Asset ID.1_y",
]
].rename(
columns={
"SAP Band Pre": "Actual SAP Band",
"SAP Rating Pre": "Actual SAP Rating",
'SAP Rating Install Package': 'Modelled SAP Band',
'SAP Band Install Package': 'Modelled SAP Rating',
'Package Approved (Client)': 'Package Ref',
'Wall Insulation': 'Main Wall Insulation',
'Loft Insulation': 'Loft insulation',
'Windows Upgrade': 'Window Upgrade',
'Ext. Doors Upgrade': 'Door Upgrade',
'Heating': 'Main Heating',
'Other Measures': 'Other measures',
'Asset ID.1_y': 'Organisation Reference',
}
),
wates_coordination[
[
"Name", "Postcode", 'SAP Band Pre', 'SAP Rating Pre', 'SAP Rating Install Package',
'SAP Band Install Package', 'Package Approved (Client)',
'Wall Insulation', 'Loft Insulation', 'Windows Upgrade', 'Ext. Doors Upgrade',
'Ventilation', 'Heating', 'Other Measures', 'Asset ID_x'
]
].rename(
columns={
"SAP Band Pre": "Actual SAP Band",
"SAP Rating Pre": "Actual SAP Rating",
'SAP Rating Install Package': 'Modelled SAP Band',
'SAP Band Install Package': 'Modelled SAP Rating',
'Package Approved (Client)': 'Package Ref',
'Wall Insulation': 'Main Wall Insulation',
'Loft Insulation': 'Loft insulation',
'Windows Upgrade': 'Window Upgrade',
'Ext. Doors Upgrade': 'Door Upgrade',
'Heating': 'Main Heating',
'Other Measures': 'Other measures',
'Asset ID_x': 'Organisation Reference',
}
)
]
)
coordinated_packages["Organisation Reference"] = coordinated_packages["Organisation Reference"].astype(int)
# Merge the property features on
coordinated_packages = coordinated_packages.merge(
features[["Organisation Reference", "Walls", "Roofs", "Heating", "Main Fuel", "Age", "Property Type"]],
how="left",
on="Organisation Reference"
)
# We need the features pertaining to these priority postcodes
def find_nearest_matching_property(coordinated_packages, home):
    """Return the rows of ``coordinated_packages`` that best match ``home``.

    Matching is attempted against a fixed sequence of column sets, ordered
    from most to least specific. The first set for which at least one row
    equals ``home`` on every listed column wins, and all rows matching that
    set are returned.

    Args:
        coordinated_packages: DataFrame holding every column named in the
            filter levels below. It is not modified.
        home: Row-like object indexable by column name (for example a
            Series produced by ``DataFrame.iterrows``) providing the same
            columns.

    Returns:
        A new DataFrame with the matching rows, or ``None`` when no filter
        level yields any row.

    Raises:
        KeyError: If a column named in a filter level is missing from
            either input.
    """
    # Ordered from strictest to loosest: first relax the location
    # (Postcode -> Postal Region -> anywhere), then the roof description
    # (Roofs -> Roof Simple), then the property type
    # (Property Type -> Primary Property Type).
    filter_levels = [
        ["Postcode", "Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age"],
        ["Postal Region", "Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age"],
        ["Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age"],
        ["Property Type", "Walls", "Roof Simple", "Heating", "Main Fuel", "Age"],
        ["Primary Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age"],
        ["Primary Property Type", "Walls", "Roof Simple", "Heating", "Main Fuel", "Age"],
    ]

    for filters in filter_levels:
        # No up-front copy is needed: boolean indexing below always produces
        # a new object, so the caller's frame is never aliased or mutated.
        match = coordinated_packages
        for col in filters:
            # NaN never compares equal, so a missing value on either side
            # rules the row out at this level.
            match = match[match[col] == home[col]]

        if not match.empty:
            return match

    return None  # No match found
coordinated_packages["Postal Region"] = coordinated_packages["Postcode"].str.split(" ").str[0].str.strip()
new_priority_postcodes["Postal Region"] = new_priority_postcodes["Postcode"].str.split(" ").str[0].str.strip()
coordinated_packages["Roof Simple"] = coordinated_packages["Roofs"].str.split(":").str[0].str.strip()
new_priority_postcodes["Roof Simple"] = new_priority_postcodes["Roofs"].str.split(":").str[0].str.strip()
coordinated_packages["Primary Property Type"] = coordinated_packages["Property Type"].str.split(":").str[0]
new_priority_postcodes["Primary Property Type"] = new_priority_postcodes["Property Type"].str.split(":").str[0]
# For every property in the priority postcodes data, we look for a most appropriate matching property
no_match = []
matches = []
for _, home in tqdm(new_priority_postcodes.iterrows(), total=len(new_priority_postcodes)):
closest_match = find_nearest_matching_property(coordinated_packages, home)
if closest_match is None:
no_match.append(home["Organisation Reference"])
continue
to_extend = [
{
"Organisation Reference": home["Organisation Reference"],
"Best Match Organisation Reference": m
} for m in closest_match["Organisation Reference"].values
]
matches.extend(to_extend)
no_match_summary = new_priority_postcodes[
new_priority_postcodes["Organisation Reference"].isin(
no_match
)
].groupby(["Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age"])[
"Organisation Reference"].count().reset_index()
no_match_summary = no_match_summary.sort_values("Organisation Reference", ascending=False)
# len(no_match)
# 8764, 5607
# no_match_summary.shape
# (3953, 6), (2948, 6)
# We match the properties to their closest match
matches_df = pd.DataFrame(matches)
matches_df = matches_df.merge(
coordinated_packages[["Organisation Reference", "Actual SAP Band", "Actual SAP Rating"]],
left_on="Best Match Organisation Reference", right_on="Organisation Reference",
suffixes=("", " - Closest Match")
)
# We want to aggregate the matches, when we have multiple
aggregated_matches_df = []
for org_ref, mapped_matches in matches_df.groupby("Organisation Reference"):
if mapped_matches.shape[0] == 1:
mapped_matches["Number of matches"] = 1
mapped_matches["Proportion"]
aggregated_matches_df.append(mapped_matches)
continue
mapped_priority_list = new_priority_postcodes.merge(
matches_df, on="Organisation Reference",
)
# We merge on the EPC ratings for the matched properties
mapped_priority_list = mapped_priority_list.merge(
)
# if __name__ == "__main__":
# main()