mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
tidying up stonewater work
This commit is contained in:
parent
f6d8688698
commit
01a5077c17
1 changed files with 221 additions and 3 deletions
|
|
@ -1,4 +1,6 @@
|
|||
import os
|
||||
from pyexpat import features
|
||||
|
||||
import PyPDF2
|
||||
import re
|
||||
import pandas as pd
|
||||
|
|
@ -1704,7 +1706,6 @@ def append_stonewater_id():
|
|||
)
|
||||
model_proposed_sample = model_proposed_sample[~pd.isnull(model_proposed_sample["Address ID"])]
|
||||
model_proposed_sample["Address ID"] = model_proposed_sample["Address ID"].astype(int)
|
||||
z = model_proposed_sample["Archetype ID"].drop_duplicates().sort_values()
|
||||
|
||||
original_archetypes = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater SHDF_3_0_Board Triage 22.05.24 "
|
||||
|
|
@ -2942,7 +2943,6 @@ def revised_model():
|
|||
"""
|
||||
|
||||
# 1) Create the new list of properties
|
||||
|
||||
new_priority_postcodes = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Jan 2025 Project/Updated 2025 to 2030 "
|
||||
"priority list.xlsx"
|
||||
|
|
@ -3188,7 +3188,13 @@ def revised_model():
|
|||
wates_coordination_sheet_abeyance
|
||||
]
|
||||
)
|
||||
|
||||
# We correct the Asset ID for 34 Kempster Close
|
||||
wates_coordination["Asset ID"] = np.where(
|
||||
wates_coordination["Name"] == "34 Kempster Close",
|
||||
"12005",
|
||||
wates_coordination["Asset ID"]
|
||||
)
|
||||
|
||||
wates_coordination["folder_path"] = wates_coordination["Sharepoint Folder"].apply(
|
||||
lambda x: extract_sharepoint_url(x)
|
||||
)
|
||||
|
|
@ -3198,6 +3204,14 @@ def revised_model():
|
|||
############################################################
|
||||
# NEW 450 COORDINATED RETROFIT ASSESSMENTS
|
||||
#############################################################
|
||||
features = pd.read_csv(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Osmosis Reviewed - Parity Download 18.7 - "
|
||||
"master sheet.csv",
|
||||
encoding='latin1'
|
||||
)
|
||||
features["Address ID"] = features["Address ID"].astype(str).astype(int)
|
||||
features_to_merge = features[["Address ID", "Organisation Reference"]]
|
||||
|
||||
retrofit_packages_board = pd.read_excel(
|
||||
os.path.join(
|
||||
CUSTOMER_FOLDER_PATH,
|
||||
|
|
@ -3211,6 +3225,10 @@ def revised_model():
|
|||
retrofit_packages_board["RA"].isin(["Invoiced", "Completed"])
|
||||
]
|
||||
|
||||
retrofit_packages_board = retrofit_packages_board.merge(
|
||||
features_to_merge, how="left", on="Address ID"
|
||||
)
|
||||
|
||||
manual_filters = {
|
||||
"Flat 21 Walmer Street": "StonewaterSurveys_14/91-1-Flat 21 Walmer Street-HR4 9JD",
|
||||
"6 Cornewall Close": "StonewaterSurveys_14/aa 6, Cornewall Close, Moccas, HEREFORD, HR2 9LG",
|
||||
|
|
@ -3527,6 +3545,206 @@ def revised_model():
|
|||
continue
|
||||
|
||||
raise Exception("No match")
|
||||
wates_matching_lookup = pd.DataFrame(wates_matching_lookup)
|
||||
|
||||
# Merge lookup tables onto the coordination sheets
|
||||
wates_coordination = wates_coordination.merge(
|
||||
wates_matching_lookup, how="left", on="Name"
|
||||
)
|
||||
missed_asset_id = wates_coordination[pd.isnull(wates_coordination["Asset ID_x"])]
|
||||
if not missed_asset_id.empty:
|
||||
# We fill the missing ids
|
||||
missing_lookup = {
|
||||
"4 Sydnall Fields": 31231,
|
||||
"12 Sydnall Fields": 31239,
|
||||
"12 Athena Gardens": 28061,
|
||||
"49 Banner Lane": 41189,
|
||||
"4 Jonathan Road": 41232,
|
||||
"8 Jonathan Road": 41236,
|
||||
"1 Jonathan Road": 41229,
|
||||
"96 Taunton Way": 31417,
|
||||
"94 Taunton Way": 31418,
|
||||
"1 Lady Lane": 29430,
|
||||
"10 Jonathan Road": 41283,
|
||||
"21 Jonathan Road": 41246,
|
||||
"12 Ashcroft Close": 26399
|
||||
}
|
||||
for name, asset_id in missing_lookup.items():
|
||||
wates_coordination["Asset ID_x"] = np.where(
|
||||
wates_coordination["Name"] == name,
|
||||
asset_id,
|
||||
wates_coordination["Asset ID_x"]
|
||||
)
|
||||
|
||||
ccs_coordination = ccs_coordination.merge(
|
||||
ccs_matching_lookup, how="left", on="Name"
|
||||
)
|
||||
|
||||
retrofit_packages_board = retrofit_packages_board.merge(
|
||||
matching_lookup, how="left", on="Name"
|
||||
)
|
||||
|
||||
# We combine this into a singular board
|
||||
coordinated_packages = pd.concat(
|
||||
[
|
||||
retrofit_packages_board[
|
||||
[
|
||||
"Name", "Postcode", 'Actual SAP Band', 'Actual SAP Rating',
|
||||
'Modelled SAP Band', 'Modelled SAP Rating', 'Package Ref',
|
||||
'Main Wall Insulation', 'Secondary Wall Insulation', 'Loft insulation',
|
||||
'Flat Roof', 'Room in Roof', 'Window Upgrade', 'Door Upgrade',
|
||||
'Ventilation', 'Main Heating', 'Water Heating', 'Heating Controls',
|
||||
'Solar PV', 'Other measures', 'Organisation Reference',
|
||||
]
|
||||
],
|
||||
ccs_coordination[
|
||||
[
|
||||
# We don't have secondary wall insulation, Flat Roof, RIR, Heating Controls,
|
||||
# Solar PV
|
||||
"Name", "Postcode", 'SAP Band Pre', 'SAP Rating Pre', 'SAP Rating Install Package',
|
||||
'SAP Band Install Package', 'Package Approved (Client)',
|
||||
'Wall Insulation', 'Loft Insulation', 'Windows Upgrade', 'Ext. Doors Upgrade',
|
||||
'Ventilation', 'Heating', 'Other Measures', "Asset ID.1_y",
|
||||
]
|
||||
].rename(
|
||||
columns={
|
||||
"SAP Band Pre": "Actual SAP Band",
|
||||
"SAP Rating Pre": "Actual SAP Rating",
|
||||
'SAP Rating Install Package': 'Modelled SAP Band',
|
||||
'SAP Band Install Package': 'Modelled SAP Rating',
|
||||
'Package Approved (Client)': 'Package Ref',
|
||||
'Wall Insulation': 'Main Wall Insulation',
|
||||
'Loft Insulation': 'Loft insulation',
|
||||
'Windows Upgrade': 'Window Upgrade',
|
||||
'Ext. Doors Upgrade': 'Door Upgrade',
|
||||
'Heating': 'Main Heating',
|
||||
'Other Measures': 'Other measures',
|
||||
'Asset ID.1_y': 'Organisation Reference',
|
||||
}
|
||||
),
|
||||
wates_coordination[
|
||||
[
|
||||
"Name", "Postcode", 'SAP Band Pre', 'SAP Rating Pre', 'SAP Rating Install Package',
|
||||
'SAP Band Install Package', 'Package Approved (Client)',
|
||||
'Wall Insulation', 'Loft Insulation', 'Windows Upgrade', 'Ext. Doors Upgrade',
|
||||
'Ventilation', 'Heating', 'Other Measures', 'Asset ID_x'
|
||||
|
||||
]
|
||||
].rename(
|
||||
columns={
|
||||
"SAP Band Pre": "Actual SAP Band",
|
||||
"SAP Rating Pre": "Actual SAP Rating",
|
||||
'SAP Rating Install Package': 'Modelled SAP Band',
|
||||
'SAP Band Install Package': 'Modelled SAP Rating',
|
||||
'Package Approved (Client)': 'Package Ref',
|
||||
'Wall Insulation': 'Main Wall Insulation',
|
||||
'Loft Insulation': 'Loft insulation',
|
||||
'Windows Upgrade': 'Window Upgrade',
|
||||
'Ext. Doors Upgrade': 'Door Upgrade',
|
||||
'Heating': 'Main Heating',
|
||||
'Other Measures': 'Other measures',
|
||||
'Asset ID_x': 'Organisation Reference',
|
||||
}
|
||||
)
|
||||
]
|
||||
)
|
||||
|
||||
coordinated_packages["Organisation Reference"] = coordinated_packages["Organisation Reference"].astype(int)
|
||||
|
||||
# Merge the property features on
|
||||
coordinated_packages = coordinated_packages.merge(
|
||||
features[["Organisation Reference", "Walls", "Roofs", "Heating", "Main Fuel", "Age", "Property Type"]],
|
||||
how="left",
|
||||
on="Organisation Reference"
|
||||
)
|
||||
|
||||
# We need the features pertaining to these priority postcodes
|
||||
|
||||
def find_nearest_matching_property(coordinated_packages, home):
    """Return the coordinated properties that most closely resemble ``home``.

    A sequence of progressively looser filter levels is tried in order. At
    each level, a row of ``coordinated_packages`` matches when it is equal to
    ``home`` on every column of that level. The rows matching at the first
    level that yields any result are returned.

    Args:
        coordinated_packages: DataFrame containing every column named in the
            filter levels below.
        home: Row-like object (e.g. a Series from ``DataFrame.iterrows``)
            indexable by the same column names.

    Returns:
        A DataFrame holding all rows that match at the first successful
        level, or ``None`` when no level produces a match.

    Raises:
        KeyError: If a filter column is missing from either input.
    """
    # Ordered from the strictest comparison to the loosest one: exact
    # postcode, then postal region, then no location constraint, then the
    # simplified roof / primary property type variants.
    filter_levels = [
        ["Postcode", "Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age"],
        ["Postal Region", "Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age"],
        ["Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age"],
        ["Property Type", "Walls", "Roof Simple", "Heating", "Main Fuel", "Age"],
        ["Primary Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age"],
        ["Primary Property Type", "Walls", "Roof Simple", "Heating", "Main Fuel", "Age"],
    ]

    for filters in filter_levels:
        # Boolean indexing returns a new frame, so the input is never
        # modified and no up-front copy of the full table is needed.
        match = coordinated_packages

        for col in filters:
            # NOTE: an equality test against a missing (NaN) value is False,
            # so rows/homes with missing values in a filter column never match.
            match = match[match[col] == home[col]]

        if not match.empty:
            return match

    return None  # No match found
|
||||
|
||||
coordinated_packages["Postal Region"] = coordinated_packages["Postcode"].str.split(" ").str[0].str.strip()
|
||||
new_priority_postcodes["Postal Region"] = new_priority_postcodes["Postcode"].str.split(" ").str[0].str.strip()
|
||||
|
||||
coordinated_packages["Roof Simple"] = coordinated_packages["Roofs"].str.split(":").str[0].str.strip()
|
||||
new_priority_postcodes["Roof Simple"] = new_priority_postcodes["Roofs"].str.split(":").str[0].str.strip()
|
||||
|
||||
coordinated_packages["Primary Property Type"] = coordinated_packages["Property Type"].str.split(":").str[0]
|
||||
new_priority_postcodes["Primary Property Type"] = new_priority_postcodes["Property Type"].str.split(":").str[0]
|
||||
|
||||
# For every property in the priority postcodes data, we look for a most appropriate matching property
|
||||
no_match = []
|
||||
matches = []
|
||||
for _, home in tqdm(new_priority_postcodes.iterrows(), total=len(new_priority_postcodes)):
|
||||
closest_match = find_nearest_matching_property(coordinated_packages, home)
|
||||
if closest_match is None:
|
||||
no_match.append(home["Organisation Reference"])
|
||||
continue
|
||||
|
||||
to_extend = [
|
||||
{
|
||||
"Organisation Reference": home["Organisation Reference"],
|
||||
"Best Match Organisation Reference": m
|
||||
} for m in closest_match["Organisation Reference"].values
|
||||
]
|
||||
matches.extend(to_extend)
|
||||
|
||||
no_match_summary = new_priority_postcodes[
|
||||
new_priority_postcodes["Organisation Reference"].isin(
|
||||
no_match
|
||||
)
|
||||
].groupby(["Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age"])[
|
||||
"Organisation Reference"].count().reset_index()
|
||||
|
||||
no_match_summary = no_match_summary.sort_values("Organisation Reference", ascending=False)
|
||||
|
||||
# len(no_match)
|
||||
# 8764, 5607
|
||||
# no_match_summary.shape
|
||||
# (3953, 6), (2948, 6)
|
||||
|
||||
# We match the properties to their closest match
|
||||
|
||||
matches_df = pd.DataFrame(matches)
|
||||
matches_df = matches_df.merge(
|
||||
coordinated_packages[["Organisation Reference", "Actual SAP Band", "Actual SAP Rating"]],
|
||||
left_on="Best Match Organisation Reference", right_on="Organisation Reference",
|
||||
suffixes=("", " - Closest Match")
|
||||
)
|
||||
# We want to aggregate the matches, when we have multiple
|
||||
aggregated_matches_df = []
|
||||
for org_ref, mapped_matches in matches_df.groupby("Organisation Reference"):
|
||||
if mapped_matches.shape[0] == 1:
|
||||
mapped_matches["Number of matches"] = 1
|
||||
mapped_matches["Proportion"]
|
||||
aggregated_matches_df.append(mapped_matches)
|
||||
continue
|
||||
|
||||
mapped_priority_list = new_priority_postcodes.merge(
|
||||
matches_df, on="Organisation Reference",
|
||||
)
|
||||
# We merge on the EPC ratings for the matched properties
|
||||
mapped_priority_list = mapped_priority_list.merge(
|
||||
|
||||
)
|
||||
|
||||
# if __name__ == "__main__":
|
||||
# main()
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue