assigning properties to bands

This commit is contained in:
Khalim Conn-Kowlessar 2025-02-03 16:06:47 +00:00
parent 04eba60961
commit 10bc433283

View file

@ -3212,10 +3212,10 @@ def revised_model():
"12 Ashcroft Close": 26399
}
for name, asset_id in missing_lookup.items():
wates_coordination["Asset ID_x"] = np.where(
wates_coordination["Asset ID"] = np.where(
wates_coordination["Name"] == name,
asset_id,
wates_coordination["Asset ID_x"]
wates_coordination["Asset ID"]
)
wates_coordination = wates_coordination[~pd.isnull(wates_coordination["Asset ID"])]
@ -3596,6 +3596,16 @@ def revised_model():
matching_lookup, how="left", on="Name"
)
# 4 properties on the Wates coordination board also appear on the retrofit packages board; we remove them from the Wates board
to_remove = wates_coordination[
wates_coordination["Asset ID_x"].astype(int).isin(retrofit_packages_board["Organisation Reference"])
]
assert to_remove.shape[0] == 4
# Remove them from the wates board
wates_coordination = wates_coordination[
~wates_coordination["Asset ID_x"].astype(int).isin(retrofit_packages_board["Organisation Reference"])
]
# We combine this into a singular board
coordinated_packages = pd.concat(
[
@ -3662,6 +3672,7 @@ def revised_model():
)
coordinated_packages["Organisation Reference"] = coordinated_packages["Organisation Reference"].astype(int)
assert not coordinated_packages["Organisation Reference"].duplicated().sum()
# Merge the property features on
coordinated_packages = coordinated_packages.merge(
@ -3670,6 +3681,25 @@ def revised_model():
on="Organisation Reference"
)
# We match the properties to their closest match
# We clean up the SAP ratings in the coordinated packages
def sap_to_number(x):
    """Convert a raw SAP rating value into an integer score.

    Values that ``int()`` accepts directly (``72``, ``72.0``, ``"72"``) are
    converted as-is. Strings carrying a single EPC band letter as a suffix
    or prefix (``"72C"`` / ``"C72"``) have the letter removed before
    conversion.

    :param x: Raw SAP rating value.
    :return: The integer SAP score, or ``None`` when the value cannot be
        interpreted as a rating.
    """
    try:
        return int(x)
    except (TypeError, ValueError, OverflowError):
        # Not directly numeric; fall through and look for a band letter.
        pass

    # Only strings can carry a band letter; anything else is unparseable.
    if not isinstance(x, str):
        return None

    text = x.strip()
    if not text:
        return None

    # EPC bands run from A to G.
    bands = ("A", "B", "C", "D", "E", "F", "G")
    if text[-1] in bands:
        digits = text[:-1]
    elif text[0] in bands:
        digits = text[1:]
    else:
        return None

    try:
        return int(digits)
    except ValueError:
        # What remains after removing the band letter is still not a number.
        return None
coordinated_packages = coordinated_packages[~pd.isnull(coordinated_packages["Actual SAP Band"])]
coordinated_packages = coordinated_packages[~pd.isnull(coordinated_packages["Actual SAP Rating"])]
coordinated_packages["Actual SAP Rating"] = coordinated_packages["Actual SAP Rating"].apply(
lambda x: sap_to_number(x)
)
# We need the features pertaining to these priority postcodes
def find_nearest_matching_property(coordinated_packages, home):
@ -3729,11 +3759,9 @@ def revised_model():
no_match_summary = no_match_summary.sort_values("Organisation Reference", ascending=False)
# len(no_match)
# 8764, 5607
# 8764, 5607, 5646
# no_match_summary.shape
# (3953, 6), (2948, 6)
# We match the properties to their closest match
# (3953, 6), (2948, 6), (2969, 7)
matches_df = pd.DataFrame(matches)
matches_df = matches_df.merge(
@ -3745,11 +3773,36 @@ def revised_model():
aggregated_matches_df = []
for org_ref, mapped_matches in matches_df.groupby("Organisation Reference"):
if mapped_matches.shape[0] == 1:
mapped_matches["Number of matches"] = 1
mapped_matches["Proportion"]
aggregated_matches_df.append(mapped_matches)
aggregated_matches_df.append(
{
"Organisation Reference": org_ref,
"Number of matches": 1,
"Proportion": 100,
"Estimated SAP Rating": mapped_matches["Actual SAP Rating"].values[0],
"Estimated EPC Rating": sap_to_epc(mapped_matches["Actual SAP Rating"].values[0])
}
)
continue
# We need to aggregate the matches, since we have multiple
average_rating = mapped_matches["Actual SAP Rating"].mean()
number_of_matches = mapped_matches.shape[0]
average_epc_rating = sap_to_epc(average_rating)
# Proportion is the percentage (truncated to an int) of matched properties whose actual SAP band equals the estimated EPC rating
proportion_with_this_epc = int(
mapped_matches[mapped_matches["Actual SAP Band"] == average_epc_rating].shape[0] / number_of_matches * 100)
aggregated_matches_df.append(
{
"Organisation Reference": org_ref,
"Number of matches": number_of_matches,
"Proportion": proportion_with_this_epc,
"Estimated SAP Rating": average_rating,
"Estimated EPC Rating": average_epc_rating
}
)
aggregated_matches_df = pd.DataFrame(aggregated_matches_df)
mapped_priority_list = new_priority_postcodes.merge(
matches_df, on="Organisation Reference",
)