From 10bc433283417a2c15ffe2924537ded81af240d6 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 3 Feb 2025 16:06:47 +0000 Subject: [PATCH] assigning properties to bands --- .../stonewater/Wave 3 Preparation.py | 71 ++++++++++++++++--- 1 file changed, 62 insertions(+), 9 deletions(-) diff --git a/etl/customers/stonewater/Wave 3 Preparation.py b/etl/customers/stonewater/Wave 3 Preparation.py index 04078e47..c623e9f7 100644 --- a/etl/customers/stonewater/Wave 3 Preparation.py +++ b/etl/customers/stonewater/Wave 3 Preparation.py @@ -3212,10 +3212,10 @@ def revised_model(): "12 Ashcroft Close": 26399 } for name, asset_id in missing_lookup.items(): - wates_coordination["Asset ID_x"] = np.where( + wates_coordination["Asset ID"] = np.where( wates_coordination["Name"] == name, asset_id, - wates_coordination["Asset ID_x"] + wates_coordination["Asset ID"] ) wates_coordination = wates_coordination[~pd.isnull(wates_coordination["Asset ID"])] @@ -3596,6 +3596,16 @@ def revised_model(): matching_lookup, how="left", on="Name" ) + # We have 4 properties in the Wates coordination board, that we want to remove from the retrofit packages board + to_remove = wates_coordination[ + wates_coordination["Asset ID_x"].astype(int).isin(retrofit_packages_board["Organisation Reference"]) + ] + assert to_remove.shape[0] == 4 + # Remove them from the wates board + wates_coordination = wates_coordination[ + ~wates_coordination["Asset ID_x"].astype(int).isin(retrofit_packages_board["Organisation Reference"]) + ] + # We combine this into a singular board coordinated_packages = pd.concat( [ @@ -3662,6 +3672,7 @@ def revised_model(): ) coordinated_packages["Organisation Reference"] = coordinated_packages["Organisation Reference"].astype(int) + assert not coordinated_packages["Organisation Reference"].duplicated().sum() # Merge the property features on coordinated_packages = coordinated_packages.merge( @@ -3670,6 +3681,25 @@ def revised_model(): on="Organisation Reference" ) + # We match the properties to their closest match + # We clean up the SAP ratings in the coordinated packages + def sap_to_number(x): + try: + return int(x) + except: + if x[-1] in ["A", "B", "C", "D", "E", "F"]: + return int(x[:-1]) + + if x[0] in ["A", "B", "C", "D", "E", "F"]: + return int(x[1:]) + + coordinated_packages = coordinated_packages[~pd.isnull(coordinated_packages["Actual SAP Band"])] + coordinated_packages = coordinated_packages[~pd.isnull(coordinated_packages["Actual SAP Rating"])] + + coordinated_packages["Actual SAP Rating"] = coordinated_packages["Actual SAP Rating"].apply( + lambda x: sap_to_number(x) + ) + # We need the features pertaining to these priority postcodes def find_nearest_matching_property(coordinated_packages, home): @@ -3729,11 +3759,9 @@ def revised_model(): no_match_summary = no_match_summary.sort_values("Organisation Reference", ascending=False) # len(no_match) - # 8764, 5607 + # 8764, 5607, 5646 # no_match_summary.shape - # (3953, 6), (2948, 6) - - # We match the properties to their closest match + # (3953, 6), (2948, 6), (2969, 7) matches_df = pd.DataFrame(matches) matches_df = matches_df.merge( @@ -3745,11 +3773,36 @@ def revised_model(): aggregated_matches_df = [] for org_ref, mapped_matches in matches_df.groupby("Organisation Reference"): if mapped_matches.shape[0] == 1: - mapped_matches["Number of matches"] = 1 - mapped_matches["Proportion"] - aggregated_matches_df.append(mapped_matches) + aggregated_matches_df.append( + { + "Organisation Reference": org_ref, + "Number of matches": 1, + "Proportion": 100, + "Estimated SAP Rating": mapped_matches["Actual SAP Rating"].values[0], + "Estimated EPC Rating": sap_to_epc(mapped_matches["Actual SAP Rating"].values[0]) + } + ) continue + # We need to aggregate the matches, since we have multiple + average_rating = mapped_matches["Actual SAP Rating"].mean() + number_of_matches = mapped_matches.shape[0] + average_epc_rating = sap_to_epc(average_rating) + # proportion is the number of properties that have this EPC rating + proportion_with_this_epc = int( + mapped_matches[mapped_matches["Actual SAP Band"] == average_epc_rating].shape[0] / number_of_matches * 100) + aggregated_matches_df.append( + { + "Organisation Reference": org_ref, + "Number of matches": number_of_matches, + "Proportion": proportion_with_this_epc, + "Estimated SAP Rating": average_rating, + "Estimated EPC Rating": average_epc_rating + } + ) + + aggregated_matches_df = pd.DataFrame(aggregated_matches_df) + mapped_priority_list = new_priority_postcodes.merge( matches_df, on="Organisation Reference", )