From a01ff1d8dedaaf78e8ce95b21305a6f1a430ae3e Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 17 Nov 2024 16:45:10 +0000 Subject: [PATCH] tweaking postal region algorithm - may need to swap to postcode or street --- .../stonewater/Wave 3 Preparation.py | 44 ++++++++++++++----- 1 file changed, 33 insertions(+), 11 deletions(-) diff --git a/etl/customers/stonewater/Wave 3 Preparation.py b/etl/customers/stonewater/Wave 3 Preparation.py index b36ae756..20f771ec 100644 --- a/etl/customers/stonewater/Wave 3 Preparation.py +++ b/etl/customers/stonewater/Wave 3 Preparation.py @@ -1803,22 +1803,43 @@ def propsed_wave_3_sample(): for a_id in missed_addressids: property = asset_list[asset_list["Address ID"] == a_id].squeeze() - # TODO: This is quite strict for the moment - we might want to relax this by creating reduced versions - # of the wall, roof and heating features, splitting them on the colons and taking the first part + if property["Property Type"].split(":")[0] in ["House", "Bungalow"]: + filter_property_types = ["House", "Bungalow"] + else: + filter_property_types = ["Flat"] + surveyed_similar = survey_results_with_original_features[ (survey_results_with_original_features["Postcode"] == property["Postcode"]) & - (survey_results_with_original_features["Property Type"] == property["Property Type"]) & - (survey_results_with_original_features["Wall Type"] == property["Wall Type"]) & - (survey_results_with_original_features["Roof Type"] == property["Roof Type"]) & - (survey_results_with_original_features["Heating"] == property["Heating"]) + ( + survey_results_with_original_features["Property Type"].str.split(":").str[0].isin( + filter_property_types + ) + ) & + ( + survey_results_with_original_features["Wall Type"].str.split(":").str[0] == + property["Wall Type"].split(":")[0] + ) & + ( + survey_results_with_original_features["Roof Type"].str.split(":").str[0] == + property["Roof Type"].split(":")[0] + ) & + ( + survey_results_with_original_features["Heating"].str.split(":").str[0] == + property["Heating"].split(":")[0] + ) ] if surveyed_similar.empty: surveyed_similar = survey_results_with_original_features[ (survey_results_with_original_features["Postal Region"] == property["Postal Region"]) & - (survey_results_with_original_features["Property Type"] == property["Property Type"]) & - (survey_results_with_original_features["Wall Type"] == property["Wall Type"]) & - (survey_results_with_original_features["Roof Type"] == property["Roof Type"]) & - (survey_results_with_original_features["Heating"] == property["Heating"]) + (survey_results_with_original_features["Property Type"].str.split(":").str[0].isin( + filter_property_types + )) & + (survey_results_with_original_features["Wall Type"].str.split(":").str[0] == + property["Wall Type"].split(":")[0]) & + (survey_results_with_original_features["Roof Type"].str.split(":").str[0] == + property["Roof Type"].split(":")[0]) & + (survey_results_with_original_features["Heating"].str.split(":").str[0] == + property["Heating"].split(":")[0]) ] if surveyed_similar.empty: @@ -1842,7 +1863,7 @@ def propsed_wave_3_sample(): { "Address ID": a_id, "Confidence Tier": tier, - "Current EPC Band": "Unknown" + "Current EPC Band": expected_epc } ) @@ -1899,6 +1920,7 @@ def propsed_wave_3_sample(): geographic_summary = geographic_summary.sort_values("Loss", ascending=True) geographic_summary["Loss Cumulative Sum"] = geographic_summary["Loss"].cumsum() + geographic_summary[geographic_summary["Loss Cumulative Sum"] <= 250]["Gain"].sum() # if __name__ == "__main__": # main()