From c13c84b98cbab169300306adeba534145496251c Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 16 Nov 2024 15:55:19 +0000 Subject: [PATCH] First region implemented --- .../stonewater/Wave 3 Preparation.py | 58 +++++++++++++++---- 1 file changed, 47 insertions(+), 11 deletions(-) diff --git a/etl/customers/stonewater/Wave 3 Preparation.py b/etl/customers/stonewater/Wave 3 Preparation.py index 019c51c9..7c104f97 100644 --- a/etl/customers/stonewater/Wave 3 Preparation.py +++ b/etl/customers/stonewater/Wave 3 Preparation.py @@ -1729,25 +1729,61 @@ def propsed_wave_3_sample(): missed_addressids = region_assets[pd.isnull(region_assets["Confidence Tier"])]["Address ID"].unique().tolist() # This means that this archetype was never surveyed and so we need to find a sufficiently similar property + final_missed_matches = [] for a_id in missed_addressids: property = asset_list[asset_list["Address ID"] == a_id].squeeze() - surveyed_same_postcode = survey_results_with_original_features[ + # TODO: This is quite strict for the moment - we might want to relax this by creating reduced versions + # of the wall, roof and heating features, splitting them on the colons and taking the first part + surveyed_similar = survey_results_with_original_features[ (survey_results_with_original_features["Postcode"] == property["Postcode"]) & - (survey_results_with_original_features["Property Type"] == property["Property Type"]) + (survey_results_with_original_features["Property Type"] == property["Property Type"]) & + (survey_results_with_original_features["Wall Type"] == property["Wall Type"]) & + (survey_results_with_original_features["Roof Type"] == property["Roof Type"]) & + (survey_results_with_original_features["Heating"] == property["Heating"]) ] + if surveyed_similar.empty: + surveyed_similar = survey_results_with_original_features[ + (survey_results_with_original_features["Postal Region"] == property["Postal Region"]) & + (survey_results_with_original_features["Property Type"] == property["Property Type"]) & + (survey_results_with_original_features["Wall Type"] == property["Wall Type"]) & + (survey_results_with_original_features["Roof Type"] == property["Roof Type"]) & + (survey_results_with_original_features["Heating"] == property["Heating"]) + ] - surveyed_same_region = survey_results_with_original_features[ - (survey_results_with_original_features["Postal Region"] == property["Postal Region"]) & - (survey_results_with_original_features["Property Type"] == property["Property Type"]) - ] + if surveyed_similar.empty: + final_missed_matches.append( + { + "Address ID": a_id, + "Confidence Tier": "5 - no similar property, needs survey to confirm" + } + ) + continue - same_postcode = survey_results[ - survey_results["Archetype ID"].isin(missed_archetypes) & - (survey_results["Postal Region"] == region) - ][["Archetype ID", "Current EPC Band"]].drop_duplicates() + raise NotImplementedError("Implement me") - pd.isnull(region_assets["Current EPC Band"]).sum() + final_missed_matches = pd.DataFrame(final_missed_matches) + + region_assets = region_assets.merge( + final_missed_matches, + on="Address ID", + how="left", + suffixes=("", "_method3") + ) + + region_assets["Confidence Tier"] = region_assets["Confidence Tier"].fillna( + region_assets["Confidence Tier_method3"] + ) + + region_assets = region_assets.drop(columns=["Confidence Tier_method3"]) + + region_assets["Current EPC Band"] = np.where( + region_assets["Confidence Tier"] == "5 - no similar property, needs survey to confirm", + "Unknown", region_assets["Current EPC Band"] + ) + + if pd.isnull(region_assets["Current EPC Band"]).sum(): + raise Exception("Something went wrong") # if __name__ == "__main__": # main()