From 0331d82f6ac687b55297e80f430a15fa148f5d67 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 29 Jan 2025 20:55:36 +0000 Subject: [PATCH] added manual match --- .../stonewater/Wave 3 Preparation.py | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/etl/customers/stonewater/Wave 3 Preparation.py b/etl/customers/stonewater/Wave 3 Preparation.py index ab640496..61344038 100644 --- a/etl/customers/stonewater/Wave 3 Preparation.py +++ b/etl/customers/stonewater/Wave 3 Preparation.py @@ -3267,15 +3267,19 @@ def revised_model(): columns={"Post Code": "Postcode"} ) ccs_coordination = ccs_coordination[~pd.isnull(ccs_coordination["Postcode"])] + ccs_coordination = ccs_coordination[ccs_coordination["Retrofit Assessment"] != "Outstanding"] from fuzzywuzzy import fuzz - ccs_manual_filters = {} + ccs_manual_filters = { + "35 Kittiwake Close": "Wave 2.1 Surveys/11. CCS Dorset/Kittiwake Close 35" + } ccs_matching_lookup = [] for _, home in tqdm(ccs_coordination.iterrows(), total=len(ccs_coordination)): + # Handle the case that has the wrong postcode in the asset data if home["Name"] in ccs_manual_filters: filtered = retrofit_assessment_data[ - retrofit_assessment_data["survey_folder"] == manual_filters[home["Name"]] + retrofit_assessment_data["survey_folder"] == ccs_manual_filters[home["Name"]] ].copy() else: filtered = retrofit_assessment_data[ @@ -3297,11 +3301,15 @@ def revised_model(): ) ) if to_filter.sum() == 0: - to_filter = filtered["Address"].str.replace(" ,", "").str.split(",").str[0:2].str.join("") == home[ - "Name"] + to_filter = ( + filtered["Address"].str.replace(" ,", "").str.split(",").str[0:2].str.join("").str.lower() == + home["Name"].lower() + ) if to_filter.sum() == 0: - to_filter = filtered["Address"].str.replace(" ,", "").str.split(",").str[0:1].str.join("") == home[ - "Name"] + to_filter = ( + filtered["Address"].str.replace(" ,", "").str.split(",").str[0:1].str.join("").str.lower() == + home["Name"].lower() + ) if to_filter.sum() == 0: # Do a fuzzy match on the name # Find the best filter