From 7dd64781724df896badfd2170cba3ba5d2c283b9 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 29 Jan 2025 20:43:56 +0000 Subject: [PATCH] Added more logic for matching --- etl/customers/stonewater/Wave 3 Preparation.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/etl/customers/stonewater/Wave 3 Preparation.py b/etl/customers/stonewater/Wave 3 Preparation.py index 904afd30..ab640496 100644 --- a/etl/customers/stonewater/Wave 3 Preparation.py +++ b/etl/customers/stonewater/Wave 3 Preparation.py @@ -3273,7 +3273,7 @@ def revised_model(): ccs_matching_lookup = [] for _, home in tqdm(ccs_coordination.iterrows(), total=len(ccs_coordination)): # Handle the case that has the wrong postcode in the asset data - if home["Name"] in manual_filters: + if home["Name"] in ccs_manual_filters: filtered = retrofit_assessment_data[ retrofit_assessment_data["survey_folder"] == manual_filters[home["Name"]] ].copy() @@ -3297,13 +3297,16 @@ def revised_model(): ) ) if to_filter.sum() == 0: - to_filter = filtered["Address"].str.split(",").str[0:2].str.join("") == home["Name"] - + to_filter = filtered["Address"].str.replace(" ,", "").str.split(",").str[0:2].str.join("") == home[ + "Name"] + if to_filter.sum() == 0: + to_filter = filtered["Address"].str.replace(" ,", "").str.split(",").str[0:1].str.join("") == home[ + "Name"] if to_filter.sum() == 0: # Do a fuzzy match on the name # Find the best filter - to_filter = filtered["Address"].str.split(",").str[0:2].str.join("").apply( - lambda x: fuzz.partial_ratio(home["Name"], x) > 9 + to_filter = filtered["Address"].str.replace(" ,", "").str.split(",").str[0:2].str.join("").apply( + lambda x: fuzz.partial_ratio(home["Name"], x) > 93 ) if to_filter.sum() == 0: