From 678a4b52d28194d1dcf7c2d86d3993dde0161f3f Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 29 Jan 2025 21:03:11 +0000 Subject: [PATCH] matching for all of ccs --- etl/customers/stonewater/Wave 3 Preparation.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/etl/customers/stonewater/Wave 3 Preparation.py b/etl/customers/stonewater/Wave 3 Preparation.py index 61344038..fa548f0d 100644 --- a/etl/customers/stonewater/Wave 3 Preparation.py +++ b/etl/customers/stonewater/Wave 3 Preparation.py @@ -3316,6 +3316,19 @@ def revised_model(): to_filter = filtered["Address"].str.replace(" ,", "").str.split(",").str[0:2].str.join("").apply( lambda x: fuzz.partial_ratio(home["Name"], x) > 93 ) + if to_filter.sum() == 0: + # We also see some cases where the name of the survey folder is like "Colville Road 7" and the + # property name is actually 7 Colville Road, so we try splitting the folder name on spaces, + # taking the final part, and moving it to the front + def reformat_survey_folder(x): + filename = x.split("/")[-1] + parts = filename.split(" ") + return " ".join(parts[-1:] + parts[:-1]) + + to_filter = ( + filtered["survey_folder"].apply(lambda x: reformat_survey_folder(x)).str.lower() == + home["Name"].lower() + ) if to_filter.sum() == 0: blah