From 33b3f51ca4701ede548e6af82f80ae191a3c0710 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 7 Mar 2024 15:54:40 +0000 Subject: [PATCH] handling dupes for ha50 --- etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index a5b99a72..7124919e 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -1445,6 +1445,21 @@ class DataLoader: "Larch Drive", "Larch Grove" ) + # Drop 31 Lauder Place North, as there is a duplicate. This version also has a wrong postcode + survey_list = survey_list[ + ~((survey_list["Street / Block Name"] == "LAUDER PLACE NORTH") & + (survey_list["Post Code"] == "ST20QS") & + (survey_list["NO."].isin([31]))) + ] + + # Drop duplicate rows, matching on house number plus lower-cased, space-stripped street and postcode + survey_list["street_pruner"] = survey_list["Street / Block Name"].str.lower().str.replace(" ", "") + survey_list["postcode_pruner"] = survey_list["Post Code"].str.lower().str.replace(" ", "") + + # Should go to 18 + survey_list = survey_list.drop_duplicates(["NO.", "street_pruner", "postcode_pruner"]) + survey_list = survey_list.drop(columns=["street_pruner", "postcode_pruner"]) + return survey_list @staticmethod