From c88afeb3301de36967845840689a8522ce605efd Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 28 Dec 2023 11:45:14 +0000 Subject: [PATCH] 60% through matching --- etl/eligibility/ha_15_32/ha25_app.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha25_app.py b/etl/eligibility/ha_15_32/ha25_app.py index 3ff43291..8536692a 100644 --- a/etl/eligibility/ha_15_32/ha25_app.py +++ b/etl/eligibility/ha_15_32/ha25_app.py @@ -184,11 +184,24 @@ def load_data(): eco4_prospects_survey_list["ADDRESS 1"] = np.where( (eco4_prospects_survey_list["ADDRESS 1"] == "old farm road") & ( - eco4_prospects_survey_list["POSTCODE"] == "PL5 1EP"), + eco4_prospects_survey_list["POSTCODE"] == "PL5 1EP"), "old school road", eco4_prospects_survey_list["ADDRESS 1"] ) + eco4_prospects_survey_list["ADDRESS 1"] = np.where( + (eco4_prospects_survey_list["ADDRESS 1"] == "croft orchard") & ( + eco4_prospects_survey_list["POSTCODE"] == "TQ12 6RP") & ( + eco4_prospects_survey_list["NO"] == 52), + "drum way", + eco4_prospects_survey_list["ADDRESS 1"] + ) + + # String replace + eco4_prospects_survey_list["ADDRESS 1"] = eco4_prospects_survey_list["ADDRESS 1"].str.replace( + "the gulls, collaton road", "the gulls collaton road" + ) + matched = [] for _, row in tqdm(eco4_prospects_survey_list.iterrows(), total=len(eco4_prospects_survey_list)): @@ -209,9 +222,12 @@ def load_data(): # Filter on the first line of the address df = asset_list[asset_list["T1_Address"].str.lower().str.contains(row["ADDRESS 1"].lower())].copy() - df = df[df["T1_Address"].str.lower().str.contains(str(house_number))] + if house_number is not None: + if df.shape[0] != 1: + df = df[df["T1_Address"].str.lower().str.contains(str(house_number))] if df.shape[0] != 1: - df = df[df["HouseNo"] == str(house_number)] + if house_number is not None: + df = df[df["HouseNo"] == str(house_number)] if df.shape[0] != 1: df = df[df["postcode"].str.lower().str.contains(row["POSTCODE"].lower())] if df.shape[0] != 1: