From e49101767a742fdd48ce392476ef66afa7f78662 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 28 Dec 2023 10:56:24 +0000 Subject: [PATCH] working on matching --- etl/eligibility/ha_15_32/ha25_app.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha25_app.py b/etl/eligibility/ha_15_32/ha25_app.py index 88502e69..1ad650bc 100644 --- a/etl/eligibility/ha_15_32/ha25_app.py +++ b/etl/eligibility/ha_15_32/ha25_app.py @@ -70,7 +70,7 @@ def load_data(): split_addresses = asset_list['address'].str.split(' ', expand=True) split_addresses.columns = ['HouseNo', 'address2', 'address3', 'address4', 'address5', 'address6', 'address7', 'address8', - 'address9', 'address10', 'address11', 'address12', 'address13'] + 'address9', 'address10', 'address11', 'address12', 'address13', 'address14', ] split_addresses["HouseNo"] = split_addresses["HouseNo"].str.replace(";", "") # We could re-concatenate but we only care about HouseNo for the moment @@ -164,6 +164,9 @@ def load_data(): if isinstance(house_number, str): house_number = house_number.lower() + if "flat" in house_number: + house_number = house_number.split("flat")[1].strip() + # Filter on the first line of the address df = asset_list[asset_list["T1_Address"].str.lower().str.contains(row["ADDRESS 1"].lower())].copy() df = df[df["T1_Address"].str.lower().str.contains(str(house_number))] @@ -172,18 +175,17 @@ def load_data(): if df.shape[0] != 1: df = df[df["postcode"].str.lower().str.contains(row["POSTCODE"].lower())] if df.shape[0] != 1: - print(row["Street / Block Name"]) + print(row["ADDRESS 1"]) print(house_number) - print(row["Post Code"].lower()) + print(row["POSTCODE"].lower()) raise ValueError("Investigate") matched.append( { "survey_key": row["survey_key"], - "matched_address": df["Address"].values[0], - "survey_house_no": row["NO."], - "survey_street_name": row["Street / Block Name"], - "survey_postcode": row["Post Code"], - "survey_status": row["INSTALLED OR CANCELLED"] + "matched_address": df["T1_Address"].values[0], + "survey_house_no": row["NO"], + "survey_street_name": row["ADDRESS 1"], + "survey_postcode": row["POSTCODE"], } )