From d9a3ac37c9a6299a6ba732c1e51aaee8a59432bf Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 28 Dec 2023 12:13:25 +0000 Subject: [PATCH] done with matching --- etl/eligibility/ha_15_32/ha25_app.py | 41 ++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha25_app.py b/etl/eligibility/ha_15_32/ha25_app.py index 8536692a..2dd0b0b7 100644 --- a/etl/eligibility/ha_15_32/ha25_app.py +++ b/etl/eligibility/ha_15_32/ha25_app.py @@ -201,16 +201,31 @@ def load_data(): eco4_prospects_survey_list["ADDRESS 1"] = eco4_prospects_survey_list["ADDRESS 1"].str.replace( "the gulls, collaton road", "the gulls collaton road" ) + eco4_prospects_survey_list["ADDRESS 1"] = eco4_prospects_survey_list["ADDRESS 1"].str.replace( + "crows-an-eglose", "crows-an-eglos" + ) + # We have a high volume of rows that do not match matched = [] + nomatch = [] for _, row in tqdm(eco4_prospects_survey_list.iterrows(), total=len(eco4_prospects_survey_list)): # Not in the asset list if (row["ADDRESS 1"] == "berry park") and row["NO"] in [40, 42] and row["POSTCODE"] == "PL12 6EN": + nomatch.append(row.to_dict()) continue # Not in the asset list if (row["ADDRESS 1"] == "roberts road") and row["NO"] == 23 and row["POSTCODE"] == "PL5 1DP": + nomatch.append(row.to_dict()) + continue + + # Not in the asset list + if row["ADDRESS 1"] in [ + "kaynton mead", "broadmoor lane", "hoopers barton", "ecos court", "selwood road", + "castle street" + ]: + nomatch.append(row.to_dict()) continue house_number = row["NO"] @@ -229,12 +244,11 @@ def load_data(): if house_number is not None: df = df[df["HouseNo"] == str(house_number)] if df.shape[0] != 1: - df = df[df["postcode"].str.lower().str.contains(row["POSTCODE"].lower())] + if row["POSTCODE"] is not None: + df = df[df["postcode"].str.lower().str.contains(row["POSTCODE"].lower())] if df.shape[0] != 1: - print(row["ADDRESS 1"]) - print(house_number) - print(row["POSTCODE"].lower()) - raise ValueError("Investigate") + nomatch.append(row.to_dict()) + continue matched.append( { @@ -245,3 +259,20 @@ def load_data(): "survey_postcode": row["POSTCODE"], } ) + + nomatch = pd.DataFrame(nomatch) + matched = pd.DataFrame(matched) + + matched["warmfront_identified"] = True + + # Combine asset list and surveys + data = asset_list.merge( + matched, how="left", left_on="T1_Address", right_on="matched_address", + ) + data["warmfront_identified"] = data["warmfront_identified"].fillna(False) + + return data, eco4_prospects_survey_list + + +def app(): + data, eco4_prospects_survey_list = load_data()