From 8e258ff3ca164e2eddcd9cc74d1e7531bf655e4f Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 6 Mar 2024 22:29:18 +0000 Subject: [PATCH] 44% through matching --- .../ha_15_32/ha_analysis_batch_3.py | 70 ++++++++++++++++++- 1 file changed, 69 insertions(+), 1 deletion(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index ff39b190..567394a4 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -1083,6 +1083,70 @@ class DataLoader: "McCLAREN COURT", "MCLAREN COURT" ) + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "ST JAMES CLOISTERS", "ST. JAMES'S CLOISTERS" + ) + + survey_list["Street / Block Name"] = np.where( + ((survey_list["NO."].isin( + [ + "FLAT 1 22", + "FLAT 2 22", + "FLAT 3 22", + "FLAT 4 22", + "FLAT 5 22", + "FLAT 6 22", + ] + )) & + (survey_list["Street / Block Name"] == "MELTON ROAD")), + "22 MELTON ROAD", + survey_list["Street / Block Name"] + ) + + survey_list["Street / Block Name"] = np.where( + ((survey_list["NO."].isin( + [ + "FLAT 1 24", + "FLAT 2 24", + "FLAT 3 24", + "FLAT 4 24", + "FLAT 5 24", + "FLAT 6 24", + ] + )) & + (survey_list["Street / Block Name"] == "MELTON ROAD")), + "24 MELTON ROAD", + survey_list["Street / Block Name"] + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "TURRETT GREEN COURT SILENT STREET", "TURRET GREEN COURT" + ) + + # Turret green court flat 1 doesn't exist in the asset list + survey_list = survey_list[ + ~((survey_list["Street / Block Name"] == "TURRET GREEN COURT") & + (survey_list["NO."] == 1)) + ] + # 3, 45 raywell steet doesn't exist in the asset list + survey_list = survey_list[ + ~((survey_list["Street / Block Name"] == "45 RAYWELL STREET") & + (survey_list["NO."] == 3)) + ] + + # 40 Avondale drive doesn't exist in the asset list + survey_list = survey_list[ + ~((survey_list["Street / Block Name"] == "Avondale Drive") & + (survey_list["NO."] == 40)) + ] + # 17A beech road has the wrong postcode + survey_list["Post Code"] = np.where( + (survey_list["Street / Block Name"] == "BEECH ROAD") & + (survey_list["Post Code"] == "DH6 1JD"), + "DH6 1JB", + survey_list["Post Code"] + ) + return survey_list @staticmethod @@ -1250,6 +1314,10 @@ class DataLoader: if "," in str(house_number): house_number = house_number.split(",")[0].strip() + # We may also have a space for an instance of flat x y + if " " in str(house_number): + house_number = house_number.split(" ")[0].strip() + df = df[df["matching_address"].str.contains(str(house_number))] if df.empty: @@ -1270,7 +1338,7 @@ class DataLoader: raise ValueError("Investigate") if df.shape[0] != 1: - df = df[df["HouseNo"].astype(str) == str(house_number)] + df = df[df["HouseNo"].astype(str).str.lower() == str(house_number)] if df.shape[0] != 1: df = df[df["matching_postcode"].str.lower().str.contains(row["Post Code"].lower().strip())] if df.shape[0] != 1: