diff --git a/etl/eligibility/ha_15_32/ha16_app.py b/etl/eligibility/ha_15_32/ha16_app.py index e347d47c..25b33255 100644 --- a/etl/eligibility/ha_15_32/ha16_app.py +++ b/etl/eligibility/ha_15_32/ha16_app.py @@ -93,19 +93,54 @@ def load_data(): survey_list["survey_key"] = ["survey_" + str(i) for i in range(0, len(survey_list))] # Tidy up the street/block name a bit survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("/", ", ") + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.lower() + survey_list["Street / Block Name"] = np.where( + survey_list["Street / Block Name"] == "REEDS RD", + "Reeds ROAD", + survey_list["Street / Block Name"] + ) + # Replace " rd " with "road" + survey_list['Street / Block Name'] = df['Street / Block Name'].str.replace(r'\brd\b', 'road', regex=True) + + # Replace " , " with ", " + survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.replace( + " , ", ', ', + ) + # Strip whitespace + survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.strip() + + # Correct errors + survey_list["Post Code"] = np.where( + survey_list["Post Code"] == "M38 0SA", + "M38 9SA", + survey_list["Post Code"] + ) + + survey_list["Post Code"] = np.where( + (survey_list["Street / Block Name"] == "nelson drive") & (survey_list["Post Code"] == "M44 5JE"), + "M44 5JF", + survey_list["Post Code"] + ) # We now need to merge the survey list onto the asset list # Could be easier just to do a search on each row, even though it's much slower matched = [] for _, row in tqdm(survey_list.iterrows(), total=len(survey_list)): + + house_number = row["NO."] + if isinstance(house_number, str): + house_number = house_number.lower() + # Filter on the first line of the address df = asset_list[asset_list["Address"].str.lower().str.contains(row["Street / Block Name"].lower())].copy() - df = df[df["Postcode"].str.contains(row["Post Code"])] - df = df[df["Address"].str.contains(str(row["NO."]))] + # df = df[df["Postcode"].str.lower().str.contains(row["Post Code"].lower())] + df = df[df["Address"].str.lower().str.contains(str(house_number))] if df.shape[0] != 1: df = df[df["HouseNo"] == str(row["NO."])] if df.shape[0] != 1: - raise ValueError("Investigate") + df = df[df["Postcode"].str.lower().str.contains(row["Post Code"].lower())] + if df.shape[0] != 1: + raise ValueError("Investigate") matched.append( {