diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index cb4b9885..1a28500b 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -38,7 +38,9 @@ class DataLoader: } UNMATCHED_CIGA = { - "HA14": 6 + # We expect 4 unmatched addresses, which have been validated manually as being in the ciga file but not + # the asset list + "HA14": 4 } def __init__(self, directories, use_cache): @@ -518,8 +520,6 @@ class DataLoader: df = df[df["HouseNo"] == str(house_number)] # For ciga, we skip if df.empty: - if row["Matched Postcode"] == "LE3 3EE": - dew unmatched_addresses.append( { "ciga_list_row_id": row["ciga_list_row_id"], @@ -528,18 +528,18 @@ class DataLoader: } ) continue - # TODO: Might need to consider street name at some point + if df.shape[0] != 1: # We split house number and postcode out of the matched address for ciga street_name = self.extract_streetname( address=row["Matched Address"], house_number=house_number, postcode=row["Matched Postcode"] ) - df = df[df["matching_address"].str.contains(street_name)] + df = df[df["matching_address"].str.replace(",", "").str.contains(street_name)] if df.shape[0] != 1: # The final check we do here is to check for the presence of flat in the address - if "flat" in row["Matched Address"]: + if "flat" in row["Matched Address"].lower(): df = df[df["matching_address"].str.contains("flat")] else: df = df[df["matching_address"].str.contains("flat") == False]