ciga matching for ha14

This commit is contained in:
Khalim Conn-Kowlessar 2024-02-22 16:05:31 +00:00
parent c6daf52046
commit 75102704cd

View file

@ -38,7 +38,9 @@ class DataLoader:
}
UNMATCHED_CIGA = {
"HA14": 6
# We expect 4 unmatched addresses, which have been validated manually as being in the ciga file but not
# the asset list
"HA14": 4
}
def __init__(self, directories, use_cache):
@ -518,8 +520,6 @@ class DataLoader:
df = df[df["HouseNo"] == str(house_number)]
# For ciga, we skip rows with no house-number match and record them as unmatched addresses
if df.empty:
if row["Matched Postcode"] == "LE3 3EE":
dew
unmatched_addresses.append(
{
"ciga_list_row_id": row["ciga_list_row_id"],
@ -528,18 +528,18 @@ class DataLoader:
}
)
continue
# TODO: Might need to consider street name at some point
if df.shape[0] != 1:
# We split house number and postcode out of the matched address for ciga
street_name = self.extract_streetname(
address=row["Matched Address"], house_number=house_number, postcode=row["Matched Postcode"]
)
df = df[df["matching_address"].str.contains(street_name)]
df = df[df["matching_address"].str.replace(",", "").str.contains(street_name)]
if df.shape[0] != 1:
# The final check we do here is to check for the presence of flat in the address
if "flat" in row["Matched Address"]:
if "flat" in row["Matched Address"].lower():
df = df[df["matching_address"].str.contains("flat")]
else:
df = df[df["matching_address"].str.contains("flat") == False]