working on matching

This commit is contained in:
Khalim Conn-Kowlessar 2023-12-24 22:28:03 +00:00
parent 43004a5d8b
commit 3f7ad82b7a

View file

@ -93,19 +93,54 @@ def load_data():
survey_list["survey_key"] = ["survey_" + str(i) for i in range(0, len(survey_list))]
# Tidy up the street/block name a bit
survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("/", ", ")
survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.lower()
survey_list["Street / Block Name"] = np.where(
survey_list["Street / Block Name"] == "REEDS RD",
"Reeds ROAD",
survey_list["Street / Block Name"]
)
# Replace the standalone word "rd" (matched on word boundaries) with "road"
survey_list['Street / Block Name'] = df['Street / Block Name'].str.replace(r'\brd\b', 'road', regex=True)
# Replace " , " with ", "
survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.replace(
" , ", ', ',
)
# Strip whitespace
survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.strip()
# Correct errors
survey_list["Post Code"] = np.where(
survey_list["Post Code"] == "M38 0SA",
"M38 9SA",
survey_list["Post Code"]
)
survey_list["Post Code"] = np.where(
(survey_list["Street / Block Name"] == "nelson drive") & (survey_list["Post Code"] == "M44 5JE"),
"M44 5JF",
survey_list["Post Code"]
)
# We now need to merge the survey list onto the asset list
# Could be easier just to do a search on each row, even though it's much slower
matched = []
for _, row in tqdm(survey_list.iterrows(), total=len(survey_list)):
house_number = row["NO."]
if isinstance(house_number, str):
house_number = house_number.lower()
# Filter on the first line of the address
df = asset_list[asset_list["Address"].str.lower().str.contains(row["Street / Block Name"].lower())].copy()
df = df[df["Postcode"].str.contains(row["Post Code"])]
df = df[df["Address"].str.contains(str(row["NO."]))]
# df = df[df["Postcode"].str.lower().str.contains(row["Post Code"].lower())]
df = df[df["Address"].str.lower().str.contains(str(house_number))]
if df.shape[0] != 1:
df = df[df["HouseNo"] == str(row["NO."])]
if df.shape[0] != 1:
raise ValueError("Investigate")
df = df[df["Postcode"].str.lower().str.contains(row["Post Code"].lower())]
if df.shape[0] != 1:
raise ValueError("Investigate")
matched.append(
{