44% through matching

This commit is contained in:
Khalim Conn-Kowlessar 2024-03-06 22:29:18 +00:00
parent 17b5f6e140
commit 8e258ff3ca

View file

@ -1083,6 +1083,70 @@ class DataLoader:
"McCLAREN COURT", "MCLAREN COURT"
)
survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
"ST JAMES CLOISTERS", "ST. JAMES'S CLOISTERS"
)
survey_list["Street / Block Name"] = np.where(
((survey_list["NO."].isin(
[
"FLAT 1 22",
"FLAT 2 22",
"FLAT 3 22",
"FLAT 4 22",
"FLAT 5 22",
"FLAT 6 22",
]
)) &
(survey_list["Street / Block Name"] == "MELTON ROAD")),
"22 MELTON ROAD",
survey_list["Street / Block Name"]
)
survey_list["Street / Block Name"] = np.where(
((survey_list["NO."].isin(
[
"FLAT 1 24",
"FLAT 2 24",
"FLAT 3 24",
"FLAT 4 24",
"FLAT 5 24",
"FLAT 6 24",
]
)) &
(survey_list["Street / Block Name"] == "MELTON ROAD")),
"24 MELTON ROAD",
survey_list["Street / Block Name"]
)
survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
"TURRETT GREEN COURT SILENT STREET", "TURRET GREEN COURT"
)
# Turret green court flat 1 doesn't exist in the asset list
survey_list = survey_list[
~((survey_list["Street / Block Name"] == "TURRET GREEN COURT") &
(survey_list["NO."] == 1))
]
# 3, 45 Raywell Street doesn't exist in the asset list
survey_list = survey_list[
~((survey_list["Street / Block Name"] == "45 RAYWELL STREET") &
(survey_list["NO."] == 3))
]
# 40 Avondale drive doesn't exist in the asset list
survey_list = survey_list[
~((survey_list["Street / Block Name"] == "Avondale Drive") &
(survey_list["NO."] == 40))
]
# 17A Beech Road has the wrong postcode; the fix below matches on street name
# and the incorrect postcode (not on the house number)
survey_list["Post Code"] = np.where(
(survey_list["Street / Block Name"] == "BEECH ROAD") &
(survey_list["Post Code"] == "DH6 1JD"),
"DH6 1JB",
survey_list["Post Code"]
)
return survey_list
@staticmethod
@ -1250,6 +1314,10 @@ class DataLoader:
if "," in str(house_number):
house_number = house_number.split(",")[0].strip()
# We may also have a space for an instance of flat x y
if " " in str(house_number):
house_number = house_number.split(" ")[0].strip()
df = df[df["matching_address"].str.contains(str(house_number))]
if df.empty:
@ -1270,7 +1338,7 @@ class DataLoader:
raise ValueError("Investigate")
if df.shape[0] != 1:
df = df[df["HouseNo"].astype(str) == str(house_number)]
df = df[df["HouseNo"].astype(str).str.lower() == str(house_number)]
if df.shape[0] != 1:
df = df[df["matching_postcode"].str.lower().str.contains(row["Post Code"].lower().strip())]
if df.shape[0] != 1: