mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
working on matching
This commit is contained in:
parent
43004a5d8b
commit
3f7ad82b7a
1 changed file with 38 additions and 3 deletions
|
|
@ -93,19 +93,54 @@ def load_data():
|
|||
survey_list["survey_key"] = ["survey_" + str(i) for i in range(0, len(survey_list))]
|
||||
# Tidy up the street/block name a bit
|
||||
survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("/", ", ")
|
||||
survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.lower()
|
||||
survey_list["Street / Block Name"] = np.where(
|
||||
survey_list["Street / Block Name"] == "REEDS RD",
|
||||
"Reeds ROAD",
|
||||
survey_list["Street / Block Name"]
|
||||
)
|
||||
# Replace the standalone word "rd" (matched at word boundaries) with "road"
|
||||
survey_list['Street / Block Name'] = df['Street / Block Name'].str.replace(r'\brd\b', 'road', regex=True)
|
||||
|
||||
# Replace " , " with ", "
|
||||
survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.replace(
|
||||
" , ", ', ',
|
||||
)
|
||||
# Strip whitespace
|
||||
survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.strip()
|
||||
|
||||
# Correct known errors in the "Post Code" column
|
||||
survey_list["Post Code"] = np.where(
|
||||
survey_list["Post Code"] == "M38 0SA",
|
||||
"M38 9SA",
|
||||
survey_list["Post Code"]
|
||||
)
|
||||
|
||||
survey_list["Post Code"] = np.where(
|
||||
(survey_list["Street / Block Name"] == "nelson drive") & (survey_list["Post Code"] == "M44 5JE"),
|
||||
"M44 5JF",
|
||||
survey_list["Post Code"]
|
||||
)
|
||||
|
||||
# We now need to merge the survey list onto the asset list
|
||||
# Could be easier just to do a search on each row, even though it's much slower
|
||||
matched = []
|
||||
for _, row in tqdm(survey_list.iterrows(), total=len(survey_list)):
|
||||
|
||||
house_number = row["NO."]
|
||||
if isinstance(house_number, str):
|
||||
house_number = house_number.lower()
|
||||
|
||||
# Filter on the first line of the address
|
||||
df = asset_list[asset_list["Address"].str.lower().str.contains(row["Street / Block Name"].lower())].copy()
|
||||
df = df[df["Postcode"].str.contains(row["Post Code"])]
|
||||
df = df[df["Address"].str.contains(str(row["NO."]))]
|
||||
# df = df[df["Postcode"].str.lower().str.contains(row["Post Code"].lower())]
|
||||
df = df[df["Address"].str.lower().str.contains(str(house_number))]
|
||||
if df.shape[0] != 1:
|
||||
df = df[df["HouseNo"] == str(row["NO."])]
|
||||
if df.shape[0] != 1:
|
||||
raise ValueError("Investigate")
|
||||
df = df[df["Postcode"].str.lower().str.contains(row["Post Code"].lower())]
|
||||
if df.shape[0] != 1:
|
||||
raise ValueError("Investigate")
|
||||
|
||||
matched.append(
|
||||
{
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue