matching 82% complete

This commit is contained in:
Khalim Conn-Kowlessar 2024-01-22 17:13:35 +00:00
parent 96893aae14
commit a2a8bc012e

View file

@ -142,12 +142,13 @@ class DataLoader:
# We now do the matching between the asset list and the survey list.
# What we'll get from this is a lookup table from the asset list to the survey list
matched_lookup = pd.DataFrame()
if ha_name == "ha_6":
self.merge_ha_6(asset_list, survey_list)
matched_lookup = self.merge_ha_6(asset_list, survey_list)
else:
raise NotImplementedError("Only HA 6 has surveys")
return survey_list
return survey_list, matched_lookup
def merge_ha_6(self, asset_list, survey_list):
@ -223,6 +224,42 @@ class DataLoader:
"Woodcutts Street", "Woodshutts Street"
)
survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
"HILLARY AVENUE", "Hillary Road"
)
survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
"HILLARY AVENUE", "Hillary Road"
)
# Replace " Rd" with " Road"
survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(" Rd", " Road")
# We have a record listed as 19, MAPLE AVENUE ST7 1JX, when it should be 19, Hollins Crescent ST7 1JX
survey_list.loc[
(survey_list["Street / Block Name"] == "MAPLE AVENUE") &
(survey_list["NO."].isin([19])) &
(survey_list["Post Code"] == "ST7 1JX"),
"Street / Block Name"
] = "Hollins Crescent"
# However, some of the Maple Avenue records are indeed Maple Avenue, but are listed with the wrong postcode.
# E.g. number 26
survey_list.loc[
(survey_list["Street / Block Name"] == "MAPLE AVENUE") &
(survey_list["NO."].isin([26])) &
(survey_list["Post Code"] == "ST7 1JX"),
"Post Code"
] = "ST7 1JW"
survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
"BURSLEY Road", "Bursley Way"
)
survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
"Brittania Avenue", "Brittain Avenue"
)
matching_lookup = []
for _, row in tqdm(survey_list.iterrows(), total=len(survey_list)):
house_number = row["NO."]