still working on merge

This commit is contained in:
Khalim Conn-Kowlessar 2023-12-24 23:09:25 +00:00
parent 3f7ad82b7a
commit e21057ca61

View file

@ -100,12 +100,14 @@ def load_data():
survey_list["Street / Block Name"]
)
# Replace the whole word "rd" with "road"
survey_list['Street / Block Name'] = df['Street / Block Name'].str.replace(r'\brd\b', 'road', regex=True)
survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.replace(r'\brd\b', 'road', regex=True)
# Replace " , " with ", "
survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.replace(
" , ", ', ',
)
# Normalise spacing around commas: "{place} ,{place}" becomes "{place}, {place}"
survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.replace(r'\s*,\s*', ', ', regex=True)
# Strip whitespace
survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.strip()
@ -122,11 +124,18 @@ def load_data():
survey_list["Post Code"]
)
survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("eccels", "eccles")
survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("chatley, road", "chatley road")
survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("vaughen", "Vaughan")
# We now need to merge the survey list onto the asset list.
# It could be easier to just search row by row, even though that is much slower.
matched = []
for _, row in tqdm(survey_list.iterrows(), total=len(survey_list)):
if row["Street / Block Name"] in ["carleach grove"]:
continue
house_number = row["NO."]
if isinstance(house_number, str):
house_number = house_number.lower()