diff --git a/etl/eligibility/ha_15_32/ha16_app.py b/etl/eligibility/ha_15_32/ha16_app.py index 25b33255..0d5a3361 100644 --- a/etl/eligibility/ha_15_32/ha16_app.py +++ b/etl/eligibility/ha_15_32/ha16_app.py @@ -100,12 +100,14 @@ def load_data(): survey_list["Street / Block Name"] ) # Replace " rd " with "road" - survey_list['Street / Block Name'] = df['Street / Block Name'].str.replace(r'\brd\b', 'road', regex=True) + survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.replace(r'\brd\b', 'road', regex=True) # Replace " , " with ", " survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.replace( " , ", ', ', ) + # Fix "{place} ,{place}" with "{place}, {place}" + survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.replace(r'\s*,\s*', ', ', regex=True) # Strip whitespace survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.strip() @@ -122,11 +124,18 @@ def load_data(): survey_list["Post Code"] ) + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("eccels", "eccles") + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("chatley, road", "chatley road") + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("vaughen", "Vaughan") + # We now need to merge the survey list onto the asset list # Could be easier just to do a search on each row, even though it's much slower matched = [] for _, row in tqdm(survey_list.iterrows(), total=len(survey_list)): + if row["Street / Block Name"] in ["carleach grove"]: + continue + house_number = row["NO."] if isinstance(house_number, str): house_number = house_number.lower()