diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 261c0fd4..3a5b4ab4 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -142,12 +142,13 @@ class DataLoader: # We now do the matching between the asset list and the survey list. # What we'll get from this is a lookup table from the asset list to the survey list + matched_lookup = pd.DataFrame() if ha_name == "ha_6": - self.merge_ha_6(asset_list, survey_list) + matched_lookup = self.merge_ha_6(asset_list, survey_list) else: raise NotImplementedError("Only HA 6 has surveys") - return survey_list + return survey_list, matched_lookup def merge_ha_6(self, asset_list, survey_list): @@ -223,6 +224,42 @@ class DataLoader: "Woodcutts Street", "Woodshutts Street" ) + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "HILLARY AVENUE", "Hillary Road" + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "HILLARY AVENUE", "Hillary Road" + ) + + # Replace " Rd" with " Road" + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(" Rd", " Road") + + # We have a record listed as 19, MAPLE AVENUE ST7 1JX, when it should be 19, Hollins Crescent ST7 1JX + survey_list.loc[ + (survey_list["Street / Block Name"] == "MAPLE AVENUE") & + (survey_list["NO."].isin([19])) & + (survey_list["Post Code"] == "ST7 1JX"), + "Street / Block Name" + ] = "Hollins Crescent" + + # However, some of the maple avenue records, are indeed Maple avenue, but are listed with the wrong postcode. + # E.g. number 26 + survey_list.loc[ + (survey_list["Street / Block Name"] == "MAPLE AVENUE") & + (survey_list["NO."].isin([26])) & + (survey_list["Post Code"] == "ST7 1JX"), + "Post Code" + ] = "ST7 1JW" + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "BURSLEY Road", "Bursley Way" + ) + + survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace( + "Brittania Avenue", "Brittain Avenue" + ) + matching_lookup = [] for _, row in tqdm(survey_list.iterrows(), total=len(survey_list)): house_number = row["NO."]