diff --git a/etl/eligibility/ha_15_32/ha25_app.py b/etl/eligibility/ha_15_32/ha25_app.py index 1ad650bc..52d11a27 100644 --- a/etl/eligibility/ha_15_32/ha25_app.py +++ b/etl/eligibility/ha_15_32/ha25_app.py @@ -158,8 +158,29 @@ def load_data(): eco4_prospects_survey_list = eco4_prospects_survey_list[~pd.isnull(eco4_prospects_survey_list["ADDRESS 1"])] eco4_prospects_survey_list["survey_key"] = ["survey_" + str(i) for i in range(0, len(eco4_prospects_survey_list))] + # Correct some errors in the survey list + eco4_prospects_survey_list["POSTCODE"] = np.where( + (eco4_prospects_survey_list["ADDRESS 1"] == "berry park") & + (eco4_prospects_survey_list["POSTCODE"] == "PL12 6HP"), + "PL12 6EN", + eco4_prospects_survey_list["POSTCODE"] + ) + + # Remove semicolons from address in asset and survey list + asset_list["T1_Address"] = asset_list["T1_Address"].str.replace(";", "") + eco4_prospects_survey_list["ADDRESS 1"] = eco4_prospects_survey_list["ADDRESS 1"].str.replace(";", "") + + # In the prospects survey list, we have 6 WALKHAM MEADOWS listed twice, which should be 6a and 6b + eco4_prospects_survey_list.loc[838, "NO"] = "6a" + eco4_prospects_survey_list.loc[839, "NO"] = "6b" + matched = [] for _, row in tqdm(eco4_prospects_survey_list.iterrows(), total=len(eco4_prospects_survey_list)): + + # Not in the survey list + if (row["ADDRESS 1"] == "berry park") and row["NO"] in [40, 42] and row["POSTCODE"] == "PL12 6EN": + continue + house_number = row["NO"] if isinstance(house_number, str): house_number = house_number.lower()