From 6e4fc23ecc2036e14148b18611cb04aafde8084b Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar
Date: Sat, 9 Mar 2024 18:12:12 +0000
Subject: [PATCH] fixed dupes for HA34

---
Reviewer note: this patch was recovered from a whitespace-collapsed copy.
Line breaks follow the hunk line counts. Leading indentation (context
lines included) and any runs of spaces inside string literals are inferred
and must be checked against the target file; if context fails to match,
try `git apply --ignore-whitespace`. The only content change is the
comment typo "Note in the asset list", now "Not in the asset list".

 .../ha_15_32/ha_analysis_batch_3.py           | 104 +++++++++++++++++-
 1 file changed, 98 insertions(+), 6 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 04ee343c..8784481b 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -276,6 +276,12 @@ class DataLoader:
                 asset_list["POST CODE"].astype(str).str.lower().str.strip()
             )
             asset_list["matching_postcode"] = asset_list["POST CODE"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA34":
+            asset_list["matching_address"] = (
+                asset_list[" Address"].astype(str).str.lower().str.strip() + ", " +
+                asset_list[" Postcode"].astype(str).str.lower().str.strip()
+            )
+            asset_list["matching_postcode"] = asset_list[" Postcode"].astype(str).str.lower().str.strip()
         elif ha_name == "HA35":
             asset_list["matching_address"] = asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["Address Line 2"].astype(str).str.lower().str.strip() + ", " + \
@@ -566,7 +572,8 @@ class DataLoader:
         eco3_list["eco3_list_row_id"] = [ha_name + "_Eco3_" + str(i) for i in range(0, len(eco3_list))]
 
         # Perform the eco3 merge
-        eco3_list = self.merge_eco3_to_assets(asset_list, eco3_list, ha_name)
+        if not eco3_list.empty:
+            eco3_list = self.merge_eco3_to_assets(asset_list, eco3_list, ha_name)
 
         if ha_name in ["HA25"]:
             # Accomodate ha25 unique structure
@@ -1657,9 +1664,94 @@ class DataLoader:
 
     @staticmethod
     def correct_ha35_survey_list(survey_list):
-        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
-            "BALLADIER WLAK", "BALLADIER WALK"
+        return survey_list
+
+    @staticmethod
+    def correct_ha34_survey_list(survey_list):
+        # Not in the asset list
+        survey_list = survey_list[
+            survey_list["Post Code"] != "L5 3SS"
+        ]
+
+        survey_list["Post Code"] = survey_list["Post Code"].str.replace(
+            "L177DR", "L17 7DR"
         )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "PENVALLEY CRESENT", "Penvalley Crescent"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "PENLINKEN DRIVE", "Penlinken Drive"
+        )
+
+        # There's no 32 Penlinken Drive in the asset sheet
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "Penlinken Drive") &
+              (survey_list["NO."] == 32))
+        ]
+
+        # There's no 30 Gwent Street in the asset sheet
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "GWENT ST") &
+              (survey_list["NO."] == 30))
+        ]
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "POULTON RD", "Poulton Road"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "ST PAULS RD", "St Pauls Road"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "BROAD LANE, KIRKBY", "BROAD LANE"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "BULLENS RD, KIRKBY", "Bullens Road"
+        )
+
+        # There's no 219 NORTH HILL ST in the asset sheet
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "NORTH HILL ST") &
+              (survey_list["NO."] == 219))
+        ]
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "CROSLAND RD, KIRKBY", "CROSLAND ROAD"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "PARK BROW DRIVE, KIRKBY", "Park Brow Drive"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "CELTIC TREET", "Celtic Street"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "BUCKLAND ROAD", "Buckland Street"
+        )
+
+        # duplicates
+        survey_list = survey_list.drop_duplicates(["Street / Block Name", "NO.", "Post Code"])
+
+        # This is a duplicate with wrong postcode
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "CLARIBEL STREET") &
+              (survey_list["NO."] == 7) &
+              (survey_list["Post Code"] == "L8 8AF"))
+        ]
+
+        survey_list["NO."] = np.where(
+            ((survey_list["NO."] == "187 A") &
+             (survey_list["Post Code"] == "L32 6QF")),
+            "187A",
+            survey_list["NO."]
+        )
+
         return survey_list
 
     @staticmethod
@@ -1685,7 +1777,7 @@ class DataLoader:
         survey_list = survey_list_correction_function(survey_list)
 
         missed_postcodes = []
-        if ha_name == "HA6":
+        if ha_name in ["HA6", "HA34"]:
             missed_postcodes = [
                 postcode.lower() for postcode in survey_list["Post Code"]
                 if postcode.lower() not in asset_list["matching_postcode"].values
@@ -4687,8 +4779,8 @@ def app():
 
     # Add in:
     priority_has = [
-        "HA1", "HA2", "HA6", "HA7", "HA12", "HA13", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA35",
-        "HA39", "HA41", "HA48", "HA50", "HA63", "HA107", "HA117"
+        "HA1", "HA2", "HA6", "HA7", "HA12", "HA13", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA34",
+        "HA35", "HA39", "HA41", "HA48", "HA50", "HA63", "HA107", "HA117"
     ]
     # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come
     # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE], 13 [DONE],