From 19850f924445035e3880eaae40f750d21fb12b80 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar
Date: Thu, 7 Mar 2024 21:34:46 +0000
Subject: [PATCH] fixing up ha63 eco3 list

---
Review notes (text in this section is ignored by `git am`):

* This patch was recovered from a copy whose line breaks had been collapsed.
  One diff line per source line has been restored and every hunk was checked
  against the line counts in its `@@` header. Leading indentation, and the
  position of the two blank context lines in the hunk at -1799, are
  reconstructed rather than recovered; if the patch does not apply cleanly,
  try `git apply --ignore-whitespace` and check that hunk by hand.
* correct_ha63_eco3_list: the filtered frame is now materialised with
  `.copy()` before its "Street / Block Name" column is reassigned, so pandas
  no longer emits SettingWithCopyWarning. The returned data is unchanged.
* Hunk at -1799: `missed_df.head(1)[...]` is an expression whose result is
  discarded. It is flagged in place instead of removed so that the hunk
  counts and the diffstat below stay valid.
* Context and removed lines are reproduced verbatim because they must match
  the target file.

 .../ha_15_32/ha_analysis_batch_3.py           | 46 +++++++++++++++++--
 1 file changed, 42 insertions(+), 4 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 74c6d3f5..aebf0506 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -289,6 +289,10 @@ class DataLoader:
             asset_list["matching_address"] = asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \
                 asset_list["Post Code"].astype(str).str.lower().str.strip()
             asset_list["matching_postcode"] = asset_list["Post Code"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA63":
+            asset_list["matching_address"] = asset_list["Address1"].astype(str).str.lower().str.strip() + ", " + \
+                asset_list["POSTCODE"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["POSTCODE"].astype(str).str.lower().str.strip()
         elif ha_name == "HA107":
             # Create matching_address by concatenating House No, Street, Town, District, Postcode
             asset_list["matching_address"] = asset_list["House No"].astype(str).str.lower().str.strip() + ", " + \
@@ -1551,6 +1555,16 @@ class DataLoader:
     def correct_ha41_survey_list(survey_list):
         return survey_list
 
+    @staticmethod
+    def correct_ha63_survey_list(survey_list):
+        # Drop some filler rows
+        survey_list = survey_list[
+            ~survey_list[survey_list.columns[0]].isin(
+                ["NO JOBS SURVEYED JULY 2021 ", "NO JOBS SURVEYED SEPTEMBER 2021"]
+            )
+        ]
+        return survey_list
+
     @staticmethod
     def levenstein_match(matching_string, df):
         match_to = df["matching_address"].tolist()
@@ -1714,6 +1728,26 @@ class DataLoader:
     def correct_ha41_eco3_list(eco3_list):
         return eco3_list
 
+    @staticmethod
+    def correct_ha63_eco3_list(eco3_list):
+        eco3_list = eco3_list[~pd.isnull(eco3_list["Post Code"])]
+        # Some postcodes that aren't in the asset list
+        eco3_list = eco3_list[
+            ~eco3_list["Post Code"].isin(
+                ["NR32 15X", "NR30 2BT"]
+            )
+        ].copy()
+
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "POUND COTTAGES - BLOOMSBERRY CLOSE", "POUND COTTAGES"
+        )
+
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "FREDRICK ROAD", "Frederick Road"
+        )
+
+        return eco3_list
+
     def merge_eco3_to_assets(self, asset_list, eco3_list, ha_name):
         eco3_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_eco3_list")
 
@@ -1799,12 +1833,15 @@ class DataLoader:
         # We verify the missed
         # HA25 contains 119 missed entries. These are actually 24 unique postcodes, and the majority belong to 2
         # where many surveys were conducted on house numbers, not in the asset list
+        # 154 missed, 2827 matched for HA 25
         if len(missed) != self.UNMATCHED_ECO3[ha_name]:
             raise ValueError(
                 f"Unmatched addresses for {ha_name} is not as expected, got {len(missed)} unmatched"
             )
 
-        # 154 missed, 2827 matched for HA 25
+        # 41
+        missed_df = eco3_list[eco3_list["eco3_list_row_id"].isin(missed)]
+        missed_df.head(1)["Street / Block Name"]  # NOTE(review): result is discarded - debugging leftover, safe to delete
         matching_lookup = pd.DataFrame(matching_lookup)
 
         # Check dupes as this will cause problems later on
@@ -4418,11 +4455,12 @@ def app():
     # Add in:
     priority_has = [
         "HA1", "HA2", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39", "HA41", "HA48",
-        "HA50", "HA107",
+        "HA50", "HA63", "HA107",
     ]
     # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come
-    # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE],
-    # Consider for ECO4: 2, 63, 12, 13, 136, 117
+    # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE]
+    # 63 [WIP]
+    # Consider for ECO4: 12, 13, 136, 117
     # COnsider for GBIS: 56, 35, 34
     # Ignore for now:
     # 38 [problematic, but no ECO4], 10 problematic (no eligibility), 20 has barely any in