From c58acadb730b6e6ab1ebb700b4669ab3cf171f5b Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar
Date: Mon, 18 Mar 2024 12:19:15 +0000
Subject: [PATCH] HA51 eco3 matching

Add the HA45 and HA51 housing associations to the batch 3 analysis.

* HA45: register its address/postcode columns in the column config, take
  the matching address straight from the configured address column,
  correct "Norwich Road" to "Norwich Avenue" in its survey list, and add
  an entry of 0 to the per-HA dict defined just above UNMATCHED_ECO3.
* HA51: build the matching address from Address Line 1-3 and Postcode,
  prefixed with Block when Block is non-empty; add ECO3 list corrections
  (three street spellings, one postcode, one house number, and one row
  dropped because it is not in the asset list); add 24 to UNMATCHED_ECO3.
* merge_eco3_to_assets: include HA51 in the HA25/HA56 branch, and when
  more than one candidate row remains, keep only the rows whose matching
  address agrees with the ECO3 "NO " field on containing "flat".
* app(): add HA45 and HA51 to priority_has.
---
Review notes (v2; ignored by git am):

* Line structure restored. This patch had been flattened onto three
  physical lines. Indentation and blank-line placement of context lines
  were inferred from the hunk line counts and from Python syntax, so check
  them against the target file before applying
  (git apply --ignore-whitespace may help).
* correct_ha51_eco3_list: the filtered frame is now copied before the
  "NO " column is reassigned, so the assignment no longer targets a slice
  of the caller's frame (pandas SettingWithCopyWarning).
* correct_ha45_survey_list / correct_ha51_eco3_list: the str.replace calls
  now pass regex=False. The patterns are literal street names and the
  default value of `regex` differs between pandas releases.
* Both fixes are in-line, so the hunk line counts and the diffstat are
  unchanged. The blob ids on the "index" line still describe the original
  commit.
* TODO confirm: the new "flat" tie-break has no ha_name guard in the lines
  visible in this patch, so it presumably runs for every HA, and it can
  leave zero candidates when no candidate address contains "flat".

 .../ha_15_32/ha_analysis_batch_3.py           | 80 ++++++++++++++++---
 1 file changed, 71 insertions(+), 9 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index dc96d403..af9af514 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -491,6 +491,10 @@ class DataLoader:
             "address": "A_Address",
             "postcode": "matching_postcode"
         },
+        "HA45": {
+            "address": "Full postal address",
+            "postcode": "Postcode"
+        },
         "HA48": {
             "address": "Full Address",
             "postcode": "Postcode"
@@ -518,7 +522,8 @@ class DataLoader:
         "HA50": 4,
         "HA63": 15,
         "HA107": 51,
-        "HA48": 0
+        "HA48": 0,
+        "HA45": 0
     }
 
     UNMATCHED_ECO3 = {
@@ -527,7 +532,8 @@ class DataLoader:
         "HA50": 5,
         "HA56": 320,
         "HA63": 0,
-        "HA117": 4
+        "HA117": 4,
+        "HA51": 24
     }
 
     def __init__(self, directories, december_figures_filepath, use_cache, rebuild):
@@ -542,7 +548,7 @@ class DataLoader:
 
     def create_asset_list_matching_address(self, ha_name, asset_list):
 
-        if ha_name in ["HA1", "HA6", "HA12", "HA16", "HA24", "HA30", "HA31", "HA48", "HA49", "HA54"]:
+        if ha_name in ["HA1", "HA6", "HA12", "HA16", "HA24", "HA30", "HA31", "HA45", "HA48", "HA49", "HA54"]:
             asset_list["matching_address"] = asset_list[
                 self.COLUMN_CONFIG[ha_name]["address"]
             ].astype(str).str.lower().str.strip()
@@ -717,6 +723,18 @@ class DataLoader:
             asset_list["matching_address"] = asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["Post Code"].astype(str).str.lower().str.strip()
             asset_list["matching_postcode"] = asset_list["Post Code"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA51":
+            asset_list["matching_address"] = asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address Line 2"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address Line 3"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Postcode"].astype(str).str.lower().str.strip()
+            asset_list["matching_address"] = np.where(
+                asset_list["Block"].str.strip().str.len() > 0,
+                asset_list["Block"].astype(str).str.lower().str.strip() + ", " + \
+                asset_list["matching_address"],
+                asset_list["matching_address"]
+            )
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
         elif ha_name == "HA56":
             asset_list["matching_address"] = asset_list["Address 1"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["Address 2"].astype(str).str.lower().str.strip() + ", " + \
@@ -2485,6 +2503,13 @@ class DataLoader:
         )
         return survey_list
 
+    @staticmethod
+    def correct_ha45_survey_list(survey_list):
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Norwich Road", "Norwich Avenue", regex=False
+        )
+        return survey_list
+
     @staticmethod
     def levenstein_match(matching_string, df):
         match_to = df["matching_address"].tolist()
@@ -2744,6 +2769,38 @@ class DataLoader:
 
         return eco3_list
 
+    @staticmethod
+    def correct_ha51_eco3_list(eco3_list):
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "HASELEMERE AVENUE", "HASLEMERE AVENUE", regex=False
+        )
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "THORVILLE GROVE", "THORNVILLE GROVE", regex=False
+        )
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "MONTBRETA CLOSE", "MONTBRETIA CLOSE", regex=False
+        )
+        eco3_list["Post Code"] = np.where(
+            (eco3_list["Street / Block Name"] == "SYDENHAM ROAD") &
+            (eco3_list["Post Code"] == "CR0 2DW"),
+            "CR0 2ED",
+            eco3_list["Post Code"]
+        )
+        # Not in asset list
+        eco3_list = eco3_list[
+            ~((eco3_list["Street / Block Name"] == "WOODLEY LANE") &
+              (eco3_list["Post Code"] == "SM5 2RJ") &
+              (eco3_list["NO "] == "FLAT 3, 11"))
+        ].copy()
+
+        eco3_list["NO "] = np.where(
+            (eco3_list["NO "] == "47 B"),
+            "47B",
+            eco3_list["NO "]
+        )
+
+        return eco3_list
+
     def merge_eco3_to_assets(self, asset_list, eco3_list, ha_name):
         eco3_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_eco3_list")
 
@@ -2752,7 +2809,7 @@ class DataLoader:
         asset_list["matching_postcode_nospace"] = asset_list["matching_postcode"].str.replace(" ", "").str.lower()
         eco3_list["postcode_no_space"] = eco3_list["Post Code"].str.lower().str.replace(" ", "")
 
-        if ha_name in ["HA25", "HA56"]:
+        if ha_name in ["HA25", "HA56", "HA51"]:
             # HA25: 317 -> 259
             missed_postcodes = {
                 postcode for postcode in eco3_list["postcode_no_space"] if
@@ -2774,7 +2831,7 @@ class DataLoader:
         matching_lookup = []
         missed = []
         for _, row in tqdm(eco3_list.iterrows(), total=len(eco3_list)):
-            # if row["eco3_list_row_id"] == "HA25_Eco3_5422":
+            # if row["eco3_list_row_id"] == "HA51_Eco3_22":
             #     raise Exception()
 
             postcode = row["postcode_no_space"]
@@ -2813,6 +2870,12 @@ class DataLoader:
                     missed.append(row["eco3_list_row_id"])
                     continue
 
+            if df.shape[0] > 1:
+                if "flat" in str(row["NO "]).lower():
+                    df = df[df["matching_address"].str.contains("flat")]
+                else:
+                    df = df[~df["matching_address"].str.contains("flat")]
+
             if df.shape[0] != 1:
                 print(row["Street / Block Name"])
                 print(house_number)
@@ -6200,10 +6263,9 @@ def app():
     priority_has = [
         "HA1", "HA2", "HA6", "HA7", "HA9", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18", "HA19", "HA24", "HA25",
         "HA27", "HA28", "HA30", "HA31", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA49", "HA50", "HA54", "HA56",
-        "HA63", "HA107", "HA117",
-        # Added as of March 17th
-        "HA8", "HA11", "HA21", "HA37", "HA42",
-        "HA44",
+        "HA63", "HA107", "HA117", "HA8", "HA11", "HA21", "HA37", "HA42",
+        # Added as of March 18th
+        "HA44", "HA45", "HA51",
         # New HAS
         "HAXX", "HAXXX",
     ]