From 8b70fb346c0ce51acd24b245bbbecedeaa10d30c Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 7 Mar 2024 15:00:51 +0000 Subject: [PATCH] matching ha50 --- .../ha_15_32/ha_analysis_batch_3.py | 56 ++++++++++++++++--- 1 file changed, 49 insertions(+), 7 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 06bb0d96..4708bf35 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -172,7 +172,8 @@ class DataLoader: } UNMATCHED_ECO3 = { - "HA25": 154 + "HA25": 154, + "HA50": 5 } def __init__(self, directories, december_figures_filepath, use_cache, rebuild): @@ -262,6 +263,10 @@ class DataLoader: asset_list["add_5"].astype(str).str.lower().str.strip() + ", " + \ asset_list["post_code"].astype(str).str.lower().str.strip() asset_list["matching_postcode"] = asset_list["post_code"].astype(str).str.lower().str.strip() + elif ha_name == "HA50": + asset_list["matching_address"] = asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["Post Code"].astype(str).str.lower().str.strip() + asset_list["matching_postcode"] = asset_list["Post Code"].astype(str).str.lower().str.strip() elif ha_name == "HA107": # Create matching_address by concatenating House No, Street, Town, District, Postcode asset_list["matching_address"] = asset_list["House No"].astype(str).str.lower().str.strip() + ", " + \ @@ -433,6 +438,8 @@ class DataLoader: return "ECO Surveys" elif "ECO Survey" in workbook.sheetnames: return "ECO Survey" + elif "ECO 4 Surveys completed" in workbook.sheetnames: + return "ECO 4 Surveys completed" else: return "ECO surveys" @@ -1289,6 +1296,34 @@ class DataLoader: return survey_list + @staticmethod + def correct_ha50_survey_list(survey_list): + + survey_list["Post Code"] = np.where( + (survey_list["Street / Block Name"] == 'COSELEY STREET') & + (survey_list["Post Code"] == 'ST16 1LR'), + "ST6 1JU", + survey_list["Post Code"] + ) + + # Remove some of COSELEY STREET, as we have surveys done, outside of the asset list + survey_list = survey_list[ + ~((survey_list["Street / Block Name"] == "COSELEY STREET") & + (survey_list["Post Code"] == "ST6 1JU") & + (survey_list["NO."].isin([96]))) + ] + + survey_list["Post Code"] = survey_list["Post Code"].str.replace("ST33JZ", "ST3 3JZ") + + # Remove some of Jesmond drive as we have surveys done outside of the asset list + survey_list = survey_list[ + ~((survey_list["Street / Block Name"] == "Jesmond Drive") & + (survey_list["Post Code"] == "ST3 3JZ") & + (survey_list["NO."].isin([29]))) + ] + + return survey_list + @staticmethod def correct_ha107_survey_list(survey_list): # Replace Front Street, East Stockham with Front Street, East Stockwith @@ -1503,6 +1538,10 @@ class DataLoader: ) return eco3_list + @staticmethod + def correct_ha50_eco3_list(eco3_list): + return eco3_list + def merge_eco3_to_assets(self, asset_list, eco3_list, ha_name): eco3_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_eco3_list") @@ -1517,6 +1556,7 @@ class DataLoader: postcode for postcode in eco3_list["postcode_no_space"] if postcode not in asset_list["matching_postcode_nospace"].values } + eco3_list = eco3_list[~eco3_list["postcode_no_space"].isin(missed_postcodes)] # For the asset list, we create a matching address without any punctuation @@ -4199,16 +4239,18 @@ def app(): # Grab the December HA figures filepath december_figures_filepath = "local_data/ha_data/HA_December_figures.csv" - # Add in: "HA25" + # Add in: # TODO: Remove ECO3 sales from HA25 priority_has = [ - "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA20", "HA24", "HA25", "HA28", "HA32", "HA39", "HA107", + "HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39", "HA50", "HA107", ] - # Next HAs to do: 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come back on this], + # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come + # back on this], # Then: 28 [DONE], - # 41, 10, 14 [DONE], 20, 48, 50 - # 38[problematic, but no ECO4] - # TODO - do 50 and 25 next + # 41, 48, 50 + # 38[problematic, but no ECO4], 10 problematic (no eligibility), + # 20 has barely any in + # TODO - do 50 # Filter down the directories to only the priority HAs directories = [d for d in directories if d.split("/")[2] in priority_has]