From 6693ab4ca6e12a6b9da112e8c8a3d48b1fe6ad87 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar
Date: Fri, 23 Feb 2024 17:13:18 +0000
Subject: [PATCH] Added in read of december figures

DataLoader now takes the path of the December figures CSV, reads it in
load() and strips the spaces from its "HA Name" column.

ha_facts_and_figures() left-merges the CIGA "Guarantee" flag onto each
asset list and rewrites "Subject to CIGA" eligibilities to "Failed CIGA"
(when Guarantee == "Yes") or "Pass CIGA" (otherwise), then stores the
updated asset list back on self.data.

app() passes the new filepath to DataLoader and switches use_cache on.

---
Review notes (this section is dropped by git am):
* load(): the "HA Name" clean-up read from an undefined local name
  `december_figures` and raised NameError; it now reads
  self.december_figures.
* This patch was re-flowed from a whitespace-collapsed copy, so the
  indentation and the placement of blank context lines are reconstructed.
  Run `git apply --check` (or `git apply --ignore-whitespace`) first.
* The post-image blob id on the index line predates the NameError fix.

 .../ha_15_32/ha_analysis_batch_3.py           | 55 +++++++++++++++++--
 1 file changed, 49 insertions(+), 6 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 2d95a946..dbe12e92 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -47,11 +47,13 @@ class DataLoader:
         "HA107": 52
     }
 
-    def __init__(self, directories, use_cache):
+    def __init__(self, directories, december_figures_filepath, use_cache):
         self.directories = directories
         self.use_cache = use_cache
+        self.december_figures_filepath = december_figures_filepath
 
         self.data = {}
+        self.december_figures = None
 
     def create_asset_list_matching_address(self, ha_name, asset_list):
 
@@ -730,6 +732,11 @@ class DataLoader:
             )
             return
 
+        # Get the december figures, which is just a csv
+        self.december_figures = pd.read_csv(self.december_figures_filepath)
+        # Remove the spaces in HA Name
+        self.december_figures["HA Name"] = self.december_figures["HA Name"].str.replace(" ", "")
+
         data = {}
         for filepath in self.directories:
             ha_name = filepath.split("/")[2]
@@ -763,9 +770,43 @@ class DataLoader:
         """
         ha_facts_and_figures = []
         for ha_name, data_assets in self.data.items():
-            asset_list = data_assets["asset_list"]
-            survey_list = data_assets["survey_list"]
-            ciga_list = data_assets["ciga_list"]
+            asset_list = data_assets["asset_list"].copy()
+            survey_list = data_assets["survey_list"].copy()
+            ciga_list = data_assets["ciga_list"].copy()
+
+            asset_list["ECO Eligibility"].value_counts()
+
+            # We merge on ciga and update the status to reflect if it has failed ciga or not
+            # If Guarantee is Yes, this means that there is a guarantee in place, and the property failed the CIGA
+            # check
+            asset_list = asset_list.merge(
+                ciga_list[["asset_list_row_id", "Guarantee"]],
+                how='left',
+                on="asset_list_row_id"
+            )
+
+            asset_list["ECO Eligibility"].value_counts()
+
+            asset_list["ECO Eligibility"] = np.where(
+                (
+                    asset_list["ECO Eligibility"].str.contains("(Subject to CIGA)", regex=False) &
+                    (asset_list["Guarantee"] == "Yes")
+                ),
+                "Failed CIGA",
+                asset_list["ECO Eligibility"]
+            )
+
+            # We replace any remaining "Subject to CIGA" with pass Ciga
+            asset_list["ECO Eligibility"] = np.where(
+                asset_list["ECO Eligibility"].str.contains("Subject to CIGA", regex=False),
+                "Pass CIGA",
+                asset_list["ECO Eligibility"]
+            )
+
+            asset_list = asset_list.drop(columns=["Guarantee"])
+
+            # Update the asset list with the categorisations
+            self.data[ha_name]["asset_list"] = asset_list
 
         return ha_facts_and_figures
 
@@ -1532,16 +1573,18 @@ def app():
 
     :return:
     """
-    use_cache = False
+    use_cache = True
 
     # List all of the data in the folder
     directories = [str(list(entry.iterdir())[0]) for entry in DATA_FOLDER.iterdir() if entry.is_dir()]
+    # Grab the December HA figures filepath
+    december_figures_filepath = "local_data/ha_data/HA_December_figures.csv"
 
     priority_has = ["HA1", "HA6", "HA14", "HA39", "HA107"]
     # Filter down the directories to only the priority HAs
     directories = [d for d in directories if d.split("/")[2] in priority_has]
 
-    loader = DataLoader(directories, use_cache)
+    loader = DataLoader(directories, december_figures_filepath, use_cache)
     loader.load()
 
     loader.ha_facts_and_figures()