diff --git a/.idea/Model.iml b/.idea/Model.iml index 4413bb06..b0f9c00d 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + diff --git a/.idea/misc.xml b/.idea/misc.xml index 6f308057..1122b380 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 92956337..7bb8b40c 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -131,9 +131,17 @@ class DataLoader: return ciga_list + @staticmethod + def get_sheetname(workbook): + if "Asset List" in workbook.sheetnames: + return "Asset List" + else: + return "Assets" + def load_asset_list(self, filepath, ha_name): workbook = openpyxl.load_workbook(filepath) - asset_sheet = workbook["Assets"] + sheetname = self.get_sheetname(workbook) + asset_sheet = workbook[sheetname] asset_sheet_colnames = [cell.value for cell in asset_sheet[1]] rows_data = [] @@ -170,8 +178,10 @@ class DataLoader: # Remove columns that are None survey_list = survey_list.loc[:, survey_list.columns.notnull()] survey_list["survey_list_row_id"] = [ha_name + "_survey_" + str(i) for i in range(0, len(survey_list))] + # Perform survey list merge - survey_list = self.merge_surveys_to_assets(asset_list, survey_list, ha_name) + if not survey_list.empty: + survey_list = self.merge_surveys_to_assets(asset_list, survey_list, ha_name) # We check if there are CIGA checks ciga_list = pd.DataFrame() @@ -185,9 +195,10 @@ class DataLoader: ciga_list = pd.DataFrame(ciga_rows, columns=[cell.value for cell in ciga_sheet[1]]) # Remove columns that are None ciga_list = ciga_list.loc[:, ciga_list.columns.notnull()] - ciga_list = self.create_ciga_list_house_no(ha_name, ciga_list) # Perform ciga list merge - ciga_list = self.merge_ciga_to_assets(asset_list, ciga_list, ha_name) + if not ciga_list.empty: + ciga_list = self.create_ciga_list_house_no(ha_name, ciga_list) + ciga_list = self.merge_ciga_to_assets(asset_list, ciga_list, ha_name) return asset_list, survey_list, ciga_list @@ -208,6 +219,10 @@ class DataLoader: return asset_list + @staticmethod + def correct_ha39_asset_list(asset_list): + return asset_list + @staticmethod def correct_ha6_survey_list(survey_list): @@ -337,6 +352,10 @@ class DataLoader: return survey_list + @staticmethod + def correct_ha39_survey_list(survey_list): + return survey_list + def merge_surveys_to_assets(self, asset_list, survey_list, ha_name): # Correct the asset list @@ -491,23 +510,10 @@ class DataLoader: ha_name=ha_name, ) - if file_config.get("survey_list"): - # TODO: Delete this - logger.info("Loading survey list for {}".format(ha_name)) - survey_list, matched_lookup = self.load_survey_list( - asset_list=asset_list, - file_path=file_config["survey_list"]["filepath"], - ha_name=ha_name, - sheet_name=file_config["survey_list"]["sheetname"] - ) - else: - survey_list = None - matched_lookup = None - data[ha_name] = { "asset_list": asset_list, "survey_list": survey_list, - "matched_lookup": matched_lookup + "ciga_list": ciga_list } self.data = data @@ -1288,42 +1294,9 @@ def app(): # List all of the data in the folder directories = [str(list(entry.iterdir())[0]) for entry in DATA_FOLDER.iterdir() if entry.is_dir()] - files = { - "ha_1": { - "asset_list": { - "filepath": "local_data/ha_data/HA1/ACCENT GROUP.xlsx", - "sheetname": "Energy data" - } - }, - "ha_6": { - "asset_list": { - "filepath": "etl/eligibility/ha_15_32/HA 6 - ASSET LIST.xlsx", - "sheetname": "HA 6" - }, - "survey_list": { - "filepath": "etl/eligibility/ha_15_32/HA 6 - SURVEY LIST.xlsx", - "sheetname": "HA 6" - } - }, - "ha_14": { - "asset_list": { - "filepath": "etl/eligibility/ha_15_32/HA 14 - ASSET LIST.xlsx", - "sheetname": "HA 14" - } - }, - "ha_39": { - "asset_list": { - "filepath": "etl/eligibility/ha_15_32/HA 39 - ASSET LIST.xlsx", - "sheetname": "Sheet1" - } - }, - "ha_107": { - "asset_list": { - "filepath": "etl/eligibility/ha_15_32/HA 107 - ASSET LIST.xlsx", - "sheetname": "HA 107" - } - } - } + priority_has = ["HA1", "HA6", "HA14", "HA39", "HA107"] + # Filter down the directories to only the priority HAs + directories = [d for d in directories if d.split("/")[2] in priority_has] loader = DataLoader(directories, use_cache) loader.load()