Debugging list loading

2026-07-27 23:35:01 +00:00 · 2024-02-22 12:39:46 +00:00 · 2024-02-22 12:39:46 +00:00 · 615f2289e7
commit 615f2289e7
parent a45cf2f319
3 changed files with 29 additions and 56 deletions
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
      <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
      <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
    </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
  <component name="PyNamespacePackagesService">
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
  <component name="Black">
    <option name="sdkName" value="Python 3.10 (backend)" />
  </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
  <component name="PythonCompatibilityInspectionAdvertiser">
    <option name="version" value="3" />
  </component>
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -131,9 +131,17 @@ class DataLoader:

        return ciga_list

+    @staticmethod
+    def get_sheetname(workbook):
+        if "Asset List" in workbook.sheetnames:
+            return "Asset List"
+        else:
+            return "Assets"
+
    def load_asset_list(self, filepath, ha_name):
        workbook = openpyxl.load_workbook(filepath)
-        asset_sheet = workbook["Assets"]
+        sheetname = self.get_sheetname(workbook)
+        asset_sheet = workbook[sheetname]
        asset_sheet_colnames = [cell.value for cell in asset_sheet[1]]

        rows_data = []
@@ -170,8 +178,10 @@ class DataLoader:
            # Remove columns that are None
            survey_list = survey_list.loc[:, survey_list.columns.notnull()]
            survey_list["survey_list_row_id"] = [ha_name + "_survey_" + str(i) for i in range(0, len(survey_list))]
+
            # Perform survey list merge
-            survey_list = self.merge_surveys_to_assets(asset_list, survey_list, ha_name)
+            if not survey_list.empty:
+                survey_list = self.merge_surveys_to_assets(asset_list, survey_list, ha_name)

        # We check if there are CIGA checks
        ciga_list = pd.DataFrame()
@@ -185,9 +195,10 @@ class DataLoader:
            ciga_list = pd.DataFrame(ciga_rows, columns=[cell.value for cell in ciga_sheet[1]])
            # Remove columns that are None
            ciga_list = ciga_list.loc[:, ciga_list.columns.notnull()]
-            ciga_list = self.create_ciga_list_house_no(ha_name, ciga_list)
            # Perform ciga list merge
-            ciga_list = self.merge_ciga_to_assets(asset_list, ciga_list, ha_name)
+            if not ciga_list.empty:
+                ciga_list = self.create_ciga_list_house_no(ha_name, ciga_list)
+                ciga_list = self.merge_ciga_to_assets(asset_list, ciga_list, ha_name)

        return asset_list, survey_list, ciga_list

@@ -208,6 +219,10 @@ class DataLoader:

        return asset_list

+    @staticmethod
+    def correct_ha39_asset_list(asset_list):
+        return asset_list
+
    @staticmethod
    def correct_ha6_survey_list(survey_list):

@@ -337,6 +352,10 @@ class DataLoader:

        return survey_list

+    @staticmethod
+    def correct_ha39_survey_list(survey_list):
+        return survey_list
+
    def merge_surveys_to_assets(self, asset_list, survey_list, ha_name):

        # Correct the asset list
@@ -491,23 +510,10 @@ class DataLoader:
                ha_name=ha_name,
            )

-            if file_config.get("survey_list"):
-                # TODO: Delete this
-                logger.info("Loading survey list for {}".format(ha_name))
-                survey_list, matched_lookup = self.load_survey_list(
-                    asset_list=asset_list,
-                    file_path=file_config["survey_list"]["filepath"],
-                    ha_name=ha_name,
-                    sheet_name=file_config["survey_list"]["sheetname"]
-                )
-            else:
-                survey_list = None
-                matched_lookup = None
-
            data[ha_name] = {
                "asset_list": asset_list,
                "survey_list": survey_list,
-                "matched_lookup": matched_lookup
+                "ciga_list": ciga_list
            }

        self.data = data
@@ -1288,42 +1294,9 @@ def app():
    # List all of the data in the folder
    directories = [str(list(entry.iterdir())[0]) for entry in DATA_FOLDER.iterdir() if entry.is_dir()]

-    files = {
-        "ha_1": {
-            "asset_list": {
-                "filepath": "local_data/ha_data/HA1/ACCENT GROUP.xlsx",
-                "sheetname": "Energy data"
-            }
-        },
-        "ha_6": {
-            "asset_list": {
-                "filepath": "etl/eligibility/ha_15_32/HA 6 - ASSET LIST.xlsx",
-                "sheetname": "HA 6"
-            },
-            "survey_list": {
-                "filepath": "etl/eligibility/ha_15_32/HA 6 - SURVEY LIST.xlsx",
-                "sheetname": "HA 6"
-            }
-        },
-        "ha_14": {
-            "asset_list": {
-                "filepath": "etl/eligibility/ha_15_32/HA 14 - ASSET LIST.xlsx",
-                "sheetname": "HA 14"
-            }
-        },
-        "ha_39": {
-            "asset_list": {
-                "filepath": "etl/eligibility/ha_15_32/HA 39 - ASSET LIST.xlsx",
-                "sheetname": "Sheet1"
-            }
-        },
-        "ha_107": {
-            "asset_list": {
-                "filepath": "etl/eligibility/ha_15_32/HA 107 - ASSET LIST.xlsx",
-                "sheetname": "HA 107"
-            }
-        }
-    }
+    priority_has = ["HA1", "HA6", "HA14", "HA39", "HA107"]
+    # Filter down the directories to only the priority HAs
+    directories = [d for d in directories if d.split("/")[2] in priority_has]

    loader = DataLoader(directories, use_cache)
    loader.load()