read in asset list for ha 1, working on ha 6

2026-07-27 23:35:01 +00:00 · 2024-01-22 11:03:45 +00:00 · 2024-01-22 11:03:45 +00:00 · 01a4628d20
commit 01a4628d20
parent 4adfa0bb62
2 changed files with 182 additions and 2 deletions
--- a/etl/eligibility/ha_15_32/app.py
+++ b/etl/eligibility/ha_15_32/app.py
@ -16,8 +16,6 @@ from dotenv import load_dotenv
 from backend.SearchEpc import SearchEpc
 from backend.Property import Property
 from etl.eligibility.Eligibility import Eligibility
-from etl.epc.DataProcessor import DataProcessor
-from backend.app.plan.utils import create_recommendation_scoring_data
 from etl.epc.settings import COLUMNS_TO_MERGE_ON
 from backend.ml_models.api import ModelApi

--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@ -0,0 +1,182 @@
+import os
+import msgpack
+import openpyxl
+from pathlib import Path
+from tqdm import tqdm
+from datetime import datetime
+import pandas as pd
+import numpy as np
+from utils.s3 import read_from_s3, read_dataframe_from_s3_parquet
+from utils.logger import setup_logger
+from dotenv import load_dotenv
+from tqdm import tqdm
+from backend.SearchEpc import SearchEpc
+from etl.eligibility.Eligibility import Eligibility
+from etl.eligibility.ha_15_32.app import prepare_model_data_row
+from etl.epc.settings import COLUMNS_TO_MERGE_ON
+from backend.ml_models.api import ModelApi
+from etl.solar.SolarPhotoSupply import SolarPhotoSupply
+from recommendations.recommendation_utils import calculate_cavity_age
+from recommendation_utils import convert_thickness_to_numeric
+
+# This module lives in etl/eligibility/ha_15_32/, so its .env file is a sibling
+# of this file; re-appending the package path would point at a non-existent
+# nested directory.
+ENV_FILE = Path(__file__).parent / ".env"
+
+# Load the .env file before reading any variable it may define, otherwise a
+# token that is only present in the file is never picked up.
+load_dotenv(ENV_FILE)
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+logger = setup_logger()
+
+
+class DataLoader:
+    """Load housing-association asset and survey workbooks with row colours.
+
+    Every data row is classified by the fill colour of its first cell. The
+    colour code is translated into a colour name ("red", "green", ...) and
+    then into a human-readable meaning.
+    """
+
+    # Fill-colour codes, as reported by openpyxl's ``fill.start_color.index``,
+    # per housing association and per list type.
+    COLOUR_CONFIG = {
+        "ha_1": {
+            "asset_list": {"red": "FFFF0000", "green": "FF00B050"},
+        },
+        "ha_6": {
+            "asset_list": {"red": "FFFF0000", "green": "FF00B050"},
+            # NOTE(review): purple is assumed to be Excel's standard-palette
+            # purple (7030A0) - confirm against the HA 6 survey workbook.
+            "survey_list": {
+                "red": "FFFF0000",
+                "green": "FF00B050",
+                "purple": "FF7030A0",
+            },
+        },
+    }
+
+    # Colour name used for any row whose fill matches no configured colour
+    # (this includes rows without a fill).
+    _DEFAULT_COLOUR_NAME = "yellow"
+
+    _ASSET_ROW_MEANINGS = {
+        "red": "does not meet criteria",
+        "green": "identified potential eco works (CWI)",
+    }
+    _ASSET_DEFAULT_MEANING = "maybe in the future"
+
+    # The survey list has 4 possible colours:
+    # PURPLE - Installer advised install complete and a complimentary post works EPC has been completed.
+    # GREEN - Installer advised install complete.
+    # RED - Cancelled
+    # NO FILL - No official update from installer (could be installed or cancelled)
+    _SURVEY_ROW_MEANINGS = {
+        "red": "Cancelled",
+        "green": "Installer advised install complete",
+        "purple": "Installer advised install complete and a complimentary post works EPC has been completed",
+    }
+    _SURVEY_DEFAULT_MEANING = "No official update from installer (could be installed or cancelled)"
+
+    def __init__(self, files):
+        """
+        :param files: mapping of HA name to a config holding an "asset_list"
+            entry and, optionally, a "survey_list" entry. Each entry is either
+            a dict with "filepath" (and optionally "sheetname") or a bare
+            file path string.
+        """
+        self.files = files
+
+    @staticmethod
+    def _normalise_file_config(config):
+        """Return ``{"filepath", "sheetname"}`` for a dict or bare-path config."""
+        if isinstance(config, str):
+            return {"filepath": config, "sheetname": None}
+        return {"filepath": config["filepath"], "sheetname": config.get("sheetname")}
+
+    @staticmethod
+    def _read_sheet(file_path, sheet_name=None, show_progress=False):
+        """Read one worksheet into a DataFrame plus the fill colour of each row.
+
+        The first row is taken as the header. The colour of a data row is the
+        fill colour index of its first cell, or None when the cell has the
+        '00000000' (no fill) index.
+
+        :param file_path: path of the workbook to open
+        :param sheet_name: worksheet to read; the active sheet when None
+        :param show_progress: wrap the row iteration in a tqdm progress bar
+        :return: tuple of (DataFrame of cell values, list of row colours)
+        """
+        workbook = openpyxl.load_workbook(file_path)
+        sheet = workbook[sheet_name] if sheet_name is not None else workbook.active
+        column_names = [cell.value for cell in sheet[1]]
+
+        # min_row=2 skips the header row
+        rows = sheet.iter_rows(min_row=2, values_only=False)
+        if show_progress:
+            rows = tqdm(rows)
+
+        values = []
+        colours = []
+        for row in rows:
+            values.append([cell.value for cell in row])
+            fill_index = row[0].fill.start_color.index
+            colours.append(fill_index if fill_index != '00000000' else None)
+
+        return pd.DataFrame(values, columns=column_names), colours
+
+    @staticmethod
+    def _colour_names(colours, palette, names, default="yellow"):
+        """Translate fill-colour codes into colour names.
+
+        :param colours: iterable of colour codes (None for no fill)
+        :param palette: mapping of colour name to colour code
+        :param names: colour names to recognise, in order of precedence
+        :param default: name used when a code matches none of ``names``
+        :return: list of colour names, one per input colour
+        """
+        names_by_code = {}
+        for name in names:
+            code = palette.get(name)
+            if code is not None:
+                # setdefault keeps the first name when two names share a code
+                names_by_code.setdefault(code, name)
+        return [names_by_code.get(colour, default) for colour in colours]
+
+    def _palette(self, ha_name, list_kind):
+        """Return the colour palette for ``list_kind``, falling back to the asset list's.
+
+        :raises KeyError: when ``ha_name`` has no entry in COLOUR_CONFIG
+        """
+        ha_colours = self.COLOUR_CONFIG[ha_name]
+        return ha_colours.get(list_kind, ha_colours["asset_list"])
+
+    def load_asset_list(self, file_path, ha_name, sheet_name=None):
+        """Load an asset list and label each row by its fill colour.
+
+        Adds the columns "row_color" (raw colour code), "row_colour_name"
+        ("red", "green" or "yellow") and "row_meaning".
+
+        :param file_path: path of the asset list workbook
+        :param ha_name: key into COLOUR_CONFIG
+        :param sheet_name: worksheet to read; the active sheet when None
+        :return: DataFrame of the sheet with the three colour columns appended
+        :raises KeyError: when ``ha_name`` has no entry in COLOUR_CONFIG
+        """
+        asset_list, rows_colors = self._read_sheet(file_path, sheet_name)
+        asset_list['row_color'] = rows_colors
+
+        colour_names = self._colour_names(
+            rows_colors,
+            self._palette(ha_name, "asset_list"),
+            names=("red", "green"),
+            default=self._DEFAULT_COLOUR_NAME,
+        )
+        asset_list["row_colour_name"] = colour_names
+        asset_list["row_meaning"] = [
+            self._ASSET_ROW_MEANINGS.get(name, self._ASSET_DEFAULT_MEANING)
+            for name in colour_names
+        ]
+
+        return asset_list
+
+    def load_survey_list(self, file_path, ha_name, sheet_name=None):
+        """Load a survey list and label each row by its fill colour.
+
+        Adds the columns "row_colour" (raw colour code), "row_colour_name"
+        ("red", "green", "purple" or "yellow") and "row_meaning". The survey
+        palette of the HA is used when configured, otherwise the asset list
+        palette; a colour missing from the palette simply never matches.
+
+        :param file_path: path of the survey list workbook
+        :param ha_name: key into COLOUR_CONFIG
+        :param sheet_name: worksheet to read; the active sheet when None
+        :return: DataFrame of the sheet with the three colour columns appended
+        :raises KeyError: when ``ha_name`` has no entry in COLOUR_CONFIG
+        """
+        survey_list, survey_colors = self._read_sheet(file_path, sheet_name, show_progress=True)
+        survey_list["row_colour"] = survey_colors
+
+        colour_names = self._colour_names(
+            survey_colors,
+            self._palette(ha_name, "survey_list"),
+            names=("red", "green", "purple"),
+            default=self._DEFAULT_COLOUR_NAME,
+        )
+        survey_list["row_colour_name"] = colour_names
+        survey_list["row_meaning"] = [
+            self._SURVEY_ROW_MEANINGS.get(name, self._SURVEY_DEFAULT_MEANING)
+            for name in colour_names
+        ]
+
+        return survey_list
+
+    def load(self):
+        """Load the asset list, and the survey list when configured, of every HA.
+
+        :return: dict mapping HA name to {"asset_list": DataFrame,
+            "survey_list": DataFrame or None}
+        """
+        data = {}
+        for ha_name, file_config in self.files.items():
+            asset_config = self._normalise_file_config(file_config["asset_list"])
+            asset_list = self.load_asset_list(
+                file_path=asset_config["filepath"],
+                ha_name=ha_name,
+                sheet_name=asset_config["sheetname"]
+            )
+
+            survey_list = None
+            if file_config.get("survey_list"):
+                survey_config = self._normalise_file_config(file_config["survey_list"])
+                survey_list = self.load_survey_list(
+                    file_path=survey_config["filepath"],
+                    ha_name=ha_name,
+                    sheet_name=survey_config["sheetname"]
+                )
+
+            data[ha_name] = {
+                "asset_list": asset_list,
+                "survey_list": survey_list
+            }
+
+        return data
+
+
+def app():
+    """
+    This app contains the housing association analysis for HAs 1, 6, 14, 39 and 107.
+    Only HA 6 has surveys.
+
+    Builds the per-HA workbook configuration and the DataLoader for it.
+    :return: None
+    """
+
+    # Every entry uses the {"filepath", "sheetname"} shape, which is what
+    # DataLoader.load() indexes into for each list.
+    files = {
+        "ha_1": {
+            "asset_list": {
+                "filepath": "etl/eligibility/ha_15_32/HA 1 - ASSET LIST.xlsx",
+                "sheetname": "HA 1"
+            }
+        },
+        "ha_6": {
+            "asset_list": {
+                "filepath": "etl/eligibility/ha_15_32/HA 6 - ASSET LIST.xlsx",
+                "sheetname": "HA 6"
+            },
+            "survey_list": {
+                "filepath": "etl/eligibility/ha_15_32/HA 6 - SURVEY LIST.xlsx",
+                "sheetname": "HA 6"
+            }
+        },
+        # NOTE(review): the sheet names for HAs 14, 39 and 107 are not known
+        # yet; None makes the loader fall back to the workbook's active sheet.
+        "ha_14": {
+            "asset_list": {
+                "filepath": "etl/eligibility/ha_15_32/HA 14 - ASSET LIST.xlsx",
+                "sheetname": None
+            }
+        },
+        "ha_39": {
+            "asset_list": {
+                "filepath": "etl/eligibility/ha_15_32/HA 39 - ASSET LIST.xlsx",
+                "sheetname": None
+            }
+        },
+        "ha_107": {
+            "asset_list": {
+                "filepath": "etl/eligibility/ha_15_32/HA 107 - ASSET LIST.xlsx",
+                "sheetname": None
+            }
+        }
+    }
+
+    # Loading is not triggered here yet: HAs 14, 39 and 107 have no colour
+    # configuration in DataLoader.COLOUR_CONFIG so far.
+    loader = DataLoader(files)