From 01a4628d206be30ed88c195fa9b7b04909a53637 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 22 Jan 2024 11:03:45 +0000 Subject: [PATCH] read in asset list for ha 1, working on ha 6 --- etl/eligibility/ha_15_32/app.py | 2 - .../ha_15_32/ha_analysis_batch_3.py | 182 ++++++++++++++++++ 2 files changed, 182 insertions(+), 2 deletions(-) create mode 100644 etl/eligibility/ha_15_32/ha_analysis_batch_3.py diff --git a/etl/eligibility/ha_15_32/app.py b/etl/eligibility/ha_15_32/app.py index 76aadcc4..ce216364 100644 --- a/etl/eligibility/ha_15_32/app.py +++ b/etl/eligibility/ha_15_32/app.py @@ -16,8 +16,6 @@ from dotenv import load_dotenv from backend.SearchEpc import SearchEpc from backend.Property import Property from etl.eligibility.Eligibility import Eligibility -from etl.epc.DataProcessor import DataProcessor -from backend.app.plan.utils import create_recommendation_scoring_data from etl.epc.settings import COLUMNS_TO_MERGE_ON from backend.ml_models.api import ModelApi diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py new file mode 100644 index 00000000..7c28d481 --- /dev/null +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -0,0 +1,182 @@ +import os +import msgpack +import openpyxl +from pathlib import Path +from tqdm import tqdm +from datetime import datetime +import pandas as pd +import numpy as np +from utils.s3 import read_from_s3, read_dataframe_from_s3_parquet +from utils.logger import setup_logger +from dotenv import load_dotenv +from tqdm import tqdm +from backend.SearchEpc import SearchEpc +from etl.eligibility.Eligibility import Eligibility +from etl.eligibility.ha_15_32.app import prepare_model_data_row +from etl.epc.settings import COLUMNS_TO_MERGE_ON +from backend.ml_models.api import ModelApi +from etl.solar.SolarPhotoSupply import SolarPhotoSupply +from recommendations.recommendation_utils import calculate_cavity_age +from recommendation_utils import 
class DataLoader:
    """Read housing-association Excel workbooks into colour-annotated DataFrames.

    The status of each data row is encoded by the fill colour of the row's
    first cell. The loaders copy the cell values into a DataFrame and append
    three columns: the raw fill colour, a colour name and the meaning attached
    to that colour.

    :param files: mapping of housing-association name to a config dict with an
        ``"asset_list"`` entry and an optional ``"survey_list"`` entry. Each
        entry is either ``{"filepath": ..., "sheetname": ...}`` or a bare path
        (in which case the workbook's active sheet is read).
    """

    # Fill colour codes, per housing association and per workbook type, as
    # reported by openpyxl's ``cell.fill.start_color.index``.
    COLOUR_CONFIG = {
        "ha_1": {
            "asset_list": {"red": "FFFF0000", "green": "FF00B050"},
        },
        "ha_6": {
            "asset_list": {"red": "FFFF0000", "green": "FF00B050"},
            # NOTE(review): the purple code is assumed to be Excel's
            # standard-palette purple - confirm against the HA 6 survey
            # workbook. A wrong code only means purple rows fall back to the
            # default label; it cannot raise.
            "survey_list": {
                "red": "FFFF0000",
                "green": "FF00B050",
                "purple": "FF7030A0",
            },
        },
    }

    # Fill index that the loaders treat as "cell has no fill".
    NO_FILL = "00000000"

    # Name given to rows whose fill matches none of the configured colours.
    DEFAULT_COLOUR_NAME = "yellow"

    ASSET_MEANINGS = {
        "red": "does not meet criteria",
        "green": "identified potential eco works (CWI)",
        "yellow": "maybe in the future",
    }

    # The survey list has 4 possible colours:
    # PURPLE - Installer advised install complete and a complimentary post
    #          works EPC has been completed.
    # GREEN - Installer advised install complete.
    # RED - Cancelled
    # NO FILL - No official update from installer (could be installed or
    #           cancelled)
    SURVEY_MEANINGS = {
        "red": "Cancelled",
        "green": "Installer advised install complete",
        "purple": (
            "Installer advised install complete and a complimentary post "
            "works EPC has been completed"
        ),
        "yellow": (
            "No official update from installer (could be installed or "
            "cancelled)"
        ),
    }

    def __init__(self, files):
        self.files = files

    @staticmethod
    def _read_sheet(file_path, sheet_name=None, show_progress=False):
        """Return ``(header, rows, colours)`` for one worksheet.

        ``header`` holds the values of row 1, ``rows`` the cell values of every
        later row, and ``colours`` the fill index of each row's first cell
        (``None`` when that index equals ``NO_FILL``).
        """
        workbook = openpyxl.load_workbook(file_path)
        sheet = workbook[sheet_name] if sheet_name is not None else workbook.active
        header = [cell.value for cell in sheet[1]]

        # Row 1 is assumed to be the header, so data starts at row 2.
        # values_only=False keeps the cell objects, which carry the fill.
        sheet_rows = sheet.iter_rows(min_row=2, values_only=False)
        if show_progress:
            sheet_rows = tqdm(sheet_rows)

        rows = []
        colours = []
        for row in sheet_rows:
            rows.append([cell.value for cell in row])
            fill_index = row[0].fill.start_color.index
            colours.append(None if fill_index == DataLoader.NO_FILL else fill_index)
        return header, rows, colours

    @classmethod
    def _colours_for(cls, ha_name, list_kind):
        """Return the colour-name -> colour-code table for one workbook type.

        Falls back to the association's ``"asset_list"`` table when no table
        is configured for ``list_kind``.

        :raises KeyError: if ``ha_name`` has no entry in ``COLOUR_CONFIG``.
        """
        try:
            ha_config = cls.COLOUR_CONFIG[ha_name]
        except KeyError:
            raise KeyError(
                f"No colour configuration for housing association {ha_name!r}"
            ) from None
        return ha_config.get(list_kind) or ha_config["asset_list"]

    @classmethod
    def _colour_names(cls, colour_codes, colours, names):
        """Translate raw fill codes into colour names.

        ``names`` is checked in order, so the first matching colour wins. A
        name missing from ``colours`` never matches, and anything unmatched
        becomes ``DEFAULT_COLOUR_NAME``.
        """
        codes = pd.Series(colour_codes, dtype=object)
        conditions = []
        for name in names:
            code = colours.get(name)
            if code is None:
                conditions.append(np.zeros(len(codes), dtype=bool))
            else:
                conditions.append((codes == code).to_numpy(dtype=bool))
        return np.select(conditions, list(names), default=cls.DEFAULT_COLOUR_NAME)

    @staticmethod
    def _file_spec(spec):
        """Return ``(filepath, sheetname)`` from a config dict or a bare path."""
        if isinstance(spec, (str, os.PathLike)):
            return spec, None
        return spec["filepath"], spec.get("sheetname")

    def load_asset_list(self, file_path, ha_name, sheet_name=None):
        """Load an asset-list workbook.

        :param file_path: path of the ``.xlsx`` workbook.
        :param ha_name: key into ``COLOUR_CONFIG`` selecting the colour codes.
        :param sheet_name: worksheet to read; the active sheet when ``None``.
        :return: DataFrame of the sheet's rows plus ``row_color`` (raw fill
            code or ``None``), ``row_colour_name`` (``red``/``green``/
            ``yellow``) and ``row_meaning``.
        :raises KeyError: if ``ha_name`` is not configured.
        """
        header, rows, row_colours = self._read_sheet(file_path, sheet_name)

        asset_list = pd.DataFrame(rows, columns=header)
        # This loader has always written "row_color" while the survey loader
        # writes "row_colour"; both are kept as-is because they are part of
        # the returned frame's column names.
        asset_list["row_color"] = row_colours

        colour_names = self._colour_names(
            row_colours, self._colours_for(ha_name, "asset_list"), ("red", "green")
        )
        asset_list["row_colour_name"] = colour_names
        asset_list["row_meaning"] = [
            self.ASSET_MEANINGS[name] for name in colour_names
        ]
        return asset_list

    def load_survey_list(self, file_path, ha_name, sheet_name=None):
        """Load a survey-list workbook.

        :param file_path: path of the ``.xlsx`` workbook.
        :param ha_name: key into ``COLOUR_CONFIG`` selecting the colour codes.
        :param sheet_name: worksheet to read; the active sheet when ``None``.
        :return: DataFrame of the sheet's rows plus ``row_colour`` (raw fill
            code or ``None``), ``row_colour_name`` (``red``/``green``/
            ``purple``/``yellow``) and ``row_meaning``.
        :raises KeyError: if ``ha_name`` is not configured.
        """
        header, rows, row_colours = self._read_sheet(
            file_path, sheet_name, show_progress=True
        )

        survey_list = pd.DataFrame(rows, columns=header)
        survey_list["row_colour"] = row_colours

        # The survey colours come from their own table (with the asset-list
        # table as fallback). A missing "purple" entry simply never matches
        # rather than raising KeyError.
        colour_names = self._colour_names(
            row_colours,
            self._colours_for(ha_name, "survey_list"),
            ("red", "green", "purple"),
        )
        survey_list["row_colour_name"] = colour_names
        survey_list["row_meaning"] = [
            self.SURVEY_MEANINGS[name] for name in colour_names
        ]
        return survey_list

    def load(self):
        """Load every configured workbook.

        :return: ``{ha_name: {"asset_list": DataFrame,
            "survey_list": DataFrame or None}}`` for each entry of
            ``self.files``; an empty dict when nothing is configured.
        """
        data = {}
        for ha_name, file_config in self.files.items():
            asset_path, asset_sheet = self._file_spec(file_config["asset_list"])
            asset_list = self.load_asset_list(
                file_path=asset_path,
                ha_name=ha_name,
                sheet_name=asset_sheet,
            )

            survey_list = None
            survey_config = file_config.get("survey_list")
            if survey_config:
                survey_path, survey_sheet = self._file_spec(survey_config)
                survey_list = self.load_survey_list(
                    file_path=survey_path,
                    ha_name=ha_name,
                    sheet_name=survey_sheet,
                )

            data[ha_name] = {
                "asset_list": asset_list,
                "survey_list": survey_list,
            }
        return data


def app():
    """
    This app contains the housing association analysis for HAs 1, 6, 14, 39 and 107.
    Only HA 6 has surveys.

    At present it only builds the file configuration and constructs the
    loader; nothing is read from disk yet.
    :return: None
    """

    files = {
        "ha_1": {
            "asset_list": {
                "filepath": "etl/eligibility/ha_15_32/HA 1 - ASSET LIST.xlsx",
                "sheetname": "HA 1"
            }
        },
        "ha_6": {
            "asset_list": {
                "filepath": "etl/eligibility/ha_15_32/HA 6 - ASSET LIST.xlsx",
                "sheetname": "HA 6"
            },
            "survey_list": {
                "filepath": "etl/eligibility/ha_15_32/HA 6 - SURVEY LIST.xlsx",
                "sheetname": "HA 6"
            }
        },
        # Bare paths: no sheet name is configured for these workbooks.
        "ha_14": {"asset_list": "etl/eligibility/ha_15_32/HA 14 - ASSET LIST.xlsx"},
        "ha_39": {"asset_list": "etl/eligibility/ha_15_32/HA 39 - ASSET LIST.xlsx"},
        "ha_107": {"asset_list": "etl/eligibility/ha_15_32/HA 107 - ASSET LIST.xlsx"}
    }

    loader = DataLoader(files)