# Model/etl/eligibility/ha_15_32/cancellation.py

import openpyxl
import pandas as pd
import numpy as np


def get_excel_survey_list(workbook_path, worksheet_name=None):
    """
    Read a survey worksheet into a DataFrame, recording each row's fill colour.

    :param workbook_path: path of the Excel workbook, passed to ``openpyxl.load_workbook``
    :param worksheet_name: name of the worksheet to read; when None the workbook's
        active sheet is used
    :return: DataFrame with one row per worksheet row from row 2 onwards, columns named
        after the values in worksheet row 1, plus a "row_colour" column holding the
        fill start-colour index of the row's first cell (None when that index is
        '00000000')
    """
    workbook = openpyxl.load_workbook(workbook_path)
    sheet = workbook.active if worksheet_name is None else workbook[worksheet_name]

    # NOTE(review): '00000000' is presumably what openpyxl reports for an unfilled cell - confirm
    no_fill_index = '00000000'

    records = []
    colours = []

    # Row 1 is assumed to hold the headers, so data starts at row 2
    for cells in sheet.iter_rows(min_row=2, values_only=False):
        records.append([cell.value for cell in cells])

        # Only the first cell of the row is inspected for the colour
        fill_index = cells[0].fill.start_color.index
        colours.append(None if fill_index == no_fill_index else fill_index)

    header = [cell.value for cell in sheet[1]]
    frame = pd.DataFrame(records, columns=header)
    frame["row_colour"] = colours

    return frame


def _ha25_status_from_colour(row_colour):
    """Translate HA 25 row fill-colour codes into survey statuses ("unknown" when unmapped)."""
    colour_to_status = {
        "FF7030A0": "installed",
        "FF92D050": "installed",
        "FFFF0000": "cancelled",
        "FFFFFF00": "filler row - drop",
        "FF38FD23": "installed",
    }
    # Missing colours and any code not listed above fall through to "unknown"
    return row_colour.map(colour_to_status).fillna("unknown")


def load_data():
    """
    Load the HA 16, HA 24 and HA 25 survey lists and combine them into one frame.

    For HA 16 and HA 24 the status is taken from the "INSTALLED OR CANCELLED" column and
    the cancellation reason from "INSTALLERS NOTES ; REASONS FOR CANCELLATIONS". For HA 25
    the status is derived from the row fill colour and the reason is always
    "No reason provided".

    :return: DataFrame with the columns "HA", "survey_status" and "cancellation_reason".
        Rows whose status is "no update" or "loft remaining" are removed; every other
        status (including "see notes", "unknown" and any raw value that is not covered by
        the mappings below) is kept. The index is the one produced by ``pd.concat``, so
        labels from the individual survey lists may repeat.
    """
    output_columns = ["HA", "survey_status", "cancellation_reason"]
    status_column = "INSTALLED OR CANCELLED"
    reason_column = "INSTALLERS NOTES ; REASONS FOR CANCELLATIONS"

    # Load for HA 16 - ECO 4
    ha16_survey_list = get_excel_survey_list('etl/eligibility/ha_15_32/HESTIA- HA 16 ECO4 SURVEY LIST.xlsx')

    # Load for HA 24 - ECO 4
    ha24_survey_list = get_excel_survey_list('etl/eligibility/ha_15_32/HESTIA - HA 24 ECO4 SURVEY LIST.xlsx')

    # Load for HA 25 - ECO 3
    ha25_survey_list = get_excel_survey_list(
        'etl/eligibility/ha_15_32/HESTIA - HA 25 ECO3 SURVEY LIST.xlsx', worksheet_name="CAVITY"
    )

    # Drop columns that contain no values at all
    # NOTE(review): presumably these are the columns with a None header - confirm
    ha25_survey_list = ha25_survey_list.dropna(axis=1, how='all')

    # Standardise the installation status column; values without a mapping are left as they are
    ha16_survey_list["survey_status"] = ha16_survey_list[status_column].replace(
        {
            "NO UPDATE - CHECKED 2.10.23": "no update",
            "NO UPDATE - CHECKED 18.12.23": "no update",
            "INSTALLED": "installed",
            "CANCELLED": "cancelled",
            "LOFT STILL TO BE INSTALLED": "loft remaining",
        }
    )

    ha24_survey_list["survey_status"] = ha24_survey_list[status_column].replace(
        {
            "NO UPDATE - CHECKED 21.11.23": "no update",
            "NO UPDATE - CHECKED 18.12.23": "no update",
            "INSTALLED": "installed",
            "CANCELLED": "cancelled",
            "LOFT STILL TO BE INSTALLED": "loft remaining",
            "SEE NOTES >>": "see notes",
        }
    )

    # We need to prepare HA25 differently: its status is encoded in the row colour
    ha25_survey_list["survey_status"] = _ha25_status_from_colour(ha25_survey_list["row_colour"])

    # Take an explicit copy so the column assignments below write to an independent frame
    # instead of a filtered slice (which triggers pandas' SettingWithCopyWarning)
    ha25_survey_list = ha25_survey_list[ha25_survey_list["survey_status"] != "filler row - drop"].copy()

    # We standardise the cancellation reasons - just create a new column
    ha16_survey_list["cancellation_reason"] = ha16_survey_list[reason_column].copy()
    ha24_survey_list["cancellation_reason"] = ha24_survey_list[reason_column].copy()
    # There's no cancellation reason for HA25
    ha25_survey_list["cancellation_reason"] = "No reason provided"

    # Combine the dataframes
    ha16_survey_list["HA"] = "HA 16"
    ha24_survey_list["HA"] = "HA 24"
    ha25_survey_list["HA"] = "HA 25"

    cancellation_data = pd.concat(
        [
            ha16_survey_list[output_columns],
            ha24_survey_list[output_columns],
            ha25_survey_list[output_columns],
        ]
    )

    # Take just rows that we have a confirmed status for
    unconfirmed = cancellation_data["survey_status"].isin(["no update", "loft remaining"])
    cancellation_data = cancellation_data[~unconfirmed]

    return cancellation_data


def app():
    """
    Analyse the cancellation data provided by warmfront.

    NOTE(review): the part of this function visible here only loads the combined
    cancellation data via ``load_data`` and binds it to a local name; confirm
    whether further analysis follows.
    """

    # These are cancellations of jobs that completed invasive surveys but where the installer could not
    # conclude the work
    sales_cancellation_data = load_data()