Model/etl/eligibility/ha_15_32/cancellation.py

113 lines
4.5 KiB
Python

import openpyxl
import pandas as pd
import numpy as np
def get_excel_survey_list(workbook_path, worksheet_name=None):
    """
    Read a survey worksheet into a DataFrame and record each row's fill colour.

    The first worksheet row supplies the column names and every later row
    becomes one DataFrame row. An additional "row_colour" column holds the fill
    colour index of the first cell in the row, or None when that index is
    '00000000'.

    :param workbook_path: path of the Excel workbook to open
    :param worksheet_name: name of the worksheet to read; the workbook's active
        sheet is used when this is None
    :return: DataFrame of the data rows plus the "row_colour" column
    """
    workbook = openpyxl.load_workbook(workbook_path)
    sheet = workbook.active if worksheet_name is None else workbook[worksheet_name]

    # Row 1 is assumed to hold the headers, so the data starts at row 2
    header = [cell.value for cell in sheet[1]]

    values = []
    colours = []
    for cells in sheet.iter_rows(min_row=2, values_only=False):
        values.append([cell.value for cell in cells])
        # Only the first cell is inspected; '00000000' is treated as "no colour"
        colour_index = cells[0].fill.start_color.index
        colours.append(None if colour_index == '00000000' else colour_index)

    frame = pd.DataFrame(values, columns=header)
    frame["row_colour"] = colours
    return frame
# Raw "INSTALLED OR CANCELLED" labels that occur in both the HA 16 and HA 24 sheets
_SHARED_STATUS_LABELS = {
    "NO UPDATE - CHECKED 18.12.23": "no update",
    "INSTALLED": "installed",
    "CANCELLED": "cancelled",
    "LOFT STILL TO BE INSTALLED": "loft remaining",
}
_HA16_STATUS_LABELS = {
    **_SHARED_STATUS_LABELS,
    "NO UPDATE - CHECKED 2.10.23": "no update",
}
_HA24_STATUS_LABELS = {
    **_SHARED_STATUS_LABELS,
    "NO UPDATE - CHECKED 21.11.23": "no update",
    "SEE NOTES >>": "see notes",
}
# HA 25 has no status column: the fill colour of each row encodes the status
_HA25_COLOUR_STATUSES = {
    "FF7030A0": "installed",
    "FF92D050": "installed",
    "FFFF0000": "cancelled",
    "FFFFFF00": "filler row - drop",
    "FF38FD23": "installed",
}


def _standardise_labelled_survey(survey_list, status_labels, ha_label):
    """Build the HA / survey_status / cancellation_reason frame for a sheet with a text status column."""
    return pd.DataFrame(
        {
            "HA": ha_label,
            # Labels missing from the mapping (including blanks) pass through unchanged
            "survey_status": survey_list["INSTALLED OR CANCELLED"].replace(status_labels),
            "cancellation_reason": survey_list["INSTALLERS NOTES ; REASONS FOR CANCELLATIONS"],
        },
        index=survey_list.index,
    )


def _standardise_coloured_survey(survey_list, colour_statuses, ha_label):
    """Build the HA / survey_status / cancellation_reason frame for a sheet whose row colour encodes the status."""
    # Any colour that is not in the mapping (including no colour at all) is "unknown"
    statuses = survey_list["row_colour"].map(
        lambda colour: colour_statuses.get(colour, "unknown")
    )
    keep = statuses != "filler row - drop"
    # A new frame is built from the filtered values instead of assigning columns
    # to a boolean-indexed slice, which raises SettingWithCopyWarning in pandas
    return pd.DataFrame(
        {
            "HA": ha_label,
            "survey_status": statuses[keep],
            # This sheet carries no cancellation reasons
            "cancellation_reason": "No reason provided",
        },
        index=statuses.index[keep],
    )


def load_data():
    """
    Load the HA 16, HA 24 and HA 25 survey lists and combine them into one frame.

    HA 16 and HA 24 carry a text status column that is mapped onto standard
    labels. HA 25 is read from its "CAVITY" worksheet and its status is derived
    from each row's fill colour; rows marked as filler are dropped.

    :return: DataFrame with the columns "HA", "survey_status" and
        "cancellation_reason". Rows whose status is "no update" or
        "loft remaining" are excluded. The index is not reset, so index labels
        repeat across the three source sheets.
    """
    # Load for HA 16 - ECO 4
    ha16_survey_list = get_excel_survey_list('etl/eligibility/ha_15_32/HESTIA- HA 16 ECO4 SURVEY LIST.xlsx')
    # Load for HA 24 - ECO 4
    ha24_survey_list = get_excel_survey_list('etl/eligibility/ha_15_32/HESTIA - HA 24 ECO4 SURVEY LIST.xlsx')
    # Load for HA 25 - ECO 3
    ha25_survey_list = get_excel_survey_list(
        'etl/eligibility/ha_15_32/HESTIA - HA 25 ECO3 SURVEY LIST.xlsx', worksheet_name="CAVITY"
    )

    # The row colours are read straight from the loaded sheet. Dropping all-empty
    # columns first would also drop "row_colour" when no row is filled, and only
    # the three standard columns are kept anyway.
    cancellation_data = pd.concat(
        [
            _standardise_labelled_survey(ha16_survey_list, _HA16_STATUS_LABELS, "HA 16"),
            _standardise_labelled_survey(ha24_survey_list, _HA24_STATUS_LABELS, "HA 24"),
            _standardise_coloured_survey(ha25_survey_list, _HA25_COLOUR_STATUSES, "HA 25"),
        ]
    )

    # Take just rows that we have a confirmed status for
    # NOTE(review): "unknown", "see notes" and blank statuses are still kept here - confirm this is intended
    confirmed = ~cancellation_data["survey_status"].isin(["no update", "loft remaining"])
    return cancellation_data[confirmed]
def app():
"""
This application is used to analyse the cancellation data provided by warmfront
:return:
"""
# This is cancellations of jobs that completed invasive surveys and the installer could not conclude the work
sales_cancellation_data = load_data()