wrapping up ha analysis

This commit is contained in:
Khalim Conn-Kowlessar 2024-03-26 18:05:08 +00:00
parent 3dd30445f9
commit 724379a86d

View file

@ -5366,6 +5366,7 @@ def forecast_remaining_sales(loader):
results = []
for ha_name, input_data in loader.data.items():
# Original warmfront figures - ECO4
original_warmfront_estimates = december_figures[december_figures["HA Name"] == ha_name]
if original_warmfront_estimates.empty:
@ -6032,7 +6033,7 @@ def forecast_remaining_sales(loader):
def fml_data_pull(loader):
has_bruh = [
"HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13",
"HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12",
"HA50", "HA24", "HA15", "HA32", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12",
"HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49",
'HA8', 'HA11', 'HA21', 'HA37', 'HA42', 'HA44', 'HA45', 'HA51', 'HA52', "HA17", "HA5", "HA20",
]
@ -6129,7 +6130,7 @@ def fml_analysis(loader):
assumed_ciga_pass_rate = 0.731
has_bruh = [
"HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13",
"HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12",
"HA50", "HA24", "HA15", "HA32", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12",
"HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49",
'HA8', 'HA11', 'HA21', 'HA37', 'HA42', 'HA44', 'HA45', 'HA51', 'HA52', "HA17", "HA5", "HA20",
]
@ -6738,89 +6739,106 @@ def app():
loader = DataLoader(directories, december_figures_filepath, use_cache, rebuild_inputs)
loader.load()
loader.ha_facts_and_figures()
forecast_remaining_sales(loader)
conversion_rate = 0.95
archetype_check_conversion = 0.7
res = []
for k, v in loader.data.items():
asset_list = v["asset_list"].copy()
agg = asset_list["ECO Eligibility"].value_counts()
# We find a case where there are properties that have passed CIGA
if not any("passed" in x for x in agg.index):
# gbis rate
# breakdowns = []
# for ha, data_assets in loader.data.items():
# asset_list = data_assets["asset_list"].copy()
# breakdown = asset_list["ECO Eligibility"].value_counts().to_dict()
# breakdowns.append(breakdown)
# breakdowns = pd.DataFrame(breakdowns)
#
# installer = []
# for ha, data_assets in loader.data.items():
# survey_list = data_assets["survey_list"]
# if survey_list.empty:
# continue
# if "INSTALLER" not in survey_list.columns:
# continue
#
# installers = survey_list["INSTALLER"].value_counts().to_dict()
# installers["ha_name"] = ha
# installer.append(installers)
# installer = pd.DataFrame(installer)
# installer.drop(columns=["ha_name"]).sum().sum()
# Adhoc - for HA16, get the properties that still need a CIGA check
asset_list_ha16 = loader.data["HA16"]["asset_list"].copy()
ha_16_need_ciga = asset_list_ha16[
asset_list_ha16["ECO Eligibility"].str.contains("subject to ciga")
]
completed_cigas = loader.data["HA16"]["ciga_list"].copy()
# Store the results
ha_16_need_ciga.to_csv("ha16_need_ciga.csv")
completed_cigas.to_csv("ha16_completed_cigas.csv")
# Ad hoc - look at the current pipeline and identify how many dormant, CIGA-dependent properties there are for
# live projects
# Read excel
orderbook_filepath = "local_data/ha_data/Warmfront HA client order book overview_20240129.xlsx"
orderbook_workbook = openpyxl.load_workbook(orderbook_filepath)
orderbook_sheet = orderbook_workbook["Contractual Info"]
orderbook_colnames = [cell.value for cell in orderbook_sheet[1]]
rows = []
for row in orderbook_sheet.iter_rows(min_row=2, values_only=False):
row_data = [cell.value for cell in row] # This will get you the cell values
rows.append(row_data)
orderbook = pd.DataFrame(rows, columns=orderbook_colnames)
live_orderbook = orderbook[orderbook["Live, New, or Historic?"] == "LIVE"].copy()
live_orderbook['Redacted HA'] = live_orderbook['Redacted HA'].str.replace(" ", "")
dormant_properties = []
missed_has = []
for _, customer in live_orderbook.iterrows():
if customer['Redacted HA'] not in loader.data.keys():
missed_has.append(customer['Redacted HA'])
continue
asset_list = loader.data[customer['Redacted HA']]["asset_list"].copy()
survey_list = loader.data[customer['Redacted HA']]["survey_list"].copy()
# Remove sold
if not survey_list.empty:
survey_list = survey_list[~pd.isnull(survey_list["asset_list_row_id"])]
asset_list = asset_list.merge(
survey_list[["asset_list_row_id", "installation_status"]],
how="left",
on="asset_list_row_id"
)
# Anything with an installation status has already gone to installation, and is therefore not remaining
asset_list = asset_list[pd.isnull(asset_list["installation_status"])]
asset_list = asset_list.drop(columns=["installation_status"])
agg = pd.DataFrame(agg).reset_index()
passed_ciga = agg[agg["ECO Eligibility"] == "eco4 - passed ciga"]
passed_ciga = passed_ciga["count"].values[0] if not passed_ciga.empty else 0
failed_ciga = agg[agg["ECO Eligibility"] == "failed ciga"]
failed_ciga = failed_ciga["count"].values[0] if not failed_ciga.empty else 0
ciga_pass_rate = passed_ciga / (passed_ciga + failed_ciga) if (passed_ciga + failed_ciga) > 0 else 1
dormant_ciga = agg[
agg["ECO Eligibility"].str.contains("subject to ciga") &
~agg["ECO Eligibility"].str.contains("subject to archetype")
# We pull out the properties that need a CIGA check
need_ciga = asset_list[asset_list["ECO Eligibility"] == "eco4 (subject to ciga)"]
need_archetype = asset_list[asset_list["ECO Eligibility"] == "eco4 (subject to archetype)"]
need_ciga_and_archetype = asset_list[
asset_list["ECO Eligibility"] == "eco4 (subject to ciga) (subject to archetype)"
]
dormant_ciga = dormant_ciga['count'].values[0] if not dormant_ciga.empty else 0
dormant_ciga_archetype = agg[
agg["ECO Eligibility"].str.contains("subject to ciga") &
agg["ECO Eligibility"].str.contains("subject to archetype")
]
dormant_ciga_archetype = dormant_ciga_archetype['count'].values[0] if not dormant_ciga_archetype.empty else 0
needing_check = dormant_ciga + dormant_ciga_archetype * archetype_check_conversion
needing_check = np.round(needing_check)
additional_jobs = (dormant_ciga * ciga_pass_rate * conversion_rate) + (
dormant_ciga_archetype * archetype_check_conversion * ciga_pass_rate * conversion_rate
)
additional_jobs = np.round(additional_jobs)
# We attempt to estimate the uplift and how much of that is attributed to surplus subject to ciga jobs
original_estimate = loader.december_figures[
loader.december_figures["HA Name"] == k
]
original_estimate = original_estimate["ECO4"].values[0] if not original_estimate.empty else 0
base_eco_figures = agg[
agg["ECO Eligibility"].isin(["eco4", "eco4 - passed ciga"])
]["count"].sum()
eco4_from_ciga = original_estimate - base_eco_figures
eco4_from_ciga = eco4_from_ciga if eco4_from_ciga > 0 else 0
surplus_from_dormant = additional_jobs - eco4_from_ciga
surplus_from_dormant = 0 if surplus_from_dormant < 0 else surplus_from_dormant
res.append(
dormant_properties.append(
{
"ha_name": k,
"additional_eco4": additional_jobs,
"needing_check": needing_check,
"surplus_from_dormant": surplus_from_dormant
"HA Name": customer['Redacted HA'],
"Need CIGA": need_ciga.shape[0],
"Need Archetype": need_archetype.shape[0],
"Need CIGA and Archetype": need_ciga_and_archetype.shape[0]
}
)
res = pd.DataFrame(res)
# Drop the HAs that are not in the previous draft
# In the v2 draft, there are 12 HAs
dormant_properties = pd.DataFrame(dormant_properties)
totals = dormant_properties.sum()
totals["HA Name"] = "Total"
v5_surplus = res[
~res["ha_name"].isin(["HA9"])
]["additional_eco4"].sum()
# 7212 properties
# This is not a perfect difference though, because of the variations in how the numbers are recorded in the November
# all HAs sheet. E.g. for HA107, there were 1239 properties identified. In the postcode list, there are 1255,
# however 531 still need a CIGA check. Therefore their original figures, in this case, included properties
# pre-CIGA.
dormant_properties = pd.concat([dormant_properties, totals.to_frame().T])
dormant_properties.to_csv("dormant_properties.csv")
v5_surplus_from_dormant = res[
~res["ha_name"].isin(["HA9"])
]["surplus_from_dormant"].sum()
# 5539.0
# 9471690
loader.december_figures["ECO4 remaining"].sum()
december_figures = loader.december_figures.copy()
december_figures["ECO4 remaining"] = np.where(
december_figures["ECO4 remaining"] < 0,
0,
december_figures["ECO4 remaining"]
)
december_figures["ECO4 remaining"].sum()