Added automated creation of excel and added missing files to git

This commit is contained in:
Khalim Conn-Kowlessar 2024-01-29 12:13:22 +00:00
parent b6c57c7253
commit 55e28942e4
4 changed files with 876 additions and 30 deletions

View file

@ -128,7 +128,7 @@ class Eligibility:
if insulation_thickness <= 100:
thickness_classification = "0-100mm"
elif insulation_thickness <= 270:
elif insulation_thickness <= high_loft_thickness_threshold:
thickness_classification = "100-270mm"
else:
thickness_classification = "270mm+"
@ -146,24 +146,14 @@ class Eligibility:
"thickness_classification": thickness_classification
}
if insulation_thickness <= high_loft_thickness_threshold:
self.loft = {
"suitability": True,
"thickness": insulation_thickness,
"reason": "high loft thickness but below regulation",
"thickness_classification": thickness_classification
}
return
if insulation_thickness > high_loft_thickness_threshold:
# Insulation is already thick enough
self.loft = {
"suitability": False,
"thickness": insulation_thickness,
"reason": "existing insulation",
"thickness_classification": thickness_classification
}
return
# Insulation is already thick enough
self.loft = {
"suitability": False,
"thickness": insulation_thickness,
"reason": "existing insulation",
"thickness_classification": thickness_classification
}
return
def cavity_insulation(self):

View file

@ -0,0 +1,665 @@
import numpy as np
import pandas as pd
ECO4_NEW_RATES = 1710
GBIS_NEW_RATES = 600
def app():
# Load in the excel
nov_ha_data = pd.read_excel(
'etl/eligibility/ha_15_32/ALL HA FIGURES AND ASSIGNED INSTALLERS 21.11.2023 with sales data.xlsx',
)
# Drop rows where HA name is null
nov_ha_data = nov_ha_data.dropna(subset=["HA Name"])
nov_ha_data["ha_number"] = nov_ha_data["HA Name"].str.extract(r"(\d+)").astype(int)
nov_ha_data = nov_ha_data.sort_values("ha_number", ascending=True)
variance_explanations = pd.read_excel(
'etl/eligibility/ha_15_32/ALL HA FIGURES AND ASSIGNED INSTALLERS 21.11.2023 with sales data.xlsx',
sheet_name="Variance explanations"
)
september_figures = pd.read_excel(
"etl/eligibility/ha_15_32/ALL HA FIGURES AND ASSIGNED INSTALLERS SEP 23 UPDATE (2).xlsx",
sheet_name="HA Stats"
)
historical_invoices = pd.read_excel(
"etl/eligibility/ha_15_32/ALL HA FIGURES AND ASSIGNED INSTALLERS 21.11.2023 with sales data.xlsx",
sheet_name="Jul 22 to Oct 23"
)
# Drop rows where installer rates is null
historical_invoices = historical_invoices[~pd.isnull(historical_invoices["INSTALLER RATES"])]
historical_invoices = historical_invoices[historical_invoices["INSTALLER RATES"] != "NA "]
# By Scheme, take a weighted mean of the INSTALLER RATES, weighted on the number of rows
n_invoices = historical_invoices.groupby(["Scheme", "INSTALLER RATES"])["Invoice number"].count().reset_index()
n_invoices = n_invoices[n_invoices["Scheme"].isin(["Eco 4", "GBIS"])]
historical_scheme_rates = n_invoices.groupby("Scheme").apply(
lambda x: np.average(x["INSTALLER RATES"], weights=x["Invoice number"])
).reset_index().rename(columns={0: "Historical rates"})
# We keep only the entries of the HA data that have sales > 0
sales_data = nov_ha_data[nov_ha_data["Sales"] > 0]
# We now need to adjust sales data depending on the variance explanations
sales_data = sales_data.merge(
variance_explanations[["HA", 'Which figure is correct']],
how="left",
left_on="ha_number",
right_on="HA"
)
def adjust_sales(row):
    """
    Pick the sales figure to use for one HA row, following its variance explanation.

    :param row: mapping/Series with "Which figure is correct", "Sales" and
        "No. of Tech surveys complete" entries
    :return: the "No. of Tech surveys complete" value when the explanation is "HA facts & figures",
        otherwise the "Sales" value
    :raises ValueError: if the explanation is present but is not one of the recognised labels
    """
    verdict = row["Which figure is correct"]
    # Explanations for which the "Sales" figure stands as it is
    keep_sales_labels = (
        "Billed amount",
        "Both correct, HA facts and figures includes November",
        "Both correct",
    )
    # No explanation recorded, or one that confirms the sales figure
    if pd.isnull(verdict) or verdict in keep_sales_labels:
        return row["Sales"]
    if verdict == "HA facts & figures":
        return row['No. of Tech surveys complete']
    raise ValueError(f"Unknown value for 'Which figure is correct': {verdict}")
# We now need to adjust sales data depending on the variance explanations
sales_data["adjusted_sales"] = sales_data.apply(lambda row: adjust_sales(row), axis=1)
# We therefore adjust GBIS and ECO4 sales data based on adjusted sales
sales_data["adjusted_eco4_sales"] = sales_data["No. of Tech surveys complete - Eco 4"] / sales_data["Sales"] * \
sales_data["adjusted_sales"]
sales_data["adjusted_gbis_sales"] = sales_data["No. of Tech surveys complete - GBIS"] / sales_data["Sales"] * \
sales_data["adjusted_sales"]
sales_data["cancellation_rate"] = (sales_data["Sales"] - sales_data["adjusted_sales"]) / sales_data["Sales"]
# The difference between the adjusted sales and the actual sales is the cancellation
cancellations = (sales_data["adjusted_sales"].sum() - sales_data["Sales"].sum()) / sales_data["Sales"].sum()
# Given the cancellations, we can now adjust the expected remaining surveys
sales_data["No. of Tech surveys remaining"] = sales_data["No. of Tech surveys remaining"] * (
1 - sales_data["cancellation_rate"]
)
# We now merge on the expected values for September
sales_data = sales_data.merge(
september_figures[["Redacted HA", "ECO4", "GBIS"]].rename(
columns={"Redacted HA": "HA Name", "ECO4": "Sept Expected ECO4", "GBIS": "Sept Expected GBIS"}
),
how="left",
on="HA Name",
)
sales_data["Sept Expected ECO4"] = sales_data["Sept Expected ECO4"].fillna(0)
sales_data["Sept Expected GBIS"] = sales_data["Sept Expected GBIS"].fillna(0)
# We calculate the ECO4 and GBIS conversion rates with the adjusted numbers
sales_data["ECO4 Conversion"] = sales_data["adjusted_eco4_sales"] / sales_data["adjusted_sales"]
sales_data["GBIS Conversion"] = sales_data["adjusted_gbis_sales"] / sales_data["adjusted_sales"]
# We now calculate the expected remaining ECO4 and GBIS sales
# We take the number of remaining surveys and multiply by the conversion rate for each scheme, which tells us
# how many more we should expect to see
sales_data["Expected Remaining ECO4"] = sales_data["No. of Tech surveys remaining"] * sales_data["ECO4 Conversion"]
sales_data["Expected Remaining GBIS"] = sales_data["No. of Tech surveys remaining"] * sales_data["GBIS Conversion"]
# We now produce a forecasted ECO4 and GBIS sales figure
sales_data["Forecasted ECO4 Sales"] = sales_data["adjusted_eco4_sales"] + sales_data["Expected Remaining ECO4"]
sales_data["Forecasted GBIS Sales"] = sales_data["adjusted_gbis_sales"] + sales_data["Expected Remaining GBIS"]
# Take the columns we're interested in
# HA # Properties Sept ECO4 Figures Sept GBIS Figures Nov Total Sales Nov ECO4 Sales Nov GBIS Sales
# Remaining Surveys ECO4 conversion GBIS conversion Forecasted ECO4 Sales Forecasted GBIS sales ECO4 Change
# GBIS Change
sales_data_formatted = sales_data[[
"HA Name",
"ASSET LIST no.",
"Sept Expected ECO4",
"Sept Expected GBIS",
"adjusted_sales",
"adjusted_eco4_sales",
"adjusted_gbis_sales",
"No. of Tech surveys remaining",
"ECO4 Conversion",
"GBIS Conversion",
"Forecasted ECO4 Sales",
"Forecasted GBIS Sales"
]].rename(
columns={
"adjusted_sales": "Oct Total Sales (adjusted for variance)",
"adjusted_eco4_sales": "Oct ECO4 Sales (adjusted for variance)",
"adjusted_gbis_sales": "Oct GBIS Sales (adjusted for variance)",
"No. of Tech surveys remaining": "Remaining Surveys",
}
)
# Convert columns which should be integers to integers
for col in ["ASSET LIST no.", "Remaining Surveys", "Sept Expected ECO4", "Sept Expected GBIS",
"Oct Total Sales (adjusted for variance)", "Oct ECO4 Sales (adjusted for variance)",
"Oct GBIS Sales (adjusted for variance)", "Forecasted ECO4 Sales", "Forecasted GBIS Sales"]:
sales_data_formatted[col] = sales_data_formatted[col].fillna(0)
sales_data_formatted[col] = sales_data_formatted[col].astype(int)
# Remove HA 17 because this was EPCs only. We also remove HA33 because they do not have access to the full portfolio
# We take an explicit copy of the filtered frame: new columns are assigned onto it further down, and assigning
# onto a filtered slice raises pandas' SettingWithCopyWarning
sales_data_formatted = sales_data_formatted[
    ~sales_data_formatted["HA Name"].isin(["HA 17", "HA 33"])
].copy()
# September expected ECO4 and GBIS
sept_expected_eco4 = sales_data_formatted["Sept Expected ECO4"].sum()
sept_expected_gbis = sales_data_formatted["Sept Expected GBIS"].sum()
# Completed so far
oct_eco4_sales = sales_data_formatted["Oct ECO4 Sales (adjusted for variance)"].sum()
oct_gbis_sales = sales_data_formatted["Oct GBIS Sales (adjusted for variance)"].sum()
# Forecasted figures
forecasted_eco4_sales = sales_data_formatted["Forecasted ECO4 Sales"].sum()
forecasted_gbis_sales = sales_data_formatted["Forecasted GBIS Sales"].sum()
# Expected remaining sales
expected_remaining_eco4_sales = forecasted_eco4_sales - oct_eco4_sales
expected_remaining_gbis_sales = forecasted_gbis_sales - oct_gbis_sales
# Forecast change vs September
forecasted_eco4_change = 100 * (forecasted_eco4_sales - sept_expected_eco4) / sept_expected_eco4
forecasted_gbis_change = 100 * (forecasted_gbis_sales - sept_expected_gbis) / sept_expected_gbis
aggregates = pd.DataFrame(
columns=["Scheme", "Sept Expected", "Oct Completed", "Forecasted Remaining Sales", "Forecasted Total Sales",
"Forecasted Change vs Sept"],
data=[
["ECO4", sept_expected_eco4, oct_eco4_sales, expected_remaining_eco4_sales, forecasted_eco4_sales,
forecasted_eco4_change],
["GBIS", sept_expected_gbis, oct_gbis_sales, expected_remaining_gbis_sales, forecasted_gbis_sales,
forecasted_gbis_change],
]
)
# Multiply by historical rates to get revenue
# For ECO4, this is ~£1456 and for GBIS it's ~£432
historical_gbis_price = historical_scheme_rates[
historical_scheme_rates["Scheme"] == "GBIS"
]["Historical rates"].iloc[0]
historical_eco4_price = historical_scheme_rates[
historical_scheme_rates["Scheme"] == "Eco 4"
]["Historical rates"].iloc[0]
aggregates["Sept Expected Revenue"] = np.where(
aggregates["Scheme"] == "ECO4",
aggregates["Sept Expected"] * historical_eco4_price,
aggregates["Sept Expected"] * historical_gbis_price
)
aggregates["Completed Revenue"] = np.where(
aggregates["Scheme"] == "ECO4",
aggregates["Oct Completed"] * historical_eco4_price,
aggregates["Oct Completed"] * historical_gbis_price
)
# We use the new rates for the forecasted revenue
aggregates["Forecasted Remaining Revenue"] = np.where(
aggregates["Scheme"] == "ECO4",
aggregates["Forecasted Remaining Sales"] * ECO4_NEW_RATES,
aggregates["Forecasted Remaining Sales"] * GBIS_NEW_RATES
)
# We also calculate the forecasted remaining revenue at the original price
aggregates["Forecasted Remaining Revenue (original price)"] = np.where(
aggregates["Scheme"] == "ECO4",
aggregates["Forecasted Remaining Sales"] * historical_eco4_price,
aggregates["Forecasted Remaining Sales"] * historical_gbis_price
)
aggregates["Forecasted Revenue"] = aggregates["Completed Revenue"] + aggregates["Forecasted Remaining Revenue"]
# Forecasted revenue with original price
aggregates["Forecasted Revenue (original price)"] = (
aggregates["Completed Revenue"] + aggregates["Forecasted Remaining Revenue (original price)"]
)
# Create a totals row which sums up the two rows
forecasted_change_vs_sept = 100 * (
aggregates["Forecasted Total Sales"].sum() - aggregates["Sept Expected"].sum()
) / aggregates["Sept Expected"].sum()
aggregates = pd.concat(
[
aggregates,
pd.DataFrame(
[
["Total", aggregates["Sept Expected"].sum(), aggregates["Oct Completed"].sum(),
aggregates["Forecasted Remaining Sales"].sum(), aggregates["Forecasted Total Sales"].sum(),
forecasted_change_vs_sept,
aggregates["Sept Expected Revenue"].sum(), aggregates["Completed Revenue"].sum(),
aggregates["Forecasted Remaining Revenue"].sum(),
aggregates["Forecasted Remaining Revenue (original price)"].sum(),
aggregates["Forecasted Revenue"].sum(),
aggregates["Forecasted Revenue (original price)"].sum(),
]
],
columns=aggregates.columns
)
]
)
# For each property in the asset list, we now calculate an average conversion rate to ECO4 and GBIS
# We do this by taking the forecasted sales values for each schemes and dividing by the number of properties
number_properties = sales_data_formatted["ASSET LIST no."].sum()
eco4_conversion_rate = forecasted_eco4_sales / number_properties
gbis_conversion_rate = forecasted_gbis_sales / number_properties
# We also attribute a future value per property
future_eco4_value = ECO4_NEW_RATES * eco4_conversion_rate
future_gbis_value = GBIS_NEW_RATES * gbis_conversion_rate
# We also calculate a revenue figure for the old rates
historical_eco4_value = historical_eco4_price * eco4_conversion_rate
historical_gbis_value = historical_gbis_price * gbis_conversion_rate
# For the HAs that have not begun selling, we estimate the value of the projects
# We start with some problem HAs
# HA 7, HA 24, HA 25
# These HAs have no sales data, so we use the expected figures
problem_has_data = nov_ha_data[
(nov_ha_data["HA Name"].isin(["HA 7", "HA 24", "HA 25"]))
].copy()
# Merge on the september expected figures
problem_has_data = problem_has_data.merge(
september_figures[["Redacted HA", "ECO4", "GBIS"]].rename(
columns={"Redacted HA": "HA Name", "ECO4": "Sept Expected ECO4", "GBIS": "Sept Expected GBIS"}
),
how="left",
on="HA Name",
)
# Fill NAs
problem_has_data["Sept Expected ECO4"] = problem_has_data["Sept Expected ECO4"].fillna(0)
problem_has_data["Sept Expected GBIS"] = problem_has_data["Sept Expected GBIS"].fillna(0)
# We now calculate the expected ECO4 and GBIS sales based on the average conversion rates
problem_has_data["Expected ECO4 Sales"] = problem_has_data["ASSET LIST no."] * eco4_conversion_rate
problem_has_data["Expected GBIS Sales"] = problem_has_data["ASSET LIST no."] * gbis_conversion_rate
# Filter just on columns we're interested in
problem_has_data = problem_has_data[[
"HA Name",
"ASSET LIST no.",
"Sept Expected ECO4",
"Sept Expected GBIS",
"ECO4",
"GBIS",
"Expected ECO4 Sales",
"Expected GBIS Sales"
]].rename(
columns={
"ECO4": "Nov Expected ECO4",
"GBIS": "Nov Expected GBIS",
}
)
# Fill NAs
problem_has_data["Nov Expected ECO4"] = problem_has_data["Nov Expected ECO4"].fillna(0)
problem_has_data["Nov Expected GBIS"] = problem_has_data["Nov Expected GBIS"].fillna(0)
# We calculate HA level Sept, Nov expected revenue, based on historical rates and then forecasted revenue
problem_has_data["Sept Expected ECO4 Value"] = problem_has_data["Sept Expected ECO4"] * historical_eco4_price
problem_has_data["Sept Expected GBIS Value"] = problem_has_data["Sept Expected GBIS"] * historical_gbis_price
problem_has_data["Nov Expected ECO4 Value"] = problem_has_data["Nov Expected ECO4"] * historical_eco4_price
problem_has_data["Nov Expected GBIS Value"] = problem_has_data["Nov Expected GBIS"] * historical_gbis_price
problem_has_data["Forecasted ECO4 Revenue"] = problem_has_data["ASSET LIST no."] * future_eco4_value
problem_has_data["Forecasted GBIS Revenue"] = problem_has_data["ASSET LIST no."] * future_gbis_value
# Totals
problem_has_data["Sept Expected Total Value"] = problem_has_data["Sept Expected ECO4 Value"] + \
problem_has_data["Sept Expected GBIS Value"]
problem_has_data["Nov Expected Total Value"] = problem_has_data["Nov Expected ECO4 Value"] + \
problem_has_data["Nov Expected GBIS Value"]
problem_has_data["Forecasted Total Revenue"] = problem_has_data["Forecasted ECO4 Revenue"] + \
problem_has_data["Forecasted GBIS Revenue"]
# We calculate a total expected value for September, November and then forecasted
problem_has_expected_eco4_value = problem_has_data["Sept Expected ECO4"].sum() * historical_eco4_price
problem_has_expected_gbis_value = problem_has_data["Sept Expected GBIS"].sum() * historical_gbis_price
problem_has_expected_total_value = problem_has_expected_eco4_value + problem_has_expected_gbis_value
problem_has_nov_eco4_value = problem_has_data["Nov Expected ECO4"].sum() * historical_eco4_price
problem_has_nov_gbis_value = problem_has_data["Nov Expected GBIS"].sum() * historical_gbis_price
problem_has_nov_total_value = problem_has_nov_eco4_value + problem_has_nov_gbis_value
forecasted_eco4_value = problem_has_data["ASSET LIST no."].sum() * future_eco4_value
forecasted_gbis_value = problem_has_data["ASSET LIST no."].sum() * future_gbis_value
problem_has_forecasted_total_value = forecasted_eco4_value + forecasted_gbis_value
problem_has_summary = pd.DataFrame(
columns=["Scheme", "Sept Expected", "Nov Expected", "Forecasted"],
data=[
["ECO4", problem_has_expected_eco4_value, problem_has_nov_eco4_value, forecasted_eco4_value],
["GBIS", problem_has_expected_gbis_value, problem_has_nov_gbis_value, forecasted_gbis_value],
["Total", problem_has_expected_total_value, problem_has_nov_total_value, problem_has_forecasted_total_value]
]
)
# We now also estimate the value of the remaining HAs based on historical sales performance and new rates
# We take the HAs that are not in the sales data
remaining_has = nov_ha_data[
~nov_ha_data["HA Name"].isin(sales_data_formatted["HA Name"])
].copy()
# Merge on the september expected figures
remaining_has = remaining_has.merge(
september_figures[["Redacted HA", "ECO4", "GBIS"]].rename(
columns={"Redacted HA": "HA Name", "ECO4": "Sept Expected ECO4", "GBIS": "Sept Expected GBIS"}
),
how="left",
on="HA Name",
)
# We update the asset list size for HA 33, because they do not have access to the full portfolio
remaining_has.loc[remaining_has["HA Name"] == "HA 33", "ASSET LIST no."] = 20699
# We also remove HA 17
remaining_has = remaining_has[~remaining_has["HA Name"].isin(["HA 17"])]
# We now calculate the expected ECO4 and GBIS sales based on the average conversion rates
remaining_has["Expected ECO4 Sales"] = remaining_has["ASSET LIST no."] * eco4_conversion_rate
remaining_has["Expected GBIS Sales"] = remaining_has["ASSET LIST no."] * gbis_conversion_rate
# Filter just on columns we're interested in
remaining_has = remaining_has[[
"HA Name",
"ASSET LIST no.",
"Sept Expected ECO4",
"Sept Expected GBIS",
"ECO4",
"GBIS",
]].rename(
columns={
"ECO4": "Nov Expected ECO4",
"GBIS": "Nov Expected GBIS",
}
)
remaining_has = remaining_has.fillna(0)
# We take just HAs that had an initial september expectation for ECO4 or GBIS, or that now have a Nov expectation
remaining_has = remaining_has[
(remaining_has["Sept Expected ECO4"] > 0) | (remaining_has["Sept Expected GBIS"] > 0) |
(remaining_has["Nov Expected ECO4"] > 0) | (remaining_has["Nov Expected GBIS"] > 0)
]
# Expected sales based on asset list size and conversion rate
remaining_has["Forecasted Sales ECO4"] = remaining_has["ASSET LIST no."] * eco4_conversion_rate
remaining_has["Forecasted Sales GBIS"] = remaining_has["ASSET LIST no."] * gbis_conversion_rate
# Calculate the total expected value for September and November
remaining_has["Sept Expected ECO4 Value"] = remaining_has["Sept Expected ECO4"] * historical_eco4_price
remaining_has["Sept Expected GBIS Value"] = remaining_has["Sept Expected GBIS"] * historical_gbis_price
remaining_has["Nov Expected ECO4 Value"] = remaining_has["Nov Expected ECO4"] * historical_eco4_price
remaining_has["Nov Expected GBIS Value"] = remaining_has["Nov Expected GBIS"] * historical_gbis_price
# Calculate forecasted revenue
remaining_has["Forecasted ECO4 Revenue"] = remaining_has["ASSET LIST no."] * future_eco4_value
remaining_has["Forecasted GBIS Revenue"] = remaining_has["ASSET LIST no."] * future_gbis_value
# We also calculate forecasted revenue with the original price
remaining_has["Forecasted ECO4 Revenue (original price)"] = remaining_has["ASSET LIST no."] * historical_eco4_value
remaining_has["Forecasted GBIS Revenue (original price)"] = remaining_has["ASSET LIST no."] * historical_gbis_value
# Calculate totals for each scheme
remaining_has_september_eco4_sales = remaining_has["Sept Expected ECO4"].sum()
remaining_has_september_gbis_sales = remaining_has["Sept Expected GBIS"].sum()
remaining_has_november_eco4_sales = remaining_has["Nov Expected ECO4"].sum()
remaining_has_november_gbis_sales = remaining_has["Nov Expected GBIS"].sum()
remaining_has_forecasted_eco4_sales = remaining_has["Forecasted Sales ECO4"].sum()
remaining_has_forecasted_gbis_sales = remaining_has["Forecasted Sales GBIS"].sum()
remaining_has_september_eco4_value = remaining_has["Sept Expected ECO4 Value"].sum()
remaining_has_september_gbis_value = remaining_has["Sept Expected GBIS Value"].sum()
remaining_has_november_eco4_value = remaining_has["Nov Expected ECO4 Value"].sum()
remaining_has_november_gbis_value = remaining_has["Nov Expected GBIS Value"].sum()
remaining_has_forecasted_eco4_value = remaining_has["Forecasted ECO4 Revenue"].sum()
remaining_has_forecasted_gbis_value = remaining_has["Forecasted GBIS Revenue"].sum()
remaining_has_forecasted_eco4_value_original_price = remaining_has["Forecasted ECO4 Revenue (original price)"].sum()
remaining_has_forecasted_gbis_value_original_price = remaining_has["Forecasted GBIS Revenue (original price)"].sum()
# Calculate the change in forecasted sales against the September expected sales
remaining_has_foecast_change_eco4 = 100 * (
remaining_has["Forecasted Sales ECO4"].sum() - remaining_has["Sept Expected ECO4"].sum()
) / remaining_has["Sept Expected ECO4"].sum()
remaining_has_foecast_change_gbis = 100 * (
remaining_has["Forecasted Sales GBIS"].sum() - remaining_has["Sept Expected GBIS"].sum()
) / remaining_has["Sept Expected GBIS"].sum()
# Total change
remaining_has_foecast_change_total = 100 * (
remaining_has["Forecasted Sales ECO4"].sum() + remaining_has["Forecasted Sales GBIS"].sum() -
remaining_has["Sept Expected ECO4"].sum() - remaining_has["Sept Expected GBIS"].sum()
) / (remaining_has["Sept Expected ECO4"].sum() + remaining_has["Sept Expected GBIS"].sum())
asset_list_size = remaining_has["ASSET LIST no."].sum()
# Create a summary table of the rest with the totals for ECO4, GBIS and then a total row
remaining_has_aggregate = pd.DataFrame(
columns=["Scheme", "Asset List Size", "Sept Expected Sales", "Nov Expected Sales", "Forecasted Sales",
"Forecasted Change vs Sept",
"Sept Expected Value", "Nov Expected Value", "Forecasted Value", "Forecasted Value (original price)"],
data=[
[
"ECO4", asset_list_size, remaining_has_september_eco4_sales, remaining_has_november_eco4_sales,
remaining_has_forecasted_eco4_sales, remaining_has_foecast_change_eco4,
remaining_has_september_eco4_value,
remaining_has_november_eco4_value, remaining_has_forecasted_eco4_value,
remaining_has_forecasted_eco4_value_original_price
],
[
"GBIS", asset_list_size, remaining_has_september_gbis_sales, remaining_has_november_gbis_sales,
remaining_has_forecasted_gbis_sales, remaining_has_foecast_change_gbis,
remaining_has_september_gbis_value,
remaining_has_november_gbis_value, remaining_has_forecasted_gbis_value,
remaining_has_forecasted_gbis_value_original_price
],
[
"Total", asset_list_size, remaining_has_september_eco4_sales + remaining_has_september_gbis_sales,
remaining_has_november_eco4_sales + remaining_has_november_gbis_sales,
remaining_has_forecasted_eco4_sales + remaining_has_forecasted_gbis_sales,
remaining_has_foecast_change_total,
remaining_has_september_eco4_value + remaining_has_september_gbis_value,
remaining_has_november_eco4_value + remaining_has_november_gbis_value,
remaining_has_forecasted_eco4_value + remaining_has_forecasted_gbis_value,
remaining_has_forecasted_eco4_value_original_price +
remaining_has_forecasted_gbis_value_original_price
]
]
)
# Calculate pipeline value
pipeline_value = aggregates[["Scheme", "Completed Revenue", "Forecasted Remaining Revenue"]].merge(
remaining_has_aggregate[["Scheme", "Forecasted Value"]].rename(
columns={"Forecasted Value": "Forecasted Revenue, Unconfirmed HAs"}
), how="inner", on="Scheme"
)
# Calculate the total
pipeline_value["Total Value"] = (
pipeline_value["Completed Revenue"] + pipeline_value["Forecasted Remaining Revenue"] + pipeline_value[
"Forecasted Revenue, Unconfirmed HAs"]
)
# TODO: Insert model figures
model_results = pd.DataFrame(
[
{
# This one, we don't have sales data
"HA Name": "HA 15",
"Model Expected Additional ECO4 (unit level)": None,
"Model Expected Total ECO4 (unit level)": 296,
"Model Expected Additional GBIS (unit level)": None,
"Model Expected Total GBIS (unit level)": 209,
},
{
"HA Name": "HA 16",
# Old before re-run
# "Model Expected Additional ECO4 (unit level)": 418,
# "Model Expected Total ECO4 (unit level)": 1820,
# "Model Expected Additional GBIS (unit level)": 576,
# "Model Expected Total GBIS (unit level)": 612,
# In the partial sales data, WFT have completed 1407 ECO4, 36 GBIS
"Model Expected Additional ECO4 (unit level)": 411 + 342 + 235,
"Model Expected Total ECO4 (unit level)": 1407 + 411 + 342 + 235,
"Model Expected Additional GBIS (unit level)": 223,
"Model Expected Total GBIS (unit level)": 36 + 223,
},
{
"HA Name": "HA 24",
"Model Expected Additional ECO4 (unit level)": 224,
"Model Expected Total ECO4 (unit level)": 848,
"Model Expected Additional GBIS (unit level)": 552,
"Model Expected Total GBIS (unit level)": 552,
},
{
"HA Name": "HA 25",
"Model Expected Additional ECO4 (unit level)": None,
"Model Expected Total ECO4 (unit level)": 1709 + 59,
"Model Expected Additional GBIS (unit level)": None,
"Model Expected Total GBIS (unit level)": 2004 + 107,
}
]
)
sales_data_formatted["Remaining ECO4 Sales"] = (
sales_data_formatted["Forecasted ECO4 Sales"] - sales_data_formatted["Oct ECO4 Sales (adjusted for variance)"]
)
sales_data_formatted["Remaining GBIS Sales"] = (
sales_data_formatted["Forecasted GBIS Sales"] - sales_data_formatted["Oct GBIS Sales (adjusted for variance)"]
)
sales_data_formatted["Completed ECO4 Revenue"] = (sales_data_formatted[
"Oct ECO4 Sales (adjusted for variance)"] *
historical_eco4_price)
sales_data_formatted["Completed GBIS Revenue"] = (sales_data_formatted[
"Oct GBIS Sales (adjusted for variance)"] *
historical_gbis_price)
ha_subset_with_sales = ["HA 15", "HA 16", "HA 24"]
has_subset_with_sales_value = sales_data_formatted[
sales_data_formatted["HA Name"].isin(ha_subset_with_sales)
].copy()[
[
"HA Name",
"Oct ECO4 Sales (adjusted for variance)",
"Oct GBIS Sales (adjusted for variance)",
"Remaining ECO4 Sales",
"Remaining GBIS Sales",
"Forecasted ECO4 Sales",
"Forecasted GBIS Sales",
"Completed ECO4 Revenue",
"Completed GBIS Revenue"
]
]
has_subset_with_sales_value["Remaining ECO4 Revenue"] = has_subset_with_sales_value[
"Remaining ECO4 Sales"] * ECO4_NEW_RATES
has_subset_with_sales_value["Remaining GBIS Revenue"] = has_subset_with_sales_value[
"Remaining GBIS Sales"] * GBIS_NEW_RATES
has_subset_with_sales_value["Remaining Total Revenue"] = (
has_subset_with_sales_value["Remaining ECO4 Revenue"] + has_subset_with_sales_value["Remaining GBIS Revenue"]
)
model_results["Model Expected Additional ECO4 Revenue"] = (
model_results["Model Expected Additional ECO4 (unit level)"] * ECO4_NEW_RATES
)
model_results["Model Expected Additional GBIS revenue"] = (
model_results["Model Expected Additional GBIS (unit level)"] * GBIS_NEW_RATES
)
model_results["Model Expected Additional Total Revenue"] = (
model_results["Model Expected Additional ECO4 Revenue"] + model_results[
"Model Expected Additional GBIS revenue"]
)
# Show more columns with pandas
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
# Look at HA 16
ha16_model = model_results[model_results["HA Name"] == "HA 16"]
has_subset_with_sales_value[has_subset_with_sales_value["HA Name"] == "HA 16"]
# WFT: For HA 16: 4,598,190 ECO4, 57,000 GBIS
# Model:
# Look at HA 24
ha24_model = model_results[model_results["HA Name"] == "HA 24"]
has_subset_with_sales_value[has_subset_with_sales_value["HA Name"] == "HA 24"]
# Look at HA 15
ha15_data = has_subset_with_sales_value[has_subset_with_sales_value["HA Name"] == "HA 15"]
ha15_portfolio_value = ha15_data["Completed ECO4 Revenue"] + ha15_data[
"Completed GBIS Revenue"] + ha15_data["Remaining Total Revenue"]
# # This doesn't have sales data so in the model analysis, we just value the ha as a whole
ha15_model = model_results[model_results["HA Name"] == "HA 15"]
ha15_value = ha15_model["Model Expected Total ECO4 (unit level)"].iloc[0] * ECO4_NEW_RATES + \
ha15_model["Model Expected Total GBIS (unit level)"].iloc[0] * GBIS_NEW_RATES
model_results["Expected ECO4 Revenue"] = model_results["Model Expected Total ECO4 (unit level)"] * ECO4_NEW_RATES
model_results["Expected GBIS Revenue"] = model_results["Model Expected Total GBIS (unit level)"] * GBIS_NEW_RATES
model_results["Expected Total Revenue"] = model_results["Expected ECO4 Revenue"] + model_results[
"Expected GBIS Revenue"]
model_results[model_results["HA Name"].isin(["HA 15"])]
# We now create a final excel with all of the data
# We want:
# 1) aggregates
# 2) sales_data_formatted
# 3) remaining_has_aggregate
# 4) remaining_has
# 5) problem_has_summary
# Function to get the maximum column width
def get_col_widths(dataframe):
    """
    Compute display widths (in characters) for the index and every column of a dataframe.

    :param dataframe: pandas DataFrame to measure
    :return: list of ints; the first entry is the width of the index (its values and its name),
        followed by one entry per column, each being the longer of the column label and the
        longest stringified cell in that column
    """
    # First we find the maximum length of the index column (values and name; an unnamed index counts as "None")
    idx_max = max([len(str(s)) for s in dataframe.index.values] + [len(str(dataframe.index.name))])
    widths = [idx_max]
    for col in dataframe.columns:
        cell_lengths = dataframe[col].astype(str).map(len)
        # A column with no rows has no longest cell (.max() would be NaN), so only the label counts
        longest_cell = int(cell_lengths.max()) if len(cell_lengths) else 0
        # str() so that non-string labels (e.g. integer column names) can be measured too
        widths.append(max(longest_cell, len(str(col))))
    return widths
# Create a Pandas Excel writer using XlsxWriter as the engine
with pd.ExcelWriter('HA Pipeline Analysis.xlsx', engine='xlsxwriter') as writer:
    # Write each dataframe to a different worksheet without the index
    for df, sheet in [(aggregates, 'Forecasted Sales'),
                      (sales_data_formatted, 'Sales Data'),
                      (remaining_has_aggregate, 'Remaining HAs Value'),
                      (remaining_has, 'Remaining HAs data'),
                      (pipeline_value, 'Pipeline Value'),
                      (problem_has_summary, 'Problem HAs Analysis'),
                      (problem_has_data, 'Problem HAs Data')
                      ]:
        df.to_excel(writer, sheet_name=sheet, index=False)
        # Auto-adjust columns' width.
        # get_col_widths() returns the index width first, but the index is not written (index=False),
        # so we drop that first entry; otherwise every width is applied one column too far to the right.
        for i, width in enumerate(get_col_widths(df)[1:]):
            writer.sheets[sheet].set_column(i, i, width)

View file

@ -0,0 +1,113 @@
import openpyxl
import pandas as pd
import numpy as np
def get_excel_survey_list(workbook_path, worksheet_name=None):
    """
    Read a survey worksheet into a dataframe, recording the fill colour of each row's first cell.

    :param workbook_path: path of the Excel workbook to open
    :param worksheet_name: name of the sheet to read; the workbook's active sheet is used when None
    :return: dataframe with one row per worksheet row from row 2 onwards, using the values of
        row 1 as column names, plus a "row_colour" column holding the first cell's fill colour
        index (None when that index is '00000000')
    """
    workbook = openpyxl.load_workbook(workbook_path)
    sheet = workbook.active if worksheet_name is None else workbook[worksheet_name]

    header = [cell.value for cell in sheet[1]]
    values = []
    colours = []
    # Row 1 is assumed to hold the headers, so the data starts at row 2
    for cells in sheet.iter_rows(min_row=2, values_only=False):
        values.append([cell.value for cell in cells])
        colour_index = cells[0].fill.start_color.index
        colours.append(None if colour_index == '00000000' else colour_index)

    survey_list = pd.DataFrame(values, columns=header)
    survey_list["row_colour"] = colours
    return survey_list
def load_data():
    """
    Load the HA 16, HA 24 and HA 25 survey lists and combine them into one cancellation dataset.

    Each list receives a standardised "survey_status" column, a "cancellation_reason" column and
    an "HA" label. For HA 16 and HA 24 the status is mapped from the "INSTALLED OR CANCELLED"
    column; for HA 25 it is derived from the row fill colour.

    :return: dataframe with the columns "HA", "survey_status" and "cancellation_reason", without
        the rows whose status is "no update" or "loft remaining"
    """
    # ECO4 survey lists for HA 16 and HA 24
    ha16 = get_excel_survey_list('etl/eligibility/ha_15_32/HESTIA- HA 16 ECO4 SURVEY LIST.xlsx')
    ha24 = get_excel_survey_list('etl/eligibility/ha_15_32/HESTIA - HA 24 ECO4 SURVEY LIST.xlsx')
    # ECO3 survey list for HA 25, read from its "CAVITY" sheet
    ha25 = get_excel_survey_list(
        'etl/eligibility/ha_15_32/HESTIA - HA 25 ECO3 SURVEY LIST.xlsx', worksheet_name="CAVITY"
    )
    # Drop the columns that contain no values at all
    ha25 = ha25.dropna(axis=1, how='all')

    # Status labels that are spelt the same way in the HA 16 and HA 24 lists
    shared_labels = {
        "NO UPDATE - CHECKED 18.12.23": "no update",
        "INSTALLED": "installed",
        "CANCELLED": "cancelled",
        "LOFT STILL TO BE INSTALLED": "loft remaining",
    }
    ha16["survey_status"] = ha16["INSTALLED OR CANCELLED"].replace(
        {"NO UPDATE - CHECKED 2.10.23": "no update", **shared_labels}
    )
    ha24["survey_status"] = ha24["INSTALLED OR CANCELLED"].replace(
        {"NO UPDATE - CHECKED 21.11.23": "no update", "SEE NOTES >>": "see notes", **shared_labels}
    )

    # HA 25 has no status column, so the status is read off the row colour instead
    colour = ha25["row_colour"]
    ha25["survey_status"] = np.select(
        [
            colour.isin(["FF7030A0", "FF92D050", "FF38FD23"]),
            colour == "FFFF0000",
            colour == "FFFFFF00",
        ],
        ["installed", "cancelled", "filler row - drop"],
        default="unknown",
    )
    ha25 = ha25[ha25["survey_status"] != "filler row - drop"]

    # Cancellation reasons are copied into a new, commonly named column
    notes_column = "INSTALLERS NOTES ; REASONS FOR CANCELLATIONS"
    ha16["cancellation_reason"] = ha16[notes_column].copy()
    ha24["cancellation_reason"] = ha24[notes_column].copy()
    # The HA 25 list carries no cancellation reason
    ha25["cancellation_reason"] = "No reason provided"

    # Label each list with its HA and stack the common columns
    frames = []
    for label, frame in (("HA 16", ha16), ("HA 24", ha24), ("HA 25", ha25)):
        frame["HA"] = label
        frames.append(frame[["HA", "survey_status", "cancellation_reason"]])
    cancellation_data = pd.concat(frames)

    # Keep only the rows that have a confirmed status
    unconfirmed = ["no update", "loft remaining"]
    return cancellation_data[~cancellation_data["survey_status"].isin(unconfirmed)]
def app():
    """
    Entry point for analysing the cancellation data provided by warmfront.

    At present this only loads the combined survey outcomes via ``load_data``; the loaded frame is held
    in a local variable and nothing is returned.
    :return:
    """
    # These are jobs that went through an invasive survey but where the installer could not conclude the work
    cancellations = load_data()

View file

@ -882,6 +882,13 @@ def get_epc_data(
return outputs
def get_col_widths(dataframe, include_index=True):
    """
    Work out a display width, in characters, for the index and each column of a dataframe.

    The width of a column is the longer of its header and its longest cell once every cell has been
    converted to a string.

    :param dataframe: pandas DataFrame to measure
    :param include_index: when True (the default) the first entry of the result is the width of the index.
        Pass False when the frame is written without its index (e.g. ``to_excel(..., index=False)``) so
        that entry ``i`` of the result lines up with worksheet column ``i``.
    :return: list of ints - optionally the index width, followed by one width per column in column order
    """
    widths = []

    if include_index:
        # An unnamed index has no header text, so it should not be measured as the string "None"
        index_name = dataframe.index.name
        header_width = 0 if index_name is None else len(str(index_name))
        widths.append(max([len(str(value)) for value in dataframe.index.values] + [header_width]))

    for position, label in enumerate(dataframe.columns):
        # Select by position so that duplicated column labels still give a single Series
        cell_lengths = dataframe.iloc[:, position].astype(str).map(len)
        # max() of an empty Series is NaN, which is not a usable width
        longest_cell = int(cell_lengths.max()) if len(cell_lengths) else 0
        # Labels are not always strings (e.g. integer column names), so stringify before measuring
        widths.append(max(longest_cell, len(str(label))))

    return widths
def analyse_ha_data(outputs, loader):
"""
The approach we take within this function is the following:
@ -901,7 +908,11 @@ def analyse_ha_data(outputs, loader):
:return:
"""
eco4_rate = 1710
gbis_rate = 600
ha_analysis_results = []
ha_revenue_results = []
for ha_name, datasets in outputs.items():
inputs = [x for k, x in loader.data.items() if k == ha_name][0]
@ -1034,7 +1045,8 @@ def analyse_ha_data(outputs, loader):
(
(remaining_eco4_df["eco4_message"] == "sap too high") &
remaining_eco4_df["eligibility_cavity_type"].isin(["partial", "empty"]) &
remaining_eco4_df["eligibility_loft_type"].isin(["0-100mm"])
remaining_eco4_df["eligibility_loft_type"].isin(["0-100mm"]) &
pd.isnull(remaining_eco4_df["prospect_type"])
),
"ECO4 if SAP downgrade",
remaining_eco4_df["prospect_type"]
@ -1048,7 +1060,7 @@ def analyse_ha_data(outputs, loader):
remaining_eco4_df["eligibility_cavity_type"].isin(["full"]) &
remaining_eco4_df["eligibility_loft_type"].isin(["0-100mm"])
),
"Filled cavity - subject to CIGA check",
"ECO4 - Filled cavity - subject to CIGA check",
remaining_eco4_df["prospect_type"]
)
@ -1064,7 +1076,7 @@ def analyse_ha_data(outputs, loader):
# 5) Looks like GBIS instead
remaining_eco4_df["prospect_type"] = np.where(
(remaining_eco4_df["gbis_eligible"] == True),
(remaining_eco4_df["gbis_eligible"] == True) & pd.isnull(remaining_eco4_df["prospect_type"]),
"Looks like GBIS",
remaining_eco4_df["prospect_type"]
)
@ -1094,16 +1106,17 @@ def analyse_ha_data(outputs, loader):
# 2) GBIS candidates that look like strict ECO4 candidates
remaining_gbis["prospect_type"] = np.where(
(remaining_gbis["eco4_eligible"] == True),
"Upgradable to ECO4",
"GBIS - Upgradable to ECO4",
remaining_gbis["prospect_type"]
)
# 3) Subject to CIGA check - Filled cavity
remaining_gbis["prospect_type"] = np.where(
(
remaining_gbis["eligibility_cavity_type"].isin(["full"])
remaining_gbis["eligibility_cavity_type"].isin(["full"]) &
pd.isnull(remaining_gbis["prospect_type"])
),
"Filled cavity - subject to CIGA check",
"GBIS - Filled cavity - subject to CIGA check",
remaining_gbis["prospect_type"]
)
@ -1141,30 +1154,95 @@ def analyse_ha_data(outputs, loader):
)
].copy()
ha_analysis_results.append({
# Perform some checks to make sure we have all of the values
remaining_eco4_dict = remaining_eco4_df["prospect_type"].value_counts().to_dict()
if n_remaining_properties_eco4 != sum([v for k, v in remaining_eco4_dict.items()]):
raise ValueError(
"Number of remaining properties does not match the number of properties in remaining ECO4 dict"
)
remaining_gbis_dict = remaining_gbis["prospect_type"].value_counts().to_dict()
if n_remaining_properties_gbis != sum([v for k, v in remaining_gbis_dict.items()]):
raise ValueError(
"Number of remaining properties does not match the number of properties in remaining GBIS dict"
)
to_append = {
"ha_name": ha_name,
"n_properties_in_asset_list": n_properties_in_asset_list,
############
# ECO4
############
"properties_sold_eco4": properties_sold_eco4,
"n_remaining_properties_eco4": n_remaining_properties_eco4,
**remaining_eco4_df["prospect_type"].value_counts().to_dict(),
**remaining_eco4_dict,
############
# GBIS
############
"properties_sold_gbis": properties_sold_gbis,
"n_remaining_properties_gbis": n_remaining_properties_gbis,
**remaining_gbis["prospect_type"].value_counts().to_dict(),
**remaining_gbis_dict,
############
# Surplus
############
"n_eco4_surplus": eco4_surplus.shape[0],
"n_gbis_surplus": gbis_surplus.shape[0],
})
}
ha_analysis_results.append(to_append)
revenue_to_append = {
"ha_name": ha_name,
"£ Remaining from asset list": (
n_remaining_properties_eco4 * eco4_rate + n_remaining_properties_gbis * gbis_rate
),
"Of which: Strict": (
to_append.get('strict ECO4', 0) * eco4_rate + to_append.get('strict GBIS', 0) * gbis_rate +
to_append.get('GBIS - Upgradable to ECO4', 0) * gbis_rate
),
"Of which: Subject to CIGA": (
to_append.get("ECO4 - Filled cavity - subject to CIGA check", 0) * eco4_rate +
to_append.get("GBIS - Filled cavity - subject to CIGA check", 0) * gbis_rate
),
"Of which: Prospect, not perfect strict prospect": (
to_append.get("ECO4 prospect - empty cavity, loft insulation below regulation", 0) * eco4_rate +
to_append.get("ECO4 if SAP downgrade", 0) * eco4_rate
),
"Of which: Potential downgrade to GBIS": to_append["Looks like GBIS"] * eco4_rate,
"Of which: Does not look like prospect": (
to_append.get("Does not look like ECO4 candidate", 0) * eco4_rate +
to_append.get("Does not look like GBIS candidate", 0) * gbis_rate
),
"Surplus: Unidentified properties": eco4_surplus.shape[0] * eco4_rate + gbis_surplus.shape[0] * gbis_rate,
"Surplus: GBIS Updates to ECO4": to_append.get("GBIS - Upgradable to ECO4", 0) * (eco4_rate - gbis_rate)
}
# Perform a quick check:
if revenue_to_append["£ Remaining from asset list"] - (
revenue_to_append["Of which: Strict"] + revenue_to_append["Of which: Subject to CIGA"] +
revenue_to_append["Of which: Prospect, not perfect strict prospect"] +
revenue_to_append["Of which: Potential downgrade to GBIS"] +
revenue_to_append["Of which: Does not look like prospect"]
) > 1:
raise ValueError("Error between top level revenue figures and breakdown - investigate me")
ha_revenue_results.append(revenue_to_append)
ha_analysis_results = pd.DataFrame(ha_analysis_results)
ha_revenue_results = pd.DataFrame(ha_revenue_results)
# Todo: create revenue figures and automate creation of excel
# Automate creation of the excel
# Create a Pandas Excel writer using XlsxWriter as the engine
with pd.ExcelWriter('HA Analysis - batch3.xlsx', engine='xlsxwriter') as writer:
# Write each dataframe to a different worksheet without the index
for df, sheet in [(ha_revenue_results, 'Total Revenue'),
(ha_analysis_results, 'By ECO4 and GBIS')]:
df.to_excel(writer, sheet_name=sheet, index=False)
# Auto-adjust columns' width
for i, width in enumerate(get_col_widths(df)):
writer.sheets[sheet].set_column(i, i, width)
def app():