From 55e28942e48bb8cf55e7c95875533710d7e21ea1 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 29 Jan 2024 12:13:22 +0000
Subject: [PATCH] Added automated creation of excel and added missing files to
 git

---
 etl/eligibility/Eligibility.py                |  28 +-
 .../ha_15_32/WFT Sales data analysis.py       | 665 ++++++++++++++++++
 etl/eligibility/ha_15_32/cancellation.py      | 113 +++
 .../ha_15_32/ha_analysis_batch_3.py           | 100 ++-
 4 files changed, 876 insertions(+), 30 deletions(-)
 create mode 100644 etl/eligibility/ha_15_32/WFT Sales data analysis.py
 create mode 100644 etl/eligibility/ha_15_32/cancellation.py

diff --git a/etl/eligibility/Eligibility.py b/etl/eligibility/Eligibility.py
index 1d868338..906ff594 100644
--- a/etl/eligibility/Eligibility.py
+++ b/etl/eligibility/Eligibility.py
@@ -128,7 +128,7 @@ class Eligibility:
 
         if insulation_thickness <= 100:
             thickness_classification = "0-100mm"
-        elif insulation_thickness <= 270:
+        elif insulation_thickness <= high_loft_thickness_threshold:
             thickness_classification = "100-270mm"
         else:
             thickness_classification = "270mm+"
@@ -146,24 +146,14 @@ class Eligibility:
                 "thickness_classification": thickness_classification
             }
 
-        if insulation_thickness <= high_loft_thickness_threshold:
-            self.loft = {
-                "suitability": True,
-                "thickness": insulation_thickness,
-                "reason": "high loft thickness but below regulation",
-                "thickness_classification": thickness_classification
-            }
-            return
-
-        if insulation_thickness > high_loft_thickness_threshold:
-            # Insulation is already thick enough
-            self.loft = {
-                "suitability": False,
-                "thickness": insulation_thickness,
-                "reason": "existing insulation",
-                "thickness_classification": thickness_classification
-            }
-            return
+        # Insulation is already thick enough
+        self.loft = {
+            "suitability": False,
+            "thickness": insulation_thickness,
+            "reason": "existing insulation",
+            "thickness_classification": thickness_classification
+        }
+        return
 
     def cavity_insulation(self):
 
diff --git a/etl/eligibility/ha_15_32/WFT Sales data analysis.py b/etl/eligibility/ha_15_32/WFT Sales data analysis.py
new file mode 100644
index 00000000..a088fe43
--- /dev/null
+++ b/etl/eligibility/ha_15_32/WFT Sales data analysis.py	
@@ -0,0 +1,665 @@
+import numpy as np
+import pandas as pd
+
+ECO4_NEW_RATES = 1710
+GBIS_NEW_RATES = 600
+
+
+def app():
+    # Load in the excel
+    nov_ha_data = pd.read_excel(
+        'etl/eligibility/ha_15_32/ALL HA FIGURES AND ASSIGNED INSTALLERS 21.11.2023 with sales data.xlsx',
+    )
+    # Drop rows where HA name is null
+    nov_ha_data = nov_ha_data.dropna(subset=["HA Name"])
+    nov_ha_data["ha_number"] = nov_ha_data["HA Name"].str.extract(r"(\d+)").astype(int)
+    nov_ha_data = nov_ha_data.sort_values("ha_number", ascending=True)
+
+    variance_explanations = pd.read_excel(
+        'etl/eligibility/ha_15_32/ALL HA FIGURES AND ASSIGNED INSTALLERS 21.11.2023 with sales data.xlsx',
+        sheet_name="Variance explanations"
+    )
+
+    september_figures = pd.read_excel(
+        "etl/eligibility/ha_15_32/ALL HA FIGURES AND ASSIGNED INSTALLERS SEP 23 UPDATE (2).xlsx",
+        sheet_name="HA Stats"
+    )
+
+    historical_invoices = pd.read_excel(
+        "etl/eligibility/ha_15_32/ALL HA FIGURES AND ASSIGNED INSTALLERS 21.11.2023 with sales data.xlsx",
+        sheet_name="Jul 22 to Oct 23"
+    )
+    # Drop rows where installer rates is null
+    historical_invoices = historical_invoices[~pd.isnull(historical_invoices["INSTALLER RATES"])]
+    historical_invoices = historical_invoices[historical_invoices["INSTALLER RATES"] != "NA "]
+    # By Scheme, take a weighted mean of the INSTALLER RATES, weighted on the number of rows
+    n_invoices = historical_invoices.groupby(["Scheme", "INSTALLER RATES"])["Invoice number"].count().reset_index()
+    n_invoices = n_invoices[n_invoices["Scheme"].isin(["Eco 4", "GBIS"])]
+    historical_scheme_rates = n_invoices.groupby("Scheme").apply(
+        lambda x: np.average(x["INSTALLER RATES"], weights=x["Invoice number"])
+    ).reset_index().rename(columns={0: "Historical rates"})
+
+    # We take just the sales data entries that have sales > 0
+    sales_data = nov_ha_data[nov_ha_data["Sales"] > 0]
+
+    # We now need to adjust sales data depending on the variance explanations
+    sales_data = sales_data.merge(
+        variance_explanations[["HA", 'Which figure is correct']],
+        how="left",
+        left_on="ha_number",
+        right_on="HA"
+    )
+
+    def adjust_sales(row):
+        if pd.isnull(row["Which figure is correct"]):
+            return row["Sales"]
+
+        if row["Which figure is correct"] == "HA facts & figures":
+            return row['No. of Tech surveys complete']
+
+        if row["Which figure is correct"] == "Billed amount":
+            return row["Sales"]
+
+        if row["Which figure is correct"] in ["Both correct, HA facts and figures includes November", "Both correct"]:
+            return row["Sales"]
+
+        raise ValueError(f"Unknown value for 'Which figure is correct': {row['Which figure is correct']}")
+
+    # We now need to adjust sales data depending on the variance explanations
+    sales_data["adjusted_sales"] = sales_data.apply(lambda row: adjust_sales(row), axis=1)
+
+    # We therefore adjust GBIS and ECO4 sales data based on adjusted sales
+    sales_data["adjusted_eco4_sales"] = sales_data["No. of Tech surveys complete - Eco 4"] / sales_data["Sales"] * \
+                                        sales_data["adjusted_sales"]
+
+    sales_data["adjusted_gbis_sales"] = sales_data["No. of Tech surveys complete - GBIS"] / sales_data["Sales"] * \
+                                        sales_data["adjusted_sales"]
+
+    sales_data["cancellation_rate"] = (sales_data["Sales"] - sales_data["adjusted_sales"]) / sales_data["Sales"]
+
+    # The difference between the adjusted sales and the actual sales is the cancellation
+    cancellations = (sales_data["adjusted_sales"].sum() - sales_data["Sales"].sum()) / sales_data["Sales"].sum()
+
+    # Given the cancellations, we can now adjust the expected remaining surveys
+    sales_data["No. of Tech surveys remaining"] = sales_data["No. of Tech surveys remaining"] * (
+        1 - sales_data["cancellation_rate"]
+    )
+
+    # We now merge on the expected values for September
+    sales_data = sales_data.merge(
+        september_figures[["Redacted HA", "ECO4", "GBIS"]].rename(
+            columns={"Redacted HA": "HA Name", "ECO4": "Sept Expected ECO4", "GBIS": "Sept Expected GBIS"}
+        ),
+        how="left",
+        on="HA Name",
+    )
+
+    sales_data["Sept Expected ECO4"] = sales_data["Sept Expected ECO4"].fillna(0)
+    sales_data["Sept Expected GBIS"] = sales_data["Sept Expected GBIS"].fillna(0)
+
+    # We calculate the ECO4 and GBIS conversion rates with the adjusted numbers
+    sales_data["ECO4 Conversion"] = sales_data["adjusted_eco4_sales"] / sales_data["adjusted_sales"]
+    sales_data["GBIS Conversion"] = sales_data["adjusted_gbis_sales"] / sales_data["adjusted_sales"]
+
+    # We now calculate the expected remaining ECO4 and GBIS sales
+    # We take the number of remaining surveys and multiply by the conversion rate for each scheme, which tells us
+    # how many more we should expect to see
+    sales_data["Expected Remaining ECO4"] = sales_data["No. of Tech surveys remaining"] * sales_data["ECO4 Conversion"]
+    sales_data["Expected Remaining GBIS"] = sales_data["No. of Tech surveys remaining"] * sales_data["GBIS Conversion"]
+
+    # We now produce a forecasted ECO4 and GBIS sales figure
+    sales_data["Forecasted ECO4 Sales"] = sales_data["adjusted_eco4_sales"] + sales_data["Expected Remaining ECO4"]
+    sales_data["Forecasted GBIS Sales"] = sales_data["adjusted_gbis_sales"] + sales_data["Expected Remaining GBIS"]
+
+    # Take the columns we're interested in
+    # HA  # Properties	Sept ECO4 Figures	Sept GBIS Figures	Nov Total Sales	Nov ECO4 Sales	Nov GBIS Sales
+    # Remaining Surveys	ECO4 conversion	GBIS conversion	Forecasted ECO4 Sales	Forecasted GBIS sales	ECO4 Change
+    # GBIS Change
+    sales_data_formatted = sales_data[[
+        "HA Name",
+        "ASSET LIST no.",
+        "Sept Expected ECO4",
+        "Sept Expected GBIS",
+        "adjusted_sales",
+        "adjusted_eco4_sales",
+        "adjusted_gbis_sales",
+        "No. of Tech surveys remaining",
+        "ECO4 Conversion",
+        "GBIS Conversion",
+        "Forecasted ECO4 Sales",
+        "Forecasted GBIS Sales"
+    ]].rename(
+        columns={
+            "adjusted_sales": "Oct Total Sales (adjusted for variance)",
+            "adjusted_eco4_sales": "Oct ECO4 Sales (adjusted for variance)",
+            "adjusted_gbis_sales": "Oct GBIS Sales (adjusted for variance)",
+            "No. of Tech surveys remaining": "Remaining Surveys",
+        }
+    )
+
+    # Convert columns which should be integers to integers
+    for col in ["ASSET LIST no.", "Remaining Surveys", "Sept Expected ECO4", "Sept Expected GBIS",
+                "Oct Total Sales (adjusted for variance)", "Oct ECO4 Sales (adjusted for variance)",
+                "Oct GBIS Sales (adjusted for variance)", "Forecasted ECO4 Sales", "Forecasted GBIS Sales"]:
+        sales_data_formatted[col] = sales_data_formatted[col].fillna(0)
+        sales_data_formatted[col] = sales_data_formatted[col].astype(int)
+
+    # Remove HA 17 because this was EPCs only. We also remove HA33 because they do not have access to the full portfolio
+    sales_data_formatted = sales_data_formatted[
+        ~sales_data_formatted["HA Name"].isin(["HA 17", "HA 33"])
+    ]
+
+    # September expected ECO4 and GBIS
+    sept_expected_eco4 = sales_data_formatted["Sept Expected ECO4"].sum()
+    sept_expected_gbis = sales_data_formatted["Sept Expected GBIS"].sum()
+
+    # Completed so far
+    oct_eco4_sales = sales_data_formatted["Oct ECO4 Sales (adjusted for variance)"].sum()
+    oct_gbis_sales = sales_data_formatted["Oct GBIS Sales (adjusted for variance)"].sum()
+
+    # Forecasted figures
+    forecasted_eco4_sales = sales_data_formatted["Forecasted ECO4 Sales"].sum()
+    forecasted_gbis_sales = sales_data_formatted["Forecasted GBIS Sales"].sum()
+
+    # Expected remaining sales
+    expected_remaining_eco4_sales = forecasted_eco4_sales - oct_eco4_sales
+    expected_remaining_gbis_sales = forecasted_gbis_sales - oct_gbis_sales
+
+    # Forecast change vs September
+    forecasted_eco4_change = 100 * (forecasted_eco4_sales - sept_expected_eco4) / sept_expected_eco4
+    forecasted_gbis_change = 100 * (forecasted_gbis_sales - sept_expected_gbis) / sept_expected_gbis
+
+    aggregates = pd.DataFrame(
+        columns=["Scheme", "Sept Expected", "Oct Completed", "Forecasted Remaining Sales", "Forecasted Total Sales",
+                 "Forecasted Change vs Sept"],
+        data=[
+            ["ECO4", sept_expected_eco4, oct_eco4_sales, expected_remaining_eco4_sales, forecasted_eco4_sales,
+             forecasted_eco4_change],
+            ["GBIS", sept_expected_gbis, oct_gbis_sales, expected_remaining_gbis_sales, forecasted_gbis_sales,
+             forecasted_gbis_change],
+        ]
+    )
+
+    # Multiply by historical rates to get revenue
+    # For ECO4, this is ~£1456 and for GBIS it's ~£432
+    historical_gbis_price = historical_scheme_rates[
+        historical_scheme_rates["Scheme"] == "GBIS"
+        ]["Historical rates"].iloc[0]
+
+    historical_eco4_price = historical_scheme_rates[
+        historical_scheme_rates["Scheme"] == "Eco 4"
+        ]["Historical rates"].iloc[0]
+
+    aggregates["Sept Expected Revenue"] = np.where(
+        aggregates["Scheme"] == "ECO4",
+        aggregates["Sept Expected"] * historical_eco4_price,
+        aggregates["Sept Expected"] * historical_gbis_price
+    )
+
+    aggregates["Completed Revenue"] = np.where(
+        aggregates["Scheme"] == "ECO4",
+        aggregates["Oct Completed"] * historical_eco4_price,
+        aggregates["Oct Completed"] * historical_gbis_price
+    )
+
+    # We use the new rates for the forecasted revenue
+    aggregates["Forecasted Remaining Revenue"] = np.where(
+        aggregates["Scheme"] == "ECO4",
+        aggregates["Forecasted Remaining Sales"] * ECO4_NEW_RATES,
+        aggregates["Forecasted Remaining Sales"] * GBIS_NEW_RATES
+    )
+
+    # We also calculate the forecasted remaining revenue at the original price
+    aggregates["Forecasted Remaining Revenue (original price)"] = np.where(
+        aggregates["Scheme"] == "ECO4",
+        aggregates["Forecasted Remaining Sales"] * historical_eco4_price,
+        aggregates["Forecasted Remaining Sales"] * historical_gbis_price
+    )
+
+    aggregates["Forecasted Revenue"] = aggregates["Completed Revenue"] + aggregates["Forecasted Remaining Revenue"]
+
+    # Forecasted revenue with original price
+    aggregates["Forecasted Revenue (original price)"] = (
+        aggregates["Completed Revenue"] + aggregates["Forecasted Remaining Revenue (original price)"]
+    )
+
+    # Create a totals row which sums up the two rows
+
+    forecasted_change_vs_sept = 100 * (
+        aggregates["Forecasted Total Sales"].sum() - aggregates["Sept Expected"].sum()
+    ) / aggregates["Sept Expected"].sum()
+
+    aggregates = pd.concat(
+        [
+            aggregates,
+            pd.DataFrame(
+                [
+                    ["Total", aggregates["Sept Expected"].sum(), aggregates["Oct Completed"].sum(),
+                     aggregates["Forecasted Remaining Sales"].sum(), aggregates["Forecasted Total Sales"].sum(),
+                     forecasted_change_vs_sept,
+                     aggregates["Sept Expected Revenue"].sum(), aggregates["Completed Revenue"].sum(),
+                     aggregates["Forecasted Remaining Revenue"].sum(),
+                     aggregates["Forecasted Remaining Revenue (original price)"].sum(),
+                     aggregates["Forecasted Revenue"].sum(),
+                     aggregates["Forecasted Revenue (original price)"].sum(),
+                     ]
+                ],
+                columns=aggregates.columns
+            )
+        ]
+    )
+
+    # For each property in the asset list, we now calculate an average conversion rate to ECO4 and GBIS
+    # We do this by taking the forecasted sales values for each scheme and dividing by the number of properties
+
+    number_properties = sales_data_formatted["ASSET LIST no."].sum()
+    eco4_conversion_rate = forecasted_eco4_sales / number_properties
+    gbis_conversion_rate = forecasted_gbis_sales / number_properties
+
+    # We also attribute a future value per property
+    future_eco4_value = ECO4_NEW_RATES * eco4_conversion_rate
+    future_gbis_value = GBIS_NEW_RATES * gbis_conversion_rate
+
+    # We also calculate a revenue figure for the old rates
+    historical_eco4_value = historical_eco4_price * eco4_conversion_rate
+    historical_gbis_value = historical_gbis_price * gbis_conversion_rate
+
+    # For the HAs that have not begun selling, we estimate the value of the projects
+    # We start with some problem HAs
+
+    # HA 7, HA 24, HA 25
+    # These HAs have no sales data, so we use the expected figures
+
+    problem_has_data = nov_ha_data[
+        (nov_ha_data["HA Name"].isin(["HA 7", "HA 24", "HA 25"]))
+    ].copy()
+    # Merge on the september expected figures
+    problem_has_data = problem_has_data.merge(
+        september_figures[["Redacted HA", "ECO4", "GBIS"]].rename(
+            columns={"Redacted HA": "HA Name", "ECO4": "Sept Expected ECO4", "GBIS": "Sept Expected GBIS"}
+        ),
+        how="left",
+        on="HA Name",
+    )
+    # Fill NAs
+    problem_has_data["Sept Expected ECO4"] = problem_has_data["Sept Expected ECO4"].fillna(0)
+    problem_has_data["Sept Expected GBIS"] = problem_has_data["Sept Expected GBIS"].fillna(0)
+
+    # We now calculate the expected ECO4 and GBIS sales based on the average conversion rates
+    problem_has_data["Expected ECO4 Sales"] = problem_has_data["ASSET LIST no."] * eco4_conversion_rate
+    problem_has_data["Expected GBIS Sales"] = problem_has_data["ASSET LIST no."] * gbis_conversion_rate
+
+    # Filter just on columns we're interested in
+    problem_has_data = problem_has_data[[
+        "HA Name",
+        "ASSET LIST no.",
+        "Sept Expected ECO4",
+        "Sept Expected GBIS",
+        "ECO4",
+        "GBIS",
+        "Expected ECO4 Sales",
+        "Expected GBIS Sales"
+    ]].rename(
+        columns={
+            "ECO4": "Nov Expected ECO4",
+            "GBIS": "Nov Expected GBIS",
+        }
+    )
+
+    # Fill NAs
+    problem_has_data["Nov Expected ECO4"] = problem_has_data["Nov Expected ECO4"].fillna(0)
+    problem_has_data["Nov Expected GBIS"] = problem_has_data["Nov Expected GBIS"].fillna(0)
+
+    # We calculate HA level Sept, Nov expected revenue, based on historical rates and then forecasted revenue
+    problem_has_data["Sept Expected ECO4 Value"] = problem_has_data["Sept Expected ECO4"] * historical_eco4_price
+    problem_has_data["Sept Expected GBIS Value"] = problem_has_data["Sept Expected GBIS"] * historical_gbis_price
+
+    problem_has_data["Nov Expected ECO4 Value"] = problem_has_data["Nov Expected ECO4"] * historical_eco4_price
+    problem_has_data["Nov Expected GBIS Value"] = problem_has_data["Nov Expected GBIS"] * historical_gbis_price
+
+    problem_has_data["Forecasted ECO4 Revenue"] = problem_has_data["ASSET LIST no."] * future_eco4_value
+    problem_has_data["Forecasted GBIS Revenue"] = problem_has_data["ASSET LIST no."] * future_gbis_value
+
+    # Totals
+    problem_has_data["Sept Expected Total Value"] = problem_has_data["Sept Expected ECO4 Value"] + \
+                                                    problem_has_data["Sept Expected GBIS Value"]
+    problem_has_data["Nov Expected Total Value"] = problem_has_data["Nov Expected ECO4 Value"] + \
+                                                   problem_has_data["Nov Expected GBIS Value"]
+    problem_has_data["Forecasted Total Revenue"] = problem_has_data["Forecasted ECO4 Revenue"] + \
+                                                   problem_has_data["Forecasted GBIS Revenue"]
+
+    # We calculate a total expected value for September, November and then forecasted
+    problem_has_expected_eco4_value = problem_has_data["Sept Expected ECO4"].sum() * historical_eco4_price
+    problem_has_expected_gbis_value = problem_has_data["Sept Expected GBIS"].sum() * historical_gbis_price
+    problem_has_expected_total_value = problem_has_expected_eco4_value + problem_has_expected_gbis_value
+
+    problem_has_nov_eco4_value = problem_has_data["Nov Expected ECO4"].sum() * historical_eco4_price
+    problem_has_nov_gbis_value = problem_has_data["Nov Expected GBIS"].sum() * historical_gbis_price
+    problem_has_nov_total_value = problem_has_nov_eco4_value + problem_has_nov_gbis_value
+
+    forecasted_eco4_value = problem_has_data["ASSET LIST no."].sum() * future_eco4_value
+    forecasted_gbis_value = problem_has_data["ASSET LIST no."].sum() * future_gbis_value
+    problem_has_forecasted_total_value = forecasted_eco4_value + forecasted_gbis_value
+
+    problem_has_summary = pd.DataFrame(
+        columns=["Scheme", "Sept Expected", "Nov Expected", "Forecasted"],
+        data=[
+            ["ECO4", problem_has_expected_eco4_value, problem_has_nov_eco4_value, forecasted_eco4_value],
+            ["GBIS", problem_has_expected_gbis_value, problem_has_nov_gbis_value, forecasted_gbis_value],
+            ["Total", problem_has_expected_total_value, problem_has_nov_total_value, problem_has_forecasted_total_value]
+        ]
+    )
+
+    # We now also estimate the value of the remaining HAs based on historical sales performance and new rates
+    # We take the HAs that are not in the sales data
+    remaining_has = nov_ha_data[
+        ~nov_ha_data["HA Name"].isin(sales_data_formatted["HA Name"])
+    ].copy()
+
+    # Merge on the september expected figures
+    remaining_has = remaining_has.merge(
+        september_figures[["Redacted HA", "ECO4", "GBIS"]].rename(
+            columns={"Redacted HA": "HA Name", "ECO4": "Sept Expected ECO4", "GBIS": "Sept Expected GBIS"}
+        ),
+        how="left",
+        on="HA Name",
+    )
+
+    # We update the asset list size for HA 33, because they do not have access to the full portfolio
+    remaining_has.loc[remaining_has["HA Name"] == "HA 33", "ASSET LIST no."] = 20699
+    # We also remove HA 17
+    remaining_has = remaining_has[~remaining_has["HA Name"].isin(["HA 17"])]
+
+    # We now calculate the expected ECO4 and GBIS sales based on the average conversion rates
+    remaining_has["Expected ECO4 Sales"] = remaining_has["ASSET LIST no."] * eco4_conversion_rate
+    remaining_has["Expected GBIS Sales"] = remaining_has["ASSET LIST no."] * gbis_conversion_rate
+
+    # Filter just on columns we're interested in
+    remaining_has = remaining_has[[
+        "HA Name",
+        "ASSET LIST no.",
+        "Sept Expected ECO4",
+        "Sept Expected GBIS",
+        "ECO4",
+        "GBIS",
+    ]].rename(
+        columns={
+            "ECO4": "Nov Expected ECO4",
+            "GBIS": "Nov Expected GBIS",
+        }
+    )
+
+    remaining_has = remaining_has.fillna(0)
+
+    # We take just HAs that had an initial september expectation for ECO4 or GBIS, or that now have a Nov expectation
+    remaining_has = remaining_has[
+        (remaining_has["Sept Expected ECO4"] > 0) | (remaining_has["Sept Expected GBIS"] > 0) |
+        (remaining_has["Nov Expected ECO4"] > 0) | (remaining_has["Nov Expected GBIS"] > 0)
+        ]
+
+    # Expected sales based on asset list size and conversion rate
+    remaining_has["Forecasted Sales ECO4"] = remaining_has["ASSET LIST no."] * eco4_conversion_rate
+    remaining_has["Forecasted Sales GBIS"] = remaining_has["ASSET LIST no."] * gbis_conversion_rate
+
+    # Calculate the total expected value for September and November
+    remaining_has["Sept Expected ECO4 Value"] = remaining_has["Sept Expected ECO4"] * historical_eco4_price
+    remaining_has["Sept Expected GBIS Value"] = remaining_has["Sept Expected GBIS"] * historical_gbis_price
+
+    remaining_has["Nov Expected ECO4 Value"] = remaining_has["Nov Expected ECO4"] * historical_eco4_price
+    remaining_has["Nov Expected GBIS Value"] = remaining_has["Nov Expected GBIS"] * historical_gbis_price
+
+    # Calculate forecasted revenue
+    remaining_has["Forecasted ECO4 Revenue"] = remaining_has["ASSET LIST no."] * future_eco4_value
+    remaining_has["Forecasted GBIS Revenue"] = remaining_has["ASSET LIST no."] * future_gbis_value
+
+    # We also calculate forecasted revenue with the original price
+    remaining_has["Forecasted ECO4 Revenue (original price)"] = remaining_has["ASSET LIST no."] * historical_eco4_value
+    remaining_has["Forecasted GBIS Revenue (original price)"] = remaining_has["ASSET LIST no."] * historical_gbis_value
+
+    # Calculate totals for each scheme
+    remaining_has_september_eco4_sales = remaining_has["Sept Expected ECO4"].sum()
+    remaining_has_september_gbis_sales = remaining_has["Sept Expected GBIS"].sum()
+
+    remaining_has_november_eco4_sales = remaining_has["Nov Expected ECO4"].sum()
+    remaining_has_november_gbis_sales = remaining_has["Nov Expected GBIS"].sum()
+
+    remaining_has_forecasted_eco4_sales = remaining_has["Forecasted Sales ECO4"].sum()
+    remaining_has_forecasted_gbis_sales = remaining_has["Forecasted Sales GBIS"].sum()
+
+    remaining_has_september_eco4_value = remaining_has["Sept Expected ECO4 Value"].sum()
+    remaining_has_september_gbis_value = remaining_has["Sept Expected GBIS Value"].sum()
+
+    remaining_has_november_eco4_value = remaining_has["Nov Expected ECO4 Value"].sum()
+    remaining_has_november_gbis_value = remaining_has["Nov Expected GBIS Value"].sum()
+
+    remaining_has_forecasted_eco4_value = remaining_has["Forecasted ECO4 Revenue"].sum()
+    remaining_has_forecasted_gbis_value = remaining_has["Forecasted GBIS Revenue"].sum()
+
+    remaining_has_forecasted_eco4_value_original_price = remaining_has["Forecasted ECO4 Revenue (original price)"].sum()
+    remaining_has_forecasted_gbis_value_original_price = remaining_has["Forecasted GBIS Revenue (original price)"].sum()
+
+    # Calculate the change in forecasted sales against the September expected sales
+    remaining_has_foecast_change_eco4 = 100 * (
+        remaining_has["Forecasted Sales ECO4"].sum() - remaining_has["Sept Expected ECO4"].sum()
+    ) / remaining_has["Sept Expected ECO4"].sum()
+
+    remaining_has_foecast_change_gbis = 100 * (
+        remaining_has["Forecasted Sales GBIS"].sum() - remaining_has["Sept Expected GBIS"].sum()
+    ) / remaining_has["Sept Expected GBIS"].sum()
+
+    # Total change
+    remaining_has_foecast_change_total = 100 * (
+        remaining_has["Forecasted Sales ECO4"].sum() + remaining_has["Forecasted Sales GBIS"].sum() -
+        remaining_has["Sept Expected ECO4"].sum() - remaining_has["Sept Expected GBIS"].sum()
+    ) / (remaining_has["Sept Expected ECO4"].sum() + remaining_has["Sept Expected GBIS"].sum())
+
+    asset_list_size = remaining_has["ASSET LIST no."].sum()
+
+    # Create a summary table of the rest with the totals for ECO4, GBIS and then a total row
+    remaining_has_aggregate = pd.DataFrame(
+        columns=["Scheme", "Asset List Size", "Sept Expected Sales", "Nov Expected Sales", "Forecasted Sales",
+                 "Forecasted Change vs Sept",
+                 "Sept Expected Value", "Nov Expected Value", "Forecasted Value", "Forecasted Value (original price)"],
+        data=[
+            [
+                "ECO4", asset_list_size, remaining_has_september_eco4_sales, remaining_has_november_eco4_sales,
+                remaining_has_forecasted_eco4_sales, remaining_has_foecast_change_eco4,
+                remaining_has_september_eco4_value,
+                remaining_has_november_eco4_value, remaining_has_forecasted_eco4_value,
+                remaining_has_forecasted_eco4_value_original_price
+            ],
+            [
+                "GBIS", asset_list_size, remaining_has_september_gbis_sales, remaining_has_november_gbis_sales,
+                remaining_has_forecasted_gbis_sales, remaining_has_foecast_change_gbis,
+                remaining_has_september_gbis_value,
+                remaining_has_november_gbis_value, remaining_has_forecasted_gbis_value,
+                remaining_has_forecasted_gbis_value_original_price
+            ],
+            [
+                "Total", asset_list_size, remaining_has_september_eco4_sales + remaining_has_september_gbis_sales,
+                                          remaining_has_november_eco4_sales + remaining_has_november_gbis_sales,
+                                          remaining_has_forecasted_eco4_sales + remaining_has_forecasted_gbis_sales,
+                remaining_has_foecast_change_total,
+                                          remaining_has_september_eco4_value + remaining_has_september_gbis_value,
+                                          remaining_has_november_eco4_value + remaining_has_november_gbis_value,
+                                          remaining_has_forecasted_eco4_value + remaining_has_forecasted_gbis_value,
+                                          remaining_has_forecasted_eco4_value_original_price +
+                                          remaining_has_forecasted_gbis_value_original_price
+            ]
+        ]
+    )
+
+    # Calculate pipeline value
+    pipeline_value = aggregates[["Scheme", "Completed Revenue", "Forecasted Remaining Revenue"]].merge(
+        remaining_has_aggregate[["Scheme", "Forecasted Value"]].rename(
+            columns={"Forecasted Value": "Forecasted Revenue, Unconfirmed HAs"}
+        ), how="inner", on="Scheme"
+    )
+
+    # Calculate the total
+    pipeline_value["Total Value"] = (
+        pipeline_value["Completed Revenue"] + pipeline_value["Forecasted Remaining Revenue"] + pipeline_value[
+        "Forecasted Revenue, Unconfirmed HAs"]
+    )
+
+    # TODO: Insert model figures
+    model_results = pd.DataFrame(
+        [
+            {
+                # This one, we don't have sales data
+                "HA Name": "HA 15",
+                "Model Expected Additional ECO4 (unit level)": None,
+                "Model Expected Total ECO4 (unit level)": 296,
+                "Model Expected Additional GBIS (unit level)": None,
+                "Model Expected Total GBIS (unit level)": 209,
+            },
+            {
+                "HA Name": "HA 16",
+                # Old before re-run
+                # "Model Expected Additional ECO4 (unit level)": 418,
+                # "Model Expected Total ECO4 (unit level)": 1820,
+                # "Model Expected Additional GBIS (unit level)": 576,
+                # "Model Expected Total GBIS (unit level)": 612,
+
+                # In the partial sales data, WFT have completed 1407 ECO4, 36 GBIS
+                "Model Expected Additional ECO4 (unit level)": 411 + 342 + 235,
+                "Model Expected Total ECO4 (unit level)": 1407 + 411 + 342 + 235,
+                "Model Expected Additional GBIS (unit level)": 223,
+                "Model Expected Total GBIS (unit level)": 36 + 223,
+            },
+            {
+                "HA Name": "HA 24",
+                "Model Expected Additional ECO4 (unit level)": 224,
+                "Model Expected Total ECO4 (unit level)": 848,
+                "Model Expected Additional GBIS (unit level)": 552,
+                "Model Expected Total GBIS (unit level)": 552,
+            },
+            {
+                "HA Name": "HA 25",
+                "Model Expected Additional ECO4 (unit level)": None,
+                "Model Expected Total ECO4 (unit level)": 1709 + 59,
+                "Model Expected Additional GBIS (unit level)": None,
+                "Model Expected Total GBIS (unit level)": 2004 + 107,
+            }
+        ]
+    )
+
+    sales_data_formatted["Remaining ECO4 Sales"] = (
+        sales_data_formatted["Forecasted ECO4 Sales"] - sales_data_formatted["Oct ECO4 Sales (adjusted for variance)"]
+    )
+
+    sales_data_formatted["Remaining GBIS Sales"] = (
+        sales_data_formatted["Forecasted GBIS Sales"] - sales_data_formatted["Oct GBIS Sales (adjusted for variance)"]
+    )
+
+    sales_data_formatted["Completed ECO4 Revenue"] = (sales_data_formatted[
+                                                          "Oct ECO4 Sales (adjusted for variance)"] *
+                                                      historical_eco4_price)
+    sales_data_formatted["Completed GBIS Revenue"] = (sales_data_formatted[
+                                                          "Oct GBIS Sales (adjusted for variance)"] *
+                                                      historical_gbis_price)
+
+    ha_subset_with_sales = ["HA 15", "HA 16", "HA 24"]
+
+    has_subset_with_sales_value = sales_data_formatted[
+        sales_data_formatted["HA Name"].isin(ha_subset_with_sales)
+    ].copy()[
+        [
+            "HA Name",
+            "Oct ECO4 Sales (adjusted for variance)",
+            "Oct GBIS Sales (adjusted for variance)",
+            "Remaining ECO4 Sales",
+            "Remaining GBIS Sales",
+            "Forecasted ECO4 Sales",
+            "Forecasted GBIS Sales",
+            "Completed ECO4 Revenue",
+            "Completed GBIS Revenue"
+        ]
+    ]
+
+    has_subset_with_sales_value["Remaining ECO4 Revenue"] = has_subset_with_sales_value[
+                                                                "Remaining ECO4 Sales"] * ECO4_NEW_RATES
+    has_subset_with_sales_value["Remaining GBIS Revenue"] = has_subset_with_sales_value[
+                                                                "Remaining GBIS Sales"] * GBIS_NEW_RATES
+
+    has_subset_with_sales_value["Remaining Total Revenue"] = (
+        has_subset_with_sales_value["Remaining ECO4 Revenue"] + has_subset_with_sales_value["Remaining GBIS Revenue"]
+    )
+
+    model_results["Model Expected Additional ECO4 Revenue"] = (
+        model_results["Model Expected Additional ECO4 (unit level)"] * ECO4_NEW_RATES
+    )
+
+    model_results["Model Expected Additional GBIS revenue"] = (
+        model_results["Model Expected Additional GBIS (unit level)"] * GBIS_NEW_RATES
+    )
+
+    model_results["Model Expected Additional Total Revenue"] = (
+        model_results["Model Expected Additional ECO4 Revenue"] + model_results[
+        "Model Expected Additional GBIS revenue"]
+    )
+
+    # Show more columns with pandas
+    pd.set_option('display.max_rows', 500)
+    pd.set_option('display.max_columns', 500)
+    pd.set_option('display.width', 1000)
+
+    # Look at HA 16
+    ha16_model = model_results[model_results["HA Name"] == "HA 16"]
+    has_subset_with_sales_value[has_subset_with_sales_value["HA Name"] == "HA 16"]
+
+    # WFT: For HA 16: 4,598,190 ECO4, 57,000 GBIS
+    # Model:
+
+    # Look at HA 24
+    ha24_model = model_results[model_results["HA Name"] == "HA 24"]
+    has_subset_with_sales_value[has_subset_with_sales_value["HA Name"] == "HA 24"]
+
+    # Look at HA 15
+    ha15_data = has_subset_with_sales_value[has_subset_with_sales_value["HA Name"] == "HA 15"]
+    ha15_portfolio_value = ha15_data["Completed ECO4 Revenue"] + ha15_data[
+        "Completed GBIS Revenue"] + ha15_data["Remaining Total Revenue"]
+    # # This doesn't have sales data so in the model analysis, we just value the ha as a whole
+    ha15_model = model_results[model_results["HA Name"] == "HA 15"]
+    ha15_value = ha15_model["Model Expected Total ECO4 (unit level)"].iloc[0] * ECO4_NEW_RATES + \
+                 ha15_model["Model Expected Total GBIS (unit level)"].iloc[0] * GBIS_NEW_RATES
+
+    model_results["Expected ECO4 Revenue"] = model_results["Model Expected Total ECO4 (unit level)"] * ECO4_NEW_RATES
+    model_results["Expected GBIS Revenue"] = model_results["Model Expected Total GBIS (unit level)"] * GBIS_NEW_RATES
+    model_results["Expected Total Revenue"] = model_results["Expected ECO4 Revenue"] + model_results[
+        "Expected GBIS Revenue"]
+    model_results[model_results["HA Name"].isin(["HA 15"])]
+
+    # We now create a final excel with all of the data
+    # We want:
+    # 1) aggregates
+    # 2) sales_data_formatted
+    # 3) remaining_has_aggregate
+    # 4) remaining_has
+    # 5) problem_has_summary
+
+    # Function to get the maximum column width
+    def get_col_widths(dataframe):
+        """Return [index width] followed by one width per column (longest of its label and its values as text)."""
+        idx_max = max([len(str(s)) for s in dataframe.index.values] + [len(str(dataframe.index.name))])
+        # str(col) accepts non-string labels; seeding max() with the label keeps empty columns from giving NaN
+        return [idx_max] + [max([len(str(col)), *map(len, dataframe[col].astype(str))]) for col in dataframe.columns]
+
+    # Create a Pandas Excel writer using XlsxWriter as the engine
+    with pd.ExcelWriter('HA Pipeline Analysis.xlsx', engine='xlsxwriter') as writer:
+        # Write each dataframe to a different worksheet without the index
+        for df, sheet in [
+            (aggregates, 'Forecasted Sales'),
+            (sales_data_formatted, 'Sales Data'),
+            (remaining_has_aggregate, 'Remaining HAs Value'),
+            (remaining_has, 'Remaining HAs data'),
+            (pipeline_value, 'Pipeline Value'),
+            (problem_has_summary, 'Problem HAs Analysis'),
+            (problem_has_data, 'Problem HAs Data'),
+        ]:
+            df.to_excel(writer, sheet_name=sheet, index=False)
+
+            # Auto-adjust columns' width. get_col_widths() returns the index width first, but the
+            # index is not written to the sheet, so skip it to keep widths aligned with their columns
+            for i, width in enumerate(get_col_widths(df)[1:]):
+                writer.sheets[sheet].set_column(i, i, width)
diff --git a/etl/eligibility/ha_15_32/cancellation.py b/etl/eligibility/ha_15_32/cancellation.py
new file mode 100644
index 00000000..849add45
--- /dev/null
+++ b/etl/eligibility/ha_15_32/cancellation.py
@@ -0,0 +1,113 @@
+import openpyxl
+import pandas as pd
+import numpy as np
+
+
+def get_excel_survey_list(workbook_path, worksheet_name=None):
+    """
+    Read a survey worksheet into a DataFrame, recording the fill colour of each row's first cell.
+
+    :param workbook_path: path of the workbook to open with openpyxl
+    :param worksheet_name: sheet to read; the workbook's active sheet is used when None
+    :return: DataFrame of rows 2 onwards (row 1 supplies the column names) plus a "row_colour" column
+    """
+    workbook = openpyxl.load_workbook(workbook_path)
+    sheet = workbook.active if worksheet_name is None else workbook[worksheet_name]
+
+    values, colours = [], []
+    for cells in sheet.iter_rows(min_row=2, values_only=False):  # row 1 is assumed to hold the headers
+        values.append([cell.value for cell in cells])
+        # A colour index of '00000000' is recorded as None
+        colour_index = cells[0].fill.start_color.index
+        colours.append(None if colour_index == '00000000' else colour_index)
+
+    survey_list = pd.DataFrame(values, columns=[cell.value for cell in sheet[1]])
+    survey_list["row_colour"] = colours
+    return survey_list
+
+
+def load_data():
+    """
+    Load the HA 16, HA 24 and HA 25 survey workbooks and combine them into one status table.
+
+    :return: DataFrame with columns "HA", "survey_status" and "cancellation_reason", excluding rows
+        whose status is "no update" or "loft remaining"
+    """
+    output_columns = ["HA", "survey_status", "cancellation_reason"]
+
+    # Load for HA 16 - ECO 4
+    ha16_survey_list = get_excel_survey_list('etl/eligibility/ha_15_32/HESTIA- HA 16 ECO4 SURVEY LIST.xlsx')
+
+    # Load for HA 24 - ECO 4
+    ha24_survey_list = get_excel_survey_list('etl/eligibility/ha_15_32/HESTIA - HA 24 ECO4 SURVEY LIST.xlsx')
+
+    # Load for HA 25 - ECO 3
+    ha25_survey_list = get_excel_survey_list(
+        'etl/eligibility/ha_15_32/HESTIA - HA 25 ECO3 SURVEY LIST.xlsx', worksheet_name="CAVITY"
+    )
+
+    # Remove columns that contain no values at all
+    ha25_survey_list = ha25_survey_list.dropna(axis=1, how='all')
+
+    # Standardise the installation status columns
+    ha16_survey_list["survey_status"] = ha16_survey_list["INSTALLED OR CANCELLED"].replace(
+        {
+            "NO UPDATE - CHECKED 2.10.23": "no update",
+            "NO UPDATE - CHECKED 18.12.23": "no update",
+            "INSTALLED": "installed",
+            "CANCELLED": "cancelled",
+            "LOFT STILL TO BE INSTALLED": "loft remaining",
+        }
+    )
+
+    ha24_survey_list["survey_status"] = ha24_survey_list["INSTALLED OR CANCELLED"].replace(
+        {
+            "NO UPDATE - CHECKED 21.11.23": "no update",
+            "NO UPDATE - CHECKED 18.12.23": "no update",
+            "INSTALLED": "installed",
+            "CANCELLED": "cancelled",
+            "LOFT STILL TO BE INSTALLED": "loft remaining",
+            "SEE NOTES >>": "see notes",
+        }
+    )
+
+    # HA25 is prepared differently: its status is derived from the row fill colour
+    ha25_colour_to_status = {
+        "FF7030A0": "installed",
+        "FF92D050": "installed",
+        "FF38FD23": "installed",
+        "FFFF0000": "cancelled",
+        "FFFFFF00": "filler row - drop",
+    }
+    # Any colour outside the mapping (or a missing colour) is labelled "unknown"
+    ha25_survey_list["survey_status"] = ha25_survey_list["row_colour"].map(ha25_colour_to_status).fillna("unknown")
+    # .copy() so the column assignments below write to an independent frame, not a slice
+    ha25_survey_list = ha25_survey_list[ha25_survey_list["survey_status"] != "filler row - drop"].copy()
+
+    # We standardise the cancellation reasons - just create a new column
+    ha16_survey_list["cancellation_reason"] = ha16_survey_list["INSTALLERS NOTES ; REASONS FOR CANCELLATIONS"].copy()
+    ha24_survey_list["cancellation_reason"] = ha24_survey_list["INSTALLERS NOTES ; REASONS FOR CANCELLATIONS"].copy()
+    # There's no cancellation reason for HA25
+    ha25_survey_list["cancellation_reason"] = "No reason provided"
+
+    # Combine the dataframes
+    ha16_survey_list["HA"] = "HA 16"
+    ha24_survey_list["HA"] = "HA 24"
+    ha25_survey_list["HA"] = "HA 25"
+    cancellation_data = pd.concat(
+        [survey_list[output_columns] for survey_list in (ha16_survey_list, ha24_survey_list, ha25_survey_list)]
+    )
+
+    # Take just rows that we have a confirmed status for
+    cancellation_data = cancellation_data[~cancellation_data["survey_status"].isin(["no update", "loft remaining"])]
+    return cancellation_data
+
+
+def app():
+    """
+    Entry point for analysing the cancellation data provided by warmfront.
+
+    :return: None; the loaded data is not used further or returned here
+    """
+    # Cancellations of jobs that completed invasive surveys where the installer could not conclude the work
+    cancelled_after_survey = load_data()
diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 1ed95a30..e94babcd 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -882,6 +882,13 @@ def get_epc_data(
     return outputs
 
 
+def get_col_widths(dataframe):
+    """Return [index width] followed by one width per column (longest of its label and its values as text)."""
+    idx_max = max([len(str(s)) for s in dataframe.index.values] + [len(str(dataframe.index.name))])
+    # str(col) accepts non-string labels; seeding max() with the label keeps empty columns from giving NaN
+    return [idx_max] + [max([len(str(col)), *map(len, dataframe[col].astype(str))]) for col in dataframe.columns]
+
+
 def analyse_ha_data(outputs, loader):
     """
     The approach we take within this function is the following:
@@ -901,7 +908,11 @@ def analyse_ha_data(outputs, loader):
     :return:
     """
 
+    eco4_rate = 1710
+    gbis_rate = 600
+
     ha_analysis_results = []
+    ha_revenue_results = []
     for ha_name, datasets in outputs.items():
 
         inputs = [x for k, x in loader.data.items() if k == ha_name][0]
@@ -1034,7 +1045,8 @@ def analyse_ha_data(outputs, loader):
             (
                 (remaining_eco4_df["eco4_message"] == "sap too high") &
                 remaining_eco4_df["eligibility_cavity_type"].isin(["partial", "empty"]) &
-                remaining_eco4_df["eligibility_loft_type"].isin(["0-100mm"])
+                remaining_eco4_df["eligibility_loft_type"].isin(["0-100mm"]) &
+                pd.isnull(remaining_eco4_df["prospect_type"])
             ),
             "ECO4 if SAP downgrade",
             remaining_eco4_df["prospect_type"]
@@ -1048,7 +1060,7 @@ def analyse_ha_data(outputs, loader):
                 remaining_eco4_df["eligibility_cavity_type"].isin(["full"]) &
                 remaining_eco4_df["eligibility_loft_type"].isin(["0-100mm"])
             ),
-            "Filled cavity - subject to CIGA check",
+            "ECO4 - Filled cavity - subject to CIGA check",
             remaining_eco4_df["prospect_type"]
         )
 
@@ -1064,7 +1076,7 @@ def analyse_ha_data(outputs, loader):
 
         # 5) Looks like GBIS instead
         remaining_eco4_df["prospect_type"] = np.where(
-            (remaining_eco4_df["gbis_eligible"] == True),
+            (remaining_eco4_df["gbis_eligible"] == True) & pd.isnull(remaining_eco4_df["prospect_type"]),
             "Looks like GBIS",
             remaining_eco4_df["prospect_type"]
         )
@@ -1094,16 +1106,17 @@ def analyse_ha_data(outputs, loader):
         # 2) GBIS candidates that look like strict ECO4 candidates
         remaining_gbis["prospect_type"] = np.where(
             (remaining_gbis["eco4_eligible"] == True),
-            "Upgradable to ECO4",
+            "GBIS - Upgradable to ECO4",
             remaining_gbis["prospect_type"]
         )
 
         # 3) Subject to CIGA check - Filled cavity
         remaining_gbis["prospect_type"] = np.where(
             (
-                remaining_gbis["eligibility_cavity_type"].isin(["full"])
+                remaining_gbis["eligibility_cavity_type"].isin(["full"]) &
+                pd.isnull(remaining_gbis["prospect_type"])
             ),
-            "Filled cavity - subject to CIGA check",
+            "GBIS - Filled cavity - subject to CIGA check",
             remaining_gbis["prospect_type"]
         )
 
@@ -1141,30 +1154,95 @@ def analyse_ha_data(outputs, loader):
             )
         ].copy()
 
-        ha_analysis_results.append({
+        # Perform some checks to make sure we have all of the values
+        remaining_eco4_dict = remaining_eco4_df["prospect_type"].value_counts().to_dict()
+        if n_remaining_properties_eco4 != sum([v for k, v in remaining_eco4_dict.items()]):
+            raise ValueError(
+                "Number of remaining properties does not match the number of properties in remaining ECO4 dict"
+            )
+
+        remaining_gbis_dict = remaining_gbis["prospect_type"].value_counts().to_dict()
+        if n_remaining_properties_gbis != sum([v for k, v in remaining_gbis_dict.items()]):
+            raise ValueError(
+                "Number of remaining properties does not match the number of properties in remaining GBIS dict"
+            )
+
+        to_append = {
+            "ha_name": ha_name,
             "n_properties_in_asset_list": n_properties_in_asset_list,
             ############
             # ECO4
             ############
             "properties_sold_eco4": properties_sold_eco4,
             "n_remaining_properties_eco4": n_remaining_properties_eco4,
-            **remaining_eco4_df["prospect_type"].value_counts().to_dict(),
+            **remaining_eco4_dict,
             ############
             # GBIS
             ############
             "properties_sold_gbis": properties_sold_gbis,
             "n_remaining_properties_gbis": n_remaining_properties_gbis,
-            **remaining_gbis["prospect_type"].value_counts().to_dict(),
+            **remaining_gbis_dict,
             ############
             # GBIS
             ############
             "n_eco4_surplus": eco4_surplus.shape[0],
             "n_gbis_surplus": gbis_surplus.shape[0],
-        })
+        }
+
+        ha_analysis_results.append(to_append)
+
+        # Revenue view of the same counts: each prospect_type count is priced at the ECO4 or GBIS rate.
+        # Counts are read with .get(..., 0) because value_counts() omits categories with no rows for this HA.
+        revenue_to_append = {
+            "ha_name": ha_name,
+            "£ Remaining from asset list": (
+                n_remaining_properties_eco4 * eco4_rate + n_remaining_properties_gbis * gbis_rate
+            ),
+            "Of which: Strict": (
+                to_append.get('strict ECO4', 0) * eco4_rate + to_append.get('strict GBIS', 0) * gbis_rate +
+                to_append.get('GBIS - Upgradable to ECO4', 0) * gbis_rate
+            ),
+            "Of which: Subject to CIGA": (
+                to_append.get("ECO4 - Filled cavity - subject to CIGA check", 0) * eco4_rate +
+                to_append.get("GBIS - Filled cavity - subject to CIGA check", 0) * gbis_rate
+            ),
+            "Of which: Prospect, not perfect strict prospect": (
+                to_append.get("ECO4 prospect - empty cavity, loft insulation below regulation", 0) * eco4_rate +
+                to_append.get("ECO4 if SAP downgrade", 0) * eco4_rate
+            ),
+            "Of which: Potential downgrade to GBIS": to_append.get("Looks like GBIS", 0) * eco4_rate,
+            "Of which: Does not look like prospect": (
+                to_append.get("Does not look like ECO4 candidate", 0) * eco4_rate +
+                to_append.get("Does not look like GBIS candidate", 0) * gbis_rate
+            ),
+            "Surplus: Unidentified properties": eco4_surplus.shape[0] * eco4_rate + gbis_surplus.shape[0] * gbis_rate,
+            "Surplus: GBIS Updates to ECO4": to_append.get("GBIS - Upgradable to ECO4", 0) * (eco4_rate - gbis_rate)
+        }
+
+        # Sanity check: the "Of which" lines must add back to the headline figure, in either direction
+        breakdown_total = sum(
+            value for key, value in revenue_to_append.items() if key.startswith("Of which: ")
+        )
+        if abs(revenue_to_append["£ Remaining from asset list"] - breakdown_total) > 1:
+            raise ValueError("Error between top level revenue figures and breakdown - investigate me")
+
+        ha_revenue_results.append(revenue_to_append)
 
     ha_analysis_results = pd.DataFrame(ha_analysis_results)
+    ha_revenue_results = pd.DataFrame(ha_revenue_results)
 
-    # Todo: create revenue figures and automate creation of excel
+    # Automate creation of the excel
+    # Create a Pandas Excel writer using XlsxWriter as the engine
+    with pd.ExcelWriter('HA Analysis - batch3.xlsx', engine='xlsxwriter') as writer:
+        # Write each dataframe to a different worksheet without the index
+        for df, sheet in [(ha_revenue_results, 'Total Revenue'),
+                          (ha_analysis_results, 'By ECO4 and GBIS')]:
+            df.to_excel(writer, sheet_name=sheet, index=False)
+
+            # Auto-adjust columns' width. get_col_widths() returns the index width first, but the
+            # index is not written to the sheet, so skip it to keep widths aligned with their columns
+            for i, width in enumerate(get_col_widths(df)[1:]):
+                writer.sheets[sheet].set_column(i, i, width)
 
 
 def app():