From 6ae21bbcb023139961eb69749ac1380a7d3ac001 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 28 Feb 2024 12:31:48 +0000
Subject: [PATCH] Creating the output structure

---
 etl/eligibility/Eligibility.py                |  11 +-
 .../ha_15_32/ha_analysis_batch_3.py           | 548 +++++++-----------
 2 files changed, 220 insertions(+), 339 deletions(-)

diff --git a/etl/eligibility/Eligibility.py b/etl/eligibility/Eligibility.py
index f7a5ed98..b594579f 100644
--- a/etl/eligibility/Eligibility.py
+++ b/etl/eligibility/Eligibility.py
@@ -365,7 +365,7 @@ class Eligibility:
             return
 
         # Near perfect
-        if self.cavity["suitability"] and (current_sap < 55):
+        if self.cavity["suitability"] and (current_sap < 69):
             self.gbis_warmfront = {
                 "eligible": True,
                 "strict": True,
@@ -373,15 +373,6 @@ class Eligibility:
             }
             return
 
-        # Suitable cavity, but high sap
-        if self.cavity["suitability"] and (current_sap >= 55):
-            self.gbis_warmfront = {
-                "eligible": True,
-                "strict": False,
-                "message": "Meets cavity, fails SAP check",
-            }
-            return
-
         self.gbis_warmfront = {
             "eligible": False,
             "strict": False,
diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 5cbfb90c..61c4a243 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1646,10 +1646,26 @@ def get_epc_data(
 
 
 def get_col_widths(dataframe):
-    # First we find the maximum length of the index column
-    idx_max = max([len(str(s)) for s in dataframe.index.values] + [len(str(dataframe.index.name))])
-    # Then, we concatenate this to the max of the lengths of column name and its max value for each column, row-wise
-    return [idx_max] + [max(dataframe[col].astype(str).map(len).max(), len(col)) for col in dataframe.columns]
+    # Define a maximum width for any column to prevent excessively wide columns
+    max_allowed_width = 25
+
+    # Calculate widths for columns
+    widths = []
+
+    if isinstance(dataframe.columns, pd.MultiIndex):
+        # For MultiIndex, calculate max width considering the header and data
+        header_widths = [max(len(str(item)) for item in col) + 2 for col in dataframe.columns.values]  # +2 for padding
+        for i, column in enumerate(dataframe.columns):
+            max_data_width = max(dataframe[column].astype(str).apply(len).max(), header_widths[i])
+            widths.append(min(max_data_width, max_allowed_width))
+    else:
+        # For non-MultiIndex, calculate width normally
+        for col in dataframe.columns:
+            # Calculate the max length of data or column name and limit it
+            max_length = max(dataframe[col].astype(str).apply(len).max(), len(str(col)) + 2)  # +2 for padding
+            widths.append(min(max_length, max_allowed_width))
+
+    return widths
 
 
 def analyse_ha_data(outputs, loader):
@@ -1671,42 +1687,13 @@ def analyse_ha_data(outputs, loader):
     :return:
     """
 
-    eco4_rate = 1710
-    gbis_rate = 600
-
     ha_analysis_results = []
-    ha_revenue_results = []
     for ha_name, datasets in outputs.items():
-
         inputs = [x for k, x in loader.data.items() if k == ha_name][0]
-        # TODO: This is placeholder because we don't have the schemes that the properties have been qualified for
-        #       yet
-        #
-        import random
-        randomly_allocated_schemes = random.choices(["ECO4", "GBIS"], k=inputs["asset_list"].shape[0])
-        inputs["asset_list"]["randomly_allocated_schemes"] = randomly_allocated_schemes
-        inputs["asset_list"]["funding_scheme"] = None
-        inputs["asset_list"]["funding_scheme"] = np.where(
-            inputs["asset_list"]["row_meaning"] == "identified potential eco works (CWI)",
-            inputs["asset_list"]["randomly_allocated_schemes"],
-            inputs["asset_list"]["funding_scheme"]
-        )
-
-        # TODO: Also temp, just for HA 6
-        if ha_name == "ha_6":
-            inputs["survey_list"]["funding_scheme"] = None
-            inputs["survey_list"]["funding_scheme"] = np.where(
-                inputs["survey_list"][
-                    'AFFORDABLE WARMTH                 OR EPC FOR HOUSING ASSOCIATION '] == "AFFORDABLE WARMTH",
-                "ECO4",
-                "GBIS"
-            )
-
-        # End placholder
 
         results_df = datasets["results_df"].copy()
 
-        analysis_data = inputs["asset_list"][['asset_list_row_id', "row_meaning", "funding_scheme"]].rename(
+        analysis_data = inputs["asset_list"][['asset_list_row_id', "ECO Eligibility"]].rename(
             columns={"row_meaning": "asset_identification_status"}
         ).merge(
             results_df,
@@ -1715,293 +1702,236 @@ def analyse_ha_data(outputs, loader):
             left_on="asset_list_row_id"
         )
 
-        # We now merge the survey list onto the analysis data and remove anything that is sold, to give us just what is
-        # remaining
+        ################################################################################################
+        # We take the properties that strictly qualified under eco
+        ################################################################################################
 
-        if inputs["matched_lookup"] is not None:
-            analysis_data = analysis_data.merge(
-                inputs["matched_lookup"], how="left", on="asset_list_row_id"
+        eco4_identified = analysis_data[analysis_data["ECO Eligibility"] == "eco4"].copy()
+        eco4_identified["identification_type"] = None
+        eco4_identified["identification_type"] = np.where(
+            (eco4_identified["eco4_eligible"] == True) & (eco4_identified["eco4_strict"] == True),
+            "strict",
+            eco4_identified["identification_type"]
+        )
+
+        eco4_identified["identification_type"] = np.where(
+            (eco4_identified["eco4_eligible"] == True) & (eco4_identified["eco4_strict"] == False),
+            "expansive",
+            eco4_identified["identification_type"]
+        )
+        ################################################################################################
+        # We take the properties dependent on CIGA
+        ################################################################################################
+
+        ciga_dependent_identified = analysis_data[
+            analysis_data["ECO Eligibility"].isin(
+                [
+                    "eco4 (subject to ciga)",
+                    "eco4 - passed ciga"
+                ]
             )
-            # Drop any rows that have a survey_list_row_id
-            analysis_data = analysis_data[pd.isnull(analysis_data["survey_list_row_id"])]
+        ].copy()
 
-        # If we have a survey list, we merge this onto the results
-        n_properties_in_asset_list = analysis_data["asset_list_row_id"].nunique()
-
-        properties_sold = (
-            inputs["survey_list"].groupby("funding_scheme")["survey_list_row_id"].nunique().reset_index() if
-            inputs["survey_list"] is not None else pd.DataFrame(columns=["funding_scheme"])
-        )
-        properties_sold_eco4 = (
-            properties_sold[properties_sold["funding_scheme"] == "ECO4"]["survey_list_row_id"].values[0] if
-            (not properties_sold.empty) and ("ECO4" in properties_sold["funding_scheme"].values) else 0
-        )
-        properties_sold_gbis = (
-            properties_sold[properties_sold["funding_scheme"] == "GBIS"]["survey_list_row_id"].values[0] if
-            (not properties_sold.empty) and ("GBIS" in properties_sold["funding_scheme"].values) else 0
+        # These are properties that show filled cavity
+        ciga_dependent_identified["identification_type"] = None
+        ciga_dependent_identified["identification_type"] = np.where(
+            ciga_dependent_identified["eco4_message"].isin(
+                [
+                    "Perfect suitability",
+                    "Meets cavity and sap",
+                    "Fails cavity, meets loft, fails SAP",
+                    "Meets fabric, fails SAP check",
+                    "Meets cavity, loft borderline, meets sap",
+                ]
+            ),
+            "strict",
+            ciga_dependent_identified["identification_type"]
         )
 
-        # We now calculate the number of remaining properties, by scheme
-        remaining_properties = analysis_data[
-            analysis_data["asset_identification_status"] == "identified potential eco works (CWI)"
-            ].copy()
-        remaining_properties["prospect_type"] = None
-
-        remaining_properties_by_scheme = (
-            remaining_properties.groupby("funding_scheme")["asset_list_row_id"].nunique().reset_index()
+        ciga_dependent_identified["identification_type"] = np.where(
+            (ciga_dependent_identified["eco4_message"].isin(["All conditions fail", "failed fabric check"])) &
+            (ciga_dependent_identified["walls"].isin(["Cavity wall, filled cavity"])),
+            "expansive",
+            ciga_dependent_identified["identification_type"]
         )
 
-        n_remaining_properties_eco4 = remaining_properties_by_scheme[
-            remaining_properties_by_scheme["funding_scheme"] == "ECO4"
-            ]["asset_list_row_id"].values[0]
+        ciga_dependent_identified["identification_type"] = np.where(
+            (ciga_dependent_identified["eco4_message"].isin(["Meets just cavity"])) | (
+                ciga_dependent_identified["walls"].isin(["Cavity wall, filled cavity"])
+            ),
+            "expansive",
+            ciga_dependent_identified["identification_type"]
+        )
 
-        n_remaining_properties_gbis = remaining_properties_by_scheme[
-            remaining_properties_by_scheme["funding_scheme"] == "GBIS"
-            ]["asset_list_row_id"].values[0]
+        ################################################################################################
+        # We take the properties that qualified for gbis
+        ################################################################################################
+        gbis_identified = analysis_data[analysis_data["ECO Eligibility"] == "gbis"].copy()
+        gbis_identified["identification_type"] = None
+        gbis_identified["identification_type"] = np.where(
+            (gbis_identified["gbis_eligible"] == True) & (gbis_identified["sap"] < 69),
+            "strict",
+            gbis_identified["identification_type"]
+        )
 
-        # For the remaining properties, we use the results of the eligibility process to classify the property into
-        # one of multiple categories
-        #
-        # For properties that have been identified as ECO4
-        # 1) Strict ECO4 candidate - Has required fabric and EPC is D or below. We consider D or below here, because
-        #    Warmfront regularly re-surveys properties which then fall within the SAP requirement
-        #    - This is not the very strictest definition of ECO4 eligible, but we aim to characterise the properties
-        #      here and re-surveying is a common practicce by Warmfront. Additionally, many of the social homes have
-        #      very old EPCs which may score lower when re-done
-        # 2) Meets Fabric requirements, not SAP
-        #    Warmfront has identified the property as eligible, but the EPC is not D or below. We consider this but
-        #    label is separately as not a strict
-        # 3) Subject to CIGA check - Meets loft conditions but shows a filled cavity.
-        #    - we don't have a SAP constraint here because the EPC is (currently) showing what the property might
-        #      actually look like after retrofit and so the EPC currently being a C or above means little, because
-        #      the updated EPC, showing an empty cavity, could bring the property within
-        # 4) Loft insulation too thick - Meets empty cavity but shows a loft with between 101 and 270mm insulation.
-        #   - No SAP constraint, for the same reason as in category 2)
-        # 5) Looks like GBIS instead
-        # 6) Does not look like ECO4 candidate
-        #
-        # For properties that have been identified as GBIS
-        # 1) Strict GBIS candidates
-        # 2) Properties that actually look like strict GBIS candidates
-        # 3) Subject to CIGA check - Filled cavity
-        # 4) Does not look like a GBIS candidate
+        gbis_identified["identification_type"] = np.where(
+            (gbis_identified["gbis_eligible"] == True) & (gbis_identified["sap"] >= 69),
+            "expansive",
+            gbis_identified["identification_type"]
+        )
 
-        remaining_eco4_df = remaining_properties[
-            remaining_properties["funding_scheme"] == "ECO4"
-            ].copy()
+        # Finally, we look at the properties that have not been identified by Warmfront
+        not_identified = analysis_data[
+            analysis_data["ECO Eligibility"].isin(
+                [
+                    "not eligible"
+                ]
+            )
+        ].copy()
 
-        ####################################
+        surplus_eco4 = not_identified[
+            (not_identified["eco4_eligible"] == True) & (not_identified["eco4_message"].isin(
+                ["Perfect suitability", "Meets cavity, loft borderline, meets sap", "Near perfect suitability"]
+            ))
+            ]
+
+        surplus_gbis = not_identified[
+            (not_identified["gbis_eligible"] == True) & (
+                ~not_identified["asset_list_row_id"].isin(surplus_eco4["asset_list_row_id"].values)
+            ) & (not_identified["sap"] < 69) & (
+                (not_identified["cavity_type"].isin(["empty", "partial insulation"])) | (
+                not_identified["walls"].str.contains("partial", case=False, na=False)
+            )
+            )
+            ]
+        surplus_gbis = surplus_gbis[surplus_gbis["is_estimated"] == False]
+
+        # Output variables
         # ECO4
-        ####################################
-
-        # 1) We identify this if:
-        #   - remaining_properties["eco4_eligible"] == True
-
-        remaining_eco4_df["prospect_type"] = np.where(
-            (remaining_eco4_df["eco4_eligible"] == True),
-            "strict ECO4",
-            remaining_eco4_df["prospect_type"]
+        n_properties_in_asset_list = inputs["asset_list"].shape[0]
+        n_warmfront_identified_eco4 = eco4_identified.shape[0] + ciga_dependent_identified.shape[0]
+        eco4_of_which_identified_strict = (
+            eco4_identified[eco4_identified["identification_type"] == "strict"].shape[0] +
+            ciga_dependent_identified[ciga_dependent_identified["identification_type"] == "strict"].shape[0]
         )
-
-        # 2) Meets fabric requirements
-        remaining_eco4_df["prospect_type"] = np.where(
-            (
-                (remaining_eco4_df["eco4_message"] == "sap too high") &
-                remaining_eco4_df["eligibility_cavity_type"].isin(["partial", "empty"]) &
-                remaining_eco4_df["eligibility_loft_type"].isin(["0-100mm"]) &
-                pd.isnull(remaining_eco4_df["prospect_type"])
-            ),
-            "ECO4 if SAP downgrade",
-            remaining_eco4_df["prospect_type"]
+        eco4_of_which_identified_expansive = (
+            eco4_identified[eco4_identified["identification_type"] == "expansive"].shape[0] +
+            ciga_dependent_identified[ciga_dependent_identified["identification_type"] == "expansive"].shape[0]
         )
-
-        # 3) We identify this if it has a filled cavity but meets the loft conditions
-        # TODO: Consider if we should also allow 100-270mm or if we should add some slight tolerance (e.g. 150mm)
-        #       to account for measurement error
-        remaining_eco4_df["prospect_type"] = np.where(
-            (
-                remaining_eco4_df["eligibility_cavity_type"].isin(["full"]) &
-                remaining_eco4_df["eligibility_loft_type"].isin(["0-100mm"])
-            ),
-            "ECO4 - Filled cavity - subject to CIGA check",
-            remaining_eco4_df["prospect_type"]
-        )
-
-        # 4) We identify this by ensuring the cavity if empty or partial, and the loft has between 101 and 270mm
-        remaining_eco4_df["prospect_type"] = np.where(
-            (
-                remaining_eco4_df["eligibility_cavity_type"].isin(["empty", "partial"]) &
-                remaining_eco4_df["eligibility_loft_type"].isin(["100-270mm"])
-            ),
-            "ECO4 prospect - empty cavity, loft insulation below regulation",
-            remaining_eco4_df["prospect_type"]
-        )
-
-        # 5) Looks like GBIS instead
-        remaining_eco4_df["prospect_type"] = np.where(
-            (remaining_eco4_df["gbis_eligible"] == True) & pd.isnull(remaining_eco4_df["prospect_type"]),
-            "Looks like GBIS",
-            remaining_eco4_df["prospect_type"]
-        )
-
-        # 6) This is everything else (i.e. both the cavity is full and the loft insulation is above 100mm)
-        remaining_eco4_df["prospect_type"] = remaining_eco4_df["prospect_type"].fillna(
-            "Does not look like ECO4 candidate"
-        )
-
-        ####################################
         # GBIS
-        ####################################
-
-        remaining_gbis = remaining_properties[
-            remaining_properties["funding_scheme"] == "GBIS"
-            ].copy()
-
-        # 1) Strict GBIS candidates
-        remaining_gbis["prospect_type"] = np.where(
-            (
-                (remaining_gbis["gbis_eligible"] == True) & (remaining_gbis["eco4_eligible"] == False)
-            ),
-            "strict GBIS",
-            remaining_gbis["prospect_type"]
-        )
-
-        # 2) GBIS candidates that look like strict ECO4 candidates
-        remaining_gbis["prospect_type"] = np.where(
-            (remaining_gbis["eco4_eligible"] == True),
-            "GBIS - Upgradable to ECO4",
-            remaining_gbis["prospect_type"]
-        )
-
-        # 3) Subject to CIGA check - Filled cavity
-        remaining_gbis["prospect_type"] = np.where(
-            (
-                remaining_gbis["eligibility_cavity_type"].isin(["full"]) &
-                pd.isnull(remaining_gbis["prospect_type"])
-            ),
-            "GBIS - Filled cavity - subject to CIGA check",
-            remaining_gbis["prospect_type"]
-        )
-
-        # 4) Everything else
-        remaining_gbis["prospect_type"] = remaining_gbis["prospect_type"].fillna(
-            "Does not look like GBIS candidate"
-        )
-
-        ####################################
-        # Surplus properties
-        ####################################
-
-        # Take properties that were not identified by Warmfront and identify those that look like they would qualify
-        # under the strictest criteria
-        surplus_df = analysis_data[
-            analysis_data["asset_identification_status"] != "identified potential eco works (CWI)"
-            ].copy()
-
-        eco4_surplus = surplus_df[
-            (
-                (surplus_df["eco4_eligible"] == True) & (surplus_df["eco4_message"] == "subject to post retrofit sap") &
-                (
-                    surplus_df["eligibility_classification"].isin(
-                        ["high confidence", "highest confidence", "medium confidence"]
-                    )
-                )
-            )
-        ].copy()
-
-        gbis_surplus = surplus_df[
-            (
-                (surplus_df["gbis_eligible"] == True) & (surplus_df["eco4_eligible"] == False) & (
-                surplus_df["eligibility_cavity_type"].isin(["empty", "partial"])
-            )
-            )
-        ].copy()
-
-        # Perform some checks to make sure we have all of the values
-        remaining_eco4_dict = remaining_eco4_df["prospect_type"].value_counts().to_dict()
-        if n_remaining_properties_eco4 != sum([v for k, v in remaining_eco4_dict.items()]):
-            raise ValueError(
-                "Number of remaining properties does not match the number of properties in remaining ECO4 dict"
-            )
-
-        remaining_gbis_dict = remaining_gbis["prospect_type"].value_counts().to_dict()
-        if n_remaining_properties_gbis != sum([v for k, v in remaining_gbis_dict.items()]):
-            raise ValueError(
-                "Number of remaining properties does not match the number of properties in remaining GBIS dict"
-            )
+        n_warmfront_identified_gbis = gbis_identified.shape[0]
+        gbis_of_which_identified_strict = gbis_identified[gbis_identified["identification_type"] == "strict"].shape[0]
+        gbis_of_which_identified_expansive = \
+            gbis_identified[gbis_identified["identification_type"] == "expansive"].shape[0]
 
         to_append = {
-            "ha_name": ha_name,
-            "n_properties_in_asset_list": n_properties_in_asset_list,
+            ("", "HA Name"): ha_name,
+            ("", "# Properties in asset list"): n_properties_in_asset_list,
             ############
             # ECO4
             ############
-            "properties_sold_eco4": properties_sold_eco4,
-            "n_remaining_properties_eco4": n_remaining_properties_eco4,
-            **remaining_eco4_dict,
+            ("ECO4", "# Properties identieid by Warmfront"): n_warmfront_identified_eco4,
+            ("ECO4", "Of which identified by model - strict"): eco4_of_which_identified_strict,
+            ("ECO4", "Of which identified by model - expansive"): eco4_of_which_identified_expansive,
+            ("ECO4", "Of which identified by model - total"): (
+                eco4_of_which_identified_strict + eco4_of_which_identified_expansive),
+            ("ECO4", "Additional properties"): surplus_eco4.shape[0],
             ############
             # GBIS
             ############
-            "properties_sold_gbis": properties_sold_gbis,
-            "n_remaining_properties_gbis": n_remaining_properties_gbis,
-            **remaining_gbis_dict,
-            ############
-            # GBIS
-            ############
-            "n_eco4_surplus": eco4_surplus.shape[0],
-            "n_gbis_surplus": gbis_surplus.shape[0],
+            ("GBIS", "# Properties identieid by Warmfront"): n_warmfront_identified_gbis,
+            ("GBIS", "Of which identified by model - strict"): gbis_of_which_identified_strict,
+            ("GBIS", "Of which identified by model - expansive"): gbis_of_which_identified_expansive,
+            ("GBIS", "Of which identified by model - total"): (
+                gbis_of_which_identified_strict + gbis_of_which_identified_expansive
+            ),
+            ("GBIS", "Additional properties"): surplus_gbis.shape[0]
         }
 
         ha_analysis_results.append(to_append)
 
-        revenue_to_append = {
-            "ha_name": ha_name,
-            "£ Remaining from asset list": (
-                n_remaining_properties_eco4 * eco4_rate + n_remaining_properties_gbis * gbis_rate
-            ),
-            "Of which: Strict": (
-                to_append.get('strict ECO4', 0) * eco4_rate + to_append.get('strict GBIS', 0) * gbis_rate +
-                to_append.get('GBIS - Upgradable to ECO4', 0) * gbis_rate
-            ),
-            "Of which: Subject to CIGA": (
-                to_append.get("ECO4 - Filled cavity - subject to CIGA check", 0) * eco4_rate +
-                to_append.get("GBIS - Filled cavity - subject to CIGA check", 0) * gbis_rate
-            ),
-            "Of which: Prospect, not perfect strict prospect": (
-                to_append.get("ECO4 prospect - empty cavity, loft insulation below regulation", 0) * eco4_rate +
-                to_append.get("ECO4 if SAP downgrade", 0) * eco4_rate
-            ),
-            "Of which: Potential downgrade to GBIS": to_append["Looks like GBIS"] * eco4_rate,
-            "Of which: Does not look like prospect": (
-                to_append.get("Does not look like ECO4 candidate", 0) * eco4_rate +
-                to_append.get("Does not look like GBIS candidate", 0) * gbis_rate
-            ),
-            "Surplus: Unidentified properties": eco4_surplus.shape[0] * eco4_rate + gbis_surplus.shape[0] * gbis_rate,
-            "Surplus: GBIS Updates to ECO4": to_append.get("GBIS - Upgradable to ECO4", 0) * (eco4_rate - gbis_rate)
-        }
-
-        # Perform a quick check:
-        if revenue_to_append["£ Remaining from asset list"] - (
-            revenue_to_append["Of which: Strict"] + revenue_to_append["Of which: Subject to CIGA"] +
-            revenue_to_append["Of which: Prospect, not perfect strict prospect"] +
-            revenue_to_append["Of which: Potential downgrade to GBIS"] +
-            revenue_to_append["Of which: Does not look like prospect"]
-        ) > 1:
-            raise ValueError("Error between top level revenue figures and breakdown - investigate me")
-
-        ha_revenue_results.append(revenue_to_append)
-
     ha_analysis_results = pd.DataFrame(ha_analysis_results)
-    ha_revenue_results = pd.DataFrame(ha_revenue_results)
+    ha_analysis_results.columns = pd.MultiIndex.from_tuples(ha_analysis_results.columns)
 
+    facts_and_figures = loader.facts_and_figures.copy()
+    facts_and_figures["ha_number"] = facts_and_figures["HA Name"].str.extract(r'(\d+)').astype(int)
+    facts_and_figures = facts_and_figures.sort_values("ha_number")
+    facts_and_figures = facts_and_figures.drop(columns=["ha_number"])
+
+    # Rename some of the cols
+    facts_and_figures = facts_and_figures.rename(
+        columns={
+            # ECO4 cols
+            "ECO4": "ECO4 - December",
+            "GBIS": "GBIS - December",
+            "eco4 (subject to ciga)": "ECO4 - subject to ciga",
+            "eco4": "ECO4 - doesn't need CIGA",
+            "eco4 - passed ciga": "ECO4 - passed CIGA",
+            "failed ciga": "ECO4 - failed CIGA",
+            "ECO4 - partially cancelled": "ECO4 - Install downgrade to GBIS",
+            "ECO4 - in progress": "ECO4 - Install in progress",
+            "ECO4 - cancelled": "ECO4 - Install cancelled",
+            # GBIS cols
+            "gbis": "GBIS total (asset list)"
+        }
+    )
+    # We calculate the eco4 total from the asset list
+    # 1) If ciga checks have been completed (i.e. ECO4 - passed ciga > 0) this sum is
+    # ECO4 - doesn't need CIGA + ECO4 - passed CIGA
+    # 2) if ciga checks haven't been completed (i.e. ECO4 - passed ciga is missing), this sum is
+    # ECO4 - doesn't need CIGA + ECO4 - subject to ciga
+    facts_and_figures["ECO4 total (asset list)"] = np.where(
+        facts_and_figures["ECO4 - passed CIGA"] > 0,
+        facts_and_figures["ECO4 - doesn't need CIGA"] + facts_and_figures["ECO4 - passed CIGA"],
+        facts_and_figures["ECO4 - doesn't need CIGA"] + facts_and_figures["ECO4 - subject to ciga"]
+    )
+
+    # Re-arrange the columns
+    facts_and_figures = facts_and_figures[
+        [
+            'HA Name',
+            'ECO4 - December',
+            'GBIS - December',
+            'ECO4 total (asset list)',
+            'GBIS total (asset list)',
+            'ECO4 - subject to ciga',
+            "ECO4 - doesn't need CIGA",
+            'ECO4 - passed CIGA',
+            'ECO4 - failed CIGA',
+            'ECO4 - installed',
+            'ECO4 - Install in progress',
+            'ECO4 - Install cancelled',
+            'ECO4 - partially installed',
+            'ECO4 - Install downgrade to GBIS',
+        ]
+    ]
+    # Add a note to flag any rows where both
+    # (ECO4 - subject to ciga is greater than 0) and (ECO4 - passed CIGA is greater than 0),
+    # reporting the number of ECO4 properties still needing a CIGA check as a potential opportunity
+    facts_and_figures["Missed CIGA checks opportunity"] = None
+    facts_and_figures["Missed CIGA checks opportunity"] = np.where(
+        (facts_and_figures["ECO4 - subject to ciga"] > 0) & (facts_and_figures["ECO4 - passed CIGA"] > 0),
+        "potential opportunity of " + facts_and_figures["ECO4 - subject to ciga"].astype(
+            str) + " ECO4 properties needing a CIGA check",
+        facts_and_figures["Missed CIGA checks opportunity"]
+    )
+
+    # Re-arrange the columns
+
+    # Also sort ha_analysis_results by ha number
+    ha_analysis_results["ha_number"] = ha_analysis_results[("", "HA Name")].str.extract(r'(\d+)').astype(int)
+    ha_analysis_results = ha_analysis_results.sort_values("ha_number")
+    ha_analysis_results = ha_analysis_results.drop(columns=["ha_number"])
+
+    # We save 2 sheets
     # Automate creation of the excel
     # Create a Pandas Excel writer using XlsxWriter as the engine
-    with pd.ExcelWriter('HA Analysis - batch3.xlsx', engine='xlsxwriter') as writer:
+    with pd.ExcelWriter('HA Analysis Results.xlsx', engine='xlsxwriter') as writer:
         # Write each dataframe to a different worksheet without the index
-        for df, sheet in [(ha_revenue_results, 'Total Revenue'),
-                          (ha_analysis_results, 'By ECO4 and GBIS')]:
+        for df, sheet in [(facts_and_figures, 'HA Facts and Figures'),
+                          (ha_analysis_results, 'Asset Identification')]:
 
-            df.to_excel(writer, sheet_name=sheet, index=False)
+            df.to_excel(writer, sheet_name=sheet)
 
             # Auto-adjust columns' width
             for i, width in enumerate(get_col_widths(df)):
@@ -2134,7 +2064,7 @@ def app():
     # Determines if we want to use the cached data in s3
     use_cache = True
     # Determines if we want to perform the data pull
-    pull_data = True
+    pull_data = False
 
     # List all of the data in the folder
     directories = [str(list(entry.iterdir())[0]) for entry in DATA_FOLDER.iterdir() if entry.is_dir()]
@@ -2173,43 +2103,3 @@ def app():
         floor_area_decile_thresholds=floor_area_decile_thresholds,
         pull_data=pull_data
     )
-
-    # for ha_name, datasets in outputs.items():
-    #     datasets["results_df"] = datasets["results_df"].drop(
-    #         columns=["eligibility_cavity_type", "eligibility_loft_type"]
-    #     )
-    #
-    #     # Re-do
-    #     res = []
-    #     for _, row in tqdm(datasets["results_df"].iterrows(), total=datasets["results_df"].shape[0]):
-    #         epc = {
-    #             "walls-description": row["walls"],
-    #             "roof-description": row["roof"],
-    #             "floor-description": "",
-    #             "tenure": "",
-    #             "current-energy-efficiency": row["sap"],
-    #         }
-    #         eligibility = Eligibility(epc=epc, cleaned=cleaned)
-    #         eligibility.check_eco4_warmfront()
-    #         res.append(
-    #             {
-    #                 "row_id": row["row_id"],
-    #                 "eligibility_cavity_type": eligibility.eco4_warmfront["cavity_type"],
-    #                 "eligibility_loft_type": eligibility.eco4_warmfront["loft_type"]
-    #             }
-    #         )
-    #
-    #     # Merge back on
-    #     res = pd.DataFrame(res)
-    #     datasets["results_df"] = datasets["results_df"].merge(res, how="left", on="row_id")
-    #
-    #     # Re-save in s3
-    #     save_pickle_to_s3(
-    #         data={
-    #             "results_df": datasets["results_df"],
-    #             "scoring_df": datasets["scoring_df"],
-    #             "nodata": datasets["nodata"]
-    #         },
-    #         bucket_name="retrofit-datalake-dev",
-    #         s3_file_name=f"ha-analysis/{ha_name}/processed_results.pickle"
-    #     )