From 724379a86d1bd9b79159f2f8f9e5d8abe9496f5f Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 26 Mar 2024 18:05:08 +0000
Subject: [PATCH] wrapping up ha analysis

---
 .../ha_15_32/ha_analysis_batch_3.py           | 170 ++++++++++--------
 1 file changed, 94 insertions(+), 76 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 2f17ed73..e414cd00 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -5366,6 +5366,7 @@ def forecast_remaining_sales(loader):
 
     results = []
     for ha_name, input_data in loader.data.items():
+
         # Original warmfront figures - ECO4
         original_warmfront_estimates = december_figures[december_figures["HA Name"] == ha_name]
         if original_warmfront_estimates.empty:
@@ -6032,7 +6033,7 @@ def forecast_remaining_sales(loader):
 def fml_data_pull(loader):
     has_bruh = [
         "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13",
-        "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12",
+        "HA50", "HA24", "HA15", "HA32", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12",
         "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49",
         'HA8', 'HA11', 'HA21', 'HA37', 'HA42', 'HA44', 'HA45', 'HA51', 'HA52', "HA17", "HA5", "HA20",
     ]
@@ -6129,7 +6130,7 @@ def fml_analysis(loader):
     assumed_ciga_pass_rate = 0.731
     has_bruh = [
         "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13",
-        "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12",
+        "HA50", "HA24", "HA15", "HA32", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12",
         "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49",
         'HA8', 'HA11', 'HA21', 'HA37', 'HA42', 'HA44', 'HA45', 'HA51', 'HA52', "HA17", "HA5", "HA20",
     ]
@@ -6738,89 +6739,106 @@ def app():
     loader = DataLoader(directories, december_figures_filepath, use_cache, rebuild_inputs)
     loader.load()
     loader.ha_facts_and_figures()
-
     forecast_remaining_sales(loader)
 
-    conversion_rate = 0.95
-    archetype_check_conversion = 0.7
-    res = []
-    for k, v in loader.data.items():
-        asset_list = v["asset_list"].copy()
-        agg = asset_list["ECO Eligibility"].value_counts()
-        # We find a case where there are properties that have passed CIGA
-        if not any("passed" in x for x in agg.index):
+    # gbis rate
+    # breakdowns = []
+    # for ha, data_assets in loader.data.items():
+    #     asset_list = data_assets["asset_list"].copy()
+    #     breakdown = asset_list["ECO Eligibility"].value_counts().to_dict()
+    #     breakdowns.append(breakdown)
+    # breakdowns = pd.DataFrame(breakdowns)
+    #
+    # installer = []
+    # for ha, data_assets in loader.data.items():
+    #     survey_list = data_assets["survey_list"]
+    #     if survey_list.empty:
+    #         continue
+    #     if "INSTALLER" not in survey_list.columns:
+    #         continue
+    #
+    #     installers = survey_list["INSTALLER"].value_counts().to_dict()
+    #     installers["ha_name"] = ha
+    #     installer.append(installers)
+    # installer = pd.DataFrame(installer)
+    # installer.drop(columns=["ha_name"]).sum().sum()
+
+    # Adhoc - for HA16, get the properties that still need a CIGA check
+    asset_list_ha16 = loader.data["HA16"]["asset_list"].copy()
+    ha_16_need_ciga = asset_list_ha16[
+        asset_list_ha16["ECO Eligibility"].str.contains("subject to ciga")
+    ]
+    completed_cigas = loader.data["HA16"]["ciga_list"].copy()
+    # Store the results
+    ha_16_need_ciga.to_csv("ha16_need_ciga.csv")
+    completed_cigas.to_csv("ha16_completed_cigas.csv")
+
+    # Adhoc - look at the current pipeline and identify how many dormant, CIGA dependent properties there are for
+    # live projects
+
+    # Read excel
+    orderbook_filepath = "local_data/ha_data/Warmfront HA client order book overview_20240129.xlsx"
+    orderbook_workbook = openpyxl.load_workbook(orderbook_filepath)
+    orderbook_sheet = orderbook_workbook["Contractual Info"]
+    orderbook_colnames = [cell.value for cell in orderbook_sheet[1]]
+
+    rows = []
+    for row in orderbook_sheet.iter_rows(min_row=2, values_only=False):
+        row_data = [cell.value for cell in row]  # This will get you the cell values
+        rows.append(row_data)
+
+    orderbook = pd.DataFrame(rows, columns=orderbook_colnames)
+    live_orderbook = orderbook[orderbook["Live, New, or Historic?"] == "LIVE"].copy()
+    live_orderbook['Redacted HA'] = live_orderbook['Redacted HA'].str.replace(" ", "")
+
+    dormant_properties = []
+    missed_has = []
+    for _, customer in live_orderbook.iterrows():
+        if customer['Redacted HA'] not in loader.data.keys():
+            missed_has.append(customer['Redacted HA'])
             continue
+        asset_list = loader.data[customer['Redacted HA']]["asset_list"].copy()
+        survey_list = loader.data[customer['Redacted HA']]["survey_list"].copy()
+        # Remove sold
+        if not survey_list.empty:
+            survey_list = survey_list[~pd.isnull(survey_list["asset_list_row_id"])]
+            asset_list = asset_list.merge(
+                survey_list[["asset_list_row_id", "installation_status"]],
+                how="left",
+                on="asset_list_row_id"
+            )
+            # Any row with an installation status has already gone to installation, so it is not remaining
+            asset_list = asset_list[pd.isnull(asset_list["installation_status"])]
+            asset_list = asset_list.drop(columns=["installation_status"])
 
-        agg = pd.DataFrame(agg).reset_index()
-
-        passed_ciga = agg[agg["ECO Eligibility"] == "eco4 - passed ciga"]
-        passed_ciga = passed_ciga["count"].values[0] if not passed_ciga.empty else 0
-
-        failed_ciga = agg[agg["ECO Eligibility"] == "failed ciga"]
-        failed_ciga = failed_ciga["count"].values[0] if not failed_ciga.empty else 0
-
-        ciga_pass_rate = passed_ciga / (passed_ciga + failed_ciga) if (passed_ciga + failed_ciga) > 0 else 1
-
-        dormant_ciga = agg[
-            agg["ECO Eligibility"].str.contains("subject to ciga") &
-            ~agg["ECO Eligibility"].str.contains("subject to archetype")
+        # We pull out the properties that need a CIGA check, an archetype check, or both
+        need_ciga = asset_list[asset_list["ECO Eligibility"] == "eco4 (subject to ciga)"]
+        need_archetype = asset_list[asset_list["ECO Eligibility"] == "eco4 (subject to archetype)"]
+        need_ciga_and_archetype = asset_list[
+            asset_list["ECO Eligibility"] == "eco4 (subject to ciga) (subject to archetype)"
             ]
 
-        dormant_ciga = dormant_ciga['count'].values[0] if not dormant_ciga.empty else 0
-
-        dormant_ciga_archetype = agg[
-            agg["ECO Eligibility"].str.contains("subject to ciga") &
-            agg["ECO Eligibility"].str.contains("subject to archetype")
-            ]
-
-        dormant_ciga_archetype = dormant_ciga_archetype['count'].values[0] if not dormant_ciga_archetype.empty else 0
-
-        needing_check = dormant_ciga + dormant_ciga_archetype * archetype_check_conversion
-        needing_check = np.round(needing_check)
-
-        additional_jobs = (dormant_ciga * ciga_pass_rate * conversion_rate) + (
-            dormant_ciga_archetype * archetype_check_conversion * ciga_pass_rate * conversion_rate
-        )
-        additional_jobs = np.round(additional_jobs)
-
-        # We attempt to estimate the uplift and how much of that is attributed to surplus subject to ciga jobs
-        original_estimate = loader.december_figures[
-            loader.december_figures["HA Name"] == k
-            ]
-
-        original_estimate = original_estimate["ECO4"].values[0] if not original_estimate.empty else 0
-        base_eco_figures = agg[
-            agg["ECO Eligibility"].isin(["eco4", "eco4 - passed ciga"])
-        ]["count"].sum()
-        eco4_from_ciga = original_estimate - base_eco_figures
-        eco4_from_ciga = eco4_from_ciga if eco4_from_ciga > 0 else 0
-        surplus_from_dormant = additional_jobs - eco4_from_ciga
-        surplus_from_dormant = 0 if surplus_from_dormant < 0 else surplus_from_dormant
-
-        res.append(
+        dormant_properties.append(
             {
-                "ha_name": k,
-                "additional_eco4": additional_jobs,
-                "needing_check": needing_check,
-                "surplus_from_dormant": surplus_from_dormant
+                "HA Name": customer['Redacted HA'],
+                "Need CIGA": need_ciga.shape[0],
+                "Need Archetype": need_archetype.shape[0],
+                "Need CIGA and Archetype": need_ciga_and_archetype.shape[0]
             }
         )
 
-    res = pd.DataFrame(res)
-    # Drop the HAs that are not in that pervious draft
-    # In the v2 draft, there are 12 HAs
+    dormant_properties = pd.DataFrame(dormant_properties)
+    totals = dormant_properties.sum()
+    totals["HA Name"] = "Total"
 
-    v5_surplus = res[
-        ~res["ha_name"].isin(["HA9"])
-    ]["additional_eco4"].sum()
-    # 7212 properties
-    # This is not a perfect difference though, because of the variations in how the numbers are recorded in the November
-    # all HAs sheet. E.g for HA 107, there were 1239 properties identified. In the postcode list, there are 1255,
-    # however 531 are still needing a CIGA check. Therefore their original figures, in this case, included properties
-    # pre-CIGA
+    dormant_properties = pd.concat([dormant_properties, totals.to_frame().T])
+    dormant_properties.to_csv("dormant_properties.csv")
 
-    v5_surplus_from_dormant = res[
-        ~res["ha_name"].isin(["HA9"])
-    ]["surplus_from_dormant"].sum()
-    # 5539.0
-    # 9471690
+    loader.december_figures["ECO4 remaining"].sum()
+    december_figures = loader.december_figures.copy()
+    december_figures["ECO4 remaining"] = np.where(
+        december_figures["ECO4 remaining"] < 0,
+        0,
+        december_figures["ECO4 remaining"]
+    )
+    december_figures["ECO4 remaining"].sum()