From 74ce1627ec934c1f023c4e025bf0a89f99ac7f82 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 17 Jan 2026 13:00:00 +0000
Subject: [PATCH] peabody specific

---
 .../k_deck_stats.py                           |  63 +++++++++--
 .../n_fixing_already_installed_bug.py         |   3 +-
 .../o_rerunning_iwi_jobs.py                   |  41 +++++++
 sfr/principal_pitch/2_export_data.py          | 101 ++++++++++++++++--
 4 files changed, 193 insertions(+), 15 deletions(-)
 create mode 100644 etl/customers/peabody/Nov 2025 Consulting Project/o_rerunning_iwi_jobs.py

diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py b/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py
index cd7fba63..b6fc0f8f 100644
--- a/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py	
+++ b/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py	
@@ -114,14 +114,16 @@ from backend.app.db.models.recommendations import Recommendation, Plan, PlanReco
 from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
 from collections import defaultdict
 
-PORTFOLIO_ID = 434  # Peabody
+PORTFOLIO_ID = 435  # Peabody
 SCENARIOS = [
-    904,
-    905
+    908,
+    909,
+    910,
 ]
 scenario_names = {
-    904: "EPC C - no solid floor, ashp 3.0",
-    905: "EPC B - no solid floor, ashp 3.0",
+    908: "EPC C - no solid floor, ashp 3.0",
+    909: "EPC C - no solid floor, no EWI or IWI, ashp 3.0",
+    910: "EPC B - no solid floor, no EWI, ashp 3.0"
 }
 
 
@@ -232,9 +234,58 @@ properties_data, plans_data, recommendations_data = get_data(
 
 recommendations_df = pd.DataFrame(recommendations_data)
 properties_df = pd.DataFrame(properties_data)
+plans_df = pd.DataFrame(plans_data)
 
-solar_pv_recommendations = recommendations_df[recommendations_df["measure_type"] == "solar_pv"]
+s_id = 910
+ps_w_a_plan = plans_df[plans_df["scenario_id"] == s_id].copy()
+# Keep only the newest plan (by created_at) for each property in scenario s_id
+ps_w_a_plan = ps_w_a_plan.sort_values("created_at", ascending=False).drop_duplicates(
+    subset=["property_id"]
+)
+z = ps_w_a_plan[
+    ps_w_a_plan["cost_of_works"] > 0
+    ].copy()
+# .copy() so the total_bills assignment below writes to an independent frame
+# rather than a slice of properties_df (avoids pandas' SettingWithCopyWarning).
+z2 = properties_df[properties_df["property_id"].isin(z["property_id"].values)].copy()
+# Current total bill = the four usage costs plus both standing charges.
+z2["total_bills"] = z2["heating_cost_current"] + z2["hot_water_cost_current"] + z2["lighting_cost_current"] + z2[
+    "appliances_cost_current"
+] + z2["gas_standing_charge"] + z2["electricity_standing_charge"]
+
+from tqdm import tqdm
+
+# Find the first property whose no-EWI/IWI plan (909) costs more than its EWI plan (908),
+# reaches EPC C, and whose EWI plan costs over 5000; stop there with pid/a/b left in scope.
+pids = properties_df["property_id"].unique()
+for pid in tqdm(pids):
+    if pid in [603272, 550550, 574493]:
+        continue
+    # Newest plan per scenario for this property
+    property_plan = plans_df[plans_df["property_id"] == int(pid)]
+    property_plan = property_plan.sort_values("created_at", ascending=False).drop_duplicates(
+        subset=["scenario_id"]
+    )
+    a = property_plan[property_plan["scenario_id"] == 909].squeeze()  # no EWI/IWI
+    b = property_plan[property_plan["scenario_id"] == 908].squeeze()  # EWI
+    if a.empty or b.empty:  # property lacks a plan in one scenario; nothing to compare
+        continue
+    if (a["cost_of_works"] > b["cost_of_works"]) and (
+        a["post_epc_rating"].value == "C") and (b["cost_of_works"] > 5000):
+        break  # was a bare undefined name (`bah`) that halted the script via NameError
+
+solar_pv_recommendations = recommendations_df[
+    recommendations_df["measure_type"] == "solar_pv"
+    ]
+
+solid_wall_recommendation = recommendations_df[
+    recommendations_df["scenario_id"].isin([908]) &
+    recommendations_df["measure_type"].isin(["internal_wall_insulation"]) &
+    recommendations_df["default"]
+    ]
 average_savings = solar_pv_recommendations.groupby("scenario_id")["energy_cost_savings"].mean().reset_index()
+# Add on scenario names
+average_savings["scenario_name"] = average_savings["scenario_id"].map(scenario_names)
 
 # Check tenures
 initial_asset_data = pd.read_excel(
diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/n_fixing_already_installed_bug.py b/etl/customers/peabody/Nov 2025 Consulting Project/n_fixing_already_installed_bug.py
index 4bd11a1b..d07f1ac1 100644
--- a/etl/customers/peabody/Nov 2025 Consulting Project/n_fixing_already_installed_bug.py	
+++ b/etl/customers/peabody/Nov 2025 Consulting Project/n_fixing_already_installed_bug.py	
@@ -11,7 +11,6 @@ from etl.customers.cambridge.surveys import current_epc
 with db_session() as session:
     # We need installed measures, where the measure type is ewi or iwi
     installed_measures = session.query(InstalledMeasure).filter(
-        InstalledMeasure.measure_type.in_(["cavity_wall_insulation"])
     ).all()
     # Get the uprns
     installed_uprns = [x.uprn for x in installed_measures]
@@ -32,7 +31,7 @@ needing_retry = sal[sal["epc_os_uprn"].isin(installed_uprns)]
 # Store
 needing_retry.to_excel(
     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final "
-    "SAL/properties_needing_retry_20260115 - cavity wall insulation.xlsx",
+    "SAL/properties_needing_retry_20260115 - all already installed.xlsx",
     sheet_name="Standardised Asset List",
     index=False
 )
diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/o_rerunning_iwi_jobs.py b/etl/customers/peabody/Nov 2025 Consulting Project/o_rerunning_iwi_jobs.py
new file mode 100644
index 00000000..39eccb0b
--- /dev/null
+++ b/etl/customers/peabody/Nov 2025 Consulting Project/o_rerunning_iwi_jobs.py	
@@ -0,0 +1,41 @@
+"""Build the retry asset list for properties with an IWI recommendation.
+
+Collects UPRNs with a non-null ``internal_wall_insulation`` value in the scenario
+exports and writes the matching rows of the final asset list to a new workbook.
+"""
+import pandas as pd
+
+# Only the two exports that are inspected for IWI are read; the
+# "no EWI or IWI" workbook is not used anywhere in this script.
+r1 = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC B - no "
+    "solid floor, no EWI, ashp 3.0 - 20250113 final.xlsx"
+)
+r2 = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no "
+    "solid floor, ashp 3.0 - 20250113 final.xlsx"
+)
+
+# Rows where an IWI value is present in each export
+s1 = r1[r1["internal_wall_insulation"].notna()]
+s2 = r2[r2["internal_wall_insulation"].notna()]
+
+# De-duplicated UPRNs across both exports
+uprns = list(set(s1["uprn"].tolist() + s2["uprn"].tolist()))
+
+# Create SAL of these uprns
+sal = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260113 - "
+    "final asset list.xlsx",
+    sheet_name="Standardised Asset List"
+)
+
+needing_retry = sal[sal["epc_os_uprn"].isin(uprns)]
+
+# Store
+needing_retry.to_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final "
+    "SAL/properties_needing_retry_20260115 - internal wall insulation.xlsx",
+    sheet_name="Standardised Asset List",
+    index=False
+)
diff --git a/sfr/principal_pitch/2_export_data.py b/sfr/principal_pitch/2_export_data.py
index f12eb85d..2184d074 100644
--- a/sfr/principal_pitch/2_export_data.py
+++ b/sfr/principal_pitch/2_export_data.py
@@ -11,6 +11,7 @@ from backend.app.db.models.recommendations import Recommendation, Plan, PlanReco
 from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel, PropertyDetailsSpatial
 from backend.app.db.functions.materials_functions import get_materials
 from collections import defaultdict
+from sqlalchemy import func
 
 # PORTFOLIO_ID = 206
 # SCENARIOS = [389]
@@ -57,9 +58,44 @@ def get_data(portfolio_id, scenario_ids):
     # --------------------
     # Plans
     # --------------------
-    plans_query = session.query(Plan).filter(
-        Plan.scenario_id.in_(scenario_ids)
-    ).all()
+    latest_plans_subq = (
+        session.query(
+            Plan.scenario_id,
+            Plan.property_id,
+            func.max(Plan.created_at).label("latest_created_at")
+        )
+        .filter(Plan.scenario_id.in_(scenario_ids))
+        .group_by(
+            Plan.scenario_id,
+            Plan.property_id
+        )
+        .subquery()
+    )
+
+    # plans_query = session.query(Plan).filter(
+    #     Plan.scenario_id.in_(scenario_ids)
+    # ).all()
+
+    plans_query = (
+        session.query(Plan)
+        .join(
+            latest_plans_subq,
+            (Plan.scenario_id == latest_plans_subq.c.scenario_id) &
+            (Plan.property_id == latest_plans_subq.c.property_id) &
+            (Plan.created_at == latest_plans_subq.c.latest_created_at)
+        )
+        .all()
+    )
+
+    # plans_query = (
+    #     session.query(Plan)
+    #     .join(
+    #         latest_plans_subq,
+    #         (Plan.scenario_id == latest_plans_subq.c.scenario_id) &
+    #         (Plan.created_at == latest_plans_subq.c.latest_created_at)
+    #     )
+    #     .all()
+    # )
 
     plans_data = [
         {col.name: getattr(plan, col.name) for col in Plan.__table__.columns}
@@ -73,7 +109,8 @@ def get_data(portfolio_id, scenario_ids):
     # --------------------
     recommendations_query = session.query(
         Recommendation,
-        Plan.scenario_id
+        Plan.scenario_id,
+        PlanRecommendations.plan_id
     ).join(
         PlanRecommendations,
         Recommendation.id == PlanRecommendations.recommendation_id
@@ -216,6 +253,7 @@ for scenario_id in SCENARIOS:
         [
             "landlord_property_id", "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof",
             "heating", "windows", "current_epc_rating", "current_sap_points", "total_floor_area", "number_of_rooms",
+            "id"
         ]
     ].merge(
         recommendations_measures_pivot, how="left", on="property_id"
@@ -223,17 +261,42 @@ for scenario_id in SCENARIOS:
         post_install_sap, how="left", on="property_id"
     )
 
-    df = df.drop(columns=["property_id"])
+    # df = df.drop(columns=["property_id"])
     df["sap_points"] = df["sap_points"].fillna(0)
 
     df["predicted_post_works_sap"] = df["current_sap_points"] + df["sap_points"]
-    df["predicted_post_works_sap"] = df["predicted_post_works_sap"].round()
+    df["predicted_post_works_sap"] = df["predicted_post_works_sap"]
     df["predicted_post_works_epc"] = df["predicted_post_works_sap"].apply(lambda x: sap_to_epc(x))
     df["uprn"] = df["uprn"].astype(str)
 
+    relevant_plans = plans_df[plans_df["scenario_id"] == scenario_id]
+    df2 = df.merge(
+        relevant_plans[["property_id", "post_sap_points", "post_epc_rating"]], how="left", on="property_id",
+        suffixes=("", "_plan")
+    )
+    print(df2["predicted_post_works_epc"].value_counts())
+    print(df2["post_epc_rating"].value_counts())
+
+    z = df2[
+        (df2["predicted_post_works_epc"] != "D") &
+        (df2["post_epc_rating"].astype(str) == "Epc.D")
+        ]
+
+    df2["predicted_post_works_epc"].value_counts()
+    df2["post_epc_rating"].astype(str).value_counts()
+
+    df2[df2["total_retrofit_cost"] > 0].shape
+
+    getting_works = df[df["total_retrofit_cost"] > 0]
+    getting_works["predicted_post_works_epc"].value_counts()
+
+    32565 / getting_works.shape[0]
+
+    df[df["predicted_post_works_sap"] == ""]
+
     # Create excel to store to
     filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
-                f"Project/Final SAL/{scenario_names[scenario_id]} - 20250113 final.xlsx")
+                f"Project/Final SAL/scenarios/{scenario_names[scenario_id]} - 20250114 final.xlsx")
     with pd.ExcelWriter(filename) as writer:
         df.to_excel(writer, sheet_name="properties", index=False)
 
@@ -388,3 +451,27 @@ asset_list.to_excel(
 condition_cost_comparison = asset_list[
     ["condition_score", "decoration_sum_min ", "decoration_sum_max", "domna_condition_cost"]
 ]
+
+# Testing
+plans_df.head()
+
+example = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final "
+    "SAL/scenarios/EPC C - no solid floor, no EWI or IWI, ashp 3.0 - 20250114 final.xlsx"
+)
+
+plans_df2 = plans_df.merge(
+    properties_df[["property_id", "landlord_property_id"]],
+    left_on="property_id",
+    right_on="property_id",
+    how="left"
+)
+
+plans_df2 = plans_df2[plans_df2["scenario_id"] == 909]
+
+dupes = plans_df2[plans_df2["property_id"].duplicated()]
+
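+# merge on plans — NOTE(review): no `on=`, so pandas joins on every shared column; plans_df2 (909 only) may be intended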
+example = example.merge(
+    plans_df, how="left",
+)