From bae3e13e219b47c04acbc5bcdf02262a9c8faab4 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 12 Jan 2026 13:51:28 +0000
Subject: [PATCH 1/2] minor peabody output work

---
 .../i_testing_parity_data.py                  |  74 +++++-
 .../k_deck_stats.py                           | 236 ++++++++++++++++++
 sfr/principal_pitch/2_export_data.py          |  27 +-
 3 files changed, 317 insertions(+), 20 deletions(-)
 create mode 100644 etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py

diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/i_testing_parity_data.py b/etl/customers/peabody/Nov 2025 Consulting Project/i_testing_parity_data.py
index c6fb86ea..41613bc3 100644
--- a/etl/customers/peabody/Nov 2025 Consulting Project/i_testing_parity_data.py	
+++ b/etl/customers/peabody/Nov 2025 Consulting Project/i_testing_parity_data.py	
@@ -1,8 +1,10 @@
 import pandas as pd
 
 df = pd.read_excel(
-    "/Users/khalimconn-kowlessar/Downloads/Parity Data 08012026.xlsx"
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/Parity Data "
+    "08012026.xlsx"
 )
+df["wall_combined"] = df["Wall Construction"] + "+" + df["Wall Insulation"].fillna("Unknown Insulation")
 
 df['SAP Score'].mean()
 
@@ -18,4 +20,72 @@ df["SAP Band"].value_counts(normalize=True)
 z = df[df["SAP Band"] != df["Lodged EPC Band"]]
 agg = z.groupby(["Lodged EPC Band", "SAP Band"]).size().reset_index(name="count")
 
-zz = z[z["Lodged EPC Band"] == "A"]
+recommendations_epc_c = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no "
+    "solid floor, ashp 3.0 - corrected.xlsx"
+)
+recommendations_epc_c["uprn"] = recommendations_epc_c["uprn"].astype(int).astype(str)
+
+# Pair each recommendation row with its parity-data row by UPRN (default inner merge).
+combined = recommendations_epc_c.merge(
+    df,
+    left_on="uprn",
+    right_on="UPRN",
+    suffixes=("_rec", "_sal")
+)
+# Keep every column the checks below read, including "SAP Band" and "current_epc_rating".
+combined = combined[["uprn", "SAP Score", "SAP Band", "current_sap_points", "current_epc_rating", "walls",
+                     "wall_combined"]]
+combined[combined["SAP Score"] < 69]["current_epc_rating"].value_counts()
+combined[combined["SAP Score"] < 69]["SAP Band"].value_counts()
+combined[combined["SAP Score"] < 69].shape
+combined[combined["current_sap_points"] < 69]
+combined["SAP Band"].value_counts()
+
+# Our Cs
+combined_cs = combined[combined["SAP Score"] < 69]
+combined_cs["SAP Band"].value_counts()
+# Their C and below
+
+# Recommendation rows whose current_sap_points is under 69.
+compare = recommendations_epc_c[recommendations_epc_c["current_sap_points"] < 69]
+# Rows with a positive total_retrofit_cost, counted by current_epc_rating.
+packages = recommendations_epc_c[recommendations_epc_c["total_retrofit_cost"] > 0]
+packages["current_epc_rating"].value_counts()
+# TODO: 612 units
+23219 - 612  # scratch arithmetic; the result is not stored
+# Rows already at 69 or more points that still carry a retrofit cost, and the sum of that cost.
+errors = recommendations_epc_c[
+    (recommendations_epc_c["current_sap_points"] >= 69) &
+    (recommendations_epc_c["total_retrofit_cost"] > 0)
+    ]
+errors["total_retrofit_cost"].sum()
+# Same filter as `compare`; this copy is the one merged onto the parity data further down.
+below_epc_c = recommendations_epc_c[recommendations_epc_c["current_sap_points"] < 69]
+
+# Join the sub-69 recommendation rows onto the parity data by UPRN.
+below_epc_c_compare = below_epc_c.merge(df, left_on="uprn", right_on="UPRN", suffixes=("_rec", "_sal"))
+
+# eg1: joined rows whose parity-data "SAP Band" is "C" although current_sap_points is below 69.
+eg1 = below_epc_c_compare.loc[below_epc_c_compare["SAP Band"] == "C"].copy()
+eg1["wall_combined"].value_counts()
+
+# Pairings of "walls" with the parity-data "wall_combined", most frequent first.
+eg1_counts = (
+    eg1.groupby(["walls", "wall_combined"])
+    .size()
+    .reset_index(name="count")
+    .sort_values("count", ascending=False)
+)
+
+# "Solid Brick+External" rows that have no internal_wall_insulation value.
+is_solid_brick_external = eg1["wall_combined"] == "Solid Brick+External"
+externally_insulated = eg1[is_solid_brick_external & eg1["internal_wall_insulation"].isna()]
+externally_insulated[externally_insulated.index == 823]["uprn"]
+# Count rows with current_sap_points strictly between 68 and 69.
+recommendations_epc_c[
+    (recommendations_epc_c["current_sap_points"] < 69) &
+    (recommendations_epc_c["current_sap_points"] > 68)
+    ].shape
+# NOTE(review): "wall_combined" is only added to df above - confirm this export has it, else KeyError.
+recommendations_epc_c[recommendations_epc_c["wall_combined"] == ""]
diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py b/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py
new file mode 100644
index 00000000..5200c34d
--- /dev/null
+++ b/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py	
@@ -0,0 +1,236 @@
+import pandas as pd
+
+epc_c_recommendations = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no "
+    "solid floor, ashp 3.0 - corrected.xlsx"
+)
+epc_b_recommendations = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC B - no "
+    "solid floor, ashp 3.0 - corrected.xlsx"
+)
+
+# Rows of the EPC B export whose current_epc_rating is "Epc.C", and their property-type mix.
+is_currently_c = epc_b_recommendations["current_epc_rating"] == "Epc.C"
+epc_c_movers = epc_b_recommendations[is_currently_c]
+epc_c_movers["property_type"].value_counts()
+
+# Split those rows by property type.
+house_epc_c_movers = epc_c_movers[epc_c_movers["property_type"] == "House"]
+flat_epc_c_movers = epc_c_movers[epc_c_movers["property_type"] == "Flat"]
+
+# Houses with a non-null value in either solar column.
+has_solar = (
+    house_epc_c_movers["solar_pv"].notna()
+    | house_epc_c_movers["solar_pv_with_battery"].notna()
+)
+house_epc_c_movers_with_solar = house_epc_c_movers[has_solar]
+
+# Houses with a non-null air_source_heat_pump value.
+has_heat_pump = house_epc_c_movers["air_source_heat_pump"].notna()
+house_epc_c_movers_with_a_heatpump = house_epc_c_movers[has_heat_pump]
+# Mean of the "sap_points" column in each scenario export (the second line previously repeated EPC C).
+epc_c_recommendations["sap_points"].mean()
+epc_b_recommendations["sap_points"].mean()
+
+# Measure columns of the scenario exports that get unpivoted below.
+measure_cols = [
+    "air_source_heat_pump",
+    "boiler_upgrade",
+    "cavity_wall_insulation",
+    "double_glazing",
+    "external_wall_insulation",
+    "flat_roof_insulation",
+    "high_heat_retention_storage_heaters",
+    "internal_wall_insulation",
+    "loft_insulation",
+    "low_energy_lighting",
+    "mechanical_ventilation",
+    "room_roof_insulation",
+    "roomstat_programmer_trvs",
+    "sealing_open_fireplace",
+    "secondary_glazing",
+    "secondary_heating",
+    "solar_pv",
+    "solar_pv_with_battery",
+    "suspended_floor_insulation",
+    "time_temperature_zone_control",
+]
+
+
+def _melt_measures(recommendations):
+    """Return one row per property and measure column whose value is greater than zero."""
+    melted = recommendations.melt(
+        id_vars=[c for c in recommendations.columns if c not in measure_cols],
+        value_vars=measure_cols,
+        var_name="measure_type",
+        value_name="value",
+    ).dropna(subset=["value"])
+    return melted[melted["value"] > 0]
+
+
+def _measure_shares(melted):
+    """Return each measure_type's normalised share of the rows in ``melted`` as a frame."""
+    return melted["measure_type"].value_counts(normalize=True).to_frame().reset_index()
+
+
+# Both scenario exports go through the same pipeline so their measure mixes are comparable.
+epc_c_melted = _melt_measures(epc_c_recommendations)
+epc_c_measures = _measure_shares(epc_c_melted)
+
+epc_b_melted = _melt_measures(epc_b_recommendations)
+epc_b_measures = _measure_shares(epc_b_melted)
+
+# Side-by-side shares per measure. NOTE(review): the default inner merge drops any measure that occurs in only
+# one scenario - confirm that is intended.
+measures_compared = epc_c_measures.merge(
+    epc_b_measures,
+    left_on="measure_type",
+    right_on="measure_type",
+    suffixes=("_epc_c", "_epc_b"),
+)
+
+# Rows with a positive total_retrofit_cost in each scenario export.
+epc_c_retrofits = epc_c_recommendations.loc[epc_c_recommendations["total_retrofit_cost"] > 0]
+epc_b_retrofits = epc_b_recommendations.loc[epc_b_recommendations["total_retrofit_cost"] > 0]
+
+epc_c_retrofits["sap_points"].mean()
+epc_b_retrofits["sap_points"].mean()
+
+# UPRNs with a positive cost in both scenarios; shared column names are suffixed per scenario.
+properties_in_both = epc_c_retrofits.merge(
+    epc_b_retrofits,
+    on="uprn",
+    suffixes=("_epc_c", "_epc_b"),
+)
+properties_in_both["total_retrofit_cost_epc_c"].mean()
+properties_in_both["sap_points_epc_c"].mean()
+properties_in_both["total_retrofit_cost_epc_b"].mean()
+properties_in_both["sap_points_epc_b"].mean()
+
+# Solar PV savings - we need the amount of solar PV bill savings
+from sqlalchemy.orm import sessionmaker
+from backend.app.db.connection import db_engine
+from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations, RecommendationMaterials
+from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
+from collections import defaultdict
+
+PORTFOLIO_ID = 434  # Peabody
+SCENARIOS = [
+    904,
+    905
+]
+scenario_names = {
+    904: "EPC C - no solid floor, ashp 3.0",
+    905: "EPC B - no solid floor, ashp 3.0",
+}
+
+
+def get_data(portfolio_id, scenario_ids):
+    """Load properties, plans and default recommendations (with materials) from the database.
+
+    Args:
+        portfolio_id: id of the portfolio whose properties are loaded.
+        scenario_ids: scenario ids whose plans and recommendations are loaded.
+
+    Returns:
+        ``(properties_data, plans_data, recommendations_data)``: lists of plain dicts keyed by column name.
+        Each recommendation dict also carries ``scenario_id`` and a ``materials`` list.
+    """
+    session = sessionmaker(bind=db_engine)()
+    session.begin()
+    try:  # the finally below releases the session even when a query raises
+        # Properties joined to their EPC details, limited to this portfolio
+        properties_query = session.query(
+            PropertyModel,
+            PropertyDetailsEpcModel
+        ).join(
+            PropertyDetailsEpcModel,
+            PropertyModel.id == PropertyDetailsEpcModel.property_id
+        ).filter(
+            PropertyModel.portfolio_id == portfolio_id
+        ).all()
+
+        properties_data = [
+            {
+                **{col.name: getattr(p.PropertyModel, col.name)
+                   for col in PropertyModel.__table__.columns},
+                **{col.name: getattr(p.PropertyDetailsEpcModel, col.name)
+                   for col in PropertyDetailsEpcModel.__table__.columns},
+            }
+            for p in properties_query
+        ]
+
+        # Plans for the requested scenarios
+        plans_query = session.query(Plan).filter(
+            Plan.scenario_id.in_(scenario_ids)
+        ).all()
+
+        plans_data = [
+            {col.name: getattr(plan, col.name) for col in Plan.__table__.columns}
+            for plan in plans_query
+        ]
+        plan_ids = [p["id"] for p in plans_data]
+
+        # Recommendations (NO materials yet): default ones that are not already installed
+        recommendations_query = session.query(
+            Recommendation,
+            Plan.scenario_id
+        ).join(
+            PlanRecommendations,
+            Recommendation.id == PlanRecommendations.recommendation_id
+        ).join(
+            Plan,
+            Plan.id == PlanRecommendations.plan_id
+        ).filter(
+            PlanRecommendations.plan_id.in_(plan_ids),
+            Recommendation.default.is_(True),
+            Recommendation.already_installed.is_(False)
+        ).all()
+
+        recommendations_data = [
+            {
+                **{col.name: getattr(r.Recommendation, col.name)
+                   for col in Recommendation.__table__.columns},
+                "scenario_id": r.scenario_id,
+                "materials": []  # placeholder
+            }
+            for r in recommendations_query
+        ]
+
+        recommendation_ids = [r["id"] for r in recommendations_data]
+
+        # Recommendation materials (SEPARATE QUERY)
+        materials_query = session.query(
+            RecommendationMaterials
+        ).filter(
+            RecommendationMaterials.recommendation_id.in_(recommendation_ids)
+        ).all()
+
+        # Group materials by recommendation_id
+        materials_by_recommendation = defaultdict(list)
+        for m in materials_query:
+            materials_by_recommendation[m.recommendation_id].append({
+                "material_id": m.material_id,
+                "depth": m.depth,
+                "quantity": m.quantity,
+                "quantity_unit": m.quantity_unit,
+                "estimated_cost": m.estimated_cost,
+            })
+
+        # Attach materials safely (no filtering side effects)
+        for r in recommendations_data:
+            r["materials"] = materials_by_recommendation.get(r["id"], [])
+    finally:
+        session.close()
+
+    return properties_data, plans_data, recommendations_data
+
+
+# Pull the portfolio's data from the database and tabulate the recommendations.
+properties_data, plans_data, recommendations_data = get_data(portfolio_id=PORTFOLIO_ID, scenario_ids=SCENARIOS)
+recommendations_df = pd.DataFrame(recommendations_data)
+
+# Mean energy_cost_savings of the "solar_pv" recommendations, one row per scenario_id.
+is_solar_pv = recommendations_df["measure_type"] == "solar_pv"
+solar_pv_recommendations = recommendations_df[is_solar_pv]
+average_savings = solar_pv_recommendations.groupby("scenario_id")["energy_cost_savings"].mean().reset_index()
diff --git a/sfr/principal_pitch/2_export_data.py b/sfr/principal_pitch/2_export_data.py
index 89c29ce4..36efb603 100644
--- a/sfr/principal_pitch/2_export_data.py
+++ b/sfr/principal_pitch/2_export_data.py
@@ -14,22 +14,14 @@ from collections import defaultdict
 
 # PORTFOLIO_ID = 206
 # SCENARIOS = [389]
-PORTFOLIO_ID = 419  # Peabody
+PORTFOLIO_ID = 434  # Peabody
 SCENARIOS = [
-    871,  # EPC C - fabric first, no solid floor, ashp 3.0
-    863,  # EPC B, No EWI/IWI, No Solid Floor, ASHP 3.0 COP
-    862,  # EPC B - No solid floor, ASHP COP 3.0
-    861,  # EPC C, No EWI/IWI, No Solid Floor, ASHP 3.0 COP
-    859,  # EPC C - no solid floor, ashp 3.0
-    885,  # EPC B - fabric first, no solid floor, ashp 3.0
+    904,
+    905
 ]
 scenario_names = {
-    871: "EPC C, fabric first, no solid floor, ashp 3.0",
-    863: "EPC B, No EWI IWI, No Solid Floor, ASHP 3.0 COP",
-    862: "EPC B, No solid floor, ASHP COP 3.0",
-    861: "EPC C, No EWI IWI, No Solid Floor, ASHP 3.0 COP",
-    859: "EPC C, no solid floor, ashp 3.0",
-    885: "EPC B, fabric first, no solid floor, ashp 3.0"
+    904: "EPC C - no solid floor, ashp 3.0",
+    905: "EPC B - no solid floor, ashp 3.0",
 }
 
 
@@ -88,7 +80,8 @@ def get_data(portfolio_id, scenario_ids):
         Plan.id == PlanRecommendations.plan_id
     ).filter(
         PlanRecommendations.plan_id.in_(plan_ids),
-        Recommendation.default.is_(True)
+        Recommendation.default.is_(True),
+        Recommendation.already_installed.is_(False)
     ).all()
 
     recommendations_data = [
@@ -220,9 +213,7 @@ for scenario_id in SCENARIOS:
     df = properties_df[
         [
             "landlord_property_id", "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof",
-            "heating", "windows",
-            "current_epc_rating",
-            "current_sap_points", "total_floor_area", "number_of_rooms",
+            "heating", "windows", "current_epc_rating", "current_sap_points", "total_floor_area", "number_of_rooms",
         ]
     ].merge(
         recommendations_measures_pivot, how="left", on="property_id"
@@ -240,7 +231,7 @@ for scenario_id in SCENARIOS:
 
     # Create excel to store to
     filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
-                f"Project/{scenario_names[scenario_id]}.xlsx")
+                f"Project/Final SAL/{scenario_names[scenario_id]} - corrected.xlsx")
     with pd.ExcelWriter(filename) as writer:
         df.to_excel(writer, sheet_name="properties", index=False)
 

From f44d58c08ee6015b280470ea5663806b7004a3bc Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 12 Jan 2026 14:10:28 +0000
Subject: [PATCH 2/2] added new ecr and predictions bucket

---
 infrastructure/terraform/main.tf | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/infrastructure/terraform/main.tf b/infrastructure/terraform/main.tf
index c3a585f7..5a67b793 100644
--- a/infrastructure/terraform/main.tf
+++ b/infrastructure/terraform/main.tf
@@ -177,6 +177,12 @@ module "retrofit_hotwater_kwh_predictions" {
   allowed_origins = var.allowed_origins
 }
 
+module "retrofit_sap_baseline_predictions" {
+  source          = "./modules/s3"
+  bucketname      = "retrofit-sap-baseline-predictions-${var.stage}"
+  allowed_origins = var.allowed_origins
+}
+
 // We make this bucket presignable, because we want to generate download links for the frontend
 module "retrofit_energy_assessments" {
   source          = "./modules/s3_presignable_bucket"
@@ -253,6 +259,12 @@ module "lambda_hotwater_kwh_prediction_ecr" {
   source   = "./modules/ecr"
 }
 
+# Baselining models
+module "sap_baseline_ecr" {
+  ecr_name = "sap-baseline-prediction-${var.stage}"
+  source   = "./modules/ecr"
+}
+
 ##############################################
 # CDN - Cloudfront
 ##############################################