minor peabody output work

2026-07-27 23:35:01 +00:00 · 2026-01-12 13:51:28 +00:00 · 2026-01-12 13:51:28 +00:00 · bae3e13e21
commit bae3e13e21
parent 3809066cbf
3 changed files with 317 additions and 20 deletions
--- a/Project/i_testing_parity_data.py
+++ b/Project/i_testing_parity_data.py
@ -1,8 +1,10 @@
 import pandas as pd

 df = pd.read_excel(
-    "/Users/khalimconn-kowlessar/Downloads/Parity Data 08012026.xlsx"
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/Parity Data "
+    "08012026.xlsx"
 )
+df["wall_combined"] = df["Wall Construction"] + "+" + df["Wall Insulation"].fillna("Unknown Insulation")

 df['SAP Score'].mean()

@ -18,4 +20,72 @@ df["SAP Band"].value_counts(normalize=True)
 z = df[df["SAP Band"] != df["Lodged EPC Band"]]
 agg = z.groupby(["Lodged EPC Band", "SAP Band"]).size().reset_index(name="count")

-zz = z[z["Lodged EPC Band"] == "A"]
+# Load the EPC C scenario export from the local "Final SAL" folder.
+recommendations_epc_c = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no "
+    "solid floor, ashp 3.0 - corrected.xlsx"
+)
+# Normalise uprn to a plain integer string before joining.
+# NOTE(review): astype(int) fails on missing uprn values, and the join below needs df["UPRN"] to hold
+# strings as well - confirm both against the workbooks.
+recommendations_epc_c["uprn"] = recommendations_epc_c["uprn"].astype(int).astype(str)
+
+# Join recommendations to the parity data on UPRN (merge's default join type); any column name present
+# in both frames is suffixed _rec / _sal.
+combined = recommendations_epc_c.merge(
+    df,
+    left_on="uprn",
+    right_on="UPRN",
+    suffixes=("_rec", "_sal")
+)
+
+# Keep the columns used below. "SAP Band" and "current_epc_rating" must be retained here because the
+# value_counts() calls that follow read them from `combined`; dropping them raises KeyError.
+combined = combined[
+    ["uprn", "SAP Score", "SAP Band", "current_sap_points", "current_epc_rating", "walls", "wall_combined"]
+]
+
+# Rows whose parity-data "SAP Score" is below 69, viewed against both rating columns.
+below_69_by_score = combined[combined["SAP Score"] < 69]
+below_69_by_score["current_epc_rating"].value_counts()
+below_69_by_score["SAP Band"].value_counts()
+below_69_by_score.shape
+# Rows whose recommendation-side current_sap_points is below 69.
+combined[combined["current_sap_points"] < 69]
+
+combined["SAP Band"].value_counts()
+
+# Our Cs
+combined_cs = combined[combined["SAP Score"] < 69]
+combined_cs["SAP Band"].value_counts()
+# Their C and below
+
+
+# Recommendation rows whose own current_sap_points is below 69.
+compare = recommendations_epc_c[recommendations_epc_c["current_sap_points"] < 69]
+
+# Rows that carry a retrofit cost (total_retrofit_cost > 0), broken down by current rating.
+packages = recommendations_epc_c[recommendations_epc_c["total_retrofit_cost"] > 0]
+packages["current_epc_rating"].value_counts()
+
+# TODO: 612 units
+# NOTE(review): the bare expression below has no effect when run as a script; what 23219 and 612
+# stand for is not stated here (presumably a row total minus the 612 units in the TODO) - confirm.
+23219 - 612
+# Rows already at 69 or more SAP points that nevertheless have a retrofit cost, and their total cost.
+errors = recommendations_epc_c[
+    (recommendations_epc_c["current_sap_points"] >= 69) &
+    (recommendations_epc_c["total_retrofit_cost"] > 0)
+    ]
+errors["total_retrofit_cost"].sum()
+
+# Recommendation rows with current_sap_points below 69, joined to the parity data on UPRN.
+below_epc_c = recommendations_epc_c[recommendations_epc_c["current_sap_points"] < 69]
+
+below_epc_c_compare = below_epc_c.merge(
+    df,
+    left_on="uprn",
+    right_on="UPRN",
+    suffixes=("_rec", "_sal")
+)
+
+# Of those, the rows whose parity-data "SAP Band" is C; .copy() gives eg1 its own data.
+eg1 = below_epc_c_compare[below_epc_c_compare["SAP Band"] == "C"].copy()
+eg1["wall_combined"].value_counts()
+
+# Count each (walls, wall_combined) pairing, most frequent first.
+eg1_counts = eg1.groupby(["walls", "wall_combined"]).size().reset_index(name="count")
+eg1_counts = eg1_counts.sort_values("count", ascending=False)
+
+# "Solid Brick+External" rows that have no value in internal_wall_insulation.
+externally_insulated = eg1[
+    (eg1["wall_combined"] == "Solid Brick+External") &
+    pd.isnull(eg1["internal_wall_insulation"])
+    ]
+
+# NOTE(review): 823 is a hard-coded index label; which row it picks depends on the row order
+# produced by the merge above.
+externally_insulated[externally_insulated.index == 823]["uprn"]
+
+# Rows with current_sap_points strictly between 68 and 69.
+recommendations_epc_c[
+    (recommendations_epc_c["current_sap_points"] < 69) &
+    (recommendations_epc_c["current_sap_points"] > 68)
+    ].shape
+
+# NOTE(review): in the visible part of this script "wall_combined" is only created on df, and the
+# merged frames use it without a suffix, so recommendations_epc_c likely has no such column and this
+# line would raise KeyError - confirm what was meant to be inspected here.
+recommendations_epc_c[recommendations_epc_c["wall_combined"] == ""]
--- a/etl/customers/peabody/Nov
+++ b/etl/customers/peabody/Nov
@ -0,0 +1,236 @@
+import pandas as pd
+
+# Scenario exports for the two targets being compared (EPC C and EPC B).
+epc_c_recommendations = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no "
+    "solid floor, ashp 3.0 - corrected.xlsx"
+)
+epc_b_recommendations = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC B - no "
+    "solid floor, ashp 3.0 - corrected.xlsx"
+)
+
+# Rows of the EPC B export whose current rating is "Epc.C".
+currently_rated_c = epc_b_recommendations["current_epc_rating"] == "Epc.C"
+epc_c_movers = epc_b_recommendations[currently_rated_c]
+epc_c_movers["property_type"].value_counts()
+
+# Houses among them, then the houses with a value in either solar column or in the heat pump column.
+house_epc_c_movers = epc_c_movers[epc_c_movers["property_type"] == "House"]
+has_any_solar = (
+    house_epc_c_movers["solar_pv"].notna()
+    | house_epc_c_movers["solar_pv_with_battery"].notna()
+)
+house_epc_c_movers_with_solar = house_epc_c_movers[has_any_solar]
+
+has_heat_pump = house_epc_c_movers["air_source_heat_pump"].notna()
+house_epc_c_movers_with_a_heatpump = house_epc_c_movers[has_heat_pump]
+
+# Flats among the same rows.
+flat_epc_c_movers = epc_c_movers[epc_c_movers["property_type"] == "Flat"]
+
+# Mean sap_points over every row of each scenario export (EPC C, then EPC B).
+epc_c_recommendations["sap_points"].mean()
+epc_b_recommendations["sap_points"].mean()
+
+# Columns of the scenario exports that hold one value per retrofit measure.
+measure_cols = [
+    "air_source_heat_pump",
+    "boiler_upgrade",
+    "cavity_wall_insulation",
+    "double_glazing",
+    "external_wall_insulation",
+    "flat_roof_insulation",
+    "high_heat_retention_storage_heaters",
+    "internal_wall_insulation",
+    "loft_insulation",
+    "low_energy_lighting",
+    "mechanical_ventilation",
+    "room_roof_insulation",
+    "roomstat_programmer_trvs",
+    "sealing_open_fireplace",
+    "secondary_glazing",
+    "secondary_heating",
+    "solar_pv",
+    "solar_pv_with_battery",
+    "suspended_floor_insulation",
+    "time_temperature_zone_control",
+]
+
+
+def _measure_shares(recommendations):
+    """Return ``(melted, shares)`` for one scenario export.
+
+    ``melted`` is the export reshaped to one row per (input row, measure),
+    keeping only rows whose measure value is non-null and greater than zero.
+    ``shares`` holds each measure's fraction of those rows in the columns
+    ``measure_type`` and ``proportion``.
+    """
+    melted = recommendations.melt(
+        id_vars=[c for c in recommendations.columns if c not in measure_cols],
+        value_vars=measure_cols,
+        var_name="measure_type",
+        value_name="value",
+    ).dropna(subset=["value"])
+    melted = melted[melted["value"] > 0]
+    shares = (
+        melted["measure_type"]
+        .value_counts(normalize=True)
+        # Name the index and the values explicitly so the resulting columns do
+        # not depend on how the installed pandas names value_counts() output.
+        .rename_axis("measure_type")
+        .rename("proportion")
+        .reset_index()
+    )
+    return melted, shares
+
+
+epc_c_melted, epc_c_measures = _measure_shares(epc_c_recommendations)
+epc_b_melted, epc_b_measures = _measure_shares(epc_b_recommendations)
+
+# Default (inner) join: a measure is listed only when it occurs in both scenarios.
+measures_compared = epc_c_measures.merge(
+    epc_b_measures,
+    on="measure_type",
+    suffixes=("_epc_c", "_epc_b"),
+)
+
+# Rows with a retrofit cost in each scenario.
+epc_c_retrofits = epc_c_recommendations.loc[epc_c_recommendations["total_retrofit_cost"] > 0]
+epc_b_retrofits = epc_b_recommendations.loc[epc_b_recommendations["total_retrofit_cost"] > 0]
+
+epc_c_retrofits["sap_points"].mean()
+epc_b_retrofits["sap_points"].mean()
+
+# Rows whose uprn has a retrofit cost in both scenarios, with per-scenario column suffixes.
+properties_in_both = pd.merge(
+    epc_c_retrofits,
+    epc_b_retrofits,
+    on="uprn",
+    suffixes=("_epc_c", "_epc_b"),
+)
+
+# Mean cost and SAP points per scenario over that shared set.
+properties_in_both["total_retrofit_cost_epc_c"].mean()
+properties_in_both["sap_points_epc_c"].mean()
+properties_in_both["total_retrofit_cost_epc_b"].mean()
+properties_in_both["sap_points_epc_b"].mean()
+
+# Solar PV savings - we need the amount of solar PV bill savings
+from sqlalchemy.orm import sessionmaker
+from backend.app.db.connection import db_engine
+from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations, RecommendationMaterials
+from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
+from collections import defaultdict
+
+PORTFOLIO_ID = 434  # Peabody
+# Scenario ids handed to get_data() as scenario_ids.
+SCENARIOS = [
+    904,
+    905
+]
+# Label for each scenario id listed in SCENARIOS.
+scenario_names = {
+    904: "EPC C - no solid floor, ashp 3.0",
+    905: "EPC B - no solid floor, ashp 3.0",
+}
+
+
+def get_data(portfolio_id, scenario_ids):
+    """Load properties, plans and default recommendations as plain dicts.
+
+    Args:
+        portfolio_id: Value matched against ``PropertyModel.portfolio_id``.
+        scenario_ids: Scenario ids matched against ``Plan.scenario_id``.
+
+    Returns:
+        A ``(properties_data, plans_data, recommendations_data)`` tuple of
+        lists of dicts keyed by column name. Each property dict combines the
+        ``PropertyModel`` and ``PropertyDetailsEpcModel`` columns; where both
+        tables share a column name the ``PropertyDetailsEpcModel`` value is
+        kept. Each recommendation dict also carries ``scenario_id`` and a
+        ``materials`` list (empty when the recommendation has no materials).
+    """
+    session = sessionmaker(bind=db_engine)()
+    try:
+        session.begin()
+
+        # --------------------
+        # Properties
+        # --------------------
+        properties_query = session.query(
+            PropertyModel,
+            PropertyDetailsEpcModel
+        ).join(
+            PropertyDetailsEpcModel,
+            PropertyModel.id == PropertyDetailsEpcModel.property_id
+        ).filter(
+            PropertyModel.portfolio_id == portfolio_id
+        ).all()
+
+        properties_data = [
+            {
+                **{col.name: getattr(p.PropertyModel, col.name)
+                   for col in PropertyModel.__table__.columns},
+                **{col.name: getattr(p.PropertyDetailsEpcModel, col.name)
+                   for col in PropertyDetailsEpcModel.__table__.columns},
+            }
+            for p in properties_query
+        ]
+
+        # --------------------
+        # Plans
+        # --------------------
+        plans_query = session.query(Plan).filter(
+            Plan.scenario_id.in_(scenario_ids)
+        ).all()
+
+        plans_data = [
+            {col.name: getattr(plan, col.name) for col in Plan.__table__.columns}
+            for plan in plans_query
+        ]
+
+        plan_ids = [p["id"] for p in plans_data]
+
+        # --------------------
+        # Recommendations (materials are attached further down)
+        # --------------------
+        # Only default recommendations that are not already installed, for the
+        # plans found above; scenario_id is selected alongside each one.
+        recommendations_query = session.query(
+            Recommendation,
+            Plan.scenario_id
+        ).join(
+            PlanRecommendations,
+            Recommendation.id == PlanRecommendations.recommendation_id
+        ).join(
+            Plan,
+            Plan.id == PlanRecommendations.plan_id
+        ).filter(
+            PlanRecommendations.plan_id.in_(plan_ids),
+            Recommendation.default.is_(True),
+            Recommendation.already_installed.is_(False)
+        ).all()
+
+        recommendations_data = [
+            {
+                **{col.name: getattr(r.Recommendation, col.name)
+                   for col in Recommendation.__table__.columns},
+                "scenario_id": r.scenario_id,
+                "materials": []  # placeholder, filled in below
+            }
+            for r in recommendations_query
+        ]
+
+        recommendation_ids = [r["id"] for r in recommendations_data]
+
+        # --------------------
+        # Recommendation materials (separate query)
+        # --------------------
+        materials_query = session.query(
+            RecommendationMaterials
+        ).filter(
+            RecommendationMaterials.recommendation_id.in_(recommendation_ids)
+        ).all()
+
+        # Group materials by recommendation_id
+        materials_by_recommendation = defaultdict(list)
+
+        for m in materials_query:
+            materials_by_recommendation[m.recommendation_id].append({
+                "material_id": m.material_id,
+                "depth": m.depth,
+                "quantity": m.quantity,
+                "quantity_unit": m.quantity_unit,
+                "estimated_cost": m.estimated_cost,
+            })
+
+        # .get() rather than indexing so the defaultdict is not grown with
+        # empty entries for recommendations that have no materials.
+        for r in recommendations_data:
+            r["materials"] = materials_by_recommendation.get(r["id"], [])
+    finally:
+        # Always close the session, including when one of the queries raises.
+        session.close()
+
+    return properties_data, plans_data, recommendations_data
+
+
+# Fetch everything for the configured portfolio and scenarios.
+properties_data, plans_data, recommendations_data = get_data(PORTFOLIO_ID, SCENARIOS)
+
+recommendations_df = pd.DataFrame(recommendations_data)
+
+# Mean energy_cost_savings of the solar_pv recommendations, one row per scenario_id.
+is_solar_pv = recommendations_df["measure_type"] == "solar_pv"
+solar_pv_recommendations = recommendations_df[is_solar_pv]
+savings_by_scenario = solar_pv_recommendations.groupby("scenario_id")["energy_cost_savings"]
+average_savings = savings_by_scenario.mean().reset_index()
--- a/sfr/principal_pitch/2_export_data.py
+++ b/sfr/principal_pitch/2_export_data.py
@ -14,22 +14,14 @@ from collections import defaultdict

 # PORTFOLIO_ID = 206
 # SCENARIOS = [389]
-PORTFOLIO_ID = 419  # Peabody
+PORTFOLIO_ID = 434  # Peabody
 SCENARIOS = [
-    871,  # EPC C - fabric first, no solid floor, ashp 3.0
-    863,  # EPC B, No EWI/IWI, No Solid Floor, ASHP 3.0 COP
-    862,  # EPC B - No solid floor, ASHP COP 3.0
-    861,  # EPC C, No EWI/IWI, No Solid Floor, ASHP 3.0 COP
-    859,  # EPC C - no solid floor, ashp 3.0
-    885,  # EPC B - fabric first, no solid floor, ashp 3.0
+    904,
+    905
 ]
 scenario_names = {
-    871: "EPC C, fabric first, no solid floor, ashp 3.0",
-    863: "EPC B, No EWI IWI, No Solid Floor, ASHP 3.0 COP",
-    862: "EPC B, No solid floor, ASHP COP 3.0",
-    861: "EPC C, No EWI IWI, No Solid Floor, ASHP 3.0 COP",
-    859: "EPC C, no solid floor, ashp 3.0",
-    885: "EPC B, fabric first, no solid floor, ashp 3.0"
+    904: "EPC C - no solid floor, ashp 3.0",
+    905: "EPC B - no solid floor, ashp 3.0",
 }


@ -88,7 +80,8 @@ def get_data(portfolio_id, scenario_ids):
        Plan.id == PlanRecommendations.plan_id
    ).filter(
        PlanRecommendations.plan_id.in_(plan_ids),
-        Recommendation.default.is_(True)
+        Recommendation.default.is_(True),
+        Recommendation.already_installed.is_(False)
    ).all()

    recommendations_data = [
@ -220,9 +213,7 @@ for scenario_id in SCENARIOS:
    df = properties_df[
        [
            "landlord_property_id", "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof",
-            "heating", "windows",
-            "current_epc_rating",
-            "current_sap_points", "total_floor_area", "number_of_rooms",
+            "heating", "windows", "current_epc_rating", "current_sap_points", "total_floor_area", "number_of_rooms",
        ]
    ].merge(
        recommendations_measures_pivot, how="left", on="property_id"
@ -240,7 +231,7 @@ for scenario_id in SCENARIOS:

    # Create excel to store to
    filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
-                f"Project/{scenario_names[scenario_id]}.xlsx")
+                f"Project/Final SAL/{scenario_names[scenario_id]} - corrected.xlsx")
    with pd.ExcelWriter(filename) as writer:
        df.to_excel(writer, sheet_name="properties", index=False)