From bae3e13e219b47c04acbc5bcdf02262a9c8faab4 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 12 Jan 2026 13:51:28 +0000 Subject: [PATCH 1/2] minor peabody output work --- .../i_testing_parity_data.py | 74 +++++- .../k_deck_stats.py | 236 ++++++++++++++++++ sfr/principal_pitch/2_export_data.py | 27 +- 3 files changed, 317 insertions(+), 20 deletions(-) create mode 100644 etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/i_testing_parity_data.py b/etl/customers/peabody/Nov 2025 Consulting Project/i_testing_parity_data.py index c6fb86ea..41613bc3 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/i_testing_parity_data.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/i_testing_parity_data.py @@ -1,8 +1,10 @@ import pandas as pd df = pd.read_excel( - "/Users/khalimconn-kowlessar/Downloads/Parity Data 08012026.xlsx" + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/Parity Data " + "08012026.xlsx" ) +df["wall_combined"] = df["Wall Construction"] + "+" + df["Wall Insulation"].fillna("Unknown Insulation") df['SAP Score'].mean() @@ -18,4 +20,72 @@ df["SAP Band"].value_counts(normalize=True) z = df[df["SAP Band"] != df["Lodged EPC Band"]] agg = z.groupby(["Lodged EPC Band", "SAP Band"]).size().reset_index(name="count") -zz = z[z["Lodged EPC Band"] == "A"] +recommendations_epc_c = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no " + "solid floor, ashp 3.0 - corrected.xlsx" +) +recommendations_epc_c["uprn"] = recommendations_epc_c["uprn"].astype(int).astype(str) + +combined = recommendations_epc_c.merge( + df, + left_on="uprn", + right_on="UPRN", + suffixes=("_rec", "_sal") +) + +combined = combined[["uprn", "SAP Score", "current_sap_points", "walls", "wall_combined"]] + +combined[combined["SAP Score"] < 69]["current_epc_rating"].value_counts() +combined[combined["SAP Score"] < 69]["SAP Band"].value_counts() +combined[combined["SAP Score"] < 69].shape +combined[combined["current_sap_points"] < 69] + +combined["SAP Band"].value_counts() + +# Our Cs +combined_cs = combined[combined["SAP Score"] < 69] +combined_cs["SAP Band"].value_counts() +# Their C and below + + +compare = recommendations_epc_c[recommendations_epc_c["current_sap_points"] < 69] + +packages = recommendations_epc_c[recommendations_epc_c["total_retrofit_cost"] > 0] +packages["current_epc_rating"].value_counts() + +# TODO: 612 units +23219 - 612 +errors = recommendations_epc_c[ + (recommendations_epc_c["current_sap_points"] >= 69) & + (recommendations_epc_c["total_retrofit_cost"] > 0) + ] +errors["total_retrofit_cost"].sum() + +below_epc_c = recommendations_epc_c[recommendations_epc_c["current_sap_points"] < 69] + +below_epc_c_compare = below_epc_c.merge( + df, + left_on="uprn", + right_on="UPRN", + suffixes=("_rec", "_sal") +) + +eg1 = below_epc_c_compare[below_epc_c_compare["SAP Band"] == "C"].copy() +eg1["wall_combined"].value_counts() + +eg1_counts = eg1.groupby(["walls", "wall_combined"]).size().reset_index(name="count") +eg1_counts = eg1_counts.sort_values("count", ascending=False) + +externally_insulated = eg1[ + (eg1["wall_combined"] == "Solid Brick+External") & + pd.isnull(eg1["internal_wall_insulation"]) + ] + +externally_insulated[externally_insulated.index == 823]["uprn"] + +recommendations_epc_c[ + (recommendations_epc_c["current_sap_points"] < 69) & + (recommendations_epc_c["current_sap_points"] > 68) + ].shape + +recommendations_epc_c[recommendations_epc_c["wall_combined"] == ""] diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py b/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py new file mode 100644 index 00000000..5200c34d --- /dev/null +++ b/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py @@ -0,0 +1,236 @@ +import pandas as pd + +epc_c_recommendations = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no " + "solid floor, ashp 3.0 - corrected.xlsx" +) +epc_b_recommendations = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC B - no " + "solid floor, ashp 3.0 - corrected.xlsx" +) + +epc_c_movers = epc_b_recommendations[ + epc_b_recommendations["current_epc_rating"] == "Epc.C" + ] +epc_c_movers["property_type"].value_counts() + +house_epc_c_movers = epc_c_movers[ + epc_c_movers["property_type"] == "House" + ] +house_epc_c_movers_with_solar = house_epc_c_movers[ + ~pd.isnull(house_epc_c_movers["solar_pv"]) | ~pd.isnull(house_epc_c_movers["solar_pv_with_battery"]) + ] + +house_epc_c_movers_with_a_heatpump = house_epc_c_movers[ + ~pd.isnull(house_epc_c_movers["air_source_heat_pump"]) +] + +flat_epc_c_movers = epc_c_movers[ + epc_c_movers["property_type"] == "Flat" + ] + +epc_c_recommendations["sap_points"].mean() +epc_c_recommendations["sap_points"].mean() + +measure_cols = [ + "air_source_heat_pump", + "boiler_upgrade", + "cavity_wall_insulation", + "double_glazing", + "external_wall_insulation", + "flat_roof_insulation", + "high_heat_retention_storage_heaters", + "internal_wall_insulation", + "loft_insulation", + "low_energy_lighting", + "mechanical_ventilation", + "room_roof_insulation", + "roomstat_programmer_trvs", + "sealing_open_fireplace", + "secondary_glazing", + "secondary_heating", + "solar_pv", + "solar_pv_with_battery", + "suspended_floor_insulation", + "time_temperature_zone_control", +] + +epc_c_melted = ( + epc_c_recommendations + .melt( + id_vars=[c for c in epc_c_recommendations.columns if c not in measure_cols], + value_vars=measure_cols, + var_name="measure_type", + value_name="value", + ) + .dropna(subset=["value"]) +) +epc_c_melted = epc_c_melted[epc_c_melted["value"] > 0] +epc_c_measures = epc_c_melted["measure_type"].value_counts(normalize=True).to_frame().reset_index() + +epc_b_melted = ( + epc_b_recommendations + .melt( + id_vars=[c for c in epc_b_recommendations.columns if c not in measure_cols], + value_vars=measure_cols, + var_name="measure_type", + value_name="value", + ) + .dropna(subset=["value"]) +) + +epc_b_melted = epc_b_melted[epc_b_melted["value"] > 0] +epc_b_measures = epc_b_melted["measure_type"].value_counts(normalize=True).to_frame().reset_index() + +measures_compared = epc_c_measures.merge( + epc_b_measures, + left_on="measure_type", + right_on="measure_type", + suffixes=("_epc_c", "_epc_b"), +) + +epc_c_retrofits = epc_c_recommendations[ + epc_c_recommendations["total_retrofit_cost"] > 0 + ] + +epc_b_retrofits = epc_b_recommendations[ + epc_b_recommendations["total_retrofit_cost"] > 0 + ] + +epc_c_retrofits["sap_points"].mean() +epc_b_retrofits["sap_points"].mean() + +properties_in_both = epc_c_retrofits.merge(epc_b_retrofits, on="uprn", suffixes=("_epc_c", "_epc_b")) + +properties_in_both["total_retrofit_cost_epc_c"].mean() +properties_in_both["sap_points_epc_c"].mean() +properties_in_both["total_retrofit_cost_epc_b"].mean() +properties_in_both["sap_points_epc_b"].mean() + +# Solar PV savings - we need the amount of solar PV bill savings +from sqlalchemy.orm import sessionmaker +from backend.app.db.connection import db_engine +from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations, RecommendationMaterials +from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel +from collections import defaultdict + +PORTFOLIO_ID = 434 # Peabody +SCENARIOS = [ + 904, + 905 +] +scenario_names = { + 904: "EPC C - no solid floor, ashp 3.0", + 905: "EPC B - no solid floor, ashp 3.0", +} + + +def get_data(portfolio_id, scenario_ids): + session = sessionmaker(bind=db_engine)() + session.begin() + + # -------------------- + # Properties + # -------------------- + properties_query = session.query( + PropertyModel, + PropertyDetailsEpcModel + ).join( + PropertyDetailsEpcModel, + PropertyModel.id == PropertyDetailsEpcModel.property_id + ).filter( + PropertyModel.portfolio_id == portfolio_id + ).all() + + properties_data = [ + { + **{col.name: getattr(p.PropertyModel, col.name) + for col in PropertyModel.__table__.columns}, + **{col.name: getattr(p.PropertyDetailsEpcModel, col.name) + for col in PropertyDetailsEpcModel.__table__.columns}, + } + for p in properties_query + ] + + # -------------------- + # Plans + # -------------------- + plans_query = session.query(Plan).filter( + Plan.scenario_id.in_(scenario_ids) + ).all() + + plans_data = [ + {col.name: getattr(plan, col.name) for col in Plan.__table__.columns} + for plan in plans_query + ] + + plan_ids = [p["id"] for p in plans_data] + + # -------------------- + # Recommendations (NO materials yet) + # -------------------- + recommendations_query = session.query( + Recommendation, + Plan.scenario_id + ).join( + PlanRecommendations, + Recommendation.id == PlanRecommendations.recommendation_id + ).join( + Plan, + Plan.id == PlanRecommendations.plan_id + ).filter( + PlanRecommendations.plan_id.in_(plan_ids), + Recommendation.default.is_(True), + Recommendation.already_installed.is_(False) + ).all() + + recommendations_data = [ + { + **{col.name: getattr(r.Recommendation, col.name) + for col in Recommendation.__table__.columns}, + "scenario_id": r.scenario_id, + "materials": [] # placeholder + } + for r in recommendations_query + ] + + recommendation_ids = [r["id"] for r in recommendations_data] + + # -------------------- + # Recommendation materials (SEPARATE QUERY) + # -------------------- + materials_query = session.query( + RecommendationMaterials + ).filter( + RecommendationMaterials.recommendation_id.in_(recommendation_ids) + ).all() + + # Group materials by recommendation_id + materials_by_recommendation = defaultdict(list) + + for m in materials_query: + materials_by_recommendation[m.recommendation_id].append({ + "material_id": m.material_id, + "depth": m.depth, + "quantity": m.quantity, + "quantity_unit": m.quantity_unit, + "estimated_cost": m.estimated_cost, + }) + + # Attach materials safely (no filtering side effects) + for r in recommendations_data: + r["materials"] = materials_by_recommendation.get(r["id"], []) + + session.close() + + return properties_data, plans_data, recommendations_data + + +properties_data, plans_data, recommendations_data = get_data( + portfolio_id=PORTFOLIO_ID, scenario_ids=SCENARIOS +) + +recommendations_df = pd.DataFrame(recommendations_data) + +solar_pv_recommendations = recommendations_df[recommendations_df["measure_type"] == "solar_pv"] +average_savings = solar_pv_recommendations.groupby("scenario_id")["energy_cost_savings"].mean().reset_index() diff --git a/sfr/principal_pitch/2_export_data.py b/sfr/principal_pitch/2_export_data.py index 89c29ce4..36efb603 100644 --- a/sfr/principal_pitch/2_export_data.py +++ b/sfr/principal_pitch/2_export_data.py @@ -14,22 +14,14 @@ from collections import defaultdict # PORTFOLIO_ID = 206 # SCENARIOS = [389] -PORTFOLIO_ID = 419 # Peabody +PORTFOLIO_ID = 434 # Peabody SCENARIOS = [ - 871, # EPC C - fabric first, no solid floor, ashp 3.0 - 863, # EPC B, No EWI/IWI, No Solid Floor, ASHP 3.0 COP - 862, # EPC B - No solid floor, ASHP COP 3.0 - 861, # EPC C, No EWI/IWI, No Solid Floor, ASHP 3.0 COP - 859, # EPC C - no solid floor, ashp 3.0 - 885, # EPC B - fabric first, no solid floor, ashp 3.0 + 904, + 905 ] scenario_names = { - 871: "EPC C, fabric first, no solid floor, ashp 3.0", - 863: "EPC B, No EWI IWI, No Solid Floor, ASHP 3.0 COP", - 862: "EPC B, No solid floor, ASHP COP 3.0", - 861: "EPC C, No EWI IWI, No Solid Floor, ASHP 3.0 COP", - 859: "EPC C, no solid floor, ashp 3.0", - 885: "EPC B, fabric first, no solid floor, ashp 3.0" + 904: "EPC C - no solid floor, ashp 3.0", + 905: "EPC B - no solid floor, ashp 3.0", } @@ -88,7 +80,8 @@ def get_data(portfolio_id, scenario_ids): Plan.id == PlanRecommendations.plan_id ).filter( PlanRecommendations.plan_id.in_(plan_ids), - Recommendation.default.is_(True) + Recommendation.default.is_(True), + Recommendation.already_installed.is_(False) ).all() recommendations_data = [ @@ -220,9 +213,7 @@ for scenario_id in SCENARIOS: df = properties_df[ [ "landlord_property_id", "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof", - "heating", "windows", - "current_epc_rating", - "current_sap_points", "total_floor_area", "number_of_rooms", + "heating", "windows", "current_epc_rating", "current_sap_points", "total_floor_area", "number_of_rooms", ] ].merge( recommendations_measures_pivot, how="left", on="property_id" @@ -240,7 +231,7 @@ for scenario_id in SCENARIOS: # Create excel to store to filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting " - f"Project/{scenario_names[scenario_id]}.xlsx") + f"Project/Final SAL/{scenario_names[scenario_id]} - corrected.xlsx") with pd.ExcelWriter(filename) as writer: df.to_excel(writer, sheet_name="properties", index=False) From f44d58c08ee6015b280470ea5663806b7004a3bc Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 12 Jan 2026 14:10:28 +0000 Subject: [PATCH 2/2] added new ecr and predictions bucket --- infrastructure/terraform/main.tf | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/infrastructure/terraform/main.tf b/infrastructure/terraform/main.tf index c3a585f7..5a67b793 100644 --- a/infrastructure/terraform/main.tf +++ b/infrastructure/terraform/main.tf @@ -177,6 +177,12 @@ module "retrofit_hotwater_kwh_predictions" { allowed_origins = var.allowed_origins } +module "retrofit_sap_baseline_predictions" { + source = "./modules/s3" + bucketname = "retrofit-sap-baseline-predictions-${var.stage}" + allowed_origins = var.allowed_origins +} + // We make this bucket presignable, because we want to generate download links for the frontend module "retrofit_energy_assessments" { source = "./modules/s3_presignable_bucket" @@ -253,6 +259,12 @@ module "lambda_hotwater_kwh_prediction_ecr" { source = "./modules/ecr" } +# Baselining models +module "sap_baseline_ecr" { + ecr_name = "sap-baseline-prediction-${var.stage}" + source = "./modules/ecr" +} + ############################################## # CDN - Cloudfront ##############################################