From b2d07cfd7c81f5b301d4efd0700f6afb1299fd82 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 7 Jan 2026 20:40:37 +0000 Subject: [PATCH 1/2] preparing the already installed code for Peabody --- backend/app/db/models/portfolio.py | 4 + backend/app/db/models/recommendations.py | 55 ++ .../f_diagnostics.py | 126 +++- .../g_rebaselining_installed_measrues.py | 714 ++++++++++++++++++ sfr/principal_pitch/2_export_data.py | 134 +++- 5 files changed, 998 insertions(+), 35 deletions(-) create mode 100644 etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py diff --git a/backend/app/db/models/portfolio.py b/backend/app/db/models/portfolio.py index ea9f9976..e17e5856 100644 --- a/backend/app/db/models/portfolio.py +++ b/backend/app/db/models/portfolio.py @@ -106,6 +106,10 @@ class PropertyModel(Base): current_epc_rating = Column(Enum(Epc)) current_sap_points = Column(Float) current_valuation = Column(Float) + # Following fields are for recording already installed adjustments to a property's SAP + installed_measures_sap_point_adjustment = Column(Float) + is_sap_points_adjusted_for_installed_measures = Column(Boolean, default=False) + original_sap_points = Column(Float) class FeatureRating(enum.Enum): diff --git a/backend/app/db/models/recommendations.py b/backend/app/db/models/recommendations.py index 800596ec..ed1fcefa 100644 --- a/backend/app/db/models/recommendations.py +++ b/backend/app/db/models/recommendations.py @@ -146,3 +146,58 @@ class Scenario(Base): valuation_return_on_investment = Column(String) property_valuation_increase = Column(Float) labour_days = Column(Float) + + +class MeasureType(enum.Enum): + air_source_heat_pump = "air_source_heat_pump" + boiler_upgrade = "boiler_upgrade" + high_heat_retention_storage_heaters = "high_heat_retention_storage_heaters" + secondary_heating = "secondary_heating" + + roomstat_programmer_trvs = "roomstat_programmer_trvs" + time_temperature_zone_control = "time_temperature_zone_control" + cylinder_thermostat = "cylinder_thermostat" + + cavity_wall_insulation = "cavity_wall_insulation" + extension_cavity_wall_insulation = "extension_cavity_wall_insulation" + external_wall_insulation = "external_wall_insulation" + internal_wall_insulation = "internal_wall_insulation" + loft_insulation = "loft_insulation" + flat_roof_insulation = "flat_roof_insulation" + room_roof_insulation = "room_roof_insulation" + solid_floor_insulation = "solid_floor_insulation" + suspended_floor_insulation = "suspended_floor_insulation" + + double_glazing = "double_glazing" + secondary_glazing = "secondary_glazing" + draught_proofing = "draught_proofing" + + mechanical_ventilation = "mechanical_ventilation" + low_energy_lighting = "low_energy_lighting" + solar_pv = "solar_pv" + hot_water_tank_insulation = "hot_water_tank_insulation" + sealing_open_fireplace = "sealing_open_fireplace" + + +class InstalledMeasure(Base): + __tablename__ = "installed_measure" + + id = Column(BigInteger, primary_key=True, autoincrement=True) + uprn = Column(BigInteger, nullable=False) + measure_type = Column( + Enum( + MeasureType, + name="measure_type", + values_callable=lambda e: [m.value for m in e], + create_type=False, # <-- critical + ), + nullable=False, + ) + installed_at = Column(TIMESTAMP) + sap_points = Column(Float) + carbon_savings = Column(Float) + kwh_savings = Column(Float) + bill_savings = Column(Float) + heat_demand_savings = Column(Float) + source = Column(String) + is_active = Column(Boolean, nullable=False, default=True) diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/f_diagnostics.py b/etl/customers/peabody/Nov 2025 Consulting Project/f_diagnostics.py index fa7383a2..4b946c60 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/f_diagnostics.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/f_diagnostics.py @@ -120,13 +120,127 @@ retry.to_excel( # Delete associated plans # 1) Get the property IDs for these UPRNS, for this portfolio portfolio_id = 419 -uprns = retry +uprns = retry["epc_os_uprn"].tolist() # TODO: Delete all plans for these properties and re-build -# Plan notes: -# UPRN: 5870109770, property ID: 281244 - need to delete and re-build all scenarios -# UPRN: 100022725126, property ID: 283781 - need to delete and re-build all scenarios +from sqlalchemy.orm import Session +from backend.app.db.models.portfolio import PropertyModel +from backend.app.db.connection import db_session +from backend.app.db.models.recommendations import Plan +from sqlalchemy import select, delete +from sqlalchemy.exc import NoResultFound +from sqlalchemy.orm import sessionmaker -# Bugs: -12156800 +def get_property_ids_for_uprns(session: Session, portfolio_id: int, uprns: list[int]) -> list[int]: + return [ + property.id + for property in session.query(PropertyModel) + .filter( + PropertyModel.portfolio_id == portfolio_id, + PropertyModel.uprn.in_(uprns) + ) + .all() + ] + + +with db_session() as session: + property_ids_to_delete = get_property_ids_for_uprns(session, portfolio_id, uprns) + + +# Get all and delete plans for these property IDs +def get_all_plans_for_property_ids(session: Session, property_ids: list[int]) -> list[Plan]: + return session.query(Plan).filter(Plan.property_id.in_(property_ids)).all() + + +def get_ids_of_plans_for_deletion(session: Session, property_ids: list[int]) -> list[int]: + return [ + plan.id + for plan in session.query(Plan) + .filter(Plan.property_id.in_(property_ids)) + .all() + ] + + +with db_session() as session: + plan_ids_to_delete = get_ids_of_plans_for_deletion(session, property_ids_to_delete) + + +def chunked(iterable, size): + for i in range(0, len(iterable), size): + yield iterable[i:i + size] + + +from sqlalchemy import text +from sqlalchemy.orm import Session + + +def delete_plan_batch(session: Session, plan_ids: list[int]): + if not plan_ids: + return + + session.execute(text("SET LOCAL lock_timeout = '5s'")) + + params = {"plan_ids": plan_ids} + + # ---------------------------- + # recommendation_materials + # ---------------------------- + session.execute( + text(""" + DELETE FROM recommendation_materials rm + USING plan_recommendations pr + WHERE rm.recommendation_id = pr.recommendation_id + AND pr.plan_id = ANY(:plan_ids) + """), + params, + ) + + # ---------------------------- + # plan_recommendations + # ---------------------------- + session.execute( + text(""" + DELETE FROM plan_recommendations + WHERE plan_id = ANY(:plan_ids) + """), + params, + ) + + # ---------------------------- + # recommendations (only those used by these plans) + # ---------------------------- + session.execute( + text(""" + DELETE FROM recommendation r + WHERE r.id IN ( + SELECT DISTINCT recommendation_id + FROM plan_recommendations + WHERE plan_id = ANY(:plan_ids) + ) + """), + params, + ) + + # ---------------------------- + # plans LAST + # ---------------------------- + session.execute( + text(""" + DELETE FROM plan + WHERE id = ANY(:plan_ids) + """), + params, + ) + + +batch_size = 25 +total = (len(plan_ids_to_delete) + batch_size - 1) // batch_size + +for i, batch in enumerate(chunked(plan_ids_to_delete, batch_size), start=1): + print(f"Deleting plan batch {i}/{total} ({len(batch)} plans)") + + with db_session() as session: + delete_plan_batch(session, batch) + + print(f"Batch {i} committed") diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py b/etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py new file mode 100644 index 00000000..bc628630 --- /dev/null +++ b/etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py @@ -0,0 +1,714 @@ +import pandas as pd +from sqlalchemy.orm import sessionmaker +from backend.app.db.connection import db_engine, db_read_session, db_session +from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations, RecommendationMaterials, \ + InstalledMeasure +from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel +from sqlalchemy import func +from backend.app.utils import sap_to_epc +from typing import Dict, List, Set +from recommendations.Costs import Costs +from backend.app.db.models.portfolio import Epc + + +def get_all_data(portfolio_id, scenario_ids): + session = sessionmaker(bind=db_engine)() + session.begin() + + # -------------------- + # Properties + # -------------------- + properties_query = session.query( + PropertyModel, + PropertyDetailsEpcModel + ).join( + PropertyDetailsEpcModel, + PropertyModel.id == PropertyDetailsEpcModel.property_id + ).filter( + PropertyModel.portfolio_id == portfolio_id + ).all() + + properties_data = [ + { + **{col.name: getattr(p.PropertyModel, col.name) + for col in PropertyModel.__table__.columns}, + **{col.name: getattr(p.PropertyDetailsEpcModel, col.name) + for col in PropertyDetailsEpcModel.__table__.columns}, + } + for p in properties_query + ] + + # -------------------- + # Plans + # -------------------- + plans_query = session.query(Plan).filter( + Plan.scenario_id.in_(scenario_ids) + ).all() + + plans_data = [ + {col.name: getattr(plan, col.name) for col in Plan.__table__.columns} + for plan in plans_query + ] + + plan_ids = [p["id"] for p in plans_data] + + # -------------------- + # Recommendations (NO materials yet) + # -------------------- + recommendations_query = session.query( + Recommendation, + Plan.scenario_id + ).join( + PlanRecommendations, + Recommendation.id == PlanRecommendations.recommendation_id + ).join( + Plan, + Plan.id == PlanRecommendations.plan_id + ).filter( + PlanRecommendations.plan_id.in_(plan_ids), + ).all() + + recommendations_data = [ + { + **{col.name: getattr(r.Recommendation, col.name) + for col in Recommendation.__table__.columns}, + "scenario_id": r.scenario_id, + "materials": [] # placeholder + } + for r in recommendations_query + ] + + session.close() + + return properties_data, plans_data, recommendations_data + + +PORTFOLIO_ID = 419 # Peabody +SCENARIOS = [ + # 871, # EPC C - fabric first, no solid floor, ashp 3.0 + # 863, # EPC B, No EWI/IWI, No Solid Floor, ASHP 3.0 COP + # 862, # EPC B - No solid floor, ASHP COP 3.0 + # 861, # EPC C, No EWI/IWI, No Solid Floor, ASHP 3.0 COP + # 859, # EPC C - no solid floor, ashp 3.0 + 885, # EPC B - fabric first, no solid floor, ashp 3.0 +] + +# properties_data, plans_data, recommendations_data = get_all_data(portfolio_id=PORTFOLIO_ID, scenario_ids=SCENARIOS) +# # Store this data as dataframes for analysis +# properties_df = pd.DataFrame(properties_data) +# plans_df = pd.DataFrame(plans_data) +# recommendations_df = pd.DataFrame(recommendations_data) + +# Save CSVs +# properties_df.to_csv( +# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/" +# "f_peabody_properties_data_20260108.csv", +# index=False +# ) +# plans_df.to_csv( +# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/" +# "f_peabody_plans_data_20260108.csv", +# index=False +# ) +# recommendations_df.to_csv( +# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/" +# "f_peabody_recommendations_data_20260108.csv", +# index=False +# ) +# Read csvs +properties_df = pd.read_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/" + "f_peabody_properties_data_20260108.csv" +) +plans_df = pd.read_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/" + "f_peabody_plans_data_20260108.csv" +) +recommendations_df = pd.read_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/" + "f_peabody_recommendations_data_20260108.csv" +) + +sustainability_data = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody " + "- Data Extracts for Domna.xlsx", + sheet_name="Sustainability" +) + +# recommendations_df = pd.read_excel( +# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/EPC B, " +# "No solid floor, ASHP COP 3.0.xlsx" +# ) + +# recommendations_df2 = recommendations_df2.merge( +# properties_df[["id", "uprn"]], +# left_on="property_id", +# right_on="id", +# how="left" +# ).rename(columns={"id_x": "id"}).drop(columns=["id_y"]) +# recommendations_df["uprn"] = recommendations_df["uprn"].astype(int).astype(str) + +# We just need all of the measure types, per property +recommendation_measure_types = recommendations_df[["property_id", "measure_type"]].drop_duplicates() +recommendation_measure_types["flag"] = True + +# We pivot +recommendations_measures_pivot = recommendation_measure_types.pivot( + index='property_id', + columns='measure_type', + values='flag' +) +recommendations_measures_pivot = recommendations_measures_pivot.reset_index() + +# Create a total cost column +recommendations_total_cost = recommendations_df.groupby("property_id")["estimated_cost"].sum().reset_index() +recommendations_measures_pivot = recommendations_measures_pivot.merge( + recommendations_total_cost, how="left", on="property_id" +) + +properties_to_recs = properties_df.rename(columns={"solar_pv": "solar_data"}).merge( + recommendations_measures_pivot, how="left", left_on="id", right_on="property_id" +) +properties_to_recs["estimated_cost"] = properties_to_recs["estimated_cost"].fillna(0) + +sustainability_data["has_cavity_insulation"] = sustainability_data["Wall Insulation"].isin( + ["FilledCavity", "FilledCavityPlusInternal", "FilledCavityPlusExternal"] +) +sustainability_data["has_iwi"] = sustainability_data["Wall Insulation"].isin( + ["Internal", "FilledCavityPlusInternal"] +) +sustainability_data["has_ewi"] = sustainability_data["Wall Insulation"].isin( + ["External", "FilledCavityPlusExternal"] +) +sustainability_data["has_loft_insulation"] = sustainability_data["Roof Insulation"].isin( + ["mm300", "mm250"] +) +sustainability_data["has_glazing"] = sustainability_data["Glazing"].isin( + ["Double 2002 or later", "Double but age unknown", "Triple", "DoubleKnownData", "Secondary", "TripleKnownData"] +) + +sustainability_data["has_floor_insulation"] = sustainability_data["Floor Insulation"].isin( + ["RetroFitted"] +) + +sustainability_data["has_efficient_boiler"] = ( + sustainability_data["Heating"].isin(["Boilers"]) & sustainability_data["Boiler Efficiency"].isin(["A"]) +) +sustainability_data["has_ashp"] = (sustainability_data["Heating"].isin(["Heat pumps (wet)"])) + +sustainability_data["has_top_heat_controls"] = ( + sustainability_data["Controls Adequacy"].isin(["Top Spec"]) +) + +sustainability_data["has_optimal_heat_controls"] = ( + sustainability_data["Controls Adequacy"].isin(["Optimal"]) +) +sustainability_data["has_flat_roof_insulation"] = ( + (sustainability_data["Roof Construction"] == "Flat") & + (sustainability_data["Roof Insulation"].isin(["mm50", "mm150", "mm100"])) +) + +properties_to_recs["uprn"] = properties_to_recs["uprn"].astype(str) +comparison = sustainability_data.merge( + properties_to_recs[ + ["uprn", "cavity_wall_insulation", "external_wall_insulation", "internal_wall_insulation", "loft_insulation", + "double_glazing", "secondary_glazing", "suspended_floor_insulation", "boiler_upgrade", "air_source_heat_pump", + "time_temperature_zone_control", "roomstat_programmer_trvs", "flat_roof_insulation", "room_roof_insulation" + ] + ], + left_on="UPRN", + right_on="uprn", + how="left" +) + +# Flag entries where we've been told that walls are already insulated, but we have recommendations for wall insulation +# ------------ Walls ------------ +comparison["conflict_cavity_wall_insulation"] = ( + (comparison["has_cavity_insulation"]) & + (pd.isnull(comparison["cavity_wall_insulation"]) == False) +) +comparison["conflict_iwi_wall_insulation"] = ( + (comparison["has_iwi"]) & + (pd.isnull(comparison["internal_wall_insulation"]) == False) +) +comparison["conflict_ewi_wall_insulation"] = ( + (comparison["has_ewi"]) & + (pd.isnull(comparison["internal_wall_insulation"]) == False) +) + +cwi_conflicting = comparison[comparison["conflict_cavity_wall_insulation"] == True] +iwi_conflicting = comparison[comparison["conflict_iwi_wall_insulation"] == True] +ewi_conflicting = comparison[comparison["conflict_ewi_wall_insulation"] == True] + +# ------------ Roof ------------ +comparison["conflict_roof_insulation"] = ( + (comparison["has_loft_insulation"]) & + (pd.isnull(comparison["loft_insulation"]) == False) +) + +loft_conflicting = comparison[comparison["conflict_roof_insulation"] == True] + +# ------------ Windows ------------ +comparison["conflict_double_glazing"] = ( + (comparison["has_glazing"]) & + ( + (pd.isnull(comparison["double_glazing"]) == False) | (pd.isnull(comparison["secondary_glazing"]) == False) + ) +) +windows_conflicting = comparison[comparison["conflict_double_glazing"] == True] + +# ------------ Floors ------------ +comparison["conflict_suspended_floor_insulation"] = ( + (comparison["has_floor_insulation"]) & + (pd.isnull(comparison["suspended_floor_insulation"]) == False) +) +floors_conflicting = comparison[comparison["conflict_suspended_floor_insulation"] == True] + +# ------------ Boiler Upgrade ------------ +comparison["conflict_boiler_upgrade"] = ( + (comparison["has_efficient_boiler"]) & + (pd.isnull(comparison["boiler_upgrade"]) == False) +) +boiler_conflicting = comparison[comparison["conflict_boiler_upgrade"] == True] + +# ------------ ASHP ------------ +comparison["conflict_air_source_heat_pump"] = ( + (comparison["has_ashp"]) & + (pd.isnull(comparison["air_source_heat_pump"]) == False) +) +ashp_conflicting = comparison[comparison["conflict_air_source_heat_pump"] == True] + +# ------------ heat controls ------------ +comparison["conflict_time_temperature_zone_control"] = ( + (comparison["has_top_heat_controls"]) & + (pd.isnull(comparison["time_temperature_zone_control"]) == False) +) +comparison["conflict_roomstat_programmer_trvs"] = ( + (comparison["has_optimal_heat_controls"]) & + (pd.isnull(comparison["roomstat_programmer_trvs"]) == False) +) +ttzc_conflicting = comparison[comparison["conflict_time_temperature_zone_control"] == True] +rst_conflicting = comparison[comparison["conflict_roomstat_programmer_trvs"] == True] + +# ------------ Flat Roof Insulation ----------- +comparison["conflict_flat_roof_insulation"] = ( + (comparison["has_flat_roof_insulation"]) & + (pd.isnull(comparison["flat_roof_insulation"]) == False) +) +flat_roof_conflicting = comparison[comparison["conflict_flat_roof_insulation"] == True] + +# All properties with conflicts +all_conflicts = pd.concat( + [ + cwi_conflicting, + iwi_conflicting, + ewi_conflicting, + loft_conflicting, + windows_conflicting, + floors_conflicting, + boiler_conflicting, + ashp_conflicting, + ttzc_conflicting, + rst_conflicting, + flat_roof_conflicting + ] +) + +all_conflicts["UPRN"].nunique() + + +# What do I need to do: +# TODO: - need to get a view of "all" measures for the property, not just recommended. We can do this but just looking +# at one scenario +# 1) I should store the current recommendations table, for the portfolio as a backup +# 2) I need a total of already installed SAP points for each property. This should probably be stored on the +# property_details_epc tabe +# 3) For anything already installed, I should mark already installed as True, and set the cost to zero +# 4) I need to update the plan cost to remove the cost of the installed measures + + +### Rebaselining + + +def get_installed_sap_adjustments_by_uprn_for_portfolio( + session, + portfolio_id: int, +) -> Dict[int, float]: + """ + Returns { uprn -> total_sap_delta } + """ + + uprn_subquery = ( + session.query(PropertyModel.uprn) + .filter(PropertyModel.portfolio_id == portfolio_id) + .filter(PropertyModel.uprn.isnot(None)) + .subquery() + ) + + rows = ( + session.query( + InstalledMeasure.uprn, + func.coalesce(func.sum(InstalledMeasure.sap_points), 0.0), + ) + .filter(InstalledMeasure.is_active.is_(True)) + .filter(InstalledMeasure.uprn.in_(uprn_subquery)) + .group_by(InstalledMeasure.uprn) + .all() + ) + + return {uprn: float(delta) for uprn, delta in rows} + + +def get_installed_measure_types_by_uprn( + session, + uprn: int, +) -> Set[str]: + rows = ( + session.query(InstalledMeasure.measure_type) + .filter(InstalledMeasure.uprn == uprn) + .filter(InstalledMeasure.is_active.is_(True)) + .all() + ) + + # Convert enums → strings + return { + r[0].value if hasattr(r[0], "value") else r[0] + for r in rows + } + + +# ------------------------------------------------------------ +# PROPERTY REBASING (READ-ONLY) +# ------------------------------------------------------------ + +def compute_property_sap_updates( + properties: List[PropertyModel], + sap_adjustments: Dict[int, float], +) -> List[dict]: + """ + Returns property SAP rebasing results. + Does NOT mutate DB objects. + """ + + updates = [] + + for prop in properties: + if prop.uprn is None or prop.original_sap_points is None: + continue + + sap_delta = sap_adjustments.get(prop.uprn, 0.0) + new_sap = prop.original_sap_points + sap_delta + + updates.append({ + "property_id": prop.id, + "uprn": prop.uprn, + "original_sap_points": prop.original_sap_points, + "installed_sap_delta": sap_delta, + "new_sap_points": new_sap, + "is_adjusted": sap_delta != 0, + }) + + return updates + + +# ------------------------------------------------------------ +# PLAN RECOMPUTATION HELPERS +# ------------------------------------------------------------ + +def get_effective_plan_recommendations( + session, + plan_id: int, + excluded_measure_types: Set[str], +) -> List[Recommendation]: + q = ( + session.query(Recommendation) + .join(PlanRecommendations) + .filter(PlanRecommendations.plan_id == plan_id) + .filter(Recommendation.default.is_(True)) + ) + + if excluded_measure_types: + q = q.filter( + ~Recommendation.measure_type.in_(excluded_measure_types) + ) + + return q.all() + + +def aggregate_plan_metrics(recommendations: list[Recommendation]): + agg = { + "sap_points": 0.0, + "co2_savings": 0.0, + "energy_bill_savings": 0.0, + "energy_consumption_savings": 0.0, + "valuation_increase": 0.0, + "cost_of_works": 0.0, + "contingency_cost": 0.0, + } + + for r in recommendations: + agg["sap_points"] += r.sap_points or 0.0 + agg["co2_savings"] += r.co2_equivalent_savings or 0.0 + agg["energy_bill_savings"] += r.energy_cost_savings or 0.0 + agg["energy_consumption_savings"] += r.energy_savings or 0.0 + agg["valuation_increase"] += r.property_valuation_increase or 0.0 + + base_cost = r.estimated_cost or 0.0 + agg["cost_of_works"] += base_cost + agg["contingency_cost"] += calculate_contingency_for_recommendation(r) + + return agg + + +# ------------------------------------------------------------ +# PLAN REBASING (READ-ONLY) +# ------------------------------------------------------------ + +def compute_plan_updates( + session, + plans: List[Plan], + properties_by_id: Dict[int, PropertyModel], + epcs_by_property_id: Dict[int, PropertyDetailsEpcModel], + property_sap_updates: Dict[int, dict], +) -> List[dict]: + """ + Computes plan metrics assuming properties are already rebased. + """ + + updates = [] + + for plan in plans: + prop = properties_by_id.get(plan.property_id) + epc = epcs_by_property_id.get(plan.property_id) + prop_update = property_sap_updates.get(plan.property_id) + + if not prop or not epc or not prop_update: + continue + + installed_types = get_installed_measure_types_by_uprn( + session, prop.uprn + ) + + future_recs = get_effective_plan_recommendations( + session, + plan.id, + installed_types, + ) + + metrics = aggregate_plan_metrics(future_recs) + + baseline_bill = ( + epc.heating_cost_current + + epc.hot_water_cost_current + + epc.lighting_cost_current + + epc.appliances_cost_current + + epc.gas_standing_charge + + epc.electricity_standing_charge + ) + + post_sap = prop_update["new_sap_points"] + metrics["sap_points"] + + updates.append({ + "plan_id": plan.id, + "property_id": plan.property_id, + + # SAP / EPC + "post_sap_points": post_sap, + "post_epc_rating": sap_to_epc(post_sap), + + # Carbon + "co2_savings": metrics["co2_savings"], + "post_co2_emissions": ( + epc.co2_emissions - metrics["co2_savings"] + if epc.co2_emissions is not None + else None + ), + + # Energy bills + "energy_bill_savings": metrics["energy_bill_savings"], + "post_energy_bill": baseline_bill - metrics["energy_bill_savings"], + + # Energy consumption + "energy_consumption_savings": metrics["energy_consumption_savings"], + "post_energy_consumption": ( + epc.primary_energy_consumption + - metrics["energy_consumption_savings"] + ), + + # Valuation + "valuation_increase": metrics["valuation_increase"], + "valuation_post_retrofit": ( + prop.current_valuation + metrics["valuation_increase"] + if prop.current_valuation is not None + else None + ), + + # Costs + "cost_of_works": metrics["cost_of_works"], + "contingency_cost": metrics["contingency_cost"], + }) + + return updates + + +def calculate_contingency_for_recommendation( + recommendation, +) -> float: + """ + Recompute contingency for a recommendation using the same + logic as the costing engine. + + Assumptions: + - recommendation.estimated_cost is the 'total' cost + - contingency is a percentage of total + """ + + if recommendation.estimated_cost is None: + return 0.0 + + # Normalise measure_type (Enum → str) + measure_type = ( + recommendation.measure_type.value + if hasattr(recommendation.measure_type, "value") + else recommendation.measure_type + ) + + # Measure-specific contingency if defined, else global fallback + contingency_rate = Costs.CONTINGENCIES.get( + measure_type, + Costs.CONTINGENCY, # default (e.g. 10%) + ) + + return recommendation.estimated_cost * contingency_rate + + +def persist_property_sap_updates( + property_updates_by_id: dict[int, dict], +): + """ + Writes adjusted SAP values back to property table. + Safe to re-run. + """ + + with db_session() as session: + properties = ( + session.query(PropertyModel) + .filter(PropertyModel.id.in_(property_updates_by_id.keys())) + .all() + ) + + for prop in properties: + update = property_updates_by_id[prop.id] + + prop.installed_measures_sap_point_adjustment = update["installed_sap_delta"] + prop.is_sap_points_adjusted_for_installed_measures = update["is_adjusted"] + prop.current_sap_points = update["new_sap_points"] + prop.current_epc_rating = sap_to_epc(update["new_sap_points"]) + + print(f"✅ Updated {len(properties)} properties") + + +def persist_plan_updates(plan_updates: list[dict]): + """ + Writes recalculated plan metrics. + Safe to re-run. + """ + + with db_session() as session: + plans = ( + session.query(Plan) + .filter(Plan.id.in_([u["plan_id"] for u in plan_updates])) + .all() + ) + + plans_by_id = {p.id: p for p in plans} + + for update in plan_updates: + plan = plans_by_id.get(update["plan_id"]) + if not plan: + continue + + # SAP / EPC + plan.post_sap_points = update["post_sap_points"] + plan.post_epc_rating = Epc(update["post_epc_rating"]) + + # Carbon + plan.co2_savings = update["co2_savings"] + plan.post_co2_emissions = update["post_co2_emissions"] + + # Energy + plan.energy_bill_savings = update["energy_bill_savings"] + plan.post_energy_bill = update["post_energy_bill"] + + plan.energy_consumption_savings = update["energy_consumption_savings"] + plan.post_energy_consumption = update["post_energy_consumption"] + + # Valuation + plan.valuation_increase = update["valuation_increase"] + plan.valuation_post_retrofit = update["valuation_post_retrofit"] + + # Costs + plan.cost_of_works = update["cost_of_works"] + plan.contingency_cost = update["contingency_cost"] + + print(f"✅ Updated {len(plans)} plans") + + +# ------------------------------------------------------------ +# EXECUTION (DRY RUN) +# ------------------------------------------------------------ + +PORTFOLIO_ID = 430 +# TODO - run the original sap points update on the peabody portfolio + +with db_read_session() as session: + properties = ( + session.query(PropertyModel) + .filter(PropertyModel.portfolio_id == PORTFOLIO_ID) + .all() + ) + + plans = ( + session.query(Plan) + .filter(Plan.portfolio_id == PORTFOLIO_ID) + .all() + ) + + epcs = { + e.property_id: e + for e in ( + session.query(PropertyDetailsEpcModel) + .join(PropertyModel) + .filter(PropertyModel.portfolio_id == PORTFOLIO_ID) + .all() + ) + } + + sap_adjustments = get_installed_sap_adjustments_by_uprn_for_portfolio( + session, + PORTFOLIO_ID, + ) + + property_updates = compute_property_sap_updates( + properties, + sap_adjustments, + ) + + property_updates_by_id = { + u["property_id"]: u + for u in property_updates + } + + properties_by_id = {p.id: p for p in properties} + + plan_updates = compute_plan_updates( + session, + plans, + properties_by_id, + epcs, + property_updates_by_id, + ) + +# When ready to run! +persist_property_sap_updates(property_updates_by_id) +persist_plan_updates(plan_updates) diff --git a/sfr/principal_pitch/2_export_data.py b/sfr/principal_pitch/2_export_data.py index 7574414c..89c29ce4 100644 --- a/sfr/principal_pitch/2_export_data.py +++ b/sfr/principal_pitch/2_export_data.py @@ -3,11 +3,14 @@ This script prepares the data for the financial model """ import pandas as pd +import numpy as np from backend.app.utils import sap_to_epc from sqlalchemy.orm import sessionmaker -from backend.app.db.connection import db_engine -from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations +from backend.app.db.connection import db_engine, db_read_session +from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations, RecommendationMaterials from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel, PropertyDetailsSpatial +from backend.app.db.functions.materials_functions import get_materials +from collections import defaultdict # PORTFOLIO_ID = 206 # SCENARIOS = [389] @@ -18,6 +21,7 @@ SCENARIOS = [ 862, # EPC B - No solid floor, ASHP COP 3.0 861, # EPC C, No EWI/IWI, No Solid Floor, ASHP 3.0 COP 859, # EPC C - no solid floor, ashp 3.0 + 885, # EPC B - fabric first, no solid floor, ashp 3.0 ] scenario_names = { 871: "EPC C, fabric first, no solid floor, ashp 3.0", @@ -25,6 +29,7 @@ scenario_names = { 862: "EPC B, No solid floor, ASHP COP 3.0", 861: "EPC C, No EWI IWI, No Solid Floor, ASHP 3.0 COP", 859: "EPC C, no solid floor, ashp 3.0", + 885: "EPC B, fabric first, no solid floor, ashp 3.0" } @@ -32,60 +37,97 @@ def get_data(portfolio_id, scenario_ids): session = sessionmaker(bind=db_engine)() session.begin() - # Get properties and their details for a specific portfolio + # -------------------- + # Properties + # -------------------- properties_query = session.query( PropertyModel, PropertyDetailsEpcModel ).join( - PropertyDetailsEpcModel, PropertyModel.id == PropertyDetailsEpcModel.property_id + PropertyDetailsEpcModel, + PropertyModel.id == PropertyDetailsEpcModel.property_id ).filter( - PropertyModel.portfolio_id == portfolio_id # Filter by portfolio ID + PropertyModel.portfolio_id == portfolio_id ).all() - # Transform properties data to include all fields dynamically properties_data = [ - {**{col.name: getattr(prop.PropertyModel, col.name) for col in PropertyModel.__table__.columns}, - **{col.name: getattr(prop.PropertyDetailsEpcModel, col.name) for col in - PropertyDetailsEpcModel.__table__.columns}} - for prop in properties_query + { + **{col.name: getattr(p.PropertyModel, col.name) + for col in PropertyModel.__table__.columns}, + **{col.name: getattr(p.PropertyDetailsEpcModel, col.name) + for col in PropertyDetailsEpcModel.__table__.columns}, + } + for p in properties_query ] - # Get property IDs from fetched properties + # -------------------- + # Plans + # -------------------- + plans_query = session.query(Plan).filter( + Plan.scenario_id.in_(scenario_ids) + ).all() - # Get plans linked to the fetched properties - plans_query = session.query(Plan).filter(Plan.scenario_id.in_(scenario_ids)).all() - - # Transform plans data to include all fields dynamically plans_data = [ {col.name: getattr(plan, col.name) for col in Plan.__table__.columns} for plan in plans_query ] - # Extract plan IDs for filtering recommendations through PlanRecommendations - plan_ids = [plan['id'] for plan in plans_data] + plan_ids = [p["id"] for p in plans_data] - # Get recommendations through PlanRecommendations for those plans and that are default + # -------------------- + # Recommendations (NO materials yet) + # -------------------- recommendations_query = session.query( Recommendation, Plan.scenario_id ).join( - PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id + PlanRecommendations, + Recommendation.id == PlanRecommendations.recommendation_id ).join( - Plan, Plan.id == PlanRecommendations.plan_id # Join with Plan to access scenario_id + Plan, + Plan.id == PlanRecommendations.plan_id ).filter( PlanRecommendations.plan_id.in_(plan_ids), - Recommendation.default == True # Filtering for default recommendations + Recommendation.default.is_(True) ).all() - # Transform recommendations data to include all fields dynamically and include scenario_id recommendations_data = [ - {**{col.name: getattr(rec.Recommendation, col.name) if hasattr(rec, 'Recommendation') else getattr(rec, - col.name) for - col in Recommendation.__table__.columns}, - "Scenario ID": rec.scenario_id} - for rec in recommendations_query + { + **{col.name: getattr(r.Recommendation, col.name) + for col in Recommendation.__table__.columns}, + "scenario_id": r.scenario_id, + "materials": [] # placeholder + } + for r in recommendations_query ] + recommendation_ids = [r["id"] for r in recommendations_data] + + # -------------------- + # Recommendation materials (SEPARATE QUERY) + # -------------------- + materials_query = session.query( + RecommendationMaterials + ).filter( + RecommendationMaterials.recommendation_id.in_(recommendation_ids) + ).all() + + # Group materials by recommendation_id + materials_by_recommendation = defaultdict(list) + + for m in materials_query: + materials_by_recommendation[m.recommendation_id].append({ + "material_id": m.material_id, + "depth": m.depth, + "quantity": m.quantity, + "quantity_unit": m.quantity_unit, + "estimated_cost": m.estimated_cost, + }) + + # Attach materials safely (no filtering side effects) + for r in recommendations_data: + r["materials"] = materials_by_recommendation.get(r["id"], []) + session.close() return properties_data, plans_data, recommendations_data @@ -97,6 +139,40 @@ properties_df = pd.DataFrame(properties_data) plans_df = pd.DataFrame(plans_data) recommendations_df = pd.DataFrame(recommendations_data) +with db_read_session() as session: + materials = get_materials(session) + +materials = pd.DataFrame(materials) + +material_lookup = ( + materials + .set_index("id")[["type", "includes_battery"]] + .to_dict("index") +) + + +def has_solar_with_battery(materials_list): + for m in materials_list or []: + mat = material_lookup.get(m["material_id"]) + if not mat: + continue + if mat["type"] == "solar_pv" and mat["includes_battery"]: + return True + return False + + +recommendations_df["has_solar_with_battery"] = ( + recommendations_df["materials"].apply(has_solar_with_battery) +) + +recommendations_df["measure_type"] = np.where( + recommendations_df["has_solar_with_battery"] == True, + recommendations_df["measure_type"] + "_with_battery", + recommendations_df["measure_type"] +) + +# Adjust material type to indicate if there is a battery included + from utils.s3 import read_csv_from_s3, read_excel_from_s3 # asset_list = read_excel_from_s3( @@ -107,13 +183,13 @@ from utils.s3 import read_csv_from_s3, read_excel_from_s3 for scenario_id in SCENARIOS: # Get recs for this scenario - recommended_measures_df = recommendations_df[recommendations_df["Scenario ID"] == scenario_id][ + recommended_measures_df = recommendations_df[recommendations_df["scenario_id"] == scenario_id][ ["property_id", "measure_type", "estimated_cost", "default"] ] recommended_measures_df = recommended_measures_df[recommended_measures_df["default"]] recommended_measures_df = recommended_measures_df.drop(columns=["default"]) - post_install_sap = recommendations_df[recommendations_df["Scenario ID"] == scenario_id][ + post_install_sap = recommendations_df[recommendations_df["scenario_id"] == scenario_id][ ["property_id", "default", "sap_points"]] post_install_sap = post_install_sap[post_install_sap["default"]] # Sum up the sap points by property id From fb6fca896627dd932ac157ca504586b926024052 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 8 Jan 2026 12:18:40 +0000 Subject: [PATCH 2/2] added additional loggging into engine --- .../db/functions/recommendations_functions.py | 8 +- backend/app/db/models/portfolio.py | 12 + backend/engine/engine.py | 8 + .../g_rebaselining_installed_measrues.py | 405 +++++++++++++++--- .../h_reset_estimated_epcs.py | 156 +++++++ 5 files changed, 515 insertions(+), 74 deletions(-) create mode 100644 etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index 726e919c..ae178c8a 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -1,15 +1,11 @@ from sqlalchemy import text -from sqlalchemy import insert, delete, select +from sqlalchemy import insert, delete from sqlalchemy.orm import Session from sqlalchemy.exc import SQLAlchemyError from backend.app.db.models.recommendations import ( Plan, Recommendation, RecommendationMaterials, PlanRecommendations, Scenario ) -from backend.app.db.models.portfolio import ( - PropertyModel, PropertyTargetsModel, PropertyDetailsEpcModel -) -from backend.app.db.models.funding import FundingPackageMeasures, FundingPackage -from backend.app.db.models.inspections import InspectionModel +from backend.app.db.models.portfolio import PropertyModel from backend.app.db.connection import db_session, db_read_session diff --git a/backend/app/db/models/portfolio.py b/backend/app/db/models/portfolio.py index e17e5856..d151bdc4 100644 --- a/backend/app/db/models/portfolio.py +++ b/backend/app/db/models/portfolio.py @@ -192,6 +192,18 @@ class PropertyDetailsEpcModel(Base): gas_standing_charge = Column(Float) electricity_standing_charge = Column(Float) + # Columns for re-baselining if we have an already installed measure + original_co2_emissions = Column(Float) + original_primary_energy_consumption = Column(Float) + original_current_energy_demand = Column(Float) + original_current_energy_demand_heating_hotwater = Column(Float) + # Adjustments + installed_measures_co2_adjustment = Column(Float) + installed_measures_energy_demand_adjustment = Column(Float) + installed_measures_total_energy_bill_adjustment = Column(Float) + installed_measures_heat_demand_adjustment = Column(Float) + is_epc_adjusted_for_installed_measures = Column(Boolean, default=False) + class PropertyDetailsSpatial(Base): __tablename__ = "property_details_spatial" diff --git a/backend/engine/engine.py b/backend/engine/engine.py index f6990c5c..9a9c30a2 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -535,12 +535,14 @@ async def model_engine(body: PlanTriggerRequest): logger.info("Getting the inputs") if body.file_type == "xlsx": + logger.info("Getting the plan input") plan_input = read_excel_from_s3( bucket_name=get_settings().PLAN_TRIGGER_BUCKET, file_key=body.trigger_file_path, sheet_name=body.sheet_name, header_row=0, ) + logger.into("Got the plan input from excel") # We now handle the case where the input data is a Domna standardised assset list if body.file_format == "domna_asset_list": @@ -619,9 +621,11 @@ async def model_engine(body: PlanTriggerRequest): raise ValueError("Other formats not yet supported") else: + logger.info("Getting the plan input from csv") plan_input = read_csv_from_s3( bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.trigger_file_path ) + logger.info("Got the plan input from csv") # We then slide it on the indexes if they are provided if body.index_start is not None and body.index_end is not None: @@ -640,12 +644,14 @@ async def model_engine(body: PlanTriggerRequest): if "domna_valuation" in plan_input[0]: valuation_data = [{"uprn": x["uprn"], "valuation": x["domna_valuation"]} for x in plan_input] + logger.info("Getting cleaning_data") cleaning_data = read_dataframe_from_s3_parquet( bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet", ) # Prepare input data addresses = Addresses.from_plan_input(plan_input, body) + logger.info("Checking database for existing properties") uprns = addresses.get_uprns() landlord_ids = addresses.get_landlord_ids() @@ -670,6 +676,7 @@ async def model_engine(body: PlanTriggerRequest): if key not in property_lookup: to_create.append(addr) + logger.info("Checking database for EPC cache") # Pre-requests to the db with db_read_session() as session: epc_cache_by_uprn = db_funcs.epc_functions.EpcStoreService.get_epcs_for_uprns(session, uprns) @@ -679,6 +686,7 @@ async def model_engine(body: PlanTriggerRequest): ) # If we have properties that need to be created, we cerate them in bulk + logger.info("Determine new properties to be created") new_property_ids = set() if to_create: logger.info("Creating %d new properties", len(to_create)) diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py b/etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py index bc628630..d310ffa4 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py @@ -10,6 +10,10 @@ from typing import Dict, List, Set from recommendations.Costs import Costs from backend.app.db.models.portfolio import Epc +pd.set_option('display.max_rows', 500) +pd.set_option('display.max_columns', 500) +pd.set_option('display.width', 1000) + def get_all_data(portfolio_id, scenario_ids): session = sessionmaker(bind=db_engine)() @@ -140,70 +144,65 @@ sustainability_data = pd.read_excel( # "No solid floor, ASHP COP 3.0.xlsx" # ) -# recommendations_df2 = recommendations_df2.merge( -# properties_df[["id", "uprn"]], -# left_on="property_id", -# right_on="id", -# how="left" -# ).rename(columns={"id_x": "id"}).drop(columns=["id_y"]) -# recommendations_df["uprn"] = recommendations_df["uprn"].astype(int).astype(str) - # We just need all of the measure types, per property -recommendation_measure_types = recommendations_df[["property_id", "measure_type"]].drop_duplicates() +recommendation_measure_types = recommendations_df[ + ["property_id", "measure_type" + , "sap_points", "heat_demand", "kwh_savings", "co2_equivalent_savings", + "energy_cost_savings" + ] +].drop_duplicates() recommendation_measure_types["flag"] = True # We pivot -recommendations_measures_pivot = recommendation_measure_types.pivot( +recommendations_measures_pivot = recommendation_measure_types[ + ["property_id", "measure_type", "flag"] +].drop_duplicates().pivot( index='property_id', columns='measure_type', values='flag' ) recommendations_measures_pivot = recommendations_measures_pivot.reset_index() -# Create a total cost column -recommendations_total_cost = recommendations_df.groupby("property_id")["estimated_cost"].sum().reset_index() -recommendations_measures_pivot = recommendations_measures_pivot.merge( - recommendations_total_cost, how="left", on="property_id" -) - properties_to_recs = properties_df.rename(columns={"solar_pv": "solar_data"}).merge( - recommendations_measures_pivot, how="left", left_on="id", right_on="property_id" + recommendations_measures_pivot, how="left", on="property_id" ) -properties_to_recs["estimated_cost"] = properties_to_recs["estimated_cost"].fillna(0) -sustainability_data["has_cavity_insulation"] = sustainability_data["Wall Insulation"].isin( +sustainability_data["cavity_wall_insulation"] = sustainability_data["Wall Insulation"].isin( ["FilledCavity", "FilledCavityPlusInternal", "FilledCavityPlusExternal"] ) -sustainability_data["has_iwi"] = sustainability_data["Wall Insulation"].isin( +sustainability_data["internal_wall_insulation"] = sustainability_data["Wall Insulation"].isin( ["Internal", "FilledCavityPlusInternal"] ) -sustainability_data["has_ewi"] = sustainability_data["Wall Insulation"].isin( +sustainability_data["external_wall_insulation"] = sustainability_data["Wall Insulation"].isin( ["External", "FilledCavityPlusExternal"] ) -sustainability_data["has_loft_insulation"] = sustainability_data["Roof Insulation"].isin( +sustainability_data["loft_insulation"] = sustainability_data["Roof Insulation"].isin( ["mm300", "mm250"] ) -sustainability_data["has_glazing"] = sustainability_data["Glazing"].isin( +sustainability_data["double_glazing"] = sustainability_data["Glazing"].isin( + ["Double 2002 or later", "Double but age unknown", "Triple", "DoubleKnownData", "Secondary", "TripleKnownData"] +) +sustainability_data["secondary_glazing"] = sustainability_data["Glazing"].isin( ["Double 2002 or later", "Double but age unknown", "Triple", "DoubleKnownData", "Secondary", "TripleKnownData"] ) -sustainability_data["has_floor_insulation"] = sustainability_data["Floor Insulation"].isin( +sustainability_data["suspended_floor_insulation"] = sustainability_data["Floor Insulation"].isin( ["RetroFitted"] ) -sustainability_data["has_efficient_boiler"] = ( +sustainability_data["boiler_upgrade"] = ( sustainability_data["Heating"].isin(["Boilers"]) & sustainability_data["Boiler Efficiency"].isin(["A"]) ) -sustainability_data["has_ashp"] = (sustainability_data["Heating"].isin(["Heat pumps (wet)"])) +sustainability_data["air_source_heat_pump"] = (sustainability_data["Heating"].isin(["Heat pumps (wet)"])) -sustainability_data["has_top_heat_controls"] = ( +sustainability_data["time_temperature_zone_control"] = ( sustainability_data["Controls Adequacy"].isin(["Top Spec"]) ) -sustainability_data["has_optimal_heat_controls"] = ( +sustainability_data["roomstat_programmer_trvs"] = ( sustainability_data["Controls Adequacy"].isin(["Optimal"]) ) -sustainability_data["has_flat_roof_insulation"] = ( +sustainability_data["flat_roof_insulation"] = ( (sustainability_data["Roof Construction"] == "Flat") & (sustainability_data["Roof Insulation"].isin(["mm50", "mm150", "mm100"])) ) @@ -218,22 +217,23 @@ comparison = sustainability_data.merge( ], left_on="UPRN", right_on="uprn", - how="left" + how="left", + suffixes=("", "_from_recs") ) # Flag entries where we've been told that walls are already insulated, but we have recommendations for wall insulation # ------------ Walls ------------ comparison["conflict_cavity_wall_insulation"] = ( - (comparison["has_cavity_insulation"]) & - (pd.isnull(comparison["cavity_wall_insulation"]) == False) + (comparison["cavity_wall_insulation"]) & + (pd.isnull(comparison["cavity_wall_insulation_from_recs"]) == False) ) comparison["conflict_iwi_wall_insulation"] = ( - (comparison["has_iwi"]) & - (pd.isnull(comparison["internal_wall_insulation"]) == False) + (comparison["internal_wall_insulation"]) & + (pd.isnull(comparison["internal_wall_insulation_from_recs"]) == False) ) comparison["conflict_ewi_wall_insulation"] = ( - (comparison["has_ewi"]) & - (pd.isnull(comparison["internal_wall_insulation"]) == False) + (comparison["external_wall_insulation"]) & + (pd.isnull(comparison["external_wall_insulation_from_recs"]) == False) ) cwi_conflicting = comparison[comparison["conflict_cavity_wall_insulation"] == True] @@ -241,59 +241,66 @@ iwi_conflicting = comparison[comparison["conflict_iwi_wall_insulation"] == True] ewi_conflicting = comparison[comparison["conflict_ewi_wall_insulation"] == True] # ------------ Roof ------------ -comparison["conflict_roof_insulation"] = ( - (comparison["has_loft_insulation"]) & - (pd.isnull(comparison["loft_insulation"]) == False) +comparison["conflict_loft_insulation"] = ( + (comparison["loft_insulation"]) & + (pd.isnull(comparison["loft_insulation_from_recs"]) == False) ) -loft_conflicting = comparison[comparison["conflict_roof_insulation"] == True] +loft_conflicting = comparison[comparison["conflict_loft_insulation"] == True] # ------------ Windows ------------ comparison["conflict_double_glazing"] = ( - (comparison["has_glazing"]) & + (comparison["double_glazing"]) & ( - (pd.isnull(comparison["double_glazing"]) == False) | (pd.isnull(comparison["secondary_glazing"]) == False) + (pd.isnull(comparison["double_glazing_from_recs"]) == False) ) ) -windows_conflicting = comparison[comparison["conflict_double_glazing"] == True] +comparison["conflict_secondary_glazing"] = ( + (comparison["secondary_glazing"]) & + ( + (pd.isnull(comparison["secondary_glazing_from_recs"]) == False) + ) +) +double_glazing_conflicting = comparison[comparison["conflict_double_glazing"] == True] +secondary_glazing_conflicting = comparison[comparison["conflict_secondary_glazing"] == True] # ------------ Floors ------------ comparison["conflict_suspended_floor_insulation"] = ( - (comparison["has_floor_insulation"]) & - (pd.isnull(comparison["suspended_floor_insulation"]) == False) + (comparison["suspended_floor_insulation"]) & + (pd.isnull(comparison["suspended_floor_insulation_from_recs"]) == False) ) floors_conflicting = comparison[comparison["conflict_suspended_floor_insulation"] == True] # ------------ Boiler Upgrade ------------ comparison["conflict_boiler_upgrade"] = ( - (comparison["has_efficient_boiler"]) & - (pd.isnull(comparison["boiler_upgrade"]) == False) + (comparison["boiler_upgrade"]) & + (pd.isnull(comparison["boiler_upgrade_from_recs"]) == False) ) boiler_conflicting = comparison[comparison["conflict_boiler_upgrade"] == True] # ------------ ASHP ------------ comparison["conflict_air_source_heat_pump"] = ( - (comparison["has_ashp"]) & - (pd.isnull(comparison["air_source_heat_pump"]) == False) + (comparison["air_source_heat_pump"]) & + (pd.isnull(comparison["air_source_heat_pump_from_recs"]) == False) ) ashp_conflicting = comparison[comparison["conflict_air_source_heat_pump"] == True] # ------------ heat controls ------------ comparison["conflict_time_temperature_zone_control"] = ( - (comparison["has_top_heat_controls"]) & - (pd.isnull(comparison["time_temperature_zone_control"]) == False) + (comparison["time_temperature_zone_control"]) & + (pd.isnull(comparison["time_temperature_zone_control_from_recs"]) == False) ) comparison["conflict_roomstat_programmer_trvs"] = ( - (comparison["has_optimal_heat_controls"]) & - (pd.isnull(comparison["roomstat_programmer_trvs"]) == False) + (comparison["roomstat_programmer_trvs"]) & + (pd.isnull(comparison["roomstat_programmer_trvs_from_recs"]) == False) ) ttzc_conflicting = comparison[comparison["conflict_time_temperature_zone_control"] == True] rst_conflicting = comparison[comparison["conflict_roomstat_programmer_trvs"] == True] # ------------ Flat Roof Insulation ----------- comparison["conflict_flat_roof_insulation"] = ( - (comparison["has_flat_roof_insulation"]) & - (pd.isnull(comparison["flat_roof_insulation"]) == False) + (comparison["flat_roof_insulation"]) & + (pd.isnull(comparison["flat_roof_insulation_from_recs"]) == False) ) flat_roof_conflicting = comparison[comparison["conflict_flat_roof_insulation"] == True] @@ -304,7 +311,8 @@ all_conflicts = pd.concat( iwi_conflicting, ewi_conflicting, loft_conflicting, - windows_conflicting, + double_glazing_conflicting, + secondary_glazing_conflicting, floors_conflicting, boiler_conflicting, ashp_conflicting, @@ -314,8 +322,100 @@ all_conflicts = pd.concat( ] ) -all_conflicts["UPRN"].nunique() +all_conflicts = all_conflicts[ + [ + "uprn", + 'conflict_cavity_wall_insulation', + 'conflict_iwi_wall_insulation', + 'conflict_ewi_wall_insulation', + 'conflict_loft_insulation', + 'conflict_double_glazing', + 'conflict_secondary_glazing', + 'conflict_suspended_floor_insulation', 'conflict_boiler_upgrade', + 'conflict_air_source_heat_pump', + 'conflict_time_temperature_zone_control', 'conflict_roomstat_programmer_trvs', 'conflict_flat_roof_insulation'] +] +all_conflicts = all_conflicts.rename( + columns={ + "conflict_cavity_wall_insulation": "cavity_wall_insulation", + "conflict_iwi_wall_insulation": "internal_wall_insulation", + "conflict_ewi_wall_insulation": "external_wall_insulation", + "conflict_loft_insulation": "loft_insulation", + "conflict_double_glazing": "double_glazing", + "conflict_secondary_glazing": "secondary_glazing", + "conflict_suspended_floor_insulation": "suspended_floor_insulation", + "conflict_boiler_upgrade": "boiler_upgrade", + "conflict_air_source_heat_pump": "air_source_heat_pump", + "conflict_time_temperature_zone_control": "time_temperature_zone_control", + "conflict_roomstat_programmer_trvs": "roomstat_programmer_trvs", + "conflict_flat_roof_insulation": "flat_roof_insulation" + + } +) + +# Reshape by UPRN by melting +all_conflicts = all_conflicts.melt( + id_vars=["uprn"], + var_name="measure_type", + value_name="already_installed" +) + +recommendations_df["property_id"] = recommendations_df["property_id"].astype(int).astype(str) +properties_df["property_id"] = properties_df["property_id"].astype(int).astype(str) + +recs_with_uprn = recommendations_df.merge( + properties_df[["property_id", "uprn"]], + on="property_id", + how="left", + suffixes=("", "_prop") +) + +recs_with_uprn = ( + recs_with_uprn + .sort_values("sap_points", ascending=False) + .groupby(["uprn", "measure_type"], as_index=False) + .first() +) + +recs_with_uprn["uprn"] = recs_with_uprn["uprn"].astype(str) + +installed_measures_df = all_conflicts.merge( + recs_with_uprn[["uprn", "measure_type", "sap_points", "heat_demand", "kwh_savings", "co2_equivalent_savings", + "energy_cost_savings"]], + how="left", + on=["uprn", "measure_type"] +) + +installed_measures_df = installed_measures_df[installed_measures_df["already_installed"] == True] + +for col in ["sap_points", "heat_demand", "kwh_savings", "co2_equivalent_savings", "energy_cost_savings"]: + print(f"n missings for {col}: {pd.isnull(installed_measures_df[col]).sum()}", ) + +# Do some calcs on SAP impact +sap_impact = installed_measures_df.groupby(["uprn"])["sap_points"].sum().reset_index() +properties_sap = properties_df[["uprn", "current_sap_points", "current_epc_rating"]].copy() +properties_sap["uprn"] = properties_sap["uprn"].astype(str) + +old_sap_vs_new = properties_sap.merge( + sap_impact, how="inner", on="uprn" +) +old_sap_vs_new["new_sap_points"] = old_sap_vs_new["current_sap_points"] + old_sap_vs_new["sap_points"] +old_sap_vs_new["new_epc_rating"] = old_sap_vs_new["new_sap_points"].apply( + lambda x: sap_to_epc(x) +) +# How many properties go from below C to above +old_sap_vs_new[old_sap_vs_new["current_sap_points"] < 69]["new_epc_rating"].value_counts() +changed = old_sap_vs_new[ + (old_sap_vs_new["current_sap_points"] < 69) & (old_sap_vs_new["new_sap_points"] >= 69) + ] +properties_df[properties_df["current_sap_points"] < 69].shape + +old_sap_vs_new[old_sap_vs_new["current_epc_rating"].isin(["Epc.F", "Epc.G"])] + +25979 - 3891 + +sustainability_data[sustainability_data["UPRN"] == "100021204260"] # What do I need to do: # TODO: - need to get a view of "all" measures for the property, not just recommended. We can do this but just looking @@ -330,12 +430,26 @@ all_conflicts["UPRN"].nunique() ### Rebaselining -def get_installed_sap_adjustments_by_uprn_for_portfolio( +from typing import Dict +from sqlalchemy import func + + +def get_installed_measure_adjustments_by_uprn_for_portfolio( session, portfolio_id: int, -) -> Dict[int, float]: +) -> Dict[int, dict]: """ - Returns { uprn -> total_sap_delta } + Returns per-UPRN installed-measure adjustments. + + { + uprn: { + sap_points: float, + co2: float, + energy_kwh: float, + energy_bill: float, + heat_demand: float, + } + } """ uprn_subquery = ( @@ -347,8 +461,22 @@ def get_installed_sap_adjustments_by_uprn_for_portfolio( rows = ( session.query( - InstalledMeasure.uprn, - func.coalesce(func.sum(InstalledMeasure.sap_points), 0.0), + InstalledMeasure.uprn.label("uprn"), + + func.coalesce(func.sum(InstalledMeasure.sap_points), 0.0) + .label("sap_points"), + + func.coalesce(func.sum(InstalledMeasure.carbon_savings), 0.0) + .label("co2"), + + func.coalesce(func.sum(InstalledMeasure.kwh_savings), 0.0) + .label("energy_kwh"), + + func.coalesce(func.sum(InstalledMeasure.bill_savings), 0.0) + .label("energy_bill"), + + func.coalesce(func.sum(InstalledMeasure.heat_demand_savings), 0.0) + .label("heat_demand"), ) .filter(InstalledMeasure.is_active.is_(True)) .filter(InstalledMeasure.uprn.in_(uprn_subquery)) @@ -356,7 +484,16 @@ def get_installed_sap_adjustments_by_uprn_for_portfolio( .all() ) - return {uprn: float(delta) for uprn, delta in rows} + return { + row.uprn: { + "sap_points": float(row.sap_points), + "co2": float(row.co2), + "energy_kwh": float(row.energy_kwh), + "energy_bill": float(row.energy_bill), + "heat_demand": float(row.heat_demand), + } + for row in rows + } def get_installed_measure_types_by_uprn( @@ -608,6 +745,62 @@ def persist_property_sap_updates( print(f"✅ Updated {len(properties)} properties") +def compute_epc_rebasing_updates( + epcs: Dict[int, PropertyDetailsEpcModel], + properties_by_id: Dict[int, PropertyModel], + installed_adjustments_by_uprn: Dict[int, dict], +) -> Dict[int, dict]: + """ + Computes EPC rebasing updates without mutating DB objects. + Keyed by property_id. + """ + + updates: Dict[int, dict] = {} + + for property_id, epc in epcs.items(): + prop = properties_by_id.get(property_id) + if not prop or prop.uprn is None: + continue + + adj = installed_adjustments_by_uprn.get(prop.uprn) + if not adj: + continue + + updates[property_id] = { + "property_id": property_id, + + # Originals (only set once) + "original_co2_emissions": ( + epc.original_co2_emissions + if epc.original_co2_emissions is not None + else epc.co2_emissions + ), + "original_primary_energy_consumption": ( + epc.original_primary_energy_consumption + if epc.original_primary_energy_consumption is not None + else epc.primary_energy_consumption + ), + "original_current_energy_demand": ( + epc.original_current_energy_demand + if epc.original_current_energy_demand is not None + else epc.current_energy_demand + ), + "original_current_energy_demand_heating_hotwater": ( + epc.original_current_energy_demand_heating_hotwater + if epc.original_current_energy_demand_heating_hotwater is not None + else epc.current_energy_demand_heating_hotwater + ), + + # Adjustments (always re-applied from originals) + "installed_measures_co2_adjustment": adj["co2"], + "installed_measures_energy_demand_adjustment": adj["energy_kwh"], + "installed_measures_total_energy_bill_adjustment": adj["energy_bill"], + "installed_measures_heat_demand_adjustment": adj["heat_demand"], + } + + return updates + + def persist_plan_updates(plan_updates: list[dict]): """ Writes recalculated plan metrics. @@ -654,6 +847,74 @@ def persist_plan_updates(plan_updates: list[dict]): print(f"✅ Updated {len(plans)} plans") +def persist_epc_rebasing_updates( + epc_updates_by_property_id: Dict[int, dict], +): + """ + Overwrites EPC metrics using installed-measure rebasing. + Safe to re-run. + """ + + with db_session() as session: + epcs = ( + session.query(PropertyDetailsEpcModel) + .filter( + PropertyDetailsEpcModel.property_id.in_( + epc_updates_by_property_id.keys() + ) + ) + .all() + ) + + for epc in epcs: + u = epc_updates_by_property_id[epc.property_id] + + # Store originals once + epc.original_co2_emissions = u["original_co2_emissions"] + epc.original_primary_energy_consumption = ( + u["original_primary_energy_consumption"] + ) + epc.original_current_energy_demand = ( + u["original_current_energy_demand"] + ) + epc.original_current_energy_demand_heating_hotwater = ( + u["original_current_energy_demand_heating_hotwater"] + ) + + # Apply rebased values + epc.co2_emissions = ( + u["original_co2_emissions"] + - u["installed_measures_co2_adjustment"] + ) + + epc.primary_energy_consumption = ( + u["original_primary_energy_consumption"] + - u["installed_measures_heat_demand_adjustment"] + ) + + epc.current_energy_demand = ( + u["original_current_energy_demand"] + - u["installed_measures_energy_demand_adjustment"] + ) + + # Flags + audit fields + epc.installed_measures_co2_adjustment = ( + u["installed_measures_co2_adjustment"] + ) + epc.installed_measures_energy_demand_adjustment = ( + u["installed_measures_energy_demand_adjustment"] + ) + epc.installed_measures_total_energy_bill_adjustment = ( + u["installed_measures_total_energy_bill_adjustment"] + ) + epc.installed_measures_heat_demand_adjustment = ( + u["installed_measures_heat_demand_adjustment"] + ) + epc.is_epc_adjusted_for_installed_measures = True + + print(f"✅ Updated {len(epcs)} EPC records") + + # ------------------------------------------------------------ # EXECUTION (DRY RUN) # ------------------------------------------------------------ @@ -684,22 +945,29 @@ with db_read_session() as session: ) } - sap_adjustments = get_installed_sap_adjustments_by_uprn_for_portfolio( - session, - PORTFOLIO_ID, + installed_adjustments = ( + get_installed_measure_adjustments_by_uprn_for_portfolio( + session, + PORTFOLIO_ID, + ) ) property_updates = compute_property_sap_updates( properties, - sap_adjustments, + {uprn: v["sap_points"] for uprn, v in installed_adjustments.items()} ) + properties_by_id = {p.id: p for p in properties} property_updates_by_id = { u["property_id"]: u for u in property_updates } - properties_by_id = {p.id: p for p in properties} + epc_updates = compute_epc_rebasing_updates( + epcs, + properties_by_id, + installed_adjustments, + ) plan_updates = compute_plan_updates( session, @@ -712,3 +980,4 @@ with db_read_session() as session: # When ready to run! persist_property_sap_updates(property_updates_by_id) persist_plan_updates(plan_updates) +persist_epc_rebasing_updates(epc_updates) diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py b/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py new file mode 100644 index 00000000..57b858ce --- /dev/null +++ b/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py @@ -0,0 +1,156 @@ +import pandas as pd +from sqlalchemy.orm import Session +from sqlalchemy import text, select +from backend.app.db.connection import db_read_session +from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel +from backend.app.db.models.recommendations import Plan + +PORTFOLIO_ID = 431 + +with db_read_session() as session: + # Get all properties from PropertyDetailsEpcModel, where estimated is True, for portfolio 419 + estimated_epcs = session.query(PropertyDetailsEpcModel).filter( + # PropertyDetailsEpcModel.estimated == True, + PropertyDetailsEpcModel.property_id.in_( + session.query(PropertyModel.id).filter(PropertyModel.portfolio_id == PORTFOLIO_ID) + ) + ).all() + + # Get the ids + estimated_epc_ids = [epc.property_id for epc in estimated_epcs] + +# I want to get the UPRNS for these properties, from the property model +with db_read_session() as session: + estimated_uprns = session.query(PropertyModel.uprn).filter( + PropertyModel.id.in_( + session.query(PropertyDetailsEpcModel.property_id).filter( + PropertyDetailsEpcModel.id.in_(estimated_epc_ids) + ) + ) + ).all() + + estimated_uprns_list = [uprn for (uprn,) in estimated_uprns] + +# Go the the SAL +sal_1 = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20251213 Model " + "data.xlsx", + sheet_name="Standardised Asset List" +) +sal_2 = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260105 - additional " + "UPRNS.xlsx", + sheet_name="Standardised Asset List" +) + +sal = pd.concat([sal_1, sal_2]) +sal = sal.drop_duplicates(subset=['epc_os_uprn']) + +estimated_to_refresh = sal[sal["epc_os_uprn"].isin(estimated_uprns_list)].copy() + +SCENARIOS = [ + 871, # EPC C - fabric first, no solid floor, ashp 3.0 + 863, # EPC B, No EWI/IWI, No Solid Floor, ASHP 3.0 COP + 862, # EPC B - No solid floor, ASHP COP 3.0 + 861, # EPC C, No EWI/IWI, No Solid Floor, ASHP 3.0 COP + 859, # EPC C - no solid floor, ashp 3.0 + 885, # EPC B - fabric first, no solid floor, ashp 3.0 +] + +# Get all plans, associated to these properties - the property IDs are in estimated_epc_ids +with db_read_session() as session: + result = session.execute( + select(Plan.id, Plan.property_id) + .where(Plan.property_id.in_(estimated_epc_ids)) + ) + plans = [ + { + "plan_id": row.id, + "property_id": row.property_id, + } for row in result + ] + +df = pd.DataFrame(plans) +df = df.sort_values("property_id", ascending=True) + +agg = df.groupby("property_id").size().reset_index(name="n_plans") +agg = agg.sort_values("n_plans", ascending=True) +agg[agg["n_plans"] != 1] +assert all(agg["n_plans"] == 1) + + +def delete_plan_batch(session: Session, plan_ids: list[int]): + if not plan_ids: + return + + session.execute(text("SET LOCAL lock_timeout = '5s'")) + + params = {"plan_ids": plan_ids} + + # ---------------------------- + # recommendation_materials + # ---------------------------- + session.execute( + text(""" + DELETE FROM recommendation_materials rm + USING plan_recommendations pr + WHERE rm.recommendation_id = pr.recommendation_id + AND pr.plan_id = ANY(:plan_ids) + """), + params, + ) + + # ---------------------------- + # plan_recommendations + # ---------------------------- + session.execute( + text(""" + DELETE FROM plan_recommendations + WHERE plan_id = ANY(:plan_ids) + """), + params, + ) + + # ---------------------------- + # recommendations (only those used by these plans) + # ---------------------------- + session.execute( + text(""" + DELETE FROM recommendation r + WHERE r.id IN ( + SELECT DISTINCT recommendation_id + FROM plan_recommendations + WHERE plan_id = ANY(:plan_ids) + ) + """), + params, + ) + + # ---------------------------- + # plans LAST + # ---------------------------- + session.execute( + text(""" + DELETE FROM plan + WHERE id = ANY(:plan_ids) + """), + params, + ) + + +# Store the SAL +filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260101 " + "sal.xlsx") + +with pd.ExcelWriter(filename) as writer: + sal.to_excel(writer, sheet_name="Standardised Asset List", index=False) + # Top 1000 for testing + sal.iloc[0:1000, :].to_excel(writer, sheet_name="batch 1", index=False) + # Batch 2 is the next 20,000 + sal.iloc[1000:21000, :].to_excel(writer, sheet_name="batch 2", index=False) + # Batch 3 is the next 20,000 + sal.iloc[21000:41000, :].to_excel(writer, sheet_name="batch 3", index=False) + + sal.iloc[41000:61000, :].to_excel(writer, sheet_name="batch 4", index=False) + sal.iloc[61000:81000, :].to_excel(writer, sheet_name="batch 5", index=False) + sal.iloc[81000:, :].to_excel(writer, sheet_name="batch 5", index=False)