From fb6fca896627dd932ac157ca504586b926024052 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 8 Jan 2026 12:18:40 +0000 Subject: [PATCH] added additional loggging into engine --- .../db/functions/recommendations_functions.py | 8 +- backend/app/db/models/portfolio.py | 12 + backend/engine/engine.py | 8 + .../g_rebaselining_installed_measrues.py | 405 +++++++++++++++--- .../h_reset_estimated_epcs.py | 156 +++++++ 5 files changed, 515 insertions(+), 74 deletions(-) create mode 100644 etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index 726e919c..ae178c8a 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -1,15 +1,11 @@ from sqlalchemy import text -from sqlalchemy import insert, delete, select +from sqlalchemy import insert, delete from sqlalchemy.orm import Session from sqlalchemy.exc import SQLAlchemyError from backend.app.db.models.recommendations import ( Plan, Recommendation, RecommendationMaterials, PlanRecommendations, Scenario ) -from backend.app.db.models.portfolio import ( - PropertyModel, PropertyTargetsModel, PropertyDetailsEpcModel -) -from backend.app.db.models.funding import FundingPackageMeasures, FundingPackage -from backend.app.db.models.inspections import InspectionModel +from backend.app.db.models.portfolio import PropertyModel from backend.app.db.connection import db_session, db_read_session diff --git a/backend/app/db/models/portfolio.py b/backend/app/db/models/portfolio.py index e17e5856..d151bdc4 100644 --- a/backend/app/db/models/portfolio.py +++ b/backend/app/db/models/portfolio.py @@ -192,6 +192,18 @@ class PropertyDetailsEpcModel(Base): gas_standing_charge = Column(Float) electricity_standing_charge = Column(Float) + # Columns for re-baselining if we have an already installed measure + original_co2_emissions = Column(Float) + original_primary_energy_consumption = Column(Float) + original_current_energy_demand = Column(Float) + original_current_energy_demand_heating_hotwater = Column(Float) + # Adjustments + installed_measures_co2_adjustment = Column(Float) + installed_measures_energy_demand_adjustment = Column(Float) + installed_measures_total_energy_bill_adjustment = Column(Float) + installed_measures_heat_demand_adjustment = Column(Float) + is_epc_adjusted_for_installed_measures = Column(Boolean, default=False) + class PropertyDetailsSpatial(Base): __tablename__ = "property_details_spatial" diff --git a/backend/engine/engine.py b/backend/engine/engine.py index f6990c5c..9a9c30a2 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -535,12 +535,14 @@ async def model_engine(body: PlanTriggerRequest): logger.info("Getting the inputs") if body.file_type == "xlsx": + logger.info("Getting the plan input") plan_input = read_excel_from_s3( bucket_name=get_settings().PLAN_TRIGGER_BUCKET, file_key=body.trigger_file_path, sheet_name=body.sheet_name, header_row=0, ) + logger.into("Got the plan input from excel") # We now handle the case where the input data is a Domna standardised assset list if body.file_format == "domna_asset_list": @@ -619,9 +621,11 @@ async def model_engine(body: PlanTriggerRequest): raise ValueError("Other formats not yet supported") else: + logger.info("Getting the plan input from csv") plan_input = read_csv_from_s3( bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.trigger_file_path ) + logger.info("Got the plan input from csv") # We then slide it on the indexes if they are provided if body.index_start is not None and body.index_end is not None: @@ -640,12 +644,14 @@ async def model_engine(body: PlanTriggerRequest): if "domna_valuation" in plan_input[0]: valuation_data = [{"uprn": x["uprn"], "valuation": x["domna_valuation"]} for x in plan_input] + logger.info("Getting cleaning_data") cleaning_data = read_dataframe_from_s3_parquet( bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet", ) # Prepare input data addresses = Addresses.from_plan_input(plan_input, body) + logger.info("Checking database for existing properties") uprns = addresses.get_uprns() landlord_ids = addresses.get_landlord_ids() @@ -670,6 +676,7 @@ async def model_engine(body: PlanTriggerRequest): if key not in property_lookup: to_create.append(addr) + logger.info("Checking database for EPC cache") # Pre-requests to the db with db_read_session() as session: epc_cache_by_uprn = db_funcs.epc_functions.EpcStoreService.get_epcs_for_uprns(session, uprns) @@ -679,6 +686,7 @@ async def model_engine(body: PlanTriggerRequest): ) # If we have properties that need to be created, we cerate them in bulk + logger.info("Determine new properties to be created") new_property_ids = set() if to_create: logger.info("Creating %d new properties", len(to_create)) diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py b/etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py index bc628630..d310ffa4 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py @@ -10,6 +10,10 @@ from typing import Dict, List, Set from recommendations.Costs import Costs from backend.app.db.models.portfolio import Epc +pd.set_option('display.max_rows', 500) +pd.set_option('display.max_columns', 500) +pd.set_option('display.width', 1000) + def get_all_data(portfolio_id, scenario_ids): session = sessionmaker(bind=db_engine)() @@ -140,70 +144,65 @@ sustainability_data = pd.read_excel( # "No solid floor, ASHP COP 3.0.xlsx" # ) -# recommendations_df2 = recommendations_df2.merge( -# properties_df[["id", "uprn"]], -# left_on="property_id", -# right_on="id", -# how="left" -# ).rename(columns={"id_x": "id"}).drop(columns=["id_y"]) -# recommendations_df["uprn"] = recommendations_df["uprn"].astype(int).astype(str) - # We just need all of the measure types, per property -recommendation_measure_types = recommendations_df[["property_id", "measure_type"]].drop_duplicates() +recommendation_measure_types = recommendations_df[ + ["property_id", "measure_type" + , "sap_points", "heat_demand", "kwh_savings", "co2_equivalent_savings", + "energy_cost_savings" + ] +].drop_duplicates() recommendation_measure_types["flag"] = True # We pivot -recommendations_measures_pivot = recommendation_measure_types.pivot( +recommendations_measures_pivot = recommendation_measure_types[ + ["property_id", "measure_type", "flag"] +].drop_duplicates().pivot( index='property_id', columns='measure_type', values='flag' ) recommendations_measures_pivot = recommendations_measures_pivot.reset_index() -# Create a total cost column -recommendations_total_cost = recommendations_df.groupby("property_id")["estimated_cost"].sum().reset_index() -recommendations_measures_pivot = recommendations_measures_pivot.merge( - recommendations_total_cost, how="left", on="property_id" -) - properties_to_recs = properties_df.rename(columns={"solar_pv": "solar_data"}).merge( - recommendations_measures_pivot, how="left", left_on="id", right_on="property_id" + recommendations_measures_pivot, how="left", on="property_id" ) -properties_to_recs["estimated_cost"] = properties_to_recs["estimated_cost"].fillna(0) -sustainability_data["has_cavity_insulation"] = sustainability_data["Wall Insulation"].isin( +sustainability_data["cavity_wall_insulation"] = sustainability_data["Wall Insulation"].isin( ["FilledCavity", "FilledCavityPlusInternal", "FilledCavityPlusExternal"] ) -sustainability_data["has_iwi"] = sustainability_data["Wall Insulation"].isin( +sustainability_data["internal_wall_insulation"] = sustainability_data["Wall Insulation"].isin( ["Internal", "FilledCavityPlusInternal"] ) -sustainability_data["has_ewi"] = sustainability_data["Wall Insulation"].isin( +sustainability_data["external_wall_insulation"] = sustainability_data["Wall Insulation"].isin( ["External", "FilledCavityPlusExternal"] ) -sustainability_data["has_loft_insulation"] = sustainability_data["Roof Insulation"].isin( +sustainability_data["loft_insulation"] = sustainability_data["Roof Insulation"].isin( ["mm300", "mm250"] ) -sustainability_data["has_glazing"] = sustainability_data["Glazing"].isin( +sustainability_data["double_glazing"] = sustainability_data["Glazing"].isin( + ["Double 2002 or later", "Double but age unknown", "Triple", "DoubleKnownData", "Secondary", "TripleKnownData"] +) +sustainability_data["secondary_glazing"] = sustainability_data["Glazing"].isin( ["Double 2002 or later", "Double but age unknown", "Triple", "DoubleKnownData", "Secondary", "TripleKnownData"] ) -sustainability_data["has_floor_insulation"] = sustainability_data["Floor Insulation"].isin( +sustainability_data["suspended_floor_insulation"] = sustainability_data["Floor Insulation"].isin( ["RetroFitted"] ) -sustainability_data["has_efficient_boiler"] = ( +sustainability_data["boiler_upgrade"] = ( sustainability_data["Heating"].isin(["Boilers"]) & sustainability_data["Boiler Efficiency"].isin(["A"]) ) -sustainability_data["has_ashp"] = (sustainability_data["Heating"].isin(["Heat pumps (wet)"])) +sustainability_data["air_source_heat_pump"] = (sustainability_data["Heating"].isin(["Heat pumps (wet)"])) -sustainability_data["has_top_heat_controls"] = ( +sustainability_data["time_temperature_zone_control"] = ( sustainability_data["Controls Adequacy"].isin(["Top Spec"]) ) -sustainability_data["has_optimal_heat_controls"] = ( +sustainability_data["roomstat_programmer_trvs"] = ( sustainability_data["Controls Adequacy"].isin(["Optimal"]) ) -sustainability_data["has_flat_roof_insulation"] = ( +sustainability_data["flat_roof_insulation"] = ( (sustainability_data["Roof Construction"] == "Flat") & (sustainability_data["Roof Insulation"].isin(["mm50", "mm150", "mm100"])) ) @@ -218,22 +217,23 @@ comparison = sustainability_data.merge( ], left_on="UPRN", right_on="uprn", - how="left" + how="left", + suffixes=("", "_from_recs") ) # Flag entries where we've been told that walls are already insulated, but we have recommendations for wall insulation # ------------ Walls ------------ comparison["conflict_cavity_wall_insulation"] = ( - (comparison["has_cavity_insulation"]) & - (pd.isnull(comparison["cavity_wall_insulation"]) == False) + (comparison["cavity_wall_insulation"]) & + (pd.isnull(comparison["cavity_wall_insulation_from_recs"]) == False) ) comparison["conflict_iwi_wall_insulation"] = ( - (comparison["has_iwi"]) & - (pd.isnull(comparison["internal_wall_insulation"]) == False) + (comparison["internal_wall_insulation"]) & + (pd.isnull(comparison["internal_wall_insulation_from_recs"]) == False) ) comparison["conflict_ewi_wall_insulation"] = ( - (comparison["has_ewi"]) & - (pd.isnull(comparison["internal_wall_insulation"]) == False) + (comparison["external_wall_insulation"]) & + (pd.isnull(comparison["external_wall_insulation_from_recs"]) == False) ) cwi_conflicting = comparison[comparison["conflict_cavity_wall_insulation"] == True] @@ -241,59 +241,66 @@ iwi_conflicting = comparison[comparison["conflict_iwi_wall_insulation"] == True] ewi_conflicting = comparison[comparison["conflict_ewi_wall_insulation"] == True] # ------------ Roof ------------ -comparison["conflict_roof_insulation"] = ( - (comparison["has_loft_insulation"]) & - (pd.isnull(comparison["loft_insulation"]) == False) +comparison["conflict_loft_insulation"] = ( + (comparison["loft_insulation"]) & + (pd.isnull(comparison["loft_insulation_from_recs"]) == False) ) -loft_conflicting = comparison[comparison["conflict_roof_insulation"] == True] +loft_conflicting = comparison[comparison["conflict_loft_insulation"] == True] # ------------ Windows ------------ comparison["conflict_double_glazing"] = ( - (comparison["has_glazing"]) & + (comparison["double_glazing"]) & ( - (pd.isnull(comparison["double_glazing"]) == False) | (pd.isnull(comparison["secondary_glazing"]) == False) + (pd.isnull(comparison["double_glazing_from_recs"]) == False) ) ) -windows_conflicting = comparison[comparison["conflict_double_glazing"] == True] +comparison["conflict_secondary_glazing"] = ( + (comparison["secondary_glazing"]) & + ( + (pd.isnull(comparison["secondary_glazing_from_recs"]) == False) + ) +) +double_glazing_conflicting = comparison[comparison["conflict_double_glazing"] == True] +secondary_glazing_conflicting = comparison[comparison["conflict_secondary_glazing"] == True] # ------------ Floors ------------ comparison["conflict_suspended_floor_insulation"] = ( - (comparison["has_floor_insulation"]) & - (pd.isnull(comparison["suspended_floor_insulation"]) == False) + (comparison["suspended_floor_insulation"]) & + (pd.isnull(comparison["suspended_floor_insulation_from_recs"]) == False) ) floors_conflicting = comparison[comparison["conflict_suspended_floor_insulation"] == True] # ------------ Boiler Upgrade ------------ comparison["conflict_boiler_upgrade"] = ( - (comparison["has_efficient_boiler"]) & - (pd.isnull(comparison["boiler_upgrade"]) == False) + (comparison["boiler_upgrade"]) & + (pd.isnull(comparison["boiler_upgrade_from_recs"]) == False) ) boiler_conflicting = comparison[comparison["conflict_boiler_upgrade"] == True] # ------------ ASHP ------------ comparison["conflict_air_source_heat_pump"] = ( - (comparison["has_ashp"]) & - (pd.isnull(comparison["air_source_heat_pump"]) == False) + (comparison["air_source_heat_pump"]) & + (pd.isnull(comparison["air_source_heat_pump_from_recs"]) == False) ) ashp_conflicting = comparison[comparison["conflict_air_source_heat_pump"] == True] # ------------ heat controls ------------ comparison["conflict_time_temperature_zone_control"] = ( - (comparison["has_top_heat_controls"]) & - (pd.isnull(comparison["time_temperature_zone_control"]) == False) + (comparison["time_temperature_zone_control"]) & + (pd.isnull(comparison["time_temperature_zone_control_from_recs"]) == False) ) comparison["conflict_roomstat_programmer_trvs"] = ( - (comparison["has_optimal_heat_controls"]) & - (pd.isnull(comparison["roomstat_programmer_trvs"]) == False) + (comparison["roomstat_programmer_trvs"]) & + (pd.isnull(comparison["roomstat_programmer_trvs_from_recs"]) == False) ) ttzc_conflicting = comparison[comparison["conflict_time_temperature_zone_control"] == True] rst_conflicting = comparison[comparison["conflict_roomstat_programmer_trvs"] == True] # ------------ Flat Roof Insulation ----------- comparison["conflict_flat_roof_insulation"] = ( - (comparison["has_flat_roof_insulation"]) & - (pd.isnull(comparison["flat_roof_insulation"]) == False) + (comparison["flat_roof_insulation"]) & + (pd.isnull(comparison["flat_roof_insulation_from_recs"]) == False) ) flat_roof_conflicting = comparison[comparison["conflict_flat_roof_insulation"] == True] @@ -304,7 +311,8 @@ all_conflicts = pd.concat( iwi_conflicting, ewi_conflicting, loft_conflicting, - windows_conflicting, + double_glazing_conflicting, + secondary_glazing_conflicting, floors_conflicting, boiler_conflicting, ashp_conflicting, @@ -314,8 +322,100 @@ all_conflicts = pd.concat( ] ) -all_conflicts["UPRN"].nunique() +all_conflicts = all_conflicts[ + [ + "uprn", + 'conflict_cavity_wall_insulation', + 'conflict_iwi_wall_insulation', + 'conflict_ewi_wall_insulation', + 'conflict_loft_insulation', + 'conflict_double_glazing', + 'conflict_secondary_glazing', + 'conflict_suspended_floor_insulation', 'conflict_boiler_upgrade', + 'conflict_air_source_heat_pump', + 'conflict_time_temperature_zone_control', 'conflict_roomstat_programmer_trvs', 'conflict_flat_roof_insulation'] +] +all_conflicts = all_conflicts.rename( + columns={ + "conflict_cavity_wall_insulation": "cavity_wall_insulation", + "conflict_iwi_wall_insulation": "internal_wall_insulation", + "conflict_ewi_wall_insulation": "external_wall_insulation", + "conflict_loft_insulation": "loft_insulation", + "conflict_double_glazing": "double_glazing", + "conflict_secondary_glazing": "secondary_glazing", + "conflict_suspended_floor_insulation": "suspended_floor_insulation", + "conflict_boiler_upgrade": "boiler_upgrade", + "conflict_air_source_heat_pump": "air_source_heat_pump", + "conflict_time_temperature_zone_control": "time_temperature_zone_control", + "conflict_roomstat_programmer_trvs": "roomstat_programmer_trvs", + "conflict_flat_roof_insulation": "flat_roof_insulation" + + } +) + +# Reshape by UPRN by melting +all_conflicts = all_conflicts.melt( + id_vars=["uprn"], + var_name="measure_type", + value_name="already_installed" +) + +recommendations_df["property_id"] = recommendations_df["property_id"].astype(int).astype(str) +properties_df["property_id"] = properties_df["property_id"].astype(int).astype(str) + +recs_with_uprn = recommendations_df.merge( + properties_df[["property_id", "uprn"]], + on="property_id", + how="left", + suffixes=("", "_prop") +) + +recs_with_uprn = ( + recs_with_uprn + .sort_values("sap_points", ascending=False) + .groupby(["uprn", "measure_type"], as_index=False) + .first() +) + +recs_with_uprn["uprn"] = recs_with_uprn["uprn"].astype(str) + +installed_measures_df = all_conflicts.merge( + recs_with_uprn[["uprn", "measure_type", "sap_points", "heat_demand", "kwh_savings", "co2_equivalent_savings", + "energy_cost_savings"]], + how="left", + on=["uprn", "measure_type"] +) + +installed_measures_df = installed_measures_df[installed_measures_df["already_installed"] == True] + +for col in ["sap_points", "heat_demand", "kwh_savings", "co2_equivalent_savings", "energy_cost_savings"]: + print(f"n missings for {col}: {pd.isnull(installed_measures_df[col]).sum()}", ) + +# Do some calcs on SAP impact +sap_impact = installed_measures_df.groupby(["uprn"])["sap_points"].sum().reset_index() +properties_sap = properties_df[["uprn", "current_sap_points", "current_epc_rating"]].copy() +properties_sap["uprn"] = properties_sap["uprn"].astype(str) + +old_sap_vs_new = properties_sap.merge( + sap_impact, how="inner", on="uprn" +) +old_sap_vs_new["new_sap_points"] = old_sap_vs_new["current_sap_points"] + old_sap_vs_new["sap_points"] +old_sap_vs_new["new_epc_rating"] = old_sap_vs_new["new_sap_points"].apply( + lambda x: sap_to_epc(x) +) +# How many properties go from below C to above +old_sap_vs_new[old_sap_vs_new["current_sap_points"] < 69]["new_epc_rating"].value_counts() +changed = old_sap_vs_new[ + (old_sap_vs_new["current_sap_points"] < 69) & (old_sap_vs_new["new_sap_points"] >= 69) + ] +properties_df[properties_df["current_sap_points"] < 69].shape + +old_sap_vs_new[old_sap_vs_new["current_epc_rating"].isin(["Epc.F", "Epc.G"])] + +25979 - 3891 + +sustainability_data[sustainability_data["UPRN"] == "100021204260"] # What do I need to do: # TODO: - need to get a view of "all" measures for the property, not just recommended. We can do this but just looking @@ -330,12 +430,26 @@ all_conflicts["UPRN"].nunique() ### Rebaselining -def get_installed_sap_adjustments_by_uprn_for_portfolio( +from typing import Dict +from sqlalchemy import func + + +def get_installed_measure_adjustments_by_uprn_for_portfolio( session, portfolio_id: int, -) -> Dict[int, float]: +) -> Dict[int, dict]: """ - Returns { uprn -> total_sap_delta } + Returns per-UPRN installed-measure adjustments. + + { + uprn: { + sap_points: float, + co2: float, + energy_kwh: float, + energy_bill: float, + heat_demand: float, + } + } """ uprn_subquery = ( @@ -347,8 +461,22 @@ def get_installed_sap_adjustments_by_uprn_for_portfolio( rows = ( session.query( - InstalledMeasure.uprn, - func.coalesce(func.sum(InstalledMeasure.sap_points), 0.0), + InstalledMeasure.uprn.label("uprn"), + + func.coalesce(func.sum(InstalledMeasure.sap_points), 0.0) + .label("sap_points"), + + func.coalesce(func.sum(InstalledMeasure.carbon_savings), 0.0) + .label("co2"), + + func.coalesce(func.sum(InstalledMeasure.kwh_savings), 0.0) + .label("energy_kwh"), + + func.coalesce(func.sum(InstalledMeasure.bill_savings), 0.0) + .label("energy_bill"), + + func.coalesce(func.sum(InstalledMeasure.heat_demand_savings), 0.0) + .label("heat_demand"), ) .filter(InstalledMeasure.is_active.is_(True)) .filter(InstalledMeasure.uprn.in_(uprn_subquery)) @@ -356,7 +484,16 @@ def get_installed_sap_adjustments_by_uprn_for_portfolio( .all() ) - return {uprn: float(delta) for uprn, delta in rows} + return { + row.uprn: { + "sap_points": float(row.sap_points), + "co2": float(row.co2), + "energy_kwh": float(row.energy_kwh), + "energy_bill": float(row.energy_bill), + "heat_demand": float(row.heat_demand), + } + for row in rows + } def get_installed_measure_types_by_uprn( @@ -608,6 +745,62 @@ def persist_property_sap_updates( print(f"✅ Updated {len(properties)} properties") +def compute_epc_rebasing_updates( + epcs: Dict[int, PropertyDetailsEpcModel], + properties_by_id: Dict[int, PropertyModel], + installed_adjustments_by_uprn: Dict[int, dict], +) -> Dict[int, dict]: + """ + Computes EPC rebasing updates without mutating DB objects. + Keyed by property_id. + """ + + updates: Dict[int, dict] = {} + + for property_id, epc in epcs.items(): + prop = properties_by_id.get(property_id) + if not prop or prop.uprn is None: + continue + + adj = installed_adjustments_by_uprn.get(prop.uprn) + if not adj: + continue + + updates[property_id] = { + "property_id": property_id, + + # Originals (only set once) + "original_co2_emissions": ( + epc.original_co2_emissions + if epc.original_co2_emissions is not None + else epc.co2_emissions + ), + "original_primary_energy_consumption": ( + epc.original_primary_energy_consumption + if epc.original_primary_energy_consumption is not None + else epc.primary_energy_consumption + ), + "original_current_energy_demand": ( + epc.original_current_energy_demand + if epc.original_current_energy_demand is not None + else epc.current_energy_demand + ), + "original_current_energy_demand_heating_hotwater": ( + epc.original_current_energy_demand_heating_hotwater + if epc.original_current_energy_demand_heating_hotwater is not None + else epc.current_energy_demand_heating_hotwater + ), + + # Adjustments (always re-applied from originals) + "installed_measures_co2_adjustment": adj["co2"], + "installed_measures_energy_demand_adjustment": adj["energy_kwh"], + "installed_measures_total_energy_bill_adjustment": adj["energy_bill"], + "installed_measures_heat_demand_adjustment": adj["heat_demand"], + } + + return updates + + def persist_plan_updates(plan_updates: list[dict]): """ Writes recalculated plan metrics. @@ -654,6 +847,74 @@ def persist_plan_updates(plan_updates: list[dict]): print(f"✅ Updated {len(plans)} plans") +def persist_epc_rebasing_updates( + epc_updates_by_property_id: Dict[int, dict], +): + """ + Overwrites EPC metrics using installed-measure rebasing. + Safe to re-run. + """ + + with db_session() as session: + epcs = ( + session.query(PropertyDetailsEpcModel) + .filter( + PropertyDetailsEpcModel.property_id.in_( + epc_updates_by_property_id.keys() + ) + ) + .all() + ) + + for epc in epcs: + u = epc_updates_by_property_id[epc.property_id] + + # Store originals once + epc.original_co2_emissions = u["original_co2_emissions"] + epc.original_primary_energy_consumption = ( + u["original_primary_energy_consumption"] + ) + epc.original_current_energy_demand = ( + u["original_current_energy_demand"] + ) + epc.original_current_energy_demand_heating_hotwater = ( + u["original_current_energy_demand_heating_hotwater"] + ) + + # Apply rebased values + epc.co2_emissions = ( + u["original_co2_emissions"] + - u["installed_measures_co2_adjustment"] + ) + + epc.primary_energy_consumption = ( + u["original_primary_energy_consumption"] + - u["installed_measures_heat_demand_adjustment"] + ) + + epc.current_energy_demand = ( + u["original_current_energy_demand"] + - u["installed_measures_energy_demand_adjustment"] + ) + + # Flags + audit fields + epc.installed_measures_co2_adjustment = ( + u["installed_measures_co2_adjustment"] + ) + epc.installed_measures_energy_demand_adjustment = ( + u["installed_measures_energy_demand_adjustment"] + ) + epc.installed_measures_total_energy_bill_adjustment = ( + u["installed_measures_total_energy_bill_adjustment"] + ) + epc.installed_measures_heat_demand_adjustment = ( + u["installed_measures_heat_demand_adjustment"] + ) + epc.is_epc_adjusted_for_installed_measures = True + + print(f"✅ Updated {len(epcs)} EPC records") + + # ------------------------------------------------------------ # EXECUTION (DRY RUN) # ------------------------------------------------------------ @@ -684,22 +945,29 @@ with db_read_session() as session: ) } - sap_adjustments = get_installed_sap_adjustments_by_uprn_for_portfolio( - session, - PORTFOLIO_ID, + installed_adjustments = ( + get_installed_measure_adjustments_by_uprn_for_portfolio( + session, + PORTFOLIO_ID, + ) ) property_updates = compute_property_sap_updates( properties, - sap_adjustments, + {uprn: v["sap_points"] for uprn, v in installed_adjustments.items()} ) + properties_by_id = {p.id: p for p in properties} property_updates_by_id = { u["property_id"]: u for u in property_updates } - properties_by_id = {p.id: p for p in properties} + epc_updates = compute_epc_rebasing_updates( + epcs, + properties_by_id, + installed_adjustments, + ) plan_updates = compute_plan_updates( session, @@ -712,3 +980,4 @@ with db_read_session() as session: # When ready to run! persist_property_sap_updates(property_updates_by_id) persist_plan_updates(plan_updates) +persist_epc_rebasing_updates(epc_updates) diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py b/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py new file mode 100644 index 00000000..57b858ce --- /dev/null +++ b/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py @@ -0,0 +1,156 @@ +import pandas as pd +from sqlalchemy.orm import Session +from sqlalchemy import text, select +from backend.app.db.connection import db_read_session +from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel +from backend.app.db.models.recommendations import Plan + +PORTFOLIO_ID = 431 + +with db_read_session() as session: + # Get all properties from PropertyDetailsEpcModel, where estimated is True, for portfolio 419 + estimated_epcs = session.query(PropertyDetailsEpcModel).filter( + # PropertyDetailsEpcModel.estimated == True, + PropertyDetailsEpcModel.property_id.in_( + session.query(PropertyModel.id).filter(PropertyModel.portfolio_id == PORTFOLIO_ID) + ) + ).all() + + # Get the ids + estimated_epc_ids = [epc.property_id for epc in estimated_epcs] + +# I want to get the UPRNS for these properties, from the property model +with db_read_session() as session: + estimated_uprns = session.query(PropertyModel.uprn).filter( + PropertyModel.id.in_( + session.query(PropertyDetailsEpcModel.property_id).filter( + PropertyDetailsEpcModel.id.in_(estimated_epc_ids) + ) + ) + ).all() + + estimated_uprns_list = [uprn for (uprn,) in estimated_uprns] + +# Go the the SAL +sal_1 = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20251213 Model " + "data.xlsx", + sheet_name="Standardised Asset List" +) +sal_2 = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260105 - additional " + "UPRNS.xlsx", + sheet_name="Standardised Asset List" +) + +sal = pd.concat([sal_1, sal_2]) +sal = sal.drop_duplicates(subset=['epc_os_uprn']) + +estimated_to_refresh = sal[sal["epc_os_uprn"].isin(estimated_uprns_list)].copy() + +SCENARIOS = [ + 871, # EPC C - fabric first, no solid floor, ashp 3.0 + 863, # EPC B, No EWI/IWI, No Solid Floor, ASHP 3.0 COP + 862, # EPC B - No solid floor, ASHP COP 3.0 + 861, # EPC C, No EWI/IWI, No Solid Floor, ASHP 3.0 COP + 859, # EPC C - no solid floor, ashp 3.0 + 885, # EPC B - fabric first, no solid floor, ashp 3.0 +] + +# Get all plans, associated to these properties - the property IDs are in estimated_epc_ids +with db_read_session() as session: + result = session.execute( + select(Plan.id, Plan.property_id) + .where(Plan.property_id.in_(estimated_epc_ids)) + ) + plans = [ + { + "plan_id": row.id, + "property_id": row.property_id, + } for row in result + ] + +df = pd.DataFrame(plans) +df = df.sort_values("property_id", ascending=True) + +agg = df.groupby("property_id").size().reset_index(name="n_plans") +agg = agg.sort_values("n_plans", ascending=True) +agg[agg["n_plans"] != 1] +assert all(agg["n_plans"] == 1) + + +def delete_plan_batch(session: Session, plan_ids: list[int]): + if not plan_ids: + return + + session.execute(text("SET LOCAL lock_timeout = '5s'")) + + params = {"plan_ids": plan_ids} + + # ---------------------------- + # recommendation_materials + # ---------------------------- + session.execute( + text(""" + DELETE FROM recommendation_materials rm + USING plan_recommendations pr + WHERE rm.recommendation_id = pr.recommendation_id + AND pr.plan_id = ANY(:plan_ids) + """), + params, + ) + + # ---------------------------- + # plan_recommendations + # ---------------------------- + session.execute( + text(""" + DELETE FROM plan_recommendations + WHERE plan_id = ANY(:plan_ids) + """), + params, + ) + + # ---------------------------- + # recommendations (only those used by these plans) + # ---------------------------- + session.execute( + text(""" + DELETE FROM recommendation r + WHERE r.id IN ( + SELECT DISTINCT recommendation_id + FROM plan_recommendations + WHERE plan_id = ANY(:plan_ids) + ) + """), + params, + ) + + # ---------------------------- + # plans LAST + # ---------------------------- + session.execute( + text(""" + DELETE FROM plan + WHERE id = ANY(:plan_ids) + """), + params, + ) + + +# Store the SAL +filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260101 " + "sal.xlsx") + +with pd.ExcelWriter(filename) as writer: + sal.to_excel(writer, sheet_name="Standardised Asset List", index=False) + # Top 1000 for testing + sal.iloc[0:1000, :].to_excel(writer, sheet_name="batch 1", index=False) + # Batch 2 is the next 20,000 + sal.iloc[1000:21000, :].to_excel(writer, sheet_name="batch 2", index=False) + # Batch 3 is the next 20,000 + sal.iloc[21000:41000, :].to_excel(writer, sheet_name="batch 3", index=False) + + sal.iloc[41000:61000, :].to_excel(writer, sheet_name="batch 4", index=False) + sal.iloc[61000:81000, :].to_excel(writer, sheet_name="batch 5", index=False) + sal.iloc[81000:, :].to_excel(writer, sheet_name="batch 5", index=False)