diff --git a/backend/app/db/functions/__init__.py b/backend/app/db/functions/__init__.py index 0f239d6e..8e7495bf 100644 --- a/backend/app/db/functions/__init__.py +++ b/backend/app/db/functions/__init__.py @@ -10,3 +10,4 @@ from .materials_functions import * from .inspections_functions import * from .non_intrusive_surveys import * from .whlg_functions import * +from .already_installed_functions import * diff --git a/backend/app/db/functions/already_installed_functions.py b/backend/app/db/functions/already_installed_functions.py new file mode 100644 index 00000000..351419b0 --- /dev/null +++ b/backend/app/db/functions/already_installed_functions.py @@ -0,0 +1,40 @@ +from backend.app.db.models.recommendations import InstalledMeasure +from typing import Dict, List, Set +from collections import defaultdict + + +def get_installed_measure_types_by_uprns( + session, + uprns: List[int], +) -> Dict[int, Set[str]]: + """ + Returns installed measure types per UPRN. + + { + uprn: {"cavity_wall_insulation", "mechanical_ventilation", ...} + } + """ + + if not uprns: + return {} + + rows = ( + session.query( + InstalledMeasure.uprn, + InstalledMeasure.measure_type, + ) + .filter(InstalledMeasure.is_active.is_(True)) + .filter(InstalledMeasure.uprn.in_(uprns)) + .all() + ) + + out: Dict[int, Set[str]] = defaultdict(set) + + for uprn, measure_type in rows: + out[uprn].add( + measure_type.value + if hasattr(measure_type, "value") + else measure_type + ) + + return out diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index ae178c8a..4fdd9324 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -27,7 +27,8 @@ def prepare_plan_data( """ # Plan carbon savings co2_savings = sum([r["co2_equivalent_savings"] for r in default_recommendations]) - post_co2_emissions = p.data["co2-emissions-current"] - co2_savings + raise Exception("CHECK ME") + post_co2_emissions = p.energy["co2_emissions"] - co2_savings # Plan bill savings energy_bill_savings = sum([r["energy_cost_savings"] for r in default_recommendations]) diff --git a/backend/app/plan/data_classes.py b/backend/app/plan/data_classes.py index cec5ed11..99f6156b 100644 --- a/backend/app/plan/data_classes.py +++ b/backend/app/plan/data_classes.py @@ -5,6 +5,5 @@ from typing import Any, Optional @dataclass class PropertyRequestData: patch: dict - already_installed: list non_invasive_recommendations: dict valuation: Optional[float] diff --git a/backend/app/plan/utils.py b/backend/app/plan/utils.py index 52e2b0c4..33f391d4 100644 --- a/backend/app/plan/utils.py +++ b/backend/app/plan/utils.py @@ -52,7 +52,7 @@ def patch_epc(patch, epc_records): def extract_property_request_data( - address: Address, patches, already_installed, non_invasive_recommendations, valuation_data, uprn + address: Address, patches, non_invasive_recommendations, valuation_data, uprn ): patch_has_uprn = "uprn" in patches[0] if patches else True if patch_has_uprn: @@ -64,10 +64,6 @@ def extract_property_request_data( x for x in patches if (x["address"] == address.address) and (x["postcode"] == address.postcode) ), {}) - property_already_installed = next(( - x for x in already_installed if (x["address"] == address.address) and (x["postcode"] == address.postcode) - ), []) - # Because we have some non-invasive recommendations that match on address and postcode, but not UPRN # we need to check existence of uprn has_uprn = "uprn" in non_invasive_recommendations[0] if non_invasive_recommendations else False @@ -119,7 +115,6 @@ def extract_property_request_data( # Return data class to give a structured format return PropertyRequestData( patch=patch, - already_installed=property_already_installed, non_invasive_recommendations=property_non_invasive_recommendations, valuation=property_valuation ) diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 4f5ee3c1..f4e3ad3f 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -684,6 +684,9 @@ async def model_engine(body: PlanTriggerRequest): energy_assessments_by_uprn = db_funcs.energy_assessment_functions.get_latest_assessments_for_uprns( session, uprns ) + already_installed_by_uprn = db_funcs.already_installed_functions.get_installed_measure_types_by_uprns( + session, uprns + ) # If we have properties that need to be created, we cerate them in bulk logger.info("Determine new properties to be created") @@ -703,7 +706,7 @@ async def model_engine(body: PlanTriggerRequest): property_lookup[("uprn", uprn)] = prop_id if landlord_property_id: property_lookup[("landlord_property_id", landlord_property_id)] = prop_id - + logger.info("Processing each property for model input preparation") input_properties, inspections_map, eco_packages, epc_upserts = [], {}, {}, [] for addr, config in tqdm( @@ -725,6 +728,8 @@ async def model_engine(body: PlanTriggerRequest): energy_assessment = energy_assessments_by_uprn.get(addr.uprn) + property_already_installed = list(already_installed_by_uprn[addr.uprn]) + epc_searcher = SearchEpc( address1=addr.address1, postcode=addr.postcode, @@ -767,7 +772,6 @@ async def model_engine(body: PlanTriggerRequest): req_data = extract_property_request_data( address=addr, patches=patches, - already_installed=already_installed, non_invasive_recommendations=non_invasive_recommendations, valuation_data=valuation_data, uprn=addr.uprn, @@ -813,7 +817,7 @@ async def model_engine(body: PlanTriggerRequest): address=epc_searcher.address_clean, postcode=epc_searcher.postcode_clean, epc_record=prepared_epc, - already_installed=req_data.already_installed + eco_packages.get(property_id)[3], + already_installed=property_already_installed + eco_packages.get(property_id)[3], property_valuation=req_data.valuation, non_invasive_recommendations=property_non_invasive_recommendations, energy_assessment=energy_assessment, @@ -965,6 +969,8 @@ async def model_engine(body: PlanTriggerRequest): # Temp putting this here recommendations_scoring_data["is_post_sap10_ending"] = True + recommendations_scoring_data["sap_starting"] = 77 + recommendations_scoring_data = recommendations_scoring_data.drop( columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending", "carbon_ending"] @@ -1189,7 +1195,8 @@ async def model_engine(body: PlanTriggerRequest): property_updates, property_epc_details, property_spatial_updates = [], [], [] plans_to_create, recommendations_to_create = [], [] - + # TODO: Check the update to carbon + print("NEED TO CHECK THE UPDATE TO CARBON") # Prepare the data that will need to be uploaded in bulk for p in input_properties: recommendations_for_property = recommendations.get(p.id, []) diff --git a/backend/onboarders/mappings/age_band.py b/backend/onboarders/mappings/age_band.py new file mode 100644 index 00000000..2487c921 --- /dev/null +++ b/backend/onboarders/mappings/age_band.py @@ -0,0 +1,14 @@ +party_map = { + "Before 1900": 'England and Wales: before 1900', + "1900-1929": 'England and Wales: 1900-1929', + "1930-1949": 'England and Wales: 1930-1949', + "1950-1966": 'England and Wales: 1950-1966', + "1967-1975": 'England and Wales: 1967-1975', + "1976-1982": 'England and Wales: 1976-1982', + "1983-1990": 'England and Wales: 1983-1990', + "1991-1995": 'England and Wales: 1991-1995', + "1996-2002": 'England and Wales: 1996-2002', + "2003-2006": 'England and Wales: 2003-2006', + "2007-2011": 'England and Wales: 2007-2011', + "2012 onwards": 'England and Wales: 2012-2021', +} diff --git a/backend/onboarders/mappings/built_form.py b/backend/onboarders/mappings/built_form.py new file mode 100644 index 00000000..23901fc6 --- /dev/null +++ b/backend/onboarders/mappings/built_form.py @@ -0,0 +1,15 @@ +parity_map = { + "MidTerrace": "Mid-Terrace", + "EndTerrace": "End-Terrace", + "Detached": "Detached", + "SemiDetached": "Semi-Detached", + "EnclosedMidTerrace": "Enclosed Mid-Terrace", + "EnclosedEndTerrace": "Enclosed End-Terrace", +} + +# MidTerrace 41462 +# EndTerrace 20910 +# Detached 16875 +# SemiDetached 14725 +# EnclosedMidTerrace 3176 +# EnclosedEndTerrace 2393 diff --git a/backend/onboarders/mappings/property_type.py b/backend/onboarders/mappings/property_type.py new file mode 100644 index 00000000..75deef04 --- /dev/null +++ b/backend/onboarders/mappings/property_type.py @@ -0,0 +1,6 @@ +parity_map = { + "Flat": "Flat", + "Maisonette": "Maisonette", + "Bungalow": "Bungalow", + "House": "House", +} diff --git a/backend/onboarders/mappings/walls.py b/backend/onboarders/mappings/walls.py new file mode 100644 index 00000000..9b70b49c --- /dev/null +++ b/backend/onboarders/mappings/walls.py @@ -0,0 +1,3 @@ +parity_map = { + +} diff --git a/backend/onboarders/parity.py b/backend/onboarders/parity.py new file mode 100644 index 00000000..f41ebeaf --- /dev/null +++ b/backend/onboarders/parity.py @@ -0,0 +1,95 @@ +import pandas as pd +from etl.epc.DataProcessor import construction_age_bounds_map +from backend.onboarders.mappings.property_type import parity_map as property_map +from backend.onboarders.mappings.age_band import party_map as age_band_map +from backend.onboarders.mappings.built_form import parity_map as built_form_map + + +def check_nulls(data, original_column, mapped_column): + # We only allow nulls if the oroginal value was null + null_vals = data[pd.isnull(data[mapped_column])] + if null_vals.empty: + return True + # We make sure all original values were null + assert pd.isnull(null_vals[original_column]).all(), ( + f"Some values in {mapped_column} were not mapped, but original values were not null" + ) + + +# Sample input data + +data = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody " + "- Data Extracts for Domna.xlsx", + sheet_name="Sustainability" +) + +# We want to map the parity fields to standard EPC references. This will allow us to +# 1) Estimate EPCs, more accurately +# 2) Patch incorrect EPCs with ease +# 3) Indicate already installed measures + +# ------------ construction_age_band ------------ +# Map to EPC age bands +# def construction_date_to_band(year): +# if pd.isnull(year): +# return None +# # Get the year from the date which is numpy datetime format +# for label, ranges in construction_age_bounds_map.items(): +# if ranges["l"] <= year <= ranges["u"]: +# return label +# raise NotImplementedError("year out of bounds") +# +# +# data["construction_age_band"] = pd.to_datetime(data["Construction Date"]).dt.year.apply(construction_date_to_band) + +data["construction_age_band"] = data["Construction Years"].map(age_band_map) + +check_nulls(data, "Construction Years", "construction_age_band") + +# ------------ property_type ------------ +data["property_type"] = data["Type"].map(property_map) + +assert pd.isnull(data["property_type"]).sum() == 0, "Some property types were not mapped" + +# ------------ built_form ------------ +data["built_form"] = data["Attachment"].map(built_form_map) + +assert pd.isnull(data["built_form"]).sum() == 0, "Some built forms were not mapped" + +# ------------ Wall Construction ------------ + +data["walls_combined"] = data["Wall Construction"] + "+" + data["Wall Insulation"].fillna("Unknown Insulation") + +data["Wall Insulation"].value_counts() +data["Wall Construction"].value_counts() + +as_built_map = { + "Cavity": {"insulated_age_bands":[], "partial_insulated_age_bands": []}, + "Solid Brick": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, + "System": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, + "Timber Frame": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, + "Sandstone": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, + "Granite": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, + "Cob": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, +} + +def map_wall_construction(wall_constuction, wall_insulation, construction_age_band): + if wall_insulation == "AsBuilt": + # Deduce based on wall construction and age band + bands = as_built_map.get(wall_constuction, None) + if bands is None: + raise NotImplementedError(f"Wall construction {wall_constuction} not in as built map") + + # We check if the age band is in insulated or partial insulated, and if neither, we assume uninsulated + + + + +# Variables we want to map +'Org Ref', 'Address 1', 'Address 2', 'Address 3', 'Postcode', 'Type', + 'Attachment', 'Construction Years', 'Wall Construction', + 'Wall Insulation', 'Roof Construction', 'Roof Insulation', + 'Floor Construction', 'Floor Insulation', 'Glazing', 'Heating', + 'Boiler Efficiency', 'Main Fuel', 'Controls Adequacy', 'UPRN', + 'Total Floor Area (m2)' \ No newline at end of file diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py b/etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py index d310ffa4..8d4bc9da 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py @@ -1,10 +1,10 @@ import pandas as pd +from tqdm import tqdm from sqlalchemy.orm import sessionmaker from backend.app.db.connection import db_engine, db_read_session, db_session from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations, RecommendationMaterials, \ InstalledMeasure from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel -from sqlalchemy import func from backend.app.utils import sap_to_epc from typing import Dict, List, Set from recommendations.Costs import Costs @@ -87,14 +87,9 @@ def get_all_data(portfolio_id, scenario_ids): return properties_data, plans_data, recommendations_data -PORTFOLIO_ID = 419 # Peabody +PORTFOLIO_ID = 431 # Peabody - new portfolio SCENARIOS = [ - # 871, # EPC C - fabric first, no solid floor, ashp 3.0 - # 863, # EPC B, No EWI/IWI, No Solid Floor, ASHP 3.0 COP - # 862, # EPC B - No solid floor, ASHP COP 3.0 - # 861, # EPC C, No EWI/IWI, No Solid Floor, ASHP 3.0 COP - # 859, # EPC C - no solid floor, ashp 3.0 - 885, # EPC B - fabric first, no solid floor, ashp 3.0 + 891, # EPC B - No solid floor, ASHP COP 3.0 ] # properties_data, plans_data, recommendations_data = get_all_data(portfolio_id=PORTFOLIO_ID, scenario_ids=SCENARIOS) @@ -106,31 +101,31 @@ SCENARIOS = [ # Save CSVs # properties_df.to_csv( # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/" -# "f_peabody_properties_data_20260108.csv", +# "Final portfolio datasets/v2/peabody_properties_data_20260108.csv", # index=False # ) # plans_df.to_csv( # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/" -# "f_peabody_plans_data_20260108.csv", +# "Final portfolio datasets/v2/peabody_plans_data_20260108.csv", # index=False # ) # recommendations_df.to_csv( # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/" -# "f_peabody_recommendations_data_20260108.csv", +# "Final portfolio datasets/v2/peabody_recommendations_data_20260108.csv", # index=False # ) # Read csvs properties_df = pd.read_csv( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/" - "f_peabody_properties_data_20260108.csv" + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting " + "Project/Final portfolio datasets/v2/peabody_properties_data_20260108.csv" ) plans_df = pd.read_csv( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/" - "f_peabody_plans_data_20260108.csv" + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final portfolio " + "datasets/v2/peabody_plans_data_20260108.csv" ) recommendations_df = pd.read_csv( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/" - "f_peabody_recommendations_data_20260108.csv" + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final portfolio " + "datasets/v2/peabody_recommendations_data_20260108.csv" ) sustainability_data = pd.read_excel( @@ -138,11 +133,29 @@ sustainability_data = pd.read_excel( "- Data Extracts for Domna.xlsx", sheet_name="Sustainability" ) +sustainability_data_with_sap = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/Parity Data " + "08012026.xlsx", +) -# recommendations_df = pd.read_excel( -# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/EPC B, " -# "No solid floor, ASHP COP 3.0.xlsx" -# ) +properties_df["uprn"] = properties_df["uprn"].astype(str) +property_data_comparison = properties_df.merge( + sustainability_data, how="inner", left_on="uprn", right_on="UPRN", suffixes=("_prop", "_sust") +) + +property_data_comparison["wall_type"] = property_data_comparison["walls"].str.split(",").str[0].str.strip() + +column_pairs = { + "built_form": "Attachment", + "property_type": "Type", + "wall_type": "Wall Construction", + "heating": "Heating", +} +combination_tables = {} + +for v1, v2 in column_pairs.items(): + df = property_data_comparison.groupby([v1, v2]).size().reset_index(name='count') + combination_tables[v1] = df # We just need all of the measure types, per property recommendation_measure_types = recommendations_df[ @@ -177,7 +190,7 @@ sustainability_data["external_wall_insulation"] = sustainability_data["Wall Insu ["External", "FilledCavityPlusExternal"] ) sustainability_data["loft_insulation"] = sustainability_data["Roof Insulation"].isin( - ["mm300", "mm250"] + ["mm300", "mm250", "mm350", "mm400", "mm270"] ) sustainability_data["double_glazing"] = sustainability_data["Glazing"].isin( ["Double 2002 or later", "Double but age unknown", "Triple", "DoubleKnownData", "Secondary", "TripleKnownData"] @@ -223,86 +236,81 @@ comparison = sustainability_data.merge( # Flag entries where we've been told that walls are already insulated, but we have recommendations for wall insulation # ------------ Walls ------------ -comparison["conflict_cavity_wall_insulation"] = ( +cwi_conflicting = comparison[ (comparison["cavity_wall_insulation"]) & (pd.isnull(comparison["cavity_wall_insulation_from_recs"]) == False) -) -comparison["conflict_iwi_wall_insulation"] = ( + ].copy() +cwi_conflicting["conflict_cavity_wall_insulation"] = True +iwi_conflicting = comparison[ (comparison["internal_wall_insulation"]) & (pd.isnull(comparison["internal_wall_insulation_from_recs"]) == False) -) -comparison["conflict_ewi_wall_insulation"] = ( + ].copy() +iwi_conflicting["conflict_iwi_wall_insulation"] = True + +ewi_conflicting = comparison[ (comparison["external_wall_insulation"]) & (pd.isnull(comparison["external_wall_insulation_from_recs"]) == False) -) - -cwi_conflicting = comparison[comparison["conflict_cavity_wall_insulation"] == True] -iwi_conflicting = comparison[comparison["conflict_iwi_wall_insulation"] == True] -ewi_conflicting = comparison[comparison["conflict_ewi_wall_insulation"] == True] + ].copy() +ewi_conflicting["conflict_ewi_wall_insulation"] = True # ------------ Roof ------------ -comparison["conflict_loft_insulation"] = ( +loft_conflicting = comparison[ (comparison["loft_insulation"]) & (pd.isnull(comparison["loft_insulation_from_recs"]) == False) -) - -loft_conflicting = comparison[comparison["conflict_loft_insulation"] == True] + ].copy() +loft_conflicting["conflict_loft_insulation"] = True # ------------ Windows ------------ -comparison["conflict_double_glazing"] = ( +double_glazing_conflicting = comparison[ (comparison["double_glazing"]) & - ( - (pd.isnull(comparison["double_glazing_from_recs"]) == False) - ) -) -comparison["conflict_secondary_glazing"] = ( + (pd.isnull(comparison["double_glazing_from_recs"]) == False) + ].copy() +double_glazing_conflicting["conflict_double_glazing"] = True +secondary_glazing_conflicting = comparison[ (comparison["secondary_glazing"]) & - ( - (pd.isnull(comparison["secondary_glazing_from_recs"]) == False) - ) -) -double_glazing_conflicting = comparison[comparison["conflict_double_glazing"] == True] -secondary_glazing_conflicting = comparison[comparison["conflict_secondary_glazing"] == True] + (pd.isnull(comparison["secondary_glazing_from_recs"]) == False) + ].copy() +secondary_glazing_conflicting["conflict_secondary_glazing"] = True # ------------ Floors ------------ -comparison["conflict_suspended_floor_insulation"] = ( +floors_conflicting = comparison[ (comparison["suspended_floor_insulation"]) & (pd.isnull(comparison["suspended_floor_insulation_from_recs"]) == False) -) -floors_conflicting = comparison[comparison["conflict_suspended_floor_insulation"] == True] + ].copy() +floors_conflicting["conflict_suspended_floor_insulation"] = True # ------------ Boiler Upgrade ------------ -comparison["conflict_boiler_upgrade"] = ( +boiler_conflicting = comparison[ (comparison["boiler_upgrade"]) & (pd.isnull(comparison["boiler_upgrade_from_recs"]) == False) -) -boiler_conflicting = comparison[comparison["conflict_boiler_upgrade"] == True] + ].copy() +boiler_conflicting["conflict_boiler_upgrade"] = True # ------------ ASHP ------------ -comparison["conflict_air_source_heat_pump"] = ( +ashp_conflicting = comparison[ (comparison["air_source_heat_pump"]) & (pd.isnull(comparison["air_source_heat_pump_from_recs"]) == False) -) -ashp_conflicting = comparison[comparison["conflict_air_source_heat_pump"] == True] + ].copy() +ashp_conflicting["conflict_air_source_heat_pump"] = True # ------------ heat controls ------------ -comparison["conflict_time_temperature_zone_control"] = ( +ttzc_conflicting = comparison[ (comparison["time_temperature_zone_control"]) & (pd.isnull(comparison["time_temperature_zone_control_from_recs"]) == False) -) -comparison["conflict_roomstat_programmer_trvs"] = ( + ].copy() +ttzc_conflicting["conflict_time_temperature_zone_control"] = True +rst_conflicting = comparison[ (comparison["roomstat_programmer_trvs"]) & (pd.isnull(comparison["roomstat_programmer_trvs_from_recs"]) == False) -) -ttzc_conflicting = comparison[comparison["conflict_time_temperature_zone_control"] == True] -rst_conflicting = comparison[comparison["conflict_roomstat_programmer_trvs"] == True] + ].copy() +rst_conflicting["conflict_roomstat_programmer_trvs"] = True # ------------ Flat Roof Insulation ----------- -comparison["conflict_flat_roof_insulation"] = ( +flat_roof_conflicting = comparison[ (comparison["flat_roof_insulation"]) & (pd.isnull(comparison["flat_roof_insulation_from_recs"]) == False) -) -flat_roof_conflicting = comparison[comparison["conflict_flat_roof_insulation"] == True] + ].copy() +flat_roof_conflicting["conflict_flat_roof_insulation"] = True # All properties with conflicts all_conflicts = pd.concat( @@ -389,6 +397,61 @@ installed_measures_df = all_conflicts.merge( installed_measures_df = installed_measures_df[installed_measures_df["already_installed"] == True] +## --- Sense checking ---- + +FABRIC_MEASURES = { + "external_wall_insulation", + "internal_wall_insulation", + "cavity_wall_insulation", +} + + +def add_mechanical_ventilation_for_fabric(installed_measures_df, recs_with_uprn): + """ + If a property has fabric insulation installed, also mark + mechanical ventilation as installed using recommendation metrics. + """ + + # Properties with fabric installed + fabric_uprns = installed_measures_df[ + installed_measures_df["measure_type"].isin(FABRIC_MEASURES) + ]["uprn"].unique() + + # Get MV recommendation metrics (pick max SAP per property as you decided) + mv_recs = ( + recs_with_uprn[ + (recs_with_uprn["measure_type"] == "mechanical_ventilation") + & (recs_with_uprn["uprn"].isin(fabric_uprns)) + ] + .sort_values("sap_points", ascending=False) + .drop_duplicates(subset=["uprn"]) + ) + + mv_installed = mv_recs[[ + "uprn", + "measure_type", + "sap_points", + "heat_demand", + "kwh_savings", + "co2_equivalent_savings", + "energy_cost_savings", + ]].copy() + + mv_installed["already_installed"] = True + + return pd.concat( + [installed_measures_df, mv_installed], + ignore_index=True + ) + + +installed_measures_df = add_mechanical_ventilation_for_fabric( + installed_measures_df, + recs_with_uprn +) + +assert installed_measures_df[["uprn", "measure_type"]].duplicated().sum() == 0 + for col in ["sap_points", "heat_demand", "kwh_savings", "co2_equivalent_savings", "energy_cost_savings"]: print(f"n missings for {col}: {pd.isnull(installed_measures_df[col]).sum()}", ) @@ -401,21 +464,12 @@ old_sap_vs_new = properties_sap.merge( sap_impact, how="inner", on="uprn" ) old_sap_vs_new["new_sap_points"] = old_sap_vs_new["current_sap_points"] + old_sap_vs_new["sap_points"] -old_sap_vs_new["new_epc_rating"] = old_sap_vs_new["new_sap_points"].apply( - lambda x: sap_to_epc(x) -) +old_sap_vs_new["new_epc_rating"] = old_sap_vs_new["new_sap_points"].apply(lambda x: sap_to_epc(x)) # How many properties go from below C to above old_sap_vs_new[old_sap_vs_new["current_sap_points"] < 69]["new_epc_rating"].value_counts() changed = old_sap_vs_new[ (old_sap_vs_new["current_sap_points"] < 69) & (old_sap_vs_new["new_sap_points"] >= 69) ] -properties_df[properties_df["current_sap_points"] < 69].shape - -old_sap_vs_new[old_sap_vs_new["current_epc_rating"].isin(["Epc.F", "Epc.G"])] - -25979 - 3891 - -sustainability_data[sustainability_data["UPRN"] == "100021204260"] # What do I need to do: # TODO: - need to get a view of "all" measures for the property, not just recommended. We can do this but just looking @@ -426,32 +480,92 @@ sustainability_data[sustainability_data["UPRN"] == "100021204260"] # 3) For anything already installed, I should mark already installed as True, and set the cost to zero # 4) I need to update the plan cost to remove the cost of the installed measures +# TODO: +# 1) Need to push the already installed measures to the database +from sqlalchemy.orm import sessionmaker +from datetime import datetime + +BATCH_SIZE = 5000 +SOURCE = "peabody_import_2026_01" + +Session = sessionmaker(bind=db_engine) + + +def bulk_insert_installed_measures(installed_measures_df): + session = Session() + + records = [] + now = datetime.utcnow() + + for _, row in installed_measures_df.iterrows(): + records.append({ + "uprn": int(row["uprn"]), + "measure_type": row["measure_type"], + "installed_at": now, + "sap_points": float(row["sap_points"]) if pd.notna(row["sap_points"]) else None, + "carbon_savings": float(row["co2_equivalent_savings"]) if pd.notna(row["co2_equivalent_savings"]) else None, + "kwh_savings": float(row["kwh_savings"]) if pd.notna(row["kwh_savings"]) else None, + "bill_savings": float(row["energy_cost_savings"]) if pd.notna(row["energy_cost_savings"]) else None, + "heat_demand_savings": float(row["heat_demand"]) if pd.notna(row["heat_demand"]) else None, + "source": SOURCE, + "is_active": True, + }) + + try: + for i in range(0, len(records), BATCH_SIZE): + batch = records[i:i + BATCH_SIZE] + session.bulk_insert_mappings(InstalledMeasure, batch) + session.commit() + print(f"✅ Inserted {i + len(batch)} / {len(records)}") + + except Exception: + session.rollback() + raise + finally: + session.close() + + +# bulk_insert_installed_measures(installed_measures_df) ### Rebaselining - - from typing import Dict from sqlalchemy import func +from typing import Dict +from sqlalchemy import func, case + +REBASING_EXCLUDED_MEASURES = { + "mechanical_ventilation", +} + def get_installed_measure_adjustments_by_uprn_for_portfolio( session, portfolio_id: int, ) -> Dict[int, dict]: """ - Returns per-UPRN installed-measure adjustments. + Returns per-UPRN installed-measure adjustments for PROPERTY / EPC rebasing. + IMPORTANT: + - Mechanical ventilation is EXCLUDED from rebasing calculations + (drag-along measure; should not alter baseline EPC/SAP). + - All other installed measures are fully applied. + + Output shape: { uprn: { - sap_points: float, - co2: float, - energy_kwh: float, - energy_bill: float, - heat_demand: float, + "sap_points": float, + "co2": float, + "energy_kwh": float, + "energy_bill": float, + "heat_demand": float, } } """ + # -------------------------------------------- + # Limit to UPRNs that belong to this portfolio + # -------------------------------------------- uprn_subquery = ( session.query(PropertyModel.uprn) .filter(PropertyModel.portfolio_id == portfolio_id) @@ -459,24 +573,55 @@ def get_installed_measure_adjustments_by_uprn_for_portfolio( .subquery() ) + # -------------------------------------------- + # CASE helper: exclude ventilation from rebasing + # -------------------------------------------- + def exclude_ventilation(column): + return case( + ( + InstalledMeasure.measure_type.notin_( + REBASING_EXCLUDED_MEASURES + ), + column, + ), + else_=0.0, + ) + + # -------------------------------------------- + # Aggregate installed-measure impacts per UPRN + # -------------------------------------------- rows = ( session.query( InstalledMeasure.uprn.label("uprn"), - func.coalesce(func.sum(InstalledMeasure.sap_points), 0.0) - .label("sap_points"), + func.coalesce( + func.sum(exclude_ventilation(InstalledMeasure.sap_points)), + 0.0, + ).label("sap_points"), - func.coalesce(func.sum(InstalledMeasure.carbon_savings), 0.0) - .label("co2"), + func.coalesce( + func.sum(exclude_ventilation(InstalledMeasure.carbon_savings)), + 0.0, + ).label("co2"), - func.coalesce(func.sum(InstalledMeasure.kwh_savings), 0.0) - .label("energy_kwh"), + func.coalesce( + func.sum(exclude_ventilation(InstalledMeasure.kwh_savings)), + 0.0, + ).label("energy_kwh"), - func.coalesce(func.sum(InstalledMeasure.bill_savings), 0.0) - .label("energy_bill"), + func.coalesce( + func.sum(exclude_ventilation(InstalledMeasure.bill_savings)), + 0.0, + ).label("energy_bill"), - func.coalesce(func.sum(InstalledMeasure.heat_demand_savings), 0.0) - .label("heat_demand"), + func.coalesce( + func.sum( + exclude_ventilation( + InstalledMeasure.heat_demand_savings + ) + ), + 0.0, + ).label("heat_demand"), ) .filter(InstalledMeasure.is_active.is_(True)) .filter(InstalledMeasure.uprn.in_(uprn_subquery)) @@ -484,6 +629,9 @@ def get_installed_measure_adjustments_by_uprn_for_portfolio( .all() ) + # -------------------------------------------- + # Shape result for downstream consumers + # -------------------------------------------- return { row.uprn: { "sap_points": float(row.sap_points), @@ -520,20 +668,27 @@ def get_installed_measure_types_by_uprn( def compute_property_sap_updates( properties: List[PropertyModel], - sap_adjustments: Dict[int, float], + sap_adjustments: Dict[int, float], # keyed by uprn ) -> List[dict]: """ Returns property SAP rebasing results. - Does NOT mutate DB objects. + ONLY returns rows where installed measures exist. """ updates = [] for prop in properties: - if prop.uprn is None or prop.original_sap_points is None: + if prop.uprn is None: continue - sap_delta = sap_adjustments.get(prop.uprn, 0.0) + # 🚨 gatekeeper + if prop.uprn not in sap_adjustments: + continue + + if prop.original_sap_points is None: + continue + + sap_delta = sap_adjustments[prop.uprn] new_sap = prop.original_sap_points + sap_delta updates.append({ @@ -542,7 +697,7 @@ def compute_property_sap_updates( "original_sap_points": prop.original_sap_points, "installed_sap_delta": sap_delta, "new_sap_points": new_sap, - "is_adjusted": sap_delta != 0, + "is_adjusted": True, }) return updates @@ -553,17 +708,13 @@ def compute_property_sap_updates( # ------------------------------------------------------------ def get_effective_plan_recommendations( - session, - plan_id: int, - excluded_measure_types: Set[str], + session, plan_id: int, excluded_measure_types: Set[str] ) -> List[Recommendation]: q = ( session.query(Recommendation) .join(PlanRecommendations) .filter(PlanRecommendations.plan_id == plan_id) - .filter(Recommendation.default.is_(True)) - ) - + .filter(Recommendation.default.is_(True))) if excluded_measure_types: q = q.filter( ~Recommendation.measure_type.in_(excluded_measure_types) @@ -587,7 +738,7 @@ def aggregate_plan_metrics(recommendations: list[Recommendation]): agg["sap_points"] += r.sap_points or 0.0 agg["co2_savings"] += r.co2_equivalent_savings or 0.0 agg["energy_bill_savings"] += r.energy_cost_savings or 0.0 - agg["energy_consumption_savings"] += r.energy_savings or 0.0 + agg["energy_consumption_savings"] += r.kwh_savings or 0.0 agg["valuation_increase"] += r.property_valuation_increase or 0.0 base_cost = r.estimated_cost or 0.0 @@ -601,49 +752,173 @@ def aggregate_plan_metrics(recommendations: list[Recommendation]): # PLAN REBASING (READ-ONLY) # ------------------------------------------------------------ +# session,plans,properties_by_id,epcs_by_property_id, property_sap_updates = session, plans, properties_by_id, epcs, +# property_updates_by_id + +from collections import defaultdict + + +def get_installed_measure_types_by_property_id_for_portfolio( + session, + portfolio_id: int, +) -> dict[int, set[str]]: + """ + Returns: + { property_id: {measure_type, ...} } + + Includes drag-along measures (e.g. ventilation). + """ + + rows = ( + session.query( + PropertyModel.id.label("property_id"), + InstalledMeasure.measure_type, + ) + .join( + InstalledMeasure, + InstalledMeasure.uprn == PropertyModel.uprn, + ) + .filter(PropertyModel.portfolio_id == portfolio_id) + .filter(InstalledMeasure.is_active.is_(True)) + .all() + ) + + installed_by_property: dict[int, set[str]] = defaultdict(set) + + for property_id, measure_type in rows: + mt = measure_type.value if hasattr(measure_type, "value") else measure_type + installed_by_property[property_id].add(mt) + + # drag-along rules + if mt in {"cavity_wall_insulation", "internal_wall_insulation", "external_wall_insulation"}: + installed_by_property[property_id].add("mechanical_ventilation") + + return installed_by_property + + +def get_all_default_plan_recommendations( + session, + plan_ids: list[int], +) -> dict[int, list[Recommendation]]: + """ + Returns {plan_id: [Recommendation, ...]} for ALL plans in one query. + """ + + rows = ( + session.query( + PlanRecommendations.plan_id, + Recommendation, + ) + .join(Recommendation, Recommendation.id == PlanRecommendations.recommendation_id) + .filter(PlanRecommendations.plan_id.in_(plan_ids)) + .filter(Recommendation.default.is_(True)) + .all() + ) + + by_plan: dict[int, list[Recommendation]] = {} + + for plan_id, rec in rows: + by_plan.setdefault(plan_id, []).append(rec) + + return by_plan + + +def filter_remaining_recommendations( + recommendations: list[Recommendation], + installed_types: set[str], +) -> list[Recommendation]: + """ + Removes recommendations whose measure_type is already installed. + """ + if not installed_types: + return recommendations + + return [ + r for r in recommendations + if ( + (r.measure_type.value if hasattr(r.measure_type, "value") else r.measure_type) + not in installed_types + ) + ] + + def compute_plan_updates( session, plans: List[Plan], properties_by_id: Dict[int, PropertyModel], epcs_by_property_id: Dict[int, PropertyDetailsEpcModel], - property_sap_updates: Dict[int, dict], + installed_types_by_property_id, + all_ventilation_measures ) -> List[dict]: """ - Computes plan metrics assuming properties are already rebased. + Computes plan metrics after marking some recommendations as already installed. + + Rules: + - Baseline post metrics remain unchanged + - Savings + costs are recomputed excluding installed measures + - ONLY mechanical ventilation alters post metrics """ - updates = [] + all_plan_recs = get_all_default_plan_recommendations( + session, + [p.id for p in plans], + ) - for plan in plans: + updates = [] + property_to_installed_types = {} + for plan in tqdm(plans, total=len(plans)): prop = properties_by_id.get(plan.property_id) epc = epcs_by_property_id.get(plan.property_id) - prop_update = property_sap_updates.get(plan.property_id) - if not prop or not epc or not prop_update: + if not prop or not epc: continue - installed_types = get_installed_measure_types_by_uprn( - session, prop.uprn - ) + installed_types = installed_types_by_property_id.get(prop.id, set()) - future_recs = get_effective_plan_recommendations( - session, - plan.id, + if not installed_types: + continue + + plan_recs = all_plan_recs.get(plan.id, []) + + remaining_recs = filter_remaining_recommendations( + plan_recs, installed_types, ) - metrics = aggregate_plan_metrics(future_recs) + remaining = aggregate_plan_metrics(remaining_recs) - baseline_bill = ( - epc.heating_cost_current - + epc.hot_water_cost_current - + epc.lighting_cost_current - + epc.appliances_cost_current - + epc.gas_standing_charge - + epc.electricity_standing_charge - ) + # Detect ventilation removal + ventilation_removed = "mechanical_ventilation" in installed_types - post_sap = prop_update["new_sap_points"] + metrics["sap_points"] + # ------------------------------- + # Start from the previous plan + # ------------------------------- + post_sap = plan.post_sap_points + post_co2 = plan.post_co2_emissions + post_bill = plan.post_energy_bill + post_kwh = plan.post_energy_consumption + + # ------------------------------- + # Undo ventilation ONLY + # ------------------------------- + ventilation_impact = all_ventilation_measures.get(prop.uprn, None) + + if ventilation_removed and ventilation_impact is not None: + # ventilation impact = baseline - remaining + + post_sap -= ventilation_impact["sap_points"] + + post_co2 += ventilation_impact["co2"] # We save more with ventilation + + post_bill += ventilation_impact["energy_bill"] + post_kwh += ventilation_impact["energy_kwh"] + + # # Skip if nothing changes at all + # if ( + # remaining["cost_of_works"] == baseline["cost_of_works"] + # and not ventilation_removed + # ): + # continue updates.append({ "plan_id": plan.id, @@ -654,40 +929,52 @@ def compute_plan_updates( "post_epc_rating": sap_to_epc(post_sap), # Carbon - "co2_savings": metrics["co2_savings"], - "post_co2_emissions": ( - epc.co2_emissions - metrics["co2_savings"] - if epc.co2_emissions is not None - else None - ), + "co2_savings": remaining["co2_savings"], + "post_co2_emissions": post_co2, # Energy bills - "energy_bill_savings": metrics["energy_bill_savings"], - "post_energy_bill": baseline_bill - metrics["energy_bill_savings"], + "energy_bill_savings": remaining["energy_bill_savings"], + "post_energy_bill": post_bill, # Energy consumption - "energy_consumption_savings": metrics["energy_consumption_savings"], - "post_energy_consumption": ( - epc.primary_energy_consumption - - metrics["energy_consumption_savings"] - ), + "energy_consumption_savings": remaining["energy_consumption_savings"], + "post_energy_consumption": post_kwh, - # Valuation - "valuation_increase": metrics["valuation_increase"], + # Valuation (safe) + "valuation_increase": remaining["valuation_increase"], "valuation_post_retrofit": ( - prop.current_valuation + metrics["valuation_increase"] + prop.current_valuation + + remaining["valuation_increase"] if prop.current_valuation is not None else None ), # Costs - "cost_of_works": metrics["cost_of_works"], - "contingency_cost": metrics["contingency_cost"], + "cost_of_works": remaining["cost_of_works"], + "contingency_cost": remaining["contingency_cost"], }) + property_to_installed_types[prop.id] = installed_types + return updates +def build_installed_recommendation_pairs( + installed_types_by_property_id: dict[int, set[str]], +) -> list[tuple[int, str]]: + """ + Returns: + [(property_id, measure_type), ...] + """ + pairs = [] + + for property_id, measure_types in installed_types_by_property_id.items(): + for mt in measure_types: + pairs.append((property_id, mt)) + + return pairs + + def calculate_contingency_for_recommendation( recommendation, ) -> float: @@ -766,6 +1053,15 @@ def compute_epc_rebasing_updates( if not adj: continue + # if ( + # adj["sap_points"] == 0 + # and adj["co2"] == 0 + # and adj["energy_kwh"] == 0 + # and adj["energy_bill"] == 0 + # and adj["heat_demand"] == 0 + # ): + # continue + updates[property_id] = { "property_id": property_id, @@ -915,13 +1211,187 @@ def persist_epc_rebasing_updates( print(f"✅ Updated {len(epcs)} EPC records") +# For setting the original SAP, carbon, etc to the current values +def initialise_original_property_and_epc_values(portfolio_id: int): + """ + Initialise original_* columns for SAP + EPC. + Safe to re-run. Only fills NULL originals. + """ + + with db_session() as session: + # ------------------------- + # PROPERTY (SAP) + # ------------------------- + properties = ( + session.query(PropertyModel) + .filter(PropertyModel.portfolio_id == portfolio_id) + .filter(PropertyModel.original_sap_points.is_(None)) + .all() + ) + + for prop in properties: + prop.original_sap_points = prop.current_sap_points + + print(f"✅ Initialised original_sap_points for {len(properties)} properties") + + # ------------------------- + # EPC (energy / carbon) + # ------------------------- + epcs = ( + session.query(PropertyDetailsEpcModel) + .filter(PropertyDetailsEpcModel.portfolio_id == portfolio_id) + .all() + ) + + epc_updates = 0 + + for epc in epcs: + updated = False + + if epc.original_co2_emissions is None: + epc.original_co2_emissions = epc.co2_emissions + updated = True + + if epc.original_primary_energy_consumption is None: + epc.original_primary_energy_consumption = ( + epc.primary_energy_consumption + ) + updated = True + + if epc.original_current_energy_demand is None: + epc.original_current_energy_demand = epc.current_energy_demand + updated = True + + if epc.original_current_energy_demand_heating_hotwater is None: + epc.original_current_energy_demand_heating_hotwater = ( + epc.current_energy_demand_heating_hotwater + ) + updated = True + + if updated: + epc_updates += 1 + + print(f"✅ Initialised EPC originals for {epc_updates} EPC records") + + session.commit() + + +from typing import Set, Dict +from sqlalchemy import distinct + +from typing import Dict +from sqlalchemy import func + + +def get_installed_ventilation_adjustments_by_uprn_for_portfolio( + session, + portfolio_id: int, +) -> Dict[int, dict]: + """ + Returns per-UPRN aggregated impact metrics for + already-installed MECHANICAL VENTILATION. + + { + uprn: { + sap_points: float, + co2: float, + energy_kwh: float, + energy_bill: float, + heat_demand: float, + } + } + """ + + # Only consider UPRNs that belong to this portfolio + uprn_subquery = ( + session.query(PropertyModel.uprn) + .filter(PropertyModel.portfolio_id == portfolio_id) + .filter(PropertyModel.uprn.isnot(None)) + .subquery() + ) + + rows = ( + session.query( + InstalledMeasure.uprn.label("uprn"), + + func.coalesce(func.sum(InstalledMeasure.sap_points), 0.0) + .label("sap_points"), + + func.coalesce(func.sum(InstalledMeasure.carbon_savings), 0.0) + .label("co2"), + + func.coalesce(func.sum(InstalledMeasure.kwh_savings), 0.0) + .label("energy_kwh"), + + func.coalesce(func.sum(InstalledMeasure.bill_savings), 0.0) + .label("energy_bill"), + + func.coalesce(func.sum(InstalledMeasure.heat_demand_savings), 0.0) + .label("heat_demand"), + ) + .filter(InstalledMeasure.is_active.is_(True)) + .filter(InstalledMeasure.measure_type == "mechanical_ventilation") + .filter(InstalledMeasure.uprn.in_(uprn_subquery)) + .group_by(InstalledMeasure.uprn) + .all() + ) + + return { + row.uprn: { + "sap_points": float(row.sap_points), + "co2": float(row.co2), + "energy_kwh": float(row.energy_kwh), + "energy_bill": float(row.energy_bill), + "heat_demand": float(row.heat_demand), + } + for row in rows + } + + +from sqlalchemy import update, tuple_ + + +def mark_recommendations_as_installed( + session, + property_measure_pairs: list[tuple[int, str]], + dry_run: bool = True, +): + if not property_measure_pairs: + print("No recommendations to update") + return + + print(f"{len(property_measure_pairs)} recommendation matches found") + + if dry_run: + print("DRY RUN — no database changes") + return + + stmt = ( + update(Recommendation) + .where( + tuple_(Recommendation.property_id, Recommendation.measure_type) + .in_(property_measure_pairs) + ) + .values(already_installed=True) + ) + + result = session.execute(stmt) + session.commit() + + print(f"✅ Updated {result.rowcount} recommendations") + + # ------------------------------------------------------------ # EXECUTION (DRY RUN) # ------------------------------------------------------------ -PORTFOLIO_ID = 430 +PORTFOLIO_ID = 431 # TODO - run the original sap points update on the peabody portfolio +# Initialising +# initialise_original_property_and_epc_values(PORTFOLIO_ID) + + with db_read_session() as session: properties = ( session.query(PropertyModel) @@ -929,6 +1399,9 @@ with db_read_session() as session: .all() ) + all_ventilation_measures = get_installed_ventilation_adjustments_by_uprn_for_portfolio(session, PORTFOLIO_ID) + installed_types_by_property_id = get_installed_measure_types_by_property_id_for_portfolio(session, PORTFOLIO_ID) + plans = ( session.query(Plan) .filter(Plan.portfolio_id == PORTFOLIO_ID) @@ -974,10 +1447,255 @@ with db_read_session() as session: plans, properties_by_id, epcs, - property_updates_by_id, + installed_types_by_property_id, + all_ventilation_measures, ) + # Used to mark recommendations + pairs = build_installed_recommendation_pairs( + installed_types_by_property_id + ) + +from copy import deepcopy + +plan_updates_comparison = deepcopy(plan_updates) +plans_by_planid = {p.id: p for p in plans} +for u in plan_updates_comparison: + before = plans_by_planid.get(u["plan_id"]) + if not before: + continue + + u.update({ + # SAP + "before_sap_points": before.post_sap_points, + "after_sap_points": u["post_sap_points"], + + # Carbon + "before_post_co2_emissions": before.post_co2_emissions, + "after_post_co2_emissions": u["post_co2_emissions"], + + # Costs + "before_cost_of_works": before.cost_of_works, + "after_cost_of_works": u["cost_of_works"], + + "before_contingency_cost": before.contingency_cost, + "after_contingency_cost": u["contingency_cost"], + }) + +plan_updates_df = pd.DataFrame(plan_updates_comparison) + +plan_updates_df["delta_sap_points"] = ( + plan_updates_df["after_sap_points"] + - plan_updates_df["before_sap_points"] +) +plan_updates_df["delta_carbon"] = ( + plan_updates_df["after_post_co2_emissions"] + - plan_updates_df["before_post_co2_emissions"] +) +plan_updates_df["delta_cost_of_works"] = ( + plan_updates_df["after_cost_of_works"] + - plan_updates_df["before_cost_of_works"] +) +plan_updates_df["delta_contingency_cost"] = ( + plan_updates_df["after_contingency_cost"] + - plan_updates_df["before_contingency_cost"] +) + +# High-level sanity checks +summary = plan_updates_df[[ + "delta_sap_points", + "delta_carbon", + "delta_cost_of_works", + "delta_contingency_cost", +]].sum() + +print(summary) + +# Grab some random samples +example = plan_updates_df[plan_updates_df["delta_cost_of_works"] < -1000].sample(1) +# example = plan_updates_df[plan_updates_df["delta_sap_points"] == 0].sample(1) +example = plan_updates_df[plan_updates_df["property_id"] == 434936].squeeze() + +print(example["property_id"]) +# Go the the db and get the UPRN +uprn_example = 202149883 +installed_adjustments[uprn_example] + +[x for x in plan_updates if x["property_id"] == example["property_id"].values[0]] + +installed_measures_example = {} + +example.squeeze() # When ready to run! -persist_property_sap_updates(property_updates_by_id) -persist_plan_updates(plan_updates) -persist_epc_rebasing_updates(epc_updates) +# persist_property_sap_updates(property_updates_by_id) +# persist_plan_updates(plan_updates) +# persist_epc_rebasing_updates(epc_updates) +# BATCH_SIZE = 1000 +# +# with db_session() as session: +# for i in range(0, len(pairs), BATCH_SIZE): +# batch = pairs[i:i + BATCH_SIZE] +# +# mark_recommendations_as_installed( +# session, +# batch, +# dry_run=False, +# ) +# +# session.commit() + +# https://assessment-model-git-main-hestiahomes.vercel.app/portfolio/430/building-passport/435084/ +# Current EPC rating should go to 68.6 - no it shouldn't! less + +# https://assessment-model-git-main-hestiahomes.vercel.app/portfolio/430/building-passport/434930/ +# Should now be a C72, +# https://assessment-model-git-main-hestiahomes.vercel.app/portfolio/430/building-passport/434930 +# Carbon should be 2.02, energy_kwh should be, 12311.5 + +# We need a follow-up query which switches off ventilation if ewi, iwi or cwi are already installed +# https://assessment-model-git-main-hestiahomes.vercel.app/portfolio/430/building-passport/435154/plans/1024673 +# Should go to C73 +# This is a good one to test also, marking the recommendation as non-default + +# Good example to check: +# https://assessment-model-git-main-hestiahomes.vercel.app/portfolio/430/building-passport/434936/plans/1024455 +# Should go down by these: +# {'sap_points': 11.299999, 'co2': 1.85, 'energy_kwh': 7882.1997, 'energy_bill': 549.89935, 'heat_demand': 77.7} +# Before SAP: 55 +# Carbon 7.56 +# kwh: 28207 + +# Good example to check: +# https://assessment-model-git-main-hestiahomes.vercel.app/portfolio/430/building-passport/434444/plans/1024063 +# SHould change by these +# {'sap_points': 10.3, 'co2': 2.54, 'energy_kwh': 3713.5, 'energy_bill': 1028.2682, 'heat_demand': 151.61} +# Current: SAP 54 +# Carbon: 4.45 +# kwh: 10307 + +# There's one final thing to do - we had an error in post carbon so we need to increase it by the appliances +# amount for all units +from backend.ml_models.AnnualBillSavings import AnnualBillSavings + + +# Need to add this on to the plan for each property +def calculate_appliance_carbon_tonnes(total_floor_area: float) -> float: + """ + Returns appliance carbon emissions in tonnes CO2. + """ + appliance_energy_kwh = AnnualBillSavings.estimate_appliances_energy_use( + total_floor_area=total_floor_area + ) + + # kgCO2 → tonnes CO2 + appliance_carbon_tonnes = (appliance_energy_kwh * 0.232) / 1000 + return appliance_carbon_tonnes + + +from sqlalchemy.orm import joinedload +from tqdm import tqdm + +from tqdm import tqdm + + +def apply_appliance_carbon_to_plans( + session, + portfolio_id: int, + dry_run: bool = True, +): + """ + Adds appliance-related carbon emissions to plan.post_co2_emissions + using EPC total_floor_area. + """ + + # -------------------------------------------- + # Load EPCs (floor area source of truth) + # -------------------------------------------- + epcs = ( + session.query(PropertyDetailsEpcModel) + .filter(PropertyDetailsEpcModel.portfolio_id == portfolio_id) + .filter(PropertyDetailsEpcModel.total_floor_area.isnot(None)) + .all() + ) + + epc_by_property_id = { + e.property_id: e for e in epcs + } + + # -------------------------------------------- + # Load plans with post carbon + # -------------------------------------------- + plans = ( + session.query(Plan) + .filter(Plan.portfolio_id == portfolio_id) + .filter(Plan.post_co2_emissions.isnot(None)) + .all() + ) + + updates = [] + total_delta = 0.0 + + for plan in tqdm(plans, total=len(plans)): + epc = epc_by_property_id.get(plan.property_id) + if not epc: + continue + + floor_area = epc.total_floor_area + if not floor_area or floor_area <= 0: + continue + + delta = float(calculate_appliance_carbon_tonnes(floor_area)) + + if delta == 0: + continue + + updates.append((plan, delta)) + total_delta += delta + + # -------------------------------------------- + # Reporting + # -------------------------------------------- + print(f"Plans affected: {len(updates)}") + print(f"Total appliance carbon added (tCO2): {total_delta:.4f}") + + if dry_run: + print("🟡 DRY RUN — no updates applied") + return + + # -------------------------------------------- + # Apply updates + # -------------------------------------------- + for plan, delta in updates: + plan.post_co2_emissions += delta + + session.commit() + print("✅ Appliance carbon successfully applied") + + +# with db_session() as session: +# apply_appliance_carbon_to_plans( +# session, +# portfolio_id=PORTFOLIO_ID, +# dry_run=False, +# ) + +# Get all uprns for entries in already installed, from the database +with db_read_session() as session: + db_uprns = { + str(r[0]) + for r in ( + session.query(InstalledMeasure.uprn) + .all() + ) + } + +# What is the overlap of these properties and the properties in portfolo 430 +sal_data = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260107 " + "corrected batch 6 sal.xlsx", + sheet_name="batch 1", +) + +len(sal_data[sal_data["epc_os_uprn"].astype(str).isin(db_uprns)]["epc_os_uprn"]) + +# len([uprn for uprn, v in installed_adjustments.items() if str(uprn) in sal_data["epc_os_uprn"].astype(str).tolist()]) diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py b/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py index 83799eff..9c7b3d2f 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py @@ -75,6 +75,10 @@ df = df.sort_values("property_id", ascending=True) agg = df.groupby("property_id").size().reset_index(name="n_plans") agg = agg.sort_values("n_plans", ascending=True) + +agg[agg["n_plans"] == 3] +agg[agg["n_plans"] == 2].shape + agg[agg["n_plans"] != 3] assert all(agg["n_plans"] == 3) @@ -153,4 +157,54 @@ with pd.ExcelWriter(filename) as writer: sal.iloc[41000:61000, :].to_excel(writer, sheet_name="batch 4", index=False) sal.iloc[61000:81000, :].to_excel(writer, sheet_name="batch 5", index=False) - sal.iloc[81000:, :].to_excel(writer, sheet_name="batch 5", index=False) + sal.iloc[81000:, :].to_excel(writer, sheet_name="batch 6", index=False) + +# TODO - mistake was made when creating the final SAL +b1 = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 " + "sal.xlsx", + sheet_name="batch 1" +) +b2 = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 " + "sal.xlsx", + sheet_name="batch 2" +) +b3 = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 " + "sal.xlsx", + sheet_name="batch 3" +) +b4 = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 " + "sal.xlsx", + sheet_name="batch 4" +) +b5 = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 " + "sal.xlsx", + sheet_name="batch 5" +) +# Batch 6 should be the remaining +total = pd.concat([b1, b2, b3, b4, b5]) +remaining = sal[~sal["epc_os_uprn"].isin(total["epc_os_uprn"].values)] +# Create new output +filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/" + "20260107 corrected batch 6 sal.xlsx") + +with pd.ExcelWriter(filename) as writer: + sal.to_excel(writer, sheet_name="Standardised Asset List", index=False) + # Top 1000 for testing + b1.to_excel(writer, sheet_name="batch 1", index=False) + # Batch 2 is the next 20,000 + b2.to_excel(writer, sheet_name="batch 2", index=False) + # Batch 3 is the next 20,000 + b3.to_excel(writer, sheet_name="batch 3", index=False) + + b4.to_excel(writer, sheet_name="batch 4", index=False) + b5.to_excel(writer, sheet_name="batch 5", index=False) + remaining.to_excel(writer, sheet_name="batch 6", index=False) + +all_together = pd.concat( + [b1, b2, b3, b4, b5, remaining] +) diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/i_testing_parity_data.py b/etl/customers/peabody/Nov 2025 Consulting Project/i_testing_parity_data.py new file mode 100644 index 00000000..c6fb86ea --- /dev/null +++ b/etl/customers/peabody/Nov 2025 Consulting Project/i_testing_parity_data.py @@ -0,0 +1,21 @@ +import pandas as pd + +df = pd.read_excel( + "/Users/khalimconn-kowlessar/Downloads/Parity Data 08012026.xlsx" +) + +df['SAP Score'].mean() + +df[~pd.isnull(df["Lodged EPC Score"])]["Lodged EPC Score"].mean() +df[~pd.isnull(df["Lodged EPC Score"])]["SAP Score"].mean() + +df['Difference'] = abs(df['SAP Score'] - df['Lodged EPC Score']) +df[~pd.isnull(df["Lodged EPC Score"])]["Difference"].mean() + +df["Lodged EPC Band"].value_counts(normalize=True) +df["SAP Band"].value_counts(normalize=True) + +z = df[df["SAP Band"] != df["Lodged EPC Band"]] +agg = z.groupby(["Lodged EPC Band", "SAP Band"]).size().reset_index(name="count") + +zz = z[z["Lodged EPC Band"] == "A"] diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/j_installed_measures.py b/etl/customers/peabody/Nov 2025 Consulting Project/j_installed_measures.py new file mode 100644 index 00000000..370473a1 --- /dev/null +++ b/etl/customers/peabody/Nov 2025 Consulting Project/j_installed_measures.py @@ -0,0 +1,7 @@ +import pandas as pd + +sustainability_data = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody " + "- Data Extracts for Domna.xlsx", + sheet_name="Sustainability" +) diff --git a/recommendations/WindowsRecommendations.py b/recommendations/WindowsRecommendations.py index 7b85ac49..bac20af4 100644 --- a/recommendations/WindowsRecommendations.py +++ b/recommendations/WindowsRecommendations.py @@ -72,9 +72,12 @@ class WindowsRecommendations: elif "secondary_glazing" in measures and "double_glazing" not in measures: is_secondary_glazing = True else: - is_secondary_glazing = self.property.restricted_measures or ( - self.property.windows["glazing_type"] == "secondary" + # If the property currently has some secondary glazing but isn't in a conservation area + # + is_secondary_glazing = self.property.restricted_measures and ( + self.property.data["windows-energy-eff"] in ["Poor", "Very Poor"] ) + windows_area = self.property.windows_area if not number_of_windows: @@ -200,6 +203,8 @@ class WindowsRecommendations: else: glazed_type_ending = "secondary glazing" new_windows_description = "Multiple glazing throughout" + # Windows only end up with an average efficiency + windows_energy_eff = "Average" else: raise ValueError("Invalid glazing type - implement me") @@ -208,7 +213,6 @@ class WindowsRecommendations: windows_energy_eff = "Very Good" # For post 2002 windows, the energy efficiency is "Good" and so for the simulation, we simulate with "Good" - windows_ending_config = WindowAttributes(new_windows_description).process() windows_simulation_config = check_simulation_difference(