From bdc4c213ad1925093e4744153a1d2254cd9fede0 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 5 Dec 2025 09:40:24 +0000 Subject: [PATCH 1/2] working on cleaning epc data for old records --- backend/Property.py | 11 ++++--- backend/app/assumptions.py | 9 ++++++ backend/app/db/models/recommendations.py | 11 +++++++ backend/engine/engine.py | 39 ++++++++++++++++++++++-- 4 files changed, 62 insertions(+), 8 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index dd92a902..50fc865e 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -727,11 +727,12 @@ class Property: self.energy_cost_estimates = { "unadjusted": unadjusted_heating_costs, - "epc": { - "heating": float(self.data["heating-cost-current"]), - "hot_water": float(self.data["hot-water-cost-current"]), - "lighting": float(self.data["lighting-cost-current"]), - } + # Don't think we need the EPC + # "epc": { + # "heating": float(self.data["heating-cost-current"]), + # "hot_water": float(self.data["hot-water-cost-current"]), + # "lighting": float(self.data["lighting-cost-current"]), + # } } self.energy_consumption_estimates = { diff --git a/backend/app/assumptions.py b/backend/app/assumptions.py index 31acbe29..898f586b 100644 --- a/backend/app/assumptions.py +++ b/backend/app/assumptions.py @@ -101,3 +101,12 @@ measures_needing_ventilation = [ # If we have a property beyond this size, we assume it's likely large enough to have an ASHP ASHP_FLOOR_AREA_THRESHOLD = 120 # m2 + +# Is a placeholder, used for cleaning data. Is a flat average based on the estimated +AVERAGE_LIGHTING_COST = 100 + +# Average bill, based on British Gas is £1,838.71. Subtract 100 for lighting, 228 for hot water. 
This will include +# appliances so appliances should be removed when this is used +AVERAGE_HEATING_AND_APPLIANCE_COST = 1510.71 +# Based on https://energysavingtrust.org.uk/sites/default/files/reports/AtHomewithWater%287%29.pdf +AVERAGE_HOT_WATER_COST = 228 diff --git a/backend/app/db/models/recommendations.py b/backend/app/db/models/recommendations.py index 2b7bf7c7..4c02268d 100644 --- a/backend/app/db/models/recommendations.py +++ b/backend/app/db/models/recommendations.py @@ -3,6 +3,7 @@ from sqlalchemy.orm import declarative_base from sqlalchemy.sql import func from backend.app.db.models.portfolio import Portfolio, PropertyModel from backend.app.db.models.materials import Material +from backend.app.db.models.portfolio import Epc from datatypes.enums import QuantityUnits import enum @@ -78,6 +79,16 @@ class Plan(Base): ), nullable=True, ) + post_sap_points = Column(Float) + post_epc_rating = Column(Enum(Epc)) + post_co2_emissions = Column(Float) + co2_savings = Column(Float) + post_energy_bill = Column(Float) + energy_bill_savings = Column(Float) + post_energy_consumption = Column(Float) # energy demand in kWh/year + energy_consumption_savings = Column(Float) + valuation_post_retrofit = Column(Float) + valuation_increase = Column(Float) class PlanRecommendations(Base): diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 215adfe4..f92da01a 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -1,4 +1,3 @@ -import os import time import json from copy import deepcopy @@ -16,6 +15,7 @@ from etl.epc.Record import EPCRecord from sqlalchemy.exc import IntegrityError, OperationalError from sqlalchemy.orm import sessionmaker from starlette.responses import Response +from backend.ml_models.AnnualBillSavings import AnnualBillSavings from backend.app.config import get_settings, get_prediction_buckets from backend.app.db.connection import db_engine @@ -415,8 +415,17 @@ def averages_cleaning(prepared_epc: EPCRecord, cleaning_data: 
pd.DataFrame): :return: """ - if not pd.isnull(prepared_epc.prepared_epc["number_habitable_rooms"]) and not pd.isnull( - prepared_epc.prepared_epc["number_heated_rooms"]) and not pd.isnull(prepared_epc.prepared_epc["floor_height"]): + variables_to_clean = [ + "number_habitable_rooms", + "number_heated_rooms", + "floor_height", + "lighting_cost_current", + "heating_cost_current", + "hot_water_cost_current", + "energy_consumption_potential", + ] + + if not any([pd.isnull(prepared_epc.prepared_epc[k]) for k in variables_to_clean]): # Nothing to do return prepared_epc @@ -461,6 +470,30 @@ def averages_cleaning(prepared_epc: EPCRecord, cleaning_data: pd.DataFrame): prepared_epc.prepared_epc["floor_height"] = clean_floor_height prepared_epc.floor_height = clean_floor_height + if pd.isnull(prepared_epc.lighting_cost_current): + # This is a basic assumption as an average + prepared_epc.prepared_epc["lighting_cost_current"] = assumptions.AVERAGE_LIGHTING_COST + prepared_epc.lighting_cost_current = assumptions.AVERAGE_LIGHTING_COST + + if pd.isnull(prepared_epc.heating_cost_current): + # This is a basic assumption as an average + appliance_cost = AnnualBillSavings.estimate_appliances_energy_use( + total_floor_area=prepared_epc.total_floor_area + ) * AnnualBillSavings.ELECTRICITY_PRICE_CAP + heating_cleaned_value = assumptions.AVERAGE_HEATING_AND_APPLIANCE_COST - appliance_cost + prepared_epc.prepared_epc["heating_cost_current"] = heating_cleaned_value + prepared_epc.heating_cost_current = heating_cleaned_value + + if pd.isnull(prepared_epc.hot_water_cost_current): + # This is a basic assumption as an average + prepared_epc.prepared_epc["hot_water_cost_current"] = assumptions.AVERAGE_HOT_WATER_COST + prepared_epc.hot_water_cost_current = assumptions.AVERAGE_HOT_WATER_COST + + if pd.isnull(prepared_epc.energy_consumption_potential): + # Set to current + prepared_epc.prepared_epc["energy_consumption_potential"] = prepared_epc.energy_consumption_current + 
prepared_epc.energy_consumption_potential = prepared_epc.energy_consumption_current + return prepared_epc From c10bf032dc9936084efd3000d6ed173a9581353a Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 5 Dec 2025 14:59:47 +0000 Subject: [PATCH 2/2] debugging fuel code --- backend/Property.py | 6 ++ backend/SearchEpc.py | 61 +++++++++++++--- .../db/functions/recommendations_functions.py | 62 ++++++++++++++++ backend/engine/engine.py | 71 ++++++++----------- 4 files changed, 147 insertions(+), 53 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index 50fc865e..6328ac8c 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -1220,6 +1220,12 @@ class Property: else: self.heating_energy_source = ['Electricity'] + if set(self.heating_energy_source) == {'Electricity', 'LPG'}: + if self.main_fuel["clean_description"] in ["Lpg not community", "Lpg community"]: + self.heating_energy_source = ['LPG'] + else: + self.heating_energy_source = ['Electricity'] + if set(self.heating_energy_source) == {'Natural Gas', 'Wood Logs'}: # It means they have mixed heating so we take the primary one, based on main fuel # This will probably happen in the case of an extension diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py index 5ceac5f9..a193a65f 100644 --- a/backend/SearchEpc.py +++ b/backend/SearchEpc.py @@ -144,6 +144,11 @@ class SearchEpc: "error": None } + # Keys that we check for missing values to determine if the EPC is incomplete + CHECK_MISSING_KEYS = [ + "lighting-cost-current", "heating-cost-current", "hot-water-cost-current", "energy-consumption-potential" + ] + def __init__( self, address1: str, @@ -217,6 +222,9 @@ class SearchEpc: # By default, this is set to false. This flag indicates whether we should overwrite SAP 2005 entires. self.overwrite_sap05 = False + # By default, this is set to false. 
This flag indicates whether we should take the existing EPC, but use + # the estimated EPC to clean missings + self.clean_missing_on_expired = False def set_strict_property_type_search(self): """ @@ -988,20 +996,40 @@ class SearchEpc: ) = self.extract_epc_data(address=self.full_address) # Before we return, we check if we need to overwrite a SAP05 EPC - # If we have don't have SAP05 in the heating description and overwrite_sap05 is False, we return - is_sap_o5 = "SAP05:" in self.newest_epc.get("mainheat-description", "") - good_data = not is_sap_o5 and (response["status"] == 200) + # ---- SAP 05 overwriting logic ---- + is_sap_05 = "SAP05:" in self.newest_epc.get("mainheat-description", "") - if good_data or not overwrite_sap05: + needs_sap_05_overwrite = is_sap_05 and (response["status"] == 200) and overwrite_sap05 + + # ---- Cleaning expired EPC logic ---- + epc_is_expired = (pd.Timestamp.now() - pd.Timestamp( + self.newest_epc.get("lodgement-date", pd.Timestamp.now()))).days > 3650 + + epc_has_missing_key_data = any([self.newest_epc.get(k) in [None, ""] for k in self.CHECK_MISSING_KEYS]) + + epc_needs_cleaning = epc_is_expired and epc_has_missing_key_data + + # ---- We don't have an epc ---- + no_epc = response["status"] != 200 + + # If we don't have to overwrite SAP05, or we don't have missing data on an expired EPC, we return + if not needs_sap_05_overwrite and not epc_needs_cleaning and not no_epc: # If the data is fine, or we're preventing SAP05 overwrites, we just exit here return # By default, we don't exclude old but we will do, when we are estimating to overwrite a SAP05 EPC lmks_to_drop, exclude_old = [], False - if is_sap_o5: - self.overwrite_sap05 = True + if needs_sap_05_overwrite or epc_needs_cleaning: + self.overwrite_sap05 = needs_sap_05_overwrite + self.clean_missing_on_expired = epc_needs_cleaning lmks_to_drop = [self.newest_epc["lmk-key"]] exclude_old = True + self.heating_system = ( + self.newest_epc["mainheat-description"] if + 
self.clean_missing_on_expired and self.heating_system is None else self.heating_system + ) + self.ordnance_survey_client.property_type = self.newest_epc["property-type"] + self.ordnance_survey_client.built_form = self.newest_epc["built-form"] # Step 2: If we don't have an EPC, we use the ordnance survey api to find the uprn if skip_os: @@ -1016,13 +1044,24 @@ class SearchEpc: exclude_old=exclude_old ) + # If we have overwritten a SAP05 EPC, we need to update older_epcs too if self.overwrite_sap05: # We keep a record of the fact that we have performed a SAP05 overwrite estimated_epc["sap_05_overwritten"] = True + self.older_epcs = [self.newest_epc.copy()] + self.newest_epc = estimated_epc + elif self.clean_missing_on_expired: + # We perform the cleaning + for k in self.CHECK_MISSING_KEYS: + if self.newest_epc[k] in ["", None]: + self.newest_epc[k] = estimated_epc[k] + + self.newest_epc["estimated"] = True + self.older_epcs = [] + else: + self.older_epcs = [] + self.newest_epc = estimated_epc - # If we have overwritten a SAP05 EPC, we need to update older_epcs too - self.older_epcs = [] if not self.overwrite_sap05 else [self.newest_epc.copy()] - self.newest_epc = estimated_epc self.full_sap_epc = {} # Finally, set a standardised address 1 and postcode @@ -1077,7 +1116,9 @@ class SearchEpc: if not self.newest_epc: raise ValueError("No EPC data available to set UPRN source - run find_property first") - if self.newest_epc.get("estimated") and file_format == "domna_asset_list" and (self.newest_epc["uprn"] < 0): + if (self.newest_epc.get("estimated") and + (file_format == "domna_asset_list") and + (float(self.newest_epc["uprn"]) < 0)): self.newest_epc["uprn-source"] = self.UPRN_SOURCE_SIMULATED def check_attribute_variations(self): diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index 55bf5824..542dde93 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ 
b/backend/app/db/functions/recommendations_functions.py @@ -11,6 +11,68 @@ from backend.app.db.models.funding import FundingPackageMeasures, FundingPackage from backend.app.db.models.inspections import InspectionModel +def prepare_plan_data( + p, body, scenario_id, eco_packages, valuations, new_sap_points, new_epc, default_recommendations +): + """ + Utility function to prepare the data that goes into the production of a plan. Is a fairly rough and unstructured + function that will need improving in the future + :param p: Instantiated property + :param body: request body, PlanTriggerRequest + :param scenario_id: unique identifier for the scenario + :param eco_packages: Pre-constructed eco packages for a property + :param valuations: valuation improvement data + :param new_sap_points: sap points, post default recommendations + :param new_epc: new epc rating, post default recommendations + :param default_recommendations: list of default recommendations for a property + :return: + """ + # Plan carbon savings + co2_savings = sum([r["co2_equivalent_savings"] for r in default_recommendations]) + post_co2_emissions = p.data["co2-emissions-current"] - co2_savings + + # Plan bill savings + energy_bill_savings = sum([r["energy_cost_savings"] for r in default_recommendations]) + post_energy_bill = sum(p.current_energy_bill.values()) - energy_bill_savings + + # energy consumption + energy_consumption_savings = sum([r["kwh_savings"] for r in default_recommendations]) + post_energy_consumption = p.current_energy_consumption - energy_consumption_savings + + valuation_post_retrofit, valuation_increase = None, None + if valuations["current_value"]: + valuation_increase = valuations["average_increase"] + valuation_post_retrofit = valuations["average_increased_value"] + + return { + "portfolio_id": body.portfolio_id, + "property_id": p.id, + "scenario_id": scenario_id, + "is_default": True if p.is_new else False, + "name": body.scenario_name, + "valuation_increase_lower_bound": ( + 
valuations["lower_bound_increased_value"] - valuations["current_value"] + ), + "valuation_increase_upper_bound": ( + valuations["upper_bound_increased_value"] - valuations["current_value"] + ), + "valuation_increase_average": ( + valuations["average_increased_value"] - valuations["current_value"] + ), + "post_sap_points": new_sap_points, + "post_epc_rating": new_epc, + "post_co2_emissions": post_co2_emissions, + "co2_savings": co2_savings, + "post_energy_bill": post_energy_bill, + "energy_bill_savings": energy_bill_savings, + "post_energy_consumption": post_energy_consumption, + "energy_consumption_savings": energy_consumption_savings, + "valuation_post_retrofit": valuation_post_retrofit, + "valuation_increase": valuation_increase, + "plan_type": eco_packages.get(p.id, (None, None, None))[2] + } + + def create_plan(session: Session, plan): """ This function will create a record for the plan in the database if it does not exist. diff --git a/backend/engine/engine.py b/backend/engine/engine.py index f92da01a..2427ca8a 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -419,10 +419,6 @@ def averages_cleaning(prepared_epc: EPCRecord, cleaning_data: pd.DataFrame): "number_habitable_rooms", "number_heated_rooms", "floor_height", - "lighting_cost_current", - "heating_cost_current", - "hot_water_cost_current", - "energy_consumption_potential", ] if not any([pd.isnull(prepared_epc.prepared_epc[k]) for k in variables_to_clean]): @@ -470,29 +466,29 @@ def averages_cleaning(prepared_epc: EPCRecord, cleaning_data: pd.DataFrame): prepared_epc.prepared_epc["floor_height"] = clean_floor_height prepared_epc.floor_height = clean_floor_height - if pd.isnull(prepared_epc.lighting_cost_current): - # This is a basic assumption as an average - prepared_epc.prepared_epc["lighting_cost_current"] = assumptions.AVERAGE_LIGHTING_COST - prepared_epc.lighting_cost_current = assumptions.AVERAGE_LIGHTING_COST + # if pd.isnull(prepared_epc.lighting_cost_current): + # # This is 
a basic assumption as an average + # prepared_epc.prepared_epc["lighting_cost_current"] = assumptions.AVERAGE_LIGHTING_COST + # prepared_epc.lighting_cost_current = assumptions.AVERAGE_LIGHTING_COST - if pd.isnull(prepared_epc.heating_cost_current): - # This is a basic assumption as an average - appliance_cost = AnnualBillSavings.estimate_appliances_energy_use( - total_floor_area=prepared_epc.total_floor_area - ) * AnnualBillSavings.ELECTRICITY_PRICE_CAP - heating_cleaned_value = assumptions.AVERAGE_HEATING_AND_APPLIANCE_COST - appliance_cost - prepared_epc.prepared_epc["heating_cost_current"] = heating_cleaned_value - prepared_epc.heating_cost_current = heating_cleaned_value - - if pd.isnull(prepared_epc.hot_water_cost_current): - # This is a basic assumption as an average - prepared_epc.prepared_epc["hot_water_cost_current"] = assumptions.AVERAGE_HOT_WATER_COST - prepared_epc.hot_water_cost_current = assumptions.AVERAGE_HOT_WATER_COST - - if pd.isnull(prepared_epc.energy_consumption_potential): - # Set to current - prepared_epc.prepared_epc["energy_consumption_potential"] = prepared_epc.energy_consumption_current - prepared_epc.energy_consumption_potential = prepared_epc.energy_consumption_current + # if pd.isnull(prepared_epc.heating_cost_current): + # # This is a basic assumption as an average + # appliance_cost = AnnualBillSavings.estimate_appliances_energy_use( + # total_floor_area=prepared_epc.total_floor_area + # ) * AnnualBillSavings.ELECTRICITY_PRICE_CAP + # heating_cleaned_value = assumptions.AVERAGE_HEATING_AND_APPLIANCE_COST - appliance_cost + # prepared_epc.prepared_epc["heating_cost_current"] = heating_cleaned_value + # prepared_epc.heating_cost_current = heating_cleaned_value + # + # if pd.isnull(prepared_epc.hot_water_cost_current): + # # This is a basic assumption as an average + # prepared_epc.prepared_epc["hot_water_cost_current"] = assumptions.AVERAGE_HOT_WATER_COST + # prepared_epc.hot_water_cost_current = assumptions.AVERAGE_HOT_WATER_COST + 
# + # if pd.isnull(prepared_epc.energy_consumption_potential): + # # Set to current + # prepared_epc.prepared_epc["energy_consumption_potential"] = prepared_epc.energy_consumption_current + # prepared_epc.energy_consumption_potential = prepared_epc.energy_consumption_current return prepared_epc @@ -1281,6 +1277,10 @@ async def model_engine(body: PlanTriggerRequest): ) property_value_increase_ranges[p.id] = valuations + property_plan_data = db_funcs.recommendations_functions.prepare_plan_data( + p, body, scenario_id, eco_packages, valuations, new_sap_points, new_epc, default_recommendations + ) + # TODO - this is not right, especially if the existing run failed if p.is_new: property_details_epc = p.get_property_details_epc( @@ -1300,23 +1300,8 @@ async def model_engine(body: PlanTriggerRequest): if not recommendations_to_upload: continue - new_plan_id = db_funcs.recommendations_functions.create_plan(session, { - "portfolio_id": body.portfolio_id, - "property_id": p.id, - "scenario_id": scenario_id, - "is_default": True if p.is_new else False, - "name": body.scenario_name, - "valuation_increase_lower_bound": ( - valuations["lower_bound_increased_value"] - valuations["current_value"] - ), - "valuation_increase_upper_bound": ( - valuations["upper_bound_increased_value"] - valuations["current_value"] - ), - "valuation_increase_average": ( - valuations["average_increased_value"] - valuations["current_value"] - ), - "plan_type": eco_packages.get(p.id, (None, None, None))[2] - }) + + new_plan_id = db_funcs.recommendations_functions.create_plan(session, plan=property_plan_data) db_funcs.recommendations_functions.upload_recommendations( session, recommendations_to_upload, p.id, new_plan_id