From e93ee337885df0b779c96293a8e7700428d9888d Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 3 Jan 2026 15:28:46 +0800 Subject: [PATCH] Fixing bug in EPC estimation when we have a heating system and do not find a matching property --- backend/SearchEpc.py | 29 +++++++++++---- backend/engine/engine.py | 2 +- .../d_restart_failed_subtasks.py | 36 +++++++++++++++++++ .../epc_attributes/FloorAttributes.py | 2 ++ 4 files changed, 62 insertions(+), 7 deletions(-) diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py index 9af7330b..deb17ba5 100644 --- a/backend/SearchEpc.py +++ b/backend/SearchEpc.py @@ -663,7 +663,10 @@ class SearchEpc: params["property-type"] = property_type_api_map[property_type] # We take the 20 nearest homes of the relevant type, so not to pull in too many irrelevant homes - epc_response = self.get_epc(params=params, size=100) + # If we get to the final iteration, we fetch more + + size = 1000 if len(postcode) <= 2 else 100 + epc_response = self.get_epc(params=params, size=size) if epc_response["status"] == 200: epc_data = pd.DataFrame(self.data["rows"]) @@ -690,6 +693,12 @@ class SearchEpc: if not epc_data.empty: epc_data = epc_data[~epc_data["mainheat-description"].str.lower().str.contains("sap05:")] + if not epc_data.empty and heating_system is not None: + # Keep only homes whose main heating matches the requested system. NOTE(review): no final-iteration relaxation is visible here - confirm intent + epc_data = epc_data[ + epc_data["mainheat-description"] == heating_system + ] + if not epc_data.empty: # Further processing of the EPC data @@ -747,6 +756,19 @@ class SearchEpc: estimation_built_form = "Enclosed Mid-Terrace" else: estimation_built_form = "Mid-Terrace" + elif built_form == "Detached" and property_type == "Flat": + # We add in a fallback to detached flats, where it can be rarer to see properties of this type + if len(postcode) <= 2: + if sum(epc_data["built-form"] == built_form) > 0: + estimation_built_form = built_form + elif sum(epc_data["built-form"] == 
"Semi-Detached") > 0: + estimation_built_form = "Semi-Detached" + elif sum(epc_data["built-form"] == "End-Terrace") > 0: + estimation_built_form = "End-Terrace" + else: + estimation_built_form = "Mid-Terrace" + else: + estimation_built_form = built_form else: estimation_built_form = built_form @@ -782,11 +804,6 @@ class SearchEpc: epc_data["property-type"] == estimation_property_type) ] - if heating_system is not None: - epc_data = epc_data[ - epc_data["mainheat-description"] == heating_system - ] - if not epc_data.empty: return epc_data # Return the filtered data if it's not empty diff --git a/backend/engine/engine.py b/backend/engine/engine.py index f9820204..eb933cc0 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -730,7 +730,7 @@ async def model_engine(body: PlanTriggerRequest): epc_searcher.ordnance_survey_client.property_type = addr.property_type # For the moment, our OS API access is unavailable, so we skip and interpolate - epc_searcher.find_property(skip_os=True, api_data=None, overwrite_sap05=True) + epc_searcher.find_property(skip_os=True, api_data=epc_api_data, overwrite_sap05=True) epc_searcher.set_uprn_source(file_format=body.file_format) lookup_key = ( diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/d_restart_failed_subtasks.py b/etl/customers/peabody/Nov 2025 Consulting Project/d_restart_failed_subtasks.py index 5ef901b2..a8ab230f 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/d_restart_failed_subtasks.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/d_restart_failed_subtasks.py @@ -10,3 +10,39 @@ Additionally, we wil find the problematic records and remove them Given we ran an EPC C scenario, we should check how many properties, below EPC C we have, that have no plan or recommendations in case something went wrong """ +import pandas as pd +from sqlalchemy.orm import Session +from backend.app.db.models.portfolio import PropertyModel +from backend.app.db.connection import db_session + + 
+def get_uprns_for_portfolio(session: Session, portfolio_id: int) -> list[int]: + return [ + uprn + for (uprn,) in + session.query(PropertyModel.uprn) + .filter(PropertyModel.portfolio_id == portfolio_id) + .all() + if uprn is not None + ] + + +with db_session() as session: + completed_uprns = get_uprns_for_portfolio(session, 419) + +# Load the Standardised Asset List (SAL) to find the properties missing from the completed portfolio +sal = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20251213 Model " + "data.xlsx", + sheet_name="Standardised Asset List" +) + +missed_properties = sal[~sal["epc_os_uprn"].isin(completed_uprns)] + +# Store +missed_properties.to_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/" + "d_failed_properties_to_restart_20260102.xlsx", + sheet_name="Standardised Asset List", + index=False +) diff --git a/etl/epc_clean/epc_attributes/FloorAttributes.py b/etl/epc_clean/epc_attributes/FloorAttributes.py index 47013aaa..cd1499c2 100644 --- a/etl/epc_clean/epc_attributes/FloorAttributes.py +++ b/etl/epc_clean/epc_attributes/FloorAttributes.py @@ -133,6 +133,8 @@ class FloorAttributes(Definitions): result["another_property_below"] = ( "(another dwelling below)" in description or "(other premises below)" in description + or "another dwelling below" in description + or "other premises below" in description ) thickness_map = {