From dd06b19c77829c1040c104e6d61dfe45c769fad0 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 24 Jun 2026 14:10:55 +0000 Subject: [PATCH 1/2] =?UTF-8?q?Fail=20unmodellable=20properties=20with=20a?= =?UTF-8?q?=20specific,=20debuggable=20error=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.8 (1M context) --- applications/modelling_e2e/errors.py | 144 +++++++++++++++++ .../modelling_e2e/test_predict_epc_errors.py | 145 ++++++++++++++++++ 2 files changed, 289 insertions(+) create mode 100644 applications/modelling_e2e/errors.py create mode 100644 tests/applications/modelling_e2e/test_predict_epc_errors.py diff --git a/applications/modelling_e2e/errors.py b/applications/modelling_e2e/errors.py new file mode 100644 index 00000000..dbd47b2e --- /dev/null +++ b/applications/modelling_e2e/errors.py @@ -0,0 +1,144 @@ +"""Specific, debuggable failures for the modelling_e2e prediction path. + +When an EPC-less Property cannot be modelled the per-property failure handler +records ``str(exc)`` in the child SubTask's outputs (see ``handler.run_subtask``). +A single generic "not predictable" string conflated three unrelated causes — +unresolved property_type, an empty same-type cohort, and a degenerate +prediction — so the output could not tell an operator *which* one fired or which +data to fix. Each cause is its own exception carrying the Property's identity +plus the cause-specific context needed to debug it from the output alone. + +All subclass ``ValueError`` so the existing ``except Exception`` per-property +boundary (and any ``except ValueError`` callers) keep catching them. +""" + +from __future__ import annotations + +from typing import Optional + + +class PropertyNotModellableError(ValueError): + """Base: an EPC-less Property could not be predicted. Subclasses name the + specific cause. Carries the Property's identity so the failed SubTask output + identifies exactly which Property (and portfolio) to investigate.""" + + def __init__( + self, + *, + property_id: int, + uprn: int, + postcode: str, + portfolio_id: int, + cause: str, + ) -> None: + self.property_id = property_id + self.uprn = uprn + self.postcode = postcode + self.portfolio_id = portfolio_id + self.cause = cause + super().__init__( + f"cannot model property_id={property_id} uprn={uprn} " + f"postcode={postcode!r} portfolio_id={portfolio_id}: {cause}" + ) + + +class UnresolvedPropertyTypeError(PropertyNotModellableError): + """No lodged EPC and the Property's ``property_type`` could not be resolved, + so the hard same-type cohort filter cannot fire. Almost always a missing or + contradictory Landlord Override (e.g. a House/Bungalow lodged with an + "another dwelling above" roof is skipped by the override build), or a + ``property_type`` value with no gov-EPC code.""" + + def __init__( + self, + *, + property_id: int, + uprn: int, + postcode: str, + portfolio_id: int, + property_type: Optional[str], + built_form: Optional[str], + ) -> None: + self.property_type = property_type + self.built_form = built_form + super().__init__( + property_id=property_id, + uprn=uprn, + postcode=postcode, + portfolio_id=portfolio_id, + cause=( + "no lodged EPC and property_type could not be resolved " + f"(property_type={property_type!r}, built_form={built_form!r}), " + "so no same-type cohort can be selected. Usually a missing or " + "contradictory Landlord Override (e.g. a House/Bungalow with an " + "'another dwelling above' roof is skipped by the override build). " + "Resolve this Property's property_type override to model it." + ), + ) + + +class NoSameTypeComparablesError(PropertyNotModellableError): + """``property_type`` resolved, but no same-type comparable was found in the + Property's own postcode or — after broadening — the nearby-postcode cohort, + so it cannot be sized from a mixed-type cohort (ADR-0031).""" + + def __init__( + self, + *, + property_id: int, + uprn: int, + postcode: str, + portfolio_id: int, + property_type: str, + broadened: bool, + ) -> None: + self.property_type = property_type + self.broadened = broadened + scope = ( + "its own postcode or the broadened nearby-postcode cohort" + if broadened + else "its own postcode" + ) + super().__init__( + property_id=property_id, + uprn=uprn, + postcode=postcode, + portfolio_id=portfolio_id, + cause=( + f"no lodged EPC; property_type={property_type!r} resolved but no " + f"same-type comparable was found in {scope}. Cannot size the " + "Property from a mixed-type cohort." + ), + ) + + +class DegeneratePredictionError(PropertyNotModellableError): + """A same-type cohort was found, but the synthesised EPC carried no MAIN + building part — every comparable seeding the structure was lodged with a + null/unrecognised part identifier — so the SAP calculation has no main + dwelling to anchor on.""" + + def __init__( + self, + *, + property_id: int, + uprn: int, + postcode: str, + portfolio_id: int, + property_type: str, + cohort_size: int, + ) -> None: + self.property_type = property_type + self.cohort_size = cohort_size + super().__init__( + property_id=property_id, + uprn=uprn, + postcode=postcode, + portfolio_id=portfolio_id, + cause=( + f"no lodged EPC; predicted from a {cohort_size}-member " + f"property_type={property_type!r} cohort, but the synthesised EPC " + "had no MAIN building part (the structural template was lodged " + "with a null part identifier). Cannot anchor the SAP calculation." + ), + ) diff --git a/tests/applications/modelling_e2e/test_predict_epc_errors.py b/tests/applications/modelling_e2e/test_predict_epc_errors.py new file mode 100644 index 00000000..6984941c --- /dev/null +++ b/tests/applications/modelling_e2e/test_predict_epc_errors.py @@ -0,0 +1,145 @@ +"""Behaviour of ``_predict_epc``'s failure modes: an EPC-less Property that +cannot be predicted must fail loudly with a *specific*, debuggable exception +(carrying the Property's identity + the cause), not a single generic +"not predictable" string. The per-property handler records ``str(exc)`` in the +SubTask output, so these messages are the operator's only debugging signal. +""" + +from typing import Callable, Optional, cast +from unittest.mock import MagicMock + +import pytest + +from applications.modelling_e2e.errors import ( + DegeneratePredictionError, + NoSameTypeComparablesError, + UnresolvedPropertyTypeError, +) +from applications.modelling_e2e.handler import _predict_epc +from datatypes.epc.domain.epc_property_data import ( + BuildingPartIdentifier, + EpcPropertyData, + SapBuildingPart, +) +from domain.epc_prediction.comparable_properties import ComparableProperty +from domain.epc_prediction.epc_prediction import EpcPrediction +from domain.epc_prediction.prediction_target import ( + PredictionTarget, + PredictionTargetAttributes, +) +from repositories.property.override_backed_prediction_attributes_reader import ( + OverrideBackedPredictionAttributesReader, +) + + +class _FixedAttributesReader: + """An attributes reader that returns one fixed PredictionTargetAttributes.""" + + def __init__(self, attributes: PredictionTargetAttributes) -> None: + self._attributes = attributes + + def attributes_for(self, property_id: int) -> PredictionTargetAttributes: + return self._attributes + + +def _reader( + attributes: PredictionTargetAttributes, +) -> OverrideBackedPredictionAttributesReader: + return cast( + OverrideBackedPredictionAttributesReader, _FixedAttributesReader(attributes) + ) + + +def _no_cohort() -> Callable[[str], list[ComparableProperty]]: + return lambda _postcode: [] + + +def _no_broaden() -> Callable[[PredictionTarget], list[ComparableProperty]]: + return lambda _target: [] + + +def _epc(property_type: Optional[str]) -> EpcPropertyData: + epc: EpcPropertyData = object.__new__(EpcPropertyData) + epc.property_type = property_type + return epc + + +def test_unresolved_property_type_raises_with_property_identity() -> None: + # Arrange — no resolvable property_type (the contradictory/missing-override + # case): build_prediction_target gates the Property out before any cohort. + reader = _reader(PredictionTargetAttributes(property_type=None)) + + # Act / Assert + with pytest.raises(UnresolvedPropertyTypeError) as excinfo: + _predict_epc( + property_id=733290, + uprn=100021987320, + postcode="SE4 1DX", + portfolio_id=796, + attributes_reader=reader, + coordinates=None, + cohort_for=_no_cohort(), + broaden=_no_broaden(), + predictor=EpcPrediction(), + ) + + message = str(excinfo.value) + assert "733290" in message + assert "100021987320" in message + assert "SE4 1DX" in message + assert "796" in message + + +def test_resolved_type_but_empty_cohort_raises_no_same_type_comparables() -> None: + # Arrange — property_type resolves, but neither the own postcode nor the + # broadened cohort yields a same-type comparable. + reader = _reader(PredictionTargetAttributes(property_type="2")) + + # Act / Assert + with pytest.raises(NoSameTypeComparablesError) as excinfo: + _predict_epc( + property_id=723681, + uprn=100021935228, + postcode="SE13 5RT", + portfolio_id=796, + attributes_reader=reader, + coordinates=None, + cohort_for=_no_cohort(), + broaden=_no_broaden(), + predictor=EpcPrediction(), + ) + + assert excinfo.value.property_type == "2" + assert excinfo.value.broadened is True + assert "SE13 5RT" in str(excinfo.value) + + +def test_cohort_found_but_prediction_has_no_main_part_raises_degenerate() -> None: + # Arrange — a same-type cohort exists, but the synthesised EPC carries no + # MAIN building part (the template was lodged with a null part identifier). + reader = _reader(PredictionTargetAttributes(property_type="2")) + cohort = [ComparableProperty(epc=_epc("2"), certificate_number="c1")] + + no_main: EpcPropertyData = object.__new__(EpcPropertyData) + part: SapBuildingPart = object.__new__(SapBuildingPart) + part.identifier = BuildingPartIdentifier.OTHER + no_main.sap_building_parts = [part] + predictor = MagicMock() + predictor.predict.return_value = no_main + + # Act / Assert + with pytest.raises(DegeneratePredictionError) as excinfo: + _predict_epc( + property_id=712977, + uprn=100061743824, + postcode="LS6 1AA", + portfolio_id=796, + attributes_reader=reader, + coordinates=None, + cohort_for=lambda _postcode: cohort, + broaden=_no_broaden(), + predictor=cast(EpcPrediction, predictor), + ) + + assert excinfo.value.cohort_size == 1 + assert "100061743824" in str(excinfo.value) From 04ee16488ee95baea533d85cd1675eac9e8bc95d Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 24 Jun 2026 14:13:28 +0000 Subject: [PATCH 2/2] =?UTF-8?q?Fail=20unmodellable=20properties=20with=20a?= =?UTF-8?q?=20specific,=20debuggable=20error=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _predict_epc returned None for three unrelated causes — unresolved property_type, an empty same-type cohort, and a degenerate (no MAIN part) prediction — which the handler collapsed into one generic "not predictable" string. The SubTask output could not say which cause fired or which data to fix. Raise a specific PropertyNotModellableError subclass per cause, each carrying the property's identity (property_id, uprn, postcode, portfolio_id) and cause-specific context. The unresolved-property-type message points at the likely missing/contradictory Landlord Override. All subclass ValueError, so the per-property failure boundary keeps catching them and records str(exc). Co-Authored-By: Claude Opus 4.8 (1M context) --- applications/modelling_e2e/handler.py | 51 ++++++++++++++----- .../modelling_e2e/test_predict_epc_errors.py | 4 +- 2 files changed, 41 insertions(+), 14 deletions(-) diff --git a/applications/modelling_e2e/handler.py b/applications/modelling_e2e/handler.py index 32c76d7d..c3477bc1 100644 --- a/applications/modelling_e2e/handler.py +++ b/applications/modelling_e2e/handler.py @@ -67,6 +67,11 @@ from infrastructure.solar.google_solar_api_client import ( BuildingInsightsNotFoundError, GoogleSolarApiClient, ) +from applications.modelling_e2e.errors import ( + DegeneratePredictionError, + NoSameTypeComparablesError, + UnresolvedPropertyTypeError, +) from applications.modelling_e2e.modelling_e2e_trigger_body import ( ModellingE2ETriggerBody, ) @@ -181,16 +186,19 @@ def _predict_epc( cohort_for: Callable[[str], list[ComparableProperty]], broaden: Callable[[PredictionTarget], list[ComparableProperty]], predictor: EpcPrediction, -) -> Optional[EpcPropertyData]: +) -> EpcPropertyData: """Synthesise an EpcPropertyData for an EPC-less property from its postcode - cohort (EPC Prediction Path 3, ADR-0031), or None when ineligible. + cohort (EPC Prediction Path 3, ADR-0031). When the property's own postcode holds no same-type comparables (a sparse postcode — e.g. the only flat among houses), the cohort is broadened to the real unit postcodes physically nearest it (``broaden``) before giving up. - Returns None when property_type is unresolvable (hard cohort filter cannot - fire) or when even the broadened cohort is empty after filtering. + Raises a specific ``PropertyNotModellableError`` subclass — naming the cause + and carrying the property's identity — when it cannot predict: property_type + unresolved, an empty same-type cohort, or a degenerate (no MAIN part) + prediction. The per-property handler records ``str(exc)`` in the SubTask + output, so the cause is debuggable from the output alone. """ attributes = attributes_reader.attributes_for(property_id) identity = PropertyIdentity( @@ -198,18 +206,41 @@ def _predict_epc( ) target = build_prediction_target(identity, coordinates, attributes) if target is None: - return None + raise UnresolvedPropertyTypeError( + property_id=property_id, + uprn=uprn, + postcode=postcode, + portfolio_id=portfolio_id, + property_type=attributes.property_type, + built_form=attributes.built_form, + ) comparables = select_comparables(target, cohort_for(target.postcode)) + broadened = False if not comparables.members: + broadened = True comparables = select_comparables(target, broaden(target)) if not comparables.members: - return None + raise NoSameTypeComparablesError( + property_id=property_id, + uprn=uprn, + postcode=postcode, + portfolio_id=portfolio_id, + property_type=target.property_type, + broadened=broadened, + ) predicted = predictor.predict(target, comparables) if not any( part.identifier is BuildingPartIdentifier.MAIN for part in predicted.sap_building_parts ): - return None + raise DegeneratePredictionError( + property_id=property_id, + uprn=uprn, + postcode=postcode, + portfolio_id=portfolio_id, + property_type=target.property_type, + cohort_size=len(comparables.members), + ) return predicted @@ -344,12 +375,6 @@ def handler(body: dict[str, Any], context: Any, orchestrator: TaskOrchestrator, broaden=_broaden, predictor=predictor, ) - if predicted_epc is None: - raise ValueError( - f"no EPC for UPRN {uprn} and not predictable " - f"(unresolved property_type, or no same-type " - f"comparables in or near '{postcode}')" - ) effective_epc = Property( identity=PropertyIdentity( portfolio_id=portfolio_id, diff --git a/tests/applications/modelling_e2e/test_predict_epc_errors.py b/tests/applications/modelling_e2e/test_predict_epc_errors.py index 6984941c..715da621 100644 --- a/tests/applications/modelling_e2e/test_predict_epc_errors.py +++ b/tests/applications/modelling_e2e/test_predict_epc_errors.py @@ -15,7 +15,9 @@ from applications.modelling_e2e.errors import ( NoSameTypeComparablesError, UnresolvedPropertyTypeError, ) -from applications.modelling_e2e.handler import _predict_epc +from applications.modelling_e2e.handler import ( + _predict_epc, # pyright: ignore[reportPrivateUsage] +) from datatypes.epc.domain.epc_property_data import ( BuildingPartIdentifier, EpcPropertyData,