diff --git a/applications/modelling_e2e/handler.py b/applications/modelling_e2e/handler.py index f9c55696..c5aa189b 100644 --- a/applications/modelling_e2e/handler.py +++ b/applications/modelling_e2e/handler.py @@ -212,35 +212,6 @@ def _predict_epc( return predicted -# --- TEMPORARY GUARD: remove once the SAP calculator's oil-heating under-score -# is fixed (predicted oil-boiler picture scores SAP 13/G vs a recorded 50/E). --- -# A predicted EpcPropertyData carries its own recorded SAP (energy_rating_current, -# synthesised from the cohort). When the calculator's baseline score contradicts -# that by more than ~one EPC band the picture is being mis-scored, so any Plan -# built on it overshoots (e.g. goal C lands at band A). Quarantine the property — -# skip its Plan — rather than ship nonsense. Lodged properties are unaffected: -# they have a real recorded cert and the Rebaseliner already owns this check. -_PREDICTED_BASELINE_DIVERGENCE_GUARD = 20.0 # SAP points (~one EPC band) - - -class ImplausiblePredictedBaseline(Exception): - """A predicted Property's calculator baseline contradicts its recorded SAP by - more than a band — the calculator is mis-scoring the synthesised picture, so - the Plan is untrustworthy and is withheld (caught per-property as a failure).""" - - -def _predicted_baseline_is_implausible( - baseline_sap: float, recorded_sap: Optional[int] -) -> bool: - """True when a predicted Property's calculator baseline diverges from the - picture's own recorded SAP by more than the guard band. A missing recorded - SAP (no reference) is never implausible — the guard only fires on a concrete - contradiction.""" - if recorded_sap is None: - return False - return abs(baseline_sap - recorded_sap) > _PREDICTED_BASELINE_DIVERGENCE_GUARD - - @task_handler(task_source="modelling_e2e", source=Source.PROPERTY) def handler(body: dict[str, Any], context: Any) -> Optional[dict[str, Any]]: trigger = ModellingE2ETriggerBody.model_validate(body) @@ -418,22 +389,6 @@ def handler(body: dict[str, Any], context: Any) -> Optional[dict[str, Any]]: f"measures={len(plan.measures)}" ) - # Quarantine a predicted Property whose calculator baseline - # contradicts its synthesised recorded SAP (TEMPORARY guard — - # see _predicted_baseline_is_implausible). Raising drops this one - # property into `failures` and skips its Plan/Baseline; the rest - # of the batch is unaffected. - if predicted_epc is not None and _predicted_baseline_is_implausible( - plan.baseline.sap_continuous, effective_epc.energy_rating_current - ): - raise ImplausiblePredictedBaseline( - f"property={property_id}: predicted baseline SAP " - f"{plan.baseline.sap_continuous:.1f} diverges from the " - f"picture's recorded SAP {effective_epc.energy_rating_current} " - f"by > {_PREDICTED_BASELINE_DIVERGENCE_GUARD:.0f} points — " - f"likely a calculator mis-score; withholding the plan" - ) - if dry_run: measure_types = ( ", ".join(m.measure_type for m in plan.measures) or "none" diff --git a/tests/applications/modelling_e2e/test_handler.py b/tests/applications/modelling_e2e/test_handler.py index 18939029..baa20103 100644 --- a/tests/applications/modelling_e2e/test_handler.py +++ b/tests/applications/modelling_e2e/test_handler.py @@ -68,9 +68,6 @@ def _plan_mock() -> MagicMock: plan = MagicMock() plan.measures = [] plan.cost_of_works = 0.0 - # A plausible baseline so the predicted-baseline guard stays silent (it - # compares this against the picture's recorded SAP). - plan.baseline.sap_continuous = 50.0 return plan @@ -333,7 +330,6 @@ def test_prediction_path_saves_predicted_epc_plan_and_baseline( mock_part = MagicMock() mock_part.identifier = BuildingPartIdentifier.MAIN mock_predicted_epc.sap_building_parts = [mock_part] - mock_predicted_epc.energy_rating_current = 50 # matches plan baseline -> guard silent mock_comparables = MagicMock() mock_comparables.members = [MagicMock()] # non-empty cohort @@ -538,7 +534,6 @@ def test_empty_own_postcode_broadens_to_nearby_and_predicts() -> None: mock_part = MagicMock() mock_part.identifier = BuildingPartIdentifier.MAIN mock_predicted_epc.sap_building_parts = [mock_part] - mock_predicted_epc.energy_rating_current = 50 # matches plan baseline -> guard silent # First select_comparables (own postcode) is empty → broaden; the second # (nearby cohort) finds comparables. @@ -762,7 +757,6 @@ def test_cohort_cache_prevents_duplicate_candidates_for_calls() -> None: mock_part = MagicMock() mock_part.identifier = BuildingPartIdentifier.MAIN mock_predicted_epc.sap_building_parts = [mock_part] - mock_predicted_epc.energy_rating_current = 50 # matches plan baseline -> guard silent mock_comparables = MagicMock() mock_comparables.members = [MagicMock()] @@ -925,26 +919,3 @@ def test_dry_run_skips_all_db_writes() -> None: # Assert — UoW never entered MockUoW.return_value.__enter__.assert_not_called() - - -def test_predicted_baseline_within_band_is_plausible() -> None: - # A predicted picture whose calculator baseline tracks its recorded SAP - # (here 50 vs 47) is trusted — the guard does not fire. - from applications.modelling_e2e.handler import _predicted_baseline_is_implausible - - assert _predicted_baseline_is_implausible(47.0, 50) is False - - -def test_predicted_baseline_beyond_band_is_implausible() -> None: - # The 713406 case: calculator scores the oil-boiler picture at 13 while the - # synthesised cert records 50 — a >20-point contradiction the guard rejects. - from applications.modelling_e2e.handler import _predicted_baseline_is_implausible - - assert _predicted_baseline_is_implausible(13.2, 50) is True - - -def test_predicted_baseline_without_a_recorded_sap_is_not_judged() -> None: - # No recorded SAP means no reference to contradict, so the guard stays silent. - from applications.modelling_e2e.handler import _predicted_baseline_is_implausible - - assert _predicted_baseline_is_implausible(13.2, None) is False