Revert "guard(modelling_e2e): quarantine predicted Properties the calculator mis-scores"

This reverts commit 0c70280dea.
2026-06-30 13:10:47 +00:00 · 2026-06-24 09:33:54 +00:00 · 2026-06-24 09:33:54 +00:00 · 80985865a2
commit 80985865a2
parent 0c70280dea
2 changed files with 0 additions and 74 deletions
--- a/applications/modelling_e2e/handler.py
+++ b/applications/modelling_e2e/handler.py
@@ -212,35 +212,6 @@ def _predict_epc(
    return predicted


-# --- TEMPORARY GUARD: remove once the SAP calculator's oil-heating under-score
-# is fixed (predicted oil-boiler picture scores SAP 13/G vs a recorded 50/E). ---
-# A predicted EpcPropertyData carries its own recorded SAP (energy_rating_current,
-# synthesised from the cohort). When the calculator's baseline score contradicts
-# that by more than ~one EPC band the picture is being mis-scored, so any Plan
-# built on it overshoots (e.g. goal C lands at band A). Quarantine the property —
-# skip its Plan — rather than ship nonsense. Lodged properties are unaffected:
-# they have a real recorded cert and the Rebaseliner already owns this check.
-_PREDICTED_BASELINE_DIVERGENCE_GUARD = 20.0  # SAP points (~one EPC band)
-
-
-class ImplausiblePredictedBaseline(Exception):
-    """A predicted Property's calculator baseline contradicts its recorded SAP by
-    more than a band — the calculator is mis-scoring the synthesised picture, so
-    the Plan is untrustworthy and is withheld (caught per-property as a failure)."""
-
-
-def _predicted_baseline_is_implausible(
-    baseline_sap: float, recorded_sap: Optional[int]
-) -> bool:
-    """True when a predicted Property's calculator baseline diverges from the
-    picture's own recorded SAP by more than the guard band. A missing recorded
-    SAP (no reference) is never implausible — the guard only fires on a concrete
-    contradiction."""
-    if recorded_sap is None:
-        return False
-    return abs(baseline_sap - recorded_sap) > _PREDICTED_BASELINE_DIVERGENCE_GUARD
-
-
@task_handler(task_source="modelling_e2e", source=Source.PROPERTY)
 def handler(body: dict[str, Any], context: Any) -> Optional[dict[str, Any]]:
    trigger = ModellingE2ETriggerBody.model_validate(body)
@@ -418,22 +389,6 @@ def handler(body: dict[str, Any], context: Any) -> Optional[dict[str, Any]]:
                    f"measures={len(plan.measures)}"
                )

-                # Quarantine a predicted Property whose calculator baseline
-                # contradicts its synthesised recorded SAP (TEMPORARY guard —
-                # see _predicted_baseline_is_implausible). Raising drops this one
-                # property into `failures` and skips its Plan/Baseline; the rest
-                # of the batch is unaffected.
-                if predicted_epc is not None and _predicted_baseline_is_implausible(
-                    plan.baseline.sap_continuous, effective_epc.energy_rating_current
-                ):
-                    raise ImplausiblePredictedBaseline(
-                        f"property={property_id}: predicted baseline SAP "
-                        f"{plan.baseline.sap_continuous:.1f} diverges from the "
-                        f"picture's recorded SAP {effective_epc.energy_rating_current} "
-                        f"by > {_PREDICTED_BASELINE_DIVERGENCE_GUARD:.0f} points — "
-                        f"likely a calculator mis-score; withholding the plan"
-                    )
-
                if dry_run:
                    measure_types = (
                        ", ".join(m.measure_type for m in plan.measures) or "none"
--- a/tests/applications/modelling_e2e/test_handler.py
+++ b/tests/applications/modelling_e2e/test_handler.py
@@ -68,9 +68,6 @@ def _plan_mock() -> MagicMock:
    plan = MagicMock()
    plan.measures = []
    plan.cost_of_works = 0.0
-    # A plausible baseline so the predicted-baseline guard stays silent (it
-    # compares this against the picture's recorded SAP).
-    plan.baseline.sap_continuous = 50.0
    return plan


@@ -333,7 +330,6 @@ def test_prediction_path_saves_predicted_epc_plan_and_baseline(
    mock_part = MagicMock()
    mock_part.identifier = BuildingPartIdentifier.MAIN
    mock_predicted_epc.sap_building_parts = [mock_part]
-    mock_predicted_epc.energy_rating_current = 50  # matches plan baseline -> guard silent

    mock_comparables = MagicMock()
    mock_comparables.members = [MagicMock()]  # non-empty cohort
@@ -538,7 +534,6 @@ def test_empty_own_postcode_broadens_to_nearby_and_predicts() -> None:
    mock_part = MagicMock()
    mock_part.identifier = BuildingPartIdentifier.MAIN
    mock_predicted_epc.sap_building_parts = [mock_part]
-    mock_predicted_epc.energy_rating_current = 50  # matches plan baseline -> guard silent

    # First select_comparables (own postcode) is empty → broaden; the second
    # (nearby cohort) finds comparables.
@@ -762,7 +757,6 @@ def test_cohort_cache_prevents_duplicate_candidates_for_calls() -> None:
    mock_part = MagicMock()
    mock_part.identifier = BuildingPartIdentifier.MAIN
    mock_predicted_epc.sap_building_parts = [mock_part]
-    mock_predicted_epc.energy_rating_current = 50  # matches plan baseline -> guard silent

    mock_comparables = MagicMock()
    mock_comparables.members = [MagicMock()]
@@ -925,26 +919,3 @@ def test_dry_run_skips_all_db_writes() -> None:

    # Assert — UoW never entered
    MockUoW.return_value.__enter__.assert_not_called()
-
-
-def test_predicted_baseline_within_band_is_plausible() -> None:
-    # A predicted picture whose calculator baseline tracks its recorded SAP
-    # (here 50 vs 47) is trusted — the guard does not fire.
-    from applications.modelling_e2e.handler import _predicted_baseline_is_implausible
-
-    assert _predicted_baseline_is_implausible(47.0, 50) is False
-
-
-def test_predicted_baseline_beyond_band_is_implausible() -> None:
-    # The 713406 case: calculator scores the oil-boiler picture at 13 while the
-    # synthesised cert records 50 — a >20-point contradiction the guard rejects.
-    from applications.modelling_e2e.handler import _predicted_baseline_is_implausible
-
-    assert _predicted_baseline_is_implausible(13.2, 50) is True
-
-
-def test_predicted_baseline_without_a_recorded_sap_is_not_judged() -> None:
-    # No recorded SAP means no reference to contradict, so the guard stays silent.
-    from applications.modelling_e2e.handler import _predicted_baseline_is_implausible
-
-    assert _predicted_baseline_is_implausible(13.2, None) is False