From ffaedd8d14bfc5665f1d3606f01cb5c8997eba7f Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 14 Jun 2026 10:01:20 +0000 Subject: [PATCH] =?UTF-8?q?feat(epc-prediction):=20=C2=B11-band=20age=20sc?= =?UTF-8?q?oring=20+=20window=5Fcount=20cosmetic=20(#1222)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Measurement honesty so we optimise SAP-relevant accuracy, not SAP-neutral misses (ADR-0030 Component Accuracy): - Add construction_age_band_pm1: an exact-or-adjacent-band hit. Adjacent RdSAP age bands carry near-identical U-values, so an off-by-one is ~SAP-neutral. Full corpus: exact 78.5% but ±1-band 91.7% (fixture 63.9% -> 83.3%) — most age misses are adjacent. - Drop window_count from the gate's residual ceilings (cosmetic): the predicted picture clusters at a mapper-default 4 windows vs actuals 1-21, but total_window_area (the SAP-relevant signal) stays tight at ~3.4 m2. Gate: + construction_age_band_pm1 floor 0.8333; window_count no longer gated. Closes #1222 Co-Authored-By: Claude Opus 4.8 --- .../epc_prediction/prediction_comparison.py | 30 +++++++++++++++++++ .../test_component_accuracy_gate.py | 5 +++- .../test_prediction_comparison.py | 28 +++++++++++++++++ 3 files changed, 62 insertions(+), 1 deletion(-) diff --git a/domain/epc_prediction/prediction_comparison.py b/domain/epc_prediction/prediction_comparison.py index 148038e4..9332bca6 100644 --- a/domain/epc_prediction/prediction_comparison.py +++ b/domain/epc_prediction/prediction_comparison.py @@ -59,6 +59,32 @@ def _classify(predicted: object, actual: object) -> Optional[bool]: return predicted == actual +# RdSAP construction age bands, oldest → newest. Adjacent bands carry near- +# identical U-values, so an off-by-one is treated as a (SAP-neutral) ±1 hit. 
+_AGE_BAND_ORDER: str = "ABCDEFGHIJKL"
+
+
+def _age_band_within_one(predicted: object, actual: object) -> Optional[bool]:
+    """Score a ±1-band construction-age hit (issue #1222).
+
+    Returns None when ``actual`` is None, True on an exact match or when both
+    are single-letter bands adjacent in ``_AGE_BAND_ORDER``, else False; an
+    exact-only score overstates the SAP impact of age-band misses.
+    """
+    if actual is None:
+        return None
+    if predicted == actual:
+        return True
+    if not (isinstance(predicted, str) and isinstance(actual, str)):
+        return False
+    # ``in`` on a str is a substring test: "" and "AB" would pass it and be
+    # indexed as band A, so only single characters are looked up.
+    if len(predicted) != 1 or len(actual) != 1:
+        return False
+    pred_pos, actual_pos = map(_AGE_BAND_ORDER.find, (predicted, actual))
+    return min(pred_pos, actual_pos) >= 0 and abs(pred_pos - actual_pos) <= 1
+
+
 def _main_heating_detail(epc: EpcPropertyData) -> Optional[MainHeatingDetail]: """The primary heating system's detail row, or None when none is lodged.""" details = epc.sap_heating.main_heating_details @@ -179,6 +205,10 @@ def compare_prediction( _main(predicted).construction_age_band, _main(actual).construction_age_band, ), + "construction_age_band_pm1": _age_band_within_one( + _main(predicted).construction_age_band, + _main(actual).construction_age_band, + ), "roof_construction": _classify( _main(predicted).roof_construction, _main(actual).roof_construction, diff --git a/tests/domain/epc_prediction/test_component_accuracy_gate.py b/tests/domain/epc_prediction/test_component_accuracy_gate.py index 8edac364..6564e97f 100644 --- a/tests/domain/epc_prediction/test_component_accuracy_gate.py +++ b/tests/domain/epc_prediction/test_component_accuracy_gate.py @@ -33,6 +33,7 @@ _RATE_FLOORS: dict[str, float] = { "wall_construction": 0.8889, "wall_insulation_type": 0.7778, "construction_age_band": 0.6389, + "construction_age_band_pm1": 0.8333, "roof_construction": 0.7222, "floor_construction": 0.7500, "heating_main_fuel": 0.9722, @@ -52,9 +53,11 @@ _RATE_FLOORS: dict[str, float] = { } # Maximum mean absolute residual per numeric component (ratchet ceilings).
+# window_count is deliberately excluded — it is cosmetic for SAP (issue #1222): +# the predicted picture clusters at a mapper-default 4 windows while actuals +# spread 1-21, yet total_window_area (the SAP-relevant signal) stays tight. _RESIDUAL_CEILINGS: dict[str, float] = { "floor_area": 12.2175, - "window_count": 3.8889, "total_window_area": 4.4067, "building_parts": 0.3333, "door_count": 0.6389, diff --git a/tests/domain/epc_prediction/test_prediction_comparison.py b/tests/domain/epc_prediction/test_prediction_comparison.py index e6a092e9..ab19f5ef 100644 --- a/tests/domain/epc_prediction/test_prediction_comparison.py +++ b/tests/domain/epc_prediction/test_prediction_comparison.py @@ -97,6 +97,34 @@ def _epc( return epc
+def test_scores_age_band_within_one_band() -> None:
+    # Arrange — predicted age band K, actual J (adjacent). Adjacent RdSAP age
+    # bands carry near-identical U-values, so an off-by-one is ~SAP-neutral: it
+    # misses the exact hit but counts as a ±1-band hit (issue #1222).
+    predicted = _epc(construction_age_band="K")
+    actual = _epc(construction_age_band="J")
+
+    # Act
+    hits = compare_prediction(predicted, actual).categorical_hits
+
+    # Assert
+    assert hits["construction_age_band"] is False
+    assert hits["construction_age_band_pm1"] is True
+
+
+def test_age_band_two_apart_misses_both() -> None:
+    # Arrange — predicted K, actual I (two bands apart): the nearest real miss.
+    predicted = _epc(construction_age_band="K")
+    actual = _epc(construction_age_band="I")
+
+    # Act
+    hits = compare_prediction(predicted, actual).categorical_hits
+
+    # Assert
+    assert hits["construction_age_band"] is False
+    assert hits["construction_age_band_pm1"] is False
+
+
 def test_flags_a_correct_main_wall_construction_classification() -> None: # Arrange — predicted and actual agree on cavity (1). predicted = _epc(wall_construction=1)