diff --git a/domain/epc_prediction/prediction_comparison.py b/domain/epc_prediction/prediction_comparison.py
index 148038e4..9332bca6 100644
--- a/domain/epc_prediction/prediction_comparison.py
+++ b/domain/epc_prediction/prediction_comparison.py
@@ -59,6 +59,39 @@ def _classify(predicted: object, actual: object) -> Optional[bool]:
     return predicted == actual
 
 
+# RdSAP construction age bands, oldest → newest. Adjacent bands carry near-
+# identical U-values, so an off-by-one is treated as a (SAP-neutral) ±1 hit.
+_AGE_BAND_ORDER: str = "ABCDEFGHIJKL"
+
+
+def _age_band_within_one(predicted: object, actual: object) -> Optional[bool]:
+    """A ±1-band age hit: None when the actual is absent, True on an exact or
+    adjacent-band match, else False (issue #1222 — exact match overstates the
+    SAP impact of age-band misses).
+
+    Only single-letter bands listed in ``_AGE_BAND_ORDER`` are ranked; any
+    other unequal pair (empty string, multi-letter, non-str) is a miss."""
+    if actual is None:
+        return None
+    if predicted == actual:
+        return True
+    if (
+        isinstance(predicted, str)
+        and isinstance(actual, str)
+        # ``in`` on a str is a substring test, so "" and "AB" would otherwise
+        # pass membership and index to 0; insist on exactly one letter.
+        and len(predicted) == 1
+        and len(actual) == 1
+        and predicted in _AGE_BAND_ORDER
+        and actual in _AGE_BAND_ORDER
+    ):
+        return (
+            abs(_AGE_BAND_ORDER.index(predicted) - _AGE_BAND_ORDER.index(actual))
+            <= 1
+        )
+    return False
+
+
 def _main_heating_detail(epc: EpcPropertyData) -> Optional[MainHeatingDetail]:
     """The primary heating system's detail row, or None when none is lodged."""
     details = epc.sap_heating.main_heating_details
@@ -179,6 +212,10 @@ def compare_prediction(
             _main(predicted).construction_age_band,
             _main(actual).construction_age_band,
         ),
+        "construction_age_band_pm1": _age_band_within_one(
+            _main(predicted).construction_age_band,
+            _main(actual).construction_age_band,
+        ),
         "roof_construction": _classify(
             _main(predicted).roof_construction,
             _main(actual).roof_construction,
diff --git a/tests/domain/epc_prediction/test_component_accuracy_gate.py b/tests/domain/epc_prediction/test_component_accuracy_gate.py
index 8edac364..6564e97f 100644
--- a/tests/domain/epc_prediction/test_component_accuracy_gate.py
+++ b/tests/domain/epc_prediction/test_component_accuracy_gate.py
@@ -33,6 +33,7 @@ _RATE_FLOORS: dict[str, float] = {
     "wall_construction": 0.8889,
     "wall_insulation_type": 0.7778,
     "construction_age_band": 0.6389,
+    "construction_age_band_pm1": 0.8333,
     "roof_construction": 0.7222,
     "floor_construction": 0.7500,
     "heating_main_fuel": 0.9722,
@@ -52,9 +53,11 @@ _RATE_FLOORS: dict[str, float] = {
 }
 
 # Maximum mean absolute residual per numeric component (ratchet ceilings).
+# window_count is deliberately excluded — it is cosmetic for SAP (issue #1222):
+# the predicted picture clusters at a mapper-default 4 windows while actuals
+# spread 1-21, yet total_window_area (the SAP-relevant signal) stays tight.
 _RESIDUAL_CEILINGS: dict[str, float] = {
     "floor_area": 12.2175,
-    "window_count": 3.8889,
     "total_window_area": 4.4067,
     "building_parts": 0.3333,
     "door_count": 0.6389,
diff --git a/tests/domain/epc_prediction/test_prediction_comparison.py b/tests/domain/epc_prediction/test_prediction_comparison.py
index e6a092e9..ab19f5ef 100644
--- a/tests/domain/epc_prediction/test_prediction_comparison.py
+++ b/tests/domain/epc_prediction/test_prediction_comparison.py
@@ -97,6 +97,34 @@ def _epc(
     return epc
 
 
+def test_scores_age_band_within_one_band() -> None:
+    # Arrange — predicted age band K, actual J (adjacent). Adjacent RdSAP age
+    # bands carry near-identical U-values, so an off-by-one is ~SAP-neutral: it
+    # misses the exact hit but counts as a ±1-band hit (issue #1222).
+    predicted = _epc(construction_age_band="K")
+    actual = _epc(construction_age_band="J")
+
+    # Act
+    hits = compare_prediction(predicted, actual).categorical_hits
+
+    # Assert
+    assert hits["construction_age_band"] is False
+    assert hits["construction_age_band_pm1"] is True
+
+
+def test_age_band_two_apart_misses_both() -> None:
+    # Arrange — predicted K, actual I (two bands apart): a real miss on both.
+    predicted = _epc(construction_age_band="K")
+    actual = _epc(construction_age_band="I")
+
+    # Act
+    hits = compare_prediction(predicted, actual).categorical_hits
+
+    # Assert
+    assert hits["construction_age_band"] is False
+    assert hits["construction_age_band_pm1"] is False
+
+
 def test_flags_a_correct_main_wall_construction_classification() -> None:
     # Arrange — predicted and actual agree on cavity (1).
     predicted = _epc(wall_construction=1)