feat(epc-prediction): ±1-band age scoring + window_count cosmetic (#1222)

Measurement honesty so we optimise SAP-relevant accuracy, not SAP-neutral
misses (ADR-0030 Component Accuracy):
- Add construction_age_band_pm1: an exact-or-adjacent-band hit. Adjacent
  RdSAP age bands carry near-identical U-values, so an off-by-one is
  ~SAP-neutral. Full corpus: exact 78.5% but ±1-band 91.7% (fixture
  63.9% -> 83.3%) — most age misses are adjacent.
- Drop window_count from the gate's residual ceilings (cosmetic): the
  predicted picture clusters at a mapper-default 4 windows vs actuals 1-21,
  but total_window_area (the SAP-relevant signal) stays tight at ~3.4 m².

Gate: + construction_age_band_pm1 floor 0.8333; window_count no longer gated.

Closes #1222

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Khalim Conn-Kowlessar 2026-06-14 10:01:20 +00:00
parent a5b7310911
commit ffaedd8d14
3 changed files with 62 additions and 1 deletions

View file

@ -59,6 +59,32 @@ def _classify(predicted: object, actual: object) -> Optional[bool]:
return predicted == actual return predicted == actual
# RdSAP construction age bands, oldest → newest. Adjacent bands carry near-
# identical U-values, so an off-by-one is treated as a (SAP-neutral) ±1 hit.
_AGE_BAND_ORDER: str = "ABCDEFGHIJKL"
def _age_band_within_one(predicted: object, actual: object) -> Optional[bool]:
"""A ±1-band age hit: None when the actual is absent, True on an exact or
adjacent-band match, else False (issue #1222 — exact match overstates the
SAP impact of age-band misses)."""
if actual is None:
return None
if predicted == actual:
return True
if (
isinstance(predicted, str)
and isinstance(actual, str)
and predicted in _AGE_BAND_ORDER
and actual in _AGE_BAND_ORDER
):
return (
abs(_AGE_BAND_ORDER.index(predicted) - _AGE_BAND_ORDER.index(actual))
<= 1
)
return False
def _main_heating_detail(epc: EpcPropertyData) -> Optional[MainHeatingDetail]: def _main_heating_detail(epc: EpcPropertyData) -> Optional[MainHeatingDetail]:
"""The primary heating system's detail row, or None when none is lodged.""" """The primary heating system's detail row, or None when none is lodged."""
details = epc.sap_heating.main_heating_details details = epc.sap_heating.main_heating_details
@ -179,6 +205,10 @@ def compare_prediction(
_main(predicted).construction_age_band, _main(predicted).construction_age_band,
_main(actual).construction_age_band, _main(actual).construction_age_band,
), ),
"construction_age_band_pm1": _age_band_within_one(
_main(predicted).construction_age_band,
_main(actual).construction_age_band,
),
"roof_construction": _classify( "roof_construction": _classify(
_main(predicted).roof_construction, _main(predicted).roof_construction,
_main(actual).roof_construction, _main(actual).roof_construction,

View file

@ -33,6 +33,7 @@ _RATE_FLOORS: dict[str, float] = {
"wall_construction": 0.8889, "wall_construction": 0.8889,
"wall_insulation_type": 0.7778, "wall_insulation_type": 0.7778,
"construction_age_band": 0.6389, "construction_age_band": 0.6389,
"construction_age_band_pm1": 0.8333,
"roof_construction": 0.7222, "roof_construction": 0.7222,
"floor_construction": 0.7500, "floor_construction": 0.7500,
"heating_main_fuel": 0.9722, "heating_main_fuel": 0.9722,
@ -52,9 +53,11 @@ _RATE_FLOORS: dict[str, float] = {
} }
# Maximum mean absolute residual per numeric component (ratchet ceilings). # Maximum mean absolute residual per numeric component (ratchet ceilings).
# window_count is deliberately excluded — it is cosmetic for SAP (issue #1222):
# the predicted picture clusters at a mapper-default 4 windows while actuals
# spread 1-21, yet total_window_area (the SAP-relevant signal) stays tight.
_RESIDUAL_CEILINGS: dict[str, float] = { _RESIDUAL_CEILINGS: dict[str, float] = {
"floor_area": 12.2175, "floor_area": 12.2175,
"window_count": 3.8889,
"total_window_area": 4.4067, "total_window_area": 4.4067,
"building_parts": 0.3333, "building_parts": 0.3333,
"door_count": 0.6389, "door_count": 0.6389,

View file

@ -97,6 +97,34 @@ def _epc(
return epc return epc
def test_scores_age_band_within_one_band() -> None:
    # Arrange — the prediction lands on band K while the lodged band is J, its
    # immediate neighbour. Neighbouring RdSAP age bands carry near-identical
    # U-values, so this off-by-one is ~SAP-neutral: the exact-match score
    # records a miss, the ±1-band score records a hit (issue #1222).
    predicted = _epc(construction_age_band="K")
    actual = _epc(construction_age_band="J")

    # Act
    comparison = compare_prediction(predicted, actual)
    categorical = comparison.categorical_hits

    # Assert
    exact_hit = categorical["construction_age_band"]
    within_one_hit = categorical["construction_age_band_pm1"]
    assert exact_hit is False
    assert within_one_hit is True


def test_age_band_two_apart_misses_both() -> None:
    # Arrange — predicted K, actual I (exactly two bands apart). This is the
    # nearest miss outside the ±1 window, so it pins the boundary: it must
    # fail both the exact score and the ±1-band score.
    predicted = _epc(construction_age_band="K")
    actual = _epc(construction_age_band="I")
    # Act
    hits = compare_prediction(predicted, actual).categorical_hits
    # Assert
    assert hits["construction_age_band"] is False
    assert hits["construction_age_band_pm1"] is False


def test_flags_a_correct_main_wall_construction_classification() -> None: def test_flags_a_correct_main_wall_construction_classification() -> None:
# Arrange — predicted and actual agree on cavity (1). # Arrange — predicted and actual agree on cavity (1).
predicted = _epc(wall_construction=1) predicted = _epc(wall_construction=1)