feat(epc-prediction): roof-insulation +/-1-bucket reporting

Adds roof_insulation_thickness_pm1 (mirrors construction_age_band_pm1, issue
#1222): adjacent RdSAP thickness buckets (0/NI, 12mm..400mm+) carry near-
identical roof U-values, so an off-by-one bucket is a SAP-neutral hit. 'ND'
(no-data) is off the ordered scale, so only an exact match counts there.
This gives an honest measurement of SAP-relevant roof-insulation quality.

Corpus (150pc/514): exact 49.3% -> +/-1 53.7% (the misses are often multiple
buckets or ND, so the band gain is smaller than age's). Fixture: exact ==
+/-1 (0.4118) — its misses are all >1 bucket; gate floor added at 0.4118.

Also fixes two pre-existing pyright errors in the touched test file
(_epc main_fuel_type/main_heating_control were Optional but the
MainHeatingDetail attributes are non-optional Union[int, str]).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Khalim Conn-Kowlessar 2026-06-15 14:04:18 +00:00
parent 7f48495ed5
commit 4afab2c3d8
3 changed files with 93 additions and 2 deletions

View file

@ -85,6 +85,49 @@ def _age_band_within_one(predicted: object, actual: object) -> Optional[bool]:
return False
# Ordinal position ("rung") of each RdSAP roof-insulation thickness bucket,
# running from thinnest to thickest. An uninsulated roof may be lodged either as
# the integer 0 or as "NI" (not insulated), so both share rung 0. "ND" (no data)
# is deliberately absent: it has no place on the ordered scale. Neighbouring
# buckets carry near-identical roof U-values, so a prediction that lands one
# rung away is scored as a (SAP-neutral) ±1 hit — the same measurement honesty
# applied to the construction age band (issue #1222).
_ROOF_THICKNESS_ORDINAL: dict[object, int] = {
    # Rung 0 — the two equivalent spellings of "uninsulated".
    0: 0,
    "NI": 0,
    # Rungs 1..15 — the labelled buckets, numbered in ascending thickness.
    **{
        label: rung
        for rung, label in enumerate(
            (
                "12mm",
                "25mm",
                "50mm",
                "75mm",
                "100mm",
                "125mm",
                "150mm",
                "175mm",
                "200mm",
                "225mm",
                "250mm",
                "270mm",
                "300mm",
                "350mm",
                "400mm+",
            ),
            start=1,
        )
    },
}
def _roof_insulation_within_one(
    predicted: object, actual: object
) -> Optional[bool]:
    """Score a roof-insulation prediction with one bucket of tolerance.

    Returns None when ``actual`` is None, as there is nothing to score against.
    Returns True when the two values are equal, or when both sit on the ordered
    thickness scale no more than one rung apart. Returns False otherwise — this
    includes any unequal pair where either value is missing from the scale
    (e.g. the "ND" no-data category), for which only equality can count.
    """
    if actual is None:
        return None
    if predicted == actual:
        return True

    # Look both values up on the ordered scale; a value that is not on the
    # scale comes back as None.
    rung_of = _ROOF_THICKNESS_ORDINAL.get
    predicted_rung, actual_rung = rung_of(predicted), rung_of(actual)
    if predicted_rung is not None and actual_rung is not None:
        return -1 <= predicted_rung - actual_rung <= 1
    return False
def _main_heating_detail(epc: EpcPropertyData) -> Optional[MainHeatingDetail]:
"""The primary heating system's detail row, or None when none is lodged."""
details = epc.sap_heating.main_heating_details
@ -159,6 +202,10 @@ def _renewables_and_fabric_hits(
_main(predicted).roof_insulation_thickness,
_main(actual).roof_insulation_thickness,
),
"roof_insulation_thickness_pm1": _roof_insulation_within_one(
_main(predicted).roof_insulation_thickness,
_main(actual).roof_insulation_thickness,
),
"floor_insulation": _classify(
_main_floor_insulation(predicted), _main_floor_insulation(actual)
),

View file

@ -45,6 +45,7 @@ _RATE_FLOORS: dict[str, float] = {
"cylinder_insulation_type": 0.5000,
"secondary_heating_type": 0.0000,
"roof_insulation_thickness": 0.4118,
"roof_insulation_thickness_pm1": 0.4118,
"floor_insulation": 0.9375,
"has_room_in_roof": 0.8333,
"modal_glazing_type": 0.5278,

View file

@ -38,9 +38,9 @@ def _epc(
door_count: int = 2,
has_pv: bool = False,
solar_water_heating: bool = False,
main_fuel_type: Optional[int] = 20,
main_fuel_type: Union[int, str] = 20,
main_heating_category: Optional[int] = 2,
main_heating_control: Optional[Union[int, str]] = 2100,
main_heating_control: Union[int, str] = 2100,
water_heating_fuel: Optional[int] = 20,
water_heating_code: Optional[int] = 901,
has_hot_water_cylinder: bool = True,
@ -125,6 +125,49 @@ def test_age_band_two_apart_misses_both() -> None:
assert hits["construction_age_band_pm1"] is False
def test_scores_roof_insulation_within_one_bucket() -> None:
    # Arrange — the prediction (250mm) and the lodged value (270mm) are
    # neighbouring RdSAP buckets. Neighbouring thicknesses carry near-identical
    # roof U-values, so the exact comparison misses while the ±1-bucket
    # comparison still scores a hit, like the age band (issue #1222).
    predicted_epc = _epc(roof_insulation_thickness="250mm")
    actual_epc = _epc(roof_insulation_thickness="270mm")

    # Act
    hits_by_field = compare_prediction(predicted_epc, actual_epc).categorical_hits
    exact_hit = hits_by_field["roof_insulation_thickness"]
    within_one_hit = hits_by_field["roof_insulation_thickness_pm1"]

    # Assert
    assert exact_hit is False
    assert within_one_hit is True
def test_roof_insulation_two_buckets_apart_misses_both() -> None:
    # Arrange — predicted 100mm, actual 150mm: exactly two buckets apart on the
    # RdSAP scale (100 → 125 → 150). This is the nearest possible miss, so it
    # pins the ±1 tolerance boundary: any wider tolerance would wrongly score
    # it as a hit. It is a real miss on both the exact and the ±1 measure.
    predicted = _epc(roof_insulation_thickness="100mm")
    actual = _epc(roof_insulation_thickness="150mm")

    # Act
    hits = compare_prediction(predicted, actual).categorical_hits

    # Assert
    assert hits["roof_insulation_thickness"] is False
    assert hits["roof_insulation_thickness_pm1"] is False
def test_roof_insulation_off_scale_no_data_only_exact_counts() -> None:
    # Arrange — the lodged value is "ND" (no data), which has no position on
    # the ordered thickness scale, so a prediction that differs from it cannot
    # qualify as an adjacent-bucket hit.
    predicted_epc = _epc(roof_insulation_thickness="200mm")
    actual_epc = _epc(roof_insulation_thickness="ND")

    # Act
    hits_by_field = compare_prediction(predicted_epc, actual_epc).categorical_hits
    exact_hit = hits_by_field["roof_insulation_thickness"]
    within_one_hit = hits_by_field["roof_insulation_thickness_pm1"]

    # Assert
    assert exact_hit is False
    assert within_one_hit is False
def test_flags_a_correct_main_wall_construction_classification() -> None:
# Arrange — predicted and actual agree on cavity (1).
predicted = _epc(wall_construction=1)