feat(epc-prediction): classify roof/floor/insulation/age categoricals (ADR-0029)

The comparison only scored main wall_construction; everything else the
predictor produces (by template-copy) went unmeasured. Extend
compare_prediction to the rest of the ADR-0029 homogeneous categoricals —
wall insulation type, construction age band, roof construction, floor
construction — and aggregate per-categorical classification rates in the
runner. A categorical hit is "not applicable" (None, excluded from the
denominator) when the actual lodges no value, so absent-roof flats don't
score free wins.

Smoke corpus (29 leave-one-out, all but wall are template-copied today):
  wall_construction      93.1%
  wall_insulation_type   93.1%
  construction_age_band  55.2%   <- loud; candidate for cohort-mode
  roof_construction      72.4%
  floor_construction     46.2%   (n=13)

These numbers drive the next slice (extend cohort-mode coverage).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Khalim Conn-Kowlessar 2026-06-14 00:10:56 +00:00
parent 4fa20ae76b
commit ed96df9315
3 changed files with 125 additions and 8 deletions

View file

@ -12,6 +12,7 @@ runner, which has the calculator and the lodged SAP.
from __future__ import annotations
from dataclasses import dataclass
from typing import Optional
from datatypes.epc.domain.epc_property_data import EpcPropertyData, SapBuildingPart
@ -19,9 +20,16 @@ from datatypes.epc.domain.epc_property_data import EpcPropertyData, SapBuildingP
@dataclass(frozen=True)
class PredictionComparison:
"""One Property's prediction accuracy: classification hits + geometry
residuals (predicted - actual)."""
residuals (predicted - actual). A categorical hit is `None` ("not
applicable") when the actual lodges no value there, so the harness can keep
it out of the classification-rate denominator rather than score a free win.
"""
wall_construction_correct: bool
wall_insulation_type_correct: bool
construction_age_band_correct: bool
roof_construction_correct: Optional[bool]
floor_construction_correct: Optional[bool]
floor_area_residual: float
building_parts_residual: int
window_count_residual: int
@ -32,6 +40,21 @@ def _main(epc: EpcPropertyData) -> SapBuildingPart:
return epc.sap_building_parts[0]
def _main_floor_construction(epc: EpcPropertyData) -> Optional[int]:
    """Return the floor construction code lodged for the main building part.

    The code is read from the first entry of the main part's
    ``sap_floor_dimensions``. When that collection is empty (or otherwise
    falsy) there is nothing to read and ``None`` is returned.
    """
    floor_dimensions = _main(epc).sap_floor_dimensions
    if not floor_dimensions:
        return None
    return floor_dimensions[0].floor_construction
def _classify(predicted: object, actual: object) -> Optional[bool]:
"""A categorical hit: None ("not applicable") when the actual is absent,
else whether the predicted value matches it."""
if actual is None:
return None
return predicted == actual
def _total_window_area(epc: EpcPropertyData) -> float:
return sum(w.window_width * w.window_height for w in epc.sap_windows)
@ -45,6 +68,20 @@ def compare_prediction(
wall_construction_correct=(
_main(predicted).wall_construction == _main(actual).wall_construction
),
wall_insulation_type_correct=(
_main(predicted).wall_insulation_type
== _main(actual).wall_insulation_type
),
construction_age_band_correct=(
_main(predicted).construction_age_band
== _main(actual).construction_age_band
),
roof_construction_correct=_classify(
_main(predicted).roof_construction, _main(actual).roof_construction
),
floor_construction_correct=_classify(
_main_floor_construction(predicted), _main_floor_construction(actual)
),
floor_area_residual=(
predicted.total_floor_area_m2 - actual.total_floor_area_m2
),

View file

@ -75,7 +75,15 @@ def main() -> None:
calculator = Sap10Calculator()
predictor = EpcPrediction()
wall_hits = wall_total = 0
# Classification: name -> [hits, applicable-total]. A None hit (the actual
# lodges no value) is excluded from the denominator.
categoricals: dict[str, list[int]] = {
"wall_construction": [0, 0],
"wall_insulation_type": [0, 0],
"construction_age_band": [0, 0],
"roof_construction": [0, 0],
"floor_construction": [0, 0],
}
floor_res: list[float] = []
window_count_res: list[int] = []
window_area_res: list[float] = []
@ -105,8 +113,19 @@ def main() -> None:
predicted_n += 1
cmp = compare_prediction(predicted, actual)
wall_total += 1
wall_hits += int(cmp.wall_construction_correct)
_tally(categoricals["wall_construction"], cmp.wall_construction_correct)
_tally(
categoricals["wall_insulation_type"],
cmp.wall_insulation_type_correct,
)
_tally(
categoricals["construction_age_band"],
cmp.construction_age_band_correct,
)
_tally(categoricals["roof_construction"], cmp.roof_construction_correct)
_tally(
categoricals["floor_construction"], cmp.floor_construction_correct
)
floor_res.append(cmp.floor_area_residual)
window_count_res.append(cmp.window_count_residual)
window_area_res.append(cmp.total_window_area_residual)
@ -130,9 +149,10 @@ def main() -> None:
print(f"corpus: {CORPUS}")
print(f"predicted {predicted_n} held-out certs ({skipped_no_cohort} had no cohort)\n")
if wall_total:
print(f"CLASSIFICATION wall_construction: {wall_hits}/{wall_total} = "
f"{wall_hits / wall_total:.1%}")
for name, (hits, total) in categoricals.items():
if total:
print(f"CLASSIFICATION {name}: {hits}/{total} = {hits / total:.1%}")
print()
_residual("floor_area (m2)", floor_res)
_residual("window_count", [float(x) for x in window_count_res])
_residual("total_window_area (m2)", window_area_res)
@ -143,6 +163,15 @@ def main() -> None:
_sap_line("SAP |neighbour-mean lodged| (baseline)", sap_vs_neighbour_mean)
def _tally(counter: list[int], hit: Optional[bool]) -> None:
"""Record one classification outcome: a None hit (actual absent) is not
applicable and skipped; else increment the applicable total and the hits."""
if hit is None:
return
counter[1] += 1
counter[0] += int(hit)
def _residual(label: str, values: list[float]) -> None:
if not values:
print(f"RESIDUAL {label}: (none)")

View file

@ -5,11 +5,12 @@ and residuals on the geometry. Pure; SAP residual is computed in the runner
(it needs the calculator + lodged SAP).
"""
from typing import Optional
from typing import Optional, Union
from datatypes.epc.domain.epc_property_data import (
EpcPropertyData,
SapBuildingPart,
SapFloorDimension,
SapWindow,
)
from domain.epc_prediction.prediction_comparison import compare_prediction
@ -18,6 +19,10 @@ from domain.epc_prediction.prediction_comparison import compare_prediction
def _epc(
*,
wall_construction: int = 1,
wall_insulation_type: Union[int, str] = 1,
construction_age_band: str = "K",
roof_construction: Optional[int] = 1,
floor_construction: Optional[int] = 1,
floor_area: float = 80.0,
building_parts: int = 1,
windows: Optional[list[tuple[float, float]]] = None,
@ -28,6 +33,12 @@ def _epc(
for _ in range(building_parts):
part: SapBuildingPart = object.__new__(SapBuildingPart)
part.wall_construction = wall_construction
part.wall_insulation_type = wall_insulation_type
part.construction_age_band = construction_age_band
part.roof_construction = roof_construction
floor_dim: SapFloorDimension = object.__new__(SapFloorDimension)
floor_dim.floor_construction = floor_construction
part.sap_floor_dimensions = [floor_dim]
parts.append(part)
epc.sap_building_parts = parts
sap_windows: list[SapWindow] = []
@ -64,6 +75,46 @@ def test_flags_an_incorrect_main_wall_construction_classification() -> None:
assert comparison.wall_construction_correct is False
def test_classifies_the_extra_homogeneous_categoricals() -> None:
    # Arrange — predicted and actual share age band, roof construction and
    # floor construction; wall insulation type is the one categorical that
    # differs (predicted 2, actual 1).
    predicted = _epc(
        wall_insulation_type=2,
        construction_age_band="K",
        roof_construction=3,
        floor_construction=1,
    )
    actual = _epc(
        wall_insulation_type=1,
        construction_age_band="K",
        roof_construction=3,
        floor_construction=1,
    )

    # Act
    result = compare_prediction(predicted, actual)

    # Assert — only the differing categorical is scored as a miss.
    assert result.construction_age_band_correct is True
    assert result.wall_insulation_type_correct is False
    assert result.roof_construction_correct is True
    assert result.floor_construction_correct is True
def test_categorical_hit_is_not_applicable_when_actual_is_absent() -> None:
    # Arrange — the prediction carries a roof construction but the actual
    # lodges none (e.g. a flat under another dwelling). With nothing to score
    # against, the roof hit must be "not applicable" rather than a free win,
    # so it stays out of the roof classification rate.
    predicted = _epc(roof_construction=3)
    actual = _epc(roof_construction=None)

    # Act
    roof_hit = compare_prediction(predicted, actual).roof_construction_correct

    # Assert
    assert roof_hit is None
def test_reports_the_floor_area_residual_as_predicted_minus_actual() -> None:
# Arrange — predicted 90 m², actual 100 m² (a 10 m² under-prediction).
predicted = _epc(floor_area=90.0)