refactor(epc-prediction): name-keyed categorical_hits for Component Accuracy (ADR-0030)

ADR-0030 commits Component Accuracy to ~19 categorical components (5 today
+ 8 heating + glazing/renewables). Flat *_correct dataclass fields don't
scale — each needs manual runner wiring. Collapse them into a single
`categorical_hits: dict[str, Optional[bool]]` keyed by component name, which
also matches the runner's name-keyed aggregation (now generic: it tallies
whatever components the comparison reports). No behaviour change; the
classification rates are identical (wall n 578->575 is the 3 certs whose
actual wall is None, now correctly counted as not-applicable via _classify).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Khalim Conn-Kowlessar 2026-06-14 08:50:34 +00:00
parent 35a7c07812
commit 41b5ce5057
3 changed files with 43 additions and 56 deletions

View file

@ -19,17 +19,15 @@ from datatypes.epc.domain.epc_property_data import EpcPropertyData, SapBuildingP
@dataclass(frozen=True)
class PredictionComparison:
"""One Property's prediction accuracy: classification hits + geometry
residuals (predicted − actual). A categorical hit is `None` ("not
applicable") when the actual lodges no value there, so the harness can keep
it out of the classification-rate denominator rather than score a free win.
"""
"""One Property's prediction accuracy: per-component classification hits +
geometry residuals (predicted − actual). `categorical_hits` maps a component
name to its hit: True / False, or `None` ("not applicable") when the actual
lodges no value there, so the harness can keep it out of the
classification-rate denominator rather than score a free win. Keyed by name
(not flat fields) so the component set can grow without reshaping the
runner — see ADR-0030 Component Accuracy."""
wall_construction_correct: bool
wall_insulation_type_correct: bool
construction_age_band_correct: bool
roof_construction_correct: Optional[bool]
floor_construction_correct: Optional[bool]
categorical_hits: dict[str, Optional[bool]]
floor_area_residual: float
building_parts_residual: int
window_count_residual: int
@ -65,23 +63,28 @@ def compare_prediction(
"""Compare a predicted picture against the actual one, field by field. All
residuals are signed, predicted − actual."""
return PredictionComparison(
wall_construction_correct=(
_main(predicted).wall_construction == _main(actual).wall_construction
),
wall_insulation_type_correct=(
_main(predicted).wall_insulation_type
== _main(actual).wall_insulation_type
),
construction_age_band_correct=(
_main(predicted).construction_age_band
== _main(actual).construction_age_band
),
roof_construction_correct=_classify(
_main(predicted).roof_construction, _main(actual).roof_construction
),
floor_construction_correct=_classify(
_main_floor_construction(predicted), _main_floor_construction(actual)
),
categorical_hits={
"wall_construction": _classify(
_main(predicted).wall_construction,
_main(actual).wall_construction,
),
"wall_insulation_type": _classify(
_main(predicted).wall_insulation_type,
_main(actual).wall_insulation_type,
),
"construction_age_band": _classify(
_main(predicted).construction_age_band,
_main(actual).construction_age_band,
),
"roof_construction": _classify(
_main(predicted).roof_construction,
_main(actual).roof_construction,
),
"floor_construction": _classify(
_main_floor_construction(predicted),
_main_floor_construction(actual),
),
},
floor_area_residual=(
predicted.total_floor_area_m2 - actual.total_floor_area_m2
),

View file

@ -117,15 +117,10 @@ def main() -> None:
calculator = Sap10Calculator()
predictor = EpcPrediction()
# Classification: name -> [hits, applicable-total]. A None hit (the actual
# lodges no value) is excluded from the denominator.
categoricals: dict[str, list[int]] = {
"wall_construction": [0, 0],
"wall_insulation_type": [0, 0],
"construction_age_band": [0, 0],
"roof_construction": [0, 0],
"floor_construction": [0, 0],
}
# Classification: name -> [hits, applicable-total], populated from whatever
# components compare_prediction reports (insertion order preserved). A None
# hit (the actual lodges no value) is excluded from the denominator.
categoricals: dict[str, list[int]] = {}
floor_res: list[float] = []
window_count_res: list[int] = []
window_area_res: list[float] = []
@ -162,19 +157,8 @@ def main() -> None:
predicted_n += 1
cmp = compare_prediction(predicted, actual)
_tally(categoricals["wall_construction"], cmp.wall_construction_correct)
_tally(
categoricals["wall_insulation_type"],
cmp.wall_insulation_type_correct,
)
_tally(
categoricals["construction_age_band"],
cmp.construction_age_band_correct,
)
_tally(categoricals["roof_construction"], cmp.roof_construction_correct)
_tally(
categoricals["floor_construction"], cmp.floor_construction_correct
)
for name, hit in cmp.categorical_hits.items():
_tally(categoricals.setdefault(name, [0, 0]), hit)
floor_res.append(cmp.floor_area_residual)
window_count_res.append(cmp.window_count_residual)
window_area_res.append(cmp.total_window_area_residual)

View file

@ -60,7 +60,7 @@ def test_flags_a_correct_main_wall_construction_classification() -> None:
comparison = compare_prediction(predicted, actual)
# Assert
assert comparison.wall_construction_correct is True
assert comparison.categorical_hits["wall_construction"] is True
def test_flags_an_incorrect_main_wall_construction_classification() -> None:
@ -72,7 +72,7 @@ def test_flags_an_incorrect_main_wall_construction_classification() -> None:
comparison = compare_prediction(predicted, actual)
# Assert
assert comparison.wall_construction_correct is False
assert comparison.categorical_hits["wall_construction"] is False
def test_classifies_the_extra_homogeneous_categoricals() -> None:
@ -95,10 +95,10 @@ def test_classifies_the_extra_homogeneous_categoricals() -> None:
comparison = compare_prediction(predicted, actual)
# Assert
assert comparison.construction_age_band_correct is True
assert comparison.wall_insulation_type_correct is False
assert comparison.roof_construction_correct is True
assert comparison.floor_construction_correct is True
assert comparison.categorical_hits["construction_age_band"] is True
assert comparison.categorical_hits["wall_insulation_type"] is False
assert comparison.categorical_hits["roof_construction"] is True
assert comparison.categorical_hits["floor_construction"] is True
def test_categorical_hit_is_not_applicable_when_actual_is_absent() -> None:
@ -112,7 +112,7 @@ def test_categorical_hit_is_not_applicable_when_actual_is_absent() -> None:
comparison = compare_prediction(predicted, actual)
# Assert
assert comparison.roof_construction_correct is None
assert comparison.categorical_hits["roof_construction"] is None
def test_reports_the_floor_area_residual_as_predicted_minus_actual() -> None: