refactor(epc-prediction): name-keyed categorical_hits for Component Accuracy (ADR-0030)

ADR-0030 commits Component Accuracy to ~19 categorical components (5 today + 8 heating + glazing/renewables). Flat *_correct dataclass fields don't scale — each needs manual runner wiring. Collapse them into a single `categorical_hits: dict[str, Optional[bool]]` keyed by component name, which also matches the runner's name-keyed aggregation (now generic: it tallies whatever components the comparison reports). No behaviour change apart from one denominator correction: the classification rates are identical, except that wall n goes 578->575 because the 3 certs whose actual wall is None are now correctly counted as not-applicable via _classify instead of being scored. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-30 13:10:47 +00:00 · 2026-06-14 08:50:34 +00:00 · 2026-06-14 08:50:34 +00:00 · 41b5ce5057
commit 41b5ce5057
parent 35a7c07812
3 changed files with 43 additions and 56 deletions
--- a/domain/epc_prediction/prediction_comparison.py
+++ b/domain/epc_prediction/prediction_comparison.py
@ -19,17 +19,15 @@ from datatypes.epc.domain.epc_property_data import EpcPropertyData, SapBuildingP

@dataclass(frozen=True)
 class PredictionComparison:
-    """One Property's prediction accuracy: classification hits + geometry
-    residuals (predicted − actual). A categorical hit is `None` ("not
-    applicable") when the actual lodges no value there, so the harness can keep
-    it out of the classification-rate denominator rather than score a free win.
-    """
+    """One Property's prediction accuracy: per-component classification hits +
+    geometry residuals (predicted − actual). `categorical_hits` maps a component
+    name to its hit: True / False, or `None` ("not applicable") when the actual
+    lodges no value there, so the harness can keep it out of the
+    classification-rate denominator rather than score a free win. Keyed by name
+    (not flat fields) so the component set can grow without reshaping the
+    runner — see ADR-0030 Component Accuracy."""

-    wall_construction_correct: bool
-    wall_insulation_type_correct: bool
-    construction_age_band_correct: bool
-    roof_construction_correct: Optional[bool]
-    floor_construction_correct: Optional[bool]
+    categorical_hits: dict[str, Optional[bool]]
    floor_area_residual: float
    building_parts_residual: int
    window_count_residual: int
@ -65,23 +63,28 @@ def compare_prediction(
    """Compare a predicted picture against the actual one, field by field. All
    residuals are signed, predicted − actual."""
    return PredictionComparison(
-        wall_construction_correct=(
-            _main(predicted).wall_construction == _main(actual).wall_construction
-        ),
-        wall_insulation_type_correct=(
-            _main(predicted).wall_insulation_type
-            == _main(actual).wall_insulation_type
-        ),
-        construction_age_band_correct=(
-            _main(predicted).construction_age_band
-            == _main(actual).construction_age_band
-        ),
-        roof_construction_correct=_classify(
-            _main(predicted).roof_construction, _main(actual).roof_construction
-        ),
-        floor_construction_correct=_classify(
-            _main_floor_construction(predicted), _main_floor_construction(actual)
-        ),
+        categorical_hits={
+            "wall_construction": _classify(
+                _main(predicted).wall_construction,
+                _main(actual).wall_construction,
+            ),
+            "wall_insulation_type": _classify(
+                _main(predicted).wall_insulation_type,
+                _main(actual).wall_insulation_type,
+            ),
+            "construction_age_band": _classify(
+                _main(predicted).construction_age_band,
+                _main(actual).construction_age_band,
+            ),
+            "roof_construction": _classify(
+                _main(predicted).roof_construction,
+                _main(actual).roof_construction,
+            ),
+            "floor_construction": _classify(
+                _main_floor_construction(predicted),
+                _main_floor_construction(actual),
+            ),
+        },
        floor_area_residual=(
            predicted.total_floor_area_m2 - actual.total_floor_area_m2
        ),
--- a/scripts/validate_epc_prediction.py
+++ b/scripts/validate_epc_prediction.py
@ -117,15 +117,10 @@ def main() -> None:
    calculator = Sap10Calculator()
    predictor = EpcPrediction()

-    # Classification: name -> [hits, applicable-total]. A None hit (the actual
-    # lodges no value) is excluded from the denominator.
-    categoricals: dict[str, list[int]] = {
-        "wall_construction": [0, 0],
-        "wall_insulation_type": [0, 0],
-        "construction_age_band": [0, 0],
-        "roof_construction": [0, 0],
-        "floor_construction": [0, 0],
-    }
+    # Classification: name -> [hits, applicable-total], populated from whatever
+    # components compare_prediction reports (insertion order preserved). A None
+    # hit (the actual lodges no value) is excluded from the denominator.
+    categoricals: dict[str, list[int]] = {}
    floor_res: list[float] = []
    window_count_res: list[int] = []
    window_area_res: list[float] = []
@ -162,19 +157,8 @@ def main() -> None:
            predicted_n += 1

            cmp = compare_prediction(predicted, actual)
-            _tally(categoricals["wall_construction"], cmp.wall_construction_correct)
-            _tally(
-                categoricals["wall_insulation_type"],
-                cmp.wall_insulation_type_correct,
-            )
-            _tally(
-                categoricals["construction_age_band"],
-                cmp.construction_age_band_correct,
-            )
-            _tally(categoricals["roof_construction"], cmp.roof_construction_correct)
-            _tally(
-                categoricals["floor_construction"], cmp.floor_construction_correct
-            )
+            for name, hit in cmp.categorical_hits.items():
+                _tally(categoricals.setdefault(name, [0, 0]), hit)
            floor_res.append(cmp.floor_area_residual)
            window_count_res.append(cmp.window_count_residual)
            window_area_res.append(cmp.total_window_area_residual)
--- a/tests/domain/epc_prediction/test_prediction_comparison.py
+++ b/tests/domain/epc_prediction/test_prediction_comparison.py
@ -60,7 +60,7 @@ def test_flags_a_correct_main_wall_construction_classification() -> None:
    comparison = compare_prediction(predicted, actual)

    # Assert
-    assert comparison.wall_construction_correct is True
+    assert comparison.categorical_hits["wall_construction"] is True


 def test_flags_an_incorrect_main_wall_construction_classification() -> None:
@ -72,7 +72,7 @@ def test_flags_an_incorrect_main_wall_construction_classification() -> None:
    comparison = compare_prediction(predicted, actual)

    # Assert
-    assert comparison.wall_construction_correct is False
+    assert comparison.categorical_hits["wall_construction"] is False


 def test_classifies_the_extra_homogeneous_categoricals() -> None:
@ -95,10 +95,10 @@ def test_classifies_the_extra_homogeneous_categoricals() -> None:
    comparison = compare_prediction(predicted, actual)

    # Assert
-    assert comparison.construction_age_band_correct is True
-    assert comparison.wall_insulation_type_correct is False
-    assert comparison.roof_construction_correct is True
-    assert comparison.floor_construction_correct is True
+    assert comparison.categorical_hits["construction_age_band"] is True
+    assert comparison.categorical_hits["wall_insulation_type"] is False
+    assert comparison.categorical_hits["roof_construction"] is True
+    assert comparison.categorical_hits["floor_construction"] is True


 def test_categorical_hit_is_not_applicable_when_actual_is_absent() -> None:
@ -112,7 +112,7 @@ def test_categorical_hit_is_not_applicable_when_actual_is_absent() -> None:
    comparison = compare_prediction(predicted, actual)

    # Assert
-    assert comparison.roof_construction_correct is None
+    assert comparison.categorical_hits["roof_construction"] is None


 def test_reports_the_floor_area_residual_as_predicted_minus_actual() -> None: