From cd43c52cf9d24565db20e9432ee21f72e16cda47 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 14 Jun 2026 08:53:15 +0000
Subject: [PATCH] feat(epc-prediction): score the heating components (ADR-0030
 Component Accuracy)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Heating is the dominant SAP lever (ablating it to the actual values cut the
SAP error from ~7 to ~4.5) yet was entirely unscored. Add the heating group
to compare_prediction's categorical_hits: main fuel / category / control
(off the primary MainHeatingDetail); water-heating fuel / code, cylinder
insulation, secondary heating (off SapHeating); and has-cylinder (off the
EpcPropertyData itself).

Template-copied baseline on the 40-postcode corpus (no predictor change
yet — this just makes the signal visible):
  heating_main_fuel        93.4%
  heating_main_category    92.7%
  water_heating_fuel/code  91.7% / 92.4%
  heating_main_control     62.1%   <- weak
  has_hot_water_cylinder   78.5%
  cylinder_insulation_type 35.8% (n=120)   <- weak
  secondary_heating_type   16.8% (n=125)   <- weak

Fuel/category predict well from the template; controls, cylinder, and
secondary heating are poor and now drive the next predictor slices.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 .../epc_prediction/prediction_comparison.py   | 98 ++++++++++++++-----
 .../test_prediction_comparison.py             | 64 ++++++++++++
 2 files changed, 139 insertions(+), 23 deletions(-)

diff --git a/domain/epc_prediction/prediction_comparison.py b/domain/epc_prediction/prediction_comparison.py
index 291bc9c4..76af5261 100644
--- a/domain/epc_prediction/prediction_comparison.py
+++ b/domain/epc_prediction/prediction_comparison.py
@@ -14,7 +14,11 @@ from __future__ import annotations
 from dataclasses import dataclass
 from typing import Optional
 
-from datatypes.epc.domain.epc_property_data import EpcPropertyData, SapBuildingPart
+from datatypes.epc.domain.epc_property_data import (
+    EpcPropertyData,
+    MainHeatingDetail,
+    SapBuildingPart,
+)
 
 
 @dataclass(frozen=True)
@@ -53,6 +57,53 @@ def _classify(predicted: object, actual: object) -> Optional[bool]:
     return predicted == actual
 
 
+def _main_heating_detail(epc: EpcPropertyData) -> Optional[MainHeatingDetail]:
+    """The primary heating system's detail row, or None when none is lodged."""
+    details = epc.sap_heating.main_heating_details
+    return details[0] if details else None
+
+
+def _heating_hits(
+    predicted: EpcPropertyData, actual: EpcPropertyData
+) -> dict[str, Optional[bool]]:
+    """Classification hits for the heating components — the dominant SAP lever
+    (ADR-0030). Main-system fields come off the primary `MainHeatingDetail`,
+    `has_hot_water_cylinder` off the EPC itself, the rest off `SapHeating`."""
+    pred_main = _main_heating_detail(predicted)
+    actual_main = _main_heating_detail(actual)
+    pred_h = predicted.sap_heating
+    actual_h = actual.sap_heating
+    return {
+        "heating_main_fuel": _classify(
+            getattr(pred_main, "main_fuel_type", None),
+            getattr(actual_main, "main_fuel_type", None),
+        ),
+        "heating_main_category": _classify(
+            getattr(pred_main, "main_heating_category", None),
+            getattr(actual_main, "main_heating_category", None),
+        ),
+        "heating_main_control": _classify(
+            getattr(pred_main, "main_heating_control", None),
+            getattr(actual_main, "main_heating_control", None),
+        ),
+        "water_heating_fuel": _classify(
+            pred_h.water_heating_fuel, actual_h.water_heating_fuel
+        ),
+        "water_heating_code": _classify(
+            pred_h.water_heating_code, actual_h.water_heating_code
+        ),
+        "has_hot_water_cylinder": _classify(
+            predicted.has_hot_water_cylinder, actual.has_hot_water_cylinder
+        ),
+        "cylinder_insulation_type": _classify(
+            pred_h.cylinder_insulation_type, actual_h.cylinder_insulation_type
+        ),
+        "secondary_heating_type": _classify(
+            pred_h.secondary_heating_type, actual_h.secondary_heating_type
+        ),
+    }
+
+
 def _total_window_area(epc: EpcPropertyData) -> float:
     return sum(w.window_width * w.window_height for w in epc.sap_windows)
 
@@ -62,29 +113,30 @@ def compare_prediction(
 ) -> PredictionComparison:
     """Compare a predicted picture against the actual one, field by field. All
     residuals are signed, predicted − actual."""
+    fabric_hits: dict[str, Optional[bool]] = {
+        "wall_construction": _classify(
+            _main(predicted).wall_construction,
+            _main(actual).wall_construction,
+        ),
+        "wall_insulation_type": _classify(
+            _main(predicted).wall_insulation_type,
+            _main(actual).wall_insulation_type,
+        ),
+        "construction_age_band": _classify(
+            _main(predicted).construction_age_band,
+            _main(actual).construction_age_band,
+        ),
+        "roof_construction": _classify(
+            _main(predicted).roof_construction,
+            _main(actual).roof_construction,
+        ),
+        "floor_construction": _classify(
+            _main_floor_construction(predicted),
+            _main_floor_construction(actual),
+        ),
+    }
     return PredictionComparison(
-        categorical_hits={
-            "wall_construction": _classify(
-                _main(predicted).wall_construction,
-                _main(actual).wall_construction,
-            ),
-            "wall_insulation_type": _classify(
-                _main(predicted).wall_insulation_type,
-                _main(actual).wall_insulation_type,
-            ),
-            "construction_age_band": _classify(
-                _main(predicted).construction_age_band,
-                _main(actual).construction_age_band,
-            ),
-            "roof_construction": _classify(
-                _main(predicted).roof_construction,
-                _main(actual).roof_construction,
-            ),
-            "floor_construction": _classify(
-                _main_floor_construction(predicted),
-                _main_floor_construction(actual),
-            ),
-        },
+        categorical_hits={**fabric_hits, **_heating_hits(predicted, actual)},
         floor_area_residual=(
             predicted.total_floor_area_m2 - actual.total_floor_area_m2
         ),
diff --git a/tests/domain/epc_prediction/test_prediction_comparison.py b/tests/domain/epc_prediction/test_prediction_comparison.py
index b67f1c19..eb087a1d 100644
--- a/tests/domain/epc_prediction/test_prediction_comparison.py
+++ b/tests/domain/epc_prediction/test_prediction_comparison.py
@@ -9,8 +9,10 @@ from typing import Optional, Union
 
 from datatypes.epc.domain.epc_property_data import (
     EpcPropertyData,
+    MainHeatingDetail,
     SapBuildingPart,
     SapFloorDimension,
+    SapHeating,
     SapWindow,
 )
 from domain.epc_prediction.prediction_comparison import compare_prediction
@@ -26,6 +28,14 @@ def _epc(
     floor_area: float = 80.0,
     building_parts: int = 1,
     windows: Optional[list[tuple[float, float]]] = None,
+    main_fuel_type: Optional[int] = 20,
+    main_heating_category: Optional[int] = 2,
+    main_heating_control: Optional[Union[int, str]] = 2100,
+    water_heating_fuel: Optional[int] = 20,
+    water_heating_code: Optional[int] = 901,
+    has_hot_water_cylinder: bool = True,
+    cylinder_insulation_type: Optional[Union[int, str]] = 1,
+    secondary_heating_type: Optional[Union[int, str]] = None,
 ) -> EpcPropertyData:
     epc: EpcPropertyData = object.__new__(EpcPropertyData)
     epc.total_floor_area_m2 = floor_area
@@ -41,6 +51,18 @@ def _epc(
         part.sap_floor_dimensions = [floor_dim]
         parts.append(part)
     epc.sap_building_parts = parts
+    detail: MainHeatingDetail = object.__new__(MainHeatingDetail)
+    detail.main_fuel_type = main_fuel_type
+    detail.main_heating_category = main_heating_category
+    detail.main_heating_control = main_heating_control
+    heating: SapHeating = object.__new__(SapHeating)
+    heating.main_heating_details = [detail]
+    heating.water_heating_fuel = water_heating_fuel
+    heating.water_heating_code = water_heating_code
+    heating.cylinder_insulation_type = cylinder_insulation_type
+    heating.secondary_heating_type = secondary_heating_type
+    epc.sap_heating = heating
+    epc.has_hot_water_cylinder = has_hot_water_cylinder
     sap_windows: list[SapWindow] = []
     for width, height in windows or []:
         w: SapWindow = object.__new__(SapWindow)
@@ -101,6 +123,48 @@ def test_classifies_the_extra_homogeneous_categoricals() -> None:
     assert comparison.categorical_hits["floor_construction"] is True
 
 
+def test_classifies_the_heating_components() -> None:
+    # Arrange — predicted and actual agree on everything heating except the main
+    # fuel (predicted oil 28, actual gas 20) and secondary heating (predicted
+    # none, actual a wood stove 693). Heating is the dominant SAP lever, so each
+    # heating component is scored (ADR-0030 Component Accuracy).
+    predicted = _epc(
+        main_fuel_type=28,
+        main_heating_category=2,
+        main_heating_control=2100,
+        water_heating_fuel=20,
+        water_heating_code=901,
+        has_hot_water_cylinder=True,
+        cylinder_insulation_type=1,
+        secondary_heating_type=None,
+    )
+    actual = _epc(
+        main_fuel_type=20,
+        main_heating_category=2,
+        main_heating_control=2100,
+        water_heating_fuel=20,
+        water_heating_code=901,
+        has_hot_water_cylinder=True,
+        cylinder_insulation_type=1,
+        secondary_heating_type=693,
+    )
+
+    # Act
+    hits = compare_prediction(predicted, actual).categorical_hits
+
+    # Assert
+    assert hits["heating_main_fuel"] is False
+    assert hits["heating_main_category"] is True
+    assert hits["heating_main_control"] is True
+    assert hits["water_heating_fuel"] is True
+    assert hits["water_heating_code"] is True
+    assert hits["has_hot_water_cylinder"] is True
+    assert hits["cylinder_insulation_type"] is True
+    # Secondary heating is absent in the prediction but present in the actual —
+    # a real miss (predicted None ≠ actual 693), not "not applicable".
+    assert hits["secondary_heating_type"] is False
+
+
 def test_categorical_hit_is_not_applicable_when_actual_is_absent() -> None:
     # Arrange — the actual lodges no roof construction (a flat under another
     # dwelling). A hit there is not applicable, not a free win, so it must not