feat(epc-prediction): score the heating components (ADR-0030 Component Accuracy)

Heating is the dominant SAP lever (ablating it to the actual values cut the
SAP error from ~7 to ~4.5), yet it was entirely unscored. Add the heating group to
compare_prediction's categorical_hits: main fuel / category / control (off
the primary MainHeatingDetail), water-heating fuel / code, has-cylinder,
cylinder insulation, secondary heating (off SapHeating).

Template-copied baseline on the 40-postcode corpus (no predictor change
yet — this just makes the signal visible):
  heating_main_fuel        93.4%
  heating_main_category    92.7%
  water_heating_fuel/code  91.7% / 92.4%
  heating_main_control     62.1%   <- weak
  has_hot_water_cylinder   78.5%
  cylinder_insulation_type 35.8% (n=120)   <- weak
  secondary_heating_type   16.8% (n=125)   <- weak

Fuel/category predict well from the template; controls, cylinder, and
secondary heating are poor and now drive the next predictor slices.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Khalim Conn-Kowlessar 2026-06-14 08:53:15 +00:00
parent 41b5ce5057
commit cd43c52cf9
2 changed files with 139 additions and 23 deletions

View file

@ -14,7 +14,11 @@ from __future__ import annotations
from dataclasses import dataclass
from typing import Optional
from datatypes.epc.domain.epc_property_data import EpcPropertyData, SapBuildingPart
from datatypes.epc.domain.epc_property_data import (
EpcPropertyData,
MainHeatingDetail,
SapBuildingPart,
)
@dataclass(frozen=True)
@ -53,6 +57,53 @@ def _classify(predicted: object, actual: object) -> Optional[bool]:
return predicted == actual
def _main_heating_detail(epc: EpcPropertyData) -> Optional[MainHeatingDetail]:
    """Return the primary heating system's detail row.

    The primary row is the first entry of
    `epc.sap_heating.main_heating_details`. Returns None when that
    collection is empty (or otherwise falsy), i.e. no detail row is lodged.
    """
    rows = epc.sap_heating.main_heating_details
    if not rows:
        return None
    return rows[0]
def _heating_hits(
    predicted: EpcPropertyData, actual: EpcPropertyData
) -> dict[str, Optional[bool]]:
    """Score each heating component of a prediction against the actual EPC.

    Heating is the dominant SAP lever (ADR-0030), so every component gets its
    own `_classify` verdict. The three main-system fields are read from the
    primary `MainHeatingDetail` of each picture; the water-heating, cylinder
    insulation, and secondary-heating fields are read from `SapHeating`; and
    `has_hot_water_cylinder` is read from the `EpcPropertyData` itself.

    Returns a dict keyed by component name whose values are whatever
    `_classify` returns for that (predicted, actual) pair.
    """
    predicted_detail = _main_heating_detail(predicted)
    actual_detail = _main_heating_detail(actual)
    predicted_heating = predicted.sap_heating
    actual_heating = actual.sap_heating

    def main_hit(field: str) -> Optional[bool]:
        # getattr with a None default: a picture with no main heating detail
        # row contributes None for the field instead of raising.
        return _classify(
            getattr(predicted_detail, field, None),
            getattr(actual_detail, field, None),
        )

    def system_hit(field: str) -> Optional[bool]:
        # No default here: these fields are read straight off SapHeating.
        return _classify(
            getattr(predicted_heating, field),
            getattr(actual_heating, field),
        )

    hits: dict[str, Optional[bool]] = {}
    hits["heating_main_fuel"] = main_hit("main_fuel_type")
    hits["heating_main_category"] = main_hit("main_heating_category")
    hits["heating_main_control"] = main_hit("main_heating_control")
    hits["water_heating_fuel"] = system_hit("water_heating_fuel")
    hits["water_heating_code"] = system_hit("water_heating_code")
    hits["has_hot_water_cylinder"] = _classify(
        predicted.has_hot_water_cylinder, actual.has_hot_water_cylinder
    )
    hits["cylinder_insulation_type"] = system_hit("cylinder_insulation_type")
    hits["secondary_heating_type"] = system_hit("secondary_heating_type")
    return hits
def _total_window_area(epc: EpcPropertyData) -> float:
    """Return the summed width * height over every entry in `epc.sap_windows`.

    The sum is seeded with 0.0 so the result is always a float, matching the
    annotated return type even when there are no windows (a bare `sum()` over
    an empty iterable would return the int 0).
    """
    return sum(
        (window.window_width * window.window_height for window in epc.sap_windows),
        0.0,
    )
@ -62,29 +113,30 @@ def compare_prediction(
) -> PredictionComparison:
"""Compare a predicted picture against the actual one, field by field. All
residuals are signed, predicted - actual."""
fabric_hits: dict[str, Optional[bool]] = {
"wall_construction": _classify(
_main(predicted).wall_construction,
_main(actual).wall_construction,
),
"wall_insulation_type": _classify(
_main(predicted).wall_insulation_type,
_main(actual).wall_insulation_type,
),
"construction_age_band": _classify(
_main(predicted).construction_age_band,
_main(actual).construction_age_band,
),
"roof_construction": _classify(
_main(predicted).roof_construction,
_main(actual).roof_construction,
),
"floor_construction": _classify(
_main_floor_construction(predicted),
_main_floor_construction(actual),
),
}
return PredictionComparison(
categorical_hits={
"wall_construction": _classify(
_main(predicted).wall_construction,
_main(actual).wall_construction,
),
"wall_insulation_type": _classify(
_main(predicted).wall_insulation_type,
_main(actual).wall_insulation_type,
),
"construction_age_band": _classify(
_main(predicted).construction_age_band,
_main(actual).construction_age_band,
),
"roof_construction": _classify(
_main(predicted).roof_construction,
_main(actual).roof_construction,
),
"floor_construction": _classify(
_main_floor_construction(predicted),
_main_floor_construction(actual),
),
},
categorical_hits={**fabric_hits, **_heating_hits(predicted, actual)},
floor_area_residual=(
predicted.total_floor_area_m2 - actual.total_floor_area_m2
),

View file

@ -9,8 +9,10 @@ from typing import Optional, Union
from datatypes.epc.domain.epc_property_data import (
EpcPropertyData,
MainHeatingDetail,
SapBuildingPart,
SapFloorDimension,
SapHeating,
SapWindow,
)
from domain.epc_prediction.prediction_comparison import compare_prediction
@ -26,6 +28,14 @@ def _epc(
floor_area: float = 80.0,
building_parts: int = 1,
windows: Optional[list[tuple[float, float]]] = None,
main_fuel_type: Optional[int] = 20,
main_heating_category: Optional[int] = 2,
main_heating_control: Optional[Union[int, str]] = 2100,
water_heating_fuel: Optional[int] = 20,
water_heating_code: Optional[int] = 901,
has_hot_water_cylinder: bool = True,
cylinder_insulation_type: Optional[Union[int, str]] = 1,
secondary_heating_type: Optional[Union[int, str]] = None,
) -> EpcPropertyData:
epc: EpcPropertyData = object.__new__(EpcPropertyData)
epc.total_floor_area_m2 = floor_area
@ -41,6 +51,18 @@ def _epc(
part.sap_floor_dimensions = [floor_dim]
parts.append(part)
epc.sap_building_parts = parts
detail: MainHeatingDetail = object.__new__(MainHeatingDetail)
detail.main_fuel_type = main_fuel_type
detail.main_heating_category = main_heating_category
detail.main_heating_control = main_heating_control
heating: SapHeating = object.__new__(SapHeating)
heating.main_heating_details = [detail]
heating.water_heating_fuel = water_heating_fuel
heating.water_heating_code = water_heating_code
heating.cylinder_insulation_type = cylinder_insulation_type
heating.secondary_heating_type = secondary_heating_type
epc.sap_heating = heating
epc.has_hot_water_cylinder = has_hot_water_cylinder
sap_windows: list[SapWindow] = []
for width, height in windows or []:
w: SapWindow = object.__new__(SapWindow)
@ -101,6 +123,48 @@ def test_classifies_the_extra_homogeneous_categoricals() -> None:
assert comparison.categorical_hits["floor_construction"] is True
def test_classifies_the_heating_components() -> None:
    # Arrange — the two pictures share every heating field except the main
    # fuel (predicted oil 28, actual gas 20) and secondary heating (predicted
    # none, actual a wood stove 693). Heating is the dominant SAP lever, so
    # each heating component is scored (ADR-0030 Component Accuracy).
    def heating_epc(
        main_fuel_type: int, secondary_heating_type: Optional[int]
    ) -> EpcPropertyData:
        # Only the two differing fields vary; the shared ones are pinned here.
        return _epc(
            main_fuel_type=main_fuel_type,
            main_heating_category=2,
            main_heating_control=2100,
            water_heating_fuel=20,
            water_heating_code=901,
            has_hot_water_cylinder=True,
            cylinder_insulation_type=1,
            secondary_heating_type=secondary_heating_type,
        )

    predicted = heating_epc(main_fuel_type=28, secondary_heating_type=None)
    actual = heating_epc(main_fuel_type=20, secondary_heating_type=693)
    expected_hits = {
        "heating_main_fuel": False,
        "heating_main_category": True,
        "heating_main_control": True,
        "water_heating_fuel": True,
        "water_heating_code": True,
        "has_hot_water_cylinder": True,
        "cylinder_insulation_type": True,
        # Secondary heating is absent in the prediction but present in the
        # actual — a real miss (predicted None ≠ actual 693), not
        # "not applicable".
        "secondary_heating_type": False,
    }

    # Act
    hits = compare_prediction(predicted, actual).categorical_hits

    # Assert
    for component, expected in expected_hits.items():
        assert hits[component] is expected, component
def test_categorical_hit_is_not_applicable_when_actual_is_absent() -> None:
# Arrange — the actual lodges no roof construction (a flat under another
# dwelling). A hit there is not applicable, not a free win, so it must not