From cd43c52cf9d24565db20e9432ee21f72e16cda47 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 14 Jun 2026 08:53:15 +0000 Subject: [PATCH] feat(epc-prediction): score the heating components (ADR-0030 Component Accuracy) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Heating is the dominant SAP lever (ablating it to the actual values cut the SAP error from ~7 to ~4.5) yet was entirely unscored. Add the heating group to compare_prediction's categorical_hits: main fuel / category / control (off the primary MainHeatingDetail), water-heating fuel / code, has-cylinder, cylinder insulation, secondary heating (off SapHeating). Template-copied baseline on the 40-postcode corpus (no predictor change yet — this just makes the signal visible): heating_main_fuel 93.4%; heating_main_category 92.7%; water_heating_fuel/code 91.7% / 92.4%; heating_main_control 62.1% <- weak; has_hot_water_cylinder 78.5%; cylinder_insulation_type 35.8% (n=120) <- weak; secondary_heating_type 16.8% (n=125) <- weak. Fuel/category predict well from the template; controls, cylinder, and secondary heating are poor and now drive the next predictor slices. 
Co-Authored-By: Claude Opus 4.8 --- .../epc_prediction/prediction_comparison.py | 98 ++++++++++++++----- .../test_prediction_comparison.py | 64 ++++++++++++ 2 files changed, 139 insertions(+), 23 deletions(-) diff --git a/domain/epc_prediction/prediction_comparison.py b/domain/epc_prediction/prediction_comparison.py index 291bc9c4..76af5261 100644 --- a/domain/epc_prediction/prediction_comparison.py +++ b/domain/epc_prediction/prediction_comparison.py @@ -14,7 +14,11 @@ from __future__ import annotations from dataclasses import dataclass from typing import Optional -from datatypes.epc.domain.epc_property_data import EpcPropertyData, SapBuildingPart +from datatypes.epc.domain.epc_property_data import ( + EpcPropertyData, + MainHeatingDetail, + SapBuildingPart, +) @dataclass(frozen=True) @@ -53,6 +57,53 @@ def _classify(predicted: object, actual: object) -> Optional[bool]: return predicted == actual +def _main_heating_detail(epc: EpcPropertyData) -> Optional[MainHeatingDetail]: + """The primary heating system's detail row, or None when none is lodged.""" + details = epc.sap_heating.main_heating_details + return details[0] if details else None + + +def _heating_hits( + predicted: EpcPropertyData, actual: EpcPropertyData +) -> dict[str, Optional[bool]]: + """Classification hits for the heating components — the dominant SAP lever + (ADR-0030). 
Main-system fields come off the primary `MainHeatingDetail`; + hot-water + secondary fields off `SapHeating`.""" + pred_main = _main_heating_detail(predicted) + actual_main = _main_heating_detail(actual) + pred_h = predicted.sap_heating + actual_h = actual.sap_heating + return { + "heating_main_fuel": _classify( + getattr(pred_main, "main_fuel_type", None), + getattr(actual_main, "main_fuel_type", None), + ), + "heating_main_category": _classify( + getattr(pred_main, "main_heating_category", None), + getattr(actual_main, "main_heating_category", None), + ), + "heating_main_control": _classify( + getattr(pred_main, "main_heating_control", None), + getattr(actual_main, "main_heating_control", None), + ), + "water_heating_fuel": _classify( + pred_h.water_heating_fuel, actual_h.water_heating_fuel + ), + "water_heating_code": _classify( + pred_h.water_heating_code, actual_h.water_heating_code + ), + "has_hot_water_cylinder": _classify( + predicted.has_hot_water_cylinder, actual.has_hot_water_cylinder + ), + "cylinder_insulation_type": _classify( + pred_h.cylinder_insulation_type, actual_h.cylinder_insulation_type + ), + "secondary_heating_type": _classify( + pred_h.secondary_heating_type, actual_h.secondary_heating_type + ), + } + + def _total_window_area(epc: EpcPropertyData) -> float: return sum(w.window_width * w.window_height for w in epc.sap_windows) @@ -62,29 +113,30 @@ def compare_prediction( ) -> PredictionComparison: """Compare a predicted picture against the actual one, field by field. 
All residuals are signed, predicted − actual.""" + fabric_hits: dict[str, Optional[bool]] = { + "wall_construction": _classify( + _main(predicted).wall_construction, + _main(actual).wall_construction, + ), + "wall_insulation_type": _classify( + _main(predicted).wall_insulation_type, + _main(actual).wall_insulation_type, + ), + "construction_age_band": _classify( + _main(predicted).construction_age_band, + _main(actual).construction_age_band, + ), + "roof_construction": _classify( + _main(predicted).roof_construction, + _main(actual).roof_construction, + ), + "floor_construction": _classify( + _main_floor_construction(predicted), + _main_floor_construction(actual), + ), + } return PredictionComparison( - categorical_hits={ - "wall_construction": _classify( - _main(predicted).wall_construction, - _main(actual).wall_construction, - ), - "wall_insulation_type": _classify( - _main(predicted).wall_insulation_type, - _main(actual).wall_insulation_type, - ), - "construction_age_band": _classify( - _main(predicted).construction_age_band, - _main(actual).construction_age_band, - ), - "roof_construction": _classify( - _main(predicted).roof_construction, - _main(actual).roof_construction, - ), - "floor_construction": _classify( - _main_floor_construction(predicted), - _main_floor_construction(actual), - ), - }, + categorical_hits={**fabric_hits, **_heating_hits(predicted, actual)}, floor_area_residual=( predicted.total_floor_area_m2 - actual.total_floor_area_m2 ), diff --git a/tests/domain/epc_prediction/test_prediction_comparison.py b/tests/domain/epc_prediction/test_prediction_comparison.py index b67f1c19..eb087a1d 100644 --- a/tests/domain/epc_prediction/test_prediction_comparison.py +++ b/tests/domain/epc_prediction/test_prediction_comparison.py @@ -9,8 +9,10 @@ from typing import Optional, Union from datatypes.epc.domain.epc_property_data import ( EpcPropertyData, + MainHeatingDetail, SapBuildingPart, SapFloorDimension, + SapHeating, SapWindow, ) from 
domain.epc_prediction.prediction_comparison import compare_prediction @@ -26,6 +28,14 @@ def _epc( floor_area: float = 80.0, building_parts: int = 1, windows: Optional[list[tuple[float, float]]] = None, + main_fuel_type: Optional[int] = 20, + main_heating_category: Optional[int] = 2, + main_heating_control: Optional[Union[int, str]] = 2100, + water_heating_fuel: Optional[int] = 20, + water_heating_code: Optional[int] = 901, + has_hot_water_cylinder: bool = True, + cylinder_insulation_type: Optional[Union[int, str]] = 1, + secondary_heating_type: Optional[Union[int, str]] = None, ) -> EpcPropertyData: epc: EpcPropertyData = object.__new__(EpcPropertyData) epc.total_floor_area_m2 = floor_area @@ -41,6 +51,18 @@ def _epc( part.sap_floor_dimensions = [floor_dim] parts.append(part) epc.sap_building_parts = parts + detail: MainHeatingDetail = object.__new__(MainHeatingDetail) + detail.main_fuel_type = main_fuel_type + detail.main_heating_category = main_heating_category + detail.main_heating_control = main_heating_control + heating: SapHeating = object.__new__(SapHeating) + heating.main_heating_details = [detail] + heating.water_heating_fuel = water_heating_fuel + heating.water_heating_code = water_heating_code + heating.cylinder_insulation_type = cylinder_insulation_type + heating.secondary_heating_type = secondary_heating_type + epc.sap_heating = heating + epc.has_hot_water_cylinder = has_hot_water_cylinder sap_windows: list[SapWindow] = [] for width, height in windows or []: w: SapWindow = object.__new__(SapWindow) @@ -101,6 +123,48 @@ def test_classifies_the_extra_homogeneous_categoricals() -> None: assert comparison.categorical_hits["floor_construction"] is True +def test_classifies_the_heating_components() -> None: + # Arrange — predicted and actual agree on everything heating except the main + # fuel (predicted oil 28, actual gas 20) and secondary heating (predicted + # none, actual a wood stove 693). 
Heating is the dominant SAP lever, so each + # heating component is scored (ADR-0030 Component Accuracy). + predicted = _epc( + main_fuel_type=28, + main_heating_category=2, + main_heating_control=2100, + water_heating_fuel=20, + water_heating_code=901, + has_hot_water_cylinder=True, + cylinder_insulation_type=1, + secondary_heating_type=None, + ) + actual = _epc( + main_fuel_type=20, + main_heating_category=2, + main_heating_control=2100, + water_heating_fuel=20, + water_heating_code=901, + has_hot_water_cylinder=True, + cylinder_insulation_type=1, + secondary_heating_type=693, + ) + + # Act + hits = compare_prediction(predicted, actual).categorical_hits + + # Assert + assert hits["heating_main_fuel"] is False + assert hits["heating_main_category"] is True + assert hits["heating_main_control"] is True + assert hits["water_heating_fuel"] is True + assert hits["water_heating_code"] is True + assert hits["has_hot_water_cylinder"] is True + assert hits["cylinder_insulation_type"] is True + # Secondary heating is absent in the prediction but present in the actual — + # a real miss (predicted None ≠ actual 693), not "not applicable". + assert hits["secondary_heating_type"] is False + + def test_categorical_hit_is_not_applicable_when_actual_is_absent() -> None: # Arrange — the actual lodges no roof construction (a flat under another # dwelling). A hit there is not applicable, not a free win, so it must not