feat(epc-prediction): complete component coverage — fabric/glazing/renewables/doors (ADR-0030)

Finish the ADR-0030 Component Accuracy set: roof insulation thickness,
floor insulation, room-in-roof presence, modal glazing type, PV presence,
solar water heating (categoricals) + door count (residual). Presence flags
(room-in-roof, PV, solar) are always-applicable — predicting absence when
present is a real miss.

Template-copied baseline (40-postcode corpus), newly visible:
  floor_insulation         94.0%   solar_water_heating  99.7%
  has_pv                   98.6%   has_room_in_roof     91.9%
  modal_glazing_type       59.0%   <- weak
  roof_insulation_thickness 30.6%  <- weak
  door_count  mean|.| 0.40

compare_prediction now scores 19 categoricals + 5 residuals across every
SAP-load-bearing component group.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Khalim Conn-Kowlessar 2026-06-14 09:00:30 +00:00
parent cd43c52cf9
commit 275a30a825
3 changed files with 140 additions and 1 deletions

View file

@ -11,6 +11,7 @@ runner, which has the calculator and the lodged SAP.
from __future__ import annotations
from collections import Counter
from dataclasses import dataclass
from typing import Optional
@ -36,6 +37,7 @@ class PredictionComparison:
building_parts_residual: int
window_count_residual: int
total_window_area_residual: float
door_count_residual: int
def _main(epc: EpcPropertyData) -> SapBuildingPart:
@ -104,6 +106,57 @@ def _heating_hits(
}
def _modal_glazing_type(epc: EpcPropertyData) -> Optional[object]:
    """The most common glazing type across the dwelling's windows, or None when
    none are lodged. A single dwelling-level glazing signal, robust to one
    odd window.

    Ties between equally common types are broken on the glazing type itself
    (type name, then string form) rather than on window order, so two
    dwellings lodging the same windows in a different order always yield the
    same modal type.
    """
    counts = Counter(w.glazing_type for w in epc.sap_windows)
    if not counts:
        return None

    def _rank(item):
        # Highest count first; then an order-independent key so a tie cannot
        # be decided by which window happened to be lodged first.
        glazing_type, count = item
        return (-count, type(glazing_type).__name__, str(glazing_type))

    return min(counts.items(), key=_rank)[0]
def _has_pv(epc: EpcPropertyData) -> bool:
    """Whether the dwelling lodges photovoltaics by either path: a
    ``photovoltaic_supply`` record, or a non-empty ``photovoltaic_arrays``."""
    energy_source = epc.sap_energy_source
    # A lodged supply record alone is enough; the arrays are not consulted.
    if energy_source.photovoltaic_supply is not None:
        return True
    # Otherwise fall back to the arrays path (None or empty counts as absent).
    return bool(energy_source.photovoltaic_arrays)
def _renewables_and_fabric_hits(
    predicted: EpcPropertyData, actual: EpcPropertyData
) -> dict[str, Optional[bool]]:
    """Classify the remaining fabric-insulation, glazing and renewables
    components (ADR-0030), keyed by component name.

    Presence flags (room-in-roof, PV, solar) are always applicable —
    predicting absence when present is a real miss.
    """
    # Roof insulation and room-in-roof both live on the main building part.
    predicted_main = _main(predicted)
    actual_main = _main(actual)

    roof_thickness_hit = _classify(
        predicted_main.roof_insulation_thickness,
        actual_main.roof_insulation_thickness,
    )
    floor_insulation_hit = _classify(
        _main_floor_insulation(predicted), _main_floor_insulation(actual)
    )
    # Room-in-roof is scored on presence only, not on its details.
    room_in_roof_hit = _classify(
        predicted_main.sap_room_in_roof is not None,
        actual_main.sap_room_in_roof is not None,
    )
    glazing_hit = _classify(
        _modal_glazing_type(predicted), _modal_glazing_type(actual)
    )
    pv_hit = _classify(_has_pv(predicted), _has_pv(actual))
    solar_hit = _classify(
        predicted.solar_water_heating, actual.solar_water_heating
    )

    return {
        "roof_insulation_thickness": roof_thickness_hit,
        "floor_insulation": floor_insulation_hit,
        "has_room_in_roof": room_in_roof_hit,
        "modal_glazing_type": glazing_hit,
        "has_pv": pv_hit,
        "solar_water_heating": solar_hit,
    }
def _main_floor_insulation(epc: EpcPropertyData) -> Optional[int]:
    """Floor-insulation code taken from the first floor dimension of the main
    building part, or None when that part lodges no floor dimensions."""
    floor_dimensions = _main(epc).sap_floor_dimensions
    if not floor_dimensions:
        return None
    return floor_dimensions[0].floor_insulation
def _total_window_area(epc: EpcPropertyData) -> float:
    """Sum of width x height over every window lodged on the dwelling."""
    window_areas = [
        window.window_width * window.window_height
        for window in epc.sap_windows
    ]
    return sum(window_areas)
@ -136,7 +189,11 @@ def compare_prediction(
),
}
return PredictionComparison(
categorical_hits={**fabric_hits, **_heating_hits(predicted, actual)},
categorical_hits={
**fabric_hits,
**_heating_hits(predicted, actual),
**_renewables_and_fabric_hits(predicted, actual),
},
floor_area_residual=(
predicted.total_floor_area_m2 - actual.total_floor_area_m2
),
@ -149,4 +206,5 @@ def compare_prediction(
total_window_area_residual=(
_total_window_area(predicted) - _total_window_area(actual)
),
door_count_residual=predicted.door_count - actual.door_count,
)

View file

@ -125,6 +125,7 @@ def main() -> None:
window_count_res: list[int] = []
window_area_res: list[float] = []
parts_res: list[int] = []
door_res: list[int] = []
sap_vs_lodged: list[float] = []
sap_vs_calc_actual: list[float] = []
sap_vs_neighbour_mean: list[float] = []
@ -163,6 +164,7 @@ def main() -> None:
window_count_res.append(cmp.window_count_residual)
window_area_res.append(cmp.total_window_area_residual)
parts_res.append(cmp.building_parts_residual)
door_res.append(cmp.door_count_residual)
sap_pred = _sap(calculator, predicted)
lodged = actual.energy_rating_current
@ -190,6 +192,7 @@ def main() -> None:
_residual("window_count", [float(x) for x in window_count_res])
_residual("total_window_area (m2)", window_area_res)
_residual("building_parts", [float(x) for x in parts_res])
_residual("door_count", [float(x) for x in door_res])
print()
_sap_line("SAP |pred-calc lodged|", sap_vs_lodged)
_sap_line("SAP |pred-calc calc(actual)|", sap_vs_calc_actual)

View file

@ -10,9 +10,12 @@ from typing import Optional, Union
from datatypes.epc.domain.epc_property_data import (
EpcPropertyData,
MainHeatingDetail,
PhotovoltaicSupply,
SapBuildingPart,
SapEnergySource,
SapFloorDimension,
SapHeating,
SapRoomInRoof,
SapWindow,
)
from domain.epc_prediction.prediction_comparison import compare_prediction
@ -24,10 +27,17 @@ def _epc(
wall_insulation_type: Union[int, str] = 1,
construction_age_band: str = "K",
roof_construction: Optional[int] = 1,
roof_insulation_thickness: Optional[Union[str, int]] = 100,
floor_construction: Optional[int] = 1,
floor_insulation: Optional[int] = 1,
has_room_in_roof: bool = False,
floor_area: float = 80.0,
building_parts: int = 1,
windows: Optional[list[tuple[float, float]]] = None,
glazing_type: Union[int, str] = 3,
door_count: int = 2,
has_pv: bool = False,
solar_water_heating: bool = False,
main_fuel_type: Optional[int] = 20,
main_heating_category: Optional[int] = 2,
main_heating_control: Optional[Union[int, str]] = 2100,
@ -39,6 +49,8 @@ def _epc(
) -> EpcPropertyData:
epc: EpcPropertyData = object.__new__(EpcPropertyData)
epc.total_floor_area_m2 = floor_area
epc.door_count = door_count
epc.solar_water_heating = solar_water_heating
parts: list[SapBuildingPart] = []
for _ in range(building_parts):
part: SapBuildingPart = object.__new__(SapBuildingPart)
@ -46,8 +58,13 @@ def _epc(
part.wall_insulation_type = wall_insulation_type
part.construction_age_band = construction_age_band
part.roof_construction = roof_construction
part.roof_insulation_thickness = roof_insulation_thickness
part.sap_room_in_roof = (
object.__new__(SapRoomInRoof) if has_room_in_roof else None
)
floor_dim: SapFloorDimension = object.__new__(SapFloorDimension)
floor_dim.floor_construction = floor_construction
floor_dim.floor_insulation = floor_insulation
part.sap_floor_dimensions = [floor_dim]
parts.append(part)
epc.sap_building_parts = parts
@ -68,8 +85,15 @@ def _epc(
w: SapWindow = object.__new__(SapWindow)
w.window_width = width
w.window_height = height
w.glazing_type = glazing_type
sap_windows.append(w)
epc.sap_windows = sap_windows
energy: SapEnergySource = object.__new__(SapEnergySource)
energy.photovoltaic_supply = (
object.__new__(PhotovoltaicSupply) if has_pv else None
)
energy.photovoltaic_arrays = None
epc.sap_energy_source = energy
return epc
@ -165,6 +189,60 @@ def test_classifies_the_heating_components() -> None:
assert hits["secondary_heating_type"] is False
def test_classifies_fabric_insulation_and_room_in_roof() -> None:
    # Arrange — the two EPCs share a floor insulation code but differ on roof
    # insulation thickness and on whether a room-in-roof is present.
    shared_floor_insulation = 1
    predicted_epc = _epc(
        roof_insulation_thickness=100,
        floor_insulation=shared_floor_insulation,
        has_room_in_roof=False,
    )
    actual_epc = _epc(
        roof_insulation_thickness=270,
        floor_insulation=shared_floor_insulation,
        has_room_in_roof=True,
    )

    # Act
    comparison = compare_prediction(predicted_epc, actual_epc)
    categorical = comparison.categorical_hits

    # Assert — thickness differs (miss), floor insulation matches (hit).
    assert categorical["roof_insulation_thickness"] is False
    assert categorical["floor_insulation"] is True
    # A missed room-in-roof must score as a miss (False), never as
    # "not applicable": presence flags are always applicable.
    assert categorical["has_room_in_roof"] is False
def test_classifies_glazing_renewables_and_door_count() -> None:
    # Arrange — same two windows on both sides, but the prediction gets the
    # glazing type wrong, misses both PV and solar water heating, and lodges
    # one door too many.
    predicted_epc = _epc(
        windows=[(1.0, 1.0), (1.0, 1.0)],
        glazing_type=3,
        has_pv=False,
        solar_water_heating=False,
        door_count=3,
    )
    actual_epc = _epc(
        windows=[(1.0, 1.0), (1.0, 1.0)],
        glazing_type=4,
        has_pv=True,
        solar_water_heating=True,
        door_count=2,
    )

    # Act
    result = compare_prediction(predicted_epc, actual_epc)
    categorical = result.categorical_hits

    # Assert — every categorical here is a miss; the door residual is
    # predicted minus actual, i.e. 3 - 2.
    assert categorical["modal_glazing_type"] is False
    assert categorical["has_pv"] is False
    assert categorical["solar_water_heating"] is False
    assert result.door_count_residual == 1
def test_categorical_hit_is_not_applicable_when_actual_is_absent() -> None:
# Arrange — the actual lodges no roof construction (a flat under another
# dwelling). A hit there is not applicable, not a free win, so it must not