slice 9: building parts with main-dwelling carve-out

Thirteen building-parts features land: five cross-all-parts physical
aggregates (building_parts_count, total_heat_loss_perimeter_m,
total_party_wall_length_m, total_floor_area_from_parts_m2,
avg_room_height_m) and eight Main-Dwelling-specific columns, each
prefixed main_dwelling_ (heat_loss_perimeter_m, party_wall_length_m,
total_floor_area_m2, avg_room_height_m, has_room_in_roof,
construction_age_band, wall_construction, roof_construction).
Main-Dwelling columns are None when no part has
identifier == 'Main Dwelling' — honest about data quality rather than
silently falling back to the first part.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Khalim Conn-Kowlessar 2026-05-16 15:45:21 +00:00
parent 079e6f9a68
commit fb773fa635
3 changed files with 352 additions and 4 deletions

View file

@ -15,13 +15,62 @@ from datatypes.epc.domain.epc_property_data import (
EpcPropertyData,
InstantaneousWwhrs,
RenewableHeatIncentive,
SapBuildingPart,
SapEnergySource,
SapFloorDimension,
SapHeating,
SapRoomInRoof,
SapWindow,
WindowTransmissionDetails,
)
def make_floor_dimension(
    *,
    total_floor_area_m2: float = 50.0,
    room_height_m: float = 2.5,
    party_wall_length_m: float = 5.0,
    heat_loss_perimeter_m: float = 20.0,
    floor: Optional[int] = 0,
) -> SapFloorDimension:
    """Return a SapFloorDimension, defaulting every field the caller omits.

    All arguments are keyword-only and are forwarded unchanged to the
    SapFloorDimension constructor.
    """
    dimension = SapFloorDimension(
        floor=floor,
        total_floor_area_m2=total_floor_area_m2,
        room_height_m=room_height_m,
        heat_loss_perimeter_m=heat_loss_perimeter_m,
        party_wall_length_m=party_wall_length_m,
    )
    return dimension
def make_building_part(
    *,
    identifier: str = "Main Dwelling",
    construction_age_band: str = "B",
    wall_construction: Union[int, str] = 3,
    wall_insulation_type: Union[int, str] = 2,
    wall_thickness_measured: bool = True,
    party_wall_construction: Union[int, str] = 1,
    roof_construction: Optional[int] = 4,
    floor_dimensions: Optional[list[SapFloorDimension]] = None,
    sap_room_in_roof: Optional[SapRoomInRoof] = None,
) -> SapBuildingPart:
    """Return a SapBuildingPart, defaulting every field the caller omits.

    When `floor_dimensions` is None the part gets a single default floor from
    `make_floor_dimension()`. An explicitly passed list (including an empty
    one) is used as given.
    """
    if floor_dimensions is None:
        floor_dimensions = [make_floor_dimension()]
    return SapBuildingPart(
        identifier=identifier,
        construction_age_band=construction_age_band,
        wall_construction=wall_construction,
        wall_insulation_type=wall_insulation_type,
        wall_thickness_measured=wall_thickness_measured,
        party_wall_construction=party_wall_construction,
        roof_construction=roof_construction,
        sap_floor_dimensions=floor_dimensions,
        sap_room_in_roof=sap_room_in_roof,
    )
def make_window(
*,
orientation: Union[int, str] = 5, # SAP10: 1=N, 2=NE, 3=E, 4=SE, 5=S, 6=SW, 7=W, 8=NW
@ -86,6 +135,7 @@ def make_minimal_sap10_epc(
region_code: Optional[str] = None,
country_code: Optional[str] = None,
sap_windows: Optional[list[SapWindow]] = None,
sap_building_parts: Optional[list[SapBuildingPart]] = None,
) -> EpcPropertyData:
"""Construct a minimal valid SAP10 EpcPropertyData with parametrisable targets."""
return EpcPropertyData(
@ -117,7 +167,7 @@ def make_minimal_sap10_epc(
wind_turbines_terrain_type="Suburban",
electricity_smart_meter_present=False,
),
sap_building_parts=[],
sap_building_parts=list(sap_building_parts) if sap_building_parts is not None else [],
solar_water_heating=solar_water_heating,
has_hot_water_cylinder=has_hot_water_cylinder,
has_fixed_air_conditioning=has_fixed_air_conditioning,

View file

@ -2,9 +2,14 @@
import pytest
from datatypes.epc.domain.epc_property_data import WindowTransmissionDetails
from datatypes.epc.domain.epc_property_data import SapRoomInRoof, WindowTransmissionDetails
from domain.ml.schema import ColumnSpec, TransformSchema
from domain.ml.tests._fixtures import make_minimal_sap10_epc, make_window
from domain.ml.tests._fixtures import (
make_building_part,
make_floor_dimension,
make_minimal_sap10_epc,
make_window,
)
from domain.ml.transform import EpcMlTransform
@ -500,6 +505,162 @@ def test_to_row_returns_window_share_zeros_for_property_with_no_windows() -> Non
assert row["window_pct_pvc_frame"] is None
# Expected schema entries for the 13 building-part feature columns, checked
# one by one in test_schema_advertises_building_part_features.
# Despite the name, the table lists non-nullable columns too; nullability is
# the second element of each tuple.
_BUILDING_PART_FEATURES_NULLABLE: dict[str, tuple[type, bool, bool]] = {
# name → (dtype, nullable, categorical)
# Cross-all-parts aggregates.
"building_parts_count": (int, False, False),
"total_heat_loss_perimeter_m": (float, False, False),
"total_party_wall_length_m": (float, False, False),
"total_floor_area_from_parts_m2": (float, False, False),
"avg_room_height_m": (float, True, False),
# Main Dwelling carve-out: every column is nullable.
"main_dwelling_heat_loss_perimeter_m": (float, True, False),
"main_dwelling_party_wall_length_m": (float, True, False),
"main_dwelling_total_floor_area_m2": (float, True, False),
"main_dwelling_avg_room_height_m": (float, True, False),
"main_dwelling_has_room_in_roof": (bool, True, False),
"main_dwelling_construction_age_band": (str, True, True),
"main_dwelling_wall_construction": (int, True, True),
"main_dwelling_roof_construction": (int, True, True),
}
def test_schema_advertises_building_part_features() -> None:
    # Arrange
    transform = EpcMlTransform()

    # Act
    schema = transform.schema()

    # Assert — every expected column exists with the expected dtype, nullable
    # and categorical flags; the column name is the failure message.
    for name, expected in _BUILDING_PART_FEATURES_NULLABLE.items():
        expected_dtype, expected_nullable, expected_categorical = expected
        assert name in schema.feature_columns, name
        spec = schema.feature_columns[name]
        assert spec.dtype is expected_dtype, name
        assert spec.nullable is expected_nullable, name
        assert spec.categorical is expected_categorical, name
def test_to_row_aggregates_building_parts_with_main_dwelling_carveout() -> None:
    # Arrange — a two-floor Main Dwelling (age band B, wall 3, roof 4) plus a
    # single-floor extension.
    main_floors = [
        make_floor_dimension(
            total_floor_area_m2=30.0,
            room_height_m=2.5,
            party_wall_length_m=6.0,
            heat_loss_perimeter_m=20.0,
        ),
        make_floor_dimension(
            total_floor_area_m2=28.0,
            room_height_m=2.4,
            party_wall_length_m=6.0,
            heat_loss_perimeter_m=18.0,
        ),
    ]
    extension_floors = [
        make_floor_dimension(
            total_floor_area_m2=12.0,
            room_height_m=2.6,
            party_wall_length_m=0.0,
            heat_loss_perimeter_m=10.0,
        ),
    ]
    main_part = make_building_part(
        identifier="Main Dwelling",
        construction_age_band="B",
        wall_construction=3,
        roof_construction=4,
        floor_dimensions=main_floors,
    )
    extension_part = make_building_part(
        identifier="Extension 1",
        construction_age_band="L",
        wall_construction=4,
        roof_construction=5,
        floor_dimensions=extension_floors,
    )
    epc = make_minimal_sap10_epc(
        energy_rating_current=82,
        sap_building_parts=[main_part, extension_part],
    )
    transform = EpcMlTransform()

    # Act
    row = transform.to_row(epc)

    # Assert — part count
    assert row["building_parts_count"] == 2

    # Assert — plain sums, over all parts and over the Main Dwelling alone
    expected_sums = {
        "total_heat_loss_perimeter_m": 48.0,  # 20 + 18 + 10
        "total_party_wall_length_m": 12.0,  # 6 + 6 + 0
        "total_floor_area_from_parts_m2": 70.0,  # 30 + 28 + 12
        "main_dwelling_heat_loss_perimeter_m": 38.0,  # 20 + 18
        "main_dwelling_party_wall_length_m": 12.0,  # 6 + 6
        "main_dwelling_total_floor_area_m2": 58.0,  # 30 + 28
    }
    for column, expected in expected_sums.items():
        assert row[column] == pytest.approx(expected), column

    # Assert — area-weighted mean heights
    # all floors: (2.5*30 + 2.4*28 + 2.6*12) / 70 = 173.4 / 70 = 2.4771...
    assert row["avg_room_height_m"] == pytest.approx(2.4771, abs=0.001)
    # main only: (2.5*30 + 2.4*28) / 58 = 142.2 / 58 = 2.4517...
    assert row["main_dwelling_avg_room_height_m"] == pytest.approx(2.4517, abs=0.001)

    # Assert — Main Dwelling flag and categoricals
    assert row["main_dwelling_has_room_in_roof"] is False
    assert row["main_dwelling_construction_age_band"] == "B"
    assert row["main_dwelling_wall_construction"] == 3
    assert row["main_dwelling_roof_construction"] == 4
def test_to_row_flags_room_in_roof_when_main_dwelling_has_it() -> None:
    # Arrange — a lone Main Dwelling part that carries a room-in-roof block.
    room_in_roof = SapRoomInRoof(floor_area=15.0, construction_age_band="B")
    main_part = make_building_part(
        identifier="Main Dwelling",
        sap_room_in_roof=room_in_roof,
    )
    epc = make_minimal_sap10_epc(
        energy_rating_current=82,
        sap_building_parts=[main_part],
    )
    transform = EpcMlTransform()

    # Act
    row = transform.to_row(epc)

    # Assert
    assert row["main_dwelling_has_room_in_roof"] is True
def test_to_row_returns_building_part_nones_when_no_main_dwelling_identified() -> None:
    # Arrange — the only part has an identifier other than "Main Dwelling".
    only_part = make_building_part(identifier="Extension 1")
    epc = make_minimal_sap10_epc(
        energy_rating_current=82,
        sap_building_parts=[only_part],
    )
    transform = EpcMlTransform()

    # Act
    row = transform.to_row(epc)

    # Assert — cross-all aggregates still populate
    assert row["building_parts_count"] == 1
    assert row["total_heat_loss_perimeter_m"] == pytest.approx(20.0)

    # Assert — every Main Dwelling column is None rather than a fallback value
    main_dwelling_columns = (
        "main_dwelling_heat_loss_perimeter_m",
        "main_dwelling_party_wall_length_m",
        "main_dwelling_total_floor_area_m2",
        "main_dwelling_avg_room_height_m",
        "main_dwelling_has_room_in_roof",
        "main_dwelling_construction_age_band",
        "main_dwelling_wall_construction",
        "main_dwelling_roof_construction",
    )
    for column in main_dwelling_columns:
        assert row[column] is None, column
def test_to_row_returns_building_part_zeros_for_property_with_no_parts() -> None:
    # Arrange — no sap_building_parts argument, so the fixture supplies an
    # empty list.
    epc = make_minimal_sap10_epc(energy_rating_current=82)
    transform = EpcMlTransform()

    # Act
    row = transform.to_row(epc)

    # Assert — count and sums are zero; the mean height has nothing to average
    assert row["building_parts_count"] == 0
    assert row["total_heat_loss_perimeter_m"] == 0.0
    assert row["total_party_wall_length_m"] == 0.0
    assert row["total_floor_area_from_parts_m2"] == 0.0
    assert row["avg_room_height_m"] is None

    # Assert — all eight Main Dwelling columns are None, not just a sample, so
    # a regression in any single column is caught for the empty-parts case.
    main_dwelling_columns = (
        "main_dwelling_heat_loss_perimeter_m",
        "main_dwelling_party_wall_length_m",
        "main_dwelling_total_floor_area_m2",
        "main_dwelling_avg_room_height_m",
        "main_dwelling_has_room_in_roof",
        "main_dwelling_construction_age_band",
        "main_dwelling_wall_construction",
        "main_dwelling_roof_construction",
    )
    for column in main_dwelling_columns:
        assert row[column] is None, column
def test_to_row_area_weights_window_u_value_and_solar_transmittance() -> None:
# Arrange — two windows with transmission details; one without.
sap_windows = [

View file

@ -13,11 +13,18 @@ See docs/adr/0007-kwh-as-ml-target.md for the target set and rationale.
from typing import Any, Optional
from datatypes.epc.domain.epc import Epc
from datatypes.epc.domain.epc_property_data import EpcPropertyData, SapWindow
from datatypes.epc.domain.epc_property_data import (
EpcPropertyData,
SapBuildingPart,
SapWindow,
)
from domain.ml.schema import ColumnSpec, TransformSchema
from domain.ml.ucl import apply_ucl_correction
_MAIN_DWELLING_IDENTIFIER = "Main Dwelling"
# SAP10 orientation codes: 1=N, 2=NE, 3=E, 4=SE, 5=S, 6=SW, 7=W, 8=NW.
# Anything else (0, "NR", etc.) is treated as unrecorded — it contributes to
# `window_count` and `window_total_area_m2` but to no octant.
@ -207,6 +214,64 @@ _FEATURE_COLUMNS: dict[str, ColumnSpec] = {
nullable=True,
description="Area share of windows with PVC frame; null when no windows.",
),
# Building parts — cross-all-parts physical aggregates
"building_parts_count": ColumnSpec(
dtype=int, nullable=False, description="Number of sap_building_parts."
),
"total_heat_loss_perimeter_m": ColumnSpec(
dtype=float,
nullable=False,
description="Total heat-loss perimeter (m), summed across all floor dimensions.",
),
"total_party_wall_length_m": ColumnSpec(
dtype=float,
nullable=False,
description="Total party-wall length (m), summed across all floor dimensions.",
),
"total_floor_area_from_parts_m2": ColumnSpec(
dtype=float,
nullable=False,
description="Total floor area (m²) summed across sap_building_parts (sanity vs total_floor_area_m2).",
),
"avg_room_height_m": ColumnSpec(
dtype=float,
nullable=True,
description="Floor-area-weighted mean room height (m) across all floor dimensions.",
),
# Building parts — Main Dwelling carve-out (none of these are populated if the
# property has no part identified as 'Main Dwelling')
"main_dwelling_heat_loss_perimeter_m": ColumnSpec(
dtype=float, nullable=True,
description="Heat-loss perimeter (m) for the Main Dwelling only.",
),
"main_dwelling_party_wall_length_m": ColumnSpec(
dtype=float, nullable=True,
description="Party-wall length (m) for the Main Dwelling only.",
),
"main_dwelling_total_floor_area_m2": ColumnSpec(
dtype=float, nullable=True,
description="Total floor area (m²) for the Main Dwelling only.",
),
"main_dwelling_avg_room_height_m": ColumnSpec(
dtype=float, nullable=True,
description="Floor-area-weighted mean room height (m) for the Main Dwelling.",
),
"main_dwelling_has_room_in_roof": ColumnSpec(
dtype=bool, nullable=True,
description="True if the Main Dwelling carries a sap_room_in_roof block.",
),
"main_dwelling_construction_age_band": ColumnSpec(
dtype=str, nullable=True, categorical=True,
description="Main Dwelling construction age band (A-M, '0', or 'NR').",
),
"main_dwelling_wall_construction": ColumnSpec(
dtype=int, nullable=True, categorical=True,
description="Main Dwelling wall construction SAP10 code.",
),
"main_dwelling_roof_construction": ColumnSpec(
dtype=int, nullable=True, categorical=True,
description="Main Dwelling roof construction SAP10 code.",
),
}
@ -282,6 +347,7 @@ class EpcMlTransform:
"""
rhi = epc.renewable_heat_incentive
window_aggregates = _window_aggregates(epc.sap_windows)
building_part_aggregates = _building_part_aggregates(epc.sap_building_parts)
return {
# Features — geometry
"total_floor_area_m2": epc.total_floor_area_m2,
@ -314,6 +380,8 @@ class EpcMlTransform:
"country_code": epc.country_code,
# Features — window aggregates (physics + orientation)
**window_aggregates,
# Features — building parts aggregates + Main Dwelling carve-out
**building_part_aggregates,
# Targets
"sap_score": epc.energy_rating_current,
"co2_emissions": epc.co2_emissions_current,
@ -336,6 +404,75 @@ def _peui_ucl(epc: EpcPropertyData) -> Optional[float]:
return apply_ucl_correction(float(epc.energy_consumption_current), band)
def _building_part_aggregates(parts: list[SapBuildingPart]) -> dict[str, Any]:
    """Aggregate sap_building_parts into 13 columns: 5 cross-all + 8 Main-Dwelling.

    Cross-all aggregates always populate: the count and the three sums are
    zero when there are no parts, and `avg_room_height_m` is None when the
    summed floor area is not positive.

    Main-Dwelling columns populate only when a part with
    `identifier == "Main Dwelling"` is present; otherwise they are all None
    (we don't silently fall back to the first part). If several parts carry
    that identifier, the first one in list order is used.
    """
    heat_loss_perimeter, party_wall_length, floor_area, avg_room_height = (
        _sum_floor_dimensions(parts)
    )
    aggregates: dict[str, Any] = {
        "building_parts_count": len(parts),
        "total_heat_loss_perimeter_m": heat_loss_perimeter,
        "total_party_wall_length_m": party_wall_length,
        "total_floor_area_from_parts_m2": floor_area,
        "avg_room_height_m": avg_room_height,
        "main_dwelling_heat_loss_perimeter_m": None,
        "main_dwelling_party_wall_length_m": None,
        "main_dwelling_total_floor_area_m2": None,
        "main_dwelling_avg_room_height_m": None,
        "main_dwelling_has_room_in_roof": None,
        "main_dwelling_construction_age_band": None,
        "main_dwelling_wall_construction": None,
        "main_dwelling_roof_construction": None,
    }

    main = next(
        (p for p in parts if p.identifier == _MAIN_DWELLING_IDENTIFIER), None
    )
    if main is None:
        return aggregates

    main_hlp, main_pwl, main_floor_area, main_avg_height = _sum_floor_dimensions(
        [main]
    )
    aggregates["main_dwelling_heat_loss_perimeter_m"] = main_hlp
    aggregates["main_dwelling_party_wall_length_m"] = main_pwl
    aggregates["main_dwelling_total_floor_area_m2"] = main_floor_area
    aggregates["main_dwelling_avg_room_height_m"] = main_avg_height
    aggregates["main_dwelling_has_room_in_roof"] = main.sap_room_in_roof is not None
    aggregates["main_dwelling_construction_age_band"] = main.construction_age_band
    # Only integer wall-construction codes are emitted; any non-int value is
    # reported as None.
    aggregates["main_dwelling_wall_construction"] = (
        main.wall_construction
        if isinstance(main.wall_construction, int)
        else None
    )
    aggregates["main_dwelling_roof_construction"] = main.roof_construction
    return aggregates


def _sum_floor_dimensions(
    parts: list[SapBuildingPart],
) -> tuple[float, float, float, Optional[float]]:
    """Total the floor-dimension measurements over every floor of `parts`.

    Returns `(heat_loss_perimeter_m, party_wall_length_m, floor_area_m2,
    avg_room_height_m)`. The average room height is weighted by floor area and
    is None when the summed floor area is not positive.
    """
    heat_loss_perimeter = 0.0
    party_wall_length = 0.0
    floor_area = 0.0
    area_weighted_height = 0.0
    # Plain left-to-right accumulation, in part order then floor order.
    for part in parts:
        for fd in part.sap_floor_dimensions:
            heat_loss_perimeter += fd.heat_loss_perimeter_m
            party_wall_length += fd.party_wall_length_m
            floor_area += fd.total_floor_area_m2
            area_weighted_height += fd.room_height_m * fd.total_floor_area_m2

    avg_room_height: Optional[float] = None
    if floor_area > 0:
        avg_room_height = area_weighted_height / floor_area
    return heat_loss_perimeter, party_wall_length, floor_area, avg_room_height
def _window_aggregates(windows: list[SapWindow]) -> dict[str, Any]:
"""Aggregate a list of windows into the 30 window-feature columns.