From fb773fa6355b778907c7da437b5b91d4824fa84f Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar
Date: Sat, 16 May 2026 15:45:21 +0000
Subject: [PATCH] slice 9: building parts with main-dwelling carve-out
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Thirteen building-parts features land: five cross-all-parts physical
aggregates (count, total_heat_loss_perimeter_m,
total_party_wall_length_m, total_floor_area_from_parts_m2,
avg_room_height_m) and eight Main-Dwelling-specific columns
(heat_loss_perimeter, party_wall_length, total_floor_area,
avg_room_height, has_room_in_roof, construction_age_band,
wall_construction, roof_construction). Main-Dwelling columns are None
when no part has identifier == 'Main Dwelling' — honest about data
quality rather than silently falling back to the first part.

Co-Authored-By: Claude Opus 4.7
---
Review notes (commentary only; not part of the commit message):

* _building_part_aggregates picks the Main Dwelling with next(...) over
  an exact `identifier == "Main Dwelling"` comparison: the first
  matching part is used, and any other spelling leaves every
  main_dwelling_* column as None.
* main_dwelling_wall_construction is emitted only when the source value
  is an int; any other value becomes None (the fixture types the field
  as Union[int, str]). main_dwelling_roof_construction and
  main_dwelling_construction_age_band are passed through unchanged.
* avg_room_height_m and main_dwelling_avg_room_height_m are floor-area
  weighted and stay None unless the summed floor area is > 0.
* The cross-all loop visits every part, so Main Dwelling floors count
  toward both the total_* columns and the main_dwelling_* columns.
* NOTE(review): this copy was re-flowed from a whitespace-collapsed
  extract. Hunk bodies were laid out to match their @@ line counts and
  the diffstat; indentation of context lines is reconstructed, so
  confirm with `git apply --check` against the target tree.

 .../domain/src/domain/ml/tests/_fixtures.py   |  52 +++++-
 .../src/domain/ml/tests/test_transform.py     | 165 +++++++++++++++++-
 packages/domain/src/domain/ml/transform.py    | 139 ++++++++++++++-
 3 files changed, 352 insertions(+), 4 deletions(-)

diff --git a/packages/domain/src/domain/ml/tests/_fixtures.py b/packages/domain/src/domain/ml/tests/_fixtures.py
index b376b862..b8858485 100644
--- a/packages/domain/src/domain/ml/tests/_fixtures.py
+++ b/packages/domain/src/domain/ml/tests/_fixtures.py
@@ -15,13 +15,62 @@ from datatypes.epc.domain.epc_property_data import (
     EpcPropertyData,
     InstantaneousWwhrs,
     RenewableHeatIncentive,
+    SapBuildingPart,
     SapEnergySource,
+    SapFloorDimension,
     SapHeating,
+    SapRoomInRoof,
     SapWindow,
     WindowTransmissionDetails,
 )
 
 
+def make_floor_dimension(
+    *,
+    total_floor_area_m2: float = 50.0,
+    room_height_m: float = 2.5,
+    party_wall_length_m: float = 5.0,
+    heat_loss_perimeter_m: float = 20.0,
+    floor: Optional[int] = 0,
+) -> SapFloorDimension:
+    """Build a SapFloorDimension with sensible defaults."""
+    return SapFloorDimension(
+        room_height_m=room_height_m,
+        total_floor_area_m2=total_floor_area_m2,
+        party_wall_length_m=party_wall_length_m,
+        heat_loss_perimeter_m=heat_loss_perimeter_m,
+        floor=floor,
+    )
+
+
+def make_building_part(
+    *,
+    identifier: str = "Main Dwelling",
+    construction_age_band: str = "B",
+    wall_construction: Union[int, str] = 3,
+    wall_insulation_type: Union[int, str] = 2,
+    wall_thickness_measured: bool = True,
+    party_wall_construction: Union[int, str] = 1,
+    roof_construction: Optional[int] = 4,
+    floor_dimensions: Optional[list[SapFloorDimension]] = None,
+    sap_room_in_roof: Optional[SapRoomInRoof] = None,
+) -> SapBuildingPart:
+    """Build a SapBuildingPart with sensible SAP10 defaults."""
+    return SapBuildingPart(
+        identifier=identifier,
+        construction_age_band=construction_age_band,
+        wall_construction=wall_construction,
+        wall_insulation_type=wall_insulation_type,
+        wall_thickness_measured=wall_thickness_measured,
+        party_wall_construction=party_wall_construction,
+        roof_construction=roof_construction,
+        sap_floor_dimensions=floor_dimensions
+        if floor_dimensions is not None
+        else [make_floor_dimension()],
+        sap_room_in_roof=sap_room_in_roof,
+    )
+
+
 def make_window(
     *,
     orientation: Union[int, str] = 5,  # SAP10: 1=N, 2=NE, 3=E, 4=SE, 5=S, 6=SW, 7=W, 8=NW
@@ -86,6 +135,7 @@ def make_minimal_sap10_epc(
     region_code: Optional[str] = None,
     country_code: Optional[str] = None,
     sap_windows: Optional[list[SapWindow]] = None,
+    sap_building_parts: Optional[list[SapBuildingPart]] = None,
 ) -> EpcPropertyData:
     """Construct a minimal valid SAP10 EpcPropertyData with parametrisable targets."""
     return EpcPropertyData(
@@ -117,7 +167,7 @@ def make_minimal_sap10_epc(
             wind_turbines_terrain_type="Suburban",
             electricity_smart_meter_present=False,
         ),
-        sap_building_parts=[],
+        sap_building_parts=list(sap_building_parts) if sap_building_parts is not None else [],
         solar_water_heating=solar_water_heating,
         has_hot_water_cylinder=has_hot_water_cylinder,
         has_fixed_air_conditioning=has_fixed_air_conditioning,
diff --git a/packages/domain/src/domain/ml/tests/test_transform.py b/packages/domain/src/domain/ml/tests/test_transform.py
index 27d48654..6ff44c9f 100644
--- a/packages/domain/src/domain/ml/tests/test_transform.py
+++ b/packages/domain/src/domain/ml/tests/test_transform.py
@@ -2,9 +2,14 @@
 
 import pytest
 
-from datatypes.epc.domain.epc_property_data import WindowTransmissionDetails
+from datatypes.epc.domain.epc_property_data import SapRoomInRoof, WindowTransmissionDetails
 from domain.ml.schema import ColumnSpec, TransformSchema
-from domain.ml.tests._fixtures import make_minimal_sap10_epc, make_window
+from domain.ml.tests._fixtures import (
+    make_building_part,
+    make_floor_dimension,
+    make_minimal_sap10_epc,
+    make_window,
+)
 from domain.ml.transform import EpcMlTransform
 
 
@@ -500,6 +505,162 @@ def test_to_row_returns_window_share_zeros_for_property_with_no_windows() -> Non
     assert row["window_pct_pvc_frame"] is None
 
 
+_BUILDING_PART_FEATURES_NULLABLE: dict[str, tuple[type, bool, bool]] = {
+    # name → (dtype, nullable, categorical)
+    "building_parts_count": (int, False, False),
+    "total_heat_loss_perimeter_m": (float, False, False),
+    "total_party_wall_length_m": (float, False, False),
+    "total_floor_area_from_parts_m2": (float, False, False),
+    "avg_room_height_m": (float, True, False),
+    "main_dwelling_heat_loss_perimeter_m": (float, True, False),
+    "main_dwelling_party_wall_length_m": (float, True, False),
+    "main_dwelling_total_floor_area_m2": (float, True, False),
+    "main_dwelling_avg_room_height_m": (float, True, False),
+    "main_dwelling_has_room_in_roof": (bool, True, False),
+    "main_dwelling_construction_age_band": (str, True, True),
+    "main_dwelling_wall_construction": (int, True, True),
+    "main_dwelling_roof_construction": (int, True, True),
+}
+
+
+def test_schema_advertises_building_part_features() -> None:
+    # Arrange
+    transform = EpcMlTransform()
+
+    # Act
+    schema = transform.schema()
+
+    # Assert
+    for name, (expected_dtype, expected_nullable, expected_categorical) in (
+        _BUILDING_PART_FEATURES_NULLABLE.items()
+    ):
+        assert name in schema.feature_columns, name
+        column = schema.feature_columns[name]
+        assert column.dtype is expected_dtype, name
+        assert column.nullable is expected_nullable, name
+        assert column.categorical is expected_categorical, name
+
+
+def test_to_row_aggregates_building_parts_with_main_dwelling_carveout() -> None:
+    # Arrange — Main Dwelling (two floors, age band B, wall 3, roof 4) plus one extension.
+    main = make_building_part(
+        identifier="Main Dwelling",
+        construction_age_band="B",
+        wall_construction=3,
+        roof_construction=4,
+        floor_dimensions=[
+            make_floor_dimension(
+                total_floor_area_m2=30.0, room_height_m=2.5,
+                party_wall_length_m=6.0, heat_loss_perimeter_m=20.0,
+            ),
+            make_floor_dimension(
+                total_floor_area_m2=28.0, room_height_m=2.4,
+                party_wall_length_m=6.0, heat_loss_perimeter_m=18.0,
+            ),
+        ],
+    )
+    extension = make_building_part(
+        identifier="Extension 1",
+        construction_age_band="L",
+        wall_construction=4,
+        roof_construction=5,
+        floor_dimensions=[
+            make_floor_dimension(
+                total_floor_area_m2=12.0, room_height_m=2.6,
+                party_wall_length_m=0.0, heat_loss_perimeter_m=10.0,
+            ),
+        ],
+    )
+    epc = make_minimal_sap10_epc(
+        energy_rating_current=82,
+        sap_building_parts=[main, extension],
+    )
+    transform = EpcMlTransform()
+
+    # Act
+    row = transform.to_row(epc)
+
+    # Assert — cross-all aggregates
+    assert row["building_parts_count"] == 2
+    assert row["total_heat_loss_perimeter_m"] == pytest.approx(48.0)
+    assert row["total_party_wall_length_m"] == pytest.approx(12.0)
+    assert row["total_floor_area_from_parts_m2"] == pytest.approx(70.0)
+    # avg_room_height area-weighted across all floors: (2.5*30 + 2.4*28 + 2.6*12) / 70
+    # = (75 + 67.2 + 31.2) / 70 = 173.4 / 70 = 2.4771...
+    assert row["avg_room_height_m"] == pytest.approx(2.4771, abs=0.001)
+    # Main Dwelling aggregates
+    assert row["main_dwelling_heat_loss_perimeter_m"] == pytest.approx(38.0)
+    assert row["main_dwelling_party_wall_length_m"] == pytest.approx(12.0)
+    assert row["main_dwelling_total_floor_area_m2"] == pytest.approx(58.0)
+    # main avg height = (2.5*30 + 2.4*28) / 58 = (75 + 67.2) / 58 = 142.2 / 58 = 2.4517
+    assert row["main_dwelling_avg_room_height_m"] == pytest.approx(2.4517, abs=0.001)
+    assert row["main_dwelling_has_room_in_roof"] is False
+    # Main Dwelling categoricals
+    assert row["main_dwelling_construction_age_band"] == "B"
+    assert row["main_dwelling_wall_construction"] == 3
+    assert row["main_dwelling_roof_construction"] == 4
+
+
+def test_to_row_flags_room_in_roof_when_main_dwelling_has_it() -> None:
+    # Arrange
+    main = make_building_part(
+        identifier="Main Dwelling",
+        sap_room_in_roof=SapRoomInRoof(floor_area=15.0, construction_age_band="B"),
+    )
+    epc = make_minimal_sap10_epc(energy_rating_current=82, sap_building_parts=[main])
+    transform = EpcMlTransform()
+
+    # Act
+    row = transform.to_row(epc)
+
+    # Assert
+    assert row["main_dwelling_has_room_in_roof"] is True
+
+
+def test_to_row_returns_building_part_nones_when_no_main_dwelling_identified() -> None:
+    # Arrange — single part with identifier that doesn't match "Main Dwelling"
+    sole_part = make_building_part(identifier="Extension 1")
+    epc = make_minimal_sap10_epc(
+        energy_rating_current=82, sap_building_parts=[sole_part]
+    )
+    transform = EpcMlTransform()
+
+    # Act
+    row = transform.to_row(epc)
+
+    # Assert — cross-all aggregates still populate
+    assert row["building_parts_count"] == 1
+    assert row["total_heat_loss_perimeter_m"] == pytest.approx(20.0)
+    # Main-dwelling-specific columns are None — honest about data quality
+    assert row["main_dwelling_heat_loss_perimeter_m"] is None
+    assert row["main_dwelling_party_wall_length_m"] is None
+    assert row["main_dwelling_total_floor_area_m2"] is None
+    assert row["main_dwelling_avg_room_height_m"] is None
+    assert row["main_dwelling_has_room_in_roof"] is None
+    assert row["main_dwelling_construction_age_band"] is None
+    assert row["main_dwelling_wall_construction"] is None
+    assert row["main_dwelling_roof_construction"] is None
+
+
+def test_to_row_returns_building_part_zeros_for_property_with_no_parts() -> None:
+    # Arrange
+    epc = make_minimal_sap10_epc(energy_rating_current=82)
+    transform = EpcMlTransform()
+
+    # Act
+    row = transform.to_row(epc)
+
+    # Assert
+    assert row["building_parts_count"] == 0
+    assert row["total_heat_loss_perimeter_m"] == 0.0
+    assert row["total_party_wall_length_m"] == 0.0
+    assert row["total_floor_area_from_parts_m2"] == 0.0
+    assert row["avg_room_height_m"] is None
+    assert row["main_dwelling_heat_loss_perimeter_m"] is None
+    assert row["main_dwelling_construction_age_band"] is None
+    assert row["main_dwelling_wall_construction"] is None
+
+
 def test_to_row_area_weights_window_u_value_and_solar_transmittance() -> None:
     # Arrange — two windows with transmission details; one without.
     sap_windows = [
diff --git a/packages/domain/src/domain/ml/transform.py b/packages/domain/src/domain/ml/transform.py
index 919df345..473763b5 100644
--- a/packages/domain/src/domain/ml/transform.py
+++ b/packages/domain/src/domain/ml/transform.py
@@ -13,11 +13,18 @@ See docs/adr/0007-kwh-as-ml-target.md for the target set and rationale.
 from typing import Any, Optional
 
 from datatypes.epc.domain.epc import Epc
-from datatypes.epc.domain.epc_property_data import EpcPropertyData, SapWindow
+from datatypes.epc.domain.epc_property_data import (
+    EpcPropertyData,
+    SapBuildingPart,
+    SapWindow,
+)
 from domain.ml.schema import ColumnSpec, TransformSchema
 from domain.ml.ucl import apply_ucl_correction
 
 
+_MAIN_DWELLING_IDENTIFIER = "Main Dwelling"
+
+
 # SAP10 orientation codes: 1=N, 2=NE, 3=E, 4=SE, 5=S, 6=SW, 7=W, 8=NW.
 # Anything else (0, "NR", etc.) is treated as unrecorded — it contributes to
 # `window_count` and `window_total_area_m2` but to no octant.
@@ -207,6 +214,64 @@ _FEATURE_COLUMNS: dict[str, ColumnSpec] = {
         nullable=True,
         description="Area share of windows with PVC frame; null when no windows.",
     ),
+    # Building parts — cross-all-parts physical aggregates
+    "building_parts_count": ColumnSpec(
+        dtype=int, nullable=False, description="Number of sap_building_parts."
+    ),
+    "total_heat_loss_perimeter_m": ColumnSpec(
+        dtype=float,
+        nullable=False,
+        description="Total heat-loss perimeter (m), summed across all floor dimensions.",
+    ),
+    "total_party_wall_length_m": ColumnSpec(
+        dtype=float,
+        nullable=False,
+        description="Total party-wall length (m), summed across all floor dimensions.",
+    ),
+    "total_floor_area_from_parts_m2": ColumnSpec(
+        dtype=float,
+        nullable=False,
+        description="Total floor area (m²) summed across sap_building_parts (sanity vs total_floor_area_m2).",
+    ),
+    "avg_room_height_m": ColumnSpec(
+        dtype=float,
+        nullable=True,
+        description="Floor-area-weighted mean room height (m) across all floor dimensions.",
+    ),
+    # Building parts — Main Dwelling carve-out (none of these are populated if the
+    # property has no part identified as 'Main Dwelling')
+    "main_dwelling_heat_loss_perimeter_m": ColumnSpec(
+        dtype=float, nullable=True,
+        description="Heat-loss perimeter (m) for the Main Dwelling only.",
+    ),
+    "main_dwelling_party_wall_length_m": ColumnSpec(
+        dtype=float, nullable=True,
+        description="Party-wall length (m) for the Main Dwelling only.",
+    ),
+    "main_dwelling_total_floor_area_m2": ColumnSpec(
+        dtype=float, nullable=True,
+        description="Total floor area (m²) for the Main Dwelling only.",
+    ),
+    "main_dwelling_avg_room_height_m": ColumnSpec(
+        dtype=float, nullable=True,
+        description="Floor-area-weighted mean room height (m) for the Main Dwelling.",
+    ),
+    "main_dwelling_has_room_in_roof": ColumnSpec(
+        dtype=bool, nullable=True,
+        description="True if the Main Dwelling carries a sap_room_in_roof block.",
+    ),
+    "main_dwelling_construction_age_band": ColumnSpec(
+        dtype=str, nullable=True, categorical=True,
+        description="Main Dwelling construction age band (A-M, '0', or 'NR').",
+    ),
+    "main_dwelling_wall_construction": ColumnSpec(
+        dtype=int, nullable=True, categorical=True,
+        description="Main Dwelling wall construction SAP10 code.",
+    ),
+    "main_dwelling_roof_construction": ColumnSpec(
+        dtype=int, nullable=True, categorical=True,
+        description="Main Dwelling roof construction SAP10 code.",
+    ),
 }
 
 
@@ -282,6 +347,7 @@ class EpcMlTransform:
         """
         rhi = epc.renewable_heat_incentive
         window_aggregates = _window_aggregates(epc.sap_windows)
+        building_part_aggregates = _building_part_aggregates(epc.sap_building_parts)
         return {
             # Features — geometry
             "total_floor_area_m2": epc.total_floor_area_m2,
@@ -314,6 +380,8 @@ class EpcMlTransform:
             "country_code": epc.country_code,
             # Features — window aggregates (physics + orientation)
             **window_aggregates,
+            # Features — building parts aggregates + Main Dwelling carve-out
+            **building_part_aggregates,
             # Targets
             "sap_score": epc.energy_rating_current,
             "co2_emissions": epc.co2_emissions_current,
@@ -336,6 +404,75 @@ def _peui_ucl(epc: EpcPropertyData) -> Optional[float]:
     return apply_ucl_correction(float(epc.energy_consumption_current), band)
 
 
+def _building_part_aggregates(parts: list[SapBuildingPart]) -> dict[str, Any]:
+    """Aggregate sap_building_parts into the 13 columns: 5 cross-all + 8 Main-Dwelling.
+
+    Cross-all aggregates always populate (zeros when no parts). Main-Dwelling
+    columns populate only when a part with `identifier == "Main Dwelling"` is
+    present — otherwise None (we don't silently fall back to the first part).
+    """
+    main = next(
+        (p for p in parts if p.identifier == _MAIN_DWELLING_IDENTIFIER), None
+    )
+    aggregates: dict[str, Any] = {
+        "building_parts_count": len(parts),
+        "total_heat_loss_perimeter_m": 0.0,
+        "total_party_wall_length_m": 0.0,
+        "total_floor_area_from_parts_m2": 0.0,
+        "avg_room_height_m": None,
+        "main_dwelling_heat_loss_perimeter_m": None,
+        "main_dwelling_party_wall_length_m": None,
+        "main_dwelling_total_floor_area_m2": None,
+        "main_dwelling_avg_room_height_m": None,
+        "main_dwelling_has_room_in_roof": None,
+        "main_dwelling_construction_age_band": None,
+        "main_dwelling_wall_construction": None,
+        "main_dwelling_roof_construction": None,
+    }
+    if not parts:
+        return aggregates
+
+    total_floor_area = 0.0
+    weighted_room_height = 0.0
+    for p in parts:
+        for fd in p.sap_floor_dimensions:
+            aggregates["total_heat_loss_perimeter_m"] += fd.heat_loss_perimeter_m
+            aggregates["total_party_wall_length_m"] += fd.party_wall_length_m
+            total_floor_area += fd.total_floor_area_m2
+            weighted_room_height += fd.room_height_m * fd.total_floor_area_m2
+    aggregates["total_floor_area_from_parts_m2"] = total_floor_area
+    if total_floor_area > 0:
+        aggregates["avg_room_height_m"] = weighted_room_height / total_floor_area
+
+    if main is not None:
+        main_floor_area = 0.0
+        main_weighted_height = 0.0
+        main_hlp = 0.0
+        main_pwl = 0.0
+        for fd in main.sap_floor_dimensions:
+            main_hlp += fd.heat_loss_perimeter_m
+            main_pwl += fd.party_wall_length_m
+            main_floor_area += fd.total_floor_area_m2
+            main_weighted_height += fd.room_height_m * fd.total_floor_area_m2
+        aggregates["main_dwelling_heat_loss_perimeter_m"] = main_hlp
+        aggregates["main_dwelling_party_wall_length_m"] = main_pwl
+        aggregates["main_dwelling_total_floor_area_m2"] = main_floor_area
+        if main_floor_area > 0:
+            aggregates["main_dwelling_avg_room_height_m"] = (
+                main_weighted_height / main_floor_area
+            )
+        aggregates["main_dwelling_has_room_in_roof"] = main.sap_room_in_roof is not None
+        aggregates["main_dwelling_construction_age_band"] = main.construction_age_band
+        aggregates["main_dwelling_wall_construction"] = (
+            main.wall_construction
+            if isinstance(main.wall_construction, int)
+            else None
+        )
+        aggregates["main_dwelling_roof_construction"] = main.roof_construction
+
+    return aggregates
+
+
 def _window_aggregates(windows: list[SapWindow]) -> dict[str, Any]:
     """Aggregate a list of windows into the 30 window-feature columns.
 