mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
slice 9: building parts with main-dwelling carve-out
Thirteen building-parts features land: five cross-all-parts physical aggregates (count, total_heat_loss_perimeter_m, total_party_wall_length_m, total_floor_area_from_parts_m2, avg_room_height_m) and eight Main-Dwelling-specific columns (heat_loss_perimeter, party_wall_length, total_floor_area, avg_room_height, has_room_in_roof, construction_age_band, wall_construction, roof_construction). Main-Dwelling columns are None when no part has identifier == 'Main Dwelling' — honest about data quality rather than silently falling back to the first part. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
079e6f9a68
commit
fb773fa635
3 changed files with 352 additions and 4 deletions
|
|
@ -15,13 +15,62 @@ from datatypes.epc.domain.epc_property_data import (
|
|||
EpcPropertyData,
|
||||
InstantaneousWwhrs,
|
||||
RenewableHeatIncentive,
|
||||
SapBuildingPart,
|
||||
SapEnergySource,
|
||||
SapFloorDimension,
|
||||
SapHeating,
|
||||
SapRoomInRoof,
|
||||
SapWindow,
|
||||
WindowTransmissionDetails,
|
||||
)
|
||||
|
||||
|
||||
def make_floor_dimension(
    *,
    total_floor_area_m2: float = 50.0,
    room_height_m: float = 2.5,
    party_wall_length_m: float = 5.0,
    heat_loss_perimeter_m: float = 20.0,
    floor: Optional[int] = 0,
) -> SapFloorDimension:
    """Return a SapFloorDimension test fixture.

    Every argument is keyword-only and optional; anything not supplied takes
    the default shown in the signature.
    """
    field_values = {
        "room_height_m": room_height_m,
        "total_floor_area_m2": total_floor_area_m2,
        "party_wall_length_m": party_wall_length_m,
        "heat_loss_perimeter_m": heat_loss_perimeter_m,
        "floor": floor,
    }
    return SapFloorDimension(**field_values)
|
||||
|
||||
|
||||
def make_building_part(
    *,
    identifier: str = "Main Dwelling",
    construction_age_band: str = "B",
    wall_construction: Union[int, str] = 3,
    wall_insulation_type: Union[int, str] = 2,
    wall_thickness_measured: bool = True,
    party_wall_construction: Union[int, str] = 1,
    roof_construction: Optional[int] = 4,
    floor_dimensions: Optional[list[SapFloorDimension]] = None,
    sap_room_in_roof: Optional[SapRoomInRoof] = None,
) -> SapBuildingPart:
    """Return a SapBuildingPart test fixture.

    All arguments are keyword-only. When ``floor_dimensions`` is None the part
    gets a single default floor from ``make_floor_dimension()``; an explicit
    list (including an empty one) is passed through unchanged.
    """
    if floor_dimensions is None:
        dimensions = [make_floor_dimension()]
    else:
        dimensions = floor_dimensions

    return SapBuildingPart(
        identifier=identifier,
        construction_age_band=construction_age_band,
        wall_construction=wall_construction,
        wall_insulation_type=wall_insulation_type,
        wall_thickness_measured=wall_thickness_measured,
        party_wall_construction=party_wall_construction,
        roof_construction=roof_construction,
        sap_floor_dimensions=dimensions,
        sap_room_in_roof=sap_room_in_roof,
    )
|
||||
|
||||
|
||||
def make_window(
|
||||
*,
|
||||
orientation: Union[int, str] = 5, # SAP10: 1=N, 2=NE, 3=E, 4=SE, 5=S, 6=SW, 7=W, 8=NW
|
||||
|
|
@ -86,6 +135,7 @@ def make_minimal_sap10_epc(
|
|||
region_code: Optional[str] = None,
|
||||
country_code: Optional[str] = None,
|
||||
sap_windows: Optional[list[SapWindow]] = None,
|
||||
sap_building_parts: Optional[list[SapBuildingPart]] = None,
|
||||
) -> EpcPropertyData:
|
||||
"""Construct a minimal valid SAP10 EpcPropertyData with parametrisable targets."""
|
||||
return EpcPropertyData(
|
||||
|
|
@ -117,7 +167,7 @@ def make_minimal_sap10_epc(
|
|||
wind_turbines_terrain_type="Suburban",
|
||||
electricity_smart_meter_present=False,
|
||||
),
|
||||
sap_building_parts=[],
|
||||
sap_building_parts=list(sap_building_parts) if sap_building_parts is not None else [],
|
||||
solar_water_heating=solar_water_heating,
|
||||
has_hot_water_cylinder=has_hot_water_cylinder,
|
||||
has_fixed_air_conditioning=has_fixed_air_conditioning,
|
||||
|
|
|
|||
|
|
@ -2,9 +2,14 @@
|
|||
|
||||
import pytest
|
||||
|
||||
from datatypes.epc.domain.epc_property_data import WindowTransmissionDetails
|
||||
from datatypes.epc.domain.epc_property_data import SapRoomInRoof, WindowTransmissionDetails
|
||||
from domain.ml.schema import ColumnSpec, TransformSchema
|
||||
from domain.ml.tests._fixtures import make_minimal_sap10_epc, make_window
|
||||
from domain.ml.tests._fixtures import (
|
||||
make_building_part,
|
||||
make_floor_dimension,
|
||||
make_minimal_sap10_epc,
|
||||
make_window,
|
||||
)
|
||||
from domain.ml.transform import EpcMlTransform
|
||||
|
||||
|
||||
|
|
@ -500,6 +505,162 @@ def test_to_row_returns_window_share_zeros_for_property_with_no_windows() -> Non
|
|||
assert row["window_pct_pvc_frame"] is None
|
||||
|
||||
|
||||
# Expected schema entry for each building-part feature column.
# Maps column name -> (dtype, nullable, categorical).
_BUILDING_PART_FEATURES_NULLABLE: dict[str, tuple[type, bool, bool]] = {
    # Aggregates across every building part
    "building_parts_count": (int, False, False),
    "total_heat_loss_perimeter_m": (float, False, False),
    "total_party_wall_length_m": (float, False, False),
    "total_floor_area_from_parts_m2": (float, False, False),
    "avg_room_height_m": (float, True, False),
    # Main Dwelling carve-out — numeric / boolean columns
    "main_dwelling_heat_loss_perimeter_m": (float, True, False),
    "main_dwelling_party_wall_length_m": (float, True, False),
    "main_dwelling_total_floor_area_m2": (float, True, False),
    "main_dwelling_avg_room_height_m": (float, True, False),
    "main_dwelling_has_room_in_roof": (bool, True, False),
    # Main Dwelling carve-out — categorical columns
    "main_dwelling_construction_age_band": (str, True, True),
    "main_dwelling_wall_construction": (int, True, True),
    "main_dwelling_roof_construction": (int, True, True),
}
|
||||
|
||||
|
||||
def test_schema_advertises_building_part_features() -> None:
    # Arrange
    transform = EpcMlTransform()

    # Act
    feature_columns = transform.schema().feature_columns

    # Assert — every expected column is present with the expected spec
    for name, expected in _BUILDING_PART_FEATURES_NULLABLE.items():
        want_dtype, want_nullable, want_categorical = expected
        assert name in feature_columns, name
        spec = feature_columns[name]
        assert spec.dtype is want_dtype, name
        assert spec.nullable is want_nullable, name
        assert spec.categorical is want_categorical, name
|
||||
|
||||
|
||||
def test_to_row_aggregates_building_parts_with_main_dwelling_carveout() -> None:
    # Arrange — a Main Dwelling with two floor dimensions (age band B, wall 3,
    # roof 4) and one extension with a single floor dimension.
    main_floor_a = make_floor_dimension(
        total_floor_area_m2=30.0,
        room_height_m=2.5,
        party_wall_length_m=6.0,
        heat_loss_perimeter_m=20.0,
    )
    main_floor_b = make_floor_dimension(
        total_floor_area_m2=28.0,
        room_height_m=2.4,
        party_wall_length_m=6.0,
        heat_loss_perimeter_m=18.0,
    )
    extension_floor = make_floor_dimension(
        total_floor_area_m2=12.0,
        room_height_m=2.6,
        party_wall_length_m=0.0,
        heat_loss_perimeter_m=10.0,
    )
    main_part = make_building_part(
        identifier="Main Dwelling",
        construction_age_band="B",
        wall_construction=3,
        roof_construction=4,
        floor_dimensions=[main_floor_a, main_floor_b],
    )
    extension_part = make_building_part(
        identifier="Extension 1",
        construction_age_band="L",
        wall_construction=4,
        roof_construction=5,
        floor_dimensions=[extension_floor],
    )
    epc = make_minimal_sap10_epc(
        energy_rating_current=82,
        sap_building_parts=[main_part, extension_part],
    )

    # Act
    row = EpcMlTransform().to_row(epc)

    # Assert — aggregates over all parts
    assert row["building_parts_count"] == 2
    assert row["total_heat_loss_perimeter_m"] == pytest.approx(48.0)  # 20 + 18 + 10
    assert row["total_party_wall_length_m"] == pytest.approx(12.0)  # 6 + 6 + 0
    assert row["total_floor_area_from_parts_m2"] == pytest.approx(70.0)  # 30 + 28 + 12
    # Area-weighted height over all floors:
    # (2.5*30 + 2.4*28 + 2.6*12) / 70 = 173.4 / 70 = 2.4771...
    assert row["avg_room_height_m"] == pytest.approx(2.4771, abs=0.001)

    # Assert — Main Dwelling numeric columns
    assert row["main_dwelling_heat_loss_perimeter_m"] == pytest.approx(38.0)
    assert row["main_dwelling_party_wall_length_m"] == pytest.approx(12.0)
    assert row["main_dwelling_total_floor_area_m2"] == pytest.approx(58.0)
    # Area-weighted height over the main floors only:
    # (2.5*30 + 2.4*28) / 58 = 142.2 / 58 = 2.4517
    assert row["main_dwelling_avg_room_height_m"] == pytest.approx(2.4517, abs=0.001)
    assert row["main_dwelling_has_room_in_roof"] is False

    # Assert — Main Dwelling categorical columns
    assert row["main_dwelling_construction_age_band"] == "B"
    assert row["main_dwelling_wall_construction"] == 3
    assert row["main_dwelling_roof_construction"] == 4
|
||||
|
||||
|
||||
def test_to_row_flags_room_in_roof_when_main_dwelling_has_it() -> None:
    # Arrange — Main Dwelling carrying a room-in-roof block
    room_in_roof = SapRoomInRoof(floor_area=15.0, construction_age_band="B")
    main_part = make_building_part(
        identifier="Main Dwelling",
        sap_room_in_roof=room_in_roof,
    )
    epc = make_minimal_sap10_epc(
        energy_rating_current=82,
        sap_building_parts=[main_part],
    )

    # Act
    row = EpcMlTransform().to_row(epc)

    # Assert
    assert row["main_dwelling_has_room_in_roof"] is True
|
||||
|
||||
|
||||
def test_to_row_returns_building_part_nones_when_no_main_dwelling_identified() -> None:
    # Arrange — the only part is not labelled "Main Dwelling"
    lone_part = make_building_part(identifier="Extension 1")
    epc = make_minimal_sap10_epc(
        energy_rating_current=82,
        sap_building_parts=[lone_part],
    )

    # Act
    row = EpcMlTransform().to_row(epc)

    # Assert — aggregates over all parts are still filled in
    assert row["building_parts_count"] == 1
    assert row["total_heat_loss_perimeter_m"] == pytest.approx(20.0)

    # Assert — every Main-Dwelling column is None rather than borrowing
    # values from the sole (non-main) part
    main_dwelling_columns = (
        "main_dwelling_heat_loss_perimeter_m",
        "main_dwelling_party_wall_length_m",
        "main_dwelling_total_floor_area_m2",
        "main_dwelling_avg_room_height_m",
        "main_dwelling_has_room_in_roof",
        "main_dwelling_construction_age_band",
        "main_dwelling_wall_construction",
        "main_dwelling_roof_construction",
    )
    for column in main_dwelling_columns:
        assert row[column] is None, column
|
||||
|
||||
|
||||
def test_to_row_returns_building_part_zeros_for_property_with_no_parts() -> None:
    # Arrange — no sap_building_parts passed to the fixture
    epc = make_minimal_sap10_epc(energy_rating_current=82)

    # Act
    row = EpcMlTransform().to_row(epc)

    # Assert — count and summed totals are zero
    assert row["building_parts_count"] == 0
    for total_column in (
        "total_heat_loss_perimeter_m",
        "total_party_wall_length_m",
        "total_floor_area_from_parts_m2",
    ):
        assert row[total_column] == 0.0, total_column

    # Assert — the mean height and the Main-Dwelling columns are None
    for null_column in (
        "avg_room_height_m",
        "main_dwelling_heat_loss_perimeter_m",
        "main_dwelling_construction_age_band",
        "main_dwelling_wall_construction",
    ):
        assert row[null_column] is None, null_column
|
||||
|
||||
|
||||
def test_to_row_area_weights_window_u_value_and_solar_transmittance() -> None:
|
||||
# Arrange — two windows with transmission details; one without.
|
||||
sap_windows = [
|
||||
|
|
|
|||
|
|
@ -13,11 +13,18 @@ See docs/adr/0007-kwh-as-ml-target.md for the target set and rationale.
|
|||
from typing import Any, Optional
|
||||
|
||||
from datatypes.epc.domain.epc import Epc
|
||||
from datatypes.epc.domain.epc_property_data import EpcPropertyData, SapWindow
|
||||
from datatypes.epc.domain.epc_property_data import (
|
||||
EpcPropertyData,
|
||||
SapBuildingPart,
|
||||
SapWindow,
|
||||
)
|
||||
from domain.ml.schema import ColumnSpec, TransformSchema
|
||||
from domain.ml.ucl import apply_ucl_correction
|
||||
|
||||
|
||||
_MAIN_DWELLING_IDENTIFIER = "Main Dwelling"
|
||||
|
||||
|
||||
# SAP10 orientation codes: 1=N, 2=NE, 3=E, 4=SE, 5=S, 6=SW, 7=W, 8=NW.
|
||||
# Anything else (0, "NR", etc.) is treated as unrecorded — it contributes to
|
||||
# `window_count` and `window_total_area_m2` but to no octant.
|
||||
|
|
@ -207,6 +214,64 @@ _FEATURE_COLUMNS: dict[str, ColumnSpec] = {
|
|||
nullable=True,
|
||||
description="Area share of windows with PVC frame; null when no windows.",
|
||||
),
|
||||
# Building parts — cross-all-parts physical aggregates
|
||||
"building_parts_count": ColumnSpec(
|
||||
dtype=int, nullable=False, description="Number of sap_building_parts."
|
||||
),
|
||||
"total_heat_loss_perimeter_m": ColumnSpec(
|
||||
dtype=float,
|
||||
nullable=False,
|
||||
description="Total heat-loss perimeter (m), summed across all floor dimensions.",
|
||||
),
|
||||
"total_party_wall_length_m": ColumnSpec(
|
||||
dtype=float,
|
||||
nullable=False,
|
||||
description="Total party-wall length (m), summed across all floor dimensions.",
|
||||
),
|
||||
"total_floor_area_from_parts_m2": ColumnSpec(
|
||||
dtype=float,
|
||||
nullable=False,
|
||||
description="Total floor area (m²) summed across sap_building_parts (sanity vs total_floor_area_m2).",
|
||||
),
|
||||
"avg_room_height_m": ColumnSpec(
|
||||
dtype=float,
|
||||
nullable=True,
|
||||
description="Floor-area-weighted mean room height (m) across all floor dimensions.",
|
||||
),
|
||||
# Building parts — Main Dwelling carve-out (none of these are populated if the
|
||||
# property has no part identified as 'Main Dwelling')
|
||||
"main_dwelling_heat_loss_perimeter_m": ColumnSpec(
|
||||
dtype=float, nullable=True,
|
||||
description="Heat-loss perimeter (m) for the Main Dwelling only.",
|
||||
),
|
||||
"main_dwelling_party_wall_length_m": ColumnSpec(
|
||||
dtype=float, nullable=True,
|
||||
description="Party-wall length (m) for the Main Dwelling only.",
|
||||
),
|
||||
"main_dwelling_total_floor_area_m2": ColumnSpec(
|
||||
dtype=float, nullable=True,
|
||||
description="Total floor area (m²) for the Main Dwelling only.",
|
||||
),
|
||||
"main_dwelling_avg_room_height_m": ColumnSpec(
|
||||
dtype=float, nullable=True,
|
||||
description="Floor-area-weighted mean room height (m) for the Main Dwelling.",
|
||||
),
|
||||
"main_dwelling_has_room_in_roof": ColumnSpec(
|
||||
dtype=bool, nullable=True,
|
||||
description="True if the Main Dwelling carries a sap_room_in_roof block.",
|
||||
),
|
||||
"main_dwelling_construction_age_band": ColumnSpec(
|
||||
dtype=str, nullable=True, categorical=True,
|
||||
description="Main Dwelling construction age band (A-M, '0', or 'NR').",
|
||||
),
|
||||
"main_dwelling_wall_construction": ColumnSpec(
|
||||
dtype=int, nullable=True, categorical=True,
|
||||
description="Main Dwelling wall construction SAP10 code.",
|
||||
),
|
||||
"main_dwelling_roof_construction": ColumnSpec(
|
||||
dtype=int, nullable=True, categorical=True,
|
||||
description="Main Dwelling roof construction SAP10 code.",
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -282,6 +347,7 @@ class EpcMlTransform:
|
|||
"""
|
||||
rhi = epc.renewable_heat_incentive
|
||||
window_aggregates = _window_aggregates(epc.sap_windows)
|
||||
building_part_aggregates = _building_part_aggregates(epc.sap_building_parts)
|
||||
return {
|
||||
# Features — geometry
|
||||
"total_floor_area_m2": epc.total_floor_area_m2,
|
||||
|
|
@ -314,6 +380,8 @@ class EpcMlTransform:
|
|||
"country_code": epc.country_code,
|
||||
# Features — window aggregates (physics + orientation)
|
||||
**window_aggregates,
|
||||
# Features — building parts aggregates + Main Dwelling carve-out
|
||||
**building_part_aggregates,
|
||||
# Targets
|
||||
"sap_score": epc.energy_rating_current,
|
||||
"co2_emissions": epc.co2_emissions_current,
|
||||
|
|
@ -336,6 +404,75 @@ def _peui_ucl(epc: EpcPropertyData) -> Optional[float]:
|
|||
return apply_ucl_correction(float(epc.energy_consumption_current), band)
|
||||
|
||||
|
||||
def _floor_dimension_totals(floor_dimensions: Any) -> tuple[float, float, float, float]:
    """Accumulate one pass over an iterable of floor dimensions.

    Returns ``(heat_loss_perimeter, party_wall_length, floor_area,
    height_times_area)``, each starting from 0.0. The last element is the sum
    of ``room_height_m * total_floor_area_m2`` and is the numerator of the
    floor-area-weighted mean room height.
    """
    perimeter = 0.0
    party_wall = 0.0
    area = 0.0
    height_times_area = 0.0
    # Plain running sums (rather than sum()) keep the float accumulation
    # order explicit: one addition per floor dimension, in iteration order.
    for dim in floor_dimensions:
        perimeter += dim.heat_loss_perimeter_m
        party_wall += dim.party_wall_length_m
        area += dim.total_floor_area_m2
        height_times_area += dim.room_height_m * dim.total_floor_area_m2
    return perimeter, party_wall, area, height_times_area


def _building_part_aggregates(parts: list[SapBuildingPart]) -> dict[str, Any]:
    """Aggregate sap_building_parts into the 13 columns: 5 cross-all + 8 Main-Dwelling.

    The count and the three summed totals are always populated (0 / 0.0 when
    there are no parts). The two area-weighted mean heights stay None unless
    the relevant floor area is positive. Main-Dwelling columns are filled only
    from the first part whose ``identifier`` equals ``_MAIN_DWELLING_IDENTIFIER``;
    with no such part they stay None — there is no fallback to the first part.
    """
    # First part labelled as the main dwelling, if any.
    main_part = None
    for candidate in parts:
        if candidate.identifier == _MAIN_DWELLING_IDENTIFIER:
            main_part = candidate
            break

    result: dict[str, Any] = {
        "building_parts_count": len(parts),
        "total_heat_loss_perimeter_m": 0.0,
        "total_party_wall_length_m": 0.0,
        "total_floor_area_from_parts_m2": 0.0,
        "avg_room_height_m": None,
        "main_dwelling_heat_loss_perimeter_m": None,
        "main_dwelling_party_wall_length_m": None,
        "main_dwelling_total_floor_area_m2": None,
        "main_dwelling_avg_room_height_m": None,
        "main_dwelling_has_room_in_roof": None,
        "main_dwelling_construction_age_band": None,
        "main_dwelling_wall_construction": None,
        "main_dwelling_roof_construction": None,
    }
    if not parts:
        return result

    # Cross-all aggregates: every floor dimension of every part, in order.
    every_dimension = (dim for part in parts for dim in part.sap_floor_dimensions)
    perimeter, party_wall, area, height_times_area = _floor_dimension_totals(
        every_dimension
    )
    result["total_heat_loss_perimeter_m"] = perimeter
    result["total_party_wall_length_m"] = party_wall
    result["total_floor_area_from_parts_m2"] = area
    if area > 0:
        result["avg_room_height_m"] = height_times_area / area

    if main_part is None:
        return result

    # Main-Dwelling carve-out: the same sums restricted to the main part.
    main_perimeter, main_party_wall, main_area, main_height_times_area = (
        _floor_dimension_totals(main_part.sap_floor_dimensions)
    )
    result["main_dwelling_heat_loss_perimeter_m"] = main_perimeter
    result["main_dwelling_party_wall_length_m"] = main_party_wall
    result["main_dwelling_total_floor_area_m2"] = main_area
    if main_area > 0:
        result["main_dwelling_avg_room_height_m"] = main_height_times_area / main_area
    result["main_dwelling_has_room_in_roof"] = main_part.sap_room_in_roof is not None
    result["main_dwelling_construction_age_band"] = main_part.construction_age_band
    # Only integer wall-construction codes are emitted; any other value
    # (e.g. a string) becomes None.
    wall_code = main_part.wall_construction
    result["main_dwelling_wall_construction"] = (
        wall_code if isinstance(wall_code, int) else None
    )
    result["main_dwelling_roof_construction"] = main_part.roof_construction
    return result
|
||||
|
||||
|
||||
def _window_aggregates(windows: list[SapWindow]) -> dict[str, Any]:
|
||||
"""Aggregate a list of windows into the 30 window-feature columns.
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue