From fca88159910b02c0c72687a65fd2cf3dbf1ff2b7 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar
Date: Sun, 17 May 2026 11:53:43 +0000
Subject: [PATCH] slice 16c: envelope_heat_loss_w_per_k feature

New module domain.ml.envelope sums Sigma(U*A) + y*A_exposed across every
sap_building_part on a cert. U-values come from rdsap_uvalues' cascade
defaults, so the feature is never null.

Per-part inputs: wall / roof / floor / party-wall / windows / doors.
Windows + doors are apportioned to the main part (first in the list)
per RdSAP10 convention.

Wired into EpcMlTransform.to_row; transform VERSION 0.1.0 -> 0.2.0
(MINOR bump for an additive column per the ADR-0007 policy).

7 envelope unit tests + 2 transform-level tests, all AAA. Reference
geometry: 100 m^2 age-G mid-terrace -> ~208 W/K; ~285 W/K for two
storeys (wall + party-wall area double, roof + floor do not); drops
with better insulation; sums across extensions.

Party-wall area uses the per-storey party_wall_length_m sum directly;
it is not multiplied by the storey count a second time.
---
 packages/domain/src/domain/ml/envelope.py     | 235 +++++++++++++++++
 .../src/domain/ml/tests/test_envelope.py      | 241 ++++++++++++++++++
 .../src/domain/ml/tests/test_transform.py     |  49 +++-
 packages/domain/src/domain/ml/transform.py    |  24 +-
 4 files changed, 547 insertions(+), 2 deletions(-)
 create mode 100644 packages/domain/src/domain/ml/envelope.py
 create mode 100644 packages/domain/src/domain/ml/tests/test_envelope.py

diff --git a/packages/domain/src/domain/ml/envelope.py b/packages/domain/src/domain/ml/envelope.py
new file mode 100644
index 00000000..2fa1f9a7
--- /dev/null
+++ b/packages/domain/src/domain/ml/envelope.py
@@ -0,0 +1,235 @@
+"""Envelope heat-loss (W/K) summed across all building parts.
+
+Computes Sigma(U * A) + y * A_exposed over the main dwelling and every
+extension on a cert. U-values come from the cascade-defaulting helpers in
+`rdsap_uvalues`; geometry is read off `sap_building_parts` + the cert's
+pre-aggregated window area and door count.
+
+Used by `transform.py` to populate the `envelope_heat_loss_w_per_k` feature
+in v16.x. See ADR-0008 for the physics-as-feature rationale.
+"""
+
+from __future__ import annotations
+
+from typing import Any, Optional
+
+from datatypes.epc.domain.epc_property_data import SapBuildingPart
+
+from domain.ml.rdsap_uvalues import (
+    Country,
+    WALL_CAVITY,
+    WALL_UNKNOWN,
+    thermal_bridging_y,
+    u_door,
+    u_floor,
+    u_party_wall,
+    u_roof,
+    u_wall,
+    u_window,
+)
+
+
+# SAP10 wall_insulation_type code 4 ("None") marks no insulation declared.
+_WALL_INSULATION_NONE: int = 4
+
+# Standard SAP10 external door area (m^2) when door dimensions aren't given.
+_DEFAULT_DOOR_AREA_M2: float = 1.85
+
+
+def _int_or_none(value: Any) -> Optional[int]:
+    return value if isinstance(value, int) else None
+
+
+def _parse_thickness_mm(value: Any) -> Optional[int]:
+    if value is None:
+        return None
+    if isinstance(value, int):
+        return value
+    if not isinstance(value, str):
+        return None
+    s = value.strip()
+    if s.upper() == "NI":
+        return 0
+    digits = ""
+    for c in s:
+        if c.isdigit():
+            digits += c
+        else:
+            break
+    return int(digits) if digits else None
+
+
+def _part_geometry(part: SapBuildingPart) -> dict[str, float]:
+    """Sum floor area / heat-loss perimeter / party-wall length / room heights
+    across the floor dimensions in a building part."""
+    if not part.sap_floor_dimensions:
+        return {
+            "ground_floor_area_m2": 0.0,
+            "ground_perimeter_m": 0.0,
+            "top_floor_area_m2": 0.0,
+            "total_perimeter_m": 0.0,
+            "party_wall_length_m": 0.0,
+            "avg_room_height_m": 2.5,
+            "storey_count": 1.0,
+        }
+    fds = list(part.sap_floor_dimensions)
+    # Ground floor = floor == 0 if present, else the first entry.
+    ground = next((fd for fd in fds if fd.floor == 0), fds[0])
+    # Top floor = floor with the largest non-None index, else the last entry.
+    indexed = [(fd.floor if fd.floor is not None else 0, fd) for fd in fds]
+    top = max(indexed, key=lambda kv: kv[0])[1]
+    total_area = sum(fd.total_floor_area_m2 or 0.0 for fd in fds)
+    total_perimeter = sum(fd.heat_loss_perimeter_m or 0.0 for fd in fds)
+    party_length = sum(fd.party_wall_length_m or 0.0 for fd in fds)
+    weighted_height = sum(
+        (fd.total_floor_area_m2 or 0.0) * (fd.room_height_m or 2.5) for fd in fds
+    )
+    avg_height = (weighted_height / total_area) if total_area > 0 else 2.5
+    return {
+        "ground_floor_area_m2": ground.total_floor_area_m2 or 0.0,
+        "ground_perimeter_m": ground.heat_loss_perimeter_m or 0.0,
+        "top_floor_area_m2": top.total_floor_area_m2 or 0.0,
+        "total_perimeter_m": total_perimeter,
+        "party_wall_length_m": party_length,
+        "avg_room_height_m": avg_height,
+        "storey_count": float(len(fds)),
+    }
+
+
+def _part_heat_loss_w_per_k(
+    part: SapBuildingPart,
+    country: Country,
+    window_area_m2: float,
+    door_area_m2: float,
+    window_u_value: float,
+    door_u_value: float,
+) -> float:
+    """Heat loss coefficient (W/K) for a single building part: walls + roof +
+    floor + party walls + windows + doors + thermal bridging.
+
+    The aggregate-level caller (`envelope_heat_loss_w_per_k`) apportions windows
+    and doors to whichever part it considers primary (currently the first part);
+    other parts pass 0 for the window/door area.
+    """
+    geom = _part_geometry(part)
+
+    age_band = part.construction_age_band
+    wall_construction = _int_or_none(part.wall_construction)
+    wall_ins_type = _int_or_none(part.wall_insulation_type)
+    wall_ins_thickness = _parse_thickness_mm(part.wall_insulation_thickness)
+    wall_ins_present = wall_ins_type is not None and wall_ins_type != _WALL_INSULATION_NONE
+    party_construction = _int_or_none(part.party_wall_construction)
+    roof_thickness = _parse_thickness_mm(getattr(part, "roof_insulation_thickness", None))
+    floor_ins_thickness = _parse_thickness_mm(getattr(part, "floor_insulation_thickness", None))
+
+    # Floor — pick the ground-floor's floor_dimension for the BS EN ISO 13370
+    # area/perimeter inputs.
+    ground_fd = next(
+        (fd for fd in part.sap_floor_dimensions if fd.floor == 0),
+        part.sap_floor_dimensions[0] if part.sap_floor_dimensions else None,
+    )
+    floor_area = ground_fd.total_floor_area_m2 if ground_fd is not None else None
+    floor_perimeter = ground_fd.heat_loss_perimeter_m if ground_fd is not None else None
+    floor_construction = (
+        _int_or_none(ground_fd.floor_construction) if ground_fd is not None else None
+    )
+
+    uw = u_wall(
+        country=country,
+        age_band=age_band,
+        construction=wall_construction if wall_construction != WALL_UNKNOWN else None,
+        insulation_thickness_mm=wall_ins_thickness,
+        insulation_present=wall_ins_present,
+    )
+    ur = u_roof(country=country, age_band=age_band, insulation_thickness_mm=roof_thickness)
+    uf = u_floor(
+        country=country,
+        age_band=age_band,
+        construction=floor_construction,
+        insulation_thickness_mm=floor_ins_thickness,
+        area_m2=floor_area,
+        perimeter_m=floor_perimeter,
+        wall_thickness_mm=part.wall_thickness_mm,
+    )
+    upw = u_party_wall(party_wall_construction=party_construction)
+    y = thermal_bridging_y(age_band=age_band)
+
+    # Areas.
+    storey_count = geom["storey_count"]
+    storey_height = geom["avg_room_height_m"]
+    # SAP10.2 wall area: ground heat-loss perimeter (already net of party walls)
+    # * storey height * storey count, minus openings. party_wall_length_m is the
+    # sum over every storey already, so it is NOT multiplied by storey_count.
+    gross_wall_area = geom["ground_perimeter_m"] * storey_height * storey_count
+    net_wall_area = max(0.0, gross_wall_area - window_area_m2 - door_area_m2)
+    party_area = geom["party_wall_length_m"] * storey_height
+    roof_area = geom["top_floor_area_m2"]
+    floor_area_total = geom["ground_floor_area_m2"]
+
+    conduction = (
+        uw * net_wall_area
+        + upw * party_area
+        + ur * roof_area
+        + uf * floor_area_total
+        + window_u_value * window_area_m2
+        + door_u_value * door_area_m2
+    )
+    bridging_area = net_wall_area + party_area + roof_area + floor_area_total + window_area_m2 + door_area_m2
+    return conduction + y * bridging_area
+
+
+def envelope_heat_loss_w_per_k(
+    sap_building_parts: list[SapBuildingPart],
+    *,
+    country_code: Optional[str],
+    window_total_area_m2: float,
+    window_avg_u_value: Optional[float],
+    door_count: int,
+    insulated_door_count: int,
+    insulated_door_u_value: Optional[float],
+    age_band_for_door: Optional[str] = None,
+) -> float:
+    """Total envelope heat-loss coefficient (W/K) summed over all building parts.
+
+    Windows and doors are apportioned entirely to the first part (the main
+    dwelling) per RdSAP10 convention -- the cert's window list is not split
+    across extensions. All U-values cascade through `rdsap_uvalues` defaults,
+    so the return is never null.
+    """
+    if not sap_building_parts:
+        return 0.0
+    country = Country.from_code(country_code)
+    door_area = max(0, door_count) * _DEFAULT_DOOR_AREA_M2
+    if window_avg_u_value is None or window_avg_u_value <= 0:
+        window_u = u_window(installed_year=None, glazing_type=None, frame_type=None)
+    else:
+        window_u = window_avg_u_value
+    # Door U: blend insulated/uninsulated by share.
+    door_uninsulated = u_door(
+        country=country,
+        age_band=age_band_for_door or sap_building_parts[0].construction_age_band,
+        insulated=False,
+        insulated_u_value=None,
+    )
+    door_insulated = (
+        insulated_door_u_value
+        if insulated_door_u_value is not None
+        else u_door(country=country, age_band="M", insulated=True, insulated_u_value=None)
+    )
+    insulated_share = (insulated_door_count or 0) / door_count if door_count > 0 else 0.0
+    door_u = (1.0 - insulated_share) * door_uninsulated + insulated_share * door_insulated
+
+    total = 0.0
+    for i, part in enumerate(sap_building_parts):
+        # Windows and doors only on the first (main) part.
+        w_area = window_total_area_m2 if i == 0 else 0.0
+        d_area = door_area if i == 0 else 0.0
+        total += _part_heat_loss_w_per_k(
+            part=part,
+            country=country,
+            window_area_m2=w_area,
+            door_area_m2=d_area,
+            window_u_value=window_u,
+            door_u_value=door_u,
+        )
+    return total
diff --git a/packages/domain/src/domain/ml/tests/test_envelope.py b/packages/domain/src/domain/ml/tests/test_envelope.py
new file mode 100644
index 00000000..443f101b
--- /dev/null
+++ b/packages/domain/src/domain/ml/tests/test_envelope.py
@@ -0,0 +1,241 @@
+"""Tests for envelope_heat_loss_w_per_k.
+
+Uses the existing `make_building_part` / `make_floor_dimension` fixtures so
+test cases stay close to the shape transform.py sees on a real cert.
+"""
+
+import pytest
+
+from domain.ml.envelope import envelope_heat_loss_w_per_k
+from domain.ml.tests._fixtures import make_building_part, make_floor_dimension
+
+
+def test_envelope_single_storey_no_windows_no_doors_age_g_cavity_returns_expected_w_per_k() -> None:
+    # Arrange — Mid-terrace, age G cavity-as-built, 100 m^2, 40 m perimeter, 5 m party wall,
+    # 2.5 m room height, single storey, no windows, no doors.
+    # Expected (RdSAP10 Tables 6,15,18,21):
+    #   U_wall = 0.60, U_roof = 0.40, U_floor ~= 0.61 (ISO 13370 with A=100, P=40),
+    #   U_party = 0.0 (solid masonry default), y = 0.15.
+    # Wall area = 40 * 2.5 - 0 - 0 = 100 m^2; party = 5 * 2.5 = 12.5 m^2.
+    # Heat loss ~= 0.60*100 + 0.40*100 + 0.61*100 + 0.0*12.5 + 0.15*(100+100+100+12.5)
+    #            = 60 + 40 + 61 + 0 + 46.875 ~= 208 W/K.
+    main = make_building_part(
+        identifier="Main Dwelling",
+        construction_age_band="G",
+        wall_construction=4,  # cavity
+        wall_insulation_type=4,  # none
+        party_wall_construction=1,  # solid masonry
+        roof_construction=4,
+        floor_dimensions=[
+            make_floor_dimension(
+                total_floor_area_m2=100.0,
+                room_height_m=2.5,
+                party_wall_length_m=5.0,
+                heat_loss_perimeter_m=40.0,
+                floor=0,
+            )
+        ],
+    )
+
+    # Act
+    result = envelope_heat_loss_w_per_k(
+        sap_building_parts=[main],
+        country_code="ENG",
+        window_total_area_m2=0.0,
+        window_avg_u_value=None,
+        door_count=0,
+        insulated_door_count=0,
+        insulated_door_u_value=None,
+    )
+
+    # Assert
+    assert result == pytest.approx(208.0, abs=8.0)
+
+
+def test_envelope_doubles_for_two_storey_dwelling() -> None:
+    # Arrange — same floor plan but 2 storeys (wall area + party area double; roof+floor stay).
+    main = make_building_part(
+        identifier="Main Dwelling",
+        construction_age_band="G",
+        wall_construction=4,
+        wall_insulation_type=4,
+        party_wall_construction=1,
+        roof_construction=4,
+        floor_dimensions=[
+            make_floor_dimension(
+                total_floor_area_m2=100.0, room_height_m=2.5,
+                party_wall_length_m=5.0, heat_loss_perimeter_m=40.0, floor=0,
+            ),
+            make_floor_dimension(
+                total_floor_area_m2=100.0, room_height_m=2.5,
+                party_wall_length_m=5.0, heat_loss_perimeter_m=40.0, floor=1,
+            ),
+        ],
+    )
+
+    # Act
+    result = envelope_heat_loss_w_per_k(
+        sap_building_parts=[main],
+        country_code="ENG",
+        window_total_area_m2=0.0,
+        window_avg_u_value=None,
+        door_count=0,
+        insulated_door_count=0,
+        insulated_door_u_value=None,
+    )
+
+    # Assert — 0.60*200 + 0.40*100 + 0.61*100 + 0 + 0.15*(200+100+100+25) ~= 285 W/K.
+    assert result == pytest.approx(285.0, abs=12.0)
+
+
+def test_envelope_drops_with_better_insulation() -> None:
+    # Arrange — same geometry, age band M (post-2012, well insulated).
+    main = make_building_part(
+        identifier="Main Dwelling",
+        construction_age_band="M",
+        wall_construction=4,
+        wall_insulation_type=2,  # filled cavity
+        party_wall_construction=1,
+        roof_construction=4,
+        floor_dimensions=[
+            make_floor_dimension(
+                total_floor_area_m2=100.0, room_height_m=2.5,
+                party_wall_length_m=5.0, heat_loss_perimeter_m=40.0, floor=0,
+            )
+        ],
+    )
+    age_g_main = make_building_part(
+        identifier="Main Dwelling",
+        construction_age_band="G",
+        wall_construction=4,
+        wall_insulation_type=4,
+        party_wall_construction=1,
+        roof_construction=4,
+        floor_dimensions=[
+            make_floor_dimension(
+                total_floor_area_m2=100.0, room_height_m=2.5,
+                party_wall_length_m=5.0, heat_loss_perimeter_m=40.0, floor=0,
+            )
+        ],
+    )
+
+    # Act
+    age_m = envelope_heat_loss_w_per_k(
+        sap_building_parts=[main], country_code="ENG", window_total_area_m2=0.0,
+        window_avg_u_value=None, door_count=0, insulated_door_count=0, insulated_door_u_value=None,
+    )
+    age_g = envelope_heat_loss_w_per_k(
+        sap_building_parts=[age_g_main], country_code="ENG", window_total_area_m2=0.0,
+        window_avg_u_value=None, door_count=0, insulated_door_count=0, insulated_door_u_value=None,
+    )
+
+    # Assert
+    assert age_m < age_g
+
+
+def test_envelope_returns_zero_when_no_building_parts() -> None:
+    # Arrange / Act / Assert
+    assert envelope_heat_loss_w_per_k(
+        sap_building_parts=[], country_code="ENG", window_total_area_m2=0.0,
+        window_avg_u_value=None, door_count=0, insulated_door_count=0, insulated_door_u_value=None,
+    ) == 0.0
+
+
+def test_envelope_sums_main_and_extension_contributions() -> None:
+    # Arrange — main + one extension; combined > main alone.
+    main = make_building_part(
+        identifier="Main Dwelling",
+        construction_age_band="G",
+        wall_construction=4,
+        wall_insulation_type=4,
+        party_wall_construction=1,
+        roof_construction=4,
+        floor_dimensions=[
+            make_floor_dimension(
+                total_floor_area_m2=100.0, room_height_m=2.5,
+                party_wall_length_m=5.0, heat_loss_perimeter_m=40.0, floor=0,
+            )
+        ],
+    )
+    extension = make_building_part(
+        identifier="Extension 1",
+        construction_age_band="L",
+        wall_construction=4,
+        wall_insulation_type=2,
+        party_wall_construction=1,
+        roof_construction=4,
+        floor_dimensions=[
+            make_floor_dimension(
+                total_floor_area_m2=15.0, room_height_m=2.4,
+                party_wall_length_m=0.0, heat_loss_perimeter_m=16.0, floor=0,
+            )
+        ],
+    )
+
+    # Act
+    main_only = envelope_heat_loss_w_per_k(
+        sap_building_parts=[main], country_code="ENG", window_total_area_m2=0.0,
+        window_avg_u_value=None, door_count=0, insulated_door_count=0, insulated_door_u_value=None,
+    )
+    with_extension = envelope_heat_loss_w_per_k(
+        sap_building_parts=[main, extension], country_code="ENG", window_total_area_m2=0.0,
+        window_avg_u_value=None, door_count=0, insulated_door_count=0, insulated_door_u_value=None,
+    )
+
+    # Assert
+    assert with_extension > main_only
+
+
+def test_envelope_increases_with_windows_and_doors() -> None:
+    # Arrange — same base, but with 15 m^2 of windows + 1 door.
+    main = make_building_part(
+        identifier="Main Dwelling",
+        construction_age_band="G",
+        wall_construction=4,
+        wall_insulation_type=4,
+        party_wall_construction=1,
+        roof_construction=4,
+        floor_dimensions=[
+            make_floor_dimension(
+                total_floor_area_m2=100.0, room_height_m=2.5,
+                party_wall_length_m=5.0, heat_loss_perimeter_m=40.0, floor=0,
+            )
+        ],
+    )
+
+    # Act
+    no_openings = envelope_heat_loss_w_per_k(
+        sap_building_parts=[main], country_code="ENG", window_total_area_m2=0.0,
+        window_avg_u_value=None, door_count=0, insulated_door_count=0, insulated_door_u_value=None,
+    )
+    with_openings = envelope_heat_loss_w_per_k(
+        sap_building_parts=[main], country_code="ENG", window_total_area_m2=15.0,
+        window_avg_u_value=2.8, door_count=1, insulated_door_count=0, insulated_door_u_value=None,
+    )
+
+    # Assert — openings add net heat loss because U_window (2.8) > U_wall (0.60).
+    assert with_openings > no_openings
+
+
+def test_envelope_never_null_even_with_missing_fields() -> None:
+    # Arrange — minimal building part with most fields unspecified.
+    main = make_building_part(
+        identifier="Main Dwelling",
+        construction_age_band="NR",  # unrecorded
+        floor_dimensions=[
+            make_floor_dimension(
+                total_floor_area_m2=80.0, room_height_m=2.5,
+                party_wall_length_m=0.0, heat_loss_perimeter_m=36.0, floor=0,
+            )
+        ],
+    )
+
+    # Act
+    result = envelope_heat_loss_w_per_k(
+        sap_building_parts=[main], country_code=None,
+        window_total_area_m2=0.0, window_avg_u_value=None,
+        door_count=0, insulated_door_count=0, insulated_door_u_value=None,
+    )
+
+    # Assert — finite, positive.
+    assert result > 0.0
diff --git a/packages/domain/src/domain/ml/tests/test_transform.py b/packages/domain/src/domain/ml/tests/test_transform.py
index cfc00894..5fad2da7 100644
--- a/packages/domain/src/domain/ml/tests/test_transform.py
+++ b/packages/domain/src/domain/ml/tests/test_transform.py
@@ -36,7 +36,7 @@ def test_transform_advertises_version_and_target_columns() -> None:
 
     # Assert
     assert isinstance(schema, TransformSchema)
-    assert schema.transform_version == "0.1.0"
+    assert schema.transform_version == "0.2.0"
     assert schema.transform_version == EpcMlTransform.VERSION
     assert set(schema.target_columns.keys()) == set(_EXPECTED_TARGET_DTYPES.keys())
     for target_name, expected_dtype in _EXPECTED_TARGET_DTYPES.items():
@@ -1251,6 +1251,53 @@ def test_to_row_parses_no_insulation_sentinel_as_zero_mm() -> None:
     assert row["main_dwelling_roof_insulation_thickness_mm"] is None
 
 
+def test_schema_advertises_envelope_heat_loss_feature() -> None:
+    # Arrange
+    transform = EpcMlTransform()
+
+    # Act
+    schema = transform.schema()
+
+    # Assert
+    assert "envelope_heat_loss_w_per_k" in schema.feature_columns
+    column = schema.feature_columns["envelope_heat_loss_w_per_k"]
+    assert column.dtype is float
+    assert column.nullable is False
+
+
+def test_to_row_emits_positive_envelope_heat_loss_for_sap10_epc() -> None:
+    # Arrange
+    from domain.ml.tests._fixtures import make_building_part, make_floor_dimension
+
+    main = make_building_part(
+        identifier="Main Dwelling",
+        construction_age_band="G",
+        wall_construction=4,
+        wall_insulation_type=4,
+        party_wall_construction=1,
+        roof_construction=4,
+        floor_dimensions=[
+            make_floor_dimension(
+                total_floor_area_m2=100.0, room_height_m=2.5,
+                party_wall_length_m=5.0, heat_loss_perimeter_m=40.0, floor=0,
+            )
+        ],
+    )
+    epc = make_minimal_sap10_epc(
+        energy_rating_current=70,
+        sap_building_parts=[main],
+        total_floor_area_m2=100.0,
+        country_code="ENG",
+    )
+
+    # Act
+    row = EpcMlTransform().to_row(epc)
+
+    # Assert — envelope_heat_loss in plausible range for a 100 m^2 age-G semi.
+    assert row["envelope_heat_loss_w_per_k"] > 100.0
+    assert row["envelope_heat_loss_w_per_k"] < 400.0
+
+
 def test_to_row_threads_top_level_fabric_and_demand_scalars() -> None:
     # Arrange
     from dataclasses import replace
diff --git a/packages/domain/src/domain/ml/transform.py b/packages/domain/src/domain/ml/transform.py
index bc23ff1b..85c46227 100644
--- a/packages/domain/src/domain/ml/transform.py
+++ b/packages/domain/src/domain/ml/transform.py
@@ -22,6 +22,7 @@ from datatypes.epc.domain.epc_property_data import (
     SapHeating,
     SapWindow,
 )
+from domain.ml.envelope import envelope_heat_loss_w_per_k
 from domain.ml.schema import ColumnSpec, TransformSchema
 from domain.ml.ucl import apply_ucl_correction
 
@@ -757,6 +758,16 @@ _FEATURE_COLUMNS: dict[str, ColumnSpec] = {
         dtype=int, nullable=False,
         description="Number of building parts beyond Main Dwelling and the secondary part.",
     ),
+    "envelope_heat_loss_w_per_k": ColumnSpec(
+        dtype=float, nullable=False,
+        description=(
+            "Sum of U*A over walls / roof / floor / party walls / windows / doors "
+            "plus thermal-bridging factor y times total exposed area, summed across "
+            "every sap_building_part. U-values cascade-default per ADR-0008 so the "
+            "feature is never null. Approximates the SAP10.2 worksheet's envelope "
+            "conduction loss in W/K."
+        ),
+    ),
 }
 
 
@@ -811,7 +822,7 @@ class EpcMlTransform:
     Version 0.1.0 — schema contract only; feature columns added in subsequent slices.
     """
 
-    VERSION: str = "0.1.0"
+    VERSION: str = "0.2.0"
 
     def schema(self) -> TransformSchema:
         """The cross-repo ML data contract.
@@ -858,6 +869,15 @@ class EpcMlTransform:
         heating_aggregates = _heating_aggregates(epc.sap_heating)
         pv_aggregates = _pv_aggregates(epc.sap_energy_source)
         energy_source_other = _energy_source_other_aggregates(epc.sap_energy_source)
+        envelope_w_per_k = envelope_heat_loss_w_per_k(
+            sap_building_parts=epc.sap_building_parts,
+            country_code=epc.country_code,
+            window_total_area_m2=float(window_aggregates.get("window_total_area_m2") or 0.0),
+            window_avg_u_value=window_aggregates.get("window_avg_u_value"),
+            door_count=epc.door_count,
+            insulated_door_count=epc.insulated_door_count,
+            insulated_door_u_value=epc.insulated_door_u_value,
+        )
         return {
             # Features — geometry
             "total_floor_area_m2": epc.total_floor_area_m2,
@@ -890,6 +910,8 @@ class EpcMlTransform:
             **window_aggregates,
             # Features — building parts aggregates + Main Dwelling carve-out
             **building_part_aggregates,
+            # Features — engineered physics: envelope heat-loss W/K (ADR-0008)
+            "envelope_heat_loss_w_per_k": envelope_w_per_k,
             # Features — heating system (primary slot + water + secondary)
             **heating_aggregates,
             # Features — PV (capacity source + array aggregates by SAP octant)