slice 16c: envelope_heat_loss_w_per_k feature

New module domain.ml.envelope sums Sigma(U*A) + y*A_exposed across every
sap_building_part on a cert. U-values come from rdsap_uvalues' cascade
defaults, so the feature is never null.

Per-part inputs: wall / roof / floor / party-wall / windows / doors.
Windows + doors are apportioned to the main part (first in the list)
per RdSAP10 convention.

Wired into EpcMlTransform.to_row; transform VERSION 0.1.0 -> 0.2.0
(MINOR bump for an additive column per the ADR-0007 policy).

7 envelope unit tests + 2 transform-level tests, all Arrange-Act-Assert.
Reference geometry: 100 m^2 age-G mid-terrace -> ~208 W/K; rises to
~285 W/K for two storeys (wall and party-wall areas double, roof and floor
do not); drops with better insulation; sums across extensions.
This commit is contained in:
Khalim Conn-Kowlessar 2026-05-17 11:53:43 +00:00
parent 67a4f92d53
commit fca8815991
4 changed files with 547 additions and 2 deletions

View file

@ -0,0 +1,235 @@
"""Envelope heat-loss (W/K) summed across all building parts.
Computes Sigma(U * A) + y * A_exposed over the main dwelling and every
extension on a cert. U-values come from the cascade-defaulting helpers in
`rdsap_uvalues`; geometry is read off `sap_building_parts` + the cert's
pre-aggregated window area and door count.
Used by `transform.py` to populate the `envelope_heat_loss_w_per_k` feature
in v16.x. See ADR-0008 for the physics-as-feature rationale.
"""
from __future__ import annotations
from typing import Any, Optional
from datatypes.epc.domain.epc_property_data import SapBuildingPart
from domain.ml.rdsap_uvalues import (
Country,
WALL_CAVITY,
WALL_UNKNOWN,
thermal_bridging_y,
u_door,
u_floor,
u_party_wall,
u_roof,
u_wall,
u_window,
)
# SAP10 `wall_insulation_type` code 4 ("None"): no wall insulation declared.
# Any other non-null code is treated as "insulation present" when looking up
# the wall U-value.
_WALL_INSULATION_NONE: int = 4
# Standard SAP10 external door area (m^2), used per door because the cert
# supplies only a door count, not door dimensions.
_DEFAULT_DOOR_AREA_M2: float = 1.85
def _int_or_none(value: Any) -> Optional[int]:
return value if isinstance(value, int) else None
def _parse_thickness_mm(value: Any) -> Optional[int]:
if value is None:
return None
if isinstance(value, int):
return value
if not isinstance(value, str):
return None
s = value.strip()
if s.upper() == "NI":
return 0
digits = ""
for c in s:
if c.isdigit():
digits += c
else:
break
return int(digits) if digits else None
def _part_geometry(part: SapBuildingPart) -> dict[str, float]:
"""Sum floor area / heat-loss perimeter / party-wall length / room heights
across the floor dimensions in a building part."""
if not part.sap_floor_dimensions:
return {
"ground_floor_area_m2": 0.0,
"ground_perimeter_m": 0.0,
"top_floor_area_m2": 0.0,
"total_perimeter_m": 0.0,
"party_wall_length_m": 0.0,
"avg_room_height_m": 2.5,
"storey_count": 1.0,
}
fds = list(part.sap_floor_dimensions)
# Ground floor = floor == 0 if present, else the first entry.
ground = next((fd for fd in fds if fd.floor == 0), fds[0])
# Top floor = floor with the largest non-None index, else the last entry.
indexed = [(fd.floor if fd.floor is not None else 0, fd) for fd in fds]
top = max(indexed, key=lambda kv: kv[0])[1]
total_area = sum(fd.total_floor_area_m2 or 0.0 for fd in fds)
total_perimeter = sum(fd.heat_loss_perimeter_m or 0.0 for fd in fds)
party_length = sum(fd.party_wall_length_m or 0.0 for fd in fds)
weighted_height = sum(
(fd.total_floor_area_m2 or 0.0) * (fd.room_height_m or 2.5) for fd in fds
)
avg_height = (weighted_height / total_area) if total_area > 0 else 2.5
return {
"ground_floor_area_m2": ground.total_floor_area_m2 or 0.0,
"ground_perimeter_m": ground.heat_loss_perimeter_m or 0.0,
"top_floor_area_m2": top.total_floor_area_m2 or 0.0,
"total_perimeter_m": total_perimeter,
"party_wall_length_m": party_length,
"avg_room_height_m": avg_height,
"storey_count": float(len(fds)),
}
def _part_heat_loss_w_per_k(
    part: SapBuildingPart,
    country: Country,
    window_area_m2: float,
    door_area_m2: float,
    window_u_value: float,
    door_u_value: float,
) -> float:
    """Return the heat-loss coefficient (W/K) of a single building part.

    The result is the conduction term Sigma(U * A) over walls, party walls,
    roof, ground floor, windows and doors, plus the thermal-bridging term
    ``y * A`` taken over the same set of areas.

    The aggregate-level caller (`envelope_heat_loss_w_per_k`) apportions
    windows and doors to whichever part it considers primary (currently the
    first part); other parts pass 0 for the window/door area.

    Args:
        part: Building part whose fabric fields and floor dimensions are read.
        country: Country forwarded to the `rdsap_uvalues` lookups.
        window_area_m2: Window area attributed to this part; deducted from the
            gross wall area.
        door_area_m2: Door area attributed to this part; deducted likewise.
        window_u_value: U-value applied to ``window_area_m2``.
        door_u_value: U-value applied to ``door_area_m2``.

    Returns:
        Conduction plus thermal-bridging heat loss in W/K.
    """
    geom = _part_geometry(part)
    age_band = part.construction_age_band

    wall_construction = _int_or_none(part.wall_construction)
    wall_ins_type = _int_or_none(part.wall_insulation_type)
    wall_ins_thickness = _parse_thickness_mm(part.wall_insulation_thickness)
    wall_ins_present = wall_ins_type is not None and wall_ins_type != _WALL_INSULATION_NONE
    party_construction = _int_or_none(part.party_wall_construction)
    # getattr: these attributes may be absent on the part.
    roof_thickness = _parse_thickness_mm(getattr(part, "roof_insulation_thickness", None))
    floor_ins_thickness = _parse_thickness_mm(getattr(part, "floor_insulation_thickness", None))

    # Floor -- pick the ground-floor's floor_dimension for the BS EN ISO 13370
    # area/perimeter inputs. `or []` keeps this None-safe, matching the empty
    # handling in `_part_geometry`.
    floor_dims = part.sap_floor_dimensions or []
    ground_fd = next(
        (fd for fd in floor_dims if fd.floor == 0),
        floor_dims[0] if floor_dims else None,
    )
    floor_area = ground_fd.total_floor_area_m2 if ground_fd is not None else None
    floor_perimeter = ground_fd.heat_loss_perimeter_m if ground_fd is not None else None
    floor_construction = (
        _int_or_none(ground_fd.floor_construction) if ground_fd is not None else None
    )

    uw = u_wall(
        country=country,
        age_band=age_band,
        # An explicit "unknown" code is passed as None so the lookup defaults.
        construction=wall_construction if wall_construction != WALL_UNKNOWN else None,
        insulation_thickness_mm=wall_ins_thickness,
        insulation_present=wall_ins_present,
    )
    ur = u_roof(country=country, age_band=age_band, insulation_thickness_mm=roof_thickness)
    uf = u_floor(
        country=country,
        age_band=age_band,
        construction=floor_construction,
        insulation_thickness_mm=floor_ins_thickness,
        area_m2=floor_area,
        perimeter_m=floor_perimeter,
        wall_thickness_mm=part.wall_thickness_mm,
    )
    upw = u_party_wall(party_wall_construction=party_construction)
    y = thermal_bridging_y(age_band=age_band)

    # Areas.
    storey_count = geom["storey_count"]
    storey_height = geom["avg_room_height_m"]
    # SAP10.2 wall area: gross exposed perimeter * storey height * storey count
    # minus openings. Heat-loss perimeter (heat_loss_perimeter_m on each floor
    # dimension) already excludes party walls.
    gross_wall_area = geom["ground_perimeter_m"] * storey_height * storey_count
    net_wall_area = max(0.0, gross_wall_area - window_area_m2 - door_area_m2)
    # `party_wall_length_m` is already summed over every storey by
    # `_part_geometry`, so it must NOT be multiplied by storey_count again
    # (doing so double-counts party walls on multi-storey parts).
    party_area = geom["party_wall_length_m"] * storey_height
    roof_area = geom["top_floor_area_m2"]
    floor_area_total = geom["ground_floor_area_m2"]

    conduction = (
        uw * net_wall_area
        + upw * party_area
        + ur * roof_area
        + uf * floor_area_total
        + window_u_value * window_area_m2
        + door_u_value * door_area_m2
    )
    bridging_area = (
        net_wall_area
        + party_area
        + roof_area
        + floor_area_total
        + window_area_m2
        + door_area_m2
    )
    return conduction + y * bridging_area
def envelope_heat_loss_w_per_k(
    sap_building_parts: list[SapBuildingPart],
    *,
    country_code: Optional[str],
    window_total_area_m2: float,
    window_avg_u_value: Optional[float],
    door_count: int,
    insulated_door_count: int,
    insulated_door_u_value: Optional[float],
    age_band_for_door: Optional[str] = None,
) -> float:
    """Total envelope heat-loss coefficient (W/K) summed over all building parts.

    Windows and doors are apportioned entirely to the first part (the main
    dwelling) per RdSAP10 convention -- the cert's window list is not split
    across extensions. All U-values cascade through `rdsap_uvalues` defaults,
    so the return is never null.

    Args:
        sap_building_parts: Building parts on the cert; the first is treated
            as the main dwelling.
        country_code: Country code resolved via ``Country.from_code``.
        window_total_area_m2: Total window area for the whole cert.
        window_avg_u_value: Average window U-value; a missing or non-positive
            value falls back to the ``u_window`` default.
        door_count: Number of external doors. ``None`` or a negative value is
            treated as 0.
        insulated_door_count: How many of those doors are insulated. ``None``
            is treated as 0; the resulting share is clamped to [0, 1].
        insulated_door_u_value: U-value for insulated doors, if recorded.
        age_band_for_door: Age band for the uninsulated-door lookup; defaults
            to the first part's ``construction_age_band``.

    Returns:
        Heat-loss coefficient in W/K; ``0.0`` when there are no building parts.
    """
    if not sap_building_parts:
        return 0.0

    country = Country.from_code(country_code)

    # Guard against a missing door count, mirroring the existing `or 0` guard
    # on insulated_door_count.
    doors = max(0, door_count or 0)
    door_area = doors * _DEFAULT_DOOR_AREA_M2

    if window_avg_u_value is None or window_avg_u_value <= 0:
        window_u = u_window(installed_year=None, glazing_type=None, frame_type=None)
    else:
        window_u = window_avg_u_value

    # Door U: blend insulated/uninsulated by share.
    door_uninsulated = u_door(
        country=country,
        age_band=age_band_for_door or sap_building_parts[0].construction_age_band,
        insulated=False,
        insulated_u_value=None,
    )
    door_insulated = (
        insulated_door_u_value
        if insulated_door_u_value is not None
        else u_door(country=country, age_band="M", insulated=True, insulated_u_value=None)
    )
    if doors > 0:
        # Clamp: an insulated count above the door count would otherwise give
        # the uninsulated U-value a negative weight.
        insulated_share = min(1.0, max(0.0, (insulated_door_count or 0) / doors))
    else:
        insulated_share = 0.0
    door_u = (1.0 - insulated_share) * door_uninsulated + insulated_share * door_insulated

    total = 0.0
    for i, part in enumerate(sap_building_parts):
        # Windows and doors only on the first (main) part.
        is_main = i == 0
        total += _part_heat_loss_w_per_k(
            part=part,
            country=country,
            window_area_m2=window_total_area_m2 if is_main else 0.0,
            door_area_m2=door_area if is_main else 0.0,
            window_u_value=window_u,
            door_u_value=door_u,
        )
    return total

View file

@ -0,0 +1,241 @@
"""Tests for envelope_heat_loss_w_per_k.
Uses the existing `make_building_part` / `make_floor_dimension` fixtures so
test cases stay close to the shape transform.py sees on a real cert.
"""
import pytest
from domain.ml.envelope import envelope_heat_loss_w_per_k
from domain.ml.tests._fixtures import make_building_part, make_floor_dimension
def test_envelope_single_storey_no_windows_no_doors_age_g_cavity_returns_expected_w_per_k() -> None:
    # Arrange — single-storey mid-terrace, age band G, cavity wall as built:
    # 100 m^2 floor, 40 m heat-loss perimeter, 5 m party wall, 2.5 m rooms,
    # no openings at all.
    # Hand calculation (RdSAP10 Tables 6, 15, 18, 21):
    #   U_wall = 0.60, U_roof = 0.40, U_floor ~= 0.61 (ISO 13370, A=100, P=40),
    #   U_party = 0.0 (solid masonry default), y = 0.15.
    #   Wall area  = 40 * 2.5 = 100 m^2; party area = 5 * 2.5 = 12.5 m^2.
    #   Conduction = 0.60*100 + 0.40*100 + 0.61*100 + 0.0*12.5 = 161
    #   Bridging   = 0.15 * (100 + 100 + 100 + 12.5)          = 46.875
    #   Total                                                 ~= 208 W/K.
    ground = make_floor_dimension(
        total_floor_area_m2=100.0,
        room_height_m=2.5,
        party_wall_length_m=5.0,
        heat_loss_perimeter_m=40.0,
        floor=0,
    )
    dwelling = make_building_part(
        identifier="Main Dwelling",
        construction_age_band="G",
        wall_construction=4,  # cavity
        wall_insulation_type=4,  # none
        party_wall_construction=1,  # solid masonry
        roof_construction=4,
        floor_dimensions=[ground],
    )

    # Act
    heat_loss = envelope_heat_loss_w_per_k(
        sap_building_parts=[dwelling],
        country_code="ENG",
        window_total_area_m2=0.0,
        window_avg_u_value=None,
        door_count=0,
        insulated_door_count=0,
        insulated_door_u_value=None,
    )

    # Assert
    assert heat_loss == pytest.approx(208.0, abs=8.0)
def test_envelope_doubles_for_two_storey_dwelling() -> None:
    # Arrange — the reference floor plan stacked twice: wall and party-wall
    # areas double, while roof and ground-floor areas are unchanged.
    storeys = [
        make_floor_dimension(
            total_floor_area_m2=100.0,
            room_height_m=2.5,
            party_wall_length_m=5.0,
            heat_loss_perimeter_m=40.0,
            floor=level,
        )
        for level in (0, 1)
    ]
    dwelling = make_building_part(
        identifier="Main Dwelling",
        construction_age_band="G",
        wall_construction=4,
        wall_insulation_type=4,
        party_wall_construction=1,
        roof_construction=4,
        floor_dimensions=storeys,
    )

    # Act
    heat_loss = envelope_heat_loss_w_per_k(
        sap_building_parts=[dwelling],
        country_code="ENG",
        window_total_area_m2=0.0,
        window_avg_u_value=None,
        door_count=0,
        insulated_door_count=0,
        insulated_door_u_value=None,
    )

    # Assert — 0.60*200 + 0.40*100 + 0.61*100 + 0 + 0.15*(200+100+100+25) ~= 285 W/K.
    assert heat_loss == pytest.approx(285.0, abs=12.0)
def test_envelope_drops_with_better_insulation() -> None:
    # Arrange — identical geometry for both dwellings; only the age band and
    # wall insulation differ (M + filled cavity versus G + none).
    floor_spec = dict(
        total_floor_area_m2=100.0,
        room_height_m=2.5,
        party_wall_length_m=5.0,
        heat_loss_perimeter_m=40.0,
        floor=0,
    )
    no_openings = dict(
        country_code="ENG",
        window_total_area_m2=0.0,
        window_avg_u_value=None,
        door_count=0,
        insulated_door_count=0,
        insulated_door_u_value=None,
    )
    modern = make_building_part(
        identifier="Main Dwelling",
        construction_age_band="M",
        wall_construction=4,
        wall_insulation_type=2,  # filled cavity
        party_wall_construction=1,
        roof_construction=4,
        floor_dimensions=[make_floor_dimension(**floor_spec)],
    )
    older = make_building_part(
        identifier="Main Dwelling",
        construction_age_band="G",
        wall_construction=4,
        wall_insulation_type=4,
        party_wall_construction=1,
        roof_construction=4,
        floor_dimensions=[make_floor_dimension(**floor_spec)],
    )

    # Act
    modern_loss = envelope_heat_loss_w_per_k(sap_building_parts=[modern], **no_openings)
    older_loss = envelope_heat_loss_w_per_k(sap_building_parts=[older], **no_openings)

    # Assert
    assert modern_loss < older_loss
def test_envelope_returns_zero_when_no_building_parts() -> None:
    # Arrange
    no_parts: list = []

    # Act
    heat_loss = envelope_heat_loss_w_per_k(
        sap_building_parts=no_parts,
        country_code="ENG",
        window_total_area_m2=0.0,
        window_avg_u_value=None,
        door_count=0,
        insulated_door_count=0,
        insulated_door_u_value=None,
    )

    # Assert — nothing to sum over, so the coefficient is exactly zero.
    assert heat_loss == 0.0
def test_envelope_sums_main_and_extension_contributions() -> None:
    # Arrange — a main dwelling plus one small extension; adding the
    # extension must increase the total.
    no_openings = dict(
        country_code="ENG",
        window_total_area_m2=0.0,
        window_avg_u_value=None,
        door_count=0,
        insulated_door_count=0,
        insulated_door_u_value=None,
    )
    dwelling = make_building_part(
        identifier="Main Dwelling",
        construction_age_band="G",
        wall_construction=4,
        wall_insulation_type=4,
        party_wall_construction=1,
        roof_construction=4,
        floor_dimensions=[
            make_floor_dimension(
                total_floor_area_m2=100.0,
                room_height_m=2.5,
                party_wall_length_m=5.0,
                heat_loss_perimeter_m=40.0,
                floor=0,
            )
        ],
    )
    annex = make_building_part(
        identifier="Extension 1",
        construction_age_band="L",
        wall_construction=4,
        wall_insulation_type=2,
        party_wall_construction=1,
        roof_construction=4,
        floor_dimensions=[
            make_floor_dimension(
                total_floor_area_m2=15.0,
                room_height_m=2.4,
                party_wall_length_m=0.0,
                heat_loss_perimeter_m=16.0,
                floor=0,
            )
        ],
    )

    # Act
    dwelling_only = envelope_heat_loss_w_per_k(sap_building_parts=[dwelling], **no_openings)
    dwelling_plus_annex = envelope_heat_loss_w_per_k(
        sap_building_parts=[dwelling, annex], **no_openings
    )

    # Assert
    assert dwelling_plus_annex > dwelling_only
def test_envelope_increases_with_windows_and_doors() -> None:
    # Arrange — reference dwelling, evaluated once bare and once with
    # 15 m^2 of windows and a single uninsulated door.
    dwelling = make_building_part(
        identifier="Main Dwelling",
        construction_age_band="G",
        wall_construction=4,
        wall_insulation_type=4,
        party_wall_construction=1,
        roof_construction=4,
        floor_dimensions=[
            make_floor_dimension(
                total_floor_area_m2=100.0,
                room_height_m=2.5,
                party_wall_length_m=5.0,
                heat_loss_perimeter_m=40.0,
                floor=0,
            )
        ],
    )

    # Act
    bare = envelope_heat_loss_w_per_k(
        sap_building_parts=[dwelling],
        country_code="ENG",
        window_total_area_m2=0.0,
        window_avg_u_value=None,
        door_count=0,
        insulated_door_count=0,
        insulated_door_u_value=None,
    )
    glazed = envelope_heat_loss_w_per_k(
        sap_building_parts=[dwelling],
        country_code="ENG",
        window_total_area_m2=15.0,
        window_avg_u_value=2.8,
        door_count=1,
        insulated_door_count=0,
        insulated_door_u_value=None,
    )

    # Assert — openings add net heat loss because U_window (2.8) > U_wall (0.60).
    assert glazed > bare
def test_envelope_never_null_even_with_missing_fields() -> None:
    # Local import: only this test needs math.
    import math

    # Arrange — minimal building part with most fields unspecified.
    main = make_building_part(
        identifier="Main Dwelling",
        construction_age_band="NR",  # unrecorded
        floor_dimensions=[
            make_floor_dimension(
                total_floor_area_m2=80.0,
                room_height_m=2.5,
                party_wall_length_m=0.0,
                heat_loss_perimeter_m=36.0,
                floor=0,
            )
        ],
    )

    # Act
    result = envelope_heat_loss_w_per_k(
        sap_building_parts=[main],
        country_code=None,
        window_total_area_m2=0.0,
        window_avg_u_value=None,
        door_count=0,
        insulated_door_count=0,
        insulated_door_u_value=None,
    )

    # Assert — a real number (not None), finite, and positive. The bare
    # `> 0.0` check alone would let +inf through.
    assert result is not None
    assert math.isfinite(result)
    assert result > 0.0

View file

@ -36,7 +36,7 @@ def test_transform_advertises_version_and_target_columns() -> None:
# Assert
assert isinstance(schema, TransformSchema)
assert schema.transform_version == "0.1.0"
assert schema.transform_version == "0.2.0"
assert schema.transform_version == EpcMlTransform.VERSION
assert set(schema.target_columns.keys()) == set(_EXPECTED_TARGET_DTYPES.keys())
for target_name, expected_dtype in _EXPECTED_TARGET_DTYPES.items():
@ -1251,6 +1251,53 @@ def test_to_row_parses_no_insulation_sentinel_as_zero_mm() -> None:
assert row["main_dwelling_roof_insulation_thickness_mm"] is None
def test_schema_advertises_envelope_heat_loss_feature() -> None:
    # Arrange
    feature_name = "envelope_heat_loss_w_per_k"
    transform = EpcMlTransform()

    # Act
    schema = transform.schema()

    # Assert — the column is published as a non-nullable float.
    assert feature_name in schema.feature_columns
    spec = schema.feature_columns[feature_name]
    assert spec.dtype is float
    assert spec.nullable is False
def test_to_row_emits_positive_envelope_heat_loss_for_sap10_epc() -> None:
    # Arrange
    from domain.ml.tests._fixtures import make_building_part, make_floor_dimension

    ground = make_floor_dimension(
        total_floor_area_m2=100.0,
        room_height_m=2.5,
        party_wall_length_m=5.0,
        heat_loss_perimeter_m=40.0,
        floor=0,
    )
    dwelling = make_building_part(
        identifier="Main Dwelling",
        construction_age_band="G",
        wall_construction=4,
        wall_insulation_type=4,
        party_wall_construction=1,
        roof_construction=4,
        floor_dimensions=[ground],
    )
    epc = make_minimal_sap10_epc(
        energy_rating_current=70,
        sap_building_parts=[dwelling],
        total_floor_area_m2=100.0,
        country_code="ENG",
    )

    # Act
    row = EpcMlTransform().to_row(epc)

    # Assert — plausible band for a 100 m^2 age-G dwelling with one 5 m party wall.
    heat_loss = row["envelope_heat_loss_w_per_k"]
    assert heat_loss > 100.0
    assert heat_loss < 400.0
def test_to_row_threads_top_level_fabric_and_demand_scalars() -> None:
# Arrange
from dataclasses import replace

View file

@ -22,6 +22,7 @@ from datatypes.epc.domain.epc_property_data import (
SapHeating,
SapWindow,
)
from domain.ml.envelope import envelope_heat_loss_w_per_k
from domain.ml.schema import ColumnSpec, TransformSchema
from domain.ml.ucl import apply_ucl_correction
@ -757,6 +758,16 @@ _FEATURE_COLUMNS: dict[str, ColumnSpec] = {
dtype=int, nullable=False,
description="Number of building parts beyond Main Dwelling and the secondary part.",
),
"envelope_heat_loss_w_per_k": ColumnSpec(
dtype=float, nullable=False,
description=(
"Sum of U*A over walls / roof / floor / party walls / windows / doors "
"plus thermal-bridging factor y times total exposed area, summed across "
"every sap_building_part. U-values cascade-default per ADR-0008 so the "
"feature is never null. Approximates the SAP10.2 worksheet's envelope "
"conduction loss in W/K."
),
),
}
@ -811,7 +822,7 @@ class EpcMlTransform:
Version 0.1.0 schema contract only; feature columns added in subsequent slices.
"""
VERSION: str = "0.1.0"
VERSION: str = "0.2.0"
def schema(self) -> TransformSchema:
"""The cross-repo ML data contract.
@ -858,6 +869,15 @@ class EpcMlTransform:
heating_aggregates = _heating_aggregates(epc.sap_heating)
pv_aggregates = _pv_aggregates(epc.sap_energy_source)
energy_source_other = _energy_source_other_aggregates(epc.sap_energy_source)
envelope_w_per_k = envelope_heat_loss_w_per_k(
sap_building_parts=epc.sap_building_parts,
country_code=epc.country_code,
window_total_area_m2=float(window_aggregates.get("window_total_area_m2") or 0.0),
window_avg_u_value=window_aggregates.get("window_avg_u_value"),
door_count=epc.door_count,
insulated_door_count=epc.insulated_door_count,
insulated_door_u_value=epc.insulated_door_u_value,
)
return {
# Features — geometry
"total_floor_area_m2": epc.total_floor_area_m2,
@ -890,6 +910,8 @@ class EpcMlTransform:
**window_aggregates,
# Features — building parts aggregates + Main Dwelling carve-out
**building_part_aggregates,
# Features — engineered physics: envelope heat-loss W/K (ADR-0008)
"envelope_heat_loss_w_per_k": envelope_w_per_k,
# Features — heating system (primary slot + water + secondary)
**heating_aggregates,
# Features — PV (capacity source + array aggregates by SAP octant)