slice 8a: window physics and orientation aggregates

Thirteen window-aggregate features land on the transform: count,
total area, eight SAP-octant area columns (N/NE/E/SE/S/SW/W/NW),
area-weighted draught-proofing pct, and area-weighted u_value +
solar transmittance (nullable, populated only when windows carry
transmission_details). Windows with orientation outside 1-8 (e.g. 0
or "NR") contribute to count and total area but to no octant.

Also: epc codes CSV (gov api /api/codes export, RdSAP-Schema-21.x +
older versions) moved next to EpcPropertyData as epc_codes.csv —
canonical SAP enum source for upcoming categorical-share slices.
.gitignore exception added so the reference CSV is tracked.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Khalim Conn-Kowlessar 2026-05-16 15:32:45 +00:00
parent 9c8aa75469
commit dba254e316
5 changed files with 6053 additions and 4 deletions

2
.gitignore vendored
View file

@ -279,6 +279,8 @@ cache/
*.png
*.pptx
*.csv
# Tracked reference CSV: SAP enum codes (gov api /api/codes) co-located with EpcPropertyData.
!datatypes/epc/domain/epc_codes.csv
*.xlsx
# *.pdf
**/Chunks/

File diff suppressed because it is too large Load diff

View file

@ -3,10 +3,13 @@
`make_minimal_sap10_epc()` constructs a valid EpcPropertyData with the smallest
sensible defaults for required fields; target values are passed by kwarg so each
test parametrises only the fields it cares about.
`make_window()` builds a SapWindow with sensible SAP10 defaults; pass the fields
relevant to the test (orientation / dimensions / glazing / draught proofing).
"""
from datetime import date
from typing import Optional
from typing import Optional, Union
from datatypes.epc.domain.epc_property_data import (
EpcPropertyData,
@ -14,9 +17,43 @@ from datatypes.epc.domain.epc_property_data import (
RenewableHeatIncentive,
SapEnergySource,
SapHeating,
SapWindow,
WindowTransmissionDetails,
)
def make_window(
    *,
    orientation: Union[int, str] = 5,  # SAP10: 1=N, 2=NE, 3=E, 4=SE, 5=S, 6=SW, 7=W, 8=NW
    width: float = 1.0,
    height: float = 1.0,
    draught_proofed: bool = True,
    glazing_type: Union[int, str] = 2,  # "double glazing 2002-2022"
    glazing_gap: Union[int, str] = "16+",
    window_type: Union[int, str] = 1,
    window_location: Union[int, str] = 0,
    window_wall_type: Union[int, str] = 1,
    permanent_shutters_present: Union[bool, str] = False,
    frame_material: Optional[str] = "PVC",
    window_transmission_details: Optional[WindowTransmissionDetails] = None,
) -> SapWindow:
    """Return a SapWindow for tests, built from keyword-only overrides.

    Every parameter has a default, so a test passes only the fields it cares
    about. `width` and `height` are forwarded as `window_width` and
    `window_height`; every other argument is forwarded under its own name.
    """
    # Size and placement of the opening.
    geometry = {
        "window_width": width,
        "window_height": height,
        "orientation": orientation,
        "window_location": window_location,
        "window_wall_type": window_wall_type,
    }
    # Glazing and frame construction.
    construction = {
        "window_type": window_type,
        "glazing_type": glazing_type,
        "glazing_gap": glazing_gap,
        "frame_material": frame_material,
    }
    # Air-tightness, shutters and optional transmission details.
    performance = {
        "draught_proofed": draught_proofed,
        "permanent_shutters_present": permanent_shutters_present,
        "window_transmission_details": window_transmission_details,
    }
    return SapWindow(**geometry, **construction, **performance)
def make_minimal_sap10_epc(
*,
energy_rating_current: Optional[int] = None,
@ -48,6 +85,7 @@ def make_minimal_sap10_epc(
built_form: Optional[str] = None,
region_code: Optional[str] = None,
country_code: Optional[str] = None,
sap_windows: Optional[list[SapWindow]] = None,
) -> EpcPropertyData:
"""Construct a minimal valid SAP10 EpcPropertyData with parametrisable targets."""
return EpcPropertyData(
@ -68,7 +106,7 @@ def make_minimal_sap10_epc(
main_heating_details=[],
has_fixed_air_conditioning=False,
),
sap_windows=[],
sap_windows=list(sap_windows) if sap_windows is not None else [],
sap_energy_source=SapEnergySource(
mains_gas=True,
meter_type="Single",

View file

@ -1,7 +1,10 @@
"""Tests for EpcMlTransform v0.1.0 — schema-contract surface and target extraction."""
import pytest
from datatypes.epc.domain.epc_property_data import WindowTransmissionDetails
from domain.ml.schema import ColumnSpec, TransformSchema
from domain.ml.tests._fixtures import make_minimal_sap10_epc
from domain.ml.tests._fixtures import make_minimal_sap10_epc, make_window
from domain.ml.transform import EpcMlTransform
@ -303,3 +306,144 @@ def test_to_row_extracts_categorical_features() -> None:
assert row["built_form"] == "2"
assert row["region_code"] == "6"
assert row["country_code"] == "ENG"
# Expected (dtype, nullable) for each of the 13 window aggregate columns.
# Counts and areas are never null; the three area-weighted averages may be.
_WINDOW_PHYSICS_FEATURES_NULLABLE: dict[str, tuple[type, bool]] = {
    "window_count": (int, False),
    "window_total_area_m2": (float, False),
    **{
        f"window_area_orientation_{octant}": (float, False)
        for octant in ("N", "NE", "E", "SE", "S", "SW", "W", "NW")
    },
    "window_pct_draught_proofed": (float, True),
    "window_avg_u_value": (float, True),
    "window_avg_solar_transmittance": (float, True),
}
def test_schema_advertises_window_physics_features() -> None:
    """Each window aggregate column is in the schema with the expected dtype and nullability."""
    # Arrange
    transform = EpcMlTransform()

    # Act
    schema = transform.schema()

    # Assert
    for name, expectation in _WINDOW_PHYSICS_FEATURES_NULLABLE.items():
        wanted_dtype, wanted_nullable = expectation
        # Use the column name as the failure message so a missing column is obvious.
        assert name in schema.feature_columns, name
        spec = schema.feature_columns[name]
        assert spec.dtype is wanted_dtype
        assert spec.nullable is wanted_nullable
        assert spec.categorical is False
def test_to_row_aggregates_window_physics_and_orientation() -> None:
    """Areas are summed per octant and draught proofing is weighted by area."""
    # Arrange — three windows: S 2.0 m², N 1.5 m², E 1.0 m² (orientation codes 5/1/3)
    south = make_window(orientation=5, width=1.0, height=2.0, draught_proofed=True)
    north = make_window(orientation=1, width=1.0, height=1.5, draught_proofed=False)
    east = make_window(orientation=3, width=1.0, height=1.0, draught_proofed=True)
    epc = make_minimal_sap10_epc(
        energy_rating_current=82, sap_windows=[south, north, east]
    )
    transform = EpcMlTransform()

    # Act
    row = transform.to_row(epc)

    # Assert
    assert row["window_count"] == 3
    assert row["window_total_area_m2"] == pytest.approx(4.5)

    # Octants that received a window carry that window's area...
    populated_octants = {"N": 1.5, "E": 1.0, "S": 2.0}
    for octant, expected_area in populated_octants.items():
        assert row[f"window_area_orientation_{octant}"] == pytest.approx(expected_area)
    # ...and every other octant stays at exactly zero.
    for octant in ("NE", "SE", "SW", "W", "NW"):
        assert row[f"window_area_orientation_{octant}"] == 0.0

    # Area-weighted draught proofing: (2.0 + 1.0) / 4.5 * 100 = 66.66...%
    assert row["window_pct_draught_proofed"] == pytest.approx(66.666, abs=0.01)

    # No window carries transmission details, so both averages are null.
    assert row["window_avg_u_value"] is None
    assert row["window_avg_solar_transmittance"] is None
def test_to_row_skips_windows_with_unrecorded_orientation() -> None:
    """A window with orientation 0 adds to count and total area but to no octant."""
    # Arrange — two S windows (2.0 m² and 1.0 m²) plus a 0.5 m² window whose
    # orientation is 0 (horizontal/unrecorded).
    octants = ("N", "NE", "E", "SE", "S", "SW", "W", "NW")
    south_large = make_window(orientation=5, width=1.0, height=2.0)
    south_small = make_window(orientation=5, width=1.0, height=1.0)
    unrecorded = make_window(orientation=0, width=1.0, height=0.5)
    epc = make_minimal_sap10_epc(
        energy_rating_current=82,
        sap_windows=[south_large, south_small, unrecorded],
    )
    transform = EpcMlTransform()

    # Act
    row = transform.to_row(epc)

    # Assert
    assert row["window_count"] == 3
    assert row["window_total_area_m2"] == pytest.approx(3.5)
    assert row["window_area_orientation_S"] == pytest.approx(3.0)
    # The unrecorded window's 0.5 m² must be missing from the octant total.
    octant_total = sum(row[f"window_area_orientation_{octant}"] for octant in octants)
    assert octant_total == pytest.approx(3.0)
def test_to_row_returns_window_zeros_for_property_with_no_windows() -> None:
    """With no windows, counts and areas are zero and the averages are null."""
    # Arrange — the fixture defaults to an empty window list
    epc = make_minimal_sap10_epc(energy_rating_current=82)
    transform = EpcMlTransform()

    # Act
    row = transform.to_row(epc)

    # Assert
    assert row["window_count"] == 0
    assert row["window_total_area_m2"] == 0.0
    octant_columns = [
        f"window_area_orientation_{octant}"
        for octant in ("N", "NE", "E", "SE", "S", "SW", "W", "NW")
    ]
    for column in octant_columns:
        assert row[column] == 0.0
    for nullable_column in (
        "window_pct_draught_proofed",
        "window_avg_u_value",
        "window_avg_solar_transmittance",
    ):
        assert row[nullable_column] is None
def test_to_row_area_weights_window_u_value_and_solar_transmittance() -> None:
    """Only windows with transmission details feed the area-weighted averages."""
    # Arrange — two windows carry transmission details; the third does not.
    large_details = WindowTransmissionDetails(
        u_value=1.4, data_source=2, solar_transmittance=0.72
    )
    small_details = WindowTransmissionDetails(
        u_value=2.0, data_source=2, solar_transmittance=0.60
    )
    sap_windows = [
        make_window(
            orientation=5,
            width=2.0,
            height=1.0,
            window_transmission_details=large_details,
        ),
        make_window(
            orientation=1,
            width=1.0,
            height=1.0,
            window_transmission_details=small_details,
        ),
        make_window(orientation=3, width=1.0, height=1.0),  # no details
    ]
    epc = make_minimal_sap10_epc(energy_rating_current=82, sap_windows=sap_windows)
    transform = EpcMlTransform()

    # Act
    row = transform.to_row(epc)

    # Assert
    # U-value: (1.4 * 2.0 + 2.0 * 1.0) / (2.0 + 1.0) = 4.8 / 3.0 = 1.6
    assert row["window_avg_u_value"] == pytest.approx(1.6)
    # Solar transmittance: (0.72 * 2.0 + 0.60 * 1.0) / 3.0 = 2.04 / 3.0 = 0.68
    assert row["window_avg_solar_transmittance"] == pytest.approx(0.68)

View file

@ -13,11 +13,26 @@ See docs/adr/0007-kwh-as-ml-target.md for the target set and rationale.
from typing import Any, Optional
from datatypes.epc.domain.epc import Epc
from datatypes.epc.domain.epc_property_data import EpcPropertyData
from datatypes.epc.domain.epc_property_data import EpcPropertyData, SapWindow
from domain.ml.schema import ColumnSpec, TransformSchema
from domain.ml.ucl import apply_ucl_correction
# SAP10 orientation codes run 1-8 through the compass octants, starting at N:
# 1=N, 2=NE, 3=E, 4=SE, 5=S, 6=SW, 7=W, 8=NW.
# Any other value (0, "NR", etc.) is treated as unrecorded — it contributes to
# `window_count` and `window_total_area_m2` but to no octant.
_OCTANT_NAMES: dict[int, str] = dict(
    enumerate(("N", "NE", "E", "SE", "S", "SW", "W", "NW"), start=1)
)
_FEATURE_COLUMNS: dict[str, ColumnSpec] = {
# Geometry
"total_floor_area_m2": ColumnSpec(
@ -135,6 +150,38 @@ _FEATURE_COLUMNS: dict[str, ColumnSpec] = {
categorical=True,
description="ISO-style country code, e.g. 'ENG', 'WAL', 'EAW'.",
),
# Window aggregates — physics + orientation distribution
"window_count": ColumnSpec(
dtype=int, nullable=False, description="Number of windows."
),
"window_total_area_m2": ColumnSpec(
dtype=float,
nullable=False,
description="Total window area in square metres, summed across all windows.",
),
**{
f"window_area_orientation_{name}": ColumnSpec(
dtype=float,
nullable=False,
description=f"Total window area in m² facing {name} (SAP orientation code).",
)
for name in _OCTANT_NAMES.values()
},
"window_pct_draught_proofed": ColumnSpec(
dtype=float,
nullable=True,
description="Area-weighted percentage of windows with draught proofing (0-100).",
),
"window_avg_u_value": ColumnSpec(
dtype=float,
nullable=True,
description="Area-weighted mean window U-value (W/m²K); null when no transmission details.",
),
"window_avg_solar_transmittance": ColumnSpec(
dtype=float,
nullable=True,
description="Area-weighted mean window solar transmittance; null when no transmission details.",
),
}
@ -209,6 +256,7 @@ class EpcMlTransform:
v0.1.0 populates the six targets. Feature columns land in later slices.
"""
rhi = epc.renewable_heat_incentive
window_aggregates = _window_aggregates(epc.sap_windows)
return {
# Features — geometry
"total_floor_area_m2": epc.total_floor_area_m2,
@ -239,6 +287,8 @@ class EpcMlTransform:
"built_form": epc.built_form,
"region_code": epc.region_code,
"country_code": epc.country_code,
# Features — window aggregates (physics + orientation)
**window_aggregates,
# Targets
"sap_score": epc.energy_rating_current,
"co2_emissions": epc.co2_emissions_current,
@ -259,3 +309,56 @@ def _peui_ucl(epc: EpcPropertyData) -> Optional[float]:
return None
band = Epc.from_sap_score(epc.energy_rating_current)
return apply_ucl_correction(float(epc.energy_consumption_current), band)
def _window_aggregates(windows: list[SapWindow]) -> dict[str, Any]:
    """Collapse a property's windows into the 13 window feature columns.

    Returns a dict keyed by column name: the window count, the total area,
    one area per SAP octant, the area-weighted draught-proofed percentage and
    the area-weighted mean U-value and solar transmittance.

    An empty list yields a count of 0, areas of 0.0 and None for the three
    nullable columns. The two transmission means are weighted over only the
    windows that carry `window_transmission_details`, and stay None when no
    such window has a positive area. A window whose `orientation` is not an
    integer in 1-8 is treated as unrecorded: it counts towards the window
    count and the total area, but towards no octant.
    """
    window_count = len(windows)
    area_by_octant: dict[str, float] = dict.fromkeys(_OCTANT_NAMES.values(), 0.0)
    area_sum = 0.0
    proofed_area = 0.0
    detailed_area = 0.0
    u_value_sum = 0.0
    solar_sum = 0.0

    for window in windows:
        window_area = window.window_width * window.window_height
        area_sum += window_area

        # Accept either the bool True or the string "true".
        proofed = window.draught_proofed
        if proofed is True or proofed == "true":
            proofed_area += window_area

        # Non-int codes (e.g. "NR") are filtered out before the lookup.
        code = window.orientation
        octant = _OCTANT_NAMES.get(code) if isinstance(code, int) else None
        if octant is not None:
            area_by_octant[octant] += window_area

        details = window.window_transmission_details
        if details is not None:
            detailed_area += window_area
            u_value_sum += details.u_value * window_area
            solar_sum += details.solar_transmittance * window_area

    # The averages stay None when their weighting area is zero.
    pct_draught_proofed: Optional[float] = None
    if area_sum > 0:
        pct_draught_proofed = proofed_area / area_sum * 100.0

    avg_u_value: Optional[float] = None
    avg_solar_transmittance: Optional[float] = None
    if detailed_area > 0:
        avg_u_value = u_value_sum / detailed_area
        avg_solar_transmittance = solar_sum / detailed_area

    return {
        "window_count": window_count,
        "window_total_area_m2": area_sum,
        **{
            f"window_area_orientation_{octant}": octant_area
            for octant, octant_area in area_by_octant.items()
        },
        "window_pct_draught_proofed": pct_draught_proofed,
        "window_avg_u_value": avg_u_value,
        "window_avg_solar_transmittance": avg_solar_transmittance,
    }