slice 8b: window glazed_type and pvc_frame shares

Adds seventeen window-categorical-share features: one float per
SAP10 glazed_type code (1-15) plus a `_other` bucket for anything
outside the enum, and a single `window_pct_pvc_frame` for the
area-weighted PVC-frame share. All shares are fractions (0.0-1.0)
area-weighted over total window area. For window-less properties the
glazed_type shares are 0.0 and the pvc_frame share is null.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Khalim Conn-Kowlessar 2026-05-16 15:36:05 +00:00
parent dba254e316
commit 079e6f9a68
2 changed files with 131 additions and 4 deletions

View file

@ -415,6 +415,91 @@ def test_to_row_returns_window_zeros_for_property_with_no_windows() -> None:
assert row["window_avg_solar_transmittance"] is None
_GLAZED_TYPE_CODES: tuple[int, ...] = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)
def test_schema_advertises_window_categorical_share_features() -> None:
    """Schema exposes every glazed_type share column plus the pvc_frame share."""
    # Arrange
    transform = EpcMlTransform()

    # Act
    schema = transform.schema()

    # Assert — one non-nullable float share per known glazed_type code AND for
    # `_other`; the `_other` bucket gets the same spec checks as the per-code
    # columns rather than a presence check only.
    glazed_share_names = [
        *(f"window_pct_glazed_type_{code}" for code in _GLAZED_TYPE_CODES),
        "window_pct_glazed_type_other",
    ]
    for name in glazed_share_names:
        assert name in schema.feature_columns, name
        column = schema.feature_columns[name]
        assert column.dtype is float
        assert column.nullable is False
        assert column.categorical is False

    # Assert — the pvc_frame share is a nullable float
    assert "window_pct_pvc_frame" in schema.feature_columns
    pvc_column = schema.feature_columns["window_pct_pvc_frame"]
    assert pvc_column.dtype is float
    assert pvc_column.nullable is True
def test_to_row_aggregates_glazed_type_and_pvc_frame_shares() -> None:
    """Glazed_type and PVC-frame shares are weighted by window area."""
    # Arrange — 3.0 m² glazed_type=2 (PVC) + 1.5 m² glazed_type=13 (PVC)
    # + 0.5 m² glazed_type=5 (no frame material) = 5.0 m² in total.
    epc = make_minimal_sap10_epc(
        energy_rating_current=82,
        sap_windows=[
            make_window(width=1.5, height=2.0, glazing_type=2, frame_material="PVC"),
            make_window(width=1.0, height=1.5, glazing_type=13, frame_material="PVC"),
            make_window(width=0.5, height=1.0, glazing_type=5, frame_material=None),
        ],
    )
    transform = EpcMlTransform()

    # Act
    row = transform.to_row(epc)

    # Assert — expected area shares: 3.0/5.0, 1.5/5.0 and 0.5/5.0.
    expected_shares = {2: 0.6, 13: 0.3, 5: 0.1}
    for code, share in expected_shares.items():
        assert row[f"window_pct_glazed_type_{code}"] == pytest.approx(share)

    # Every known code that no window uses stays at exactly zero, as does `_other`.
    unused_codes = [code for code in _GLAZED_TYPE_CODES if code not in expected_shares]
    for code in unused_codes:
        assert row[f"window_pct_glazed_type_{code}"] == 0.0
    assert row["window_pct_glazed_type_other"] == 0.0

    # PVC frames cover (3.0 + 1.5) / 5.0 of the window area.
    assert row["window_pct_pvc_frame"] == pytest.approx(0.9)
def test_to_row_routes_unknown_glazed_type_to_other_bucket() -> None:
    """A glazing_type outside the 1-15 enum is counted in the `_other` share."""
    # Arrange — 2.0 m² of known glazing_type=2 and 1.0 m² of unknown glazing_type=99
    known_window = make_window(width=2.0, height=1.0, glazing_type=2, frame_material="PVC")
    unknown_window = make_window(width=1.0, height=1.0, glazing_type=99, frame_material="PVC")
    epc = make_minimal_sap10_epc(
        energy_rating_current=82,
        sap_windows=[known_window, unknown_window],
    )
    transform = EpcMlTransform()

    # Act
    row = transform.to_row(epc)

    # Assert — total area is 3.0 m², so type 2 holds 2/3 and `_other` holds 1/3.
    two_thirds = pytest.approx(2 / 3)
    one_third = pytest.approx(1 / 3)
    assert row["window_pct_glazed_type_2"] == two_thirds
    assert row["window_pct_glazed_type_other"] == one_third
def test_to_row_returns_window_share_zeros_for_property_with_no_windows() -> None:
    """Without windows every glazed_type share is 0.0 and the pvc_frame share is None."""
    # Arrange
    windowless_epc = make_minimal_sap10_epc(energy_rating_current=82)
    transform = EpcMlTransform()

    # Act
    row = transform.to_row(windowless_epc)

    # Assert — per-code shares first, then the `_other` bucket, then the nullable pvc share.
    glazed_columns = [f"window_pct_glazed_type_{code}" for code in _GLAZED_TYPE_CODES]
    glazed_columns.append("window_pct_glazed_type_other")
    for column in glazed_columns:
        assert row[column] == 0.0
    assert row["window_pct_pvc_frame"] is None
def test_to_row_area_weights_window_u_value_and_solar_transmittance() -> None:
# Arrange — two windows with transmission details; one without.
sap_windows = [

View file

@ -32,6 +32,12 @@ _OCTANT_NAMES: dict[int, str] = {
8: "NW",
}
# SAP10 glazed_type enumeration (codes 1-15 per the gov api /api/codes export at
# datatypes/epc/domain/epc_codes.csv, schema RdSAP-21.0.x). Anything outside this set
# (the documentation "ND" sentinel, future codes, or unexpected strings) falls into
# the `_other` bucket so share columns always sum to 1.0 of total window area.
_GLAZED_TYPE_CODES: tuple[int, ...] = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)
_FEATURE_COLUMNS: dict[str, ColumnSpec] = {
# Geometry
@ -182,6 +188,25 @@ _FEATURE_COLUMNS: dict[str, ColumnSpec] = {
nullable=True,
description="Area-weighted mean window solar transmittance; null when no transmission details.",
),
# Window glazed_type categorical share columns (sum to 1.0 over total area when any windows present)
**{
f"window_pct_glazed_type_{code}": ColumnSpec(
dtype=float,
nullable=False,
description=f"Area share of windows with glazed_type {code} (0.0-1.0).",
)
for code in _GLAZED_TYPE_CODES
},
"window_pct_glazed_type_other": ColumnSpec(
dtype=float,
nullable=False,
description="Area share of windows with glazed_type outside the SAP10 1-15 enum.",
),
"window_pct_pvc_frame": ColumnSpec(
dtype=float,
nullable=True,
description="Area share of windows with PVC frame; null when no windows.",
),
}
@ -312,13 +337,18 @@ def _peui_ucl(epc: EpcPropertyData) -> Optional[float]:
def _window_aggregates(windows: list[SapWindow]) -> dict[str, Any]:
"""Aggregate a list of windows into the 13 physics + orientation columns.
"""Aggregate a list of windows into the 30 window-feature columns.
With no windows: counts/areas are 0; nullable averages are None.
Windows whose `orientation` isn't an integer in 1-8 contribute to count and
total area but to no octant they're treated as unrecorded.
With no windows: counts/areas/shares are 0; nullable averages and the
pvc_frame share are None. Windows whose `orientation` isn't an integer in 1-8
contribute to count and total area but to no octant. Windows whose
`glazing_type` isn't in the SAP10 1-15 enum fall into the `_other` share.
"""
octant_areas: dict[str, float] = {name: 0.0 for name in _OCTANT_NAMES.values()}
glazed_type_areas: dict[str, float] = {
f"window_pct_glazed_type_{code}": 0.0 for code in _GLAZED_TYPE_CODES
}
glazed_type_areas["window_pct_glazed_type_other"] = 0.0
aggregates: dict[str, Any] = {
"window_count": len(windows),
"window_total_area_m2": 0.0,
@ -326,12 +356,15 @@ def _window_aggregates(windows: list[SapWindow]) -> dict[str, Any]:
"window_pct_draught_proofed": None,
"window_avg_u_value": None,
"window_avg_solar_transmittance": None,
**glazed_type_areas,
"window_pct_pvc_frame": None,
}
if not windows:
return aggregates
total_area = 0.0
draught_proofed_area = 0.0
pvc_frame_area = 0.0
transmission_area = 0.0
weighted_u_value = 0.0
weighted_solar_transmittance = 0.0
@ -340,8 +373,14 @@ def _window_aggregates(windows: list[SapWindow]) -> dict[str, Any]:
total_area += area
if w.draught_proofed is True or w.draught_proofed == "true":
draught_proofed_area += area
if w.frame_material == "PVC":
pvc_frame_area += area
if isinstance(w.orientation, int) and w.orientation in _OCTANT_NAMES:
octant_areas[_OCTANT_NAMES[w.orientation]] += area
if isinstance(w.glazing_type, int) and w.glazing_type in _GLAZED_TYPE_CODES:
glazed_type_areas[f"window_pct_glazed_type_{w.glazing_type}"] += area
else:
glazed_type_areas["window_pct_glazed_type_other"] += area
if w.window_transmission_details is not None:
transmission_area += area
weighted_u_value += w.window_transmission_details.u_value * area
@ -356,6 +395,9 @@ def _window_aggregates(windows: list[SapWindow]) -> dict[str, Any]:
aggregates["window_pct_draught_proofed"] = (
draught_proofed_area / total_area * 100.0
)
aggregates["window_pct_pvc_frame"] = pvc_frame_area / total_area
for column, area in glazed_type_areas.items():
aggregates[column] = area / total_area
if transmission_area > 0:
aggregates["window_avg_u_value"] = weighted_u_value / transmission_area
aggregates["window_avg_solar_transmittance"] = (