mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
slice 8b: window glazed_type and pvc_frame shares
Adds seventeen window-categorical-share features: one float per SAP10 glazed_type code (1-15), a `_other` bucket for anything outside the enum, and a single `window_pct_pvc_frame` for the area-weighted PVC-frame share. All shares are area-weighted over total window area; for window-less properties the glazed_type shares are 0.0 and the pvc_frame share is null. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
dba254e316
commit
079e6f9a68
2 changed files with 131 additions and 4 deletions
|
|
@ -415,6 +415,91 @@ def test_to_row_returns_window_zeros_for_property_with_no_windows() -> None:
|
|||
assert row["window_avg_solar_transmittance"] is None
|
||||
|
||||
|
||||
_GLAZED_TYPE_CODES: tuple[int, ...] = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)
|
||||
|
||||
|
||||
def test_schema_advertises_window_categorical_share_features() -> None:
    # Arrange
    transform = EpcMlTransform()

    # Act
    feature_columns = transform.schema().feature_columns

    # Assert — every known glazed_type code has a non-nullable, non-categorical float share.
    for glazed_code in _GLAZED_TYPE_CODES:
        column_name = f"window_pct_glazed_type_{glazed_code}"
        assert column_name in feature_columns, column_name
        spec = feature_columns[column_name]
        assert spec.dtype is float
        assert spec.nullable is False
        assert spec.categorical is False

    # Assert — the catch-all bucket exists, and the PVC-frame share is a nullable float.
    assert "window_pct_glazed_type_other" in feature_columns
    assert "window_pct_pvc_frame" in feature_columns
    pvc_spec = feature_columns["window_pct_pvc_frame"]
    assert pvc_spec.dtype is float
    assert pvc_spec.nullable is True
|
||||
|
||||
|
||||
def test_to_row_aggregates_glazed_type_and_pvc_frame_shares() -> None:
    # Arrange — 5.0 m² of glazing spread over three windows:
    #   1.5 x 2.0 = 3.0 m², glazed_type=2,  PVC frame
    #   1.0 x 1.5 = 1.5 m², glazed_type=13, PVC frame
    #   0.5 x 1.0 = 0.5 m², glazed_type=5,  no frame material recorded
    sap_windows = [
        make_window(width=1.5, height=2.0, glazing_type=2, frame_material="PVC"),
        make_window(width=1.0, height=1.5, glazing_type=13, frame_material="PVC"),
        make_window(width=0.5, height=1.0, glazing_type=5, frame_material=None),
    ]
    epc = make_minimal_sap10_epc(energy_rating_current=82, sap_windows=sap_windows)
    transform = EpcMlTransform()

    # Act
    row = transform.to_row(epc)

    # Assert — area-weighted share per populated code: 3.0/5.0, 1.5/5.0 and 0.5/5.0.
    expected_shares = {2: 0.6, 13: 0.3, 5: 0.1}
    for glazed_code, share in expected_shares.items():
        assert row[f"window_pct_glazed_type_{glazed_code}"] == pytest.approx(share)

    # Assert — codes with no matching window, and the catch-all bucket, stay at exactly zero.
    for glazed_code in _GLAZED_TYPE_CODES:
        if glazed_code in expected_shares:
            continue
        assert row[f"window_pct_glazed_type_{glazed_code}"] == 0.0
    assert row["window_pct_glazed_type_other"] == 0.0

    # Assert — PVC-framed area over total area: (3.0 + 1.5) / 5.0.
    assert row["window_pct_pvc_frame"] == pytest.approx(0.9)
|
||||
|
||||
|
||||
def test_to_row_routes_unknown_glazed_type_to_other_bucket() -> None:
    # Arrange — a 2.0 m² window with a known code (2) and a 1.0 m² window whose
    # glazing_type=99 lies outside the SAP10 1-15 enum.
    known_window = make_window(width=2.0, height=1.0, glazing_type=2, frame_material="PVC")
    unknown_window = make_window(width=1.0, height=1.0, glazing_type=99, frame_material="PVC")
    epc = make_minimal_sap10_epc(
        energy_rating_current=82,
        sap_windows=[known_window, unknown_window],
    )
    transform = EpcMlTransform()

    # Act
    row = transform.to_row(epc)

    # Assert — of 3.0 m² total, 2.0 m² lands on code 2 and the unrecognised 1.0 m² on `_other`.
    assert row["window_pct_glazed_type_2"] == pytest.approx(2 / 3)
    assert row["window_pct_glazed_type_other"] == pytest.approx(1 / 3)
|
||||
|
||||
|
||||
def test_to_row_returns_window_share_zeros_for_property_with_no_windows() -> None:
    # Arrange — the minimal EPC is built without passing any sap_windows.
    epc = make_minimal_sap10_epc(energy_rating_current=82)
    transform = EpcMlTransform()

    # Act
    row = transform.to_row(epc)

    # Assert — every glazed_type share (known codes, then the catch-all) is exactly zero.
    zero_share_columns = [
        f"window_pct_glazed_type_{glazed_code}" for glazed_code in _GLAZED_TYPE_CODES
    ]
    zero_share_columns.append("window_pct_glazed_type_other")
    for column_name in zero_share_columns:
        assert row[column_name] == 0.0

    # Assert — the PVC-frame share is null rather than zero.
    assert row["window_pct_pvc_frame"] is None
|
||||
|
||||
|
||||
def test_to_row_area_weights_window_u_value_and_solar_transmittance() -> None:
|
||||
# Arrange — two windows with transmission details; one without.
|
||||
sap_windows = [
|
||||
|
|
|
|||
|
|
@ -32,6 +32,12 @@ _OCTANT_NAMES: dict[int, str] = {
|
|||
8: "NW",
|
||||
}
|
||||
|
||||
# SAP10 glazed_type enumeration (codes 1-15 per the gov api /api/codes export at
|
||||
# datatypes/epc/domain/epc_codes.csv, schema RdSAP-21.0.x). Anything outside this set
|
||||
# (the documentation "ND" sentinel, future codes, or unexpected strings) falls into
|
||||
# the `_other` bucket so the share columns always sum to 1.0 when any windows are present.
|
||||
_GLAZED_TYPE_CODES: tuple[int, ...] = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)
|
||||
|
||||
|
||||
_FEATURE_COLUMNS: dict[str, ColumnSpec] = {
|
||||
# Geometry
|
||||
|
|
@ -182,6 +188,25 @@ _FEATURE_COLUMNS: dict[str, ColumnSpec] = {
|
|||
nullable=True,
|
||||
description="Area-weighted mean window solar transmittance; null when no transmission details.",
|
||||
),
|
||||
# Window glazed_type categorical share columns (sum to 1.0 over total area when any windows present)
|
||||
**{
|
||||
f"window_pct_glazed_type_{code}": ColumnSpec(
|
||||
dtype=float,
|
||||
nullable=False,
|
||||
description=f"Area share of windows with glazed_type {code} (0.0-1.0).",
|
||||
)
|
||||
for code in _GLAZED_TYPE_CODES
|
||||
},
|
||||
"window_pct_glazed_type_other": ColumnSpec(
|
||||
dtype=float,
|
||||
nullable=False,
|
||||
description="Area share of windows with glazed_type outside the SAP10 1-15 enum.",
|
||||
),
|
||||
"window_pct_pvc_frame": ColumnSpec(
|
||||
dtype=float,
|
||||
nullable=True,
|
||||
description="Area share of windows with PVC frame; null when no windows.",
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -312,13 +337,18 @@ def _peui_ucl(epc: EpcPropertyData) -> Optional[float]:
|
|||
|
||||
|
||||
def _window_aggregates(windows: list[SapWindow]) -> dict[str, Any]:
|
||||
"""Aggregate a list of windows into the 13 physics + orientation columns.
|
||||
"""Aggregate a list of windows into the 30 window-feature columns.
|
||||
|
||||
With no windows: counts/areas are 0; nullable averages are None.
|
||||
Windows whose `orientation` isn't an integer in 1-8 contribute to count and
|
||||
total area but to no octant — they're treated as unrecorded.
|
||||
With no windows: counts/areas/shares are 0; nullable averages and the
|
||||
pvc_frame share are None. Windows whose `orientation` isn't an integer in 1-8
|
||||
contribute to count and total area but to no octant. Windows whose
|
||||
`glazing_type` isn't in the SAP10 1-15 enum fall into the `_other` share.
|
||||
"""
|
||||
octant_areas: dict[str, float] = {name: 0.0 for name in _OCTANT_NAMES.values()}
|
||||
glazed_type_areas: dict[str, float] = {
|
||||
f"window_pct_glazed_type_{code}": 0.0 for code in _GLAZED_TYPE_CODES
|
||||
}
|
||||
glazed_type_areas["window_pct_glazed_type_other"] = 0.0
|
||||
aggregates: dict[str, Any] = {
|
||||
"window_count": len(windows),
|
||||
"window_total_area_m2": 0.0,
|
||||
|
|
@ -326,12 +356,15 @@ def _window_aggregates(windows: list[SapWindow]) -> dict[str, Any]:
|
|||
"window_pct_draught_proofed": None,
|
||||
"window_avg_u_value": None,
|
||||
"window_avg_solar_transmittance": None,
|
||||
**glazed_type_areas,
|
||||
"window_pct_pvc_frame": None,
|
||||
}
|
||||
if not windows:
|
||||
return aggregates
|
||||
|
||||
total_area = 0.0
|
||||
draught_proofed_area = 0.0
|
||||
pvc_frame_area = 0.0
|
||||
transmission_area = 0.0
|
||||
weighted_u_value = 0.0
|
||||
weighted_solar_transmittance = 0.0
|
||||
|
|
@ -340,8 +373,14 @@ def _window_aggregates(windows: list[SapWindow]) -> dict[str, Any]:
|
|||
total_area += area
|
||||
if w.draught_proofed is True or w.draught_proofed == "true":
|
||||
draught_proofed_area += area
|
||||
if w.frame_material == "PVC":
|
||||
pvc_frame_area += area
|
||||
if isinstance(w.orientation, int) and w.orientation in _OCTANT_NAMES:
|
||||
octant_areas[_OCTANT_NAMES[w.orientation]] += area
|
||||
if isinstance(w.glazing_type, int) and w.glazing_type in _GLAZED_TYPE_CODES:
|
||||
glazed_type_areas[f"window_pct_glazed_type_{w.glazing_type}"] += area
|
||||
else:
|
||||
glazed_type_areas["window_pct_glazed_type_other"] += area
|
||||
if w.window_transmission_details is not None:
|
||||
transmission_area += area
|
||||
weighted_u_value += w.window_transmission_details.u_value * area
|
||||
|
|
@ -356,6 +395,9 @@ def _window_aggregates(windows: list[SapWindow]) -> dict[str, Any]:
|
|||
aggregates["window_pct_draught_proofed"] = (
|
||||
draught_proofed_area / total_area * 100.0
|
||||
)
|
||||
aggregates["window_pct_pvc_frame"] = pvc_frame_area / total_area
|
||||
for column, area in glazed_type_areas.items():
|
||||
aggregates[column] = area / total_area
|
||||
if transmission_area > 0:
|
||||
aggregates["window_avg_u_value"] = weighted_u_value / transmission_area
|
||||
aggregates["window_avg_solar_transmittance"] = (
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue