mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Sibling migration to the sap10_calculator move — `domain.ml` now lives
at the root-level layout (`domain/sap10_ml/`) matching the pattern
already used by `domain.addresses`, `domain.tasks`, `domain.postcode`,
and `domain.sap10_calculator`.
Changes:
- `git mv packages/domain/src/domain/ml → domain/sap10_ml` (19 files;
history preserved).
- Subpackage rename: `domain.ml` → `domain.sap10_ml`. 32 references
rewritten across .py and .md files: 11 internal + 21 external
(datatypes/epc/domain/mapper.py, 14 files in domain/sap10_calculator,
2 backend tests, 2 ADRs, 1 README, 1 design doc).
- Path-string updates: `pytest.ini` testpath
`packages/domain/src/domain/ml/tests` → `domain/sap10_ml/tests` so
ML tests stay in the default auto-discovered sweep. `CONTEXT.md`
also updated.
`packages/domain/src/domain/` is now empty — the workspace `domain/`
tree has been fully migrated. Together with the `domain/__init__.py`
deletions from the sap10_calculator commit (29ac35cc), `domain` is
now a single root-level namespace package with subpackages
{addresses, sap10_calculator, sap10_ml, tasks} + the standalone
`postcode.py` module.
Verified:
- Focused sweep (backend mapper-chain + sap10_calculator worksheet
e2e + golden fixtures): 99 passed / 19 failed — identical baseline.
- Wider sweep (all sap10_calculator + sap10_ml): 1654 passed / 20
failed (same pre-existing failures).
- domain/sap10_ml/tests: 210/210 PASSED at new path.
- Pyright net-zero: heat_transmission.py 13, cert_to_inputs.py 35,
mapper.py 33, rdsap_uvalues.py 1 (all unchanged from baseline).
Note: `packages/domain/pyproject.toml` still declares
`packages = ["src/domain"]` for the hatchling wheel — that target
directory is now empty and the wheel build is effectively a no-op.
Retiring the workspace package or repointing the wheel is a follow-up.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
1321 lines
44 KiB
Python
1321 lines
44 KiB
Python
"""Tests for EpcMlTransform — schema-contract surface and target extraction."""
|
|
|
|
import pandas as pd
|
|
import pytest
|
|
|
|
from datatypes.epc.domain.epc_property_data import (
|
|
BuildingPartIdentifier,
|
|
SapRoomInRoof,
|
|
WindowTransmissionDetails,
|
|
)
|
|
from domain.sap10_ml.schema import ColumnSpec, TransformSchema
|
|
from domain.sap10_ml.tests._fixtures import (
|
|
make_building_part,
|
|
make_floor_dimension,
|
|
make_main_heating_detail,
|
|
make_minimal_sap10_epc,
|
|
make_pv_array,
|
|
make_sap_heating,
|
|
make_window,
|
|
)
|
|
from domain.sap10_ml.transform import EpcMlTransform
|
|
|
|
|
|
# Target column name -> Python dtype the transform schema is expected to advertise.
_EXPECTED_TARGET_DTYPES: dict[str, type] = dict(
    sap_score=int,
    co2_emissions=float,
    peui_raw=int,
    peui_ucl=float,
    space_heating_kwh=float,
    hot_water_kwh=float,
)
|
|
|
|
|
|
def test_transform_advertises_version_and_target_columns() -> None:
    """Check the schema's version string and the name/dtype of every target column."""
    # Arrange
    transform = EpcMlTransform()

    # Act
    schema = transform.schema()

    # Assert
    assert isinstance(schema, TransformSchema)
    assert schema.transform_version == "2.7.1"
    assert schema.transform_version == EpcMlTransform.VERSION
    assert set(schema.target_columns.keys()) == set(_EXPECTED_TARGET_DTYPES.keys())
    for target_name, expected_dtype in _EXPECTED_TARGET_DTYPES.items():
        column = schema.target_columns[target_name]
        # Name the column in the message so a failing iteration is identifiable.
        assert isinstance(column, ColumnSpec), target_name
        assert column.dtype is expected_dtype, target_name
|
|
|
|
|
|
def test_to_row_extracts_targets_from_epc_property_data() -> None:
    """Values given to the EPC fixture must come back under the target column names."""
    # Arrange
    epc = make_minimal_sap10_epc(
        energy_rating_current=82,
        co2_emissions_current=2.7,
        energy_consumption_current=232,
        space_heating_kwh=10128.81,
        water_heating_kwh=2166.19,
    )
    # Row column -> value expected for the fixture arguments above.
    expected_targets = {
        "sap_score": 82,
        "co2_emissions": 2.7,
        "peui_raw": 232,
        "space_heating_kwh": 10128.81,
        "hot_water_kwh": 2166.19,
    }

    # Act
    row = EpcMlTransform().to_row(epc)

    # Assert
    for column_name, expected_value in expected_targets.items():
        assert row[column_name] == expected_value, column_name
|
|
|
|
|
|
def test_to_row_applies_ucl_correction_in_band_e() -> None:
    """A band-E EPC must have its PEUI reduced by the per-band linear correction."""
    # Arrange — SAP 45 = band E; Few et al. 2023 band-E correction is non-trivial
    epc = make_minimal_sap10_epc(
        energy_rating_current=45,
        energy_consumption_current=300,
    )
    transform = EpcMlTransform()

    # Act
    row = transform.to_row(epc)

    # Assert
    # Band E: gradient=-0.70, intercept=160 → cd = -0.70*300 + 160 = -50
    # adjusted = 300 + (-50) = 250.0
    # The value is the result of float arithmetic, so compare with a tolerance
    # (as the other computed-float assertions in this module do) rather than
    # relying on bit-exact equality.
    assert row["peui_ucl"] == pytest.approx(250.0)
|
|
|
|
|
|
def test_to_row_clamps_ucl_correction_when_band_b_would_increase_peui() -> None:
    """A positive band-B correction must be clamped, leaving peui_ucl equal to the raw PEUI."""
    # Arrange — SAP 82 = band B; per-band linear correction yields a *positive*
    # consumption_difference for this PEUI, which must be clamped to zero
    # (EPCs over-predict only — we never adjust upwards).
    fixture_kwargs = {
        "energy_rating_current": 82,
        "energy_consumption_current": 232,
    }
    epc = make_minimal_sap10_epc(**fixture_kwargs)

    # Act
    row = EpcMlTransform().to_row(epc)

    # Assert
    # Band B: gradient=-0.10, intercept=28 → cd = -0.10*232 + 28 = +4.8 → clamp to 0
    # adjusted = 232 + 0 = 232.0
    corrected_peui = row["peui_ucl"]
    assert corrected_peui == 232.0
|
|
|
|
|
|
def test_schema_advertises_total_floor_area_m2_feature() -> None:
    """The schema must list total_floor_area_m2 as a non-nullable float feature."""
    # Arrange
    feature_name = "total_floor_area_m2"

    # Act
    schema = EpcMlTransform().schema()

    # Assert
    assert feature_name in schema.feature_columns
    spec = schema.feature_columns[feature_name]
    assert isinstance(spec, ColumnSpec)
    assert spec.dtype is float
    assert spec.nullable is False
|
|
|
|
|
|
def test_to_row_extracts_total_floor_area_m2() -> None:
    """to_row must carry the fixture's floor area through to total_floor_area_m2."""
    # Arrange
    epc = make_minimal_sap10_epc(energy_rating_current=82)

    # Act
    row = EpcMlTransform().to_row(epc)

    # Assert
    # make_minimal_sap10_epc sets total_floor_area_m2=70.0 by default
    floor_area = row["total_floor_area_m2"]
    assert floor_area == 70.0
|
|
|
|
|
|
# Count-style feature columns; every one is expected to be advertised with dtype int.
_EXPECTED_COUNT_FEATURES: dict[str, type] = dict.fromkeys(
    (
        "door_count",
        "habitable_rooms_count",
        "heated_rooms_count",
        "wet_rooms_count",
        "extensions_count",
        "open_chimneys_count",
        "insulated_door_count",
        "cfl_fixed_lighting_bulbs_count",
        "led_fixed_lighting_bulbs_count",
        "incandescent_fixed_lighting_bulbs_count",
    ),
    int,
)
|
|
|
|
|
|
def test_schema_advertises_count_features() -> None:
    """Every expected count feature must be advertised with its dtype and as non-nullable."""
    # Arrange
    transform = EpcMlTransform()

    # Act
    schema = transform.schema()

    # Assert
    for feature_name, expected_dtype in _EXPECTED_COUNT_FEATURES.items():
        assert feature_name in schema.feature_columns, feature_name
        column = schema.feature_columns[feature_name]
        # Each assertion names the feature so a failure inside the loop
        # points at the offending column.
        assert isinstance(column, ColumnSpec), feature_name
        assert column.dtype is expected_dtype, feature_name
        assert column.nullable is False, feature_name
|
|
|
|
|
|
def test_to_row_extracts_count_features() -> None:
    """Count values passed to the EPC fixture must reappear unchanged in the row."""
    # Arrange — fixture keyword names double as the row column names.
    expected_counts = {
        "door_count": 3,
        "habitable_rooms_count": 5,
        "heated_rooms_count": 4,
        "wet_rooms_count": 1,
        "extensions_count": 1,
        "open_chimneys_count": 0,
        "insulated_door_count": 2,
        "cfl_fixed_lighting_bulbs_count": 0,
        "led_fixed_lighting_bulbs_count": 8,
        "incandescent_fixed_lighting_bulbs_count": 2,
    }
    epc = make_minimal_sap10_epc(energy_rating_current=82, **expected_counts)

    # Act
    row = EpcMlTransform().to_row(epc)

    # Assert
    for column_name, expected_value in expected_counts.items():
        assert row[column_name] == expected_value, column_name
|
|
|
|
|
|
# Feature columns the schema test expects to be non-nullable bools.
_EXPECTED_FLAT_BOOLEAN_FEATURES: tuple[str, ...] = tuple(
    [
        "solar_water_heating",
        "has_hot_water_cylinder",
        "has_fixed_air_conditioning",
    ]
)
|
|
|
|
|
|
# Feature columns the schema test expects to be nullable ints.
_EXPECTED_OPTIONAL_INT_FEATURES: tuple[str, ...] = ("percent_draughtproofed",)
|
|
|
|
|
|
def test_schema_advertises_boolean_and_optional_int_features() -> None:
    """Flat boolean features must be non-nullable bools; optional ints must be nullable ints."""
    # Arrange
    transform = EpcMlTransform()

    # Act
    schema = transform.schema()

    # Assert — every assertion carries the feature name so a failure inside
    # either loop identifies the column.
    for feature_name in _EXPECTED_FLAT_BOOLEAN_FEATURES:
        assert feature_name in schema.feature_columns, feature_name
        column = schema.feature_columns[feature_name]
        assert column.dtype is bool, feature_name
        assert column.nullable is False, feature_name
    for feature_name in _EXPECTED_OPTIONAL_INT_FEATURES:
        assert feature_name in schema.feature_columns, feature_name
        column = schema.feature_columns[feature_name]
        assert column.dtype is int, feature_name
        assert column.nullable is True, feature_name
|
|
|
|
|
|
def test_to_row_extracts_boolean_and_optional_int_features() -> None:
    """Boolean flags and percent_draughtproofed given to the fixture must reach the row."""
    # Arrange — fixture keyword names double as the row column names.
    expected_flags = {
        "solar_water_heating": True,
        "has_hot_water_cylinder": True,
        "has_fixed_air_conditioning": False,
    }
    epc = make_minimal_sap10_epc(
        energy_rating_current=82,
        percent_draughtproofed=100,
        **expected_flags,
    )

    # Act
    row = EpcMlTransform().to_row(epc)

    # Assert — flags are checked by identity so truthy/falsy stand-ins do not pass.
    for column_name, expected_flag in expected_flags.items():
        assert row[column_name] is expected_flag, column_name
    assert row["percent_draughtproofed"] == 100
|
|
|
|
|
|
# Categorical string features the schema test expects to be nullable.
_NULLABLE_CATEGORICAL_FEATURES: tuple[str, ...] = tuple(
    [
        "property_type",
        "built_form",
        "region_code",
        "country_code",
    ]
)
|
|
|
|
|
|
# Categorical string features the schema test expects to be non-nullable.
_NON_NULLABLE_CATEGORICAL_FEATURES: tuple[str, ...] = ("dwelling_type", "transaction_type")
|
|
|
|
|
|
def test_schema_advertises_categorical_features() -> None:
    """Categorical features must be str-typed, flagged categorical, with the expected nullability."""
    # Arrange
    transform = EpcMlTransform()
    # Same checks apply to both groups; only the expected nullability differs.
    groups = (
        (_NULLABLE_CATEGORICAL_FEATURES, True),
        (_NON_NULLABLE_CATEGORICAL_FEATURES, False),
    )

    # Act
    schema = transform.schema()

    # Assert — each assertion names the feature so loop failures are identifiable.
    for feature_names, expected_nullable in groups:
        for feature_name in feature_names:
            assert feature_name in schema.feature_columns, feature_name
            column = schema.feature_columns[feature_name]
            assert column.dtype is str, feature_name
            assert column.categorical is True, feature_name
            assert column.nullable is expected_nullable, feature_name
|
|
|
|
|
|
def test_to_row_extracts_categorical_features() -> None:
    """Categorical values given to the fixture must reach the row; no tenure column is emitted."""
    # Arrange — fixture keyword names double as the row column names.
    expected_categories = {
        "dwelling_type": "End-terrace house",
        "transaction_type": "8",
        "property_type": "0",
        "built_form": "2",
        "region_code": "6",
        "country_code": "ENG",
    }
    epc = make_minimal_sap10_epc(energy_rating_current=82, **expected_categories)

    # Act
    row = EpcMlTransform().to_row(epc)

    # Assert
    for column_name, expected_value in expected_categories.items():
        assert row[column_name] == expected_value, column_name
    assert "tenure" not in row
|
|
|
|
|
|
# Window feature name → (dtype, nullable) the schema is expected to advertise.
_WINDOW_PHYSICS_FEATURES_NULLABLE: dict[str, tuple[type, bool]] = {
    "window_count": (int, False),
    "window_total_area_m2": (float, False),
    # One non-nullable float area column per compass octant.
    **{
        f"window_area_orientation_{octant}": (float, False)
        for octant in ("N", "NE", "E", "SE", "S", "SW", "W", "NW")
    },
    "window_pct_draught_proofed": (float, True),
    "window_avg_u_value": (float, True),
    "window_avg_solar_transmittance": (float, True),
}
|
|
|
|
|
|
def test_schema_advertises_window_physics_features() -> None:
    """Window physics features must be advertised with the expected dtype/nullability, non-categorical."""
    # Arrange
    transform = EpcMlTransform()

    # Act
    schema = transform.schema()

    # Assert — each assertion names the feature so loop failures are identifiable.
    for feature_name, (expected_dtype, expected_nullable) in _WINDOW_PHYSICS_FEATURES_NULLABLE.items():
        assert feature_name in schema.feature_columns, feature_name
        column = schema.feature_columns[feature_name]
        assert column.dtype is expected_dtype, feature_name
        assert column.nullable is expected_nullable, feature_name
        assert column.categorical is False, feature_name
|
|
|
|
|
|
def test_to_row_aggregates_window_physics_and_orientation() -> None:
    """Three windows must roll up into count, total area, per-octant area and draught-proofing share."""
    # Arrange — 3 windows: 2.0 m² S, 1.5 m² N, 1.0 m² E (orientations 5/1/3)
    window_specs = (
        # (orientation, height, draught_proofed); width is 1.0 throughout
        (5, 2.0, True),
        (1, 1.5, False),
        (3, 1.0, True),
    )
    sap_windows = [
        make_window(orientation=orientation, width=1.0, height=height, draught_proofed=proofed)
        for orientation, height, proofed in window_specs
    ]
    epc = make_minimal_sap10_epc(energy_rating_current=82, sap_windows=sap_windows)

    # Act
    row = EpcMlTransform().to_row(epc)

    # Assert
    assert row["window_count"] == 3
    assert row["window_total_area_m2"] == pytest.approx(4.5)
    occupied_octants = {"N": 1.5, "E": 1.0, "S": 2.0}
    for octant in ("N", "NE", "E", "SE", "S", "SW", "W", "NW"):
        observed_area = row[f"window_area_orientation_{octant}"]
        if octant in occupied_octants:
            assert observed_area == pytest.approx(occupied_octants[octant])
        else:
            assert observed_area == 0.0
    # area-weighted draught-proofing: (2.0 + 1.0) / 4.5 * 100 = 66.66...%
    assert row["window_pct_draught_proofed"] == pytest.approx(66.666, abs=0.01)
    for unset_column in ("window_avg_u_value", "window_avg_solar_transmittance"):
        assert row[unset_column] is None
|
|
|
|
|
|
def test_to_row_skips_windows_with_unrecorded_orientation() -> None:
    """A window with orientation 0 must count towards totals but not towards any octant."""
    # Arrange — two S windows + one with orientation=0 (horizontal/unrecorded);
    # the unrecorded one contributes to count and total_area but to no octant.
    sap_windows = [
        make_window(orientation=orientation, width=1.0, height=height)
        for orientation, height in ((5, 2.0), (5, 1.0), (0, 0.5))
    ]
    epc = make_minimal_sap10_epc(energy_rating_current=82, sap_windows=sap_windows)

    # Act
    row = EpcMlTransform().to_row(epc)

    # Assert
    assert row["window_count"] == 3
    assert row["window_total_area_m2"] == pytest.approx(3.5)
    assert row["window_area_orientation_S"] == pytest.approx(3.0)
    # The horizontal window's 0.5 m² is not assigned to any octant
    octant_area_total = 0
    for octant in ("N", "NE", "E", "SE", "S", "SW", "W", "NW"):
        octant_area_total = octant_area_total + row[f"window_area_orientation_{octant}"]
    assert octant_area_total == pytest.approx(3.0)
|
|
|
|
|
|
def test_to_row_returns_window_zeros_for_property_with_no_windows() -> None:
    """With no windows supplied, counts/areas must be zero and the averaged columns None."""
    # Arrange
    epc = make_minimal_sap10_epc(energy_rating_current=82)

    # Act
    row = EpcMlTransform().to_row(epc)

    # Assert
    assert row["window_count"] == 0
    assert row["window_total_area_m2"] == 0.0
    octant_columns = [
        f"window_area_orientation_{octant}"
        for octant in ("N", "NE", "E", "SE", "S", "SW", "W", "NW")
    ]
    for column_name in octant_columns:
        assert row[column_name] == 0.0
    for column_name in (
        "window_pct_draught_proofed",
        "window_avg_u_value",
        "window_avg_solar_transmittance",
    ):
        assert row[column_name] is None
|
|
|
|
|
|
# Known glazed_type codes 1..15; each gets its own share column in the schema tests.
_GLAZED_TYPE_CODES: tuple[int, ...] = tuple(range(1, 16))
|
|
|
|
|
|
def test_schema_advertises_window_categorical_share_features() -> None:
    """The schema must expose a float share column per glazed_type code, `_other`, and the PVC-frame share."""
    # Arrange
    transform = EpcMlTransform()

    # Act
    schema = transform.schema()

    # Assert — one float share per known glazed_type code + `_other`, plus pvc_frame share
    for code in _GLAZED_TYPE_CODES:
        name = f"window_pct_glazed_type_{code}"
        assert name in schema.feature_columns, name
        column = schema.feature_columns[name]
        # Name the column in every message so a failing code is identifiable.
        assert column.dtype is float, name
        assert column.nullable is False, name
        assert column.categorical is False, name
    assert "window_pct_glazed_type_other" in schema.feature_columns
    assert "window_pct_pvc_frame" in schema.feature_columns
    assert schema.feature_columns["window_pct_pvc_frame"].dtype is float
    assert schema.feature_columns["window_pct_pvc_frame"].nullable is True
|
|
|
|
|
|
def test_to_row_aggregates_glazed_type_and_pvc_frame_shares() -> None:
    """Glazed-type and PVC-frame columns must hold each group's fraction of total window area."""
    # Arrange — three windows: 3.0 m² glazed_type=2 PVC, 1.5 m² glazed_type=13 PVC,
    # 0.5 m² glazed_type=5 (single, no PVC). Total area = 5.0 m².
    window_specs = (
        # (width, height, glazing_type, frame_material)
        (1.5, 2.0, 2, "PVC"),
        (1.0, 1.5, 13, "PVC"),
        (0.5, 1.0, 5, None),
    )
    sap_windows = [
        make_window(width=width, height=height, glazing_type=glazing, frame_material=frame)
        for width, height, glazing, frame in window_specs
    ]
    epc = make_minimal_sap10_epc(energy_rating_current=82, sap_windows=sap_windows)

    # Act
    row = EpcMlTransform().to_row(epc)

    # Assert
    # Shares (area-weighted) — 3.0/5.0=0.6 type 2; 1.5/5.0=0.3 type 13; 0.5/5.0=0.1 type 5.
    expected_shares = {2: 0.6, 13: 0.3, 5: 0.1}
    for code, share in expected_shares.items():
        assert row[f"window_pct_glazed_type_{code}"] == pytest.approx(share)
    # All other known glazed_type codes are zero.
    unused_codes = [code for code in _GLAZED_TYPE_CODES if code not in (2, 5, 13)]
    for code in unused_codes:
        assert row[f"window_pct_glazed_type_{code}"] == 0.0
    assert row["window_pct_glazed_type_other"] == 0.0
    # PVC frame area share: (3.0 + 1.5) / 5.0 = 0.9
    assert row["window_pct_pvc_frame"] == pytest.approx(0.9)
|
|
|
|
|
|
def test_to_row_routes_unknown_glazed_type_to_other_bucket() -> None:
    """Area of a window with an unlisted glazing_type must land in the `_other` share column."""
    # Arrange — one window has glazing_type=99 (not in the SAP10 enum 1-15)
    known_window = make_window(width=2.0, height=1.0, glazing_type=2, frame_material="PVC")
    unknown_window = make_window(width=1.0, height=1.0, glazing_type=99, frame_material="PVC")
    epc = make_minimal_sap10_epc(
        energy_rating_current=82,
        sap_windows=[known_window, unknown_window],
    )

    # Act
    row = EpcMlTransform().to_row(epc)

    # Assert
    # Total area = 3.0; known type 2 = 2.0/3.0; unknown 99 → _other = 1.0/3.0
    known_share = row["window_pct_glazed_type_2"]
    other_share = row["window_pct_glazed_type_other"]
    assert known_share == pytest.approx(2 / 3)
    assert other_share == pytest.approx(1 / 3)
|
|
|
|
|
|
def test_to_row_returns_window_share_zeros_for_property_with_no_windows() -> None:
    """With no windows, every glazed-type share must be 0.0 and the PVC-frame share None."""
    # Arrange
    epc = make_minimal_sap10_epc(energy_rating_current=82)

    # Act
    row = EpcMlTransform().to_row(epc)

    # Assert
    share_columns = [f"window_pct_glazed_type_{code}" for code in _GLAZED_TYPE_CODES]
    share_columns.append("window_pct_glazed_type_other")
    for column_name in share_columns:
        assert row[column_name] == 0.0
    assert row["window_pct_pvc_frame"] is None
|
|
|
|
|
|
# Expected schema entry for each building-part feature column; consumed by
# the schema test that unpacks every value as a 3-tuple.
_BUILDING_PART_FEATURES_NULLABLE: dict[str, tuple[type, bool, bool]] = {
    # name → (dtype, nullable, categorical)
    # Aggregates over all building parts — expected non-nullable.
    "building_parts_count": (int, False, False),
    "total_heat_loss_perimeter_m": (float, False, False),
    "total_party_wall_length_m": (float, False, False),
    "total_floor_area_from_parts_m2": (float, False, False),
    "avg_room_height_m": (float, True, False),
    # Main-dwelling-specific columns — all expected nullable.
    "main_dwelling_heat_loss_perimeter_m": (float, True, False),
    "main_dwelling_party_wall_length_m": (float, True, False),
    "main_dwelling_total_floor_area_m2": (float, True, False),
    "main_dwelling_avg_room_height_m": (float, True, False),
    "main_dwelling_has_room_in_roof": (bool, True, False),
    "main_dwelling_construction_age_band": (str, True, True),
    "main_dwelling_wall_construction": (int, True, True),
    "main_dwelling_roof_construction": (int, True, True),
}
|
|
|
|
|
|
def test_schema_advertises_building_part_features() -> None:
    """Each building-part feature must be advertised with the expected dtype, nullability and categorical flag."""
    # Arrange
    expectations = _BUILDING_PART_FEATURES_NULLABLE

    # Act
    schema = EpcMlTransform().schema()

    # Assert
    for name in expectations:
        expected_dtype, expected_nullable, expected_categorical = expectations[name]
        assert name in schema.feature_columns, name
        column = schema.feature_columns[name]
        assert column.dtype is expected_dtype, name
        assert column.nullable is expected_nullable, name
        assert column.categorical is expected_categorical, name
|
|
|
|
|
|
def test_to_row_aggregates_building_parts_with_main_dwelling_carveout() -> None:
    """Totals must span every part while main_dwelling_* columns reflect only the MAIN part."""
    # Arrange — Main Dwelling (two floors, age band B, wall 3, roof 4) plus one extension.
    main_floors = [
        make_floor_dimension(
            total_floor_area_m2=30.0,
            room_height_m=2.5,
            party_wall_length_m=6.0,
            heat_loss_perimeter_m=20.0,
        ),
        make_floor_dimension(
            total_floor_area_m2=28.0,
            room_height_m=2.4,
            party_wall_length_m=6.0,
            heat_loss_perimeter_m=18.0,
        ),
    ]
    main = make_building_part(
        identifier=BuildingPartIdentifier.MAIN,
        construction_age_band="B",
        wall_construction=3,
        roof_construction=4,
        floor_dimensions=main_floors,
    )
    extension_floors = [
        make_floor_dimension(
            total_floor_area_m2=12.0,
            room_height_m=2.6,
            party_wall_length_m=0.0,
            heat_loss_perimeter_m=10.0,
        ),
    ]
    extension = make_building_part(
        identifier=BuildingPartIdentifier.EXTENSION_1,
        construction_age_band="L",
        wall_construction=4,
        roof_construction=5,
        floor_dimensions=extension_floors,
    )
    epc = make_minimal_sap10_epc(
        energy_rating_current=82,
        sap_building_parts=[main, extension],
    )

    # Act
    row = EpcMlTransform().to_row(epc)

    # Assert — cross-all aggregates
    assert row["building_parts_count"] == 2
    cross_part_totals = {
        "total_heat_loss_perimeter_m": 48.0,
        "total_party_wall_length_m": 12.0,
        "total_floor_area_from_parts_m2": 70.0,
    }
    for column_name, expected_total in cross_part_totals.items():
        assert row[column_name] == pytest.approx(expected_total)
    # avg_room_height area-weighted across all floors: (2.5*30 + 2.4*28 + 2.6*12) / 70
    # = (75 + 67.2 + 31.2) / 70 = 173.4 / 70 = 2.4771...
    assert row["avg_room_height_m"] == pytest.approx(2.4771, abs=0.001)
    # Main Dwelling aggregates
    main_dwelling_totals = {
        "main_dwelling_heat_loss_perimeter_m": 38.0,
        "main_dwelling_party_wall_length_m": 12.0,
        "main_dwelling_total_floor_area_m2": 58.0,
    }
    for column_name, expected_total in main_dwelling_totals.items():
        assert row[column_name] == pytest.approx(expected_total)
    # main avg height = (2.5*30 + 2.4*28) / 58 = (75 + 67.2) / 58 = 142.2 / 58 = 2.4517
    assert row["main_dwelling_avg_room_height_m"] == pytest.approx(2.4517, abs=0.001)
    assert row["main_dwelling_has_room_in_roof"] is False
    # Main Dwelling categoricals
    assert row["main_dwelling_construction_age_band"] == "B"
    assert row["main_dwelling_wall_construction"] == 3
    assert row["main_dwelling_roof_construction"] == 4
|
|
|
|
|
|
def test_to_row_flags_room_in_roof_when_main_dwelling_has_it() -> None:
    """A MAIN part carrying a SapRoomInRoof must set main_dwelling_has_room_in_roof to True."""
    # Arrange
    room_in_roof = SapRoomInRoof(floor_area=15.0, construction_age_band="B")
    main = make_building_part(
        identifier=BuildingPartIdentifier.MAIN,
        sap_room_in_roof=room_in_roof,
    )
    epc = make_minimal_sap10_epc(energy_rating_current=82, sap_building_parts=[main])

    # Act
    row = EpcMlTransform().to_row(epc)

    # Assert
    has_room_in_roof = row["main_dwelling_has_room_in_roof"]
    assert has_room_in_roof is True
|
|
|
|
|
|
def test_to_row_returns_building_part_nones_when_no_main_dwelling_identified() -> None:
    """Without a MAIN part, totals must still populate while every main_dwelling_* column is None."""
    # Arrange — single part with identifier that doesn't match "Main Dwelling"
    sole_part = make_building_part(identifier=BuildingPartIdentifier.EXTENSION_1)
    epc = make_minimal_sap10_epc(energy_rating_current=82, sap_building_parts=[sole_part])

    # Act
    row = EpcMlTransform().to_row(epc)

    # Assert — cross-all aggregates still populate
    assert row["building_parts_count"] == 1
    assert row["total_heat_loss_perimeter_m"] == pytest.approx(20.0)
    # Main-dwelling-specific columns are None — honest about data quality
    main_dwelling_columns = (
        "main_dwelling_heat_loss_perimeter_m",
        "main_dwelling_party_wall_length_m",
        "main_dwelling_total_floor_area_m2",
        "main_dwelling_avg_room_height_m",
        "main_dwelling_has_room_in_roof",
        "main_dwelling_construction_age_band",
        "main_dwelling_wall_construction",
        "main_dwelling_roof_construction",
    )
    for column_name in main_dwelling_columns:
        assert row[column_name] is None, column_name
|
|
|
|
|
|
def test_to_row_returns_building_part_zeros_for_property_with_no_parts() -> None:
    """With no building parts, the totals must be zero and the checked derived columns None."""
    # Arrange
    epc = make_minimal_sap10_epc(energy_rating_current=82)

    # Act
    row = EpcMlTransform().to_row(epc)

    # Assert
    assert row["building_parts_count"] == 0
    for total_column in (
        "total_heat_loss_perimeter_m",
        "total_party_wall_length_m",
        "total_floor_area_from_parts_m2",
    ):
        assert row[total_column] == 0.0, total_column
    for absent_column in (
        "avg_room_height_m",
        "main_dwelling_heat_loss_perimeter_m",
        "main_dwelling_construction_age_band",
        "main_dwelling_wall_construction",
    ):
        assert row[absent_column] is None, absent_column
|
|
|
|
|
|
# Expected schema entry for each heating feature column; consumed by the
# schema test that unpacks every value as a 3-tuple.
_HEATING_FEATURES_NULLABLE: dict[str, tuple[type, bool, bool]] = {
    # name → (dtype, nullable, categorical)
    "main_heating_count": (int, False, False),
    # primary_* columns — all expected nullable.
    "primary_main_fuel_type": (int, True, True),
    "primary_heat_emitter_type": (int, True, True),
    "primary_main_heating_control": (int, True, True),
    "primary_main_heating_category": (int, True, True),
    "primary_has_fghrs": (bool, True, False),
    "primary_fan_flue_present": (bool, True, False),
    "primary_boiler_flue_type": (int, True, True),
    "primary_central_heating_pump_age": (int, True, True),
    # Water-heating and cylinder columns.
    "water_heating_code": (int, True, True),
    "water_heating_fuel": (int, True, True),
    "cylinder_size": (int, True, False),
    "cylinder_insulation_thickness_mm": (int, True, False),
    # Secondary-heating columns.
    "has_secondary_heating": (bool, False, False),
    "secondary_fuel_type": (int, True, True),
}
|
|
|
|
|
|
def test_schema_advertises_heating_features() -> None:
    """Each heating feature must be advertised with the expected dtype, nullability and categorical flag."""
    # Arrange
    expectations = _HEATING_FEATURES_NULLABLE

    # Act
    schema = EpcMlTransform().schema()

    # Assert
    for name in expectations:
        expected_dtype, expected_nullable, expected_categorical = expectations[name]
        assert name in schema.feature_columns, name
        column = schema.feature_columns[name]
        assert column.dtype is expected_dtype, name
        assert column.nullable is expected_nullable, name
        assert column.categorical is expected_categorical, name
|
|
|
|
|
|
def test_to_row_extracts_primary_heating_from_first_main_heating_detail() -> None:
    """Fields of the single main-heating detail must surface as primary_* row columns."""
    # Arrange — mains-gas boiler with a fan flue, modern control, no FGHRS
    coded_fields = {
        "main_fuel_type": 26,  # mains gas (not community)
        "heat_emitter_type": 1,
        "main_heating_control": 2106,
        "main_heating_category": 2,
        "boiler_flue_type": 2,
        "central_heating_pump_age": 0,
    }
    flag_fields = {
        "has_fghrs": False,
        "fan_flue_present": True,
    }
    primary = make_main_heating_detail(**coded_fields, **flag_fields)
    epc = make_minimal_sap10_epc(
        energy_rating_current=82,
        sap_heating=make_sap_heating(main_heating_details=[primary]),
    )

    # Act
    row = EpcMlTransform().to_row(epc)

    # Assert — each detail field is read back from the column named "primary_" + field.
    assert row["main_heating_count"] == 1
    for field_name, expected_code in coded_fields.items():
        assert row[f"primary_{field_name}"] == expected_code, field_name
    # Flags are checked by identity so 0/1 stand-ins do not pass.
    for field_name, expected_flag in flag_fields.items():
        assert row[f"primary_{field_name}"] is expected_flag, field_name
|
|
|
|
|
|
def test_to_row_extracts_water_heating_fields() -> None:
    """Water-heating and cylinder values given to make_sap_heating must reach the row."""
    # Arrange — fixture keyword names double as the row column names.
    water_heating = {
        "water_heating_code": 901,
        "water_heating_fuel": 26,
        "cylinder_size": 2,
        "cylinder_insulation_thickness_mm": 38,
    }
    epc = make_minimal_sap10_epc(
        energy_rating_current=82,
        sap_heating=make_sap_heating(**water_heating),
    )

    # Act
    row = EpcMlTransform().to_row(epc)

    # Assert
    for column_name, expected_value in water_heating.items():
        assert row[column_name] == expected_value, column_name
|
|
|
|
|
|
def test_to_row_flags_secondary_heating_when_present() -> None:
    """A secondary fuel type on the heating record must set the flag and be passed through."""
    # Arrange — secondary heating: bottled-LPG (code 38)
    heating = make_sap_heating(secondary_fuel_type=38)
    epc = make_minimal_sap10_epc(energy_rating_current=82, sap_heating=heating)

    # Act
    row = EpcMlTransform().to_row(epc)

    # Assert
    flag = row["has_secondary_heating"]
    fuel = row["secondary_fuel_type"]
    assert flag is True
    assert fuel == 38
|
|
|
|
|
|
def test_to_row_returns_no_secondary_heating_when_absent() -> None:
    """With secondary_fuel_type=None the flag must be False and the fuel column None."""
    # Arrange
    heating = make_sap_heating(secondary_fuel_type=None)
    epc = make_minimal_sap10_epc(energy_rating_current=82, sap_heating=heating)

    # Act
    row = EpcMlTransform().to_row(epc)

    # Assert
    flag = row["has_secondary_heating"]
    fuel = row["secondary_fuel_type"]
    assert flag is False
    assert fuel is None
|
|
|
|
|
|
def test_to_row_returns_primary_heating_nones_when_no_main_heating_details() -> None:
    """An empty main_heating_details list must give a zero count and None for every primary_* column."""
    # Arrange — sap_heating present but main_heating_details is empty
    heating = make_sap_heating(main_heating_details=[])
    epc = make_minimal_sap10_epc(energy_rating_current=82, sap_heating=heating)

    # Act
    row = EpcMlTransform().to_row(epc)

    # Assert
    assert row["main_heating_count"] == 0
    primary_columns = (
        "primary_main_fuel_type",
        "primary_heat_emitter_type",
        "primary_main_heating_control",
        "primary_main_heating_category",
        "primary_has_fghrs",
        "primary_fan_flue_present",
        "primary_boiler_flue_type",
        "primary_central_heating_pump_age",
    )
    for column_name in primary_columns:
        assert row[column_name] is None, column_name
|
|
|
|
|
|
# PV feature name → (dtype, nullable, categorical) the schema is expected to advertise.
_PV_FEATURES_NULLABLE: dict[str, tuple[type, bool, bool]] = {
    "has_pv": (bool, False, False),
    "pv_capacity_source": (str, False, True),
    "pv_array_count": (int, False, False),
    "pv_total_peak_power_kw": (float, False, False),
    # One non-nullable float peak-power column per compass octant.
    **{
        f"pv_peak_power_kw_{octant}": (float, False, False)
        for octant in ("N", "NE", "E", "SE", "S", "SW", "W", "NW")
    },
    "pv_avg_pitch": (float, True, False),
    "pv_avg_overshading": (float, True, False),
    "pv_percent_roof_area": (int, True, False),
}
|
|
|
|
|
|
def test_schema_advertises_pv_features() -> None:
    """Each PV feature must be advertised with the expected dtype, nullability and categorical flag."""
    # Arrange
    expectations = _PV_FEATURES_NULLABLE

    # Act
    schema = EpcMlTransform().schema()

    # Assert
    for name in expectations:
        expected_dtype, expected_nullable, expected_categorical = expectations[name]
        assert name in schema.feature_columns, name
        column = schema.feature_columns[name]
        assert column.dtype is expected_dtype, name
        assert column.nullable is expected_nullable, name
        assert column.categorical is expected_categorical, name
|
|
|
|
|
|
def test_to_row_aggregates_measured_pv_arrays() -> None:
    """Measured arrays must roll up into count, total/per-octant power and power-weighted averages."""
    # Arrange — two S-facing arrays (one with 2.04 kW pitch 2 overshading 1; one
    # with 1.86 kW pitch 3 overshading 2) and one NW array (1.0 kW).
    array_specs = (
        # (peak_power, pitch, orientation, overshading)
        (2.04, 2, 5, 1),
        (1.86, 3, 5, 2),
        (1.0, 2, 8, 1),
    )
    arrays = [
        make_pv_array(peak_power=power, pitch=pitch, orientation=orientation, overshading=shading)
        for power, pitch, orientation, shading in array_specs
    ]
    epc = make_minimal_sap10_epc(energy_rating_current=82, photovoltaic_arrays=arrays)

    # Act
    row = EpcMlTransform().to_row(epc)

    # Assert
    assert row["has_pv"] is True
    assert row["pv_capacity_source"] == "measured"
    assert row["pv_array_count"] == 3
    assert row["pv_total_peak_power_kw"] == pytest.approx(4.9)
    # Power by orientation: S = 2.04 + 1.86 = 3.9; NW = 1.0; rest 0.0
    assert row["pv_peak_power_kw_S"] == pytest.approx(3.9)
    assert row["pv_peak_power_kw_NW"] == pytest.approx(1.0)
    empty_octants = ("N", "NE", "E", "SE", "SW", "W")
    for octant in empty_octants:
        assert row[f"pv_peak_power_kw_{octant}"] == 0.0
    # Power-weighted pitch: (2.04*2 + 1.86*3 + 1.0*2) / 4.9 = (4.08 + 5.58 + 2.0) / 4.9 = 11.66/4.9 ≈ 2.380
    expected_pitch = 11.66 / 4.9
    assert row["pv_avg_pitch"] == pytest.approx(expected_pitch)
    # Power-weighted overshading: (2.04*1 + 1.86*2 + 1.0*1) / 4.9 = 6.76 / 4.9 ≈ 1.379
    expected_overshading = 6.76 / 4.9
    assert row["pv_avg_overshading"] == pytest.approx(expected_overshading)
    # No percent_roof_area when measured
    assert row["pv_percent_roof_area"] is None
|
|
|
|
|
|
def test_to_row_uses_percent_roof_area_when_pv_not_measured() -> None:
    """With only a roof-area percentage known, PV is flagged as estimated.

    No measured arrays are supplied, so the array count and total peak power
    are expected to be zero and the weighted averages to be ``None``.
    """
    # Arrange — surveyor couldn't confirm the configuration; only
    # percent_roof_area is known
    epc = make_minimal_sap10_epc(
        energy_rating_current=82,
        photovoltaic_supply_percent_roof_area=25,
    )

    # Act
    features = EpcMlTransform().to_row(epc)

    # Assert — PV is present, sourced from the roof-area estimate
    assert features["has_pv"] is True
    assert features["pv_capacity_source"] == "estimated_from_roof_area"
    assert features["pv_percent_roof_area"] == 25

    # Assert — nothing measured, so no arrays, no power and no averages
    assert features["pv_array_count"] == 0
    assert features["pv_total_peak_power_kw"] == 0.0
    for averaged in ("pv_avg_pitch", "pv_avg_overshading"):
        assert features[averaged] is None
|
|
|
|
|
|
def test_to_row_returns_pv_no_when_no_pv_data() -> None:
    """An EPC with no PV information at all yields the "no PV" feature set."""
    # Arrange — no measured arrays and no percent_roof_area
    epc = make_minimal_sap10_epc(energy_rating_current=82)

    # Act
    features = EpcMlTransform().to_row(epc)

    # Assert — flags and counts
    assert features["has_pv"] is False
    assert features["pv_capacity_source"] == "none"
    assert features["pv_array_count"] == 0
    assert features["pv_total_peak_power_kw"] == 0.0

    # Assert — every orientation bucket is exactly zero
    per_orientation = {
        point: features[f"pv_peak_power_kw_{point}"]
        for point in ("N", "NE", "E", "SE", "S", "SW", "W", "NW")
    }
    assert all(power == 0.0 for power in per_orientation.values()), per_orientation

    # Assert — optional PV descriptors are absent
    for absent in ("pv_percent_roof_area", "pv_avg_pitch", "pv_avg_overshading"):
        assert features[absent] is None
|
|
|
|
|
|
def test_to_row_treats_zero_percent_roof_area_as_no_pv() -> None:
    """A zero roof-area percentage is interpreted as "no PV", not as an estimate."""
    # Arrange — `photovoltaic_supply.none_or_no_details.percent_roof_area = 0`
    # is the canonical "no PV" payload on schema-21 EPCs.
    epc = make_minimal_sap10_epc(
        energy_rating_current=82,
        photovoltaic_supply_percent_roof_area=0,
    )

    # Act
    features = EpcMlTransform().to_row(epc)

    # Assert
    assert features["has_pv"] is False
    assert features["pv_capacity_source"] == "none"
    assert features["pv_percent_roof_area"] is None
|
|
|
|
|
|
# Expected schema entries for the energy-source features.
# Maps feature name → (dtype, nullable, categorical).
_ENERGY_SOURCE_FEATURES_NULLABLE: dict[str, tuple[type, bool, bool]] = {
    # PV battery storage
    "has_pv_battery": (bool, False, False),
    "pv_battery_count": (int, False, False),
    "pv_battery_capacity_kwh": (float, True, False),
    # Wind turbines
    "has_wind_turbine": (bool, False, False),
    "wind_turbine_count": (int, False, False),
    # Supply and metering flags
    "mains_gas": (bool, False, False),
    "electricity_smart_meter_present": (bool, False, False),
    "gas_smart_meter_present": (bool, False, False),
    "is_dwelling_export_capable": (bool, False, False),
}
|
|
|
|
|
|
def test_schema_advertises_energy_source_features() -> None:
    """Each energy-source feature appears in the schema with the expected spec.

    Expectations come from ``_ENERGY_SOURCE_FEATURES_NULLABLE``; the feature
    name is used as the assertion message so a failure identifies the column.
    """
    # Arrange / Act
    feature_columns = EpcMlTransform().schema().feature_columns

    # Assert
    for name, expected in _ENERGY_SOURCE_FEATURES_NULLABLE.items():
        want_dtype, want_nullable, want_categorical = expected
        assert name in feature_columns, name
        spec = feature_columns[name]
        assert spec.dtype is want_dtype, name
        assert spec.nullable is want_nullable, name
        assert spec.categorical is want_categorical, name
|
|
|
|
|
|
def test_to_row_extracts_pv_battery_and_capacity() -> None:
    """Battery count is reported and capacity is the per-unit value times the count."""
    # Arrange — two batteries of 5.0 kWh each
    battery_kwargs = {
        "pv_battery_count": 2,
        "pv_battery_capacity_per_unit_kwh": 5.0,
    }
    epc = make_minimal_sap10_epc(energy_rating_current=82, **battery_kwargs)

    # Act
    features = EpcMlTransform().to_row(epc)

    # Assert — 2 units x 5.0 kWh = 10.0 kWh total
    assert features["has_pv_battery"] is True
    assert features["pv_battery_count"] == 2
    assert features["pv_battery_capacity_kwh"] == pytest.approx(10.0)
|
|
|
|
|
|
def test_to_row_returns_no_pv_battery_when_count_zero() -> None:
    """Without any battery the flag is False, the count 0 and the capacity None."""
    # Arrange — fixture defaults, no battery arguments supplied
    epc = make_minimal_sap10_epc(energy_rating_current=82)

    # Act
    features = EpcMlTransform().to_row(epc)

    # Assert
    assert features["has_pv_battery"] is False
    assert features["pv_battery_count"] == 0
    assert features["pv_battery_capacity_kwh"] is None
|
|
|
|
|
|
def test_to_row_flags_wind_turbine() -> None:
    """A single wind turbine sets the presence flag and is reflected in the count."""
    # Arrange
    epc = make_minimal_sap10_epc(
        energy_rating_current=82,
        wind_turbines_count=1,
    )

    # Act
    features = EpcMlTransform().to_row(epc)

    # Assert
    assert features["has_wind_turbine"] is True
    assert features["wind_turbine_count"] == 1
|
|
|
|
|
|
def test_to_row_extracts_energy_source_booleans() -> None:
    """The four energy-source booleans are passed through to the row as ``True``."""
    # Arrange — mains gas, gas + electricity smart meters, export capable
    enabled_flags = (
        "mains_gas",
        "electricity_smart_meter_present",
        "gas_smart_meter_present",
        "is_dwelling_export_capable",
    )
    epc = make_minimal_sap10_epc(
        energy_rating_current=82,
        **{flag: True for flag in enabled_flags},
    )

    # Act
    features = EpcMlTransform().to_row(epc)

    # Assert — identity check: each must be the bool True, not merely truthy
    for flag in enabled_flags:
        assert features[flag] is True
|
|
|
|
|
|
# Expected schema entries for the ventilation features.
# Maps feature name → (dtype, nullable, categorical).
_VENTILATION_FEATURES_NULLABLE: dict[str, tuple[type, bool, bool]] = {
    # Flagged categorical
    "mechanical_ventilation": (int, True, True),
    "mechanical_vent_duct_type": (int, True, True),
    # Not flagged categorical
    "blocked_chimneys_count": (int, True, False),
    "pressure_test": (int, True, False),
}
|
|
|
|
|
|
def test_schema_advertises_ventilation_features() -> None:
    """Each ventilation feature appears in the schema with the expected spec.

    Expectations come from ``_VENTILATION_FEATURES_NULLABLE``; the feature
    name is used as the assertion message so a failure identifies the column.
    """
    # Arrange / Act
    feature_columns = EpcMlTransform().schema().feature_columns

    # Assert
    for name, expected in _VENTILATION_FEATURES_NULLABLE.items():
        want_dtype, want_nullable, want_categorical = expected
        assert name in feature_columns, name
        spec = feature_columns[name]
        assert spec.dtype is want_dtype, name
        assert spec.nullable is want_nullable, name
        assert spec.categorical is want_categorical, name
|
|
|
|
|
|
def test_to_row_extracts_ventilation_features() -> None:
    """Ventilation inputs set on the EPC come back unchanged in the feature row."""
    # Arrange — MVHR (mechanical_ventilation code 4), duct type 3
    ventilation_inputs = {
        "mechanical_ventilation": 4,
        "mechanical_vent_duct_type": 3,
        "blocked_chimneys_count": 1,
        "pressure_test": 4,
    }
    epc = make_minimal_sap10_epc(energy_rating_current=82, **ventilation_inputs)

    # Act
    features = EpcMlTransform().to_row(epc)

    # Assert — each feature carries the value supplied under the same name
    for name, supplied in ventilation_inputs.items():
        assert features[name] == supplied
|
|
|
|
|
|
def test_to_rows_returns_dataframe_with_one_row_per_property() -> None:
    """``to_rows`` yields a DataFrame with one row per input EPC, in input order."""
    # Arrange — two properties with different SAP scores + floor areas
    expected_rows = [
        (82, 70.0),
        (45, 120.0),
    ]
    epcs = [
        make_minimal_sap10_epc(energy_rating_current=score, total_floor_area_m2=area)
        for score, area in expected_rows
    ]

    # Act
    frame = EpcMlTransform().to_rows(epcs)

    # Assert
    assert isinstance(frame, pd.DataFrame)
    assert len(frame) == 2
    for position, (score, area) in enumerate(expected_rows):
        assert frame.loc[position, "sap_score"] == score
        assert frame.loc[position, "total_floor_area_m2"] == area
|
|
|
|
|
|
def test_to_rows_returns_empty_dataframe_for_empty_input() -> None:
    """An empty input list yields a zero-row DataFrame that still has every column."""
    # Arrange
    transform = EpcMlTransform()

    # Act
    frame = transform.to_rows([])

    # Assert
    assert isinstance(frame, pd.DataFrame)
    assert len(frame) == 0

    # Every advertised feature and target column must be present even though
    # there are no rows.
    schema = transform.schema()
    advertised = [*schema.feature_columns, *schema.target_columns]
    missing = [name for name in advertised if name not in frame.columns]
    assert not missing, missing
|
|
|
|
|
|
def test_to_rows_casts_categorical_columns_to_pd_categorical_dtype() -> None:
    """Feature columns flagged categorical in the schema use a pandas categorical dtype.

    The test first checks that the schema advertises at least one categorical
    feature column; otherwise the per-column loop would run zero times and
    the test would pass without verifying anything.
    """
    # Arrange — two minimal properties with a categorical feature populated
    epcs = [
        make_minimal_sap10_epc(
            energy_rating_current=82, dwelling_type="Mid-terrace house"
        ),
        make_minimal_sap10_epc(
            energy_rating_current=45, dwelling_type="Detached house"
        ),
    ]
    transform = EpcMlTransform()

    # Act
    df = transform.to_rows(epcs)

    # Assert — every column flagged ColumnSpec.categorical=True is a pd.Categorical
    schema = transform.schema()
    categorical_names = [
        name for name, spec in schema.feature_columns.items() if spec.categorical
    ]
    # Guard against a vacuous pass (no categorical columns → nothing checked).
    assert categorical_names, "schema advertises no categorical feature columns"
    for name in categorical_names:
        assert isinstance(df[name].dtype, pd.CategoricalDtype), name
|
|
|
|
|
|
def test_to_row_area_weights_window_u_value_and_solar_transmittance() -> None:
    """Window U-value and solar transmittance are averaged weighted by window area.

    The expected values use only the two windows that carry transmission
    details (weights 2.0 and 1.0); the third window has no details.
    """
    # Arrange — two windows with transmission details; one without.
    wide_window = make_window(
        orientation=5,
        width=2.0,
        height=1.0,
        window_transmission_details=WindowTransmissionDetails(
            u_value=1.4, data_source=2, solar_transmittance=0.72
        ),
    )
    narrow_window = make_window(
        orientation=1,
        width=1.0,
        height=1.0,
        window_transmission_details=WindowTransmissionDetails(
            u_value=2.0, data_source=2, solar_transmittance=0.60
        ),
    )
    undetailed_window = make_window(orientation=3, width=1.0, height=1.0)
    epc = make_minimal_sap10_epc(
        energy_rating_current=82,
        sap_windows=[wide_window, narrow_window, undetailed_window],
    )

    # Act
    features = EpcMlTransform().to_row(epc)

    # Assert
    # Area-weighted u: (1.4 * 2.0 + 2.0 * 1.0) / (2.0 + 1.0) = 4.8 / 3.0 = 1.6
    assert features["window_avg_u_value"] == pytest.approx(1.6)
    # Area-weighted solar transmittance:
    # (0.72 * 2.0 + 0.60 * 1.0) / 3.0 = 2.04 / 3.0 = 0.68
    assert features["window_avg_solar_transmittance"] == pytest.approx(0.68)
|
|
|
|
|
|
def test_to_row_extracts_main_dwelling_wall_roof_floor_fabric_inputs() -> None:
    """Wall, roof and floor fabric inputs of the main building part reach the row.

    Thickness strings such as ``"50mm"`` are expected as integer millimetres.
    Floor construction and insulation are expected from the ``floor=0``
    dimension rather than the ``floor=1`` one.
    """
    # Arrange
    from datatypes.epc.domain.epc_property_data import SapBuildingPart, SapFloorDimension

    # Geometry shared by both storeys; only the floor-specific codes differ.
    storey_geometry = dict(
        room_height_m=2.4,
        total_floor_area_m2=50.0,
        party_wall_length_m=5.0,
        heat_loss_perimeter_m=20.0,
    )
    ground_floor = SapFloorDimension(
        **storey_geometry, floor=0, floor_insulation=2, floor_construction=1
    )
    first_floor = SapFloorDimension(
        **storey_geometry, floor=1, floor_insulation=0, floor_construction=0
    )
    main_part = SapBuildingPart(
        identifier=BuildingPartIdentifier.MAIN,
        construction_age_band="C",
        sap_floor_dimensions=[ground_floor, first_floor],
        # walls
        wall_construction=3,
        wall_insulation_type=4,
        wall_insulation_thickness="50mm",
        wall_thickness_measured=True,
        wall_thickness_mm=300,
        wall_dry_lined=False,
        party_wall_construction=2,
        # roof
        roof_construction=5,
        roof_insulation_location=6,
        roof_insulation_thickness="270mm",
        # floor
        floor_heat_loss=7,
        floor_insulation_thickness="100mm",
    )
    epc = make_minimal_sap10_epc(
        energy_rating_current=70, sap_building_parts=[main_part]
    )

    # Act
    features = EpcMlTransform().to_row(epc)

    # Assert — dry-lined flag must be the bool False itself, not merely falsy
    assert features["main_dwelling_wall_dry_lined"] is False

    # Assert — remaining wall, roof and floor values compare by equality
    expected_values = {
        # wall fabric
        "main_dwelling_wall_insulation_type": 4,
        "main_dwelling_wall_insulation_thickness_mm": 50,
        "main_dwelling_wall_thickness_mm": 300,
        "main_dwelling_party_wall_construction": 2,
        # roof fabric
        "main_dwelling_roof_insulation_location": 6,
        "main_dwelling_roof_insulation_thickness_mm": 270,
        # floor fabric, taken from ground-floor SapFloorDimension
        "main_dwelling_floor_construction": 1,
        "main_dwelling_floor_insulation": 2,
        "main_dwelling_floor_insulation_thickness_mm": 100,
        "main_dwelling_floor_heat_loss": 7,
    }
    for feature_name, expected in expected_values.items():
        assert features[feature_name] == expected
|
|
|
|
|
|
def test_to_row_parses_no_insulation_sentinel_as_zero_mm() -> None:
    """The ``"NI"`` thickness sentinel maps to 0 mm; ``"ND"`` maps to ``None``."""
    # Arrange
    from datatypes.epc.domain.epc_property_data import SapBuildingPart

    main_part = SapBuildingPart(
        identifier=BuildingPartIdentifier.MAIN,
        construction_age_band="C",
        sap_floor_dimensions=[],
        wall_construction=3,
        wall_insulation_type=4,
        wall_thickness_measured=True,
        party_wall_construction=2,
        wall_insulation_thickness="NI",
        roof_insulation_thickness="ND",  # unparseable sentinel
    )
    epc = make_minimal_sap10_epc(
        energy_rating_current=70, sap_building_parts=[main_part]
    )

    # Act
    features = EpcMlTransform().to_row(epc)

    # Assert
    assert features["main_dwelling_wall_insulation_thickness_mm"] == 0
    assert features["main_dwelling_roof_insulation_thickness_mm"] is None
|
|
|
|
|
|
def test_schema_advertises_envelope_heat_loss_feature() -> None:
    """The schema lists ``envelope_heat_loss_w_per_k`` as a non-nullable float."""
    # Arrange / Act
    feature_columns = EpcMlTransform().schema().feature_columns

    # Assert
    feature_name = "envelope_heat_loss_w_per_k"
    assert feature_name in feature_columns
    spec = feature_columns[feature_name]
    assert spec.dtype is float
    assert spec.nullable is False
|
|
|
|
|
|
def test_to_row_emits_positive_envelope_heat_loss_for_sap10_epc() -> None:
    """A 100 m² age-band-G dwelling gets an envelope heat loss within a sanity band.

    This is a plausibility check, not an exact value: the result must lie
    strictly between 100 and 400.
    """
    # Arrange
    from domain.sap10_ml.tests._fixtures import make_building_part, make_floor_dimension

    ground_floor = make_floor_dimension(
        total_floor_area_m2=100.0,
        room_height_m=2.5,
        party_wall_length_m=5.0,
        heat_loss_perimeter_m=40.0,
        floor=0,
    )
    main_part = make_building_part(
        identifier=BuildingPartIdentifier.MAIN,
        construction_age_band="G",
        wall_construction=4,
        wall_insulation_type=4,
        party_wall_construction=1,
        roof_construction=4,
        floor_dimensions=[ground_floor],
    )
    epc = make_minimal_sap10_epc(
        energy_rating_current=70,
        sap_building_parts=[main_part],
        total_floor_area_m2=100.0,
        country_code="ENG",
    )

    # Act
    features = EpcMlTransform().to_row(epc)

    # Assert — envelope_heat_loss in plausible range for a 100 m^2 age-G semi.
    assert 100.0 < features["envelope_heat_loss_w_per_k"] < 400.0
|
|
|
|
|
|
def test_to_row_threads_top_level_fabric_and_demand_scalars() -> None:
    """Top-level EPC scalars and bath counts from ``sap_heating`` reach the row.

    The values are applied to a copy of the minimal fixture via
    ``dataclasses.replace``.
    """
    # Arrange
    from dataclasses import replace

    template = make_minimal_sap10_epc(energy_rating_current=72)
    heating_with_baths = replace(
        template.sap_heating, number_baths=2, number_baths_wwhrs=1
    )
    epc = replace(
        template,
        multiple_glazed_proportion=85,
        extract_fans_count=2,
        sap_heating=heating_with_baths,
    )

    # Act
    features = EpcMlTransform().to_row(epc)

    # Assert
    expected_values = {
        "multiple_glazed_proportion": 85,
        "extract_fans_count": 2,
        "number_baths": 2,
        "number_baths_wwhrs": 1,
    }
    for feature_name, expected in expected_values.items():
        assert features[feature_name] == expected
|