Model/domain/sap10_ml/tests/test_transform.py
Khalim Conn-Kowlessar 68401c517a refactor: lift-and-shift packages/domain/src/domain/ml → domain/sap10_ml
Sibling migration to the sap10_calculator move — `domain.ml` now lives
at the root-level layout (`domain/sap10_ml/`) matching the pattern
already used by `domain.addresses`, `domain.tasks`, `domain.postcode`,
and `domain.sap10_calculator`.

Changes:

- `git mv packages/domain/src/domain/ml → domain/sap10_ml` (19 files;
  history preserved).
- Subpackage rename: `domain.ml` → `domain.sap10_ml`. 32 references
  rewritten across .py and .md files: 11 internal + 21 external
  (datatypes/epc/domain/mapper.py, 14 files in domain/sap10_calculator,
  2 backend tests, 2 ADRs, 1 README, 1 design doc).
- Path-string updates: `pytest.ini` testpath
  `packages/domain/src/domain/ml/tests` → `domain/sap10_ml/tests` so
  ML tests stay in the default auto-discovered sweep. `CONTEXT.md`
  also updated.

`packages/domain/src/domain/` is now empty — the workspace `domain/`
tree has been fully migrated. Together with the `domain/__init__.py`
deletions from the sap10_calculator commit (29ac35cc), `domain` is
now a single root-level namespace package with subpackages
{addresses, sap10_calculator, sap10_ml, tasks} + the standalone
`postcode.py` module.

Verified:

- Focused sweep (backend mapper-chain + sap10_calculator worksheet
  e2e + golden fixtures): 99 passed / 19 failed — identical baseline.
- Wider sweep (all sap10_calculator + sap10_ml): 1654 passed / 20
  failed (same pre-existing failures).
- domain/sap10_ml/tests: 210/210 PASSED at new path.
- Pyright net-zero: heat_transmission.py 13, cert_to_inputs.py 35,
  mapper.py 33, rdsap_uvalues.py 1 (all unchanged from baseline).

Note: `packages/domain/pyproject.toml` still declares
`packages = ["src/domain"]` for the hatchling wheel — that target
directory is now empty and the wheel build is effectively a no-op.
Retiring the workspace package or repointing the wheel is a follow-up.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-26 13:01:35 +00:00

1321 lines
44 KiB
Python

"""Tests for EpcMlTransform v0.1.0 — schema-contract surface and target extraction."""
import pandas as pd
import pytest
from datatypes.epc.domain.epc_property_data import (
BuildingPartIdentifier,
SapRoomInRoof,
WindowTransmissionDetails,
)
from domain.sap10_ml.schema import ColumnSpec, TransformSchema
from domain.sap10_ml.tests._fixtures import (
make_building_part,
make_floor_dimension,
make_main_heating_detail,
make_minimal_sap10_epc,
make_pv_array,
make_sap_heating,
make_window,
)
from domain.sap10_ml.transform import EpcMlTransform
# Python type each target column is expected to advertise in the transform schema.
_EXPECTED_TARGET_DTYPES: dict[str, type] = dict(
    sap_score=int,
    co2_emissions=float,
    peui_raw=int,
    peui_ucl=float,
    space_heating_kwh=float,
    hot_water_kwh=float,
)
def test_transform_advertises_version_and_target_columns() -> None:
    """The schema reports the transform's own version and every expected target column.

    The advertised version is compared against ``EpcMlTransform.VERSION`` only.
    A second, hard-coded version literal would have to be edited on every
    version bump and can silently drift from the class constant.
    """
    # Arrange
    transform = EpcMlTransform()

    # Act
    schema = transform.schema()

    # Assert
    assert isinstance(schema, TransformSchema)
    assert schema.transform_version == EpcMlTransform.VERSION
    assert set(schema.target_columns.keys()) == set(_EXPECTED_TARGET_DTYPES.keys())
    for target_name, expected_dtype in _EXPECTED_TARGET_DTYPES.items():
        column = schema.target_columns[target_name]
        assert isinstance(column, ColumnSpec), target_name
        assert column.dtype is expected_dtype, target_name
def test_to_row_extracts_targets_from_epc_property_data() -> None:
    """Values given to the EPC fixture surface under the matching target columns."""
    certificate = make_minimal_sap10_epc(
        energy_rating_current=82,
        co2_emissions_current=2.7,
        energy_consumption_current=232,
        space_heating_kwh=10128.81,
        water_heating_kwh=2166.19,
    )

    record = EpcMlTransform().to_row(certificate)

    expected_targets = {
        "sap_score": 82,
        "co2_emissions": 2.7,
        "peui_raw": 232,
        "space_heating_kwh": 10128.81,
        "hot_water_kwh": 2166.19,
    }
    for target_name, expected_value in expected_targets.items():
        assert record[target_name] == expected_value, target_name
def test_to_row_applies_ucl_correction_in_band_e() -> None:
    """A band-E certificate gets its PEUI lowered by the per-band UCL correction."""
    # SAP 45 = band E; Few et al. 2023 band-E correction is non-trivial.
    band_e_epc = make_minimal_sap10_epc(
        energy_consumption_current=300,
        energy_rating_current=45,
    )

    record = EpcMlTransform().to_row(band_e_epc)

    # Band E: gradient=-0.70, intercept=160 → cd = -0.70*300 + 160 = -50,
    # so adjusted = 300 + (-50) = 250.0
    assert record["peui_ucl"] == 250.0
def test_to_row_clamps_ucl_correction_when_band_b_would_increase_peui() -> None:
    """A positive band-B correction is clamped, so ``peui_ucl`` equals the raw PEUI."""
    # SAP 82 = band B. For this PEUI the per-band linear correction yields a
    # *positive* consumption_difference, which must be clamped to zero
    # (EPCs over-predict only — we never adjust upwards).
    band_b_epc = make_minimal_sap10_epc(
        energy_consumption_current=232,
        energy_rating_current=82,
    )

    record = EpcMlTransform().to_row(band_b_epc)

    # Band B: gradient=-0.10, intercept=28 → cd = -0.10*232 + 28 = +4.8 → clamp to 0,
    # so adjusted = 232 + 0 = 232.0
    assert record["peui_ucl"] == 232.0
def test_schema_advertises_total_floor_area_m2_feature() -> None:
    """``total_floor_area_m2`` is advertised as a non-nullable float feature."""
    feature_columns = EpcMlTransform().schema().feature_columns

    assert "total_floor_area_m2" in feature_columns
    spec = feature_columns["total_floor_area_m2"]
    assert isinstance(spec, ColumnSpec)
    assert spec.dtype is float
    assert spec.nullable is False
def test_to_row_extracts_total_floor_area_m2() -> None:
    """The row carries the fixture's default total floor area."""
    certificate = make_minimal_sap10_epc(energy_rating_current=82)

    record = EpcMlTransform().to_row(certificate)

    # make_minimal_sap10_epc sets total_floor_area_m2=70.0 by default
    default_floor_area_m2 = 70.0
    assert record["total_floor_area_m2"] == default_floor_area_m2
# Count-style features; each one is expected to be advertised with dtype ``int``.
_EXPECTED_COUNT_FEATURES: dict[str, type] = dict.fromkeys(
    (
        "door_count",
        "habitable_rooms_count",
        "heated_rooms_count",
        "wet_rooms_count",
        "extensions_count",
        "open_chimneys_count",
        "insulated_door_count",
        "cfl_fixed_lighting_bulbs_count",
        "led_fixed_lighting_bulbs_count",
        "incandescent_fixed_lighting_bulbs_count",
    ),
    int,
)
def test_schema_advertises_count_features() -> None:
    """Every count feature is advertised with its expected dtype and is non-nullable."""
    feature_columns = EpcMlTransform().schema().feature_columns

    for count_name, dtype in _EXPECTED_COUNT_FEATURES.items():
        assert count_name in feature_columns, count_name
        spec = feature_columns[count_name]
        assert isinstance(spec, ColumnSpec)
        assert spec.dtype is dtype
        assert spec.nullable is False
def test_to_row_extracts_count_features() -> None:
    """Each count passed to the EPC fixture is reported under the same name in the row."""
    # The fixture keyword names and the row column names are identical, so one
    # mapping drives both the arrangement and the assertions.
    counts = {
        "door_count": 3,
        "habitable_rooms_count": 5,
        "heated_rooms_count": 4,
        "wet_rooms_count": 1,
        "extensions_count": 1,
        "open_chimneys_count": 0,
        "insulated_door_count": 2,
        "cfl_fixed_lighting_bulbs_count": 0,
        "led_fixed_lighting_bulbs_count": 8,
        "incandescent_fixed_lighting_bulbs_count": 2,
    }
    certificate = make_minimal_sap10_epc(energy_rating_current=82, **counts)

    record = EpcMlTransform().to_row(certificate)

    for count_name, expected_count in counts.items():
        assert record[count_name] == expected_count, count_name
# Boolean features checked by the schema test as non-nullable ``bool`` columns.
_EXPECTED_FLAT_BOOLEAN_FEATURES: tuple[str, ...] = tuple(
    [
        "solar_water_heating",
        "has_hot_water_cylinder",
        "has_fixed_air_conditioning",
    ]
)
# Integer features checked by the schema test as nullable ``int`` columns.
_EXPECTED_OPTIONAL_INT_FEATURES: tuple[str, ...] = ("percent_draughtproofed",)
def test_schema_advertises_boolean_and_optional_int_features() -> None:
    """Flat booleans are non-nullable ``bool``; optional ints are nullable ``int``."""
    feature_columns = EpcMlTransform().schema().feature_columns

    # (feature names, expected dtype, expected nullability)
    expectations = (
        (_EXPECTED_FLAT_BOOLEAN_FEATURES, bool, False),
        (_EXPECTED_OPTIONAL_INT_FEATURES, int, True),
    )
    for names, dtype, nullable in expectations:
        for name in names:
            assert name in feature_columns, name
            spec = feature_columns[name]
            assert spec.dtype is dtype
            assert spec.nullable is nullable
def test_to_row_extracts_boolean_and_optional_int_features() -> None:
    """Boolean flags come back as real ``bool`` singletons and the optional int as given."""
    flags = {
        "solar_water_heating": True,
        "has_hot_water_cylinder": True,
        "has_fixed_air_conditioning": False,
    }
    certificate = make_minimal_sap10_epc(
        energy_rating_current=82,
        percent_draughtproofed=100,
        **flags,
    )

    record = EpcMlTransform().to_row(certificate)

    # Identity comparison: the row must hold True/False themselves, not merely
    # truthy/falsy stand-ins.
    for flag_name, expected_flag in flags.items():
        assert record[flag_name] is expected_flag, flag_name
    assert record["percent_draughtproofed"] == 100
# Categorical string features the schema test expects to be nullable.
_NULLABLE_CATEGORICAL_FEATURES: tuple[str, ...] = tuple(
    ["property_type", "built_form", "region_code", "country_code"]
)
# Categorical string features the schema test expects to be non-nullable.
_NON_NULLABLE_CATEGORICAL_FEATURES: tuple[str, ...] = tuple(
    ["dwelling_type", "transaction_type"]
)
def test_schema_advertises_categorical_features() -> None:
    """Categorical features are ``str`` columns flagged categorical, with the expected nullability."""
    feature_columns = EpcMlTransform().schema().feature_columns

    # (feature names, expected nullability)
    groups = (
        (_NULLABLE_CATEGORICAL_FEATURES, True),
        (_NON_NULLABLE_CATEGORICAL_FEATURES, False),
    )
    for names, nullable in groups:
        for name in names:
            assert name in feature_columns, name
            spec = feature_columns[name]
            assert spec.dtype is str
            assert spec.categorical is True
            assert spec.nullable is nullable
def test_to_row_extracts_categorical_features() -> None:
    """Categorical inputs are reported verbatim, and the row has no ``tenure`` column."""
    categoricals = {
        "dwelling_type": "End-terrace house",
        "transaction_type": "8",
        "property_type": "0",
        "built_form": "2",
        "region_code": "6",
        "country_code": "ENG",
    }
    certificate = make_minimal_sap10_epc(energy_rating_current=82, **categoricals)

    record = EpcMlTransform().to_row(certificate)

    assert "tenure" not in record
    for feature_name, expected_value in categoricals.items():
        assert record[feature_name] == expected_value, feature_name
# name → (dtype, nullable)
_WINDOW_PHYSICS_FEATURES_NULLABLE: dict[str, tuple[type, bool]] = {
    "window_count": (int, False),
    "window_total_area_m2": (float, False),
    # One area column per compass octant, all with the same expectations.
    **{
        f"window_area_orientation_{octant}": (float, False)
        for octant in ("N", "NE", "E", "SE", "S", "SW", "W", "NW")
    },
    "window_pct_draught_proofed": (float, True),
    "window_avg_u_value": (float, True),
    "window_avg_solar_transmittance": (float, True),
}
def test_schema_advertises_window_physics_features() -> None:
    """Window physics features are advertised as non-categorical with the expected dtype and nullability."""
    feature_columns = EpcMlTransform().schema().feature_columns

    for name, expectation in _WINDOW_PHYSICS_FEATURES_NULLABLE.items():
        dtype, nullable = expectation
        assert name in feature_columns, name
        spec = feature_columns[name]
        assert spec.dtype is dtype
        assert spec.nullable is nullable
        assert spec.categorical is False
def test_to_row_aggregates_window_physics_and_orientation() -> None:
    """Window count, total area, per-octant area and draught-proofing share are aggregated."""
    # Three windows: 2.0 m² S, 1.5 m² N, 1.0 m² E (orientations 5/1/3).
    south = make_window(orientation=5, width=1.0, height=2.0, draught_proofed=True)
    north = make_window(orientation=1, width=1.0, height=1.5, draught_proofed=False)
    east = make_window(orientation=3, width=1.0, height=1.0, draught_proofed=True)
    certificate = make_minimal_sap10_epc(
        energy_rating_current=82,
        sap_windows=[south, north, east],
    )

    record = EpcMlTransform().to_row(certificate)

    assert record["window_count"] == 3
    assert record["window_total_area_m2"] == pytest.approx(4.5)

    # Octants with a window are compared approximately; the rest must be exactly zero.
    glazed_octants = {"N": 1.5, "E": 1.0, "S": 2.0}
    for octant in ("N", "NE", "E", "SE", "S", "SW", "W", "NW"):
        area = record[f"window_area_orientation_{octant}"]
        if octant in glazed_octants:
            assert area == pytest.approx(glazed_octants[octant]), octant
        else:
            assert area == 0.0, octant

    # area-weighted draught-proofing: (2.0 + 1.0) / 4.5 * 100 = 66.66...%
    assert record["window_pct_draught_proofed"] == pytest.approx(66.666, abs=0.01)
    for unset_name in ("window_avg_u_value", "window_avg_solar_transmittance"):
        assert record[unset_name] is None, unset_name
def test_to_row_skips_windows_with_unrecorded_orientation() -> None:
    """A window with orientation 0 counts towards totals but towards no octant."""
    # Two S windows + one with orientation=0 (horizontal/unrecorded);
    # the unrecorded one contributes to count and total_area but to no octant.
    # Each entry is (orientation, width, height).
    window_dims = [(5, 1.0, 2.0), (5, 1.0, 1.0), (0, 1.0, 0.5)]
    windows = [
        make_window(orientation=orientation, width=width, height=height)
        for orientation, width, height in window_dims
    ]
    certificate = make_minimal_sap10_epc(energy_rating_current=82, sap_windows=windows)

    record = EpcMlTransform().to_row(certificate)

    assert record["window_count"] == 3
    assert record["window_total_area_m2"] == pytest.approx(3.5)
    assert record["window_area_orientation_S"] == pytest.approx(3.0)

    # The horizontal window's 0.5 m² is not assigned to any octant
    octant_area_total = 0
    for octant in ("N", "NE", "E", "SE", "S", "SW", "W", "NW"):
        octant_area_total += record[f"window_area_orientation_{octant}"]
    assert octant_area_total == pytest.approx(3.0)
def test_to_row_returns_window_zeros_for_property_with_no_windows() -> None:
    """With no windows, counts and areas are zero and the averaged features are ``None``."""
    certificate = make_minimal_sap10_epc(energy_rating_current=82)

    record = EpcMlTransform().to_row(certificate)

    assert record["window_count"] == 0
    assert record["window_total_area_m2"] == 0.0
    octant_columns = [
        f"window_area_orientation_{octant}"
        for octant in ("N", "NE", "E", "SE", "S", "SW", "W", "NW")
    ]
    for column_name in octant_columns:
        assert record[column_name] == 0.0
    for averaged_name in (
        "window_pct_draught_proofed",
        "window_avg_u_value",
        "window_avg_solar_transmittance",
    ):
        assert record[averaged_name] is None
# Known glazed_type codes 1..15; each has its own window_pct_glazed_type_<code> column.
_GLAZED_TYPE_CODES: tuple[int, ...] = tuple(range(1, 16))
def test_schema_advertises_window_categorical_share_features() -> None:
    """Glazed-type shares, the ``_other`` bucket and the PVC-frame share are advertised."""
    feature_columns = EpcMlTransform().schema().feature_columns

    # One float share per known glazed_type code ...
    for glazed_code in _GLAZED_TYPE_CODES:
        share_name = f"window_pct_glazed_type_{glazed_code}"
        assert share_name in feature_columns, share_name
        spec = feature_columns[share_name]
        assert spec.dtype is float
        assert spec.nullable is False
        assert spec.categorical is False

    # ... plus `_other`, plus the pvc_frame share.
    assert "window_pct_glazed_type_other" in feature_columns
    assert "window_pct_pvc_frame" in feature_columns
    assert feature_columns["window_pct_pvc_frame"].dtype is float
    assert feature_columns["window_pct_pvc_frame"].nullable is True
def test_to_row_aggregates_glazed_type_and_pvc_frame_shares() -> None:
    """Glazed-type and PVC-frame shares are weighted by window area."""
    # Three windows: 3.0 m² glazed_type=2 PVC, 1.5 m² glazed_type=13 PVC,
    # 0.5 m² glazed_type=5 (single, no PVC). Total area = 5.0 m².
    # Each entry is (width, height, glazing_type, frame_material).
    window_specs = [
        (1.5, 2.0, 2, "PVC"),
        (1.0, 1.5, 13, "PVC"),
        (0.5, 1.0, 5, None),
    ]
    windows = [
        make_window(width=width, height=height, glazing_type=glazing, frame_material=frame)
        for width, height, glazing, frame in window_specs
    ]
    certificate = make_minimal_sap10_epc(energy_rating_current=82, sap_windows=windows)

    record = EpcMlTransform().to_row(certificate)

    # Shares (area-weighted) — 3.0/5.0=0.6 type 2; 1.5/5.0=0.3 type 13; 0.5/5.0=0.1 type 5.
    # All other known glazed_type codes are zero.
    expected_shares = {2: 0.6, 13: 0.3, 5: 0.1}
    for code in _GLAZED_TYPE_CODES:
        share = record[f"window_pct_glazed_type_{code}"]
        if code in expected_shares:
            assert share == pytest.approx(expected_shares[code]), code
        else:
            assert share == 0.0, code
    assert record["window_pct_glazed_type_other"] == 0.0

    # PVC frame area share: (3.0 + 1.5) / 5.0 = 0.9
    assert record["window_pct_pvc_frame"] == pytest.approx(0.9)
def test_to_row_routes_unknown_glazed_type_to_other_bucket() -> None:
    """A glazing code outside the known set lands in ``window_pct_glazed_type_other``."""
    known = make_window(width=2.0, height=1.0, glazing_type=2, frame_material="PVC")
    # glazing_type=99 is not in the SAP10 enum 1-15
    unknown = make_window(width=1.0, height=1.0, glazing_type=99, frame_material="PVC")
    certificate = make_minimal_sap10_epc(
        energy_rating_current=82,
        sap_windows=[known, unknown],
    )

    record = EpcMlTransform().to_row(certificate)

    # Total area = 3.0; known type 2 = 2.0/3.0; unknown 99 → _other = 1.0/3.0
    assert record["window_pct_glazed_type_2"] == pytest.approx(2 / 3)
    assert record["window_pct_glazed_type_other"] == pytest.approx(1 / 3)
def test_to_row_returns_window_share_zeros_for_property_with_no_windows() -> None:
    """With no windows, every glazed-type share is zero and the PVC share is ``None``."""
    certificate = make_minimal_sap10_epc(energy_rating_current=82)

    record = EpcMlTransform().to_row(certificate)

    share_columns = [f"window_pct_glazed_type_{code}" for code in _GLAZED_TYPE_CODES]
    share_columns.append("window_pct_glazed_type_other")
    for column_name in share_columns:
        assert record[column_name] == 0.0
    assert record["window_pct_pvc_frame"] is None
# name → (dtype, nullable, categorical)
_BUILDING_PART_FEATURES_NULLABLE: dict[str, tuple[type, bool, bool]] = dict(
    building_parts_count=(int, False, False),
    total_heat_loss_perimeter_m=(float, False, False),
    total_party_wall_length_m=(float, False, False),
    total_floor_area_from_parts_m2=(float, False, False),
    avg_room_height_m=(float, True, False),
    main_dwelling_heat_loss_perimeter_m=(float, True, False),
    main_dwelling_party_wall_length_m=(float, True, False),
    main_dwelling_total_floor_area_m2=(float, True, False),
    main_dwelling_avg_room_height_m=(float, True, False),
    main_dwelling_has_room_in_roof=(bool, True, False),
    main_dwelling_construction_age_band=(str, True, True),
    main_dwelling_wall_construction=(int, True, True),
    main_dwelling_roof_construction=(int, True, True),
)
def test_schema_advertises_building_part_features() -> None:
    """Building-part features are advertised with the expected dtype, nullability and categorical flag."""
    feature_columns = EpcMlTransform().schema().feature_columns

    for name, expectation in _BUILDING_PART_FEATURES_NULLABLE.items():
        dtype, nullable, categorical = expectation
        assert name in feature_columns, name
        spec = feature_columns[name]
        assert spec.dtype is dtype, name
        assert spec.nullable is nullable, name
        assert spec.categorical is categorical, name
def test_to_row_aggregates_building_parts_with_main_dwelling_carveout() -> None:
    """Aggregates span all building parts, while ``main_dwelling_*`` covers only the main part."""
    # Main Dwelling: two floors, age band B, wall 3, roof 4.
    main_lower_floor = make_floor_dimension(
        total_floor_area_m2=30.0,
        room_height_m=2.5,
        party_wall_length_m=6.0,
        heat_loss_perimeter_m=20.0,
    )
    main_upper_floor = make_floor_dimension(
        total_floor_area_m2=28.0,
        room_height_m=2.4,
        party_wall_length_m=6.0,
        heat_loss_perimeter_m=18.0,
    )
    main_part = make_building_part(
        identifier=BuildingPartIdentifier.MAIN,
        construction_age_band="B",
        wall_construction=3,
        roof_construction=4,
        floor_dimensions=[main_lower_floor, main_upper_floor],
    )
    # One single-floor extension with different categoricals.
    extension_floor = make_floor_dimension(
        total_floor_area_m2=12.0,
        room_height_m=2.6,
        party_wall_length_m=0.0,
        heat_loss_perimeter_m=10.0,
    )
    extension_part = make_building_part(
        identifier=BuildingPartIdentifier.EXTENSION_1,
        construction_age_band="L",
        wall_construction=4,
        roof_construction=5,
        floor_dimensions=[extension_floor],
    )
    certificate = make_minimal_sap10_epc(
        energy_rating_current=82,
        sap_building_parts=[main_part, extension_part],
    )

    record = EpcMlTransform().to_row(certificate)

    # Cross-all aggregates
    assert record["building_parts_count"] == 2
    cross_part_totals = {
        "total_heat_loss_perimeter_m": 48.0,
        "total_party_wall_length_m": 12.0,
        "total_floor_area_from_parts_m2": 70.0,
    }
    for name, expected_total in cross_part_totals.items():
        assert record[name] == pytest.approx(expected_total), name
    # avg_room_height area-weighted across all floors: (2.5*30 + 2.4*28 + 2.6*12) / 70
    # = (75 + 67.2 + 31.2) / 70 = 173.4 / 70 = 2.4771...
    assert record["avg_room_height_m"] == pytest.approx(2.4771, abs=0.001)

    # Main Dwelling aggregates
    main_totals = {
        "main_dwelling_heat_loss_perimeter_m": 38.0,
        "main_dwelling_party_wall_length_m": 12.0,
        "main_dwelling_total_floor_area_m2": 58.0,
    }
    for name, expected_total in main_totals.items():
        assert record[name] == pytest.approx(expected_total), name
    # main avg height = (2.5*30 + 2.4*28) / 58 = (75 + 67.2) / 58 = 142.2 / 58 = 2.4517
    assert record["main_dwelling_avg_room_height_m"] == pytest.approx(2.4517, abs=0.001)
    assert record["main_dwelling_has_room_in_roof"] is False

    # Main Dwelling categoricals
    assert record["main_dwelling_construction_age_band"] == "B"
    assert record["main_dwelling_wall_construction"] == 3
    assert record["main_dwelling_roof_construction"] == 4
def test_to_row_flags_room_in_roof_when_main_dwelling_has_it() -> None:
    """A main part carrying a ``SapRoomInRoof`` sets ``main_dwelling_has_room_in_roof`` to True."""
    room_in_roof = SapRoomInRoof(floor_area=15.0, construction_age_band="B")
    main_part = make_building_part(
        identifier=BuildingPartIdentifier.MAIN,
        sap_room_in_roof=room_in_roof,
    )
    certificate = make_minimal_sap10_epc(
        energy_rating_current=82,
        sap_building_parts=[main_part],
    )

    record = EpcMlTransform().to_row(certificate)

    assert record["main_dwelling_has_room_in_roof"] is True
def test_to_row_returns_building_part_nones_when_no_main_dwelling_identified() -> None:
    """Without a main-dwelling part, totals still populate but ``main_dwelling_*`` is ``None``."""
    # Single part with identifier that doesn't match "Main Dwelling"
    only_extension = make_building_part(identifier=BuildingPartIdentifier.EXTENSION_1)
    certificate = make_minimal_sap10_epc(
        energy_rating_current=82,
        sap_building_parts=[only_extension],
    )

    record = EpcMlTransform().to_row(certificate)

    # Cross-all aggregates still populate
    assert record["building_parts_count"] == 1
    assert record["total_heat_loss_perimeter_m"] == pytest.approx(20.0)

    # Main-dwelling-specific columns are None — honest about data quality
    main_dwelling_suffixes = (
        "heat_loss_perimeter_m",
        "party_wall_length_m",
        "total_floor_area_m2",
        "avg_room_height_m",
        "has_room_in_roof",
        "construction_age_band",
        "wall_construction",
        "roof_construction",
    )
    for suffix in main_dwelling_suffixes:
        assert record[f"main_dwelling_{suffix}"] is None, suffix
def test_to_row_returns_building_part_zeros_for_property_with_no_parts() -> None:
    """With no building parts, totals are zero and the derived columns checked here are ``None``."""
    certificate = make_minimal_sap10_epc(energy_rating_current=82)

    record = EpcMlTransform().to_row(certificate)

    assert record["building_parts_count"] == 0
    for total_name in (
        "total_heat_loss_perimeter_m",
        "total_party_wall_length_m",
        "total_floor_area_from_parts_m2",
    ):
        assert record[total_name] == 0.0, total_name
    for unset_name in (
        "avg_room_height_m",
        "main_dwelling_heat_loss_perimeter_m",
        "main_dwelling_construction_age_band",
        "main_dwelling_wall_construction",
    ):
        assert record[unset_name] is None, unset_name
# name → (dtype, nullable, categorical)
_HEATING_FEATURES_NULLABLE: dict[str, tuple[type, bool, bool]] = dict(
    main_heating_count=(int, False, False),
    primary_main_fuel_type=(int, True, True),
    primary_heat_emitter_type=(int, True, True),
    primary_main_heating_control=(int, True, True),
    primary_main_heating_category=(int, True, True),
    primary_has_fghrs=(bool, True, False),
    primary_fan_flue_present=(bool, True, False),
    primary_boiler_flue_type=(int, True, True),
    primary_central_heating_pump_age=(int, True, True),
    water_heating_code=(int, True, True),
    water_heating_fuel=(int, True, True),
    cylinder_size=(int, True, False),
    cylinder_insulation_thickness_mm=(int, True, False),
    has_secondary_heating=(bool, False, False),
    secondary_fuel_type=(int, True, True),
)
def test_schema_advertises_heating_features() -> None:
    """Heating features are advertised with the expected dtype, nullability and categorical flag."""
    advertised = EpcMlTransform().schema().feature_columns

    for feature_name in _HEATING_FEATURES_NULLABLE:
        dtype, nullable, categorical = _HEATING_FEATURES_NULLABLE[feature_name]
        assert feature_name in advertised, feature_name
        spec = advertised[feature_name]
        assert spec.dtype is dtype, feature_name
        assert spec.nullable is nullable, feature_name
        assert spec.categorical is categorical, feature_name
def test_to_row_extracts_primary_heating_from_first_main_heating_detail() -> None:
    """Fields of the sole main-heating detail surface as ``primary_*`` row columns."""
    # Mains-gas boiler with a fan flue, modern control, no FGHRS.
    detail_fields = {
        "main_fuel_type": 26,  # mains gas (not community)
        "heat_emitter_type": 1,
        "main_heating_control": 2106,
        "main_heating_category": 2,
        "has_fghrs": False,
        "fan_flue_present": True,
        "boiler_flue_type": 2,
        "central_heating_pump_age": 0,
    }
    primary_detail = make_main_heating_detail(**detail_fields)
    certificate = make_minimal_sap10_epc(
        energy_rating_current=82,
        sap_heating=make_sap_heating(main_heating_details=[primary_detail]),
    )

    record = EpcMlTransform().to_row(certificate)

    assert record["main_heating_count"] == 1
    # Each detail field appears in the row as "primary_<field>". Booleans are
    # compared by identity so that 0/1 cannot masquerade as False/True.
    for field_name, expected_value in detail_fields.items():
        observed = record[f"primary_{field_name}"]
        if isinstance(expected_value, bool):
            assert observed is expected_value, field_name
        else:
            assert observed == expected_value, field_name
def test_to_row_extracts_water_heating_fields() -> None:
    """Water-heating and cylinder fields are reported under the same names in the row."""
    water_fields = {
        "water_heating_code": 901,
        "water_heating_fuel": 26,
        "cylinder_size": 2,
        "cylinder_insulation_thickness_mm": 38,
    }
    certificate = make_minimal_sap10_epc(
        energy_rating_current=82,
        sap_heating=make_sap_heating(**water_fields),
    )

    record = EpcMlTransform().to_row(certificate)

    for field_name, expected_value in water_fields.items():
        assert record[field_name] == expected_value, field_name
def test_to_row_flags_secondary_heating_when_present() -> None:
    """A secondary fuel type sets ``has_secondary_heating`` and is reported as given."""
    # Secondary heating: bottled-LPG (code 38)
    heating = make_sap_heating(secondary_fuel_type=38)
    certificate = make_minimal_sap10_epc(energy_rating_current=82, sap_heating=heating)

    record = EpcMlTransform().to_row(certificate)

    assert record["has_secondary_heating"] is True
    assert record["secondary_fuel_type"] == 38
def test_to_row_returns_no_secondary_heating_when_absent() -> None:
    """With no secondary fuel type, the flag is False and the fuel column is ``None``."""
    heating = make_sap_heating(secondary_fuel_type=None)
    certificate = make_minimal_sap10_epc(energy_rating_current=82, sap_heating=heating)

    record = EpcMlTransform().to_row(certificate)

    assert record["has_secondary_heating"] is False
    assert record["secondary_fuel_type"] is None
def test_to_row_returns_primary_heating_nones_when_no_main_heating_details() -> None:
    """An empty ``main_heating_details`` list yields a zero count and ``None`` primaries."""
    # sap_heating present but main_heating_details is empty
    heating = make_sap_heating(main_heating_details=[])
    certificate = make_minimal_sap10_epc(energy_rating_current=82, sap_heating=heating)

    record = EpcMlTransform().to_row(certificate)

    assert record["main_heating_count"] == 0
    primary_columns = (
        "primary_main_fuel_type",
        "primary_heat_emitter_type",
        "primary_main_heating_control",
        "primary_main_heating_category",
        "primary_has_fghrs",
        "primary_fan_flue_present",
        "primary_boiler_flue_type",
        "primary_central_heating_pump_age",
    )
    for column_name in primary_columns:
        assert record[column_name] is None, column_name
# name → (dtype, nullable, categorical)
_PV_FEATURES_NULLABLE: dict[str, tuple[type, bool, bool]] = {
    "has_pv": (bool, False, False),
    "pv_capacity_source": (str, False, True),
    "pv_array_count": (int, False, False),
    "pv_total_peak_power_kw": (float, False, False),
    # One peak-power column per compass octant, all with the same expectations.
    **{
        f"pv_peak_power_kw_{octant}": (float, False, False)
        for octant in ("N", "NE", "E", "SE", "S", "SW", "W", "NW")
    },
    "pv_avg_pitch": (float, True, False),
    "pv_avg_overshading": (float, True, False),
    "pv_percent_roof_area": (int, True, False),
}
def test_schema_advertises_pv_features() -> None:
    """PV features are advertised with the expected dtype, nullability and categorical flag."""
    advertised = EpcMlTransform().schema().feature_columns

    for pv_name, (dtype, nullable, categorical) in _PV_FEATURES_NULLABLE.items():
        assert pv_name in advertised, pv_name
        spec = advertised[pv_name]
        assert spec.dtype is dtype, pv_name
        assert spec.nullable is nullable, pv_name
        assert spec.categorical is categorical, pv_name
def test_to_row_aggregates_measured_pv_arrays() -> None:
    """Measured arrays are summed per octant, with power-weighted pitch and overshading."""
    # Two S-facing arrays (2.04 kW pitch 2 overshading 1; 1.86 kW pitch 3
    # overshading 2) and one NW array (1.0 kW).
    # Each entry is (peak_power, pitch, orientation, overshading).
    array_specs = [
        (2.04, 2, 5, 1),
        (1.86, 3, 5, 2),
        (1.0, 2, 8, 1),
    ]
    pv_arrays = [
        make_pv_array(
            peak_power=peak_power,
            pitch=pitch,
            orientation=orientation,
            overshading=overshading,
        )
        for peak_power, pitch, orientation, overshading in array_specs
    ]
    certificate = make_minimal_sap10_epc(
        energy_rating_current=82,
        photovoltaic_arrays=pv_arrays,
    )

    record = EpcMlTransform().to_row(certificate)

    assert record["has_pv"] is True
    assert record["pv_capacity_source"] == "measured"
    assert record["pv_array_count"] == 3
    assert record["pv_total_peak_power_kw"] == pytest.approx(4.9)

    # Power by orientation: S = 2.04 + 1.86 = 3.9; NW = 1.0; rest 0.0
    powered_octants = {"S": 3.9, "NW": 1.0}
    for octant, expected_kw in powered_octants.items():
        assert record[f"pv_peak_power_kw_{octant}"] == pytest.approx(expected_kw)
    for empty_octant in ("N", "NE", "E", "SE", "SW", "W"):
        assert record[f"pv_peak_power_kw_{empty_octant}"] == 0.0

    # Power-weighted pitch: (2.04*2 + 1.86*3 + 1.0*2) / 4.9 = (4.08 + 5.58 + 2.0) / 4.9 = 11.66/4.9 ≈ 2.380
    assert record["pv_avg_pitch"] == pytest.approx(11.66 / 4.9)
    # Power-weighted overshading: (2.04*1 + 1.86*2 + 1.0*1) / 4.9 = 6.76 / 4.9 ≈ 1.379
    assert record["pv_avg_overshading"] == pytest.approx(6.76 / 4.9)
    # No percent_roof_area when measured
    assert record["pv_percent_roof_area"] is None
def test_to_row_uses_percent_roof_area_when_pv_not_measured() -> None:
    """With only a roof-area percentage, PV is flagged as estimated and no arrays are counted."""
    # Surveyor couldn't confirm config; only percent_roof_area is known
    certificate = make_minimal_sap10_epc(
        photovoltaic_supply_percent_roof_area=25,
        energy_rating_current=82,
    )

    record = EpcMlTransform().to_row(certificate)

    assert record["has_pv"] is True
    assert record["pv_capacity_source"] == "estimated_from_roof_area"
    assert record["pv_array_count"] == 0
    assert record["pv_total_peak_power_kw"] == 0.0
    assert record["pv_percent_roof_area"] == 25
    for averaged_name in ("pv_avg_pitch", "pv_avg_overshading"):
        assert record[averaged_name] is None, averaged_name
def test_to_row_returns_pv_no_when_no_pv_data() -> None:
    """With no PV data at all, the row reports no PV, zero power and ``None`` averages."""
    # No measured arrays, no percent_roof_area, no PV at all
    certificate = make_minimal_sap10_epc(energy_rating_current=82)

    record = EpcMlTransform().to_row(certificate)

    assert record["has_pv"] is False
    assert record["pv_capacity_source"] == "none"
    assert record["pv_array_count"] == 0
    assert record["pv_total_peak_power_kw"] == 0.0
    octant_columns = [
        f"pv_peak_power_kw_{octant}"
        for octant in ("N", "NE", "E", "SE", "S", "SW", "W", "NW")
    ]
    for column_name in octant_columns:
        assert record[column_name] == 0.0
    for unset_name in ("pv_percent_roof_area", "pv_avg_pitch", "pv_avg_overshading"):
        assert record[unset_name] is None, unset_name
def test_to_row_treats_zero_percent_roof_area_as_no_pv() -> None:
    """A roof-area percentage of zero is reported the same way as having no PV."""
    # `photovoltaic_supply.none_or_no_details.percent_roof_area = 0` is
    # the canonical "no PV" payload on schema-21 EPCs.
    certificate = make_minimal_sap10_epc(
        photovoltaic_supply_percent_roof_area=0,
        energy_rating_current=82,
    )

    record = EpcMlTransform().to_row(certificate)

    assert record["has_pv"] is False
    assert record["pv_capacity_source"] == "none"
    assert record["pv_percent_roof_area"] is None
# name → (dtype, nullable, categorical)
_ENERGY_SOURCE_FEATURES_NULLABLE: dict[str, tuple[type, bool, bool]] = dict(
    has_pv_battery=(bool, False, False),
    pv_battery_count=(int, False, False),
    pv_battery_capacity_kwh=(float, True, False),
    has_wind_turbine=(bool, False, False),
    wind_turbine_count=(int, False, False),
    mains_gas=(bool, False, False),
    electricity_smart_meter_present=(bool, False, False),
    gas_smart_meter_present=(bool, False, False),
    is_dwelling_export_capable=(bool, False, False),
)
def test_schema_advertises_energy_source_features() -> None:
    """Energy-source features are advertised with the expected dtype, nullability and categorical flag."""
    advertised = EpcMlTransform().schema().feature_columns

    for source_name, expectation in _ENERGY_SOURCE_FEATURES_NULLABLE.items():
        dtype, nullable, categorical = expectation
        assert source_name in advertised, source_name
        spec = advertised[source_name]
        assert spec.dtype is dtype, source_name
        assert spec.nullable is nullable, source_name
        assert spec.categorical is categorical, source_name
def test_to_row_extracts_pv_battery_and_capacity() -> None:
    """Declared PV batteries surface as a flag, a count and a capacity in the row.

    The capacity expected here is the unit count times the per-unit capacity.
    """
    # Arrange — two batteries of 5.0 kWh each
    battery_count = 2
    capacity_per_unit_kwh = 5.0
    epc = make_minimal_sap10_epc(
        energy_rating_current=82,
        pv_battery_count=battery_count,
        pv_battery_capacity_per_unit_kwh=capacity_per_unit_kwh,
    )

    # Act
    row = EpcMlTransform().to_row(epc)

    # Assert
    assert row["has_pv_battery"] is True
    assert row["pv_battery_count"] == battery_count
    assert row["pv_battery_capacity_kwh"] == pytest.approx(
        battery_count * capacity_per_unit_kwh
    )
def test_to_row_returns_no_pv_battery_when_count_zero() -> None:
    """Without battery arguments the row reports no battery and a null capacity."""
    # Arrange — no battery: the fixture is built without any battery overrides.
    epc = make_minimal_sap10_epc(energy_rating_current=82)

    # Act
    features = EpcMlTransform().to_row(epc)

    # Assert — capacity is None rather than 0.0 when there is no battery.
    assert features["has_pv_battery"] is False
    assert features["pv_battery_count"] == 0
    assert features["pv_battery_capacity_kwh"] is None
def test_to_row_flags_wind_turbine() -> None:
    """A single declared wind turbine sets the flag and the count in the row."""
    # Arrange
    turbine_count = 1
    epc = make_minimal_sap10_epc(
        energy_rating_current=82,
        wind_turbines_count=turbine_count,
    )

    # Act
    features = EpcMlTransform().to_row(epc)

    # Assert
    assert features["has_wind_turbine"] is True
    assert features["wind_turbine_count"] == turbine_count
def test_to_row_extracts_energy_source_booleans() -> None:
    """Energy-source booleans set to True on the EPC come out as True in the row."""
    # Arrange — gas + electricity smart meters, export capable
    flags = (
        "mains_gas",
        "electricity_smart_meter_present",
        "gas_smart_meter_present",
        "is_dwelling_export_capable",
    )
    epc = make_minimal_sap10_epc(
        energy_rating_current=82,
        **dict.fromkeys(flags, True),
    )

    # Act
    features = EpcMlTransform().to_row(epc)

    # Assert — identity check: the row must hold real booleans, not truthy values.
    for flag in flags:
        assert features[flag] is True, flag
# Expected schema entries for the ventilation feature columns. Each value is
# the triple the schema test compares against a column's `dtype`, `nullable`
# and `categorical` attributes. All four columns are expected to be nullable
# ints; only the two `mechanical_*` columns are expected to be categorical.
_VENTILATION_FEATURES_NULLABLE: dict[str, tuple[type, bool, bool]] = {
    # name → (dtype, nullable, categorical)
    "mechanical_ventilation": (int, True, True),
    "mechanical_vent_duct_type": (int, True, True),
    "blocked_chimneys_count": (int, True, False),
    "pressure_test": (int, True, False),
}
def test_schema_advertises_ventilation_features() -> None:
    """Every ventilation feature is in the schema with the expected column spec."""
    # Arrange / Act
    feature_columns = EpcMlTransform().schema().feature_columns

    # Assert — each entry of the expectation table is checked field by field,
    # with the column name as the assertion message.
    for name, expectation in _VENTILATION_FEATURES_NULLABLE.items():
        dtype, nullable, categorical = expectation
        assert name in feature_columns, name
        spec = feature_columns[name]
        assert spec.dtype is dtype, name
        assert spec.nullable is nullable, name
        assert spec.categorical is categorical, name
def test_to_row_extracts_ventilation_features() -> None:
    """Ventilation inputs set on the EPC come out unchanged in the row."""
    # Arrange — MVHR (mechanical_ventilation code 4), duct type 3
    expected = {
        "mechanical_ventilation": 4,
        "mechanical_vent_duct_type": 3,
        "blocked_chimneys_count": 1,
        "pressure_test": 4,
    }
    epc = make_minimal_sap10_epc(energy_rating_current=82, **expected)

    # Act
    features = EpcMlTransform().to_row(epc)

    # Assert — the fixture keyword names double as the row column names.
    for column, value in expected.items():
        assert features[column] == value, column
def test_to_rows_returns_dataframe_with_one_row_per_property() -> None:
    """``to_rows`` returns a DataFrame with one row per EPC, in input order."""
    # Arrange — two properties with different floor areas + SAP scores
    properties = [(82, 70.0), (45, 120.0)]  # (SAP score, floor area m2)
    epcs = [
        make_minimal_sap10_epc(energy_rating_current=sap, total_floor_area_m2=area)
        for sap, area in properties
    ]

    # Act
    frame = EpcMlTransform().to_rows(epcs)

    # Assert — positional labels 0..n-1 line up with the input order.
    assert isinstance(frame, pd.DataFrame)
    assert len(frame) == len(properties)
    for position, (sap, area) in enumerate(properties):
        assert frame.loc[position, "sap_score"] == sap
        assert frame.loc[position, "total_floor_area_m2"] == area
def test_to_rows_returns_empty_dataframe_for_empty_input() -> None:
    """``to_rows([])`` yields a zero-row DataFrame that still has every schema column.

    Missing columns are gathered before asserting so that a single failure
    names all of them, rather than stopping at the first one.
    """
    # Arrange
    transform = EpcMlTransform()

    # Act
    df = transform.to_rows([])

    # Assert
    assert isinstance(df, pd.DataFrame)
    assert len(df) == 0
    # Every advertised column appears as an output column even for empty input.
    schema = transform.schema()
    advertised = (*schema.feature_columns, *schema.target_columns)
    missing = [name for name in advertised if name not in df.columns]
    assert not missing, f"columns missing from empty frame: {missing}"
def test_to_rows_casts_categorical_columns_to_pd_categorical_dtype() -> None:
    """Columns the schema flags as categorical come back with a categorical dtype.

    The test first checks that the schema flags at least one feature column as
    categorical, so the dtype loop cannot pass without checking anything.
    """
    # Arrange — minimal property with a categorical feature populated
    epcs = [
        make_minimal_sap10_epc(
            energy_rating_current=82, dwelling_type="Mid-terrace house"
        ),
        make_minimal_sap10_epc(
            energy_rating_current=45, dwelling_type="Detached house"
        ),
    ]
    transform = EpcMlTransform()

    # Act
    df = transform.to_rows(epcs)

    # Assert — every column flagged ColumnSpec.categorical=True is a pd.Categorical
    schema = transform.schema()
    categorical_columns = [
        name for name, spec in schema.feature_columns.items() if spec.categorical
    ]
    # Guard against a vacuous pass if the schema stops flagging any column.
    assert categorical_columns, "schema advertises no categorical feature columns"
    for name in categorical_columns:
        assert isinstance(df[name].dtype, pd.CategoricalDtype), name
def test_to_row_area_weights_window_u_value_and_solar_transmittance() -> None:
    """Window U-value and solar transmittance are averaged weighted by window area.

    The expected values leave the window without transmission details out of
    both the numerator and the area denominator.
    """
    # Arrange — two windows with transmission details; one without.
    wide_details = WindowTransmissionDetails(
        u_value=1.4, data_source=2, solar_transmittance=0.72
    )
    small_details = WindowTransmissionDetails(
        u_value=2.0, data_source=2, solar_transmittance=0.60
    )
    wide_window = make_window(
        orientation=5,
        width=2.0,
        height=1.0,
        window_transmission_details=wide_details,
    )
    small_window = make_window(
        orientation=1,
        width=1.0,
        height=1.0,
        window_transmission_details=small_details,
    )
    bare_window = make_window(orientation=3, width=1.0, height=1.0)  # no details
    epc = make_minimal_sap10_epc(
        energy_rating_current=82,
        sap_windows=[wide_window, small_window, bare_window],
    )

    # Act
    features = EpcMlTransform().to_row(epc)

    # Assert
    # Area-weighted u: (1.4 * 2.0 + 2.0 * 1.0) / (2.0 + 1.0) = 4.8 / 3.0 = 1.6
    assert features["window_avg_u_value"] == pytest.approx(1.6)
    # Area-weighted solar transmittance:
    # (0.72 * 2.0 + 0.60 * 1.0) / 3.0 = 2.04 / 3.0 = 0.68
    assert features["window_avg_solar_transmittance"] == pytest.approx(0.68)
def test_to_row_extracts_main_dwelling_wall_roof_floor_fabric_inputs() -> None:
    """Wall, roof and floor fabric inputs of the MAIN building part reach the row.

    Insulation thicknesses are given as strings such as ``"50mm"`` and are
    expected back as integer millimetres. Floor construction and insulation
    codes are expected from the ``floor=0`` dimension, not the upstairs one.
    """
    # Arrange
    from datatypes.epc.domain.epc_property_data import SapBuildingPart, SapFloorDimension

    # Both storeys share geometry; only the floor index and floor codes differ.
    storey_geometry = dict(
        room_height_m=2.4,
        total_floor_area_m2=50.0,
        party_wall_length_m=5.0,
        heat_loss_perimeter_m=20.0,
    )
    ground = SapFloorDimension(
        **storey_geometry, floor=0, floor_insulation=2, floor_construction=1
    )
    upstairs = SapFloorDimension(
        **storey_geometry, floor=1, floor_insulation=0, floor_construction=0
    )
    main = SapBuildingPart(
        identifier=BuildingPartIdentifier.MAIN,
        construction_age_band="C",
        wall_construction=3,
        wall_insulation_type=4,
        wall_thickness_measured=True,
        party_wall_construction=2,
        sap_floor_dimensions=[ground, upstairs],
        wall_dry_lined=False,
        wall_thickness_mm=300,
        wall_insulation_thickness="50mm",
        floor_heat_loss=7,
        floor_insulation_thickness="100mm",
        roof_construction=5,
        roof_insulation_location=6,
        roof_insulation_thickness="270mm",
    )
    epc = make_minimal_sap10_epc(
        energy_rating_current=70, sap_building_parts=[main]
    )

    # Act
    transform = EpcMlTransform()
    row = transform.to_row(epc)

    # Assert
    expected_by_column = {
        # wall fabric
        "main_dwelling_wall_insulation_type": 4,
        "main_dwelling_wall_insulation_thickness_mm": 50,
        "main_dwelling_wall_thickness_mm": 300,
        "main_dwelling_party_wall_construction": 2,
        # roof fabric
        "main_dwelling_roof_insulation_location": 6,
        "main_dwelling_roof_insulation_thickness_mm": 270,
        # floor fabric, taken from ground-floor SapFloorDimension
        "main_dwelling_floor_construction": 1,
        "main_dwelling_floor_insulation": 2,
        "main_dwelling_floor_insulation_thickness_mm": 100,
        "main_dwelling_floor_heat_loss": 7,
    }
    for column, value in expected_by_column.items():
        assert row[column] == value, column
    # Checked by identity, so it stays outside the equality table above.
    assert row["main_dwelling_wall_dry_lined"] is False
def test_to_row_parses_no_insulation_sentinel_as_zero_mm() -> None:
    """The ``"NI"`` thickness sentinel maps to 0 mm; an unparseable one maps to None."""
    # Arrange
    from datatypes.epc.domain.epc_property_data import SapBuildingPart

    main_part = SapBuildingPart(
        identifier=BuildingPartIdentifier.MAIN,
        construction_age_band="C",
        wall_construction=3,
        wall_insulation_type=4,
        wall_thickness_measured=True,
        party_wall_construction=2,
        sap_floor_dimensions=[],
        wall_insulation_thickness="NI",
        roof_insulation_thickness="ND",  # unparseable sentinel
    )
    epc = make_minimal_sap10_epc(
        energy_rating_current=70,
        sap_building_parts=[main_part],
    )

    # Act
    transform = EpcMlTransform()
    features = transform.to_row(epc)

    # Assert
    assert features["main_dwelling_wall_insulation_thickness_mm"] == 0
    assert features["main_dwelling_roof_insulation_thickness_mm"] is None
def test_schema_advertises_envelope_heat_loss_feature() -> None:
    """The schema lists ``envelope_heat_loss_w_per_k`` as a non-nullable float."""
    # Arrange
    feature_name = "envelope_heat_loss_w_per_k"

    # Act
    feature_columns = EpcMlTransform().schema().feature_columns

    # Assert
    assert feature_name in feature_columns
    spec = feature_columns[feature_name]
    assert spec.dtype is float
    assert spec.nullable is False
def test_to_row_emits_positive_envelope_heat_loss_for_sap10_epc() -> None:
    """A single-part SAP10 EPC gets an envelope heat loss inside a plausible band."""
    # Arrange
    from domain.sap10_ml.tests._fixtures import make_building_part, make_floor_dimension

    floor_area_m2 = 100.0
    ground_floor = make_floor_dimension(
        total_floor_area_m2=floor_area_m2,
        room_height_m=2.5,
        party_wall_length_m=5.0,
        heat_loss_perimeter_m=40.0,
        floor=0,
    )
    main_part = make_building_part(
        identifier=BuildingPartIdentifier.MAIN,
        construction_age_band="G",
        wall_construction=4,
        wall_insulation_type=4,
        party_wall_construction=1,
        roof_construction=4,
        floor_dimensions=[ground_floor],
    )
    epc = make_minimal_sap10_epc(
        energy_rating_current=70,
        sap_building_parts=[main_part],
        total_floor_area_m2=floor_area_m2,
        country_code="ENG",
    )

    # Act
    transform = EpcMlTransform()
    features = transform.to_row(epc)

    # Assert — envelope_heat_loss in plausible range for a 100 m^2 age-G semi.
    assert 100.0 < features["envelope_heat_loss_w_per_k"] < 400.0
def test_to_row_threads_top_level_fabric_and_demand_scalars() -> None:
    """Top-level EPC scalars and ``sap_heating`` bath counts are copied to the row."""
    # Arrange
    from dataclasses import replace

    baseline = make_minimal_sap10_epc(energy_rating_current=72)
    # Override the nested heating record first, then the top-level fields.
    heating = replace(baseline.sap_heating, number_baths=2, number_baths_wwhrs=1)
    epc = replace(
        baseline,
        multiple_glazed_proportion=85,
        extract_fans_count=2,
        sap_heating=heating,
    )

    # Act
    transform = EpcMlTransform()
    features = transform.to_row(epc)

    # Assert
    expected = {
        "multiple_glazed_proportion": 85,
        "extract_fans_count": 2,
        "number_baths": 2,
        "number_baths_wwhrs": 1,
    }
    for column, value in expected.items():
        assert features[column] == value