From aa00259b1a387cb6416273c52050118e80aae59b Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 16 May 2026 14:47:25 +0000 Subject: [PATCH] slice 4: total_floor_area_m2 feature First feature column lands on the transform: schema() advertises total_floor_area_m2 as a non-nullable float; to_row() emits the value from EpcPropertyData.total_floor_area_m2 alongside the six targets. Co-Authored-By: Claude Opus 4.7 --- .../src/domain/ml/tests/test_transform.py | 29 ++++++++++++++++++- packages/domain/src/domain/ml/transform.py | 14 ++++++++- 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/packages/domain/src/domain/ml/tests/test_transform.py b/packages/domain/src/domain/ml/tests/test_transform.py index 4a7b6349..2c888f8f 100644 --- a/packages/domain/src/domain/ml/tests/test_transform.py +++ b/packages/domain/src/domain/ml/tests/test_transform.py @@ -31,7 +31,6 @@ def test_transform_advertises_version_and_target_columns() -> None: column = schema.target_columns[target_name] assert isinstance(column, ColumnSpec) assert column.dtype is expected_dtype - assert schema.feature_columns == {} def test_to_row_extracts_targets_from_epc_property_data() -> None: @@ -90,3 +89,31 @@ def test_to_row_clamps_ucl_correction_when_band_b_would_increase_peui() -> None: # Band B: gradient=-0.10, intercept=28 → cd = -0.10*232 + 28 = +4.8 → clamp to 0 # adjusted = 232 + 0 = 232.0 assert row["peui_ucl"] == 232.0 + + +def test_schema_advertises_total_floor_area_m2_feature() -> None: + # Arrange + transform = EpcMlTransform() + + # Act + schema = transform.schema() + + # Assert + assert "total_floor_area_m2" in schema.feature_columns + column = schema.feature_columns["total_floor_area_m2"] + assert isinstance(column, ColumnSpec) + assert column.dtype is float + assert column.nullable is False + + +def test_to_row_extracts_total_floor_area_m2() -> None: + # Arrange + epc = make_minimal_sap10_epc(energy_rating_current=82) + transform = EpcMlTransform() + + # Act + row = transform.to_row(epc) + + # Assert + # make_minimal_sap10_epc sets total_floor_area_m2=70.0 by default + assert row["total_floor_area_m2"] == 70.0 diff --git a/packages/domain/src/domain/ml/transform.py b/packages/domain/src/domain/ml/transform.py index 25836feb..2f1b89d8 100644 --- a/packages/domain/src/domain/ml/transform.py +++ b/packages/domain/src/domain/ml/transform.py @@ -18,6 +18,15 @@ from domain.ml.schema import ColumnSpec, TransformSchema from domain.ml.ucl import apply_ucl_correction +_FEATURE_COLUMNS: dict[str, ColumnSpec] = { + "total_floor_area_m2": ColumnSpec( + dtype=float, + nullable=False, + description="Total floor area in square metres, from `total_floor_area`.", + ), +} + + _TARGET_COLUMNS: dict[str, ColumnSpec] = { "sap_score": ColumnSpec( dtype=int, @@ -79,7 +88,7 @@ class EpcMlTransform: """ return TransformSchema( transform_version=self.VERSION, - feature_columns={}, + feature_columns=dict(_FEATURE_COLUMNS), target_columns=dict(_TARGET_COLUMNS), ) @@ -90,6 +99,9 @@ class EpcMlTransform: """ rhi = epc.renewable_heat_incentive return { + # Features + "total_floor_area_m2": epc.total_floor_area_m2, + # Targets "sap_score": epc.energy_rating_current, "co2_emissions": epc.co2_emissions_current, "peui_raw": epc.energy_consumption_current,