From 05ef54bb02ace4806b099eb636d096c085753f6c Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 17 May 2026 12:41:14 +0000 Subject: [PATCH] restore transaction_type; keep tenure dropped (v2.0.0 stands) User reverted the transaction_type drop after noting that it doesn't help detect full-SAP assessments (that's `assessment_type` on the bulk-register record, filtered out at build_features.py:37). tenure removal stays; v2.0.0 still MAJOR (a column was removed). --- packages/domain/src/domain/ml/tests/test_transform.py | 4 +++- packages/domain/src/domain/ml/transform.py | 7 +++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/packages/domain/src/domain/ml/tests/test_transform.py b/packages/domain/src/domain/ml/tests/test_transform.py index a84a45b1..267b072e 100644 --- a/packages/domain/src/domain/ml/tests/test_transform.py +++ b/packages/domain/src/domain/ml/tests/test_transform.py @@ -257,6 +257,7 @@ _NULLABLE_CATEGORICAL_FEATURES: tuple[str, ...] = ( _NON_NULLABLE_CATEGORICAL_FEATURES: tuple[str, ...] = ( "dwelling_type", + "transaction_type", ) @@ -287,6 +288,7 @@ def test_to_row_extracts_categorical_features() -> None: epc = make_minimal_sap10_epc( energy_rating_current=82, dwelling_type="End-terrace house", + transaction_type="8", property_type="0", built_form="2", region_code="6", @@ -300,7 +302,7 @@ def test_to_row_extracts_categorical_features() -> None: # Assert assert row["dwelling_type"] == "End-terrace house" assert "tenure" not in row - assert "transaction_type" not in row + assert row["transaction_type"] == "8" assert row["property_type"] == "0" assert row["built_form"] == "2" assert row["region_code"] == "6" diff --git a/packages/domain/src/domain/ml/transform.py b/packages/domain/src/domain/ml/transform.py index 666fea96..f164042c 100644 --- a/packages/domain/src/domain/ml/transform.py +++ b/packages/domain/src/domain/ml/transform.py @@ -133,6 +133,12 @@ _FEATURE_COLUMNS: dict[str, ColumnSpec] = { categorical=True, description="Free-form SAP dwelling-type description, e.g. 'Mid-terrace house'.", ), + "transaction_type": ColumnSpec( + dtype=str, + nullable=False, + categorical=True, + description="SAP transaction type code, stringified int.", + ), "property_type": ColumnSpec( dtype=str, nullable=True, @@ -999,6 +1005,7 @@ class EpcMlTransform: "percent_draughtproofed": epc.percent_draughtproofed, # Features — categoricals (raw strings; cast at parquet write time) "dwelling_type": epc.dwelling_type, + "transaction_type": epc.transaction_type, "property_type": epc.property_type, "built_form": epc.built_form, "region_code": epc.region_code,