diff --git a/packages/domain/src/domain/ml/tests/test_transform.py b/packages/domain/src/domain/ml/tests/test_transform.py index a84a45b1..267b072e 100644 --- a/packages/domain/src/domain/ml/tests/test_transform.py +++ b/packages/domain/src/domain/ml/tests/test_transform.py @@ -257,6 +257,7 @@ _NULLABLE_CATEGORICAL_FEATURES: tuple[str, ...] = ( _NON_NULLABLE_CATEGORICAL_FEATURES: tuple[str, ...] = ( "dwelling_type", + "transaction_type", ) @@ -287,6 +288,7 @@ def test_to_row_extracts_categorical_features() -> None: epc = make_minimal_sap10_epc( energy_rating_current=82, dwelling_type="End-terrace house", + transaction_type="8", property_type="0", built_form="2", region_code="6", @@ -300,7 +302,7 @@ def test_to_row_extracts_categorical_features() -> None: # Assert assert row["dwelling_type"] == "End-terrace house" assert "tenure" not in row - assert "transaction_type" not in row + assert row["transaction_type"] == "8" assert row["property_type"] == "0" assert row["built_form"] == "2" assert row["region_code"] == "6" diff --git a/packages/domain/src/domain/ml/transform.py b/packages/domain/src/domain/ml/transform.py index 666fea96..f164042c 100644 --- a/packages/domain/src/domain/ml/transform.py +++ b/packages/domain/src/domain/ml/transform.py @@ -133,6 +133,12 @@ _FEATURE_COLUMNS: dict[str, ColumnSpec] = { categorical=True, description="Free-form SAP dwelling-type description, e.g. 'Mid-terrace house'.", ), + "transaction_type": ColumnSpec( + dtype=str, + nullable=False, + categorical=True, + description="SAP transaction type code, stringified int.", + ), "property_type": ColumnSpec( dtype=str, nullable=True, @@ -999,6 +1005,7 @@ class EpcMlTransform: "percent_draughtproofed": epc.percent_draughtproofed, # Features — categoricals (raw strings; cast at parquet write time) "dwelling_type": epc.dwelling_type, + "transaction_type": epc.transaction_type, "property_type": epc.property_type, "built_form": epc.built_form, "region_code": epc.region_code,