restore transaction_type; keep tenure dropped (v2.0.0 stands)

The user reverted the transaction_type drop after noting that the column
doesn't help detect full-SAP assessments (that's `assessment_type` on the
bulk-register record, filtered out at build_features.py:37).

The tenure removal stays, so v2.0.0 is still a MAJOR version bump (a column was removed).
This commit is contained in:
Khalim Conn-Kowlessar 2026-05-17 12:41:14 +00:00
parent 6aa3ddfbf4
commit 05ef54bb02
2 changed files with 10 additions and 1 deletion

View file

@ -257,6 +257,7 @@ _NULLABLE_CATEGORICAL_FEATURES: tuple[str, ...] = (
_NON_NULLABLE_CATEGORICAL_FEATURES: tuple[str, ...] = (
"dwelling_type",
"transaction_type",
)
@ -287,6 +288,7 @@ def test_to_row_extracts_categorical_features() -> None:
epc = make_minimal_sap10_epc(
energy_rating_current=82,
dwelling_type="End-terrace house",
transaction_type="8",
property_type="0",
built_form="2",
region_code="6",
@ -300,7 +302,7 @@ def test_to_row_extracts_categorical_features() -> None:
# Assert
assert row["dwelling_type"] == "End-terrace house"
assert "tenure" not in row
assert "transaction_type" not in row
assert row["transaction_type"] == "8"
assert row["property_type"] == "0"
assert row["built_form"] == "2"
assert row["region_code"] == "6"

View file

@ -133,6 +133,12 @@ _FEATURE_COLUMNS: dict[str, ColumnSpec] = {
categorical=True,
description="Free-form SAP dwelling-type description, e.g. 'Mid-terrace house'.",
),
"transaction_type": ColumnSpec(
dtype=str,
nullable=False,
categorical=True,
description="SAP transaction type code, stringified int.",
),
"property_type": ColumnSpec(
dtype=str,
nullable=True,
@ -999,6 +1005,7 @@ class EpcMlTransform:
"percent_draughtproofed": epc.percent_draughtproofed,
# Features — categoricals (raw strings; cast at parquet write time)
"dwelling_type": epc.dwelling_type,
"transaction_type": epc.transaction_type,
"property_type": epc.property_type,
"built_form": epc.built_form,
"region_code": epc.region_code,