slice 16f: rename secondary_dwelling_* -> extension_1_* (v1.0.0 MAJOR bump)

12 columns renamed. No extension_2_* columns are added: they would be 88% null
on the 250k corpus, and envelope_heat_loss_w_per_k already sums extension 2 and
any later parts via the part-iterator. See ADR-0008.

VERSION 0.4.0 -> 1.0.0 (MAJOR per ADR-0007 versioning policy). Coordinated
cutover with AutoGluon repo + scoring lambda required at deploy time.

features_v16.txt is regenerated from transform.schema() at write-parquet time
(data/ml_training is gitignored; not committed).
This commit is contained in:
Khalim Conn-Kowlessar 2026-05-17 12:05:01 +00:00
parent cda469dd7d
commit 5c20e323da
2 changed files with 51 additions and 51 deletions

View file

@ -36,7 +36,7 @@ def test_transform_advertises_version_and_target_columns() -> None:
# Assert
assert isinstance(schema, TransformSchema)
assert schema.transform_version == "0.4.0"
assert schema.transform_version == "1.0.0"
assert schema.transform_version == EpcMlTransform.VERSION
assert set(schema.target_columns.keys()) == set(_EXPECTED_TARGET_DTYPES.keys())
for target_name, expected_dtype in _EXPECTED_TARGET_DTYPES.items():

View file

@ -716,54 +716,54 @@ _FEATURE_COLUMNS: dict[str, ColumnSpec] = {
dtype=int, nullable=False,
description="Number of LZC energy-source codes declared (0 if none).",
),
# Secondary building part (first non-main building part; ~36% of certs).
"secondary_dwelling_present": ColumnSpec(
# Extension 1 (first non-main building part; ~36% of certs).
"extension_1_present": ColumnSpec(
dtype=bool, nullable=False,
description="True if there is a building part beyond the Main Dwelling.",
),
"secondary_dwelling_wall_construction": ColumnSpec(
"extension_1_wall_construction": ColumnSpec(
dtype=int, nullable=True, categorical=True,
description="Secondary building part wall construction SAP10 code.",
description="Extension 1 wall construction SAP10 code.",
),
"secondary_dwelling_wall_insulation_type": ColumnSpec(
"extension_1_wall_insulation_type": ColumnSpec(
dtype=int, nullable=True, categorical=True,
description="Secondary building part wall insulation type SAP10 code.",
description="Extension 1 wall insulation type SAP10 code.",
),
"secondary_dwelling_wall_insulation_thickness_mm": ColumnSpec(
"extension_1_wall_insulation_thickness_mm": ColumnSpec(
dtype=int, nullable=True,
description="Secondary building part wall insulation thickness in mm.",
description="Extension 1 wall insulation thickness in mm.",
),
"secondary_dwelling_wall_thickness_mm": ColumnSpec(
"extension_1_wall_thickness_mm": ColumnSpec(
dtype=int, nullable=True,
description="Secondary building part external wall thickness in mm.",
description="Extension 1 external wall thickness in mm.",
),
"secondary_dwelling_roof_construction": ColumnSpec(
"extension_1_roof_construction": ColumnSpec(
dtype=int, nullable=True, categorical=True,
description="Secondary building part roof construction SAP10 code.",
description="Extension 1 roof construction SAP10 code.",
),
"secondary_dwelling_roof_insulation_thickness_mm": ColumnSpec(
"extension_1_roof_insulation_thickness_mm": ColumnSpec(
dtype=int, nullable=True,
description="Secondary building part roof insulation thickness in mm.",
description="Extension 1 roof insulation thickness in mm.",
),
"secondary_dwelling_floor_construction": ColumnSpec(
"extension_1_floor_construction": ColumnSpec(
dtype=int, nullable=True, categorical=True,
description="Secondary building part ground-floor construction SAP10 code.",
description="Extension 1 ground-floor construction SAP10 code.",
),
"secondary_dwelling_floor_insulation": ColumnSpec(
"extension_1_floor_insulation": ColumnSpec(
dtype=int, nullable=True, categorical=True,
description="Secondary building part ground-floor insulation SAP10 code.",
description="Extension 1 ground-floor insulation SAP10 code.",
),
"secondary_dwelling_floor_insulation_thickness_mm": ColumnSpec(
"extension_1_floor_insulation_thickness_mm": ColumnSpec(
dtype=int, nullable=True,
description="Secondary building part floor insulation thickness in mm.",
description="Extension 1 floor insulation thickness in mm.",
),
"secondary_dwelling_total_floor_area_m2": ColumnSpec(
"extension_1_total_floor_area_m2": ColumnSpec(
dtype=float, nullable=True,
description="Secondary building part total floor area (sum of its sap_floor_dimensions).",
description="Extension 1 total floor area (sum of its sap_floor_dimensions).",
),
"secondary_dwelling_heat_loss_perimeter_m": ColumnSpec(
"extension_1_heat_loss_perimeter_m": ColumnSpec(
dtype=float, nullable=True,
description="Secondary building part heat-loss perimeter (sum of its sap_floor_dimensions).",
description="Extension 1 heat-loss perimeter (sum of its sap_floor_dimensions).",
),
"other_building_parts_count": ColumnSpec(
dtype=int, nullable=False,
@ -897,7 +897,7 @@ class EpcMlTransform:
Version 0.1.0 schema contract only; feature columns added in subsequent slices.
"""
VERSION: str = "0.4.0"
VERSION: str = "1.0.0"
def schema(self) -> TransformSchema:
"""The cross-repo ML data contract.
@ -1309,17 +1309,17 @@ _MAIN_DWELLING_FABRIC_COLUMNS = (
)
_SECONDARY_DWELLING_FABRIC_COLUMNS = (
"secondary_dwelling_wall_construction",
"secondary_dwelling_wall_insulation_type",
"secondary_dwelling_wall_insulation_thickness_mm",
"secondary_dwelling_wall_thickness_mm",
"secondary_dwelling_roof_construction",
"secondary_dwelling_roof_insulation_thickness_mm",
"secondary_dwelling_floor_construction",
"secondary_dwelling_floor_insulation",
"secondary_dwelling_floor_insulation_thickness_mm",
"secondary_dwelling_total_floor_area_m2",
"secondary_dwelling_heat_loss_perimeter_m",
"extension_1_wall_construction",
"extension_1_wall_insulation_type",
"extension_1_wall_insulation_thickness_mm",
"extension_1_wall_thickness_mm",
"extension_1_roof_construction",
"extension_1_roof_insulation_thickness_mm",
"extension_1_floor_construction",
"extension_1_floor_insulation",
"extension_1_floor_insulation_thickness_mm",
"extension_1_total_floor_area_m2",
"extension_1_heat_loss_perimeter_m",
)
@ -1426,7 +1426,7 @@ def _building_part_aggregates(parts: list[SapBuildingPart]) -> dict[str, Any]:
"main_dwelling_roof_construction": None,
"main_dwelling_alternative_wall_count": 0,
"main_dwelling_alternative_wall_area_m2": 0.0,
"secondary_dwelling_present": False,
"extension_1_present": False,
"other_building_parts_count": 0,
}
for col in _MAIN_DWELLING_FABRIC_COLUMNS:
@ -1517,31 +1517,31 @@ def _building_part_aggregates(parts: list[SapBuildingPart]) -> dict[str, Any]:
)
aggregates["main_dwelling_wall_thickness_measured"] = main.wall_thickness_measured
# Secondary building part — first non-main entry in the list.
# Extension 1 — first non-main entry in the list.
secondary = next(
(p for p in parts if p.identifier != _MAIN_DWELLING_IDENTIFIER), None
)
if secondary is not None:
aggregates["secondary_dwelling_present"] = True
aggregates["secondary_dwelling_wall_construction"] = _int_or_none(
aggregates["extension_1_present"] = True
aggregates["extension_1_wall_construction"] = _int_or_none(
secondary.wall_construction
)
aggregates["secondary_dwelling_wall_insulation_type"] = _int_or_none(
aggregates["extension_1_wall_insulation_type"] = _int_or_none(
secondary.wall_insulation_type
)
aggregates["secondary_dwelling_wall_insulation_thickness_mm"] = _parse_thickness_mm(
aggregates["extension_1_wall_insulation_thickness_mm"] = _parse_thickness_mm(
secondary.wall_insulation_thickness
)
aggregates["secondary_dwelling_wall_thickness_mm"] = secondary.wall_thickness_mm
aggregates["secondary_dwelling_roof_construction"] = secondary.roof_construction
aggregates["secondary_dwelling_roof_insulation_thickness_mm"] = _parse_thickness_mm(
aggregates["extension_1_wall_thickness_mm"] = secondary.wall_thickness_mm
aggregates["extension_1_roof_construction"] = secondary.roof_construction
aggregates["extension_1_roof_insulation_thickness_mm"] = _parse_thickness_mm(
secondary.roof_insulation_thickness
)
sec_ground = _ground_floor(secondary)
if sec_ground is not None:
aggregates["secondary_dwelling_floor_construction"] = sec_ground.floor_construction
aggregates["secondary_dwelling_floor_insulation"] = sec_ground.floor_insulation
aggregates["secondary_dwelling_floor_insulation_thickness_mm"] = _parse_thickness_mm(
aggregates["extension_1_floor_construction"] = sec_ground.floor_construction
aggregates["extension_1_floor_insulation"] = sec_ground.floor_insulation
aggregates["extension_1_floor_insulation_thickness_mm"] = _parse_thickness_mm(
secondary.floor_insulation_thickness
)
sec_floor_area = 0.0
@ -1550,8 +1550,8 @@ def _building_part_aggregates(parts: list[SapBuildingPart]) -> dict[str, Any]:
for fd in secondary.sap_floor_dimensions:
sec_floor_area += fd.total_floor_area_m2
sec_hlp += fd.heat_loss_perimeter_m
aggregates["secondary_dwelling_total_floor_area_m2"] = sec_floor_area
aggregates["secondary_dwelling_heat_loss_perimeter_m"] = sec_hlp
aggregates["extension_1_total_floor_area_m2"] = sec_floor_area
aggregates["extension_1_heat_loss_perimeter_m"] = sec_hlp
# Anything beyond main + extension 1 just gets counted (extension 2 onwards, etc.).
aggregates["other_building_parts_count"] = max(0, len(parts) - (1 if main else 0) - (1 if secondary else 0))