From 5c20e323dac24cc229857df0bfe507396f6ded76 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 17 May 2026 12:05:01 +0000 Subject: [PATCH] slice 16f: rename secondary_dwelling_* -> extension_1_* (v1.0.0 MAJOR bump) 12 columns renamed; extension_2_* not added (88% null on 250k corpus; envelope_heat_loss_w_per_k already sums extension_2+ via part-iterator). ADR-0008. VERSION 0.4.0 -> 1.0.0 (MAJOR per ADR-0007 versioning policy). Coordinated cutover with AutoGluon repo + scoring lambda required at deploy time. features_v16.txt is regenerated from transform.schema() at write-parquet time (data/ml_training is gitignored; not committed). --- .../src/domain/ml/tests/test_transform.py | 2 +- packages/domain/src/domain/ml/transform.py | 100 +++++++++--------- 2 files changed, 51 insertions(+), 51 deletions(-) diff --git a/packages/domain/src/domain/ml/tests/test_transform.py b/packages/domain/src/domain/ml/tests/test_transform.py index 33d00cbc..8ef9fcee 100644 --- a/packages/domain/src/domain/ml/tests/test_transform.py +++ b/packages/domain/src/domain/ml/tests/test_transform.py @@ -36,7 +36,7 @@ def test_transform_advertises_version_and_target_columns() -> None: # Assert assert isinstance(schema, TransformSchema) - assert schema.transform_version == "0.4.0" + assert schema.transform_version == "1.0.0" assert schema.transform_version == EpcMlTransform.VERSION assert set(schema.target_columns.keys()) == set(_EXPECTED_TARGET_DTYPES.keys()) for target_name, expected_dtype in _EXPECTED_TARGET_DTYPES.items(): diff --git a/packages/domain/src/domain/ml/transform.py b/packages/domain/src/domain/ml/transform.py index 83ae3cd7..ddb8e84f 100644 --- a/packages/domain/src/domain/ml/transform.py +++ b/packages/domain/src/domain/ml/transform.py @@ -716,54 +716,54 @@ _FEATURE_COLUMNS: dict[str, ColumnSpec] = { dtype=int, nullable=False, description="Number of LZC energy-source codes declared (0 if none).", ), - # Secondary building part (first non-main building part; ~36% of certs). - "secondary_dwelling_present": ColumnSpec( + # Extension 1 (first non-main building part; ~36% of certs). + "extension_1_present": ColumnSpec( dtype=bool, nullable=False, description="True if there is a building part beyond the Main Dwelling.", ), - "secondary_dwelling_wall_construction": ColumnSpec( + "extension_1_wall_construction": ColumnSpec( dtype=int, nullable=True, categorical=True, - description="Secondary building part wall construction SAP10 code.", + description="Extension 1 wall construction SAP10 code.", ), - "secondary_dwelling_wall_insulation_type": ColumnSpec( + "extension_1_wall_insulation_type": ColumnSpec( dtype=int, nullable=True, categorical=True, - description="Secondary building part wall insulation type SAP10 code.", + description="Extension 1 wall insulation type SAP10 code.", ), - "secondary_dwelling_wall_insulation_thickness_mm": ColumnSpec( + "extension_1_wall_insulation_thickness_mm": ColumnSpec( dtype=int, nullable=True, - description="Secondary building part wall insulation thickness in mm.", + description="Extension 1 wall insulation thickness in mm.", ), - "secondary_dwelling_wall_thickness_mm": ColumnSpec( + "extension_1_wall_thickness_mm": ColumnSpec( dtype=int, nullable=True, - description="Secondary building part external wall thickness in mm.", + description="Extension 1 external wall thickness in mm.", ), - "secondary_dwelling_roof_construction": ColumnSpec( + "extension_1_roof_construction": ColumnSpec( dtype=int, nullable=True, categorical=True, - description="Secondary building part roof construction SAP10 code.", + description="Extension 1 roof construction SAP10 code.", ), - "secondary_dwelling_roof_insulation_thickness_mm": ColumnSpec( + "extension_1_roof_insulation_thickness_mm": ColumnSpec( dtype=int, nullable=True, - description="Secondary building part roof insulation thickness in mm.", + description="Extension 1 roof insulation thickness in mm.", ), - "secondary_dwelling_floor_construction": ColumnSpec( + "extension_1_floor_construction": ColumnSpec( dtype=int, nullable=True, categorical=True, - description="Secondary building part ground-floor construction SAP10 code.", + description="Extension 1 ground-floor construction SAP10 code.", ), - "secondary_dwelling_floor_insulation": ColumnSpec( + "extension_1_floor_insulation": ColumnSpec( dtype=int, nullable=True, categorical=True, - description="Secondary building part ground-floor insulation SAP10 code.", + description="Extension 1 ground-floor insulation SAP10 code.", ), - "secondary_dwelling_floor_insulation_thickness_mm": ColumnSpec( + "extension_1_floor_insulation_thickness_mm": ColumnSpec( dtype=int, nullable=True, - description="Secondary building part floor insulation thickness in mm.", + description="Extension 1 floor insulation thickness in mm.", ), - "secondary_dwelling_total_floor_area_m2": ColumnSpec( + "extension_1_total_floor_area_m2": ColumnSpec( dtype=float, nullable=True, - description="Secondary building part total floor area (sum of its sap_floor_dimensions).", + description="Extension 1 total floor area (sum of its sap_floor_dimensions).", ), - "secondary_dwelling_heat_loss_perimeter_m": ColumnSpec( + "extension_1_heat_loss_perimeter_m": ColumnSpec( dtype=float, nullable=True, - description="Secondary building part heat-loss perimeter (sum of its sap_floor_dimensions).", + description="Extension 1 heat-loss perimeter (sum of its sap_floor_dimensions).", ), "other_building_parts_count": ColumnSpec( dtype=int, nullable=False, @@ -897,7 +897,7 @@ class EpcMlTransform: Version 0.1.0 — schema contract only; feature columns added in subsequent slices. """ - VERSION: str = "0.4.0" + VERSION: str = "1.0.0" def schema(self) -> TransformSchema: """The cross-repo ML data contract. @@ -1309,17 +1309,17 @@ _MAIN_DWELLING_FABRIC_COLUMNS = ( ) _SECONDARY_DWELLING_FABRIC_COLUMNS = ( - "secondary_dwelling_wall_construction", - "secondary_dwelling_wall_insulation_type", - "secondary_dwelling_wall_insulation_thickness_mm", - "secondary_dwelling_wall_thickness_mm", - "secondary_dwelling_roof_construction", - "secondary_dwelling_roof_insulation_thickness_mm", - "secondary_dwelling_floor_construction", - "secondary_dwelling_floor_insulation", - "secondary_dwelling_floor_insulation_thickness_mm", - "secondary_dwelling_total_floor_area_m2", - "secondary_dwelling_heat_loss_perimeter_m", + "extension_1_wall_construction", + "extension_1_wall_insulation_type", + "extension_1_wall_insulation_thickness_mm", + "extension_1_wall_thickness_mm", + "extension_1_roof_construction", + "extension_1_roof_insulation_thickness_mm", + "extension_1_floor_construction", + "extension_1_floor_insulation", + "extension_1_floor_insulation_thickness_mm", + "extension_1_total_floor_area_m2", + "extension_1_heat_loss_perimeter_m", ) @@ -1426,7 +1426,7 @@ def _building_part_aggregates(parts: list[SapBuildingPart]) -> dict[str, Any]: "main_dwelling_roof_construction": None, "main_dwelling_alternative_wall_count": 0, "main_dwelling_alternative_wall_area_m2": 0.0, - "secondary_dwelling_present": False, + "extension_1_present": False, "other_building_parts_count": 0, } for col in _MAIN_DWELLING_FABRIC_COLUMNS: @@ -1517,31 +1517,31 @@ def _building_part_aggregates(parts: list[SapBuildingPart]) -> dict[str, Any]: ) aggregates["main_dwelling_wall_thickness_measured"] = main.wall_thickness_measured - # Secondary building part — first non-main entry in the list. + # Extension 1 — first non-main entry in the list. secondary = next( (p for p in parts if p.identifier != _MAIN_DWELLING_IDENTIFIER), None ) if secondary is not None: - aggregates["secondary_dwelling_present"] = True - aggregates["secondary_dwelling_wall_construction"] = _int_or_none( + aggregates["extension_1_present"] = True + aggregates["extension_1_wall_construction"] = _int_or_none( secondary.wall_construction ) - aggregates["secondary_dwelling_wall_insulation_type"] = _int_or_none( + aggregates["extension_1_wall_insulation_type"] = _int_or_none( secondary.wall_insulation_type ) - aggregates["secondary_dwelling_wall_insulation_thickness_mm"] = _parse_thickness_mm( + aggregates["extension_1_wall_insulation_thickness_mm"] = _parse_thickness_mm( secondary.wall_insulation_thickness ) - aggregates["secondary_dwelling_wall_thickness_mm"] = secondary.wall_thickness_mm - aggregates["secondary_dwelling_roof_construction"] = secondary.roof_construction - aggregates["secondary_dwelling_roof_insulation_thickness_mm"] = _parse_thickness_mm( + aggregates["extension_1_wall_thickness_mm"] = secondary.wall_thickness_mm + aggregates["extension_1_roof_construction"] = secondary.roof_construction + aggregates["extension_1_roof_insulation_thickness_mm"] = _parse_thickness_mm( secondary.roof_insulation_thickness ) sec_ground = _ground_floor(secondary) if sec_ground is not None: - aggregates["secondary_dwelling_floor_construction"] = sec_ground.floor_construction - aggregates["secondary_dwelling_floor_insulation"] = sec_ground.floor_insulation - aggregates["secondary_dwelling_floor_insulation_thickness_mm"] = _parse_thickness_mm( + aggregates["extension_1_floor_construction"] = sec_ground.floor_construction + aggregates["extension_1_floor_insulation"] = sec_ground.floor_insulation + aggregates["extension_1_floor_insulation_thickness_mm"] = _parse_thickness_mm( secondary.floor_insulation_thickness ) sec_floor_area = 0.0 @@ -1550,8 +1550,8 @@ def _building_part_aggregates(parts: list[SapBuildingPart]) -> dict[str, Any]: for fd in secondary.sap_floor_dimensions: sec_floor_area += fd.total_floor_area_m2 sec_hlp += fd.heat_loss_perimeter_m - aggregates["secondary_dwelling_total_floor_area_m2"] = sec_floor_area - aggregates["secondary_dwelling_heat_loss_perimeter_m"] = sec_hlp + aggregates["extension_1_total_floor_area_m2"] = sec_floor_area + aggregates["extension_1_heat_loss_perimeter_m"] = sec_hlp # Anything beyond main + secondary just gets counted (extension chains, etc.). aggregates["other_building_parts_count"] = max(0, len(parts) - (1 if main else 0) - (1 if secondary else 0))