mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
slice 16f: rename secondary_dwelling_* -> extension_1_* (v1.0.0 MAJOR bump)
Renames 12 columns. No extension_2_* columns are added (88% null on the 250k corpus, and envelope_heat_loss_w_per_k already sums extension 2 and beyond via the part iterator); see ADR-0008. VERSION is bumped 0.4.0 -> 1.0.0 (a MAJOR bump per the ADR-0007 versioning policy). A coordinated cutover with the AutoGluon repo and the scoring lambda is required at deploy time. features_v16.txt is regenerated from transform.schema() at write-parquet time (data/ml_training is gitignored, so the file is not committed).
This commit is contained in:
parent
cda469dd7d
commit
5c20e323da
2 changed files with 51 additions and 51 deletions
|
|
@ -36,7 +36,7 @@ def test_transform_advertises_version_and_target_columns() -> None:
|
|||
|
||||
# Assert
|
||||
assert isinstance(schema, TransformSchema)
|
||||
assert schema.transform_version == "0.4.0"
|
||||
assert schema.transform_version == "1.0.0"
|
||||
assert schema.transform_version == EpcMlTransform.VERSION
|
||||
assert set(schema.target_columns.keys()) == set(_EXPECTED_TARGET_DTYPES.keys())
|
||||
for target_name, expected_dtype in _EXPECTED_TARGET_DTYPES.items():
|
||||
|
|
|
|||
|
|
@ -716,54 +716,54 @@ _FEATURE_COLUMNS: dict[str, ColumnSpec] = {
|
|||
dtype=int, nullable=False,
|
||||
description="Number of LZC energy-source codes declared (0 if none).",
|
||||
),
|
||||
# Secondary building part (first non-main building part; ~36% of certs).
|
||||
"secondary_dwelling_present": ColumnSpec(
|
||||
# Extension 1 (first non-main building part; ~36% of certs).
|
||||
"extension_1_present": ColumnSpec(
|
||||
dtype=bool, nullable=False,
|
||||
description="True if there is a building part beyond the Main Dwelling.",
|
||||
),
|
||||
"secondary_dwelling_wall_construction": ColumnSpec(
|
||||
"extension_1_wall_construction": ColumnSpec(
|
||||
dtype=int, nullable=True, categorical=True,
|
||||
description="Secondary building part wall construction SAP10 code.",
|
||||
description="Extension 1 wall construction SAP10 code.",
|
||||
),
|
||||
"secondary_dwelling_wall_insulation_type": ColumnSpec(
|
||||
"extension_1_wall_insulation_type": ColumnSpec(
|
||||
dtype=int, nullable=True, categorical=True,
|
||||
description="Secondary building part wall insulation type SAP10 code.",
|
||||
description="Extension 1 wall insulation type SAP10 code.",
|
||||
),
|
||||
"secondary_dwelling_wall_insulation_thickness_mm": ColumnSpec(
|
||||
"extension_1_wall_insulation_thickness_mm": ColumnSpec(
|
||||
dtype=int, nullable=True,
|
||||
description="Secondary building part wall insulation thickness in mm.",
|
||||
description="Extension 1 wall insulation thickness in mm.",
|
||||
),
|
||||
"secondary_dwelling_wall_thickness_mm": ColumnSpec(
|
||||
"extension_1_wall_thickness_mm": ColumnSpec(
|
||||
dtype=int, nullable=True,
|
||||
description="Secondary building part external wall thickness in mm.",
|
||||
description="Extension 1 external wall thickness in mm.",
|
||||
),
|
||||
"secondary_dwelling_roof_construction": ColumnSpec(
|
||||
"extension_1_roof_construction": ColumnSpec(
|
||||
dtype=int, nullable=True, categorical=True,
|
||||
description="Secondary building part roof construction SAP10 code.",
|
||||
description="Extension 1 roof construction SAP10 code.",
|
||||
),
|
||||
"secondary_dwelling_roof_insulation_thickness_mm": ColumnSpec(
|
||||
"extension_1_roof_insulation_thickness_mm": ColumnSpec(
|
||||
dtype=int, nullable=True,
|
||||
description="Secondary building part roof insulation thickness in mm.",
|
||||
description="Extension 1 roof insulation thickness in mm.",
|
||||
),
|
||||
"secondary_dwelling_floor_construction": ColumnSpec(
|
||||
"extension_1_floor_construction": ColumnSpec(
|
||||
dtype=int, nullable=True, categorical=True,
|
||||
description="Secondary building part ground-floor construction SAP10 code.",
|
||||
description="Extension 1 ground-floor construction SAP10 code.",
|
||||
),
|
||||
"secondary_dwelling_floor_insulation": ColumnSpec(
|
||||
"extension_1_floor_insulation": ColumnSpec(
|
||||
dtype=int, nullable=True, categorical=True,
|
||||
description="Secondary building part ground-floor insulation SAP10 code.",
|
||||
description="Extension 1 ground-floor insulation SAP10 code.",
|
||||
),
|
||||
"secondary_dwelling_floor_insulation_thickness_mm": ColumnSpec(
|
||||
"extension_1_floor_insulation_thickness_mm": ColumnSpec(
|
||||
dtype=int, nullable=True,
|
||||
description="Secondary building part floor insulation thickness in mm.",
|
||||
description="Extension 1 floor insulation thickness in mm.",
|
||||
),
|
||||
"secondary_dwelling_total_floor_area_m2": ColumnSpec(
|
||||
"extension_1_total_floor_area_m2": ColumnSpec(
|
||||
dtype=float, nullable=True,
|
||||
description="Secondary building part total floor area (sum of its sap_floor_dimensions).",
|
||||
description="Extension 1 total floor area (sum of its sap_floor_dimensions).",
|
||||
),
|
||||
"secondary_dwelling_heat_loss_perimeter_m": ColumnSpec(
|
||||
"extension_1_heat_loss_perimeter_m": ColumnSpec(
|
||||
dtype=float, nullable=True,
|
||||
description="Secondary building part heat-loss perimeter (sum of its sap_floor_dimensions).",
|
||||
description="Extension 1 heat-loss perimeter (sum of its sap_floor_dimensions).",
|
||||
),
|
||||
"other_building_parts_count": ColumnSpec(
|
||||
dtype=int, nullable=False,
|
||||
|
|
@ -897,7 +897,7 @@ class EpcMlTransform:
|
|||
Version 0.1.0 — schema contract only; feature columns added in subsequent slices.
|
||||
"""
|
||||
|
||||
VERSION: str = "0.4.0"
|
||||
VERSION: str = "1.0.0"
|
||||
|
||||
def schema(self) -> TransformSchema:
|
||||
"""The cross-repo ML data contract.
|
||||
|
|
@ -1309,17 +1309,17 @@ _MAIN_DWELLING_FABRIC_COLUMNS = (
|
|||
)
|
||||
|
||||
_SECONDARY_DWELLING_FABRIC_COLUMNS = (
|
||||
"secondary_dwelling_wall_construction",
|
||||
"secondary_dwelling_wall_insulation_type",
|
||||
"secondary_dwelling_wall_insulation_thickness_mm",
|
||||
"secondary_dwelling_wall_thickness_mm",
|
||||
"secondary_dwelling_roof_construction",
|
||||
"secondary_dwelling_roof_insulation_thickness_mm",
|
||||
"secondary_dwelling_floor_construction",
|
||||
"secondary_dwelling_floor_insulation",
|
||||
"secondary_dwelling_floor_insulation_thickness_mm",
|
||||
"secondary_dwelling_total_floor_area_m2",
|
||||
"secondary_dwelling_heat_loss_perimeter_m",
|
||||
"extension_1_wall_construction",
|
||||
"extension_1_wall_insulation_type",
|
||||
"extension_1_wall_insulation_thickness_mm",
|
||||
"extension_1_wall_thickness_mm",
|
||||
"extension_1_roof_construction",
|
||||
"extension_1_roof_insulation_thickness_mm",
|
||||
"extension_1_floor_construction",
|
||||
"extension_1_floor_insulation",
|
||||
"extension_1_floor_insulation_thickness_mm",
|
||||
"extension_1_total_floor_area_m2",
|
||||
"extension_1_heat_loss_perimeter_m",
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -1426,7 +1426,7 @@ def _building_part_aggregates(parts: list[SapBuildingPart]) -> dict[str, Any]:
|
|||
"main_dwelling_roof_construction": None,
|
||||
"main_dwelling_alternative_wall_count": 0,
|
||||
"main_dwelling_alternative_wall_area_m2": 0.0,
|
||||
"secondary_dwelling_present": False,
|
||||
"extension_1_present": False,
|
||||
"other_building_parts_count": 0,
|
||||
}
|
||||
for col in _MAIN_DWELLING_FABRIC_COLUMNS:
|
||||
|
|
@ -1517,31 +1517,31 @@ def _building_part_aggregates(parts: list[SapBuildingPart]) -> dict[str, Any]:
|
|||
)
|
||||
aggregates["main_dwelling_wall_thickness_measured"] = main.wall_thickness_measured
|
||||
|
||||
# Secondary building part — first non-main entry in the list.
|
||||
# Extension 1 — first non-main entry in the list.
|
||||
secondary = next(
|
||||
(p for p in parts if p.identifier != _MAIN_DWELLING_IDENTIFIER), None
|
||||
)
|
||||
if secondary is not None:
|
||||
aggregates["secondary_dwelling_present"] = True
|
||||
aggregates["secondary_dwelling_wall_construction"] = _int_or_none(
|
||||
aggregates["extension_1_present"] = True
|
||||
aggregates["extension_1_wall_construction"] = _int_or_none(
|
||||
secondary.wall_construction
|
||||
)
|
||||
aggregates["secondary_dwelling_wall_insulation_type"] = _int_or_none(
|
||||
aggregates["extension_1_wall_insulation_type"] = _int_or_none(
|
||||
secondary.wall_insulation_type
|
||||
)
|
||||
aggregates["secondary_dwelling_wall_insulation_thickness_mm"] = _parse_thickness_mm(
|
||||
aggregates["extension_1_wall_insulation_thickness_mm"] = _parse_thickness_mm(
|
||||
secondary.wall_insulation_thickness
|
||||
)
|
||||
aggregates["secondary_dwelling_wall_thickness_mm"] = secondary.wall_thickness_mm
|
||||
aggregates["secondary_dwelling_roof_construction"] = secondary.roof_construction
|
||||
aggregates["secondary_dwelling_roof_insulation_thickness_mm"] = _parse_thickness_mm(
|
||||
aggregates["extension_1_wall_thickness_mm"] = secondary.wall_thickness_mm
|
||||
aggregates["extension_1_roof_construction"] = secondary.roof_construction
|
||||
aggregates["extension_1_roof_insulation_thickness_mm"] = _parse_thickness_mm(
|
||||
secondary.roof_insulation_thickness
|
||||
)
|
||||
sec_ground = _ground_floor(secondary)
|
||||
if sec_ground is not None:
|
||||
aggregates["secondary_dwelling_floor_construction"] = sec_ground.floor_construction
|
||||
aggregates["secondary_dwelling_floor_insulation"] = sec_ground.floor_insulation
|
||||
aggregates["secondary_dwelling_floor_insulation_thickness_mm"] = _parse_thickness_mm(
|
||||
aggregates["extension_1_floor_construction"] = sec_ground.floor_construction
|
||||
aggregates["extension_1_floor_insulation"] = sec_ground.floor_insulation
|
||||
aggregates["extension_1_floor_insulation_thickness_mm"] = _parse_thickness_mm(
|
||||
secondary.floor_insulation_thickness
|
||||
)
|
||||
sec_floor_area = 0.0
|
||||
|
|
@ -1550,8 +1550,8 @@ def _building_part_aggregates(parts: list[SapBuildingPart]) -> dict[str, Any]:
|
|||
for fd in secondary.sap_floor_dimensions:
|
||||
sec_floor_area += fd.total_floor_area_m2
|
||||
sec_hlp += fd.heat_loss_perimeter_m
|
||||
aggregates["secondary_dwelling_total_floor_area_m2"] = sec_floor_area
|
||||
aggregates["secondary_dwelling_heat_loss_perimeter_m"] = sec_hlp
|
||||
aggregates["extension_1_total_floor_area_m2"] = sec_floor_area
|
||||
aggregates["extension_1_heat_loss_perimeter_m"] = sec_hlp
|
||||
|
||||
# Anything beyond main + secondary just gets counted (extension chains, etc.).
|
||||
aggregates["other_building_parts_count"] = max(0, len(parts) - (1 if main else 0) - (1 if secondary else 0))
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue