From a374bd075e23c2c9e9ef8c5b874e06fba2474852 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 20 May 2026 12:46:47 +0000 Subject: [PATCH] P6.1 follow-on: use BuildingPartIdentifier enum in ml/transform + tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the string literal "Main Dwelling" / "Extension 1" comparisons in `_building_part_aggregates` and the four affected tests with the typed `BuildingPartIdentifier.MAIN` / `.EXTENSION_1` enum values, so the transform is consistent with the typed domain introduced in the P6.1 cert→inputs adapter. Fixes a latent mismatch that would silently return `main=None` if the string ever drifted. Co-Authored-By: Claude Sonnet 4.6 --- .../src/domain/ml/tests/test_transform.py | 20 +++++++++++-------- packages/domain/src/domain/ml/transform.py | 8 +++----- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/packages/domain/src/domain/ml/tests/test_transform.py b/packages/domain/src/domain/ml/tests/test_transform.py index aac4bb11..f87d4b12 100644 --- a/packages/domain/src/domain/ml/tests/test_transform.py +++ b/packages/domain/src/domain/ml/tests/test_transform.py @@ -3,7 +3,11 @@ import pandas as pd import pytest -from datatypes.epc.domain.epc_property_data import SapRoomInRoof, WindowTransmissionDetails +from datatypes.epc.domain.epc_property_data import ( + BuildingPartIdentifier, + SapRoomInRoof, + WindowTransmissionDetails, +) from domain.ml.schema import ColumnSpec, TransformSchema from domain.ml.tests._fixtures import ( make_building_part, @@ -540,7 +544,7 @@ def test_schema_advertises_building_part_features() -> None: def test_to_row_aggregates_building_parts_with_main_dwelling_carveout() -> None: # Arrange — Main Dwelling (two floors, age band B, wall 3, roof 4) plus one extension. main = make_building_part( - identifier="Main Dwelling", + identifier=BuildingPartIdentifier.MAIN, construction_age_band="B", wall_construction=3, roof_construction=4, @@ -556,7 +560,7 @@ def test_to_row_aggregates_building_parts_with_main_dwelling_carveout() -> None: ], ) extension = make_building_part( - identifier="Extension 1", + identifier=BuildingPartIdentifier.EXTENSION_1, construction_age_band="L", wall_construction=4, roof_construction=5, @@ -600,7 +604,7 @@ def test_to_row_aggregates_building_parts_with_main_dwelling_carveout() -> None: def test_to_row_flags_room_in_roof_when_main_dwelling_has_it() -> None: # Arrange main = make_building_part( - identifier="Main Dwelling", + identifier=BuildingPartIdentifier.MAIN, sap_room_in_roof=SapRoomInRoof(floor_area=15.0, construction_age_band="B"), ) epc = make_minimal_sap10_epc(energy_rating_current=82, sap_building_parts=[main]) @@ -615,7 +619,7 @@ def test_to_row_flags_room_in_roof_when_main_dwelling_has_it() -> None: def test_to_row_returns_building_part_nones_when_no_main_dwelling_identified() -> None: # Arrange — single part with identifier that doesn't match "Main Dwelling" - sole_part = make_building_part(identifier="Extension 1") + sole_part = make_building_part(identifier=BuildingPartIdentifier.EXTENSION_1) epc = make_minimal_sap10_epc( energy_rating_current=82, sap_building_parts=[sole_part] ) @@ -1188,7 +1192,7 @@ def test_to_row_extracts_main_dwelling_wall_roof_floor_fabric_inputs() -> None: floor=1, floor_insulation=0, floor_construction=0, ) main = SapBuildingPart( - identifier="Main Dwelling", + identifier=BuildingPartIdentifier.MAIN, construction_age_band="C", wall_construction=3, wall_insulation_type=4, @@ -1229,7 +1233,7 @@ def test_to_row_parses_no_insulation_sentinel_as_zero_mm() -> None: # Arrange from datatypes.epc.domain.epc_property_data import SapBuildingPart main = SapBuildingPart( - identifier="Main Dwelling", + identifier=BuildingPartIdentifier.MAIN, construction_age_band="C", wall_construction=3, wall_insulation_type=4, @@ -1268,7 +1272,7 @@ def test_to_row_emits_positive_envelope_heat_loss_for_sap10_epc() -> None: from domain.ml.tests._fixtures import make_building_part, make_floor_dimension main = make_building_part( - identifier="Main Dwelling", + identifier=BuildingPartIdentifier.MAIN, construction_age_band="G", wall_construction=4, wall_insulation_type=4, diff --git a/packages/domain/src/domain/ml/transform.py b/packages/domain/src/domain/ml/transform.py index b4f4ae90..7c7da1a9 100644 --- a/packages/domain/src/domain/ml/transform.py +++ b/packages/domain/src/domain/ml/transform.py @@ -16,6 +16,7 @@ import pandas as pd from datatypes.epc.domain.epc import Epc from datatypes.epc.domain.epc_property_data import ( + BuildingPartIdentifier, EnergyElement, EpcPropertyData, SapBuildingPart, @@ -41,9 +42,6 @@ from domain.ml.schema import ColumnSpec, TransformSchema from domain.ml.ucl import apply_ucl_correction -_MAIN_DWELLING_IDENTIFIER = "Main Dwelling" - - # SAP10 orientation codes: 1=N, 2=NE, 3=E, 4=SE, 5=S, 6=SW, 7=W, 8=NW. # Anything else (0, "NR", etc.) is treated as unrecorded — it contributes to # `window_count` and `window_total_area_m2` but to no octant. @@ -1477,7 +1475,7 @@ def _building_part_aggregates(parts: list[SapBuildingPart]) -> dict[str, Any]: present — otherwise None (we don't silently fall back to the first part). """ main = next( - (p for p in parts if p.identifier == _MAIN_DWELLING_IDENTIFIER), None + (p for p in parts if p.identifier is BuildingPartIdentifier.MAIN), None ) aggregates: dict[str, Any] = { "building_parts_count": len(parts), @@ -1588,7 +1586,7 @@ def _building_part_aggregates(parts: list[SapBuildingPart]) -> dict[str, Any]: # Extension 1 — first non-main entry in the list. secondary = next( - (p for p in parts if p.identifier != _MAIN_DWELLING_IDENTIFIER), None + (p for p in parts if p.identifier is not BuildingPartIdentifier.MAIN), None ) if secondary is not None: aggregates["extension_1_present"] = True