From 0ffda529ecd1eb80f2923463615285db4773425c Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 16 May 2026 22:08:27 +0000 Subject: [PATCH] slice 15a: add wall/floor/roof + demand scalar features for retrofit simulation 15 new features wired through schema -> domain -> mapper -> transform: Main Dwelling fabric (11): - wall_insulation_type, wall_insulation_thickness_mm, wall_dry_lined, wall_thickness_mm, party_wall_construction - roof_insulation_location, roof_insulation_thickness_mm - floor_construction, floor_insulation, floor_insulation_thickness_mm, floor_heat_loss Dwelling-level scalars (4): - multiple_glazed_proportion, number_baths, number_baths_wwhrs, extract_fans_count Thickness strings like '50mm'/'NI'/'ND' parsed via _parse_thickness_mm; NI (no insulation) lands as 0mm so the model sees the physical zero rather than a missing value. Categorical sentinels ('NA'/'NI'/'ND') become None. Also fixed long-standing typo `multiple_glazed_propertion` -> `_proportion` in domain dataclass + its lone DB-model usage. 
Co-Authored-By: Claude Opus 4.7 --- backend/app/db/models/epc_property.py | 2 +- datatypes/epc/domain/epc_property_data.py | 6 +- datatypes/epc/domain/mapper.py | 5 + datatypes/epc/schema/rdsap_schema_21_0_1.py | 4 + .../src/domain/ml/tests/test_transform.py | 96 +++++++++++ packages/domain/src/domain/ml/transform.py | 155 +++++++++++++++++- 6 files changed, 265 insertions(+), 3 deletions(-) diff --git a/backend/app/db/models/epc_property.py b/backend/app/db/models/epc_property.py index 50523fbb..74083da2 100644 --- a/backend/app/db/models/epc_property.py +++ b/backend/app/db/models/epc_property.py @@ -225,7 +225,7 @@ class EpcPropertyModel(SQLModel, table=True): pressure_test_certificate_number=data.pressure_test_certificate_number, percent_draughtproofed=data.percent_draughtproofed, insulated_door_u_value=data.insulated_door_u_value, - multiple_glazed_proportion=data.multiple_glazed_propertion, + multiple_glazed_proportion=data.multiple_glazed_proportion, windows_transmission_u_value=( data.windows_transmission_details.u_value if data.windows_transmission_details diff --git a/datatypes/epc/domain/epc_property_data.py b/datatypes/epc/domain/epc_property_data.py index 9a261768..c7cc4f0f 100644 --- a/datatypes/epc/domain/epc_property_data.py +++ b/datatypes/epc/domain/epc_property_data.py @@ -69,6 +69,9 @@ class SapHeating: secondary_fuel_type: Optional[int] = None secondary_heating_type: Optional[Union[int, str]] = None # int from API; str from site notes cylinder_insulation_thickness_mm: Optional[int] = None + # SAP10 hot-water demand inputs from sap_heating. 
+ number_baths: Optional[int] = None + number_baths_wwhrs: Optional[int] = None @dataclass @@ -386,7 +389,8 @@ class EpcPropertyData: draughtproofed_door_count: Optional[int] = None mechanical_vent_duct_type: Optional[int] = None windows_transmission_details: Optional[WindowsTransmissionDetails] = None - multiple_glazed_propertion: Optional[int] = None + multiple_glazed_proportion: Optional[int] = None + extract_fans_count: Optional[int] = None calculation_software_version: Optional[str] = None # Do we care about this? mechanical_vent_duct_placement: Optional[int] = None mechanical_vent_duct_insulation: Optional[int] = None diff --git a/datatypes/epc/domain/mapper.py b/datatypes/epc/domain/mapper.py index 45e0cb3a..a2bc1a02 100644 --- a/datatypes/epc/domain/mapper.py +++ b/datatypes/epc/domain/mapper.py @@ -1381,6 +1381,8 @@ class EpcPropertyDataMapper: secondary_fuel_type=schema.sap_heating.secondary_fuel_type, secondary_heating_type=schema.sap_heating.secondary_heating_type, cylinder_insulation_thickness_mm=schema.sap_heating.cylinder_insulation_thickness, + number_baths=schema.sap_heating.number_baths, + number_baths_wwhrs=schema.sap_heating.number_baths_wwhrs, ), # SAP windows sap_windows=[ @@ -1524,6 +1526,9 @@ class EpcPropertyDataMapper: energy_rating_current=schema.energy_rating_current, co2_emissions_current=float(schema.co2_emissions_current), energy_consumption_current=schema.energy_consumption_current, + # Dwelling-level inputs used as ML features. 
+ multiple_glazed_proportion=schema.multiple_glazed_proportion, + extract_fans_count=schema.extract_fans_count, ) @staticmethod diff --git a/datatypes/epc/schema/rdsap_schema_21_0_1.py b/datatypes/epc/schema/rdsap_schema_21_0_1.py index 37498bb8..f4531c52 100644 --- a/datatypes/epc/schema/rdsap_schema_21_0_1.py +++ b/datatypes/epc/schema/rdsap_schema_21_0_1.py @@ -69,6 +69,9 @@ class SapHeating: # Real-API certs carry shower_outlets as a list, not the synthetic single-object form; # accept both shapes so older fixtures keep parsing. shower_outlets: Optional[Union[ShowerOutlets, List[ShowerOutlets]]] = None + # SAP10 hot-water demand inputs. + number_baths: Optional[int] = None + number_baths_wwhrs: Optional[int] = None cylinder_insulation_type: Optional[int] = None cylinder_thermostat: Optional[str] = None secondary_fuel_type: Optional[int] = None @@ -340,6 +343,7 @@ class RdSapSchema21_0_1: incandescent_fixed_lighting_bulbs_count: int # Fields below are present in some certs but absent in many real-world responses; # see datatypes/epc/schema/tests/fixtures/21_0_1_real.json for a representative cert. 
+ extract_fans_count: Optional[int] = None wet_rooms_count: Optional[int] = None open_chimneys_count: Optional[int] = None insulated_door_u_value: Optional[float] = None diff --git a/packages/domain/src/domain/ml/tests/test_transform.py b/packages/domain/src/domain/ml/tests/test_transform.py index 14fe2404..a0b67a2a 100644 --- a/packages/domain/src/domain/ml/tests/test_transform.py +++ b/packages/domain/src/domain/ml/tests/test_transform.py @@ -1180,3 +1180,99 @@ def test_to_row_area_weights_window_u_value_and_solar_transmittance() -> None: assert row["window_avg_u_value"] == pytest.approx(1.6) # Area-weighted solar transmittance: (0.72 * 2.0 + 0.60 * 1.0) / 3.0 = 2.04 / 3.0 = 0.68 assert row["window_avg_solar_transmittance"] == pytest.approx(0.68) + + +def test_to_row_extracts_main_dwelling_wall_roof_floor_fabric_inputs() -> None: + # Arrange + from datatypes.epc.domain.epc_property_data import SapBuildingPart, SapFloorDimension + ground = SapFloorDimension( + room_height_m=2.4, total_floor_area_m2=50.0, + party_wall_length_m=5.0, heat_loss_perimeter_m=20.0, + floor=0, floor_insulation=2, floor_construction=1, + ) + upstairs = SapFloorDimension( + room_height_m=2.4, total_floor_area_m2=50.0, + party_wall_length_m=5.0, heat_loss_perimeter_m=20.0, + floor=1, floor_insulation=0, floor_construction=0, + ) + main = SapBuildingPart( + identifier="Main Dwelling", + construction_age_band="C", + wall_construction=3, + wall_insulation_type=4, + wall_thickness_measured=True, + party_wall_construction=2, + sap_floor_dimensions=[ground, upstairs], + wall_dry_lined=False, + wall_thickness_mm=300, + wall_insulation_thickness="50mm", + floor_heat_loss=7, + floor_insulation_thickness="100mm", + roof_construction=5, + roof_insulation_location=6, + roof_insulation_thickness="270mm", + ) + epc = make_minimal_sap10_epc(energy_rating_current=70, sap_building_parts=[main]) + + # Act + row = EpcMlTransform().to_row(epc) + + # Assert — wall fabric + assert 
row["main_dwelling_wall_insulation_type"] == 4 + assert row["main_dwelling_wall_insulation_thickness_mm"] == 50 + assert row["main_dwelling_wall_dry_lined"] is False + assert row["main_dwelling_wall_thickness_mm"] == 300 + assert row["main_dwelling_party_wall_construction"] == 2 + # Assert — roof fabric + assert row["main_dwelling_roof_insulation_location"] == 6 + assert row["main_dwelling_roof_insulation_thickness_mm"] == 270 + # Assert — floor fabric, taken from ground-floor SapFloorDimension + assert row["main_dwelling_floor_construction"] == 1 + assert row["main_dwelling_floor_insulation"] == 2 + assert row["main_dwelling_floor_insulation_thickness_mm"] == 100 + assert row["main_dwelling_floor_heat_loss"] == 7 + + +def test_to_row_parses_no_insulation_sentinel_as_zero_mm() -> None: + # Arrange + from datatypes.epc.domain.epc_property_data import SapBuildingPart + main = SapBuildingPart( + identifier="Main Dwelling", + construction_age_band="C", + wall_construction=3, + wall_insulation_type=4, + wall_thickness_measured=True, + party_wall_construction=2, + sap_floor_dimensions=[], + wall_insulation_thickness="NI", + roof_insulation_thickness="ND", # unparseable sentinel + ) + epc = make_minimal_sap10_epc(energy_rating_current=70, sap_building_parts=[main]) + + # Act + row = EpcMlTransform().to_row(epc) + + # Assert + assert row["main_dwelling_wall_insulation_thickness_mm"] == 0 + assert row["main_dwelling_roof_insulation_thickness_mm"] is None + + +def test_to_row_threads_top_level_fabric_and_demand_scalars() -> None: + # Arrange + from dataclasses import replace + base = make_minimal_sap10_epc(energy_rating_current=72) + epc = replace( + base, + multiple_glazed_proportion=85, + extract_fans_count=2, + sap_heating=replace(base.sap_heating, number_baths=2, number_baths_wwhrs=1), + ) + + # Act + row = EpcMlTransform().to_row(epc) + + # Assert + assert row["multiple_glazed_proportion"] == 85 + assert row["extract_fans_count"] == 2 + assert row["number_baths"] == 2 + 
assert row["number_baths_wwhrs"] == 1 diff --git a/packages/domain/src/domain/ml/transform.py b/packages/domain/src/domain/ml/transform.py index 8b6fe500..9f9fd79f 100644 --- a/packages/domain/src/domain/ml/transform.py +++ b/packages/domain/src/domain/ml/transform.py @@ -276,6 +276,51 @@ _FEATURE_COLUMNS: dict[str, ColumnSpec] = { dtype=int, nullable=True, categorical=True, description="Main Dwelling roof construction SAP10 code.", ), + # Main Dwelling fabric inputs — wall, roof, floor (model retrofit simulation surface). + "main_dwelling_wall_insulation_type": ColumnSpec( + dtype=int, nullable=True, categorical=True, + description="Main Dwelling wall insulation type SAP10 code.", + ), + "main_dwelling_wall_insulation_thickness_mm": ColumnSpec( + dtype=int, nullable=True, + description="Main Dwelling wall insulation thickness in mm. 'NI' (no insulation) maps to 0.", + ), + "main_dwelling_wall_dry_lined": ColumnSpec( + dtype=bool, nullable=True, + description="Main Dwelling wall_dry_lined flag.", + ), + "main_dwelling_wall_thickness_mm": ColumnSpec( + dtype=int, nullable=True, + description="Main Dwelling external wall thickness in mm.", + ), + "main_dwelling_party_wall_construction": ColumnSpec( + dtype=int, nullable=True, categorical=True, + description="Main Dwelling party wall construction SAP10 code (str sentinels NA/NI -> None).", + ), + "main_dwelling_roof_insulation_location": ColumnSpec( + dtype=int, nullable=True, categorical=True, + description="Main Dwelling roof insulation location SAP10 code (str sentinels -> None).", + ), + "main_dwelling_roof_insulation_thickness_mm": ColumnSpec( + dtype=int, nullable=True, + description="Main Dwelling roof insulation thickness in mm. 
'NI' -> 0; non-numeric sentinels -> None.", + ), + "main_dwelling_floor_construction": ColumnSpec( + dtype=int, nullable=True, categorical=True, + description="Main Dwelling ground-floor construction SAP10 code (from sap_floor_dimensions[floor==0]).", + ), + "main_dwelling_floor_insulation": ColumnSpec( + dtype=int, nullable=True, categorical=True, + description="Main Dwelling ground-floor insulation SAP10 code (from sap_floor_dimensions[floor==0]).", + ), + "main_dwelling_floor_insulation_thickness_mm": ColumnSpec( + dtype=int, nullable=True, + description="Main Dwelling floor insulation thickness in mm. 'NI' -> 0; non-numeric sentinels -> None.", + ), + "main_dwelling_floor_heat_loss": ColumnSpec( + dtype=int, nullable=True, categorical=True, + description="Main Dwelling floor heat-loss SAP10 code.", + ), # Heating — count of main heating systems (usually 1) "main_heating_count": ColumnSpec( dtype=int, nullable=False, @@ -436,6 +481,23 @@ _FEATURE_COLUMNS: dict[str, ColumnSpec] = { dtype=int, nullable=True, description="Air-tightness pressure-test SAP10 code.", ), + # Dwelling-level fabric + demand inputs. 
+ "multiple_glazed_proportion": ColumnSpec( + dtype=int, nullable=True, + description="Percent of glazed area that is multiple-glazed.", + ), + "number_baths": ColumnSpec( + dtype=int, nullable=True, + description="Number of baths declared on sap_heating (hot-water demand proxy).", + ), + "number_baths_wwhrs": ColumnSpec( + dtype=int, nullable=True, + description="Number of baths served by a WWHRS unit.", + ), + "extract_fans_count": ColumnSpec( + dtype=int, nullable=True, + description="Number of extract fans (ventilation/heat-loss proxy).", + ), } @@ -582,6 +644,11 @@ class EpcMlTransform: "mechanical_vent_duct_type": epc.mechanical_vent_duct_type, "blocked_chimneys_count": epc.blocked_chimneys_count, "pressure_test": epc.pressure_test, + # Features — dwelling-level fabric + demand scalars + "multiple_glazed_proportion": epc.multiple_glazed_proportion, + "number_baths": epc.sap_heating.number_baths, + "number_baths_wwhrs": epc.sap_heating.number_baths_wwhrs, + "extract_fans_count": epc.extract_fans_count, # Targets "sap_score": epc.energy_rating_current, "co2_emissions": epc.co2_emissions_current, @@ -743,8 +810,66 @@ def _heating_aggregates(sap_heating: SapHeating) -> dict[str, Any]: return aggregates +_MAIN_DWELLING_FABRIC_COLUMNS = ( + "main_dwelling_wall_insulation_type", + "main_dwelling_wall_insulation_thickness_mm", + "main_dwelling_wall_dry_lined", + "main_dwelling_wall_thickness_mm", + "main_dwelling_party_wall_construction", + "main_dwelling_roof_insulation_location", + "main_dwelling_roof_insulation_thickness_mm", + "main_dwelling_floor_construction", + "main_dwelling_floor_insulation", + "main_dwelling_floor_insulation_thickness_mm", + "main_dwelling_floor_heat_loss", +) + + +def _parse_thickness_mm(value: Any) -> Optional[int]: + """Parse a SAP10 insulation-thickness string ('100mm', '400mm+', 'NI', 'ND') to int mm. + + Returns 0 for 'NI' (No Insulation — semantically meaningful as 0mm). 
Returns None + for unparseable sentinels like 'ND' or '(assumed)'. + """ + if value is None: + return None + if isinstance(value, int): + return value + if not isinstance(value, str): + return None + s = value.strip() + if s.upper() == "NI": + return 0 + digits = "" + for c in s: + if c.isdigit(): + digits += c + else: + break + return int(digits) if digits else None + + +def _int_or_none(value: Any) -> Optional[int]: + """Treat int values as-is, drop string sentinels like 'NA'/'NI'/'ND'.""" + return value if isinstance(value, int) else None + + +def _ground_floor(part: SapBuildingPart) -> Optional[Any]: + """Pick the ground-floor `SapFloorDimension` (floor==0) for a building part. + + Falls back to the first floor dimension if no dimension is flagged as ground floor. + Returns None if the part has no floor dimensions at all. + """ + if not part.sap_floor_dimensions: + return None + for fd in part.sap_floor_dimensions: + if fd.floor == 0: + return fd + return part.sap_floor_dimensions[0] + + +def _building_part_aggregates(parts: list[SapBuildingPart]) -> dict[str, Any]: - """Aggregate sap_building_parts into the 13 columns: 5 cross-all + 8 Main-Dwelling. + """Aggregate sap_building_parts into 24 columns: 5 cross-all + 19 Main-Dwelling. Cross-all aggregates always populate (zeros when no parts). 
Main-Dwelling columns populate only when a part with `identifier == "Main Dwelling"` is @@ -768,6 +893,8 @@ def _building_part_aggregates(parts: list[SapBuildingPart]) -> dict[str, Any]: "main_dwelling_wall_construction": None, "main_dwelling_roof_construction": None, } + for col in _MAIN_DWELLING_FABRIC_COLUMNS: + aggregates[col] = None if not parts: return aggregates @@ -808,6 +935,32 @@ def _building_part_aggregates(parts: list[SapBuildingPart]) -> dict[str, Any]: else None ) aggregates["main_dwelling_roof_construction"] = main.roof_construction + # New fabric inputs: walls + aggregates["main_dwelling_wall_insulation_type"] = _int_or_none(main.wall_insulation_type) + aggregates["main_dwelling_wall_insulation_thickness_mm"] = _parse_thickness_mm( + main.wall_insulation_thickness + ) + aggregates["main_dwelling_wall_dry_lined"] = main.wall_dry_lined + aggregates["main_dwelling_wall_thickness_mm"] = main.wall_thickness_mm + aggregates["main_dwelling_party_wall_construction"] = _int_or_none( + main.party_wall_construction + ) + # New fabric inputs: roof + aggregates["main_dwelling_roof_insulation_location"] = _int_or_none( + main.roof_insulation_location + ) + aggregates["main_dwelling_roof_insulation_thickness_mm"] = _parse_thickness_mm( + main.roof_insulation_thickness + ) + # New fabric inputs: floor — heat loss and insulation thickness from the part itself; construction and insulation codes from the ground-floor SapFloorDimension + aggregates["main_dwelling_floor_heat_loss"] = main.floor_heat_loss + aggregates["main_dwelling_floor_insulation_thickness_mm"] = _parse_thickness_mm( + main.floor_insulation_thickness + ) + ground_floor = _ground_floor(main) + if ground_floor is not None: + aggregates["main_dwelling_floor_construction"] = ground_floor.floor_construction + aggregates["main_dwelling_floor_insulation"] = ground_floor.floor_insulation return aggregates