slice 15a: add wall/floor/roof + demand scalar features for retrofit simulation

15 new features wired through schema -> domain -> mapper -> transform:

Main Dwelling fabric (11):
  - wall_insulation_type, wall_insulation_thickness_mm, wall_dry_lined,
    wall_thickness_mm, party_wall_construction
  - roof_insulation_location, roof_insulation_thickness_mm
  - floor_construction, floor_insulation, floor_insulation_thickness_mm,
    floor_heat_loss

Dwelling-level scalars (4):
  - multiple_glazed_proportion, number_baths, number_baths_wwhrs,
    extract_fans_count

Thickness strings like '50mm'/'NI'/'ND' are parsed via _parse_thickness_mm: a
leading number is read as millimetres, NI (no insulation) lands as 0mm so the
model sees the physical zero rather than a missing value, and other
non-numeric sentinels such as 'ND' become None. Categorical sentinels
('NA'/'NI'/'ND') become None.

Also fixed long-standing typo `multiple_glazed_propertion` -> `_proportion`
in domain dataclass + its lone DB-model usage.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Khalim Conn-Kowlessar 2026-05-16 22:08:27 +00:00
parent c496f345f8
commit 0ffda529ec
6 changed files with 265 additions and 3 deletions

View file

@ -225,7 +225,7 @@ class EpcPropertyModel(SQLModel, table=True):
pressure_test_certificate_number=data.pressure_test_certificate_number,
percent_draughtproofed=data.percent_draughtproofed,
insulated_door_u_value=data.insulated_door_u_value,
multiple_glazed_proportion=data.multiple_glazed_propertion,
multiple_glazed_proportion=data.multiple_glazed_proportion,
windows_transmission_u_value=(
data.windows_transmission_details.u_value
if data.windows_transmission_details

View file

@ -69,6 +69,9 @@ class SapHeating:
secondary_fuel_type: Optional[int] = None
secondary_heating_type: Optional[Union[int, str]] = None # int from API; str from site notes
cylinder_insulation_thickness_mm: Optional[int] = None
# SAP10 hot-water demand inputs from sap_heating.
number_baths: Optional[int] = None
number_baths_wwhrs: Optional[int] = None
@dataclass
@ -386,7 +389,8 @@ class EpcPropertyData:
draughtproofed_door_count: Optional[int] = None
mechanical_vent_duct_type: Optional[int] = None
windows_transmission_details: Optional[WindowsTransmissionDetails] = None
multiple_glazed_propertion: Optional[int] = None
multiple_glazed_proportion: Optional[int] = None
extract_fans_count: Optional[int] = None
calculation_software_version: Optional[str] = None # Do we care about this?
mechanical_vent_duct_placement: Optional[int] = None
mechanical_vent_duct_insulation: Optional[int] = None

View file

@ -1381,6 +1381,8 @@ class EpcPropertyDataMapper:
secondary_fuel_type=schema.sap_heating.secondary_fuel_type,
secondary_heating_type=schema.sap_heating.secondary_heating_type,
cylinder_insulation_thickness_mm=schema.sap_heating.cylinder_insulation_thickness,
number_baths=schema.sap_heating.number_baths,
number_baths_wwhrs=schema.sap_heating.number_baths_wwhrs,
),
# SAP windows
sap_windows=[
@ -1524,6 +1526,9 @@ class EpcPropertyDataMapper:
energy_rating_current=schema.energy_rating_current,
co2_emissions_current=float(schema.co2_emissions_current),
energy_consumption_current=schema.energy_consumption_current,
# Dwelling-level inputs used as ML features.
multiple_glazed_proportion=schema.multiple_glazed_proportion,
extract_fans_count=schema.extract_fans_count,
)
@staticmethod

View file

@ -69,6 +69,9 @@ class SapHeating:
# Real-API certs carry shower_outlets as a list, not the synthetic single-object form;
# accept both shapes so older fixtures keep parsing.
shower_outlets: Optional[Union[ShowerOutlets, List[ShowerOutlets]]] = None
# SAP10 hot-water demand inputs.
number_baths: Optional[int] = None
number_baths_wwhrs: Optional[int] = None
cylinder_insulation_type: Optional[int] = None
cylinder_thermostat: Optional[str] = None
secondary_fuel_type: Optional[int] = None
@ -340,6 +343,7 @@ class RdSapSchema21_0_1:
incandescent_fixed_lighting_bulbs_count: int
# Fields below are present in some certs but absent in many real-world responses;
# see datatypes/epc/schema/tests/fixtures/21_0_1_real.json for a representative cert.
extract_fans_count: Optional[int] = None
wet_rooms_count: Optional[int] = None
open_chimneys_count: Optional[int] = None
insulated_door_u_value: Optional[float] = None

View file

@ -1180,3 +1180,99 @@ def test_to_row_area_weights_window_u_value_and_solar_transmittance() -> None:
assert row["window_avg_u_value"] == pytest.approx(1.6)
# Area-weighted solar transmittance: (0.72 * 2.0 + 0.60 * 1.0) / 3.0 = 2.04 / 3.0 = 0.68
assert row["window_avg_solar_transmittance"] == pytest.approx(0.68)
def test_to_row_extracts_main_dwelling_wall_roof_floor_fabric_inputs() -> None:
    """A fully populated Main Dwelling part surfaces its wall, roof and floor fabric columns."""
    # Arrange
    from datatypes.epc.domain.epc_property_data import SapBuildingPart, SapFloorDimension

    # Both storeys share the same geometry; only the floor index and codes differ.
    shared_geometry = dict(
        room_height_m=2.4,
        total_floor_area_m2=50.0,
        party_wall_length_m=5.0,
        heat_loss_perimeter_m=20.0,
    )
    ground = SapFloorDimension(
        **shared_geometry, floor=0, floor_insulation=2, floor_construction=1
    )
    upstairs = SapFloorDimension(
        **shared_geometry, floor=1, floor_insulation=0, floor_construction=0
    )
    main = SapBuildingPart(
        identifier="Main Dwelling",
        construction_age_band="C",
        wall_construction=3,
        wall_insulation_type=4,
        wall_thickness_measured=True,
        party_wall_construction=2,
        sap_floor_dimensions=[ground, upstairs],
        wall_dry_lined=False,
        wall_thickness_mm=300,
        wall_insulation_thickness="50mm",
        floor_heat_loss=7,
        floor_insulation_thickness="100mm",
        roof_construction=5,
        roof_insulation_location=6,
        roof_insulation_thickness="270mm",
    )
    epc = make_minimal_sap10_epc(energy_rating_current=70, sap_building_parts=[main])

    # Act
    row = EpcMlTransform().to_row(epc)

    # Assert
    expected_by_column = {
        # wall fabric
        "main_dwelling_wall_insulation_type": 4,
        "main_dwelling_wall_insulation_thickness_mm": 50,
        "main_dwelling_wall_thickness_mm": 300,
        "main_dwelling_party_wall_construction": 2,
        # roof fabric
        "main_dwelling_roof_insulation_location": 6,
        "main_dwelling_roof_insulation_thickness_mm": 270,
        # floor fabric; the construction/insulation codes match the floor == 0 dimension
        "main_dwelling_floor_construction": 1,
        "main_dwelling_floor_insulation": 2,
        "main_dwelling_floor_insulation_thickness_mm": 100,
        "main_dwelling_floor_heat_loss": 7,
    }
    for column, expected in expected_by_column.items():
        assert row[column] == expected, column
    # Identity check kept separate: the flag must be the bool False, not merely falsy.
    assert row["main_dwelling_wall_dry_lined"] is False
def test_to_row_parses_no_insulation_sentinel_as_zero_mm() -> None:
    """'NI' thickness becomes 0 mm while the unparseable 'ND' sentinel becomes None."""
    # Arrange
    from datatypes.epc.domain.epc_property_data import SapBuildingPart

    main_dwelling = SapBuildingPart(
        identifier="Main Dwelling",
        construction_age_band="C",
        wall_construction=3,
        wall_insulation_type=4,
        wall_thickness_measured=True,
        party_wall_construction=2,
        sap_floor_dimensions=[],
        wall_insulation_thickness="NI",  # "no insulation" sentinel
        roof_insulation_thickness="ND",  # unparseable sentinel
    )
    epc = make_minimal_sap10_epc(
        energy_rating_current=70,
        sap_building_parts=[main_dwelling],
    )

    # Act
    row = EpcMlTransform().to_row(epc)
    wall_mm = row["main_dwelling_wall_insulation_thickness_mm"]
    roof_mm = row["main_dwelling_roof_insulation_thickness_mm"]

    # Assert
    assert wall_mm == 0
    assert roof_mm is None
def test_to_row_threads_top_level_fabric_and_demand_scalars() -> None:
    """Dwelling-level scalars and the sap_heating bath counts appear unchanged in the row."""
    # Arrange
    from dataclasses import replace

    base = make_minimal_sap10_epc(energy_rating_current=72)
    heating_with_baths = replace(base.sap_heating, number_baths=2, number_baths_wwhrs=1)
    epc = replace(
        base,
        multiple_glazed_proportion=85,
        extract_fans_count=2,
        sap_heating=heating_with_baths,
    )

    # Act
    row = EpcMlTransform().to_row(epc)

    # Assert
    expected_by_column = {
        "multiple_glazed_proportion": 85,
        "extract_fans_count": 2,
        "number_baths": 2,
        "number_baths_wwhrs": 1,
    }
    for column, expected in expected_by_column.items():
        assert row[column] == expected, column

View file

@ -276,6 +276,51 @@ _FEATURE_COLUMNS: dict[str, ColumnSpec] = {
dtype=int, nullable=True, categorical=True,
description="Main Dwelling roof construction SAP10 code.",
),
# Main Dwelling fabric inputs — wall, roof, floor (model retrofit simulation surface).
"main_dwelling_wall_insulation_type": ColumnSpec(
dtype=int, nullable=True, categorical=True,
description="Main Dwelling wall insulation type SAP10 code.",
),
"main_dwelling_wall_insulation_thickness_mm": ColumnSpec(
dtype=int, nullable=True,
description="Main Dwelling wall insulation thickness in mm. 'NI' (no insulation) maps to 0.",
),
"main_dwelling_wall_dry_lined": ColumnSpec(
dtype=bool, nullable=True,
description="Main Dwelling wall_dry_lined flag.",
),
"main_dwelling_wall_thickness_mm": ColumnSpec(
dtype=int, nullable=True,
description="Main Dwelling external wall thickness in mm.",
),
"main_dwelling_party_wall_construction": ColumnSpec(
dtype=int, nullable=True, categorical=True,
description="Main Dwelling party wall construction SAP10 code (str sentinels NA/NI -> None).",
),
"main_dwelling_roof_insulation_location": ColumnSpec(
dtype=int, nullable=True, categorical=True,
description="Main Dwelling roof insulation location SAP10 code (str sentinels -> None).",
),
"main_dwelling_roof_insulation_thickness_mm": ColumnSpec(
dtype=int, nullable=True,
description="Main Dwelling roof insulation thickness in mm. 'NI' -> 0; non-numeric sentinels -> None.",
),
"main_dwelling_floor_construction": ColumnSpec(
dtype=int, nullable=True, categorical=True,
description="Main Dwelling ground-floor construction SAP10 code (from sap_floor_dimensions[floor==0]).",
),
"main_dwelling_floor_insulation": ColumnSpec(
dtype=int, nullable=True, categorical=True,
description="Main Dwelling ground-floor insulation SAP10 code (from sap_floor_dimensions[floor==0]).",
),
"main_dwelling_floor_insulation_thickness_mm": ColumnSpec(
dtype=int, nullable=True,
description="Main Dwelling floor insulation thickness in mm. 'NI' -> 0; non-numeric sentinels -> None.",
),
"main_dwelling_floor_heat_loss": ColumnSpec(
dtype=int, nullable=True, categorical=True,
description="Main Dwelling floor heat-loss SAP10 code.",
),
# Heating — count of main heating systems (usually 1)
"main_heating_count": ColumnSpec(
dtype=int, nullable=False,
@ -436,6 +481,23 @@ _FEATURE_COLUMNS: dict[str, ColumnSpec] = {
dtype=int, nullable=True,
description="Air-tightness pressure-test SAP10 code.",
),
# Dwelling-level fabric + demand inputs.
"multiple_glazed_proportion": ColumnSpec(
dtype=int, nullable=True,
description="Percent of glazed area that is multiple-glazed.",
),
"number_baths": ColumnSpec(
dtype=int, nullable=True,
description="Number of baths declared on sap_heating (hot-water demand proxy).",
),
"number_baths_wwhrs": ColumnSpec(
dtype=int, nullable=True,
description="Number of baths served by a WWHRS unit.",
),
"extract_fans_count": ColumnSpec(
dtype=int, nullable=True,
description="Number of extract fans (ventilation/heat-loss proxy).",
),
}
@ -582,6 +644,11 @@ class EpcMlTransform:
"mechanical_vent_duct_type": epc.mechanical_vent_duct_type,
"blocked_chimneys_count": epc.blocked_chimneys_count,
"pressure_test": epc.pressure_test,
# Features — dwelling-level fabric + demand scalars
"multiple_glazed_proportion": epc.multiple_glazed_proportion,
"number_baths": epc.sap_heating.number_baths,
"number_baths_wwhrs": epc.sap_heating.number_baths_wwhrs,
"extract_fans_count": epc.extract_fans_count,
# Targets
"sap_score": epc.energy_rating_current,
"co2_emissions": epc.co2_emissions_current,
@ -743,8 +810,66 @@ def _heating_aggregates(sap_heating: SapHeating) -> dict[str, Any]:
return aggregates
# Names of the Main Dwelling wall/roof/floor fabric columns.
# `_building_part_aggregates` iterates this tuple to default each column to
# None before any Main Dwelling values are filled in.
_MAIN_DWELLING_FABRIC_COLUMNS = (
    # walls
    "main_dwelling_wall_insulation_type",
    "main_dwelling_wall_insulation_thickness_mm",
    "main_dwelling_wall_dry_lined",
    "main_dwelling_wall_thickness_mm",
    "main_dwelling_party_wall_construction",
    # roof
    "main_dwelling_roof_insulation_location",
    "main_dwelling_roof_insulation_thickness_mm",
    # floor
    "main_dwelling_floor_construction",
    "main_dwelling_floor_insulation",
    "main_dwelling_floor_insulation_thickness_mm",
    "main_dwelling_floor_heat_loss",
)
def _parse_thickness_mm(value: Any) -> Optional[int]:
"""Parse a SAP10 insulation-thickness string ('100mm', '400mm+', 'NI', 'ND') to int mm.
Returns 0 for 'NI' (No Insulation semantically meaningful as 0mm). Returns None
for unparseable sentinels like 'ND' or '(assumed)'.
"""
if value is None:
return None
if isinstance(value, int):
return value
if not isinstance(value, str):
return None
s = value.strip()
if s.upper() == "NI":
return 0
digits = ""
for c in s:
if c.isdigit():
digits += c
else:
break
return int(digits) if digits else None
def _int_or_none(value: Any) -> Optional[int]:
"""Treat int values as-is, drop string sentinels like 'NA'/'NI'/'ND'."""
return value if isinstance(value, int) else None
def _ground_floor(part: SapBuildingPart) -> Optional[Any]:
    """Return the floor dimension that represents the part's ground floor.

    The first entry of `part.sap_floor_dimensions` with `floor == 0` wins.
    If no entry is flagged as ground floor, the first entry is used instead.
    Returns None when the part has no floor dimensions at all.
    """
    dimensions = part.sap_floor_dimensions
    if not dimensions:
        return None
    # `dimensions` is non-empty here, so the positional fallback is always valid.
    return next((dim for dim in dimensions if dim.floor == 0), dimensions[0])
def _building_part_aggregates(parts: list[SapBuildingPart]) -> dict[str, Any]:
"""Aggregate sap_building_parts into the 13 columns: 5 cross-all + 8 Main-Dwelling.
"""Aggregate sap_building_parts into 24 columns: 5 cross-all + 19 Main-Dwelling.
Cross-all aggregates always populate (zeros when no parts). Main-Dwelling
columns populate only when a part with `identifier == "Main Dwelling"` is
@ -768,6 +893,8 @@ def _building_part_aggregates(parts: list[SapBuildingPart]) -> dict[str, Any]:
"main_dwelling_wall_construction": None,
"main_dwelling_roof_construction": None,
}
for col in _MAIN_DWELLING_FABRIC_COLUMNS:
aggregates[col] = None
if not parts:
return aggregates
@ -808,6 +935,32 @@ def _building_part_aggregates(parts: list[SapBuildingPart]) -> dict[str, Any]:
else None
)
aggregates["main_dwelling_roof_construction"] = main.roof_construction
# New fabric inputs: walls
aggregates["main_dwelling_wall_insulation_type"] = _int_or_none(main.wall_insulation_type)
aggregates["main_dwelling_wall_insulation_thickness_mm"] = _parse_thickness_mm(
main.wall_insulation_thickness
)
aggregates["main_dwelling_wall_dry_lined"] = main.wall_dry_lined
aggregates["main_dwelling_wall_thickness_mm"] = main.wall_thickness_mm
aggregates["main_dwelling_party_wall_construction"] = _int_or_none(
main.party_wall_construction
)
# New fabric inputs: roof
aggregates["main_dwelling_roof_insulation_location"] = _int_or_none(
main.roof_insulation_location
)
aggregates["main_dwelling_roof_insulation_thickness_mm"] = _parse_thickness_mm(
main.roof_insulation_thickness
)
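# New fabric inputs: floor — heat loss and insulation thickness come from the building part itself; construction and insulation codes come from the ground-floor SapFloorDimension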
aggregates["main_dwelling_floor_heat_loss"] = main.floor_heat_loss
aggregates["main_dwelling_floor_insulation_thickness_mm"] = _parse_thickness_mm(
main.floor_insulation_thickness
)
ground_floor = _ground_floor(main)
if ground_floor is not None:
aggregates["main_dwelling_floor_construction"] = ground_floor.floor_construction
aggregates["main_dwelling_floor_insulation"] = ground_floor.floor_insulation
return aggregates