slice 10: heating system features (primary + water + secondary)

Fifteen heating features land via hybrid Top-1 + flat fields: the
primary heating slot from main_heating_details[0] gives
main_fuel_type, heat_emitter_type, main_heating_control,
main_heating_category, has_fghrs, fan_flue_present, boiler_flue_type
and central_heating_pump_age (all int-categorical for the SAP10
codes); main_heating_count carries the aggregate. Water heating
adds water_heating_code, water_heating_fuel, cylinder_size, and
cylinder_insulation_thickness_mm. Secondary heating is summarised
by has_secondary_heating (derived) and secondary_fuel_type.

Fuel codes follow the gov API enums in epc_codes.csv (the 44 main_fuel
values are shared with water_heating_fuel). Union[int, str] fields
pass through unchanged when the value is an int; any other value becomes None.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Khalim Conn-Kowlessar 2026-05-16 15:50:05 +00:00
parent fb773fa635
commit fff6ef3352
3 changed files with 332 additions and 1 deletions

View file

@ -14,6 +14,7 @@ from typing import Optional, Union
from datatypes.epc.domain.epc_property_data import (
EpcPropertyData,
InstantaneousWwhrs,
MainHeatingDetail,
RenewableHeatIncentive,
SapBuildingPart,
SapEnergySource,
@ -25,6 +26,59 @@ from datatypes.epc.domain.epc_property_data import (
)
def make_main_heating_detail(
    *,
    main_fuel_type: Union[int, str] = 26,  # mains gas (not community)
    heat_emitter_type: Union[int, str] = 1,
    main_heating_control: Union[int, str] = 2106,
    emitter_temperature: Union[int, str] = 1,
    main_heating_category: Optional[int] = 2,
    has_fghrs: bool = False,
    fan_flue_present: Optional[bool] = True,
    boiler_flue_type: Optional[int] = 2,
    central_heating_pump_age: Optional[int] = 0,
    main_heating_number: Optional[int] = 1,
) -> MainHeatingDetail:
    """Test factory for a single MainHeatingDetail (defaults: mains gas boiler).

    Every argument is keyword-only and is forwarded unchanged to the
    MainHeatingDetail constructor, so a test only needs to override the
    fields it cares about.
    """
    # Keyword arguments are listed in the same order as the signature above.
    return MainHeatingDetail(
        main_fuel_type=main_fuel_type,
        heat_emitter_type=heat_emitter_type,
        main_heating_control=main_heating_control,
        emitter_temperature=emitter_temperature,
        main_heating_category=main_heating_category,
        has_fghrs=has_fghrs,
        fan_flue_present=fan_flue_present,
        boiler_flue_type=boiler_flue_type,
        central_heating_pump_age=central_heating_pump_age,
        main_heating_number=main_heating_number,
    )
def make_sap_heating(
    *,
    main_heating_details: Optional[list[MainHeatingDetail]] = None,
    has_fixed_air_conditioning: bool = False,
    water_heating_code: Optional[int] = 901,
    water_heating_fuel: Optional[int] = 26,
    cylinder_size: Optional[Union[int, str]] = None,
    cylinder_insulation_thickness_mm: Optional[int] = None,
    secondary_fuel_type: Optional[int] = None,
) -> SapHeating:
    """Test factory for a SapHeating populated with SAP10 API defaults.

    When ``main_heating_details`` is left as ``None`` the result carries one
    default detail from ``make_main_heating_detail()``. An explicitly passed
    list — including an empty one — is used as given.
    """
    if main_heating_details is None:
        # "Not specified" falls back to a single default primary system.
        main_heating_details = [make_main_heating_detail()]

    return SapHeating(
        instantaneous_wwhrs=InstantaneousWwhrs(),
        main_heating_details=main_heating_details,
        has_fixed_air_conditioning=has_fixed_air_conditioning,
        water_heating_code=water_heating_code,
        water_heating_fuel=water_heating_fuel,
        cylinder_size=cylinder_size,
        cylinder_insulation_thickness_mm=cylinder_insulation_thickness_mm,
        secondary_fuel_type=secondary_fuel_type,
    )
def make_floor_dimension(
*,
total_floor_area_m2: float = 50.0,
@ -136,6 +190,7 @@ def make_minimal_sap10_epc(
country_code: Optional[str] = None,
sap_windows: Optional[list[SapWindow]] = None,
sap_building_parts: Optional[list[SapBuildingPart]] = None,
sap_heating: Optional[SapHeating] = None,
) -> EpcPropertyData:
"""Construct a minimal valid SAP10 EpcPropertyData with parametrisable targets."""
return EpcPropertyData(
@ -151,7 +206,7 @@ def make_minimal_sap10_epc(
floors=[],
main_heating=[],
door_count=door_count,
sap_heating=SapHeating(
sap_heating=sap_heating if sap_heating is not None else SapHeating(
instantaneous_wwhrs=InstantaneousWwhrs(),
main_heating_details=[],
has_fixed_air_conditioning=False,

View file

@ -7,7 +7,9 @@ from domain.ml.schema import ColumnSpec, TransformSchema
from domain.ml.tests._fixtures import (
make_building_part,
make_floor_dimension,
make_main_heating_detail,
make_minimal_sap10_epc,
make_sap_heating,
make_window,
)
from domain.ml.transform import EpcMlTransform
@ -661,6 +663,155 @@ def test_to_row_returns_building_part_zeros_for_property_with_no_parts() -> None
assert row["main_dwelling_wall_construction"] is None
# Expected ColumnSpec attributes for each heating feature column; iterated by
# test_schema_advertises_heating_features to check the advertised schema.
# NOTE: despite the "_NULLABLE" suffix, main_heating_count and
# has_secondary_heating are expected to be non-nullable.
_HEATING_FEATURES_NULLABLE: dict[str, tuple[type, bool, bool]] = {
    # name → (dtype, nullable, categorical)
    "main_heating_count": (int, False, False),
    "primary_main_fuel_type": (int, True, True),
    "primary_heat_emitter_type": (int, True, True),
    "primary_main_heating_control": (int, True, True),
    "primary_main_heating_category": (int, True, True),
    "primary_has_fghrs": (bool, True, False),
    "primary_fan_flue_present": (bool, True, False),
    "primary_boiler_flue_type": (int, True, True),
    "primary_central_heating_pump_age": (int, True, True),
    "water_heating_code": (int, True, True),
    "water_heating_fuel": (int, True, True),
    "cylinder_size": (int, True, False),
    "cylinder_insulation_thickness_mm": (int, True, False),
    "has_secondary_heating": (bool, False, False),
    "secondary_fuel_type": (int, True, True),
}
def test_schema_advertises_heating_features() -> None:
    """Every heating column is advertised with the expected dtype and flags."""
    # Arrange
    transform = EpcMlTransform()

    # Act
    schema = transform.schema()

    # Assert
    for name, expected in _HEATING_FEATURES_NULLABLE.items():
        want_dtype, want_nullable, want_categorical = expected
        # The column name doubles as the assertion message so a failure
        # identifies which feature is wrong.
        assert name in schema.feature_columns, name
        spec = schema.feature_columns[name]
        assert spec.dtype is want_dtype, name
        assert spec.nullable is want_nullable, name
        assert spec.categorical is want_categorical, name
def test_to_row_extracts_primary_heating_from_first_main_heating_detail() -> None:
    """The primary_* columns mirror the first entry of main_heating_details."""
    # Arrange — mains-gas boiler with a fan flue, modern control, no FGHRS
    boiler = make_main_heating_detail(
        main_fuel_type=26,  # mains gas (not community)
        heat_emitter_type=1,
        main_heating_control=2106,
        main_heating_category=2,
        has_fghrs=False,
        fan_flue_present=True,
        boiler_flue_type=2,
        central_heating_pump_age=0,
    )
    heating = make_sap_heating(main_heating_details=[boiler])
    epc = make_minimal_sap10_epc(energy_rating_current=82, sap_heating=heating)
    transform = EpcMlTransform()

    # Act
    row = transform.to_row(epc)

    # Assert
    assert row["main_heating_count"] == 1
    assert row["primary_main_fuel_type"] == 26
    assert row["primary_heat_emitter_type"] == 1
    assert row["primary_main_heating_control"] == 2106
    assert row["primary_main_heating_category"] == 2
    # Booleans are checked by identity so a truthy/falsy int cannot pass.
    assert row["primary_has_fghrs"] is False
    assert row["primary_fan_flue_present"] is True
    assert row["primary_boiler_flue_type"] == 2
    assert row["primary_central_heating_pump_age"] == 0
def test_to_row_extracts_water_heating_fields() -> None:
    """Water-heating columns are read straight off sap_heating."""
    # Arrange
    heating = make_sap_heating(
        water_heating_code=901,
        water_heating_fuel=26,
        cylinder_size=2,
        cylinder_insulation_thickness_mm=38,
    )
    epc = make_minimal_sap10_epc(energy_rating_current=82, sap_heating=heating)
    transform = EpcMlTransform()

    # Act
    row = transform.to_row(epc)

    # Assert
    assert row["water_heating_code"] == 901
    assert row["water_heating_fuel"] == 26
    assert row["cylinder_size"] == 2
    assert row["cylinder_insulation_thickness_mm"] == 38
def test_to_row_flags_secondary_heating_when_present() -> None:
    """A populated secondary_fuel_type sets the flag and passes the code through."""
    # Arrange — secondary heating: bottled-LPG (code 38)
    heating = make_sap_heating(secondary_fuel_type=38)
    epc = make_minimal_sap10_epc(energy_rating_current=82, sap_heating=heating)
    transform = EpcMlTransform()

    # Act
    row = transform.to_row(epc)

    # Assert
    assert row["has_secondary_heating"] is True
    assert row["secondary_fuel_type"] == 38
def test_to_row_returns_no_secondary_heating_when_absent() -> None:
    """With no secondary_fuel_type the flag is False and the code is None."""
    # Arrange
    heating = make_sap_heating(secondary_fuel_type=None)
    epc = make_minimal_sap10_epc(energy_rating_current=82, sap_heating=heating)
    transform = EpcMlTransform()

    # Act
    row = transform.to_row(epc)

    # Assert
    assert row["has_secondary_heating"] is False
    assert row["secondary_fuel_type"] is None
def test_to_row_returns_primary_heating_nones_when_no_main_heating_details() -> None:
    """An empty main_heating_details yields count 0 and None for every primary_* column."""
    # Arrange — sap_heating present but main_heating_details is empty
    heating = make_sap_heating(main_heating_details=[])
    epc = make_minimal_sap10_epc(energy_rating_current=82, sap_heating=heating)
    transform = EpcMlTransform()

    # Act
    row = transform.to_row(epc)

    # Assert
    assert row["main_heating_count"] == 0
    primary_columns = (
        "primary_main_fuel_type",
        "primary_heat_emitter_type",
        "primary_main_heating_control",
        "primary_main_heating_category",
        "primary_has_fghrs",
        "primary_fan_flue_present",
        "primary_boiler_flue_type",
        "primary_central_heating_pump_age",
    )
    for column in primary_columns:
        # The column name is the assertion message so a failure names the culprit.
        assert row[column] is None, column
def test_to_row_area_weights_window_u_value_and_solar_transmittance() -> None:
# Arrange — two windows with transmission details; one without.
sap_windows = [

View file

@ -16,6 +16,7 @@ from datatypes.epc.domain.epc import Epc
from datatypes.epc.domain.epc_property_data import (
EpcPropertyData,
SapBuildingPart,
SapHeating,
SapWindow,
)
from domain.ml.schema import ColumnSpec, TransformSchema
@ -272,6 +273,70 @@ _FEATURE_COLUMNS: dict[str, ColumnSpec] = {
dtype=int, nullable=True, categorical=True,
description="Main Dwelling roof construction SAP10 code.",
),
# Heating — count of main heating systems (usually 1)
"main_heating_count": ColumnSpec(
dtype=int, nullable=False,
description="Number of main heating systems declared on sap_heating.main_heating_details.",
),
# Heating — primary (Top-1) slot from main_heating_details[0]
"primary_main_fuel_type": ColumnSpec(
dtype=int, nullable=True, categorical=True,
description="Primary heating main_fuel SAP10 code (per epc_codes.csv main_fuel enum).",
),
"primary_heat_emitter_type": ColumnSpec(
dtype=int, nullable=True, categorical=True,
description="Primary heating heat_emitter_type SAP10 code.",
),
"primary_main_heating_control": ColumnSpec(
dtype=int, nullable=True, categorical=True,
description="Primary heating main_heating_control SAP10 code.",
),
"primary_main_heating_category": ColumnSpec(
dtype=int, nullable=True, categorical=True,
description="Primary heating main_heating_category SAP10 code.",
),
"primary_has_fghrs": ColumnSpec(
dtype=bool, nullable=True,
description="Primary heating has flue gas heat recovery system.",
),
"primary_fan_flue_present": ColumnSpec(
dtype=bool, nullable=True,
description="Primary heating boiler has a fan flue.",
),
"primary_boiler_flue_type": ColumnSpec(
dtype=int, nullable=True, categorical=True,
description="Primary heating boiler flue type SAP10 code.",
),
"primary_central_heating_pump_age": ColumnSpec(
dtype=int, nullable=True, categorical=True,
description="Primary heating central-heating pump age band (SAP10 enum).",
),
# Water heating — on sap_heating directly
"water_heating_code": ColumnSpec(
dtype=int, nullable=True, categorical=True,
description="Water heating SAP10 code.",
),
"water_heating_fuel": ColumnSpec(
dtype=int, nullable=True, categorical=True,
description="Water heating fuel SAP10 code (per epc_codes.csv water_heating_fuel enum).",
),
"cylinder_size": ColumnSpec(
dtype=int, nullable=True,
description="Hot water cylinder size SAP10 code (1=small, 2=normal, 3=large).",
),
"cylinder_insulation_thickness_mm": ColumnSpec(
dtype=int, nullable=True,
description="Hot water cylinder insulation thickness (mm).",
),
# Secondary heating — present when secondary_fuel_type is set
"has_secondary_heating": ColumnSpec(
dtype=bool, nullable=False,
description="True if sap_heating.secondary_fuel_type is populated.",
),
"secondary_fuel_type": ColumnSpec(
dtype=int, nullable=True, categorical=True,
description="Secondary heating fuel SAP10 code (shares main_fuel enum).",
),
}
@ -348,6 +413,7 @@ class EpcMlTransform:
rhi = epc.renewable_heat_incentive
window_aggregates = _window_aggregates(epc.sap_windows)
building_part_aggregates = _building_part_aggregates(epc.sap_building_parts)
heating_aggregates = _heating_aggregates(epc.sap_heating)
return {
# Features — geometry
"total_floor_area_m2": epc.total_floor_area_m2,
@ -382,6 +448,8 @@ class EpcMlTransform:
**window_aggregates,
# Features — building parts aggregates + Main Dwelling carve-out
**building_part_aggregates,
# Features — heating system (primary slot + water + secondary)
**heating_aggregates,
# Targets
"sap_score": epc.energy_rating_current,
"co2_emissions": epc.co2_emissions_current,
@ -404,6 +472,63 @@ def _peui_ucl(epc: EpcPropertyData) -> Optional[float]:
return apply_ucl_correction(float(epc.energy_consumption_current), band)
def _int_or_none(value: Any) -> Optional[int]:
    """Return ``value`` unchanged when it is an int, otherwise ``None``."""
    return value if isinstance(value, int) else None


def _heating_aggregates(sap_heating: SapHeating) -> dict[str, Any]:
    """Aggregate sap_heating into 15 heating-feature columns.

    Hybrid Top-1: the primary heating slot comes from
    ``main_heating_details[0]``; water heating fields are read directly off
    ``sap_heating``; secondary heating is inferred from
    ``secondary_fuel_type``.

    Some fields are ``Union[int, str]`` in the domain object. For those, int
    values pass through as int categoricals and any other value (e.g. a str)
    becomes ``None``.

    Args:
        sap_heating: Heating section of the EPC.

    Returns:
        A dict of the 15 heating columns, in a fixed key order. Every
        ``primary_*`` value is ``None`` when ``main_heating_details`` is empty.
    """
    details = sap_heating.main_heating_details

    aggregates: dict[str, Any] = {
        "main_heating_count": len(details),
        # Primary-slot columns default to None and are filled in below only
        # when at least one main heating system is declared.
        "primary_main_fuel_type": None,
        "primary_heat_emitter_type": None,
        "primary_main_heating_control": None,
        "primary_main_heating_category": None,
        "primary_has_fghrs": None,
        "primary_fan_flue_present": None,
        "primary_boiler_flue_type": None,
        "primary_central_heating_pump_age": None,
        "water_heating_code": sap_heating.water_heating_code,
        "water_heating_fuel": sap_heating.water_heating_fuel,
        "cylinder_size": _int_or_none(sap_heating.cylinder_size),
        "cylinder_insulation_thickness_mm": sap_heating.cylinder_insulation_thickness_mm,
        "has_secondary_heating": sap_heating.secondary_fuel_type is not None,
        "secondary_fuel_type": sap_heating.secondary_fuel_type,
    }

    if details:
        primary = details[0]
        aggregates.update(
            {
                # Union[int, str] fields: keep ints, drop everything else.
                "primary_main_fuel_type": _int_or_none(primary.main_fuel_type),
                "primary_heat_emitter_type": _int_or_none(primary.heat_emitter_type),
                "primary_main_heating_control": _int_or_none(
                    primary.main_heating_control
                ),
                # The remaining fields are passed through as-is.
                "primary_main_heating_category": primary.main_heating_category,
                "primary_has_fghrs": primary.has_fghrs,
                "primary_fan_flue_present": primary.fan_flue_present,
                "primary_boiler_flue_type": primary.boiler_flue_type,
                "primary_central_heating_pump_age": primary.central_heating_pump_age,
            }
        )

    return aggregates
def _building_part_aggregates(parts: list[SapBuildingPart]) -> dict[str, Any]:
"""Aggregate sap_building_parts into the 13 columns: 5 cross-all + 8 Main-Dwelling.