def make_main_heating_detail(
    *,
    main_fuel_type: Union[int, str] = 26,  # mains gas (not community)
    heat_emitter_type: Union[int, str] = 1,
    main_heating_control: Union[int, str] = 2106,
    emitter_temperature: Union[int, str] = 1,
    main_heating_category: Optional[int] = 2,
    has_fghrs: bool = False,
    fan_flue_present: Optional[bool] = True,
    boiler_flue_type: Optional[int] = 2,
    central_heating_pump_age: Optional[int] = 0,
    main_heating_number: Optional[int] = 1,
) -> MainHeatingDetail:
    """Return a MainHeatingDetail fixture with SAP10 API defaults (mains gas boiler).

    All arguments are keyword-only and are forwarded unchanged to the
    ``MainHeatingDetail`` constructor under the same names.
    """
    # Collect the constructor arguments first so the mapping from parameter
    # to field is visible in one place.
    field_values = {
        "has_fghrs": has_fghrs,
        "main_fuel_type": main_fuel_type,
        "heat_emitter_type": heat_emitter_type,
        "emitter_temperature": emitter_temperature,
        "main_heating_control": main_heating_control,
        "fan_flue_present": fan_flue_present,
        "boiler_flue_type": boiler_flue_type,
        "central_heating_pump_age": central_heating_pump_age,
        "main_heating_number": main_heating_number,
        "main_heating_category": main_heating_category,
    }
    return MainHeatingDetail(**field_values)


def make_sap_heating(
    *,
    main_heating_details: Optional[list[MainHeatingDetail]] = None,
    has_fixed_air_conditioning: bool = False,
    water_heating_code: Optional[int] = 901,
    water_heating_fuel: Optional[int] = 26,
    cylinder_size: Optional[Union[int, str]] = None,
    cylinder_insulation_thickness_mm: Optional[int] = None,
    secondary_fuel_type: Optional[int] = None,
) -> SapHeating:
    """Return a SapHeating fixture with SAP10 API defaults.

    When ``main_heating_details`` is ``None`` the fixture gets a single entry
    built by ``make_main_heating_detail()``. Any list that is passed in,
    including an empty one, is used as given.
    """
    wwhrs = InstantaneousWwhrs()
    if main_heating_details is None:
        # None means "not supplied"; an explicit [] must stay empty so tests
        # can exercise the no-main-heating case.
        main_heating_details = [make_main_heating_detail()]
    return SapHeating(
        instantaneous_wwhrs=wwhrs,
        main_heating_details=main_heating_details,
        has_fixed_air_conditioning=has_fixed_air_conditioning,
        water_heating_code=water_heating_code,
        water_heating_fuel=water_heating_fuel,
        cylinder_size=cylinder_size,
        cylinder_insulation_thickness_mm=cylinder_insulation_thickness_mm,
        secondary_fuel_type=secondary_fuel_type,
    )
def test_schema_advertises_heating_features() -> None:
    # Arrange
    transform = EpcMlTransform()

    # Act
    schema = transform.schema()

    # Assert — the column name is the assertion message, so a failure
    # identifies which heating feature is missing or mis-specified.
    for name, expected_spec in _HEATING_FEATURES_NULLABLE.items():
        expected_dtype, expected_nullable, expected_categorical = expected_spec
        assert name in schema.feature_columns, name
        column = schema.feature_columns[name]
        assert column.dtype is expected_dtype, name
        assert column.nullable is expected_nullable, name
        assert column.categorical is expected_categorical, name
def test_to_row_extracts_water_heating_fields() -> None:
    # Arrange — the fixture keyword names match the row column names, so one
    # mapping drives both the input and the expectation.
    expected = {
        "water_heating_code": 901,
        "water_heating_fuel": 26,
        "cylinder_size": 2,
        "cylinder_insulation_thickness_mm": 38,
    }
    heating = make_sap_heating(**expected)
    epc = make_minimal_sap10_epc(energy_rating_current=82, sap_heating=heating)
    transform = EpcMlTransform()

    # Act
    row = transform.to_row(epc)

    # Assert
    for column, value in expected.items():
        assert row[column] == value


def test_to_row_flags_secondary_heating_when_present() -> None:
    # Arrange — secondary heating: bottled-LPG (code 38)
    heating = make_sap_heating(secondary_fuel_type=38)
    epc = make_minimal_sap10_epc(energy_rating_current=82, sap_heating=heating)
    transform = EpcMlTransform()

    # Act
    row = transform.to_row(epc)

    # Assert
    assert row["has_secondary_heating"] is True
    assert row["secondary_fuel_type"] == 38


def test_to_row_returns_no_secondary_heating_when_absent() -> None:
    # Arrange — no secondary fuel declared on sap_heating
    heating = make_sap_heating(secondary_fuel_type=None)
    epc = make_minimal_sap10_epc(energy_rating_current=82, sap_heating=heating)
    transform = EpcMlTransform()

    # Act
    row = transform.to_row(epc)

    # Assert
    assert row["has_secondary_heating"] is False
    assert row["secondary_fuel_type"] is None
transform = EpcMlTransform() + + # Act + row = transform.to_row(epc) + + # Assert + assert row["main_heating_count"] == 0 + assert row["primary_main_fuel_type"] is None + assert row["primary_heat_emitter_type"] is None + assert row["primary_main_heating_control"] is None + assert row["primary_main_heating_category"] is None + assert row["primary_has_fghrs"] is None + assert row["primary_fan_flue_present"] is None + assert row["primary_boiler_flue_type"] is None + assert row["primary_central_heating_pump_age"] is None + + def test_to_row_area_weights_window_u_value_and_solar_transmittance() -> None: # Arrange — two windows with transmission details; one without. sap_windows = [ diff --git a/packages/domain/src/domain/ml/transform.py b/packages/domain/src/domain/ml/transform.py index 473763b5..95ddf45b 100644 --- a/packages/domain/src/domain/ml/transform.py +++ b/packages/domain/src/domain/ml/transform.py @@ -16,6 +16,7 @@ from datatypes.epc.domain.epc import Epc from datatypes.epc.domain.epc_property_data import ( EpcPropertyData, SapBuildingPart, + SapHeating, SapWindow, ) from domain.ml.schema import ColumnSpec, TransformSchema @@ -272,6 +273,70 @@ _FEATURE_COLUMNS: dict[str, ColumnSpec] = { dtype=int, nullable=True, categorical=True, description="Main Dwelling roof construction SAP10 code.", ), + # Heating — count of main heating systems (usually 1) + "main_heating_count": ColumnSpec( + dtype=int, nullable=False, + description="Number of main heating systems declared on sap_heating.main_heating_details.", + ), + # Heating — primary (Top-1) slot from main_heating_details[0] + "primary_main_fuel_type": ColumnSpec( + dtype=int, nullable=True, categorical=True, + description="Primary heating main_fuel SAP10 code (per epc_codes.csv main_fuel enum).", + ), + "primary_heat_emitter_type": ColumnSpec( + dtype=int, nullable=True, categorical=True, + description="Primary heating heat_emitter_type SAP10 code.", + ), + "primary_main_heating_control": ColumnSpec( + dtype=int, 
nullable=True, categorical=True, + description="Primary heating main_heating_control SAP10 code.", + ), + "primary_main_heating_category": ColumnSpec( + dtype=int, nullable=True, categorical=True, + description="Primary heating main_heating_category SAP10 code.", + ), + "primary_has_fghrs": ColumnSpec( + dtype=bool, nullable=True, + description="Primary heating has flue gas heat recovery system.", + ), + "primary_fan_flue_present": ColumnSpec( + dtype=bool, nullable=True, + description="Primary heating boiler has a fan flue.", + ), + "primary_boiler_flue_type": ColumnSpec( + dtype=int, nullable=True, categorical=True, + description="Primary heating boiler flue type SAP10 code.", + ), + "primary_central_heating_pump_age": ColumnSpec( + dtype=int, nullable=True, categorical=True, + description="Primary heating central-heating pump age band (SAP10 enum).", + ), + # Water heating — on sap_heating directly + "water_heating_code": ColumnSpec( + dtype=int, nullable=True, categorical=True, + description="Water heating SAP10 code.", + ), + "water_heating_fuel": ColumnSpec( + dtype=int, nullable=True, categorical=True, + description="Water heating fuel SAP10 code (per epc_codes.csv water_heating_fuel enum).", + ), + "cylinder_size": ColumnSpec( + dtype=int, nullable=True, + description="Hot water cylinder size SAP10 code (1=small, 2=normal, 3=large).", + ), + "cylinder_insulation_thickness_mm": ColumnSpec( + dtype=int, nullable=True, + description="Hot water cylinder insulation thickness (mm).", + ), + # Secondary heating — present when secondary_fuel_type is set + "has_secondary_heating": ColumnSpec( + dtype=bool, nullable=False, + description="True if sap_heating.secondary_fuel_type is populated.", + ), + "secondary_fuel_type": ColumnSpec( + dtype=int, nullable=True, categorical=True, + description="Secondary heating fuel SAP10 code (shares main_fuel enum).", + ), } @@ -348,6 +413,7 @@ class EpcMlTransform: rhi = epc.renewable_heat_incentive window_aggregates = 
_window_aggregates(epc.sap_windows) building_part_aggregates = _building_part_aggregates(epc.sap_building_parts) + heating_aggregates = _heating_aggregates(epc.sap_heating) return { # Features — geometry "total_floor_area_m2": epc.total_floor_area_m2, @@ -382,6 +448,8 @@ class EpcMlTransform: **window_aggregates, # Features — building parts aggregates + Main Dwelling carve-out **building_part_aggregates, + # Features — heating system (primary slot + water + secondary) + **heating_aggregates, # Targets "sap_score": epc.energy_rating_current, "co2_emissions": epc.co2_emissions_current, @@ -404,6 +472,63 @@ def _peui_ucl(epc: EpcPropertyData) -> Optional[float]: return apply_ucl_correction(float(epc.energy_consumption_current), band) +def _heating_aggregates(sap_heating: SapHeating) -> dict[str, Any]: + """Aggregate sap_heating into 15 heating-feature columns. + + Hybrid Top-1: the primary heating slot comes from `main_heating_details[0]`; + water heating fields read directly off `sap_heating`; secondary heating is + inferred from `secondary_fuel_type`. Fields are Union[int, str] in the + domain object — Union-int values pass through as int categoricals; str + values (from site notes) coerce to None. 
+ """ + aggregates: dict[str, Any] = { + "main_heating_count": len(sap_heating.main_heating_details), + "primary_main_fuel_type": None, + "primary_heat_emitter_type": None, + "primary_main_heating_control": None, + "primary_main_heating_category": None, + "primary_has_fghrs": None, + "primary_fan_flue_present": None, + "primary_boiler_flue_type": None, + "primary_central_heating_pump_age": None, + "water_heating_code": sap_heating.water_heating_code, + "water_heating_fuel": sap_heating.water_heating_fuel, + "cylinder_size": ( + sap_heating.cylinder_size + if isinstance(sap_heating.cylinder_size, int) + else None + ), + "cylinder_insulation_thickness_mm": sap_heating.cylinder_insulation_thickness_mm, + "has_secondary_heating": sap_heating.secondary_fuel_type is not None, + "secondary_fuel_type": sap_heating.secondary_fuel_type, + } + + if sap_heating.main_heating_details: + primary = sap_heating.main_heating_details[0] + aggregates["primary_main_fuel_type"] = ( + primary.main_fuel_type if isinstance(primary.main_fuel_type, int) else None + ) + aggregates["primary_heat_emitter_type"] = ( + primary.heat_emitter_type + if isinstance(primary.heat_emitter_type, int) + else None + ) + aggregates["primary_main_heating_control"] = ( + primary.main_heating_control + if isinstance(primary.main_heating_control, int) + else None + ) + aggregates["primary_main_heating_category"] = primary.main_heating_category + aggregates["primary_has_fghrs"] = primary.has_fghrs + aggregates["primary_fan_flue_present"] = primary.fan_flue_present + aggregates["primary_boiler_flue_type"] = primary.boiler_flue_type + aggregates["primary_central_heating_pump_age"] = ( + primary.central_heating_pump_age + ) + + return aggregates + + def _building_part_aggregates(parts: list[SapBuildingPart]) -> dict[str, Any]: """Aggregate sap_building_parts into the 13 columns: 5 cross-all + 8 Main-Dwelling.