From e4f9e9e1db9dfb4ee82bdec68873359a605e6152 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 16 May 2026 15:06:02 +0000 Subject: [PATCH] slice 6: flat booleans and optional integer indicators Adds three non-nullable booleans (solar_water_heating, has_hot_water_cylinder, has_fixed_air_conditioning) and three optional integer indicators (percent_draughtproofed, energy_rating_average, environmental_impact_current). All direct EpcPropertyData field reads. Co-Authored-By: Claude Opus 4.7 --- .../domain/src/domain/ml/tests/_fixtures.py | 15 ++++- .../src/domain/ml/tests/test_transform.py | 59 +++++++++++++++++++ packages/domain/src/domain/ml/transform.py | 34 +++++++++++ 3 files changed, 105 insertions(+), 3 deletions(-) diff --git a/packages/domain/src/domain/ml/tests/_fixtures.py b/packages/domain/src/domain/ml/tests/_fixtures.py index fe3b214c..bdab655c 100644 --- a/packages/domain/src/domain/ml/tests/_fixtures.py +++ b/packages/domain/src/domain/ml/tests/_fixtures.py @@ -35,6 +35,12 @@ def make_minimal_sap10_epc( cfl_fixed_lighting_bulbs_count: int = 0, led_fixed_lighting_bulbs_count: int = 0, incandescent_fixed_lighting_bulbs_count: int = 0, + solar_water_heating: bool = False, + has_hot_water_cylinder: bool = False, + has_fixed_air_conditioning: bool = False, + percent_draughtproofed: Optional[int] = None, + energy_rating_average: Optional[int] = None, + environmental_impact_current: Optional[int] = None, ) -> EpcPropertyData: """Construct a minimal valid SAP10 EpcPropertyData with parametrisable targets.""" return EpcPropertyData( @@ -67,9 +73,9 @@ def make_minimal_sap10_epc( electricity_smart_meter_present=False, ), sap_building_parts=[], - solar_water_heating=False, - has_hot_water_cylinder=False, - has_fixed_air_conditioning=False, + solar_water_heating=solar_water_heating, + has_hot_water_cylinder=has_hot_water_cylinder, + has_fixed_air_conditioning=has_fixed_air_conditioning, wet_rooms_count=wet_rooms_count, extensions_count=extensions_count, heated_rooms_count=heated_rooms_count, @@ -84,6 +90,9 @@ def make_minimal_sap10_epc( energy_rating_current=energy_rating_current, co2_emissions_current=co2_emissions_current, energy_consumption_current=energy_consumption_current, + percent_draughtproofed=percent_draughtproofed, + energy_rating_average=energy_rating_average, + environmental_impact_current=environmental_impact_current, renewable_heat_incentive=RenewableHeatIncentive( space_heating_kwh=space_heating_kwh, water_heating_kwh=water_heating_kwh, diff --git a/packages/domain/src/domain/ml/tests/test_transform.py b/packages/domain/src/domain/ml/tests/test_transform.py index 5f447c3d..a7c667fb 100644 --- a/packages/domain/src/domain/ml/tests/test_transform.py +++ b/packages/domain/src/domain/ml/tests/test_transform.py @@ -180,3 +180,62 @@ def test_to_row_extracts_count_features() -> None: assert row["cfl_fixed_lighting_bulbs_count"] == 0 assert row["led_fixed_lighting_bulbs_count"] == 8 assert row["incandescent_fixed_lighting_bulbs_count"] == 2 + + +_EXPECTED_FLAT_BOOLEAN_FEATURES: tuple[str, ...] = ( + "solar_water_heating", + "has_hot_water_cylinder", + "has_fixed_air_conditioning", +) + + +_EXPECTED_OPTIONAL_INT_FEATURES: tuple[str, ...] = ( + "percent_draughtproofed", + "energy_rating_average", + "environmental_impact_current", +) + + +def test_schema_advertises_boolean_and_optional_int_features() -> None: + # Arrange + transform = EpcMlTransform() + + # Act + schema = transform.schema() + + # Assert + for feature_name in _EXPECTED_FLAT_BOOLEAN_FEATURES: + assert feature_name in schema.feature_columns, feature_name + column = schema.feature_columns[feature_name] + assert column.dtype is bool + assert column.nullable is False + for feature_name in _EXPECTED_OPTIONAL_INT_FEATURES: + assert feature_name in schema.feature_columns, feature_name + column = schema.feature_columns[feature_name] + assert column.dtype is int + assert column.nullable is True + + +def test_to_row_extracts_boolean_and_optional_int_features() -> None: + # Arrange + epc = make_minimal_sap10_epc( + energy_rating_current=82, + solar_water_heating=True, + has_hot_water_cylinder=True, + has_fixed_air_conditioning=False, + percent_draughtproofed=100, + energy_rating_average=60, + environmental_impact_current=72, + ) + transform = EpcMlTransform() + + # Act + row = transform.to_row(epc) + + # Assert + assert row["solar_water_heating"] is True + assert row["has_hot_water_cylinder"] is True + assert row["has_fixed_air_conditioning"] is False + assert row["percent_draughtproofed"] == 100 + assert row["energy_rating_average"] == 60 + assert row["environmental_impact_current"] == 72 diff --git a/packages/domain/src/domain/ml/transform.py b/packages/domain/src/domain/ml/transform.py index 907380a5..058053a0 100644 --- a/packages/domain/src/domain/ml/transform.py +++ b/packages/domain/src/domain/ml/transform.py @@ -66,6 +66,32 @@ _FEATURE_COLUMNS: dict[str, ColumnSpec] = { nullable=False, description="Number of incandescent bulbs in fixed lighting outlets.", ), + # Booleans — directly populated by all SAP10 EPCs + "solar_water_heating": ColumnSpec( + dtype=bool, nullable=False, description="Solar water heating present." + ), + "has_hot_water_cylinder": ColumnSpec( + dtype=bool, nullable=False, description="Hot water cylinder present." + ), + "has_fixed_air_conditioning": ColumnSpec( + dtype=bool, nullable=False, description="Fixed air conditioning present." + ), + # Optional integer indicators — may be absent on older or partial certificates + "percent_draughtproofed": ColumnSpec( + dtype=int, + nullable=True, + description="Percentage of windows / doors with draught proofing.", + ), + "energy_rating_average": ColumnSpec( + dtype=int, + nullable=True, + description="Average SAP score for comparable properties (neighbourhood comparator).", + ), + "environmental_impact_current": ColumnSpec( + dtype=int, + nullable=True, + description="Environmental impact rating; separate from energy efficiency SAP score.", + ), } @@ -154,6 +180,14 @@ class EpcMlTransform: "cfl_fixed_lighting_bulbs_count": epc.cfl_fixed_lighting_bulbs_count, "led_fixed_lighting_bulbs_count": epc.led_fixed_lighting_bulbs_count, "incandescent_fixed_lighting_bulbs_count": epc.incandescent_fixed_lighting_bulbs_count, + # Features — booleans + "solar_water_heating": epc.solar_water_heating, + "has_hot_water_cylinder": epc.has_hot_water_cylinder, + "has_fixed_air_conditioning": epc.has_fixed_air_conditioning, + # Features — optional integer indicators + "percent_draughtproofed": epc.percent_draughtproofed, + "energy_rating_average": epc.energy_rating_average, + "environmental_impact_current": epc.environmental_impact_current, # Targets "sap_score": epc.energy_rating_current, "co2_emissions": epc.co2_emissions_current,