From 831ebac2aeee1863e242d7a282181c4404380276 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar
Date: Sun, 17 May 2026 18:13:47 +0000
Subject: [PATCH] slice 18d: seasonal_efficiency category fallback for null SAP code (v2.6.0)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Many real certs carry main_heating_category=4 (heat pump) but null
sap_main_heating_code, so seasonal_efficiency() was returning the 0.80
gas-boiler default — a 3x COP under-count that dragged the high-SAP
heat-pump tail. Adds main_heating_category + main_fuel_type fallbacks:
cat=4 -> 2.30, cat=7 -> 1.00, cat=10 routes by fuel (electric=1.00,
gas=0.55, oil=0.65), cat=5 warm air -> 0.76. Explicit SAP codes still
win.
---
 .../domain/src/domain/ml/sap_efficiencies.py | 63 ++++++++++++--
 .../domain/ml/tests/test_sap_efficiencies.py | 86 +++++++++++++++++++
 .../src/domain/ml/tests/test_transform.py    |  2 +-
 packages/domain/src/domain/ml/transform.py   | 10 ++-
 4 files changed, 152 insertions(+), 9 deletions(-)

diff --git a/packages/domain/src/domain/ml/sap_efficiencies.py b/packages/domain/src/domain/ml/sap_efficiencies.py
index 65184905..d62db300 100644
--- a/packages/domain/src/domain/ml/sap_efficiencies.py
+++ b/packages/domain/src/domain/ml/sap_efficiencies.py
@@ -89,15 +89,66 @@ _WATER_EFF_BY_CODE: Final[dict[int, float]] = {
 }
 
 
-def seasonal_efficiency(sap_main_heating_code: Optional[int]) -> float:
+# Gov EPC API main_heating_category -> typical SAP10.2 Table 4a seasonal-eff
+# fallback when `sap_main_heating_code` is null. Real certs frequently omit
+# the Table 4a code but still report a category, and the silent fallback to
+# 0.80 (gas boiler) catastrophically misrates heat pumps and storage.
+_CATEGORY_FALLBACK_EFF: Final[dict[int, float]] = {
+    # 1 = central heating without separate HW (boiler typical)
+    1: 0.80,
+    2: 0.80,  # central heating with separate HW
+    3: 0.80,  # community heat network — Table 4a 301 typical
+    4: 2.30,  # heat pump — Table 4a 211 typical (mid GSHP/ASHP)
+    5: 0.76,  # warm air — Table 4a 502 typical
+    6: 0.80,  # community heat network
+    7: 1.00,  # high-heat-retention electric storage
+}
+
+
+# Gov EPC API main_fuel_type -> Table 4a room-heater eff column when
+# category==10 ("Room heaters") and the SAP code is null.
+_ROOM_HEATER_FUEL_EFF: Final[dict[int, float]] = {
+    1: 0.55,  # mains gas (legacy)
+    2: 0.55,  # LPG (legacy)
+    3: 0.55,  # bottled LPG
+    4: 0.65,  # oil (legacy)
+    10: 1.00,  # electricity (legacy)
+    26: 0.55,  # mains gas (not community)
+    27: 0.55,  # LPG (not community)
+    28: 0.65,  # oil (not community)
+    29: 1.00,  # electricity (not community)
+}
+
+
+def seasonal_efficiency(
+    sap_main_heating_code: Optional[int],
+    main_heating_category: Optional[int] = None,
+    main_fuel_type: Optional[int] = None,
+) -> float:
     """Space-heating seasonal efficiency as a decimal (0.84 = 84%).
 
-    Falls back to 0.80 (typical gas-boiler) when the code is missing or
-    not in Table 4a/4b.
+    Resolution order:
+      1. `sap_main_heating_code` -> Table 4a/4b lookup (most authoritative).
+      2. `main_heating_category` (gov API enum: 4=heat pump, 7=storage, ...)
+         with optional `main_fuel_type` discriminator for `category==10`
+         room heaters.
+      3. 0.80 typical-gas-boiler default.
     """
-    if sap_main_heating_code is None:
-        return 0.80
-    return _SPACE_EFF_BY_CODE.get(sap_main_heating_code, 0.80)
+    if sap_main_heating_code is not None:
+        eff = _SPACE_EFF_BY_CODE.get(sap_main_heating_code)
+        if eff is not None:
+            return eff
+    if main_heating_category == 10:
+        if main_fuel_type is not None:
+            eff = _ROOM_HEATER_FUEL_EFF.get(main_fuel_type)
+            if eff is not None:
+                return eff
+        return 0.55
+    if main_heating_category is not None:
+        eff = _CATEGORY_FALLBACK_EFF.get(main_heating_category)
+        if eff is not None:
+            return eff
+    return 0.80
 
 
 def water_heating_efficiency(
diff --git a/packages/domain/src/domain/ml/tests/test_sap_efficiencies.py b/packages/domain/src/domain/ml/tests/test_sap_efficiencies.py
index 29da2204..f32e9bd7 100644
--- a/packages/domain/src/domain/ml/tests/test_sap_efficiencies.py
+++ b/packages/domain/src/domain/ml/tests/test_sap_efficiencies.py
@@ -81,6 +81,92 @@ def test_seasonal_efficiency_unknown_code_falls_back_to_mid_range() -> None:
     assert result == pytest.approx(0.80, abs=0.01)
 
 
+def test_seasonal_efficiency_null_code_uses_heat_pump_category_fallback() -> None:
+    # Arrange — many real certs have sap_main_heating_code=None but the gov
+    # API still gives main_heating_category=4 (heat pump). Without the
+    # category fallback `seasonal_efficiency` returns 0.80 (gas boiler),
+    # under-counting a heat pump's COP by ~3x and driving sap_score down.
+
+    # Act
+    result = seasonal_efficiency(
+        sap_main_heating_code=None,
+        main_heating_category=4,
+    )
+
+    # Assert — SAP10.2 Table 4a heat-pump space COP ~2.30 (code 211 typical).
+    assert result == pytest.approx(2.30, abs=0.01)
+
+
+def test_seasonal_efficiency_null_code_uses_storage_heater_category_fallback() -> None:
+    # Arrange — cat=7 (high-heat-retention electric storage) with null code.
+
+    # Act
+    result = seasonal_efficiency(
+        sap_main_heating_code=None,
+        main_heating_category=7,
+    )
+
+    # Assert — Table 4a electric storage = 1.00.
+    assert result == pytest.approx(1.00, abs=0.01)
+
+
+def test_seasonal_efficiency_null_code_room_heaters_gas_fuel_fallback() -> None:
+    # Arrange — cat=10 (room heaters) + fuel=26 (mains gas, gov API code).
+    # Without the fuel-aware fallback, gas room heaters get the 0.80 default
+    # (gas boiler) when they should be ~0.55 (Table 4a 605-606 gas decorative).
+
+    # Act
+    result = seasonal_efficiency(
+        sap_main_heating_code=None,
+        main_heating_category=10,
+        main_fuel_type=26,
+    )
+
+    # Assert
+    assert result == pytest.approx(0.55, abs=0.05)
+
+
+def test_seasonal_efficiency_null_code_room_heaters_electric_fuel_fallback() -> None:
+    # Arrange — cat=10 + fuel=29 (electricity not community).
+
+    # Act
+    result = seasonal_efficiency(
+        sap_main_heating_code=None,
+        main_heating_category=10,
+        main_fuel_type=29,
+    )
+
+    # Assert — electric room heater = 1.00.
+    assert result == pytest.approx(1.00, abs=0.01)
+
+
+def test_seasonal_efficiency_explicit_code_beats_category_fallback() -> None:
+    # Arrange — when both are present, the SAP code is authoritative.
+    # Code 211 GSHP -> 2.30; category=2 (boilers) would otherwise return 0.80.
+
+    # Act
+    result = seasonal_efficiency(
+        sap_main_heating_code=211,
+        main_heating_category=2,
+    )
+
+    # Assert
+    assert result == pytest.approx(2.30, abs=0.01)
+
+
+def test_seasonal_efficiency_null_code_central_heating_category_keeps_default() -> None:
+    # Arrange — cat=2 (central heating with separate HW) -> keep gas-boiler default.
+
+    # Act
+    result = seasonal_efficiency(
+        sap_main_heating_code=None,
+        main_heating_category=2,
+    )
+
+    # Assert
+    assert result == pytest.approx(0.80, abs=0.01)
+
+
 # ----- Water-heating efficiency (Table 4a hot-water section) -----
 
 
diff --git a/packages/domain/src/domain/ml/tests/test_transform.py b/packages/domain/src/domain/ml/tests/test_transform.py
index b314776c..e1c3c83f 100644
--- a/packages/domain/src/domain/ml/tests/test_transform.py
+++ b/packages/domain/src/domain/ml/tests/test_transform.py
@@ -36,7 +36,7 @@ def test_transform_advertises_version_and_target_columns() -> None:
 
     # Assert
     assert isinstance(schema, TransformSchema)
-    assert schema.transform_version == "2.5.0"
+    assert schema.transform_version == "2.6.0"
     assert schema.transform_version == EpcMlTransform.VERSION
     assert set(schema.target_columns.keys()) == set(_EXPECTED_TARGET_DTYPES.keys())
     for target_name, expected_dtype in _EXPECTED_TARGET_DTYPES.items():
diff --git a/packages/domain/src/domain/ml/transform.py b/packages/domain/src/domain/ml/transform.py
index 750819a7..0bc1a53f 100644
--- a/packages/domain/src/domain/ml/transform.py
+++ b/packages/domain/src/domain/ml/transform.py
@@ -902,7 +902,7 @@ class EpcMlTransform:
     Version 0.1.0 — schema contract only; feature columns added in subsequent slices.
     """
 
-    VERSION: str = "2.5.0"
+    VERSION: str = "2.6.0"
 
     def schema(self) -> TransformSchema:
         """The cross-repo ML data contract.
@@ -962,7 +962,13 @@ class EpcMlTransform:
         )
         main_heating_code = heating_aggregates.get("primary_sap_main_heating_code")
         water_code = heating_aggregates.get("water_heating_code")
-        space_eff = seasonal_efficiency(main_heating_code if isinstance(main_heating_code, int) else None)
+        main_category = heating_aggregates.get("primary_main_heating_category")
+        main_fuel = heating_aggregates.get("primary_main_fuel_type")
+        space_eff = seasonal_efficiency(
+            main_heating_code if isinstance(main_heating_code, int) else None,
+            main_heating_category=main_category if isinstance(main_category, int) else None,
+            main_fuel_type=main_fuel if isinstance(main_fuel, int) else None,
+        )
         water_eff = water_heating_efficiency(
             water_heating_code=water_code if isinstance(water_code, int) else None,
             main_heating_code=main_heating_code if isinstance(main_heating_code, int) else None,