# Patch summary. This preamble sits before the first "diff --git" header and is
# commentary only; no hunk below has been changed.
#
# 1. datatypes/epc/domain/epc.py
#    Adds the classmethod Epc.from_sap_score(score). It walks the thresholds in
#    descending order (>=92 A, >=81 B, >=69 C, >=55 D, >=39 E, >=21 F) and
#    returns G for everything else, so 0 and negative scores also map to G.
#    NOTE(review): there is no upper-bound check; any score >= 92 maps to A.
#
# 2. packages/domain/src/domain/ml/tests/test_transform.py
#    Adds two tests on row["peui_ucl"]:
#      - SAP 45 with PEUI 300 is expected to give 250.0.
#      - SAP 82 with PEUI 232 is expected to stay at 232.0 (a positive
#        correction is clamped to zero, per the test's own comment).
#    NOTE(review): both assertions use exact float equality.
#
# 3. packages/domain/src/domain/ml/transform.py
#    EpcMlTransform.to_row now emits a "peui_ucl" key, computed by the new
#    module-level helper _peui_ucl. The helper returns None when either
#    energy_consumption_current or energy_rating_current is None; otherwise it
#    derives the band with Epc.from_sap_score and returns
#    apply_ucl_correction(float(PEUI), band).
#    NOTE(review): apply_ucl_correction is imported from domain.ml.ucl and is
#    not part of this patch; the gradients/intercepts quoted in the test
#    comments presumably live there - confirm against that module.
#
diff --git a/datatypes/epc/domain/epc.py b/datatypes/epc/domain/epc.py
index e694ba2f..b715be82 100644
--- a/datatypes/epc/domain/epc.py
+++ b/datatypes/epc/domain/epc.py
@@ -9,3 +9,25 @@ class Epc(Enum):
     E = "E"
     F = "F"
     G = "G"
+
+    @classmethod
+    def from_sap_score(cls, score: int) -> "Epc":
+        """Map a SAP10 energy rating (1-100) to its EPC band.
+
+        Thresholds are the standard SAP10 boundaries: A 92+, B 81-91, C 69-80,
+        D 55-68, E 39-54, F 21-38, G 1-20. Scores below 21 (including 0 and
+        negatives, which should not occur in practice) fall through to G.
+        """
+        if score >= 92:
+            return cls.A
+        if score >= 81:
+            return cls.B
+        if score >= 69:
+            return cls.C
+        if score >= 55:
+            return cls.D
+        if score >= 39:
+            return cls.E
+        if score >= 21:
+            return cls.F
+        return cls.G
diff --git a/packages/domain/src/domain/ml/tests/test_transform.py b/packages/domain/src/domain/ml/tests/test_transform.py
index 88538d47..4a7b6349 100644
--- a/packages/domain/src/domain/ml/tests/test_transform.py
+++ b/packages/domain/src/domain/ml/tests/test_transform.py
@@ -54,3 +54,39 @@ def test_to_row_extracts_targets_from_epc_property_data() -> None:
     assert row["peui_raw"] == 232
     assert row["space_heating_kwh"] == 10128.81
     assert row["hot_water_kwh"] == 2166.19
+
+
+def test_to_row_applies_ucl_correction_in_band_e() -> None:
+    # Arrange — SAP 45 = band E; Few et al. 2023 band-E correction is non-trivial
+    epc = make_minimal_sap10_epc(
+        energy_rating_current=45,
+        energy_consumption_current=300,
+    )
+    transform = EpcMlTransform()
+
+    # Act
+    row = transform.to_row(epc)
+
+    # Assert
+    # Band E: gradient=-0.70, intercept=160 → cd = -0.70*300 + 160 = -50
+    # adjusted = 300 + (-50) = 250.0
+    assert row["peui_ucl"] == 250.0
+
+
+def test_to_row_clamps_ucl_correction_when_band_b_would_increase_peui() -> None:
+    # Arrange — SAP 82 = band B; per-band linear correction yields a *positive*
+    # consumption_difference for this PEUI, which must be clamped to zero
+    # (EPCs over-predict only — we never adjust upwards).
+    epc = make_minimal_sap10_epc(
+        energy_rating_current=82,
+        energy_consumption_current=232,
+    )
+    transform = EpcMlTransform()
+
+    # Act
+    row = transform.to_row(epc)
+
+    # Assert
+    # Band B: gradient=-0.10, intercept=28 → cd = -0.10*232 + 28 = +4.8 → clamp to 0
+    # adjusted = 232 + 0 = 232.0
+    assert row["peui_ucl"] == 232.0
diff --git a/packages/domain/src/domain/ml/transform.py b/packages/domain/src/domain/ml/transform.py
index 5a28bc94..25836feb 100644
--- a/packages/domain/src/domain/ml/transform.py
+++ b/packages/domain/src/domain/ml/transform.py
@@ -10,10 +10,12 @@ are added in subsequent slices.
 See docs/adr/0007-kwh-as-ml-target.md for the target set and rationale.
 """
 
-from typing import Any
+from typing import Any, Optional
 
+from datatypes.epc.domain.epc import Epc
 from datatypes.epc.domain.epc_property_data import EpcPropertyData
 from domain.ml.schema import ColumnSpec, TransformSchema
+from domain.ml.ucl import apply_ucl_correction
 
 
 _TARGET_COLUMNS: dict[str, ColumnSpec] = {
@@ -84,14 +86,26 @@ class EpcMlTransform:
     def to_row(self, epc: EpcPropertyData) -> dict[str, Any]:
         """Map an EpcPropertyData to a single row of features + targets.
 
-        v0.1.0 populates the five directly-extractable targets. The UCL-corrected
-        PEUI target and all feature columns land in later slices.
+        v0.1.0 populates the six targets. Feature columns land in later slices.
         """
         rhi = epc.renewable_heat_incentive
         return {
             "sap_score": epc.energy_rating_current,
             "co2_emissions": epc.co2_emissions_current,
             "peui_raw": epc.energy_consumption_current,
+            "peui_ucl": _peui_ucl(epc),
             "space_heating_kwh": rhi.space_heating_kwh if rhi is not None else None,
             "hot_water_kwh": rhi.water_heating_kwh if rhi is not None else None,
         }
+
+
+def _peui_ucl(epc: EpcPropertyData) -> Optional[float]:
+    """Apply the Few et al. per-band UCL correction to PEUI for training labels.
+
+    Returns None when either the raw PEUI or the SAP score is missing — those rows
+    are unusable as `peui_ucl` training labels and should be dropped upstream.
+    """
+    if epc.energy_consumption_current is None or epc.energy_rating_current is None:
+        return None
+    band = Epc.from_sap_score(epc.energy_rating_current)
+    return apply_ucl_correction(float(epc.energy_consumption_current), band)