Added UCL-corrected PEUI

This commit is contained in:
Khalim Conn-Kowlessar 2026-05-16 14:39:24 +00:00
parent a64e7e74c5
commit 81f6163295
3 changed files with 75 additions and 3 deletions

View file

@ -9,3 +9,25 @@ class Epc(Enum):
E = "E"
F = "F"
G = "G"
@classmethod
def from_sap_score(cls, score: int) -> "Epc":
    """Return the EPC band that a SAP10 energy rating falls into.

    Bands are tested from best to worst against their lower bounds
    (A 92+, B 81-91, C 69-80, D 55-68, E 39-54, F 21-38). Any score that
    clears none of them is reported as G: that covers the regular 1-20
    range as well as zero or negative scores, which are not expected in
    practice.
    """
    # (inclusive lower bound, band), ordered from the highest band down so
    # that the first bound the score reaches identifies its band.
    lower_bounds = (
        (92, cls.A),
        (81, cls.B),
        (69, cls.C),
        (55, cls.D),
        (39, cls.E),
        (21, cls.F),
    )
    for floor, band in lower_bounds:
        if score >= floor:
            return band
    return cls.G

View file

@ -54,3 +54,39 @@ def test_to_row_extracts_targets_from_epc_property_data() -> None:
assert row["peui_raw"] == 232
assert row["space_heating_kwh"] == 10128.81
assert row["hot_water_kwh"] == 2166.19
def test_to_row_applies_ucl_correction_in_band_e() -> None:
    """A band-E certificate gets a lowered ``peui_ucl`` value."""
    # Arrange: a SAP score of 45 sits in band E, where the Few et al. 2023
    # correction is non-trivial.
    certificate = make_minimal_sap10_epc(
        energy_consumption_current=300,
        energy_rating_current=45,
    )

    # Act
    peui_ucl = EpcMlTransform().to_row(certificate)["peui_ucl"]

    # Assert: with the band-E gradient of -0.70 and intercept of 160 the
    # consumption difference is -0.70 * 300 + 160 = -50, so the adjusted
    # figure is 300 + (-50) = 250.0.
    assert peui_ucl == 250.0
def test_to_row_clamps_ucl_correction_when_band_b_would_increase_peui() -> None:
    """A positive band-B correction is clamped, leaving PEUI unchanged."""
    # Arrange: a SAP score of 82 sits in band B. For this PEUI the per-band
    # linear correction gives a *positive* consumption difference, which has
    # to be clamped to zero: EPCs only over-predict, so the value is never
    # adjusted upwards.
    certificate = make_minimal_sap10_epc(
        energy_consumption_current=232,
        energy_rating_current=82,
    )

    # Act
    peui_ucl = EpcMlTransform().to_row(certificate)["peui_ucl"]

    # Assert: with the band-B gradient of -0.10 and intercept of 28 the
    # consumption difference is -0.10 * 232 + 28 = +4.8, clamped to 0, so
    # the adjusted figure is 232 + 0 = 232.0.
    assert peui_ucl == 232.0

View file

@ -10,10 +10,12 @@ are added in subsequent slices.
See docs/adr/0007-kwh-as-ml-target.md for the target set and rationale.
"""
from typing import Any
from typing import Any, Optional
from datatypes.epc.domain.epc import Epc
from datatypes.epc.domain.epc_property_data import EpcPropertyData
from domain.ml.schema import ColumnSpec, TransformSchema
from domain.ml.ucl import apply_ucl_correction
_TARGET_COLUMNS: dict[str, ColumnSpec] = {
@ -84,14 +86,26 @@ class EpcMlTransform:
def to_row(self, epc: EpcPropertyData) -> dict[str, Any]:
    """Build one row of target values from an EpcPropertyData.

    The row carries six targets: ``sap_score``, ``co2_emissions``,
    ``peui_raw``, ``peui_ucl``, ``space_heating_kwh`` and
    ``hot_water_kwh``. The two heating figures are read from the
    certificate's ``renewable_heat_incentive`` and are ``None`` when that
    section is absent. Feature columns land in later slices.
    """
    heat_incentive = epc.renewable_heat_incentive
    row: dict[str, Any] = {
        "sap_score": epc.energy_rating_current,
        "co2_emissions": epc.co2_emissions_current,
        "peui_raw": epc.energy_consumption_current,
        "peui_ucl": _peui_ucl(epc),
    }
    # The heating keys are always present so every row has the same
    # columns, even when there is no RHI data to read them from.
    if heat_incentive is None:
        row["space_heating_kwh"] = None
        row["hot_water_kwh"] = None
    else:
        row["space_heating_kwh"] = heat_incentive.space_heating_kwh
        row["hot_water_kwh"] = heat_incentive.water_heating_kwh
    return row
def _peui_ucl(epc: EpcPropertyData) -> Optional[float]:
    """Compute the UCL-corrected PEUI used as a training label.

    The SAP score is mapped to its EPC band and the Few et al. per-band
    correction for that band is applied to the raw PEUI.

    Returns None when either the raw PEUI or the SAP score is missing;
    such rows are unusable as `peui_ucl` training labels and should be
    dropped upstream.
    """
    raw_peui = epc.energy_consumption_current
    if raw_peui is None:
        return None
    sap_score = epc.energy_rating_current
    if sap_score is None:
        return None
    band = Epc.from_sap_score(sap_score)
    return apply_ucl_correction(float(raw_peui), band)