add missing ucl.py and _fixtures.py from slices 2-3

Previous slice commits were made with `git commit -a`-style staging, which
only picks up already-tracked files, so these new files were missed; imports
in transform.py and test_transform.py would dangle on a fresh checkout.
Re-running pytest after this commit covers all four EpcMlTransform tests cleanly.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Khalim Conn-Kowlessar 2026-05-16 14:43:09 +00:00
parent 81f6163295
commit 375b0e895e
2 changed files with 139 additions and 0 deletions

View file

@ -0,0 +1,80 @@
"""Test fixtures for EpcMlTransform tests.
`make_minimal_sap10_epc()` constructs a valid EpcPropertyData with the smallest
sensible defaults for required fields; target values are passed by kwarg so each
test parametrises only the fields it cares about.
"""
from datetime import date
from typing import Optional
from datatypes.epc.domain.epc_property_data import (
EpcPropertyData,
InstantaneousWwhrs,
RenewableHeatIncentive,
SapEnergySource,
SapHeating,
)
def make_minimal_sap10_epc(
    *,
    energy_rating_current: Optional[int] = None,
    co2_emissions_current: Optional[float] = None,
    energy_consumption_current: Optional[int] = None,
    space_heating_kwh: float = 0.0,
    water_heating_kwh: float = 0.0,
) -> EpcPropertyData:
    """Build a bare-bones SAP10 ``EpcPropertyData`` for tests.

    Every field that is not a parameter is filled with a fixed placeholder
    (empty lists, zero counts, ``False`` flags), so a test only spells out the
    target values it actually asserts on.

    Args:
        energy_rating_current: Passed straight to the field of the same name.
        co2_emissions_current: Passed straight to the field of the same name.
        energy_consumption_current: Passed straight to the field of the same
            name.
        space_heating_kwh: Stored on the nested ``RenewableHeatIncentive``.
        water_heating_kwh: Stored on the nested ``RenewableHeatIncentive``.

    Returns:
        The populated ``EpcPropertyData`` instance.
    """
    # Nested value objects are built up front so the main constructor call
    # below reads as a flat list of fields.
    heating = SapHeating(
        instantaneous_wwhrs=InstantaneousWwhrs(),
        main_heating_details=[],
        has_fixed_air_conditioning=False,
    )
    energy_source = SapEnergySource(
        mains_gas=True,
        meter_type="Single",
        pv_battery_count=0,
        wind_turbines_count=0,
        gas_smart_meter_present=False,
        is_dwelling_export_capable=False,
        wind_turbines_terrain_type="Suburban",
        electricity_smart_meter_present=False,
    )
    # The only nested object that carries caller-supplied values.
    incentive = RenewableHeatIncentive(
        space_heating_kwh=space_heating_kwh,
        water_heating_kwh=water_heating_kwh,
    )

    return EpcPropertyData(
        # Identity / address placeholders.
        dwelling_type="Mid-terrace house",
        inspection_date=date(2025, 6, 1),
        tenure="1",
        transaction_type="1",
        address_line_1="1 Test Street",
        postcode="A1 1AA",
        post_town="Testtown",
        # Fabric and heating collections are left empty.
        roofs=[],
        walls=[],
        floors=[],
        main_heating=[],
        door_count=0,
        sap_heating=heating,
        sap_windows=[],
        sap_energy_source=energy_source,
        sap_building_parts=[],
        # Boolean flags.
        solar_water_heating=False,
        has_hot_water_cylinder=False,
        has_fixed_air_conditioning=False,
        # Counts all default to zero.
        wet_rooms_count=0,
        extensions_count=0,
        heated_rooms_count=0,
        open_chimneys_count=0,
        habitable_rooms_count=0,
        insulated_door_count=0,
        cfl_fixed_lighting_bulbs_count=0,
        led_fixed_lighting_bulbs_count=0,
        incandescent_fixed_lighting_bulbs_count=0,
        total_floor_area_m2=70.0,
        sap_version=10.2,
        # Caller-controlled target values.
        energy_rating_current=energy_rating_current,
        co2_emissions_current=co2_emissions_current,
        energy_consumption_current=energy_consumption_current,
        renewable_heat_incentive=incentive,
    )

View file

@ -0,0 +1,59 @@
"""UCL per-band correction for Primary Energy Intensity.
Per Few et al. 2023 "The over-prediction of energy use by EPCs in Great Britain"
(Energy & Buildings 288, 113024). Table 3 per-band linear correction.
Ported from `backend/ml_models/AnnualBillSavings.adjust_energy_to_metered`. Applied
to PEUI training labels per ADR-0007, *not* at runtime — the discontinuities at
EPC band boundaries that arose when this was applied post-prediction are what made
us fold it into the training labels instead.
Open question §15.14 in the PRD: the paper was calibrated on gas-heated, non-PV
homes in England and Wales rated under SAP 2012. The current implementation
extrapolates silently to all properties.
"""
from typing import Final
from datatypes.epc.domain.epc import Epc
# Per-band (gradient, intercept) pairs of the linear correction, kept side by
# side so a band's two coefficients are read and edited together.
_UCL_COEFFICIENTS: Final[dict[Epc, tuple[float, float]]] = {
    Epc.A: (-0.10, 28.0),
    Epc.B: (-0.10, 28.0),
    Epc.C: (-0.43, 97.0),
    Epc.D: (-0.52, 119.0),
    Epc.E: (-0.70, 160.0),
    Epc.F: (-0.76, 157.0),
    Epc.G: (-0.76, 157.0),
}

# Slope multiplied by the raw PEUI, keyed by EPC band.
_GRADIENTS: Final[dict[Epc, float]] = {
    band: gradient for band, (gradient, _) in _UCL_COEFFICIENTS.items()
}

# Constant term added to the scaled PEUI, keyed by EPC band.
_INTERCEPTS: Final[dict[Epc, float]] = {
    band: intercept for band, (_, intercept) in _UCL_COEFFICIENTS.items()
}
def apply_ucl_correction(peui_raw: float, band: Epc) -> float:
    """Return the metered-equivalent PEUI for an EPC's raw PEUI in a given band.

    The band's linear term ``gradient * peui_raw + intercept`` is added to the
    raw value. The correction is one-sided: it may only lower PEUI. Whenever
    the linear term comes out positive (an unusually low raw PEUI for its
    band) it is replaced by zero, so the raw PEUI is returned unchanged rather
    than inflated.

    Args:
        peui_raw: Raw PEUI taken from the EPC.
        band: EPC band used to look up the gradient and intercept.

    Returns:
        The corrected PEUI, never greater than ``peui_raw``.

    Raises:
        ValueError: If the corrected PEUI is negative.
        KeyError: If ``band`` has no entry in the coefficient tables.
    """
    slope = _GRADIENTS[band]
    offset = _INTERCEPTS[band]
    linear_delta = slope * peui_raw + offset

    # One-sided clamp: a positive delta would raise PEUI, so drop it.
    applied_delta = 0.0 if linear_delta > 0 else linear_delta
    corrected = peui_raw + applied_delta

    if corrected < 0:
        raise ValueError(
            f"UCL-corrected PEUI is negative ({corrected}) — "
            f"impossible for raw PEUI {peui_raw} band {band.value}"
        )
    return corrected