def make_minimal_sap10_epc(
    *,
    energy_rating_current: Optional[int] = None,
    co2_emissions_current: Optional[float] = None,
    energy_consumption_current: Optional[int] = None,
    space_heating_kwh: float = 0.0,
    water_heating_kwh: float = 0.0,
) -> EpcPropertyData:
    """Build a minimal SAP10 ``EpcPropertyData`` whose target fields are overridable.

    Every structural collection (roofs, walls, floors, heating, windows,
    building parts) is empty, every count is zero and every boolean flag is
    ``False`` except ``mains_gas``. Only the keyword arguments below vary
    between tests; all arguments are keyword-only.

    Args:
        energy_rating_current: Forwarded unchanged to the field of the same name.
        co2_emissions_current: Forwarded unchanged to the field of the same name.
        energy_consumption_current: Forwarded unchanged to the field of the
            same name.
        space_heating_kwh: Stored on the nested ``RenewableHeatIncentive``.
        water_heating_kwh: Stored on the nested ``RenewableHeatIncentive``.

    Returns:
        A freshly constructed ``EpcPropertyData``; nothing is shared between
        calls because every list and nested object is created per call.
    """
    # Nested SAP sub-records are built first so the final constructor call
    # reads as a flat list of field assignments.
    heating = SapHeating(
        instantaneous_wwhrs=InstantaneousWwhrs(),
        main_heating_details=[],
        has_fixed_air_conditioning=False,
    )
    energy_source = SapEnergySource(
        mains_gas=True,
        meter_type="Single",
        pv_battery_count=0,
        wind_turbines_count=0,
        gas_smart_meter_present=False,
        is_dwelling_export_capable=False,
        wind_turbines_terrain_type="Suburban",
        electricity_smart_meter_present=False,
    )
    heat_incentive = RenewableHeatIncentive(
        space_heating_kwh=space_heating_kwh,
        water_heating_kwh=water_heating_kwh,
    )

    return EpcPropertyData(
        # Identity / address.
        dwelling_type="Mid-terrace house",
        inspection_date=date(2025, 6, 1),
        tenure="1",
        transaction_type="1",
        address_line_1="1 Test Street",
        postcode="A1 1AA",
        post_town="Testtown",
        # Fabric and heating: deliberately empty.
        roofs=[],
        walls=[],
        floors=[],
        main_heating=[],
        door_count=0,
        sap_heating=heating,
        sap_windows=[],
        sap_energy_source=energy_source,
        sap_building_parts=[],
        # Flags.
        solar_water_heating=False,
        has_hot_water_cylinder=False,
        has_fixed_air_conditioning=False,
        # Counts: all zero.
        wet_rooms_count=0,
        extensions_count=0,
        heated_rooms_count=0,
        open_chimneys_count=0,
        habitable_rooms_count=0,
        insulated_door_count=0,
        cfl_fixed_lighting_bulbs_count=0,
        led_fixed_lighting_bulbs_count=0,
        incandescent_fixed_lighting_bulbs_count=0,
        # Fixed geometry / methodology version.
        total_floor_area_m2=70.0,
        sap_version=10.2,
        # Caller-supplied targets.
        energy_rating_current=energy_rating_current,
        co2_emissions_current=co2_emissions_current,
        energy_consumption_current=energy_consumption_current,
        renewable_heat_incentive=heat_incentive,
    )
# Per-band slope of the linear correction (Few et al. 2023, Table 3, as cited
# in this module's docstring). Bands A/B share one value, as do F/G.
_GRADIENTS: Final[dict[Epc, float]] = {
    Epc.A: -0.10,
    Epc.B: -0.10,
    Epc.C: -0.43,
    Epc.D: -0.52,
    Epc.E: -0.70,
    Epc.F: -0.76,
    Epc.G: -0.76,
}

# Per-band intercept of the same linear correction. Must stay keyed
# identically to _GRADIENTS: both tables are indexed with the same band.
_INTERCEPTS: Final[dict[Epc, float]] = {
    Epc.A: 28.0,
    Epc.B: 28.0,
    Epc.C: 97.0,
    Epc.D: 119.0,
    Epc.E: 160.0,
    Epc.F: 157.0,
    Epc.G: 157.0,
}


def apply_ucl_correction(peui_raw: float, band: Epc) -> float:
    """Return the metered-equivalent PEUI for an EPC's raw PEUI in a given band.

    The per-band correction is ``gradient * peui_raw + intercept`` and is
    one-sided: EPCs over-predict consumption, so the correction only ever
    subtracts from PEUI. When the linear term would instead *add* to PEUI (an
    unusually low-PEUI property for its band), the correction is clamped to
    zero and the raw PEUI is returned unchanged.

    Args:
        peui_raw: Raw (uncorrected) PEUI. Must be a finite number.
        band: EPC band used to select the gradient and intercept.

    Returns:
        ``peui_raw`` plus the non-positive per-band correction.

    Raises:
        ValueError: If ``peui_raw`` is NaN or infinite, or if the corrected
            value is negative.
        KeyError: If ``band`` has no entry in the coefficient tables.
    """
    # Imported locally so the function carries its own dependency.
    import math

    # NaN compares false against everything, so without this guard it would
    # slip past both the clamp and the negativity check and be returned as a
    # "corrected" value; +inf would likewise turn into NaN via inf + -inf.
    if not math.isfinite(peui_raw):
        raise ValueError(f"raw PEUI must be a finite number, got {peui_raw!r}")

    # One-sided correction: never let the linear term increase PEUI.
    correction = min(_GRADIENTS[band] * peui_raw + _INTERCEPTS[band], 0.0)
    adjusted = peui_raw + correction

    # With the coefficients above (gradients in (-1, 0), positive intercepts)
    # a non-negative raw PEUI can never be corrected below zero, so in
    # practice this fires only when peui_raw itself is negative.
    if adjusted < 0:
        raise ValueError(
            f"UCL-corrected PEUI is negative ({adjusted}) — "
            f"impossible for raw PEUI {peui_raw} band {band.value}"
        )
    return adjusted