# Model/domain/sap10_ml/ucl.py

"""UCL per-band correction for Primary Energy Intensity.

Per Few et al. 2023 — "The over-prediction of energy use by EPCs in Great Britain"
(Energy & Buildings 288, 113024). Table 3 per-band linear correction.

Ported from `backend/ml_models/AnnualBillSavings.adjust_energy_to_metered`. Applied
to PEUI training labels per ADR-0007, *not* at runtime — the discontinuities at
EPC band boundaries that arose when this was applied post-prediction are what made
us fold it into the training labels instead.

Open question §15.14 in the PRD: the paper was calibrated on gas-heated, non-PV
homes in England and Wales rated under SAP 2012. The current implementation
extrapolates silently to all properties.
"""

from typing import Final

from datatypes.epc.domain.epc import Epc


# Per-band slope of the linear correction: the coefficient multiplying raw
# PEUI when `apply_ucl_correction` computes the consumption difference.
# Every slope is negative, so the computed difference falls as raw PEUI rises.
# Bands A/B share one slope and bands F/G share another.
_GRADIENTS: Final[dict[Epc, float]] = {
    **dict.fromkeys((Epc.A, Epc.B), -0.10),
    Epc.C: -0.43,
    Epc.D: -0.52,
    Epc.E: -0.70,
    **dict.fromkeys((Epc.F, Epc.G), -0.76),
}

# Per-band constant term of the linear correction: the offset added to
# `gradient * raw PEUI` when `apply_ucl_correction` computes the consumption
# difference. Bands A/B share one offset and bands F/G share another.
_INTERCEPTS: Final[dict[Epc, float]] = {
    **dict.fromkeys((Epc.A, Epc.B), 28.0),
    Epc.C: 97.0,
    Epc.D: 119.0,
    Epc.E: 160.0,
    **dict.fromkeys((Epc.F, Epc.G), 157.0),
}


def apply_ucl_correction(peui_raw: float, band: Epc) -> float:
    """Convert an EPC's raw PEUI into its metered-equivalent value.

    The band selects a slope and an offset; together they give a linear
    estimate of the gap between metered and EPC-modelled consumption at
    ``peui_raw``. That gap is added to the raw figure.

    The adjustment is deliberately one-sided. EPCs over-predict consumption,
    so the gap is only applied when it lowers PEUI. If the line would raise
    PEUI (a property with unusually low PEUI for its band), the gap is
    treated as zero and the raw value is returned unchanged.

    Args:
        peui_raw: Raw PEUI taken from the EPC.
        band: EPC band used to pick the correction coefficients.

    Returns:
        ``peui_raw`` plus the (non-positive) band correction.

    Raises:
        KeyError: If ``band`` has no entry in the coefficient tables.
        ValueError: If the result is negative. With the tabulated
            coefficients this only happens for a negative ``peui_raw``.
    """
    slope = _GRADIENTS[band]
    offset = _INTERCEPTS[band]
    modelled_gap = slope * peui_raw + offset

    # Never inflate PEUI: a positive gap is discarded rather than applied.
    applied_gap = 0.0 if modelled_gap > 0 else modelled_gap
    adjusted = peui_raw + applied_gap

    if adjusted < 0:
        raise ValueError(
            f"UCL-corrected PEUI is negative ({adjusted}) — "
            f"impossible for raw PEUI {peui_raw} band {band.value}"
        )
    return adjusted