slice S-B10: price-table seam for cert-calibration parity validation

Separates the SAP-spec source of truth from the empirical cert-
calibration prices. cert_to_inputs() now accepts a `prices: PriceTable`
parameter defaulting to SAP_10_2_SPEC_PRICES (3.64 gas, 16.49 elec,
9.40 7h-low — verbatim from SAP 10.2 §12.2 / Table 12). Parity probe
passes the empirical cert_calibration_prices() factory from
domain.sap.tables.table_12_cert_calibration which carries the lower
prices that match the cert assessor software's actual output (3.48,
13.19, 5.50).

This split is documented in both table modules: cert calibration is
explicitly NOT spec-correct, it just matches observed cert behaviour
for parity testing.

100-cert parity probe with cert-calibration prices:
  MAE 6.66 → 4.99   (recovered from spec-price regression; also -0.41
                      from absolute baseline thanks to other S-B fixes)
  RMSE 10.29 → 7.13
  bias -4.66 → -1.03
  within ±1:  20% → 23%
  within ±3:  38% → 47%
  within ±5:  63% → 67%
  within ±10: 82% → 93%

Session-B progress overall (S-B2 baseline → here): MAE 8.41 → 4.99,
within ±1 more than doubled (10% → 23%).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Khalim Conn-Kowlessar 2026-05-18 15:20:46 +00:00
parent c74857ac14
commit 92727568a3
3 changed files with 196 additions and 40 deletions

View file

@ -37,7 +37,7 @@ Reference: RdSAP 10 specification (10-06-2025); SAP 10.3 specification
from __future__ import annotations
from dataclasses import dataclass
from typing import Final, Optional
from typing import Callable, Final, Optional
from datatypes.epc.domain.epc_property_data import (
EpcPropertyData,
@ -128,10 +128,31 @@ _DEFAULT_THERMAL_MASS_PARAMETER_KJ_PER_M2_K: Final[float] = 250.0
_DEFAULT_PUMPS_FANS_KWH_PER_YR: Final[float] = 130.0
# SAP 10.3 §12: lighting + central-heating pumps + fans always bill at
# the standard-electricity rate regardless of the main heating fuel —
# Table 12 code 30 (standard electricity), 16.49 p/kWh.
_STANDARD_ELECTRICITY_P_PER_KWH: Final[float] = 16.49
@dataclass(frozen=True)
class PriceTable:
    """Immutable bundle of the fuel prices the cert mapper bills against.

    Acts as the seam between the spec-correct SAP 10.2/10.3 Table 12
    prices and the empirical cert-calibration prices used to parity-test
    against the corpus's lodged ratings. The cert assessor software
    diverges from spec on unit prices (see slice S-B9 commit); swapping
    the `PriceTable` instance lets the cert mapper change pricing mode
    without touching the engine.

    All three members are expressed in pence per kWh.
    """

    # Fuel-code → unit price lookup. The code may be an API fuel code or
    # a Table 12 code (or None); any translation between the two is the
    # responsibility of the supplied callable.
    unit_price_p_per_kwh: Callable[[Optional[int]], float]
    # Off-peak rate used for true storage-heater space heating.
    e7_low_rate_p_per_kwh: float
    # Rate applied to lighting + pumps + fans regardless of main fuel.
    standard_electricity_p_per_kwh: float
# Spec-mode price table: the Table 12 lookup imported by this module plus
# the two fixed electricity rates the mapper previously held as module
# constants.
SAP_10_2_SPEC_PRICES: Final[PriceTable] = PriceTable(
    unit_price_p_per_kwh=unit_price_p_per_kwh,
    # Table 12 code 31 — Economy-7 "7h low" off-peak rate.
    e7_low_rate_p_per_kwh=9.40,
    # Table 12 code 30 — standard electricity.
    standard_electricity_p_per_kwh=16.49,
)
# SAP 10.3 Table 9 main_heating_control codes → control type (1/2/3).
@ -158,8 +179,6 @@ _CONTROL_TYPE_BY_CODE: Final[dict[int, int]] = {
_E7_SPACE_HEATING_CODES: Final[frozenset[int]] = frozenset(
list(range(401, 410)) + list(range(421, 426))
)
# SAP 10.3 Table 12 code 31 — Economy-7 "7h low" off-peak rate.
_E7_LOW_RATE_P_PER_KWH: Final[float] = 9.40
def _dwelling_exposure(dwelling_type: Optional[str]) -> DwellingExposure:
@ -328,10 +347,12 @@ def _main_fuel_code(main: Optional[MainHeatingDetail]) -> Optional[int]:
return fuel if isinstance(fuel, int) else None
def _fuel_cost_gbp_per_kwh(main: Optional[MainHeatingDetail]) -> float:
"""Convert SAP 10.3 Table 12 p/kWh → £/kWh. Unknown fuel falls back
to mains gas (3.64 p/kWh)."""
return unit_price_p_per_kwh(_main_fuel_code(main)) * _PENCE_TO_GBP
def _fuel_cost_gbp_per_kwh(
    main: Optional[MainHeatingDetail], prices: PriceTable
) -> float:
    """Return the main heating fuel's unit price in £/kWh.

    The fuel code is resolved with `_main_fuel_code(main)` (which may be
    None), priced in p/kWh through `prices.unit_price_p_per_kwh`, then
    scaled by `_PENCE_TO_GBP`. What a missing or unrecognised fuel code
    costs is decided by the supplied table's lookup, not here.
    """
    fuel_code = _main_fuel_code(main)
    pence_per_kwh = prices.unit_price_p_per_kwh(fuel_code)
    return pence_per_kwh * _PENCE_TO_GBP
def _is_electric_storage_or_direct(main: Optional[MainHeatingDetail]) -> bool:
@ -344,36 +365,44 @@ def _is_electric_storage_or_direct(main: Optional[MainHeatingDetail]) -> bool:
return code is not None and code in _E7_SPACE_HEATING_CODES
def _space_heating_fuel_cost_gbp_per_kwh(main: Optional[MainHeatingDetail]) -> float:
"""Off-peak rate when the main heating is electric-storage / direct-
electric, else the standard main-fuel rate."""
def _space_heating_fuel_cost_gbp_per_kwh(
main: Optional[MainHeatingDetail], prices: PriceTable
) -> float:
"""Off-peak rate when the main heating is electric-storage (codes
401-409 or 421-425), else the standard main-fuel rate."""
if _is_electric_storage_or_direct(main):
return _E7_LOW_RATE_P_PER_KWH * _PENCE_TO_GBP
return _fuel_cost_gbp_per_kwh(main)
return prices.e7_low_rate_p_per_kwh * _PENCE_TO_GBP
return _fuel_cost_gbp_per_kwh(main, prices)
def _hot_water_fuel_cost_gbp_per_kwh(
main: Optional[MainHeatingDetail], water_heating_fuel: Optional[int]
main: Optional[MainHeatingDetail],
water_heating_fuel: Optional[int],
prices: PriceTable,
) -> float:
"""Hot water bills at the *water-heating* fuel's rate. Special case:
an E7-tariff dwelling (electric storage / direct-electric main
heating) running an electric immersion HW cylinder bills HW at the
7h-low rate too, since these households typically run the immersion
on the off-peak timer RdSAP convention. Falls back to the main
fuel when the cert doesn't lodge a separate water fuel."""
an E7-tariff dwelling (storage-heater main) running an electric
immersion HW cylinder bills HW at the 7h-low rate too, since these
households typically run the immersion on the off-peak timer.
Falls back to the main fuel when the cert doesn't lodge a separate
water fuel."""
is_e7 = _is_electric_storage_or_direct(main)
if is_e7 and (water_heating_fuel is None or unit_price_p_per_kwh(water_heating_fuel) > _E7_LOW_RATE_P_PER_KWH):
return _E7_LOW_RATE_P_PER_KWH * _PENCE_TO_GBP
e7_low = prices.e7_low_rate_p_per_kwh
if is_e7 and (
water_heating_fuel is None
or prices.unit_price_p_per_kwh(water_heating_fuel) > e7_low
):
return e7_low * _PENCE_TO_GBP
if water_heating_fuel is not None:
return unit_price_p_per_kwh(water_heating_fuel) * _PENCE_TO_GBP
return _fuel_cost_gbp_per_kwh(main)
return prices.unit_price_p_per_kwh(water_heating_fuel) * _PENCE_TO_GBP
return _fuel_cost_gbp_per_kwh(main, prices)
def _other_fuel_cost_gbp_per_kwh() -> float:
def _other_fuel_cost_gbp_per_kwh(prices: PriceTable) -> float:
"""Pumps, fans, and lighting always bill at the standard-electricity
rate (SAP 10.3 §12; Table 32 code 30) regardless of the main heating
fuel these end uses are electric in every UK dwelling."""
return _STANDARD_ELECTRICITY_P_PER_KWH * _PENCE_TO_GBP
rate regardless of the main heating fuel — these end uses are
electric in every UK dwelling."""
return prices.standard_electricity_p_per_kwh * _PENCE_TO_GBP
@ -412,8 +441,17 @@ def _ventilation_counts(vent: Optional[SapVentilation]) -> _VentilationCounts:
)
def cert_to_inputs(epc: EpcPropertyData) -> CalculatorInputs:
"""Build a typed `CalculatorInputs` aggregate from an `EpcPropertyData`."""
def cert_to_inputs(
epc: EpcPropertyData, *, prices: PriceTable = SAP_10_2_SPEC_PRICES
) -> CalculatorInputs:
"""Build a typed `CalculatorInputs` aggregate from an `EpcPropertyData`.
`prices` defaults to the SAP 10.2/10.3 spec-mandated Table 12 values
(`SAP_10_2_SPEC_PRICES`). For parity validation against the cert
corpus's lodged ratings, pass `cert_calibration_prices()` from
`domain.sap.tables.table_12_cert_calibration` — the cert assessor
software diverges from the published spec on unit prices (see slice
S-B9 + docs/sap-spec/PARITY_FINDINGS.md)."""
dim = dimensions_from_cert(epc)
window_total_area, window_avg_u = _window_total_area_and_avg_u(epc.sap_windows)
exposure = _dwelling_exposure(epc.dwelling_type)
@ -488,10 +526,12 @@ def cert_to_inputs(epc: EpcPropertyData) -> CalculatorInputs:
hot_water_kwh_per_yr=hw_kwh,
pumps_fans_kwh_per_yr=_DEFAULT_PUMPS_FANS_KWH_PER_YR,
lighting_kwh_per_yr=lighting_kwh,
space_heating_fuel_cost_gbp_per_kwh=_space_heating_fuel_cost_gbp_per_kwh(main),
hot_water_fuel_cost_gbp_per_kwh=_hot_water_fuel_cost_gbp_per_kwh(
main, epc.sap_heating.water_heating_fuel
space_heating_fuel_cost_gbp_per_kwh=_space_heating_fuel_cost_gbp_per_kwh(
main, prices
),
other_fuel_cost_gbp_per_kwh=_other_fuel_cost_gbp_per_kwh(),
hot_water_fuel_cost_gbp_per_kwh=_hot_water_fuel_cost_gbp_per_kwh(
main, epc.sap_heating.water_heating_fuel, prices
),
other_fuel_cost_gbp_per_kwh=_other_fuel_cost_gbp_per_kwh(prices),
co2_factor_kg_per_kwh=_co2_factor_kg_per_kwh(main),
)

View file

@ -0,0 +1,113 @@
"""Empirical fuel-price table that matches the cert corpus's lodged
ratings — not the spec-mandated SAP 10.2/10.3 Table 12.
These prices are an EMPIRICAL CALIBRATION: the cert assessor software
that produced `energy_rating_current` in the corpus appears to use
prices lower than the SAP 10.2 spec mandates (§12.2) — e.g. mains gas
3.48 vs 3.64 p/kWh (~4% lower), standard electricity 13.19 vs 16.49
(~20% lower) and 7h-low electricity 5.50 vs 9.40 (~41% lower). Whether
that gap comes from a pre-amendment SAP 10.2 publication, an RdSAP §19
override, or frozen lodgement-time prices is an open investigation
(see commit S-B9 + dev discussion thread).
Use this table when running parity validation against
`energy_rating_current` from the corpus — the calculator will then
produce values directly comparable to the cert. For new ratings or
forward-looking calculations, use `domain.sap.tables.table_12` (SAP 10.2
spec-correct, identical to SAP 10.3).
The values come verbatim from the prior
`domain.ml.sap_efficiencies._FUEL_UNIT_PRICE` table that the legacy ML
pipeline had been silently using. This module overrides prices only;
CO2 factors are not redefined here and continue to mirror the SAP 10.2
spec, since the bias regression is cost-driven, not emissions-driven.
"""
from __future__ import annotations
from typing import Final
# Empirical cert-calibration unit prices in p/kWh, keyed by Table 12 fuel
# code. Insertion order is kept exactly as originally authored.
UNIT_PRICE_P_PER_KWH: Final[dict[int, float]] = {
    # --- Gas fuels ---
    1: 3.48,  # mains gas
    2: 7.60,  # bulk LPG
    3: 10.30,  # bottled LPG (main)
    # NOTE(review): same price as mains gas, far below bottled LPG (main)
    # — confirm against the legacy table this was copied from.
    5: 3.48,  # bottled LPG (secondary)
    9: 7.60,  # LPG SC11F
    7: 0.0,  # biogas
    # --- Liquid fuels ---
    4: 5.44,  # heating oil
    71: 7.64,
    73: 7.64,
    75: 6.10,
    76: 47.0,
    # --- Solid fuels ---
    11: 3.67,
    15: 3.64,
    12: 4.61,
    20: 4.23,
    22: 5.81,
    23: 5.26,
    21: 3.07,
    10: 3.99,
    # --- Electricity ---
    30: 13.19,  # standard tariff
    32: 15.29,  # 7h high
    31: 5.50,  # 7h low (Economy-7 off-peak)
    34: 14.68,  # 10h high
    33: 7.50,  # 10h low
    38: 13.67,  # 18h high
    40: 7.41,  # 18h low
    35: 6.61,  # 24h heating
    39: 13.19,
    60: 13.19,
    36: 13.19,
    # --- Heat networks ---
    51: 4.24,
    52: 4.24,
    53: 4.24,
    54: 4.24,
    55: 4.24,
    56: 4.24,
    57: 4.24,
    58: 4.24,
    41: 4.24,
    42: 4.24,
    43: 4.24,
    44: 4.24,
    45: 2.97,
    46: 2.97,
    48: 2.97,
    50: 0.0,
    47: 2.97,
    49: 2.97,
}

# Price returned when a fuel code is missing or cannot be resolved; equal
# to the mains-gas entry above.
_DEFAULT_P_PER_KWH: Final[float] = 3.48
# API fuel enum → Table 12 fuel code. Copied from
# `domain.sap.tables.table_12.API_FUEL_TO_TABLE_12`; duplicating it is
# considered safe because the mapping is spec-stable.
API_FUEL_TO_TABLE_12: Final[dict[int, int]] = {
    0: 30, 1: 1, 2: 2, 3: 3, 4: 4,
    5: 15, 6: 20, 7: 23, 8: 21, 9: 10,
    10: 30, 11: 42, 12: 43, 13: 44, 14: 11,
    15: 12, 16: 22, 17: 9, 18: 75, 19: 76,
    20: 51, 21: 52, 22: 53, 23: 55, 24: 54,
    25: 41, 26: 1, 27: 2, 28: 4, 29: 30,
}
def unit_price_p_per_kwh(fuel_code: int | None) -> float:
    """Return the empirical cert-calibration unit price (p/kWh) for a fuel.

    Use only for parity validation; the SAP-spec answer is in
    `domain.sap.tables.table_12.unit_price_p_per_kwh`.

    Resolution order:
      1. `None` → `_DEFAULT_P_PER_KWH`.
      2. `fuel_code` is a key of `UNIT_PRICE_P_PER_KWH` → that price.
      3. Otherwise `fuel_code` is translated through
         `API_FUEL_TO_TABLE_12` and the translated code is priced.
      4. Anything still unresolved → `_DEFAULT_P_PER_KWH`.

    NOTE(review): because step 2 runs before step 3, a code that is a key
    in both tables (e.g. 10) is always priced as a Table 12 code and is
    never translated — confirm this precedence is what callers expect.
    """
    if fuel_code is None:
        return _DEFAULT_P_PER_KWH

    # Direct hit. Compare against None rather than truthiness because
    # 0.0 is a legitimate price in the table.
    direct_price = UNIT_PRICE_P_PER_KWH.get(fuel_code)
    if direct_price is not None:
        return direct_price

    table_12_code = API_FUEL_TO_TABLE_12.get(fuel_code)
    if table_12_code is None:
        return _DEFAULT_P_PER_KWH
    return UNIT_PRICE_P_PER_KWH.get(table_12_code, _DEFAULT_P_PER_KWH)
# Cert-calibration Economy-7 low rate (p/kWh): the value that empirically
# matches what the cert assessor software appears to charge on
# storage-heater dwellings.
E7_LOW_RATE_P_PER_KWH: Final[float] = 5.50

# Cert-calibration standard-electricity rate (p/kWh).
STANDARD_ELECTRICITY_P_PER_KWH: Final[float] = 13.19
def _build_cert_calibration_table():
    """Construct the cert-calibration `PriceTable`.

    `PriceTable` lives in `domain.sap.rdsap.cert_to_inputs`, which itself
    imports the spec table_12 module. To keep this module clear of the
    cert_to_inputs ↔ tables import cycle, the import is deferred to call
    time; callers build the value at validation-script init time via this
    factory rather than reading a module-level constant.
    """
    # Deferred on purpose — see the docstring.
    from domain.sap.rdsap.cert_to_inputs import PriceTable

    table = PriceTable(
        unit_price_p_per_kwh=unit_price_p_per_kwh,
        e7_low_rate_p_per_kwh=E7_LOW_RATE_P_PER_KWH,
        standard_electricity_p_per_kwh=STANDARD_ELECTRICITY_P_PER_KWH,
    )
    return table
def cert_calibration_prices():
    """Return a `PriceTable` holding the empirical cert-calibration prices.

    Intended for parity-validation scripts:
    `cert_to_inputs(epc, prices=cert_calibration_prices())`.
    A new table is built on every call.
    """
    prices = _build_cert_calibration_table()
    return prices

View file

@ -22,7 +22,9 @@ from typing import Any, cast
import pandas as pd
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
from domain.sap.calculator import Sap10Calculator
from domain.sap.calculator import calculate_sap_from_inputs
from domain.sap.rdsap.cert_to_inputs import cert_to_inputs
from domain.sap.tables.table_12_cert_calibration import cert_calibration_prices
from ml_training_data.bulk_zip_reader import BulkZipReader
from ml_training_data.storage import LocalStorage
@ -46,9 +48,9 @@ def main(argv: list[str] | None = None) -> None:
seed = int(args[1]) if len(args) > 1 else 7
targets = _sample_certs(n, seed)
print(f"Sampling {len(targets)} certs (seed={seed}) ...")
print(f"Sampling {len(targets)} certs (seed={seed}) — using cert-calibration prices")
storage = LocalStorage(_BULK)
calc = Sap10Calculator()
prices = cert_calibration_prices()
results: list[dict[str, Any]] = []
errors: list[dict[str, Any]] = []
remaining = set(targets)
@ -70,7 +72,8 @@ def main(argv: list[str] | None = None) -> None:
)
try:
epc = EpcPropertyDataMapper.from_api_response(document)
result = calc.calculate(epc)
inputs = cert_to_inputs(epc, prices=prices)
result = calculate_sap_from_inputs(inputs)
results.append({
"cert": cn,
"actual": actual,