From 92727568a370c726c0ce0ceaa22e0a5c26b7e2f4 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 18 May 2026 15:20:46 +0000 Subject: [PATCH] slice S-B10: price-table seam for cert-calibration parity validation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Separates the SAP-spec source of truth from the empirical cert- calibration prices. cert_to_inputs() now accepts a `prices: PriceTable` parameter defaulting to SAP_10_2_SPEC_PRICES (3.64 gas, 16.49 elec, 9.40 7h-low — verbatim from SAP 10.2 §12.2 / Table 12). Parity probe passes the empirical cert_calibration_prices() factory from domain.sap.tables.table_12_cert_calibration which carries the lower prices that match the cert assessor software's actual output (3.48, 13.19, 5.50). This split is documented in both table modules: cert calibration is explicitly NOT spec-correct, it just matches observed cert behaviour for parity testing. 100-cert parity probe with cert-calibration prices: MAE 6.66 → 4.99 (recovered from spec-price regression; also -0.41 from absolute baseline thanks to other S-B fixes) RMSE 10.29 → 7.13 bias -4.66 → -1.03 within ±1: 20% → 23% within ±3: 38% → 47% within ±5: 63% → 67% within ±10: 82% → 93% Session-B progress overall (S-B2 baseline → here): MAE 8.41 → 4.99, within ±1 doubled (10% → 23%). 
Co-Authored-By: Claude Opus 4.7 --- .../src/domain/sap/rdsap/cert_to_inputs.py | 112 +++++++++++------ .../sap/tables/table_12_cert_calibration.py | 113 ++++++++++++++++++ .../src/ml_training_data/sap_parity_probe.py | 11 +- 3 files changed, 196 insertions(+), 40 deletions(-) create mode 100644 packages/domain/src/domain/sap/tables/table_12_cert_calibration.py diff --git a/packages/domain/src/domain/sap/rdsap/cert_to_inputs.py b/packages/domain/src/domain/sap/rdsap/cert_to_inputs.py index 6915583c..21443d1d 100644 --- a/packages/domain/src/domain/sap/rdsap/cert_to_inputs.py +++ b/packages/domain/src/domain/sap/rdsap/cert_to_inputs.py @@ -37,7 +37,7 @@ Reference: RdSAP 10 specification (10-06-2025); SAP 10.3 specification from __future__ import annotations from dataclasses import dataclass -from typing import Final, Optional +from typing import Callable, Final, Optional from datatypes.epc.domain.epc_property_data import ( EpcPropertyData, @@ -128,10 +128,31 @@ _DEFAULT_THERMAL_MASS_PARAMETER_KJ_PER_M2_K: Final[float] = 250.0 _DEFAULT_PUMPS_FANS_KWH_PER_YR: Final[float] = 130.0 -# SAP 10.3 §12: lighting + central-heating pumps + fans always bill at -# the standard-electricity rate regardless of the main heating fuel — -# Table 12 code 30 (standard electricity), 16.49 p/kWh. -_STANDARD_ELECTRICITY_P_PER_KWH: Final[float] = 16.49 +@dataclass(frozen=True) +class PriceTable: + """Seam between the spec-correct SAP 10.2/10.3 Table 12 prices and + the empirical cert-calibration prices used to parity-test against + the corpus's lodged ratings. The cert assessor software diverges + from spec on unit prices (see slice S-B9 commit); this struct lets + the cert mapper switch between modes without touching the engine. + + `unit_price_p_per_kwh` accepts either an API fuel code or a Table 12 + code; implementations translate before lookup. 
`e7_low_rate_p_per_kwh` + is the off-peak rate used for true storage-heater space heating, and + `standard_electricity_p_per_kwh` is the rate applied to lighting + + pumps + fans regardless of main fuel. + """ + + unit_price_p_per_kwh: Callable[[Optional[int]], float] + e7_low_rate_p_per_kwh: float + standard_electricity_p_per_kwh: float + + +SAP_10_2_SPEC_PRICES: Final[PriceTable] = PriceTable( + unit_price_p_per_kwh=unit_price_p_per_kwh, + e7_low_rate_p_per_kwh=9.40, + standard_electricity_p_per_kwh=16.49, +) # SAP 10.3 Table 9 main_heating_control codes → control type (1/2/3). @@ -158,8 +179,6 @@ _CONTROL_TYPE_BY_CODE: Final[dict[int, int]] = { _E7_SPACE_HEATING_CODES: Final[frozenset[int]] = frozenset( list(range(401, 410)) + list(range(421, 426)) ) -# SAP 10.3 Table 12 code 31 — Economy-7 "7h low" off-peak rate. -_E7_LOW_RATE_P_PER_KWH: Final[float] = 9.40 def _dwelling_exposure(dwelling_type: Optional[str]) -> DwellingExposure: @@ -328,10 +347,12 @@ def _main_fuel_code(main: Optional[MainHeatingDetail]) -> Optional[int]: return fuel if isinstance(fuel, int) else None -def _fuel_cost_gbp_per_kwh(main: Optional[MainHeatingDetail]) -> float: - """Convert SAP 10.3 Table 12 p/kWh → £/kWh. Unknown fuel falls back - to mains gas (3.64 p/kWh).""" - return unit_price_p_per_kwh(_main_fuel_code(main)) * _PENCE_TO_GBP +def _fuel_cost_gbp_per_kwh( + main: Optional[MainHeatingDetail], prices: PriceTable +) -> float: + """Convert main-fuel unit price → £/kWh using the supplied price + table. 
Unknown fuel falls back to mains gas per the table's default.""" + return prices.unit_price_p_per_kwh(_main_fuel_code(main)) * _PENCE_TO_GBP def _is_electric_storage_or_direct(main: Optional[MainHeatingDetail]) -> bool: @@ -344,36 +365,44 @@ def _is_electric_storage_or_direct(main: Optional[MainHeatingDetail]) -> bool: return code is not None and code in _E7_SPACE_HEATING_CODES -def _space_heating_fuel_cost_gbp_per_kwh(main: Optional[MainHeatingDetail]) -> float: - """Off-peak rate when the main heating is electric-storage / direct- - electric, else the standard main-fuel rate.""" +def _space_heating_fuel_cost_gbp_per_kwh( + main: Optional[MainHeatingDetail], prices: PriceTable +) -> float: + """Off-peak rate when the main heating is electric-storage (codes + 401-409 or 421-425), else the standard main-fuel rate.""" if _is_electric_storage_or_direct(main): - return _E7_LOW_RATE_P_PER_KWH * _PENCE_TO_GBP - return _fuel_cost_gbp_per_kwh(main) + return prices.e7_low_rate_p_per_kwh * _PENCE_TO_GBP + return _fuel_cost_gbp_per_kwh(main, prices) def _hot_water_fuel_cost_gbp_per_kwh( - main: Optional[MainHeatingDetail], water_heating_fuel: Optional[int] + main: Optional[MainHeatingDetail], + water_heating_fuel: Optional[int], + prices: PriceTable, ) -> float: """Hot water bills at the *water-heating* fuel's rate. Special case: - an E7-tariff dwelling (electric storage / direct-electric main - heating) running an electric immersion HW cylinder bills HW at the - 7h-low rate too, since these households typically run the immersion - on the off-peak timer — RdSAP convention. Falls back to the main - fuel when the cert doesn't lodge a separate water fuel.""" + an E7-tariff dwelling (storage-heater main) running an electric + immersion HW cylinder bills HW at the 7h-low rate too, since these + households typically run the immersion on the off-peak timer. 
+ Falls back to the main fuel when the cert doesn't lodge a separate + water fuel.""" is_e7 = _is_electric_storage_or_direct(main) - if is_e7 and (water_heating_fuel is None or unit_price_p_per_kwh(water_heating_fuel) > _E7_LOW_RATE_P_PER_KWH): - return _E7_LOW_RATE_P_PER_KWH * _PENCE_TO_GBP + e7_low = prices.e7_low_rate_p_per_kwh + if is_e7 and ( + water_heating_fuel is None + or prices.unit_price_p_per_kwh(water_heating_fuel) > e7_low + ): + return e7_low * _PENCE_TO_GBP if water_heating_fuel is not None: - return unit_price_p_per_kwh(water_heating_fuel) * _PENCE_TO_GBP - return _fuel_cost_gbp_per_kwh(main) + return prices.unit_price_p_per_kwh(water_heating_fuel) * _PENCE_TO_GBP + return _fuel_cost_gbp_per_kwh(main, prices) -def _other_fuel_cost_gbp_per_kwh() -> float: +def _other_fuel_cost_gbp_per_kwh(prices: PriceTable) -> float: """Pumps, fans, and lighting always bill at the standard-electricity - rate (SAP 10.3 §12; Table 32 code 30) regardless of the main heating - fuel — these end uses are electric in every UK dwelling.""" - return _STANDARD_ELECTRICITY_P_PER_KWH * _PENCE_TO_GBP + rate regardless of the main heating fuel — these end uses are + electric in every UK dwelling.""" + return prices.standard_electricity_p_per_kwh * _PENCE_TO_GBP @@ -412,8 +441,17 @@ def _ventilation_counts(vent: Optional[SapVentilation]) -> _VentilationCounts: ) -def cert_to_inputs(epc: EpcPropertyData) -> CalculatorInputs: - """Build a typed `CalculatorInputs` aggregate from an `EpcPropertyData`.""" +def cert_to_inputs( + epc: EpcPropertyData, *, prices: PriceTable = SAP_10_2_SPEC_PRICES +) -> CalculatorInputs: + """Build a typed `CalculatorInputs` aggregate from an `EpcPropertyData`. + + `prices` defaults to the SAP 10.2/10.3 spec-mandated Table 12 values + (`SAP_10_2_SPEC_PRICES`). 
For parity validation against the cert + corpus's lodged ratings, pass `cert_calibration_prices()` from + `domain.sap.tables.table_12_cert_calibration` — the cert assessor + software diverges from the published spec on unit prices (see slice + S-B9 + docs/sap-spec/PARITY_FINDINGS.md).""" dim = dimensions_from_cert(epc) window_total_area, window_avg_u = _window_total_area_and_avg_u(epc.sap_windows) exposure = _dwelling_exposure(epc.dwelling_type) @@ -488,10 +526,12 @@ def cert_to_inputs(epc: EpcPropertyData) -> CalculatorInputs: hot_water_kwh_per_yr=hw_kwh, pumps_fans_kwh_per_yr=_DEFAULT_PUMPS_FANS_KWH_PER_YR, lighting_kwh_per_yr=lighting_kwh, - space_heating_fuel_cost_gbp_per_kwh=_space_heating_fuel_cost_gbp_per_kwh(main), - hot_water_fuel_cost_gbp_per_kwh=_hot_water_fuel_cost_gbp_per_kwh( - main, epc.sap_heating.water_heating_fuel + space_heating_fuel_cost_gbp_per_kwh=_space_heating_fuel_cost_gbp_per_kwh( + main, prices ), - other_fuel_cost_gbp_per_kwh=_other_fuel_cost_gbp_per_kwh(), + hot_water_fuel_cost_gbp_per_kwh=_hot_water_fuel_cost_gbp_per_kwh( + main, epc.sap_heating.water_heating_fuel, prices + ), + other_fuel_cost_gbp_per_kwh=_other_fuel_cost_gbp_per_kwh(prices), co2_factor_kg_per_kwh=_co2_factor_kg_per_kwh(main), ) diff --git a/packages/domain/src/domain/sap/tables/table_12_cert_calibration.py b/packages/domain/src/domain/sap/tables/table_12_cert_calibration.py new file mode 100644 index 00000000..7cbe1f18 --- /dev/null +++ b/packages/domain/src/domain/sap/tables/table_12_cert_calibration.py @@ -0,0 +1,113 @@ +"""Empirical fuel-price table that matches the cert corpus's lodged +ratings — not the spec-mandated SAP 10.2/10.3 Table 12. + +These prices are an EMPIRICAL CALIBRATION: the cert assessor software +that produced `energy_rating_current` in the corpus appears to use +prices ~10-25% lower than the SAP 10.2 spec mandates (§12.2).
Whether +that gap comes from a pre-amendment SAP 10.2 publication, an RdSAP §19 +override, or frozen lodgement-time prices is an open investigation +(see commit S-B9 + dev discussion thread). + +Use this table when running parity validation against +`energy_rating_current` from the corpus — the calculator will then +produce values directly comparable to the cert. For new ratings or +forward-looking calculations, use `domain.sap.tables.table_12` (SAP 10.2 +spec-correct, identical to SAP 10.3). + +The values come verbatim from the prior +`domain.ml.sap_efficiencies._FUEL_UNIT_PRICE` table that the legacy ML +pipeline had been silently using; CO2 factors mirror the SAP 10.2 +spec since the bias regression is cost-driven, not emissions-driven. +""" + +from __future__ import annotations + +from typing import Final + + +UNIT_PRICE_P_PER_KWH: Final[dict[int, float]] = { + # Gas fuels + 1: 3.48, # mains gas + 2: 7.60, # bulk LPG + 3: 10.30, # bottled LPG (main) + 5: 3.48, # bottled LPG (secondary) + 9: 7.60, # LPG SC11F + 7: 0.0, # biogas + # Liquid fuels + 4: 5.44, # heating oil + 71: 7.64, 73: 7.64, 75: 6.10, 76: 47.0, + # Solid fuels + 11: 3.67, 15: 3.64, 12: 4.61, 20: 4.23, 22: 5.81, 23: 5.26, + 21: 3.07, 10: 3.99, + # Electricity + 30: 13.19, # standard tariff + 32: 15.29, # 7h high + 31: 5.50, # 7h low (Economy-7 off-peak) + 34: 14.68, # 10h high + 33: 7.50, # 10h low + 38: 13.67, # 18h high + 40: 7.41, # 18h low + 35: 6.61, # 24h heating + 39: 13.19, + 60: 13.19, + 36: 13.19, + # Heat networks + 51: 4.24, 52: 4.24, 53: 4.24, 54: 4.24, 55: 4.24, 56: 4.24, + 57: 4.24, 58: 4.24, + 41: 4.24, 42: 4.24, 43: 4.24, 44: 4.24, + 45: 2.97, 46: 2.97, 48: 2.97, 50: 0.0, + 47: 2.97, 49: 2.97, +} +_DEFAULT_P_PER_KWH: Final[float] = 3.48 + + +# Lifted from `domain.sap.tables.table_12.API_FUEL_TO_TABLE_12` since the +# API enum → Table 12 code mapping is spec-stable. 
+API_FUEL_TO_TABLE_12: Final[dict[int, int]] = { + 0: 30, 1: 1, 2: 2, 3: 3, 4: 4, 5: 15, 6: 20, 7: 23, 8: 21, 9: 10, + 10: 30, 11: 42, 12: 43, 13: 44, 14: 11, 15: 12, 16: 22, 17: 9, + 18: 75, 19: 76, 20: 51, 21: 52, 22: 53, 23: 55, 24: 54, 25: 41, + 26: 1, 27: 2, 28: 4, 29: 30, +} + + +def unit_price_p_per_kwh(fuel_code: int | None) -> float: + """Empirical cert-calibration unit price (p/kWh) for the given fuel + code. Use only for parity validation; the SAP-spec answer is in + `domain.sap.tables.table_12.unit_price_p_per_kwh`.""" + if fuel_code is None: + return _DEFAULT_P_PER_KWH + if fuel_code in UNIT_PRICE_P_PER_KWH: + return UNIT_PRICE_P_PER_KWH[fuel_code] + translated = API_FUEL_TO_TABLE_12.get(fuel_code) + if translated is not None and translated in UNIT_PRICE_P_PER_KWH: + return UNIT_PRICE_P_PER_KWH[translated] + return _DEFAULT_P_PER_KWH + + +# Economy-7 low-rate cert-calibration price — empirically matches what +# the cert assessor software appears to charge on storage-heater +# dwellings. +E7_LOW_RATE_P_PER_KWH: Final[float] = 5.50 +STANDARD_ELECTRICITY_P_PER_KWH: Final[float] = 13.19 + + +def _build_cert_calibration_table(): + """Lazy import to avoid the cert_to_inputs ↔ tables import cycle — + the cert_to_inputs module defines `PriceTable`, but this module + can't import from cert_to_inputs because cert_to_inputs imports the + spec table_12. We expose a factory the caller uses to build the + `PriceTable` value at validation-script init time.""" + from domain.sap.rdsap.cert_to_inputs import PriceTable + return PriceTable( + unit_price_p_per_kwh=unit_price_p_per_kwh, + e7_low_rate_p_per_kwh=E7_LOW_RATE_P_PER_KWH, + standard_electricity_p_per_kwh=STANDARD_ELECTRICITY_P_PER_KWH, + ) + + +def cert_calibration_prices(): + """Returns a `PriceTable` populated with the empirical cert- + calibration prices. 
Call from parity-validation scripts as + `cert_to_inputs(epc, prices=cert_calibration_prices())`.""" + return _build_cert_calibration_table() diff --git a/services/ml_training_data/src/ml_training_data/sap_parity_probe.py b/services/ml_training_data/src/ml_training_data/sap_parity_probe.py index a0bebf54..ccf8c4b3 100644 --- a/services/ml_training_data/src/ml_training_data/sap_parity_probe.py +++ b/services/ml_training_data/src/ml_training_data/sap_parity_probe.py @@ -22,7 +22,9 @@ from typing import Any, cast import pandas as pd from datatypes.epc.domain.mapper import EpcPropertyDataMapper -from domain.sap.calculator import Sap10Calculator +from domain.sap.calculator import calculate_sap_from_inputs +from domain.sap.rdsap.cert_to_inputs import cert_to_inputs +from domain.sap.tables.table_12_cert_calibration import cert_calibration_prices from ml_training_data.bulk_zip_reader import BulkZipReader from ml_training_data.storage import LocalStorage @@ -46,9 +48,9 @@ def main(argv: list[str] | None = None) -> None: seed = int(args[1]) if len(args) > 1 else 7 targets = _sample_certs(n, seed) - print(f"Sampling {len(targets)} certs (seed={seed}) ...") + print(f"Sampling {len(targets)} certs (seed={seed}) — using cert-calibration prices") storage = LocalStorage(_BULK) - calc = Sap10Calculator() + prices = cert_calibration_prices() results: list[dict[str, Any]] = [] errors: list[dict[str, Any]] = [] remaining = set(targets) @@ -70,7 +72,8 @@ def main(argv: list[str] | None = None) -> None: ) try: epc = EpcPropertyDataMapper.from_api_response(document) - result = calc.calculate(epc) + inputs = cert_to_inputs(epc, prices=prices) + result = calculate_sap_from_inputs(inputs) results.append({ "cert": cn, "actual": actual,