diff --git a/domain/sap10_calculator/rdsap/cert_to_inputs.py b/domain/sap10_calculator/rdsap/cert_to_inputs.py index d293c193..0851e92b 100644 --- a/domain/sap10_calculator/rdsap/cert_to_inputs.py +++ b/domain/sap10_calculator/rdsap/cert_to_inputs.py @@ -1070,6 +1070,56 @@ def _heat_network_dlf(age_band: Optional[str]) -> float: raise UnmappedSapCode("heat_network_age_band", age_band) +# SAP 10.2 Table 4c(3) (PDF p.169) — "Factor for controls and charging +# method" for HEAT NETWORKS, by Table 4e control code. The factor multiplies +# the heat-network heat requirement on top of the Table 12c distribution loss +# factor: worksheet (307) space = (98c) × (302) × (305) × (306), and +# (310) DHW = (64) × (305a) × (306). "Flat rate charging" (note d: the +# household pays a fixed amount regardless of heat used) carries a demand +# penalty — there is no incentive to economise; "charging linked to use of +# heat" does not. The SPACE column adds a further +0.05 when there is also +# no thermostatic room-temperature control (2301/2302). +_HEAT_NETWORK_SPACE_CHARGING_FACTOR_BY_CODE: Final[dict[int, float]] = { + # Flat rate charging, no thermostatic room control → 1.10 + 2301: 1.10, 2302: 1.10, + # Flat rate charging, with some thermostatic control → 1.05 + 2303: 1.05, 2304: 1.05, 2305: 1.05, 2307: 1.05, 2311: 1.05, 2313: 1.05, + # Charging linked to use of heat, thermostat but no TRVs → 1.05 + 2308: 1.05, 2309: 1.05, + # Charging linked to use of heat, with TRVs → 1.00 + 2306: 1.00, 2310: 1.00, 2312: 1.00, 2314: 1.00, +} +_HEAT_NETWORK_DHW_CHARGING_FACTOR_BY_CODE: Final[dict[int, float]] = { + # Flat rate charging → 1.05 (all controls) + 2301: 1.05, 2302: 1.05, 2303: 1.05, 2304: 1.05, + 2305: 1.05, 2307: 1.05, 2311: 1.05, 2313: 1.05, + # Charging linked to use of heat → 1.00 + 2306: 1.00, 2308: 1.00, 2309: 1.00, 2310: 1.00, 2312: 1.00, 2314: 1.00, +} + + +def _heat_network_space_charging_factor(main: Optional[MainHeatingDetail]) -> float: + """SAP 10.2 Table 4c(3) (PDF p.169) worksheet (305) — heat-network + space-heating factor for controls and charging method. Returns 1.0 when + the control is absent (no penalty); every Table 4e Group-3 code + (2301-2314) is covered, so an unmapped key only arises off the + heat-network path and is harmless at 1.0.""" + code = main.main_heating_control if main is not None else None + if not isinstance(code, int): + return 1.0 + return _HEAT_NETWORK_SPACE_CHARGING_FACTOR_BY_CODE.get(code, 1.0) + + +def _heat_network_dhw_charging_factor(main: Optional[MainHeatingDetail]) -> float: + """SAP 10.2 Table 4c(3) (PDF p.169) worksheet (305a) — heat-network + water-heating factor for charging method (flat rate → 1.05, linked to + use → 1.0). Returns 1.0 when the control is absent.""" + code = main.main_heating_control if main is not None else None + if not isinstance(code, int): + return 1.0 + return _HEAT_NETWORK_DHW_CHARGING_FACTOR_BY_CODE.get(code, 1.0) + + def _dwelling_age_band(epc: EpcPropertyData) -> Optional[str]: """The dwelling's construction age band, read from the first building part that lodges one. @@ -1822,7 +1872,13 @@ def _main_heating_detail_efficiency( eff = seasonal_efficiency(main_code, main_category, main_fuel) if _is_heat_network_main(main): primary_age = _dwelling_age_band(epc) - eff = 1.0 / _heat_network_dlf(primary_age) + # Worksheet (307): heat required = demand × (305) × (306 DLF), so the + # delivered-per-fuel efficiency carries 1 / ((305) charging factor × + # DLF). The Table 4c(3) flat-rate charging penalty raises demand. + eff = 1.0 / ( + _heat_network_dlf(primary_age) + * _heat_network_space_charging_factor(main) + ) return eff @@ -7074,8 +7130,13 @@ def cert_to_inputs( # HW from main on a heat-network cert: the DHW also incurs the # network's distribution losses. Same 1/DLF override as for # space heating so the delivered HW kWh reflects q_useful × DLF - # = q_generated, matching the per-kWh-generated unit price. - water_eff = 1.0 / _heat_network_dlf(primary_age) + # = q_generated, matching the per-kWh-generated unit price. Worksheet + # (310): heat required = (64) × (305a) × (306 DLF), so the DHW + # efficiency also carries 1 / Table 4c(3) (305a) charging factor. + water_eff = 1.0 / ( + _heat_network_dlf(primary_age) + * _heat_network_dhw_charging_factor(main) + ) elif epc.sap_heating.water_heating_code in _WATER_HEAT_NETWORK_ONLY_CODES: # HW-only heat network (whc 950/951/952): the Table 4a plant # efficiency is already in `water_eff`; apply the Table 12c diff --git a/scripts/decompose_co2_pe_error.py b/scripts/decompose_co2_pe_error.py new file mode 100644 index 00000000..a025a738 --- /dev/null +++ b/scripts/decompose_co2_pe_error.py @@ -0,0 +1,124 @@ +"""Decompose the API-path CO2/PE over-estimate against lodged EPC figures. + +The corpus integration test surfaced a systematic +5-10% over-estimate on +CO2 and PE while cost/SAP is well-calibrated. This script profiles the +structure of that bias: multiplicative vs additive, per-component shares, +and segmentation by fuel / PV / heating type — to localise the cause. +""" +from __future__ import annotations + +import json +import statistics as stats +from collections import defaultdict +from pathlib import Path +from typing import Any + +from datatypes.epc.domain.mapper import EpcPropertyDataMapper +from domain.sap10_calculator.calculator import calculate_sap_from_inputs +from domain.sap10_calculator.rdsap.cert_to_inputs import ( + SAP_10_2_SPEC_PRICES, + cert_to_inputs, +) + +CORPUS = Path("backend/epc_api/json_samples/RdSAP-Schema-21.0.1/corpus.jsonl") + + +def main() -> None: + docs = [ + json.loads(line) + for line in CORPUS.read_text().splitlines() + if line.strip() + ] + rows: list[dict[str, Any]] = [] + for doc in docs: + lodged_sap = doc.get("energy_rating_current") + lodged_co2 = doc.get("co2_emissions_current") # t/yr + lodged_pe = doc.get("energy_consumption_current") # kWh/m2/yr + if lodged_pe is None or lodged_co2 is None: + continue + try: + epc = EpcPropertyDataMapper.from_api_response(doc) + res = calculate_sap_from_inputs( + cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES) + ) + except Exception: + continue + tfa = res.intermediate["tfa_m2"] + im = res.intermediate + rows.append({ + "cert": doc.get("rrn") or "", + "sap_err": res.sap_score_continuous - (lodged_sap or 0), + "our_pe": res.primary_energy_kwh_per_m2, + "lodged_pe": lodged_pe, + "pe_diff": res.primary_energy_kwh_per_m2 - lodged_pe, + "pe_ratio": res.primary_energy_kwh_per_m2 / lodged_pe if lodged_pe else 0, + "our_co2": res.co2_kg_per_yr / 1000.0, + "lodged_co2": lodged_co2, + "co2_diff": res.co2_kg_per_yr / 1000.0 - lodged_co2, + "co2_ratio": (res.co2_kg_per_yr / 1000.0) / lodged_co2 if lodged_co2 else 0, + # PE components per m2 + "space_pe": im["space_heating_pe_kwh_per_m2"], + "hw_pe": im["hot_water_pe_kwh_per_m2"], + "other_pe": im["other_pe_kwh_per_m2"], + "pv_pe": im["pv_pe_offset_kwh_per_m2"], + "mains_gas": (doc.get("sap_energy_source") or {}).get("mains_gas"), + "has_pv": bool((doc.get("sap_energy_source") or {}).get("photovoltaic_supply")), + "tfa": tfa, + }) + + n = len(rows) + print(f"decomposed {n} certs\n" + "=" * 70) + + def summ(key: str) -> str: + vals = [r[key] for r in rows] + return (f"median={stats.median(vals):+.3f} mean={stats.mean(vals):+.3f} " + f"p10={sorted(vals)[n//10]:+.3f} p90={sorted(vals)[n*9//10]:+.3f}") + + print(f"PE diff (kWh/m2): {summ('pe_diff')}") + print(f"PE ratio (our/lod): {summ('pe_ratio')}") + print(f"CO2 diff (t/yr) : {summ('co2_diff')}") + print(f"CO2 ratio (our/lod): {summ('co2_ratio')}") + print("=" * 70) + + # multiplicative vs additive: correlate pe_diff with lodged_pe magnitude + lod = [r["lodged_pe"] for r in rows] + dif = [r["pe_diff"] for r in rows] + mean_lod, mean_dif = stats.mean(lod), stats.mean(dif) + cov = sum((l - mean_lod) * (d - mean_dif) for l, d in zip(lod, dif)) / n + var = sum((l - mean_lod) ** 2 for l in lod) / n + slope = cov / var + print(f"PE: regress pe_diff ~ lodged_pe -> slope={slope:+.4f} intercept={mean_dif - slope*mean_lod:+.3f}") + print(" (slope~0 => additive constant; slope>0 => multiplicative)") + print("=" * 70) + + # segment + def seg(name: str, pred: Any) -> None: + s = [r for r in rows if pred(r)] + if not s: + return + print(f" {name:28s} n={len(s):4d} PEdiff_med={stats.median(r['pe_diff'] for r in s):+6.2f} " + f"PEratio_med={stats.median(r['pe_ratio'] for r in s):.3f} " + f"CO2diff_med={stats.median(r['co2_diff'] for r in s):+.3f} " + f"SAPerr_med={stats.median(r['sap_err'] for r in s):+.2f}") + + print("SEGMENTS:") + seg("mains_gas=Y", lambda r: r["mains_gas"] in (True, 1, "Y")) + seg("mains_gas=N", lambda r: r["mains_gas"] not in (True, 1, "Y")) + seg("ALL", lambda r: True) + print("=" * 70) + print("PE COMPONENT MEANS (kWh/m2):") + for k in ("space_pe", "hw_pe", "other_pe", "pv_pe", "our_pe", "lodged_pe"): + print(f" {k:12s} mean={stats.mean(r[k] for r in rows):7.2f} " + f"median={stats.median(r[k] for r in rows):7.2f}") + print("=" * 70) + # Well-calibrated-SAP certs: energy is right, so PE diff isolates factor/scope. + cal = [r for r in rows if abs(r["sap_err"]) < 0.3] + print(f"WELL-CALIBRATED-SAP (|sap_err|<0.3) n={len(cal)}:") + print(f" PE diff median={stats.median(r['pe_diff'] for r in cal):+.2f} " + f"ratio={stats.median(r['pe_ratio'] for r in cal):.3f}") + print(f" CO2 diff median={stats.median(r['co2_diff'] for r in cal):+.3f} " + f"ratio={stats.median(r['co2_ratio'] for r in cal):.3f}") + + +if __name__ == "__main__": + main() diff --git a/tests/domain/sap10_calculator/rdsap/test_cert_to_inputs.py b/tests/domain/sap10_calculator/rdsap/test_cert_to_inputs.py index ed9d43ab..326034d0 100644 --- a/tests/domain/sap10_calculator/rdsap/test_cert_to_inputs.py +++ b/tests/domain/sap10_calculator/rdsap/test_cert_to_inputs.py @@ -277,6 +277,70 @@ def test_heat_network_main_applies_table12c_dlf_to_main_heating_efficiency() -> assert inputs.main_heating_efficiency == pytest.approx(1.0 / 1.41, abs=0.005) +def test_heat_network_flat_rate_charging_applies_table_4c3_space_factor() -> None: + # Arrange — community heat-network main (Table 4a code 301, cat 6), age + # band E (Table 12c DLF = 1.41). Control 2307 = "Flat rate charging, + # TRVs". SAP 10.2 Table 4c(3) (PDF p.169) multiplies the heat-network + # heat requirement by 1.05 for flat-rate charging (worksheet (305) → + # (307) = (98c) × (302) × (305) × (306)). Flat-rate billing gives no + # incentive to economise, so demand rises. The factor folds into the + # delivered-per-fuel efficiency alongside the DLF: 1 / (DLF × 1.05). + main = MainHeatingDetail( + has_fghrs=False, + main_fuel_type=20, # mains gas (community) + heat_emitter_type=1, + emitter_temperature=1, + main_heating_control=2307, # Flat rate charging, TRVs + main_heating_category=6, + sap_main_heating_code=301, + ) + part = make_building_part(construction_age_band="E") + epc = make_minimal_sap10_epc( + total_floor_area_m2=_TYPICAL_TFA_M2, + habitable_rooms_count=4, + country_code="ENG", + sap_building_parts=[part], + sap_heating=make_sap_heating(main_heating_details=[main]), + ) + + # Act + inputs = cert_to_inputs(epc) + + # Assert — efficiency = 1 / (DLF 1.41 × Table 4c(3) factor 1.05). + assert abs(inputs.main_heating_efficiency - 1.0 / (1.41 * 1.05)) <= 1e-9 + + +def test_heat_network_charging_linked_to_use_has_no_table_4c3_penalty() -> None: + # Arrange — same community heat-network main, but control 2306 = + # "Charging system linked to use of heating, programmer and TRVs". SAP + # 10.2 Table 4c(3) (PDF p.169) gives factor 1.0 (charges track usage, so + # no demand penalty), leaving the efficiency at the bare 1/DLF — i.e. the + # 2307 flat-rate penalty above must NOT fire here. + main = MainHeatingDetail( + has_fghrs=False, + main_fuel_type=20, + heat_emitter_type=1, + emitter_temperature=1, + main_heating_control=2306, # Charging linked to use, programmer + TRVs + main_heating_category=6, + sap_main_heating_code=301, + ) + part = make_building_part(construction_age_band="E") + epc = make_minimal_sap10_epc( + total_floor_area_m2=_TYPICAL_TFA_M2, + habitable_rooms_count=4, + country_code="ENG", + sap_building_parts=[part], + sap_heating=make_sap_heating(main_heating_details=[main]), + ) + + # Act + inputs = cert_to_inputs(epc) + + # Assert — efficiency = 1 / DLF 1.41, no Table 4c(3) multiplier. + assert abs(inputs.main_heating_efficiency - 1.0 / 1.41) <= 1e-9 + + def test_heat_network_dlf_uses_first_non_empty_building_part_age_band() -> None: # Arrange — the GOV.UK API lodges a junk empty leading building part # (all fields absent) before the real Main Dwelling. The dwelling age diff --git a/tests/infrastructure/epc_client/test_sap_accuracy_corpus.py b/tests/infrastructure/epc_client/test_sap_accuracy_corpus.py index 8fa8b382..27bdcd52 100644 --- a/tests/infrastructure/epc_client/test_sap_accuracy_corpus.py +++ b/tests/infrastructure/epc_client/test_sap_accuracy_corpus.py @@ -41,18 +41,30 @@ _CORPUS = Path( ) # Measured floors/ceilings over the fixed corpus at HEAD (1000 certs, 0 skips). -# Current: SAP within-0.5 = 65.0%, SAP MAE = 1.174. -# CO2 MAE = 0.27 t/yr (signed +0.17 — a systematic over-estimate, see below). -# PE MAE = 14.6 kWh/m2/yr (signed +8.9). +# Current: SAP within-0.5 = 65.9%, SAP MAE = 1.160 (heat-network Table 4c(3) +# flat-rate charging factor, this slice: community cluster 38% -> 62% within-0.5). +# CO2 MAE = 0.28 t/yr (signed +0.17 — a systematic over-estimate, see below). +# PE MAE = 14.7 kWh/m2/yr (signed +9.2). # # The SAP (cost) gauge is the optimised target — its floor/ceiling are TIGHT. -# CO2 and PE are reported + LOOSELY guarded: cost is well-calibrated but CO2 -# and PE both run ~+5-10% high (a real systematic gap, not yet investigated — -# uniform across fuels, so a CO2/PE-factor or scope issue, NOT the energy or -# cost). Their ceilings catch "got worse", not "isn't perfect". -# RATCHET any of these up when a slice tightens the corresponding metric. -_MIN_WITHIN_HALF_SAP = 0.62 -_MAX_SAP_MAE = 1.25 +# CO2 and PE are reported + LOOSELY guarded: both run ~+5% high vs the lodged +# figures. INVESTIGATED (see scripts/decompose_co2_pe_error.py): this is NOT a +# CO2/PE-factor or scope bug. Table 12 factors are spec-exact (SAP 10.2 p.188: +# mains gas PEF 1.130 / CO2 0.210, electricity 1.501 / 0.136) and worksheet +# (286)/(272) scope is identical to ours. Two real causes: +# 1. Per-cert mapper/demand fidelity. corr(SAP-err, PE-diff) = -0.54: when we +# over-estimate demand the cert under-rates on SAP AND over-estimates PE. +# Golden cert 0300-2747 (matched data) hits register PE to -0.10 kWh/m2, +# proving the engine is correct — the corpus bias is the aggregate of the +# same unclosed mapper gaps the SAP gauge chases, seen through a more +# sensitive (linear, no standing-charge/deflator damping) lens. +# 2. SAP cost is genuinely calibrated, NOT energy hiding behind it: a +5% gas +# space-heating bump moves SAP -0.6 (would show as a -0.6 corpus bias if +# energy were 5% high; actual SAP bias is +0.145). +# So closing demand over-estimates lifts BOTH the SAP gauge and PE/CO2; there is +# no one-slice factor fix. RATCHET any ceiling up when a slice tightens it. +_MIN_WITHIN_HALF_SAP = 0.63 +_MAX_SAP_MAE = 1.22 _MAX_CO2_MAE_TONNES = 0.35 # t CO2 / yr vs co2_emissions_current _MAX_PE_PER_M2_MAE = 16.0 # kWh / m2 / yr vs energy_consumption_current