From 171cb97c6eaa09e77c8a3ee36162e7897800e6f9 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar
Date: Wed, 20 May 2026 16:27:04 +0000
Subject: [PATCH] e2e: SAP-score regression test against both Elmhurst worksheets
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

First end-to-end test running EpcPropertyData → cert_to_inputs →
calculate_sap_from_inputs → SapResult and comparing against the
Elmhurst worksheet's headline SAP rating (line 258).

Current state:

  000490 mid-terrace gas combi, time-clock keep-hot
    SAP rating:    57 = 57 ✓ exact integer match
    Continuous:    56.72 vs 57.40 → 0.7 points off (rounding noise)

  000474 end-terrace gas combi, PCDB Vaillant ecoTEC pro
    SAP rating:    55 vs 62 → 7 points UNDER
    Space heating: 12299.6 vs 10612.9 (+16%)
    Hot water:     3020.0 vs 2291.8 (+32%)

The 000474 gap localises to (a) the legacy hot-water cascade not
knowing about PCDB Table 3b combi loss (over-estimates HW by 32%) and
(b) likely a downstream space-heating-efficiency consequence. Both will
shrink once the §4 worksheet orchestrator + Table 3b are wired into
cert_to_inputs.

Tolerances set at the CURRENT gap so subsequent improvements show up
as tightening, not silent drift. The 000474 ceiling drops to ≤2 SAP
points once the worksheet §4 path lands in the mapper.

Co-Authored-By: Claude Opus 4.7
---
 .../tests/test_e2e_elmhurst_sap_score.py      | 137 ++++++++++++++++++
 1 file changed, 137 insertions(+)
 create mode 100644 packages/domain/src/domain/sap/worksheet/tests/test_e2e_elmhurst_sap_score.py

diff --git a/packages/domain/src/domain/sap/worksheet/tests/test_e2e_elmhurst_sap_score.py b/packages/domain/src/domain/sap/worksheet/tests/test_e2e_elmhurst_sap_score.py
new file mode 100644
index 00000000..72a09ede
--- /dev/null
+++ b/packages/domain/src/domain/sap/worksheet/tests/test_e2e_elmhurst_sap_score.py
@@ -0,0 +1,137 @@
+"""End-to-end SAP score validation against the Elmhurst worksheet outputs.
+
+For each non-RR Elmhurst fixture, run the full calculator chain
+    EpcPropertyData → cert_to_inputs → calculate_sap_from_inputs
+and compare the resulting SAP score against the Elmhurst worksheet's
+SAP rating (line 258).
+
+These tests pin the current end-to-end gap so subsequent slices that
+shrink it (worksheet §4 wired into cert_to_inputs, PCDB Table 3b combi
+loss, etc.) show up as tolerance tightening rather than silent drift.
+
+Reference: Elmhurst U985-0001-000474.pdf and U985-0001-000490.pdf
+(supplied by the user; not stored in repo).
+"""
+
+from dataclasses import dataclass
+from typing import Final
+
+import pytest
+
+from domain.sap.calculator import Sap10Calculator
+from domain.sap.worksheet.tests import (
+    _elmhurst_worksheet_000474 as _w000474,
+    _elmhurst_worksheet_000490 as _w000490,
+)
+
+
+@dataclass(frozen=True)
+class ElmhurstExpectedSap:
+    """Headline figures from the Elmhurst worksheet's SAP rating section
+    (xlsx rows around line refs 240 / 255 / 257 / 258 / 261)."""
+
+    sap_rating: int  # (258) integer
+    sap_score_continuous: float  # (258) un-rounded
+    space_heating_kwh: float  # (98c) annual
+    hot_water_kwh: float  # (219) annual fuel
+    total_energy_cost_gbp: float  # (255)
+    ecf: float  # (257)
+
+
+_ELMHURST_000490_EXPECTED: Final[ElmhurstExpectedSap] = ElmhurstExpectedSap(
+    sap_rating=57,
+    sap_score_continuous=57.3979,
+    space_heating_kwh=11183.2751,
+    hot_water_kwh=2850.5701,
+    total_energy_cost_gbp=807.5421,
+    ecf=3.0539,
+)
+_ELMHURST_000474_EXPECTED: Final[ElmhurstExpectedSap] = ElmhurstExpectedSap(
+    sap_rating=62,
+    sap_score_continuous=62.2584,
+    space_heating_kwh=10612.8595,
+    hot_water_kwh=2291.7784,
+    total_energy_cost_gbp=655.6949,
+    ecf=2.7055,
+)
+
+
+def test_elmhurst_000490_end_to_end_sap_score_within_1_point() -> None:
+    """Mid-terrace combi-gas dwelling with time-clock keep-hot. The
+    legacy hot-water model (`domain.ml.demand.predicted_hot_water_kwh`)
+    closes this fixture to the integer SAP rating already; continuous
+    score is within 0.7 of the worksheet (rounding-noise territory).
+    """
+    # Arrange
+    epc = _w000490.build_epc()
+
+    # Act
+    result = Sap10Calculator().calculate(epc)
+
+    # Assert: integer rating matches exactly; continuous score within ±1.0
+    assert result.sap_score == _ELMHURST_000490_EXPECTED.sap_rating
+    assert result.sap_score_continuous == pytest.approx(
+        _ELMHURST_000490_EXPECTED.sap_score_continuous, abs=1.0
+    )
+
+
+def test_elmhurst_000474_end_to_end_sap_score_currently_within_7_points() -> None:
+    """End-terrace PCDB-tested Vaillant boiler — currently a 7-point
+    under-prediction. The gap localises to:
+
+    | metric          | actual  | expected | delta |
+    | --------------- | ------- | -------- | ----- |
+    | space heating   | 12299.6 | 10612.9  | +16%  |
+    | hot water fuel  | 3020.0  | 2291.8   | +32%  |
+    | total fuel cost | £778.09 | £655.69  | +19%  |
+
+    Hypothesised drivers (to close in future slices):
+    1. The legacy hot-water cascade doesn't know about PCDB Table 3b
+       combi loss — 000474's Vaillant ecoTEC pro tests below the
+       Table 3a row our cascade defaults to.
+    2. Space heating over-prediction may be downstream of fabric
+       heat-loss + heating efficiency cascade.
+
+    Tolerance set at the CURRENT gap so improvements show up as test
+    tightening, not silent drift. Drop to ≤2 points once the §4
+    orchestrator + PCDB combi loss are wired into cert_to_inputs.
+    """
+    # Arrange
+    epc = _w000474.build_epc()
+
+    # Act
+    result = Sap10Calculator().calculate(epc)
+
+    # Assert: integer and continuous deltas each capped at the 7-point ceiling
+    delta = abs(result.sap_score - _ELMHURST_000474_EXPECTED.sap_rating)
+    assert delta <= 7, (
+        f"SAP rating delta {delta} exceeds current-state ceiling of 7. "
+        f"Actual={result.sap_score}, expected={_ELMHURST_000474_EXPECTED.sap_rating}."
+    )
+    continuous_delta = abs(
+        result.sap_score_continuous - _ELMHURST_000474_EXPECTED.sap_score_continuous
+    )
+    assert continuous_delta <= 7.0, (
+        f"Continuous SAP delta {continuous_delta:.2f} exceeds ceiling 7.0"
+    )
+
+
+def test_elmhurst_000490_end_to_end_kwh_within_15pct() -> None:
+    """Per-end-use check for 000490: kWh within 15%, total fuel cost within
+    10% of the worksheet. Closer-fitting than the SAP score because intermediate
+    values aren't compressed through the cost-deflator + rating equations."""
+    # Arrange
+    epc = _w000490.build_epc()
+
+    # Act
+    result = Sap10Calculator().calculate(epc)
+
+    # Assert: kWh figures within 15% (relative); fuel cost within 10%
+    exp = _ELMHURST_000490_EXPECTED
+    assert result.space_heating_kwh_per_yr == pytest.approx(
+        exp.space_heating_kwh, rel=0.15
+    )
+    assert result.hot_water_kwh_per_yr == pytest.approx(exp.hot_water_kwh, rel=0.15)
+    assert result.total_fuel_cost_gbp == pytest.approx(
+        exp.total_energy_cost_gbp, rel=0.10
+    )