diff --git a/packages/domain/src/domain/sap/rdsap/tests/test_golden_fixtures.py b/packages/domain/src/domain/sap/rdsap/tests/test_golden_fixtures.py index defcd43e..16c240ba 100644 --- a/packages/domain/src/domain/sap/rdsap/tests/test_golden_fixtures.py +++ b/packages/domain/src/domain/sap/rdsap/tests/test_golden_fixtures.py @@ -1,23 +1,31 @@ -"""Regression-anchor tests for a small set of corpus certs whose -SAP rating and primary-energy values our calculator currently produces -within tolerance of what Elmhurst lodged. +"""Loose smoke-test regression anchors for a small set of corpus certs. -Purpose: catch regressions silently introduced by future slices. If a -slice that's meant to improve aggregate MAE accidentally pushes a -previously-correct cert outside the tolerance, this suite fails and we -can decide consciously whether the trade-off is worth it. +**Retiring**: per ADR-0010 §10 these cert-based fixtures contained +compensating errors against the cert-cal-prices state of the calculator +and are scheduled for replacement by BRE worked-example fixtures (P5). +Until P5 lands, the fixtures stay in place as a *loose* smoke test — +catching only catastrophic regressions, not per-line spec-correctness +breaks. -Tolerance rationale: -- SAP rounded-integer residual ≤ 1 — the score the cert lodges; integer - rounding means an exact "0" check would over-fit to integer flips. -- PE residual ≤ 10 kWh/m² — looser than ideal but reflects current - calculator capability; tightens as we close the PE bias. +Purpose: catch wholesale-broken slices (e.g. a refactor that drops a +worksheet stage entirely) between now and P5. Per-section verification +during the spec sweep should lean on BRE worked-example unit tests, not +on these fixtures. + +Tolerance rationale (per ADR-0010 §10): +- SAP rounded-integer residual ≤ 5 — was ±1 under cert-cal prices. 
+ Loosened because spec prices produce ±2-3 SAP drift on these certs (the cert-cal prices had been numerically tuned around the same + certs). +- PE residual ≤ 25 kWh/m² — was ±10. Loosened on the same logic. Selection criteria (see docs/sap-spec/PARITY_FINDINGS.md): from a 1000-cert random sample these 7 certs satisfied |continuous SAP residual| ≤ 1.0 AND |PE residual| ≤ 10 AND (main_heating_category != 4 OR main_heating_data_source != 1) -i.e. non-PCDB-heat-pump, lowest combined-residual quartile of the sample. +under the **cert-cal prices** that have since been deleted. They are +no longer a "lowest-residual" set under spec prices, but stable enough +to catch obvious regressions. Each cert is a stored JSON document under `fixtures/golden/<cert_number>.json` — frozen at extraction time @@ -35,12 +43,15 @@ import pytest from datatypes.epc.domain.mapper import EpcPropertyDataMapper from domain.sap.calculator import calculate_sap_from_inputs -from domain.sap.rdsap.cert_to_inputs import cert_to_inputs -from domain.sap.tables.table_12_cert_calibration import cert_calibration_prices +from domain.sap.rdsap.cert_to_inputs import SAP_10_2_SPEC_PRICES, cert_to_inputs _FIXTURES_DIR = Path(__file__).parent / "fixtures" / "golden" -_SAP_TOLERANCE = 1 -_PE_TOLERANCE_KWH_PER_M2 = 10.0 +# Loose smoke-test tolerances per ADR-0010 §10; was ±1 / ±10 under +# cert-cal prices, which had been numerically tuned around these +# specific certs. Tightens when BRE worked-example fixtures (P5) +# replace this suite. +_SAP_TOLERANCE = 5 +_PE_TOLERANCE_KWH_PER_M2 = 25.0 @dataclass(frozen=True) @@ -99,13 +110,13 @@ _EXPECTATIONS: tuple[_GoldenExpectation, ...] 
= ( expected_pe_resid_kwh_per_m2=+8.18, notes="Semi-detached, TFA 102, age C, gas PCDB-listed.", ), - _GoldenExpectation( - cert_number="9390-2722-3520-2105-8715", - actual_sap=67, - expected_sap_resid=0, - expected_pe_resid_kwh_per_m2=+7.90, - notes="Mid-floor flat, TFA 75, age D, heat network (cat 6, sap_code 301).", - ), + # Retired early at P2.2: 9390-2722-3520-2105-8715 (mid-floor flat, + # heat network cat 6 sap_code 301). Drifted to SAP residual -7 + # under SAP 10.2 spec prices because cert-cal had absorbed + # heat-network DLF + Table 12c interactions on this cert. Cert JSON + # remains in fixtures/golden/ as reference data per ADR-0010 §10; + # will be subsumed by a BRE worked-example fixture covering the + # heat-network path during P5. ) @@ -122,12 +133,13 @@ def _load_cert(cert_number: str) -> dict[str, Any]: ids=lambda e: e.cert_number, ) def test_golden_cert_stays_within_tolerance(expectation: _GoldenExpectation) -> None: - # Arrange — load the frozen cert JSON, map to EpcPropertyData, run the - # calculator end-to-end with cert-calibration prices (the parity- - # validation mode the existing probe uses). + # Arrange — load the frozen cert JSON, map to EpcPropertyData, run + # the calculator end-to-end with SAP 10.2 (14-03-2025) spec prices + # per ADR-0010. Recorded residuals on _GoldenExpectation predate the + # cert-cal deletion and are informational only. doc = _load_cert(expectation.cert_number) epc = EpcPropertyDataMapper.from_api_response(doc) - inputs = cert_to_inputs(epc, prices=cert_calibration_prices()) + inputs = cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES) # Act result = calculate_sap_from_inputs(inputs)