From 82b8a16b40954a431d6be938eea6a63f3ed5cf6a Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar
Date: Sun, 31 May 2026 08:36:08 +0000
Subject: [PATCH] Slice S0380.129: heating-systems corpus residual-pin regression guard
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The 001431 corpus at `sap worksheets/heating systems examples/` now has
a permanent test module pinning cascade-vs-worksheet residuals across
all 41 populated heating-system variants. The corpus is a
controlled-variable test set — same dwelling (semi-detached, TFA 90 m²,
age G, W6 9BF, Elmhurst P960 worksheet format) under different heating
configurations — so every cascade-vs-worksheet residual is fully
attributable to the heating subsystem.

`test_heating_systems_corpus_residual_matches_pin` is parametrised by
variant folder name. Per variant it:

1. Extracts Block 11a (individual) or Block 11b (community) pins from
   the P960 PDF — continuous SAP (`SAP value` row), total fuel cost
   (255)/(355), CO2 (272/372/382/383), PE (286/386/486/483).
2. Routes the Summary PDF through ElmhurstSiteNotesExtractor →
   EpcPropertyDataMapper.from_elmhurst_site_notes → cert_to_inputs →
   calculate_sap_from_inputs.
3. Asserts each of the four cascade outputs sits within an absolute
   tolerance of the pinned residual.

Tolerances are tight (SAP ±0.001, cost ±£0.01, CO2 ±0.1 kg/yr,
PE ±0.1 kWh/yr) — the *expected residual* moves toward 0 as
heating-cascade gaps close; the *tolerance* never widens. Per
[[feedback-zero-error-strict]] + [[feedback-golden-residuals-near-zero]].

Pins captured at HEAD `729ee29c` (post-S0380.128). All 41 pass.
Smallest residual: `solid fuel 8` +0.87 SAP / −£20 cost (closest to
closure). First negative ΔSAP: `community heating 6` −6.87 SAP / +£158
cost (heat-pump heat network — only variant where cascade UNDERshoots
the worksheet).

Extended handover suite at HEAD post-slice: **873 pass, 0 fail** (was
832 + 41 new parametrised cases).
Pyright net-zero on new file (0 → 0).

Co-Authored-By: Claude Opus 4.7
---
 .../tests/test_heating_systems_corpus.py      | 302 ++++++++++++++++++
 1 file changed, 302 insertions(+)
 create mode 100644 backend/documents_parser/tests/test_heating_systems_corpus.py

diff --git a/backend/documents_parser/tests/test_heating_systems_corpus.py b/backend/documents_parser/tests/test_heating_systems_corpus.py
new file mode 100644
index 00000000..e89c3d1b
--- /dev/null
+++ b/backend/documents_parser/tests/test_heating_systems_corpus.py
@@ -0,0 +1,302 @@
+"""Heating-systems corpus residual pins — same property × heating variants.
+
+The fixtures at `sap worksheets/heating systems examples/` lodge the same
+dwelling (Reference 001431, semi-detached, TFA 90 m², age G 1983-1990,
+W6 9BF) under 41 distinct heating-system configurations. With the
+envelope held constant, every cascade-vs-worksheet residual between two
+variants is fully attributable to the heating subsystem — that's the
+controlled-variable signal this corpus was built to exercise.
+
+Per variant we extract Block 11a (individual heating) or Block 11b
+(community heating) pins from the P960 worksheet PDF, route the Summary
+PDF through `ElmhurstSiteNotesExtractor` → `from_elmhurst_site_notes` →
+`cert_to_inputs` → `calculate_sap_from_inputs`, and assert each of the
+four published outputs (continuous SAP, total fuel cost, CO2, PE)
+matches its pinned residual within a tight absolute tolerance.
+
+Residuals are non-zero today: the cascade overshoots most variants by
++1..+30 SAP points (with `community heating 6` undershooting at −6.87,
+the lone HP-fed heat-network shape). As heating-cascade gaps close the
+expected residuals shrink toward 0; the per-pin absolute tolerance
+stays tight so any drift fires loudly. Per
+[[feedback-golden-residuals-near-zero]] + [[feedback-zero-error-strict]]:
+re-pin tighter when a slice closes a gap, never widen the tolerance.
+
+Each Summary PDF is parsed via the same `pdftotext -layout` →
+Textract-style preprocessing the rest of the chain tests use.
+"""
+
+from __future__ import annotations
+
+import re
+import subprocess
+from dataclasses import dataclass
+from pathlib import Path
+
+import pytest
+
+from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtractor
+from datatypes.epc.domain.mapper import EpcPropertyDataMapper
+from domain.sap10_calculator.calculator import calculate_sap_from_inputs
+from domain.sap10_calculator.rdsap.cert_to_inputs import (
+    SAP_10_2_SPEC_PRICES,
+    cert_to_inputs,
+)
+
+
+_CORPUS_ROOT = (
+    Path(__file__).parents[3]
+    / "sap worksheets/heating systems examples"
+)
+
+
+# Per-pin absolute tolerances. Worksheet `SAP value` lodges 4 d.p.,
+# (255) total fuel cost 4 d.p., (272) total CO2 4 d.p., (286) Total
+# Primary energy kWh/year 4 d.p. The tolerances are deliberately
+# coarser than that lodged precision (1e-3 SAP, 1e-2 £, 1e-1 kg and
+# kWh): wide enough to absorb cascade float noise, tight enough that
+# any real drift fires. Never widen these; re-pin the expected
+# residual instead.
+_SAP_RESID_ABS_TOLERANCE = 0.001
+_COST_RESID_ABS_TOLERANCE_GBP = 0.01
+_CO2_RESID_ABS_TOLERANCE_KG = 0.1
+_PE_RESID_ABS_TOLERANCE_KWH = 0.1
+
+
+@dataclass(frozen=True)
+class _CorpusExpectation:
+    """Pinned residuals (cascade − worksheet) per heating-system variant."""
+
+    variant: str
+    block: str  # "11a" individual, "11b" community
+    expected_sap_resid: float
+    expected_cost_resid_gbp: float
+    expected_co2_resid_kg: float
+    expected_pe_resid_kwh: float
+
+
+# Captured at HEAD `729ee29c` (post-S0380.128). All 41 populated
+# fixtures cascade-execute; the residuals below are the current
+# cascade-vs-worksheet diff per variant. Closures land by re-pinning
+# the smaller expected residual.
+_EXPECTATIONS: tuple[_CorpusExpectation, ...] = (
+    _CorpusExpectation(variant='ashp', block='11a', expected_sap_resid=+5.6680, expected_cost_resid_gbp=-130.5995, expected_co2_resid_kg=-1.4283, expected_pe_resid_kwh=+1467.8983),
+    _CorpusExpectation(variant='community heating 1', block='11b', expected_sap_resid=+4.1830, expected_cost_resid_gbp=-96.3816, expected_co2_resid_kg=-786.6453, expected_pe_resid_kwh=-940.7364),
+    _CorpusExpectation(variant='community heating 2', block='11b', expected_sap_resid=+1.1558, expected_cost_resid_gbp=-26.6309, expected_co2_resid_kg=-498.3058, expected_pe_resid_kwh=+636.7545),
+    _CorpusExpectation(variant='community heating 3', block='11b', expected_sap_resid=+4.1830, expected_cost_resid_gbp=-96.3816, expected_co2_resid_kg=+2545.7991, expected_pe_resid_kwh=+11009.4778),
+    _CorpusExpectation(variant='community heating 4', block='11b', expected_sap_resid=+1.1558, expected_cost_resid_gbp=-26.6309, expected_co2_resid_kg=-3465.0640, expected_pe_resid_kwh=-374.6720),
+    _CorpusExpectation(variant='community heating 6', block='11b', expected_sap_resid=-6.8661, expected_cost_resid_gbp=+158.2067, expected_co2_resid_kg=-2002.8867, expected_pe_resid_kwh=+6995.3140),
+    _CorpusExpectation(variant='electric 1', block='11a', expected_sap_resid=+9.6439, expected_cost_resid_gbp=-222.2109, expected_co2_resid_kg=+14.3441, expected_pe_resid_kwh=+2837.1414),
+    _CorpusExpectation(variant='electric 11', block='11a', expected_sap_resid=+18.1002, expected_cost_resid_gbp=-417.0547, expected_co2_resid_kg=+579.6971, expected_pe_resid_kwh=-1067.1863),
+    _CorpusExpectation(variant='electric 12', block='11a', expected_sap_resid=+15.4249, expected_cost_resid_gbp=-355.4117, expected_co2_resid_kg=+620.4563, expected_pe_resid_kwh=-467.5748),
+    _CorpusExpectation(variant='electric 13', block='11a', expected_sap_resid=+18.3886, expected_cost_resid_gbp=-423.7001, expected_co2_resid_kg=+619.3628, expected_pe_resid_kwh=-1129.2285),
+    _CorpusExpectation(variant='electric 14', block='11a', expected_sap_resid=+18.3886, expected_cost_resid_gbp=-423.7001, expected_co2_resid_kg=+619.3628, expected_pe_resid_kwh=-1129.2285),
+    _CorpusExpectation(variant='electric 2', block='11a', expected_sap_resid=+5.8523, expected_cost_resid_gbp=-134.8455, expected_co2_resid_kg=+94.4364, expected_pe_resid_kwh=+2420.9013),
+    _CorpusExpectation(variant='electric 3', block='11a', expected_sap_resid=+14.6973, expected_cost_resid_gbp=-338.6485, expected_co2_resid_kg=-379.1296, expected_pe_resid_kwh=-850.9293),
+    _CorpusExpectation(variant='electric 5', block='11a', expected_sap_resid=+10.9720, expected_cost_resid_gbp=-252.8131, expected_co2_resid_kg=-218.5642, expected_pe_resid_kwh=+540.3309),
+    _CorpusExpectation(variant='electric 6', block='11a', expected_sap_resid=+10.9720, expected_cost_resid_gbp=-252.8131, expected_co2_resid_kg=-209.8689, expected_pe_resid_kwh=+568.4500),
+    _CorpusExpectation(variant='electric 7', block='11a', expected_sap_resid=+9.6834, expected_cost_resid_gbp=-223.1212, expected_co2_resid_kg=-137.9832, expected_pe_resid_kwh=+1061.3307),
+    _CorpusExpectation(variant='electric 8', block='11a', expected_sap_resid=+6.8875, expected_cost_resid_gbp=-158.6999, expected_co2_resid_kg=-34.9564, expected_pe_resid_kwh=+2113.8303),
+    _CorpusExpectation(variant='electric 9', block='11a', expected_sap_resid=+12.0340, expected_cost_resid_gbp=-277.2813, expected_co2_resid_kg=-255.6076, expected_pe_resid_kwh=+362.4518),
+    _CorpusExpectation(variant='gshp', block='11a', expected_sap_resid=+5.1598, expected_cost_resid_gbp=-118.8901, expected_co2_resid_kg=-41.4461, expected_pe_resid_kwh=+639.1890),
+    _CorpusExpectation(variant='no system', block='11a', expected_sap_resid=+21.9350, expected_cost_resid_gbp=-505.4134, expected_co2_resid_kg=+689.2188, expected_pe_resid_kwh=-2454.8193),
+    _CorpusExpectation(variant='oil 1', block='11a', expected_sap_resid=+13.6701, expected_cost_resid_gbp=-314.9811, expected_co2_resid_kg=-1381.5125, expected_pe_resid_kwh=+612.3606),
+    _CorpusExpectation(variant='oil 2', block='11a', expected_sap_resid=+26.0712, expected_cost_resid_gbp=-600.7179, expected_co2_resid_kg=+2230.1071, expected_pe_resid_kwh=+801.2920),
+    _CorpusExpectation(variant='oil 3', block='11a', expected_sap_resid=+30.9500, expected_cost_resid_gbp=-712.1785, expected_co2_resid_kg=+2859.5796, expected_pe_resid_kwh=+738.4592),
+    _CorpusExpectation(variant='oil 4', block='11a', expected_sap_resid=+28.5927, expected_cost_resid_gbp=-655.6129, expected_co2_resid_kg=+2636.9526, expected_pe_resid_kwh=+701.8340),
+    # NOTE(review): `oil 5` is a glaring outlier (+120.75 SAP, −£6312 cost)
+    # while every other pin sits within about ±31 SAP — presumably a cascade
+    # or extraction defect pinned as-is rather than a heating gap; confirm.
+    _CorpusExpectation(variant='oil 5', block='11a', expected_sap_resid=+120.7457, expected_cost_resid_gbp=-6312.0020, expected_co2_resid_kg=+1345.3630, expected_pe_resid_kwh=-2780.6222),
+    _CorpusExpectation(variant='oil 6', block='11a', expected_sap_resid=+24.4087, expected_cost_resid_gbp=-561.8886, expected_co2_resid_kg=-658.8928, expected_pe_resid_kwh=-478.5733),
+    _CorpusExpectation(variant='oil pcdb 1', block='11a', expected_sap_resid=+11.1667, expected_cost_resid_gbp=-257.2961, expected_co2_resid_kg=-1147.3111, expected_pe_resid_kwh=+1455.2982),
+    _CorpusExpectation(variant='oil pcdb 2', block='11a', expected_sap_resid=+11.1667, expected_cost_resid_gbp=-257.2961, expected_co2_resid_kg=-1147.3111, expected_pe_resid_kwh=+1455.2982),
+    _CorpusExpectation(variant='oil pcdb 3', block='11a', expected_sap_resid=+11.8747, expected_cost_resid_gbp=-273.6108, expected_co2_resid_kg=-1161.6582, expected_pe_resid_kwh=+1267.6118),
+    _CorpusExpectation(variant='pcdb 1', block='11a', expected_sap_resid=+21.8997, expected_cost_resid_gbp=-502.0190, expected_co2_resid_kg=-2392.1531, expected_pe_resid_kwh=-1050.3031),
+    _CorpusExpectation(variant='pcdb 3', block='11a', expected_sap_resid=+27.7563, expected_cost_resid_gbp=-637.0435, expected_co2_resid_kg=-446.3815, expected_pe_resid_kwh=+2097.4553),
+    _CorpusExpectation(variant='solid fuel 10', block='11a', expected_sap_resid=+14.7769, expected_cost_resid_gbp=-340.4814, expected_co2_resid_kg=+1906.2620, expected_pe_resid_kwh=-584.5284),
+    _CorpusExpectation(variant='solid fuel 11', block='11a', expected_sap_resid=+8.4098, expected_cost_resid_gbp=-193.7739, expected_co2_resid_kg=+2262.3481, expected_pe_resid_kwh=+2583.7764),
+    _CorpusExpectation(variant='solid fuel 2', block='11a', expected_sap_resid=+6.0050, expected_cost_resid_gbp=-138.3659, expected_co2_resid_kg=-3718.6886, expected_pe_resid_kwh=+1594.6199),
+    _CorpusExpectation(variant='solid fuel 3', block='11a', expected_sap_resid=+6.1846, expected_cost_resid_gbp=-142.5032, expected_co2_resid_kg=-5877.9595, expected_pe_resid_kwh=+3118.4874),
+    _CorpusExpectation(variant='solid fuel 4', block='11a', expected_sap_resid=+5.0671, expected_cost_resid_gbp=-116.7534, expected_co2_resid_kg=-3215.4585, expected_pe_resid_kwh=+2547.5896),
+    _CorpusExpectation(variant='solid fuel 5', block='11a', expected_sap_resid=+3.7888, expected_cost_resid_gbp=-87.2980, expected_co2_resid_kg=-2725.9268, expected_pe_resid_kwh=+3224.8144),
+    _CorpusExpectation(variant='solid fuel 6', block='11a', expected_sap_resid=+9.2944, expected_cost_resid_gbp=-214.1551, expected_co2_resid_kg=+2174.7565, expected_pe_resid_kwh=+4052.5690),
+    _CorpusExpectation(variant='solid fuel 7', block='11a', expected_sap_resid=+15.1079, expected_cost_resid_gbp=-344.9565, expected_co2_resid_kg=-3711.3064, expected_pe_resid_kwh=+488.1476),
+    _CorpusExpectation(variant='solid fuel 8', block='11a', expected_sap_resid=+0.8707, expected_cost_resid_gbp=-20.0627, expected_co2_resid_kg=+3524.9644, expected_pe_resid_kwh=+4103.0089),
+    _CorpusExpectation(variant='solid fuel 9', block='11a', expected_sap_resid=+15.1593, expected_cost_resid_gbp=-349.2946, expected_co2_resid_kg=+1810.7952, expected_pe_resid_kwh=+168.2046),
+)
+
+
+def _summary_pdf_to_textract_style_pages(pdf_path: Path) -> list[str]:
+    """Convert a Summary PDF into per-page Textract-style label/value
+    streams, mirroring the preprocessing in
+    `test_summary_pdf_mapper_chain.py`."""
+    info = subprocess.run(
+        ["pdfinfo", str(pdf_path)], capture_output=True, text=True, check=True,
+    ).stdout
+    m = re.search(r"Pages:\s+(\d+)", info)
+    if m is None:
+        raise RuntimeError(f"Could not parse page count from {pdf_path}")
+    page_count = int(m.group(1))
+    pages: list[str] = []
+    for i in range(1, page_count + 1):
+        layout = subprocess.run(
+            ["pdftotext", "-layout", "-f", str(i), "-l", str(i),
+             str(pdf_path), "-"],
+            capture_output=True, text=True, check=True,
+        ).stdout
+        tokens: list[str] = []
+        for line in layout.splitlines():
+            if not line.strip():
+                tokens.append("")
+                continue
+            parts = [p for p in re.split(r"\s{2,}", line.strip()) if p]
+            tokens.extend(parts)
+        pages.append("\n".join(tokens))
+    return pages
+
+
+def _extract_worksheet_pins(p960_pdf: Path, block: str) -> dict[str, float]:
+    """Extract Block 11a or 11b worksheet pins from the P960 PDF.
+
+    Block 11a (individual heating) lodges (255) Total energy cost,
+    (257) ECF, (258) SAP integer, plus a `SAP value` row carrying the
+    continuous SAP. Block 11b (community heating) mirrors at (355)/
+    (357)/(358). CO2 (272/372/382/383) and PE (286/386/486/483) appear
+    once per worksheet under the relevant block's emissions table.
+    """
+    txt = subprocess.run(
+        ["pdftotext", "-layout", str(p960_pdf), "-"],
+        capture_output=True, text=True, check=True,
+    ).stdout
+    if block == '11a':
+        seg_match = re.search(
+            r'11a\. SAP rating(.*?)(?:11b\.|12a\.|11c\.|11d\.)', txt, re.DOTALL,
+        )
+        cost_pin_code = '255'
+    elif block == '11b':
+        seg_match = re.search(
+            r'11b\. SAP rating(.*?)(?:12b\.|11c\.|11d\.)', txt, re.DOTALL,
+        )
+        cost_pin_code = '355'
+    else:
+        raise ValueError(f"unknown block {block!r}")
+    if seg_match is None:
+        raise RuntimeError(
+            f"could not locate Block {block} SAP rating section in {p960_pdf}",
+        )
+    seg = seg_match.group(1)
+    pre = txt[:seg_match.start()]
+    sap_c_match = re.search(r'SAP value\s+([-\d.]+)', seg)
+    cost_match = re.search(
+        rf'Total energy cost\s+(-?[\d.]+)\s+\({cost_pin_code}\)', pre,
+    )
+    if sap_c_match is None:
+        raise RuntimeError(f"missing `SAP value` in Block {block}: {p960_pdf}")
+    if cost_match is None:
+        raise RuntimeError(
+            f"missing `Total energy cost ({cost_pin_code})` in {p960_pdf}",
+        )
+    co2: float | None = None
+    for code in ('272', '372', '382', '383'):
+        m = re.search(rf'Total CO2, kg/year\s+(-?[\d.]+)\s+\({code}\)', txt)
+        if m is not None:
+            co2 = float(m.group(1))
+            break
+    pe: float | None = None
+    for code in ('286', '386', '486', '483'):
+        m = re.search(
+            rf'Total Primary energy kWh/year\s+(-?[\d.]+)\s+\({code}\)', txt,
+        )
+        if m is not None:
+            pe = float(m.group(1))
+            break
+    if co2 is None or pe is None:
+        raise RuntimeError(f"missing CO2/PE pin in {p960_pdf}")
+    return {
+        'sap_c': float(sap_c_match.group(1)),
+        'cost': float(cost_match.group(1)),
+        'co2': co2,
+        'pe': pe,
+    }
+
+
+def _variant_paths(variant: str) -> tuple[Path, Path]:
+    """Resolve the Summary + P960 PDF pair for a given variant folder.
+
+    Candidates are sorted before the first is taken: `Path.glob` yields
+    entries in arbitrary order, so an unsorted `[0]` could resolve to a
+    different PDF between runs or machines if a folder ever holds more
+    than one match.
+    """
+    folder = _CORPUS_ROOT / variant
+    summary_candidates = sorted(folder.glob('Summary_*.pdf'))
+    p960_candidates = sorted(folder.glob('P960-*.pdf'))
+    if not summary_candidates:
+        raise RuntimeError(f"no Summary PDF in {folder}")
+    if not p960_candidates:
+        raise RuntimeError(f"no P960 PDF in {folder}")
+    return summary_candidates[0], p960_candidates[0]
+
+
+@pytest.mark.parametrize(
+    "expectation",
+    _EXPECTATIONS,
+    ids=lambda e: e.variant,
+)
+def test_heating_systems_corpus_residual_matches_pin(
+    expectation: _CorpusExpectation,
+) -> None:
+    # Arrange — extract worksheet pins + route Summary through the full
+    # extractor → mapper → cascade chain. Same property (001431) under a
+    # different heating system per variant; the cascade-vs-worksheet
+    # residual is the heating-cascade signal we're pinning.
+    summary_pdf, p960_pdf = _variant_paths(expectation.variant)
+    worksheet = _extract_worksheet_pins(p960_pdf, expectation.block)
+    pages = _summary_pdf_to_textract_style_pages(summary_pdf)
+    site_notes = ElmhurstSiteNotesExtractor(pages).extract()
+    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
+
+    # Act
+    result = calculate_sap_from_inputs(
+        cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES),
+    )
+
+    sap_resid = result.sap_score_continuous - worksheet['sap_c']
+    cost_resid = result.total_fuel_cost_gbp - worksheet['cost']
+    co2_resid = result.co2_kg_per_yr - worksheet['co2']
+    pe_resid = result.primary_energy_kwh_per_yr - worksheet['pe']
+
+    # Assert — each residual sits within its absolute tolerance of the
+    # pinned value. Drift beyond tolerance fires loudly; closures land
+    # by re-pinning the smaller expected residual (never widen the
+    # tolerance — per [[feedback-zero-error-strict]]). All four pins are
+    # checked before failing so a single run reports every drifted
+    # output. `not (x <= tol)` rather than `x > tol` so a NaN residual
+    # still fails.
+    drifted: list[str] = []
+    if not (abs(sap_resid - expectation.expected_sap_resid) <= _SAP_RESID_ABS_TOLERANCE):
+        drifted.append(
+            f"{expectation.variant}: continuous SAP residual {sap_resid:+.4f} "
+            f"drifted from pin {expectation.expected_sap_resid:+.4f} "
+            f"(tolerance ±{_SAP_RESID_ABS_TOLERANCE})"
+        )
+    if not (abs(cost_resid - expectation.expected_cost_resid_gbp) <= _COST_RESID_ABS_TOLERANCE_GBP):
+        drifted.append(
+            f"{expectation.variant}: total fuel cost residual £{cost_resid:+.4f} "
+            f"drifted from pin £{expectation.expected_cost_resid_gbp:+.4f} "
+            f"(tolerance ±£{_COST_RESID_ABS_TOLERANCE_GBP})"
+        )
+    if not (abs(co2_resid - expectation.expected_co2_resid_kg) <= _CO2_RESID_ABS_TOLERANCE_KG):
+        drifted.append(
+            f"{expectation.variant}: CO2 residual {co2_resid:+.4f} kg/yr "
+            f"drifted from pin {expectation.expected_co2_resid_kg:+.4f} kg/yr "
+            f"(tolerance ±{_CO2_RESID_ABS_TOLERANCE_KG})"
+        )
+    if not (abs(pe_resid - expectation.expected_pe_resid_kwh) <= _PE_RESID_ABS_TOLERANCE_KWH):
+        drifted.append(
+            f"{expectation.variant}: PE residual {pe_resid:+.4f} kWh/yr "
+            f"drifted from pin {expectation.expected_pe_resid_kwh:+.4f} kWh/yr "
+            f"(tolerance ±{_PE_RESID_ABS_TOLERANCE_KWH})"
+        )
+    assert not drifted, "\n".join(drifted)