diff --git a/backend/documents_parser/tests/fixtures/Summary_001431_rr8w.pdf b/backend/documents_parser/tests/fixtures/Summary_001431_rr8w.pdf new file mode 100644 index 00000000..d806f5b0 Binary files /dev/null and b/backend/documents_parser/tests/fixtures/Summary_001431_rr8w.pdf differ diff --git a/sap worksheets/golden fixture debugging/simulated case 3/P960-0001-001431 - 2026-06-03T104009.792.pdf b/sap worksheets/golden fixture debugging/simulated case 3/P960-0001-001431 - 2026-06-03T104009.792.pdf new file mode 100644 index 00000000..9e79a346 Binary files /dev/null and b/sap worksheets/golden fixture debugging/simulated case 3/P960-0001-001431 - 2026-06-03T104009.792.pdf differ diff --git a/sap worksheets/golden fixture debugging/simulated case 3/Summary_001431 (1).pdf b/sap worksheets/golden fixture debugging/simulated case 3/Summary_001431 (1).pdf new file mode 100644 index 00000000..d806f5b0 Binary files /dev/null and b/sap worksheets/golden fixture debugging/simulated case 3/Summary_001431 (1).pdf differ diff --git a/tests/domain/sap10_calculator/worksheet/_elmhurst_worksheet_001431_rr8.py b/tests/domain/sap10_calculator/worksheet/_elmhurst_worksheet_001431_rr8.py new file mode 100644 index 00000000..8011704e --- /dev/null +++ b/tests/domain/sap10_calculator/worksheet/_elmhurst_worksheet_001431_rr8.py @@ -0,0 +1,112 @@ +"""Mapper-driven cascade pin against the Elmhurst P960-0001-001431 +"simulated case 3" worksheet — a near-exact replica of golden cert +6035 (Main + Extension + Simplified room-in-roof, 8 windows). + +Like 000565 / sim case 1 / sim case 2, this fixture does NOT hand-build +the EpcPropertyData: it routes the Summary PDF through +ElmhurstSiteNotesExtractor + from_elmhurst_site_notes so the SAP-result +pin grid exercises the WHOLE extractor + mapper + calculator pipeline. + +Purpose: prove the calculator is spec-correct for the 6035 archetype +(after S0380.192 Simplified-RR + S0380.193 suspended-floor fixes). 
This +cert reproduces 6035's 8 windows (≈14.15 m²) and Main ground-floor +heat-loss perimeter (15.99 m). It still differs from 6035 in ONE input: +the Main FIRST-floor HLP is 15.99 here vs 6035's 8.32 (6035's upper +storey has less exposed perimeter), so it is not yet byte-identical to +6035. All 11 Block-1 line refs nonetheless pin at abs=1e-4 against this +cert's OWN worksheet, confirming the cascade reproduces the spec engine +exactly for this Main+Ext+RR+suspended-floor+gas-combi shape — so 6035's +residual +19 PE vs the lodged register is lodged-register divergence, +not a cascade gap. + +Cert shape: Main + Extension 1, both solid brick WITH internal +insulation (Main) / as-built (Ext1), 3 storeys, Simplified room-in-roof +on the Main (floor 29.75 m², exposed + party gables), suspended +uninsulated ground floors, gas-combi SAP code 104, 8 windows, no PV. + +Source: user-simulated PDFs at `sap worksheets/golden fixture +debugging/simulated case 3/`. The Summary is mirrored into the tracked +`backend/documents_parser/tests/fixtures/Summary_001431_rr8w.pdf` +(distinct name — the corpus reuses cert 001431). + +Worksheet pin targets (P960-0001-001431, Block 1 — energy rating): +- SAP rating 68 (line 258), ECF 2.3146 (line 257) +- Total fuel cost £951.3425 (line 255) +- CO2 4767.4862 kg/year (line 272) +- Space heating 16086.3557 kWh/year (Σ monthly (98)) +- Main 1 fuel 19150.4235 kWh/year (line 211) +- Secondary fuel 0.0 (line 215) +- Hot water fuel 3307.2639 kWh/year (line 219) +- Lighting 262.0885 kWh/year (line 232) +- Pumps/fans 86.0 kWh/year (line 231) + +Per [[feedback-zero-error-strict]] + [[feedback-e2e-validation- +philosophy]]: pins are abs=1e-4 against the worksheet PDF. 
+""" + +from __future__ import annotations + +import re +import subprocess +from pathlib import Path +from typing import Final + +from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtractor +from datatypes.epc.domain.epc_property_data import EpcPropertyData +from datatypes.epc.domain.mapper import EpcPropertyDataMapper + + +# parents[0]=worksheet/, [1]=sap10_calculator/, [2]=domain/, [3]=tests/, +# [4]=repo root. +_SUMMARY_PDF: Final[Path] = ( + Path(__file__).resolve().parents[4] + / "backend" / "documents_parser" / "tests" / "fixtures" + / "Summary_001431_rr8w.pdf" +) + + +def _summary_pdf_to_textract_style_pages(pdf_path: Path) -> list[str]: + """Convert a Summary PDF into the per-page text format the + ElmhurstSiteNotesExtractor expects (label\\nvalue sequences). + + Mirror of the helper in `test_summary_pdf_mapper_chain.py` / + `_elmhurst_worksheet_000565.py`. + """ + info = subprocess.run( + ["pdfinfo", str(pdf_path)], capture_output=True, text=True, check=True, + ).stdout + m = re.search(r"Pages:\s+(\d+)", info) + if m is None: + raise RuntimeError(f"Could not parse page count from {pdf_path}") + page_count = int(m.group(1)) + + pages: list[str] = [] + for i in range(1, page_count + 1): + layout = subprocess.run( + [ + "pdftotext", "-layout", "-f", str(i), "-l", str(i), + str(pdf_path), "-", + ], + capture_output=True, text=True, check=True, + ).stdout + tokens: list[str] = [] + for line in layout.splitlines(): + if not line.strip(): + tokens.append("") + continue + parts = [p for p in re.split(r"\s{2,}", line.strip()) if p] + tokens.extend(parts) + pages.append("\n".join(tokens)) + return pages + + +def build_epc() -> EpcPropertyData: + """Route the simulated case-3 Summary through extractor + mapper. + + No hand-built EpcPropertyData — the extractor and mapper are part of + the test target. Exercises the S0380.192 Simplified-RR fix and the + S0380.193 suspended-floor sealed-rule fix. 
+ """ + pages = _summary_pdf_to_textract_style_pages(_SUMMARY_PDF) + site_notes = ElmhurstSiteNotesExtractor(pages).extract() + return EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) diff --git a/tests/domain/sap10_calculator/worksheet/test_e2e_elmhurst_sap_score.py b/tests/domain/sap10_calculator/worksheet/test_e2e_elmhurst_sap_score.py index f2abc332..f121b0e6 100644 --- a/tests/domain/sap10_calculator/worksheet/test_e2e_elmhurst_sap_score.py +++ b/tests/domain/sap10_calculator/worksheet/test_e2e_elmhurst_sap_score.py @@ -39,6 +39,7 @@ from tests.domain.sap10_calculator.worksheet import ( _elmhurst_worksheet_000565 as _w000565, _elmhurst_worksheet_001431 as _w001431, _elmhurst_worksheet_001431_rr as _w001431_rr, + _elmhurst_worksheet_001431_rr8 as _w001431_rr8, ) from tests.domain.sap10_calculator.worksheet._elmhurst_fixtures import ( ALL_FIXTURES as _ELMHURST_FIXTURES, @@ -183,6 +184,23 @@ _FIXTURE_PINS: Final[dict[str, FixtureCascadePins]] = { lighting_kwh_per_yr=282.6414, pumps_fans_kwh_per_yr=86.0, ), + # Mapper-driven cohort entry — Summary_001431_rr8w.pdf → extractor → + # mapper → calculator. Near-exact 6035 replica: Main + Extension + + # Simplified room-in-roof, 8 windows (≈14.15 m², matching 6035), + # suspended uninsulated floors. Differs from 6035 only in the Main + # first-floor HLP (15.99 here vs 6035's 8.32). Pins at 1e-4 confirm + # the cascade is spec-correct for the archetype → 6035's +19 PE vs + # the lodged register is lodged-register divergence, not a calc gap. 
+ "001431_rr8": FixtureCascadePins( + sap_score=68, sap_score_continuous=67.7118, ecf=2.3146, + total_fuel_cost_gbp=951.3425, co2_kg_per_yr=4767.4862, + space_heating_kwh_per_yr=16086.3557, + main_heating_fuel_kwh_per_yr=19150.4235, + secondary_heating_fuel_kwh_per_yr=0.0, + hot_water_kwh_per_yr=3307.2639, + lighting_kwh_per_yr=262.0885, + pumps_fans_kwh_per_yr=86.0, + ), } @@ -196,6 +214,7 @@ _FIXTURE_MODULES: Final[dict[str, ModuleType]] = { "000565": _w000565, "001431": _w001431, "001431_rr": _w001431_rr, + "001431_rr8": _w001431_rr8, }