# Recovered from a whitespace-collapsed `git diff` that added this module as
#   tests/domain/sap10_calculator/worksheet/_elmhurst_worksheet_001431_case38.py
# together with the binary fixture
#   backend/documents_parser/tests/fixtures/Summary_001431_case38.pdf
"""Mapper-driven cascade pin against the Elmhurst P960-0001-001431
"simulated case 38" worksheet — a mains-gas dwelling with a code-117
regular boiler (1979-1997, winter 66%), control 2102 (programmer, no room
thermostat → −5pp interlock → (206)=61%), and a **mains-gas condensing gas
fire secondary** (SAP code 611).

This is the realistic re-generation of "simulated case 37": case 37 lodged
the same dwelling's code-605 gas fire on BIOGAS (7.60 p/kWh), which the
Elmhurst Summary export cannot carry (it lodges only the secondary SAP
code, not its sub-fuel — see `_elmhurst_secondary_fuel_from_sap_code`), so
the mains-gas modal default left a +7 SAP gap that was purely the biogas
sub-fuel. With a mains-gas secondary the whole cascade reproduces the
worksheet EXACTLY, confirming the boiler-efficiency / control-2102 /
secondary handling is all correct.

Like 000565 / the _rr cases / case 20 / 21, this fixture does NOT hand-
build the EpcPropertyData: it routes the Summary PDF through
ElmhurstSiteNotesExtractor + from_elmhurst_site_notes so the pin exercises
the WHOLE extractor + mapper + calculator pipeline.

Source: user-simulated PDFs at `sap worksheets/golden fixture debugging/
simulated case 38/`. The Summary is mirrored into the tracked
`backend/documents_parser/tests/fixtures/Summary_001431_case38.pdf` so the
test runs without depending on the unstaged workspace.

Worksheet pin targets (P960-0001-001431, "11a. SAP rating" block):
- SAP value (un-rounded, before (258) integer rounding) = 60.9152
- (272) Total CO2, kg/year = 5801.0770

Per [[feedback-zero-error-strict]] + [[feedback-continuous-sap-tolerance]]:
pins are abs <= 1e-3 against the worksheet PDF (the worksheet prints the
SAP value to 4 dp).
"""

from __future__ import annotations

import re
import subprocess
from pathlib import Path
from typing import Final

from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtractor
from datatypes.epc.domain.epc_property_data import EpcPropertyData
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
from domain.sap10_calculator.calculator import calculate_sap_from_inputs
from domain.sap10_calculator.rdsap.cert_to_inputs import cert_to_inputs

# parents[0]=worksheet/, [1]=sap10_calculator/, [2]=domain/, [3]=tests/,
# [4]=repo root.
_SUMMARY_PDF: Final[Path] = (
    Path(__file__).resolve().parents[4]
    / "backend" / "documents_parser" / "tests" / "fixtures"
    / "Summary_001431_case38.pdf"
)

LINE_258_SAP_VALUE_CONTINUOUS: Final[float] = 60.9152
LINE_272_TOTAL_CO2_KG_PER_YR: Final[float] = 5801.0770
_PIN_ABS: Final[float] = 1e-3

# `pdfinfo` prints one "Key:   value" pair per line. Anchoring to the start
# of a line stops a free-text field (e.g. a Title that happens to contain
# "Pages: 3") from being mistaken for the page count.
_PAGE_COUNT_RE: Final[re.Pattern[str]] = re.compile(
    r"^Pages:\s+(\d+)", re.MULTILINE
)

# In `pdftotext -layout` output, a run of two or more whitespace characters
# is treated as the gap between adjacent columns (label / value cells).
_COLUMN_GAP_RE: Final[re.Pattern[str]] = re.compile(r"\s{2,}")

# Encoding requested from the PDF tools and used to decode their stdout, so
# the extracted text does not depend on the locale of the host running the
# test.
_TOOL_ENCODING: Final[str] = "UTF-8"


def _run_pdf_tool(argv: list[str]) -> str:
    """Run a PDF command-line tool and return its stdout as text.

    Args:
        argv: Full argument vector, program name first. It is passed to
            ``subprocess.run`` as a list, so no shell is involved.

    Returns:
        The tool's standard output decoded as UTF-8.

    Raises:
        subprocess.CalledProcessError: The tool exited with a non-zero
            status (``check=True``).
        FileNotFoundError: The tool is not installed / not on ``PATH``.
    """
    completed = subprocess.run(
        argv,
        capture_output=True,
        text=True,
        # Decode explicitly instead of with the locale's preferred encoding;
        # callers ask the tool for the matching output encoding via `-enc`.
        encoding="utf-8",
        check=True,
    )
    return completed.stdout


def _layout_text_to_tokens(layout: str) -> list[str]:
    """Split one page of ``pdftotext -layout`` output into cell tokens.

    Each blank (or whitespace-only) line contributes a single empty-string
    token. Every other line is stripped and split wherever two or more
    whitespace characters occur; empty fragments are discarded.

    Args:
        layout: Raw text of a single page.

    Returns:
        The page's tokens in reading order.
    """
    tokens: list[str] = []
    for raw_line in layout.splitlines():
        stripped = raw_line.strip()
        if not stripped:
            # Keep blank lines as explicit empty tokens rather than dropping
            # them, exactly as the token stream was built before.
            tokens.append("")
            continue
        tokens.extend(cell for cell in _COLUMN_GAP_RE.split(stripped) if cell)
    return tokens


def _summary_pdf_to_textract_style_pages(pdf_path: Path) -> list[str]:
    """Convert a Summary PDF into the per-page text format the
    ElmhurstSiteNotesExtractor expects (label/value token sequences).

    The page count is read from ``pdfinfo``; each page is then rendered
    separately with ``pdftotext -layout`` and flattened to one token per
    line. Based on the helper in the other `_elmhurst_worksheet_*` fixtures;
    this copy additionally pins the tools' text encoding to UTF-8 and
    anchors the page-count match to the start of a line.
    NOTE(review): confirm whether the sibling helpers should get the same
    two changes.

    Args:
        pdf_path: Location of the Summary PDF to convert.

    Returns:
        One string per PDF page, in page order, each made of that page's
        tokens joined with ``"\\n"``.

    Raises:
        RuntimeError: ``pdfinfo`` output contains no ``Pages:`` line.
        subprocess.CalledProcessError: ``pdfinfo`` or ``pdftotext`` exited
            with a non-zero status.
        FileNotFoundError: ``pdfinfo`` or ``pdftotext`` is not installed.
    """
    info = _run_pdf_tool(["pdfinfo", "-enc", _TOOL_ENCODING, str(pdf_path)])
    match = _PAGE_COUNT_RE.search(info)
    if match is None:
        raise RuntimeError(f"Could not parse page count from {pdf_path}")
    page_count = int(match.group(1))

    pages: list[str] = []
    # pdftotext page numbers are 1-based; -f/-l select a single page so each
    # list entry corresponds to exactly one PDF page.
    for page_no in range(1, page_count + 1):
        layout = _run_pdf_tool(
            [
                "pdftotext", "-layout", "-enc", _TOOL_ENCODING,
                "-f", str(page_no), "-l", str(page_no),
                str(pdf_path), "-",  # "-" sends the text to stdout
            ]
        )
        pages.append("\n".join(_layout_text_to_tokens(layout)))
    return pages


def build_epc() -> EpcPropertyData:
    """Route the simulated case-38 Summary through extractor + mapper.

    No hand-built EpcPropertyData — the extractor and mapper are part of
    the test target.

    Returns:
        The EpcPropertyData produced by
        ``EpcPropertyDataMapper.from_elmhurst_site_notes`` from the site
        notes extracted out of ``_SUMMARY_PDF``.
    """
    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_PDF)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()
    return EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)


def test_case38_mains_gas_secondary_reproduces_the_worksheet_sap_and_co2() -> None:
    # Arrange — the full extractor -> mapper -> calculator pipeline on the
    # simulated case-38 Summary (mains-gas boiler 117 + mains-gas
    # condensing gas-fire secondary 611).
    epc = build_epc()

    # Act
    result = calculate_sap_from_inputs(cert_to_inputs(epc))

    # Assert — the SAP-rating block reproduces the worksheet exactly.
    sap_error = abs(result.sap_score_continuous - LINE_258_SAP_VALUE_CONTINUOUS)
    assert sap_error <= _PIN_ABS, (
        f"continuous SAP {result.sap_score_continuous!r} is not within "
        f"{_PIN_ABS} of worksheet value {LINE_258_SAP_VALUE_CONTINUOUS}"
    )
    co2_error = abs(result.co2_kg_per_yr - LINE_272_TOTAL_CO2_KG_PER_YR)
    assert co2_error <= _PIN_ABS, (
        f"total CO2 {result.co2_kg_per_yr!r} kg/yr is not within "
        f"{_PIN_ABS} of worksheet (272) value {LINE_272_TOTAL_CO2_KG_PER_YR}"
    )