diff --git a/tests/domain/modelling/_elmhurst_recommendation.py b/tests/domain/modelling/_elmhurst_recommendation.py
new file mode 100644
index 00000000..9797f144
--- /dev/null
+++ b/tests/domain/modelling/_elmhurst_recommendation.py
@@ -0,0 +1,76 @@
+"""Parse an Elmhurst *recommendation* Summary PDF into an EpcPropertyData.
+
+The Modelling cascade pins use Elmhurst's own before/after measure
+re-lodgements as deterministic test vectors: each measure folder under
+`sap worksheets/Recommendations Elmhurst Files/` holds a `before` Summary
+(the baseline cert) and an `after` Summary (the same cert re-lodged with the
+measure applied). Applying the matching Recommendation Generator's overlay to
+the parsed `before` must reproduce the calculator's score on the parsed
+`after` at delta 0 (within the pins' 1e-4 tolerance) — proving the overlay is
+the exact field change Elmhurst made.
+
+This routes the Summary PDF through the same extractor + mapper chain the
+worksheet e2e fixtures use (`_elmhurst_worksheet_001431.build_epc`), NOT the
+Textract `parse_site_notes_pdf` path — that path has an unrelated window
+extraction bug on cert 001431. The before/after Summaries are mirrored into
+`tests/domain/modelling/fixtures/` so the pins do not depend on the unstaged
+workspace.
+"""
+
+from __future__ import annotations
+
+import re
+import subprocess
+from pathlib import Path
+from typing import Final
+
+from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtractor
+from datatypes.epc.domain.epc_property_data import EpcPropertyData
+from datatypes.epc.domain.mapper import EpcPropertyDataMapper
+
+_FIXTURES_DIR: Final[Path] = Path(__file__).resolve().parent / "fixtures"
+
+
+def _summary_pdf_to_textract_style_pages(pdf_path: Path) -> list[str]:
+    """Convert a Summary PDF into the per-page text format the
+    `ElmhurstSiteNotesExtractor` expects (label\\nvalue sequences).
+
+    Mirror of the helper in `_elmhurst_worksheet_001431.py`: `pdftotext
+    -layout` preserves the spatial label/value pairing on each line; we split
+    on 2+ spaces to surface the tokens, then rejoin newline-delimited.
+    """
+    info: str = subprocess.run(  # check=True: pdfinfo failure raises CalledProcessError
+        ["pdfinfo", str(pdf_path)], capture_output=True, text=True, check=True,
+    ).stdout
+    match = re.search(r"Pages:\s+(\d+)", info)  # find the "Pages: <n>" line
+    if match is None:
+        raise RuntimeError(f"Could not parse page count from {pdf_path}")
+    page_count = int(match.group(1))
+
+    pages: list[str] = []
+    for i in range(1, page_count + 1):  # pages are requested as 1..page_count
+        layout: str = subprocess.run(  # one pdftotext call per page
+            [
+                "pdftotext", "-layout", "-f", str(i), "-l", str(i),
+                str(pdf_path), "-",  # "-" as output file: text goes to stdout
+            ],
+            capture_output=True, text=True, check=True,
+        ).stdout
+        tokens: list[str] = []
+        for line in layout.splitlines():
+            if not line.strip():  # whitespace-only line: keep as an empty token
+                tokens.append("")
+                continue
+            parts = [p for p in re.split(r"\s{2,}", line.strip()) if p]
+            tokens.extend(parts)
+        pages.append("\n".join(tokens))  # one token per line within the page
+    return pages
+
+
+def parse_recommendation_summary(fixture_name: str) -> EpcPropertyData:
+    """Parse a before/after recommendation Summary fixture (by file name in
+    `tests/domain/modelling/fixtures/`) into an EpcPropertyData."""
+    pdf_path: Path = _FIXTURES_DIR / fixture_name  # fixtures/ beside this module
+    pages: list[str] = _summary_pdf_to_textract_style_pages(pdf_path)
+    site_notes = ElmhurstSiteNotesExtractor(pages).extract()
+    return EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
diff --git a/tests/domain/modelling/fixtures/cavity_wall_001431_after.pdf b/tests/domain/modelling/fixtures/cavity_wall_001431_after.pdf
new file mode 100644
index 00000000..e4263745
Binary files /dev/null and b/tests/domain/modelling/fixtures/cavity_wall_001431_after.pdf differ
diff --git a/tests/domain/modelling/fixtures/cavity_wall_001431_before.pdf b/tests/domain/modelling/fixtures/cavity_wall_001431_before.pdf
new file mode 100644
index 00000000..e08bad0a
Binary files /dev/null and b/tests/domain/modelling/fixtures/cavity_wall_001431_before.pdf differ
diff --git a/tests/domain/modelling/test_elmhurst_cascade_pins.py b/tests/domain/modelling/test_elmhurst_cascade_pins.py
new file mode 100644
index 00000000..8f7f5e83
--- /dev/null
+++ b/tests/domain/modelling/test_elmhurst_cascade_pins.py
@@ -0,0 +1,81 @@
+"""Elmhurst before/after cascade pins for the Recommendation Generators.
+
+Each measure has an Elmhurst `before` Summary (baseline cert) and an `after`
+Summary (the same cert re-lodged with the measure applied). The pin drives the
+matching generator on the parsed `before`, scores its Option's overlay through
+the `PackageScorer`, and asserts the result equals the calculator's score on
+the parsed `after` at `abs(diff) <= 1e-4` for SAP / CO2 / primary energy.
+
+This is the real cert→generator→overlay→calculator cascade, not a per-section
+isolation test (see `[[feedback-cascade-pin-methodology]]`): a non-zero delta
+is a named generator/overlay/calculator gap to fix, never a tolerance to widen
+(`[[feedback-zero-error-strict]]`).
+"""
+
+from __future__ import annotations
+
+from typing import Final
+
+from datatypes.epc.domain.epc_property_data import EpcPropertyData
+from domain.modelling.package_scorer import PackageScorer, Score
+from domain.modelling.product import Product
+from domain.modelling.recommendation import Recommendation
+from domain.modelling.simulation import EpcSimulation
+from domain.modelling.wall_recommendation import recommend_cavity_wall
+from domain.sap10_calculator.calculator import Sap10Calculator, SapResult
+from repositories.product.product_repository import ProductRepository
+from tests.domain.modelling._elmhurst_recommendation import (
+    parse_recommendation_summary,
+)
+
+# Pin tolerance: the Summary PDFs are deterministic test vectors, so the
+# overlay must reproduce the re-lodged cert exactly. Matches the worksheet
+# e2e tolerance.
+_PIN_ABS: Final[float] = 1e-4  # absolute bound applied to every metric diff below
+
+
+class _AnyProduct(ProductRepository):
+    """In-memory ProductRepository returning a fixed Product for any Measure
+    Type. The pins assert the SAP cascade, not Cost, so the unit cost is
+    immaterial — only the generator's overlay is exercised."""
+
+    def get(self, measure_type: str) -> Product:
+        return Product(  # placeholder cost figures; only the overlay matters here
+            measure_type=measure_type, unit_cost_per_m2=1.0, contingency_rate=0.0
+        )
+
+
+def _assert_overlay_reproduces_after(
+    before: EpcPropertyData, after: EpcPropertyData, overlay: EpcSimulation
+) -> None:
+    """Score ``overlay`` on ``before`` and assert it matches the calculator's
+    score on the re-lodged ``after`` across all three metrics."""
+    calculator = Sap10Calculator()  # same instance scores both sides
+    relodged: SapResult = calculator.calculate(after)
+    scored: Score = PackageScorer(calculator).score(before, [overlay])
+
+    assert abs(scored.sap_continuous - relodged.sap_score_continuous) <= _PIN_ABS
+    assert abs(scored.co2_kg_per_yr - relodged.co2_kg_per_yr) <= _PIN_ABS
+    assert (
+        abs(scored.primary_energy_kwh_per_yr - relodged.primary_energy_kwh_per_yr)
+        <= _PIN_ABS
+    )
+
+
+def test_cavity_wall_overlay_reproduces_the_relodged_after() -> None:
+    # Arrange: parse both fixtures, then generate the Recommendation from `before`
+    before: EpcPropertyData = parse_recommendation_summary(
+        "cavity_wall_001431_before.pdf"
+    )
+    after: EpcPropertyData = parse_recommendation_summary(
+        "cavity_wall_001431_after.pdf"
+    )
+    recommendation: Recommendation | None = recommend_cavity_wall(
+        before, _AnyProduct()
+    )
+    assert recommendation is not None  # narrows the Optional before .options is read
+
+    # Act / Assert: only the first Option's overlay is pinned
+    _assert_overlay_reproduces_after(
+        before, after, recommendation.options[0].overlay
+    )