mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
test(modelling): Elmhurst before/after cascade pin for cavity wall (#1154)
Closes #1154 — the Package Scorer's Elmhurst cascade pin. Drives recommend_cavity_wall on the parsed `before` Summary, scores its Option's overlay through PackageScorer, and asserts delta 0 (abs<=1e-4 on SAP/CO2/PE) vs the calculator's score on the re-lodged `after` Summary. Key finding: the handover's stated parser gate (parse_site_notes_pdf throwing 'Manufacturer' on cert 001431) does NOT block these pins. The Elmhurst recommendation Summaries route cleanly through the same ElmhurstSiteNotesExtractor + EpcPropertyDataMapper chain the worksheet e2e fixtures use (_elmhurst_worksheet_001431.build_epc). The Textract path's window bug is unrelated and unused here. The before→after field change is exactly wall_insulation_type 4 (uninsulated) → 2 (filled cavity), which is precisely the overlay recommend_cavity_wall emits; the cascade closes at delta 0.000000 on all three metrics. Before/after Summaries mirrored into tests/domain/modelling/fixtures/ so the pin does not depend on the unstaged workspace. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
9ed4ccc28e
commit
4c0a907a54
4 changed files with 157 additions and 0 deletions
76
tests/domain/modelling/_elmhurst_recommendation.py
Normal file
76
tests/domain/modelling/_elmhurst_recommendation.py
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
"""Parse an Elmhurst *recommendation* Summary PDF into an EpcPropertyData.

The Modelling cascade pins use Elmhurst's own before/after measure
re-lodgements as deterministic test vectors: each measure folder under
`sap worksheets/Recommendations Elmhurst Files/` holds a `before` Summary
(the baseline cert) and an `after` Summary (the same cert re-lodged with the
measure applied). Applying the matching Recommendation Generator's overlay to
the parsed `before` must reproduce the calculator's score on the parsed
`after` at delta 0 — proving the overlay is the exact field change Elmhurst
made.

This routes the Summary PDF through the same extractor + mapper chain the
worksheet e2e fixtures use (`_elmhurst_worksheet_001431.build_epc`), NOT the
Textract `parse_site_notes_pdf` path — that path has an unrelated window
extraction bug on cert 001431. The before/after Summaries are mirrored into
`tests/domain/modelling/fixtures/` so the pins do not depend on the unstaged
workspace.
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from typing import Final
|
||||
|
||||
from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtractor
|
||||
from datatypes.epc.domain.epc_property_data import EpcPropertyData
|
||||
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
|
||||
|
||||
_FIXTURES_DIR: Final[Path] = Path(__file__).resolve().parent / "fixtures"
|
||||
|
||||
|
||||
def _summary_pdf_to_textract_style_pages(pdf_path: Path) -> list[str]:
    """Render each page of a Summary PDF as newline-joined text tokens.

    Returns one string per page, in the label\\nvalue token layout that
    `ElmhurstSiteNotesExtractor` expects. This duplicates the helper in
    `_elmhurst_worksheet_001431.py`.

    The page count is read from `pdfinfo`; every page is then dumped on its
    own with `pdftotext -layout`, which preserves the spatial label/value
    pairing on each line. Each non-blank line is cut wherever two or more
    whitespace characters occur, and a blank line contributes one empty
    token.

    Raises:
        RuntimeError: the `pdfinfo` output has no `Pages:` entry.
        subprocess.CalledProcessError: `pdfinfo` or `pdftotext` exited with
            a non-zero status.
    """
    pdfinfo_output: str = subprocess.run(
        ["pdfinfo", str(pdf_path)],
        check=True,
        capture_output=True,
        text=True,
    ).stdout
    pages_field = re.search(r"Pages:\s+(\d+)", pdfinfo_output)
    if pages_field is None:
        raise RuntimeError(f"Could not parse page count from {pdf_path}")
    last_page = int(pages_field.group(1))

    rendered_pages: list[str] = []
    for page_number in range(1, last_page + 1):
        # `-f N -l N` restricts pdftotext to a single page; `-` sends the
        # text to stdout.
        page_arg = str(page_number)
        page_text: str = subprocess.run(
            [
                "pdftotext", "-layout", "-f", page_arg, "-l", page_arg,
                str(pdf_path), "-",
            ],
            check=True,
            capture_output=True,
            text=True,
        ).stdout

        page_tokens: list[str] = []
        for raw_line in page_text.splitlines():
            stripped = raw_line.strip()
            if stripped:
                page_tokens.extend(
                    piece for piece in re.split(r"\s{2,}", stripped) if piece
                )
            else:
                # Blank layout lines survive as empty tokens.
                page_tokens.append("")
        rendered_pages.append("\n".join(page_tokens))
    return rendered_pages
|
||||
|
||||
|
||||
def parse_recommendation_summary(fixture_name: str) -> EpcPropertyData:
    """Build an EpcPropertyData from a recommendation Summary fixture.

    `fixture_name` is the file name of a before/after Summary PDF inside
    `tests/domain/modelling/fixtures/`. The PDF is converted to per-page
    token text, run through `ElmhurstSiteNotesExtractor`, and the extracted
    site notes are mapped with
    `EpcPropertyDataMapper.from_elmhurst_site_notes`.
    """
    page_texts: list[str] = _summary_pdf_to_textract_style_pages(
        _FIXTURES_DIR / fixture_name
    )
    extractor = ElmhurstSiteNotesExtractor(page_texts)
    return EpcPropertyDataMapper.from_elmhurst_site_notes(extractor.extract())
|
||||
BIN
tests/domain/modelling/fixtures/cavity_wall_001431_after.pdf
Normal file
BIN
tests/domain/modelling/fixtures/cavity_wall_001431_after.pdf
Normal file
Binary file not shown.
BIN
tests/domain/modelling/fixtures/cavity_wall_001431_before.pdf
Normal file
BIN
tests/domain/modelling/fixtures/cavity_wall_001431_before.pdf
Normal file
Binary file not shown.
81
tests/domain/modelling/test_elmhurst_cascade_pins.py
Normal file
81
tests/domain/modelling/test_elmhurst_cascade_pins.py
Normal file
|
|
@ -0,0 +1,81 @@
|
|||
"""Elmhurst before/after cascade pins for the Recommendation Generators.

Each measure has an Elmhurst `before` Summary (baseline cert) and an `after`
Summary (the same cert re-lodged with the measure applied). The pin drives the
matching generator on the parsed `before`, scores its Option's overlay through
the `PackageScorer`, and asserts the result equals the calculator's score on
the parsed `after` at `abs(diff) <= 1e-4` for SAP / CO2 / primary energy.

This is the real cert→generator→overlay→calculator cascade, not a per-section
isolation test (see `[[feedback-cascade-pin-methodology]]`): a non-zero delta
is a named generator/overlay/calculator gap to fix, never a tolerance to widen
(`[[feedback-zero-error-strict]]`).
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Final
|
||||
|
||||
from datatypes.epc.domain.epc_property_data import EpcPropertyData
|
||||
from domain.modelling.package_scorer import PackageScorer, Score
|
||||
from domain.modelling.product import Product
|
||||
from domain.modelling.recommendation import Recommendation
|
||||
from domain.modelling.simulation import EpcSimulation
|
||||
from domain.modelling.wall_recommendation import recommend_cavity_wall
|
||||
from domain.sap10_calculator.calculator import Sap10Calculator, SapResult
|
||||
from repositories.product.product_repository import ProductRepository
|
||||
from tests.domain.modelling._elmhurst_recommendation import (
|
||||
parse_recommendation_summary,
|
||||
)
|
||||
|
||||
# Pin tolerance: the Summary PDFs are deterministic test vectors, so the
|
||||
# overlay must reproduce the re-lodged cert exactly. Matches the worksheet
|
||||
# e2e tolerance.
|
||||
_PIN_ABS: Final[float] = 1e-4
|
||||
|
||||
|
||||
class _AnyProduct(ProductRepository):
    """Stub ProductRepository that answers every Measure Type lookup.

    Each `get` builds a Product for the requested Measure Type with a fixed
    unit cost and zero contingency. The pins check the SAP cascade rather
    than Cost, so the pricing is immaterial — only the generator's overlay
    is exercised.
    """

    def get(self, measure_type: str) -> Product:
        """Return a placeholder Product for ``measure_type``."""
        placeholder = Product(
            contingency_rate=0.0,
            unit_cost_per_m2=1.0,
            measure_type=measure_type,
        )
        return placeholder
|
||||
|
||||
|
||||
def _assert_overlay_reproduces_after(
    before: EpcPropertyData, after: EpcPropertyData, overlay: EpcSimulation
) -> None:
    """Assert that ``overlay`` scored on ``before`` matches ``after``.

    One `Sap10Calculator` computes the result for the re-lodged ``after`` and
    is also handed to the `PackageScorer` that scores ``[overlay]`` on
    ``before``. All three metric deltas (scored minus re-lodged) are
    evaluated before asserting, so a failing pin reports every metric that
    is out of tolerance rather than stopping at the first one.

    Raises:
        AssertionError: the absolute delta of any of the three metrics is
            not within `_PIN_ABS`.
    """
    calculator = Sap10Calculator()
    relodged: SapResult = calculator.calculate(after)
    scored: Score = PackageScorer(calculator).score(before, [overlay])

    deltas: dict[str, float] = {
        "sap_continuous": scored.sap_continuous - relodged.sap_score_continuous,
        "co2_kg_per_yr": scored.co2_kg_per_yr - relodged.co2_kg_per_yr,
        "primary_energy_kwh_per_yr": (
            scored.primary_energy_kwh_per_yr
            - relodged.primary_energy_kwh_per_yr
        ),
    }
    # `not (x <= tol)` rather than `x > tol`, so a NaN delta fails the pin
    # just as it would with a plain `assert abs(delta) <= tol`.
    out_of_tolerance = {
        metric: delta
        for metric, delta in deltas.items()
        if not abs(delta) <= _PIN_ABS
    }
    assert not out_of_tolerance, (
        "overlay score differs from the re-lodged after by more than "
        f"{_PIN_ABS} (scored - relodged): {out_of_tolerance}"
    )
|
||||
|
||||
|
||||
def test_cavity_wall_overlay_reproduces_the_relodged_after() -> None:
    """Cavity wall pin: the first Option's overlay from
    `recommend_cavity_wall`, scored on the `before` Summary, must match the
    calculator's score on the re-lodged `after` Summary."""
    # Arrange
    baseline: EpcPropertyData = parse_recommendation_summary(
        "cavity_wall_001431_before.pdf"
    )
    relodged: EpcPropertyData = parse_recommendation_summary(
        "cavity_wall_001431_after.pdf"
    )
    products = _AnyProduct()
    cavity_wall: Recommendation | None = recommend_cavity_wall(
        baseline, products
    )
    assert cavity_wall is not None
    first_option = cavity_wall.options[0]

    # Act / Assert
    _assert_overlay_reproduces_after(baseline, relodged, first_option.overlay)
|
||||
Loading…
Add table
Reference in a new issue