mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-30 13:10:47 +00:00
test(worksheet): pin simulated case 38 — mains-gas secondary reproduces worksheet exactly
The realistic re-generation of case 37 (code-117 gas boiler, control 2102, + a MAINS-GAS condensing gas-fire secondary code 611, vs case 37's biogas 605). The full extractor -> mapper -> calculator pipeline reproduces the worksheet's SAP-rating block EXACTLY: continuous SAP 60.9152 (Δ 2e-5) and (272) CO2 5801.0770 (Δ ~0). This confirms the boiler-efficiency / control-2102 −5pp interlock / secondary-fuel handling are all correct, and that case 37's +7 gap was purely the biogas sub-fuel the Summary export cannot carry. Summary mirrored into backend/documents_parser/tests/fixtures so the pin runs without the unstaged workspace. PE not pinned — it is a separate DPER block (different scope) already guarded by the corpus PE gauge. Worksheet harness 47/47 unchanged; pyright net-zero. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
c11eb46b8a
commit
4fdc23f83d
2 changed files with 116 additions and 0 deletions
BIN
backend/documents_parser/tests/fixtures/Summary_001431_case38.pdf
vendored
Normal file
BIN
backend/documents_parser/tests/fixtures/Summary_001431_case38.pdf
vendored
Normal file
Binary file not shown.
|
|
@ -0,0 +1,116 @@
|
|||
"""Mapper-driven cascade pin against the Elmhurst P960-0001-001431
|
||||
"simulated case 38" worksheet — a mains-gas dwelling with a code-117
|
||||
regular boiler (1979-1997, winter 66%), control 2102 (programmer, no room
|
||||
thermostat → −5pp interlock → (206)=61%), and a **mains-gas condensing gas
|
||||
fire secondary** (SAP code 611).
|
||||
|
||||
This is the realistic re-generation of "simulated case 37": case 37 lodged
|
||||
the same dwelling's code-605 gas fire on BIOGAS (7.60 p/kWh), which the
|
||||
Elmhurst Summary export cannot carry (it lodges only the secondary SAP
|
||||
code, not its sub-fuel — see `_elmhurst_secondary_fuel_from_sap_code`), so
|
||||
the mains-gas modal default left a +7 SAP gap that was purely the biogas
|
||||
sub-fuel. With a mains-gas secondary the whole cascade reproduces the
|
||||
worksheet EXACTLY, confirming the boiler-efficiency / control-2102 /
|
||||
secondary handling is all correct.
|
||||
|
||||
Like 000565 / the _rr cases / case 20 / 21, this fixture does NOT hand-
|
||||
build the EpcPropertyData: it routes the Summary PDF through
|
||||
ElmhurstSiteNotesExtractor + from_elmhurst_site_notes so the pin exercises
|
||||
the WHOLE extractor + mapper + calculator pipeline.
|
||||
|
||||
Source: user-simulated PDFs at `sap worksheets/golden fixture debugging/
|
||||
simulated case 38/`. The Summary is mirrored into the tracked
|
||||
`backend/documents_parser/tests/fixtures/Summary_001431_case38.pdf` so the
|
||||
test runs without depending on the unstaged workspace.
|
||||
|
||||
Worksheet pin targets (P960-0001-001431, "11a. SAP rating" block):
|
||||
- SAP value (un-rounded, before (258) integer rounding) = 60.9152
|
||||
- (272) Total CO2, kg/year = 5801.0770
|
||||
|
||||
Per [[feedback-zero-error-strict]] + [[feedback-continuous-sap-tolerance]]:
|
||||
pins are abs <= 1e-3 against the worksheet PDF (the worksheet prints the
|
||||
SAP value to 4 dp).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from typing import Final
|
||||
|
||||
from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtractor
|
||||
from datatypes.epc.domain.epc_property_data import EpcPropertyData
|
||||
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
|
||||
from domain.sap10_calculator.calculator import calculate_sap_from_inputs
|
||||
from domain.sap10_calculator.rdsap.cert_to_inputs import cert_to_inputs
|
||||
|
||||
# parents[0]=worksheet/, [1]=sap10_calculator/, [2]=domain/, [3]=tests/,
# [4]=repo root.
# The tracked copy of the case-38 Summary PDF is addressed relative to that
# root so the pin does not depend on the unstaged workspace.
_SUMMARY_PDF: Final[Path] = Path(__file__).resolve().parents[4].joinpath(
    "backend",
    "documents_parser",
    "tests",
    "fixtures",
    "Summary_001431_case38.pdf",
)

# Worksheet pin targets (P960-0001-001431, "11a. SAP rating" block).
# SAP value, un-rounded (before the (258) integer rounding).
LINE_258_SAP_VALUE_CONTINUOUS: Final[float] = 60.9152
# (272) Total CO2, kg/year.
LINE_272_TOTAL_CO2_KG_PER_YR: Final[float] = 5801.0770
# Absolute tolerance applied to both pins.
_PIN_ABS: Final[float] = 1e-3
|
||||
|
||||
|
||||
def _summary_pdf_to_textract_style_pages(pdf_path: Path) -> list[str]:
    """Render a Summary PDF as one newline-joined token string per page.

    This is the per-page text format the ElmhurstSiteNotesExtractor expects
    (label/value token sequences). Mirror of the helper in the other
    `_elmhurst_worksheet_*` fixtures.

    The page count is read from `pdfinfo`; each page is then dumped with
    `pdftotext -layout`. Every non-blank layout line is split on runs of two
    or more whitespace characters, and every blank line is kept as an empty
    token.

    Raises:
        RuntimeError: if the `pdfinfo` output has no `Pages:` entry.
        subprocess.CalledProcessError: if either tool exits non-zero
            (both are run with `check=True`).
    """
    pdfinfo_output = subprocess.run(
        ["pdfinfo", str(pdf_path)], capture_output=True, text=True, check=True,
    ).stdout
    pages_match = re.search(r"Pages:\s+(\d+)", pdfinfo_output)
    if pages_match is None:
        raise RuntimeError(f"Could not parse page count from {pdf_path}")
    last_page = int(pages_match.group(1))

    def _page_as_tokens(page_no: int) -> str:
        # Passing the same number to -f and -l restricts pdftotext to one page.
        page_arg = str(page_no)
        layout_text = subprocess.run(
            [
                "pdftotext", "-layout", "-f", page_arg, "-l", page_arg,
                str(pdf_path), "-",
            ],
            capture_output=True, text=True, check=True,
        ).stdout
        page_tokens: list[str] = []
        for raw_line in layout_text.splitlines():
            stripped = raw_line.strip()
            if stripped:
                # Two-or-more spaces separate the columns of a layout line.
                page_tokens.extend(
                    cell for cell in re.split(r"\s{2,}", stripped) if cell
                )
            else:
                # Blank lines survive as empty tokens.
                page_tokens.append("")
        return "\n".join(page_tokens)

    # pdftotext page numbers are 1-based and inclusive.
    return [_page_as_tokens(page_no) for page_no in range(1, last_page + 1)]
|
||||
|
||||
|
||||
def build_epc() -> EpcPropertyData:
    """Build the case-38 EpcPropertyData from the tracked Summary PDF.

    The PDF is converted to per-page token text, passed through
    ElmhurstSiteNotesExtractor, and the extracted site notes are mapped with
    EpcPropertyDataMapper.from_elmhurst_site_notes. Nothing is hand-built:
    the extractor and mapper are part of the test target.
    """
    extractor = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(_SUMMARY_PDF)
    )
    return EpcPropertyDataMapper.from_elmhurst_site_notes(extractor.extract())
|
||||
|
||||
|
||||
def test_case38_mains_gas_secondary_reproduces_the_worksheet_sap_and_co2() -> None:
    """Pin continuous SAP and total CO2 for case 38 against the worksheet."""
    # Arrange — run the simulated case-38 Summary (mains-gas boiler 117 plus
    # mains-gas condensing gas-fire secondary 611) through extractor + mapper.
    epc = build_epc()

    # Act — convert the mapped certificate to calculator inputs and evaluate.
    inputs = cert_to_inputs(epc)
    result = calculate_sap_from_inputs(inputs)

    # Assert — both SAP-rating-block figures match the worksheet within the
    # absolute pin tolerance.
    sap_delta = abs(result.sap_score_continuous - LINE_258_SAP_VALUE_CONTINUOUS)
    assert sap_delta <= _PIN_ABS
    co2_delta = abs(result.co2_kg_per_yr - LINE_272_TOTAL_CO2_KG_PER_YR)
    assert co2_delta <= _PIN_ABS
|
||||
Loading…
Add table
Reference in a new issue