mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-30 13:10:47 +00:00
test(worksheet): pin simulated case 43 at 1e-4 (RR + dry-line + mixed roof)
Golden regression fixture for the multi-feature dwelling that surfaced the
two Elmhurst-extractor bugs in a33707f8. case 43 is a 2-BP mid-terrace with
a DETAILED room-in-roof (two slopes, two flat ceilings, party + exposed
gables, two common walls), a MIXED-insulation multi-section roof (Main
insulated + Extension uninsulated 2.30), a DRY-LINED extension solid wall,
a mains-gas boiler (102 / control 2106) and a House-coal solid-fuel
secondary (633).
Routes the Summary PDF through the WHOLE extractor + mapper + calculator
pipeline (no hand-built EpcPropertyData) and pins the §3 fabric + SAP-rating
block at abs=1e-4: (29a) walls 74.5800, (30) roof 38.5008, (33) fabric
172.7844, continuous SAP 73.2332 = (258), CO2 3518.3037 = (272). Guards the
detailed-RR slope/common_wall surfaces, the dry-lining R=0.17 adjustment,
and the per-part mixed-roof billing together. Summary mirrored to
backend/documents_parser/tests/fixtures/Summary_001431_case43.pdf; provider
module mirrors the _case6/_case21 pattern, assertion in
test_section_cascade_pins. Harness 47/47; regression = the 3 pre-existing
fails; pyright net-zero.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
a33707f851
commit
419e340477
3 changed files with 158 additions and 0 deletions
BIN
backend/documents_parser/tests/fixtures/Summary_001431_case43.pdf
vendored
Normal file
BIN
backend/documents_parser/tests/fixtures/Summary_001431_case43.pdf
vendored
Normal file
Binary file not shown.
|
|
@ -0,0 +1,116 @@
|
|||
"""Mapper-driven cascade pin against the Elmhurst P960-0001-001431
|
||||
"simulated case 43" worksheet — a 2-storey mid-terrace deliberately built to
|
||||
exercise every feature in one dwelling:
|
||||
|
||||
- a DETAILED room-in-roof on the Main BP (two slopes, two flat ceilings,
|
||||
a party + an exposed gable, two common walls) — exercises the
|
||||
slope / stud / common_wall detailed-RR surfaces end-to-end;
|
||||
- a MIXED-insulation multi-section roof (Main insulated 0.16/0.54/0.68/0.11
|
||||
+ Extension uninsulated 2.30);
|
||||
- a DRY-LINED extension solid wall (RdSAP 10 §5.8 Table 14 R=0.17:
|
||||
solid brick 1.70 -> 1.32);
|
||||
- a mains-gas boiler (SAP 102, control 2106 interlock) with a House-coal
|
||||
solid-fuel SECONDARY (633, 60%) and a 210 L declared-loss cylinder.
|
||||
|
||||
This case was generated to settle the room-in-roof + mixed-roof + secondary
|
||||
feature set with a single 1e-4 pin. It exposed two compensating Elmhurst-
|
||||
extractor bugs (commit `a33707f8`) whose fabric errors nearly cancelled
|
||||
(walls net -0.76 W/K, hidden behind a +0.05 SAP delta):
|
||||
1. the main/extension wall "Dry-lining: Yes" line was read only for
|
||||
ALTERNATIVE walls -> the dry-lined extension wall billed at the
|
||||
un-adjusted 1.70 instead of 1.32;
|
||||
2. the LAST room-in-roof surface row's per-row token scan over-read into
|
||||
the next section -> Common Wall 2's default U silently zeroed
|
||||
(1.90 -> 0.00).
|
||||
With both fixed the whole §3 fabric and the SAP/CO2 reproduce EXACTLY.
|
||||
|
||||
Like 000565 / the _rr cases / case 20 / 21 / 38 / 39, this fixture does NOT
|
||||
hand-build the EpcPropertyData: it routes the Summary PDF through
|
||||
ElmhurstSiteNotesExtractor + from_elmhurst_site_notes so the pin exercises
|
||||
the WHOLE extractor + mapper + calculator pipeline.
|
||||
|
||||
Source: user-simulated PDFs at `sap worksheets/golden fixture debugging/
|
||||
simulated case 43/`. The Summary is mirrored into the tracked
|
||||
`backend/documents_parser/tests/fixtures/Summary_001431_case43.pdf` so the
|
||||
test runs without depending on the unstaged workspace.
|
||||
|
||||
Worksheet pin targets (P960-0001-001431, "11a. SAP rating" / "12a. CO2
|
||||
emissions" block — the UK-average-climate rating block our cascade
|
||||
reproduces):
|
||||
- SAP value (un-rounded, before (258) integer rounding) = 73.2332 (band C)
|
||||
- (272) Total CO2, kg/year = 3518.3037
|
||||
|
||||
Per [[feedback-zero-error-strict]] + [[feedback-continuous-sap-tolerance]]:
|
||||
pins are abs <= 1e-4 against the worksheet PDF (printed to 4 dp).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from typing import Final
|
||||
|
||||
from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtractor
|
||||
from datatypes.epc.domain.epc_property_data import EpcPropertyData
|
||||
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
|
||||
|
||||
# Walking up from this module: parents[0]=worksheet/, [1]=sap10_calculator/,
# [2]=domain/, [3]=tests/, [4]=repo root. The tracked Summary fixture is
# addressed from that repo root.
_SUMMARY_PDF: Final[Path] = Path(__file__).resolve().parents[4].joinpath(
    "backend",
    "documents_parser",
    "tests",
    "fixtures",
    "Summary_001431_case43.pdf",
)

# Worksheet line (29a): walls, W/K.
LINE_29A_WALLS_W_PER_K: Final[float] = 74.5800

# Worksheet line (30): roof, W/K. (30) = ΣA×U over the six roof surfaces:
#   FlatCeil1 4.3200 + FlatCeil2 6.9000 + Slope1 1.0200 + Slope2 0.1408
#   + roof Main 3.1200 + roof Ext1 (uninsulated) 23.0000.
LINE_30_ROOF_W_PER_K: Final[float] = 38.5008

# Worksheet line (33): fabric heat loss, W/K.
LINE_33_FABRIC_W_PER_K: Final[float] = 172.7844

# Continuous (un-rounded) SAP value behind worksheet line (258).
LINE_258_SAP_VALUE_CONTINUOUS: Final[float] = 73.2332

# Worksheet line (272): total CO2, kg/year.
LINE_272_TOTAL_CO2_KG_PER_YR: Final[float] = 3518.3037
|
||||
|
||||
|
||||
def _summary_pdf_to_textract_style_pages(pdf_path: Path) -> list[str]:
    """Render each page of a Summary PDF as newline-joined text tokens.

    Produces the per-page text format the ElmhurstSiteNotesExtractor
    expects (label/value token sequences). Mirror of the helper in the
    other `_elmhurst_worksheet_*` fixtures.

    The page count is read from ``pdfinfo``; each page is then rendered on
    its own with ``pdftotext -layout``. A non-blank layout line is stripped
    and split on runs of two or more whitespace characters; a blank line
    contributes one empty token.

    Raises RuntimeError when the ``pdfinfo`` output carries no ``Pages:``
    entry. Both tools run with ``check=True``, so a non-zero exit raises
    subprocess.CalledProcessError.
    """

    def _stdout_of(argv: list[str]) -> str:
        # Single place for the capture/text/check settings shared by both tools.
        return subprocess.run(
            argv, capture_output=True, text=True, check=True,
        ).stdout

    def _tokens_of(row: str) -> list[str]:
        stripped = row.strip()
        if not stripped:
            # Blank layout lines are kept as an empty token, not dropped.
            return [""]
        return [cell for cell in re.split(r"\s{2,}", stripped) if cell]

    found = re.search(r"Pages:\s+(\d+)", _stdout_of(["pdfinfo", str(pdf_path)]))
    if found is None:
        raise RuntimeError(f"Could not parse page count from {pdf_path}")
    last_page = int(found.group(1))

    rendered: list[str] = []
    for number in range(1, last_page + 1):
        page_no = str(number)
        # "-f N -l N" restricts pdftotext to one page; "-" sends it to stdout.
        layout_text = _stdout_of(
            [
                "pdftotext", "-layout", "-f", page_no, "-l", page_no,
                str(pdf_path), "-",
            ]
        )
        rendered.append(
            "\n".join(
                token
                for row in layout_text.splitlines()
                for token in _tokens_of(row)
            )
        )
    return rendered
|
||||
|
||||
|
||||
def build_epc() -> EpcPropertyData:
    """Build the case-43 EpcPropertyData from the tracked Summary PDF.

    Nothing is hand-built here: the Summary is tokenised, run through
    ElmhurstSiteNotesExtractor, and mapped with
    EpcPropertyDataMapper.from_elmhurst_site_notes, so the extractor and
    the mapper are both part of what the pin tests.

    This module is only a pin PROVIDER (build_epc + the LINE_* constants,
    mirroring `_elmhurst_worksheet_001431_case6` / `_case21`); the
    collected assertion lives in
    `test_section_cascade_pins.test_case43_*`.
    """
    extractor = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(_SUMMARY_PDF)
    )
    return EpcPropertyDataMapper.from_elmhurst_site_notes(extractor.extract())
|
||||
|
|
@ -44,6 +44,7 @@ from tests.domain.sap10_calculator.worksheet import (
|
|||
_elmhurst_worksheet_000516 as _w000516,
|
||||
_elmhurst_worksheet_001431_case6 as _w001431_case6,
|
||||
_elmhurst_worksheet_001431_case21 as _w001431_case21,
|
||||
_elmhurst_worksheet_001431_case43 as _w001431_case43,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -328,6 +329,47 @@ def test_section_3_wall_u_by_thickness_case21_match_pdf() -> None:
|
|||
)
|
||||
|
||||
|
||||
def test_case43_detailed_rr_dryline_and_mixed_roof_match_pdf() -> None:
    """Full-feature pin for simulated case 43.

    The dwelling is a 2-BP mid-terrace combining:

    - a DETAILED room-in-roof (slopes + flat ceilings + party/exposed
      gables + common walls);
    - a MIXED-insulation multi-section roof (Main insulated + Extension
      uninsulated);
    - a DRY-LINED extension solid wall (RdSAP 10 §5.8 Table 14:
      1.70 -> 1.32);
    - a mains-gas boiler (102, control 2106) and a House-coal solid-fuel
      secondary (633).

    It exposed, and now regression-guards, two compensating
    Elmhurst-extractor bugs (commit a33707f8): the unread main-wall
    dry-lining and the last-RR-row default-U over-read, whose fabric
    errors nearly cancelled (walls net -0.76). With both fixed the §3
    fabric and the SAP-rating block reproduce the P960 exactly.
    """
    # Arrange
    from domain.sap10_calculator.calculator import calculate_sap_from_inputs

    fixture = _w001431_case43
    property_data = fixture.build_epc()

    # Act
    section_3 = heat_transmission_section_from_cert(property_data)
    sap_result = calculate_sap_from_inputs(cert_to_inputs(property_data))

    # Assert — §3 fabric first (the RR + dry-lining + mixed-roof fixes), then
    # the SAP-rating block. Each pin is intended at abs=1e-4; the tolerance
    # itself is enforced inside _pin, which is defined outside this block.
    _pin(
        section_3.walls_w_per_k,
        fixture.LINE_29A_WALLS_W_PER_K,
        "§3 (29a) case43",
    )
    _pin(
        section_3.roof_w_per_k,
        fixture.LINE_30_ROOF_W_PER_K,
        "§3 (30) case43",
    )
    _pin(
        section_3.fabric_heat_loss_w_per_k,
        fixture.LINE_33_FABRIC_W_PER_K,
        "§3 (33) case43",
    )
    _pin(
        sap_result.sap_score_continuous,
        fixture.LINE_258_SAP_VALUE_CONTINUOUS,
        "(258) case43",
    )
    _pin(
        sap_result.co2_kg_per_yr,
        fixture.LINE_272_TOTAL_CO2_KG_PER_YR,
        "(272) case43",
    )
|
||||
|
||||
|
||||
def test_case6_main_2_emitter_and_control_extracted() -> None:
|
||||
"""Simulated case 6's §14.1 Main Heating2 lodges its OWN emitter
|
||||
("Underfloor Heating") and control ("SAP code 2110, ...") — the two
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue