diff --git a/backend/documents_parser/tests/fixtures/Summary_001431_case43.pdf b/backend/documents_parser/tests/fixtures/Summary_001431_case43.pdf new file mode 100644 index 00000000..080fe618 Binary files /dev/null and b/backend/documents_parser/tests/fixtures/Summary_001431_case43.pdf differ diff --git a/tests/domain/sap10_calculator/worksheet/_elmhurst_worksheet_001431_case43.py b/tests/domain/sap10_calculator/worksheet/_elmhurst_worksheet_001431_case43.py new file mode 100644 index 00000000..2e4a7ae3 --- /dev/null +++ b/tests/domain/sap10_calculator/worksheet/_elmhurst_worksheet_001431_case43.py @@ -0,0 +1,116 @@ +"""Mapper-driven cascade pin against the Elmhurst P960-0001-001431 +"simulated case 43" worksheet — a 2-storey mid-terrace deliberately built to +exercise every feature in one dwelling: + + - a DETAILED room-in-roof on the Main BP (two slopes, two flat ceilings, + a party + an exposed gable, two common walls) — exercises the + slope / stud / common_wall detailed-RR surfaces end-to-end; + - a MIXED-insulation multi-section roof (Main insulated 0.16/0.54/0.68/0.11 + + Extension uninsulated 2.30); + - a DRY-LINED extension solid wall (RdSAP 10 §5.8 Table 14 R=0.17: + solid brick 1.70 -> 1.32); + - a mains-gas boiler (SAP 102, control 2106 interlock) with a House-coal + solid-fuel SECONDARY (633, 60%) and a 210 L declared-loss cylinder. + +This case was generated to settle the room-in-roof + mixed-roof + secondary +feature set with a single 1e-4 pin. It exposed two compensating Elmhurst- +extractor bugs (commit `a33707f8`) whose fabric errors nearly cancelled +(walls net -0.76 W/K, hidden behind a +0.05 SAP delta): + 1. the main/extension wall "Dry-lining: Yes" line was read only for + ALTERNATIVE walls -> the dry-lined extension wall billed at the + un-adjusted 1.70 instead of 1.32; + 2. the LAST room-in-roof surface row's per-row token scan over-read into + the next section -> Common Wall 2's default U silently zeroed + (1.90 -> 0.00). 
+With both fixed the whole §3 fabric and the SAP/CO2 reproduce EXACTLY. + +Like 000565 / the _rr cases / case 20 / 21 / 38 / 39, this fixture does NOT +hand-build the EpcPropertyData: it routes the Summary PDF through +ElmhurstSiteNotesExtractor + from_elmhurst_site_notes so the pin exercises +the WHOLE extractor + mapper + calculator pipeline. + +Source: user-simulated PDFs at `sap worksheets/golden fixture debugging/ +simulated case 43/`. The Summary is mirrored into the tracked +`backend/documents_parser/tests/fixtures/Summary_001431_case43.pdf` so the +test runs without depending on the unstaged workspace. + +Worksheet pin targets (P960-0001-001431, "11a. SAP rating" / "12a. CO2 +emissions" block — the UK-average-climate rating block our cascade +reproduces): +- SAP value (un-rounded, before (258) integer rounding) = 73.2332 (band C) +- (272) Total CO2, kg/year = 3518.3037 + +Per [[feedback-zero-error-strict]] + [[feedback-continuous-sap-tolerance]]: +pins are abs <= 1e-4 against the worksheet PDF (printed to 4 dp). +""" + +from __future__ import annotations + +import re +import subprocess +from pathlib import Path +from typing import Final + +from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtractor +from datatypes.epc.domain.epc_property_data import EpcPropertyData +from datatypes.epc.domain.mapper import EpcPropertyDataMapper + +# parents[0]=worksheet/, [1]=sap10_calculator/, [2]=domain/, [3]=tests/, +# [4]=repo root. +_SUMMARY_PDF: Final[Path] = ( + Path(__file__).resolve().parents[4] + / "backend" / "documents_parser" / "tests" / "fixtures" + / "Summary_001431_case43.pdf" +) + +LINE_29A_WALLS_W_PER_K: Final[float] = 74.5800 +# (30) = ΣA×U: FlatCeil1 4.3200 + FlatCeil2 6.9000 + Slope1 1.0200 + +# Slope2 0.1408 + roof Main 3.1200 + roof Ext1 (uninsulated) 23.0000. 
+LINE_30_ROOF_W_PER_K: Final[float] = 38.5008 +LINE_33_FABRIC_W_PER_K: Final[float] = 172.7844 +LINE_258_SAP_VALUE_CONTINUOUS: Final[float] = 73.2332 +LINE_272_TOTAL_CO2_KG_PER_YR: Final[float] = 3518.3037 + + +def _summary_pdf_to_textract_style_pages(pdf_path: Path) -> list[str]: + """Convert a Summary PDF into the per-page text format the + ElmhurstSiteNotesExtractor expects (label/value token sequences). + Mirror of the helper in the other `_elmhurst_worksheet_*` fixtures. + """ + info = subprocess.run( + ["pdfinfo", str(pdf_path)], capture_output=True, text=True, check=True, + ).stdout + m = re.search(r"Pages:\s+(\d+)", info) + if m is None: + raise RuntimeError(f"Could not parse page count from {pdf_path}") + page_count = int(m.group(1)) + pages: list[str] = [] + for i in range(1, page_count + 1): + layout = subprocess.run( + [ + "pdftotext", "-layout", "-f", str(i), "-l", str(i), + str(pdf_path), "-", + ], + capture_output=True, text=True, check=True, + ).stdout + tokens: list[str] = [] + for line in layout.splitlines(): + if not line.strip(): + tokens.append("") + continue + parts = [p for p in re.split(r"\s{2,}", line.strip()) if p] + tokens.extend(parts) + pages.append("\n".join(tokens)) + return pages + + +def build_epc() -> EpcPropertyData: + """Route the simulated case-43 Summary through extractor + mapper. + No hand-built EpcPropertyData — the extractor and mapper are part of + the test target. 
This module is a pin PROVIDER (build_epc + LINE_* + constants, mirroring `_elmhurst_worksheet_001431_case6` / `_case21`); + the collected assertion lives in + `test_section_cascade_pins.test_case43_*`.""" + pages = _summary_pdf_to_textract_style_pages(_SUMMARY_PDF) + site_notes = ElmhurstSiteNotesExtractor(pages).extract() + return EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) diff --git a/tests/domain/sap10_calculator/worksheet/test_section_cascade_pins.py b/tests/domain/sap10_calculator/worksheet/test_section_cascade_pins.py index b8f166ab..4e7336e3 100644 --- a/tests/domain/sap10_calculator/worksheet/test_section_cascade_pins.py +++ b/tests/domain/sap10_calculator/worksheet/test_section_cascade_pins.py @@ -44,6 +44,7 @@ from tests.domain.sap10_calculator.worksheet import ( _elmhurst_worksheet_000516 as _w000516, _elmhurst_worksheet_001431_case6 as _w001431_case6, _elmhurst_worksheet_001431_case21 as _w001431_case21, + _elmhurst_worksheet_001431_case43 as _w001431_case43, ) @@ -328,6 +329,47 @@ def test_section_3_wall_u_by_thickness_case21_match_pdf() -> None: ) +def test_case43_detailed_rr_dryline_and_mixed_roof_match_pdf() -> None: + """Full-feature pin for simulated case 43 — a 2-BP mid-terrace with a + DETAILED room-in-roof (slopes + flat ceilings + party/exposed gables + + common walls), a MIXED-insulation multi-section roof (Main insulated + + Extension uninsulated), a DRY-LINED extension solid wall (RdSAP 10 §5.8 + Table 14: 1.70 -> 1.32), a mains-gas boiler (102, control 2106) and a + House-coal solid-fuel secondary (633). Exposed + regression-guards two + compensating Elmhurst-extractor bugs (commit a33707f8): the unread + main/extension-wall dry-lining and the last-RR-row default-U over-read, whose + fabric errors nearly cancelled (walls net -0.76 W/K). 
With both fixed the + §3 fabric and the SAP-rating block reproduce the P960 exactly.""" + # Arrange + from domain.sap10_calculator.calculator import calculate_sap_from_inputs + + epc = _w001431_case43.build_epc() + + # Act + ht = heat_transmission_section_from_cert(epc) + result = calculate_sap_from_inputs(cert_to_inputs(epc)) + + # Assert — §3 fabric (the RR + dry-lining + mixed-roof fixes) and the + # SAP-rating block, each at abs=1e-4. + _pin(ht.walls_w_per_k, _w001431_case43.LINE_29A_WALLS_W_PER_K, "§3 (29a) case43") + _pin(ht.roof_w_per_k, _w001431_case43.LINE_30_ROOF_W_PER_K, "§3 (30) case43") + _pin( + ht.fabric_heat_loss_w_per_k, + _w001431_case43.LINE_33_FABRIC_W_PER_K, + "§3 (33) case43", + ) + _pin( + result.sap_score_continuous, + _w001431_case43.LINE_258_SAP_VALUE_CONTINUOUS, + "(258) case43", + ) + _pin( + result.co2_kg_per_yr, + _w001431_case43.LINE_272_TOTAL_CO2_KG_PER_YR, + "(272) case43", + ) + + def test_case6_main_2_emitter_and_control_extracted() -> None: """Simulated case 6's §14.1 Main Heating2 lodges its OWN emitter ("Underfloor Heating") and control ("SAP code 2110, ...") — the two