diff --git a/backend/documents_parser/tests/fixtures/Summary_001431_rr_ext.pdf b/backend/documents_parser/tests/fixtures/Summary_001431_rr_ext.pdf new file mode 100644 index 00000000..34934393 Binary files /dev/null and b/backend/documents_parser/tests/fixtures/Summary_001431_rr_ext.pdf differ diff --git a/domain/sap10_calculator/rdsap/cert_to_inputs.py b/domain/sap10_calculator/rdsap/cert_to_inputs.py index 0d99af01..2f4eb570 100644 --- a/domain/sap10_calculator/rdsap/cert_to_inputs.py +++ b/domain/sap10_calculator/rdsap/cert_to_inputs.py @@ -4103,13 +4103,27 @@ def _has_suspended_timber_floor_per_spec( if age in _AGE_BANDS_F_TO_M: return True, True # sealed if age in _AGE_BANDS_A_TO_E: - # (a) U-value < 0.5 → sealed - main_floor_u = _main_floor_u_value(epc) - if main_floor_u is not None and main_floor_u < _FLOOR_U_SEALED_THRESHOLD: - return True, True - # (b) retro-fitted insulation + no U-value supplied → sealed - ins_type_str = (main.floor_insulation_type_str or "").strip().lower() u_value_known = bool(getattr(main, "floor_u_value_known", False)) + # (a) a SUPPLIED floor U-value < 0.5 → sealed. RdSAP 10 §5 (PDF + # p.29) splits (a)/(b) on whether a U-value is supplied: (a) is + # the "U-value supplied" branch, (b) the "no U-value is supplied" + # branch. A computed default U (an assumed / as-built uninsulated + # floor) is NOT a supplied value, so it must NOT trigger (a) — it + # falls through to (b). Without this gate the cascade marked an + # as-built suspended-timber floor with default U=0.43 "sealed" + # (0.1) where Elmhurst uses "unsealed" (0.2) — cert 001431 sim + # case 2 worksheet (12)=0.2, dropping (25) effective ACH and + # understating space heating ~450 kWh. + main_floor_u = _main_floor_u_value(epc) + if ( + u_value_known + and main_floor_u is not None + and main_floor_u < _FLOOR_U_SEALED_THRESHOLD + ): + return True, True + # (b) no U-value supplied: retro-fitted insulation → sealed; + # otherwise unsealed. 
+ ins_type_str = (main.floor_insulation_type_str or "").strip().lower() if "retro" in ins_type_str and not u_value_known: return True, True # otherwise → unsealed diff --git a/sap worksheets/golden fixture debugging/simulated case 2/P960-0001-001431 - 2026-06-03T093549.591.pdf b/sap worksheets/golden fixture debugging/simulated case 2/P960-0001-001431 - 2026-06-03T093549.591.pdf new file mode 100644 index 00000000..8888b35a Binary files /dev/null and b/sap worksheets/golden fixture debugging/simulated case 2/P960-0001-001431 - 2026-06-03T093549.591.pdf differ diff --git a/sap worksheets/golden fixture debugging/simulated case 2/Summary_001431 (1).pdf b/sap worksheets/golden fixture debugging/simulated case 2/Summary_001431 (1).pdf new file mode 100644 index 00000000..34934393 Binary files /dev/null and b/sap worksheets/golden fixture debugging/simulated case 2/Summary_001431 (1).pdf differ diff --git a/tests/domain/sap10_calculator/worksheet/_elmhurst_worksheet_001431_rr.py b/tests/domain/sap10_calculator/worksheet/_elmhurst_worksheet_001431_rr.py new file mode 100644 index 00000000..8fc8ecc7 --- /dev/null +++ b/tests/domain/sap10_calculator/worksheet/_elmhurst_worksheet_001431_rr.py @@ -0,0 +1,124 @@ +"""Mapper-driven cascade pin against the Elmhurst P960-0001-001431 +"simulated case 2" worksheet — a Main + Extension dwelling with a +Simplified room-in-roof (the 6035 archetype, more complete than sim +case 1). + +Like 000565 / sim case 1, this fixture does NOT hand-build the +EpcPropertyData: it routes the Summary PDF through +ElmhurstSiteNotesExtractor + from_elmhurst_site_notes so the SAP-result +pin grid exercises the WHOLE extractor + mapper + calculator pipeline. + +This cert surfaced two real cascade bugs (both fixed; this fixture pins +them end-to-end at 1e-4): + + S0380.192 — Simplified room-in-roof. The Summary lodges placeholder + slope/ceiling Length/Height cells (a 40 m ceiling height, a 32 m + slope on a 4.65 m gable). 
RdSAP 10 §3.9.1 derives one timber-framed + "remaining area" from the floor area instead + (A_RR = 12.5√(A_floor/1.5) − Σgables = 32.89 m²). Emitting the + placeholders as detailed_surfaces billed 1024 + 160 m² of explicit + roof area → a 7.5× fabric-heat-loss explosion (SAP −14.6). Fixed by + dropping roof-going surfaces for Simplified assessments so the + cascade's residual formula fires. + + S0380.193 — Suspended-timber-floor "sealed/unsealed" infiltration. + RdSAP 10 §5 (PDF p.29) line (12): rule (a) ("U-value < 0.5 → sealed + 0.1") applies only when a floor U-value is SUPPLIED. This cert's + floor is as-built/uninsulated (default U=0.43, not supplied), so it + falls to rule (b) → unsealed 0.2. The cascade was feeding the + computed default U into rule (a) → sealed 0.1 → (25) effective ACH + dropped → space heating understated ~450 kWh. + +Source: user-simulated PDFs at `sap worksheets/golden fixture +debugging/simulated case 2/`. The Summary is mirrored into the tracked +`backend/documents_parser/tests/fixtures/Summary_001431_rr_ext.pdf` +(distinct name — the corpus reuses cert 001431; sim case 1 is the +single-part gas-combi variant) so the test runs without depending on +the unstaged workspace. + +Cert shape: Main + Extension 1, both solid brick WITH internal +insulation (Main) / as-built (Ext1), 3 storeys, Simplified room-in-roof +on the Main (floor 29.75 m², exposed + party gables), suspended +uninsulated ground floors, gas-combi SAP code 104, no PV. 
+
+Worksheet pin targets (P960-0001-001431, Block 1 — energy rating):
+- SAP rating 69 (line 258), ECF 2.2395 (line 257)
+- Total fuel cost £920.5046 (line 255)
+- CO2 4566.7090 kg/year (line 272)
+- Space heating 15269.8593 kWh/year (Σ monthly (98))
+- Main 1 fuel 18178.4039 kWh/year (line 211)
+- Secondary fuel 0.0 (line 215)
+- Hot water fuel 3308.6172 kWh/year (line 219)
+- Lighting 282.6414 kWh/year (line 232)
+- Pumps/fans 86.0 kWh/year (line 231)
+
+Per [[feedback-zero-error-strict]] + [[feedback-e2e-validation-philosophy]]:
+pins are abs=1e-4 against the worksheet PDF.
+"""
+
+from __future__ import annotations
+
+import re
+import subprocess
+from pathlib import Path
+from typing import Final
+
+from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtractor
+from datatypes.epc.domain.epc_property_data import EpcPropertyData
+from datatypes.epc.domain.mapper import EpcPropertyDataMapper
+
+
+# parents[0]=worksheet/, [1]=sap10_calculator/, [2]=domain/, [3]=tests/,
+# [4]=repo root.
+_SUMMARY_PDF: Final[Path] = (
+    Path(__file__).resolve().parents[4]
+    / "backend" / "documents_parser" / "tests" / "fixtures"
+    / "Summary_001431_rr_ext.pdf"
+)
+
+
+def _summary_pdf_to_textract_style_pages(pdf_path: Path) -> list[str]:
+    """Convert a Summary PDF into the per-page text format the
+    ElmhurstSiteNotesExtractor expects (label\\nvalue sequences).
+
+    Mirror of the helper in `test_summary_pdf_mapper_chain.py` /
+    `_elmhurst_worksheet_000565.py`.
+    """
+    info = subprocess.run(  # pdfinfo stdout is searched for "Pages:" below
+        ["pdfinfo", str(pdf_path)], capture_output=True, text=True, check=True,
+    ).stdout  # check=True: a non-zero exit raises CalledProcessError
+    m = re.search(r"Pages:\s+(\d+)", info)
+    if m is None:  # no "Pages: <n>" match in the pdfinfo output
+        raise RuntimeError(f"Could not parse page count from {pdf_path}")
+    page_count = int(m.group(1))
+
+    pages: list[str] = []
+    for i in range(1, page_count + 1):  # one pdftotext call per page
+        layout = subprocess.run(
+            [
+                "pdftotext", "-layout", "-f", str(i), "-l", str(i),
+                str(pdf_path), "-",  # "-" as output file: text goes to stdout
+            ],
+            capture_output=True, text=True, check=True,
+        ).stdout
+        tokens: list[str] = []
+        for line in layout.splitlines():
+            if not line.strip():  # blank row: keep a single empty token
+                tokens.append("")
+                continue
+            parts = [p for p in re.split(r"\s{2,}", line.strip()) if p]
+            tokens.extend(parts)  # cells = row text split on 2+ whitespace
+        pages.append("\n".join(tokens))  # one token per line, one str per page
+    return pages
+
+
+def build_epc() -> EpcPropertyData:
+    """Route the simulated case-2 Summary through extractor + mapper.
+
+    No hand-built EpcPropertyData — the extractor and mapper are part of
+    the test target. Exercises the S0380.192 Simplified-RR fix and the
+    S0380.193 suspended-floor sealed-rule fix.
+    """
+    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_PDF)
+    site_notes = ElmhurstSiteNotesExtractor(pages).extract()
+    return EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
diff --git a/tests/domain/sap10_calculator/worksheet/test_e2e_elmhurst_sap_score.py b/tests/domain/sap10_calculator/worksheet/test_e2e_elmhurst_sap_score.py
index 4f4653fd..f2abc332 100644
--- a/tests/domain/sap10_calculator/worksheet/test_e2e_elmhurst_sap_score.py
+++ b/tests/domain/sap10_calculator/worksheet/test_e2e_elmhurst_sap_score.py
@@ -38,6 +38,7 @@ from tests.domain.sap10_calculator.worksheet import (
     _elmhurst_worksheet_000516 as _w000516,
     _elmhurst_worksheet_000565 as _w000565,
     _elmhurst_worksheet_001431 as _w001431,
+    _elmhurst_worksheet_001431_rr as _w001431_rr,
 )
 from tests.domain.sap10_calculator.worksheet._elmhurst_fixtures import (
     ALL_FIXTURES as _ELMHURST_FIXTURES,
@@ -167,6 +168,21 @@ _FIXTURE_PINS: Final[dict[str, FixtureCascadePins]] = {
         lighting_kwh_per_yr=283.2229,
         pumps_fans_kwh_per_yr=86.0,
     ),
+    # Mapper-driven cohort entry — Summary_001431_rr_ext.pdf → extractor
+    # → mapper → calculator. Main + Extension, Simplified room-in-roof,
+    # suspended uninsulated floors (the 6035 archetype). Surfaced + pins
+    # S0380.192 (Simplified-RR remaining area) and S0380.193 (suspended-
+    # floor sealed/unsealed rule). Pins are worksheet Block 1 line refs.
+    "001431_rr": FixtureCascadePins(
+        sap_score=69, sap_score_continuous=68.7584, ecf=2.2395,
+        total_fuel_cost_gbp=920.5046, co2_kg_per_yr=4566.7090,
+        space_heating_kwh_per_yr=15269.8593,
+        main_heating_fuel_kwh_per_yr=18178.4039,
+        secondary_heating_fuel_kwh_per_yr=0.0,
+        hot_water_kwh_per_yr=3308.6172,
+        lighting_kwh_per_yr=282.6414,
+        pumps_fans_kwh_per_yr=86.0,
+    ),
 }
 
 
@@ -179,6 +195,7 @@ _FIXTURE_MODULES: Final[dict[str, ModuleType]] = {
     "000516": _w000516,
     "000565": _w000565,
     "001431": _w001431,
+    "001431_rr": _w001431_rr,
 }