diff --git a/backend/documents_parser/tests/fixtures/Summary_000565_case50.pdf b/backend/documents_parser/tests/fixtures/Summary_000565_case50.pdf new file mode 100644 index 00000000..ab23b998 Binary files /dev/null and b/backend/documents_parser/tests/fixtures/Summary_000565_case50.pdf differ diff --git a/tests/domain/sap10_calculator/worksheet/_elmhurst_worksheet_000565_case50.py b/tests/domain/sap10_calculator/worksheet/_elmhurst_worksheet_000565_case50.py new file mode 100644 index 00000000..e051922b --- /dev/null +++ b/tests/domain/sap10_calculator/worksheet/_elmhurst_worksheet_000565_case50.py @@ -0,0 +1,113 @@ +"""Mapper-driven cascade pin against the Elmhurst U985-0001-000565 +"simulated case 50" worksheet — an all-electric Economy-7 dwelling with +MVHR + electric storage heaters + a DUAL-immersion hot-water cylinder. + +Like 000565 / the _rr cases, this fixture does NOT hand-build the +EpcPropertyData: it routes the Summary PDF through +ElmhurstSiteNotesExtractor + from_elmhurst_site_notes so the SAP-result +pin grid exercises the WHOLE extractor + mapper + calculator pipeline. + +This case was hand-built (Khalim) to ground-truth the dual-immersion +cylinder water-heating path, and it exercises two distinct off-peak +tariff mechanics that the gas/standard-tariff fixtures don't reach: + + - The Table 13 HW high-rate fraction for a whc-903 DUAL electric + immersion (0.1009 high / 0.8991 low) — the COST (6.4878 p/kWh) AND + the CO2/PE split ("Water heating - high/low rate cost", fixed in + `_electric_immersion_hw_high_rate_fraction`). + - The Table 12a Grid 2 fan fraction (0.71/0.58) for MVHR fan + electricity (315.64 kWh, 100% MVHR per worksheet line 230a) — billed + distinct from "all other uses" 0.90/0.80 (fixed by including the MVHR + fan in `mev_kwh_for_cost_split`). + +Unknown meter + dual electric immersion resolves to 7-hour off-peak via +the §12 trigger. After both fixes the existing-dwelling rating reconciles +to the U985 worksheet EXACTLY, including the (272) rating CO2. + +Cert shape: 000565 semi shell, main = electric storage heaters (SAP 402, +manual charge control), portable electric secondary (SAP 693), water +heating from a whc-903 dual electric immersion + 160 L foam cylinder (no +cylinder stat), MVHR (Vent Axia, PCDB 500140), one instantaneous electric +shower, no PV, Economy-7. + +Source: user-simulated PDFs at `sap worksheets/golden fixture debugging/ +simulated case 50/`. The Summary is mirrored into the tracked +`backend/documents_parser/tests/fixtures/Summary_000565_case50.pdf` so the +test runs without depending on the unstaged workspace. + +Worksheet pin targets (U985-0001-000565 block 1 — existing dwelling SAP): +- SAP rating 39 (258); continuous 38.8426; ECF 4.4252 (257) +- Total fuel cost £1317.0116 (255) +- Total CO2 2397.1237 kg/year (272) +- Space heating 14318.4904 kWh/year ((98c)) +- Main 1 fuel 12170.7169 kWh/year (211) +- Secondary fuel 2147.7736 kWh/year (215) +- Hot water fuel 1668.0788 kWh/year (219) +- Lighting 435.3204 kWh/year (232) +- Pumps/fans 315.6384 kWh/year (231) + +Per [[feedback-zero-error-strict]] + [[feedback-e2e-validation- +philosophy]]: pins are abs=1e-4 against the worksheet PDF. The pin +values live in `test_e2e_elmhurst_sap_score._FIXTURE_PINS`. +""" + +from __future__ import annotations + +import re +import subprocess +from pathlib import Path +from typing import Final + +from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtractor +from datatypes.epc.domain.epc_property_data import EpcPropertyData +from datatypes.epc.domain.mapper import EpcPropertyDataMapper + +# parents[0]=worksheet/, [1]=sap10_calculator/, [2]=domain/, [3]=tests/, +# [4]=repo root. +_SUMMARY_PDF: Final[Path] = ( + Path(__file__).resolve().parents[4] + / "backend" / "documents_parser" / "tests" / "fixtures" + / "Summary_000565_case50.pdf" +) + + +def _summary_pdf_to_textract_style_pages(pdf_path: Path) -> list[str]: + """Convert a Summary PDF into the per-page text format the + ElmhurstSiteNotesExtractor expects (label\\nvalue sequences). Mirror + of the helper in the other `_elmhurst_worksheet_*` fixtures. + """ + info = subprocess.run( + ["pdfinfo", str(pdf_path)], capture_output=True, text=True, check=True, + ).stdout + m = re.search(r"Pages:\s+(\d+)", info) + if m is None: + raise RuntimeError(f"Could not parse page count from {pdf_path}") + page_count = int(m.group(1)) + pages: list[str] = [] + for i in range(1, page_count + 1): + layout = subprocess.run( + [ + "pdftotext", "-layout", "-f", str(i), "-l", str(i), + str(pdf_path), "-", + ], + capture_output=True, text=True, check=True, + ).stdout + tokens: list[str] = [] + for line in layout.splitlines(): + if not line.strip(): + tokens.append("") + continue + parts = [p for p in re.split(r"\s{2,}", line.strip()) if p] + tokens.extend(parts) + pages.append("\n".join(tokens)) + return pages + + +def build_epc() -> EpcPropertyData: + """Route the simulated case-50 Summary through extractor + mapper. + No hand-built EpcPropertyData — the extractor and mapper are part of + the test target. + """ + pages = _summary_pdf_to_textract_style_pages(_SUMMARY_PDF) + site_notes = ElmhurstSiteNotesExtractor(pages).extract() + return EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) diff --git a/tests/domain/sap10_calculator/worksheet/test_e2e_elmhurst_sap_score.py b/tests/domain/sap10_calculator/worksheet/test_e2e_elmhurst_sap_score.py index 13b1b1bb..918329d9 100644 --- a/tests/domain/sap10_calculator/worksheet/test_e2e_elmhurst_sap_score.py +++ b/tests/domain/sap10_calculator/worksheet/test_e2e_elmhurst_sap_score.py @@ -48,6 +48,7 @@ from tests.domain.sap10_calculator.worksheet import ( _elmhurst_worksheet_001431_case6 as _w001431_case6, _elmhurst_worksheet_001431_case7 as _w001431_case7, _elmhurst_worksheet_001431_case20 as _w001431_case20, + _elmhurst_worksheet_000565_case50 as _w000565_case50, _elmhurst_worksheet_000565_case52 as _w000565_case52, ) from tests.domain.sap10_calculator.worksheet._elmhurst_fixtures import ( @@ -297,6 +298,22 @@ _FIXTURE_PINS: Final[dict[str, FixtureCascadePins]] = { lighting_kwh_per_yr=246.3083, pumps_fans_kwh_per_yr=0.0, ), + # Mapper-driven — Summary_000565_case50.pdf → extractor → mapper → + # calculator. All-electric Economy-7: storage-heater main (SAP 402) + + # MVHR + whc-903 DUAL electric immersion + 160 L cylinder. Exercises + # the Table 13 HW high/low split (cost + CO2/PE) AND the Table 12a + # Grid 2 MVHR fan fraction (0.71/0.58). Reconciles to the worksheet + # EXACTLY after both off-peak fixes (incl. the (272) rating CO2). + "000565_case50": FixtureCascadePins( + sap_score=39, sap_score_continuous=38.8426, ecf=4.4252, + total_fuel_cost_gbp=1317.0116, co2_kg_per_yr=2397.1237, + space_heating_kwh_per_yr=14318.4904, + main_heating_fuel_kwh_per_yr=12170.7169, + secondary_heating_fuel_kwh_per_yr=2147.7736, + hot_water_kwh_per_yr=1668.0788, + lighting_kwh_per_yr=435.3204, + pumps_fans_kwh_per_yr=315.6384, + ), # Mapper-driven — Summary_000565_case52.pdf → extractor → mapper → # calculator. Regular (non-combi) mains-gas boiler (SAP 102) + a # 160 L foam cylinder heated from the main (WHC 901), no cylinder @@ -333,6 +350,7 @@ _FIXTURE_MODULES: Final[dict[str, ModuleType]] = { "001431_case6": _w001431_case6, "001431_case7": _w001431_case7, "001431_case20": _w001431_case20, + "000565_case50": _w000565_case50, "000565_case52": _w000565_case52, }