diff --git a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py
index e8eda558..56659081 100644
--- a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py
+++ b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py
@@ -42,6 +42,7 @@ from domain.sap.worksheet.tests import (
     _elmhurst_worksheet_000474 as _w000474,
     _elmhurst_worksheet_000477 as _w000477,
     _elmhurst_worksheet_000480 as _w000480,
+    _elmhurst_worksheet_000487 as _w000487,
 )
 
 _FIXTURES = Path(__file__).parent / "fixtures"
@@ -628,3 +629,40 @@ def test_from_elmhurst_site_notes_matches_hand_built_000480() -> None:
         f"hand-built EpcPropertyData for cohort cert 000480:\n "
         + "\n ".join(diffs)
     )
+
+
+def test_from_elmhurst_site_notes_matches_hand_built_000487() -> None:
+    # Arrange — reference object: _elmhurst_worksheet_000487.build_epc(),
+    # the canonical hand-built EpcPropertyData for cert
+    # U985-0001-000487. Dwelling: Enclosed Mid-Terrace; main + 1
+    # extension + 21.03 m² RIR with explicit-U gable_wall_external;
+    # gas combi; 1 electric shower; 1.43 m² timber-frame alt wall on
+    # the extension. It cascades to the worksheet PDF's
+    # `SAP value 61.6431` at 1e-4, so the Summary PDF routed through
+    # the Elmhurst mapper MUST yield an EpcPropertyData that agrees on
+    # every load-bearing field. A divergence here means a mapper-
+    # coverage gap, to be closed as its own slice.
+    # NOTE(review): _SUMMARY_000487_PDF is not introduced by this
+    # patch — confirm it already exists at module level.
+    pdf_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000487_PDF)
+    notes = ElmhurstSiteNotesExtractor(pdf_pages).extract()
+    actual = EpcPropertyDataMapper.from_elmhurst_site_notes(notes)
+    expected = _w000487.build_epc()
+
+    # Act — gather the per-field divergences in _LOAD_BEARING_FIELDS order.
+    diffs: list[str] = [
+        divergence
+        for name in _LOAD_BEARING_FIELDS
+        for divergence in _diff_load_bearing(
+            getattr(actual, name, None),
+            getattr(expected, name, None),
+            name,
+        )
+    ]
+
+    # Assert — no divergences; on failure list each one.
+    assert not diffs, (
+        f"{len(diffs)} load-bearing divergence(s) between mapped and "
+        f"hand-built EpcPropertyData for cohort cert 000487:\n "
+        + "\n ".join(diffs)
+    )