diff --git a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py
index de8218cf..e8eda558 100644
--- a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py
+++ b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py
@@ -41,6 +41,7 @@ from domain.sap.rdsap.cert_to_inputs import SAP_10_2_SPEC_PRICES, cert_to_inputs
 from domain.sap.worksheet.tests import (
     _elmhurst_worksheet_000474 as _w000474,
     _elmhurst_worksheet_000477 as _w000477,
+    _elmhurst_worksheet_000480 as _w000480,
 )
 
 _FIXTURES = Path(__file__).parent / "fixtures"
@@ -597,3 +598,36 @@ def test_from_elmhurst_site_notes_matches_hand_built_000477() -> None:
         f"hand-built EpcPropertyData for cohort cert 000477:\n "
         + "\n ".join(diffs)
     )
+
+
+def test_from_elmhurst_site_notes_matches_hand_built_000480() -> None:
+    # Arrange — _elmhurst_worksheet_000480.build_epc() supplies the
+    # canonical hand-built EpcPropertyData for cert U985-0001-000480
+    # (mid-terrace with main + 1 extension + 19.83 m² RIR, gas combi),
+    # which cascades to the worksheet PDF's `SAP value 61.2986` at
+    # 1e-4. The Summary PDF routed through the Elmhurst mapper MUST
+    # yield a load-bearing-field-equivalent EpcPropertyData; any
+    # divergence is a mapper-coverage gap to close as its own slice.
+    pdf_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000480_PDF)
+    notes = ElmhurstSiteNotesExtractor(pdf_pages).extract()
+    mapped_epc = EpcPropertyDataMapper.from_elmhurst_site_notes(notes)
+    reference_epc = _w000480.build_epc()
+
+    # Act — collect divergences field by field, in
+    # _LOAD_BEARING_FIELDS order.
+    mismatches: list[str] = [
+        mismatch
+        for name in _LOAD_BEARING_FIELDS
+        for mismatch in _diff_load_bearing(
+            getattr(mapped_epc, name, None),
+            getattr(reference_epc, name, None),
+            name,
+        )
+    ]
+
+    # Assert
+    header = (
+        f"{len(mismatches)} load-bearing divergence(s) between mapped and "
+        f"hand-built EpcPropertyData for cohort cert 000480:\n "
+    )
+    assert not mismatches, header + "\n ".join(mismatches)