diff --git a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py
index 390905a6..de8218cf 100644
--- a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py
+++ b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py
@@ -38,7 +38,10 @@ from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtract
 from datatypes.epc.domain.mapper import EpcPropertyDataMapper
 from domain.sap.calculator import calculate_sap_from_inputs
 from domain.sap.rdsap.cert_to_inputs import SAP_10_2_SPEC_PRICES, cert_to_inputs
-from domain.sap.worksheet.tests import _elmhurst_worksheet_000474 as _w000474
+from domain.sap.worksheet.tests import (
+    _elmhurst_worksheet_000474 as _w000474,
+    _elmhurst_worksheet_000477 as _w000477,
+)
 
 _FIXTURES = Path(__file__).parent / "fixtures"
 _SUMMARY_000474_PDF = _FIXTURES / "Summary_000474.pdf"
@@ -564,3 +567,39 @@ def test_from_elmhurst_site_notes_matches_hand_built_000474() -> None:
         f"hand-built EpcPropertyData for cohort cert 000474:\n "
         + "\n ".join(diffs)
     )
+
+
+def test_from_elmhurst_site_notes_matches_hand_built_000477() -> None:
+    # Arrange — run the Summary PDF for cert U985-0001-000477 through the
+    # Elmhurst extractor + mapper, and build the reference object with
+    # _elmhurst_worksheet_000477.build_epc(): the canonical hand-built
+    # EpcPropertyData for that cert (single-bp mid-terrace, age band B,
+    # RIR with stud walls + party gables, no extension), which cascades
+    # to the worksheet PDF's `SAP value 65.0057` at 1e-4.
+    # NOTE(review): _SUMMARY_000477_PDF is not defined in any hunk shown
+    # here — confirm it already exists beside _SUMMARY_000474_PDF.
+    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000477_PDF)
+    extracted = ElmhurstSiteNotesExtractor(textract_pages).extract()
+    mapped_epc = EpcPropertyDataMapper.from_elmhurst_site_notes(extracted)
+    reference_epc = _w000477.build_epc()
+
+    # Act — collect every mismatch over the load-bearing fields; a field
+    # missing from either object is compared as None.
+    diffs: list[str] = [
+        message
+        for field_name in _LOAD_BEARING_FIELDS
+        for message in _diff_load_bearing(
+            getattr(mapped_epc, field_name, None),
+            getattr(reference_epc, field_name, None),
+            field_name,
+        )
+    ]
+
+    # Assert — the mapper MUST yield a load-bearing-field-equivalent
+    # EpcPropertyData; any divergence is a mapper-coverage gap to close
+    # as its own slice.
+    assert not diffs, (
+        f"{len(diffs)} load-bearing divergence(s) between mapped and "
+        f"hand-built EpcPropertyData for cohort cert 000477:\n "
+        + "\n ".join(diffs)
+    )