From dca2ff0918b058c93afa5292737955343d2c45de Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 27 May 2026 17:31:59 +0000 Subject: [PATCH] =?UTF-8?q?Slice=20S0380.1:=20RED=20=E2=80=94=20pin=20cert?= =?UTF-8?q?=200380=20Summary=20cascade=20against=20worksheet=2088.5104?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds `test_summary_0380_full_chain_sap_matches_worksheet_pdf_exactly` plus the `_SUMMARY_000899_PDF` fixture constant. The test pins the Summary → ElmhurstSiteNotesExtractor → EpcPropertyDataMapper → cert_to_inputs → calculator chain for cert 0380-2471-3250-2596-8761 (Mitsubishi PUZ-WM50VHA ASHP, PCDB index 104568, semi-detached bungalow age D, TFA 60.43 m²) against the unrounded SAP lodged on the `dr87-0001-000899.pdf` worksheet "SAP value" line: **88.5104**. Opens the Summary-path workstream for the 7-cert ASHP cohort. API path is already at the spec-precision floor (Δ +0.0594, pinned by slice 102f). The Summary path becomes the canonical reference once it closes to 1e-4 — the boiler precedents (cert 001479 worksheet 69.0094, cert 0330 worksheet 61.5993) followed the same Summary- first ordering. Diagnostic baseline (printed by the probe in the handover): Summary mapper main_heating_category: None (expected: 4 / HP) Summary mapper main_heating_index_number: 104568 (expected: 104568) Summary path SAP: 33.7920 Δ vs 88.5104: -54.7184 Failure mode is exactly what the handover predicts: the Elmhurst extractor surfaces the PCDB index correctly but leaves `main_heating_category=None`, so `cert_to_inputs` misroutes off the Appendix N3.6/N3.7 heat-pump path and lands on a default boiler-ish cascade. First slice to fix in slice 2: surface `main_heating_category=4` from the Elmhurst Summary heating block when the PCDB index resolves to a HP record. Pyright: 0 errors on the test file. Convention: 1e-4 tolerance per `feedback_zero_error_strict` and the closed-boiler precedent (no widening until cascade matches at 1e-3 and the residual is documented). AAA literal headers per `feedback_aaa_test_convention`. `abs(diff)` not `pytest.approx` per `feedback_abs_diff_over_pytest_approx`. Baseline shifts from "669 pass + 10 pre-existing fail" to "669 pass + 11 fail" — the new fail is the forcing function for the workstream. Refs: - backend/documents_parser/tests/test_summary_pdf_mapper_chain.py:494 - domain/sap10_calculator/docs/HANDOVER_CERT_0380_SUMMARY_PATH.md Co-Authored-By: Claude Opus 4.7 --- .../tests/test_summary_pdf_mapper_chain.py | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py index 18e68a60..557ea037 100644 --- a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py +++ b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py @@ -58,6 +58,7 @@ _SUMMARY_000516_PDF = _FIXTURES / "Summary_000516.pdf" _SUMMARY_001479_PDF = _FIXTURES / "Summary_001479.pdf" _SUMMARY_000897_PDF = _FIXTURES / "Summary_000897.pdf" _SUMMARY_000784_PDF = _FIXTURES / "Summary_000784.pdf" +_SUMMARY_000899_PDF = _FIXTURES / "Summary_000899.pdf" # GOV.UK EPB API JSON for cert 001479 — the API-path counterpart of the # Summary_001479.pdf fixture. Together they drive the API ≡ Summary @@ -491,6 +492,34 @@ def test_summary_0330_full_chain_sap_matches_worksheet_pdf_exactly() -> None: assert abs(result.sap_score_continuous - worksheet_unrounded_sap) < 1e-4 +def test_summary_0380_full_chain_sap_matches_worksheet_pdf_exactly() -> None: + # Arrange — cert 0380-2471-3250-2596-8761 (Summary_000899.pdf / + # dr87-0001-000899.pdf) is the first heat-pump cert under per-cert + # Summary-path mapper validation: Mitsubishi PUZ-WM50VHA ASHP + # (PCDB index 104568), semi-detached bungalow age D, TFA 60.43 m². + # Worksheet PDF "SAP value" line lodges unrounded SAP **88.5104**. + # API-path cohort already pinned at the ±0.07 spec-precision floor + # (SAP 88.5698, Δ +0.0594); the Summary path becomes the canonical + # reference once it closes to 1e-4 — same pattern boilers 001479 + # and 0330 followed. Diagnostic probe at handover baseline: the + # Elmhurst mapper surfaces main_heating_index_number=104568 but + # leaves main_heating_category=None, so the cascade misroutes off + # the heat-pump path and lands at SAP 33.7920 (Δ -54.7184). + pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000899_PDF) + site_notes = ElmhurstSiteNotesExtractor(pages).extract() + epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) + + # Act + result = calculate_sap_from_inputs( + cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES) + ) + + # Assert — 1e-4 pin, no widening, no xfail (project memory + # `feedback_zero_error_strict`). + worksheet_unrounded_sap = 88.5104 + assert abs(result.sap_score_continuous - worksheet_unrounded_sap) < 1e-4 + + _API_0330_JSON = ( Path(__file__).parents[3] / "domain/sap10_calculator/rdsap/tests/fixtures/golden"