diff --git a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py index 50269060..ab57b2ee 100644 --- a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py +++ b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py @@ -1872,6 +1872,153 @@ def test_api_9418_full_chain_sap_within_spec_floor_of_worksheet() -> None: assert abs(result.sap_score_continuous - 84.6305) < _ASHP_COHORT_CHAIN_TOLERANCE +# ============================================================================ +# Cohort-2 API-path chain tests (cross-mapper parity at the cascade) +# ============================================================================ +# Mirror the cohort-2 Summary-path sweep that closed across S0380.30..38. +# Per [[feedback-cross-mapper-parity-via-cascade]]: API EPC and Elmhurst EPC +# must produce SAP within 1e-4 of each other AND of the worksheet — the +# SAP cascade is the load-bearing equivalence check. Each cert in this +# cohort has both a Summary PDF (under `sap worksheets/additional with +# api 2/{cert_dir}/Summary_*.pdf`) and an API JSON fixture (fetched into +# `domain/sap10_calculator/rdsap/tests/fixtures/golden/{cert_dir}.json` in +# Slice S0380.39). Worksheet SAP is the source of truth. +# +# At HEAD of Slice S0380.40: 34/38 certs hit 1e-4 immediately; the +# remaining 4 are residual-pinned below as forcing functions for the +# next per-cert closure slices (Slice C+). + +_COHORT_2_API_FIXTURE_DIR: Path = ( + Path(__file__).parents[3] + / "domain/sap10_calculator/rdsap/tests/fixtures/golden" +) + +# (cert_dir, worksheet_unrounded_sap) — 34 cohort-2 certs whose API-path +# cascade hits the worksheet's continuous SAP at 1e-4 without any +# follow-up mapper work. Identical to the Summary-path sweep at the +# same tolerance: cross-mapper parity is achieved via cascade output +# equivalence (per [[feedback-cross-mapper-parity-via-cascade]]).
# Worksheet values below are written to a uniform four decimal places;
# trailing zeros are presentational only and do not change the float.
_COHORT_2_API_CLOSED: list[tuple[str, float]] = [
    ("0036-6325-1100-0063-1226", 62.7471),
    ("0100-5141-0522-4696-3463", 85.8332),
    ("0200-3155-0122-2602-3563", 80.8674),
    ("0310-2763-5450-2506-3501", 78.3593),
    ("0320-2126-2150-2326-6161", 71.7224),
    ("0320-2756-8640-2296-1101", 89.9458),
    ("0330-2257-3640-2196-3145", 84.6541),
    ("0360-2266-5650-2106-8285", 80.4680),
    ("0380-2530-6150-2326-4161", 65.7795),
    ("0390-2066-4250-2026-4555", 65.3253),
    ("0464-3032-0205-4276-3204", 80.4533),
    ("0652-3022-1205-2826-1200", 70.9577),
    ("2007-3011-9205-8136-3204", 68.3914),
    ("2031-3007-0205-1296-3204", 64.1734),
    ("2130-3018-4205-4686-5204", 71.3158),
    ("2336-3124-3600-0517-1292", 83.4955),
    ("2536-2525-0600-0788-2292", 79.7264),
    ("2590-3025-7205-9066-0200", 65.9194),
    ("2699-3025-5205-8066-0200", 68.7535),
    ("2800-7999-0322-4594-3563", 78.1408),
    ("3136-7925-4500-0246-6202", 77.8872),
    ("3336-2825-9400-0512-8292", 78.3739),
    ("4536-5424-8600-0109-1226", 82.4974),
    ("4536-8325-3100-0409-1222", 65.6000),
    ("4800-3992-0422-0599-3563", 86.7192),
    ("6835-3920-2509-0933-5226", 80.1977),
    ("7700-3362-0922-7022-3563", 63.4425),
    ("7800-1501-0922-7127-3563", 64.7504),
    ("7836-3125-0600-0526-2202", 80.1792),
    ("9036-0824-3500-0420-8222", 84.2727),
    ("9370-3060-1205-3546-4204", 87.8687),
    ("9421-3045-3205-1646-6200", 87.4495),
    ("9796-3058-6205-0346-9200", 90.1318),
    ("9836-7525-9500-0575-1202", 75.2223),
]

# (cert_dir, worksheet_unrounded_sap, current_cascade_continuous_sap)
# — the 4 cohort-2 certs whose API-path cascade does NOT yet hit the
# worksheet at 1e-4. The third tuple element is the cascade's current
# `sap_score_continuous` at HEAD of Slice S0380.40, pinned at abs <=
# 1e-4 as a forcing function: when a follow-up slice closes the
# residual, the cascade output moves and this assertion fires, forcing
# the cert to migrate from `_COHORT_2_API_OPEN` to `_COHORT_2_API_CLOSED`.
#
# Cluster diagnosis (handover to next agent); "ws Δ" is cascade − worksheet:
#   - 0300/1536/9380: ws Δ = +0.42..+0.44, tight 0.02-band cluster
#     — likely a single shared cascade-spec gap (heating/cooling
#     dispatch or RdSAP fuel-factor cascade). Summary path hits 1e-4
#     on all three, so the gap is API-mapper-specific (a field the
#     Summary mapper surfaces and the API mapper drops or mis-routes).
#   - 2102: ws Δ = -6.30, roughly an order of magnitude (~15x) worse.
#     Summary path hits 1e-4 (cohort-2 Summary sweep is 38/38). The
#     Summary test
#     `test_summary_2102_secondary_heating_routes_house_coal_for_open_fire`
#     covers the cert's open-fire + house-coal secondary heating; the
#     API mapper likely lodges the secondary fuel differently. Probe
#     the API JSON's `secondary_heating` block first.
_COHORT_2_API_OPEN: list[tuple[str, float, float]] = [
    ("0300-2403-2650-2206-0235", 76.6541, 77.084454),
    ("1536-9325-5100-0433-1226", 65.8928, 66.337334),
    ("9380-2957-7490-2595-3141", 74.5902, 75.010196),
    ("2102-3018-0205-7886-5204", 63.8732, 57.570156),
]

# Absolute tolerance shared by the closed-cert worksheet check and the
# open-cert residual pin, so the two tests cannot drift apart.
_COHORT_2_API_CHAIN_TOLERANCE: float = 1e-4


def _cascade_continuous_sap_from_api(cert_dir_name: str) -> float:
    """Run the API-path chain for one cert and return its continuous SAP.

    Reads ``{cert_dir_name}.json`` from `_COHORT_2_API_FIXTURE_DIR`, maps it
    with `EpcPropertyDataMapper.from_api_response`, converts the result with
    `cert_to_inputs` (priced with `SAP_10_2_SPEC_PRICES`) and returns the
    `sap_score_continuous` attribute of `calculate_sap_from_inputs`' result.
    """
    fixture_path = _COHORT_2_API_FIXTURE_DIR / f"{cert_dir_name}.json"
    # Pin the encoding: the default for `read_text()` follows the locale,
    # which would make fixture decoding platform-dependent.
    doc = json.loads(fixture_path.read_text(encoding="utf-8"))
    epc = EpcPropertyDataMapper.from_api_response(doc)
    inputs = cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)
    return calculate_sap_from_inputs(inputs).sap_score_continuous


@pytest.mark.parametrize("cert_dir_name,ws_sap", _COHORT_2_API_CLOSED)
def test_api_cohort_2_full_chain_sap_matches_worksheet_at_1e_minus_4(
    cert_dir_name: str, ws_sap: float
) -> None:
    """API-path mirror of the cohort-2 Summary-path sweep.

    For each cert: the GOV.UK EPB API JSON → `from_api_response` →
    `cert_to_inputs` → `calculate_sap_from_inputs` chain must hit the
    worksheet's continuous SAP at abs <= 1e-4 — the same tolerance
    the Summary path achieves. Cross-mapper parity at the cascade
    output ([[feedback-cross-mapper-parity-via-cascade]])."""
    # Act — the parametrized inputs are the arrangement; run the chain.
    actual = _cascade_continuous_sap_from_api(cert_dir_name)
    delta = actual - ws_sap

    # Assert
    assert abs(delta) <= _COHORT_2_API_CHAIN_TOLERANCE, (
        f"cert {cert_dir_name}: cascade SAP={actual:.6f} vs worksheet {ws_sap}; Δ={delta:+.6f}"
    )


@pytest.mark.parametrize(
    "cert_dir_name,ws_sap,pinned_continuous_sap", _COHORT_2_API_OPEN
)
def test_api_cohort_2_open_cert_residual_matches_current_pin(
    cert_dir_name: str, ws_sap: float, pinned_continuous_sap: float
) -> None:
    """Residual pin for the 4 cohort-2 API-path certs that DON'T yet hit
    1e-4 against the worksheet. The pin asserts the cascade's current
    `sap_score_continuous` at abs <= 1e-4 — a forcing function: when a
    follow-up slice closes the underlying mapper or spec gap, the
    cascade output moves and this test fires, forcing the cert to
    migrate from `_COHORT_2_API_OPEN` to `_COHORT_2_API_CLOSED`. Per
    [[project-api-to-sap-residual-test]] this is the established
    pattern for tracking residuals as forcing functions, not as
    tolerance widening.

    A pin that already sits within tolerance of the worksheet is rejected
    up front: such a cert is closed and must not stay in the open table."""
    # Arrange — table-integrity guard, checked before the cascade runs so
    # re-pinning a closed cert cannot silently satisfy the test.
    assert abs(pinned_continuous_sap - ws_sap) > _COHORT_2_API_CHAIN_TOLERANCE, (
        f"cert {cert_dir_name}: pin {pinned_continuous_sap} is within tolerance of "
        f"worksheet {ws_sap}; migrate it to _COHORT_2_API_CLOSED"
    )

    # Act
    actual = _cascade_continuous_sap_from_api(cert_dir_name)

    # Assert — Δ vs PINNED cascade output (worksheet Δ stays surfaced
    # in the message for diagnostic context).
    assert abs(actual - pinned_continuous_sap) <= _COHORT_2_API_CHAIN_TOLERANCE, (
        f"cert {cert_dir_name}: cascade SAP={actual:.6f} moved from pin "
        f"{pinned_continuous_sap}; worksheet Δ now {actual - ws_sap:+.6f}"
    )


# ============================================================================
# Mapper-vs-hand-built EpcPropertyData diff tests
# ============================================================================