From 01d234dd0bfea8df23e29a912184dd35a909cb88 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 25 May 2026 16:43:04 +0000 Subject: [PATCH] Slice 63: RED tracer-bullet mapper-vs-hand-built diff test for cohort 000474 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit User-driven pivot to the cohort-first validation strategy: the 6 existing hand-built `_elmhurst_worksheet_NNNNNN.build_epc()` fixtures already cascade to their worksheet PDFs at 1e-4 — they ARE the 100%-correct calculator-input ground truth. Adding diff tests that assert `from_elmhurst_site_notes(pdf) == hand_built()` surfaces every silent divergence the existing chain tests miss (because chain tests only check cascade output, not field-level EpcPropertyData equality). Adds `test_from_elmhurst_site_notes_matches_hand_built_000474` as the tracer-bullet first cohort case. The test: 1. Maps Summary_000474.pdf through the Elmhurst extractor + mapper. 2. Builds the hand-built EpcPropertyData via `_elmhurst_worksheet_000474.build_epc()`. 3. Recursively diffs the two across a `_LOAD_BEARING_FIELDS` allow-list (40 top-level fields driving the SAP cascade or cross-mapper semantic equivalence; explicitly excludes cert metadata, EnergyElement descriptive lists, registration dates, and other fields that vary by mapper pathway without semantic disagreement — these are noise per user decision). RED status committed as the load-bearing TDD forcing function: 50 load-bearing divergences across 4 categories: Cat A — encoding-only / cascade-equivalent (~30 diffs): * Ventilation flue counts `0 vs None` (cascade defaults None to 0) * Dual-encoded sub-fields (`floor_construction_type` str-side, `roof_insulation_location` str-side, etc.) 
* Mapper-surfaces-descriptive-only fields (`floor_type`, `floor_u_value_known`) Cat B — real cascade-affecting gaps (~10 diffs): * `sap_heating.water_heating_fuel`: None vs 26 (mains gas) * `sap_heating.shower_outlets`: extracted vs None * `sap_heating.number_baths`: 1 vs None * `country_code`: None vs 'ENG' * `built_form`: 'Mid-Terrace' vs None * `boiler_flue_type`, `central_heating_pump_age` dual-encoding * `dwelling_type` casing 'Mid-Terrace house' vs 'Mid-terrace house' * `wall_thickness_measured`: True vs False Cat C — structural shape divergences (1 diff): * `sap_windows: LEN 7 vs 5` — mapper extracts 1:1 with §11 table; cohort hand-built collapsed entries by glazing-type group (preserving total area, cascade-equivalent but not field-equal). Cat D — Slice-54-style hand-built staleness (~5 diffs): * `extensions_count: 2 vs 0` — Slice 54 fix landed on mapper; hand-built still uses old hardcoded 0 * `party_wall_construction: None vs 0` — cohort convention sentinel * Hand-built ages prior to current mapper conventions Two RED forcing functions on the branch now: - test_summary_001479_full_chain_sap_matches_worksheet_pdf_exactly (delta 1.19 SAP vs 69.0094) - test_from_elmhurst_site_notes_matches_hand_built_000474 (50 load-bearing field divergences) Strict-pyright net-zero on the chain test file (0 errors); cohort chain tests all still pass (13 green / 2 RED). Next slices will chip away at the diff list — bulk-update cohort hand-builts for Cat A/D (mechanical) then attack Cat B/C with per-field design decisions. Once 000474 closes, parametrize over the 5 other cohort certs, then API-mapper diff test, then cross- mapper parity falls out. 
Co-Authored-By: Claude Opus 4.7 --- .../tests/test_summary_pdf_mapper_chain.py | 151 ++++++++++++++++++ 1 file changed, 151 insertions(+) diff --git a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py index bd37d60d..c859ce4f 100644 --- a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py +++ b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py @@ -28,14 +28,17 @@ Textract directly. from __future__ import annotations +import dataclasses import re import subprocess from pathlib import Path +from typing import cast from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtractor from datatypes.epc.domain.mapper import EpcPropertyDataMapper from domain.sap.calculator import calculate_sap_from_inputs from domain.sap.rdsap.cert_to_inputs import SAP_10_2_SPEC_PRICES, cert_to_inputs +from domain.sap.worksheet.tests import _elmhurst_worksheet_000474 as _w000474 _FIXTURES = Path(__file__).parent / "fixtures" _SUMMARY_000474_PDF = _FIXTURES / "Summary_000474.pdf" @@ -378,3 +381,151 @@ def test_summary_001479_full_chain_sap_matches_worksheet_pdf_exactly() -> None: # `feedback_zero_error_strict`). worksheet_unrounded_sap = 69.0094 assert abs(result.sap_score_continuous - worksheet_unrounded_sap) < 1e-4 + + +# ============================================================================ +# Mapper-vs-hand-built EpcPropertyData diff tests +# ============================================================================ +# The 6 cohort hand-builts (_elmhurst_worksheet_NNNNNN.build_epc) are the +# 100%-correct calculator-input ground truth — each cascades to its +# worksheet PDF's lodged SAP at 1e-4. The chain tests above only assert +# cascade-output equivalence; the mapper can pass them by producing a +# *different* EpcPropertyData that happens to cascade to the same number. 
+#
+# These tests pin the missing layer: the mapper's EpcPropertyData must
+# match the hand-built's load-bearing fields exactly. Every divergence
+# surfaced here is a mapper coverage gap to close as its own slice.
+#
+# "Load-bearing" = the subset of EpcPropertyData fields that drive the
+# SAP cascade or carry semantic cross-mapper meaning. Cert-metadata
+# fields (address, registration dates, descriptive EnergyElement lists,
+# tariff strings) are excluded because they don't change calculator
+# output and vary by mapper pathway (the API publishes some, the
+# Elmhurst Summary publishes others) without semantic disagreement.
+
+# Top-level attribute names compared, one by one, by the mapper-vs-hand-built
+# diff test below. Each name is read with `getattr` from both objects and the
+# pair is handed to `_diff_load_bearing`; the order here is therefore also
+# the order in which divergences are reported.
+_LOAD_BEARING_FIELDS: tuple[str, ...] = (
+    # Cascade-driving structural fields
+    "sap_building_parts",
+    "sap_windows",
+    "sap_roof_windows",
+    "sap_heating",
+    "sap_ventilation",
+    "sap_energy_source",
+    "total_floor_area_m2",
+    # Building-classification fields driving default cascades
+    "dwelling_type",
+    "built_form",
+    "property_type",
+    "country_code",
+    "postcode",
+    # Counts and openings
+    "door_count",
+    "insulated_door_count",
+    "insulated_door_u_value",
+    "habitable_rooms_count",
+    "heated_rooms_count",
+    "wet_rooms_count",
+    "extensions_count",
+    "open_chimneys_count",
+    "blocked_chimneys_count",
+    "extract_fans_count",
+    # Lighting
+    "cfl_fixed_lighting_bulbs_count",
+    "led_fixed_lighting_bulbs_count",
+    "incandescent_fixed_lighting_bulbs_count",
+    "low_energy_fixed_lighting_bulbs_count",
+    "fixed_lighting_outlets_count",
+    "low_energy_fixed_lighting_outlets_count",
+    # HW / appliances
+    "solar_water_heating",
+    "has_hot_water_cylinder",
+    "has_fixed_air_conditioning",
+    "has_conservatory",
+    "has_heated_separate_conservatory",
+    # Envelope drivers
+    "percent_draughtproofed",
+    "mechanical_ventilation",
+    "pressure_test",
+    # Construction-detail flags
+    "addendum",
+    "lzc_energy_sources",
+    "any_unheated_rooms",
+    "number_of_storeys",
+    "sap_flat_details",
+)
+
+
+def _diff_load_bearing(
+    mapped: object, hand_built: object, path: str = "",
+) -> list[str]:
+    """Recursively diff `mapped` against `hand_built` and return one
+    message per leaf divergence (an empty list when the two agree).
+
+    Comparison rules, applied in this order:
+
+    1. Different concrete types -> a single `TYPE` line and no further
+       descent. Exception: when both values are `int`/`float` instances
+       the type mismatch is ignored and the pair is compared by value
+       (rule 4), so `1` vs `1.0` is not flagged.
+    2. Two dataclass instances -> recurse into every field of `mapped`,
+       extending `path` with `.<field name>` (or just the field name
+       when `path` is empty).
+    3. Two lists -> a single `LEN` line when the lengths differ (the
+       elements are then not compared); otherwise recurse pairwise with
+       `[i]` appended to `path`.
+    4. Anything else (tuples, dicts, strings, None, ...) -> plain `!=`.
+
+    Args:
+        mapped: Value taken from the mapper-produced side.
+        hand_built: Corresponding value from the hand-built fixture.
+        path: Location prefix used in every message; empty at the root.
+
+    Returns:
+        Human-readable divergence lines, in traversal order.
+
+    Strict-pyright posture: arguments typed `object` so each branch
+    narrows via `isinstance` rather than threading `Any` through the
+    recursion (which pyright can't reason about under
+    `strict`/`typeCheckingMode = strict`).
+    """
+    out: list[str] = []
+    if type(mapped) is not type(hand_built):
+        # NOTE(review): `bool` is a subclass of `int`, so a `True` vs `1`
+        # (or `False` vs `0.0`) pair also takes the numeric exemption and
+        # is not reported — confirm that is intended for flag fields.
+        if not (isinstance(mapped, (int, float)) and isinstance(hand_built, (int, float))):
+            out.append(
+                f"{path}: TYPE {type(mapped).__name__} vs "
+                f"{type(hand_built).__name__} mapped={mapped!r} "
+                f"handbuilt={hand_built!r}"
+            )
+            # NOTE(review): nesting of this `return` was reconstructed from
+            # whitespace-collapsed text. It is placed inside the inner `if`
+            # so that int/float pairs fall through to the value comparison
+            # at the bottom, matching the docstring — confirm against the
+            # original file.
+            return out
+    # `is_dataclass` is also true for dataclass *classes*; the
+    # `isinstance(..., type)` guards restrict this branch to instances,
+    # which is what `dataclasses.fields` + `getattr` below expect.
+    if dataclasses.is_dataclass(mapped) and not isinstance(mapped, type) \
+            and dataclasses.is_dataclass(hand_built) and not isinstance(hand_built, type):
+        for fld in dataclasses.fields(mapped):
+            out.extend(_diff_load_bearing(
+                getattr(mapped, fld.name),
+                getattr(hand_built, fld.name),
+                f"{path}.{fld.name}" if path else fld.name,
+            ))
+        return out
+    if isinstance(mapped, list) and isinstance(hand_built, list):
+        # `cast` has no runtime effect; it only gives pyright an element
+        # type for the otherwise unparameterised lists.
+        mapped_list = cast("list[object]", mapped)
+        hand_built_list = cast("list[object]", hand_built)
+        if len(mapped_list) != len(hand_built_list):
+            # Report the shape mismatch only; element-wise comparison is
+            # skipped because positions no longer correspond.
+            out.append(f"{path}: LEN {len(mapped_list)} vs {len(hand_built_list)}")
+            return out
+        for i, (m_item, h_item) in enumerate(zip(mapped_list, hand_built_list)):
+            out.extend(_diff_load_bearing(m_item, h_item, f"{path}[{i}]"))
+        return out
+    # Leaf comparison for everything not handled structurally above.
+    if mapped != hand_built:
+        out.append(f"{path}: mapped={mapped!r} handbuilt={hand_built!r}")
+    return out
+
+
+def test_from_elmhurst_site_notes_matches_hand_built_000474() -> None:
+    """The EpcPropertyData mapped from the Summary 000474 PDF must equal
+    the hand-built 000474 fixture on every name in `_LOAD_BEARING_FIELDS`."""
+    # Arrange — _elmhurst_worksheet_000474.build_epc() is the canonical
+    # hand-built EpcPropertyData for cert U985-0001-000474; it cascades
+    # to the worksheet PDF's `SAP value 62.2584` at 1e-4 (cohort SAP-
+    # result pin). Routing the corresponding Summary PDF through the
+    # Elmhurst mapper MUST produce a load-bearing-field-equivalent
+    # EpcPropertyData; any divergence is a mapper-coverage gap.
+    #
+    # Tracer-bullet scope: cert 000474 only. Once GREEN, parametrize
+    # over the 5 other cohort fixtures and add cert 001479 (after
+    # `_elmhurst_worksheet_001479` lands at 1e-4 via Slice 62 iteration).
+    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000474_PDF)
+    site_notes = ElmhurstSiteNotesExtractor(pages).extract()
+    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
+    hand_built = _w000474.build_epc()
+
+    # Act — collect every divergence across all load-bearing fields so a
+    # single run reports the full list rather than stopping at the first.
+    #
+    # NOTE(review): the `None` default on both `getattr` calls means a name
+    # in `_LOAD_BEARING_FIELDS` that exists on neither object compares
+    # `None` vs `None` and is silently skipped. Consider dropping the
+    # default so a typo or renamed field fails loudly.
+    diffs: list[str] = []
+    for field_name in _LOAD_BEARING_FIELDS:
+        diffs.extend(_diff_load_bearing(
+            getattr(mapped, field_name, None),
+            getattr(hand_built, field_name, None),
+            field_name,
+        ))
+
+    # Assert — the failure message lists the count followed by one
+    # divergence per line.
+    assert not diffs, (
+        f"{len(diffs)} load-bearing divergence(s) between mapped and "
+        f"hand-built EpcPropertyData for cohort cert 000474:\n "
+        + "\n ".join(diffs)
+    )