From 035d916dd68ceadedbd061e8bbb6c4b42284cfd9 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 25 May 2026 17:09:39 +0000 Subject: [PATCH] Slice 70: cohort 000474 mapper-vs-hand-built diff is GREEN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the final 49 → 0 diffs in two moves: 1. **Filter non-load-bearing SapWindow sub-fields from the diff.** The Elmhurst mapper surfaces Summary §11 strings (window_type='Window', glazing_type='Double between 2002 and 2021', glazing_gap='12 mm', data_source='Manufacturer', permanent_shutters_present='None') while the cohort `make_window` helper produces API-style int codes for the same fields. None of these affect the SAP cascade — it reads only window_width / window_height / orientation / window_location / frame_factor / window_transmission_details.{u_value, solar_transmittance}. Adding `_NON_LOAD_BEARING_WINDOW_SUBFIELDS` + `_is_excluded_path` to the diff helper drops them from the comparison without changing the load-bearing scope. Per the user's earlier "load-bearing only" decision — encoding noise that doesn't change the cascade output is excluded. 2. **`make_window` helper now defaults `frame_factor=0.7`.** The SAP10.2 Table 6c PVC default (and the modal value the Elmhurst mapper surfaces from Summary §11). Previously the helper left it `None`, which the cascade resolves to 0.7 internally; setting it explicitly is cascade-equivalent and closes the last 7 diffs. Diff count for cohort 000474: Slice 63 baseline: 50 Slice 64 (Cat A): 14 Slice 65 (HW): 12 Slice 66+67 (mapper): 5 Slice 68 (party-wall): 1 Slice 69 (windows): 49 (encoding-noise surface) Slice 70 (filter): **0** — diff test now GREEN `test_from_elmhurst_site_notes_matches_hand_built_000474` PASSES. First cohort cert fully validated at the EpcPropertyData load-bearing-field level. All 66 cohort cascade pins remain GREEN at 1e-4. Pyright net-zero (0 errors on touched files). 
Next slices: parametrize the diff test over the 5 other cohort certs (000477, 000480, 000487, 000490, 000516) — each may have its own bulk-update + mapper-tweak pattern, but the toolchain (diff helper, exclusion list, _LOAD_BEARING_FIELDS, helper defaults) is in place. Then 001479 (after Slice 62 hand-built hits 1e-4). Then the API mapper diff test (currently the API mapper has its own gaps — Slice 58/59/60 cascade fixes closed golden cert residuals but field-level cross-mapper parity isn't asserted yet). Co-Authored-By: Claude Opus 4.7 --- .../tests/test_summary_pdf_mapper_chain.py | 47 ++++++++++++++++--- .../domain/src/domain/ml/tests/_fixtures.py | 4 ++ 2 files changed, 45 insertions(+), 6 deletions(-) diff --git a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py index c859ce4f..390905a6 100644 --- a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py +++ b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py @@ -403,6 +403,39 @@ def test_summary_001479_full_chain_sap_matches_worksheet_pdf_exactly() -> None: # output and vary by mapper pathway (the API publishes some, the # Elmhurst Summary publishes others) without semantic disagreement. +# SapWindow sub-fields the cascade doesn't read (descriptive Union[int, +# str] codes lodged differently by each mapper). The cascade reads +# window_width / window_height / orientation / window_location / +# frame_factor / window_transmission_details.{u_value,solar_ +# transmittance} — those WILL still be diffed; everything else on +# SapWindow is metadata and excluded to avoid noise from the int/str +# dual encoding (API mapper produces int codes; Elmhurst mapper +# surfaces the Summary's lodged strings). 
+_NON_LOAD_BEARING_WINDOW_SUBFIELDS: frozenset[str] = frozenset({ + "frame_material", + "glazing_gap", + "window_type", + "glazing_type", + "window_wall_type", + "draught_proofed", + "permanent_shutters_present", + "permanent_shutters_insulated", +}) + + +def _is_excluded_path(path: str) -> bool: + """Return True for paths the diff should silently skip — non-cascade- + affecting Union[int, str] encoding differences between the API and + Elmhurst mapper outputs that cohort hand-built fixtures don't pin.""" + if path.startswith("sap_windows[") and "]." in path: + suffix = path.split("].", 1)[1] + if suffix in _NON_LOAD_BEARING_WINDOW_SUBFIELDS: + return True + if suffix == "window_transmission_details.data_source": + return True + return False + + _LOAD_BEARING_FIELDS: tuple[str, ...] = ( # Cascade-driving structural fields "sap_building_parts", @@ -469,11 +502,12 @@ def _diff_load_bearing( out: list[str] = [] if type(mapped) is not type(hand_built): if not (isinstance(mapped, (int, float)) and isinstance(hand_built, (int, float))): - out.append( - f"{path}: TYPE {type(mapped).__name__} vs " - f"{type(hand_built).__name__} mapped={mapped!r} " - f"handbuilt={hand_built!r}" - ) + if not _is_excluded_path(path): + out.append( + f"{path}: TYPE {type(mapped).__name__} vs " + f"{type(hand_built).__name__} mapped={mapped!r} " + f"handbuilt={hand_built!r}" + ) return out if dataclasses.is_dataclass(mapped) and not isinstance(mapped, type) \ and dataclasses.is_dataclass(hand_built) and not isinstance(hand_built, type): @@ -494,7 +528,8 @@ def _diff_load_bearing( out.extend(_diff_load_bearing(m_item, h_item, f"{path}[{i}]")) return out if mapped != hand_built: - out.append(f"{path}: mapped={mapped!r} handbuilt={hand_built!r}") + if not _is_excluded_path(path): + out.append(f"{path}: mapped={mapped!r} handbuilt={hand_built!r}") return out diff --git a/packages/domain/src/domain/ml/tests/_fixtures.py b/packages/domain/src/domain/ml/tests/_fixtures.py index 72ab46a7..a9352f92 
100644 --- a/packages/domain/src/domain/ml/tests/_fixtures.py +++ b/packages/domain/src/domain/ml/tests/_fixtures.py @@ -181,6 +181,9 @@ def make_window( window_wall_type: Union[int, str] = 1, permanent_shutters_present: Union[bool, str] = False, frame_material: Optional[str] = "PVC", + frame_factor: Optional[float] = 0.7, # SAP10.2 Table 6c PVC default; + # mirrors the Elmhurst mapper's + # surfaced value from Summary §11. window_transmission_details: Optional[WindowTransmissionDetails] = None, solar_transmittance: Optional[float] = None, u_value: float = 2.8, @@ -208,6 +211,7 @@ def make_window( window_location=window_location, window_wall_type=window_wall_type, permanent_shutters_present=permanent_shutters_present, + frame_factor=frame_factor, window_transmission_details=window_transmission_details, )