diff --git a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py index c859ce4f..390905a6 100644 --- a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py +++ b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py @@ -403,6 +403,39 @@ def test_summary_001479_full_chain_sap_matches_worksheet_pdf_exactly() -> None: # output and vary by mapper pathway (the API publishes some, the # Elmhurst Summary publishes others) without semantic disagreement. +# SapWindow sub-fields the cascade doesn't read (descriptive Union[int, +# str] codes lodged differently by each mapper). The cascade reads +# window_width / window_height / orientation / window_location / +# frame_factor / window_transmission_details.{u_value,solar_ +# transmittance} — those WILL still be diffed; everything else on +# SapWindow is metadata and excluded to avoid noise from the int/str +# dual encoding (API mapper produces int codes; Elmhurst mapper +# surfaces the Summary's lodged strings). +_NON_LOAD_BEARING_WINDOW_SUBFIELDS: frozenset[str] = frozenset({ + "frame_material", + "glazing_gap", + "window_type", + "glazing_type", + "window_wall_type", + "draught_proofed", + "permanent_shutters_present", + "permanent_shutters_insulated", +}) + + +def _is_excluded_path(path: str) -> bool: + """Return True for paths the diff should silently skip — non-cascade- + affecting Union[int, str] encoding differences between the API and + Elmhurst mapper outputs that cohort hand-built fixtures don't pin.""" + if path.startswith("sap_windows[") and "]." in path: + suffix = path.split("].", 1)[1] + if suffix in _NON_LOAD_BEARING_WINDOW_SUBFIELDS: + return True + if suffix == "window_transmission_details.data_source": + return True + return False + + _LOAD_BEARING_FIELDS: tuple[str, ...] = ( # Cascade-driving structural fields "sap_building_parts", @@ -469,11 +502,12 @@ def _diff_load_bearing( out: list[str] = [] if type(mapped) is not type(hand_built): if not (isinstance(mapped, (int, float)) and isinstance(hand_built, (int, float))): - out.append( - f"{path}: TYPE {type(mapped).__name__} vs " - f"{type(hand_built).__name__} mapped={mapped!r} " - f"handbuilt={hand_built!r}" - ) + if not _is_excluded_path(path): + out.append( + f"{path}: TYPE {type(mapped).__name__} vs " + f"{type(hand_built).__name__} mapped={mapped!r} " + f"handbuilt={hand_built!r}" + ) return out if dataclasses.is_dataclass(mapped) and not isinstance(mapped, type) \ and dataclasses.is_dataclass(hand_built) and not isinstance(hand_built, type): @@ -494,7 +528,8 @@ def _diff_load_bearing( out.extend(_diff_load_bearing(m_item, h_item, f"{path}[{i}]")) return out if mapped != hand_built: - out.append(f"{path}: mapped={mapped!r} handbuilt={hand_built!r}") + if not _is_excluded_path(path): + out.append(f"{path}: mapped={mapped!r} handbuilt={hand_built!r}") return out diff --git a/packages/domain/src/domain/ml/tests/_fixtures.py b/packages/domain/src/domain/ml/tests/_fixtures.py index 72ab46a7..a9352f92 100644 --- a/packages/domain/src/domain/ml/tests/_fixtures.py +++ b/packages/domain/src/domain/ml/tests/_fixtures.py @@ -181,6 +181,9 @@ def make_window( window_wall_type: Union[int, str] = 1, permanent_shutters_present: Union[bool, str] = False, frame_material: Optional[str] = "PVC", + frame_factor: Optional[float] = 0.7, # SAP10.2 Table 6c PVC default; + # mirrors the Elmhurst mapper's + # surfaced value from Summary §11. window_transmission_details: Optional[WindowTransmissionDetails] = None, solar_transmittance: Optional[float] = None, u_value: float = 2.8, @@ -208,6 +211,7 @@ def make_window( window_location=window_location, window_wall_type=window_wall_type, permanent_shutters_present=permanent_shutters_present, + frame_factor=frame_factor, window_transmission_details=window_transmission_details, )