diff --git a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py index 4c56fce0..7042c32f 100644 --- a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py +++ b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py @@ -75,6 +75,7 @@ _SUMMARY_000889_PDF = _FIXTURES / "Summary_000889.pdf" # cert 2536 (Normal cyli _SUMMARY_000884_PDF = _FIXTURES / "Summary_000884.pdf" # cert 9421 (Normal cylinder) _SUMMARY_000910_PDF = _FIXTURES / "Summary_000910.pdf" # cert 0036 (Flat, party wall U=0) _SUMMARY_000890_PDF = _FIXTURES / "Summary_000890.pdf" # cert 7800 (two electric showers) +_SUMMARY_000565_PDF = _FIXTURES / "Summary_000565.pdf" # cert 000565 (5-bp Elmhurst-only) # GOV.UK EPB API JSON for cert 001479 — the API-path counterpart of the # Summary_001479.pdf fixture. Together they drive the API ≡ Summary @@ -1090,6 +1091,93 @@ def test_summary_mapper_raises_on_unmapped_glazing_type_label() -> None: assert excinfo.value.value == "Quintuple glazed with helium" +def test_summary_000565_ext1_wall_construction_routes_to_stone_granite() -> None: + # Arrange — RdSAP 10 §3.3 + Table 4: cert 000565 Ext1 lodges + # "SG Stone: granite or whinstone" which routes to SAP10 + # WALL_STONE_GRANITE=1. Pre-S0380.64 fell through silent-None, + # losing the Ext1 wall channel (worksheet line 29a: 91.83 m² × + # U=1.7 = 156.11 W/K) from the cascade fabric subtotal. + pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF) + site_notes = ElmhurstSiteNotesExtractor(pages).extract() + + # Act + epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) + + # Assert + assert epc.sap_building_parts[1].wall_construction == 1 + + +def test_summary_000565_ext3_ext4_wall_constructions_route_to_basement_code_6() -> None: + # Arrange — RdSAP 10 §5.17 / Table 23: cert 000565 Ext3 + Ext4 + # lodge "B Basement wall". 
The canonical + # `BASEMENT_WALL_CONSTRUCTION_CODE=6` triggers the cascade's + # `part.main_wall_is_basement` route to `u_basement_wall` at + # heat_transmission.py:640. Pre-S0380.64 silent-None bypassed + # the basement-wall override entirely. + pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF) + site_notes = ElmhurstSiteNotesExtractor(pages).extract() + + # Act + epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) + + # Assert + assert epc.sap_building_parts[3].wall_construction == 6 + assert epc.sap_building_parts[3].main_wall_is_basement is True + assert epc.sap_building_parts[4].wall_construction == 6 + assert epc.sap_building_parts[4].main_wall_is_basement is True + + +def test_summary_000565_ext1_party_wall_routes_to_cavity_filled_code_4() -> None: + # Arrange — RdSAP 10 Table 15 row 3 "Cavity masonry filled": + # cert 000565 Ext1 lodges "CF Cavity masonry filled". Routes + # to SAP10 code 4 (Cavity). TODO(S0380.64+1): Table 15 row 3 + # spec U=0.20; today's `u_party_wall` only returns 0.0 / 0.5 / + # 0.25 for code 4 so the cascade conservatively rounds up to + # the cavity-unfilled U=0.5 — matches the pre-existing + # `_API_PARTY_WALL_CONSTRUCTION_TO_SAP10[3]` approximation. + pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF) + site_notes = ElmhurstSiteNotesExtractor(pages).extract() + + # Act + epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) + + # Assert + assert epc.sap_building_parts[1].party_wall_construction == 4 + + +def test_summary_mapper_raises_on_unmapped_wall_type_code() -> None: + # Arrange — strict-coverage gate per [[reference-unmapped-api-code]] + # mirror: an Elmhurst wall_type lodgement that isn't in + # `_ELMHURST_WALL_CODE_TO_SAP10` raises `UnmappedElmhurstLabel` + # rather than silently routing through wall_construction=None. + # The silent-None failure mode is what hid cert 000565 Ext1/3/4's + # ~300 W/K cascade gap until the S0380.64 fabric-loss audit. 
+ pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF) + site_notes = ElmhurstSiteNotesExtractor(pages).extract() + site_notes.walls.wall_type = "XX Unknown construction" + + # Act / Assert + with pytest.raises(UnmappedElmhurstLabel) as excinfo: + EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) + assert excinfo.value.field == "walls.wall_type" + assert excinfo.value.value == "XX Unknown construction" + + +def test_summary_mapper_raises_on_unmapped_party_wall_type_code() -> None: + # Arrange — mirror strict-coverage gate for party-wall-type + # lodgements (same silent-None failure mode at the + # `_elmhurst_party_wall_construction_int` boundary). + pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF) + site_notes = ElmhurstSiteNotesExtractor(pages).extract() + site_notes.walls.party_wall_type = "YY Unknown party wall" + + # Act / Assert + with pytest.raises(UnmappedElmhurstLabel) as excinfo: + EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) + assert excinfo.value.field == "walls.party_wall_type" + assert excinfo.value.value == "YY Unknown party wall" + + # ---------------------------------------------------------------------- # API mapper strict-raise — mirror the Elmhurst UnmappedElmhurstLabel # coverage gate on the GOV.UK API path. The same failure mode (silently diff --git a/datatypes/epc/domain/mapper.py b/datatypes/epc/domain/mapper.py index d56ffaaa..f6f3abe6 100644 --- a/datatypes/epc/domain/mapper.py +++ b/datatypes/epc/domain/mapper.py @@ -2077,6 +2077,9 @@ def _leading_code(value: str) -> str: _ELMHURST_WALL_CODE_TO_SAP10: Dict[str, int] = { "ST": 1, # Stone (granite/sandstone) — placeholder; sandstone vs granite # ambiguity resolved downstream via walls[].description. + "SG": 1, # Stone: granite or whinstone (cert 000565 Ext1) — the + # granite-specific Elmhurst variant of "ST"; same SAP10 + # WALL_STONE_GRANITE=1 cascade entry. 
"SB": 3, # Solid brick (cohort cert lodgement) "SO": 3, # Solid brick (newer Elmhurst PDF variant — same SAP10 # mapping; cert 9501 lodges "SO Solid Brick" where the @@ -2085,6 +2088,15 @@ _ELMHURST_WALL_CODE_TO_SAP10: Dict[str, int] = { "TF": 5, # Timber frame "TI": 5, # Timber frame (Elmhurst's alt-wall code; same SAP10 mapping) "SY": 6, # System build + "B": 6, # Basement wall (cert 000565 Ext3+Ext4) — routes to the + # `BASEMENT_WALL_CONSTRUCTION_CODE=6` canonical signal so + # the cascade's `part.main_wall_is_basement` triggers the + # RdSAP 10 §5.17 / Table 23 `u_basement_wall` override + # (heat_transmission.py:640). Collides numerically with + # "SY" System build — the cascade's basement check + # precedes `u_wall(construction=6)` so SY would be + # silently mis-routed to u_basement_wall today; no cohort + # fixture exercises SY yet so the conflict is dormant. "CO": 7, # Cob "PH": 8, # Park home "CW": 9, # Curtain wall @@ -2151,9 +2163,19 @@ def _elmhurst_dwelling_type( def _elmhurst_wall_construction_int(coded: str) -> Optional[int]: """Map an Elmhurst wall_type string ('CA Cavity') to the SAP10 - integer code (4). Returns None when the leading code isn't a known - SAP10 wall type.""" - return _ELMHURST_WALL_CODE_TO_SAP10.get(_leading_code(coded)) + integer code (4). Returns None when the lodging is absent (empty + string). 
Raises `UnmappedElmhurstLabel` when a non-empty code + isn't in `_ELMHURST_WALL_CODE_TO_SAP10` — that's a mapper-coverage + gap that should be made explicit so the next fixture forces a + dict entry rather than silently routing to wall_construction=None + (the failure mode that hid cert 000565 Ext1/Ext3/Ext4's ~300 W/K + cascade fabric-loss gap until the S0380.64 audit).""" + code = _leading_code(coded) + if not code: + return None + if code not in _ELMHURST_WALL_CODE_TO_SAP10: + raise UnmappedElmhurstLabel("walls.wall_type", coded) + return _ELMHURST_WALL_CODE_TO_SAP10[code] # Elmhurst Party Wall Type codes — distinct category-set from the Wall @@ -2168,6 +2190,13 @@ _ELMHURST_PARTY_WALL_CODE_TO_SAP10: Dict[str, int] = { "CU": 4, # Cavity masonry unfilled — same U=0.5 cascade; Elmhurst # encodes party-wall cavity type with the masonry sub-code # (CU vs CF filled) — observed first on cert 001479 Main. + "CF": 4, # Cavity masonry filled (cert 000565 Ext1) — RdSAP 10 + # Table 15 row 3 spec U=0.20. The cascade's `u_party_wall` + # only returns 0.0 / 0.5 / 0.25 for code 4 today, so CF + # rounds up to the conservative cavity-unfilled U=0.5 — + # matches the existing + # `_API_PARTY_WALL_CONSTRUCTION_TO_SAP10[3]` approximation until + # u_party_wall gains the filled-cavity branch (TODO). # "U Unable to determine" — the cohort's modal lodgement. The cohort # hand-built convention uses 0 as the explicit "unknown" sentinel # (rather than None) so cross-mapper field parity is preserved; the @@ -2178,10 +2207,19 @@ _ELMHURST_PARTY_WALL_CODE_TO_SAP10: Dict[str, int] = { def _elmhurst_party_wall_construction_int(coded: str) -> Optional[int]: - """Map an Elmhurst party-wall-type string to a SAP10 wall_construction - integer. 
Returns None for 'U Unable to determine' (cascade default - U=0.25 then applies) and for unrecognised codes.""" - return _ELMHURST_PARTY_WALL_CODE_TO_SAP10.get(_leading_code(coded)) + """Map an Elmhurst party-wall-type string to a SAP10 + wall_construction integer. Returns None when lodging is absent + (empty string — cascade default U=0.25 applies). Raises + `UnmappedElmhurstLabel` when a non-empty code isn't recognised + (same strict-coverage gate as `_elmhurst_wall_construction_int`; + silent-None routes to the same cascade default but hides genuinely- + new party-wall variants from the next fixture probe).""" + code = _leading_code(coded) + if not code: + return None + if code not in _ELMHURST_PARTY_WALL_CODE_TO_SAP10: + raise UnmappedElmhurstLabel("walls.party_wall_type", coded) + return _ELMHURST_PARTY_WALL_CODE_TO_SAP10[code] class UnmappedApiCode(ValueError):