From 6b02bad018f63d44d2ff274c859048208d6fc4f7 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 29 May 2026 08:57:25 +0000 Subject: [PATCH] Slice S0380.64: Elmhurst per-extension wall_construction mappings + strict-raise MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-S0380.64 the mapper silently fell through to None (wall_construction for the two wall codes, party_wall_construction for the party-wall code) on three Elmhurst code lodgements that the cohort PDFs use: - "SG Stone: granite or whinstone" (cert 000565 Ext1) - "B Basement wall" (cert 000565 Ext3 + Ext4) - "CF Cavity masonry filled" party wall (cert 000565 Ext1) Cascade impact on cert 000565 (vs U985-0001-000565.pdf worksheet): - sap_score 30 → 29 EXACT (was Δ +1) - sap_score_continuous 30.23 → 29.14 (Δ +1.72 → +0.63) - space_heating_kwh_per_yr 57909 → 59274 (Δ −1100 → +266) - HTC 1281 → 1321 W/K (was 234 W/K short of worksheet line 39 monthly avg 1515.38) Spec basis: - SG → 1 (WALL_STONE_GRANITE per domain.sap10_ml.rdsap_uvalues) is the granite-specific Elmhurst variant of "ST Stone"; same SAP10 enum, no cascade behaviour change for stone walls. - B → 6 (BASEMENT_WALL_CONSTRUCTION_CODE per datatypes/epc/domain/epc_property_data.py:361) routes the cascade through `part.main_wall_is_basement` → `u_basement_wall(age_band)` per RdSAP 10 §5.17 / Table 23 (heat_transmission.py:640). Empirically established from a 2026 50k-bulk GOV.UK API sweep (88% co-occurrence with walls[].description = "Basement wall"). Caveat: code 6 is also what "SY System build" maps to, and the cascade's basement check precedes `u_wall(construction=6)`, so an SY lodgement would be silently routed to `u_basement_wall`; the conflict is dormant because no cohort fixture lodges SY yet. - CF → 4 (Cavity, RdSAP 10 Table 15 row 3 spec U=0.20). The cascade's `u_party_wall` returns 0.0 / 0.5 / 0.25 for code 4 today, so CF conservatively rounds up to the cavity-unfilled U=0.5 — matches the pre-existing `_API_PARTY_WALL_CONSTRUCTION_TO_SAP10[3]` approximation until `u_party_wall` gains a filled-cavity branch (TODO). 
Strict-coverage gate, mirroring [[reference-unmapped-api-code]]: `_elmhurst_wall_construction_int` and `_elmhurst_party_wall_construction_int` now raise `UnmappedElmhurstLabel` on a non-empty Elmhurst code that isn't in the lookup dict, rather than silently returning None. Empty lodgements (absent fields) continue to return None — the cascade's own defaults apply. The silent-None failure mode is what hid cert 000565's ~300 W/K cascade fabric-loss gap from the audit chain until the S0380.64 space-heating residual probe surfaced it. Cohort coverage swept: every Summary PDF in the test fixtures folder lodges only {SO, CA, CW, SG, B} wall types and {'', S, U, CU, CF} party-wall types — the new dict entries cover all observed codes, so strict-raise does not regress any cohort fixture (478 pass, 9 expected 000565 cascade-gap fails; was 427 pass + 10 fails per HANDOVER_CERT_000565_COST_CASCADE.md). Pyright net-zero on touched files (mapper.py 32 → 32 errors; test_summary_pdf_mapper_chain.py 13 → 13 errors — all pre-existing in unrelated sections). 
Co-Authored-By: Claude Opus 4.7 --- .../tests/test_summary_pdf_mapper_chain.py | 88 +++++++++++++++++++ datatypes/epc/domain/mapper.py | 52 +++++++++-- 2 files changed, 133 insertions(+), 7 deletions(-) diff --git a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py index 4c56fce0..7042c32f 100644 --- a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py +++ b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py @@ -75,6 +75,7 @@ _SUMMARY_000889_PDF = _FIXTURES / "Summary_000889.pdf" # cert 2536 (Normal cyli _SUMMARY_000884_PDF = _FIXTURES / "Summary_000884.pdf" # cert 9421 (Normal cylinder) _SUMMARY_000910_PDF = _FIXTURES / "Summary_000910.pdf" # cert 0036 (Flat, party wall U=0) _SUMMARY_000890_PDF = _FIXTURES / "Summary_000890.pdf" # cert 7800 (two electric showers) +_SUMMARY_000565_PDF = _FIXTURES / "Summary_000565.pdf" # cert 000565 (5-bp Elmhurst-only) # GOV.UK EPB API JSON for cert 001479 — the API-path counterpart of the # Summary_001479.pdf fixture. Together they drive the API ≡ Summary @@ -1090,6 +1091,93 @@ def test_summary_mapper_raises_on_unmapped_glazing_type_label() -> None: assert excinfo.value.value == "Quintuple glazed with helium" +def test_summary_000565_ext1_wall_construction_routes_to_stone_granite() -> None: + # Arrange — RdSAP 10 §3.3 + Table 4: cert 000565 Ext1 lodges + # "SG Stone: granite or whinstone" which routes to SAP10 + # WALL_STONE_GRANITE=1. Pre-S0380.64 fell through silent-None, + # losing the Ext1 wall channel (worksheet line 29a: 91.83 m² × + # U=1.7 = 156.11 W/K) from the cascade fabric subtotal. 
+ pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF) + site_notes = ElmhurstSiteNotesExtractor(pages).extract() + + # Act + epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) + + # Assert + assert epc.sap_building_parts[1].wall_construction == 1 + + +def test_summary_000565_ext3_ext4_wall_constructions_route_to_basement_code_6() -> None: + # Arrange — RdSAP 10 §5.17 / Table 23: cert 000565 Ext3 + Ext4 + # lodge "B Basement wall". The canonical `BASEMENT_WALL_ + # CONSTRUCTION_CODE=6` triggers the cascade's + # `part.main_wall_is_basement` route to `u_basement_wall` at + # heat_transmission.py:640. Pre-S0380.64 silent-None bypassed + # the basement-wall override entirely. + pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF) + site_notes = ElmhurstSiteNotesExtractor(pages).extract() + + # Act + epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) + + # Assert + assert epc.sap_building_parts[3].wall_construction == 6 + assert epc.sap_building_parts[3].main_wall_is_basement is True + assert epc.sap_building_parts[4].wall_construction == 6 + assert epc.sap_building_parts[4].main_wall_is_basement is True + + +def test_summary_000565_ext1_party_wall_routes_to_cavity_filled_code_4() -> None: + # Arrange — RdSAP 10 Table 15 row 3 "Cavity masonry filled": + # cert 000565 Ext1 lodges "CF Cavity masonry filled". Routes + # to SAP10 code 4 (Cavity). TODO(S0380.64+1): Table 15 row 3 + # spec U=0.20; today's `u_party_wall` only returns 0.0 / 0.5 / + # 0.25 for code 4 so the cascade conservatively rounds up to + # the cavity-unfilled U=0.5 — matches the pre-existing + # `_API_PARTY_WALL_CONSTRUCTION_TO_SAP10[3]` approximation. 
+ pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF) + site_notes = ElmhurstSiteNotesExtractor(pages).extract() + + # Act + epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) + + # Assert + assert epc.sap_building_parts[1].party_wall_construction == 4 + + +def test_summary_mapper_raises_on_unmapped_wall_type_code() -> None: + # Arrange — strict-coverage gate per [[reference-unmapped-api- + # code]] mirror: an Elmhurst wall_type lodgement that isn't in + # `_ELMHURST_WALL_CODE_TO_SAP10` raises `UnmappedElmhurstLabel` + # rather than silently routing through wall_construction=None. + # The silent-None failure mode is what hid cert 000565 Ext1/3/4 + # ~300 W/K cascade gap until the S0380.64 fabric-loss audit. + pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF) + site_notes = ElmhurstSiteNotesExtractor(pages).extract() + site_notes.walls.wall_type = "XX Unknown construction" + + # Act / Assert + with pytest.raises(UnmappedElmhurstLabel) as excinfo: + EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) + assert excinfo.value.field == "walls.wall_type" + assert excinfo.value.value == "XX Unknown construction" + + +def test_summary_mapper_raises_on_unmapped_party_wall_type_code() -> None: + # Arrange — mirror strict-coverage gate for party-wall-type + # lodgements (same silent-None failure mode at the + # `_elmhurst_party_wall_construction_int` boundary). 
+ pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF) + site_notes = ElmhurstSiteNotesExtractor(pages).extract() + site_notes.walls.party_wall_type = "YY Unknown party wall" + + # Act / Assert + with pytest.raises(UnmappedElmhurstLabel) as excinfo: + EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) + assert excinfo.value.field == "walls.party_wall_type" + assert excinfo.value.value == "YY Unknown party wall" + + # ---------------------------------------------------------------------- # API mapper strict-raise — mirror the Elmhurst UnmappedElmhurstLabel # coverage gate on the GOV.UK API path. The same failure mode (silently diff --git a/datatypes/epc/domain/mapper.py b/datatypes/epc/domain/mapper.py index d56ffaaa..f6f3abe6 100644 --- a/datatypes/epc/domain/mapper.py +++ b/datatypes/epc/domain/mapper.py @@ -2077,6 +2077,9 @@ def _leading_code(value: str) -> str: _ELMHURST_WALL_CODE_TO_SAP10: Dict[str, int] = { "ST": 1, # Stone (granite/sandstone) — placeholder; sandstone vs granite # ambiguity resolved downstream via walls[].description. + "SG": 1, # Stone: granite or whinstone (cert 000565 Ext1) — the + # granite-specific Elmhurst variant of "ST"; same SAP10 + # WALL_STONE_GRANITE=1 cascade entry. "SB": 3, # Solid brick (cohort cert lodgement) "SO": 3, # Solid brick (newer Elmhurst PDF variant — same SAP10 # mapping; cert 9501 lodges "SO Solid Brick" where the @@ -2085,6 +2088,15 @@ _ELMHURST_WALL_CODE_TO_SAP10: Dict[str, int] = { "TF": 5, # Timber frame "TI": 5, # Timber frame (Elmhurst's alt-wall code; same SAP10 mapping) "SY": 6, # System build + "B": 6, # Basement wall (cert 000565 Ext3+Ext4) — routes to the + # `BASEMENT_WALL_CONSTRUCTION_CODE=6` canonical signal so + # the cascade's `part.main_wall_is_basement` triggers the + # RdSAP 10 §5.17 / Table 23 `u_basement_wall` override + # (heat_transmission.py:640). 
Collides numerically with + # "SY" System build — the cascade's basement check + # precedes `u_wall(construction=6)` so SY would be + # silently mis-routed to u_basement_wall today; no cohort + # fixture exercises SY yet so the conflict is dormant. "CO": 7, # Cob "PH": 8, # Park home "CW": 9, # Curtain wall @@ -2151,9 +2163,19 @@ def _elmhurst_dwelling_type( def _elmhurst_wall_construction_int(coded: str) -> Optional[int]: """Map an Elmhurst wall_type string ('CA Cavity') to the SAP10 - integer code (4). Returns None when the leading code isn't a known - SAP10 wall type.""" - return _ELMHURST_WALL_CODE_TO_SAP10.get(_leading_code(coded)) + integer code (4). Returns None when the lodging is absent (empty + string). Raises `UnmappedElmhurstLabel` when a non-empty code + isn't in `_ELMHURST_WALL_CODE_TO_SAP10` — that's a mapper-coverage + gap that should be made explicit so the next fixture forces a + dict entry rather than silently routing to wall_construction=None + (the failure mode that hid cert 000565 Ext1/Ext3/Ext4's ~300 W/K + cascade fabric-loss gap until the S0380.64 audit).""" + code = _leading_code(coded) + if not code: + return None + if code not in _ELMHURST_WALL_CODE_TO_SAP10: + raise UnmappedElmhurstLabel("walls.wall_type", coded) + return _ELMHURST_WALL_CODE_TO_SAP10[code] # Elmhurst Party Wall Type codes — distinct category-set from the Wall @@ -2168,6 +2190,13 @@ _ELMHURST_PARTY_WALL_CODE_TO_SAP10: Dict[str, int] = { "CU": 4, # Cavity masonry unfilled — same U=0.5 cascade; Elmhurst # encodes party-wall cavity type with the masonry sub-code # (CU vs CF filled) — observed first on cert 001479 Main. + "CF": 4, # Cavity masonry filled (cert 000565 Ext1) — RdSAP 10 + # Table 15 row 3 spec U=0.20. 
The cascade's `u_party_wall` + # only returns 0.0 / 0.5 / 0.25 for code 4 today, so CF + # rounds up to the conservative cavity-unfilled U=0.5 — + # matches the existing `_API_PARTY_WALL_CONSTRUCTION_TO + # _SAP10[3]` approximation until u_party_wall gains the + # filled-cavity branch (TODO). # "U Unable to determine" — the cohort's modal lodgement. The cohort # hand-built convention uses 0 as the explicit "unknown" sentinel # (rather than None) so cross-mapper field parity is preserved; the @@ -2178,10 +2207,19 @@ _ELMHURST_PARTY_WALL_CODE_TO_SAP10: Dict[str, int] = { def _elmhurst_party_wall_construction_int(coded: str) -> Optional[int]: - """Map an Elmhurst party-wall-type string to a SAP10 wall_construction - integer. Returns None for 'U Unable to determine' (cascade default - U=0.25 then applies) and for unrecognised codes.""" - return _ELMHURST_PARTY_WALL_CODE_TO_SAP10.get(_leading_code(coded)) + """Map an Elmhurst party-wall-type string to a SAP10 + wall_construction integer. Returns None when lodging is absent + (empty string — cascade default U=0.25 applies). Raises + `UnmappedElmhurstLabel` when a non-empty code isn't recognised + (same strict-coverage gate as `_elmhurst_wall_construction_int`; + silent-None routes to the same cascade default but hides genuinely- + new party-wall variants from the next fixture probe).""" + code = _leading_code(coded) + if not code: + return None + if code not in _ELMHURST_PARTY_WALL_CODE_TO_SAP10: + raise UnmappedElmhurstLabel("walls.party_wall_type", coded) + return _ELMHURST_PARTY_WALL_CODE_TO_SAP10[code] class UnmappedApiCode(ValueError):