From 78c57c0dc79d183817e66b7e49c76fd4ab606b88 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 30 May 2026 14:08:05 +0000 Subject: [PATCH] Slice S0380.94: RIR insulation "400+ mm PUR or PIR" extractor + mapper + cascade (RdSAP 10 Table 17 col 3b) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RdSAP 10 §5.11.3 + Table 17 (PDF p.42-43) "Roof room U-values when insulation thickness is known". Column (3b) "Stud wall — PUR or PIR optional" 400 mm row → 0.10 W/m²K. Cert 000565 Summary §8.1 BP[2] Ext2 (Detailed) lodges: Stud Wall 2 2.00 × 2.00 400+ mm PUR or PIR Default U=0.10 Pre-slice three coupled bugs silently dropped the lodgement, routing the cascade through the uninsulated Table 17 row 0 (U=2.30) — over-counting Stud Wall 2 by (2.30 − 0.10) × 4 m² = +8.80 W/K on roof: 1. **Extractor regex** `_RIR_INSULATION_THICKNESS_RE = ^\d+\s*mm$` failed to match the "400+ mm" bucket-cap form (Table 17's largest tabulated row is annotated with a trailing "+" in the Summary). 2. **Extractor insulation_type allow-list** `("Mineral or EPS", "PUR", "PIR")` failed to match the disjunction "PUR or PIR" — the actual Summary form when the assessor doesn't distinguish PUR from PIR. (Both map to Table 17 column (b) anyway.) 3. **Mapper thickness parser** `_elmhurst_rir_insulation_thickness_mm` used the same `^\d+\s*mm$` regex — also failed on "400+ mm". Plus a fourth coupled fix: the cascade's `_is_rigid_foam` checked a frozenset `{"pur", "pir", "rigid"}` that didn't include the canonical mapper-side code "rigid_foam" — even if the mapper translated "PUR or PIR" → "rigid_foam", the cascade would route to column (a) mineral-wool instead of column (b) rigid-foam. Slice span (4 layers): 1. **Extractor regex** — `^\d+\+?\s*mm$` matches both "100 mm" and "400+ mm". 2. **Extractor allow-list** — add "PUR or PIR" alongside individual "PUR" / "PIR" + "Mineral or EPS". 3. 
**Mapper** — `_RIR_INSULATION_TYPE_TO_SAP10` canonicalises all rigid-foam strings to "rigid_foam"; thickness parser regex matches "400+ mm" → 400 mm int. 4. **Cascade** — `_RR_RIGID_FOAM_INSULATION_TYPES` adds "rigid_foam" alongside the legacy "pur"/"pir"/"rigid" aliases. Cert 000565 movement (HEAD `23aaa4fa` → this slice): - cascade BP[2] Ext2 Stud Wall 2 U: 2.30 → 0.10 ✓ EXACT vs ws 0.10 - cascade roof_w_per_k: 43.44 → 34.64 (Δ−7.94 → Δ−16.74) - sap_score: 29 ✓ EXACT unchanged - sap_score_continuous: 28.81 → 29.02 (Δ+0.26 → Δ+0.51) - space_heating_kwh: −427 → −685 - main_heating_fuel: −251 → −403 - hot_water_kwh: ✓ 0 EXACT unchanged Making one sub-component spec-correct while others remain non-spec-correct drifts continuous SAP further; per user direction temporary drift is acceptable as long as we're fixing true intermediate-value problems — once every sub-component is spec-correct, the continuous SAP error closes to zero by construction. The remaining −16.74 W/K roof gap localises to: - BP[0/1/3] missing RR residual area for Detailed-RR mode (§3.10.1 spec — cascade only handles Simplified mode today); +27.85 W/K closure when wired. - BP[4] Flat Ceiling 1 lodges "Unknown thickness, PUR or PIR" → ws U=0.15; cascade over-counts at 2.30 (uninsulated). Elmhurst's "Unknown PUR or PIR" → 200 mm convention is non-spec; the spec-correct path falls back to Table 18 col 4 default (`u_rr_default_all_elements`). Separate diagnostic slice. Cohort safety: 21 other Elmhurst Summary fixtures lodge no RIR detailed surfaces with "400+ mm" or "PUR or PIR" (modal cohort uses As Built / None / no detailed surfaces). Existing "Mineral or EPS" tests at `test_u_rr_stud_wall_table17_col3a_mineral_wool_100mm_returns_0_36` remain green — the new aliases extend rather than replace. Test baseline: 585 pass + 8 expected `000565` fails (was 583 + 8; +2 new tests). Pyright net-zero per touched file (0/32/1/65/13 preserved). 
Co-Authored-By: Claude Opus 4.7 --- .../documents_parser/elmhurst_extractor.py | 13 +++++- .../tests/test_summary_pdf_mapper_chain.py | 46 +++++++++++++++++++ datatypes/epc/domain/mapper.py | 17 +++++-- domain/sap10_ml/rdsap_uvalues.py | 8 +++- domain/sap10_ml/tests/test_rdsap_uvalues.py | 21 +++++++++ 5 files changed, 96 insertions(+), 9 deletions(-) diff --git a/backend/documents_parser/elmhurst_extractor.py b/backend/documents_parser/elmhurst_extractor.py index c23c19a4..57703995 100644 --- a/backend/documents_parser/elmhurst_extractor.py +++ b/backend/documents_parser/elmhurst_extractor.py @@ -474,7 +474,11 @@ class ElmhurstSiteNotesExtractor: ) _RIR_NUMERIC_RE = re.compile(r"^-?\d+(?:\.\d+)?$") - _RIR_INSULATION_THICKNESS_RE = re.compile(r"^\d+\s*mm$") + # Elmhurst insulation cell formats: "100 mm", "125 mm", ... and the + # bucket-cap "400+ mm" (Table 17 max tabulated row). Optional trailing + # "+" allows the bucket-cap to parse through to the cascade with the + # same numeric value. + _RIR_INSULATION_THICKNESS_RE = re.compile(r"^\d+\+?\s*mm$") def _parse_rir_surface_row( self, name: str, lines: List[str], idx: int @@ -529,7 +533,12 @@ class ElmhurstSiteNotesExtractor: if self._RIR_INSULATION_THICKNESS_RE.match(t) or t in ("As Built", "None"): if not insulation: insulation = t - elif t in ("Mineral or EPS", "PUR", "PIR"): + elif t in ("Mineral or EPS", "PUR", "PIR", "PUR or PIR"): + # Summary §8.1 lodges the rigid-foam column as the + # disjunction "PUR or PIR" when the assessor doesn't + # distinguish between the two; the mapper canonicalises + # all three forms to SAP10 "rigid_foam" (cascade Table + # 17 col (b)). 
insulation_type = t elif t in ( "Party", "Sheltered", "Exposed", diff --git a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py index a2422e01..3faae0aa 100644 --- a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py +++ b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py @@ -1650,6 +1650,52 @@ def test_summary_000565_section_12_1_extracts_mechanical_extract_decentralised_m ) +def test_summary_000565_ext2_stud_wall_2_extracts_400_plus_mm_pur_or_pir_lodgement() -> None: + # Arrange — cert 000565 Summary §8.1 BP[2] Ext2 (Detailed) lodges + # "Stud Wall 2: 2.00 × 2.00, 400+ mm, PUR or PIR" with Default + # U-value 0.10. Pre-slice the extractor regex `^\d+\s*mm$` failed + # to match "400+ mm" (the trailing "+" tripped the digit-only + # anchor) so the insulation token was silently dropped; and the + # type allow-list `("Mineral or EPS", "PUR", "PIR")` failed to + # match "PUR or PIR" (the disjunction is the actual Summary text). + # Cascade fell through to Table 17 row 0 (uninsulated) → U=2.30 + # against worksheet 0.10, over-counting Stud Wall 2 by ~8.80 W/K. + pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF) + + # Act + site_notes = ElmhurstSiteNotesExtractor(pages).extract() + + # Assert + ext2_rir = site_notes.extensions[1].room_in_roof + assert ext2_rir is not None + stud_wall_2 = next(s for s in ext2_rir.surfaces if s.name == "Stud Wall 2") + assert stud_wall_2.insulation == "400+ mm" + assert stud_wall_2.insulation_type == "PUR or PIR" + + +def test_summary_000565_ext2_stud_wall_2_routes_to_400mm_rigid_foam_via_mapper() -> None: + # Arrange — mapper plumbing: "400+ mm" parses to thickness 400 mm + # (the trailing "+" is a bucket-cap convention; spec Table 17 max + # tabulated row is 400 mm). "PUR or PIR" maps to the canonical + # SAP10 insulation-type code "rigid_foam" so the cascade's + # `_is_rigid_foam` resolves correctly. 
+ pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF) + site_notes = ElmhurstSiteNotesExtractor(pages).extract() + + # Act + epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) + + # Assert + ext2_rir = epc.sap_building_parts[2].sap_room_in_roof + assert ext2_rir is not None + detailed = ext2_rir.detailed_surfaces or [] + stud_walls = [s for s in detailed if s.kind == "stud_wall"] + assert len(stud_walls) == 2 + sw_2 = next(s for s in stud_walls if s.area_m2 == 4.0) + assert sw_2.insulation_thickness_mm == 400 + assert sw_2.insulation_type == "rigid_foam" + + def test_summary_000565_ext1_floor_above_partially_heated_routes_to_u_value_0p7_per_rdsap_10_section_5_14() -> None: # Arrange — RdSAP 10 §5.14 (PDF p.47) "U-value of floor above a # partially heated space": diff --git a/datatypes/epc/domain/mapper.py b/datatypes/epc/domain/mapper.py index d00a2f83..4c0fd872 100644 --- a/datatypes/epc/domain/mapper.py +++ b/datatypes/epc/domain/mapper.py @@ -3243,8 +3243,13 @@ _RIR_KIND_FROM_NAME_PREFIX: Dict[str, str] = { # Elmhurst insulation-type strings → canonical SAP10 codes used by # `SapRoomInRoofSurface.insulation_type`. Empty / unrecognised → None. +# The cascade `_is_rigid_foam` consumes "rigid_foam" (and the legacy +# individual codes "pur" / "pir") to dispatch to Table 17 column (b). _RIR_INSULATION_TYPE_TO_SAP10: Dict[str, str] = { "Mineral or EPS": "mineral_wool", + "PUR or PIR": "rigid_foam", + "PUR": "rigid_foam", + "PIR": "rigid_foam", } @@ -3265,13 +3270,15 @@ def _round_half_up_2dp(*operands: float) -> float: def _elmhurst_rir_insulation_thickness_mm(insulation_text: str) -> int: - """Translate the Insulation cell ("100 mm", "None", "As Built", "") - into a thickness integer. 
The Elmhurst cohort uses "As Built" only - on surfaces whose Default U-value is the uninsulated 2.30 row, so - treating it as 0 mm is consistent with the Table 17 'none' column.""" + """Translate the Insulation cell ("100 mm", "400+ mm", "None", "As + Built", "") into a thickness integer. The Elmhurst cohort uses "As + Built" only on surfaces whose Default U-value is the uninsulated + 2.30 row, so treating it as 0 mm is consistent with the Table 17 + 'none' column. The "400+ mm" bucket-cap (Table 17's largest + tabulated row) is read as 400.""" if not insulation_text or insulation_text in ("None", "As Built"): return 0 - m = re.match(r"^(\d+)\s*mm$", insulation_text) + m = re.match(r"^(\d+)\+?\s*mm$", insulation_text) return int(m.group(1)) if m else 0 diff --git a/domain/sap10_ml/rdsap_uvalues.py b/domain/sap10_ml/rdsap_uvalues.py index 14262675..5b3a73a7 100644 --- a/domain/sap10_ml/rdsap_uvalues.py +++ b/domain/sap10_ml/rdsap_uvalues.py @@ -664,8 +664,12 @@ _RR_TABLE_17_ROWS: Final[tuple[tuple[int, float, float, float, float, float, flo # Aliases mapping (insulation_type, column) → tuple index above. The PDF # splits each Table 17 column into "(a) mineral wool or EPS slab" vs "(b) -# PUR or PIR optional". Aliases collapse common synonyms. -_RR_RIGID_FOAM_INSULATION_TYPES: Final[frozenset[str]] = frozenset({"pur", "pir", "rigid"}) +# PUR or PIR optional". Aliases collapse common synonyms — the canonical +# mapper-side code for the PDF disjunction "PUR or PIR" is "rigid_foam" +# (see datatypes/epc/domain/mapper.py:_RIR_INSULATION_TYPE_TO_SAP10). 
+_RR_RIGID_FOAM_INSULATION_TYPES: Final[frozenset[str]] = frozenset( + {"pur", "pir", "rigid", "rigid_foam"} +) def _is_rigid_foam(insulation_type: Optional[str]) -> bool: diff --git a/domain/sap10_ml/tests/test_rdsap_uvalues.py b/domain/sap10_ml/tests/test_rdsap_uvalues.py index 7619ae77..906ce3b8 100644 --- a/domain/sap10_ml/tests/test_rdsap_uvalues.py +++ b/domain/sap10_ml/tests/test_rdsap_uvalues.py @@ -1497,6 +1497,27 @@ def test_u_rr_stud_wall_table17_col3a_mineral_wool_100mm_returns_0_36() -> None: assert result == pytest.approx(0.36, abs=0.001) +def test_u_rr_stud_wall_rigid_foam_400mm_returns_0p10_per_table_17_col_3b() -> None: + # Arrange — Table 17 column (3b) "Stud wall, PUR or PIR optional", + # 400 mm row → 0.10 W/m²K. Cert 000565 BP[2] Ext2 Summary §8.1 + # lodges "Stud Wall 2: 400+ mm PUR or PIR" → Default U=0.10. The + # "rigid_foam" SAP10 insulation-type code is the canonical alias for + # both the Elmhurst "PUR or PIR" string and the API "PUR" / "PIR" + # individual codes; the cascade's `_is_rigid_foam` recognises all + # three to route through column (b) of Table 17. + + # Act + result = u_rr_stud_wall( + country=Country.ENG, + age_band="J", + insulation_thickness_mm=400, + insulation_type="rigid_foam", + ) + + # Assert + assert abs(result - 0.10) <= 1e-4 + + def test_u_rr_slope_table17_none_row_uninsulated_returns_2_30() -> None: """Table 17 "none" row (every column collapses to 2.3 when no insulation). Used by the U985 worksheet for 000477's RR slope panels