diff --git a/backend/documents_parser/elmhurst_extractor.py b/backend/documents_parser/elmhurst_extractor.py index c23c19a4..57703995 100644 --- a/backend/documents_parser/elmhurst_extractor.py +++ b/backend/documents_parser/elmhurst_extractor.py @@ -474,7 +474,11 @@ class ElmhurstSiteNotesExtractor: ) _RIR_NUMERIC_RE = re.compile(r"^-?\d+(?:\.\d+)?$") - _RIR_INSULATION_THICKNESS_RE = re.compile(r"^\d+\s*mm$") + # Elmhurst insulation cell formats: "100 mm", "125 mm", ... and the + # bucket-cap "400+ mm" (Table 17 max tabulated row). Optional trailing + # "+" allows the bucket-cap to parse through to the cascade with the + # same numeric value. + _RIR_INSULATION_THICKNESS_RE = re.compile(r"^\d+\+?\s*mm$") def _parse_rir_surface_row( self, name: str, lines: List[str], idx: int @@ -529,7 +533,12 @@ class ElmhurstSiteNotesExtractor: if self._RIR_INSULATION_THICKNESS_RE.match(t) or t in ("As Built", "None"): if not insulation: insulation = t - elif t in ("Mineral or EPS", "PUR", "PIR"): + elif t in ("Mineral or EPS", "PUR", "PIR", "PUR or PIR"): + # Summary §8.1 lodges the rigid-foam column as the + # disjunction "PUR or PIR" when the assessor doesn't + # distinguish between the two; the mapper canonicalises + # all three forms to SAP10 "rigid_foam" (cascade Table + # 17 col (b)). insulation_type = t elif t in ( "Party", "Sheltered", "Exposed", diff --git a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py index a2422e01..3faae0aa 100644 --- a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py +++ b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py @@ -1650,6 +1650,52 @@ def test_summary_000565_section_12_1_extracts_mechanical_extract_decentralised_m ) +def test_summary_000565_ext2_stud_wall_2_extracts_400_plus_mm_pur_or_pir_lodgement() -> None: + # Arrange — cert 000565 Summary §8.1 BP[2] Ext2 (Detailed) lodges + # "Stud Wall 2: 2.00 × 2.00, 400+ mm, PUR or PIR" with Default + # U-value 0.10. Pre-slice the extractor regex `^\d+\s*mm$` failed + # to match "400+ mm" (the trailing "+" tripped the digit-only + # anchor) so the insulation token was silently dropped; and the + # type allow-list `("Mineral or EPS", "PUR", "PIR")` failed to + # match "PUR or PIR" (the conjunction is the actual Summary text). + # Cascade fell through to Table 17 row 0 (uninsulated) → U=2.30 + # against worksheet 0.10, over-counting Stud Wall 2 by ~8.80 W/K. + pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF) + + # Act + site_notes = ElmhurstSiteNotesExtractor(pages).extract() + + # Assert + ext2_rir = site_notes.extensions[1].room_in_roof + assert ext2_rir is not None + stud_wall_2 = next(s for s in ext2_rir.surfaces if s.name == "Stud Wall 2") + assert stud_wall_2.insulation == "400+ mm" + assert stud_wall_2.insulation_type == "PUR or PIR" + + +def test_summary_000565_ext2_stud_wall_2_routes_to_400mm_rigid_foam_via_mapper() -> None: + # Arrange — mapper plumbing: "400+ mm" parses to thickness 400 mm + # (the trailing "+" is a bucket-cap convention; spec Table 17 max + # tabulated row is 400 mm). "PUR or PIR" maps to the canonical + # SAP10 insulation-type code "rigid_foam" so the cascade's + # `_is_rigid_foam` resolves correctly. + pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF) + site_notes = ElmhurstSiteNotesExtractor(pages).extract() + + # Act + epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) + + # Assert + ext2_rir = epc.sap_building_parts[2].sap_room_in_roof + assert ext2_rir is not None + detailed = ext2_rir.detailed_surfaces or [] + stud_walls = [s for s in detailed if s.kind == "stud_wall"] + assert len(stud_walls) == 2 + sw_2 = next(s for s in stud_walls if s.area_m2 == 4.0) + assert sw_2.insulation_thickness_mm == 400 + assert sw_2.insulation_type == "rigid_foam" + + def test_summary_000565_ext1_floor_above_partially_heated_routes_to_u_value_0p7_per_rdsap_10_section_5_14() -> None: # Arrange — RdSAP 10 §5.14 (PDF p.47) "U-value of floor above a # partially heated space": diff --git a/datatypes/epc/domain/mapper.py b/datatypes/epc/domain/mapper.py index d00a2f83..4c0fd872 100644 --- a/datatypes/epc/domain/mapper.py +++ b/datatypes/epc/domain/mapper.py @@ -3243,8 +3243,13 @@ _RIR_KIND_FROM_NAME_PREFIX: Dict[str, str] = { # Elmhurst insulation-type strings → canonical SAP10 codes used by # `SapRoomInRoofSurface.insulation_type`. Empty / unrecognised → None. +# The cascade `_is_rigid_foam` consumes "rigid_foam" (and the legacy +# individual codes "pur" / "pir") to dispatch to Table 17 column (b). _RIR_INSULATION_TYPE_TO_SAP10: Dict[str, str] = { "Mineral or EPS": "mineral_wool", + "PUR or PIR": "rigid_foam", + "PUR": "rigid_foam", + "PIR": "rigid_foam", } @@ -3265,13 +3270,15 @@ def _round_half_up_2dp(*operands: float) -> float: def _elmhurst_rir_insulation_thickness_mm(insulation_text: str) -> int: - """Translate the Insulation cell ("100 mm", "None", "As Built", "") - into a thickness integer. The Elmhurst cohort uses "As Built" only - on surfaces whose Default U-value is the uninsulated 2.30 row, so - treating it as 0 mm is consistent with the Table 17 'none' column.""" + """Translate the Insulation cell ("100 mm", "400+ mm", "None", "As + Built", "") into a thickness integer. The Elmhurst cohort uses "As + Built" only on surfaces whose Default U-value is the uninsulated + 2.30 row, so treating it as 0 mm is consistent with the Table 17 + 'none' column. The "400+ mm" bucket-cap (Table 17's largest + tabulated row) is read as 400.""" if not insulation_text or insulation_text in ("None", "As Built"): return 0 - m = re.match(r"^(\d+)\s*mm$", insulation_text) + m = re.match(r"^(\d+)\+?\s*mm$", insulation_text) return int(m.group(1)) if m else 0 diff --git a/domain/sap10_ml/rdsap_uvalues.py b/domain/sap10_ml/rdsap_uvalues.py index 14262675..5b3a73a7 100644 --- a/domain/sap10_ml/rdsap_uvalues.py +++ b/domain/sap10_ml/rdsap_uvalues.py @@ -664,8 +664,12 @@ _RR_TABLE_17_ROWS: Final[tuple[tuple[int, float, float, float, float, float, flo # Aliases mapping (insulation_type, column) → tuple index above. The PDF # splits each Table 17 column into "(a) mineral wool or EPS slab" vs "(b) -# PUR or PIR optional". Aliases collapse common synonyms. -_RR_RIGID_FOAM_INSULATION_TYPES: Final[frozenset[str]] = frozenset({"pur", "pir", "rigid"}) +# PUR or PIR optional". Aliases collapse common synonyms — the canonical +# mapper-side code for the PDF disjunction "PUR or PIR" is "rigid_foam" +# (see datatypes/epc/domain/mapper.py:_RIR_INSULATION_TYPE_TO_SAP10). +_RR_RIGID_FOAM_INSULATION_TYPES: Final[frozenset[str]] = frozenset( + {"pur", "pir", "rigid", "rigid_foam"} +) def _is_rigid_foam(insulation_type: Optional[str]) -> bool: diff --git a/domain/sap10_ml/tests/test_rdsap_uvalues.py b/domain/sap10_ml/tests/test_rdsap_uvalues.py index 7619ae77..906ce3b8 100644 --- a/domain/sap10_ml/tests/test_rdsap_uvalues.py +++ b/domain/sap10_ml/tests/test_rdsap_uvalues.py @@ -1497,6 +1497,27 @@ def test_u_rr_stud_wall_table17_col3a_mineral_wool_100mm_returns_0_36() -> None: assert result == pytest.approx(0.36, abs=0.001) +def test_u_rr_stud_wall_rigid_foam_400mm_returns_0p10_per_table_17_col_3b() -> None: + # Arrange — Table 17 column (3b) "Stud wall, PUR or PIR optional", + # 400 mm row → 0.10 W/m²K. Cert 000565 BP[2] Ext2 Summary §8.1 + # lodges "Stud Wall 2: 400+ mm PUR or PIR" → Default U=0.10. The + # "rigid_foam" SAP10 insulation-type code is the canonical alias for + # both the Elmhurst "PUR or PIR" string and the API "PUR" / "PIR" + # individual codes; the cascade's `_is_rigid_foam` recognises all + # three to route through column (b) of Table 17. + + # Act + result = u_rr_stud_wall( + country=Country.ENG, + age_band="J", + insulation_thickness_mm=400, + insulation_type="rigid_foam", + ) + + # Assert + assert abs(result - 0.10) <= 1e-4 + + def test_u_rr_slope_table17_none_row_uninsulated_returns_2_30() -> None: """Table 17 "none" row (every column collapses to 2.3 when no insulation). Used by the U985 worksheet for 000477's RR slope panels