def _alternative_walls_from_lines(self, lines: List[str]) -> List[AlternativeWall]:
    """Parse up to two §7 "Alternative Wall N" sub-area lodgements.

    The Elmhurst Summary PDF lays them out as a contiguous block of
    prefixed labels ("Alternative Wall 1 Area", "Alternative Wall 1
    Type", …). Each numbered slot is read independently; a slot is
    dropped when its Area is missing, unparseable, or not positive.

    Args:
        lines: Text lines of one building part's wall chunk, as passed
            to the `_local_*` label lookups.

    Returns:
        The parsed alternative walls in slot order (at most two).
    """
    result: List[AlternativeWall] = []
    for n in (1, 2):
        prefix = f"Alternative Wall {n}"

        area_raw = self._local_val(lines, f"{prefix} Area")
        if not area_raw:
            continue
        try:
            # Only the leading token is numeric; anything after it
            # (e.g. a unit suffix) is ignored.
            area = float(area_raw.split()[0])
        except (ValueError, IndexError):
            continue
        # `not area > 0` (rather than `area <= 0`) also rejects NaN.
        if not area > 0:
            continue

        thickness_raw = self._local_val(lines, f"{prefix} Thickness")
        # Slice instead of indexing so a blank / whitespace-only value
        # yields no token rather than raising IndexError (the Area parse
        # above guards the same case).
        thickness_tokens = (thickness_raw or "").split()[:1]
        thickness_mm: Optional[int] = None
        # isdecimal() only accepts characters int() can convert, unlike
        # isdigit(), which also matches e.g. superscript "²".
        if thickness_tokens and thickness_tokens[0].isdecimal():
            thickness_mm = int(thickness_tokens[0])

        result.append(AlternativeWall(
            area_m2=area,
            wall_type=self._local_str(lines, f"{prefix} Type"),
            insulation=self._local_str(lines, f"{prefix} Insulation"),
            thickness_unknown=self._local_bool(
                lines, f"{prefix} Thickness Unknown"
            ),
            thickness_mm=thickness_mm,
            u_value_known=self._local_bool(
                lines, f"{prefix} U-value Known"
            ),
        ))
    return result
def test_summary_000487_full_chain_sap_matches_worksheet_pdf_exactly() -> None:
    # Arrange — cert U985-0001-000487: enclosed mid-terrace, main bp
    # plus a 1st extension, 21.03 m² Room-in-Roof, electric shower,
    # and a 1.43 m² Timber Frame alternative wall on the extension.
    # The worksheet PDF lodges an unrounded SAP of 61.6431. To hit it,
    # the mapped chain must carry the alt-wall U-value cascade
    # (Thickness Unknown → age-band default U=1.9 for thin timber
    # walls) and cope with the §11 layout variant where frame_factor
    # sits unprefixed on its own line (no "PVC"/"Wood" frame_type).
    expected_sap = 61.6431
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000487_PDF)
    notes = ElmhurstSiteNotesExtractor(textract_pages).extract()
    epc_data = EpcPropertyDataMapper.from_elmhurst_site_notes(notes)
    sap_inputs = cert_to_inputs(epc_data, prices=SAP_10_2_SPEC_PRICES)

    # Act
    outcome = calculate_sap_from_inputs(sap_inputs)

    # Assert
    deviation = abs(outcome.sap_score_continuous - expected_sap)
    assert deviation < 1e-4
def _map_elmhurst_alternative_wall(
    a: ElmhurstAlternativeWall,
) -> SapAlternativeWall:
    """Build a `SapAlternativeWall` from an Elmhurst §7 Alternative
    Wall lodgement.

    A surveyor's `Thickness Unknown: Yes` marks the lodged thickness as
    an estimate rather than a measurement, so it is not forwarded: the
    thickness is passed as None and the wall is flagged as not
    measured. Per the original design note this lets `u_wall` fall
    through to the age-band-and-construction default (e.g. Timber Frame
    age B → U=1.9 for the 000487 9-mm-thin-wall case, matching the
    full-cert-text "TimberWallOneLayer" lodgement).

    Construction and insulation codes fall back to 0 and 4 respectively
    when the lookup helpers return a falsy value.
    """
    construction_code = _elmhurst_wall_construction_int(a.wall_type) or 0
    insulation_code = _elmhurst_wall_insulation_int(a.insulation) or 4

    thickness_is_measured = not a.thickness_unknown
    # Forward the thickness only when it was both measured and lodged.
    # NOTE(review): the lodged *wall* thickness is written to
    # `wall_insulation_thickness` — confirm this is the intended field.
    lodged_thickness = (
        str(a.thickness_mm)
        if thickness_is_measured and a.thickness_mm is not None
        else None
    )

    return SapAlternativeWall(
        wall_area=a.area_m2,
        wall_dry_lined="N",
        wall_construction=construction_code,
        wall_insulation_type=insulation_code,
        wall_thickness_measured="Y" if thickness_is_measured else "N",
        wall_insulation_thickness=lodged_thickness,
    )