diff --git a/backend/documents_parser/elmhurst_extractor.py b/backend/documents_parser/elmhurst_extractor.py index 67e87ff8..010beb4f 100644 --- a/backend/documents_parser/elmhurst_extractor.py +++ b/backend/documents_parser/elmhurst_extractor.py @@ -389,34 +389,59 @@ class ElmhurstSiteNotesExtractor: def _extract_room_in_roof( self, main_dim_body: str, age_band_text: str ) -> Optional[RoomInRoof]: - """Parse the §8.1 Rooms in Roof section for the Main bp. Returns - None when no RR is lodged (single-storey or simple loft houses). - `main_dim_body` is the Main-property §4 chunk used to pull the - RR floor area; `age_band_text` is the §3 raw text holding the - "Main Prop. Room(s) in Roof " line.""" - # RR floor area lives in §4 Dimensions immediately above the - # storey floor entries: "Room(s) in Roof: 15.06". - m = re.search(r"Room\(s\) in Roof:\s+(\d+(?:\.\d+)?)", main_dim_body) + """Parse the §8.1 Rooms in Roof block for the Main bp.""" + section = self._between("8.1 Rooms in Roof:", "9.0 Floors:") + bp_chunks = self._split_section_by_bp(section) if section.strip() else [] + main_body = bp_chunks[0][1] if bp_chunks else "" + # Age band from §3: "Main Prop. Room(s) in Roof H 1991-1995" + age_m = re.search( + r"Main Prop\. Room\(s\) in Roof\s+([A-M] [^\n]+)", age_band_text + ) + age_band = age_m.group(1).strip() if age_m else None + return self._room_in_roof_from_bodies( + dim_body=main_dim_body, + rir_body=main_body, + age_band=age_band, + ) + + def _room_in_roof_from_bodies( + self, + dim_body: str, + rir_body: str, + age_band: Optional[str], + ) -> Optional[RoomInRoof]: + """Parse a single-BP Room(s) in Roof from the §4 dimension body + (floor area) and §8.1 construction body (assessment + surfaces). + Used for both Main and each extension — extensions get their + own per-BP slice of §4 and §8.1 + the per-extension age band + from §3's "{ordinal} Ext. Room(s) in Roof {age band}" line. 
+ """ + m = re.search(r"Room\(s\) in Roof:\s+(\d+(?:\.\d+)?)", dim_body) if m is None: return None floor_area = float(m.group(1)) if floor_area <= 0: return None - - section = self._between("8.1 Rooms in Roof:", "9.0 Floors:") - if not section.strip() or "Room in roof type" not in section: - return None - bp_chunks = self._split_section_by_bp(section) - main_body = bp_chunks[0][1] if bp_chunks else section - lines = [l.strip() for l in main_body.splitlines() if l.strip()] - + if not rir_body.strip() or "Room in roof type" not in rir_body: + # §4 lodged an RR area but §8.1 has no construction details + # for this BP — surface as a partial RR so the cascade can + # still attribute the floor area to TFA. Empty surfaces + # list is the sentinel the mapper consumes. + return RoomInRoof( + floor_area_m2=floor_area, + construction_age_band=age_band, + assessment="", + surfaces=[], + ) + lines = [l.strip() for l in rir_body.splitlines() if l.strip()] assessment_idx = next( (i for i, l in enumerate(lines) if l == "Assessment"), None ) assessment = ( - lines[assessment_idx + 1] if assessment_idx is not None and assessment_idx + 1 < len(lines) else "" + lines[assessment_idx + 1] + if assessment_idx is not None and assessment_idx + 1 < len(lines) + else "" ) - surfaces: List[RoomInRoofSurface] = [] for name in self._RIR_SURFACE_NAMES: try: @@ -424,13 +449,6 @@ class ElmhurstSiteNotesExtractor: except ValueError: continue surfaces.append(self._parse_rir_surface_row(name, lines, idx)) - - # Age band from §3: "Main Prop. Room(s) in Roof B 1900-1929" - age_m = re.search( - r"Main Prop\. 
Room\(s\) in Roof\s+([A-M] [^\n]+)", age_band_text - ) - age_band = age_m.group(1).strip() if age_m else None - return RoomInRoof( floor_area_m2=floor_area, construction_age_band=age_band, @@ -522,14 +540,26 @@ class ElmhurstSiteNotesExtractor: dim_section = self._between("4.0 Dimensions:", "5.0 Conservatory:") wall_section = self._between("7.0 Walls:", "8.0 Roofs:") roof_section = self._between("8.0 Roofs:", "8.1 Rooms in Roof:") + rir_section = self._between("8.1 Rooms in Roof:", "9.0 Floors:") floor_section = self._between("9.0 Floors:", "10.0 Doors:") dim_type = self._str_val("Dimension type") dim_chunks = dict(self._split_section_by_bp(dim_section)) wall_chunks = dict(self._split_section_by_bp(wall_section)) roof_chunks = dict(self._split_section_by_bp(roof_section)) + rir_chunks = dict(self._split_section_by_bp(rir_section)) if rir_section.strip() else {} floor_chunks = dict(self._split_section_by_bp(floor_section)) + # Per-extension RR age bands from §3: "1st Ext. Room(s) in Roof I 1996-2002". 
+ ext_rir_age_re = re.compile( + r"(\d+(?:st|nd|rd|th))\s+Ext\.\s+Room\(s\) in Roof\s+([A-M] [^\n]+)", + re.MULTILINE, + ) + ext_rir_age_bands: dict[str, str] = { + f"{m.group(1)} Extension": m.group(2).strip() + for m in ext_rir_age_re.finditer(self._text) + } + main_walls = self._extract_walls() main_roof = self._extract_roof() main_floor = self._extract_floor() @@ -580,6 +610,11 @@ class ElmhurstSiteNotesExtractor: roof = main_roof if self._local_bool(roof_lines, "As Main") else self._roof_details_from_lines(roof_lines) floor = main_floor if self._local_bool(floor_lines, "As Main") else self._floor_details_from_lines(floor_lines) + rir = self._room_in_roof_from_bodies( + dim_body=dim_body, + rir_body=rir_chunks.get(name, ""), + age_band=ext_rir_age_bands.get(name), + ) extensions.append( ExtensionPart( name=name, @@ -591,6 +626,7 @@ class ElmhurstSiteNotesExtractor: walls=walls, roof=roof, floor=floor, + room_in_roof=rir, ) ) return extensions diff --git a/datatypes/epc/domain/mapper.py b/datatypes/epc/domain/mapper.py index adfffdf2..c663e2fe 100644 --- a/datatypes/epc/domain/mapper.py +++ b/datatypes/epc/domain/mapper.py @@ -378,7 +378,12 @@ class EpcPropertyDataMapper: for ext in survey.extensions for f in ext.dimensions.floors ) - + (survey.room_in_roof.floor_area_m2 if survey.room_in_roof else 0.0), + + (survey.room_in_roof.floor_area_m2 if survey.room_in_roof else 0.0) + + sum( + ext.room_in_roof.floor_area_m2 + for ext in survey.extensions + if ext.room_in_roof is not None + ), 2, ), built_form=built_form, @@ -3142,6 +3147,9 @@ def _map_elmhurst_building_parts( walls=ext.walls, roof=ext.roof, floor=ext.floor, + room_in_roof=_map_elmhurst_room_in_roof( + ext.room_in_roof, is_flat=is_flat, + ), ) ) return parts diff --git a/datatypes/epc/surveys/elmhurst_site_notes.py b/datatypes/epc/surveys/elmhurst_site_notes.py index cb96c682..c736ae92 100644 --- a/datatypes/epc/surveys/elmhurst_site_notes.py +++ b/datatypes/epc/surveys/elmhurst_site_notes.py @@ -340,6 
+340,13 @@ class ExtensionPart: walls: WallDetails roof: RoofDetails floor: FloorDetails + # §4 + §8.1 Room(s) in Roof on this extension. None when no RR is + # lodged for the extension (typical single-storey extensions). For + # multi-storey extensions with a top-floor RR (cert 000565: Ext1=34 + # m², Ext2=5 m², Ext3=32 m², Ext4=2 m²), leaving this as None drops + # 73 m² of TFA from the cascade, pulling space_heating and lighting + # kWh down by ~23% on the cert. + room_in_roof: Optional[RoomInRoof] = None @dataclass