From 10437143c499b212a82ca8c60ce31fec24f6804e Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 28 May 2026 22:58:43 +0000 Subject: [PATCH] Slice S0380.58: Elmhurst per-extension Room(s) in Roof extraction + TFA fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cert 000565 surfaced a per-extension Room(s) in Roof coverage gap. §4 Dimensions lodges an RR floor area for every BP (Main + each extension) and §8.1 lodges full construction details per BP. The old extractor parsed RR from §4 + §8.1 for Main only — the 4 extensions' RR areas (34 + 5 + 32 + 2 = 73 m²) were silently dropped, leaving TFA at 246.91 m² vs the worksheet's 319.91 m² (23% deficit). Schema: - `ExtensionPart.room_in_roof: Optional[RoomInRoof] = None` field. None for single-storey extensions (no RR lodged); populated for every extension that lodges a §4 RR floor area > 0. Extractor: - `_room_in_roof_from_bodies(dim_body, rir_body, age_band)` parameterises the previously Main-only `_extract_room_in_roof` so the same parsing applies to each extension. - `_extract_extensions` now slices §8.1 by BP (alongside the existing §4/§7/§8/§9 slicing) and reads each extension's RR age band from §3's "{Nth} Ext. Room(s) in Roof {age band}" line via a new regex. - A new defensive "§4 lodges RR area but §8.1 has no construction details" branch returns a partial `RoomInRoof` with empty surfaces so the cascade still attributes the floor area to TFA. (Not triggered on 000565 — all 5 BPs lodge construction details — but needed for older Elmhurst variants per the existing extractor comment style.) Mapper: - `_map_elmhurst_building_parts` now passes each extension's `room_in_roof` through `_map_elmhurst_room_in_roof` to the extension's `SapBuildingPart.sap_room_in_roof`. Previously the loop hardcoded the field as None. - `total_floor_area_m2` derivation now also sums each extension's `room_in_roof.floor_area_m2`. 
Without this, the per-BP RR floor area is lodged on the BP but the cert's top-level TFA stays at the pre-fix value. Cert 000565 cascade impact: - TFA: 246.91 → 319.91 ✓ (matches U985-0001-000565.pdf Block 1) - space_heating_kwh_per_yr: Δ −9,107.71 → −1,099.50 (88% reduction) - main_heating_fuel_kwh_per_yr: Δ −5,357.47 → −646.76 (88% reduction; space_heating × 1/HP COP — main_heating tracks space_heating) - lighting_kwh_per_yr: Δ −236.19 → +2.18 (essentially closed — RdSAP §12-1 lighting is TFA-proportional) - hot_water_kwh_per_yr: Δ +214.50 → +271.84 - co2_kg_per_yr: Δ −1,438.16 → −751.06 - total_fuel_cost_gbp: Δ −1,055.62 → −564.05 - sap_score_continuous: Δ +1.70 → +6.75 (cost/TFA dropped because cost rose ~14% but TFA rose ~30% — the remaining −564 cost gap has to close before SAP catches up) Single-storey-extension certs: `room_in_roof=None` for each extension (no §4 RR lodgement), no behavioural change. Cohort regression check: 415 pass + 10 expected 000565 fails — no regression on the 14 Summary fixtures + JSON fixtures that don't carry per-extension RR. Pyright net-zero on all 3 touched files (32 / 0 / 0). Co-Authored-By: Claude Opus 4.7 --- .../documents_parser/elmhurst_extractor.py | 86 +++++++++++++------ datatypes/epc/domain/mapper.py | 10 ++- datatypes/epc/surveys/elmhurst_site_notes.py | 7 ++ 3 files changed, 77 insertions(+), 26 deletions(-) diff --git a/backend/documents_parser/elmhurst_extractor.py b/backend/documents_parser/elmhurst_extractor.py index 67e87ff8..010beb4f 100644 --- a/backend/documents_parser/elmhurst_extractor.py +++ b/backend/documents_parser/elmhurst_extractor.py @@ -389,34 +389,59 @@ class ElmhurstSiteNotesExtractor: def _extract_room_in_roof( self, main_dim_body: str, age_band_text: str ) -> Optional[RoomInRoof]: - """Parse the §8.1 Rooms in Roof section for the Main bp. Returns - None when no RR is lodged (single-storey or simple loft houses). 
- `main_dim_body` is the Main-property §4 chunk used to pull the - RR floor area; `age_band_text` is the §3 raw text holding the - "Main Prop. Room(s) in Roof " line.""" - # RR floor area lives in §4 Dimensions immediately above the - # storey floor entries: "Room(s) in Roof: 15.06". - m = re.search(r"Room\(s\) in Roof:\s+(\d+(?:\.\d+)?)", main_dim_body) + """Parse the §8.1 Rooms in Roof block for the Main bp.""" + section = self._between("8.1 Rooms in Roof:", "9.0 Floors:") + bp_chunks = self._split_section_by_bp(section) if section.strip() else [] + main_body = bp_chunks[0][1] if bp_chunks else "" + # Age band from §3: "Main Prop. Room(s) in Roof H 1991-1995" + age_m = re.search( + r"Main Prop\. Room\(s\) in Roof\s+([A-M] [^\n]+)", age_band_text + ) + age_band = age_m.group(1).strip() if age_m else None + return self._room_in_roof_from_bodies( + dim_body=main_dim_body, + rir_body=main_body, + age_band=age_band, + ) + + def _room_in_roof_from_bodies( + self, + dim_body: str, + rir_body: str, + age_band: Optional[str], + ) -> Optional[RoomInRoof]: + """Parse a single-BP Room(s) in Roof from the §4 dimension body + (floor area) and §8.1 construction body (assessment + surfaces). + Used for both Main and each extension — extensions get their + own per-BP slice of §4 and §8.1 + the per-extension age band + from §3's "{Nth} Ext. Room(s) in Roof {age band}" line. 
+ """ + m = re.search(r"Room\(s\) in Roof:\s+(\d+(?:\.\d+)?)", dim_body) if m is None: return None floor_area = float(m.group(1)) if floor_area <= 0: return None - - section = self._between("8.1 Rooms in Roof:", "9.0 Floors:") - if not section.strip() or "Room in roof type" not in section: - return None - bp_chunks = self._split_section_by_bp(section) - main_body = bp_chunks[0][1] if bp_chunks else section - lines = [l.strip() for l in main_body.splitlines() if l.strip()] - + if not rir_body.strip() or "Room in roof type" not in rir_body: + # §4 lodged an RR area but §8.1 has no construction details + # for this BP — surface as a partial RR so the cascade can + # still attribute the floor area to TFA. Empty surfaces + # list is the sentinel the mapper consumes. + return RoomInRoof( + floor_area_m2=floor_area, + construction_age_band=age_band, + assessment="", + surfaces=[], + ) + lines = [l.strip() for l in rir_body.splitlines() if l.strip()] assessment_idx = next( (i for i, l in enumerate(lines) if l == "Assessment"), None ) assessment = ( - lines[assessment_idx + 1] if assessment_idx is not None and assessment_idx + 1 < len(lines) else "" + lines[assessment_idx + 1] + if assessment_idx is not None and assessment_idx + 1 < len(lines) + else "" ) - surfaces: List[RoomInRoofSurface] = [] for name in self._RIR_SURFACE_NAMES: try: @@ -424,13 +449,6 @@ class ElmhurstSiteNotesExtractor: except ValueError: continue surfaces.append(self._parse_rir_surface_row(name, lines, idx)) - - # Age band from §3: "Main Prop. Room(s) in Roof B 1900-1929" - age_m = re.search( - r"Main Prop\. 
Room\(s\) in Roof\s+([A-M] [^\n]+)", age_band_text - ) - age_band = age_m.group(1).strip() if age_m else None - return RoomInRoof( floor_area_m2=floor_area, construction_age_band=age_band, @@ -522,14 +540,26 @@ class ElmhurstSiteNotesExtractor: dim_section = self._between("4.0 Dimensions:", "5.0 Conservatory:") wall_section = self._between("7.0 Walls:", "8.0 Roofs:") roof_section = self._between("8.0 Roofs:", "8.1 Rooms in Roof:") + rir_section = self._between("8.1 Rooms in Roof:", "9.0 Floors:") floor_section = self._between("9.0 Floors:", "10.0 Doors:") dim_type = self._str_val("Dimension type") dim_chunks = dict(self._split_section_by_bp(dim_section)) wall_chunks = dict(self._split_section_by_bp(wall_section)) roof_chunks = dict(self._split_section_by_bp(roof_section)) + rir_chunks = dict(self._split_section_by_bp(rir_section)) if rir_section.strip() else {} floor_chunks = dict(self._split_section_by_bp(floor_section)) + # Per-extension RR age bands from §3: "1st Ext. Room(s) in Roof I 1996-2002". 
+ ext_rir_age_re = re.compile( + r"(\d+(?:st|nd|rd|th))\s+Ext\.\s+Room\(s\) in Roof\s+([A-M] [^\n]+)", + re.MULTILINE, + ) + ext_rir_age_bands: dict[str, str] = { + f"{m.group(1)} Extension": m.group(2).strip() + for m in ext_rir_age_re.finditer(self._text) + } + main_walls = self._extract_walls() main_roof = self._extract_roof() main_floor = self._extract_floor() @@ -580,6 +610,11 @@ class ElmhurstSiteNotesExtractor: roof = main_roof if self._local_bool(roof_lines, "As Main") else self._roof_details_from_lines(roof_lines) floor = main_floor if self._local_bool(floor_lines, "As Main") else self._floor_details_from_lines(floor_lines) + rir = self._room_in_roof_from_bodies( + dim_body=dim_body, + rir_body=rir_chunks.get(name, ""), + age_band=ext_rir_age_bands.get(name), + ) extensions.append( ExtensionPart( name=name, @@ -591,6 +626,7 @@ class ElmhurstSiteNotesExtractor: walls=walls, roof=roof, floor=floor, + room_in_roof=rir, ) ) return extensions diff --git a/datatypes/epc/domain/mapper.py b/datatypes/epc/domain/mapper.py index adfffdf2..c663e2fe 100644 --- a/datatypes/epc/domain/mapper.py +++ b/datatypes/epc/domain/mapper.py @@ -378,7 +378,12 @@ class EpcPropertyDataMapper: for ext in survey.extensions for f in ext.dimensions.floors ) - + (survey.room_in_roof.floor_area_m2 if survey.room_in_roof else 0.0), + + (survey.room_in_roof.floor_area_m2 if survey.room_in_roof else 0.0) + + sum( + ext.room_in_roof.floor_area_m2 + for ext in survey.extensions + if ext.room_in_roof is not None + ), 2, ), built_form=built_form, @@ -3142,6 +3147,9 @@ def _map_elmhurst_building_parts( walls=ext.walls, roof=ext.roof, floor=ext.floor, + room_in_roof=_map_elmhurst_room_in_roof( + ext.room_in_roof, is_flat=is_flat, + ), ) ) return parts diff --git a/datatypes/epc/surveys/elmhurst_site_notes.py b/datatypes/epc/surveys/elmhurst_site_notes.py index cb96c682..c736ae92 100644 --- a/datatypes/epc/surveys/elmhurst_site_notes.py +++ b/datatypes/epc/surveys/elmhurst_site_notes.py @@ -340,6 
+340,13 @@ class ExtensionPart: walls: WallDetails roof: RoofDetails floor: FloorDetails + # §4 + §8.1 Room(s) in Roof on this extension. None when no RR is + # lodged for the extension (typical single-storey extensions). For + # multi-storey extensions with a top-floor RR (cert 000565: Ext1=34 + # m², Ext2=5 m², Ext3=32 m², Ext4=2 m²), leaving this None drops 73 + # m² of TFA from the cascade, pulling space_heating and lighting + # kWh down by ~23% on the cert. + room_in_roof: Optional[RoomInRoof] = None @dataclass