From e1348c424b114bf95eff01c11fba7dbf9fec0ef7 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 26 May 2026 21:19:17 +0000 Subject: [PATCH] =?UTF-8?q?Slice=2099b:=20Elmhurst=20mapper=20=E2=80=94=20?= =?UTF-8?q?flat=20floor-position=20from=20floor.location?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For flats, `EpcPropertyData.dwelling_type` needs a "Top-floor" / "Mid-floor" / "Ground-floor" prefix so the cascade's `_dwelling_exposure` (cert_to_inputs.py) gates floor + roof party-surface routing correctly per RdSAP 10 §5. Before Slice 99a, the broken `built_form` ("2.0 Number of Storeys:") meant cert 9501's `dwelling_type` was "2.0 Number of Storeys: flat" — never matched any flat-prefix in the cascade, so the cert was treated as a fully-exposed dwelling (worksheet had floor U=0 / party-ceiling-down, but cascade routed both as exposed → Δ +9.25 W/K on floor alone). After 99a's empty-attachment fix the prefix was just " flat" — still no match. Slice 99b composes the position prefix from the Summary's lodged floor location + RR presence: - floor.location lodges "dwelling below" → floor is party - + RR present → Top-floor (roof exposed) - + no RR → Mid-floor (roof party) - floor.location doesn't lodge dwelling below → Ground-floor For cert 9501: floor.location="A Another dwelling below" + RR present (cert lodges Room-in-Roof with gable walls + flat ceiling). Resulting `dwelling_type` = "Top-floor flat" — matches the cascade's `_dwelling_exposure` "top-floor" prefix → has_exposed_floor=False, has_exposed_roof=True, the worksheet's exposure shape. Houses keep the historical contract: `f"{built_form} {property_type.lower()}"` (with surrounding whitespace now stripped) — cohort hand-builts and the 2 boiler chain tests (001479 + 0330) unchanged. 
Co-Authored-By: Claude Opus 4.7 --- .../tests/test_summary_pdf_mapper_chain.py | 27 +++++++++++ datatypes/epc/domain/mapper.py | 47 ++++++++++++++++++- 2 files changed, 73 insertions(+), 1 deletion(-) diff --git a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py index 2f65cd6e..b098a16b 100644 --- a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py +++ b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py @@ -321,6 +321,33 @@ def test_summary_9501_flat_has_no_built_form_in_summary_pdf() -> None: assert epc.built_form == "" +def test_summary_9501_dwelling_type_is_top_floor_flat() -> None: + # Arrange — cert 9501's worksheet treats the cert as a TOP-floor + # flat: §3 (28a) "Ground floor Main … U=0.0" because the floor + # sits over "Another dwelling below" (worksheet line 9.0 Floor + # location); §3 (30) has both an external roof + RR contributions + # so the roof IS exposed. The cascade's `_dwelling_exposure` + # function does prefix matching on `dwelling_type.lower()` to gate + # which surfaces are party — without "top-floor flat" the cert + # falls through to fully-exposed houses (Δ +9.25 W/K on floor). 
+ # + # Floor-position inference rules: + # - floor.location indicates "Another dwelling below" + # → not ground floor (rules out ground-floor flat) + # - room_in_roof OR external roof present + # → roof exposed (rules out mid-floor flat) + # - therefore → top-floor flat + pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000784_PDF) + site_notes = ElmhurstSiteNotesExtractor(pages).extract() + + # Act + epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) + + # Assert + assert epc.dwelling_type is not None + assert epc.dwelling_type.lower().startswith("top-floor") + + def test_summary_001479_full_chain_sap_matches_worksheet_pdf_exactly() -> None: # Arrange — cert 001479 (Summary_001479.pdf / P960-0001-001479.pdf) # is the first cohort cert with a real GOV.UK EPB API counterpart diff --git a/datatypes/epc/domain/mapper.py b/datatypes/epc/domain/mapper.py index 57aa0465..9b410d64 100644 --- a/datatypes/epc/domain/mapper.py +++ b/datatypes/epc/domain/mapper.py @@ -271,8 +271,15 @@ class EpcPropertyDataMapper: prefix = pd.house_number or pd.house_name or "" address_line_1 = f"{prefix}, {pd.street}" if prefix else pd.street + dwelling_type = _elmhurst_dwelling_type( + built_form=built_form, + property_type=property_type, + floor=survey.floor, + room_in_roof=survey.room_in_roof, + ) + return EpcPropertyData( - dwelling_type=f"{built_form} {property_type.lower()}", + dwelling_type=dwelling_type, inspection_date=pd.inspection_date, tenure=pd.tenure, transaction_type=pd.transaction_type, @@ -2080,6 +2087,44 @@ _ELMHURST_INSULATION_CODE_TO_SAP10: Dict[str, int] = { } +def _elmhurst_dwelling_type( + *, + built_form: str, + property_type: str, + floor: Optional[ElmhurstFloorDetails], + room_in_roof: Optional[ElmhurstRoomInRoof], +) -> str: + """Compose `EpcPropertyData.dwelling_type` from the Elmhurst Summary's + property-type + attachment + floor-location + RR presence. 
+ + For HOUSES: returns `f"{built_form} {property_type.lower()}"` — the + historical contract ("Mid-Terrace house", "Detached house"). + + For FLATS: derives the floor-position prefix ("Top-floor", + "Mid-floor", "Ground-floor") from `floor.location` + RR presence: + - floor lodges "dwelling below" → roof exposed (RR present or + external roof) → Top-floor; roof party (no RR/external) → + Mid-floor; + - floor not over another dwelling → Ground-floor. + + The cascade's `_dwelling_exposure` (cert_to_inputs.py) is prefix- + matched on the lowercase result; correct flat-prefix detection is + the gate for floor / roof party-surface routing (RdSAP 10 §5). + """ + if property_type.lower() != "flat": + return f"{built_form} {property_type.lower()}".strip() + floor_loc = (floor.location if floor is not None else "") or "" + has_dwelling_below = "dwelling below" in floor_loc.lower() + has_exposed_roof = room_in_roof is not None + if has_dwelling_below and has_exposed_roof: + position = "Top-floor" + elif has_dwelling_below: + position = "Mid-floor" + else: + position = "Ground-floor" + return f"{position} flat" + + def _elmhurst_wall_construction_int(coded: str) -> Optional[int]: """Map an Elmhurst wall_type string ('CA Cavity') to the SAP10 integer code (4). Returns None when the leading code isn't a known