From 9faff3e122da64c62f7566d60bb87b7694129c40 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 27 May 2026 18:28:42 +0000 Subject: [PATCH] =?UTF-8?q?Slice=20S0380.5:=20surface=20insulated=5Fdoor?= =?UTF-8?q?=5Fu=5Fvalue=20from=20Summary=20=C2=A710=20'Average=20U-value'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the three-layer gap that left the Summary mapper producing `insulated_door_u_value=None` even though Summary §10 lodges "Average U-value" / "1.20" explicitly on cert 0380: 1. `datatypes/epc/surveys/elmhurst_site_notes.py` — add `ElmhurstSiteNotes.insulated_door_u_value: Optional[float] = None`, placed in the defaulted-field block so existing fixtures that omit the field still construct without changes. 2. `backend/documents_parser/elmhurst_extractor.py` — add `_extract_door_u_value` that section-scopes the lookup to `_section_lines("10.0 Doors:", "11.0 Windows:")` so the bare "Average U-value" label cannot be shadowed by global U-value lookups in §7 Walls / §8 Roofs / §9 Floors. 3. `datatypes/epc/domain/mapper.py` — surface `insulated_door_u_value=survey.insulated_door_u_value` on the `from_elmhurst_site_notes` path. The comment in `epc_property_data.py:585` ("Not available in site notes") is now outdated for Elmhurst Summary PDFs that lodge the explicit value. Worksheet anchor (dr87-0001-000899.pdf line ref (26)): Doors insulated 1 NetArea 3.7000 U-value 1.2000 A×U 4.4400 W/K Forcing function (Slice S0380.1): cert 0380 Summary cascade `doors_w_per_k` moves from 5.1800 to **4.4400 W/K — exact match against worksheet line ref (26)**. The +0.74 W/K mis-attribution was the default door-U fall-through that the lodged 1.20 value silences. SAP moves 88.1981 (Δ -0.3123) → 88.2746 (Δ -0.2358). 
Added focused unit test `test_summary_0380_surfaces_insulated_door_u_value_1_2` that pins the mapper boundary directly to the worksheet's lodged U-value 1.2, so future debuggers can localise regressions in the new extractor / field / mapper path before walking the full chain. Pyright net-zero across all four edited files: datatypes/epc/domain/mapper.py: 32 (baseline) datatypes/epc/surveys/elmhurst_site_notes.py: 0 backend/documents_parser/elmhurst_extractor.py: 0 backend/documents_parser/tests/test_summary_pdf_mapper_chain.py: 0 Regression suite: 673 pass + 11 fail (vs handover baseline 669 + 10 — net +4 pass for the four GREEN unit tests across Slices S0380.2-5; the +1 fail vs baseline is the S0380.1 chain test which this slice moves to Δ -0.2358 but does not yet fully close). Spec refs: - SAP 10.2 Table 14 (door U-values: composite-construction default cascade is silenced when the assessor lodges an explicit measured U on the cert; routed via `insulated_door_u_value`). - Cert 0380 worksheet dr87-0001-000899.pdf line ref (26) — the A×U=4.4400 W/K spec value that this slice closes the Summary cascade to exactly. Co-Authored-By: Claude Opus 4.7 --- .../documents_parser/elmhurst_extractor.py | 15 ++++++++++++ .../tests/test_summary_pdf_mapper_chain.py | 23 +++++++++++++++++++ datatypes/epc/domain/mapper.py | 1 + datatypes/epc/surveys/elmhurst_site_notes.py | 7 ++++++ 4 files changed, 46 insertions(+) diff --git a/backend/documents_parser/elmhurst_extractor.py b/backend/documents_parser/elmhurst_extractor.py index f682e665..c751c289 100644 --- a/backend/documents_parser/elmhurst_extractor.py +++ b/backend/documents_parser/elmhurst_extractor.py @@ -348,6 +348,20 @@ class ElmhurstSiteNotesExtractor: lines = [l.strip() for l in main_body.splitlines() if l.strip()] return self._floor_details_from_lines(lines) + def _extract_door_u_value(self) -> Optional[float]: + """Read the §10 Doors block's "Average U-value" lodging. 
+ Scoped to the §10..§11 slice so the global "U-value" labels in + Walls/Roofs/Floors can't shadow the door reading. Returns None when + the line is absent (e.g. all doors uninsulated) or is not numeric.""" + lines = self._section_lines("10.0 Doors:", "11.0 Windows:") + raw = self._local_val(lines, "Average U-value") + if not raw: + return None + try: + return float(raw.split()[0]) + except (ValueError, IndexError): + return None + # RIR surface row: ` [ [] # [] ]`. The middle slot # widths vary by surface kind; we match the four leading numerics @@ -1203,6 +1217,7 @@ class ElmhurstSiteNotesExtractor: floor=self._extract_floor(), door_count=self._int_val("Total Number of Doors"), insulated_door_count=self._int_val("Number of Insulated Doors"), + insulated_door_u_value=self._extract_door_u_value(), windows=self._extract_windows(), draught_proofing_percent=self._int_val("Draught Proofing"), ventilation=self._extract_ventilation(), diff --git a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py index 173c34d3..04dcbc3e 100644 --- a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py +++ b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py @@ -564,6 +564,29 @@ def test_summary_0380_surfaces_wall_insulation_thickness_100mm() -> None: assert main.wall_insulation_thickness == "100mm" +def test_summary_0380_surfaces_insulated_door_u_value_1_2() -> None: + # Arrange — cert 0380's Summary §10 Doors block lodges the door + # U-value on the "Average U-value" / "1.20" line pair. The dr87 + # worksheet line ref (26) confirms the spec value: "Doors + # insulated 1, NetArea 3.7000 m², U-value 1.2000, A×U 4.4400 W/K". + # Without surfacing the lodged U-value the cascade defaults the + # door U and overstates `doors_w_per_k` to 5.18 vs worksheet + # 4.44 W/K.
The comment at + # `datatypes/epc/domain/epc_property_data.py:585` claimed the + # value was "not available in site notes" — that assertion is + # outdated for Elmhurst Summary PDFs which lodge it explicitly. + pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000899_PDF) + site_notes = ElmhurstSiteNotesExtractor(pages).extract() + + # Act + epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) + + # Assert — float compare with small tolerance (Summary lodges + # "1.20" which parses cleanly to 1.2; API lodges 1.2 directly). + assert epc.insulated_door_u_value is not None + assert abs(epc.insulated_door_u_value - 1.2) < 1e-6 + + def test_summary_0380_full_chain_sap_matches_worksheet_pdf_exactly() -> None: # Arrange — cert 0380-2471-3250-2596-8761 (Summary_000899.pdf / # dr87-0001-000899.pdf) is the first heat-pump cert under per-cert diff --git a/datatypes/epc/domain/mapper.py b/datatypes/epc/domain/mapper.py index a19c1fd7..ec7c1c7b 100644 --- a/datatypes/epc/domain/mapper.py +++ b/datatypes/epc/domain/mapper.py @@ -352,6 +352,7 @@ class EpcPropertyDataMapper: open_chimneys_count=survey.ventilation.open_chimneys_count, habitable_rooms_count=survey.habitable_rooms, insulated_door_count=survey.insulated_door_count, + insulated_door_u_value=survey.insulated_door_u_value, cfl_fixed_lighting_bulbs_count=survey.lighting.cfl_count, led_fixed_lighting_bulbs_count=survey.lighting.led_count, incandescent_fixed_lighting_bulbs_count=survey.lighting.incandescent_count, diff --git a/datatypes/epc/surveys/elmhurst_site_notes.py b/datatypes/epc/surveys/elmhurst_site_notes.py index bc70ffbb..57663719 100644 --- a/datatypes/epc/surveys/elmhurst_site_notes.py +++ b/datatypes/epc/surveys/elmhurst_site_notes.py @@ -340,6 +340,13 @@ class ElmhurstSiteNotes: # (preserves backward compatibility with the existing fixture). 
extensions: List[ExtensionPart] = field(default_factory=lambda: []) # type: ignore[reportUnknownLambdaType] + # §10 "Average U-value" — lodged when at least one door is + # insulated. None when the line is absent from the PDF. Defaulted + # so existing fixtures that omit it continue to construct without + # changes; the API mapper surfaces this same field directly from + # the EPC schema. + insulated_door_u_value: Optional[float] = None + # §8.1 Rooms in Roof — Main property only in the observed corpus. # When None the dwelling has no RR storey (a 2-storey house with a # cold loft instead of a room-in-roof). The mapper translates the