From 5fcb594f0a615bbe64491e33d0ed411b7adef9c5 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 27 May 2026 18:15:18 +0000 Subject: [PATCH] =?UTF-8?q?Slice=20S0380.4:=20surface=20wall=5Finsulation?= =?UTF-8?q?=5Fthickness=20from=20Summary=20=C2=A77.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the three-layer gap that left the Summary mapper producing `wall_insulation_thickness=None` even though Summary §7.0 lodges "Insulation Thickness" / "100 mm" explicitly on cert 0380. Three small co-ordinated edits ship the field end-to-end: 1. `datatypes/epc/surveys/elmhurst_site_notes.py` — add `WallDetails.insulation_thickness_mm: Optional[int] = None`, mirroring the existing `RoofDetails.insulation_thickness_mm`. 2. `backend/documents_parser/elmhurst_extractor.py` — extend `_wall_details_from_lines` to read the `_local_val(lines, "Insulation Thickness")` label inside the §7 Walls block (the "Insulation Thickness" label is local-scoped per block, so it does not collide with §8 Roofs / §9 Floors). 3. `datatypes/epc/domain/mapper.py` — surface `wall_insulation_thickness=f"{walls.insulation_thickness_mm}mm"` on `SapBuildingPart`. Mirrors the API mapper's string-with-unit shape (`'100mm'`) so cert-to-cert parity tests (Summary EPC ≡ API EPC) compare equal; the cascade's `_parse_thickness_mm` accepts either form. Forcing function (Slice S0380.1): cert 0380 Summary cascade SAP moves from 86.8671 (Δ -1.6433 — i.e. after Slice S0380.3 only) to 88.1981 (Δ -0.3123) — closes ~81% of the remaining gap. Critically, `walls_w_per_k` now hits API parity exactly (Summary 11.6150 ≡ API 11.6150) — the composite filled-cavity-plus-external U-value calc is now keyed off the lodged 100 mm thickness rather than its internal default. Residual -0.31 SAP vs worksheet is comparable to the documented HP cohort's API-path residual of +0.06 (cert 0380 API path closes at +0.0594). Summary path is now within ±0.37 of API path. 
Remaining diffs to investigate (per the next-step diagnostic): hot-water
cascade (Summary 1002.74 kWh vs API 878.05 kWh, +124.69 kWh), HLC
parameters (heat_transfer_coefficient still differs slightly through
secondary terms), and possibly secondary-heating routing. The worksheet
vs API +0.06 residual is the documented Appendix N3.6 PSR-interpolation
precision floor and out of scope for Summary-path closure.

Added focused unit test
`test_summary_0380_surfaces_wall_insulation_thickness_100mm` that pins
the mapper boundary directly (Summary "100 mm" line pair → EPC
`wall_insulation_thickness="100mm"`), so future debuggers can localise
regressions in the new extractor / field / mapper path before walking
the full chain.

Pyright net-zero across all four edited files:

  datatypes/epc/domain/mapper.py: 32 (baseline)
  datatypes/epc/surveys/elmhurst_site_notes.py: 0
  backend/documents_parser/elmhurst_extractor.py: 0
  backend/documents_parser/tests/test_summary_pdf_mapper_chain.py: 0

Regression suite: 672 pass + 11 fail (vs handover baseline 669 + 10 —
net +3 pass for the three Slices S0380.2-4 GREEN unit tests; the +1 fail
vs baseline is still the S0380.1 chain test which this slice moves from
Δ -1.6433 to Δ -0.3123 but does not yet fully close).
Spec refs: - SAP 10.2 §3.7 / Appendix S Table S5 (composite filled-cavity-plus- external U-value calc — series-resistance form keyed off lodged insulation thickness) - Cert 0380 Summary PDF §7.0 lines 121-122 ("Insulation Thickness" / "100 mm" — the missing extractor read this slice adds) Co-Authored-By: Claude Opus 4.7 --- .../documents_parser/elmhurst_extractor.py | 14 ++++++++++ .../tests/test_summary_pdf_mapper_chain.py | 26 +++++++++++++++++++ datatypes/epc/domain/mapper.py | 9 +++++++ datatypes/epc/surveys/elmhurst_site_notes.py | 4 +++ 4 files changed, 53 insertions(+) diff --git a/backend/documents_parser/elmhurst_extractor.py b/backend/documents_parser/elmhurst_extractor.py index 78a86d97..f682e665 100644 --- a/backend/documents_parser/elmhurst_extractor.py +++ b/backend/documents_parser/elmhurst_extractor.py @@ -235,6 +235,19 @@ class ElmhurstSiteNotesExtractor: thickness_mm = ( int(thickness_raw.split()[0]) if thickness_raw else None ) + # Composite / retrofit insulation thickness — Summary §7.0 + # writes the value on the line pair "Insulation Thickness" / + # "100 mm" when a composite filled-cavity-plus-external (or + # equivalent) wall is lodged. The "Insulation Thickness" label + # is local-scoped inside the §7 block so it does not collide + # with the §8 Roofs / §9 Floors blocks. None when the PDF + # omits the line (no retrofit lodged). 
+ ins_thickness_raw = self._local_val(lines, "Insulation Thickness") + insulation_thickness_mm = ( + int(ins_thickness_raw.split()[0]) + if ins_thickness_raw and ins_thickness_raw.split()[0].isdigit() + else None + ) return WallDetails( wall_type=self._local_str(lines, "Type"), insulation=self._local_str(lines, "Insulation"), @@ -242,6 +255,7 @@ class ElmhurstSiteNotesExtractor: u_value_known=self._local_bool(lines, "U-value Known"), party_wall_type=self._local_str(lines, "Party Wall Type"), thickness_mm=thickness_mm, + insulation_thickness_mm=insulation_thickness_mm, alternative_walls=self._alternative_walls_from_lines(lines), ) diff --git a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py index 118ea9e9..173c34d3 100644 --- a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py +++ b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py @@ -538,6 +538,32 @@ def test_summary_0380_filled_cavity_plus_external_insulation_routes_to_code_6() assert main.wall_insulation_type == 6 # 6 = filled cavity + external +def test_summary_0380_surfaces_wall_insulation_thickness_100mm() -> None: + # Arrange — cert 0380's Summary §7.0 Walls block lodges the + # composite-wall insulation thickness on the line pair + # "Insulation Thickness" / "100 mm". Without surfacing this to + # `wall_insulation_thickness`, the heat-transmission cascade + # falls through `_parse_thickness_mm(None) → None` and the + # composite filled-cavity-plus-external U-value calc uses its + # default thickness rather than the lodged 100 mm — leaving cert + # 0380's `walls_w_per_k` at 24.62 vs API's 11.62 even with + # `wall_insulation_type=6` set (Slice S0380.3). Mirror of the + # existing `_roof_details_from_lines` reader that surfaces roof + # `insulation_thickness_mm` from the same "Insulation Thickness" + # label. 
+ pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000899_PDF) + site_notes = ElmhurstSiteNotesExtractor(pages).extract() + + # Act + epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) + + # Assert — match the API mapper's "100mm" string (the EPC schema + # type is `Optional[str]`; the cascade's `_parse_thickness_mm` + # strips non-digit trailers). + main = epc.sap_building_parts[0] + assert main.wall_insulation_thickness == "100mm" + + def test_summary_0380_full_chain_sap_matches_worksheet_pdf_exactly() -> None: # Arrange — cert 0380-2471-3250-2596-8761 (Summary_000899.pdf / # dr87-0001-000899.pdf) is the first heat-pump cert under per-cert diff --git a/datatypes/epc/domain/mapper.py b/datatypes/epc/domain/mapper.py index 12c60a00..a19c1fd7 100644 --- a/datatypes/epc/domain/mapper.py +++ b/datatypes/epc/domain/mapper.py @@ -2824,6 +2824,15 @@ def _map_elmhurst_building_part( party_wall_construction=_elmhurst_party_wall_construction_int(walls.party_wall_type), sap_floor_dimensions=floor_dims, wall_thickness_mm=walls.thickness_mm, + # API mapper lodges wall_insulation_thickness as the string + # "100mm"; the cascade's `_parse_thickness_mm` accepts the + # digit-prefix form. Mirror the API shape so cert-to-cert + # parity tests (Summary EPC ≡ API EPC) compare equal. 
+ wall_insulation_thickness=( + f"{walls.insulation_thickness_mm}mm" + if walls.insulation_thickness_mm is not None + else None + ), roof_construction_type=_strip_code(roof.roof_type), roof_insulation_location=_strip_code(roof.insulation), roof_insulation_thickness=_resolve_sloping_ceiling_thickness( diff --git a/datatypes/epc/surveys/elmhurst_site_notes.py b/datatypes/epc/surveys/elmhurst_site_notes.py index a110517b..bc70ffbb 100644 --- a/datatypes/epc/surveys/elmhurst_site_notes.py +++ b/datatypes/epc/surveys/elmhurst_site_notes.py @@ -79,6 +79,10 @@ class WallDetails: default_factory=lambda: [] # type: ignore[reportUnknownLambdaType] ) thickness_mm: Optional[int] = None + # Insulation thickness in mm — Summary §7.0 lodges this on the + # "Insulation Thickness" / "100 mm" line pair when a composite or + # retrofit insulation is recorded. None when the PDF omits the line. + insulation_thickness_mm: Optional[int] = None @dataclass