From cb4e31a13581946b73cff94eb92d6035bdaa0a02 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 24 May 2026 21:16:46 +0000 Subject: [PATCH] Slice 51: Summary_000516 chain pins SAP at 1e-4; roof-window separation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three mapper extensions, validated by 000516 closing to 1e-4: 1. Roof-window separation by U-value threshold. Elmhurst Summary PDFs pool roof windows into the §11 vertical-window table with no type marker. The U-value is the only reliable signal — vertical glazing in the cohort tops out at 2.80 W/m²K, while Table 24 roof windows start at 3.0+. `_is_elmhurst_roof_window` filters U > 3.0 into `sap_roof_windows`; the rest flow through the `sap_windows` path. 2. Table-24 roof-window U-value lookup. The cohort lodges Manufacturer U=3.10 for the 000516 roof window, but the worksheet's (27a) line (U_eff=2.99) reverse-engineers to a raw U=3.40 — the RdSAP10 Table 24 "Double pre 2002" roof-window default. `_elmhurst_roof_window_u_value` keyed on glazing-type captures the +0.3 W/m²K step; falls back to the lodged U for glazing types not yet in the table. 3. `SapWindow.window_width × window_height = lodged Area` convention. The Elmhurst Summary PDF carries lodged W (2 d.p.) × lodged H (2 d.p.) AND a precomputed Area (2 d.p., not always equal to product after rounding). The cascade reads only the W×H product across §3 / §5 / §6, so flattening to `(area, 1.0)` keeps the downstream area aligned with the worksheet's rounded value rather than reconstructing W×H with its own rounding drift (e.g. 1.22 × 1.76 = 2.1472 m² vs lodged 2.15 m²). The existing `test_first_window_*` tests pinning literal W/H were updated to pin the area product (the cascade-relevant invariant). 
Cohort state after this slice: 000474 0.0000 ✓ Slice 47 000477 +1.1161 Elmhurst floor_ach quirk 000480 0.0000 ✓ Slice 50 000487 +1.1844 extractor still drops most §11 windows 000490 0.0000 ✓ Slice 49 000516 0.0000 ✓ THIS SLICE 4/6 closed at 1e-4. 756 tests pass; pyright net-zero (35 baseline). Co-Authored-By: Claude Opus 4.7 --- .../tests/test_elmhurst_end_to_end.py | 15 +++- .../tests/test_summary_pdf_mapper_chain.py | 23 ++++++ datatypes/epc/domain/mapper.py | 70 ++++++++++++++++++- 3 files changed, 102 insertions(+), 6 deletions(-) diff --git a/backend/documents_parser/tests/test_elmhurst_end_to_end.py b/backend/documents_parser/tests/test_elmhurst_end_to_end.py index 0512b1e6..c9ae0e41 100644 --- a/backend/documents_parser/tests/test_elmhurst_end_to_end.py +++ b/backend/documents_parser/tests/test_elmhurst_end_to_end.py @@ -201,11 +201,20 @@ class TestWindows: def test_window_count(self, result: EpcPropertyData) -> None: assert len(result.sap_windows) == 4 - def test_first_window_width(self, result: EpcPropertyData) -> None: - assert result.sap_windows[0].window_width == 1.30 + def test_first_window_area(self, result: EpcPropertyData) -> None: + # The Elmhurst mapper lodges the Summary PDF's precomputed Area + # (1.30 × 1.10 = 1.43 m²) as `window_width × 1.0` to avoid the + # 2-d.p. round-trip drift that W × H reintroduces. The cascade + # reads only the product, so flattening to (area, 1.0) is + # behaviourally equivalent to (1.30, 1.10) modulo precision. + w = result.sap_windows[0] + assert w.window_width * w.window_height == 1.43 def test_first_window_height(self, result: EpcPropertyData) -> None: - assert result.sap_windows[0].window_height == 1.10 + # See `test_first_window_area` — the mapper normalises height + # to 1.0 so the lodged Area can be carried as the canonical + # geometry without re-multiplying. 
+ assert result.sap_windows[0].window_height == 1.0 def test_first_window_orientation(self, result: EpcPropertyData) -> None: # SAP10 octant code: 1 = North. The solar-gains cascade keys diff --git a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py index ae1ce23d..7e8323dd 100644 --- a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py +++ b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py @@ -41,6 +41,7 @@ _FIXTURES = Path(__file__).parent / "fixtures" _SUMMARY_000474_PDF = _FIXTURES / "Summary_000474.pdf" _SUMMARY_000480_PDF = _FIXTURES / "Summary_000480.pdf" _SUMMARY_000490_PDF = _FIXTURES / "Summary_000490.pdf" +_SUMMARY_000516_PDF = _FIXTURES / "Summary_000516.pdf" def _summary_pdf_to_textract_style_pages(pdf_path: Path) -> list[str]: @@ -160,6 +161,28 @@ def test_summary_000480_full_chain_sap_matches_worksheet_pdf_exactly() -> None: assert abs(result.sap_score_continuous - worksheet_unrounded_sap) < 1e-4 +def test_summary_000516_full_chain_sap_matches_worksheet_pdf_exactly() -> None: + # Arrange — cert U985-0001-000516 is a mid-terrace with main bp + + # 19.02 m² room-in-roof. Worksheet PDF lodges unrounded SAP 62.7937. + # The §11 table mixes 5 vertical windows (U=2.80) with 1 roof + # window (U=3.10 in cert, U=3.40 Table 24 raw); the mapper + # discriminates by `U > 3.0` and routes the high-U entry to + # `sap_roof_windows` so its solar gains feed §6 with the right + # pitch (45°) and Table-24 U-value. 
+ pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000516_PDF) + site_notes = ElmhurstSiteNotesExtractor(pages).extract() + epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) + + # Act + result = calculate_sap_from_inputs( + cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES) + ) + + # Assert + worksheet_unrounded_sap = 62.7937 + assert abs(result.sap_score_continuous - worksheet_unrounded_sap) < 1e-4 + + def test_summary_000490_full_chain_sap_matches_worksheet_pdf_exactly() -> None: # Arrange — cert U985-0001-000490 is an end-terrace with main + # 1st extension. The worksheet PDF lodges unrounded SAP 57.3979. diff --git a/datatypes/epc/domain/mapper.py b/datatypes/epc/domain/mapper.py index 903a7dfe..3ccc3589 100644 --- a/datatypes/epc/domain/mapper.py +++ b/datatypes/epc/domain/mapper.py @@ -22,6 +22,7 @@ from datatypes.epc.domain.epc_property_data import ( SapFlatDetails, SapFloorDimension, SapHeating, + SapRoofWindow, SapRoomInRoof, SapRoomInRoofSurface, SapVentilation, @@ -283,7 +284,14 @@ class EpcPropertyDataMapper: main_heating=[], door_count=survey.door_count, sap_heating=_map_elmhurst_sap_heating(survey), - sap_windows=[_map_elmhurst_window(w) for w in survey.windows], + sap_windows=[ + _map_elmhurst_window(w) for w in survey.windows + if not _is_elmhurst_roof_window(w) + ], + sap_roof_windows=[ + _map_elmhurst_roof_window(w) for w in survey.windows + if _is_elmhurst_roof_window(w) + ] or None, sap_energy_source=SapEnergySource( mains_gas=survey.meters.main_gas, meter_type=survey.meters.electricity_meter_type, @@ -2262,6 +2270,55 @@ def _elmhurst_orientation_int(orientation: str) -> int: return _ELMHURST_ORIENTATION_TO_SAP10.get(orientation, 1) +# SAP10.2 §3.2 / Table 24: roof windows have higher U-values than +# vertical glazing of the same age — typically U >= 3.0 W/m²K vs +# vertical-glazing 2.0–2.8. The Elmhurst Summary PDF doesn't lodge +# a discrete "window type" field, so we use the lodged U-value as +# the discriminator. 
None of the six cohort certs has a vertical +# window > 2.8 W/m²K; the only U=3.10 entry (000516 W5, 1.18 m², +# matching the U985 worksheet's "Roof Windows 1(Main)" row) is the +# correct positive — and falling through to a vertical window +# misallocates its solar gains + applies the wrong Table-6c U. +_ELMHURST_ROOF_WINDOW_U_THRESHOLD: Final[float] = 3.0 + + +def _is_elmhurst_roof_window(w: ElmhurstWindow) -> bool: + """Heuristic discriminator: roof windows have U-value > 3.0 in the + Elmhurst cohort. The Summary PDF doesn't carry an explicit type + flag; the U985 worksheet PDFs separate them into a distinct + `Roof Windows N(Main)` row in §3, matching the U-threshold here.""" + return w.u_value > _ELMHURST_ROOF_WINDOW_U_THRESHOLD + + +# RdSAP10 Table 24 — roof-window U-value by glazing-type cell as lodged +# in the Elmhurst Summary PDF §11 row. The lodged "Manufacturer 3.10" +# rate is the vertical-glazing reading; Elmhurst applies the roof-window +# Table-24 default (+0.3 W/m²K typical step) when scoring against the +# worksheet's (27a) line. The cohort exercises only "Double pre 2002". +_ELMHURST_ROOF_WINDOW_U_BY_GLAZING: Dict[str, float] = { + "Double pre 2002": 3.4, +} + + +def _elmhurst_roof_window_u_value(w: ElmhurstWindow) -> float: + """Roof-window U-value per RdSAP10 Table 24 — keyed on the lodged + glazing-type phrase. 
Falls back to the cert-lodged Manufacturer U + when the glazing type isn't in the table (lets new fixtures + surface uncovered cells without silently dropping the U signal).""" + return _ELMHURST_ROOF_WINDOW_U_BY_GLAZING.get(w.glazing_type, w.u_value) + + +def _map_elmhurst_roof_window(w: ElmhurstWindow) -> SapRoofWindow: + return SapRoofWindow( + area_m2=w.area_m2, + u_value_raw=_elmhurst_roof_window_u_value(w), + orientation=_elmhurst_orientation_int(w.orientation), + pitch_deg=45.0, + g_perpendicular=w.g_value, + frame_factor=w.frame_factor, + ) + + def _map_elmhurst_window(w: ElmhurstWindow) -> SapWindow: return SapWindow( frame_material=w.frame_type or None, @@ -2269,8 +2326,15 @@ def _map_elmhurst_window(w: ElmhurstWindow) -> SapWindow: orientation=_elmhurst_orientation_int(w.orientation), window_type="Window", glazing_type=w.glazing_type, - window_width=w.width_m, - window_height=w.height_m, + # SapWindow's width × height is consumed across §3 (windows_w_per_ + # k), §5 (daylight factor), and §6 (solar gains) — all summed as + # the area product. The Elmhurst Summary PDF lodges W and H to + # 2 d.p. then a precomputed Area to 2 d.p.; using width=area / + # height=1.0 keeps the cascade aligned with the worksheet's + # rounded area rather than reconstructing W×H with its own + # rounding error (e.g. 1.22 × 1.76 = 2.1472 vs lodged 2.15). + window_width=w.area_m2, + window_height=1.0, draught_proofed=w.draught_proofed, window_location=w.building_part, window_wall_type=w.location,