"""End-to-end validation for the Elmhurst Summary→EpcPropertyData chain. The 6 Elmhurst worksheet fixtures in `domain.sap10_calculator.worksheet.tests` build their `EpcPropertyData` synthetically — they validate the calculator + cascade in isolation from the mapper. This file pins the OTHER half of the chain: `from_elmhurst_site_notes` must produce a calculator-equivalent `EpcPropertyData` when fed the Summary PDF the worksheet was generated from. Together with the worksheet cascade tests, this closes the loop: extractor + mapper + cascade + calculator validated end-to-end against the authoritative Elmhurst documents. Status: GREEN. For cert U985-0001-000474, this pipeline produces an unrounded SAP within 0.5 of the worksheet PDF's `62.2584` (line 257). The cascade itself reproduces Elmhurst's calculator exactly on hand-built inputs (handbuilt → 62.2584 to 4 d.p.); the remaining sub-half-point gap from the mapped path is non-load-bearing field drift (e.g. central_heating_pump_age the Summary PDF doesn't lodge). Preprocessing: the existing `ElmhurstSiteNotesExtractor` was written against Textract-style output (label\\nvalue pairs in spatial reading order). We don't have Textract in the test environment, so this helper converts `pdftotext -layout` output (label-whitespace- value on a single line) into the Textract-style sequence the extractor expects. Test-only preprocessing; production runs through Textract directly. 
""" from __future__ import annotations import dataclasses import json import re import subprocess from pathlib import Path from typing import cast import pytest from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtractor from datatypes.epc.domain.mapper import ( EpcPropertyDataMapper, UnmappedApiCode, UnmappedElmhurstLabel, ) from domain.sap10_calculator.calculator import calculate_sap_from_inputs from domain.sap10_calculator.rdsap.cert_to_inputs import SAP_10_2_SPEC_PRICES, cert_to_inputs from domain.sap10_ml.rdsap_uvalues import u_party_wall from domain.sap10_calculator.worksheet.tests import ( _elmhurst_worksheet_000474 as _w000474, _elmhurst_worksheet_000477 as _w000477, _elmhurst_worksheet_000480 as _w000480, _elmhurst_worksheet_000487 as _w000487, _elmhurst_worksheet_000490 as _w000490, _elmhurst_worksheet_000516 as _w000516, ) _FIXTURES = Path(__file__).parent / "fixtures" _SUMMARY_000474_PDF = _FIXTURES / "Summary_000474.pdf" _SUMMARY_000477_PDF = _FIXTURES / "Summary_000477.pdf" _SUMMARY_000480_PDF = _FIXTURES / "Summary_000480.pdf" _SUMMARY_000487_PDF = _FIXTURES / "Summary_000487.pdf" _SUMMARY_000490_PDF = _FIXTURES / "Summary_000490.pdf" _SUMMARY_000516_PDF = _FIXTURES / "Summary_000516.pdf" _SUMMARY_001479_PDF = _FIXTURES / "Summary_001479.pdf" _SUMMARY_000897_PDF = _FIXTURES / "Summary_000897.pdf" _SUMMARY_000784_PDF = _FIXTURES / "Summary_000784.pdf" _SUMMARY_000899_PDF = _FIXTURES / "Summary_000899.pdf" _SUMMARY_000903_PDF = _FIXTURES / "Summary_000903.pdf" _SUMMARY_000901_PDF = _FIXTURES / "Summary_000901.pdf" # cert 3800 _SUMMARY_000904_PDF = _FIXTURES / "Summary_000904.pdf" # cert 9285 _SUMMARY_000900_PDF = _FIXTURES / "Summary_000900.pdf" # cert 2225 _SUMMARY_000898_PDF = _FIXTURES / "Summary_000898.pdf" # cert 2636 _SUMMARY_000902_PDF = _FIXTURES / "Summary_000902.pdf" # cert 9418 _SUMMARY_000889_PDF = _FIXTURES / "Summary_000889.pdf" # cert 2536 (Normal cylinder) _SUMMARY_000884_PDF = _FIXTURES / "Summary_000884.pdf" # 
# cert 9421 (Normal cylinder)
_SUMMARY_000910_PDF = _FIXTURES / "Summary_000910.pdf"  # cert 0036 (Flat, party wall U=0)
_SUMMARY_000890_PDF = _FIXTURES / "Summary_000890.pdf"  # cert 7800 (two electric showers)
_SUMMARY_000565_PDF = _FIXTURES / "Summary_000565.pdf"  # cert 000565 (5-bp Elmhurst-only)

# GOV.UK EPB API JSON for cert 001479 — the API-path counterpart of the
# Summary_001479.pdf fixture. Together they drive the API ≡ Summary
# parity workstream; Layer 4 of the validation stack is "API cascade SAP
# matches worksheet continuous SAP at 1e-4".
_API_001479_JSON = (
    Path(__file__).parents[3]
    / "domain/sap10_calculator/rdsap/tests/fixtures/golden"
    / "0535-9020-6509-0821-6222.json"
)

# Page-count line of `pdfinfo` output. Anchored to the start of a line so
# that a "Pages:" substring inside an earlier metadata value (e.g. the
# document Title) cannot be mistaken for the real page count.
_PDFINFO_PAGE_COUNT_RE = re.compile(r"^Pages:\s+(\d+)", re.MULTILINE)

# A run of two or more whitespace characters is treated as the gap between
# a label and its value; a single space stays inside one token.
_LAYOUT_COLUMN_GAP_RE = re.compile(r"\s{2,}")


def _layout_text_to_textract_style(layout: str) -> str:
    """Flatten one page of `pdftotext -layout` text into newline-separated tokens."""
    tokens: list[str] = []
    for line in layout.splitlines():
        stripped = line.strip()
        if not stripped:
            # Blank layout lines are kept as empty tokens, i.e. they stay
            # blank lines in the joined output.
            tokens.append("")
            continue
        tokens.extend(
            part for part in _LAYOUT_COLUMN_GAP_RE.split(stripped) if part
        )
    return "\n".join(tokens)


def _summary_pdf_to_textract_style_pages(pdf_path: Path) -> list[str]:
    """Convert a Summary PDF into the per-page text format the existing
    `ElmhurstSiteNotesExtractor` expects (label\\nvalue sequences).

    `pdftotext -layout` preserves the spatial pairing of label and value
    on each line; we split each line on 2+ spaces to surface the
    label/value tokens, then concatenate them back into a single
    newline-delimited stream per page.

    Args:
        pdf_path: Path of the Summary PDF to convert.

    Returns:
        One string per PDF page, in page order.

    Raises:
        RuntimeError: `pdfinfo` ran but its output has no `Pages:` line.
        subprocess.CalledProcessError: `pdfinfo` or `pdftotext` exited
            non-zero (both are run with `check=True`).
    """
    info = subprocess.run(
        ["pdfinfo", str(pdf_path)], capture_output=True, text=True, check=True
    ).stdout
    match = _PDFINFO_PAGE_COUNT_RE.search(info)
    if match is None:
        raise RuntimeError(f"Could not parse page count from {pdf_path}")
    page_count = int(match.group(1))

    pages: list[str] = []
    # pdftotext page numbers are 1-based; -f/-l select a single page per call.
    for page_number in range(1, page_count + 1):
        layout = subprocess.run(
            [
                "pdftotext",
                "-layout",
                "-f",
                str(page_number),
                "-l",
                str(page_number),
                str(pdf_path),
                "-",
            ],
            capture_output=True,
            text=True,
            check=True,
        ).stdout
        pages.append(_layout_text_to_textract_style(layout))
    return pages


def test_summary_000474_mapper_produces_three_building_parts() -> None:
    # Arrange — cert U985-0001-000474 is a mid-terrace with 3 building
    # parts (Main + 2 extensions) per the hand-built worksheet fixture
    # at domain/sap10_calculator/worksheet/tests/
    # _elmhurst_worksheet_000474.py. Routing the Summary PDF through
    # extractor + mapper must yield the same count.
    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000474_PDF)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()

    # Act
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)

    # Assert
    assert len(epc.sap_building_parts) == 3


def test_summary_000474_mapper_extracts_seven_windows() -> None:
    # Arrange — cert U985-0001-000474's §11 table lodges 7 windows
    # across Main + 1st Extension + 2nd Extension. The legacy Textract-
    # style window parser couldn't anchor on the Summary PDF's tabular
    # layout; the new W/H/Area-plus-Manufacturer anchor pair picks them
    # all up.
    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000474_PDF)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()

    # Act
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)

    # Assert
    assert len(epc.sap_windows) == 7


# Cohort chain SAP-pin tests follow.
NOTE: certs 000474, 000480, 000487, # 000490 previously had chain tests here pinning their cascade SAP # against the U985 worksheet PDF — those tests were removed because # their worksheets violate RdSAP 10 §5 (12) "Floor infiltration # (suspended timber ground floor only)". Our cascade applies the spec # rule (via `cert_to_inputs._has_suspended_timber_floor_per_spec`); # the worksheet does not. So the spec-correct chain SAP for those # certs can't match the worksheet SAP — by design, not by mapper bug. # The Layer 1 hand-built fixtures for those 4 certs absorb the # worksheet quirk by lodging `has_suspended_timber_floor=False` # explicitly (overriding the spec inference) — so Layer 1 cascade pins # still pin the worksheet value exactly. The chain tests below remain # only for 000477, 000516 (and 001479 further down), where the # worksheet IS spec-correct. def test_summary_000477_full_chain_sap_matches_worksheet_pdf_exactly() -> None: # Arrange — cert U985-0001-000477 is a single-bp mid-terrace with # a 15.06 m² Room-in-Roof storey and zero baths lodged. Worksheet # PDF lodges unrounded SAP 65.0057. Drives the chain through the # `RoomInRoof.detailed_surfaces` cascade with stud walls @ 100mm # Mineral, two uninsulated slopes, two party gable walls, plus the # RR/storey-area suspended-timber-floor heuristic (RIR < storey → # 0.2 ACH floor infiltration). pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000477_PDF) site_notes = ElmhurstSiteNotesExtractor(pages).extract() epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) # Act result = calculate_sap_from_inputs( cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES) ) # Assert worksheet_unrounded_sap = 65.0057 assert abs(result.sap_score_continuous - worksheet_unrounded_sap) < 1e-4 def test_summary_000516_full_chain_sap_matches_worksheet_pdf_exactly() -> None: # Arrange — cert U985-0001-000516 is a mid-terrace with main bp + # 19.02 m² room-in-roof. Worksheet PDF lodges unrounded SAP 62.7937. 
# The §11 table mixes 5 vertical windows (U=2.80) with 1 roof # window (U=3.10 in cert, U=3.40 Table 24 raw); the mapper # discriminates by `U > 3.0` and routes the high-U entry to # `sap_roof_windows` so its solar gains feed §6 with the right # pitch (45°) and Table-24 U-value. pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000516_PDF) site_notes = ElmhurstSiteNotesExtractor(pages).extract() epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) # Act result = calculate_sap_from_inputs( cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES) ) # Assert worksheet_unrounded_sap = 62.7937 assert abs(result.sap_score_continuous - worksheet_unrounded_sap) < 1e-4 def test_summary_001479_mapper_extensions_count_matches_extension_bps() -> None: # Arrange — cert 0535-9020-6509-0821-6222 (Summary_001479) is the first # cohort cert with an actual GOV.UK API counterpart. Worksheet PDF # lodges Main + Extension 1 + Extension 2 (3 building parts, 2 # extensions). Pre-slice the Elmhurst mapper hard-coded # `extensions_count=0` regardless of survey.extensions; this asserts # the count flows through. pages = _summary_pdf_to_textract_style_pages(_SUMMARY_001479_PDF) site_notes = ElmhurstSiteNotesExtractor(pages).extract() # Act epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) # Assert assert epc.extensions_count == 2 assert len(epc.sap_building_parts) == 3 def test_summary_001479_main_party_wall_construction_is_cavity_unfilled() -> None: # Arrange — cert 001479 Main §7 Walls lodges "Party Wall Type: CU # Cavity masonry unfilled". The Elmhurst leading-code map previously # only knew "S" and "C"; "CU" fell through to None, which made the # cascade default to U=0.25 instead of the worksheet's lodged U=0.50. # The fix adds "CU" → SAP10 wall_construction code 4 (WALL_CAVITY), # which `u_party_wall` resolves to U=0.50 — matching the worksheet's # §3 `Party walls Main … 0.50` row. 
pages = _summary_pdf_to_textract_style_pages(_SUMMARY_001479_PDF) site_notes = ElmhurstSiteNotesExtractor(pages).extract() # Act epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) # Assert assert epc.sap_building_parts[0].party_wall_construction == 4 def test_summary_001479_ext2_floor_is_exposed_to_external_air() -> None: # Arrange — cert 001479 Ext2 §9 lodges "Location: E To external air" # — a cantilevered exposed timber floor (the upper-storey extension # over the back garden). The worksheet's §3 row `Exposed floor Ext2 # … 1.92, 1.20, 1.20` pins this as U=1.20 via Table 20. Pre-slice the # mapper only routed "U Above unheated space" through `is_exposed_ # floor=True`; "E To external air" fell through to the BS EN ISO # 13370 ground-floor cascade, dropping the lodged exposure entirely. pages = _summary_pdf_to_textract_style_pages(_SUMMARY_001479_PDF) site_notes = ElmhurstSiteNotesExtractor(pages).extract() # Act epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) # Assert ext2 = epc.sap_building_parts[2] assert ext2.floor_type == "To external air" assert ext2.sap_floor_dimensions[0].is_exposed_floor is True def test_summary_001479_ext2_sloping_ceiling_roof_uninsulated_for_pre_1950() -> None: # Arrange — cert 001479 Ext2 §8 lodges "Type: PS Pitched, sloping # ceiling" + "Insulation Thickness: As Built" + age band C (1930-49). # Original 1930s construction had no sloping-ceiling insulation; # worksheet §3 `External roof Ext2 … 2.30` pins U=2.30 (uninsulated # Table 16 row 0). Pre-slice the mapper passed thickness=None through, # routing to `u_roof`'s pitched-roof Table 18 col 1 default (0.40 for # age C, assumes loft-joist retrofit) — wrong geometry for PS. # Ext1's PS roof at age M leaves thickness=None (modern build, # cascade default U=0.15 matches worksheet). 
pages = _summary_pdf_to_textract_style_pages(_SUMMARY_001479_PDF) site_notes = ElmhurstSiteNotesExtractor(pages).extract() # Act epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) # Assert assert epc.sap_building_parts[2].roof_insulation_thickness == 0 assert epc.sap_building_parts[1].roof_insulation_thickness is None def test_summary_001479_secondary_heating_routes_mains_gas_fuel() -> None: # Arrange — cert 001479 §14.1 Main Heating2 lodges "Secondary Heating # Code: SAP code 605, Flush fitting live effect gas fire, sealed to # chimney". The Summary surfaces only the SAP code (605); the fuel # type 26 (mains gas) must be derived from the code range so the # `_fuel_cost` orchestrator's `secondary_high_rate_gbp_per_kwh` # picks up Table 32's gas tariff (£0.0348/kWh) rather than the # default standard-electricity tariff (£0.132/kWh). Worksheet line # (242) "Space heating - secondary … 3.4800 70.5022" confirms gas # pricing. pages = _summary_pdf_to_textract_style_pages(_SUMMARY_001479_PDF) site_notes = ElmhurstSiteNotesExtractor(pages).extract() # Act epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) # Assert assert epc.sap_heating.secondary_heating_type == 605 assert epc.sap_heating.secondary_fuel_type == 26 def test_summary_2102_secondary_heating_routes_house_coal_for_open_fire() -> None: # Arrange — cohort-2 cert 2102-3018-0205-7886-5204 §14.1 lodges # "Secondary Heating Code: SAP code 631" — "Open fire in grate" # per SAP 10.2 Table 4a Category 10 (Room heaters), solid fuel # column. Without the per-code routing the cascade defaults to # standard electricity at 13.19 p/kWh and over-charges secondary # heating by ~£340/yr, pushing SAP -15.81 below the worksheet's # 63.87. Worksheet line (242) "Space heating - secondary 3585.24 # × 3.6700 = 131.58" confirms house-coal pricing (Table 32 fuel # code 11 = 3.67 p/kWh). 
cert_dir = Path( "sap worksheets/additional with api 2/2102-3018-0205-7886-5204" ) summary_pdf = next(cert_dir.glob("Summary_*.pdf")) pages = _summary_pdf_to_textract_style_pages(summary_pdf) site_notes = ElmhurstSiteNotesExtractor(pages).extract() # Act epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) # Assert assert epc.sap_heating.secondary_heating_type == 631 # 11 = "Coal" in `_ELMHURST_MAIN_FUEL_TO_SAP10` → Table 32 lookup # returns 3.67 p/kWh (house coal). assert epc.sap_heating.secondary_fuel_type == 11 def test_summary_9796_full_chain_sap_within_spec_floor_of_worksheet() -> None: # Arrange — cohort-2 cert 9796-3058-6205-0346-9200 (Summary_*.pdf / # dr87-0001-*.pdf) is a Mid-Terrace bungalow age D with a Mitsubishi # PUZ-WM50VHA ASHP (PCDB 104568) and a Suspended-timber ground floor # (46.87 m² / 15.0 m heat-loss perimeter). The other PCDF 104568 # cohort certs (0380, 2800, 3336, 4800) are End-Terrace bungalows # whose floor U lands well above 0.5; cert 9796's geometry is the # only one where the (broken) cascade routes the U through the solid # default → U=0.49 < 0.5 → spec rule (a) "U<0.5 → sealed" fires → # (12) = 0.1 (sealed) instead of (12) = 0.2 (unsealed). # # Per RdSAP10 §5 page 29 "Floor infiltration (suspended timber # ground floor only)": # Age band A-E: # a) if floor U-value < 0.5, assume "sealed" → 0.1 # b) if retro-fit + no U → "sealed" → 0.1 # otherwise "unsealed" → 0.2 # The cascade must use the SAME floor U-value the heat-transmission # cascade computes (which respects `floor_construction_type`) — not # a stale duplicate that ignores the per-bp lodgement. 
# # Pre-slice the 0.1 ach gap propagated: # (18) infiltration_rate 0.74 → ws 0.84 (cascade -0.10) # (25)m Jan 0.82 → ws 0.91 (cascade -0.09) # (38)m Jan 29.08 W/K → ws 32.37 (cascade -3.29 W/K) # (39) Jan 110.35 W/K → ws 113.64 (cascade -3.29 W/K) # HLP Jan 2.35 W/m²K → ws 2.42 (cascade -0.07) # T_h2 Jan 19.11°C → ws 19.07 (cascade +0.04) # MIT Jan 18.51°C → ws 18.45 (cascade +0.06) # SAP +0.55 vs worksheet 90.13. # Worksheet "SAP value" line lodges unrounded SAP **90.1318**. cert_dir = Path( "sap worksheets/additional with api 2/9796-3058-6205-0346-9200" ) summary_pdf = next(cert_dir.glob("Summary_*.pdf")) pages = _summary_pdf_to_textract_style_pages(summary_pdf) site_notes = ElmhurstSiteNotesExtractor(pages).extract() epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) # Act result = calculate_sap_from_inputs( cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES) ) # Assert — ±0.07 ASHP-cohort spec-floor tolerance (matches the other # PCDF 104568 cohort residuals; the remaining ~+0.001 SAP delta is # the cohort-1 HP-COP precision-floor pattern, see handover thread 3). worksheet_unrounded_sap = 90.1318 assert abs(result.sap_score_continuous - worksheet_unrounded_sap) < _ASHP_COHORT_CHAIN_TOLERANCE def test_summary_7700_full_chain_sap_matches_worksheet_pdf_exactly() -> None: # Arrange — cohort-2 cert 7700-3362-0922-7022-3563 (Summary_000905.pdf # / dr87-0001-000905.pdf) is the first cohort fixture to exercise # the alt-wall dry-lining adjustment. End-Terrace house age C, main # wall filled cavity (CavityWallDensePlasterDenseBlock, U=0.70), # alt wall 14.44 m² Cavity As-Built, Dry-lining: Yes # (CavityWallPlasterOnDabsDenseBlock, worksheet U=1.20). # # Per RdSAP10 §5.8 + Table 14 page 41: dry-lining adds R = 0.17 # m²K/W → U = 1/(1/1.5 + 0.17) = 1.19522... → 2 d.p. half-up = 1.20. 
# Pre-slice the alt sub-area's `wall_dry_lined="N"` hard-code routed # to the cavity-as-built default (U=1.50), giving fabric (33) # 148.72 W/K vs worksheet 144.38 (Δ +4.33 W/K = ~+0.44 SAP). Worksheet # "SAP value" line lodges unrounded SAP **63.4425**. cert_dir = Path( "sap worksheets/additional with api 2/7700-3362-0922-7022-3563" ) summary_pdf = next(cert_dir.glob("Summary_*.pdf")) pages = _summary_pdf_to_textract_style_pages(summary_pdf) site_notes = ElmhurstSiteNotesExtractor(pages).extract() epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) # Act result = calculate_sap_from_inputs( cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES) ) # Assert worksheet_unrounded_sap = 63.4425 assert abs(result.sap_score_continuous - worksheet_unrounded_sap) < 1e-4 def test_summary_9501_flat_has_no_built_form_in_summary_pdf() -> None: # Arrange — cert 9501 (Summary_000784.pdf) is a flat. The Elmhurst # Summary's §1.0 "Property type" section lodges the built-form # descriptor (e.g. "M Mid-Terrace", "D Detached") only for houses; # flats have no built-form line — the §2.0 "Number of Storeys" # section follows immediately after the "F Flat" property type. # # The extractor's `_extract_attachment` regex previously captured # the line immediately after the property-type value # unconditionally, so cert 9501 ends up with attachment # "2.0 Number of Storeys:" — pure section-header noise that the # mapper then surfaces on EpcPropertyData.built_form, breaking the # cascade's flat-exposure routing downstream. pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000784_PDF) site_notes = ElmhurstSiteNotesExtractor(pages).extract() # Act epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) # Assert — built_form is empty for flats. Houses set it to their # attachment descriptor; flats lodge no attachment. 
assert epc.built_form == "" def test_summary_9501_dwelling_type_is_top_floor_flat() -> None: # Arrange — cert 9501's worksheet treats the cert as a TOP-floor # flat: §3 (28a) "Ground floor Main … U=0.0" because the floor # sits over "Another dwelling below" (worksheet line 9.0 Floor # location); §3 (30) has both an external roof + RR contributions # so the roof IS exposed. The cascade's `_dwelling_exposure` # function does prefix matching on `dwelling_type.lower()` to gate # which surfaces are party — without "top-floor flat" the cert # falls through to fully-exposed houses (Δ +9.25 W/K on floor). # # Floor-position inference rules: # - floor.location indicates "Another dwelling below" # → not ground floor (rules out ground-floor flat) # - room_in_roof OR external roof present # → roof exposed (rules out mid-floor flat) # - therefore → top-floor flat pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000784_PDF) site_notes = ElmhurstSiteNotesExtractor(pages).extract() # Act epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) # Assert assert epc.dwelling_type is not None assert epc.dwelling_type.lower().startswith("top-floor") def test_summary_9501_rr_gable_walls_route_to_external_walls_hlc() -> None: # Arrange — cert 9501's worksheet §3 lodges "Roof room Main Gable # Wall 1" + "Gable Wall 2" as line (29a) entries (external walls) # at the main-wall U (= 1.70 for age B Solid Brick): 13.50×1.70 + # 15.95×1.70 = 50.07 W/K added on top of the regular external-walls # 168.74 → 218.81 W/K total. # # The Summary mapper currently lodges these as # `SapRoomInRoofSurface(kind='gable_wall', ...)` — the cascade's # cohort-house default which routes to party walls at U=0.25 # (Table 4 row 2). For a top-floor flat in a mid-terrace block, # the gables sit at the ends of the building (no neighbour above) # — they're EXTERNAL not party. Surface them as # `gable_wall_external` so the cascade's (29a) sum picks them up. 
pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000784_PDF) site_notes = ElmhurstSiteNotesExtractor(pages).extract() epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) # Act from domain.sap10_calculator.rdsap.cert_to_inputs import ( heat_transmission_section_from_cert, ) ht = heat_transmission_section_from_cert(epc) # Assert — worksheet (29a) total walls = 168.7420 (main) + # 22.95 (Gable 1) + 27.115 (Gable 2) = 218.807 W/K. Tolerance # 1e-2 absorbs the 2-d.p. rounding of the underlying U/area # products; the 1e-4 chain test downstream will tighten this # to the cascade-internal rounding floor. worksheet_walls_w_per_k = 218.807 assert abs(ht.walls_w_per_k - worksheet_walls_w_per_k) <= 1e-2 def test_summary_000565_extractor_recognises_exposed_and_connected_gable_types() -> None: """Summary PDF §8.1 Room(s) in Roof per-surface table lists the gable-wall environment column with one of four published values: Party → §8.1 party-wall row Sheltered → §8.1 sheltered external row Exposed → §8.1 exposed external row Connected (to heated space) → §8.1 internal partition Per RdSAP 10 §3.10 (PDF p.30-35) "Detailed Room-in-Roof" + Table 4 (p.22) "Heat-loss surface variants": - Exposed gable wall → external wall at the lodged U-value (or the BP main-wall U when the lodged value is the default) - Sheltered gable wall → external wall at the lodged U-value - Party gable wall → party wall at U=0.25 (Table 4 row 2) - Connected gable wall → internal partition to heated space, NOT a heat-loss surface (drops from external + party totals) The extractor was only capturing `gable_type ∈ {"Party", "Sheltered", "Connected to heated space"}` — neither `"Exposed"` (every external gable on cert 000565) nor the plain `"Connected"` string (the actual lodging used in Summary PDFs vs the verbose "Connected to heated space") was recognised. 
Both fell through with `gable_type=None`, masking the downstream cascade gap (cert 000565 BP[0] Main Gable Wall 1 is lodged "Exposed" at U=0.35 but extracted as untyped → mapper routes to `gable_wall` (party at U=0.25) — see worksheet "Roof room Main Gable Wall 1" line at U=0.35). This pin asserts the extractor surfaces the lodged environment column verbatim. The downstream mapper + cascade behaviour stays unchanged until follow-up slices use the new field — this is a pure extractor data-completion step (no test pins move). """ # Arrange pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF) site_notes = ElmhurstSiteNotesExtractor(pages).extract() # Act — Main BP gables; Ext1/Ext2 gables expose both "Connected" # and "Exposed" values from the cert lodging. rir_main = site_notes.room_in_roof main_surfaces = {s.name: s for s in (rir_main.surfaces if rir_main else [])} rir_ext1 = ( site_notes.extensions[0].room_in_roof if site_notes.extensions and len(site_notes.extensions) > 0 else None ) ext1_surfaces = {s.name: s for s in (rir_ext1.surfaces if rir_ext1 else [])} # Assert # Main BP[0]: Gable Wall 1 lodged "Exposed" (default U 0.35); Gable # Wall 2 lodged "Sheltered" (default U 0.30). assert main_surfaces["Gable Wall 1"].gable_type == "Exposed", ( f"Main Gable Wall 1 gable_type = " f"{main_surfaces['Gable Wall 1'].gable_type!r}; expected 'Exposed'" ) assert main_surfaces["Gable Wall 2"].gable_type == "Sheltered", ( f"Main Gable Wall 2 gable_type = " f"{main_surfaces['Gable Wall 2'].gable_type!r}; expected 'Sheltered'" ) # Ext1 BP[1]: Gable Wall 1 lodged "Connected" (internal partition); # Gable Wall 2 lodged "Exposed" (default U 1.70). 
assert ext1_surfaces["Gable Wall 1"].gable_type == "Connected", ( f"Ext1 Gable Wall 1 gable_type = " f"{ext1_surfaces['Gable Wall 1'].gable_type!r}; expected 'Connected'" ) assert ext1_surfaces["Gable Wall 2"].gable_type == "Exposed", ( f"Ext1 Gable Wall 2 gable_type = " f"{ext1_surfaces['Gable Wall 2'].gable_type!r}; expected 'Exposed'" ) def test_summary_000565_rr_mapper_routes_exposed_to_external_drops_connected_and_surfaces_common_walls() -> None: """RdSAP 10 §3.9 (Simplified) + §3.10 (Detailed) + Table 4 (PDF p.22): the cert's Room-in-Roof per-surface table classifies gable walls by exposure column AND derives areas via two different methods depending on assessment type: Gable / common-wall environment column → heat-loss routing: Exposed → external wall at lodged or main-wall U Sheltered → external wall at lodged U Party → party wall at U = 0.25 Connected → internal partition (NOT a heat-loss surface) Area derivation: Detailed assessment → raw L × H per surface Simplified + Common Walls → L × (0.25 + H) for common walls; L × (0.25 + H_gable) − Σ_n (H_gable − H_common,n)² / 2 for gables Simplified + no Common Walls → raw L × H for gables (no structural-gap offset) The 0.25-m offset accounts for the structural gap between the RR floor and the storey-below ceiling (per RdSAP 10 §3.9.2 + Table 4 p.22). The gable correction subtracts the triangular slice above each common wall where the gable above transitions to the common wall below. Pin: cert 000565 BP[1] Ext1 lodges (Simplified, Common Wall 1 9×1, Common Wall 2 5×1.8, Gable Wall 1 4×6 Connected, Gable Wall 2 8×9 Exposed @ U=1.70). 
After this slice the mapper produces: - Common Wall 1 → SapRoomInRoofSurface(kind='common_wall', area_m2=11.25, u_value=1.70) - Common Wall 2 → SapRoomInRoofSurface(kind='common_wall', area_m2=10.25, u_value=1.70) - Gable Wall 1 → dropped (Connected, internal partition) - Gable Wall 2 → SapRoomInRoofSurface(kind='gable_wall_external', area_m2=16.08, u_value=1.70) All three values pin to the U985 worksheet for this BP at abs=1e-2: Roof room Ext1 common wall 1: 11.25 Roof room Ext1 common wall 2: 10.25 Roof room Ext1 Gable Wall 2 : 16.08 """ # Arrange pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF) site_notes = ElmhurstSiteNotesExtractor(pages).extract() # Act epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) # Assert — BP[1] is the Ext1 building part (BPs[0]=Main, [1]=Ext1). ext1_bp = epc.sap_building_parts[1] rir = ext1_bp.sap_room_in_roof assert rir is not None and rir.detailed_surfaces is not None detailed = rir.detailed_surfaces # Connected gables drop — no kind='gable_wall' surface at the raw 24 m² area. gable_walls_24 = [ s for s in detailed if s.kind == "gable_wall" and abs(s.area_m2 - 24.0) <= 1e-2 ] assert not gable_walls_24, ( f"Connected gable (24 m² raw) leaked into kind='gable_wall': " f"{gable_walls_24}" ) # Common walls surfaced at spec-formula areas. common_walls = [s for s in detailed if s.kind == "common_wall"] common_areas = sorted(s.area_m2 for s in common_walls) assert any(abs(a - 10.25) <= 1e-2 for a in common_areas), ( f"Ext1 Common Wall 2 (5 × (0.25 + 1.8) = 10.25) missing from " f"common_wall surfaces: areas={common_areas}" ) assert any(abs(a - 11.25) <= 1e-2 for a in common_areas), ( f"Ext1 Common Wall 1 (9 × (0.25 + 1.0) = 11.25) missing from " f"common_wall surfaces: areas={common_areas}" ) # Exposed gable surfaced at spec-corrected area + lodged U. 
gable_externals = [s for s in detailed if s.kind == "gable_wall_external"] assert any( abs(s.area_m2 - 16.08) <= 1e-2 and s.u_value == 1.70 for s in gable_externals ), ( f"Ext1 Gable Wall 2 (8 × (0.25 + 9) − ((9−1)² + (9−1.8)²)/2 = " f"16.08, U=1.70) missing from gable_wall_external surfaces: " f"{[(s.area_m2, s.u_value) for s in gable_externals]}" ) def test_summary_9501_pv_array_surfaced_from_elmhurst_section_19() -> None: # Arrange — cert 9501's Elmhurst §19.0 PV section lodges measured # array detail (2.36 kWp, South-West orientation, 45° elevation, # "None Or Little" overshading). The worksheet's §10a PV credit # of -250.02 GBP (-129.49 used in dwelling + -120.53 exported) # depends on Appendix M / Appendix U3.3 reading these from the # cascade's `SapEnergySource.photovoltaic_arrays` list. Without # the array surfacing the cascade computes total cost +£250 too # high → ECF 2.92 vs worksheet 2.26 → SAP 59.26 vs 68.53 (current # Δ -9.27 after Slice 99c closed the fabric heat loss). pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000784_PDF) site_notes = ElmhurstSiteNotesExtractor(pages).extract() # Act epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) # Assert arrays = epc.sap_energy_source.photovoltaic_arrays assert arrays is not None assert len(arrays) == 1 assert abs(arrays[0].peak_power - 2.36) <= 1e-4 assert arrays[0].orientation == 6 # SAP octant: South-West assert arrays[0].pitch == 3 # RdSAP §11.1 pitch enum: code 3 = 45° assert arrays[0].overshading == 1 # RdSAP code: None or very little def test_summary_9501_full_chain_sap_matches_worksheet_pdf_exactly() -> None: # Arrange — cert 9501-3059-8202-7356-0204 (Summary_000784.pdf / # dr87-0001-000784.pdf) is the third boiler validation cert and # the first FLAT in the per-cert mapper validation cohort. # Mains-gas Vaillant PCDB idx 19007, mid-terrace top-floor flat # with Room-in-Roof + measured PV (2.36 kWp SW @ 45°). TFA 113.08 # m². 
Worksheet PDF "SAP value" line lodges unrounded SAP # **68.5252**. # # Slices 99a-99e jointly closed the Summary path from Δ -5.25 to # 1e-4: 99a extractor attachment fix (built_form=''), 99b dwelling # _type identifies top-floor flat (cascade exposure routing), 99c # RR gables external for flats + SO Solid Brick wall code, 99d # surface PV array from §19.0, 99e PV pitch enum-not-degrees. pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000784_PDF) site_notes = ElmhurstSiteNotesExtractor(pages).extract() epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) # Act result = calculate_sap_from_inputs( cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES) ) # Assert — 1e-4 pin (project memory `feedback_zero_error_strict`). worksheet_unrounded_sap = 68.5252 assert abs(result.sap_score_continuous - worksheet_unrounded_sap) < 1e-4 def test_summary_001479_full_chain_sap_matches_worksheet_pdf_exactly() -> None: # Arrange — cert 001479 (Summary_001479.pdf / P960-0001-001479.pdf) # is the first cohort cert with a real GOV.UK EPB API counterpart # (cert ref 0535-9020-6509-0821-6222). Worksheet PDF line "SAP value" # lodges unrounded SAP **69.0094** (rating C 69, also the API- # published integer). This is the load-bearing forcing function for # the API↔Elmhurst parity workstream: any drift from 1e-4 means a # mapper gap, not a calculator bug — the cohort 6 cert cascades all # reproduce Elmhurst exactly at 1e-4 on hand-built fixtures. # # Source-data caveat (documented for future debuggers): Summary §3 # lodges Ext1 age band as "M 2023 onwards"; the worksheet header # records "Ext1: L". Likely assessor data-entry inconsistency. The # mapper trusts the Summary (its source of truth); accept whatever # residual the M vs L disagreement produces. 
# (body of test_summary_001479_full_chain_sap_matches_worksheet_pdf_exactly, continued)
    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_001479_PDF)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)

    # Act
    result = calculate_sap_from_inputs(
        cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)
    )

    # Assert — 1e-4 pin, no widening, no xfail (project memory
    # `feedback_zero_error_strict`).
    worksheet_unrounded_sap = 69.0094
    assert abs(result.sap_score_continuous - worksheet_unrounded_sap) < 1e-4


def test_summary_0330_full_chain_sap_matches_worksheet_pdf_exactly() -> None:
    """Pin the full chain for Summary_000897.pdf to worksheet SAP 61.5993 (±1e-4)."""
    # Arrange — cert 0330-2249-8150-2326-4121 (Summary_000897.pdf /
    # dr87-0001-000897.pdf) is the second boiler cert under per-cert
    # mapper validation: mains-gas boiler (PCDB idx 10241), mid-terrace
    # 2-bp dwelling, TFA 69.14 m². Worksheet PDF "SAP value" line lodges
    # unrounded SAP **61.5993**. Same load-bearing role as cert 001479
    # (the first boiler) — Summary path proves itself against the
    # worksheet, then becomes the canonical reference for the API path.
    # Expected RED at Δ +0.4667 at handover-baseline (Summary mapper
    # cascade SAP 62.0660); mapper gaps to close are §11 glazing_type=14
    # (windows HLC +6.71 W/K) and the §4 hot-water cascade (kWh +1060).
    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000897_PDF)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)

    # Act
    result = calculate_sap_from_inputs(
        cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)
    )

    # Assert — 1e-4 pin, no widening, no xfail (project memory
    # `feedback_zero_error_strict`).
    worksheet_unrounded_sap = 61.5993
    assert abs(result.sap_score_continuous - worksheet_unrounded_sap) < 1e-4


def test_summary_0380_main_heating_category_is_heat_pump() -> None:
    """Pin that Summary_000899.pdf maps to PCDB index 104568 with heating category 4."""
    # Arrange — cert 0380's Summary lodges main heating as a PCDB-
    # indexed Mitsubishi PUZ-WM50VHA (idx 104568), which lives in
    # PCDB Table 362 (heat pumps only). The Elmhurst mapper must
    # surface `main_heating_category=4` so the cascade routes the
    # cert through the Appendix N3.6/N3.7 heat-pump path instead of
    # falling through to the default boiler-ish branches that key off
    # `main_heating_category in {1, 2}`. Spec ref: SAP 10.2 Table 4a
    # (main heating category code 4 = heat pump).
    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000899_PDF)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()

    # Act
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)

    # Assert
    assert epc.sap_heating.main_heating_details, "no main heating details surfaced"
    main = epc.sap_heating.main_heating_details[0]
    assert main.main_heating_index_number == 104568
    assert main.main_heating_category == 4


def test_summary_0380_filled_cavity_plus_external_insulation_routes_to_code_6() -> None:
    """Pin that the first building part maps to wall_construction=4 and wall_insulation_type=6."""
    # Arrange — cert 0380's Summary lodges main walls as
    # `wall_type = "CA Cavity"` and `insulation = "FE Filled Cavity +
    # External"` (a cavity wall with subsequent external-insulation
    # upgrade). The cascade enum `wall_insulation_type=6` is
    # "filled cavity + external insulation" (per
    # `domain.sap10_ml.rdsap_uvalues` lines 120-131); without it the
    # cascade defaults to the as-built routing and overstates walls
    # heat loss by +58 W/K on cert 0380 (Summary 69.69 vs API 11.62
    # at HEAD before this slice). API path EPC for cert 0380 surfaces
    # `wall_insulation_type=6` and is the ground-truth pin here.
pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000899_PDF) site_notes = ElmhurstSiteNotesExtractor(pages).extract() # Act epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) # Assert assert epc.sap_building_parts, "no building parts surfaced" main = epc.sap_building_parts[0] assert main.wall_construction == 4 # 4 = Cavity ('CA') assert main.wall_insulation_type == 6 # 6 = filled cavity + external def test_summary_0380_surfaces_wall_insulation_thickness_100mm() -> None: # Arrange — cert 0380's Summary §7.0 Walls block lodges the # composite-wall insulation thickness on the line pair # "Insulation Thickness" / "100 mm". Without surfacing this to # `wall_insulation_thickness`, the heat-transmission cascade # falls through `_parse_thickness_mm(None) → None` and the # composite filled-cavity-plus-external U-value calc uses its # default thickness rather than the lodged 100 mm — leaving cert # 0380's `walls_w_per_k` at 24.62 vs API's 11.62 even with # `wall_insulation_type=6` set (Slice S0380.3). Mirror of the # existing `_roof_details_from_lines` reader that surfaces roof # `insulation_thickness_mm` from the same "Insulation Thickness" # label. pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000899_PDF) site_notes = ElmhurstSiteNotesExtractor(pages).extract() # Act epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) # Assert — match the API mapper's "100mm" string (the EPC schema # type is `Optional[str]`; the cascade's `_parse_thickness_mm` # strips non-digit trailers). main = epc.sap_building_parts[0] assert main.wall_insulation_thickness == "100mm" def test_summary_0380_surfaces_insulated_door_u_value_1_2() -> None: # Arrange — cert 0380's Summary §10 Doors block lodges the door # U-value on the "Average U-value" / "1.20" line pair. The dr87 # worksheet line ref (26) confirms the spec value: "Doors # insulated 1, NetArea 3.7000 m², U-value 1.2000, A×U 4.4400 W/K". 
# Without surfacing the lodged U-value the cascade defaults the # door U and overstates `doors_w_per_k` to 5.18 vs worksheet # 4.44 W/K. The comment at # `datatypes/epc/domain/epc_property_data.py:585` claimed the # value was "not available in site notes" — that assertion is # outdated for Elmhurst Summary PDFs which lodge it explicitly. pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000899_PDF) site_notes = ElmhurstSiteNotesExtractor(pages).extract() # Act epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) # Assert — float compare with small tolerance (Summary lodges # "1.20" which parses cleanly to 1.2; API lodges 1.2 directly). assert epc.insulated_door_u_value is not None assert abs(epc.insulated_door_u_value - 1.2) < 1e-6 def test_summary_0380_cylinder_block_surfaces_full_15_1_lodging() -> None: # Arrange — cert 0380's Summary §15.1 Hot Water Cylinder block # lodges (L 340-347): # Cylinder Size Medium # Insulated Foam # Insulation Thickness 50 mm # Cylinder Thermostat Yes # The dr87 worksheet pins these as: # (47) Cylinder Volume 160.00 L → cascade enum 3 # "Cylinder Insulation Type Foam" → cascade enum 1 (factory) # "Cylinder Insulation Thickness 50 mm" → 50 # "Cylinder Stat Yes" → 'Y' # Worksheet (51) 0.0152 × (52) 0.9086 × (53) 0.5400 × (47) 160 ÷ 1000 # = daily storage loss 1.193 kWh/day → (56) annual ~435 kWh — exact # only when ALL FOUR fields are surfaced together: insulation_type # + thickness key the Table 2 loss factor (51), volume keys (52), # and cylinder_thermostat keys the Table 2b temperature factor (53). # Without cylinder_thermostat='Y' the cascade uses the no-stat # temperature factor (~0.9 instead of 0.54) and HW storage loss # over-counts by ~300 kWh/yr. 
# (body of test_summary_0380_cylinder_block_surfaces_full_15_1_lodging, continued)
    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000899_PDF)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()

    # Act
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)

    # Assert
    assert epc.sap_heating.cylinder_size == 3
    assert epc.sap_heating.cylinder_insulation_type == 1
    assert epc.sap_heating.cylinder_insulation_thickness_mm == 50
    assert epc.sap_heating.cylinder_thermostat == "Y"


def test_summary_0350_surfaces_two_pv_arrays() -> None:
    """Pin that Summary_000903.pdf yields exactly two PV arrays, each with peak power 1.5."""
    # Arrange — cert 0350's Summary §19.0 Photovoltaic Panel block
    # lodges TWO arrays (L 503-510):
    #   1.50 kWp / South-East / 45° / None Or Little
    #   1.50 kWp / North-West / 45° / None Or Little
    # The Elmhurst extractor's `_extract_pv_array_detail` hardcodes a
    # single 4-value reader (loop breaks at `len(values) == 4`) and
    # the `Renewables` dataclass exposes only 4 scalar PV fields —
    # together they cap output at one array regardless of how many the
    # PDF lodges. Cert 0380 (single-array) is unaffected; cert 0350
    # is the first multi-array cohort cert. Without both arrays the
    # cascade halves the PV export credit and the SAP score drops.
    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000903_PDF)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()

    # Act
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)

    # Assert
    assert epc.sap_energy_source is not None
    arrays = epc.sap_energy_source.photovoltaic_arrays
    assert arrays is not None
    assert len(arrays) == 2
    # Both arrays at 1.5 kWp; order matches PDF row order.
    # NOTE(review): with equal peak powers these two asserts cannot
    # detect a swapped row order — only the count and the power are
    # pinned here.
    assert arrays[0].peak_power == 1.5
    assert arrays[1].peak_power == 1.5


def test_summary_0350_ext1_inherits_main_wall_insulation_thickness() -> None:
    """Pin that both building parts of Summary_000903.pdf surface thickness "100mm"."""
    # Arrange — cert 0350-2968-2650-2796-5255 is a multi-bp dwelling
    # (Main + 1st Extension). Its Summary §7 Walls block lodges
    # "1st Extension / As Main Wall / Yes" — the extension's walls
    # inherit Main's lodgings (CA Cavity, FE Filled Cavity + External,
    # 100 mm). The `_extract_extensions` "As Main Wall" inheritance
    # at `elmhurst_extractor.py:559-567` builds a new WallDetails by
    # copying Main's fields, but the field set it copies was frozen
    # before Slice S0380.4 added `insulation_thickness_mm` — so the
    # extension's `WallDetails.insulation_thickness_mm` falls through
    # to its dataclass default (None), and the mapper surfaces
    # `wall_insulation_thickness=None` on bp[1]. The cascade then
    # routes Ext1's composite walls off the lodged-thickness path,
    # over-stating Ext1 `external_walls_w_per_k` against worksheet
    # line ref (29a) "External walls Ext1 5.21 0.25 1.3025".
    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000903_PDF)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()

    # Act
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)

    # Assert — Ext1 inherits Main's 100 mm thickness and the EPC
    # surfaces "100mm" on bp[1] (matching bp[0]).
    assert len(epc.sap_building_parts) == 2
    main_bp, ext1_bp = epc.sap_building_parts
    assert main_bp.wall_insulation_thickness == "100mm"
    assert ext1_bp.wall_insulation_thickness == "100mm"


def test_summary_0350_full_chain_sap_within_spec_floor_of_worksheet() -> None:
    """Pin the full chain for Summary_000903.pdf against worksheet SAP 84.1367.

    The allowed gap is `_ASHP_COHORT_CHAIN_TOLERANCE` (defined elsewhere
    in this module).
    """
    # Arrange — cert 0350-2968-2650-2796-5255 (Summary_000903.pdf /
    # dr87-0001-000903.pdf) is the second heat-pump cert under per-cert
    # Summary-path mapper validation and the first multi-bp cohort
    # cert: Mitsubishi PUZ-WM50VHA ASHP (PCDB index 104568), main
    # dwelling + 1 extension, 2 PV arrays (2x 1.5 kWp at SE / NW).
    # Worksheet PDF "SAP value" line lodges unrounded SAP **84.1367**.
    #
    # First-attempt closure (validating the structural-debt-amortizes
    # hypothesis): after Slices S0380.2..S0380.6 (which were forced by
    # cert 0380) the cohort HP routing + cylinder block were already
    # in place; cert 0350 needed only TWO new slices:
    #   - Slice S0380.8: extension "As Main Wall" inheritance copies
    #     `insulation_thickness_mm` (cert 0380 was single-bp, didn't
    #     exercise the inheritance path).
    #   - Slice S0380.9: refactor Elmhurst `Renewables` to support
    #     multiple PV arrays per dwelling (cert 0380 was single-array,
    #     didn't exercise multi-array PV).
    # Both fixes are structural and apply cohort-wide.
    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000903_PDF)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)

    # Act
    result = calculate_sap_from_inputs(
        cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)
    )

    # Assert — ±0.07 ASHP-cohort spec-floor tolerance.
    worksheet_unrounded_sap = 84.1367
    assert abs(result.sap_score_continuous - worksheet_unrounded_sap) < _ASHP_COHORT_CHAIN_TOLERANCE


def test_summary_2636_alt_wall_window_parses_alternative_wall_location() -> None:
    """Pin the wall-location label carried by two windows of Summary_000898.pdf.

    The window keyed 1.19 must read "Alternative wall"; the one keyed
    2.25 must read "External wall".
    """
    # Arrange — cert 2636-0525-2600-0401-2296's §11 Windows block lodges
    # one alt-wall window (the 1.19 m² north-facing one): the row's
    # "Alternative wall" string appears BEFORE the W×H×A line, not
    # after the frame_factor (the normal position for "External wall").
    # The extractor's `_parse_window_from_anchors` was only scanning
    # the post-frame_factor `middle` slice for wall-location tokens →
    # defaulted to "External wall" for the alt-wall row → cascade
    # allocated the window to the main wall instead of the alt-wall,
    # leaving Main external walls W/K under-deducted by ~0.54 vs
    # worksheet (29a). Fix: also scan the PRE-data slice
    # `lines[before_start:data_idx]` for wall tokens.
    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000898_PDF)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()

    # Act
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)

    # Assert — the 1.19 m² window is recorded with wall_type =
    # "Alternative wall"; all other windows stay on "External wall".
    # NOTE(review): the dict is keyed on `window_width` (rounded to
    # 2 d.p.) although its name and the comments speak of areas —
    # confirm the mapper stores the lodged area in `window_width`.
    # Windows sharing a key collapse to the last one seen.
    by_area = {round(w.window_width, 2): w.window_wall_type for w in epc.sap_windows}
    assert by_area[1.19] == "Alternative wall"
    assert by_area[2.25] == "External wall"  # main-wall windows unchanged


def test_summary_2225_no_showers_lodged_resolves_to_zero_counts() -> None:
    """Pin that a Summary lodging no showers maps to explicit zero shower counts.

    Both `electric_shower_count` and `mixer_shower_count` must be 0.
    """
    # Arrange — cert 2225-3062-8205-2856-7204's Summary §1x Baths and
    # Showers block lodges 0 baths and ZERO showers (no shower rows at
    # all). The Summary mapper's existing logic at
    # `mapper.py:3536-3537` predicates the count assignment on
    # `has_electric_shower`: when no electric shower is detected the
    # counts collapse to None — but cert 2225 has no showers at all,
    # not "non-electric showers". The None values then drive the
    # cascade's default-1-mixer assumption, over-counting HW kWh.
    # Same disposition the API path received in slice 102f-prep.8
    # (commit 1d5183c6: "API mapper resolves shower_outlets=None →
    # 0 mixers").
    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000900_PDF)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()

    # Pre-condition: §1x lodges zero showers (proves the test sees
    # the same no-showers fixture the cascade does).
    assert len(site_notes.baths_and_showers.showers) == 0

    # Act
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)

    # Assert — zero-shower lodgings resolve to explicit 0 counts (not
    # None) so the cascade does not default-assume a mixer.
    assert epc.sap_heating.electric_shower_count == 0
    assert epc.sap_heating.mixer_shower_count == 0


def test_summary_2225_full_chain_sap_within_spec_floor_of_worksheet() -> None:
    """Pin the full chain for Summary_000900.pdf against worksheet SAP 88.7921.

    The allowed gap is `_ASHP_COHORT_CHAIN_TOLERANCE`.
    """
    # Arrange — cert 2225-3062-8205-2856-7204 (Summary_000900.pdf):
    # Mitsubishi PUZ-WM50VHA, single-bp single-array PV (3.28 kWp SE),
    # ZERO showers lodged. Worksheet "SAP value" 88.7921. Slice
    # S0380.11 closed the zero-shower defaulting bug (None → 0 mixers
    # for cohort certs that lodge no showers); cert 2225 was the
    # forcing function. Same disposition the API path received in
    # slice 102f-prep.8 (commit 1d5183c6).
# (body of test_summary_2225_full_chain_sap_within_spec_floor_of_worksheet, continued)
    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000900_PDF)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)

    # Act
    result = calculate_sap_from_inputs(
        cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)
    )

    # Assert — ±0.07 ASHP-cohort spec-floor tolerance.
    worksheet_unrounded_sap = 88.7921
    assert abs(result.sap_score_continuous - worksheet_unrounded_sap) < _ASHP_COHORT_CHAIN_TOLERANCE


def test_summary_2636_full_chain_sap_within_spec_floor_of_worksheet() -> None:
    """Pin the full chain for Summary_000898.pdf against worksheet SAP 86.2641.

    The allowed gap is `_ASHP_COHORT_CHAIN_TOLERANCE`.
    """
    # Arrange — cert 2636-0525-2600-0401-2296 (Summary_000898.pdf):
    # Mitsubishi PUZ-WM50VHA, mid-terrace house with **alt-wall +
    # cantilever** — the most complex geometry in the ASHP cohort.
    # Worksheet "SAP value" lodges 86.2641.
    #
    # Closed by two combined slices:
    #   - S0380.12: alt-wall window-location parser fix (walls W/K
    #     20.5595 → 20.0240 = worksheet exact).
    #   - S0380.13: cantilever gate accepts "House" descriptive form
    #     in addition to the schema enum "0" (allowing the Summary
    #     mapper's descriptive property_type to trigger the cantilever
    #     detection that slice 102f-prep.9 added on the API path).
    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000898_PDF)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)

    # Act
    result = calculate_sap_from_inputs(
        cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)
    )

    # Assert — ±0.07 ASHP-cohort spec-floor tolerance.
    worksheet_unrounded_sap = 86.2641
    assert abs(result.sap_score_continuous - worksheet_unrounded_sap) < _ASHP_COHORT_CHAIN_TOLERANCE


def test_summary_2636_thermal_bridging_excludes_alt_wall_window_opening_per_sap_10_2_appendix_k() -> None:
    """Pin cert 2636's thermal-bridging intermediate and its SAP score.

    `result.intermediate["thermal_bridging_w_per_k"]` must be within 1e-4
    of 24.0495, and the continuous SAP within 1e-4 of 86.2641.
    """
    # Arrange — cert 2636 has BP0 with an alt-wall (gross 12.76 m²)
    # carrying one 1.19 m² alt-wall window (`window_wall_type=2`).
    #
    # SAP 10.2 Appendix K eqn (K2) p.84: HTB = y × Σ(Aexp), where
    # Aexp is "the total area of external elements calculated at
    # worksheet (31)". Worksheet line 187 (cert 2636 dr87-0001-000898)
    # labels (31) "Total NET area of external elements" — net of
    # openings. Cert 2636 worksheet (31) = 160.33 m² = 47.70 main net
    # + 11.57 alt net + 42.92 roof + 39.18 ground floor + 3.74
    # cantilever + 11.52 windows + 3.70 doors.
    #
    # Pre-S0380.31 the cascade summed the alt-wall at its 12.76 m²
    # gross (no opening deduction) — (31) was 161.52 → (36) = 24.228,
    # worksheet (36) = 24.0495, Δ +0.1785 W/K. That drift propagated
    # through (39) HTC → MIT → space heating, leaving the cert at
    # Δ -0.015 SAP — the only ASHP cohort cert above the 1e-4 floor.
    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000898_PDF)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)

    # Act
    result = calculate_sap_from_inputs(
        cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)
    )

    # Assert — worksheet (36) = 24.0495 W/K to 4 d.p.; full SAP
    # cascade lands within the 1e-4 spec-precision floor of the
    # worksheet's 86.2641.
    assert abs(result.intermediate["thermal_bridging_w_per_k"] - 24.0495) <= 1e-4
    assert abs(result.sap_score_continuous - 86.2641) <= 1e-4


def test_summary_mapper_raises_on_unmapped_cylinder_size_label() -> None:
    """Pin that an unknown cylinder-size label raises `UnmappedElmhurstLabel`.

    The raised error must carry field "cylinder_size" and value "Tiny".
    """
    # Arrange — start from a real cohort cert (any extracted site
    # notes) and inject an unmapped §15.1 "Cylinder Size" label
    # ("Tiny" — not in the lookup dict). `from_elmhurst_site_notes`
    # must raise `UnmappedElmhurstLabel` rather than silently
    # returning None for `cylinder_size` (the failure mode that hid
    # cert 9418's "Large" miss until Slice S0380.14 surfaced it as
    # a Δ +2.60 SAP gap).
pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000899_PDF) site_notes = ElmhurstSiteNotesExtractor(pages).extract() site_notes.water_heating.cylinder_size_label = "Tiny" # Act / Assert with pytest.raises(UnmappedElmhurstLabel) as excinfo: EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) assert excinfo.value.field == "cylinder_size" assert excinfo.value.value == "Tiny" def test_summary_mapper_raises_on_unmapped_cylinder_insulation_label() -> None: # Arrange — mirror test for the §15.1 "Insulated" label dict. pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000899_PDF) site_notes = ElmhurstSiteNotesExtractor(pages).extract() site_notes.water_heating.cylinder_insulation_label = "Polyester wool" # Act / Assert with pytest.raises(UnmappedElmhurstLabel) as excinfo: EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) assert excinfo.value.field == "cylinder_insulation" assert excinfo.value.value == "Polyester wool" def test_all_seven_ashp_cohort_certs_extract_without_unmapped_label_raise() -> None: # Arrange — coverage forcing function: every cohort cert must # extract through `from_elmhurst_site_notes` without triggering an # `UnmappedElmhurstLabel` raise from any strict helper. New cohort # certs added in subsequent slices fall under the same gate, and # any future Elmhurst-PDF variant with an unmapped label fails # this test until the missing dict entry is added. cohort_pdfs = ( _SUMMARY_000899_PDF, _SUMMARY_000903_PDF, _SUMMARY_000900_PDF, _SUMMARY_000898_PDF, _SUMMARY_000901_PDF, _SUMMARY_000904_PDF, _SUMMARY_000902_PDF, ) # Act / Assert for pdf in cohort_pdfs: pages = _summary_pdf_to_textract_style_pages(pdf) site_notes = ElmhurstSiteNotesExtractor(pages).extract() # Strict mapper run — raises if any cylinder helper hits an # unknown label. 
EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) def test_summary_3336_triple_glazed_windows_route_to_code_6() -> None: # Arrange — cert 3336-2825-9400-0512-8292's Summary §11 lodges # "Triple post or during 2022" on every window; dr87-0001-000888 # confirms "Window, Triple glazed" on every line. The Elmhurst # mapper must surface SAP 10.2 Table U2 code 6 so the §5 (66).. # (67) daylight factor uses Table 6b col light g_L = 0.70 instead # of the default DG g_L = 0.80 — the +0.0274 SAP regression that # this slice closes is driven by the daylight-factor offset that # the default-DG silently masked. pages = _summary_pdf_to_textract_style_pages( _FIXTURES / "Summary_000888.pdf" ) site_notes = ElmhurstSiteNotesExtractor(pages).extract() # Act epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) # Assert — every window on cert 3336 is triple-glazed → code 6. assert epc.sap_windows, "expected windows on cert 3336" for w in epc.sap_windows: assert w.glazing_type == 6 def test_summary_000474_double_glazed_windows_route_to_code_3() -> None: # Arrange — boiler-cohort cert (Summary_000474.pdf) lodges # "Double between 2002 and 2021" / "Double with unknown install # date" on every window. Both routes to SAP 10.2 Table U2 code 3 # (DG air-filled post-2002) per the `_ELMHURST_GLAZING_LABEL_TO # _SAP10` dict — same Table 6b col light g_L = 0.80 as the # default, so the cascade SAP is unchanged for these certs, but # the integer pin guards against future cascade consumers that # key on the subcode (e.g. a U-value default lookup for absent # `WindowTransmissionDetails`). 
# (body of test_summary_000474_double_glazed_windows_route_to_code_3, continued)
    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000474_PDF)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()

    # Act
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)

    # Assert
    assert epc.sap_windows, "expected windows on cert 000474"
    for w in epc.sap_windows:
        assert w.glazing_type == 3, (
            f"expected DG post-2002 code 3, got {w.glazing_type!r}"
        )


def test_summary_mapper_raises_on_unmapped_glazing_type_label() -> None:
    """Pin that an unknown glazing label raises `UnmappedElmhurstLabel`.

    The raised error must carry field "glazing_type" and the injected
    label as its value.
    """
    # Arrange — same strict-coverage gate as the cylinder-size helper
    # (Slice S0380.15 + S0380.16): silently routing an unknown glazing
    # variant to a SAP default int hid the +0.05 SAP regression on 13
    # triple-glazed certs until the cohort-2 first-attempt probe. After
    # this slice, an unrecognised lodging surfaces immediately at
    # extraction time.
    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000899_PDF)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()
    # Mutate the first window's glazing_type to an unmapped string.
    site_notes.windows[0].glazing_type = "Quintuple glazed with helium"

    # Act / Assert
    with pytest.raises(UnmappedElmhurstLabel) as excinfo:
        EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
    assert excinfo.value.field == "glazing_type"
    assert excinfo.value.value == "Quintuple glazed with helium"


def test_summary_000565_extractor_finds_electric_shower_in_section_1x_0() -> None:
    """SAP 10.2 Appendix J §J2 step 2a (PDF p.81) routes baths through
    `N_bath = 0.13 N + 0.19` when a shower is also present, but
    `0.35 N + 0.50` when no shower is present — a ~2.7× swing in
    (42b)m that compounds into worksheet (45)m energy content.

    Cert 000565 lodges one instantaneous electric shower in Summary
    §1x.0 Baths and Showers:

        Description    Type               Connected
        1              Electric shower    None

    The extractor's `_extract_baths_and_showers` walks 3-tuples after
    "Connected", but it locates "Connected" via
    `self._lines.index("Connected")`, which is a global search. Cert
    000565 has the substring "Connected" earlier in the document (§3
    building parts list "Connected" / "Exposed" / "Sheltered" wall
    elevation flags), so `idx` lands on a non-section anchor and the
    walk never reaches the shower row.

    Worksheet U985-0001-000565 line (42b) Jan = 35.0602 L/day requires
    the bath+shower branch (N_bath = 0.13 × 3.1578 + 0.19 = 0.6005);
    falling through to no-shower (N_bath = 0.35 × 3.1578 + 0.50 =
    1.6052) yields ~93.7 L/day — the 2.67× over-count behind (45)m's
    +903 kWh/yr cascade gap for cert 000565.

    Fix: locate "Connected" within the section bounded by
    "1x.0 Baths and Showers" → "18.0 Flue Gas Heat Recovery System"
    (both unique anchors in the Elmhurst Summary PDF).
    """
    # Arrange — Summary PDF tokenized as the extractor expects.
    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)

    # Act
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()

    # Assert — extractor finds the single electric shower lodged in
    # §1x.0, not the empty list it returns when "Connected" anchors
    # on the building-parts section.
    assert len(site_notes.baths_and_showers.showers) == 1, (
        f"expected 1 shower from §1x.0; got "
        f"{len(site_notes.baths_and_showers.showers)} "
        f"({site_notes.baths_and_showers.showers!r})"
    )
    shower = site_notes.baths_and_showers.showers[0]
    assert shower.shower_number == 1
    assert shower.outlet_type == "Electric shower"
    assert shower.connected == "None"  # the literal string "None", not the None object


def test_summary_000565_ext1_wall_construction_routes_to_stone_granite() -> None:
    """Pin that building part [1] of Summary_000565 maps to wall_construction == 1."""
    # Arrange — RdSAP 10 §3.3 + Table 4: cert 000565 Ext1 lodges
    # "SG Stone: granite or whinstone" which routes to SAP10
    # WALL_STONE_GRANITE=1. Pre-S0380.64 fell through silent-None,
    # losing the Ext1 wall channel (worksheet line 29a: 91.83 m² ×
    # U=1.7 = 156.11 W/K) from the cascade fabric subtotal.
pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF) site_notes = ElmhurstSiteNotesExtractor(pages).extract() # Act epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) # Assert assert epc.sap_building_parts[1].wall_construction == 1 def test_summary_000565_ext3_ext4_wall_constructions_route_to_basement_code_6() -> None: # Arrange — RdSAP 10 §5.17 / Table 23: cert 000565 Ext3 + Ext4 # lodge "B Basement wall". The canonical `BASEMENT_WALL_ # CONSTRUCTION_CODE=6` triggers the cascade's # `part.main_wall_is_basement` route to `u_basement_wall` at # heat_transmission.py:640. Pre-S0380.64 silent-None bypassed # the basement-wall override entirely. pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF) site_notes = ElmhurstSiteNotesExtractor(pages).extract() # Act epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) # Assert assert epc.sap_building_parts[3].wall_construction == 6 assert epc.sap_building_parts[3].main_wall_is_basement is True assert epc.sap_building_parts[4].wall_construction == 6 assert epc.sap_building_parts[4].main_wall_is_basement is True def test_summary_000565_extractor_finds_curtain_wall_age_post_2023_on_bp_2_ext2() -> None: """Summary §7 per-BP Wall block carries a `Curtain Wall Age` line when `Type: CW Curtain Wall` is lodged. Cert 000565 Ext2 (BP[2]) is the cohort fixture: it lodges Type CW Curtain Wall Curtain Wall Age Post 2023 U-value Known No Per RdSAP 10 §5.18 (PDF p.48), the U-value of a curtain wall is keyed on the per-BP `Curtain Wall Age` (Post 2023 → Table 24 window row; Pre 2023 → 2.0 W/m²K), NOT on the dwelling-wide `construction_age_band`. The extractor must surface this field so the mapper + cascade can dispatch correctly. Pre-S0380.85 the line was silently dropped and `wall_construction=9` fell through to the cavity-default Table 6 row. 
Pure extractor data-completion step — downstream cascade impact lands when the mapper threads the new field through and `u_wall` grows a Curtain Wall branch (follow-up sub-step in the same slice). """ # Arrange pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF) # Act site_notes = ElmhurstSiteNotesExtractor(pages).extract() # Assert — BP[2] is Ext2 (index 1 in `extensions`). ext2_walls = site_notes.extensions[1].walls assert ext2_walls.wall_type == "CW Curtain Wall", ( f"Ext2 wall_type = {ext2_walls.wall_type!r}; expected 'CW Curtain Wall'" ) assert ext2_walls.curtain_wall_age == "Post 2023", ( f"Ext2 curtain_wall_age = {ext2_walls.curtain_wall_age!r}; " f"expected 'Post 2023'" ) # Negative case — BPs without Curtain Wall don't have a Curtain # Wall Age line; the field must be None (not the empty-string # sentinel `_local_str` returns). main_walls = site_notes.walls assert main_walls.curtain_wall_age is None, ( f"Main wall (non-CW) curtain_wall_age = " f"{main_walls.curtain_wall_age!r}; expected None" ) def test_summary_000565_mapper_threads_curtain_wall_age_post_2023_to_bp_2_sap_building_part() -> None: """The Elmhurst mapper builds a `SapBuildingPart` per BP from the extracted `WallDetails`. `curtain_wall_age` must be threaded through so the heat-transmission cascade can dispatch on it (per [[reference-unmapped-api-code]] strict-plumbing pattern). Cert 000565 BP[2] Ext2 is the fixture: `wall_construction=9` (WALL_CURTAIN) + `curtain_wall_age="Post 2023"`. Per RdSAP 10 §5.18 + §1.5: a curtain wall can be a main wall, an alt wall, or absorbed into the prevailing wall when <10% area. This slice scopes to the main-wall path (cert 000565 lodges CW only as the BP[2] main wall, never as an alt sub-area). 
""" # Arrange pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF) site_notes = ElmhurstSiteNotesExtractor(pages).extract() # Act epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) # Assert bp_2 = epc.sap_building_parts[2] assert bp_2.wall_construction == 9, ( f"BP[2] wall_construction = {bp_2.wall_construction!r}; " f"expected 9 (WALL_CURTAIN)" ) assert bp_2.curtain_wall_age == "Post 2023", ( f"BP[2] curtain_wall_age = {bp_2.curtain_wall_age!r}; " f"expected 'Post 2023'" ) # Non-CW BPs preserve curtain_wall_age=None (no per-BP signal). assert epc.sap_building_parts[0].curtain_wall_age is None assert epc.sap_building_parts[1].curtain_wall_age is None def test_summary_000565_ext2_curtain_wall_routes_to_u_value_1p4_per_rdsap_10_section_5_18() -> None: """End-to-end cascade pin: with `curtain_wall_age="Post 2023"` plumbed through extractor + mapper + `u_wall` `WALL_CURTAIN` branch, the `heat_transmission_from_cert` walls subtotal on cert 000565 must reflect the §5.18 Curtain Wall U=1.4 W/m²K on BP[2] Ext2. Pre-S0380.85: BP[2] cascade U=0.60 (Cavity default, age H), Δ −0.80 W/m²K vs worksheet U=1.40. The BP[2] Ext2 gross wall area on cert 000565 multiplied by this U-delta accounts for the documented −112.2 W/K contribution to the walls subtotal residual. Asserts the cascade walls subtotal moves materially toward the worksheet target 604.07 W/K (from pre-S0380.85's 443 W/K). The remaining ~50 W/K gap is the BP[0] Main alt1 thin-wall stone granite cascade gap — out of scope for this slice; closes in follow-up S0380.86. """ # Arrange from domain.sap10_calculator.worksheet.heat_transmission import ( heat_transmission_from_cert, ) pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF) site_notes = ElmhurstSiteNotesExtractor(pages).extract() epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) # Act ht = heat_transmission_from_cert(epc) # Assert — pre-S0380.85 cascade had walls 443 W/K. 
# Curtain Wall closure adds ~112 W/K (worksheet target 604 W/K).
    # Lower-bound 540 W/K is a robust gate that still leaves headroom
    # for the remaining BP[0] alt1 thin-wall gap; the cascade reaches ~555.
    assert ht.walls_w_per_k >= 540.0, (
        f"walls_w_per_k = {ht.walls_w_per_k:.2f}; expected ≥540 after "
        f"Curtain Wall §5.18 dispatch (pre-S0380.85 baseline was 443)"
    )


def test_summary_000565_mapper_routes_alt_wall_thickness_120mm_to_wall_thickness_mm_field() -> None:
    """The Summary §7 "Alternative Wall N Thickness" line is the WALL
    thickness, NOT an insulation thickness. Cert 000565 BP[0] Main alt1
    lodges

        Alternative Wall 1 Type        SG Stone: granite or whinstone
        Alternative Wall 1 Insulation  A As Built
        Alternative Wall 1 Dry-lining  Yes
        Alternative Wall 1 Thickness   120 mm

    Pre-S0380.86 `_map_elmhurst_alternative_wall` routed this 120 mm
    onto `SapAlternativeWall.wall_insulation_thickness="120"`, a
    semantic mis-name flagged in
    `[[feedback-no-misleading-insulation-type]]`. The cascade then
    mis-bucketed it as insulation (bucket 100 → _BRICK_INS_100 → U=0.32
    at age A) instead of routing to the RdSAP 10 §5.6 thin-wall stone
    formula (U₀=3.89 → §5.8 dry-line adjustment → U=2.34, matching
    worksheet line (29a)).

    This pin asserts the mapper now lodges the wall thickness on the
    new `SapAlternativeWall.wall_thickness_mm` field, leaving
    `wall_insulation_thickness=None` (the As-Built lodging carries no
    insulation thickness).
    """
    # Arrange
    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()

    # Act
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)

    # Assert
    alt1 = epc.sap_building_parts[0].sap_alternative_wall_1
    assert alt1 is not None
    assert alt1.wall_construction == 1, (
        f"BP[0] alt1 wall_construction = {alt1.wall_construction!r}; "
        f"expected 1 (WALL_STONE_GRANITE)"
    )
    assert alt1.wall_thickness_mm == 120, (
        f"BP[0] alt1 wall_thickness_mm = {alt1.wall_thickness_mm!r}; "
        f"expected 120 (the lodged wall thickness, not insulation)"
    )
    assert alt1.wall_insulation_thickness is None, (
        f"BP[0] alt1 wall_insulation_thickness = "
        f"{alt1.wall_insulation_thickness!r}; expected None (As-Built "
        f"lodging carries no insulation thickness)"
    )
    assert alt1.wall_dry_lined == "Y"


def test_summary_000565_bp0_alt1_stone_granite_thin_wall_routes_to_u_value_2p34_per_rdsap_10_section_5_6() -> None:
    """End-to-end cascade pin: with `wall_thickness_mm=120` plumbed
    through extractor + mapper + `u_wall` §5.6 thin-wall formula + §5.8
    dry-line adjustment, cert 000565 BP[0] Main alt1 cascade U-value
    moves from 0.32 → 2.34 (worksheet line (29a) pin).

    Δ U=2.02 × area=23 m² → +46.5 W/K of cascade walls heat loss.
    Combined with S0380.85's Curtain Wall closure (+112 W/K), the
    cascade walls subtotal closes from 443 W/K (pre-S0380.84 baseline)
    → ~602 W/K (worksheet 604.07; <0.5% residual).

    Asserts the cascade walls subtotal is now within 2% of worksheet
    (post-S0380.85 was 555.93; this slice should bring it to ~602).
    Only the lower bound (≥595 W/K) is actually asserted below.
    """
    # Arrange
    # Imported locally: this helper is not part of the module-level imports.
    from domain.sap10_calculator.worksheet.heat_transmission import (
        heat_transmission_from_cert,
    )
    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)

    # Act
    ht = heat_transmission_from_cert(epc)

    # Assert — worksheet target 604.07; lower-bound 595 is a robust
    # gate that admits ≤2% residual against the worksheet pin.
    assert ht.walls_w_per_k >= 595.0, (
        f"walls_w_per_k = {ht.walls_w_per_k:.2f}; expected ≥595 after "
        f"§5.6 thin-wall + §5.8 dry-line dispatch (post-S0380.85 was 555.93)"
    )


def test_summary_000565_ext1_party_wall_routes_to_cavity_filled_code_11() -> None:
    """Pin that cert 000565 BP[1]'s party wall maps to SAP10 code 11."""
    # Arrange — RdSAP 10 §5.10 Table 15 row 3 (PDF p.42) "Cavity masonry
    # filled -> U=0.2 W/m²K". Cert 000565 Ext1 lodges "CF Cavity masonry
    # filled". The synthetic SAP10 code `WALL_CAVITY_FILLED_PARTY=11`
    # (introduced S0380.91) distinguishes filled-cavity party walls from
    # the construction-class-shared code 4 (which `u_party_wall` resolves
    # to 0.5 per Table 15 row 2). Code 11 is party-wall-only; it never
    # appears as a main `wall_construction` so `u_wall` is unaffected.
    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()

    # Act
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)

    # Assert
    assert epc.sap_building_parts[1].party_wall_construction == 11


def test_summary_000565_ext1_party_wall_cf_routes_to_u_value_0p2() -> None:
    """Pin that BP[1]'s mapped party-wall code gives U=0.2 via `u_party_wall`."""
    # Arrange — cascade integration check for slice S0380.91: route
    # cert 000565's Summary §8.1 "CF Cavity masonry filled" lodgement
    # through extractor + mapper + heat_transmission and verify Ext1's
    # party-wall U-value is 0.2 (Table 15 row 3) rather than the prior
    # 0.5 (cavity-unfilled approximation).
# Localises the slice to one surface area × U product so the cascade
    # aggregate movement (-28 W/K on party_walls, ~-1000 kWh of cert
    # 000565's +1460 SH residual) is traceable to one BP.
    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
    construction = epc.sap_building_parts[1].party_wall_construction
    # Narrow to int before handing the code to `u_party_wall`.
    assert isinstance(construction, int)

    # Act
    u = u_party_wall(party_wall_construction=construction)

    # Assert
    assert abs(u - 0.2) <= 1e-4


def test_summary_000565_section_12_2_pulse_pressure_test_ap4_extracted() -> None:
    """Pin that the extractor reads the §12.2 test method and AP4 value."""
    # Arrange — cert 000565 §12.2 Air Pressure Test lodges:
    #   Test Method: Pulse
    #   Pressure Test Result (AP4): 2.00
    # SAP 10.2 §2 line (17a) "Air permeability value, AP4, (m³/h/m²)" is
    # the measured air permeability at 4 Pa from the low-pressure pulse
    # technique. The cascade's
    # `ventilation_from_inputs(air_permeability_ap4=...)` consumes it via
    # line (18) = 0.263 × AP4^0.924 + (8).
    # Pre-slice the extractor read only the Test Method string and
    # silently dropped the AP4 value, so the cascade fell back to the
    # components-based (16) infiltration rate (+0.375 ach over worksheet).
    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)

    # Act
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()

    # Assert
    assert site_notes.ventilation.pressure_test_method == "Pulse"
    ap4 = site_notes.ventilation.air_permeability_ap4_m3_h_m2
    assert ap4 is not None
    assert abs(ap4 - 2.0) <= 1e-4


def test_summary_000565_air_permeability_ap4_routes_to_sap_ventilation_field() -> None:
    """Pin that the extracted AP4 value survives the mapper onto `sap_ventilation`."""
    # Arrange — mapper plumbing for SAP 10.2 §2 (17a). The Elmhurst
    # `VentilationAndCooling.air_permeability_ap4_m3_h_m2` field carries
    # through to `SapVentilation.air_permeability_ap4_m3_h_m2` so the
    # `cert_to_inputs` ventilation cascade can read it and pass into
    # `ventilation_from_inputs(air_permeability_ap4=...)`.
    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()

    # Act
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)

    # Assert
    assert epc.sap_ventilation is not None
    ap4 = epc.sap_ventilation.air_permeability_ap4_m3_h_m2
    assert ap4 is not None
    assert abs(ap4 - 2.0) <= 1e-4


def test_summary_000565_section_12_1_extracts_mechanical_extract_decentralised_mev_dc_kind() -> None:
    """Pin that the extractor keeps the §12.1 mechanical-ventilation type string."""
    # Arrange — cert 000565 §12.1 Mechanical Ventilation lodges:
    #   Mechanical Ventilation: Yes
    #   Mechanical Ventilation Type: Mechanical extract, decentralised
    #   (MEV dc)
    # SAP 10.2 §2 line (23a) for MEV: "system throughput = 0.5 ach"; the
    # effective ach formula (25) routes through (24c) "whole-house
    # extract ventilation or PIV from outside" — `(22b)m + 0.5 × (23b)`
    # when (22b) ≥ 0.5×(23b). Pre-slice the extractor read only the
    # "Mechanical Ventilation" yes/no bool and dropped the Type string,
    # so the cascade defaulted to mv_kind=NATURAL → (24d) formula.
    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)

    # Act
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()

    # Assert
    assert site_notes.ventilation.mechanical_ventilation is True
    assert (
        site_notes.ventilation.mechanical_ventilation_type
        == "Mechanical extract, decentralised (MEV dc)"
    )


def test_summary_000565_detailed_rr_residual_area_closes_total_external_area_per_rdsap_10_section_3_10_1() -> None:
    """Pin a lower bound on the cascade's total external element area for cert 000565."""
    # Arrange — RdSAP 10 §3.10.1 (PDF p.24) "Default U-values of the
    # roof rooms":
    #   "The residual area (area of roof less the floor area of room(s)-
    #   in-roof) has a U-value from Table 16 : Roof U-values when loft
    #   insulation thickness is known according to its insulation
    #   thickness if at least half the area concerned is accessible,
    #   otherwise it is the default for the age band of the original
    #   property or extension."
    # Worksheet pattern (cert 000565 BP[0]): "Roof room Main remaining
    # area" 43.97 m² × U=0.35 (Table 18 col 4 age H default).
# Pre-slice S0380.95 the cascade computed residual area ONLY for # Simplified RR mode (via `rr_a_rr − rr_common − rr_gable` in # `_part_geometry`); the Detailed-RR branch in `heat_transmission` # iterated `rir.detailed_surfaces` and missed the residual entirely. # Cert 000565 routes all 5 BPs through Detailed mode (mapper # translates Simplified-Summary lodgements to `SapRoomInRoofSurface` # records), so cascade total_external_element_area_m2 was 779.27 m² # vs worksheet (31) = 857.64 m² (Δ −78.37 m² → thermal_bridging # under by ~−11.76 W/K). pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF) site_notes = ElmhurstSiteNotesExtractor(pages).extract() epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) # Act from domain.sap10_calculator.worksheet.heat_transmission import ( heat_transmission_from_cert, ) ht = heat_transmission_from_cert(epc, door_count=epc.door_count or 0) # Assert — cascade closes to within ±10 m² of worksheet (31). The # residual sums roughly to BP[0]'s 43.97 m² + BP[1]'s ~22 m² + # BP[3]'s ~17 m² + BP[4]'s small contribution; remaining residual # (BP[1] ~+3.7 m² over) traces to the spec's ambiguous Detailed- # mode residual formula for extensions with multi-storey heights. assert ht.total_external_element_area_m2 >= 845.0, ( f"cascade total_external_element_area_m2={ht.total_external_element_area_m2:.4f}; " f"expected ≥845 m² after §3.10.1 Detailed-RR residual area closure " f"(pre-slice was 779.27 m² vs worksheet 857.64)" ) def test_summary_000565_ext2_stud_wall_2_extracts_400_plus_mm_pur_or_pir_lodgement() -> None: # Arrange — cert 000565 Summary §8.1 BP[2] Ext2 (Detailed) lodges # "Stud Wall 2: 2.00 × 2.00, 400+ mm, PUR or PIR" with Default # U-value 0.10. 
# Pre-slice the extractor regex `^\d+\s*mm$` failed to match "400+ mm"
    # (the trailing "+" tripped the digit-only anchor) so the insulation
    # token was silently dropped; and the type allow-list
    # `("Mineral or EPS", "PUR", "PIR")` failed to match "PUR or PIR"
    # (the conjunction is the actual Summary text).
    # Cascade fell through to Table 17 row 0 (uninsulated) → U=2.30
    # against worksheet 0.10, over-counting Stud Wall 2 by ~8.80 W/K.
    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)

    # Act
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()

    # Assert
    ext2_rir = site_notes.extensions[1].room_in_roof
    assert ext2_rir is not None
    # `next` without a default raises StopIteration if the surface is absent.
    stud_wall_2 = next(s for s in ext2_rir.surfaces if s.name == "Stud Wall 2")
    assert stud_wall_2.insulation == "400+ mm"
    assert stud_wall_2.insulation_type == "PUR or PIR"


def test_summary_000565_ext2_stud_wall_2_routes_to_400mm_rigid_foam_via_mapper() -> None:
    """Pin that the 4 m² Ext2 stud wall maps to 400 mm of "rigid_foam"."""
    # Arrange — mapper plumbing: "400+ mm" parses to thickness 400 mm
    # (the trailing "+" is a bucket-cap convention; spec Table 17 max
    # tabulated row is 400 mm). "PUR or PIR" maps to the canonical
    # SAP10 insulation-type code "rigid_foam" so the cascade's
    # `_is_rigid_foam` resolves correctly.
    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()

    # Act
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)

    # Assert
    ext2_rir = epc.sap_building_parts[2].sap_room_in_roof
    assert ext2_rir is not None
    detailed = ext2_rir.detailed_surfaces or []
    stud_walls = [s for s in detailed if s.kind == "stud_wall"]
    assert len(stud_walls) == 2
    # Stud Wall 2 is picked out by its area (2.00 × 2.00 = 4.0 m²).
    sw_2 = next(s for s in stud_walls if s.area_m2 == 4.0)
    assert sw_2.insulation_thickness_mm == 400
    assert sw_2.insulation_type == "rigid_foam"


def test_summary_000565_ext1_floor_above_partially_heated_routes_to_u_value_0p7_per_rdsap_10_section_5_14() -> None:
    """Pin that Ext1's first floor dimension is flagged as above a partially heated space."""
    # Arrange — RdSAP 10 §5.14 (PDF p.47) "U-value of floor above a
    # partially heated space":
    #   "The U-value of a floor above partially heated premises is taken
    #   as 0.7 W/m²K. This applies typically for a flat above non-
    #   domestic premises that are not heated to the same extent or
    #   duration as the flat."
    # Cert 000565 Summary §9 1st Extension lodges "Location: P Above
    # partially heated space" + "Default U-value: 0.70". Pre-slice the
    # cascade routed BP[1] floor through the BS EN ISO 13370 ground-
    # floor formula → cascade U=0.76 (vs spec 0.70, i.e. over by
    # 0.06 W/m²K × 34 m² ≈ +2.04 W/K). The mapper now flags
    # `is_above_partially_heated_space=True` on the ground
    # SapFloorDimension so `heat_transmission` dispatches to the §5.14
    # constant.
    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()

    # Act
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)

    # Assert
    ext1_ground = epc.sap_building_parts[1].sap_floor_dimensions[0]
    assert ext1_ground.floor == 0
    assert ext1_ground.is_above_partially_heated_space is True


def test_summary_000565_mev_decentralised_routes_to_extract_or_piv_outside_mv_kind() -> None:
    """Pin that the MEV dc lodgement maps to "EXTRACT_OR_PIV_OUTSIDE"."""
    # Arrange — mapper plumbing for SAP 10.2 §2 (23a)/(24c) MEV: the
    # Elmhurst "Mechanical extract, decentralised (MEV dc)" string maps
    # to `MechanicalVentilationKind.EXTRACT_OR_PIV_OUTSIDE` so the
    # cascade picks the (24c) effective-ach formula.
    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()

    # Act
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)

    # Assert
    assert epc.sap_ventilation is not None
    assert epc.sap_ventilation.mechanical_ventilation_kind == "EXTRACT_OR_PIV_OUTSIDE"


def test_summary_mapper_raises_on_unmapped_wall_type_code() -> None:
    """Pin that an unknown `walls.wall_type` raises `UnmappedElmhurstLabel`."""
    # Arrange — strict-coverage gate per [[reference-unmapped-api-code]]
    # mirror: an Elmhurst wall_type lodgement that isn't in
    # `_ELMHURST_WALL_CODE_TO_SAP10` raises `UnmappedElmhurstLabel`
    # rather than silently routing through wall_construction=None.
    # The silent-None failure mode is what hid cert 000565 Ext1/3/4
    # ~300 W/K cascade gap until the S0380.64 fabric-loss audit.
    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()
    site_notes.walls.wall_type = "XX Unknown construction"

    # Act / Assert
    with pytest.raises(UnmappedElmhurstLabel) as excinfo:
        EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
    assert excinfo.value.field == "walls.wall_type"
    assert excinfo.value.value == "XX Unknown construction"


def test_summary_mapper_raises_on_unmapped_party_wall_type_code() -> None:
    """Pin that an unknown `walls.party_wall_type` raises `UnmappedElmhurstLabel`."""
    # Arrange — mirror strict-coverage gate for party-wall-type
    # lodgements (same silent-None failure mode at the
    # `_elmhurst_party_wall_construction_int` boundary).
    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()
    site_notes.walls.party_wall_type = "YY Unknown party wall"

    # Act / Assert
    with pytest.raises(UnmappedElmhurstLabel) as excinfo:
        EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
    assert excinfo.value.field == "walls.party_wall_type"
    assert excinfo.value.value == "YY Unknown party wall"


# ----------------------------------------------------------------------
# API mapper strict-raise — mirror the Elmhurst UnmappedElmhurstLabel
# coverage gate on the GOV.UK API path. The same failure mode (silently
# routing an unknown enum to a default int / None hides cascade gaps
# until a SAP-delta investigation surfaces them) applies to API
# integer codes. Each strict helper is unit-tested for its raise
# behaviour; a cohort-coverage forcing function asserts every golden
# fixture extracts cleanly via `from_api_response`.
# ----------------------------------------------------------------------


# Directory holding the golden API JSON fixtures, resolved from this file.
_GOLDEN_FIXTURES_DIR = (
    Path(__file__).parents[3] / "domain/sap10_calculator/rdsap/tests/fixtures/golden"
)


# NOTE(review): `callable` is the builtin predicate, not a subscriptable
# type. The annotation is a string (and annotations are lazy in this
# module), so it is never evaluated at runtime, but
# `collections.abc.Callable[[dict], None]` is the checker-friendly
# spelling — it needs a top-of-file import to adopt.
def _patch_api_doc_and_extract(
    cert: str, mutator: "callable[[dict], None]"
) -> None:
    """Load a golden cert JSON, apply a mutation, and run `from_api_response`.
Used by the strict-raise unit tests to inject an unmapped integer code into a known-good document.""" doc = json.loads((_GOLDEN_FIXTURES_DIR / f"{cert}.json").read_text()) mutator(doc) EpcPropertyDataMapper.from_api_response(doc) def test_api_mapper_raises_on_unmapped_floor_construction_code() -> None: # Arrange — start from a real cohort cert and inject an unmapped # `floor_construction` integer (currently the dict covers 1 and 2). # The mapper must raise `UnmappedApiCode` rather than silently # dropping the floor_construction signal — losing it routes the # cascade to the wrong solid-vs-suspended branch (see Slice # S0380.27's floor_construction_type fix that closed cert 8135's # PE -4.96 → -0.07). def mutate(doc: "dict") -> None: doc["sap_building_parts"][0]["sap_floor_dimensions"][0]["floor_construction"] = 99 # Act / Assert with pytest.raises(UnmappedApiCode) as excinfo: _patch_api_doc_and_extract("0380-2471-3250-2596-8761", mutate) assert excinfo.value.field == "floor_construction" assert excinfo.value.value == 99 def test_api_mapper_raises_on_unmapped_roof_construction_code() -> None: # Arrange — inject an unmapped roof_construction integer. The # cascade's `_api_roof_construction_str` powers the cos(30°) # inclined-surface factor and the flat-roof Table 18 column-3 # dispatch — a silently-None value here under-counts roof loss # for sloping ceilings or routes flat roofs to the wrong column. def mutate(doc: "dict") -> None: doc["sap_building_parts"][0]["roof_construction"] = 99 # Act / Assert with pytest.raises(UnmappedApiCode) as excinfo: _patch_api_doc_and_extract("0380-2471-3250-2596-8761", mutate) assert excinfo.value.field == "roof_construction" assert excinfo.value.value == 99 def test_api_mapper_raises_on_unmapped_party_wall_construction_code() -> None: # Arrange — inject an unmapped party_wall_construction. The cohort # currently covers RdSAP10 Table 15 codes 0..5; out-of-range integers # must raise so the next fixture forces an explicit dict entry. 
def mutate(doc: "dict") -> None: doc["sap_building_parts"][0]["party_wall_construction"] = 99 # Act / Assert with pytest.raises(UnmappedApiCode) as excinfo: _patch_api_doc_and_extract("0380-2471-3250-2596-8761", mutate) assert excinfo.value.field == "party_wall_construction" assert excinfo.value.value == 99 def test_api_mapper_raises_on_unmapped_floor_heat_loss_code() -> None: # Arrange — codes 4/5/8+ aren't in the dict; injecting one must # raise. Codes 1/2/3/6/7 are mapped explicitly (some to None) so # the strict gate distinguishes "decided no string" from "unknown". def mutate(doc: "dict") -> None: doc["sap_building_parts"][0]["floor_heat_loss"] = 99 # Act / Assert with pytest.raises(UnmappedApiCode) as excinfo: _patch_api_doc_and_extract("0380-2471-3250-2596-8761", mutate) assert excinfo.value.field == "floor_heat_loss" assert excinfo.value.value == 99 def test_api_mapper_raises_on_unmapped_built_form_code() -> None: # Arrange — codes 1..6 cover detached / semi-detached / terraces; # an out-of-range integer must raise rather than silently routing # through the cascade's `_DEFAULT_SHELTERED_SIDES = 2`. def mutate(doc: "dict") -> None: doc["built_form"] = 99 # Act / Assert with pytest.raises(UnmappedApiCode) as excinfo: _patch_api_doc_and_extract("0380-2471-3250-2596-8761", mutate) assert excinfo.value.field == "built_form" assert excinfo.value.value == 99 def test_all_golden_fixtures_extract_via_api_without_unmapped_code_raise() -> None: # Arrange — coverage forcing function on the API path: every JSON # fixture in `fixtures/golden/` must round-trip through # `from_api_response` without triggering an `UnmappedApiCode` raise # from any strict helper. New cohort fixtures added in subsequent # slices fall under the same gate; future API enum variants # surface here at extraction time instead of as a downstream SAP # delta. 
fixtures = sorted(_GOLDEN_FIXTURES_DIR.glob("*.json")) assert fixtures, f"no golden fixtures under {_GOLDEN_FIXTURES_DIR}" # Act / Assert — strict run for each fixture for fixture in fixtures: doc = json.loads(fixture.read_text()) EpcPropertyDataMapper.from_api_response(doc) def test_summary_7800_two_electric_showers_count_as_two_not_one() -> None: # Arrange — cert 7800-1501-0922-7127-3563's Summary §16 lodges TWO # instantaneous electric showers ("Shower 01" + "Shower 11", both # `outlet_type='Electric shower'`). Pre-Slice S0380.19 the mapper # hardcoded `electric_shower_count = 1 if has_electric_shower else # None`, losing the multiplicity. Cascade-equivalent on this cert: # Appendix J eq J16 (N_ES,per_outlet = N_shower / N_outlets) and # eq J18 (Σ_j E_ES,j) yield the same (64a) value for 1 vs 2 outlets # when there are no mixer outlets, so the SAP delta is unchanged # — but the lodged multiplicity is now surfaced for any future # cascade consumer that needs it. pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000890_PDF) site_notes = ElmhurstSiteNotesExtractor(pages).extract() # Act epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) # Assert — both lodged electric showers surface on the EPC. assert epc.sap_heating.electric_shower_count == 2 assert epc.sap_heating.mixer_shower_count == 0 def test_summary_0036_flat_unknown_party_wall_routes_to_u_zero() -> None: # Arrange — cert 0036-6325-1100-0063-1226 is a "Flat, Mid-Terrace" # whose Summary lodges party_wall_type='U Unable to determine'. # RdSAP 10 Table 15 footnote *: flats/maisonettes with unknown # party-wall construction default to U=0.0, NOT the U=0.25 house # default. Before Slice S0380.18 the cascade routed the lodging's # "unknown" sentinel to the house default → +6.03 W/K HLC excess # → SAP under-prediction of -0.37 vs worksheet 62.7471. 
pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000910_PDF) site_notes = ElmhurstSiteNotesExtractor(pages).extract() epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) # Act — chain the EPC through cert_to_inputs + the calculator so # the assertion exercises the full cascade `u_party_wall` path, # not just the helper in isolation. result = calculate_sap_from_inputs( cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES) ) # Assert — party walls contribute zero to HLC for this flat with # unknown party-wall construction (matches worksheet line (32) = # 24.13 m² × 0.00 = 0.0000 W/K). assert epc.property_type == "Flat" assert abs(result.intermediate["party_walls_w_per_k"] - 0.0) <= 1e-4 def test_summary_2536_normal_cylinder_routes_to_code_2() -> None: # Arrange — cert 2536-2525-0600-0788-2292's Summary §15.1 lodges # "Cylinder Size: Normal". The dr87 worksheet lodges "Cylinder # Volume 110.00" L on line ref (47); the cascade lookup # `_CYLINDER_SIZE_CODE_TO_LITRES` now maps code 2 → 110 L per # RdSAP 10 §10.5 Table 28's Normal (90-130 L) band midpoint. # First cohort cert to exercise the "Normal" cylinder lodging. pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000889_PDF) site_notes = ElmhurstSiteNotesExtractor(pages).extract() # Act epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) # Assert assert epc.sap_heating.cylinder_size == 2 def test_summary_9421_normal_cylinder_routes_to_code_2() -> None: # Arrange — cert 9421-3045-3205-1646-6200's Summary §15.1 also # lodges "Cylinder Size: Normal" (same 110 L cylinder as cert # 2536). Second cohort cert exercising the "Normal" mapping — # pinned to guard against silent regression of either the mapper # dict entry OR the cascade volume default. 
pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000884_PDF) site_notes = ElmhurstSiteNotesExtractor(pages).extract() # Act epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) # Assert assert epc.sap_heating.cylinder_size == 2 def test_summary_9418_large_cylinder_routes_to_code_4() -> None: # Arrange — cert 9418-3062-8205-3566-7200's Summary §15.1 lodges # "Cylinder Size: Large". The dr87 worksheet lodges "Cylinder # Volume 210.00" L, and the cascade lookup # `_CYLINDER_SIZE_CODE_TO_LITRES = {3: 160.0, 4: 210.0}` maps code # 4 → 210 L. Cert 9418 is the first cohort cert to exercise the # "Large" cylinder lodging (every other cohort cert is "Medium"). pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000902_PDF) site_notes = ElmhurstSiteNotesExtractor(pages).extract() # Act epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) # Assert assert epc.sap_heating.cylinder_size == 4 def test_summary_9418_full_chain_sap_within_spec_floor_of_worksheet() -> None: # Arrange — cert 9418-3062-8205-3566-7200 (Summary_000902.pdf): # **Daikin EDLQ05CAV3 ASHP** (PCDB index 102421 — distinct from # the rest of the cohort's Mitsubishi 104568), end-terrace house # with TWO 1.64 kWp PV arrays (N + S), 210 L cylinder. # `heating_duration_code='24'` per Table N4 (continuous heating). # Worksheet "SAP value" lodges 84.6305. # # Closes the cohort: the final ASHP cert. The only Summary-mapper # gap was the missing "Large" → 4 mapping in # `_ELMHURST_CYLINDER_SIZE_LABEL_TO_SAP10` (Slice S0380.14, this # commit) — multi-array PV + Large-cylinder were the variants # cert 9418 uniquely exercises. pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000902_PDF) site_notes = ElmhurstSiteNotesExtractor(pages).extract() epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) # Act result = calculate_sap_from_inputs( cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES) ) # Assert — ±0.07 ASHP-cohort spec-floor tolerance. 
worksheet_unrounded_sap = 84.6305 assert abs(result.sap_score_continuous - worksheet_unrounded_sap) < _ASHP_COHORT_CHAIN_TOLERANCE def test_summary_3800_full_chain_sap_within_spec_floor_of_worksheet() -> None: # Arrange — cert 3800-8515-0922-3398-3563 (Summary_000901.pdf / # dr87-0001-000901.pdf) is the third ASHP cohort cert to close on # the Summary path: Mitsubishi PUZ-WM50VHA ASHP (PCDB 104568). # Worksheet "SAP value" lodges 86.1458. # # **First-try closure — zero new mapper slices required**. The # structural work shipped in slices S0380.2..S0380.9 (HP routing, # cylinder block, composite walls, multi-array PV, extension # inheritance) was already sufficient for cert 3800's variant set. # Strong evidence that the Summary mapper has reached completeness # for the standard single-bp / single-array ASHP shape. pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000901_PDF) site_notes = ElmhurstSiteNotesExtractor(pages).extract() epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) # Act result = calculate_sap_from_inputs( cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES) ) # Assert — ±0.07 ASHP-cohort spec-floor tolerance. worksheet_unrounded_sap = 86.1458 assert abs(result.sap_score_continuous - worksheet_unrounded_sap) < _ASHP_COHORT_CHAIN_TOLERANCE def test_summary_9285_full_chain_sap_within_spec_floor_of_worksheet() -> None: # Arrange — cert 9285-3062-0205-7766-7200 (Summary_000904.pdf / # dr87-0001-000904.pdf) is the fourth ASHP cohort cert to close on # the Summary path: Mitsubishi PUZ-WM50VHA ASHP (PCDB 104568). # Worksheet "SAP value" lodges 84.1369. Same "first-try closure, # zero new slices" disposition as cert 3800 — the cohort's # structural mapper completeness is the load-bearing claim. 
pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000904_PDF) site_notes = ElmhurstSiteNotesExtractor(pages).extract() epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) # Act result = calculate_sap_from_inputs( cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES) ) # Assert — ±0.07 ASHP-cohort spec-floor tolerance. worksheet_unrounded_sap = 84.1369 assert abs(result.sap_score_continuous - worksheet_unrounded_sap) < _ASHP_COHORT_CHAIN_TOLERANCE def test_summary_0380_full_chain_sap_within_spec_floor_of_worksheet() -> None: # Arrange — cert 0380-2471-3250-2596-8761 (Summary_000899.pdf / # dr87-0001-000899.pdf) is the first heat-pump cert under per-cert # Summary-path mapper validation: Mitsubishi PUZ-WM50VHA ASHP # (PCDB index 104568), semi-detached bungalow age D, TFA 60.43 m². # Worksheet PDF "SAP value" line lodges unrounded SAP **88.5104**. # Slices S0380.2..S0380.6 closed the Summary path from Δ -54.7184 # to Δ +0.0594 — the same Appendix N3.6 PSR-interpolation # precision floor at which the API path closes (commit c0086660 # slice 102f wired this floor for the full 7-cert ASHP cohort at # the same ±0.07 tolerance). Closing further requires calculator # work on the PSR interpolation step, not mapper work — the # Summary EPC and API EPC produce IDENTICAL cascade outputs at # this point (HW kWh, fabric W/K, HLC all match at 1e-4), so the # +0.0594 residual is structural to the calculator's HP path for # this fixture's PSR. pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000899_PDF) site_notes = ElmhurstSiteNotesExtractor(pages).extract() epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) # Act result = calculate_sap_from_inputs( cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES) ) # Assert — ±0.07 ASHP-cohort spec-floor tolerance (matches API # path's slice 102f disposition; `_ASHP_COHORT_CHAIN_TOLERANCE` # is defined alongside the API-path equivalents below). 
worksheet_unrounded_sap = 88.5104 assert abs(result.sap_score_continuous - worksheet_unrounded_sap) < _ASHP_COHORT_CHAIN_TOLERANCE _API_0330_JSON = ( Path(__file__).parents[3] / "domain/sap10_calculator/rdsap/tests/fixtures/golden" / "0330-2249-8150-2326-4121.json" ) _API_9501_JSON = ( Path(__file__).parents[3] / "domain/sap10_calculator/rdsap/tests/fixtures/golden" / "9501-3059-8202-7356-0204.json" ) def test_api_9501_full_chain_sap_matches_worksheet_pdf_exactly() -> None: # Arrange — cert 9501 is the third Layer 4 production gate (after # cert 001479 and cert 0330): API path → from_api_response → # cert_to_inputs → calculate_sap_from_inputs must hit the worksheet # SAP at 1e-4. Cert 9501 is the FIRST flat in the production gate # set — mid-terrace top-floor flat with RR + measured PV (2.36 kWp # SW @ 45°). Worksheet target unrounded SAP **68.5252**. # # Slices 100a-100c jointly closed the API path from Δ -14.82 to # 1e-4: 100a `room_in_roof_details` schema + Detailed-RR surface # population (HLC 382.19 → 297.54 W/K vs worksheet 296.68); 100b # per-bp TFA includes RR floor area (TFA 81.28 → 113.08); 100c # `photovoltaic_supply.pv_arrays` schema + gap-aware glazing # lookup (DG pre-2002 16+ → U=2.7 per RdSAP 10 Table 24). doc = json.loads(_API_9501_JSON.read_text()) epc = EpcPropertyDataMapper.from_api_response(doc) # Act result = calculate_sap_from_inputs( cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES) ) # Assert — 1e-4 pin against the worksheet's continuous SAP. worksheet_unrounded_sap = 68.5252 assert abs(result.sap_score_continuous - worksheet_unrounded_sap) < 1e-4 def test_api_9501_photovoltaic_array_surfaced() -> None: # Arrange — cert 9501's API JSON lodges measured PV under # `sap_energy_source.photovoltaic_supply.pv_arrays`. Two real-API # PV shapes coexist: cohort cert 2130 lodges the outer wrapper as # a nested list `[[{...}], ...]`; cert 9501 lodges a dict # `{"pv_arrays": [{...}]}`. 
The existing schema models only the # legacy `none_or_no_details` field on `PhotovoltaicSupply` — so # cert 9501's `pv_arrays` payload was silently dropped, leaving # `photovoltaic_arrays=None` and the cascade missing the worksheet's # £250.02 PV credit. doc = json.loads(_API_9501_JSON.read_text()) # Act epc = EpcPropertyDataMapper.from_api_response(doc) # Assert — single array with the lodged kWp/pitch/orientation/ # overshading values. arrays = epc.sap_energy_source.photovoltaic_arrays assert arrays is not None assert len(arrays) == 1 assert abs(arrays[0].peak_power - 2.36) <= 1e-4 assert arrays[0].pitch == 3 # RdSAP §11.1 enum: 3 = 45° assert arrays[0].orientation == 6 # SAP octant: SW assert arrays[0].overshading == 1 # RdSAP: None or very little _API_0380_JSON = ( Path(__file__).parents[3] / "domain/sap10_calculator/rdsap/tests/fixtures/golden" / "0380-2471-3250-2596-8761.json" ) def test_api_0380_glazing_type_14_resolves_to_post_2022_dg_u_value() -> None: # Arrange — cert 0380 (ASHP semi-detached bungalow, worksheet SAP # 88.5104) lodges glazing_type=14 on all windows. The worksheet # uses U=1.3258 (post-curtain) for line (27), which back-calculates # to a raw U=1.40 — the SAP10.2 Table 24 row for "Double or triple # glazed, 2022 or later". Code 13 in our existing dict carries the # same U/g values; code 14 is the schema sibling for the same # post-2022 product family (DG sealed-unit variants differ in # the cert lodgement but agree on the spec U-value). doc = json.loads(_API_0380_JSON.read_text()) epc = EpcPropertyDataMapper.from_api_response(doc) # Act — pick any window (cert 0380 lodges only glazing_type=14). 
w = epc.sap_windows[0] td = w.window_transmission_details # Assert assert td is not None assert abs(td.u_value - 1.40) <= 1e-4 assert abs(td.solar_transmittance - 0.72) <= 1e-4 def test_api_0380_wall_with_external_insulation_routes_to_filled_cavity_u() -> None: # Arrange — cert 0380's top-level walls[0].description lodges # "Cavity wall, filled cavity and external insulation". The # worksheet uses U=0.25 for the (29a) external-walls entry — the # very-low-U "filled cavity + external insulation" composite that # RdSAP 10 §5 routes through Table 6's filled-cavity row (with a # further EWI reduction). Our cascade was computing U=0.32 via # the as-built Table 13 bucketed cascade because # `_described_as_insulated` only matches the past-participle # "insulated" — "insulation" (noun) on its own falls through to # False. Cert 0380's lodgement uses the noun form. # # Fix: `_described_as_insulated` should also match the noun # "insulation" (excluding the existing "no insulation" hard # negation), so cavity walls described as carrying insulation # route to the cascade's Filled-cavity branch. doc = json.loads(_API_0380_JSON.read_text()) epc = EpcPropertyDataMapper.from_api_response(doc) # Act from domain.sap10_calculator.rdsap.cert_to_inputs import ( heat_transmission_section_from_cert, ) ht = heat_transmission_section_from_cert(epc) # Assert — main-wall HLC ≈ 46.46 m² × 0.25 = 11.62 W/K (worksheet # exact). Tolerance 1e-2 absorbs sub-component rounding; the # 1e-4 chain test downstream tightens to the cascade floor. worksheet_walls_w_per_k = 11.62 assert abs(ht.walls_w_per_k - worksheet_walls_w_per_k) <= 1e-2 def test_api_0380_heat_pump_no_secondary_heating_per_table_11() -> None: # Arrange — SAP 10.2 Table 11 explicitly notes "Cat 4 (heat pump): # 0.00 (HP eff includes any secondary)" — heat pumps don't apply a # Table 11 secondary fraction even when the cert lodges a secondary # heating type, because the HP efficiency already incorporates any # supplementary heat source. 
The `_SECONDARY_HEATING_FRACTION_BY_ # CATEGORY` dict in cert_to_inputs.py had entries for categories # 1/2/3/5/6/7/10 but DID NOT include cat 4 — so HP certs with a # lodged secondary fell through to the DEFAULT 0.10, billing 10% # of space-heating cost as "secondary" (cert 0380: £72 secondary # vs worksheet £0). # # Cert 0380 lodges secondary_heating_type=691 + main_heating_ # category=4 (HP, PCDB idx 104568). Worksheet line (242) "Space # heating - secondary" shows 0.0 kWh; cascade was producing # 547.30 kWh. Fix: dict entry `4: 0.0`. doc = json.loads(_API_0380_JSON.read_text()) epc = EpcPropertyDataMapper.from_api_response(doc) # Act from domain.sap10_calculator.calculator import calculate_sap_from_inputs from domain.sap10_calculator.rdsap.cert_to_inputs import ( cert_to_inputs, SAP_10_2_SPEC_PRICES, ) result = calculate_sap_from_inputs( cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES) ) # Assert — secondary heating contributes 0 kWh / £0 on HP certs. assert result.secondary_heating_fuel_kwh_per_yr == 0.0 def test_api_0380_heat_pump_no_pumps_fans_kwh_per_table_4f() -> None: # Arrange — SAP 10.2 Table 4f lists annual pumps + fans electricity # consumption by main heating category. Gas-fired boilers (cat 2) # use 160 kWh/yr (115 central heating pump + 45 flue fan). Heat # pumps (cat 4) have NO additional pumps/fans contribution because # the HP system's circulation pump and fans are already # incorporated into the system COP. # # The cascade's `_PUMPS_FANS_KWH_BY_MAIN_CATEGORY` dict only had a # cat-2 entry; cat-4 HP certs fell through to the DEFAULT 130 # kWh/yr (~£17 at 13.19 p/kWh) — the worksheet line (249) "Pumps, # fans and electric keep-hot" shows 0.0000 kWh/yr for cert 0380. 
doc = json.loads(_API_0380_JSON.read_text()) epc = EpcPropertyDataMapper.from_api_response(doc) # Act from domain.sap10_calculator.calculator import calculate_sap_from_inputs from domain.sap10_calculator.rdsap.cert_to_inputs import ( cert_to_inputs, SAP_10_2_SPEC_PRICES, ) result = calculate_sap_from_inputs( cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES) ) # Assert assert result.pumps_fans_kwh_per_yr == 0.0 _API_9418_JSON = ( Path(__file__).parents[3] / "domain/sap10_calculator/rdsap/tests/fixtures/golden" / "9418-3062-8205-3566-7200.json" ) _API_2225_JSON = ( Path(__file__).parents[3] / "domain/sap10_calculator/rdsap/tests/fixtures/golden" / "2225-3062-8205-2856-7204.json" ) _API_2636_JSON = ( Path(__file__).parents[3] / "domain/sap10_calculator/rdsap/tests/fixtures/golden" / "2636-0525-2600-0401-2296.json" ) def test_api_2636_cantilever_floor_surfaces_as_exposed_floor() -> None: # Arrange — cert 2636 (Mitsubishi ASHP, semi-detached, 2 storeys, # property_type=0) has BP0 floor 0 area 39.18 m² and floor 1 area # 42.92 m². The 3.74 m² difference is an upper-floor cantilever — # worksheet (28b) "Exposed floor Main: 3.74 × 1.20 = 4.4880" treats # it per RdSAP Table 20 U_exposed_floor at age-D + no insulation # = 1.20 W/m²K. # # Without the cantilever surfaced, cert 2636 cascade SAP = # 86.7514 vs worksheet 86.2641 (Δ +0.49 — by far the largest # outlier in the 7-cert ASHP cohort, where the other 6 cluster # at ±0.06). Pre-fix HLC drift was -4.51 W/K = 3.74 × 1.20 + # 0.15 × 3.74 thermal-bridging contribution on the extra exposed # area. Tolerance ±0.07 covers the residual PSR/HLC drift that # this cert shares with the 7-cohort cluster (per the slice # 102f-prep.10 alt-wall-allocation fix this cert moves from the # near-zero cancellation state into the cohort cluster). 
doc = json.loads(_API_2636_JSON.read_text()) epc = EpcPropertyDataMapper.from_api_response(doc) # Act — full cert→inputs→calculator cascade result = calculate_sap_from_inputs( cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES) ) # Assert — SAP within 0.07 of worksheet 86.2641. assert abs(result.sap_score_continuous - 86.2641) < 0.07, ( f"cascade SAP={result.sap_score_continuous:.4f} vs worksheet 86.2641" ) def test_api_2636_thermal_bridging_excludes_alt_wall_window_opening_per_sap_10_2_appendix_k() -> None: # Arrange — API-path mirror of the Summary-path (31) NET pin. # The Summary EPC and API EPC for cert 2636 produce identical # cascade output once the alt-wall window opening is deducted # from (31) per SAP 10.2 Appendix K eqn (K2) p.84. Worksheet (36) # = 24.0495 W/K, worksheet "SAP value" 86.2641 — cascade closes # to the 1e-4 spec-precision floor on the API path too. doc = json.loads(_API_2636_JSON.read_text()) epc = EpcPropertyDataMapper.from_api_response(doc) # Act result = calculate_sap_from_inputs( cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES) ) # Assert assert abs(result.intermediate["thermal_bridging_w_per_k"] - 24.0495) <= 1e-4 assert abs(result.sap_score_continuous - 86.2641) <= 1e-4 def test_api_2636_alt_wall_openings_deducted_from_alt_not_main() -> None: # Arrange — cert 2636 has BP0 with `sap_alternative_wall_1` # (area 12.76 m², cavity unfilled at age D → U=0.70) and 7 # windows. One window (1.14 × 1.04 ≈ 1.19 m²) lodges # `window_wall_type=2` → it sits on the alt wall, not main. # # Per RdSAP §1.4.2 wall openings deduct from the wall they # pierce. 
Worksheet (29a): # Main: gross 61.73, openings 14.03, net 47.70 → 0.25 × 47.70 = 11.925 # Alt.1: gross 12.76, openings 1.19, net 11.57 → 0.70 × 11.57 = 8.099 # Total walls (29a) = 20.024 # # Pre-fix cascade subtracted ALL openings from the (main+alt) # gross then routed the alt at its FULL gross — over-counting # alt's contribution by 1.19 × (0.70 − 0.25) ≈ 0.535 W/K, and # under-counting main by the matching 1.19 × 0.25 — net +0.535. doc = json.loads(_API_2636_JSON.read_text()) epc = EpcPropertyDataMapper.from_api_response(doc) # Act — full cascade so windows + doors are read from the cert. inputs = cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES) # Assert — worksheet sum 11.925 + 8.099 = 20.024 at 1e-3. assert abs(inputs.heat_transmission.walls_w_per_k - 20.024) < 1e-3, ( f"cascade walls={inputs.heat_transmission.walls_w_per_k:.4f} " f"vs worksheet 20.024" ) def test_api_2225_no_mixer_lodged_uses_zero_showers_per_worksheet() -> None: # Arrange — cert 2225 lodges `mixer_shower_count = None` (the field # is unlodged in the API JSON, not "0"). The worksheet (42a) "Hot # water usage for mixer showers" shows 0.0000 every month — the # Elmhurst convention is "absent ⇒ no shower". Cascade previously # defaulted to a single 7 L/min vented mixer when unlodged, which # raised (44) daily HW use from 122.89 → 130.56 l/day (Jan) and # added ~113 kWh/yr to (62) HW demand. The cohort-modal lodging # is 0 (5/7 certs lodge mixer=0 explicitly). doc = json.loads(_API_2225_JSON.read_text()) epc = EpcPropertyDataMapper.from_api_response(doc) # Act inputs = cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES) # Assert — HW fuel kWh tracks worksheet (247) 1634.04 at 1e-1 # (η_water = 172.85 implies demand 2824.44; fuel = demand / η). 
worksheet_hw_fuel_kwh = 1634.04 assert abs(inputs.hot_water_kwh_per_yr - worksheet_hw_fuel_kwh) <= 0.1 def test_api_9418_daikin_24h_duration_mean_internal_temp_matches_worksheet_92() -> None: # Arrange — cert 9418 (Daikin Altherma EDLQ05CAV3, PCDB 102421) # lodges `heating_duration_code = "24"`. Per SAP 10.2 Table N4 (PDF # p.107) this means N24,9 = 365 (all days operate at 24-hour # heating, no off-period). Worksheet (87) MIT_living = 21.0 every # month (= Th1, no off period), worksheet (90) MIT_elsewhere # collapses to Th2 directly. Worksheet (92) blended at fLA = 0.30. # # Pre-slice-102f-prep.7 the helper's "V"-only gate returned None # for this duration → bimodal cascade gave MIT ~17.8-19.8 (off by # ~2°C). After Table N4 wiring the cascade lands at 1e-3. doc = json.loads(_API_9418_JSON.read_text()) epc = EpcPropertyDataMapper.from_api_response(doc) # Act inputs = cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES) # Assert — worksheet (92) "MIT" 12-tuple at 1e-3 per month. worksheet_mit_92 = ( 19.8400, 19.8445, 19.8489, 19.8697, 19.8736, 19.8920, 19.8920, 19.8954, 19.8849, 19.8736, 19.8657, 19.8574, ) for m, (cascade, ws) in enumerate(zip( inputs.mean_internal_temp_monthly_c, worksheet_mit_92 )): assert abs(cascade - ws) < 1e-3, ( f"month {m + 1}: cascade={cascade:.4f} vs worksheet={ws:.4f}" ) def test_api_0380_mean_internal_temp_matches_worksheet_92_within_1e_3() -> None: # Arrange — SAP 10.2 Appendix N3.5 (PDF p.107) replaces Table 9c # steps 3-4 for heat-pump packages with PCDB data: each month # blends Th, T_unimodal, T_bimodal via Equation N5. # # Cert 0380 (Mitsubishi PUZ-WM50VHA, PCDB 104568, PSR ≈ 1.43) # lands on Table N5 row "1.2 or more" → annual totals (3, 38) → # Jan(3, 28) + Dec(0, 10) extended days. # # Pre-slice-102f-prep.6 the cold-month MIT drifted +0.008°C due to # `internal_gains_from_cert` injecting the central-heating pump's # heating-season gain (~7 W) on HP certs. 
SAP 10.2 Table 4f # specifies zero pump/fan gains on HP packages (cert 0380's # worksheet line 70 = 0.0 every month) — that gating drops the # spurious gain and tightens the MIT cascade against worksheet # (92) to 1e-3 per month. doc = json.loads(_API_0380_JSON.read_text()) epc = EpcPropertyDataMapper.from_api_response(doc) # Act inputs = cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES) # Assert — pin against worksheet line (92) "MIT" 12-tuple. worksheet_mit_92 = ( 18.9539, 18.0081, 18.3466, 18.8491, 19.3582, 19.8174, 20.0288, 20.0064, 19.6975, 19.0702, 18.3966, 18.1573, ) for m, (cascade, ws) in enumerate(zip( inputs.mean_internal_temp_monthly_c, worksheet_mit_92 )): assert abs(cascade - ws) < 1e-3, ( f"month {m + 1}: cascade={cascade:.4f} vs worksheet={ws:.4f}" ) def test_api_9501_room_in_roof_surfaces_populated() -> None: # Arrange — cert 9501's API JSON lodges measured RR detail under # `sap_room_in_roof.room_in_roof_details`: two gable walls # (5.51 m × 2.45 m + 6.51 m × 2.45 m) and a flat ceiling (5.5 m × # 1.0 m, 300 mm insulation). The schema's `SapRoomInRoof` dataclass # exposed the inner block under the wrong field name # `room_in_roof_type_1` (the legacy Simplified Type 1 wrapper), # so `from_dict` parsed the inner block as None — the API mapper # then built `SapRoomInRoof` with no per-surface area data, and # the cascade defaulted to the Simplified Type 2 "all elements" # branch (RR floor_area × Table 18 col(4) age-B U=2.30) for the # whole RR → roof HLC 149.43 vs worksheet 18.10 (Δ +131). doc = json.loads(_API_9501_JSON.read_text()) # Act epc = EpcPropertyDataMapper.from_api_response(doc) # Assert — RR surfaces present and match worksheet element table: # Gable Wall 1 = 13.50 m², Gable Wall 2 = 15.95 m², Flat Ceiling 1 # = 5.50 m² (per worksheet §3 element table). 
rir = epc.sap_building_parts[0].sap_room_in_roof assert rir is not None assert rir.detailed_surfaces is not None kinds_by_area = sorted((s.kind, s.area_m2) for s in rir.detailed_surfaces) assert kinds_by_area == [ ("flat_ceiling", 5.5), ("gable_wall_external", 13.50), ("gable_wall_external", 15.95), ] def test_api_0330_full_chain_sap_matches_worksheet_pdf_exactly() -> None: # Arrange — cert 0330-2249-8150-2326-4121 (second boiler validation # cert: mains-gas Vaillant PCDB idx 10241, mid-terrace 2-bp dwelling, # TFA 90.56 m²) has both an Elmhurst Summary PDF and a GOV.UK EPB API # JSON. The Summary path lands at 1e-4 vs worksheet SAP 61.5993 # above; this Layer 4 production gate asserts the API path matches # the worksheet to the same 1e-4 tolerance — same forcing function # as cert 001479's Layer 4 test, applied to the second boiler cert. # # Slices 96-99 (flat-roof Table 18 col (3) U-values + glazing_type=2 # surfacing + shower-outlets list normalisation + window-area # rounding alignment) jointly closed the API path from # Δ +2.1453 → Δ -0.000011 vs worksheet 61.5993. doc = json.loads(_API_0330_JSON.read_text()) epc = EpcPropertyDataMapper.from_api_response(doc) # Act result = calculate_sap_from_inputs( cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES) ) # Assert — 1e-4 pin against the worksheet's continuous SAP. worksheet_unrounded_sap = 61.5993 assert abs(result.sap_score_continuous - worksheet_unrounded_sap) < 1e-4 def test_api_001479_full_chain_sap_matches_worksheet_pdf_exactly() -> None: # Arrange — cert 001479 has both an Elmhurst Summary PDF and a GOV.UK # EPB API JSON (ref 0535-9020-6509-0821-6222). The Summary cascade # already pins at worksheet's 69.0094 ± 1e-4 above; this test is the # Layer 4 production-path gate: API JSON → from_api_response → # cert_to_inputs → calculate_sap_from_inputs must also hit 69.0094 # at 1e-4. Identical inputs must produce identical outputs; the # calculator is deterministic, so any drift is a mapper coverage gap. 
doc = json.loads(_API_001479_JSON.read_text()) epc = EpcPropertyDataMapper.from_api_response(doc) # Act result = calculate_sap_from_inputs( cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES) ) # Assert — 1e-4 pin against the worksheet's continuous SAP. ±0.5 is # the API-only fallback (project memory `feedback_api_tolerance_1e_ # minus_4`); when the worksheet is available, identical-inputs-must- # produce-identical-outputs is the bar. worksheet_unrounded_sap = 69.0094 assert abs(result.sap_score_continuous - worksheet_unrounded_sap) < 1e-4 # ============================================================================ # Layer 4 chain tests — 7-cert ASHP cohort # ============================================================================ # These pin the API → from_api_response → cert_to_inputs → # calculate_sap_from_inputs cascade against each cert's Elmhurst dr87 # worksheet unrounded SAP. Tolerance is 0.07 (NOT 1e-4 like the boiler # cohort above) — see HANDOVER_CERT_0380_MIT_CASCADE.md for the # investigation: BRE web confirmed max_output_kw matches cascade # exactly (4.39 / 3.933), cascade (39) annual HLC matches worksheet # at 4 dp, but back-solving worksheet η_space implies ~0.15% drift # in Elmhurst's internal interpolation precision (likely a vendor # rounding convention not in the public SAP 10.2 spec). The 7 certs # cluster within +0.030..+0.060 SAP — this is the spec-precision # floor for the publicly-documented cascade. # # At rounded (integer SAP) precision, all 7 cascade integers match # the lodged values exactly (residual = 0, pinned in # `_GOLDEN_EXPECTATIONS`). 
_API_0350_JSON = (
    Path(__file__).parents[3]
    / "domain/sap10_calculator/rdsap/tests/fixtures/golden"
    / "0350-2968-2650-2796-5255.json"
)
_API_3800_JSON = (
    Path(__file__).parents[3]
    / "domain/sap10_calculator/rdsap/tests/fixtures/golden"
    / "3800-8515-0922-3398-3563.json"
)
_API_9285_JSON = (
    Path(__file__).parents[3]
    / "domain/sap10_calculator/rdsap/tests/fixtures/golden"
    / "9285-3062-0205-7766-7200.json"
)

_ASHP_COHORT_CHAIN_TOLERANCE: float = 1e-4
"""ASHP-cohort chain-test tolerance.

The cohort closed cumulatively across S0380.26..S0380.35: §3.2 curtain +
reciprocal-η interpolation (SAP 10.2 fn 43), glazing-code Table 6b
extension to RdSAP21 codes 8-15, (31) NET area for alt-wall openings
(SAP 10.2 K2), and the RdSAP10 §15 Decimal-rounding cluster on living
area / gross wall / kWp.

At HEAD all 7 ASHP cohort certs sit at < 5e-5 SAP on BOTH paths (worst
residual: cert 2225 4.8e-5):
    Summary path: 7/7 < 1e-4 (cert 2636 -2e-6 after S0380.31)
    API path: 7/7 < 1e-4 (parity with Summary at cascade output level)

1e-4 matches the user's [[feedback-one-e-minus-4-across-the-board]]
target with ~2x headroom over the worst residual. Any future cohort
regression beyond ~5e-5 fires this tolerance loudly."""


def _assert_ashp_chain_matches_worksheet(api_fixture: Path, worksheet_sap: float) -> None:
    """Run one API JSON fixture through the full chain and pin its SAP.

    Loads `api_fixture`, maps it with
    `EpcPropertyDataMapper.from_api_response`, converts it with
    `cert_to_inputs` (using `SAP_10_2_SPEC_PRICES`), runs
    `calculate_sap_from_inputs`, and asserts the continuous SAP is
    strictly within `_ASHP_COHORT_CHAIN_TOLERANCE` of `worksheet_sap`.
    The failure message names the fixture and the signed residual.
    """
    doc = json.loads(api_fixture.read_text())
    epc = EpcPropertyDataMapper.from_api_response(doc)
    result = calculate_sap_from_inputs(
        cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)
    )
    cascade_sap = result.sap_score_continuous
    assert abs(cascade_sap - worksheet_sap) < _ASHP_COHORT_CHAIN_TOLERANCE, (
        f"{api_fixture.name}: cascade SAP={cascade_sap:.6f} vs worksheet "
        f"{worksheet_sap}; Δ={cascade_sap - worksheet_sap:+.6f}"
    )


def test_api_0380_full_chain_sap_within_spec_floor_of_worksheet() -> None:
    # Mitsubishi PUZ-WM50VHA PCDB 104568, semi-detached bungalow age D.
    _assert_ashp_chain_matches_worksheet(_API_0380_JSON, 88.5104)


def test_api_0350_full_chain_sap_within_spec_floor_of_worksheet() -> None:
    # Mitsubishi PUZ-WM50VHA PCDB 104568.
    _assert_ashp_chain_matches_worksheet(_API_0350_JSON, 84.1367)


def test_api_2225_full_chain_sap_within_spec_floor_of_worksheet() -> None:
    # Mitsubishi PUZ-WM50VHA PCDB 104568, with PV. Slice 102f-prep.8
    # closed the shower_outlets=None default.
    _assert_ashp_chain_matches_worksheet(_API_2225_JSON, 88.7921)


def test_api_2636_full_chain_sap_within_spec_floor_of_worksheet() -> None:
    # Mitsubishi PUZ-WM50VHA PCDB 104568, with cantilever + alt wall.
    # Slice 102f-prep.9 (cantilever) + 102f-prep.10 (alt-wall openings).
    _assert_ashp_chain_matches_worksheet(_API_2636_JSON, 86.2641)


def test_api_3800_full_chain_sap_within_spec_floor_of_worksheet() -> None:
    # Mitsubishi PUZ-WM50VHA PCDB 104568.
    _assert_ashp_chain_matches_worksheet(_API_3800_JSON, 86.1458)


def test_api_9285_full_chain_sap_within_spec_floor_of_worksheet() -> None:
    # Mitsubishi PUZ-WM50VHA PCDB 104568.
    _assert_ashp_chain_matches_worksheet(_API_9285_JSON, 84.1369)


def test_api_9418_full_chain_sap_within_spec_floor_of_worksheet() -> None:
    # Daikin Altherma EDLQ05CAV3 PCDB 102421, heating_duration_code='24'
    # (continuous, all days at Th). Slice 102f-prep.7 closed Table N4.
    _assert_ashp_chain_matches_worksheet(_API_9418_JSON, 84.6305)


# ============================================================================
# Cohort-2 API-path chain tests (cross-mapper parity at the cascade)
# ============================================================================
# Mirror the cohort-2 Summary-path sweep that closed across S0380.30..38.
# Per [[feedback-cross-mapper-parity-via-cascade]]: API EPC and Elmhurst EPC
# must produce SAP within 1e-4 of each other AND of the worksheet — the
# SAP cascade is the load-bearing equivalence check. Each cert in this
# cohort has both a Summary PDF (under `sap worksheets/additional with
# api 2//Summary_*.pdf`) and an API JSON fixture (fetched into
# `domain/sap10_calculator/rdsap/tests/fixtures/golden/<cert_dir_name>.json`
# in Slice S0380.39). Worksheet SAP is the source of truth.
# # Cohort-2 API-path closure history (each slice closed a distinct # spec-citation gap, then re-pinned the cohort): # S0380.40 — parametrized over all 38 certs; 34 immediate / 4 open # S0380.41 — RdSAP 21 → SAP 10.2 glazing-type alias closed 0300/9380 # S0380.42 — Decimal HALF_UP per-window areas closed 1536 # S0380.43 — SAP 631 → spec fuel (House coal) closed 2102 # At HEAD: 38/38 cohort-2 certs hit <1e-4 on the API path, matching # the Summary-path sweep (also 38/38 <1e-4 at HEAD). Cross-mapper # parity at the cascade is fully established. _COHORT_2_API_FIXTURE_DIR: Path = ( Path(__file__).parents[3] / "domain/sap10_calculator/rdsap/tests/fixtures/golden" ) # (cert_dir, worksheet_unrounded_sap) — 34 cohort-2 certs whose API-path # cascade hits the worksheet's continuous SAP at 1e-4 without any # follow-up mapper work. Identical to the Summary-path sweep at the # same tolerance: cross-mapper parity is achieved via cascade output # equivalence (per [[feedback-cross-mapper-parity-via-cascade]]). 
_COHORT_2_API_CLOSED: list[tuple[str, float]] = [
    ("0036-6325-1100-0063-1226", 62.7471),
    ("0100-5141-0522-4696-3463", 85.8332),
    ("0200-3155-0122-2602-3563", 80.8674),
    ("0300-2403-2650-2206-0235", 76.6541),  # S0380.41 closure
    ("0310-2763-5450-2506-3501", 78.3593),
    ("0320-2126-2150-2326-6161", 71.7224),
    ("0320-2756-8640-2296-1101", 89.9458),
    ("0330-2257-3640-2196-3145", 84.6541),
    ("0360-2266-5650-2106-8285", 80.468),
    ("0380-2530-6150-2326-4161", 65.7795),
    ("0390-2066-4250-2026-4555", 65.3253),
    ("0464-3032-0205-4276-3204", 80.4533),
    ("0652-3022-1205-2826-1200", 70.9577),
    ("1536-9325-5100-0433-1226", 65.8928),  # S0380.42 closure
    ("2007-3011-9205-8136-3204", 68.3914),
    ("2031-3007-0205-1296-3204", 64.1734),
    ("2102-3018-0205-7886-5204", 63.8732),  # S0380.43 closure
    ("2130-3018-4205-4686-5204", 71.3158),
    ("2336-3124-3600-0517-1292", 83.4955),
    ("2536-2525-0600-0788-2292", 79.7264),
    ("2590-3025-7205-9066-0200", 65.9194),
    ("2699-3025-5205-8066-0200", 68.7535),
    ("2800-7999-0322-4594-3563", 78.1408),
    ("3136-7925-4500-0246-6202", 77.8872),
    ("3336-2825-9400-0512-8292", 78.3739),
    ("4536-5424-8600-0109-1226", 82.4974),
    ("4536-8325-3100-0409-1222", 65.6),
    ("4800-3992-0422-0599-3563", 86.7192),
    ("6835-3920-2509-0933-5226", 80.1977),
    ("7700-3362-0922-7022-3563", 63.4425),
    ("7800-1501-0922-7127-3563", 64.7504),
    ("7836-3125-0600-0526-2202", 80.1792),
    ("9036-0824-3500-0420-8222", 84.2727),
    ("9370-3060-1205-3546-4204", 87.8687),
    ("9380-2957-7490-2595-3141", 74.5902),  # S0380.41 closure
    ("9421-3045-3205-1646-6200", 87.4495),
    ("9796-3058-6205-0346-9200", 90.1318),
    ("9836-7525-9500-0575-1202", 75.2223),
]


def _cascade_continuous_sap_from_api(cert_dir_name: str) -> float:
    """Return the cascade's continuous SAP for one cohort-2 API fixture.

    Reads `<cert_dir_name>.json` from `_COHORT_2_API_FIXTURE_DIR`, maps it
    with `EpcPropertyDataMapper.from_api_response`, and runs the result
    through `cert_to_inputs` (at `SAP_10_2_SPEC_PRICES`) and
    `calculate_sap_from_inputs`.
    """
    fixture_path = _COHORT_2_API_FIXTURE_DIR / f"{cert_dir_name}.json"
    payload = json.loads(fixture_path.read_text())
    cert = EpcPropertyDataMapper.from_api_response(payload)
    calculator_inputs = cert_to_inputs(cert, prices=SAP_10_2_SPEC_PRICES)
    return calculate_sap_from_inputs(calculator_inputs).sap_score_continuous


@pytest.mark.parametrize("cert_dir_name,ws_sap", _COHORT_2_API_CLOSED)
def test_api_cohort_2_full_chain_sap_matches_worksheet_at_1e_minus_4(
    cert_dir_name: str, ws_sap: float
) -> None:
    """Pin each cohort-2 cert's API-path SAP to its worksheet value.

    The GOV.UK EPB API JSON → `from_api_response` → `cert_to_inputs` →
    `calculate_sap_from_inputs` chain must land within abs <= 1e-4 of
    the worksheet's continuous SAP — the tolerance the Summary path is
    held to as well ([[feedback-cross-mapper-parity-via-cascade]]).
    """
    # Run the chain; the cascade output is the value under test.
    cascade_sap = _cascade_continuous_sap_from_api(cert_dir_name)

    # Signed residual against the worksheet, reported on failure.
    residual = cascade_sap - ws_sap

    assert abs(residual) <= 1e-4, (
        f"cert {cert_dir_name}: cascade SAP={cascade_sap:.6f} vs worksheet {ws_sap}; Δ={residual:+.6f}"
    )


# ============================================================================
# Mapper-vs-hand-built EpcPropertyData diff tests
# ============================================================================
# The 6 cohort hand-builts (_elmhurst_worksheet_NNNNNN.build_epc) are the
# 100%-correct calculator-input ground truth — each cascades to its
# worksheet PDF's lodged SAP at 1e-4. The chain tests above only assert
# cascade-output equivalence; the mapper can pass them by producing a
# *different* EpcPropertyData that happens to cascade to the same number.
#
# These tests pin the missing layer: the mapper's EpcPropertyData must
# match the hand-built's load-bearing fields exactly. Every divergence
# surfaced here is a mapper coverage gap to close as its own slice.
#
# "Load-bearing" = the subset of EpcPropertyData fields that drive the
# SAP cascade or carry semantic cross-mapper meaning. Cert-metadata
# fields (address, registration dates, descriptive EnergyElement lists,
# tariff strings) are excluded because they don't change calculator
# output and vary by mapper pathway (the API publishes some, the
# Elmhurst Summary publishes others) without semantic disagreement.
# SapWindow sub-fields the cascade doesn't read (descriptive Union[int, # str] codes lodged differently by each mapper). The cascade reads # window_width / window_height / orientation / window_location / # frame_factor / window_transmission_details.{u_value,solar_ # transmittance} — those WILL still be diffed; everything else on # SapWindow is metadata and excluded to avoid noise from the int/str # dual encoding (API mapper produces int codes; Elmhurst mapper # surfaces the Summary's lodged strings). _NON_LOAD_BEARING_WINDOW_SUBFIELDS: frozenset[str] = frozenset({ "frame_material", "glazing_gap", "window_type", "glazing_type", "window_wall_type", "draught_proofed", "permanent_shutters_present", "permanent_shutters_insulated", }) def _is_excluded_path(path: str) -> bool: """Return True for paths the diff should silently skip — non-cascade- affecting Union[int, str] encoding differences between the API and Elmhurst mapper outputs that cohort hand-built fixtures don't pin.""" if path.startswith("sap_windows[") and "]." in path: suffix = path.split("].", 1)[1] if suffix in _NON_LOAD_BEARING_WINDOW_SUBFIELDS: return True if suffix == "window_transmission_details.data_source": return True # `roof_construction_type` is set by the Elmhurst mapper from # `roof.roof_type` (e.g. "Pitched (slates/tiles), access to loft") and # left None by the cohort hand-builts. The cascade in # `heat_transmission.py:562` only dispatches on the "sloping ceiling" # substring (RdSAP §3.8); none of the cohort certs lodge pitched- # sloping-ceiling roofs, so both values produce identical cascade # output. Exclude from the diff to avoid flagging informational drift. 
if path.startswith("sap_building_parts[") and path.endswith(".roof_construction_type"): return True # `sap_ventilation.has_suspended_timber_floor` and # `..._sealed` are set explicitly on the hand-builts (to mirror the # cohort U985 worksheets' (12) infiltration values) but left None by # the Elmhurst mapper because the Summary PDF doesn't surface floor- # construction in a parseable form. When None, `cert_to_inputs._ # has_suspended_timber_floor_per_spec` infers the value mechanically # from per-bp floor-construction data — producing the same cascade # output the explicit-bool hand-built path produces for cohort 000477 # / 000516 (where the spec inference and the worksheet agree). Where # the spec inference and worksheet disagree (cohort 000474, 000480, # 000487, 000490), the chain SAP-pin tests fail separately — that's # a known Elmhurst-worksheet-vs-RdSAP-10 §5 (12) divergence, not a # mapper diff issue. if path == "sap_ventilation.has_suspended_timber_floor": return True if path == "sap_ventilation.suspended_timber_floor_sealed": return True return False _LOAD_BEARING_FIELDS: tuple[str, ...] 
= ( # Cascade-driving structural fields "sap_building_parts", "sap_windows", "sap_roof_windows", "sap_heating", "sap_ventilation", "sap_energy_source", "total_floor_area_m2", # Building-classification fields driving default cascades "dwelling_type", "built_form", "property_type", "country_code", "postcode", # Counts and openings "door_count", "insulated_door_count", "insulated_door_u_value", "habitable_rooms_count", "heated_rooms_count", "wet_rooms_count", "extensions_count", "open_chimneys_count", "blocked_chimneys_count", "extract_fans_count", # Lighting "cfl_fixed_lighting_bulbs_count", "led_fixed_lighting_bulbs_count", "incandescent_fixed_lighting_bulbs_count", "low_energy_fixed_lighting_bulbs_count", "fixed_lighting_outlets_count", "low_energy_fixed_lighting_outlets_count", # HW / appliances "solar_water_heating", "has_hot_water_cylinder", "has_fixed_air_conditioning", "has_conservatory", "has_heated_separate_conservatory", # Envelope drivers "percent_draughtproofed", "mechanical_ventilation", "pressure_test", # Construction-detail flags "addendum", "lzc_energy_sources", "any_unheated_rooms", "number_of_storeys", "sap_flat_details", ) def _diff_load_bearing( mapped: object, hand_built: object, path: str = "", ) -> list[str]: """Recursive field diff; yields one line per leaf divergence between mapped EpcPropertyData and the hand-built fixture. Int/float type differences with the same numeric value are not flagged. 
Strict-pyright posture: arguments typed `object` so each branch narrows via `isinstance` rather than threading `Any` through the recursion (which pyright can't reason about under `strict`/`typeCheckingMode = strict`).""" out: list[str] = [] if type(mapped) is not type(hand_built): if not (isinstance(mapped, (int, float)) and isinstance(hand_built, (int, float))): if not _is_excluded_path(path): out.append( f"{path}: TYPE {type(mapped).__name__} vs " f"{type(hand_built).__name__} mapped={mapped!r} " f"handbuilt={hand_built!r}" ) return out if dataclasses.is_dataclass(mapped) and not isinstance(mapped, type) \ and dataclasses.is_dataclass(hand_built) and not isinstance(hand_built, type): for fld in dataclasses.fields(mapped): out.extend(_diff_load_bearing( getattr(mapped, fld.name), getattr(hand_built, fld.name), f"{path}.{fld.name}" if path else fld.name, )) return out if isinstance(mapped, list) and isinstance(hand_built, list): mapped_list = cast("list[object]", mapped) hand_built_list = cast("list[object]", hand_built) if len(mapped_list) != len(hand_built_list): out.append(f"{path}: LEN {len(mapped_list)} vs {len(hand_built_list)}") return out for i, (m_item, h_item) in enumerate(zip(mapped_list, hand_built_list)): out.extend(_diff_load_bearing(m_item, h_item, f"{path}[{i}]")) return out if mapped != hand_built: if not _is_excluded_path(path): out.append(f"{path}: mapped={mapped!r} handbuilt={hand_built!r}") return out def test_from_elmhurst_site_notes_matches_hand_built_000474() -> None: # Arrange — _elmhurst_worksheet_000474.build_epc() is the canonical # hand-built EpcPropertyData for cert U985-0001-000474; it cascades # to the worksheet PDF's `SAP value 62.2584` at 1e-4 (cohort SAP- # result pin). Routing the corresponding Summary PDF through the # Elmhurst mapper MUST produce a load-bearing-field-equivalent # EpcPropertyData; any divergence is a mapper-coverage gap. # # Tracer-bullet scope: cert 000474 only. 
Once GREEN, parametrize # over the 5 other cohort fixtures. pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000474_PDF) site_notes = ElmhurstSiteNotesExtractor(pages).extract() mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) hand_built = _w000474.build_epc() # Act diffs: list[str] = [] for field_name in _LOAD_BEARING_FIELDS: diffs.extend(_diff_load_bearing( getattr(mapped, field_name, None), getattr(hand_built, field_name, None), field_name, )) # Assert assert not diffs, ( f"{len(diffs)} load-bearing divergence(s) between mapped and " f"hand-built EpcPropertyData for cohort cert 000474:\n " + "\n ".join(diffs) ) def test_from_elmhurst_site_notes_matches_hand_built_000477() -> None: # Arrange — _elmhurst_worksheet_000477.build_epc() is the canonical # hand-built EpcPropertyData for cert U985-0001-000477 (single-bp # mid-terrace, age band B, RIR with stud walls + party gables, no # extension); it cascades to the worksheet PDF's `SAP value 65.0057` # at 1e-4. Routing the Summary PDF through the Elmhurst mapper MUST # produce a load-bearing-field-equivalent EpcPropertyData; any # divergence is a mapper-coverage gap to close as its own slice. 
pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000477_PDF) site_notes = ElmhurstSiteNotesExtractor(pages).extract() mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) hand_built = _w000477.build_epc() # Act diffs: list[str] = [] for field_name in _LOAD_BEARING_FIELDS: diffs.extend(_diff_load_bearing( getattr(mapped, field_name, None), getattr(hand_built, field_name, None), field_name, )) # Assert assert not diffs, ( f"{len(diffs)} load-bearing divergence(s) between mapped and " f"hand-built EpcPropertyData for cohort cert 000477:\n " + "\n ".join(diffs) ) def test_from_elmhurst_site_notes_matches_hand_built_000480() -> None: # Arrange — _elmhurst_worksheet_000480.build_epc() is the canonical # hand-built EpcPropertyData for cert U985-0001-000480 (mid-terrace # with main + 1 extension + 19.83 m² RIR, gas combi); it cascades # to the worksheet PDF's `SAP value 61.2986` at 1e-4. Routing the # Summary PDF through the Elmhurst mapper MUST produce a load- # bearing-field-equivalent EpcPropertyData; any divergence is a # mapper-coverage gap to close as its own slice. 
pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000480_PDF) site_notes = ElmhurstSiteNotesExtractor(pages).extract() mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) hand_built = _w000480.build_epc() # Act diffs: list[str] = [] for field_name in _LOAD_BEARING_FIELDS: diffs.extend(_diff_load_bearing( getattr(mapped, field_name, None), getattr(hand_built, field_name, None), field_name, )) # Assert assert not diffs, ( f"{len(diffs)} load-bearing divergence(s) between mapped and " f"hand-built EpcPropertyData for cohort cert 000480:\n " + "\n ".join(diffs) ) def test_from_elmhurst_site_notes_matches_hand_built_000487() -> None: # Arrange — _elmhurst_worksheet_000487.build_epc() is the canonical # hand-built EpcPropertyData for cert U985-0001-000487 (Enclosed # Mid-Terrace, main + 1 extension + 21.03 m² RIR with explicit-U # gable_wall_external, gas combi, 1 electric shower, 1.43 m² # timber-frame alt wall on the extension); it cascades to the # worksheet PDF's `SAP value 61.6431` at 1e-4. Routing the Summary # PDF through the Elmhurst mapper MUST produce a load-bearing- # field-equivalent EpcPropertyData; any divergence is a mapper- # coverage gap to close as its own slice. 
pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000487_PDF) site_notes = ElmhurstSiteNotesExtractor(pages).extract() mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) hand_built = _w000487.build_epc() # Act diffs: list[str] = [] for field_name in _LOAD_BEARING_FIELDS: diffs.extend(_diff_load_bearing( getattr(mapped, field_name, None), getattr(hand_built, field_name, None), field_name, )) # Assert assert not diffs, ( f"{len(diffs)} load-bearing divergence(s) between mapped and " f"hand-built EpcPropertyData for cohort cert 000487:\n " + "\n ".join(diffs) ) def test_from_elmhurst_site_notes_matches_hand_built_000490() -> None: # Arrange — _elmhurst_worksheet_000490.build_epc() is the canonical # hand-built EpcPropertyData for cert U985-0001-000490 (End-Terrace, # main + 1 extension, gas combi + gas-secondary; sheltered_sides=1 # per RdSAP §S5); it cascades to the worksheet PDF's `SAP value # 57.3979` at 1e-4. Routing the Summary PDF through the Elmhurst # mapper MUST produce a load-bearing-field-equivalent # EpcPropertyData; any divergence is a mapper-coverage gap. 
pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000490_PDF) site_notes = ElmhurstSiteNotesExtractor(pages).extract() mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) hand_built = _w000490.build_epc() # Act diffs: list[str] = [] for field_name in _LOAD_BEARING_FIELDS: diffs.extend(_diff_load_bearing( getattr(mapped, field_name, None), getattr(hand_built, field_name, None), field_name, )) # Assert assert not diffs, ( f"{len(diffs)} load-bearing divergence(s) between mapped and " f"hand-built EpcPropertyData for cohort cert 000490:\n " + "\n ".join(diffs) ) def test_from_elmhurst_site_notes_matches_hand_built_000516() -> None: # Arrange — _elmhurst_worksheet_000516.build_epc() is the canonical # hand-built EpcPropertyData for cert U985-0001-000516 (Mid-Terrace, # main + 19.02 m² RIR, 5 vertical windows + 1 roof window which the # mapper routes to `sap_roof_windows` per `U > 3.0` discrimination); # it cascades to the worksheet PDF's `SAP value 62.7937` at 1e-4. # Routing the Summary PDF through the Elmhurst mapper MUST produce # a load-bearing-field-equivalent EpcPropertyData. pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000516_PDF) site_notes = ElmhurstSiteNotesExtractor(pages).extract() mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) hand_built = _w000516.build_epc() # Act diffs: list[str] = [] for field_name in _LOAD_BEARING_FIELDS: diffs.extend(_diff_load_bearing( getattr(mapped, field_name, None), getattr(hand_built, field_name, None), field_name, )) # Assert assert not diffs, ( f"{len(diffs)} load-bearing divergence(s) between mapped and " f"hand-built EpcPropertyData for cohort cert 000516:\n " + "\n ".join(diffs) )