Model/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py
Khalim Conn-Kowlessar 8133521c43 S0380.237: map "Secondary glazing - Low emissivity" → SAP 10.2 code 12
Completes the secondary-glazing family. S0380.235 mapped the unknown-data
(7) and normal-emissivity (11) secondary variants; the RdSAP-21.0.1
`glazed_type` enum also defines code 12 "secondary glazing, low
emissivity", whose Elmhurst §11 label "Secondary glazing - Low
emissivity" was unmapped and would strict-raise. Cascade code 12 carries
the same daylight/solar bucket as 7/11 (g_L=0.80, g⊥=0.76); the lodged
manufacturer U/g drive §3/§6. With this the double family (codes 1/2/3/
7/13 via their Elmhurst phrasings) and the secondary family (4/11/12) are
fully covered. Coverage test extended.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-05 09:35:35 +00:00

4471 lines
207 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""End-to-end validation for the Elmhurst Summary→EpcPropertyData chain.
The 6 Elmhurst worksheet fixtures in `tests.domain.sap10_calculator.worksheet`
build their `EpcPropertyData` synthetically — they validate the
calculator + cascade in isolation from the mapper. This file pins
the OTHER half of the chain: `from_elmhurst_site_notes` must produce
a calculator-equivalent `EpcPropertyData` when fed the Summary PDF
the worksheet was generated from. Together with the worksheet
cascade tests, this closes the loop: extractor + mapper + cascade
+ calculator validated end-to-end against the authoritative
Elmhurst documents.
Status: GREEN. For cert U985-0001-000474, this pipeline produces an
unrounded SAP within 0.5 of the worksheet PDF's `62.2584` (line 257).
The cascade itself reproduces Elmhurst's calculator exactly on
hand-built inputs (handbuilt → 62.2584 to 4 d.p.); the remaining
sub-half-point gap from the mapped path is non-load-bearing field
drift (e.g. central_heating_pump_age the Summary PDF doesn't lodge).
Preprocessing: the existing `ElmhurstSiteNotesExtractor` was written
against Textract-style output (label\\nvalue pairs in spatial
reading order). We don't have Textract in the test environment, so
this helper converts `pdftotext -layout` output (label-whitespace-
value on a single line) into the Textract-style sequence the
extractor expects. Test-only preprocessing; production runs through
Textract directly.
"""
from __future__ import annotations
import dataclasses
import json
import re
import subprocess
from pathlib import Path
from typing import cast
import pytest
from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtractor
from datatypes.epc.domain.mapper import (
EpcPropertyDataMapper,
UnmappedApiCode,
UnmappedElmhurstLabel,
_elmhurst_glazing_type_code, # pyright: ignore[reportPrivateUsage]
)
from domain.sap10_calculator.calculator import calculate_sap_from_inputs
from domain.sap10_calculator.rdsap.cert_to_inputs import SAP_10_2_SPEC_PRICES, cert_to_inputs
from domain.sap10_ml.rdsap_uvalues import u_party_wall
from tests.domain.sap10_calculator.worksheet import (
_elmhurst_worksheet_000474 as _w000474,
_elmhurst_worksheet_000477 as _w000477,
_elmhurst_worksheet_000480 as _w000480,
_elmhurst_worksheet_000487 as _w000487,
_elmhurst_worksheet_000490 as _w000490,
_elmhurst_worksheet_000516 as _w000516,
)
_FIXTURES = Path(__file__).parent / "fixtures"
_SUMMARY_000474_PDF = _FIXTURES / "Summary_000474.pdf"
_SUMMARY_000477_PDF = _FIXTURES / "Summary_000477.pdf"
_SUMMARY_000480_PDF = _FIXTURES / "Summary_000480.pdf"
_SUMMARY_000487_PDF = _FIXTURES / "Summary_000487.pdf"
_SUMMARY_000490_PDF = _FIXTURES / "Summary_000490.pdf"
_SUMMARY_000516_PDF = _FIXTURES / "Summary_000516.pdf"
_SUMMARY_001479_PDF = _FIXTURES / "Summary_001479.pdf"
_SUMMARY_000897_PDF = _FIXTURES / "Summary_000897.pdf"
_SUMMARY_000784_PDF = _FIXTURES / "Summary_000784.pdf"
_SUMMARY_000899_PDF = _FIXTURES / "Summary_000899.pdf"
_SUMMARY_000903_PDF = _FIXTURES / "Summary_000903.pdf"
_SUMMARY_000901_PDF = _FIXTURES / "Summary_000901.pdf" # cert 3800
_SUMMARY_000904_PDF = _FIXTURES / "Summary_000904.pdf" # cert 9285
_SUMMARY_000900_PDF = _FIXTURES / "Summary_000900.pdf" # cert 2225
_SUMMARY_000898_PDF = _FIXTURES / "Summary_000898.pdf" # cert 2636
_SUMMARY_000902_PDF = _FIXTURES / "Summary_000902.pdf" # cert 9418
_SUMMARY_000889_PDF = _FIXTURES / "Summary_000889.pdf" # cert 2536 (Normal cylinder)
_SUMMARY_000884_PDF = _FIXTURES / "Summary_000884.pdf" # cert 9421 (Normal cylinder)
_SUMMARY_000910_PDF = _FIXTURES / "Summary_000910.pdf" # cert 0036 (Flat, party wall U=0)
_SUMMARY_000890_PDF = _FIXTURES / "Summary_000890.pdf" # cert 7800 (two electric showers)
_SUMMARY_000565_PDF = _FIXTURES / "Summary_000565.pdf" # cert 000565 (5-bp Elmhurst-only)
# GOV.UK EPB API JSON for cert 001479 — the API-path counterpart of the
# Summary_001479.pdf fixture. Together they drive the API ≡ Summary
# parity workstream; Layer 4 of the validation stack is "API cascade SAP
# matches worksheet continuous SAP at 1e-4".
_API_001479_JSON = (
Path(__file__).parents[3]
/ "tests/domain/sap10_calculator/rdsap/fixtures/golden"
/ "0535-9020-6509-0821-6222.json"
)
def _summary_pdf_to_textract_style_pages(pdf_path: Path) -> list[str]:
    """Render each page of a Summary PDF as a newline-delimited token stream.

    `pdfinfo` supplies the page count; every page is then dumped on its
    own with `pdftotext -layout`, which keeps a label and its value on
    the same line. Each non-blank line is split on runs of two or more
    whitespace characters and the pieces are emitted one per line; blank
    lines survive as empty tokens. The result is the label-then-value
    sequence these tests feed to `ElmhurstSiteNotesExtractor`.

    Args:
        pdf_path: Location of the Summary PDF to convert.

    Returns:
        One string per PDF page, tokens joined by newlines.

    Raises:
        RuntimeError: the `pdfinfo` output has no parseable `Pages:` line.
        subprocess.CalledProcessError: either tool exits non-zero.
    """

    def _stdout_of(argv: list[str]) -> str:
        # Both poppler tools are invoked identically: captured text
        # output, non-zero exit promoted to an exception.
        completed = subprocess.run(
            argv, capture_output=True, text=True, check=True
        )
        return completed.stdout

    page_match = re.search(
        r"Pages:\s+(\d+)", _stdout_of(["pdfinfo", str(pdf_path)])
    )
    if page_match is None:
        raise RuntimeError(f"Could not parse page count from {pdf_path}")
    last_page = int(page_match.group(1))

    rendered_pages: list[str] = []
    for page_no in range(1, last_page + 1):
        page_text = _stdout_of(
            [
                "pdftotext", "-layout", "-f", str(page_no), "-l", str(page_no),
                str(pdf_path), "-",
            ]
        )
        page_tokens: list[str] = []
        for raw_line in page_text.splitlines():
            stripped = raw_line.strip()
            if stripped:
                # 2+ spaces separate the label column from the value column.
                page_tokens.extend(
                    piece for piece in re.split(r"\s{2,}", stripped) if piece
                )
            else:
                # Blank lines are preserved as empty tokens.
                page_tokens.append("")
        rendered_pages.append("\n".join(page_tokens))
    return rendered_pages
def test_summary_000474_mapper_produces_three_building_parts() -> None:
    """Summary_000474 must map to three building parts.

    Cert U985-0001-000474 is a mid-terrace with 3 building parts
    (Main + 2 extensions) per the hand-built worksheet fixture at
    tests/domain/sap10_calculator/worksheet/_elmhurst_worksheet_000474.py.
    Routing the Summary PDF through extractor + mapper must yield the
    same count.
    """
    # Arrange
    survey = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(_SUMMARY_000474_PDF)
    ).extract()

    # Act
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)

    # Assert
    building_part_count = len(mapped.sap_building_parts)
    assert building_part_count == 3
def test_summary_000474_mapper_extracts_seven_windows() -> None:
    """Summary_000474 must surface all seven §11 windows.

    Cert U985-0001-000474's §11 table lodges 7 windows across Main +
    1st Extension + 2nd Extension. The legacy Textract-style window
    parser couldn't anchor on the Summary PDF's tabular layout; the
    W/H/Area-plus-Manufacturer anchor pair picks them all up.
    """
    # Arrange
    survey = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(_SUMMARY_000474_PDF)
    ).extract()

    # Act
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)

    # Assert
    window_count = len(mapped.sap_windows)
    assert window_count == 7
# Cohort chain SAP-pin tests follow. NOTE: certs 000474, 000480, 000487,
# 000490 previously had chain tests here pinning their cascade SAP
# against the U985 worksheet PDF — those tests were removed because
# their worksheets violate RdSAP 10 §5 (12) "Floor infiltration
# (suspended timber ground floor only)". Our cascade applies the spec
# rule (via `cert_to_inputs._has_suspended_timber_floor_per_spec`);
# the worksheet does not. So the spec-correct chain SAP for those
# certs can't match the worksheet SAP — by design, not by mapper bug.
# The Layer 1 hand-built fixtures for those 4 certs absorb the
# worksheet quirk by lodging `has_suspended_timber_floor=False`
# explicitly (overriding the spec inference) — so Layer 1 cascade pins
# still pin the worksheet value exactly. The chain tests below remain
# only for 000477, 000516 (and 001479 further down), where the
# worksheet IS spec-correct.
def test_summary_000477_full_chain_sap_matches_worksheet_pdf_exactly() -> None:
    """Full chain for cert 000477 reproduces the worksheet SAP to 1e-4.

    Cert U985-0001-000477 is a single-bp mid-terrace with a 15.06 m²
    Room-in-Roof storey and zero baths lodged. The worksheet PDF lodges
    unrounded SAP 65.0057. The cert drives the chain through the
    `RoomInRoof.detailed_surfaces` cascade with stud walls @ 100mm
    Mineral, two uninsulated slopes, two party gable walls, plus the
    RR/storey-area suspended-timber-floor heuristic (RIR < storey →
    0.2 ACH floor infiltration).
    """
    # Arrange
    worksheet_unrounded_sap = 65.0057
    survey = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(_SUMMARY_000477_PDF)
    ).extract()
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)

    # Act
    sap_inputs = cert_to_inputs(mapped, prices=SAP_10_2_SPEC_PRICES)
    outcome = calculate_sap_from_inputs(sap_inputs)

    # Assert
    deviation = abs(outcome.sap_score_continuous - worksheet_unrounded_sap)
    assert deviation < 1e-4
def test_summary_000516_full_chain_sap_matches_worksheet_pdf_exactly() -> None:
    """Full chain for cert 000516 reproduces the worksheet SAP to 1e-4.

    Cert U985-0001-000516 is a mid-terrace with main bp + 19.02 m²
    room-in-roof. The worksheet PDF lodges unrounded SAP 62.7937. The
    §11 table mixes 5 vertical windows (U=2.80) with 1 roof window
    (U=3.10 in cert, U=3.40 Table 24 raw); the mapper discriminates by
    `U > 3.0` and routes the high-U entry to `sap_roof_windows` so its
    solar gains feed §6 with the right pitch (45°) and Table-24 U-value.
    """
    # Arrange
    worksheet_unrounded_sap = 62.7937
    survey = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(_SUMMARY_000516_PDF)
    ).extract()
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)

    # Act
    sap_inputs = cert_to_inputs(mapped, prices=SAP_10_2_SPEC_PRICES)
    outcome = calculate_sap_from_inputs(sap_inputs)

    # Assert
    deviation = abs(outcome.sap_score_continuous - worksheet_unrounded_sap)
    assert deviation < 1e-4
def test_summary_001479_mapper_extensions_count_matches_extension_bps() -> None:
    """`extensions_count` must follow the lodged extension building parts.

    Cert 0535-9020-6509-0821-6222 (Summary_001479) is the first cohort
    cert with an actual GOV.UK API counterpart. Its worksheet PDF lodges
    Main + Extension 1 + Extension 2 (3 building parts, 2 extensions).
    Pre-slice the Elmhurst mapper hard-coded `extensions_count=0`
    regardless of survey.extensions; this asserts the count flows
    through.
    """
    # Arrange
    survey = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(_SUMMARY_001479_PDF)
    ).extract()

    # Act
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)

    # Assert — two extensions on top of Main gives three building parts.
    assert mapped.extensions_count == 2
    building_part_count = len(mapped.sap_building_parts)
    assert building_part_count == 3
def test_summary_001431_oil_1_main_fuel_inferred_from_section_15_water_heating_fuel_type() -> None:
    """Main fuel falls back to the §15.0 water-heating fuel when §14.0 is silent.

    Heating-systems corpus fixture 001431 / "oil 1" lodges a Table 4b
    oil boiler (SAP code 127) at §14.0 Main Heating1 but with NO §14.0
    "Fuel Type" lodging — the actual fuel only appears in §15.0 as
    "Water Heating Fuel Type: Heating oil". The same applies to the
    other Table 4b oil variants (oil pcdb 1/2/3 et al) and to the gov.uk
    EPC API's `main_fuel_type=28` ("oil (not community)") per
    epc_codes.csv.

    Pre-slice the mapper's `_elmhurst_main_fuel_int(mh.fuel_type)`
    returned None for the empty §14.0 fuel string, the electric-SAP-code
    inference didn't fire (SAP 127 isn't in
    `_ELECTRIC_SAP_MAIN_HEATING_CODES`), so `main_fuel_type` fell through
    to the raw empty string. `cert_to_inputs._main_fuel_code` then
    returned None (string is not int), and
    `table_32.unit_price_p_per_kwh(None)` defaulted to mains gas
    (3.48 p/kWh). The cascade therefore priced ~13.7k kWh/yr of oil
    heating at the gas tariff — a 56% under-count vs the worksheet's
    spec-lodged oil rate.

    The fix routes the §15.0 water_heating fuel through
    `_elmhurst_main_fuel_int` (which now knows "Heating oil" → 28 per
    the epc_codes.csv main_fuel row) and falls back to it for the main
    heating fuel when §14.0 is silent. The cascade then prices SH + HW
    at the heating-oil tariff per Table 32.
    """
    # Arrange
    corpus_root = Path(__file__).parents[3]
    fixture_pdf = (
        corpus_root
        / "sap worksheets/heating systems examples/oil 1/Summary_001431.pdf"
    )
    survey = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(fixture_pdf)
    ).extract()

    # Act
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)

    # Assert — 28 is the heating-oil fuel code on both the space- and
    # water-heating sides.
    heating = mapped.sap_heating
    first_main = heating.main_heating_details[0]
    assert first_main.main_fuel_type == 28
    assert heating.water_heating_fuel == 28
def test_summary_001431_solid_fuel_8_main_fuel_inferred_from_main_heating_ees_code() -> None:
    """Main fuel is derived from the §14.0 EES code when no fuel is lodged.

    Heating-systems corpus fixture 001431 / "solid fuel 8" lodges §14.0
    "Main Heating SAP Code: 160" + "Main Heating EES Code: BQI" with NO
    §14.0 "Fuel Type" lodging — typical of solid-fuel main heating where
    the SAP code (160 = "Closed room heater with boiler") covers
    multiple distinct fuels.

    Anthracite (EES BAI), Wood Chips (BQI), Dual Fuel (BDI), and
    Smokeless Fuel (BKI) all share SAP code 160 across the corpus; the
    SAP code alone can't disambiguate, so the mapper has to look at the
    EES code. Pre-S0380.133 the mapper produced `main_fuel_type=''`;
    post-S0380.132 the cascade strict-raised `MissingMainFuelType`.

    The fix routes the §14.0 EES code through
    `_ELMHURST_MAIN_HEATING_EES_TO_FUEL_CODE` (corpus-derived dict
    mirroring the §15.0 fallback added in S0380.130). BQI → Table 32
    code 21 = "wood chips" (3.07 p/kWh + 0.023 kg CO2/kWh + 1.046 PE
    factor per RdSAP 10 spec p.95). The dict uses Table 32 codes directly
    rather than the API enum because the API codes 1-9 collide with
    Table 32 codes for unrelated fuels (e.g. API 5 = "anthracite" vs
    Table 32 5 = "bottled LPG main heating").
    """
    # Arrange
    corpus_root = Path(__file__).parents[3]
    fixture_pdf = (
        corpus_root
        / "sap worksheets/heating systems examples/solid fuel 8/Summary_001431.pdf"
    )
    survey = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(fixture_pdf)
    ).extract()

    # Act
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)

    # Assert — the extractor surfaces the EES code, and the mapper turns
    # it into the wood-chips fuel code while keeping SAP code 160.
    first_main = mapped.sap_heating.main_heating_details[0]
    assert survey.main_heating.main_heating_ees == "BQI"
    assert first_main.main_fuel_type == 21
    assert first_main.sap_main_heating_code == 160
def test_summary_001431_community_heating_1_main_heating_sap_code_extracted_when_no_main_heating_2_block() -> None:
    """§14.0 SAP code is extracted even when no §14.1 Main Heating2 block exists.

    Heating-systems corpus fixture 001431 / "community heating 1" lodges
    §14.0 Main Heating1 directly followed by §14.1 Community Heating/Heat
    Network (no §14.1 Main Heating2 block, since community-heated
    dwellings don't have a second main system to lodge). The §14.0 block
    carries `Main Heating SAP Code: 301` (Community heating per SAP10.2
    Table 4a category 6 — "Heat networks").

    Pre-slice the extractor's `_section_lines("14.0 Main Heating1",
    "14.1 Main Heating2")` returned an empty list because the end marker
    was missing, so every §14.0 field (incl. `Main Heating SAP Code`)
    came back as None. The mapper then raised `UnmappedElmhurstLabel`
    with "§14.0 Main Heating1 has neither PCDF boiler reference (None)
    nor SAP code (None)" — blocking all 6 community-heated + "no system"
    corpus variants from cascade execution.

    The fix closes the §14.0 block at whichever §14.1 marker appears
    first ("14.1 Main Heating2" or "14.1 Community Heating"), so the SAP
    code surfaces correctly on every Summary shape.
    """
    # Arrange
    corpus_root = Path(__file__).parents[3]
    fixture_pdf = (
        corpus_root
        / "sap worksheets/heating systems examples/community heating 1/Summary_001431.pdf"
    )
    survey = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(fixture_pdf)
    ).extract()

    # Act
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)

    # Assert
    first_main = mapped.sap_heating.main_heating_details[0]
    assert first_main.sap_main_heating_code == 301
def test_summary_001431_pcdb_1_inaccessible_cylinder_resolves_to_normal_per_rdsap_10_table_28() -> None:
    """A "No Access" cylinder resolves to the Normal size enum.

    Heating-systems corpus fixture 001431 / "pcdb 1" lodges §15.1
    "Cylinder Size: No Access" (the Elmhurst inaccessible-cylinder
    lodging form). Per RdSAP 10 Specification Table 28 page 55:

        "Inaccessible:
          - if off-peak electric dual immersion: 210 litres
          - if from solid fuel boiler: 160 litres
          - otherwise: 110 litres"

    pcdb 1 lodges §14.0 Main Heating as a Potterton oil boiler (PCDF
    716) + §15.0 Water Heating Fuel Type "Heating oil" → not an electric
    dual immersion, not a solid fuel boiler → the spec's "otherwise"
    branch → 110 litres = SAP10 cylinder_size enum 2 (Normal per
    `_ELMHURST_CYLINDER_SIZE_LABEL_TO_SAP10`).

    Pre-slice the mapper strict-raised `UnmappedElmhurstLabel` on the
    "No Access" string because `_elmhurst_cylinder_size_code` only
    carried the three lodged-size dict entries (Normal/Medium/Large).
    """
    # Arrange
    corpus_root = Path(__file__).parents[3]
    fixture_pdf = (
        corpus_root
        / "sap worksheets/heating systems examples/pcdb 1/Summary_001431.pdf"
    )
    survey = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(fixture_pdf)
    ).extract()

    # Act
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)

    # Assert
    resolved_size = mapped.sap_heating.cylinder_size
    assert resolved_size == 2
def test_summary_001431_pcdb_1_inaccessible_cylinder_resolves_insulation_to_25mm_foam_per_rdsap_10_table_29() -> None:
    """A "No Access" cylinder at age band G resolves to 25 mm foam.

    Heating-systems corpus fixture 001431 / "pcdb 1" lodges §15.1
    "Cylinder Size: No Access" alongside age band G (1983-1990). Per
    RdSAP 10 Specification §10.11 Table 29 page 56 "Hot water cylinder
    insulation if not accessible":

      - Age band of main property A to F: 12 mm loose jacket
      - Age band of main property G, H:   25 mm foam
      - Age band of main property I to M: 38 mm foam

    pcdb 1 lodges construction_age_band = "G 1983-1990" → 25 mm foam.
    The SAP10 `cylinder_insulation_type` enum 1 maps to "factory-applied"
    (foam) per `_ELMHURST_CYLINDER_INSULATION_LABEL_TO_SAP10`;
    `cylinder_insulation_thickness_mm` carries the literal millimetre
    value the cascade feeds into SAP 10.2 Table 2 Note 1's smooth
    formula L = 0.005 + 0.55 / (t + 4) for the storage loss factor
    (worksheet pcdb 1 (51) = 0.024 ≡ 25 mm).

    Pre-slice the mapper left both fields as None on "No Access" lodging
    because `_elmhurst_cylinder_insulation_code` and the thickness field
    both look up only the §15.1 measured labels — which the Summary
    doesn't carry when the cylinder is inaccessible. The §4 (56)m
    storage-loss cascade then skipped the cylinder loss entirely
    (`_cylinder_storage_loss_override` requires insulation_type=factory +
    thickness to fire), driving worksheet (56)m sum ~695 kWh missing
    from cert pcdb 1's (62)m demand.
    """
    # Arrange
    corpus_root = Path(__file__).parents[3]
    fixture_pdf = (
        corpus_root
        / "sap worksheets/heating systems examples/pcdb 1/Summary_001431.pdf"
    )
    survey = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(fixture_pdf)
    ).extract()

    # Act
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)

    # Assert — insulation type first, then thickness; each failure
    # message reports the value actually mapped.
    heating = mapped.sap_heating
    assert heating.cylinder_insulation_type == 1, (
        f"pcdb 1 cylinder_insulation_type: got "
        f"{heating.cylinder_insulation_type!r}, want 1 "
        f"(factory-applied / foam) per RdSAP 10 §10.11 Table 29 age G "
        f"row."
    )
    assert heating.cylinder_insulation_thickness_mm == 25, (
        f"pcdb 1 cylinder_insulation_thickness_mm: got "
        f"{heating.cylinder_insulation_thickness_mm!r}, want 25 "
        f"per RdSAP 10 §10.11 Table 29 age G row (25 mm foam)."
    )
def test_summary_001431_electric_1_underfloor_heating_resolves_to_in_screed_per_rdsap_10_section_10_11() -> None:
    """Bare "Underfloor Heating" resolves to the in-screed emitter enum.

    Heating-systems corpus fixture 001431 / "electric 1" lodges §14.0
    "Heat Emitter: Underfloor Heating" (bare form, no subtype qualifier).
    Per RdSAP 10 Specification §10.11 Table 29 page 56 ("Heating and hot
    water parameters"):

        "Underfloor heating: If dwelling has a ground floor, then
         according to the floor construction (see Table 19 if unknown):
          - solid, main property age band A to E: concrete slab
          - solid, main property age band F to M: in screed
          - suspended timber: in timber floor
          - suspended, not timber: in screed"

    Property 001431 lodges §9.0 Floors as "Type: S Solid" + §3.0 Date
    Built "G 1983-1990" (age band G ∈ F-M), so the spec rule resolves to
    "in screed" → SAP10.2 Table 4d emitter enum 2 (R=0.75).

    Pre-slice the Elmhurst mapper passed the raw "Underfloor Heating"
    string through `_elmhurst_heat_emitter_int`'s `dict.get` (which
    returned None for the bare lodging) and then through to the
    MainHeatingDetail's `heat_emitter_type` field, which made the
    cascade strict-raise at `_responsiveness` for any of the 2 corpus
    variants lodging this form (`electric 1` + `oil 6`).
    """
    # Arrange
    corpus_root = Path(__file__).parents[3]
    fixture_pdf = (
        corpus_root
        / "sap worksheets/heating systems examples/electric 1/Summary_001431.pdf"
    )
    survey = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(fixture_pdf)
    ).extract()

    # Act
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)

    # Assert
    first_main = mapped.sap_heating.main_heating_details[0]
    assert first_main.heat_emitter_type == 2
def test_summary_001479_main_party_wall_construction_is_cavity_unfilled() -> None:
    """Party wall type "CU" maps to wall_construction code 4.

    Cert 001479 Main §7 Walls lodges "Party Wall Type: CU Cavity masonry
    unfilled". The Elmhurst leading-code map previously only knew "S"
    and "C"; "CU" fell through to None, which made the cascade default
    to U=0.25 instead of the worksheet's lodged U=0.50. The fix adds
    "CU" → SAP10 wall_construction code 4 (WALL_CAVITY), which
    `u_party_wall` resolves to U=0.50 — matching the worksheet's §3
    `Party walls Main … 0.50` row.
    """
    # Arrange
    survey = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(_SUMMARY_001479_PDF)
    ).extract()

    # Act
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)

    # Assert — index 0 is the Main building part.
    main_part = mapped.sap_building_parts[0]
    assert main_part.party_wall_construction == 4
def test_summary_001479_ext2_floor_is_exposed_to_external_air() -> None:
    """Ext2's "E To external air" floor is flagged as an exposed floor.

    Cert 001479 Ext2 §9 lodges "Location: E To external air" — a
    cantilevered exposed timber floor (the upper-storey extension over
    the back garden). The worksheet's §3 row `Exposed floor Ext2 … 1.92,
    1.20, 1.20` pins this as U=1.20 via Table 20. Pre-slice the mapper
    only routed "U Above unheated space" through `is_exposed_floor=True`;
    "E To external air" fell through to the BS EN ISO 13370 ground-floor
    cascade, dropping the lodged exposure entirely.
    """
    # Arrange
    survey = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(_SUMMARY_001479_PDF)
    ).extract()

    # Act
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)

    # Assert — index 2 is the second extension.
    second_extension = mapped.sap_building_parts[2]
    assert second_extension.floor_type == "To external air"
    first_floor_dimension = second_extension.sap_floor_dimensions[0]
    assert first_floor_dimension.is_exposed_floor is True
def test_summary_001479_ext2_sloping_ceiling_roof_uninsulated_for_pre_1950() -> None:
    """An "As Built" sloping-ceiling roof at age band C maps to 0 mm insulation.

    Cert 001479 Ext2 §8 lodges "Type: PS Pitched, sloping ceiling" +
    "Insulation Thickness: As Built" + age band C (1930-49). Original
    1930s construction had no sloping-ceiling insulation; worksheet §3
    `External roof Ext2 … 2.30` pins U=2.30 (uninsulated Table 16
    row 0). Pre-slice the mapper passed thickness=None through, routing
    to `u_roof`'s pitched-roof Table 18 col 1 default (0.40 for age C,
    assumes loft-joist retrofit) — wrong geometry for PS.

    Ext1's PS roof at age M leaves thickness=None (modern build, cascade
    default U=0.15 matches worksheet).
    """
    # Arrange
    survey = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(_SUMMARY_001479_PDF)
    ).extract()

    # Act
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)

    # Assert — Ext2 (index 2) is pinned uninsulated; Ext1 (index 1)
    # stays unset.
    building_parts = mapped.sap_building_parts
    assert building_parts[2].roof_insulation_thickness == 0
    assert building_parts[1].roof_insulation_thickness is None
def test_summary_001479_secondary_heating_routes_mains_gas_fuel() -> None:
    """Secondary SAP code 605 is paired with the mains-gas fuel code.

    Cert 001479 §14.1 Main Heating2 lodges "Secondary Heating Code: SAP
    code 605, Flush fitting live effect gas fire, sealed to chimney".
    The Summary surfaces only the SAP code (605); the fuel type 26
    (mains gas) must be derived from the code range so the `_fuel_cost`
    orchestrator's `secondary_high_rate_gbp_per_kwh` picks up Table 32's
    gas tariff (£0.0348/kWh) rather than the default standard-electricity
    tariff (£0.132/kWh). Worksheet line (242) "Space heating - secondary
    … 3.4800 70.5022" confirms gas pricing.
    """
    # Arrange
    survey = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(_SUMMARY_001479_PDF)
    ).extract()

    # Act
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)

    # Assert
    heating = mapped.sap_heating
    assert heating.secondary_heating_type == 605
    assert heating.secondary_fuel_type == 26
def test_summary_2102_secondary_heating_routes_house_coal_for_open_fire() -> None:
    """Secondary SAP code 631 (open fire) is paired with the house-coal fuel code.

    Cohort-2 cert 2102-3018-0205-7886-5204 §14.1 lodges "Secondary
    Heating Code: SAP code 631" — "Open fire in grate" per SAP 10.2
    Table 4a Category 10 (Room heaters), solid fuel column. Without the
    per-code routing the cascade defaults to standard electricity at
    13.19 p/kWh and over-charges secondary heating by ~£340/yr, pushing
    SAP -15.81 below the worksheet's 63.87. Worksheet line (242) "Space
    heating - secondary 3585.24 × 3.6700 = 131.58" confirms house-coal
    pricing (Table 32 fuel code 11 = 3.67 p/kWh).
    """
    # Arrange — the cert directory is anchored on this file's location
    # (the same `parents[3]` root the "heating systems examples" corpus
    # fixtures use) so the test no longer depends on the directory
    # pytest happens to be launched from.
    cert_dir = (
        Path(__file__).parents[3]
        / "sap worksheets/additional with api 2/2102-3018-0205-7886-5204"
    )
    # Sort for a deterministic pick and fail with a readable message
    # rather than a bare StopIteration when the fixture is missing.
    summary_pdfs = sorted(cert_dir.glob("Summary_*.pdf"))
    assert summary_pdfs, f"no Summary_*.pdf fixture found under {cert_dir}"
    summary_pdf = summary_pdfs[0]
    pages = _summary_pdf_to_textract_style_pages(summary_pdf)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()

    # Act
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)

    # Assert
    assert epc.sap_heating.secondary_heating_type == 631
    # 11 = "Coal" in `_ELMHURST_MAIN_FUEL_TO_SAP10` → Table 32 lookup
    # returns 3.67 p/kWh (house coal).
    assert epc.sap_heating.secondary_fuel_type == 11
def test_summary_9796_full_chain_sap_within_spec_floor_of_worksheet() -> None:
    """Full chain for cert 9796 lands within the ASHP-cohort tolerance.

    Cohort-2 cert 9796-3058-6205-0346-9200 (Summary_*.pdf /
    dr87-0001-*.pdf) is a Mid-Terrace bungalow age D with a Mitsubishi
    PUZ-WM50VHA ASHP (PCDB 104568) and a Suspended-timber ground floor
    (46.87 m² / 15.0 m heat-loss perimeter). The other PCDF 104568
    cohort certs (0380, 2800, 3336, 4800) are End-Terrace bungalows
    whose floor U lands well above 0.5; cert 9796's geometry is the only
    one where the (broken) cascade routes the U through the solid
    default → U=0.49 < 0.5 → spec rule (a) "U<0.5 → sealed" fires →
    (12) = 0.1 (sealed) instead of (12) = 0.2 (unsealed).

    Per RdSAP10 §5 page 29 "Floor infiltration (suspended timber ground
    floor only)":
        Age band A-E:
          a) if floor U-value < 0.5, assume "sealed" → 0.1
          b) if retro-fit + no U → "sealed" → 0.1
          otherwise "unsealed" → 0.2
    The cascade must use the SAME floor U-value the heat-transmission
    cascade computes (which respects `floor_construction_type`) — not a
    stale duplicate that ignores the per-bp lodgement.

    Pre-slice the 0.1 ach gap propagated:
        (18) infiltration_rate 0.74 → ws 0.84 (cascade -0.10)
        (25)m Jan 0.82 → ws 0.91 (cascade -0.09)
        (38)m Jan 29.08 W/K → ws 32.37 (cascade -3.29 W/K)
        (39) Jan 110.35 W/K → ws 113.64 (cascade -3.29 W/K)
        HLP Jan 2.35 W/m²K → ws 2.42 (cascade -0.07)
        T_h2 Jan 19.11°C → ws 19.07 (cascade +0.04)
        MIT Jan 18.51°C → ws 18.45 (cascade +0.06)
        SAP +0.55 vs worksheet 90.13.
    The worksheet "SAP value" line lodges unrounded SAP 90.1318.
    """
    # Arrange — the cert directory is anchored on this file's location
    # (the same `parents[3]` root the "heating systems examples" corpus
    # fixtures use) so the test no longer depends on the directory
    # pytest happens to be launched from.
    cert_dir = (
        Path(__file__).parents[3]
        / "sap worksheets/additional with api 2/9796-3058-6205-0346-9200"
    )
    # Sort for a deterministic pick and fail with a readable message
    # rather than a bare StopIteration when the fixture is missing.
    summary_pdfs = sorted(cert_dir.glob("Summary_*.pdf"))
    assert summary_pdfs, f"no Summary_*.pdf fixture found under {cert_dir}"
    summary_pdf = summary_pdfs[0]
    pages = _summary_pdf_to_textract_style_pages(summary_pdf)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)

    # Act
    result = calculate_sap_from_inputs(
        cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)
    )

    # Assert — ±0.07 ASHP-cohort spec-floor tolerance (matches the other
    # PCDF 104568 cohort residuals; the remaining ~+0.001 SAP delta is
    # the cohort-1 HP-COP precision-floor pattern, see handover thread 3).
    worksheet_unrounded_sap = 90.1318
    assert (
        abs(result.sap_score_continuous - worksheet_unrounded_sap)
        < _ASHP_COHORT_CHAIN_TOLERANCE
    )
def test_summary_7700_full_chain_sap_matches_worksheet_pdf_exactly() -> None:
    """Full chain for cert 7700 reproduces the worksheet SAP to 1e-4.

    Cohort-2 cert 7700-3362-0922-7022-3563 (Summary_000905.pdf /
    dr87-0001-000905.pdf) is the first cohort fixture to exercise the
    alt-wall dry-lining adjustment. End-Terrace house age C, main wall
    filled cavity (CavityWallDensePlasterDenseBlock, U=0.70), alt wall
    14.44 m² Cavity As-Built, Dry-lining: Yes
    (CavityWallPlasterOnDabsDenseBlock, worksheet U=1.20).

    Per RdSAP10 §5.8 + Table 14 page 41: dry-lining adds R = 0.17 m²K/W
    → U = 1/(1/1.5 + 0.17) = 1.19522... → 2 d.p. half-up = 1.20.
    Pre-slice the alt sub-area's `wall_dry_lined="N"` hard-code routed
    to the cavity-as-built default (U=1.50), giving fabric (33) 148.72
    W/K vs worksheet 144.38 (Δ +4.33 W/K = ~+0.44 SAP). The worksheet
    "SAP value" line lodges unrounded SAP 63.4425.
    """
    # Arrange — the cert directory is anchored on this file's location
    # (the same `parents[3]` root the "heating systems examples" corpus
    # fixtures use) so the test no longer depends on the directory
    # pytest happens to be launched from.
    cert_dir = (
        Path(__file__).parents[3]
        / "sap worksheets/additional with api 2/7700-3362-0922-7022-3563"
    )
    # Sort for a deterministic pick and fail with a readable message
    # rather than a bare StopIteration when the fixture is missing.
    summary_pdfs = sorted(cert_dir.glob("Summary_*.pdf"))
    assert summary_pdfs, f"no Summary_*.pdf fixture found under {cert_dir}"
    summary_pdf = summary_pdfs[0]
    pages = _summary_pdf_to_textract_style_pages(summary_pdf)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)

    # Act
    result = calculate_sap_from_inputs(
        cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)
    )

    # Assert
    worksheet_unrounded_sap = 63.4425
    assert abs(result.sap_score_continuous - worksheet_unrounded_sap) < 1e-4
def test_summary_9501_flat_has_no_built_form_in_summary_pdf() -> None:
    """A flat's Summary yields an empty `built_form`.

    Cert 9501 (Summary_000784.pdf) is a flat. The Elmhurst Summary's
    §1.0 "Property type" section lodges the built-form descriptor (e.g.
    "M Mid-Terrace", "D Detached") only for houses; flats have no
    built-form line — the §2.0 "Number of Storeys" section follows
    immediately after the "F Flat" property type.

    The extractor's `_extract_attachment` regex previously captured the
    line immediately after the property-type value unconditionally, so
    cert 9501 ends up with attachment "2.0 Number of Storeys:" — pure
    section-header noise that the mapper then surfaces on
    EpcPropertyData.built_form, breaking the cascade's flat-exposure
    routing downstream.
    """
    # Arrange
    survey = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(_SUMMARY_000784_PDF)
    ).extract()

    # Act
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)

    # Assert — built_form is empty for flats. Houses set it to their
    # attachment descriptor; flats lodge no attachment.
    lodged_built_form = mapped.built_form
    assert lodged_built_form == ""
def test_summary_9501_dwelling_type_is_top_floor_flat() -> None:
    """Cert 9501's dwelling type is inferred as a top-floor flat.

    Cert 9501's worksheet treats the cert as a TOP-floor flat: §3 (28a)
    "Ground floor Main … U=0.0" because the floor sits over "Another
    dwelling below" (worksheet line 9.0 Floor location); §3 (30) has
    both an external roof + RR contributions so the roof IS exposed.
    The cascade's `_dwelling_exposure` function does prefix matching on
    `dwelling_type.lower()` to gate which surfaces are party — without
    "top-floor flat" the cert falls through to fully-exposed houses
    (Δ +9.25 W/K on floor).

    Floor-position inference rules:
      - floor.location indicates "Another dwelling below"
        → not ground floor (rules out ground-floor flat)
      - room_in_roof OR external roof present
        → roof exposed (rules out mid-floor flat)
      - therefore → top-floor flat
    """
    # Arrange
    survey = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(_SUMMARY_000784_PDF)
    ).extract()

    # Act
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)

    # Assert — a value must be present before its prefix is checked.
    inferred_type = mapped.dwelling_type
    assert inferred_type is not None
    assert inferred_type.lower().startswith("top-floor")
def test_summary_9501_rr_gable_walls_route_to_external_walls_hlc() -> None:
    # Arrange — worksheet §3 for cert 9501 lodges "Roof room Main
    # Gable Wall 1" and "Gable Wall 2" as line (29a) entries, i.e.
    # external walls, at the main-wall U (= 1.70 for age B Solid
    # Brick): 13.50×1.70 + 15.95×1.70 = 50.07 W/K on top of the
    # regular external-walls 168.74 → 218.81 W/K total.
    #
    # The Summary mapper's cohort-house default lodges these as
    # `SapRoomInRoofSurface(kind='gable_wall', ...)`, which the
    # cascade routes to party walls at U=0.25 (Table 4 row 2). For a
    # top-floor flat in a mid-terrace block the gables sit at the
    # ends of the building (no neighbour above), so they are EXTERNAL
    # rather than party and must be surfaced as `gable_wall_external`
    # for the cascade's (29a) sum to pick them up.
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000784_PDF)
    notes = ElmhurstSiteNotesExtractor(textract_pages).extract()
    property_data = EpcPropertyDataMapper.from_elmhurst_site_notes(notes)
    # Act
    from domain.sap10_calculator.rdsap.cert_to_inputs import (
        heat_transmission_section_from_cert,
    )
    heat_transmission = heat_transmission_section_from_cert(property_data)
    # Assert — worksheet (29a) total walls = 168.7420 (main) +
    # 22.95 (Gable 1) + 27.115 (Gable 2) = 218.807 W/K. The 1e-2
    # tolerance absorbs the 2-d.p. rounding of the underlying U/area
    # products; the 1e-4 chain test downstream tightens this to the
    # cascade-internal rounding floor.
    expected_walls_w_per_k = 218.807
    walls_drift = abs(heat_transmission.walls_w_per_k - expected_walls_w_per_k)
    assert walls_drift <= 1e-2
def test_summary_000565_extractor_recognises_exposed_and_connected_gable_types() -> None:
    """Summary PDF §8.1 Room(s) in Roof per-surface table lists the
    gable-wall environment column with one of four published values:

        Party                        → §8.1 party-wall row
        Sheltered                    → §8.1 sheltered external row
        Exposed                      → §8.1 exposed external row
        Connected (to heated space)  → §8.1 internal partition

    Per RdSAP 10 §3.10 (PDF p.30-35) "Detailed Room-in-Roof" + Table 4
    (p.22) "Heat-loss surface variants":

    - Exposed gable wall → external wall at the lodged U-value (or
      the BP main-wall U when the lodged value is the default)
    - Sheltered gable wall → external wall at the lodged U-value
    - Party gable wall → party wall at U=0.25 (Table 4 row 2)
    - Connected gable wall → internal partition to heated space,
      NOT a heat-loss surface (drops from external + party totals)

    The extractor was only capturing `gable_type ∈ {"Party",
    "Sheltered", "Connected to heated space"}` — neither `"Exposed"`
    (every external gable on cert 000565) nor the plain `"Connected"`
    string (the actual lodging used in Summary PDFs vs the verbose
    "Connected to heated space") was recognised. Both fell through
    with `gable_type=None`, masking the downstream cascade gap (cert
    000565 BP[0] Main Gable Wall 1 is lodged "Exposed" at U=0.35 but
    extracted as untyped → mapper routes to `gable_wall` (party at
    U=0.25) — see worksheet "Roof room Main Gable Wall 1" line at
    U=0.35).

    This pin asserts the extractor surfaces the lodged environment
    column verbatim. The downstream mapper + cascade behaviour stays
    unchanged until follow-up slices use the new field — this is a
    pure extractor data-completion step (no test pins move).
    """
    # Arrange
    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()
    # Act — Main BP gables; Ext1 gables expose both "Connected" and
    # "Exposed" values from the cert lodging. Missing room-in-roof
    # blocks fail here with a readable message instead of degrading
    # to an empty surface map and a bare KeyError further down.
    rir_main = site_notes.room_in_roof
    assert rir_main is not None, "Main building part has no room-in-roof block"
    assert site_notes.extensions, "expected at least one extension (Ext1)"
    rir_ext1 = site_notes.extensions[0].room_in_roof
    assert rir_ext1 is not None, "Ext1 building part has no room-in-roof block"
    main_surfaces = {s.name: s for s in rir_main.surfaces}
    ext1_surfaces = {s.name: s for s in rir_ext1.surfaces}
    # Assert
    # Main BP[0]: Gable Wall 1 lodged "Exposed" (default U 0.35); Gable
    # Wall 2 lodged "Sheltered" (default U 0.30).
    # Ext1 BP[1]: Gable Wall 1 lodged "Connected" (internal partition);
    # Gable Wall 2 lodged "Exposed" (default U 1.70).
    expectations = (
        ("Main", main_surfaces, "Gable Wall 1", "Exposed"),
        ("Main", main_surfaces, "Gable Wall 2", "Sheltered"),
        ("Ext1", ext1_surfaces, "Gable Wall 1", "Connected"),
        ("Ext1", ext1_surfaces, "Gable Wall 2", "Exposed"),
    )
    for part_label, surfaces, surface_name, expected_type in expectations:
        assert surface_name in surfaces, (
            f"{part_label} {surface_name} missing from extracted "
            f"room-in-roof surfaces: {list(surfaces)}"
        )
        actual_type = surfaces[surface_name].gable_type
        assert actual_type == expected_type, (
            f"{part_label} {surface_name} gable_type = "
            f"{actual_type!r}; expected {expected_type!r}"
        )
def test_summary_000565_rr_mapper_routes_exposed_to_external_drops_connected_and_surfaces_common_walls() -> None:
    """RdSAP 10 §3.9 (Simplified) + §3.10 (Detailed) + Table 4 (PDF p.22):
    the cert's Room-in-Roof per-surface table classifies gable walls
    by exposure column AND derives areas via two different methods
    depending on assessment type:

    Gable / common-wall environment column → heat-loss routing:
        Exposed    → external wall at lodged or main-wall U
        Sheltered  → external wall at lodged U
        Party      → party wall at U = 0.25
        Connected  → internal partition (NOT a heat-loss surface)

    Area derivation:
        Detailed assessment          → raw L × H per surface
        Simplified + Common Walls    → L × (0.25 + H) for common walls;
                                       L × (0.25 + H_gable) - Σ_n
                                       (H_gable - H_common,n)² / 2 for
                                       gables
        Simplified + no Common Walls → raw L × H for gables (no
                                       structural-gap offset)

    The 0.25-m offset accounts for the structural gap between the RR
    floor and the storey-below ceiling (per RdSAP 10 §3.9.2 + Table 4
    p.22). The gable correction subtracts the triangular slice above
    each common wall where the gable above transitions to the common
    wall below.

    Pin: cert 000565 BP[1] Ext1 lodges (Simplified, Common Wall 1 9×1,
    Common Wall 2 5×1.8, Gable Wall 1 4×6 Connected, Gable Wall 2 8×9
    Exposed @ U=1.70). After this slice the mapper produces:

    - Common Wall 1 → SapRoomInRoofSurface(kind='common_wall',
      area_m2=11.25, u_value=1.70)
    - Common Wall 2 → SapRoomInRoofSurface(kind='common_wall',
      area_m2=10.25, u_value=1.70)
    - Gable Wall 1 → dropped (Connected, internal partition)
    - Gable Wall 2 → SapRoomInRoofSurface(kind='gable_wall_external',
      area_m2=16.08, u_value=1.70)

    All three values pin to the U985 worksheet for this BP at abs=1e-2:
        Roof room Ext1 common wall 1: 11.25
        Roof room Ext1 common wall 2: 10.25
        Roof room Ext1 Gable Wall 2 : 16.08
    """
    # Arrange
    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()
    # Act
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
    # Assert — BP[1] is the Ext1 building part (BPs[0]=Main, [1]=Ext1).
    # Guard the index so a missing extension fails with a readable
    # message rather than an IndexError.
    assert len(epc.sap_building_parts) >= 2, (
        f"expected Main + Ext1 building parts, got "
        f"{len(epc.sap_building_parts)}"
    )
    ext1_bp = epc.sap_building_parts[1]
    rir = ext1_bp.sap_room_in_roof
    assert rir is not None and rir.detailed_surfaces is not None
    detailed = rir.detailed_surfaces
    # Connected gables drop — no kind='gable_wall' surface at the raw 24 m² area.
    gable_walls_24 = [
        s for s in detailed
        if s.kind == "gable_wall" and abs(s.area_m2 - 24.0) <= 1e-2
    ]
    assert not gable_walls_24, (
        f"Connected gable (24 m² raw) leaked into kind='gable_wall': "
        f"{gable_walls_24}"
    )
    # Common walls surfaced at spec-formula areas.
    common_walls = [s for s in detailed if s.kind == "common_wall"]
    common_areas = sorted(s.area_m2 for s in common_walls)
    assert any(abs(a - 10.25) <= 1e-2 for a in common_areas), (
        f"Ext1 Common Wall 2 (5 × (0.25 + 1.8) = 10.25) missing from "
        f"common_wall surfaces: areas={common_areas}"
    )
    assert any(abs(a - 11.25) <= 1e-2 for a in common_areas), (
        f"Ext1 Common Wall 1 (9 × (0.25 + 1.0) = 11.25) missing from "
        f"common_wall surfaces: areas={common_areas}"
    )
    # Exposed gable surfaced at spec-corrected area + lodged U:
    # 8 × 9.25 - (8² + 7.2²)/2 = 74 - 57.92 = 16.08.
    gable_externals = [s for s in detailed if s.kind == "gable_wall_external"]
    assert any(
        abs(s.area_m2 - 16.08) <= 1e-2 and s.u_value == 1.70
        for s in gable_externals
    ), (
        f"Ext1 Gable Wall 2 (8 × (0.25 + 9) - ((9 - 1)² + (9 - 1.8)²)/2 = "
        f"16.08, U=1.70) missing from gable_wall_external surfaces: "
        f"{[(s.area_m2, s.u_value) for s in gable_externals]}"
    )
def test_summary_9501_pv_array_surfaced_from_elmhurst_section_19() -> None:
    # Arrange — cert 9501's Elmhurst §19.0 PV section lodges measured
    # array detail (2.36 kWp, South-West orientation, 45° elevation,
    # "None Or Little" overshading). The worksheet's §10a PV credit
    # of -250.02 GBP (-129.49 used in dwelling + -120.53 exported)
    # depends on Appendix M / Appendix U3.3 reading these from the
    # cascade's `SapEnergySource.photovoltaic_arrays` list. Without
    # the array surfacing the cascade computes total cost +£250 too
    # high → ECF 2.92 vs worksheet 2.26 → SAP 59.26 vs 68.53 (current
    # Δ -9.27 after Slice 99c closed the fabric heat loss).
    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000784_PDF)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()
    # Act
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
    # Assert — guard the energy-source container first (same guard the
    # two-array cert 0350 test uses) so a missing block fails as an
    # assertion rather than an AttributeError.
    assert epc.sap_energy_source is not None
    arrays = epc.sap_energy_source.photovoltaic_arrays
    assert arrays is not None
    assert len(arrays) == 1
    assert abs(arrays[0].peak_power - 2.36) <= 1e-4
    assert arrays[0].orientation == 6  # SAP octant: South-West
    assert arrays[0].pitch == 3  # RdSAP §11.1 pitch enum: code 3 = 45°
    assert arrays[0].overshading == 1  # RdSAP code: None or very little
def test_summary_9501_full_chain_sap_matches_worksheet_pdf_exactly() -> None:
    # Arrange — cert 9501-3059-8202-7356-0204 (Summary_000784.pdf /
    # dr87-0001-000784.pdf): third boiler validation cert and the
    # first FLAT in the per-cert mapper validation cohort. Mains-gas
    # Vaillant PCDB idx 19007, mid-terrace top-floor flat with
    # Room-in-Roof + measured PV (2.36 kWp SW @ 45°), TFA 113.08 m².
    # The worksheet PDF "SAP value" line lodges unrounded SAP
    # **68.5252**.
    #
    # Slices 99a-99e together closed the Summary path from Δ -5.25 to
    # 1e-4:
    #   99a — extractor attachment fix (built_form='')
    #   99b — dwelling_type identifies top-floor flat (cascade
    #         exposure routing)
    #   99c — RR gables external for flats + SO Solid Brick wall code
    #   99d — surface PV array from §19.0
    #   99e — PV pitch enum-not-degrees
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000784_PDF)
    notes = ElmhurstSiteNotesExtractor(textract_pages).extract()
    property_data = EpcPropertyDataMapper.from_elmhurst_site_notes(notes)
    # Act
    sap_inputs = cert_to_inputs(property_data, prices=SAP_10_2_SPEC_PRICES)
    outcome = calculate_sap_from_inputs(sap_inputs)
    # Assert — 1e-4 pin (project memory `feedback_zero_error_strict`).
    expected_unrounded_sap = 68.5252
    sap_drift = abs(outcome.sap_score_continuous - expected_unrounded_sap)
    assert sap_drift < 1e-4
def test_summary_001479_full_chain_sap_matches_worksheet_pdf_exactly() -> None:
    # Arrange — cert 001479 (Summary_001479.pdf / P960-0001-001479.pdf)
    # is the first cohort cert with a real GOV.UK EPB API counterpart
    # (cert ref 0535-9020-6509-0821-6222). The worksheet PDF "SAP
    # value" line lodges unrounded SAP **69.0094** (rating C 69, also
    # the API-published integer). It is the load-bearing forcing
    # function for the API↔Elmhurst parity workstream: drift beyond
    # 1e-4 signals a mapper gap rather than a calculator bug, because
    # the cohort 6 cert cascades all reproduce Elmhurst exactly at
    # 1e-4 on hand-built fixtures.
    #
    # Source-data caveat (for future debuggers): Summary §3 lodges the
    # Ext1 age band as "M 2023 onwards" while the worksheet header
    # records "Ext1: L" — likely an assessor data-entry
    # inconsistency. The mapper trusts the Summary (its source of
    # truth); whatever residual the M vs L disagreement produces is
    # accepted.
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_001479_PDF)
    notes = ElmhurstSiteNotesExtractor(textract_pages).extract()
    property_data = EpcPropertyDataMapper.from_elmhurst_site_notes(notes)
    # Act
    sap_inputs = cert_to_inputs(property_data, prices=SAP_10_2_SPEC_PRICES)
    outcome = calculate_sap_from_inputs(sap_inputs)
    # Assert — 1e-4 pin, no widening, no xfail (project memory
    # `feedback_zero_error_strict`).
    expected_unrounded_sap = 69.0094
    sap_drift = abs(outcome.sap_score_continuous - expected_unrounded_sap)
    assert sap_drift < 1e-4
def test_summary_0330_full_chain_sap_matches_worksheet_pdf_exactly() -> None:
    # Arrange — cert 0330-2249-8150-2326-4121 (Summary_000897.pdf /
    # dr87-0001-000897.pdf) is the second boiler cert under per-cert
    # mapper validation: mains-gas boiler (PCDB idx 10241),
    # mid-terrace 2-bp dwelling, TFA 69.14 m². The worksheet PDF "SAP
    # value" line lodges unrounded SAP **61.5993**. It plays the same
    # load-bearing role as cert 001479 (the first boiler): the Summary
    # path proves itself against the worksheet, then becomes the
    # canonical reference for the API path.
    #
    # Handover-baseline note: this was expected RED at Δ +0.4667
    # (Summary mapper cascade SAP 62.0660); the mapper gaps to close
    # were §11 glazing_type=14 (windows HLC +6.71 W/K) and the §4
    # hot-water cascade (kWh +1060).
    # NOTE(review): the baseline note may be stale now that the pin is
    # asserted without xfail — confirm.
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000897_PDF)
    notes = ElmhurstSiteNotesExtractor(textract_pages).extract()
    property_data = EpcPropertyDataMapper.from_elmhurst_site_notes(notes)
    # Act
    sap_inputs = cert_to_inputs(property_data, prices=SAP_10_2_SPEC_PRICES)
    outcome = calculate_sap_from_inputs(sap_inputs)
    # Assert — 1e-4 pin, no widening, no xfail (project memory
    # `feedback_zero_error_strict`).
    expected_unrounded_sap = 61.5993
    sap_drift = abs(outcome.sap_score_continuous - expected_unrounded_sap)
    assert sap_drift < 1e-4
def test_summary_0380_main_heating_category_is_heat_pump() -> None:
    # Arrange — the Summary for cert 0380 lodges main heating as a
    # PCDB-indexed Mitsubishi PUZ-WM50VHA (idx 104568), which lives in
    # PCDB Table 362 (heat pumps only). The Elmhurst mapper must
    # surface `main_heating_category=4` so the cascade takes the
    # Appendix N3.6/N3.7 heat-pump path rather than falling through to
    # the default boiler-ish branches keyed on
    # `main_heating_category in {1, 2}`. Spec ref: SAP 10.2 Table 4a
    # (main heating category code 4 = heat pump).
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000899_PDF)
    notes = ElmhurstSiteNotesExtractor(textract_pages).extract()
    # Act
    property_data = EpcPropertyDataMapper.from_elmhurst_site_notes(notes)
    # Assert — first main-heating entry carries the PCDB index and the
    # heat-pump category.
    heating_details = property_data.sap_heating.main_heating_details
    assert heating_details, "no main heating details surfaced"
    primary_system = heating_details[0]
    assert primary_system.main_heating_index_number == 104568
    assert primary_system.main_heating_category == 4
def test_summary_0380_filled_cavity_plus_external_insulation_routes_to_code_6() -> None:
    # Arrange — the Summary for cert 0380 lodges main walls as
    # `wall_type = "CA Cavity"` with `insulation = "FE Filled Cavity +
    # External"` (a cavity wall later upgraded with external
    # insulation). The cascade enum `wall_insulation_type=6` means
    # "filled cavity + external insulation" (per
    # `domain.sap10_ml.rdsap_uvalues` lines 120-131). Without it the
    # cascade defaults to the as-built routing and overstates walls
    # heat loss by +58 W/K on cert 0380 (Summary 69.69 vs API 11.62 at
    # HEAD before this slice). The API-path EPC for cert 0380 surfaces
    # `wall_insulation_type=6` and is the ground-truth pin here.
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000899_PDF)
    notes = ElmhurstSiteNotesExtractor(textract_pages).extract()
    # Act
    property_data = EpcPropertyDataMapper.from_elmhurst_site_notes(notes)
    # Assert
    building_parts = property_data.sap_building_parts
    assert building_parts, "no building parts surfaced"
    main_part = building_parts[0]
    assert main_part.wall_construction == 4  # 4 = Cavity ('CA')
    assert main_part.wall_insulation_type == 6  # 6 = filled cavity + external
def test_summary_0380_surfaces_wall_insulation_thickness_100mm() -> None:
    # Arrange — cert 0380's Summary §7.0 Walls block lodges the
    # composite-wall insulation thickness on the line pair
    # "Insulation Thickness" / "100 mm". Without surfacing this to
    # `wall_insulation_thickness`, the heat-transmission cascade
    # falls through `_parse_thickness_mm(None) → None` and the
    # composite filled-cavity-plus-external U-value calc uses its
    # default thickness rather than the lodged 100 mm — leaving cert
    # 0380's `walls_w_per_k` at 24.62 vs API's 11.62 even with
    # `wall_insulation_type=6` set (Slice S0380.3). Mirror of the
    # existing `_roof_details_from_lines` reader that surfaces roof
    # `insulation_thickness_mm` from the same "Insulation Thickness"
    # label.
    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000899_PDF)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()
    # Act
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
    # Assert — match the API mapper's "100mm" string (the EPC schema
    # type is `Optional[str]`; the cascade's `_parse_thickness_mm`
    # strips non-digit trailers). Guard the list first, as the sibling
    # wall-insulation-type test does, so an empty mapping fails with a
    # readable message instead of an IndexError.
    assert epc.sap_building_parts, "no building parts surfaced"
    main = epc.sap_building_parts[0]
    assert main.wall_insulation_thickness == "100mm"
def test_summary_0380_surfaces_insulated_door_u_value_1_2() -> None:
    # Arrange — the Summary §10 Doors block for cert 0380 lodges the
    # door U-value on the "Average U-value" / "1.20" line pair. The
    # dr87 worksheet line ref (26) confirms the spec value: "Doors
    # insulated 1, NetArea 3.7000 m², U-value 1.2000, A×U 4.4400 W/K".
    # If the lodged U-value is not surfaced the cascade defaults the
    # door U and overstates `doors_w_per_k` to 5.18 vs the worksheet's
    # 4.44 W/K. The comment at
    # `datatypes/epc/domain/epc_property_data.py:585` claimed the
    # value was "not available in site notes" — outdated for Elmhurst
    # Summary PDFs, which lodge it explicitly.
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000899_PDF)
    notes = ElmhurstSiteNotesExtractor(textract_pages).extract()
    # Act
    property_data = EpcPropertyDataMapper.from_elmhurst_site_notes(notes)
    # Assert — float compare with a small tolerance (the Summary
    # lodges "1.20", which parses cleanly to 1.2; the API lodges 1.2
    # directly).
    assert property_data.insulated_door_u_value is not None
    door_u_drift = abs(property_data.insulated_door_u_value - 1.2)
    assert door_u_drift < 1e-6
def test_summary_0380_cylinder_block_surfaces_full_15_1_lodging() -> None:
    # Arrange — the Summary §15.1 Hot Water Cylinder block for cert
    # 0380 lodges (L 340-347):
    #     Cylinder Size          Medium
    #     Insulated              Foam
    #     Insulation Thickness   50 mm
    #     Cylinder Thermostat    Yes
    # which the dr87 worksheet pins as:
    #     (47) Cylinder Volume 160.00 L         → cascade enum 3
    #     "Cylinder Insulation Type Foam"       → cascade enum 1 (factory)
    #     "Cylinder Insulation Thickness 50 mm" → 50
    #     "Cylinder Stat Yes"                   → 'Y'
    # Worksheet (51) 0.0152 × (52) 0.9086 × (53) 0.5400 × (47) 160 ÷ 1000
    # = daily storage loss 1.193 kWh/day → (56) annual ~435 kWh. That
    # is exact only when ALL FOUR fields are surfaced together:
    # insulation_type + thickness key the Table 2 loss factor (51),
    # volume keys (52), and cylinder_thermostat keys the Table 2b
    # temperature factor (53). Without cylinder_thermostat='Y' the
    # cascade uses the no-stat temperature factor (~0.9 instead of
    # 0.54) and HW storage loss over-counts by ~300 kWh/yr.
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000899_PDF)
    notes = ElmhurstSiteNotesExtractor(textract_pages).extract()
    # Act
    property_data = EpcPropertyDataMapper.from_elmhurst_site_notes(notes)
    # Assert — all four cylinder fields land on sap_heating.
    assert property_data.sap_heating.cylinder_size == 3
    assert property_data.sap_heating.cylinder_insulation_type == 1
    assert property_data.sap_heating.cylinder_insulation_thickness_mm == 50
    assert property_data.sap_heating.cylinder_thermostat == "Y"
def test_summary_0350_surfaces_two_pv_arrays() -> None:
    # Arrange — the Summary §19.0 Photovoltaic Panel block for cert
    # 0350 lodges TWO arrays (L 503-510):
    #     1.50 kWp / South-East / 45° / None Or Little
    #     1.50 kWp / North-West / 45° / None Or Little
    # The Elmhurst extractor's `_extract_pv_array_detail` hardcodes a
    # single 4-value reader (its loop breaks at `len(values) == 4`)
    # and the `Renewables` dataclass exposes only 4 scalar PV fields,
    # so together they cap output at one array however many the PDF
    # lodges. Cert 0380 (single-array) is unaffected; cert 0350 is the
    # first multi-array cohort cert. Without both arrays the cascade
    # halves the PV export credit and the SAP score drops.
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000903_PDF)
    notes = ElmhurstSiteNotesExtractor(textract_pages).extract()
    # Act
    property_data = EpcPropertyDataMapper.from_elmhurst_site_notes(notes)
    # Assert
    assert property_data.sap_energy_source is not None
    pv_arrays = property_data.sap_energy_source.photovoltaic_arrays
    assert pv_arrays is not None
    assert len(pv_arrays) == 2
    # Both arrays at 1.5 kWp; order matches PDF row order.
    first_array = pv_arrays[0]
    assert first_array.peak_power == 1.5
    second_array = pv_arrays[1]
    assert second_array.peak_power == 1.5
def test_summary_0350_ext1_inherits_main_wall_insulation_thickness() -> None:
    # Arrange — cert 0350-2968-2650-2796-5255 is a multi-bp dwelling
    # (Main + 1st Extension). Its Summary §7 Walls block lodges
    # "1st Extension / As Main Wall / Yes", so the extension's walls
    # inherit Main's lodgings (CA Cavity, FE Filled Cavity + External,
    # 100 mm). The `_extract_extensions` "As Main Wall" inheritance at
    # `elmhurst_extractor.py:559-567` builds a new WallDetails by
    # copying Main's fields, but the copied field set was frozen
    # before Slice S0380.4 added `insulation_thickness_mm`. The
    # extension's `WallDetails.insulation_thickness_mm` therefore
    # fell through to its dataclass default (None) and the mapper
    # surfaced `wall_insulation_thickness=None` on bp[1]. The cascade
    # then routed Ext1's composite walls off the lodged-thickness
    # path, over-stating Ext1 `external_walls_w_per_k` against
    # worksheet line ref (29a) "External walls Ext1 5.21 0.25 1.3025".
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000903_PDF)
    notes = ElmhurstSiteNotesExtractor(textract_pages).extract()
    # Act
    property_data = EpcPropertyDataMapper.from_elmhurst_site_notes(notes)
    # Assert — Ext1 inherits Main's 100 mm thickness and the EPC
    # surfaces "100mm" on bp[1] (matching bp[0]).
    building_parts = property_data.sap_building_parts
    assert len(building_parts) == 2
    main_part, extension_part = building_parts
    assert main_part.wall_insulation_thickness == "100mm"
    assert extension_part.wall_insulation_thickness == "100mm"
def test_summary_0350_full_chain_sap_within_spec_floor_of_worksheet() -> None:
    # Arrange — cert 0350-2968-2650-2796-5255 (Summary_000903.pdf /
    # dr87-0001-000903.pdf) is the second heat-pump cert under
    # per-cert Summary-path mapper validation and the first multi-bp
    # cohort cert: Mitsubishi PUZ-WM50VHA ASHP (PCDB index 104568),
    # main dwelling + 1 extension, 2 PV arrays (2x 1.5 kWp at SE /
    # NW). The worksheet PDF "SAP value" line lodges unrounded SAP
    # **84.1367**.
    #
    # First-attempt closure (validating the structural-debt-amortizes
    # hypothesis): Slices S0380.2..S0380.6, forced by cert 0380, had
    # already put the cohort HP routing + cylinder block in place, so
    # cert 0350 needed only TWO new slices:
    #   - Slice S0380.8: extension "As Main Wall" inheritance copies
    #     `insulation_thickness_mm` (cert 0380 was single-bp and never
    #     exercised the inheritance path).
    #   - Slice S0380.9: refactor Elmhurst `Renewables` to support
    #     multiple PV arrays per dwelling (cert 0380 was single-array
    #     and never exercised multi-array PV).
    # Both fixes are structural and apply cohort-wide.
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000903_PDF)
    notes = ElmhurstSiteNotesExtractor(textract_pages).extract()
    property_data = EpcPropertyDataMapper.from_elmhurst_site_notes(notes)
    # Act
    sap_inputs = cert_to_inputs(property_data, prices=SAP_10_2_SPEC_PRICES)
    outcome = calculate_sap_from_inputs(sap_inputs)
    # Assert — ±0.07 ASHP-cohort spec-floor tolerance.
    expected_unrounded_sap = 84.1367
    sap_drift = abs(outcome.sap_score_continuous - expected_unrounded_sap)
    assert sap_drift < _ASHP_COHORT_CHAIN_TOLERANCE
def test_summary_2636_alt_wall_window_parses_alternative_wall_location() -> None:
    # Arrange — the §11 Windows block of cert 2636-0525-2600-0401-2296
    # lodges one alt-wall window (the 1.19 m² north-facing one). On
    # that row the "Alternative wall" string appears BEFORE the W×H×A
    # line, not after the frame_factor (the normal position for
    # "External wall"). The extractor's `_parse_window_from_anchors`
    # scanned only the post-frame_factor `middle` slice for
    # wall-location tokens, so the alt-wall row defaulted to
    # "External wall" and the cascade allocated the window to the main
    # wall instead of the alt-wall, leaving Main external walls W/K
    # under-deducted by ~0.54 vs worksheet (29a). Fix: also scan the
    # PRE-data slice `lines[before_start:data_idx]` for wall tokens.
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000898_PDF)
    notes = ElmhurstSiteNotesExtractor(textract_pages).extract()
    # Act
    property_data = EpcPropertyDataMapper.from_elmhurst_site_notes(notes)
    # Assert — the 1.19 m² window is recorded with wall_type =
    # "Alternative wall"; the 2.25 entry stays on "External wall".
    # NOTE(review): the lookup is keyed on rounded `window_width`
    # although the values are described as areas — confirm which
    # quantity the field carries. Later windows with the same key
    # overwrite earlier ones.
    wall_type_by_size = {}
    for window in property_data.sap_windows:
        wall_type_by_size[round(window.window_width, 2)] = window.window_wall_type
    assert wall_type_by_size[1.19] == "Alternative wall"
    assert wall_type_by_size[2.25] == "External wall"  # main-wall windows unchanged
def test_summary_2225_no_showers_lodged_resolves_to_zero_counts() -> None:
    # Arrange — the Summary §1x Baths and Showers block for cert
    # 2225-3062-8205-2856-7204 lodges 0 baths and ZERO showers (no
    # shower rows at all). The Summary mapper's logic at
    # `mapper.py:3536-3537` predicates the count assignment on
    # `has_electric_shower`: when no electric shower is detected the
    # counts collapse to None. Cert 2225 has no showers at all, which
    # is not the same as "non-electric showers"; the None values then
    # drive the cascade's default-1-mixer assumption and over-count HW
    # kWh. Same disposition the API path received in slice
    # 102f-prep.8 (commit 1d5183c6: "API mapper resolves
    # shower_outlets=None → 0 mixers").
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000900_PDF)
    notes = ElmhurstSiteNotesExtractor(textract_pages).extract()
    # Pre-condition: §1x lodges zero showers (proves the test sees the
    # same no-showers fixture the cascade does).
    lodged_showers = notes.baths_and_showers.showers
    assert len(lodged_showers) == 0
    # Act
    property_data = EpcPropertyDataMapper.from_elmhurst_site_notes(notes)
    # Assert — zero-shower lodgings resolve to explicit 0 counts (not
    # None) so the cascade does not default-assume a mixer.
    assert property_data.sap_heating.electric_shower_count == 0
    assert property_data.sap_heating.mixer_shower_count == 0
def test_summary_2225_full_chain_sap_within_spec_floor_of_worksheet() -> None:
    # Arrange — cert 2225-3062-8205-2856-7204 (Summary_000900.pdf):
    # Mitsubishi PUZ-WM50VHA, single-bp single-array PV (3.28 kWp SE),
    # ZERO showers lodged. Worksheet "SAP value" 88.7921. Slice
    # S0380.11 closed the zero-shower defaulting bug (None → 0 mixers
    # for cohort certs that lodge no showers), with cert 2225 as the
    # forcing function. Same disposition the API path received in
    # slice 102f-prep.8 (commit 1d5183c6).
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000900_PDF)
    notes = ElmhurstSiteNotesExtractor(textract_pages).extract()
    property_data = EpcPropertyDataMapper.from_elmhurst_site_notes(notes)
    # Act
    sap_inputs = cert_to_inputs(property_data, prices=SAP_10_2_SPEC_PRICES)
    outcome = calculate_sap_from_inputs(sap_inputs)
    # Assert — ±0.07 ASHP-cohort spec-floor tolerance.
    expected_unrounded_sap = 88.7921
    sap_drift = abs(outcome.sap_score_continuous - expected_unrounded_sap)
    assert sap_drift < _ASHP_COHORT_CHAIN_TOLERANCE
def test_summary_2636_full_chain_sap_within_spec_floor_of_worksheet() -> None:
    # Arrange — cert 2636-0525-2600-0401-2296 (Summary_000898.pdf):
    # Mitsubishi PUZ-WM50VHA, mid-terrace house with **alt-wall +
    # cantilever** — the most complex geometry in the ASHP cohort.
    # Worksheet "SAP value" lodges 86.2641.
    #
    # Closed by two combined slices:
    #   - S0380.12: alt-wall window-location parser fix (walls W/K
    #     20.5595 → 20.0240 = worksheet exact).
    #   - S0380.13: cantilever gate accepts the "House" descriptive
    #     form in addition to the schema enum "0", so the Summary
    #     mapper's descriptive property_type triggers the cantilever
    #     detection that slice 102f-prep.9 added on the API path.
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000898_PDF)
    notes = ElmhurstSiteNotesExtractor(textract_pages).extract()
    property_data = EpcPropertyDataMapper.from_elmhurst_site_notes(notes)
    # Act
    sap_inputs = cert_to_inputs(property_data, prices=SAP_10_2_SPEC_PRICES)
    outcome = calculate_sap_from_inputs(sap_inputs)
    # Assert — ±0.07 ASHP-cohort spec-floor tolerance.
    expected_unrounded_sap = 86.2641
    sap_drift = abs(outcome.sap_score_continuous - expected_unrounded_sap)
    assert sap_drift < _ASHP_COHORT_CHAIN_TOLERANCE
def test_summary_2636_thermal_bridging_excludes_alt_wall_window_opening_per_sap_10_2_appendix_k() -> None:
    # Arrange — cert 2636 has BP0 with an alt-wall (gross 12.76 m²)
    # carrying one 1.19 m² alt-wall window (`window_wall_type=2`).
    #
    # SAP 10.2 Appendix K eqn (K2) p.84: HTB = y × Σ(Aexp), where Aexp
    # is "the total area of external elements calculated at worksheet
    # (31)". Worksheet line 187 (cert 2636 dr87-0001-000898) labels
    # (31) "Total NET area of external elements" — net of openings.
    # Cert 2636 worksheet (31) = 160.33 m² = 47.70 main net + 11.57
    # alt net + 42.92 roof + 39.18 ground floor + 3.74 cantilever +
    # 11.52 windows + 3.70 doors.
    #
    # Before S0380.31 the cascade summed the alt-wall at its 12.76 m²
    # gross (no opening deduction): (31) was 161.52 → (36) = 24.228
    # against worksheet (36) = 24.0495, Δ +0.1785 W/K. That drift
    # propagated through (39) HTC → MIT → space heating and left the
    # cert at Δ -0.015 SAP — the only ASHP cohort cert above the 1e-4
    # floor.
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000898_PDF)
    notes = ElmhurstSiteNotesExtractor(textract_pages).extract()
    property_data = EpcPropertyDataMapper.from_elmhurst_site_notes(notes)
    # Act
    sap_inputs = cert_to_inputs(property_data, prices=SAP_10_2_SPEC_PRICES)
    outcome = calculate_sap_from_inputs(sap_inputs)
    # Assert — worksheet (36) = 24.0495 W/K to 4 d.p.; the full SAP
    # cascade lands within the 1e-4 spec-precision floor of the
    # worksheet's 86.2641.
    bridging_w_per_k = outcome.intermediate["thermal_bridging_w_per_k"]
    assert abs(bridging_w_per_k - 24.0495) <= 1e-4
    assert abs(outcome.sap_score_continuous - 86.2641) <= 1e-4
def test_summary_mapper_raises_on_unmapped_cylinder_size_label() -> None:
    # Arrange — take a real cohort cert's extracted site notes and
    # inject an unmapped §15.1 "Cylinder Size" label ("Tiny" — not in
    # the lookup dict). `from_elmhurst_site_notes` must raise
    # `UnmappedElmhurstLabel` instead of silently returning None for
    # `cylinder_size` — the failure mode that hid cert 9418's "Large"
    # miss until Slice S0380.14 surfaced it as a Δ +2.60 SAP gap.
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000899_PDF)
    notes = ElmhurstSiteNotesExtractor(textract_pages).extract()
    notes.water_heating.cylinder_size_label = "Tiny"
    # Act / Assert — the raise carries the offending field and value.
    with pytest.raises(UnmappedElmhurstLabel) as raised:
        EpcPropertyDataMapper.from_elmhurst_site_notes(notes)
    error = raised.value
    assert error.field == "cylinder_size"
    assert error.value == "Tiny"
def test_summary_mapper_raises_on_unmapped_cylinder_insulation_label() -> None:
    # Arrange — mirror test for the §15.1 "Insulated" label dict:
    # inject a label that has no mapping and expect a strict raise.
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000899_PDF)
    notes = ElmhurstSiteNotesExtractor(textract_pages).extract()
    notes.water_heating.cylinder_insulation_label = "Polyester wool"
    # Act / Assert — the raise carries the offending field and value.
    with pytest.raises(UnmappedElmhurstLabel) as raised:
        EpcPropertyDataMapper.from_elmhurst_site_notes(notes)
    error = raised.value
    assert error.field == "cylinder_insulation"
    assert error.value == "Polyester wool"
def test_all_seven_ashp_cohort_certs_extract_without_unmapped_label_raise() -> None:
    # Arrange — coverage forcing function: every cohort cert must
    # extract through `from_elmhurst_site_notes` without triggering an
    # `UnmappedElmhurstLabel` raise from any strict helper. New cohort
    # certs added in subsequent slices fall under the same gate, and
    # any future Elmhurst-PDF variant with an unmapped label fails
    # this test until the missing dict entry is added.
    cohort_pdfs = (
        _SUMMARY_000899_PDF, _SUMMARY_000903_PDF, _SUMMARY_000900_PDF,
        _SUMMARY_000898_PDF, _SUMMARY_000901_PDF, _SUMMARY_000904_PDF,
        _SUMMARY_000902_PDF,
    )
    # Act / Assert
    for pdf in cohort_pdfs:
        pages = _summary_pdf_to_textract_style_pages(pdf)
        site_notes = ElmhurstSiteNotesExtractor(pages).extract()
        # Strict mapper run — raises if any strict helper hits an
        # unknown label. Re-raise as an assertion that names the
        # fixture, field and value so a failure inside the loop is
        # attributable without re-running under a debugger.
        try:
            EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
        except UnmappedElmhurstLabel as exc:
            raise AssertionError(
                f"{pdf}: unmapped Elmhurst label for field "
                f"{exc.field!r}: {exc.value!r}"
            ) from exc
def test_summary_3336_triple_glazed_windows_route_to_code_6() -> None:
    """Triple-glazed §11 lodgings on cert 3336 must map to glazing code 6.

    Cert 3336-2825-9400-0512-8292's Summary §11 lodges "Triple post or
    during 2022" on every window, and dr87-0001-000888 confirms
    "Window, Triple glazed" on every line. The Elmhurst mapper must
    surface SAP 10.2 Table U2 code 6 so the §5 (66)..(67) daylight
    factor uses Table 6b col light g_L = 0.70 instead of the default
    DG g_L = 0.80. The +0.0274 SAP regression this slice closes is
    driven by that daylight-factor offset, which the default-DG
    fallback silently masked.
    """
    # Arrange
    summary_pdf = _FIXTURES / "Summary_000888.pdf"
    textract_pages = _summary_pdf_to_textract_style_pages(summary_pdf)
    notes = ElmhurstSiteNotesExtractor(textract_pages).extract()

    # Act
    property_data = EpcPropertyDataMapper.from_elmhurst_site_notes(notes)

    # Assert — every window on cert 3336 is triple-glazed → code 6.
    assert property_data.sap_windows, "expected windows on cert 3336"
    for window in property_data.sap_windows:
        assert window.glazing_type == 6
def test_summary_000474_double_glazed_windows_route_to_code_3() -> None:
    """Boiler-cohort cert 000474 double glazing must map to glazing code 3.

    Summary_000474.pdf lodges "Double between 2002 and 2021" / "Double
    with unknown install date" on every window. Both route to SAP 10.2
    Table U2 code 3 (DG air-filled post-2002) per the
    `_ELMHURST_GLAZING_LABEL_TO_SAP10` dict. Code 3 has the same
    Table 6b col light g_L = 0.80 as the default, so the cascade SAP is
    unchanged for these certs; the integer pin guards against future
    cascade consumers that key on the subcode (e.g. a U-value default
    lookup for absent `WindowTransmissionDetails`).
    """
    # Arrange
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000474_PDF)
    notes = ElmhurstSiteNotesExtractor(textract_pages).extract()

    # Act
    property_data = EpcPropertyDataMapper.from_elmhurst_site_notes(notes)

    # Assert
    assert property_data.sap_windows, "expected windows on cert 000474"
    for w in property_data.sap_windows:
        assert w.glazing_type == 3, (
            f"expected DG post-2002 code 3, got {w.glazing_type!r}"
        )
def test_elmhurst_glazing_label_full_coverage_per_sap10_table_6b() -> None:
    """Pin every known Elmhurst §11 glazing label to its cascade code.

    The labels are the RdSAP-21 §11 glazing-type lodgings that the
    double_glazing recommendation fixture (Summary_001431) exercises in
    one cert; this test feeds them straight to
    `_elmhurst_glazing_type_code`. Each label must resolve to the
    SAP 10.2 Table 6b cascade code whose `_G_LIGHT_BY_GLAZING_CODE`
    daylight factor g_L is correct for the glazing family: single 0.90,
    double / secondary 0.80, triple 0.70 (the lodged manufacturer U/g
    drive §3/§6; the code only sets g_L).
    """
    # Arrange
    expected: dict[str, int] = {
        # Single family
        "Single glazing": 1,
        "Single glazing, known data": 15,
        # Double family
        "Double pre 2002": 2,
        "Double between 2002 and 2021": 3,
        "Double with unknown install date": 3,
        "Double glazing, known data": 3,
        "Double post or during 2022": 5,
        # Secondary family
        "Secondary glazing": 7,
        "Secondary glazing - Normal emissivity": 11,
        "Secondary glazing - Low emissivity": 12,
        # Triple family
        "Triple pre 2002": 10,
        "Triple between 2002 and 2021": 9,
        "Triple post or during 2022": 6,
        "Triple with unknown install date": 6,
    }

    # Act / Assert
    for label in expected:
        code = expected[label]
        resolved = _elmhurst_glazing_type_code(label)
        assert resolved == code, (
            f"{label!r} should map to SAP 10.2 Table 6b code {code}"
        )
def test_extension_party_wall_type_read_independently_of_as_main_wall() -> None:
    """An extension's party wall type must not be inherited from Main.

    RdSAP 10 §3.3: "As Main Wall: Yes" inherits only the external wall
    CONSTRUCTION; the party wall type is lodged separately per building
    part and may differ. The double_glazing fixture (Summary_001431)
    lodges Main party "CU Cavity masonry unfilled" (SAP10
    wall_construction 4 → u_party_wall 0.5) but the 1st Extension party
    "U Unable to determine" (→ wall_construction 0 → RdSAP default
    u_party_wall 0.25), even though the extension is "As Main Wall:
    Yes". Pre-fix the extension inherited the Main's party type (both
    0.5), inflating worksheet (32) party heat loss.
    """
    # Arrange
    fixture_pdf = _FIXTURES / "Summary_001431_double_glazing.pdf"
    textract_pages = _summary_pdf_to_textract_style_pages(fixture_pdf)
    notes = ElmhurstSiteNotesExtractor(textract_pages).extract()

    # Act
    property_data = EpcPropertyDataMapper.from_elmhurst_site_notes(notes)

    # Assert — Main BP keeps cavity-unfilled (4); the extension BP gets
    # the "Unable to determine" sentinel (0), a distinct party wall U.
    party_codes = []
    for building_part in property_data.sap_building_parts:
        party_codes.append(building_part.party_wall_construction)
    assert party_codes == [4, 0], (
        f"expected Main=4 (CU, U=0.5) + Ext=0 (Unable, U=0.25), got {party_codes}"
    )

    # The two codes map to different SAP party-wall U-values.
    for code, expected_u in ((4, 0.5), (0, 0.25)):
        assert abs(u_party_wall(code) - expected_u) <= 1e-9
def test_summary_mapper_raises_on_unmapped_glazing_type_label() -> None:
    """Strict-coverage gate for the glazing-type helper.

    Same gate as the cylinder-size helper (Slice S0380.15 + S0380.16):
    silently routing an unknown glazing variant to a SAP default int
    hid the +0.05 SAP regression on 13 triple-glazed certs until the
    cohort-2 first-attempt probe. After this slice, an unrecognised
    lodging surfaces immediately at extraction time.
    """
    # Arrange — mutate the first window's glazing_type to an unmapped
    # string.
    unmapped_label = "Quintuple glazed with helium"
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000899_PDF)
    notes = ElmhurstSiteNotesExtractor(textract_pages).extract()
    notes.windows[0].glazing_type = unmapped_label

    # Act / Assert
    with pytest.raises(UnmappedElmhurstLabel) as raised:
        EpcPropertyDataMapper.from_elmhurst_site_notes(notes)
    error = raised.value
    assert error.field == "glazing_type"
    assert error.value == unmapped_label
def test_summary_000565_extractor_finds_electric_shower_in_section_1x_0() -> None:
    """The extractor must find cert 000565's electric shower in §1x.0.

    SAP 10.2 Appendix J §J2 step 2a (PDF p.81) routes baths through
    `N_bath = 0.13 N + 0.19` when a shower is also present, but
    `0.35 N + 0.50` when no shower is present — a ~2.7× swing in (42b)m
    that compounds into worksheet (45)m energy content.

    Cert 000565 lodges one instantaneous electric shower in Summary
    §1x.0 Baths and Showers:

        Description   Type              Connected
        1             Electric shower   None

    The extractor's `_extract_baths_and_showers` walks 3-tuples after
    "Connected", but it locates "Connected" via
    `self._lines.index("Connected")`, which is a global search. Cert
    000565 has the substring "Connected" earlier in the document
    (§3 building parts list "Connected" / "Exposed" / "Sheltered" wall
    elevation flags), so `idx` lands on a non-section anchor and the
    walk never reaches the shower row.

    Worksheet U985-0001-000565 line (42b) Jan = 35.0602 L/day requires
    the bath+shower branch (N_bath = 0.13 × 3.1578 + 0.19 = 0.6005);
    falling through to no-shower (N_bath = 0.35 × 3.1578 + 0.50 =
    1.6052) yields ~93.7 L/day — the 2.67× over-count behind (45)m's
    +903 kWh/yr cascade gap for cert 000565.

    Fix: locate "Connected" within the section bounded by
    "1x.0 Baths and Showers" and "18.0 Flue Gas Heat Recovery System"
    (both unique anchors in the Elmhurst Summary PDF).
    """
    # Arrange — Summary PDF tokenized as the extractor expects.
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)

    # Act
    notes = ElmhurstSiteNotesExtractor(textract_pages).extract()

    # Assert — extractor finds the single electric shower lodged in
    # §1x.0, not the empty list it returns when "Connected" anchors
    # on the building-parts section.
    showers = notes.baths_and_showers.showers
    assert len(showers) == 1, (
        f"expected 1 shower from §1x.0; got "
        f"{len(showers)} "
        f"({showers!r})"
    )
    only_shower = showers[0]
    assert only_shower.shower_number == 1
    assert only_shower.outlet_type == "Electric shower"
    assert only_shower.connected == "None"
def test_summary_000565_ext1_wall_construction_routes_to_stone_granite() -> None:
    """Cert 000565 Ext1 "SG Stone" lodging must map to wall code 1.

    RdSAP 10 §3.3 + Table 4: Ext1 lodges "SG Stone: granite or
    whinstone", which routes to SAP10 WALL_STONE_GRANITE=1.
    Pre-S0380.64 the label fell through silent-None, losing the Ext1
    wall channel (worksheet line 29a: 91.83 m² × U=1.7 = 156.11 W/K)
    from the cascade fabric subtotal.
    """
    # Arrange
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)
    notes = ElmhurstSiteNotesExtractor(textract_pages).extract()

    # Act
    property_data = EpcPropertyDataMapper.from_elmhurst_site_notes(notes)

    # Assert
    ext1 = property_data.sap_building_parts[1]
    assert ext1.wall_construction == 1
def test_summary_000565_ext3_ext4_wall_constructions_route_to_basement_code_6() -> None:
    """Cert 000565 Ext3 + Ext4 "B Basement wall" must map to code 6.

    RdSAP 10 §5.17 / Table 23: both extensions lodge "B Basement wall".
    The canonical `BASEMENT_WALL_CONSTRUCTION_CODE=6` triggers the
    cascade's `part.main_wall_is_basement` route to `u_basement_wall`
    at heat_transmission.py:640. Pre-S0380.64 silent-None bypassed the
    basement-wall override entirely.
    """
    # Arrange
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)
    notes = ElmhurstSiteNotesExtractor(textract_pages).extract()

    # Act
    property_data = EpcPropertyDataMapper.from_elmhurst_site_notes(notes)

    # Assert — BP[3] is Ext3 and BP[4] is Ext4.
    for bp_index in (3, 4):
        assert property_data.sap_building_parts[bp_index].wall_construction == 6
        assert property_data.sap_building_parts[bp_index].main_wall_is_basement is True
def test_summary_000565_extractor_finds_curtain_wall_age_post_2023_on_bp_2_ext2() -> None:
    """The extractor must surface the per-BP `Curtain Wall Age` line.

    Summary §7 per-BP Wall block carries a `Curtain Wall Age` line when
    `Type: CW Curtain Wall` is lodged. Cert 000565 Ext2 (BP[2]) is the
    cohort fixture; it lodges

        Type               CW Curtain Wall
        Curtain Wall Age   Post 2023
        U-value Known      No

    Per RdSAP 10 §5.18 (PDF p.48), the U-value of a curtain wall is
    keyed on the per-BP `Curtain Wall Age` (Post 2023 → Table 24 window
    row; Pre 2023 → 2.0 W/m²K), NOT on the dwelling-wide
    `construction_age_band`. The extractor must surface this field so
    the mapper + cascade can dispatch correctly. Pre-S0380.85 the line
    was silently dropped and `wall_construction=9` fell through to the
    cavity-default Table 6 row.

    Pure extractor data-completion step — downstream cascade impact
    lands when the mapper threads the new field through and `u_wall`
    grows a Curtain Wall branch (follow-up sub-step in the same slice).
    """
    # Arrange
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)

    # Act
    notes = ElmhurstSiteNotesExtractor(textract_pages).extract()

    # Assert — BP[2] is Ext2 (index 1 in `extensions`).
    ext2_walls = notes.extensions[1].walls
    assert ext2_walls.wall_type == "CW Curtain Wall", (
        f"Ext2 wall_type = {ext2_walls.wall_type!r}; expected 'CW Curtain Wall'"
    )
    assert ext2_walls.curtain_wall_age == "Post 2023", (
        f"Ext2 curtain_wall_age = {ext2_walls.curtain_wall_age!r}; "
        f"expected 'Post 2023'"
    )

    # Negative case — BPs without Curtain Wall don't have a Curtain
    # Wall Age line; the field must be None (not the empty-string
    # sentinel `_local_str` returns).
    main_walls = notes.walls
    assert main_walls.curtain_wall_age is None, (
        f"Main wall (non-CW) curtain_wall_age = "
        f"{main_walls.curtain_wall_age!r}; expected None"
    )
def test_summary_000565_mapper_threads_curtain_wall_age_post_2023_to_bp_2_sap_building_part() -> None:
    """The mapper must carry `curtain_wall_age` onto the `SapBuildingPart`.

    The Elmhurst mapper builds a `SapBuildingPart` per BP from the
    extracted `WallDetails`. `curtain_wall_age` must be threaded
    through so the heat-transmission cascade can dispatch on it (per
    [[reference-unmapped-api-code]] strict-plumbing pattern). Cert
    000565 BP[2] Ext2 is the fixture: `wall_construction=9`
    (WALL_CURTAIN) + `curtain_wall_age="Post 2023"`.

    Per RdSAP 10 §5.18 + §1.5: a curtain wall can be a main wall, an
    alt wall, or absorbed into the prevailing wall when <10% area.
    This slice scopes to the main-wall path (cert 000565 lodges CW
    only as the BP[2] main wall, never as an alt sub-area).
    """
    # Arrange
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)
    notes = ElmhurstSiteNotesExtractor(textract_pages).extract()

    # Act
    property_data = EpcPropertyDataMapper.from_elmhurst_site_notes(notes)

    # Assert
    bp_2 = property_data.sap_building_parts[2]
    assert bp_2.wall_construction == 9, (
        f"BP[2] wall_construction = {bp_2.wall_construction!r}; "
        f"expected 9 (WALL_CURTAIN)"
    )
    assert bp_2.curtain_wall_age == "Post 2023", (
        f"BP[2] curtain_wall_age = {bp_2.curtain_wall_age!r}; "
        f"expected 'Post 2023'"
    )

    # Non-CW BPs preserve curtain_wall_age=None (no per-BP signal).
    for non_cw_index in (0, 1):
        assert property_data.sap_building_parts[non_cw_index].curtain_wall_age is None
def test_summary_000565_ext2_curtain_wall_routes_to_u_value_1p4_per_rdsap_10_section_5_18() -> None:
    """End-to-end cascade pin for the §5.18 Curtain Wall dispatch.

    With `curtain_wall_age="Post 2023"` plumbed through extractor +
    mapper + `u_wall` `WALL_CURTAIN` branch, the
    `heat_transmission_from_cert` walls subtotal on cert 000565 must
    reflect the §5.18 Curtain Wall U=1.4 W/m²K on BP[2] Ext2.

    Pre-S0380.85: BP[2] cascade U=0.60 (Cavity default, age H), Δ 0.80
    W/m²K vs worksheet U=1.40. The BP[2] Ext2 gross wall area on cert
    000565 multiplied by this U-delta accounts for the documented
    112.2 W/K contribution to the walls subtotal residual.

    The assertion is a lower bound: the walls subtotal must move
    materially toward the worksheet target 604.07 W/K (from
    pre-S0380.85's 443 W/K). The remaining ~50 W/K gap is the BP[0]
    Main alt1 thin-wall stone granite cascade gap — out of scope for
    this slice; closes in follow-up S0380.86.
    """
    # Arrange
    from domain.sap10_calculator.worksheet.heat_transmission import (
        heat_transmission_from_cert,
    )

    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)
    notes = ElmhurstSiteNotesExtractor(textract_pages).extract()
    property_data = EpcPropertyDataMapper.from_elmhurst_site_notes(notes)

    # Act
    ht = heat_transmission_from_cert(property_data)

    # Assert — pre-S0380.85 cascade had walls 443 W/K. Curtain Wall
    # closure adds ~112 W/K (worksheet target 604 W/K). Lower-bound
    # 540 W/K is a robust gate that still leaves headroom for the
    # remaining BP[0] alt1 thin-wall gap; the cascade reaches ~555.
    minimum_walls_w_per_k = 540.0
    assert ht.walls_w_per_k >= minimum_walls_w_per_k, (
        f"walls_w_per_k = {ht.walls_w_per_k:.2f}; expected ≥540 after "
        f"Curtain Wall §5.18 dispatch (pre-S0380.85 baseline was 443)"
    )
def test_summary_000565_mapper_routes_alt_wall_thickness_120mm_to_wall_thickness_mm_field() -> None:
    """The alt-wall "Thickness" line must land on `wall_thickness_mm`.

    The Summary §7 "Alternative Wall N Thickness" line is the WALL
    thickness, NOT an insulation thickness. Cert 000565 BP[0] Main
    alt1 lodges

        Alternative Wall 1 Type         SG Stone: granite or whinstone
        Alternative Wall 1 Insulation   A As Built
        Alternative Wall 1 Dry-lining   Yes
        Alternative Wall 1 Thickness    120 mm

    Pre-S0380.86 `_map_elmhurst_alternative_wall` routed this 120 mm
    onto `SapAlternativeWall.wall_insulation_thickness="120"`, a
    semantic mis-name flagged in
    `[[feedback-no-misleading-insulation-type]]`. The cascade then
    mis-bucketed it as insulation (bucket 100 → _BRICK_INS_100 →
    U=0.32 at age A) instead of routing to the RdSAP 10 §5.6 thin-wall
    stone formula (U₀=3.89 → §5.8 dry-line adjustment → U=2.34,
    matching worksheet line (29a)).

    This pin asserts the mapper now lodges the wall thickness on the
    new `SapAlternativeWall.wall_thickness_mm` field, leaving
    `wall_insulation_thickness=None` (the As-Built lodging carries no
    insulation thickness).
    """
    # Arrange
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)
    notes = ElmhurstSiteNotesExtractor(textract_pages).extract()

    # Act
    property_data = EpcPropertyDataMapper.from_elmhurst_site_notes(notes)

    # Assert
    main_part = property_data.sap_building_parts[0]
    alt1 = main_part.sap_alternative_wall_1
    assert alt1 is not None
    assert alt1.wall_construction == 1, (
        f"BP[0] alt1 wall_construction = {alt1.wall_construction!r}; "
        f"expected 1 (WALL_STONE_GRANITE)"
    )
    assert alt1.wall_thickness_mm == 120, (
        f"BP[0] alt1 wall_thickness_mm = {alt1.wall_thickness_mm!r}; "
        f"expected 120 (the lodged wall thickness, not insulation)"
    )
    assert alt1.wall_insulation_thickness is None, (
        f"BP[0] alt1 wall_insulation_thickness = "
        f"{alt1.wall_insulation_thickness!r}; expected None (As-Built "
        f"lodging carries no insulation thickness)"
    )
    assert alt1.wall_dry_lined == "Y"
def test_summary_000565_bp0_alt1_stone_granite_thin_wall_routes_to_u_value_2p34_per_rdsap_10_section_5_6() -> None:
    """End-to-end cascade pin for the §5.6 thin-wall + §5.8 dry-line path.

    With `wall_thickness_mm=120` plumbed through extractor + mapper +
    `u_wall` §5.6 thin-wall formula + §5.8 dry-line adjustment, cert
    000565 BP[0] Main alt1 cascade U-value moves from 0.32 → 2.34
    (worksheet line (29a) pin). Δ U=2.02 × area=23 m² → +46.5 W/K of
    cascade walls heat loss.

    Combined with S0380.85's Curtain Wall closure (+112 W/K), the
    cascade walls subtotal closes from 443 W/K (pre-S0380.84 baseline)
    → ~602 W/K (worksheet 604.07; <0.5% residual).

    The assertion is a one-sided lower bound of 595 W/K on the walls
    subtotal (post-S0380.85 was 555.93; this slice should bring it to
    ~602). It does not cap an overshoot above the worksheet value.
    """
    # Arrange
    from domain.sap10_calculator.worksheet.heat_transmission import (
        heat_transmission_from_cert,
    )

    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)
    notes = ElmhurstSiteNotesExtractor(textract_pages).extract()
    property_data = EpcPropertyDataMapper.from_elmhurst_site_notes(notes)

    # Act
    ht = heat_transmission_from_cert(property_data)

    # Assert — worksheet target 604.07; lower-bound 595 is a robust
    # gate that admits ≤2% residual against the worksheet pin.
    minimum_walls_w_per_k = 595.0
    assert ht.walls_w_per_k >= minimum_walls_w_per_k, (
        f"walls_w_per_k = {ht.walls_w_per_k:.2f}; expected ≥595 after "
        f"§5.6 thin-wall + §5.8 dry-line dispatch (post-S0380.85 was 555.93)"
    )
def test_summary_000565_ext1_party_wall_routes_to_cavity_filled_code_11() -> None:
    """Cert 000565 Ext1 "CF Cavity masonry filled" must map to code 11.

    RdSAP 10 §5.10 Table 15 row 3 (PDF p.42): "Cavity masonry filled ->
    U=0.2 W/m²K". The synthetic SAP10 code
    `WALL_CAVITY_FILLED_PARTY=11` (introduced S0380.91) distinguishes
    filled-cavity party walls from the construction-class-shared code 4
    (which `u_party_wall` resolves to 0.5 per Table 15 row 2). Code 11
    is party-wall-only; it never appears as a main `wall_construction`,
    so `u_wall` is unaffected.
    """
    # Arrange
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)
    notes = ElmhurstSiteNotesExtractor(textract_pages).extract()

    # Act
    property_data = EpcPropertyDataMapper.from_elmhurst_site_notes(notes)

    # Assert
    ext1 = property_data.sap_building_parts[1]
    assert ext1.party_wall_construction == 11
def test_summary_000565_ext1_party_wall_cf_routes_to_u_value_0p2() -> None:
    """Cascade integration check for slice S0380.91.

    Routes cert 000565's Summary §8.1 "CF Cavity masonry filled"
    lodgement through extractor + mapper + heat_transmission and
    verifies Ext1's party-wall U-value is 0.2 (Table 15 row 3) rather
    than the prior 0.5 (cavity-unfilled approximation). Localises the
    slice to one surface area × U product so the cascade aggregate
    movement (-28 W/K on party_walls, ~-1000 kWh of cert 000565's +1460
    SH residual) is traceable to one BP.
    """
    # Arrange
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)
    notes = ElmhurstSiteNotesExtractor(textract_pages).extract()
    property_data = EpcPropertyDataMapper.from_elmhurst_site_notes(notes)
    ext1 = property_data.sap_building_parts[1]
    construction = ext1.party_wall_construction
    assert isinstance(construction, int)

    # Act
    party_u = u_party_wall(party_wall_construction=construction)

    # Assert
    deviation = abs(party_u - 0.2)
    assert deviation <= 1e-4
def test_summary_000565_section_12_2_pulse_pressure_test_ap4_extracted() -> None:
    """The extractor must read the §12.2 AP4 value, not just the method.

    Cert 000565 §12.2 Air Pressure Test lodges:

        Test Method:                  Pulse
        Pressure Test Result (AP4):   2.00

    SAP 10.2 §2 line (17a) "Air permeability value, AP4, (m³/h/m²)" is
    the measured air permeability at 4 Pa from the low-pressure pulse
    technique. The cascade's
    `ventilation_from_inputs(air_permeability_ap4=...)` consumes it via
    line (18) = 0.263 × AP4^0.924 + (8). Pre-slice the extractor read
    only the Test Method string and silently dropped the AP4 value, so
    the cascade fell back to the components-based (16) infiltration
    rate (+0.375 ach over worksheet).
    """
    # Arrange
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)

    # Act
    notes = ElmhurstSiteNotesExtractor(textract_pages).extract()

    # Assert
    ventilation = notes.ventilation
    assert ventilation.pressure_test_method == "Pulse"
    lodged_ap4 = ventilation.air_permeability_ap4_m3_h_m2
    assert lodged_ap4 is not None
    assert abs(lodged_ap4 - 2.0) <= 1e-4
def test_summary_000565_air_permeability_ap4_routes_to_sap_ventilation_field() -> None:
    """Mapper plumbing for SAP 10.2 §2 (17a).

    The Elmhurst `VentilationAndCooling.air_permeability_ap4_m3_h_m2`
    field carries through to
    `SapVentilation.air_permeability_ap4_m3_h_m2` so the
    `cert_to_inputs` ventilation cascade can read it and pass it into
    `ventilation_from_inputs(air_permeability_ap4=...)`.
    """
    # Arrange
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)
    notes = ElmhurstSiteNotesExtractor(textract_pages).extract()

    # Act
    property_data = EpcPropertyDataMapper.from_elmhurst_site_notes(notes)

    # Assert
    sap_ventilation = property_data.sap_ventilation
    assert sap_ventilation is not None
    mapped_ap4 = sap_ventilation.air_permeability_ap4_m3_h_m2
    assert mapped_ap4 is not None
    assert abs(mapped_ap4 - 2.0) <= 1e-4
def test_summary_000565_section_12_1_extracts_mechanical_extract_decentralised_mev_dc_kind() -> None:
    """The extractor must read the §12.1 Mechanical Ventilation Type.

    Cert 000565 §12.1 Mechanical Ventilation lodges:

        Mechanical Ventilation:        Yes
        Mechanical Ventilation Type:   Mechanical extract, decentralised
                                       (MEV dc)

    SAP 10.2 §2 line (23a) for MEV: "system throughput = 0.5 ach"; the
    effective ach formula (25) routes through (24c) "whole-house
    extract ventilation or PIV from outside" — `(22b)m + 0.5 × (23b)`
    when (22b) ≥ 0.5×(23b). Pre-slice the extractor read only the
    "Mechanical Ventilation" yes/no bool and dropped the Type string,
    so the cascade defaulted to mv_kind=NATURAL → (24d) formula.
    """
    # Arrange
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)

    # Act
    notes = ElmhurstSiteNotesExtractor(textract_pages).extract()

    # Assert
    ventilation = notes.ventilation
    assert ventilation.mechanical_ventilation is True
    lodged_type = ventilation.mechanical_ventilation_type
    assert lodged_type == "Mechanical extract, decentralised (MEV dc)"
def test_summary_000565_detailed_rr_residual_area_closes_total_external_area_per_rdsap_10_section_3_10_1() -> None:
    """Detailed-RR mode must include the §3.10.1 residual roof area.

    RdSAP 10 §3.10.1 (PDF p.24) "Default U-values of the roof rooms":

        "The residual area (area of roof less the floor area of
        room(s)-in-roof) has a U-value from Table 16 : Roof U-values
        when loft insulation thickness is known according to its
        insulation thickness if at least half the area concerned is
        accessible, otherwise it is the default for the age band of
        the original property or extension."

    Worksheet pattern (cert 000565 BP[0]): "Roof room Main remaining
    area" 43.97 m² × U=0.35 (Table 18 col 4 age H default).

    Pre-slice S0380.95 the cascade computed residual area ONLY for
    Simplified RR mode (in `_part_geometry`); the Detailed-RR branch in
    `heat_transmission` iterated `rir.detailed_surfaces` and missed the
    residual entirely. Cert 000565 routes all 5 BPs through Detailed
    mode (mapper translates Simplified-Summary lodgements to
    `SapRoomInRoofSurface` records), so cascade
    total_external_element_area_m2 was 779.27 m² vs worksheet (31) =
    857.64 m² (Δ 78.37 m² → thermal_bridging under by ~11.76 W/K).
    """
    # Arrange
    from domain.sap10_calculator.worksheet.heat_transmission import (
        heat_transmission_from_cert,
    )

    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)
    notes = ElmhurstSiteNotesExtractor(textract_pages).extract()
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(notes)

    # Act
    ht = heat_transmission_from_cert(epc, door_count=epc.door_count or 0)

    # Assert — cascade closes to within ±10 m² of worksheet (31). The
    # residual sums roughly to BP[0]'s 43.97 m² + BP[1]'s ~22 m² +
    # BP[3]'s ~17 m² + BP[4]'s small contribution; remaining residual
    # (BP[1] ~+3.7 m² over) traces to the spec's ambiguous Detailed-
    # mode residual formula for extensions with multi-storey heights.
    # Only the lower bound is enforced here.
    minimum_total_area_m2 = 845.0
    assert ht.total_external_element_area_m2 >= minimum_total_area_m2, (
        f"cascade total_external_element_area_m2={ht.total_external_element_area_m2:.4f}; "
        f"expected ≥845 m² after §3.10.1 Detailed-RR residual area closure "
        f"(pre-slice was 779.27 m² vs worksheet 857.64)"
    )
def test_summary_000565_a_rr_shell_rounded_2_dp_closes_roof_w_per_k_per_rdsap_10_section_15() -> None:
    """A_RR_shell must round to 2 d.p. before the residual subtraction.

    RdSAP 10 §15 "Rounding of data" (PDF p.66):

        "For consistency of application, after expanding the RdSAP data
        into SAP data using the rules in this Appendix, the data are
        rounded before being passed to the SAP calculator. The rounding
        rules are: ... All element areas (gross) including window areas
        and conservatory wall area: 2 d.p."

    The §3.9.1 / §3.10.1 simplified-formula A_RR_shell = 12.5 ×
    √(A_RR_floor / 1.5) produces a gross element area for the
    room-in-roof shell. Pre-slice the cascade kept the raw float (e.g.
    cert 000565 BP[0]: 12.5 × √(45/1.5) = 68.46532...), then subtracted
    lodged wall surfaces to obtain the residual roof area. The
    worksheet rounds A_RR_shell to 2 d.p. (68.47) BEFORE the
    subtraction — which moves Main's residual from 43.97 - 0.0047 =
    43.9653 (cascade) to exactly 43.97 (worksheet) per RdSAP 10 §15.

    Cert 000565 has three BPs that hit this path (Main, Ext1, Ext3 —
    all have detailed wall surfaces with no `slope` / `flat_ceiling` /
    `stud_wall` lodgement, so the §3.10.1 residual fires). Each
    contributes a sub-rounding residual ≤ 0.005 m² × U_RR_default that
    the unrounded cascade was missing:

        BP[0] Main: A_RR=68.4653 raw → 68.47 rounded; residual
                    43.9653 → 43.97 (+0.0047 m² × U=0.35 = +0.0016 W/K)
        BP[1] Ext1: A_RR=59.5119 raw → 59.51 rounded; residual
                    18.2519 → 18.25 (-0.0019 m² × U=0.35 = -0.00068 W/K)
        BP[3] Ext3: A_RR=57.7350 raw → 57.74 rounded; residual
                    17.3450 → 17.35 (+0.005 m² × U=0.35 = +0.0017 W/K)

    Worksheet (30) per-line breakdown (U985-0001-000565.pdf):

        Main remaining area                 43.97 × 0.35 = 15.3895
        Ext1 remaining area                 18.25 × 0.35 =  6.3875
        Ext2 stud + slope + external roof                = 14.9800
        Ext3 remaining area                 17.35 × 0.35 =  6.0725
        Ext4 flat ceilings + slope                       =  8.5500
        Σ (30)                                           = 51.3795
    """
    # Arrange
    from domain.sap10_calculator.worksheet.heat_transmission import (
        heat_transmission_from_cert,
    )

    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)
    notes = ElmhurstSiteNotesExtractor(textract_pages).extract()
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(notes)

    # Act
    ht = heat_transmission_from_cert(epc, door_count=epc.door_count or 0)

    # Assert — cascade roof_w_per_k pins to worksheet (30) Σ at abs=1e-4.
    expected_roof_w_per_k = 51.3795
    diff = abs(ht.roof_w_per_k - expected_roof_w_per_k)
    tolerance = 1e-4
    assert diff <= tolerance, (
        f"cascade roof_w_per_k={ht.roof_w_per_k:.6f} vs worksheet (30) Σ="
        f"{expected_roof_w_per_k}; diff={diff:.6f}. Per RdSAP 10 §15 (p.66) "
        f"the A_RR_shell formula 12.5 × √(A_RR_floor / 1.5) must round to "
        f"2 d.p. before the §3.10.1 residual subtraction."
    )
def test_summary_000565_ext2_stud_wall_2_extracts_400_plus_mm_pur_or_pir_lodgement() -> None:
    # Arrange — cert 000565 Summary §8.1 BP[2] Ext2 (Detailed) lodges
    # "Stud Wall 2: 2.00 × 2.00, 400+ mm, PUR or PIR" with Default
    # U-value 0.10. Pre-slice the extractor regex `^\d+\s*mm$` failed
    # to match "400+ mm" (the trailing "+" tripped the digit-only
    # anchor) so the insulation token was silently dropped; and the
    # type allow-list `("Mineral or EPS", "PUR", "PIR")` failed to
    # match "PUR or PIR" (the conjunction is the actual Summary text).
    # Cascade fell through to Table 17 row 0 (uninsulated) → U=2.30
    # against worksheet 0.10, over-counting Stud Wall 2 by ~8.80 W/K.
    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)

    # Act
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()

    # Assert — Ext2 is index 1 in `extensions`.
    ext2_rir = site_notes.extensions[1].room_in_roof
    assert ext2_rir is not None
    # Look the surface up with a default so a missing row fails with
    # the list of extracted names instead of a bare StopIteration.
    stud_wall_2 = next(
        (s for s in ext2_rir.surfaces if s.name == "Stud Wall 2"), None
    )
    assert stud_wall_2 is not None, (
        f"no 'Stud Wall 2' surface extracted for Ext2; got "
        f"{[s.name for s in ext2_rir.surfaces]!r}"
    )
    assert stud_wall_2.insulation == "400+ mm"
    assert stud_wall_2.insulation_type == "PUR or PIR"
def test_summary_000565_ext2_stud_wall_2_routes_to_400mm_rigid_foam_via_mapper() -> None:
    """Mapper plumbing for the "400+ mm" / "PUR or PIR" stud wall.

    "400+ mm" parses to thickness 400 mm (the trailing "+" is a
    bucket-cap convention; spec Table 17 max tabulated row is 400 mm).
    "PUR or PIR" maps to the canonical SAP10 insulation-type code
    "rigid_foam" so the cascade's `_is_rigid_foam` resolves correctly.
    """
    # Arrange
    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()

    # Act
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)

    # Assert
    ext2_rir = epc.sap_building_parts[2].sap_room_in_roof
    assert ext2_rir is not None
    detailed = ext2_rir.detailed_surfaces or []
    stud_walls = [s for s in detailed if s.kind == "stud_wall"]
    assert len(stud_walls) == 2
    # Stud Wall 2 is picked out by its 4.0 m² area. Use a default so a
    # missing match fails with the areas found instead of a bare
    # StopIteration.
    sw_2 = next((s for s in stud_walls if s.area_m2 == 4.0), None)
    assert sw_2 is not None, (
        f"no 4.0 m² stud wall mapped for BP[2]; got areas "
        f"{[s.area_m2 for s in stud_walls]!r}"
    )
    assert sw_2.insulation_thickness_mm == 400
    assert sw_2.insulation_type == "rigid_foam"
def test_summary_000565_ext4_flat_ceiling_1_extracts_unknown_thickness_pur_or_pir_lodgement() -> None:
    """The extractor must keep the "Unknown" RIR insulation thickness.

    Cert 000565 Summary §8.1 BP[4] Ext4 lodges:

        "Flat Ceiling 1 5.00 1.00 Unknown PUR or PIR 0.15 No"

    Worksheet line (30): `Roof room Ext4 Flat Ceiling 1: 5 × 0.15 =
    0.75 W/K` (U985-0001-000565 line 333). Pre-slice the extractor
    allow-list `_RIR_INSULATION_THICKNESS_RE | ("As Built", "None")`
    did NOT include the "Unknown" thickness token, so the cell was
    dropped (empty `insulation` string). The mapper translated the
    empty string to `insulation_thickness_mm=0`, and the cascade hit
    Table 17 row 0 → U=2.30 vs worksheet 0.15 (over by +10.75 W/K on a
    5 m² ceiling).
    """
    # Arrange
    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)

    # Act
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()

    # Assert — Ext4 is index 3 in `extensions`.
    ext4_rir = site_notes.extensions[3].room_in_roof
    assert ext4_rir is not None
    # Look the surface up with a default so a missing row fails with
    # the list of extracted names instead of a bare StopIteration.
    flat_ceiling_1 = next(
        (s for s in ext4_rir.surfaces if s.name == "Flat Ceiling 1"), None
    )
    assert flat_ceiling_1 is not None, (
        f"no 'Flat Ceiling 1' surface extracted for Ext4; got "
        f"{[s.name for s in ext4_rir.surfaces]!r}"
    )
    assert flat_ceiling_1.insulation == "Unknown"
    assert flat_ceiling_1.insulation_type == "PUR or PIR"
def test_summary_000565_ext4_flat_ceiling_1_maps_unknown_to_none_thickness_per_rdsap_10_section_3_10_1() -> None:
    """An "Unknown" RIR thickness must map to `None`, not 0.

    RdSAP 10 §3.10.1 (PDF p.24) "Default U-values of the roof rooms":

        "Where the details of insulation are not available, the default
        U-values are those for the appropriate age band for the
        construction of the roof rooms (see Table 18 : Assumed roof
        U-values when Table 16 or Table 17 do not apply). The default
        U-values apply when the roof room insulation is 'as built' or
        'unknown'."

    Translation: when Summary lodges "Unknown" thickness (regardless of
    named insulation material), the mapper must set
    `insulation_thickness_mm=None` (not 0). The cascade's existing
    `_u_rr_table_17` falls back to `u_rr_default_all_elements`
    (Table 18 col 4) → for cert 000565 BP[4] age band M, returns
    0.15 W/m²K, matching the worksheet.
    """
    # Arrange
    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()

    # Act
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)

    # Assert
    ext4_rir = epc.sap_building_parts[4].sap_room_in_roof
    assert ext4_rir is not None
    detailed = ext4_rir.detailed_surfaces or []
    flat_ceilings = [s for s in detailed if s.kind == "flat_ceiling"]
    # Flat Ceiling 1 is picked out by its 5.0 m² area. Use a default so
    # a missing match fails with the areas found instead of a bare
    # StopIteration.
    fc_1 = next((s for s in flat_ceilings if s.area_m2 == 5.0), None)
    assert fc_1 is not None, (
        f"no 5.0 m² flat ceiling mapped for BP[4]; got areas "
        f"{[s.area_m2 for s in flat_ceilings]!r}"
    )
    assert fc_1.insulation_thickness_mm is None
    assert fc_1.insulation_type == "rigid_foam"
def test_summary_000565_ext2_floor_extracts_200mm_retro_fitted_insulation_thickness() -> None:
    """The §9 floor extractor must read "Insulation Thickness".

    Cert 000565 Summary §9 2nd Extension lodges:

        Location:               U Above unheated space
        Type:                   N Suspended, not timber
        Insulation:             R Retro-fitted
        Insulation Thickness:   200 mm
        Default U-value:        0.22

    Pre-slice the extractor's `_floor_details_from_lines` parsed only
    location / floor_type / insulation / u_value_known /
    default_u_value — the "Insulation Thickness" cell was silently
    dropped. Mirror of the §8 roof extractor's existing
    `_local_val(lines, "Insulation Thickness")` path.
    """
    # Arrange
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)

    # Act
    notes = ElmhurstSiteNotesExtractor(textract_pages).extract()

    # Assert — the 2nd Extension is index 1 in `extensions`.
    second_extension = notes.extensions[1]
    ext2_floor = second_extension.floor
    assert ext2_floor.location == "U Above unheated space"
    assert ext2_floor.floor_type == "N Suspended, not timber"
    assert ext2_floor.insulation_thickness_mm == 200
def test_summary_000565_ext2_floor_routes_to_u_value_0p22_via_table_20_per_rdsap_10_section_5_13() -> None:
    """The mapper threads the lodged 200 mm floor insulation onto BP[2] for the Table 20 lookup."""
    # Arrange — RdSAP 10 §5.13 (PDF p.47) "U-values of exposed and
    # semi-exposed upper floors", Table 20:
    #
    #     Age band   Unknown/as built   50 mm   100 mm   150 mm
    #     A to G     1.20               0.50    0.30     0.22
    #     H or I     0.51               0.50    0.30     0.22
    #
    # Cert 000565 BP[2] (Ext2): age band H, floor location "U Above
    # unheated space" (→ `is_exposed_floor=True`), lodged Insulation
    # Thickness 200 mm. 200 mm falls in Table 20's 150 mm column (the
    # largest tabulated thickness; the cascade clamps at row[3] for
    # thickness ≥ 125 mm) → U=0.22, matching the worksheet's lodged
    # Default U=0.22.
    #
    # Before this slice the extractor gap left
    # `FloorDetails.insulation_thickness_mm=None`, which the mapper
    # passed on as `SapBuildingPart.floor_insulation_thickness=None`;
    # the cascade's `u_exposed_floor(age=H, ins=None)` then returned
    # 0.51 (Table 20 row[0]), over-counting the BP[2] floor by
    # (0.51 - 0.22) × 30 m² = +8.70 W/K.
    extracted = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)
    ).extract()

    # Act
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(extracted)

    # Assert — the thickness is carried as the "<n>mm" string form.
    second_extension = mapped.sap_building_parts[2]
    assert second_extension.floor_insulation_thickness == "200mm"
def test_summary_000565_mev_fans_cost_uses_table_12a_grid_2_fans_for_mech_vent_rate() -> None:
    """The pumps/fans cost rate is the kWh-weighted blend of the MEV-fan and other-uses rates."""
    # Arrange — SAP 10.2 Table 12a Grid 2 (PDF p.191) "Other
    # electricity uses" has two cost categories on off-peak tariffs:
    #
    #     Fans for mechanical ventilation systems    10-hour  0.58
    #     All other uses, and locally generated      10-hour  0.80
    #     electricity
    #
    # Cert 000565 lodges 127.5159 kWh of MEV decentralised fan energy
    # (line 230a). That must be billed at the FANS_FOR_MECH_VENT blend
    # (0.58 × 14.68 + 0.42 × 7.50 = 11.6644 p/kWh), not the
    # ALL_OTHER_USES blend (13.244 p/kWh). The other 125 kWh of
    # pumps_fans (45 flue fan + 80 solar HW pump) stay at 13.244.
    #
    # Worksheet line (249) confirms the split:
    #     Pumps, fans and electric keep-hot
    #         172.5159 kWh × effective 12.076 p/kWh = £20.8338
    #         = 127.5159 × 0.116644 + 45 × 0.13244 ≈ £20.8338
    #     Pump for solar water heating
    #         80.0000 × 13.244 / 100 = £10.5952
    #
    # Before this slice the cascade applied 0.13244 to all 252.5159
    # kWh, over-counting MEV cost by
    # 127.5159 × (0.13244 - 0.11664) = £2.01.
    extracted = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)
    ).extract()
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(extracted)
    from domain.sap10_calculator.rdsap.cert_to_inputs import cert_to_inputs

    # Act
    inputs = cert_to_inputs(mapped)

    # Assert — pin the effective rate directly (12.4467 p/kWh for
    # cert 000565) rather than the total fuel cost line (255), which
    # is coupled to several downstream SH-cascade effects. The rate is
    # the specific thing S0380.103 closes.
    actual = inputs.pumps_fans_fuel_cost_gbp_per_kwh
    expected_rate_gbp_per_kwh = 12.4467 / 100.0
    assert actual is not None
    assert abs(actual - expected_rate_gbp_per_kwh) <= 1e-5, (
        f"cascade pumps_fans_fuel_cost_gbp_per_kwh={actual:.6f}; "
        f"ws-split target={expected_rate_gbp_per_kwh:.6f}; "
        f"Δ={actual - expected_rate_gbp_per_kwh:+.6f} (expected MEV-"
        f"split kWh-weighted blend per S0380.103)"
    )
def test_summary_000565_mev_fans_co2_factor_uses_table_12a_grid_2_fans_for_mech_vent_split() -> None:
    """The pumps/fans CO2 factor is the kWh-weighted blend of the MEV-fan and other-uses factors."""
    # Arrange — SAP 10.2 Table 12a Grid 2 (PDF p.191) + Table 12d
    # (PDF p.194): the CO2-side mirror of the cost split landed in
    # S0380.103. Table 12a Grid 2 high-rate fractions on TEN_HOUR:
    #
    #     Fans for mechanical ventilation systems   high_frac = 0.58
    #     All other uses, and locally generated     high_frac = 0.80
    #     electricity
    #
    # Table 12d codes for TEN_HOUR are 34 (high) + 33 (low). The
    # days-weighted Σ(F_m × N_m) / Σ N_m over the 12 months of the
    # code 30 uniform-per-day proxy gives:
    #
    #     F_FANS  = 0.58 × F_code34 + 0.42 × F_code33 = 0.13872 kg/kWh
    #     F_OTHER = 0.80 × F_code34 + 0.20 × F_code33 = 0.14116 kg/kWh
    #
    # Cert 000565 splits pumps_fans into 127.5159 kWh MEV + 125 kWh
    # non-MEV (45 flue fan + 80 solar HW pump), so:
    #
    #     F_eff = (127.5159 × 0.13872 + 125 × 0.14116) / 252.5159
    #           = 0.13993 kg/kWh
    #
    # Worksheet line (267):
    #     Pumps, fans and electric keep-hot  252.5159 × 0.1412 = 35.3349
    # The display rounds the factor to 0.1412, but the product is the
    # kWh-weighted MEV-split total of 35.3349.
    #
    # Before this slice the cascade applied 0.14116 to all 252.5159
    # kWh → 35.6457 kg/yr, +0.31 over the worksheet. With the
    # MEV-aware split it lands on 35.3349 kg/yr.
    extracted = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)
    ).extract()
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(extracted)
    from domain.sap10_calculator.rdsap.cert_to_inputs import cert_to_inputs

    # Act
    inputs = cert_to_inputs(mapped)

    # Assert — the factor matches the worksheet line (267) effective
    # rate to within 1e-4 per kWh.
    actual = inputs.pumps_fans_co2_factor_kg_per_kwh
    expected_factor = 35.3349 / 252.5159
    assert actual is not None
    assert abs(actual - expected_factor) <= 1e-4, (
        f"cascade pumps_fans_co2_factor={actual:.6f}; "
        f"ws (267) effective={expected_factor:.6f}; Δ={actual - expected_factor:+.6f} "
        f"(expected MEV-split kWh-weighted blend post-S0380.105)"
    )
def test_summary_000565_mev_fans_pe_factor_uses_table_12a_grid_2_fans_for_mech_vent_split() -> None:
    """The pumps/fans primary-energy factor is the kWh-weighted MEV-fan / other-uses blend."""
    # Arrange — SAP 10.2 Table 12a Grid 2 (PDF p.191) + Table 12e
    # (PDF p.195): the PE-side mirror of the cost split (S0380.103)
    # and the CO2 split (S0380.105). Table 12a Grid 2 high-rate
    # fractions on TEN_HOUR:
    #
    #     Fans for mechanical ventilation systems   high_frac = 0.58
    #     All other uses, and locally generated     high_frac = 0.80
    #     electricity
    #
    # Table 12e codes for TEN_HOUR are 34 (high) + 33 (low). The
    # days-weighted Σ(F_m × N_m) / Σ N_m over the 12 months gives:
    #
    #     F_FANS  = 0.58 × F_code34 + 0.42 × F_code33 = 1.51268 kWh/kWh
    #     F_OTHER = 0.80 × F_code34 + 0.20 × F_code33 = 1.52391 kWh/kWh
    #
    # Cert 000565 splits pumps_fans into 127.5159 kWh MEV + 125 kWh
    # non-MEV (45 flue fan + 80 solar HW pump), so:
    #
    #     F_eff = (127.5159 × 1.51268 + 125 × 1.52391) / 252.5159
    #           = 1.51824 kWh/kWh
    #
    # Worksheet line (281):
    #     Pumps, fans and electric keep-hot  252.5159 × 1.5239 = 383.3796
    # The display rounds the factor to 1.5239, but the product is the
    # kWh-weighted MEV-split total of 383.3796.
    #
    # Before this slice the cascade applied 1.52391 to all 252.5159
    # kWh → 384.81, +1.43 over the worksheet.
    extracted = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)
    ).extract()
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(extracted)
    from domain.sap10_calculator.rdsap.cert_to_inputs import cert_to_inputs

    # Act
    inputs = cert_to_inputs(mapped)

    # Assert — the factor matches the worksheet line (281) effective
    # rate to within 1e-4 per kWh.
    actual = inputs.pumps_fans_primary_factor
    expected_factor = 383.3796 / 252.5159
    assert actual is not None
    assert abs(actual - expected_factor) <= 1e-4, (
        f"cascade pumps_fans_primary_factor={actual:.6f}; "
        f"ws (281) effective={expected_factor:.6f}; "
        f"Δ={actual - expected_factor:+.6f} "
        f"(expected MEV-split kWh-weighted blend post-S0380.106)"
    )
def test_summary_000565_window_routing_uses_bp_roof_type_per_rdsap_10_section_3_7_1() -> None:
    """Cert 000565 openings split into 4 vertical windows and 2 rooflights."""
    # Arrange — RdSAP 10 §3.7.1 (PDF p.21) "Window data": the source
    # RdSAP data set classifies each window as "Window (vertical)" or
    # "Roof window (inclined)" per the assessor's lodgement. The
    # Summary PDF §11.0 flattens that signal (every row's Location
    # column reads "External wall"), so the mapper has to rebuild the
    # classification heuristically.
    #
    # The pre-S0380.107 heuristic was "U > 3.0 → roof window". That
    # works for the simpler 6-cert cohort (all BPs PA/PN pitched, and
    # the only U > 3 windows are skylights) but breaks on cert 000565
    # in three ways:
    #
    #   - Item 4 (Main, Single glazing, U=3.35) is a vertical window
    #     in an old single-glazed gable wall; pre-slice it was
    #     misrouted to roof. Single glazing on a rooflight has been
    #     disallowed under Part L since 2006 (current SAP convention
    #     assumes double glazing minimum for any rooflight).
    #   - Item 2 (2nd Extension, Triple, U=2.0) is a rooflight in
    #     Ext2's external roof (Summary §8 lodges Ext2 roof type
    #     "NR Non-residential space above" → worksheet (30) External
    #     roof Ext2: 25 m² gross × 0.30 with 1.2 m² openings,
    #     matching the worksheet's Roof Windows 1).
    #   - Item 5 (4th Extension, Double, U=2.0) is a rooflight in
    #     Ext4's external roof (Summary §8 lodges Ext4 roof type
    #     "A Another dwelling above" → worksheet (30) External roof
    #     Ext4: 3 m² gross × 0 U with 0.5 m² openings).
    #
    # New heuristic, in priority order:
    #   1. "Single glazing" → never a roof window (Part L).
    #   2. BP roof type starts with "NR" or "A" → roof window (the BP
    #      has its own external roof structure with rooflights).
    #   3. U_value > 3.0 → roof window (cohort backstop; matches cert
    #      000516 W6 Wood-frame Double pre-2002 U=3.10 on Main PA, the
    #      only U > 3 vertical-glazing reading in the cohort that the
    #      worksheet routes via (27a)).
    #   4. Otherwise → vertical window.
    #
    # Worksheet ground truth for cert 000565:
    #   sap_windows (27): item 1 (Main 1.2) + item 6 (Main 0.6) →
    #     Windows 1 / 1.8 m²; item 4 (Main 1.7) → Windows 3; item 3
    #     (Ext1 1.92) → Windows 2. Total 5.42 m².
    #   sap_roof_windows (27a): item 2 (Ext2 1.2) → Roof Windows 1;
    #     item 5 (Ext4 0.5) → Roof Windows 2. Total 1.7 m².
    extracted = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)
    ).extract()

    # Act
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(extracted)

    # Assert — sap_windows holds the 4 vertical windows (items 1, 3,
    # 4, 6); areas are width × height rounded to 2 d.p.
    sap_window_areas = [
        round(float(window.window_width) * float(window.window_height), 2)
        for window in mapped.sap_windows or []
    ]
    sap_window_areas.sort()
    assert sap_window_areas == [0.6, 1.2, 1.7, 1.92], (
        f"sap_windows areas: {sap_window_areas} (expected [0.6, 1.2, 1.7, 1.92] "
        f"— items 1, 6 on Main + item 4 Single Main + item 3 Ext1)"
    )

    # Assert — sap_roof_windows holds the 2 rooflights (items 2, 5).
    assert mapped.sap_roof_windows is not None
    rw_areas = [round(float(roof_window.area_m2), 2) for roof_window in mapped.sap_roof_windows]
    rw_areas.sort()
    assert rw_areas == [0.5, 1.2], (
        f"sap_roof_windows areas: {rw_areas} (expected [0.5, 1.2] "
        f"— items 2 Ext2 NR + 5 Ext4 A rooflights)"
    )
def test_summary_000565_ext1_rir_connected_gable_deducts_from_a_rr_per_rdsap_10_section_3_9_2() -> None:
    """The Ext1 room-in-roof keeps its Connected gable (area 3.68 m², U=0) as a detailed surface."""
    # Arrange — RdSAP 10 §3.9.2 (PDF p.23) step (d), verbatim:
    #
    #   "The areas of gable walls are deducted from the calculated
    #    total RR area, and the remaining area of RR, ARR_final is
    #    then calculated. This area is treated as roof structure.
    #    ARR_final = ARR_wall - (ΣARR_common_wall + ΣARR_gable +
    #                            ΣARR_party + ΣARR_sheltered +
    #                            ΣARR_connected)"
    #
    # RdSAP 10 Table 4 row 4 (PDF p.22): "ARR_connected — Adjacent to
    # heated space — U-value = 0". U=0 means no heat-loss
    # contribution, but the area still deducts from the residual A_RR
    # (step (d) explicitly sums ARR_connected in the deduction).
    #
    # Cert 000565 Ext1 §8.1 lodges (Simplified Type 2 RR):
    #
    #     Gable Wall 1    L=4.00  H=6.00  Connected  U=0
    #     Gable Wall 2    L=8.00  H=9.00  Exposed    U=1.70
    #     Common Wall 1   L=9.00  H=1.00             U=1.70
    #     Common Wall 2   L=5.00  H=1.80             U=1.70
    #
    # Gable area via the §3.9.2 quadratic (subtract the triangular
    # slice above each common wall):
    #
    #     A_gable_1 = 4 × (0.25 + 6) - (6 - 1)²/2 - (6 - 1.8)²/2
    #               = 25.0 - 12.5 - 8.82
    #               = 3.68 m²
    #
    # Pre-S0380.108 the mapper dropped Connected gables entirely
    # (`_map_elmhurst_rir_surface` returned None), so the cascade's
    # residual A_RR was over by +3.68 m²:
    #
    #     A_RR shell             = 12.5 × √(34 / 1.5)      = 59.51 m²
    #     Σ wall areas (current) = 11.25 + 10.25 + 16.08   = 37.58 m²
    #     Residual (cascade)     = 59.51 - 37.58           = 21.93 m² (over)
    #     Residual (worksheet)   = 59.51 - 37.58 - 3.68    = 18.25 m²
    #
    # Worksheet (30) row "Roof room Ext1 remaining area: 18.25" at
    # U=0.35 → 6.3875 W/K. Pre-slice the cascade had 21.93 × 0.35 →
    # 7.6755 W/K (over by +1.29 W/K on roof — the largest single
    # localised residual on cert 000565 per
    # HANDOVER_POST_S0380_103.md).
    extracted = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)
    ).extract()

    # Act
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(extracted)

    # Assert — Ext1 RIR detailed_surfaces holds exactly one Connected
    # gable, so the cascade can deduct it from A_RR per step (d).
    ext1_rir = mapped.sap_building_parts[1].sap_room_in_roof
    assert ext1_rir is not None
    assert ext1_rir.detailed_surfaces is not None
    connected_gables = list(
        filter(lambda surface: surface.kind == "connected_wall", ext1_rir.detailed_surfaces)
    )
    assert len(connected_gables) == 1, (
        f"expected 1 Connected gable; got {len(connected_gables)} "
        f"(detailed_surfaces kinds: "
        f"{[s.kind for s in ext1_rir.detailed_surfaces]})"
    )
    (gable,) = connected_gables
    # Quadratic-corrected area:
    # 4 × (0.25 + 6) - (6 - 1)²/2 - (6 - 1.8)²/2 = 3.68
    assert abs(gable.area_m2 - 3.68) <= 1e-4
    # U-value = 0 per Table 4 row 4 (no heat-loss contribution).
    assert gable.u_value == 0.0
def test_summary_000565_main_solid_brick_external_insulation_uses_rdsap_10_section_5_7_plus_5_8_formula() -> None:
    """Cascade walls heat loss closes to the worksheet once Main uses the §5.7 + §5.8 chain."""
    # Arrange — RdSAP 10 §5.7 (PDF p.41) Table 13 + §5.8 (PDF p.42)
    # Table 14 + step 2 derivation.
    #
    # §5.7 Table 13, "Default U-values of brick walls":
    #     Wall thickness, mm     U-value, W/m²K
    #     Up to 200 mm           2.5
    #     200 to 280 mm          1.7
    #     280 to 420 mm          1.4   ← cert 000565 Main, W=300 mm
    #     More than 420 mm       1.1
    #
    # §5.8 step 2: "The U-value of the insulated wall is
    #     U = 1 / (1/U₀ + R_insulation)"
    #
    # §5.8 Table 14 (λ = 0.04 W/m·K column) + interpolation rule
    # "R = 0.025 × T + 0.25": for T = 75 mm, R = 2.125 m²K/W (the
    # Table 14 75 mm row, λ=0.04 column, reads "2.125" directly).
    #
    # Cert 000565 Main §7.0 lodges:
    #     Type                   SO Solid Brick (wall_construction = 3)
    #     Insulation             E External (wall_insulation_type = 1)
    #     Insulation Thickness   75 mm
    #     Wall Thickness         300 mm (measured)
    #     Conductivity Known     No → λ defaults to 0.04 per §5.8 column
    #     Age band               A
    #
    # Formula chain:
    #     U₀ = 1.4                       (§5.7 Table 13, "280 to 420 mm")
    #     R  = 0.025 × 75 + 0.25 = 2.125 m²K/W
    #     U  = 1 / (1/1.4 + 2.125) = 1 / 2.8393 = 0.3522
    #     U (2 d.p.) = 0.35 W/m²K
    #
    # Worksheet (29a) row "External walls Main: 51.72 × 0.35 = 18.10"
    # → 18.10 W/K. Pre-slice the cascade ignored both §5.7 (Table 13
    # lookup on wall thickness) and §5.8 (Table 14 interpolation by
    # lodged insulation thickness): the bucket cascade routed the
    # 75 mm lodgement to the 100 mm Table-6 column (0.32 for age A),
    # a -1.54 W/K under-count on Main's external wall area. That was
    # the full BP[0] walls residual behind the remaining net HTC gap
    # on cert 000565 post-S0380.108.
    extracted = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)
    ).extract()
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(extracted)
    from domain.sap10_calculator.rdsap.cert_to_inputs import heat_transmission_section_from_cert

    # Act
    ht = heat_transmission_section_from_cert(mapped)

    # Assert — `walls_w_per_k` matches the worksheet's (29a)+(32) sum
    # to within 1e-4. The Main wall contribution from the §5.7+§5.8
    # chain dominates the residual. Worksheet breakdown:
    # 604.07 = 18.10 + 3.43 + 4.41 + 53.82 (Main) + 219.997 (Ext1)
    # + 229.95 (Ext2) + 39.852 (Ext3) + 34.51 (Ext4).
    assert abs(ht.walls_w_per_k - 604.0710) <= 1e-4, (
        f"cascade walls_w_per_k={ht.walls_w_per_k:.4f}; "
        f"ws 604.0710; Δ={ht.walls_w_per_k - 604.0710:+.4f} "
        f"(expected within 1e-4 after §5.7+§5.8 formula chain replaces "
        f"the Table-6 bucket lookup for solid-brick + lodged-thickness "
        f"+ insulated walls)"
    )
def test_summary_000565_main_1_ashp_sap_code_224_routes_to_main_heating_category_4_per_sap_table_4a() -> None:
    """Main heating 1 (SAP code 224, air source heat pump) maps to main heating category 4."""
    # Arrange — SAP 10.2 Table 4a (PDF p.165) "Main heating systems"
    # lists "Heat pumps" as category 4. Codes in rows 211-217
    # (ground/water source HP), 221-227 (air source HP) and 521-527
    # (warm-air HP) all map to category 4.
    #
    # Cert 000565 Main 1 lodges `Main Heating SAP Code = 224` (air
    # source heat pump, 2013 or later — SAP 10.2 Appendix N efficiency
    # row 224, COP 1.70). The cert lodges `PCDF boiler Reference = 0`,
    # i.e. no PCDB Table 362 record. In that case the existing
    # mapper's `_elmhurst_main_heating_category` returns None and the
    # cascade falls through to `_DEFAULT_PUMPS_FANS_KWH_PER_YR = 130`.
    # That default is wrong here: per SAP 10.2 Table 4f the HP
    # circulation pump's electricity is inside the system COP, so the
    # category 4 row is 0 kWh/year.
    extracted = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)
    ).extract()

    # Act
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(extracted)

    # Assert
    assert mapped.sap_heating is not None
    first_main_system = mapped.sap_heating.main_heating_details[0]
    assert first_main_system.sap_main_heating_code == 224
    assert first_main_system.main_heating_category == 4
def test_summary_000565_ext1_floor_above_partially_heated_routes_to_u_value_0p7_per_rdsap_10_section_5_14() -> None:
    """The Ext1 ground floor dimension is flagged as above a partially heated space."""
    # Arrange — RdSAP 10 §5.14 (PDF p.47) "U-value of floor above a
    # partially heated space":
    #   "The U-value of a floor above partially heated premises is
    #    taken as 0.7 W/m²K. This applies typically for a flat above
    #    non-domestic premises that are not heated to the same extent
    #    or duration as the flat."
    #
    # Cert 000565 Summary §9 1st Extension lodges "Location: P Above
    # partially heated space" + "Default U-value: 0.70". Pre-slice the
    # cascade routed the BP[1] floor through the BS EN ISO 13370
    # ground-floor formula and got U=0.76 against the spec's 0.70,
    # i.e. (0.76 - 0.70) × 34 m² = +2.04 W/K over. The mapper now sets
    # `is_above_partially_heated_space=True` on the ground
    # SapFloorDimension so that `heat_transmission` dispatches to the
    # §5.14 constant.
    extracted = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)
    ).extract()

    # Act
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(extracted)

    # Assert — the first floor dimension of BP[1] is the ground floor
    # (floor == 0) and carries the flag.
    ground_dimension = mapped.sap_building_parts[1].sap_floor_dimensions[0]
    assert ground_dimension.floor == 0
    assert ground_dimension.is_above_partially_heated_space is True
def test_summary_000565_mev_decentralised_routes_to_extract_or_piv_outside_mv_kind() -> None:
    """Decentralised MEV maps to the EXTRACT_OR_PIV_OUTSIDE mechanical-ventilation kind."""
    # Arrange — mapper plumbing for SAP 10.2 §2 (23a)/(24c) MEV. The
    # Elmhurst string "Mechanical extract, decentralised (MEV dc)"
    # maps to `MechanicalVentilationKind.EXTRACT_OR_PIV_OUTSIDE`, so
    # the cascade picks the (24c) effective-ach formula.
    extracted = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)
    ).extract()

    # Act
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(extracted)

    # Assert
    assert mapped.sap_ventilation is not None
    ventilation_kind = mapped.sap_ventilation.mechanical_ventilation_kind
    assert ventilation_kind == "EXTRACT_OR_PIV_OUTSIDE"
def test_summary_000565_rooflight_per_window_g_l_routes_via_glazing_type_per_sap_10_2_appendix_l_l2a() -> None:
    """Rooflights carry their lodged glazing type so lighting energy closes to worksheet (232)."""
    # Arrange — SAP 10.2 Appendix L §L2a (PDF p.88), verbatim:
    #
    #            0.9 × Σ Aw × gL × FF × ZL
    #     GL =  ---------------------------        (L2a)
    #                      TFA
    #
    #   "where
    #    FF  is the frame factor (fraction of window that is glazed)
    #        for the actual window or from Table 6c
    #    Aw  is the area of a window, m²
    #    TFA is the total floor area of the dwelling, m²
    #    gL  is the light transmittance factor from Table 6b
    #    ZL  is the light access factor from Table 6d"
    #
    # Table 6b gL by glazing type (PDF p.178):
    #     Single glazed                  0.90
    #     Double glazed (any variant)    0.80
    #     Triple glazed (any variant)    0.70
    #
    # Table 6d note 2 (PDF p.178): "A solar access factor of 1.0 and a
    # light access factor of 1.0 should be used for roof windows/
    # rooflights." → ZL = 1.0 for every rooflight, regardless of the
    # cert's overshading.
    #
    # The numerator sum is PER WINDOW: each rooflight contributes its
    # own gL and FF, not a single dwelling-wide default. Pre-slice the
    # cascade collapsed every rooflight into one
    # `rooflight_total_area_m2 × _G_LIGHT_DEFAULT (0.80) ×
    # _FRAME_FACTOR_DEFAULT (0.70)` product, which over-counted any
    # rooflight whose actual gL or FF was below the default.
    #
    # Cert 000565 §11 lodges 2 rooflights (per S0380.107 routing):
    #     Item 2 (Ext2 NR rooflight): 1.2 m², "Triple between 2002 and
    #       2021", PVC frame FF=0.70 → gL=0.70 (Table 6b Triple)
    #     Item 5 (Ext4 A rooflight):  0.5 m², "Double between 2002 and
    #       2021", Wood frame FF=0.70 → gL=0.80 (Table 6b Double)
    #
    # Per-rooflight L2a numerator contributions (ZL=1.0):
    #     Item 2: 1.2 × 0.70 × 0.70 × 1.0 = 0.5880
    #     Item 5: 0.5 × 0.80 × 0.70 × 1.0 = 0.2800
    #     Sum   :                           0.8680
    #
    # Pre-slice cascade (defaults across both):
    #     Sum   : 1.7 × 0.80 × 0.70 × 1.0 = 0.9520   (over by +0.0840)
    #
    # That +0.0840 numerator over-count inflated GL, which lowered
    # C_daylight (via the L2b quadratic 52.2 GL² - 9.94 GL + 1.433),
    # which lowered E_L,fixed (L9a) and so worksheet (232). The
    # cascade was 2.17 kWh/yr under the worksheet's (232) = 1384.8353
    # kWh/yr until the spec-correct per-rooflight dispatch landed.
    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()
    # Import locally, as the sibling tests that call `cert_to_inputs`
    # do, so this test does not depend on a module-level binding.
    from domain.sap10_calculator.rdsap.cert_to_inputs import cert_to_inputs

    # Act
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
    inputs = cert_to_inputs(epc)

    # Assert — sap_roof_windows carry the lodged glazing types so the
    # cascade's L2a per-rooflight gL dispatch can fire. SAP 10.2 codes:
    # 9 = "Triple between 2002 and 2021"; 3 = "Double between 2002 and
    # 2021" (and "Double with unknown install date" variants).
    assert epc.sap_roof_windows is not None
    # The two rooflights have distinct areas (1.2 and 0.5 m²), so the
    # rounded area is a usable lookup key.
    rooflights_by_area = {
        round(float(rw.area_m2), 2): rw for rw in epc.sap_roof_windows
    }
    assert rooflights_by_area[1.2].glazing_type == 9, (
        f"Ext2 rooflight glazing_type={rooflights_by_area[1.2].glazing_type} "
        f"(expected 9 'Triple between 2002 and 2021' for gL=0.70 dispatch)"
    )
    assert rooflights_by_area[0.5].glazing_type == 3, (
        f"Ext4 rooflight glazing_type={rooflights_by_area[0.5].glazing_type} "
        f"(expected 3 'Double between 2002 and 2021' for gL=0.80 dispatch)"
    )

    # Assert — worksheet (232) closes to the PDF lodgement at abs=1e-4
    # once the per-rooflight gL dispatch corrects the daylight factor.
    assert abs(inputs.lighting_kwh_per_yr - 1384.8353) <= 1e-4, (
        f"cascade lighting_kwh_per_yr={inputs.lighting_kwh_per_yr:.4f}; "
        f"ws (232)=1384.8353; Δ={inputs.lighting_kwh_per_yr - 1384.8353:+.4f} "
        f"(expected within 1e-4 after L2a iterates sap_roof_windows for "
        f"per-rooflight gL × FF instead of applying defaults to total area)"
    )
def test_summary_000565_roof_window_u_value_applies_table_6e_note_2_inclination_adjustment_per_sap_10_2_section_3_2() -> None:
    """Both rooflights get the +0.30 inclination adjustment; roof-window heat loss closes to (27a)."""
    # Arrange — SAP 10.2 §3.2 "Roof windows" (PDF p.10), verbatim:
    #
    #   "In the case of roof windows, unless the measurement or
    #    calculation has been done for the actual inclination of the
    #    roof window, adjustments as given in Notes 1 and 2 to Table
    #    6e or from BR443 (2019) should be applied."
    #
    # SAP 10.2 Table 6e Note 2 (PDF p.180) — "For roof windows the
    # following adjustments should be applied to convert a known
    # vertical U-value into the U-value for the known inclined
    # position":
    #
    #     Inclination                Twin skin or DG   Triple skin or TG
    #     70° or more (vertical)     +0.0              +0.0
    #     < 70° and > 60°            +0.2              +0.1
    #     ≤ 60° and > 40°            +0.3              +0.2
    #     ≤ 40° and > 30°            +0.4              +0.2
    #     30° or less (horizontal)   +0.5              +0.3
    #
    # SAP 10.2 §3.2 formula (2), the curtain transform applied after
    # the inclination adjustment:
    #
    #     U_w,effective = 1 / (1/U_w + 0.04)                        (2)
    #
    # Cert 000565 §11 lodges 2 roof windows (per S0380.107 routing) at
    # pitch=45° (Openings table: "Roof Windows 1(Ext2), Roof Window,
    # External roof Ext2, North West, 45, ..."):
    #
    #     Item 2 (Ext2 NR): 1.2 m², "Triple between 2002 and 2021",
    #                       PVC FF=0.70, Manufacturer U=2.0, g=0.72
    #     Item 5 (Ext4 A):  0.5 m², "Double between 2002 and 2021",
    #                       Wood FF=0.70, Manufacturer U=2.0, g=0.72
    #
    # Both lodge Manufacturer-supplied U=2.0 (vertical-tested per the
    # Table 6e header), so the Note 2 adjustment applies. Worksheet
    # (27a) shows U_eff = 2.1062 for BOTH items. Back-solving through
    # formula (2): 1/2.1062 = 0.4748; 0.4748 - 0.04 = 0.4348;
    # U_inclined = 1/0.4348 = 2.3000 = U_raw + 0.30. So Elmhurst
    # applies the DG-column +0.30 (Note 2 "≤ 60° and > 40°" row)
    # uniformly to roof windows at 40-60° inclination; the TG-column
    # +0.20 alternative would give 2.0222, contradicting the
    # worksheet's 2.1062 for the Triple item.
    #
    # Worksheet (27a) total: 1.2 × 2.1062 + 0.5 × 2.1062 ≈ 3.5805 W/K.
    # Pre-slice cascade: u_eff = 1/(1/2.0 + 0.04) = 1.852 for both →
    # 1.7 × 1.852 = 3.1484 W/K, a net residual of -0.43 W/K.
    #
    # Cohort safety: cert 000516 W6 ("Double pre 2002", Manufacturer
    # U=3.10) is routed through the mapper's RdSAP10 Table 24 lookup,
    # which already returns 3.40 (the pre-adjusted inclined-position
    # value from the Table 24 "Roof window" column). The new
    # adjustment fires ONLY in the fall-through branch, i.e. when the
    # lodged glazing label is not in
    # `_ELMHURST_ROOF_WINDOW_U_BY_GLAZING`, so 000516's 3.40 is
    # unchanged.
    extracted = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)
    ).extract()
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(extracted)
    from domain.sap10_calculator.rdsap.cert_to_inputs import heat_transmission_section_from_cert

    # Act
    ht = heat_transmission_section_from_cert(mapped)

    # Assert — `u_value_raw` carries the inclined-position U (mapper
    # applies +0.30), so the cascade's formula (2) curtain transform
    # lands on the worksheet's U_eff=2.1062.
    assert mapped.sap_roof_windows is not None
    inclined_us = [round(float(roof_window.u_value_raw), 4) for roof_window in mapped.sap_roof_windows]
    inclined_us.sort()
    assert inclined_us == [2.3, 2.3], (
        f"sap_roof_windows u_value_raw: {inclined_us} (expected [2.3, 2.3] "
        f"after Table 6e Note 2 DG-column +0.30 W/m²K adjustment fires on "
        f"both rooflights for pitch 40-60°)"
    )

    # Assert — roof_windows_w_per_k closes to the worksheet's
    # Σ A × U_eff at abs=1e-4.
    # ws (27a) = 1.2 × 2.1062 + 0.5 × 2.1062 = 3.5805 W/K.
    assert abs(ht.roof_windows_w_per_k - 3.5805) <= 1e-4, (
        f"cascade roof_windows_w_per_k={ht.roof_windows_w_per_k:.4f}; "
        f"ws (27a)=3.5805; Δ={ht.roof_windows_w_per_k - 3.5805:+.4f} "
        f"(expected within 1e-4 after Table 6e Note 2 inclination "
        f"adjustment + formula (2) curtain transform)"
    )
def test_summary_000565_rooflights_deduct_from_their_own_bp_gross_roof_per_rdsap_10_section_3_7() -> None:
    """Each rooflight names its host building part, so roof loss and external area close to ws."""
    # Arrange — RdSAP 10 §3.7 "Door and window areas" (PDF p.19),
    # verbatim:
    #
    #   "for each building part, software will deduct window/door
    #    areas contained in the relevant wall areas"
    #
    # The same convention applies to roof windows / rooflights that
    # pierce a BP's roof: the rooflight area deducts from that BP's
    # gross roof area on worksheet (30). Pre-S0380.112 the cascade
    # lumped every rooflight's area onto BP[0] Main's `rw_area_part`
    # and left the real host BP's gross roof un-deducted. For cert
    # 000565 that was a +1.20 m² double-count: RW1 (1.20 m²) lives on
    # Ext2, yet Ext2's gross roof 25.00 stayed un-deducted while the
    # same 1.20 also appeared in Main's `rw_area_part`.
    #
    # Cert 000565 §11 Openings table lodges:
    #     Roof Windows 1(Ext2)   Roof Window, External roof Ext2, ...
    #     Roof Windows 2(Ext4)   Roof Window, External roof Ext4, ...
    #
    # Per-BP roof-window allocation (worksheet ground truth):
    #     Ext2 (BP[2]): gross 25.00 - 1.20 RW1 = 23.80 net, U=0.30
    #                   → ws (30): 23.80 × 0.30 = 7.1400 W/K
    #     Ext4 (BP[4]): gross 3.00 - 0.50 RW2 = 2.50 net, U=0.00
    #                   → ws (30): 2.50 × 0.00 = 0.0000 W/K
    #
    # Pre-slice cascade:
    #     Ext2: 25.00 (un-deducted) × 0.30 = 7.5000 → +0.36 W/K over ws
    #     Ext4: 0 (party roof, rooflight allocated to Main) → no
    #           contribution
    #     plus +1.70 m² of rooflight area lumped onto Main's external
    #     area
    #
    # Post-slice expectation: `sap_roof_windows[*].window_location`
    # threads the lodged BP through, so the cascade's per-BP loop
    # deducts each rooflight from its host BP's gross roof and adds
    # the area to that BP's external area aggregate (matching the
    # worksheet's per-BP rows).
    extracted = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)
    ).extract()
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(extracted)
    from domain.sap10_calculator.rdsap.cert_to_inputs import heat_transmission_section_from_cert

    # Act
    ht = heat_transmission_section_from_cert(mapped)

    # Assert — each rooflight carries its host-BP signal. This mirrors
    # the SapWindow.window_location Union[int, str] shape, both forms
    # of which the cascade resolves via `_window_bp_index`. The value
    # pinned here is the raw lodged string the Elmhurst mapper threads
    # through (as `_map_elmhurst_window` does for
    # `SapWindow.window_location`).
    assert mapped.sap_roof_windows is not None
    rooflights_by_area = {}
    for roof_window in mapped.sap_roof_windows:
        rooflights_by_area[round(float(roof_window.area_m2), 2)] = roof_window
    assert rooflights_by_area[1.20].window_location == "2nd Extension", (
        f"RW1 (Ext2 rooflight) window_location="
        f"{rooflights_by_area[1.20].window_location!r} (expected '2nd Extension')"
    )
    assert rooflights_by_area[0.50].window_location == "4th Extension", (
        f"RW2 (Ext4 rooflight) window_location="
        f"{rooflights_by_area[0.50].window_location!r} (expected '4th Extension')"
    )

    # Assert — the Ext2 +0.36 W/K roof over-count closes (Ext2's gross
    # roof is no longer left un-deducted). Combined with S0380.113
    # (H=0 gable retention) the cascade closes to ws within 1e-2.
    assert abs(ht.roof_w_per_k - 51.3795) <= 1e-2, (
        f"cascade roof_w_per_k={ht.roof_w_per_k:.4f}; "
        f"ws 51.3795; Δ={ht.roof_w_per_k - 51.3795:+.4f}"
    )

    # Assert — the Ext2 +1.20 m² rooflight double-count closes.
    assert abs(ht.total_external_element_area_m2 - 857.64) <= 1e-2, (
        f"cascade total_external_element_area_m2={ht.total_external_element_area_m2:.4f}; "
        f"ws 857.64; Δ={ht.total_external_element_area_m2 - 857.64:+.4f}"
    )
def test_summary_000565_ext3_absent_gable_h_zero_lodgement_deducts_per_rdsap_10_section_3_9_2_step_b() -> None:
    """A lodged H=0 gable on Ext3 still feeds the §3.9.2 residual; totals close to the worksheet."""
    # Arrange — RdSAP 10 §3.9.2 step (b) (PDF p.23):
    #
    #     A_RR_gable = L_gable × (0.25 + H_gable)
    #                  - [ (H_gable - H_common_1)² / 2
    #                      + (H_gable - H_common_2)² / 2 ]
    #
    # Step (d): A_RR_final = A_RR_shell - Σ(common + gable + party +
    # sheltered + connected).
    #
    # Cert 000565 §8.1 lodges Ext3's Room in Roof as Simplified Type 2:
    #
    #     Gable Wall 1    L=9.00  H=7.00  Exposed  U=0.45
    #     Gable Wall 2    L=4.00  H=0.00           U=0.00  ← lodged, H=0
    #     Common Wall 1   L=5.00  H=1.50           U=0.45
    #     Common Wall 2   L=7.50  H=0.30           U=0.45
    #
    # Elmhurst's worksheet (30) shows Ext3 remaining area = 17.35 m².
    # Back-solving with the spec equation and the H=0 Gable Wall 2:
    #
    #     A_gable_2 = 4 × (0.25 + 0) - (0 - 1.5)²/2 - (0 - 0.30)²/2
    #               = 1.0 - 1.125 - 0.045 = -0.17 m²   (negative)
    #
    #     A_RR_shell = 12.5 × √(32.0 / 1.5)       = 57.7350
    #     Σ walls (incl. -0.17 absent gable)      = 40.3850
    #     residual = shell - walls                = 17.3500 ✓
    #
    # Pre-slice the mapper filtered out lodged surfaces with
    # `height_m <= 0` (mapper.py:3350) and clamped the gable area at 0
    # via `max(0.0, ...)` (mapper.py:3443). Both clamps kept the
    # spec-computed -0.17 m² adjustment from reaching the cascade, so
    # its residual landed at 17.18 m² (= 57.735 - 40.555), 0.17 m²
    # under the worksheet.
    #
    # Spec-correct path: lodged Type 2 gable walls with H=0 still
    # contribute via §3.9.2 step (b). The result can go negative when
    # the common walls are taller than the gable; that signed value
    # adjusts the step (d) residual deduction without billing a
    # physical wall area (the wall does not exist).
    extracted = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)
    ).extract()
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(extracted)
    from domain.sap10_calculator.rdsap.cert_to_inputs import heat_transmission_section_from_cert

    # Act
    ht = heat_transmission_section_from_cert(mapped)

    # Assert — total external area closes to worksheet (31) at 1e-2.
    # Pre-slice it was 857.46 (cascade under by 0.18 after S0380.112);
    # this slice picks up the +0.17 m² Ext3 residual adjustment, which
    # takes the cascade to ~857.63, within tolerance of ws 857.64.
    assert abs(ht.total_external_element_area_m2 - 857.64) <= 1e-2, (
        f"cascade total_external_element_area_m2={ht.total_external_element_area_m2:.4f}; "
        f"ws (31)=857.64; Δ={ht.total_external_element_area_m2 - 857.64:+.4f} "
        f"(expected within 1e-2 after lodged H=0 gable contributes 0.17 "
        f"m² via the §3.9.2 step (b) spec equation)"
    )

    # Assert — roof_w_per_k closes the Ext3 0.06 W/K residual. The ws
    # row "Roof room Ext3 remaining area" is 17.35 × 0.35 = 6.0725
    # W/K; pre-slice the cascade ran the residual on 17.175 × 0.35 =
    # 6.011 W/K.
    assert abs(ht.roof_w_per_k - 51.3795) <= 1e-2, (
        f"cascade roof_w_per_k={ht.roof_w_per_k:.4f}; "
        f"ws 51.3795; Δ={ht.roof_w_per_k - 51.3795:+.4f} "
        f"(expected within 1e-2 after Ext3 residual area picks up the "
        f"+0.17 m² adjustment)"
    )
def test_summary_000565_hp_plus_boiler_pump_gain_3w_per_sap_10_2_table_5a_note_a() -> None:
    # Arrange — SAP 10.2 Table 5a (PDF p.177):
    #
    #   "Central heating pump in heated space, 2013 or later: 3 W"
    #
    # Note a): "Where there are two main heating systems serving
    # different parts of the dwelling, assume each has its own
    # circulation pump and therefore include two figures from this
    # table. ... Set to zero in summer months. Not applicable for
    # electric heat pumps from database. Where two main systems serve
    # the same space a single pump is assumed."
    #
    # Pre-slice, the cascade zeroed the central-heating pump GAIN
    # (worksheet line 70) whenever
    # `main_heating_details[0].main_heating_category == 4` (heat pump).
    # That rule is right for ELECTRICITY (Table 4f: HP pump electricity
    # is in the COP) but wrong for GAINS: Table 5a's "not applicable
    # for electric heat pumps" only removes the HP's own contribution.
    # Any other non-HP main system on the cert still has a pump, and
    # that pump's gain still counts towards internal gains.
    #
    # Cert 000565 lodges two main systems:
    #   [0] HP (category 4)          pump_age "2013 or later"
    #   [1] Gas boiler (category 2)  pump_age None
    #
    # Per spec, system [1]'s pump contributes 3 W (post-2013 default
    # date taken from [0]'s lodgement). Worksheet line (70) confirms:
    #
    #   "Pumps, fans 3.0000 3.0000 3.0000 3.0000 3.0000 0.0000
    #    0.0000 0.0000 0.0000 3.0000 3.0000 3.0000 (70)"
    #
    # → 3 W in 8 winter months, 0 in summer (per Table 5a Note a).
    #
    # Pre-slice cascade: 0 W every month, leaving 24 W·months of
    # internal gains uncounted. The missing gains → ~10 kWh/yr extra
    # space heating → +£0.70 cost, +0.90 kg CO2, -0.008 continuous
    # SAP. Every §5 (70)..(73) line ref other than (70) already matches
    # the worksheet at 1e-3 — this is the last cascade gap on cert
    # 000565.
    from domain.sap10_calculator.rdsap.cert_to_inputs import internal_gains_section_from_cert

    site_notes = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)
    ).extract()
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)

    # Act
    ig = internal_gains_section_from_cert(epc)

    # Assert — (70)m winter values match the worksheet's 3 W; summer
    # values stay 0 per the Table 5a Note a) seasonal mask.
    assert ig is not None
    expected = (3.0, 3.0, 3.0, 3.0, 3.0, 0.0, 0.0, 0.0, 0.0, 3.0, 3.0, 3.0)
    pumps_fans = ig.pumps_fans_monthly_w
    month_matches = [
        abs(pumps_fans[m] - expected[m]) <= 1e-4 for m in range(12)
    ]
    assert all(month_matches), (
        f"cascade (70)m pumps_fans gain={tuple(round(x, 4) for x in pumps_fans)}; "
        f"ws (70)m={expected}; deltas="
        f"{tuple(round(pumps_fans[m] - expected[m], 4) for m in range(12))} "
        f"(expected 3.0 W in winter, 0.0 W summer — Table 5a row 1 + Note a)"
    )
def test_summary_mapper_raises_on_unmapped_wall_type_code() -> None:
    # Arrange — strict-coverage gate, mirroring
    # [[reference-unmapped-api-code]]: an Elmhurst wall_type lodgement
    # missing from `_ELMHURST_WALL_CODE_TO_SAP10` must raise
    # `UnmappedElmhurstLabel` instead of quietly routing through
    # wall_construction=None. That silent-None failure mode is what
    # hid cert 000565's Ext1/3/4 ~300 W/K cascade gap until the
    # S0380.64 fabric-loss audit.
    unmapped_label = "XX Unknown construction"
    site_notes = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)
    ).extract()
    site_notes.walls.wall_type = unmapped_label

    # Act / Assert
    with pytest.raises(UnmappedElmhurstLabel) as excinfo:
        EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
    raised = excinfo.value
    assert raised.field == "walls.wall_type"
    assert raised.value == unmapped_label
def test_summary_mapper_raises_on_unmapped_party_wall_type_code() -> None:
    # Arrange — the same strict-coverage gate, applied to party-wall
    # type lodgements (identical silent-None failure mode at the
    # `_elmhurst_party_wall_construction_int` boundary).
    unmapped_label = "YY Unknown party wall"
    site_notes = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF)
    ).extract()
    site_notes.walls.party_wall_type = unmapped_label

    # Act / Assert
    with pytest.raises(UnmappedElmhurstLabel) as excinfo:
        EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
    raised = excinfo.value
    assert raised.field == "walls.party_wall_type"
    assert raised.value == unmapped_label
# ----------------------------------------------------------------------
# API mapper strict-raise — mirror the Elmhurst UnmappedElmhurstLabel
# coverage gate on the GOV.UK API path. The same failure mode (silently
# routing an unknown enum to a default int / None hides cascade gaps
# until a SAP-delta investigation surfaces them) applies to API
# integer codes. Each strict helper is unit-tested for its raise
# behaviour; a cohort-coverage forcing function asserts every golden
# fixture extracts cleanly via `from_api_response`.
# ----------------------------------------------------------------------
# Directory holding the golden API cert JSON fixtures, anchored at
# `Path(__file__).parents[3]`.
_GOLDEN_FIXTURES_DIR = (
    Path(__file__).parents[3]
    / "tests/domain/sap10_calculator/rdsap/fixtures/golden"
)


# NOTE(review): `callable` is the builtin predicate, not a typing
# construct; the intended annotation is presumably
# `Callable[[dict], None]`, which needs an import at the top of the
# file — confirm and switch over there.
def _patch_api_doc_and_extract(
    cert: str, mutator: "callable[[dict], None]"
) -> None:
    """Load a golden cert JSON, apply a mutation, and run
    `from_api_response`.

    Used by the strict-raise unit tests to inject an unmapped integer
    code into a known-good document.

    Args:
        cert: Fixture stem; the file read is
            `_GOLDEN_FIXTURES_DIR / f"{cert}.json"`.
        mutator: Called with the parsed document and expected to
            modify it in place; its return value is ignored.

    Returns:
        None. Whatever `from_api_response` raises propagates to the
        caller.
    """
    fixture_path = _GOLDEN_FIXTURES_DIR / f"{cert}.json"
    # Decode as UTF-8 explicitly: a bare `read_text()` follows the
    # host's locale encoding, which is not UTF-8 on every platform.
    doc = json.loads(fixture_path.read_text(encoding="utf-8"))
    mutator(doc)
    EpcPropertyDataMapper.from_api_response(doc)
def test_api_mapper_raises_on_unmapped_floor_construction_code() -> None:
    # Arrange — take a real cohort cert and plant a
    # `floor_construction` integer the mapper has no entry for
    # (currently the dict covers 1 and 2). The mapper must raise
    # `UnmappedApiCode` rather than silently dropping the
    # floor_construction signal: losing it sends the cascade down the
    # wrong solid-vs-suspended branch (see Slice S0380.27's
    # floor_construction_type fix that closed cert 8135's
    # PE -4.96 → -0.07).
    unmapped_code = 99

    def inject_unmapped_code(doc: dict) -> None:
        first_floor = doc["sap_building_parts"][0]["sap_floor_dimensions"][0]
        first_floor["floor_construction"] = unmapped_code

    # Act / Assert
    with pytest.raises(UnmappedApiCode) as excinfo:
        _patch_api_doc_and_extract(
            "0380-2471-3250-2596-8761", inject_unmapped_code
        )
    raised = excinfo.value
    assert raised.field == "floor_construction"
    assert raised.value == unmapped_code
def test_api_mapper_raises_on_unmapped_roof_construction_code() -> None:
    # Arrange — plant an unmapped roof_construction integer. The
    # cascade's `_api_roof_construction_str` drives both the cos(30°)
    # inclined-surface factor and the flat-roof Table 18 column-3
    # dispatch, so a silently-None value would under-count roof loss
    # for sloping ceilings or send flat roofs to the wrong column.
    unmapped_code = 99

    def inject_unmapped_code(doc: dict) -> None:
        first_part = doc["sap_building_parts"][0]
        first_part["roof_construction"] = unmapped_code

    # Act / Assert
    with pytest.raises(UnmappedApiCode) as excinfo:
        _patch_api_doc_and_extract(
            "0380-2471-3250-2596-8761", inject_unmapped_code
        )
    raised = excinfo.value
    assert raised.field == "roof_construction"
    assert raised.value == unmapped_code
def test_api_mapper_raises_on_unmapped_party_wall_construction_code() -> None:
    # Arrange — plant an unmapped party_wall_construction. The cohort
    # currently covers RdSAP10 Table 15 codes 0..5; anything outside
    # that range must raise so the next fixture forces an explicit
    # dict entry.
    unmapped_code = 99

    def inject_unmapped_code(doc: dict) -> None:
        first_part = doc["sap_building_parts"][0]
        first_part["party_wall_construction"] = unmapped_code

    # Act / Assert
    with pytest.raises(UnmappedApiCode) as excinfo:
        _patch_api_doc_and_extract(
            "0380-2471-3250-2596-8761", inject_unmapped_code
        )
    raised = excinfo.value
    assert raised.field == "party_wall_construction"
    assert raised.value == unmapped_code
def test_api_mapper_raises_on_unmapped_floor_heat_loss_code() -> None:
    # Arrange — codes 4/5/8+ are absent from the dict, so planting one
    # must raise. Codes 1/2/3/6/7 are mapped explicitly (some of them
    # to None), which is how the strict gate tells "decided no string"
    # apart from "unknown".
    unmapped_code = 99

    def inject_unmapped_code(doc: dict) -> None:
        first_part = doc["sap_building_parts"][0]
        first_part["floor_heat_loss"] = unmapped_code

    # Act / Assert
    with pytest.raises(UnmappedApiCode) as excinfo:
        _patch_api_doc_and_extract(
            "0380-2471-3250-2596-8761", inject_unmapped_code
        )
    raised = excinfo.value
    assert raised.field == "floor_heat_loss"
    assert raised.value == unmapped_code
def test_api_mapper_raises_on_unmapped_built_form_code() -> None:
    # Arrange — codes 1..6 cover detached / semi-detached / terraces.
    # An out-of-range integer must raise instead of quietly falling
    # through to the cascade's `_DEFAULT_SHELTERED_SIDES = 2`.
    unmapped_code = 99

    def inject_unmapped_code(doc: dict) -> None:
        doc["built_form"] = unmapped_code

    # Act / Assert
    with pytest.raises(UnmappedApiCode) as excinfo:
        _patch_api_doc_and_extract(
            "0380-2471-3250-2596-8761", inject_unmapped_code
        )
    raised = excinfo.value
    assert raised.field == "built_form"
    assert raised.value == unmapped_code
def test_all_golden_fixtures_extract_via_api_without_unmapped_code_raise() -> None:
    # Arrange — coverage forcing function on the API path: every JSON
    # fixture in `fixtures/golden/` must round-trip through
    # `from_api_response` without triggering an `UnmappedApiCode` raise
    # from any strict helper. New cohort fixtures added in subsequent
    # slices fall under the same gate; future API enum variants
    # surface here at extraction time instead of as a downstream SAP
    # delta.
    fixtures = sorted(_GOLDEN_FIXTURES_DIR.glob("*.json"))
    assert fixtures, f"no golden fixtures under {_GOLDEN_FIXTURES_DIR}"

    # Act — strict run for each fixture. Every `UnmappedApiCode` is
    # recorded against its fixture name so one run reports all gaps
    # and says which document each came from; any other exception
    # propagates unchanged.
    unmapped = []
    for fixture in fixtures:
        # Decode as UTF-8 explicitly; the default follows the host
        # locale.
        doc = json.loads(fixture.read_text(encoding="utf-8"))
        try:
            EpcPropertyDataMapper.from_api_response(doc)
        except UnmappedApiCode as exc:
            unmapped.append(f"{fixture.name}: {exc.field}={exc.value!r}")

    # Assert
    assert not unmapped, (
        "golden fixtures with unmapped API codes:\n  "
        + "\n  ".join(unmapped)
    )
def test_summary_7800_two_electric_showers_count_as_two_not_one() -> None:
    # Arrange — cert 7800-1501-0922-7127-3563's Summary §16 lodges TWO
    # instantaneous electric showers ("Shower 01" + "Shower 11", both
    # `outlet_type='Electric shower'`). Before Slice S0380.19 the
    # mapper hardcoded
    # `electric_shower_count = 1 if has_electric_shower else None`,
    # which threw the multiplicity away. On this cert the cascade is
    # indifferent: Appendix J eq J16
    # (N_ES,per_outlet = N_shower / N_outlets) and eq J18 (Σ_j E_ES,j)
    # give the same (64a) value for 1 vs 2 outlets when there are no
    # mixer outlets, so the SAP delta is unchanged — but the lodged
    # multiplicity is now available to any future cascade consumer
    # that needs it.
    site_notes = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(_SUMMARY_000890_PDF)
    ).extract()

    # Act
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)

    # Assert — both lodged electric showers surface on the EPC.
    heating = epc.sap_heating
    assert heating.electric_shower_count == 2
    assert heating.mixer_shower_count == 0
def test_summary_0036_flat_unknown_party_wall_routes_to_u_zero() -> None:
    # Arrange — cert 0036-6325-1100-0063-1226 is a "Flat, Mid-Terrace"
    # whose Summary lodges party_wall_type='U Unable to determine'.
    # RdSAP 10 Table 15 footnote *: flats/maisonettes with unknown
    # party-wall construction default to U=0.0, NOT the U=0.25 house
    # default. Before Slice S0380.18 the cascade sent the lodging's
    # "unknown" sentinel to the house default → +6.03 W/K HLC excess
    # → SAP under-prediction of -0.37 vs worksheet 62.7471.
    site_notes = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(_SUMMARY_000910_PDF)
    ).extract()
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)

    # Act — run the EPC through cert_to_inputs + the calculator so the
    # assertion covers the full cascade `u_party_wall` path rather
    # than the helper in isolation.
    calculator_inputs = cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)
    result = calculate_sap_from_inputs(calculator_inputs)

    # Assert — party walls add nothing to HLC for this flat with
    # unknown party-wall construction (matches worksheet line (32) =
    # 24.13 m² × 0.00 = 0.0000 W/K).
    assert epc.property_type == "Flat"
    party_walls_w_per_k = result.intermediate["party_walls_w_per_k"]
    assert abs(party_walls_w_per_k - 0.0) <= 1e-4
def test_summary_2536_normal_cylinder_routes_to_code_2() -> None:
    # Arrange — cert 2536-2525-0600-0788-2292's Summary §15.1 lodges
    # "Cylinder Size: Normal". The dr87 worksheet lodges "Cylinder
    # Volume 110.00" L on line ref (47); the cascade lookup
    # `_CYLINDER_SIZE_CODE_TO_LITRES` now maps code 2 → 110 L per
    # RdSAP 10 §10.5 Table 28's Normal (90-130 L) band midpoint.
    # This is the first cohort cert to exercise the "Normal" cylinder
    # lodging.
    expected_cylinder_size_code = 2
    site_notes = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(_SUMMARY_000889_PDF)
    ).extract()

    # Act
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)

    # Assert
    assert epc.sap_heating.cylinder_size == expected_cylinder_size_code
def test_summary_9421_normal_cylinder_routes_to_code_2() -> None:
    # Arrange — cert 9421-3045-3205-1646-6200's Summary §15.1 also
    # lodges "Cylinder Size: Normal" (same 110 L cylinder as cert
    # 2536). It is the second cohort cert exercising the "Normal"
    # mapping, pinned to guard against a silent regression of either
    # the mapper dict entry OR the cascade volume default.
    expected_cylinder_size_code = 2
    site_notes = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(_SUMMARY_000884_PDF)
    ).extract()

    # Act
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)

    # Assert
    assert epc.sap_heating.cylinder_size == expected_cylinder_size_code
def test_summary_9418_large_cylinder_routes_to_code_4() -> None:
    # Arrange — cert 9418-3062-8205-3566-7200's Summary §15.1 lodges
    # "Cylinder Size: Large". The dr87 worksheet lodges "Cylinder
    # Volume 210.00" L, and the cascade lookup
    # `_CYLINDER_SIZE_CODE_TO_LITRES = {3: 160.0, 4: 210.0}` maps code
    # 4 → 210 L. Cert 9418 is the first cohort cert to exercise the
    # "Large" cylinder lodging (every other cohort cert is "Medium").
    expected_cylinder_size_code = 4
    site_notes = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(_SUMMARY_000902_PDF)
    ).extract()

    # Act
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)

    # Assert
    assert epc.sap_heating.cylinder_size == expected_cylinder_size_code
def test_summary_9418_full_chain_sap_within_spec_floor_of_worksheet() -> None:
    # Arrange — cert 9418-3062-8205-3566-7200 (Summary_000902.pdf):
    # **Daikin EDLQ05CAV3 ASHP** (PCDB index 102421 — distinct from
    # the rest of the cohort's Mitsubishi 104568), end-terrace house
    # with TWO 1.64 kWp PV arrays (N + S) and a 210 L cylinder.
    # `heating_duration_code='24'` per Table N4 (continuous heating).
    # Worksheet "SAP value" lodges 84.6305.
    #
    # This is the final ASHP cert and closes the cohort. The only
    # Summary-mapper gap was the missing "Large" → 4 mapping in
    # `_ELMHURST_CYLINDER_SIZE_LABEL_TO_SAP10` (Slice S0380.14, this
    # commit) — multi-array PV + Large-cylinder were the variants
    # cert 9418 uniquely exercises.
    site_notes = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(_SUMMARY_000902_PDF)
    ).extract()
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)

    # Act
    calculator_inputs = cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)
    result = calculate_sap_from_inputs(calculator_inputs)

    # Assert — ±0.07 ASHP-cohort spec-floor tolerance.
    worksheet_unrounded_sap = 84.6305
    sap_delta = result.sap_score_continuous - worksheet_unrounded_sap
    assert abs(sap_delta) < _ASHP_COHORT_CHAIN_TOLERANCE
def test_summary_3800_full_chain_sap_within_spec_floor_of_worksheet() -> None:
    # Arrange — cert 3800-8515-0922-3398-3563 (Summary_000901.pdf /
    # dr87-0001-000901.pdf) is the third ASHP cohort cert to close on
    # the Summary path: Mitsubishi PUZ-WM50VHA ASHP (PCDB 104568).
    # Worksheet "SAP value" lodges 86.1458.
    #
    # **First-try closure — zero new mapper slices required**. The
    # structural work shipped in slices S0380.2..S0380.9 (HP routing,
    # cylinder block, composite walls, multi-array PV, extension
    # inheritance) already covered cert 3800's variant set — strong
    # evidence that the Summary mapper has reached completeness for
    # the standard single-bp / single-array ASHP shape.
    site_notes = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(_SUMMARY_000901_PDF)
    ).extract()
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)

    # Act
    calculator_inputs = cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)
    result = calculate_sap_from_inputs(calculator_inputs)

    # Assert — ±0.07 ASHP-cohort spec-floor tolerance.
    worksheet_unrounded_sap = 86.1458
    sap_delta = result.sap_score_continuous - worksheet_unrounded_sap
    assert abs(sap_delta) < _ASHP_COHORT_CHAIN_TOLERANCE
def test_summary_9285_full_chain_sap_within_spec_floor_of_worksheet() -> None:
    # Arrange — cert 9285-3062-0205-7766-7200 (Summary_000904.pdf /
    # dr87-0001-000904.pdf) is the fourth ASHP cohort cert to close on
    # the Summary path: Mitsubishi PUZ-WM50VHA ASHP (PCDB 104568).
    # Worksheet "SAP value" lodges 84.1369. It shares cert 3800's
    # "first-try closure, zero new slices" disposition — the cohort's
    # structural mapper completeness is the load-bearing claim.
    site_notes = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(_SUMMARY_000904_PDF)
    ).extract()
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)

    # Act
    calculator_inputs = cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)
    result = calculate_sap_from_inputs(calculator_inputs)

    # Assert — ±0.07 ASHP-cohort spec-floor tolerance.
    worksheet_unrounded_sap = 84.1369
    sap_delta = result.sap_score_continuous - worksheet_unrounded_sap
    assert abs(sap_delta) < _ASHP_COHORT_CHAIN_TOLERANCE
def test_summary_0380_full_chain_sap_within_spec_floor_of_worksheet() -> None:
    # Arrange — cert 0380-2471-3250-2596-8761 (Summary_000899.pdf /
    # dr87-0001-000899.pdf) is the first heat-pump cert under per-cert
    # Summary-path mapper validation: Mitsubishi PUZ-WM50VHA ASHP
    # (PCDB index 104568), semi-detached bungalow age D, TFA 60.43 m².
    # The worksheet PDF's "SAP value" line lodges unrounded SAP
    # **88.5104**.
    #
    # Slices S0380.2..S0380.6 closed the Summary path from Δ -54.7184
    # to Δ +0.0594 — the same Appendix N3.6 PSR-interpolation
    # precision floor at which the API path closes (commit c0086660
    # slice 102f wired this floor for the full 7-cert ASHP cohort at
    # the same ±0.07 tolerance). Going further needs calculator work
    # on the PSR interpolation step, not mapper work: the Summary EPC
    # and API EPC produce IDENTICAL cascade outputs at this point (HW
    # kWh, fabric W/K, HLC all match at 1e-4), so the +0.0594 residual
    # is structural to the calculator's HP path for this fixture's
    # PSR.
    site_notes = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(_SUMMARY_000899_PDF)
    ).extract()
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)

    # Act
    calculator_inputs = cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)
    result = calculate_sap_from_inputs(calculator_inputs)

    # Assert — ±0.07 ASHP-cohort spec-floor tolerance (matches the API
    # path's slice 102f disposition; `_ASHP_COHORT_CHAIN_TOLERANCE`
    # is defined alongside the API-path equivalents below).
    worksheet_unrounded_sap = 88.5104
    sap_delta = result.sap_score_continuous - worksheet_unrounded_sap
    assert abs(sap_delta) < _ASHP_COHORT_CHAIN_TOLERANCE
# Golden API fixtures for certs 0330 and 9501, built on the shared
# `_GOLDEN_FIXTURES_DIR` so the fixture directory is spelled out in
# one place only. The resulting paths are unchanged.
_API_0330_JSON = _GOLDEN_FIXTURES_DIR / "0330-2249-8150-2326-4121.json"
_API_9501_JSON = _GOLDEN_FIXTURES_DIR / "9501-3059-8202-7356-0204.json"
def test_api_9501_full_chain_sap_matches_worksheet_pdf_exactly() -> None:
    # Arrange — cert 9501 is the third Layer 4 production gate (after
    # cert 001479 and cert 0330): API path → from_api_response →
    # cert_to_inputs → calculate_sap_from_inputs must hit the worksheet
    # SAP at 1e-4. Cert 9501 is the FIRST flat in the production gate
    # set — mid-terrace top-floor flat with RR + measured PV (2.36 kWp
    # SW @ 45°). Worksheet target unrounded SAP **68.5252**.
    #
    # Slices 100a-100c jointly closed the API path from Δ -14.82 to
    # 1e-4: 100a `room_in_roof_details` schema + Detailed-RR surface
    # population (HLC 382.19 → 297.54 W/K vs worksheet 296.68); 100b
    # per-bp TFA includes RR floor area (TFA 81.28 → 113.08); 100c
    # `photovoltaic_supply.pv_arrays` schema + gap-aware glazing
    # lookup (DG pre-2002 16+ → U=2.7 per RdSAP 10 Table 24).
    #
    # Decode as UTF-8 explicitly; the default follows the host locale.
    doc = json.loads(_API_9501_JSON.read_text(encoding="utf-8"))
    epc = EpcPropertyDataMapper.from_api_response(doc)

    # Act
    result = calculate_sap_from_inputs(
        cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)
    )

    # Assert — 1e-4 pin against the worksheet's continuous SAP.
    worksheet_unrounded_sap = 68.5252
    assert abs(result.sap_score_continuous - worksheet_unrounded_sap) < 1e-4
def test_api_9501_photovoltaic_array_surfaced() -> None:
    # Arrange — cert 9501's API JSON lodges measured PV under
    # `sap_energy_source.photovoltaic_supply.pv_arrays`. Two real-API
    # PV shapes coexist: cohort cert 2130 lodges the outer wrapper as
    # a nested list `[[{...}], ...]`; cert 9501 lodges a dict
    # `{"pv_arrays": [{...}]}`. The earlier schema modelled only the
    # legacy `none_or_no_details` field on `PhotovoltaicSupply` — so
    # cert 9501's `pv_arrays` payload was silently dropped, leaving
    # `photovoltaic_arrays=None` and the cascade missing the worksheet's
    # £250.02 PV credit.
    #
    # Decode as UTF-8 explicitly; the default follows the host locale.
    doc = json.loads(_API_9501_JSON.read_text(encoding="utf-8"))

    # Act
    epc = EpcPropertyDataMapper.from_api_response(doc)

    # Assert — single array with the lodged kWp/pitch/orientation/
    # overshading values.
    arrays = epc.sap_energy_source.photovoltaic_arrays
    assert arrays is not None
    assert len(arrays) == 1
    array = arrays[0]
    assert abs(array.peak_power - 2.36) <= 1e-4
    assert array.pitch == 3  # RdSAP §11.1 enum: 3 = 45°
    assert array.orientation == 6  # SAP octant: SW
    assert array.overshading == 1  # RdSAP: None or very little
# Golden API fixture for cert 0380, built on the shared
# `_GOLDEN_FIXTURES_DIR` rather than re-spelling the directory. The
# resulting path is unchanged.
_API_0380_JSON = _GOLDEN_FIXTURES_DIR / "0380-2471-3250-2596-8761.json"
def test_api_0380_glazing_type_14_resolves_to_post_2022_dg_u_value() -> None:
    # Arrange — cert 0380 (ASHP semi-detached bungalow, worksheet SAP
    # 88.5104) lodges glazing_type=14 on all windows. The worksheet
    # uses U=1.3258 (post-curtain) for line (27), which back-calculates
    # to a raw U=1.40 — the SAP10.2 Table 24 row for "Double or triple
    # glazed, 2022 or later". Code 13 in our existing dict carries the
    # same U/g values; code 14 is the schema sibling for the same
    # post-2022 product family (DG sealed-unit variants differ in
    # the cert lodgement but agree on the spec U-value).
    #
    # Decode as UTF-8 explicitly; the default follows the host locale.
    doc = json.loads(_API_0380_JSON.read_text(encoding="utf-8"))
    epc = EpcPropertyDataMapper.from_api_response(doc)

    # Act — pick any window (cert 0380 lodges only glazing_type=14).
    w = epc.sap_windows[0]
    td = w.window_transmission_details

    # Assert
    assert td is not None
    assert abs(td.u_value - 1.40) <= 1e-4
    assert abs(td.solar_transmittance - 0.72) <= 1e-4
def test_api_0380_wall_with_external_insulation_routes_to_filled_cavity_u() -> None:
    # Arrange — cert 0380's top-level walls[0].description lodges
    # "Cavity wall, filled cavity and external insulation". The
    # worksheet uses U=0.25 for the (29a) external-walls entry — the
    # very-low-U "filled cavity + external insulation" composite that
    # RdSAP 10 §5 routes through Table 6's filled-cavity row (with a
    # further EWI reduction). Our cascade was computing U=0.32 via
    # the as-built Table 13 bucketed cascade because
    # `_described_as_insulated` only matches the past-participle
    # "insulated" — "insulation" (noun) on its own falls through to
    # False. Cert 0380's lodgement uses the noun form.
    #
    # Fix: `_described_as_insulated` should also match the noun
    # "insulation" (excluding the existing "no insulation" hard
    # negation), so cavity walls described as carrying insulation
    # route to the cascade's Filled-cavity branch.
    from domain.sap10_calculator.rdsap.cert_to_inputs import (
        heat_transmission_section_from_cert,
    )

    # Decode as UTF-8 explicitly; the default follows the host locale.
    doc = json.loads(_API_0380_JSON.read_text(encoding="utf-8"))
    epc = EpcPropertyDataMapper.from_api_response(doc)

    # Act
    ht = heat_transmission_section_from_cert(epc)

    # Assert — main-wall HLC ≈ 46.46 m² × 0.25 = 11.62 W/K (worksheet
    # exact). Tolerance 1e-2 absorbs sub-component rounding; the
    # 1e-4 chain test downstream tightens to the cascade floor.
    worksheet_walls_w_per_k = 11.62
    assert abs(ht.walls_w_per_k - worksheet_walls_w_per_k) <= 1e-2
def test_api_0380_heat_pump_no_secondary_heating_per_table_11() -> None:
    # Arrange — SAP 10.2 Table 11 explicitly notes "Cat 4 (heat pump):
    # 0.00 (HP eff includes any secondary)" — heat pumps don't apply a
    # Table 11 secondary fraction even when the cert lodges a secondary
    # heating type, because the HP efficiency already incorporates any
    # supplementary heat source. The
    # `_SECONDARY_HEATING_FRACTION_BY_CATEGORY` dict in
    # cert_to_inputs.py had entries for categories 1/2/3/5/6/7/10 but
    # DID NOT include cat 4 — so HP certs with a lodged secondary fell
    # through to the DEFAULT 0.10, billing 10% of space-heating cost
    # as "secondary" (cert 0380: £72 secondary vs worksheet £0).
    #
    # Cert 0380 lodges secondary_heating_type=691 +
    # main_heating_category=4 (HP, PCDB idx 104568). Worksheet line
    # (242) "Space heating - secondary" shows 0.0 kWh; cascade was
    # producing 547.30 kWh. Fix: dict entry `4: 0.0`.
    #
    # Decode as UTF-8 explicitly; the default follows the host locale.
    doc = json.loads(_API_0380_JSON.read_text(encoding="utf-8"))
    epc = EpcPropertyDataMapper.from_api_response(doc)

    # Act — `calculate_sap_from_inputs`, `cert_to_inputs` and
    # `SAP_10_2_SPEC_PRICES` are the module-scope names the sibling
    # tests in this file already call, so no function-local re-import
    # is needed.
    result = calculate_sap_from_inputs(
        cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)
    )

    # Assert — secondary heating contributes 0 kWh / £0 on HP certs.
    assert result.secondary_heating_fuel_kwh_per_yr == 0.0
def test_api_0380_heat_pump_no_pumps_fans_kwh_per_table_4f() -> None:
    # Arrange — SAP 10.2 Table 4f lists annual pumps + fans electricity
    # consumption by main heating category. Gas-fired boilers (cat 2)
    # use 160 kWh/yr (115 central heating pump + 45 flue fan). Heat
    # pumps (cat 4) have NO additional pumps/fans contribution because
    # the HP system's circulation pump and fans are already
    # incorporated into the system COP.
    #
    # The cascade's `_PUMPS_FANS_KWH_BY_MAIN_CATEGORY` dict only had a
    # cat-2 entry; cat-4 HP certs fell through to the DEFAULT 130
    # kWh/yr (~£17 at 13.19 p/kWh) — the worksheet line (249) "Pumps,
    # fans and electric keep-hot" shows 0.0000 kWh/yr for cert 0380.
    #
    # Decode as UTF-8 explicitly; the default follows the host locale.
    doc = json.loads(_API_0380_JSON.read_text(encoding="utf-8"))
    epc = EpcPropertyDataMapper.from_api_response(doc)

    # Act — `calculate_sap_from_inputs`, `cert_to_inputs` and
    # `SAP_10_2_SPEC_PRICES` are the module-scope names the sibling
    # tests in this file already call, so no function-local re-import
    # is needed.
    result = calculate_sap_from_inputs(
        cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)
    )

    # Assert
    assert result.pumps_fans_kwh_per_yr == 0.0
# Golden API fixtures for certs 9418, 2225 and 2636, built on the
# shared `_GOLDEN_FIXTURES_DIR` rather than re-spelling the directory
# three times. The resulting paths are unchanged.
_API_9418_JSON = _GOLDEN_FIXTURES_DIR / "9418-3062-8205-3566-7200.json"
_API_2225_JSON = _GOLDEN_FIXTURES_DIR / "2225-3062-8205-2856-7204.json"
_API_2636_JSON = _GOLDEN_FIXTURES_DIR / "2636-0525-2600-0401-2296.json"
def test_api_2636_cantilever_floor_surfaces_as_exposed_floor() -> None:
    # Arrange — cert 2636 (Mitsubishi ASHP, semi-detached, 2 storeys,
    # property_type=0) has BP0 floor 0 area 39.18 m² and floor 1 area
    # 42.92 m². The 3.74 m² difference is an upper-floor cantilever —
    # worksheet (28b) "Exposed floor Main: 3.74 × 1.20 = 4.4880" treats
    # it per RdSAP Table 20 U_exposed_floor at age-D + no insulation
    # = 1.20 W/m²K.
    #
    # Without the cantilever surfaced, cert 2636 cascade SAP =
    # 86.7514 vs worksheet 86.2641 (Δ +0.49 — by far the largest
    # outlier in the 7-cert ASHP cohort, where the other 6 cluster
    # at ±0.06). Pre-fix HLC drift was -4.51 W/K = 3.74 × 1.20 +
    # 0.15 × 3.74 thermal-bridging contribution on the extra exposed
    # area. Tolerance ±0.07 covers the residual PSR/HLC drift that
    # this cert shares with the 7-cohort cluster (per the slice
    # 102f-prep.10 alt-wall-allocation fix this cert moves from the
    # near-zero cancellation state into the cohort cluster).
    #
    # Decode as UTF-8 explicitly; the default follows the host locale.
    doc = json.loads(_API_2636_JSON.read_text(encoding="utf-8"))
    epc = EpcPropertyDataMapper.from_api_response(doc)

    # Act — full cert→inputs→calculator cascade
    result = calculate_sap_from_inputs(
        cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)
    )

    # Assert — SAP within 0.07 of worksheet 86.2641.
    assert abs(result.sap_score_continuous - 86.2641) < 0.07, (
        f"cascade SAP={result.sap_score_continuous:.4f} vs worksheet 86.2641"
    )
def test_api_2636_thermal_bridging_excludes_alt_wall_window_opening_per_sap_10_2_appendix_k() -> None:
    # Arrange — API-path mirror of the Summary-path (31) NET pin.
    # The Summary EPC and API EPC for cert 2636 produce identical
    # cascade output once the alt-wall window opening is deducted
    # from (31) per SAP 10.2 Appendix K eqn (K2) p.84. Worksheet (36)
    # = 24.0495 W/K, worksheet "SAP value" 86.2641 — cascade closes
    # to the 1e-4 spec-precision floor on the API path too.
    #
    # Decode as UTF-8 explicitly; the default follows the host locale.
    doc = json.loads(_API_2636_JSON.read_text(encoding="utf-8"))
    epc = EpcPropertyDataMapper.from_api_response(doc)

    # Act
    result = calculate_sap_from_inputs(
        cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)
    )

    # Assert
    assert abs(result.intermediate["thermal_bridging_w_per_k"] - 24.0495) <= 1e-4
    assert abs(result.sap_score_continuous - 86.2641) <= 1e-4
def test_api_2636_alt_wall_openings_deducted_from_alt_not_main() -> None:
    # Arrange — cert 2636 has BP0 with `sap_alternative_wall_1`
    # (area 12.76 m², cavity unfilled at age D → U=0.70) and 7
    # windows. One window (1.14 × 1.04 ≈ 1.19 m²) lodges
    # `window_wall_type=2` → it sits on the alt wall, not main.
    #
    # Per RdSAP §1.4.2 wall openings deduct from the wall they
    # pierce. Worksheet (29a):
    #   Main:  gross 61.73, openings 14.03, net 47.70 → 0.25 × 47.70 = 11.925
    #   Alt.1: gross 12.76, openings  1.19, net 11.57 → 0.70 × 11.57 =  8.099
    #   Total walls (29a) = 20.024
    #
    # Pre-fix cascade subtracted ALL openings from the (main+alt)
    # gross then routed the alt at its FULL gross — over-counting
    # alt's contribution by 1.19 × (0.70 - 0.25) ≈ 0.535 W/K, and
    # under-counting main by the matching 1.19 × 0.25 — net +0.535.
    #
    # Decode as UTF-8 explicitly; the default follows the host locale.
    doc = json.loads(_API_2636_JSON.read_text(encoding="utf-8"))
    epc = EpcPropertyDataMapper.from_api_response(doc)

    # Act — full cascade so windows + doors are read from the cert.
    inputs = cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)

    # Assert — worksheet sum 11.925 + 8.099 = 20.024 at 1e-3.
    assert abs(inputs.heat_transmission.walls_w_per_k - 20.024) < 1e-3, (
        f"cascade walls={inputs.heat_transmission.walls_w_per_k:.4f} "
        f"vs worksheet 20.024"
    )
def test_api_2225_no_mixer_lodged_uses_zero_showers_per_worksheet() -> None:
    # Arrange — cert 2225 lodges `mixer_shower_count = None` (the field
    # is unlodged in the API JSON, not "0"). The worksheet (42a) "Hot
    # water usage for mixer showers" shows 0.0000 every month — the
    # Elmhurst convention is "absent ⇒ no shower". Cascade previously
    # defaulted to a single 7 L/min vented mixer when unlodged, which
    # raised (44) daily HW use from 122.89 → 130.56 l/day (Jan) and
    # added ~113 kWh/yr to (62) HW demand. The cohort-modal lodging
    # is 0 (5/7 certs lodge mixer=0 explicitly).
    #
    # Decode as UTF-8 explicitly; the default follows the host locale.
    doc = json.loads(_API_2225_JSON.read_text(encoding="utf-8"))
    epc = EpcPropertyDataMapper.from_api_response(doc)

    # Act
    inputs = cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)

    # Assert — HW fuel kWh tracks worksheet (247) 1634.04 at 1e-1
    # (η_water = 172.85 implies demand 2824.44; fuel = demand / η).
    worksheet_hw_fuel_kwh = 1634.04
    assert abs(inputs.hot_water_kwh_per_yr - worksheet_hw_fuel_kwh) <= 0.1
def test_api_9418_daikin_24h_duration_mean_internal_temp_matches_worksheet_92() -> None:
    # Arrange — cert 9418 (Daikin Altherma EDLQ05CAV3, PCDB 102421)
    # lodges `heating_duration_code = "24"`. Per SAP 10.2 Table N4 (PDF
    # p.107) this means N24,9 = 365 (all days operate at 24-hour
    # heating, no off-period). Worksheet (87) MIT_living = 21.0 every
    # month (= Th1, no off period), worksheet (90) MIT_elsewhere
    # collapses to Th2 directly. Worksheet (92) blended at fLA = 0.30.
    #
    # Pre-slice-102f-prep.7 the helper's "V"-only gate returned None
    # for this duration → bimodal cascade gave MIT ~17.8-19.8 (off by
    # ~2°C). After Table N4 wiring the cascade lands at 1e-3.
    #
    # The fixture is decoded as UTF-8 explicitly so the test does not
    # depend on the host's locale encoding.
    doc = json.loads(_API_9418_JSON.read_text(encoding="utf-8"))
    epc = EpcPropertyDataMapper.from_api_response(doc)
    # Act
    inputs = cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)
    # Assert — worksheet (92) "MIT" 12-tuple at 1e-3 per month.
    worksheet_mit_92 = (
        19.8400, 19.8445, 19.8489, 19.8697, 19.8736, 19.8920,
        19.8920, 19.8954, 19.8849, 19.8736, 19.8657, 19.8574,
    )
    cascade_mit = tuple(inputs.mean_internal_temp_monthly_c)
    # `zip` stops at the shorter input, so pin the month count first —
    # otherwise a truncated (or empty) cascade series passes vacuously.
    assert len(cascade_mit) == len(worksheet_mit_92), (
        f"cascade produced {len(cascade_mit)} monthly MIT values, "
        f"worksheet has {len(worksheet_mit_92)}"
    )
    for m, (cascade, ws) in enumerate(zip(cascade_mit, worksheet_mit_92)):
        assert abs(cascade - ws) < 1e-3, (
            f"month {m + 1}: cascade={cascade:.4f} vs worksheet={ws:.4f}"
        )
def test_api_0380_mean_internal_temp_matches_worksheet_92_within_1e_3() -> None:
    # Arrange — SAP 10.2 Appendix N3.5 (PDF p.107) replaces Table 9c
    # steps 3-4 for heat-pump packages with PCDB data: each month
    # blends Th, T_unimodal, T_bimodal via Equation N5.
    #
    # Cert 0380 (Mitsubishi PUZ-WM50VHA, PCDB 104568, PSR ≈ 1.43)
    # lands on Table N5 row "1.2 or more" → annual totals (3, 38) →
    # Jan(3, 28) + Dec(0, 10) extended days.
    #
    # Pre-slice-102f-prep.6 the cold-month MIT drifted +0.008°C due to
    # `internal_gains_from_cert` injecting the central-heating pump's
    # heating-season gain (~7 W) on HP certs. SAP 10.2 Table 4f
    # specifies zero pump/fan gains on HP packages (cert 0380's
    # worksheet line 70 = 0.0 every month) — that gating drops the
    # spurious gain and tightens the MIT cascade against worksheet
    # (92) to 1e-3 per month.
    #
    # The fixture is decoded as UTF-8 explicitly so the test does not
    # depend on the host's locale encoding.
    doc = json.loads(_API_0380_JSON.read_text(encoding="utf-8"))
    epc = EpcPropertyDataMapper.from_api_response(doc)
    # Act
    inputs = cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)
    # Assert — pin against worksheet line (92) "MIT" 12-tuple.
    worksheet_mit_92 = (
        18.9539, 18.0081, 18.3466, 18.8491, 19.3582, 19.8174,
        20.0288, 20.0064, 19.6975, 19.0702, 18.3966, 18.1573,
    )
    cascade_mit = tuple(inputs.mean_internal_temp_monthly_c)
    # `zip` stops at the shorter input, so pin the month count first —
    # otherwise a truncated (or empty) cascade series passes vacuously.
    assert len(cascade_mit) == len(worksheet_mit_92), (
        f"cascade produced {len(cascade_mit)} monthly MIT values, "
        f"worksheet has {len(worksheet_mit_92)}"
    )
    for m, (cascade, ws) in enumerate(zip(cascade_mit, worksheet_mit_92)):
        assert abs(cascade - ws) < 1e-3, (
            f"month {m + 1}: cascade={cascade:.4f} vs worksheet={ws:.4f}"
        )
def test_api_9501_room_in_roof_surfaces_populated() -> None:
    # Arrange — cert 9501's API JSON lodges measured RR detail under
    # `sap_room_in_roof.room_in_roof_details`: two gable walls
    # (5.51 m × 2.45 m + 6.51 m × 2.45 m) and a flat ceiling (5.5 m ×
    # 1.0 m, 300 mm insulation). The schema's `SapRoomInRoof` dataclass
    # exposed the inner block under the wrong field name
    # `room_in_roof_type_1` (the legacy Simplified Type 1 wrapper),
    # so `from_dict` parsed the inner block as None — the API mapper
    # then built `SapRoomInRoof` with no per-surface area data, and
    # the cascade defaulted to the Simplified Type 2 "all elements"
    # branch (RR floor_area × Table 18 col(4) age-B U=2.30) for the
    # whole RR → roof HLC 149.43 vs worksheet 18.10 (Δ +131).
    #
    # The fixture is decoded as UTF-8 explicitly so the test does not
    # depend on the host's locale encoding.
    doc = json.loads(_API_9501_JSON.read_text(encoding="utf-8"))
    # Act
    epc = EpcPropertyDataMapper.from_api_response(doc)
    # Assert — RR surfaces present and match worksheet element table:
    # Gable Wall 1 = 13.50 m², Gable Wall 2 = 15.95 m², Flat Ceiling 1
    # = 5.50 m² (per worksheet §3 element table).
    rir = epc.sap_building_parts[0].sap_room_in_roof
    assert rir is not None
    assert rir.detailed_surfaces is not None
    # Sorted so the comparison does not depend on lodging order.
    kinds_by_area = sorted((s.kind, s.area_m2) for s in rir.detailed_surfaces)
    assert kinds_by_area == [
        ("flat_ceiling", 5.5),
        ("gable_wall_external", 13.50),
        ("gable_wall_external", 15.95),
    ]
def test_api_0330_full_chain_sap_matches_worksheet_pdf_exactly() -> None:
    # Arrange — cert 0330-2249-8150-2326-4121 (second boiler validation
    # cert: mains-gas Vaillant PCDB idx 10241, mid-terrace 2-bp dwelling,
    # TFA 90.56 m²) has both an Elmhurst Summary PDF and a GOV.UK EPB API
    # JSON. The Summary path lands at 1e-4 vs worksheet SAP 61.5993
    # above; this Layer 4 production gate asserts the API path matches
    # the worksheet to the same 1e-4 tolerance — same forcing function
    # as cert 001479's Layer 4 test, applied to the second boiler cert.
    #
    # Slices 96-99 (flat-roof Table 18 col (3) U-values + glazing_type=2
    # surfacing + shower-outlets list normalisation + window-area
    # rounding alignment) jointly closed the API path from
    # Δ +2.1453 → Δ -0.000011 vs worksheet 61.5993.
    #
    # The fixture is decoded as UTF-8 explicitly so the test does not
    # depend on the host's locale encoding.
    doc = json.loads(_API_0330_JSON.read_text(encoding="utf-8"))
    epc = EpcPropertyDataMapper.from_api_response(doc)
    # Act
    result = calculate_sap_from_inputs(
        cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)
    )
    # Assert — 1e-4 pin against the worksheet's continuous SAP.
    worksheet_unrounded_sap = 61.5993
    assert abs(result.sap_score_continuous - worksheet_unrounded_sap) < 1e-4, (
        f"cascade SAP={result.sap_score_continuous:.6f} "
        f"vs worksheet {worksheet_unrounded_sap}"
    )
def test_api_001479_full_chain_sap_matches_worksheet_pdf_exactly() -> None:
    # Arrange — cert 001479 has both an Elmhurst Summary PDF and a GOV.UK
    # EPB API JSON (ref 0535-9020-6509-0821-6222). The Summary cascade
    # already pins at worksheet's 69.0094 ± 1e-4 above; this test is the
    # Layer 4 production-path gate: API JSON → from_api_response →
    # cert_to_inputs → calculate_sap_from_inputs must also hit 69.0094
    # at 1e-4. Identical inputs must produce identical outputs; the
    # calculator is deterministic, so any drift is a mapper coverage gap.
    #
    # The fixture is decoded as UTF-8 explicitly so the test does not
    # depend on the host's locale encoding.
    doc = json.loads(_API_001479_JSON.read_text(encoding="utf-8"))
    epc = EpcPropertyDataMapper.from_api_response(doc)
    # Act
    result = calculate_sap_from_inputs(
        cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)
    )
    # Assert — 1e-4 pin against the worksheet's continuous SAP. ±0.5 is
    # the API-only fallback (project memory `feedback_api_tolerance_1e_
    # minus_4`); when the worksheet is available, identical-inputs-must-
    # produce-identical-outputs is the bar.
    worksheet_unrounded_sap = 69.0094
    assert abs(result.sap_score_continuous - worksheet_unrounded_sap) < 1e-4, (
        f"cascade SAP={result.sap_score_continuous:.6f} "
        f"vs worksheet {worksheet_unrounded_sap}"
    )
# ============================================================================
# Layer 4 chain tests — 7-cert ASHP cohort
# ============================================================================
# These pin the API → from_api_response → cert_to_inputs →
# calculate_sap_from_inputs cascade against each cert's Elmhurst dr87
# worksheet unrounded SAP. Tolerance is `_ASHP_COHORT_CHAIN_TOLERANCE`
# (1e-4, the same bar as the boiler cohort above). It was originally
# 0.07 — see HANDOVER_CERT_0380_MIT_CASCADE.md for the investigation:
# BRE web confirmed max_output_kw matches cascade exactly (4.39 /
# 3.933), cascade (39) annual HLC matches worksheet at 4 dp, but
# back-solving worksheet η_space implied ~0.15% drift, at the time
# attributed to Elmhurst's internal interpolation precision (likely a
# vendor rounding convention not in the public SAP 10.2 spec), with
# the 7 certs clustering within +0.030..+0.060 SAP. Later slices
# closed that residual; the `_ASHP_COHORT_CHAIN_TOLERANCE` docstring
# below records the closure history.
#
# At rounded (integer SAP) precision, all 7 cascade integers match
# the lodged values exactly (residual = 0, pinned in
# `_GOLDEN_EXPECTATIONS`).
# GOV.UK EPB API JSON fixtures for three of the ASHP-cohort certs
# (0350, 3800, 9285). Each path is anchored at `Path(__file__).parents[3]`
# — four directory levels above this test module — and then descends
# into the golden-fixture directory.
_API_0350_JSON: Path = (
    Path(__file__).parents[3]
    / "tests/domain/sap10_calculator/rdsap/fixtures/golden"
    / "0350-2968-2650-2796-5255.json"
)
_API_3800_JSON: Path = (
    Path(__file__).parents[3]
    / "tests/domain/sap10_calculator/rdsap/fixtures/golden"
    / "3800-8515-0922-3398-3563.json"
)
_API_9285_JSON: Path = (
    Path(__file__).parents[3]
    / "tests/domain/sap10_calculator/rdsap/fixtures/golden"
    / "9285-3062-0205-7766-7200.json"
)
# Absolute tolerance on the continuous SAP score; the ASHP cohort chain
# tests below compare `abs(cascade - worksheet)` against it with a
# strict `<`.
_ASHP_COHORT_CHAIN_TOLERANCE: float = 1e-4
"""ASHP-cohort chain-test tolerance.
The cohort closed cumulatively across S0380.26..S0380.35: §3.2 curtain
+ reciprocal-η interpolation (SAP 10.2 fn 43), glazing-code Table 6b
extension to RdSAP21 codes 8-15, (31) NET area for alt-wall openings
(SAP 10.2 K2), and the RdSAP10 §15 Decimal-rounding cluster on living
area / gross wall / kWp. At HEAD all 7 ASHP cohort certs sit at < 5e-5
SAP on BOTH paths (worst residual: cert 2225 4.8e-5):
Summary path: 7/7 < 1e-4 (cert 2636 -2e-6 after S0380.31)
API path: 7/7 < 1e-4 (parity with Summary at cascade output level)
1e-4 matches the user's [[feedback-one-e-minus-4-across-the-board]]
target with ~2x headroom over the worst residual. Any future cohort
regression beyond ~5e-5 fires this tolerance loudly."""
def test_api_0380_full_chain_sap_within_spec_floor_of_worksheet() -> None:
    # Mitsubishi PUZ-WM50VHA PCDB 104568, semi-detached bungalow age D.
    # Fixture decoded as UTF-8 explicitly (not the host locale encoding).
    doc = json.loads(_API_0380_JSON.read_text(encoding="utf-8"))
    epc = EpcPropertyDataMapper.from_api_response(doc)
    result = calculate_sap_from_inputs(
        cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)
    )
    assert abs(result.sap_score_continuous - 88.5104) < _ASHP_COHORT_CHAIN_TOLERANCE, (
        f"cascade SAP={result.sap_score_continuous:.6f} vs worksheet 88.5104"
    )
def test_api_0350_full_chain_sap_within_spec_floor_of_worksheet() -> None:
    # Mitsubishi PUZ-WM50VHA PCDB 104568.
    # Fixture decoded as UTF-8 explicitly (not the host locale encoding).
    doc = json.loads(_API_0350_JSON.read_text(encoding="utf-8"))
    epc = EpcPropertyDataMapper.from_api_response(doc)
    result = calculate_sap_from_inputs(
        cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)
    )
    assert abs(result.sap_score_continuous - 84.1367) < _ASHP_COHORT_CHAIN_TOLERANCE, (
        f"cascade SAP={result.sap_score_continuous:.6f} vs worksheet 84.1367"
    )
def test_api_2225_full_chain_sap_within_spec_floor_of_worksheet() -> None:
    # Mitsubishi PUZ-WM50VHA PCDB 104568, with PV. Slice 102f-prep.8
    # closed the shower_outlets=None default.
    # Fixture decoded as UTF-8 explicitly (not the host locale encoding).
    doc = json.loads(_API_2225_JSON.read_text(encoding="utf-8"))
    epc = EpcPropertyDataMapper.from_api_response(doc)
    result = calculate_sap_from_inputs(
        cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)
    )
    assert abs(result.sap_score_continuous - 88.7921) < _ASHP_COHORT_CHAIN_TOLERANCE, (
        f"cascade SAP={result.sap_score_continuous:.6f} vs worksheet 88.7921"
    )
def test_api_2636_full_chain_sap_within_spec_floor_of_worksheet() -> None:
    # Mitsubishi PUZ-WM50VHA PCDB 104568, with cantilever + alt wall.
    # Slice 102f-prep.9 (cantilever) + 102f-prep.10 (alt-wall openings).
    # Fixture decoded as UTF-8 explicitly (not the host locale encoding).
    doc = json.loads(_API_2636_JSON.read_text(encoding="utf-8"))
    epc = EpcPropertyDataMapper.from_api_response(doc)
    result = calculate_sap_from_inputs(
        cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)
    )
    assert abs(result.sap_score_continuous - 86.2641) < _ASHP_COHORT_CHAIN_TOLERANCE, (
        f"cascade SAP={result.sap_score_continuous:.6f} vs worksheet 86.2641"
    )
def test_api_3800_full_chain_sap_within_spec_floor_of_worksheet() -> None:
    # Mitsubishi PUZ-WM50VHA PCDB 104568.
    # Fixture decoded as UTF-8 explicitly (not the host locale encoding).
    doc = json.loads(_API_3800_JSON.read_text(encoding="utf-8"))
    epc = EpcPropertyDataMapper.from_api_response(doc)
    result = calculate_sap_from_inputs(
        cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)
    )
    assert abs(result.sap_score_continuous - 86.1458) < _ASHP_COHORT_CHAIN_TOLERANCE, (
        f"cascade SAP={result.sap_score_continuous:.6f} vs worksheet 86.1458"
    )
def test_api_9285_full_chain_sap_within_spec_floor_of_worksheet() -> None:
    # Mitsubishi PUZ-WM50VHA PCDB 104568.
    # Fixture decoded as UTF-8 explicitly (not the host locale encoding).
    doc = json.loads(_API_9285_JSON.read_text(encoding="utf-8"))
    epc = EpcPropertyDataMapper.from_api_response(doc)
    result = calculate_sap_from_inputs(
        cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)
    )
    assert abs(result.sap_score_continuous - 84.1369) < _ASHP_COHORT_CHAIN_TOLERANCE, (
        f"cascade SAP={result.sap_score_continuous:.6f} vs worksheet 84.1369"
    )
def test_api_9418_full_chain_sap_within_spec_floor_of_worksheet() -> None:
    # Daikin Altherma EDLQ05CAV3 PCDB 102421, heating_duration_code='24'
    # (continuous, all days at Th). Slice 102f-prep.7 closed Table N4.
    # Fixture decoded as UTF-8 explicitly (not the host locale encoding).
    doc = json.loads(_API_9418_JSON.read_text(encoding="utf-8"))
    epc = EpcPropertyDataMapper.from_api_response(doc)
    result = calculate_sap_from_inputs(
        cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)
    )
    assert abs(result.sap_score_continuous - 84.6305) < _ASHP_COHORT_CHAIN_TOLERANCE, (
        f"cascade SAP={result.sap_score_continuous:.6f} vs worksheet 84.6305"
    )
# ============================================================================
# Cohort-2 API-path chain tests (cross-mapper parity at the cascade)
# ============================================================================
# Mirror the cohort-2 Summary-path sweep that closed across S0380.30..38.
# Per [[feedback-cross-mapper-parity-via-cascade]]: API EPC and Elmhurst EPC
# must produce SAP within 1e-4 of each other AND of the worksheet — the
# SAP cascade is the load-bearing equivalence check. Each cert in this
# cohort has both a Summary PDF (under `sap worksheets/additional with
# api 2/<cert>/Summary_*.pdf`) and an API JSON fixture (fetched into
# `tests/domain/sap10_calculator/rdsap/fixtures/golden/<cert>.json` in
# Slice S0380.39). Worksheet SAP is the source of truth.
#
# Cohort-2 API-path closure history (each slice closed a distinct
# spec-citation gap, then re-pinned the cohort):
# S0380.40 — parametrized over all 38 certs; 34 immediate / 4 open
# S0380.41 — RdSAP 21 → SAP 10.2 glazing-type alias closed 0300/9380
# S0380.42 — Decimal HALF_UP per-window areas closed 1536
# S0380.43 — SAP 631 → spec fuel (House coal) closed 2102
# At HEAD: 38/38 cohort-2 certs hit <1e-4 on the API path, matching
# the Summary-path sweep (also 38/38 <1e-4 at HEAD). Cross-mapper
# parity at the cascade is fully established.
# Directory holding the cohort-2 API JSON fixtures, one `<cert>.json`
# per cert; `_cascade_continuous_sap_from_api` resolves fixtures
# against it.
_COHORT_2_API_FIXTURE_DIR: Path = (
    Path(__file__).parents[3]
    / "tests/domain/sap10_calculator/rdsap/fixtures/golden"
)
# (cert_dir, worksheet_unrounded_sap) — all 38 cohort-2 certs. 34 of
# them hit the worksheet's continuous SAP at 1e-4 on the API path
# without any follow-up mapper work; the other 4 are tagged inline
# with the slice that closed them (S0380.41..43). Identical to the
# Summary-path sweep at the same tolerance: cross-mapper parity is
# achieved via cascade output equivalence (per
# [[feedback-cross-mapper-parity-via-cascade]]).
_COHORT_2_API_CLOSED: list[tuple[str, float]] = [
    ("0036-6325-1100-0063-1226", 62.7471),
    ("0100-5141-0522-4696-3463", 85.8332),
    ("0200-3155-0122-2602-3563", 80.8674),
    ("0300-2403-2650-2206-0235", 76.6541),  # S0380.41 closure
    ("0310-2763-5450-2506-3501", 78.3593),
    ("0320-2126-2150-2326-6161", 71.7224),
    ("0320-2756-8640-2296-1101", 89.9458),
    ("0330-2257-3640-2196-3145", 84.6541),
    ("0360-2266-5650-2106-8285", 80.468),
    ("0380-2530-6150-2326-4161", 65.7795),
    ("0390-2066-4250-2026-4555", 65.3253),
    ("0464-3032-0205-4276-3204", 80.4533),
    ("0652-3022-1205-2826-1200", 70.9577),
    ("1536-9325-5100-0433-1226", 65.8928),  # S0380.42 closure
    ("2007-3011-9205-8136-3204", 68.3914),
    ("2031-3007-0205-1296-3204", 64.1734),
    ("2102-3018-0205-7886-5204", 63.8732),  # S0380.43 closure
    ("2130-3018-4205-4686-5204", 71.3158),
    ("2336-3124-3600-0517-1292", 83.4955),
    ("2536-2525-0600-0788-2292", 79.7264),
    ("2590-3025-7205-9066-0200", 65.9194),
    ("2699-3025-5205-8066-0200", 68.7535),
    ("2800-7999-0322-4594-3563", 78.1408),
    ("3136-7925-4500-0246-6202", 77.8872),
    ("3336-2825-9400-0512-8292", 78.3739),
    ("4536-5424-8600-0109-1226", 82.4974),
    ("4536-8325-3100-0409-1222", 65.6),
    ("4800-3992-0422-0599-3563", 86.7192),
    ("6835-3920-2509-0933-5226", 80.1977),
    ("7700-3362-0922-7022-3563", 63.4425),
    ("7800-1501-0922-7127-3563", 64.7504),
    ("7836-3125-0600-0526-2202", 80.1792),
    ("9036-0824-3500-0420-8222", 84.2727),
    ("9370-3060-1205-3546-4204", 87.8687),
    ("9380-2957-7490-2595-3141", 74.5902),  # S0380.41 closure
    ("9421-3045-3205-1646-6200", 87.4495),
    ("9796-3058-6205-0346-9200", 90.1318),
    ("9836-7525-9500-0575-1202", 75.2223),
]
def _cascade_continuous_sap_from_api(cert_dir_name: str) -> float:
    """Run one cohort-2 API fixture through the full chain.

    Loads `<cert_dir_name>.json` from `_COHORT_2_API_FIXTURE_DIR`, maps
    it with `EpcPropertyDataMapper.from_api_response`, and returns the
    `sap_score_continuous` of the resulting calculation.
    """
    fixture = _COHORT_2_API_FIXTURE_DIR / f"{cert_dir_name}.json"
    # Decode as UTF-8 explicitly: a bare `read_text()` uses the host's
    # locale encoding, which can mis-decode non-ASCII fixture content.
    doc = json.loads(fixture.read_text(encoding="utf-8"))
    epc = EpcPropertyDataMapper.from_api_response(doc)
    r = calculate_sap_from_inputs(cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES))
    return r.sap_score_continuous
@pytest.mark.parametrize("cert_dir_name,ws_sap", _COHORT_2_API_CLOSED)
def test_api_cohort_2_full_chain_sap_matches_worksheet_at_1e_minus_4(
    cert_dir_name: str, ws_sap: float
) -> None:
    """Cohort-2 sweep on the API path, mirroring the Summary-path sweep.

    The GOV.UK EPB API JSON for each cert is pushed through
    `from_api_response` → `cert_to_inputs` → `calculate_sap_from_inputs`
    and the continuous SAP must land within abs <= 1e-4 of the
    worksheet value — the tolerance the Summary path also achieves
    ([[feedback-cross-mapper-parity-via-cascade]])."""
    # Arrange + Act — the helper runs the whole chain for this cert.
    cascade_sap = _cascade_continuous_sap_from_api(cert_dir_name)
    residual = cascade_sap - ws_sap
    # Assert — inclusive bound, signed residual reported on failure.
    within_tolerance = abs(residual) <= 1e-4
    assert within_tolerance, (
        f"cert {cert_dir_name}: cascade SAP={cascade_sap:.6f} vs worksheet {ws_sap}; Δ={residual:+.6f}"
    )
# ============================================================================
# Mapper-vs-hand-built EpcPropertyData diff tests
# ============================================================================
# The 6 cohort hand-builts (_elmhurst_worksheet_NNNNNN.build_epc) are the
# 100%-correct calculator-input ground truth — each cascades to its
# worksheet PDF's lodged SAP at 1e-4. The chain tests above only assert
# cascade-output equivalence; the mapper can pass them by producing a
# *different* EpcPropertyData that happens to cascade to the same number.
#
# These tests pin the missing layer: the mapper's EpcPropertyData must
# match the hand-built's load-bearing fields exactly. Every divergence
# surfaced here is a mapper coverage gap to close as its own slice.
#
# "Load-bearing" = the subset of EpcPropertyData fields that drive the
# SAP cascade or carry semantic cross-mapper meaning. Cert-metadata
# fields (address, registration dates, descriptive EnergyElement lists,
# tariff strings) are excluded because they don't change calculator
# output and vary by mapper pathway (the API publishes some, the
# Elmhurst Summary publishes others) without semantic disagreement.
# SapWindow sub-fields the cascade doesn't read: descriptive Union[int,
# str] codes that each mapper lodges differently. The cascade reads
# window_width / window_height / orientation / window_location /
# frame_factor / window_transmission_details.{u_value,solar_
# transmittance} — those WILL still be diffed; everything else on
# SapWindow is metadata and excluded to avoid noise from the int/str
# dual encoding (API mapper produces int codes; Elmhurst mapper
# surfaces the Summary's lodged strings).
# Consumed by `_is_excluded_path`, which matches these names against
# the part of a `sap_windows[i].<field>` path after the first "].".
_NON_LOAD_BEARING_WINDOW_SUBFIELDS: frozenset[str] = frozenset({
    "frame_material",
    "glazing_gap",
    "window_type",
    "glazing_type",
    "window_wall_type",
    "draught_proofed",
    "permanent_shutters_present",
    "permanent_shutters_insulated",
})
def _is_excluded_path(path: str) -> bool:
    """Tell the load-bearing diff whether to ignore ``path``.

    Ignored paths are those where the API and Elmhurst mapper outputs
    differ only in Union[int, str] encoding (or similar non-cascade-
    affecting detail) that the cohort hand-built fixtures don't pin."""
    # Per-window metadata: whatever follows the first "]." of a
    # `sap_windows[i].…` path is the window sub-field name.
    if path.startswith("sap_windows["):
        _, bracket_dot, window_field = path.partition("].")
        if bracket_dot and (
            window_field in _NON_LOAD_BEARING_WINDOW_SUBFIELDS
            or window_field == "window_transmission_details.data_source"
        ):
            return True
    # The Elmhurst mapper fills `roof_construction_type` from
    # `roof.roof_type` (e.g. "Pitched (slates/tiles), access to loft");
    # the cohort hand-builts leave it None. The cascade in
    # `heat_transmission.py:562` only dispatches on the "sloping ceiling"
    # substring (RdSAP §3.8), and no cohort cert lodges a pitched-
    # sloping-ceiling roof, so both values cascade identically. Skipped
    # so informational drift isn't flagged.
    is_roof_construction_type = (
        path.startswith("sap_building_parts[")
        and path.endswith(".roof_construction_type")
    )
    if is_roof_construction_type:
        return True
    # `sap_ventilation.has_suspended_timber_floor` and `..._sealed` are
    # set explicitly on the hand-builts (mirroring the cohort U985
    # worksheets' (12) infiltration values) but left None by the
    # Elmhurst mapper, because the Summary PDF doesn't surface floor-
    # construction in a parseable form. With None, `cert_to_inputs._
    # has_suspended_timber_floor_per_spec` infers the value mechanically
    # from per-bp floor-construction data — giving the same cascade
    # output as the explicit-bool hand-built path for cohort 000477 /
    # 000516 (where spec inference and worksheet agree). Where they
    # disagree (cohort 000474, 000480, 000487, 000490) the chain SAP-pin
    # tests fail separately — a known Elmhurst-worksheet-vs-RdSAP-10 §5
    # (12) divergence, not a mapper diff issue.
    return path in (
        "sap_ventilation.has_suspended_timber_floor",
        "sap_ventilation.suspended_timber_floor_sealed",
    )
# Top-level EpcPropertyData attribute names compared by the
# `test_from_elmhurst_site_notes_matches_hand_built_*` tests. Each name
# is read from both the mapped and the hand-built object with
# `getattr(..., None)` and the pair is handed to `_diff_load_bearing`.
_LOAD_BEARING_FIELDS: tuple[str, ...] = (
    # Cascade-driving structural fields
    "sap_building_parts",
    "sap_windows",
    "sap_roof_windows",
    "sap_heating",
    "sap_ventilation",
    "sap_energy_source",
    "total_floor_area_m2",
    # Building-classification fields driving default cascades
    "dwelling_type",
    "built_form",
    "property_type",
    "country_code",
    "postcode",
    # Counts and openings
    "door_count",
    "insulated_door_count",
    "insulated_door_u_value",
    "habitable_rooms_count",
    "heated_rooms_count",
    "wet_rooms_count",
    "extensions_count",
    "open_chimneys_count",
    "blocked_chimneys_count",
    "extract_fans_count",
    # Lighting
    "cfl_fixed_lighting_bulbs_count",
    "led_fixed_lighting_bulbs_count",
    "incandescent_fixed_lighting_bulbs_count",
    "low_energy_fixed_lighting_bulbs_count",
    "fixed_lighting_outlets_count",
    "low_energy_fixed_lighting_outlets_count",
    # HW / appliances
    "solar_water_heating",
    "has_hot_water_cylinder",
    "has_fixed_air_conditioning",
    "has_conservatory",
    "has_heated_separate_conservatory",
    # Envelope drivers
    "percent_draughtproofed",
    "mechanical_ventilation",
    "pressure_test",
    # Construction-detail flags
    "addendum",
    "lzc_energy_sources",
    "any_unheated_rooms",
    "number_of_storeys",
    "sap_flat_details",
)
def _diff_load_bearing(
    mapped: object, hand_built: object, path: str = "",
) -> list[str]:
    """Recursively diff a mapped value against its hand-built twin.

    Returns one line per leaf divergence between the mapped
    EpcPropertyData and the hand-built fixture. An int/float type
    difference is not reported on its own; such pairs fall through to
    the plain value comparison.

    Strict-pyright posture: both arguments are typed `object` and each
    branch narrows via `isinstance`, rather than threading `Any`
    through the recursion (which pyright can't reason about under
    `strict`/`typeCheckingMode = strict`)."""
    if type(mapped) is not type(hand_built):
        numeric_pair = (
            isinstance(mapped, (int, float))
            and isinstance(hand_built, (int, float))
        )
        if not numeric_pair:
            # Mismatched non-numeric types are terminal: report (unless
            # the path is excluded) and stop descending.
            if _is_excluded_path(path):
                return []
            return [
                f"{path}: TYPE {type(mapped).__name__} vs "
                f"{type(hand_built).__name__} mapped={mapped!r} "
                f"handbuilt={hand_built!r}"
            ]
    if (
        dataclasses.is_dataclass(mapped) and not isinstance(mapped, type)
        and dataclasses.is_dataclass(hand_built) and not isinstance(hand_built, type)
    ):
        # Dataclass instances: recurse field by field, extending the path.
        return [
            line
            for fld in dataclasses.fields(mapped)
            for line in _diff_load_bearing(
                getattr(mapped, fld.name),
                getattr(hand_built, fld.name),
                f"{path}.{fld.name}" if path else fld.name,
            )
        ]
    if isinstance(mapped, list) and isinstance(hand_built, list):
        mapped_items = cast("list[object]", mapped)
        hand_built_items = cast("list[object]", hand_built)
        # A length mismatch is reported once; elements are not compared.
        if len(mapped_items) != len(hand_built_items):
            return [f"{path}: LEN {len(mapped_items)} vs {len(hand_built_items)}"]
        return [
            line
            for idx, (m_item, h_item) in enumerate(zip(mapped_items, hand_built_items))
            for line in _diff_load_bearing(m_item, h_item, f"{path}[{idx}]")
        ]
    # Leaf comparison.
    if mapped != hand_built and not _is_excluded_path(path):
        return [f"{path}: mapped={mapped!r} handbuilt={hand_built!r}"]
    return []
def test_from_elmhurst_site_notes_matches_hand_built_000474() -> None:
    # Arrange — the canonical hand-built EpcPropertyData for cert
    # U985-0001-000474 comes from _elmhurst_worksheet_000474.build_epc();
    # it cascades to the worksheet PDF's `SAP value 62.2584` at 1e-4
    # (cohort SAP-result pin). The Summary PDF routed through the
    # Elmhurst mapper MUST yield a load-bearing-field-equivalent
    # EpcPropertyData; any divergence is a mapper-coverage gap.
    #
    # Tracer-bullet scope: cert 000474 only. Once GREEN, parametrize
    # over the 5 other cohort fixtures.
    mapper_epc = EpcPropertyDataMapper.from_elmhurst_site_notes(
        ElmhurstSiteNotesExtractor(
            _summary_pdf_to_textract_style_pages(_SUMMARY_000474_PDF)
        ).extract()
    )
    reference_epc = _w000474.build_epc()
    # Act — gather every leaf divergence across the load-bearing fields.
    divergences = [
        line
        for name in _LOAD_BEARING_FIELDS
        for line in _diff_load_bearing(
            getattr(mapper_epc, name, None),
            getattr(reference_epc, name, None),
            name,
        )
    ]
    # Assert
    report = "\n ".join(divergences)
    assert not divergences, (
        f"{len(divergences)} load-bearing divergence(s) between mapped and "
        f"hand-built EpcPropertyData for cohort cert 000474:\n {report}"
    )
def test_from_elmhurst_site_notes_matches_hand_built_000477() -> None:
    # Arrange — the canonical hand-built EpcPropertyData for cert
    # U985-0001-000477 (single-bp mid-terrace, age band B, RIR with
    # stud walls + party gables, no extension) comes from
    # _elmhurst_worksheet_000477.build_epc(); it cascades to the
    # worksheet PDF's `SAP value 65.0057` at 1e-4. The Summary PDF
    # routed through the Elmhurst mapper MUST yield a load-bearing-
    # field-equivalent EpcPropertyData; any divergence is a mapper-
    # coverage gap to close as its own slice.
    mapper_epc = EpcPropertyDataMapper.from_elmhurst_site_notes(
        ElmhurstSiteNotesExtractor(
            _summary_pdf_to_textract_style_pages(_SUMMARY_000477_PDF)
        ).extract()
    )
    reference_epc = _w000477.build_epc()
    # Act — gather every leaf divergence across the load-bearing fields.
    divergences = [
        line
        for name in _LOAD_BEARING_FIELDS
        for line in _diff_load_bearing(
            getattr(mapper_epc, name, None),
            getattr(reference_epc, name, None),
            name,
        )
    ]
    # Assert
    report = "\n ".join(divergences)
    assert not divergences, (
        f"{len(divergences)} load-bearing divergence(s) between mapped and "
        f"hand-built EpcPropertyData for cohort cert 000477:\n {report}"
    )
def test_from_elmhurst_site_notes_matches_hand_built_000480() -> None:
    # Arrange — the canonical hand-built EpcPropertyData for cert
    # U985-0001-000480 (mid-terrace with main + 1 extension + 19.83 m²
    # RIR, gas combi) comes from _elmhurst_worksheet_000480.build_epc();
    # it cascades to the worksheet PDF's `SAP value 61.2986` at 1e-4.
    # The Summary PDF routed through the Elmhurst mapper MUST yield a
    # load-bearing-field-equivalent EpcPropertyData; any divergence is
    # a mapper-coverage gap to close as its own slice.
    mapper_epc = EpcPropertyDataMapper.from_elmhurst_site_notes(
        ElmhurstSiteNotesExtractor(
            _summary_pdf_to_textract_style_pages(_SUMMARY_000480_PDF)
        ).extract()
    )
    reference_epc = _w000480.build_epc()
    # Act — gather every leaf divergence across the load-bearing fields.
    divergences = [
        line
        for name in _LOAD_BEARING_FIELDS
        for line in _diff_load_bearing(
            getattr(mapper_epc, name, None),
            getattr(reference_epc, name, None),
            name,
        )
    ]
    # Assert
    report = "\n ".join(divergences)
    assert not divergences, (
        f"{len(divergences)} load-bearing divergence(s) between mapped and "
        f"hand-built EpcPropertyData for cohort cert 000480:\n {report}"
    )
def test_from_elmhurst_site_notes_matches_hand_built_000487() -> None:
    # Arrange — the canonical hand-built EpcPropertyData for cert
    # U985-0001-000487 (Enclosed Mid-Terrace, main + 1 extension +
    # 21.03 m² RIR with explicit-U gable_wall_external, gas combi,
    # 1 electric shower, 1.43 m² timber-frame alt wall on the
    # extension) comes from _elmhurst_worksheet_000487.build_epc(); it
    # cascades to the worksheet PDF's `SAP value 61.6431` at 1e-4. The
    # Summary PDF routed through the Elmhurst mapper MUST yield a
    # load-bearing-field-equivalent EpcPropertyData; any divergence is
    # a mapper-coverage gap to close as its own slice.
    mapper_epc = EpcPropertyDataMapper.from_elmhurst_site_notes(
        ElmhurstSiteNotesExtractor(
            _summary_pdf_to_textract_style_pages(_SUMMARY_000487_PDF)
        ).extract()
    )
    reference_epc = _w000487.build_epc()
    # Act — gather every leaf divergence across the load-bearing fields.
    divergences = [
        line
        for name in _LOAD_BEARING_FIELDS
        for line in _diff_load_bearing(
            getattr(mapper_epc, name, None),
            getattr(reference_epc, name, None),
            name,
        )
    ]
    # Assert
    report = "\n ".join(divergences)
    assert not divergences, (
        f"{len(divergences)} load-bearing divergence(s) between mapped and "
        f"hand-built EpcPropertyData for cohort cert 000487:\n {report}"
    )
def test_from_elmhurst_site_notes_matches_hand_built_000490() -> None:
    # Arrange — the canonical hand-built EpcPropertyData for cert
    # U985-0001-000490 (End-Terrace, main + 1 extension, gas combi +
    # gas-secondary; sheltered_sides=1 per RdSAP §S5) comes from
    # _elmhurst_worksheet_000490.build_epc(); it cascades to the
    # worksheet PDF's `SAP value 57.3979` at 1e-4. The Summary PDF
    # routed through the Elmhurst mapper MUST yield a load-bearing-
    # field-equivalent EpcPropertyData; any divergence is a mapper-
    # coverage gap.
    mapper_epc = EpcPropertyDataMapper.from_elmhurst_site_notes(
        ElmhurstSiteNotesExtractor(
            _summary_pdf_to_textract_style_pages(_SUMMARY_000490_PDF)
        ).extract()
    )
    reference_epc = _w000490.build_epc()
    # Act — gather every leaf divergence across the load-bearing fields.
    divergences = [
        line
        for name in _LOAD_BEARING_FIELDS
        for line in _diff_load_bearing(
            getattr(mapper_epc, name, None),
            getattr(reference_epc, name, None),
            name,
        )
    ]
    # Assert
    report = "\n ".join(divergences)
    assert not divergences, (
        f"{len(divergences)} load-bearing divergence(s) between mapped and "
        f"hand-built EpcPropertyData for cohort cert 000490:\n {report}"
    )
def test_from_elmhurst_site_notes_matches_hand_built_000516() -> None:
    # Arrange — the canonical hand-built EpcPropertyData for cert
    # U985-0001-000516 (Mid-Terrace, main + 19.02 m² RIR, 5 vertical
    # windows + 1 roof window which the mapper routes to
    # `sap_roof_windows` per `U > 3.0` discrimination) comes from
    # _elmhurst_worksheet_000516.build_epc(); it cascades to the
    # worksheet PDF's `SAP value 62.7937` at 1e-4. The Summary PDF
    # routed through the Elmhurst mapper MUST yield a load-bearing-
    # field-equivalent EpcPropertyData.
    mapper_epc = EpcPropertyDataMapper.from_elmhurst_site_notes(
        ElmhurstSiteNotesExtractor(
            _summary_pdf_to_textract_style_pages(_SUMMARY_000516_PDF)
        ).extract()
    )
    reference_epc = _w000516.build_epc()
    # Act — gather every leaf divergence across the load-bearing fields.
    divergences = [
        line
        for name in _LOAD_BEARING_FIELDS
        for line in _diff_load_bearing(
            getattr(mapper_epc, name, None),
            getattr(reference_epc, name, None),
            name,
        )
    ]
    # Assert
    report = "\n ".join(divergences)
    assert not divergences, (
        f"{len(divergences)} load-bearing divergence(s) between mapped and "
        f"hand-built EpcPropertyData for cohort cert 000516:\n {report}"
    )
def test_elmhurst_jacket_cylinder_insulation_maps_to_loose_jacket_code_2() -> None:
    # Arrange — Elmhurst §15.1 "Cylinder Insulation Type: Jacket" is a
    # loose jacket; SAP 10.2 Table 2 Note 1 gives it a separate (higher)
    # storage-loss factor than factory foam. In the SAP10
    # `cylinder_insulation_type` enum 2 is loose jacket (1 = factory
    # foam), matching the GOV.UK API path — so the Summary "Jacket"
    # label must resolve to 2 for cross-mapper parity, and for the
    # loose-jacket storage-loss branch (S0380.224) to fire. Observed on
    # the simulated-case-19 worksheet (210 L jacket cylinder + storage
    # heaters).
    from datatypes.epc.domain.mapper import _elmhurst_cylinder_insulation_code  # pyright: ignore[reportPrivateUsage]
    # Act / Assert — the label resolves to the loose-jacket code.
    assert _elmhurst_cylinder_insulation_code("Jacket", cylinder_present=True) == 2
def test_elmhurst_foam_cylinder_insulation_still_maps_to_factory_code_1() -> None:
    # Arrange — regression guard: the factory-foam label keeps code 1.
    from datatypes.epc.domain.mapper import _elmhurst_cylinder_insulation_code  # pyright: ignore[reportPrivateUsage]
    # Act / Assert
    assert _elmhurst_cylinder_insulation_code("Foam", cylinder_present=True) == 1
def test_elmhurst_roof_construction_int_matches_api_codes() -> None:
    # Arrange — cross-mapper structural parity. The gov-EPC API mapper
    # populates BOTH roof_construction (int) and roof_construction_type
    # (str derived via `_API_ROOF_CONSTRUCTION_TO_STR`); the Elmhurst
    # mapper set only the string and left the int None. SAP parity held
    # because the cascade reads the string, but consumers of the int
    # (e.g. domain/sap10_ml ML aggregates) saw None on every site-notes
    # cert. `_elmhurst_roof_construction_int` closes the gap by mapping
    # the Elmhurst roof code to the same SAP10 int the API lodges.
    # Unmapped codes return None (not a raise) — the int is not
    # cascade-load-bearing, so an unknown roof must not block the cert.
    from datatypes.epc.domain.mapper import _elmhurst_roof_construction_int  # pyright: ignore[reportPrivateUsage]
    expected_codes = (
        ("F Flat", 1),
        ("PN Pitched (slates/tiles), no access", 3),
        ("PA Pitched (slates/tiles), access to loft", 4),
        ("PS Pitched, sloping ceiling", 8),
        ("S Same dwelling above", 7),
        ("A Another dwelling above", 7),
    )
    # Act / Assert — each Elmhurst roof code → the gov-EPC API int.
    for roof_label, api_code in expected_codes:
        assert _elmhurst_roof_construction_int(roof_label) == api_code
    # Absent / unmapped → None (no raise; not cascade-load-bearing).
    for unmapped in (None, "", "NR Non-residential space above"):
        assert _elmhurst_roof_construction_int(unmapped) is None
def test_elmhurst_wall_is_basement_disambiguates_system_built_from_basement() -> None:
    # Arrange — SAP10 wall_construction=6 (canonical WALL_SYSTEM_BUILT)
    # is shared by two Elmhurst labels: "SY System build" and
    # "B Basement wall". The explicit basement flag tells them apart:
    #   * "B"  → True  (a basement wall; drives RdSAP §5.17 u_basement_wall)
    #   * "SY" → False (routes through the normal system-built U-value table)
    #   * any other code → None (the gov-EPC API code-6 heuristic still
    #     applies)
    from datatypes.epc.domain.mapper import _elmhurst_wall_construction_int  # pyright: ignore[reportPrivateUsage]
    from datatypes.epc.domain.mapper import _elmhurst_wall_is_basement  # pyright: ignore[reportPrivateUsage]

    system_built_label = "SY System build"
    basement_label = "B Basement wall"

    # Act / Assert — system-built: still code 6, explicitly NOT basement.
    assert _elmhurst_wall_construction_int(system_built_label) == 6
    assert _elmhurst_wall_is_basement(system_built_label) is False

    # Act / Assert — genuine basement: code 6 AND flagged as basement.
    assert _elmhurst_wall_construction_int(basement_label) == 6
    assert _elmhurst_wall_is_basement(basement_label) is True

    # Act / Assert — every other construction defers to the API code-6
    # heuristic, signalled by None.
    for other_label in ("CA Cavity", ""):
        assert _elmhurst_wall_is_basement(other_label) is None