mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
SAP 10.2 Table 4f (PDF p.169) — heat-pump packages (main heating
category 4) bundle the circulation pump's electricity into the
system COP, so worksheet line (70) "Pumps, fans" reports zero gain
for every month on HP certs. Cert 0380's worksheet confirms 0.0
through Jan-Dec.
`internal_gains_from_cert` previously called `central_heating_pump_w`
unconditionally and routed the 3/7/10 W (date-bucket) result through
the seasonal mask in `pumps_fans_monthly_w`. For HP certs that added
~7 W of spurious heating-season gains to (73)m → cold-month MIT
drifted +0.008°C above worksheet (92).
Gating the pump-W computation on `_CATEGORIES_WITHOUT_CENTRAL_HEATING
_PUMP = {4}` zeroes the gain for HP certs and leaves every other
category (gas, oil, electric storage, …) on the existing cascade.
Cohort impact:
- Cert 0380 MIT 12-tuple now matches worksheet (92) at 1e-3 per
month (worst Δ at Nov = -0.0009°C).
- SAP residual closes from +0.155 → +0.059 vs worksheet 88.5104.
- Closed certs (001479 / 0330 / 9501 — all boiler cohorts, cat 2
or 1) are unaffected; Layer 4 1e-4 chain gates remain GREEN.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
1155 lines
51 KiB
Python
1155 lines
51 KiB
Python
"""End-to-end validation for the Elmhurst Summary→EpcPropertyData chain.
|
||
|
||
The 6 Elmhurst worksheet fixtures in `domain.sap10_calculator.worksheet.tests`
|
||
build their `EpcPropertyData` synthetically — they validate the
|
||
calculator + cascade in isolation from the mapper. This file pins
|
||
the OTHER half of the chain: `from_elmhurst_site_notes` must produce
|
||
a calculator-equivalent `EpcPropertyData` when fed the Summary PDF
|
||
the worksheet was generated from. Together with the worksheet
|
||
cascade tests, this closes the loop: extractor + mapper + cascade
|
||
+ calculator validated end-to-end against the authoritative
|
||
Elmhurst documents.
|
||
|
||
Status: GREEN. For cert U985-0001-000474, this pipeline produces an
|
||
unrounded SAP within 0.5 of the worksheet PDF's `62.2584` (line 257).
|
||
The cascade itself reproduces Elmhurst's calculator exactly on
|
||
hand-built inputs (handbuilt → 62.2584 to 4 d.p.); the remaining
|
||
sub-half-point gap from the mapped path is non-load-bearing field
|
||
drift (e.g. central_heating_pump_age the Summary PDF doesn't lodge).
|
||
|
||
Preprocessing: the existing `ElmhurstSiteNotesExtractor` was written
|
||
against Textract-style output (label\\nvalue pairs in spatial
|
||
reading order). We don't have Textract in the test environment, so
|
||
this helper converts `pdftotext -layout` output (label-whitespace-
|
||
value on a single line) into the Textract-style sequence the
|
||
extractor expects. Test-only preprocessing; production runs through
|
||
Textract directly.
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import dataclasses
|
||
import json
|
||
import re
|
||
import subprocess
|
||
from pathlib import Path
|
||
from typing import cast
|
||
|
||
from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtractor
|
||
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
|
||
from domain.sap10_calculator.calculator import calculate_sap_from_inputs
|
||
from domain.sap10_calculator.rdsap.cert_to_inputs import SAP_10_2_SPEC_PRICES, cert_to_inputs
|
||
from domain.sap10_calculator.worksheet.tests import (
|
||
_elmhurst_worksheet_000474 as _w000474,
|
||
_elmhurst_worksheet_000477 as _w000477,
|
||
_elmhurst_worksheet_000480 as _w000480,
|
||
_elmhurst_worksheet_000487 as _w000487,
|
||
_elmhurst_worksheet_000490 as _w000490,
|
||
_elmhurst_worksheet_000516 as _w000516,
|
||
)
|
||
|
||
_FIXTURES = Path(__file__).parent / "fixtures"
|
||
_SUMMARY_000474_PDF = _FIXTURES / "Summary_000474.pdf"
|
||
_SUMMARY_000477_PDF = _FIXTURES / "Summary_000477.pdf"
|
||
_SUMMARY_000480_PDF = _FIXTURES / "Summary_000480.pdf"
|
||
_SUMMARY_000487_PDF = _FIXTURES / "Summary_000487.pdf"
|
||
_SUMMARY_000490_PDF = _FIXTURES / "Summary_000490.pdf"
|
||
_SUMMARY_000516_PDF = _FIXTURES / "Summary_000516.pdf"
|
||
_SUMMARY_001479_PDF = _FIXTURES / "Summary_001479.pdf"
|
||
_SUMMARY_000897_PDF = _FIXTURES / "Summary_000897.pdf"
|
||
_SUMMARY_000784_PDF = _FIXTURES / "Summary_000784.pdf"
|
||
|
||
# GOV.UK EPB API JSON for cert 001479 — the API-path counterpart of the
|
||
# Summary_001479.pdf fixture. Together they drive the API ≡ Summary
|
||
# parity workstream; Layer 4 of the validation stack is "API cascade SAP
|
||
# matches worksheet continuous SAP at 1e-4".
|
||
# `parents[3]` is three levels above this module's directory
# (presumably the repository root — confirm against the repo layout).
_API_001479_JSON = Path(__file__).parents[3].joinpath(
    "domain/sap10_calculator/rdsap/tests/fixtures/golden",
    "0535-9020-6509-0821-6222.json",
)
|
||
|
||
|
||
def _summary_pdf_to_textract_style_pages(pdf_path: Path) -> list[str]:
    """Render each page of a Summary PDF as a Textract-like token stream.

    The page count is read from `pdfinfo`; every page is then dumped on
    its own with `pdftotext -layout`. Each non-blank line is stripped
    and split on runs of two or more whitespace characters so the label
    and value tokens end up on separate lines, while blank lines are
    kept as empty tokens. The tokens of a page are joined with newlines,
    giving one string per page, in page order — the label\\nvalue shape
    handed to `ElmhurstSiteNotesExtractor` throughout this file.

    Raises `RuntimeError` when the `pdfinfo` output carries no `Pages:`
    entry, and `subprocess.CalledProcessError` when either tool exits
    non-zero (both calls pass `check=True`).
    """
    pdfinfo_stdout = subprocess.run(
        ["pdfinfo", str(pdf_path)],
        capture_output=True,
        text=True,
        check=True,
    ).stdout
    pages_match = re.search(r"Pages:\s+(\d+)", pdfinfo_stdout)
    if pages_match is None:
        raise RuntimeError(f"Could not parse page count from {pdf_path}")
    last_page = int(pages_match.group(1))

    # Two or more whitespace characters separate layout columns.
    column_gap = re.compile(r"\s{2,}")

    rendered_pages: list[str] = []
    for page_number in range(1, last_page + 1):
        page_arg = str(page_number)
        page_text = subprocess.run(
            ["pdftotext", "-layout", "-f", page_arg, "-l", page_arg, str(pdf_path), "-"],
            capture_output=True,
            text=True,
            check=True,
        ).stdout

        page_tokens: list[str] = []
        for raw_line in page_text.splitlines():
            stripped = raw_line.strip()
            if stripped:
                page_tokens.extend(
                    piece for piece in column_gap.split(stripped) if piece
                )
            else:
                # Blank lines survive as empty tokens in the stream.
                page_tokens.append("")
        rendered_pages.append("\n".join(page_tokens))
    return rendered_pages
|
||
|
||
|
||
def test_summary_000474_mapper_produces_three_building_parts() -> None:
    # Arrange — the hand-built worksheet fixture for cert
    # U985-0001-000474 (domain/sap10_calculator/worksheet/tests/
    # _elmhurst_worksheet_000474.py) describes a mid-terrace made of a
    # Main part plus two extensions. Feeding the Summary PDF through
    # extractor + mapper has to report the same three building parts.
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000474_PDF)
    extractor = ElmhurstSiteNotesExtractor(textract_pages)
    survey = extractor.extract()

    # Act
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)

    # Assert
    building_part_count = len(mapped.sap_building_parts)
    assert building_part_count == 3
|
||
|
||
|
||
def test_summary_000474_mapper_extracts_seven_windows() -> None:
    # Arrange — the §11 table of cert U985-0001-000474 lodges 7 windows
    # spread over Main, 1st Extension and 2nd Extension. The legacy
    # Textract-style window parser had nothing to anchor on in the
    # Summary PDF's tabular layout; the newer anchor pair
    # (W/H/Area plus Manufacturer) finds every one of them.
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000474_PDF)
    extractor = ElmhurstSiteNotesExtractor(textract_pages)
    survey = extractor.extract()

    # Act
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)

    # Assert
    window_count = len(mapped.sap_windows)
    assert window_count == 7
|
||
|
||
|
||
# Cohort chain SAP-pin tests follow. NOTE: certs 000474, 000480, 000487,
|
||
# 000490 previously had chain tests here pinning their cascade SAP
|
||
# against the U985 worksheet PDF — those tests were removed because
|
||
# their worksheets violate RdSAP 10 §5 (12) "Floor infiltration
|
||
# (suspended timber ground floor only)". Our cascade applies the spec
|
||
# rule (via `cert_to_inputs._has_suspended_timber_floor_per_spec`);
|
||
# the worksheet does not. So the spec-correct chain SAP for those
|
||
# certs can't match the worksheet SAP — by design, not by mapper bug.
|
||
# The Layer 1 hand-built fixtures for those 4 certs absorb the
|
||
# worksheet quirk by lodging `has_suspended_timber_floor=False`
|
||
# explicitly (overriding the spec inference) — so Layer 1 cascade pins
|
||
# still pin the worksheet value exactly. The chain tests below remain
|
||
# only for 000477, 000516 (and 001479 further down), where the
|
||
# worksheet IS spec-correct.
|
||
|
||
|
||
def test_summary_000477_full_chain_sap_matches_worksheet_pdf_exactly() -> None:
    # Arrange — cert U985-0001-000477: single-building-part mid-terrace
    # with a 15.06 m² Room-in-Roof storey and no baths lodged. The
    # worksheet PDF lodges an unrounded SAP of 65.0057. The chain runs
    # through the `RoomInRoof.detailed_surfaces` cascade (stud walls at
    # 100mm Mineral, two uninsulated slopes, two party gable walls) and
    # the RR/storey-area suspended-timber-floor heuristic
    # (RIR < storey → 0.2 ACH floor infiltration).
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000477_PDF)
    survey = ElmhurstSiteNotesExtractor(textract_pages).extract()
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)

    # Act
    calculator_inputs = cert_to_inputs(mapped, prices=SAP_10_2_SPEC_PRICES)
    outcome = calculate_sap_from_inputs(calculator_inputs)

    # Assert
    expected_sap = 65.0057
    deviation = abs(outcome.sap_score_continuous - expected_sap)
    assert deviation < 1e-4
|
||
|
||
|
||
def test_summary_000516_full_chain_sap_matches_worksheet_pdf_exactly() -> None:
    # Arrange — cert U985-0001-000516: mid-terrace, main building part
    # plus a 19.02 m² room-in-roof. The worksheet PDF lodges an
    # unrounded SAP of 62.7937. Its §11 table mixes 5 vertical windows
    # (U=2.80) with a single roof window (U=3.10 in the cert, U=3.40
    # raw per Table 24). The mapper separates them with `U > 3.0` and
    # sends the high-U entry to `sap_roof_windows`, so its solar gains
    # reach §6 with the right pitch (45°) and the Table-24 U-value.
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000516_PDF)
    survey = ElmhurstSiteNotesExtractor(textract_pages).extract()
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)

    # Act
    calculator_inputs = cert_to_inputs(mapped, prices=SAP_10_2_SPEC_PRICES)
    outcome = calculate_sap_from_inputs(calculator_inputs)

    # Assert
    expected_sap = 62.7937
    deviation = abs(outcome.sap_score_continuous - expected_sap)
    assert deviation < 1e-4
|
||
|
||
|
||
def test_summary_001479_mapper_extensions_count_matches_extension_bps() -> None:
    # Arrange — cert 0535-9020-6509-0821-6222 (Summary_001479) is the
    # first cohort cert that has a real GOV.UK API counterpart. Its
    # worksheet PDF lodges Main + Extension 1 + Extension 2, i.e. 3
    # building parts of which 2 are extensions. Before this slice the
    # Elmhurst mapper hard-coded `extensions_count=0` whatever
    # survey.extensions held; this test checks the count now flows
    # through.
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_001479_PDF)
    extractor = ElmhurstSiteNotesExtractor(textract_pages)
    survey = extractor.extract()

    # Act
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)

    # Assert
    assert mapped.extensions_count == 2
    building_part_count = len(mapped.sap_building_parts)
    assert building_part_count == 3
|
||
|
||
|
||
def test_summary_001479_main_party_wall_construction_is_cavity_unfilled() -> None:
    # Arrange — Main §7 Walls of cert 001479 lodges "Party Wall Type: CU
    # Cavity masonry unfilled". The Elmhurst leading-code map used to
    # know only "S" and "C", so "CU" resolved to None and the cascade
    # defaulted to U=0.25 rather than the worksheet's lodged U=0.50.
    # The fix maps "CU" → SAP10 wall_construction code 4 (WALL_CAVITY),
    # which `u_party_wall` turns into U=0.50, in line with the
    # worksheet's §3 `Party walls Main … 0.50` row.
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_001479_PDF)
    extractor = ElmhurstSiteNotesExtractor(textract_pages)
    survey = extractor.extract()

    # Act
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)

    # Assert
    main_part = mapped.sap_building_parts[0]
    assert main_part.party_wall_construction == 4
|
||
|
||
|
||
def test_summary_001479_ext2_floor_is_exposed_to_external_air() -> None:
    # Arrange — Ext2 §9 of cert 001479 lodges "Location: E To external
    # air": a cantilevered, exposed timber floor (the upper-storey
    # extension over the back garden). The worksheet's §3 row
    # `Exposed floor Ext2 … 1.92, 1.20, 1.20` pins it at U=1.20 via
    # Table 20. Before this slice only "U Above unheated space" was
    # routed through `is_exposed_floor=True`; "E To external air" fell
    # into the BS EN ISO 13370 ground-floor cascade and the lodged
    # exposure was lost.
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_001479_PDF)
    extractor = ElmhurstSiteNotesExtractor(textract_pages)
    survey = extractor.extract()

    # Act
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)

    # Assert
    second_extension = mapped.sap_building_parts[2]
    assert second_extension.floor_type == "To external air"
    assert second_extension.sap_floor_dimensions[0].is_exposed_floor is True
|
||
|
||
|
||
def test_summary_001479_ext2_sloping_ceiling_roof_uninsulated_for_pre_1950() -> None:
    # Arrange — Ext2 §8 of cert 001479 lodges "Type: PS Pitched, sloping
    # ceiling" with "Insulation Thickness: As Built" and age band C
    # (1930-49). Original 1930s construction carried no sloping-ceiling
    # insulation; the worksheet §3 row `External roof Ext2 … 2.30` pins
    # U=2.30 (uninsulated, Table 16 row 0). Before this slice the
    # mapper passed thickness=None through, which sent the roof to
    # `u_roof`'s pitched-roof Table 18 col 1 default (0.40 for age C,
    # which assumes a loft-joist retrofit) — the wrong geometry for PS.
    # Ext1's PS roof at age M keeps thickness=None (modern build; the
    # cascade default U=0.15 matches the worksheet).
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_001479_PDF)
    extractor = ElmhurstSiteNotesExtractor(textract_pages)
    survey = extractor.extract()

    # Act
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)

    # Assert — Ext2 (index 2) is explicitly uninsulated, Ext1 (index 1)
    # stays unset.
    assert mapped.sap_building_parts[2].roof_insulation_thickness == 0
    assert mapped.sap_building_parts[1].roof_insulation_thickness is None
|
||
|
||
|
||
def test_summary_001479_secondary_heating_routes_mains_gas_fuel() -> None:
    # Arrange — §14.1 Main Heating2 of cert 001479 lodges "Secondary
    # Heating Code: SAP code 605, Flush fitting live effect gas fire,
    # sealed to chimney". Only the SAP code (605) appears in the
    # Summary, so fuel type 26 (mains gas) has to be derived from the
    # code range. That lets the `_fuel_cost` orchestrator's
    # `secondary_high_rate_gbp_per_kwh` use Table 32's gas tariff
    # (£0.0348/kWh) instead of the default standard-electricity tariff
    # (£0.132/kWh). Worksheet line (242) "Space heating - secondary …
    # 3.4800 70.5022" confirms gas pricing.
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_001479_PDF)
    extractor = ElmhurstSiteNotesExtractor(textract_pages)
    survey = extractor.extract()

    # Act
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)

    # Assert
    heating = mapped.sap_heating
    assert heating.secondary_heating_type == 605
    assert heating.secondary_fuel_type == 26
|
||
|
||
|
||
def test_summary_9501_flat_has_no_built_form_in_summary_pdf() -> None:
    # Arrange — cert 9501 (Summary_000784.pdf) is a flat. In the
    # Elmhurst Summary, the §1.0 "Property type" section carries a
    # built-form descriptor (e.g. "M Mid-Terrace", "D Detached") for
    # houses only; for flats the §2.0 "Number of Storeys" section comes
    # straight after the "F Flat" property type.
    #
    # The extractor's `_extract_attachment` regex used to capture the
    # line following the property-type value unconditionally, leaving
    # cert 9501 with attachment "2.0 Number of Storeys:" — section-
    # header noise that the mapper then exposed as
    # EpcPropertyData.built_form, which broke the cascade's
    # flat-exposure routing downstream.
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000784_PDF)
    extractor = ElmhurstSiteNotesExtractor(textract_pages)
    survey = extractor.extract()

    # Act
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)

    # Assert — flats lodge no attachment, so built_form must be the
    # empty string; houses set it to their attachment descriptor.
    built_form = mapped.built_form
    assert built_form == ""
|
||
|
||
|
||
def test_summary_9501_dwelling_type_is_top_floor_flat() -> None:
    # Arrange — the worksheet for cert 9501 treats it as a TOP-floor
    # flat: §3 (28a) "Ground floor Main … U=0.0" because the floor sits
    # over "Another dwelling below" (worksheet line 9.0 Floor location),
    # and §3 (30) has both external roof and RR contributions, so the
    # roof IS exposed. The cascade's `_dwelling_exposure` function
    # prefix-matches on `dwelling_type.lower()` to decide which
    # surfaces are party; without "top-floor flat" the cert is treated
    # as a fully-exposed house (Δ +9.25 W/K on floor).
    #
    # Floor-position inference rules:
    #   - floor.location says "Another dwelling below"
    #       → not ground floor (rules out ground-floor flat)
    #   - room_in_roof OR external roof present
    #       → roof exposed (rules out mid-floor flat)
    #   - hence → top-floor flat
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000784_PDF)
    extractor = ElmhurstSiteNotesExtractor(textract_pages)
    survey = extractor.extract()

    # Act
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)

    # Assert
    assert mapped.dwelling_type is not None
    lowered_type = mapped.dwelling_type.lower()
    assert lowered_type.startswith("top-floor")
|
||
|
||
|
||
def test_summary_9501_rr_gable_walls_route_to_external_walls_hlc() -> None:
    # Arrange — worksheet §3 of cert 9501 lodges "Roof room Main Gable
    # Wall 1" and "Gable Wall 2" as line (29a) entries (external walls)
    # at the main-wall U (= 1.70 for age B Solid Brick):
    # 13.50×1.70 + 15.95×1.70 = 50.07 W/K on top of the regular
    # external-walls 168.74 → 218.81 W/K in total.
    #
    # The Summary mapper currently lodges them as
    # `SapRoomInRoofSurface(kind='gable_wall', ...)` — the cascade's
    # cohort-house default, which routes to party walls at U=0.25
    # (Table 4 row 2). For a top-floor flat in a mid-terrace block the
    # gables sit at the ends of the building (no neighbour above), so
    # they are EXTERNAL, not party. They should be surfaced as
    # `gable_wall_external` so that the cascade's (29a) sum includes
    # them.
    from domain.sap10_calculator.rdsap.cert_to_inputs import (
        heat_transmission_section_from_cert,
    )

    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000784_PDF)
    survey = ElmhurstSiteNotesExtractor(textract_pages).extract()
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)

    # Act
    heat_transmission = heat_transmission_section_from_cert(mapped)

    # Assert — worksheet (29a) total walls = 168.7420 (main) + 22.95
    # (Gable 1) + 27.115 (Gable 2) = 218.807 W/K. The 1e-2 tolerance
    # absorbs the 2-d.p. rounding of the underlying U/area products;
    # the 1e-4 chain test downstream tightens this to the
    # cascade-internal rounding floor.
    expected_walls_w_per_k = 218.807
    deviation = abs(heat_transmission.walls_w_per_k - expected_walls_w_per_k)
    assert deviation <= 1e-2
|
||
|
||
|
||
def test_summary_9501_pv_array_surfaced_from_elmhurst_section_19() -> None:
    # Arrange — Elmhurst §19.0 PV of cert 9501 lodges measured array
    # detail (2.36 kWp, South-West orientation, 45° elevation, "None Or
    # Little" overshading). The worksheet's §10a PV credit of
    # -250.02 GBP (-129.49 used in dwelling + -120.53 exported) relies
    # on Appendix M / Appendix U3.3 reading these from the cascade's
    # `SapEnergySource.photovoltaic_arrays` list. Without the array the
    # cascade computes a total cost £250 too high → ECF 2.92 vs
    # worksheet 2.26 → SAP 59.26 vs 68.53 (current Δ -9.27 after
    # Slice 99c closed the fabric heat loss).
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000784_PDF)
    extractor = ElmhurstSiteNotesExtractor(textract_pages)
    survey = extractor.extract()

    # Act
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)

    # Assert
    pv_arrays = mapped.sap_energy_source.photovoltaic_arrays
    assert pv_arrays is not None
    assert len(pv_arrays) == 1
    assert abs(pv_arrays[0].peak_power - 2.36) <= 1e-4
    assert pv_arrays[0].orientation == 6  # SAP octant: South-West
    assert pv_arrays[0].pitch == 3  # RdSAP §11.1 pitch enum: code 3 = 45°
    assert pv_arrays[0].overshading == 1  # RdSAP code: None or very little
|
||
|
||
|
||
def test_summary_9501_full_chain_sap_matches_worksheet_pdf_exactly() -> None:
    # Arrange — cert 9501-3059-8202-7356-0204 (Summary_000784.pdf /
    # dr87-0001-000784.pdf): third boiler validation cert and the first
    # FLAT in the per-cert mapper validation cohort. Mains-gas Vaillant
    # PCDB idx 19007, mid-terrace top-floor flat with Room-in-Roof and
    # measured PV (2.36 kWp SW @ 45°), TFA 113.08 m². The worksheet PDF
    # "SAP value" line lodges unrounded SAP **68.5252**.
    #
    # Slices 99a-99e together closed the Summary path from Δ -5.25 to
    # 1e-4:
    #   99a extractor attachment fix (built_form=''),
    #   99b dwelling_type identifies top-floor flat (cascade exposure
    #       routing),
    #   99c RR gables external for flats + SO Solid Brick wall code,
    #   99d surface PV array from §19.0,
    #   99e PV pitch enum-not-degrees.
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000784_PDF)
    survey = ElmhurstSiteNotesExtractor(textract_pages).extract()
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)

    # Act
    calculator_inputs = cert_to_inputs(mapped, prices=SAP_10_2_SPEC_PRICES)
    outcome = calculate_sap_from_inputs(calculator_inputs)

    # Assert — 1e-4 pin (project memory `feedback_zero_error_strict`).
    expected_sap = 68.5252
    deviation = abs(outcome.sap_score_continuous - expected_sap)
    assert deviation < 1e-4
|
||
|
||
|
||
def test_summary_001479_full_chain_sap_matches_worksheet_pdf_exactly() -> None:
    # Arrange — cert 001479 (Summary_001479.pdf / P960-0001-001479.pdf)
    # is the first cohort cert with a real GOV.UK EPB API counterpart
    # (cert ref 0535-9020-6509-0821-6222). The worksheet PDF "SAP
    # value" line lodges unrounded SAP **69.0094** (rating C 69, which
    # is also the API-published integer). It is the load-bearing
    # forcing function for the API↔Elmhurst parity workstream: any
    # drift beyond 1e-4 points to a mapper gap rather than a calculator
    # bug, because the cohort 6 cert cascades all reproduce Elmhurst
    # exactly at 1e-4 on hand-built fixtures.
    #
    # Source-data caveat (kept for future debuggers): Summary §3 lodges
    # the Ext1 age band as "M 2023 onwards" whereas the worksheet
    # header records "Ext1: L" — likely an assessor data-entry
    # inconsistency. The mapper trusts the Summary (its source of
    # truth); accept whatever residual the M vs L disagreement
    # produces.
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_001479_PDF)
    survey = ElmhurstSiteNotesExtractor(textract_pages).extract()
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)

    # Act
    calculator_inputs = cert_to_inputs(mapped, prices=SAP_10_2_SPEC_PRICES)
    outcome = calculate_sap_from_inputs(calculator_inputs)

    # Assert — 1e-4 pin, no widening, no xfail (project memory
    # `feedback_zero_error_strict`).
    expected_sap = 69.0094
    deviation = abs(outcome.sap_score_continuous - expected_sap)
    assert deviation < 1e-4
|
||
|
||
|
||
def test_summary_0330_full_chain_sap_matches_worksheet_pdf_exactly() -> None:
    # Arrange — cert 0330-2249-8150-2326-4121 (Summary_000897.pdf /
    # dr87-0001-000897.pdf) is the second boiler cert under per-cert
    # mapper validation: mains-gas boiler (PCDB idx 10241), mid-terrace
    # 2-bp dwelling, TFA 69.14 m². The worksheet PDF "SAP value" line
    # lodges unrounded SAP **61.5993**. It plays the same load-bearing
    # role as cert 001479 (the first boiler): the Summary path proves
    # itself against the worksheet, then serves as the canonical
    # reference for the API path.
    # Handover-baseline note: expected RED at Δ +0.4667 (Summary mapper
    # cascade SAP 62.0660); the mapper gaps to close are §11
    # glazing_type=14 (windows HLC +6.71 W/K) and the §4 hot-water
    # cascade (kWh +1060).
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000897_PDF)
    survey = ElmhurstSiteNotesExtractor(textract_pages).extract()
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)

    # Act
    calculator_inputs = cert_to_inputs(mapped, prices=SAP_10_2_SPEC_PRICES)
    outcome = calculate_sap_from_inputs(calculator_inputs)

    # Assert — 1e-4 pin, no widening, no xfail (project memory
    # `feedback_zero_error_strict`).
    expected_sap = 61.5993
    deviation = abs(outcome.sap_score_continuous - expected_sap)
    assert deviation < 1e-4
|
||
|
||
|
||
# GOV.UK EPB API JSON fixtures for certs 0330 and 9501, stored in the
# rdsap golden-fixture directory three levels above this module's
# directory.
_API_0330_JSON = Path(__file__).parents[3].joinpath(
    "domain/sap10_calculator/rdsap/tests/fixtures/golden",
    "0330-2249-8150-2326-4121.json",
)

_API_9501_JSON = Path(__file__).parents[3].joinpath(
    "domain/sap10_calculator/rdsap/tests/fixtures/golden",
    "9501-3059-8202-7356-0204.json",
)
|
||
|
||
|
||
def test_api_9501_full_chain_sap_matches_worksheet_pdf_exactly() -> None:
    # Arrange — cert 9501 is the third Layer 4 production gate (after
    # cert 001479 and cert 0330): API path → from_api_response →
    # cert_to_inputs → calculate_sap_from_inputs has to land on the
    # worksheet SAP at 1e-4. It is the FIRST flat in the production
    # gate set — a mid-terrace top-floor flat with RR and measured PV
    # (2.36 kWp SW @ 45°). Worksheet target unrounded SAP **68.5252**.
    #
    # Slices 100a-100c together closed the API path from Δ -14.82 to
    # 1e-4:
    #   100a `room_in_roof_details` schema + Detailed-RR surface
    #        population (HLC 382.19 → 297.54 W/K vs worksheet 296.68),
    #   100b per-bp TFA includes RR floor area (TFA 81.28 → 113.08),
    #   100c `photovoltaic_supply.pv_arrays` schema + gap-aware glazing
    #        lookup (DG pre-2002 16+ → U=2.7 per RdSAP 10 Table 24).
    api_payload = json.loads(_API_9501_JSON.read_text())
    mapped = EpcPropertyDataMapper.from_api_response(api_payload)

    # Act
    calculator_inputs = cert_to_inputs(mapped, prices=SAP_10_2_SPEC_PRICES)
    outcome = calculate_sap_from_inputs(calculator_inputs)

    # Assert — 1e-4 pin against the worksheet's continuous SAP.
    expected_sap = 68.5252
    deviation = abs(outcome.sap_score_continuous - expected_sap)
    assert deviation < 1e-4
|
||
|
||
|
||
def test_api_9501_photovoltaic_array_surfaced() -> None:
    # Arrange — the API JSON for cert 9501 lodges measured PV under
    # `sap_energy_source.photovoltaic_supply.pv_arrays`. Two real-API
    # PV shapes exist side by side: cohort cert 2130 lodges the outer
    # wrapper as a nested list `[[{...}], ...]`, while cert 9501 lodges
    # a dict `{"pv_arrays": [{...}]}`. The existing schema modelled
    # only the legacy `none_or_no_details` field on
    # `PhotovoltaicSupply`, so cert 9501's `pv_arrays` payload was
    # silently dropped — `photovoltaic_arrays=None` and the cascade
    # missed the worksheet's £250.02 PV credit.
    api_payload = json.loads(_API_9501_JSON.read_text())

    # Act
    mapped = EpcPropertyDataMapper.from_api_response(api_payload)

    # Assert — exactly one array carrying the lodged kWp / pitch /
    # orientation / overshading values.
    pv_arrays = mapped.sap_energy_source.photovoltaic_arrays
    assert pv_arrays is not None
    assert len(pv_arrays) == 1
    assert abs(pv_arrays[0].peak_power - 2.36) <= 1e-4
    assert pv_arrays[0].pitch == 3  # RdSAP §11.1 enum: 3 = 45°
    assert pv_arrays[0].orientation == 6  # SAP octant: SW
    assert pv_arrays[0].overshading == 1  # RdSAP: None or very little
|
||
|
||
|
||
# GOV.UK EPB API JSON fixture for cert 0380, kept in the rdsap
# golden-fixture directory three levels above this module's directory.
_API_0380_JSON = Path(__file__).parents[3].joinpath(
    "domain/sap10_calculator/rdsap/tests/fixtures/golden",
    "0380-2471-3250-2596-8761.json",
)
|
||
|
||
|
||
def test_api_0380_glazing_type_14_resolves_to_post_2022_dg_u_value() -> None:
    # Arrange — cert 0380 (ASHP semi-detached bungalow, worksheet SAP
    # 88.5104) lodges glazing_type=14 on every window. The worksheet
    # uses U=1.3258 (post-curtain) on line (27), which back-calculates
    # to a raw U=1.40 — the SAP10.2 Table 24 row for "Double or triple
    # glazed, 2022 or later". Code 13 in the existing dict already
    # carries those U/g values; code 14 is the schema sibling for the
    # same post-2022 product family (DG sealed-unit variants differ in
    # the cert lodgement but share the spec U-value).
    api_payload = json.loads(_API_0380_JSON.read_text())
    mapped = EpcPropertyDataMapper.from_api_response(api_payload)

    # Act — any window will do (cert 0380 lodges only glazing_type=14).
    first_window = mapped.sap_windows[0]
    transmission = first_window.window_transmission_details

    # Assert
    assert transmission is not None
    u_value_error = abs(transmission.u_value - 1.40)
    assert u_value_error <= 1e-4
    g_value_error = abs(transmission.solar_transmittance - 0.72)
    assert g_value_error <= 1e-4
|
||
|
||
|
||
def test_api_0380_wall_with_external_insulation_routes_to_filled_cavity_u() -> None:
    # Arrange — the top-level walls[0].description of cert 0380 lodges
    # "Cavity wall, filled cavity and external insulation". The
    # worksheet uses U=0.25 for the (29a) external-walls entry — the
    # very-low-U "filled cavity + external insulation" composite that
    # RdSAP 10 §5 routes through Table 6's filled-cavity row (with a
    # further EWI reduction). The cascade was producing U=0.32 through
    # the as-built Table 13 bucketed cascade because
    # `_described_as_insulated` matched only the past participle
    # "insulated"; the noun "insulation" on its own resolved to False,
    # and cert 0380's lodgement uses the noun form.
    #
    # Fix: `_described_as_insulated` should also match the noun
    # "insulation" (while keeping the existing "no insulation" hard
    # negation), so that cavity walls described as carrying insulation
    # take the cascade's Filled-cavity branch.
    from domain.sap10_calculator.rdsap.cert_to_inputs import (
        heat_transmission_section_from_cert,
    )

    api_payload = json.loads(_API_0380_JSON.read_text())
    mapped = EpcPropertyDataMapper.from_api_response(api_payload)

    # Act
    heat_transmission = heat_transmission_section_from_cert(mapped)

    # Assert — main-wall HLC ≈ 46.46 m² × 0.25 = 11.62 W/K (worksheet
    # exact). The 1e-2 tolerance absorbs sub-component rounding; the
    # 1e-4 chain test downstream tightens to the cascade floor.
    expected_walls_w_per_k = 11.62
    deviation = abs(heat_transmission.walls_w_per_k - expected_walls_w_per_k)
    assert deviation <= 1e-2
|
||
|
||
|
||
def test_api_0380_heat_pump_no_secondary_heating_per_table_11() -> None:
    # Arrange — SAP 10.2 Table 11 explicitly notes "Cat 4 (heat pump):
    # 0.00 (HP eff includes any secondary)": heat pumps take no Table 11
    # secondary fraction even when the cert lodges a secondary heating
    # type, because the HP efficiency already accounts for any
    # supplementary heat source. The
    # `_SECONDARY_HEATING_FRACTION_BY_CATEGORY` dict in
    # cert_to_inputs.py had entries for categories 1/2/3/5/6/7/10 but
    # none for cat 4, so HP certs with a lodged secondary fell through
    # to the DEFAULT 0.10 and billed 10% of space-heating cost as
    # "secondary" (cert 0380: £72 secondary vs worksheet £0).
    #
    # Cert 0380 lodges secondary_heating_type=691 together with
    # main_heating_category=4 (HP, PCDB idx 104568). Worksheet line
    # (242) "Space heating - secondary" shows 0.0 kWh; the cascade was
    # producing 547.30 kWh. Fix: dict entry `4: 0.0`.
    doc = json.loads(_API_0380_JSON.read_text())
    epc = EpcPropertyDataMapper.from_api_response(doc)

    # Act — relies on the module-level imports of
    # `calculate_sap_from_inputs`, `cert_to_inputs` and
    # `SAP_10_2_SPEC_PRICES`, like the other chain tests in this file.
    result = calculate_sap_from_inputs(
        cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)
    )

    # Assert — secondary heating contributes 0 kWh / £0 on HP certs.
    assert result.secondary_heating_fuel_kwh_per_yr == 0.0
|
||
|
||
|
||
def test_api_0380_heat_pump_no_pumps_fans_kwh_per_table_4f() -> None:
    """Cert 0380 (heat pump) must cascade to zero pumps/fans electricity."""
    # Arrange — SAP 10.2 Table 4f lists annual pumps + fans electricity
    # consumption by main heating category. Gas-fired boilers (cat 2)
    # use 160 kWh/yr (115 central heating pump + 45 flue fan). Heat
    # pumps (cat 4) have NO additional pumps/fans contribution because
    # the HP system's circulation pump and fans are already
    # incorporated into the system COP.
    #
    # The cascade's `_PUMPS_FANS_KWH_BY_MAIN_CATEGORY` dict only had a
    # cat-2 entry; cat-4 HP certs fell through to the DEFAULT 130
    # kWh/yr (~£17 at 13.19 p/kWh) — the worksheet line (249) "Pumps,
    # fans and electric keep-hot" shows 0.0000 kWh/yr for cert 0380.
    doc = json.loads(_API_0380_JSON.read_text())
    epc = EpcPropertyDataMapper.from_api_response(doc)

    # Act
    from domain.sap10_calculator.calculator import calculate_sap_from_inputs
    from domain.sap10_calculator.rdsap.cert_to_inputs import (
        cert_to_inputs, SAP_10_2_SPEC_PRICES,
    )
    result = calculate_sap_from_inputs(
        cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)
    )

    # Assert
    assert result.pumps_fans_kwh_per_yr == 0.0
|
||
|
||
|
||
def test_api_0380_mean_internal_temp_matches_worksheet_92_within_1e_3() -> None:
    """Cert 0380's monthly mean internal temperature must match worksheet
    line (92) to within 1e-3 for every month."""
    # Arrange — SAP 10.2 Appendix N3.5 (PDF p.107) replaces Table 9c
    # steps 3-4 for heat-pump packages with PCDB data: each month
    # blends Th, T_unimodal, T_bimodal via Equation N5.
    #
    # Cert 0380 (Mitsubishi PUZ-WM50VHA, PCDB 104568, PSR ≈ 1.43)
    # lands on Table N5 row "1.2 or more" → annual totals (3, 38) →
    # Jan(3, 28) + Dec(0, 10) extended days.
    #
    # Pre-slice-102f-prep.6 the cold-month MIT drifted +0.008°C due to
    # `internal_gains_from_cert` injecting the central-heating pump's
    # heating-season gain (~7 W) on HP certs. SAP 10.2 Table 4f
    # specifies zero pump/fan gains on HP packages (cert 0380's
    # worksheet line 70 = 0.0 every month) — that gating drops the
    # spurious gain and tightens the MIT cascade against worksheet
    # (92) to 1e-3 per month.
    doc = json.loads(_API_0380_JSON.read_text())
    epc = EpcPropertyDataMapper.from_api_response(doc)

    # Act
    inputs = cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)

    # Assert — pin against worksheet line (92) "MIT" 12-tuple.
    worksheet_mit_92 = (
        18.9539, 18.0081, 18.3466, 18.8491, 19.3582, 19.8174,
        20.0288, 20.0064, 19.6975, 19.0702, 18.3966, 18.1573,
    )
    # `zip` stops at the shorter input, so without this guard a short or
    # empty cascade tuple would let the per-month loop pass vacuously.
    cascade_mit = tuple(inputs.mean_internal_temp_monthly_c)
    assert len(cascade_mit) == len(worksheet_mit_92), (
        f"expected {len(worksheet_mit_92)} monthly MIT values, "
        f"got {len(cascade_mit)}"
    )
    for m, (cascade, ws) in enumerate(zip(cascade_mit, worksheet_mit_92)):
        assert abs(cascade - ws) < 1e-3, (
            f"month {m + 1}: cascade={cascade:.4f} vs worksheet={ws:.4f}"
        )
|
||
|
||
|
||
def test_api_9501_room_in_roof_surfaces_populated() -> None:
    """The API mapper must surface cert 9501's per-surface room-in-roof
    detail (two gable walls and one flat ceiling)."""
    # Arrange — cert 9501's API JSON lodges measured RR detail under
    # `sap_room_in_roof.room_in_roof_details`: two gable walls
    # (5.51 m × 2.45 m + 6.51 m × 2.45 m) and a flat ceiling (5.5 m ×
    # 1.0 m, 300 mm insulation). The schema's `SapRoomInRoof` dataclass
    # exposed the inner block under the wrong field name
    # `room_in_roof_type_1` (the legacy Simplified Type 1 wrapper),
    # so `from_dict` parsed the inner block as None — the API mapper
    # then built `SapRoomInRoof` with no per-surface area data, and
    # the cascade defaulted to the Simplified Type 2 "all elements"
    # branch (RR floor_area × Table 18 col(4) age-B U=2.30) for the
    # whole RR → roof HLC 149.43 vs worksheet 18.10 (Δ +131).
    doc = json.loads(_API_9501_JSON.read_text())

    # Act
    epc = EpcPropertyDataMapper.from_api_response(doc)

    # Assert — RR surfaces present and match worksheet element table:
    # Gable Wall 1 = 13.50 m², Gable Wall 2 = 15.95 m², Flat Ceiling 1
    # = 5.50 m² (per worksheet §3 element table).
    rir = epc.sap_building_parts[0].sap_room_in_roof
    assert rir is not None
    assert rir.detailed_surfaces is not None
    # Sorting the (kind, area) pairs makes the comparison independent of
    # the order in which the mapper emits the surfaces.
    kinds_by_area = sorted((s.kind, s.area_m2) for s in rir.detailed_surfaces)
    assert kinds_by_area == [
        ("flat_ceiling", 5.5),
        ("gable_wall_external", 13.50),
        ("gable_wall_external", 15.95),
    ]
|
||
|
||
|
||
def test_api_0330_full_chain_sap_matches_worksheet_pdf_exactly() -> None:
    """API JSON for cert 0330 must cascade to the worksheet's continuous
    SAP (61.5993) within 1e-4."""
    # Arrange — cert 0330-2249-8150-2326-4121 (second boiler validation
    # cert: mains-gas Vaillant PCDB idx 10241, mid-terrace 2-bp dwelling,
    # TFA 90.56 m²) has both an Elmhurst Summary PDF and a GOV.UK EPB API
    # JSON. The Summary path lands at 1e-4 vs worksheet SAP 61.5993
    # above; this Layer 4 production gate asserts the API path matches
    # the worksheet to the same 1e-4 tolerance — same forcing function
    # as cert 001479's Layer 4 test, applied to the second boiler cert.
    #
    # Slices 96-99 (flat-roof Table 18 col (3) U-values + glazing_type=2
    # surfacing + shower-outlets list normalisation + window-area
    # rounding alignment) jointly closed the API path from
    # Δ +2.1453 → Δ -0.000011 vs worksheet 61.5993.
    doc = json.loads(_API_0330_JSON.read_text())
    epc = EpcPropertyDataMapper.from_api_response(doc)

    # Act
    result = calculate_sap_from_inputs(
        cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)
    )

    # Assert — 1e-4 pin against the worksheet's continuous SAP.
    worksheet_unrounded_sap = 61.5993
    assert abs(result.sap_score_continuous - worksheet_unrounded_sap) < 1e-4
|
||
|
||
|
||
def test_api_001479_full_chain_sap_matches_worksheet_pdf_exactly() -> None:
    """API JSON for cert 001479 must cascade to the worksheet's continuous
    SAP (69.0094) within 1e-4."""
    # Arrange — cert 001479 has both an Elmhurst Summary PDF and a GOV.UK
    # EPB API JSON (ref 0535-9020-6509-0821-6222). The Summary cascade
    # already pins at worksheet's 69.0094 ± 1e-4 above; this test is the
    # Layer 4 production-path gate: API JSON → from_api_response →
    # cert_to_inputs → calculate_sap_from_inputs must also hit 69.0094
    # at 1e-4. Identical inputs must produce identical outputs; the
    # calculator is deterministic, so any drift is a mapper coverage gap.
    doc = json.loads(_API_001479_JSON.read_text())
    epc = EpcPropertyDataMapper.from_api_response(doc)

    # Act
    result = calculate_sap_from_inputs(
        cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)
    )

    # Assert — 1e-4 pin against the worksheet's continuous SAP. ±0.5 is
    # the API-only fallback (project memory
    # `feedback_api_tolerance_1e_minus_4`); when the worksheet is
    # available, identical-inputs-must-produce-identical-outputs is the
    # bar.
    worksheet_unrounded_sap = 69.0094
    assert abs(result.sap_score_continuous - worksheet_unrounded_sap) < 1e-4
|
||
|
||
|
||
# ============================================================================
# Mapper-vs-hand-built EpcPropertyData diff tests
# ============================================================================
# The 6 cohort hand-builts (_elmhurst_worksheet_NNNNNN.build_epc) are the
# 100%-correct calculator-input ground truth — each cascades to its
# worksheet PDF's lodged SAP at 1e-4. The chain tests above only assert
# cascade-output equivalence; the mapper can pass them by producing a
# *different* EpcPropertyData that happens to cascade to the same number.
#
# These tests pin the missing layer: the mapper's EpcPropertyData must
# match the hand-built's load-bearing fields exactly. Every divergence
# surfaced here is a mapper coverage gap to close as its own slice.
#
# "Load-bearing" = the subset of EpcPropertyData fields that drive the
# SAP cascade or carry semantic cross-mapper meaning. Cert-metadata
# fields (address, registration dates, descriptive EnergyElement lists,
# tariff strings) are excluded because they don't change calculator
# output and vary by mapper pathway (the API publishes some, the
# Elmhurst Summary publishes others) without semantic disagreement.
|
||
|
||
# SapWindow sub-fields the cascade doesn't read (descriptive Union[int,
# str] codes lodged differently by each mapper). The cascade reads
# window_width / window_height / orientation / window_location /
# frame_factor / window_transmission_details.{u_value,
# solar_transmittance} — those WILL still be diffed; everything else on
# SapWindow is metadata and excluded to avoid noise from the int/str
# dual encoding (API mapper produces int codes; Elmhurst mapper
# surfaces the Summary's lodged strings).
_NON_LOAD_BEARING_WINDOW_SUBFIELDS: frozenset[str] = frozenset({
    "frame_material",
    "glazing_gap",
    "window_type",
    "glazing_type",
    "window_wall_type",
    "draught_proofed",
    "permanent_shutters_present",
    "permanent_shutters_insulated",
})
|
||
|
||
|
||
def _is_excluded_path(path: str) -> bool:
    """Return True for paths the diff should silently skip — non-cascade-
    affecting Union[int, str] encoding differences between the API and
    Elmhurst mapper outputs that cohort hand-built fixtures don't pin.

    `path` is the dotted / indexed field path built up by
    `_diff_load_bearing` (e.g. ``sap_windows[0].glazing_type``)."""
    if path.startswith("sap_windows[") and "]." in path:
        # Everything after the first "]." is the sub-field path relative
        # to the individual window entry.
        suffix = path.split("].", 1)[1]
        if suffix in _NON_LOAD_BEARING_WINDOW_SUBFIELDS:
            return True
        if suffix == "window_transmission_details.data_source":
            return True
    # `roof_construction_type` is set by the Elmhurst mapper from
    # `roof.roof_type` (e.g. "Pitched (slates/tiles), access to loft") and
    # left None by the cohort hand-builts. The cascade in
    # `heat_transmission.py:562` only dispatches on the "sloping ceiling"
    # substring (RdSAP §3.8); none of the cohort certs lodge pitched-
    # sloping-ceiling roofs, so both values produce identical cascade
    # output. Exclude from the diff to avoid flagging informational drift.
    if path.startswith("sap_building_parts[") and path.endswith(".roof_construction_type"):
        return True
    # `sap_ventilation.has_suspended_timber_floor` and
    # `..._sealed` are set explicitly on the hand-builts (to mirror the
    # cohort U985 worksheets' (12) infiltration values) but left None by
    # the Elmhurst mapper because the Summary PDF doesn't surface floor-
    # construction in a parseable form. When None,
    # `cert_to_inputs._has_suspended_timber_floor_per_spec` infers the
    # value mechanically from per-bp floor-construction data — producing
    # the same cascade output the explicit-bool hand-built path produces
    # for cohort 000477 / 000516 (where the spec inference and the
    # worksheet agree). Where the spec inference and worksheet disagree
    # (cohort 000474, 000480, 000487, 000490), the chain SAP-pin tests
    # fail separately — that's a known Elmhurst-worksheet-vs-RdSAP-10 §5
    # (12) divergence, not a mapper diff issue.
    return path in (
        "sap_ventilation.has_suspended_timber_floor",
        "sap_ventilation.suspended_timber_floor_sealed",
    )
|
||
|
||
|
||
# Top-level EpcPropertyData attribute names that the mapper-vs-hand-built
# tests below read (via `getattr`) and diff one by one.
_LOAD_BEARING_FIELDS: tuple[str, ...] = (
    # Cascade-driving structural fields
    "sap_building_parts",
    "sap_windows",
    "sap_roof_windows",
    "sap_heating",
    "sap_ventilation",
    "sap_energy_source",
    "total_floor_area_m2",
    # Building-classification fields driving default cascades
    "dwelling_type",
    "built_form",
    "property_type",
    "country_code",
    "postcode",
    # Counts and openings
    "door_count",
    "insulated_door_count",
    "insulated_door_u_value",
    "habitable_rooms_count",
    "heated_rooms_count",
    "wet_rooms_count",
    "extensions_count",
    "open_chimneys_count",
    "blocked_chimneys_count",
    "extract_fans_count",
    # Lighting
    "cfl_fixed_lighting_bulbs_count",
    "led_fixed_lighting_bulbs_count",
    "incandescent_fixed_lighting_bulbs_count",
    "low_energy_fixed_lighting_bulbs_count",
    "fixed_lighting_outlets_count",
    "low_energy_fixed_lighting_outlets_count",
    # HW / appliances
    "solar_water_heating",
    "has_hot_water_cylinder",
    "has_fixed_air_conditioning",
    "has_conservatory",
    "has_heated_separate_conservatory",
    # Envelope drivers
    "percent_draughtproofed",
    "mechanical_ventilation",
    "pressure_test",
    # Construction-detail flags
    "addendum",
    "lzc_energy_sources",
    "any_unheated_rooms",
    "number_of_storeys",
    "sap_flat_details",
)
|
||
|
||
|
||
def _diff_load_bearing(
    mapped: object, hand_built: object, path: str = "",
) -> list[str]:
    """Recursive field diff; returns one line per leaf divergence between
    mapped EpcPropertyData and the hand-built fixture. Int/float type
    differences with the same numeric value are not flagged.

    `path` is the dotted / indexed location of the values being compared;
    it prefixes every reported line and is what `_is_excluded_path`
    matches against. Dataclass instances are compared field by field and
    lists element by element (a length mismatch is reported as a single
    `LEN` line); every other value is compared with `!=`.

    Strict-pyright posture: arguments typed `object` so each branch
    narrows via `isinstance` rather than threading `Any` through the
    recursion (which pyright can't reason about under
    `strict`/`typeCheckingMode = strict`)."""
    out: list[str] = []
    if type(mapped) is not type(hand_built):
        # An int/float pair is not a type divergence by itself; it falls
        # through to the value comparison at the bottom.
        if not (isinstance(mapped, (int, float)) and isinstance(hand_built, (int, float))):
            if not _is_excluded_path(path):
                out.append(
                    f"{path}: TYPE {type(mapped).__name__} vs "
                    f"{type(hand_built).__name__} mapped={mapped!r} "
                    f"handbuilt={hand_built!r}"
                )
            return out
    # `is_dataclass` is also true for dataclass *classes*; the
    # `isinstance(..., type)` guards restrict this branch to instances.
    if dataclasses.is_dataclass(mapped) and not isinstance(mapped, type) \
            and dataclasses.is_dataclass(hand_built) and not isinstance(hand_built, type):
        for fld in dataclasses.fields(mapped):
            out.extend(_diff_load_bearing(
                getattr(mapped, fld.name),
                getattr(hand_built, fld.name),
                f"{path}.{fld.name}" if path else fld.name,
            ))
        return out
    if isinstance(mapped, list) and isinstance(hand_built, list):
        mapped_list = cast("list[object]", mapped)
        hand_built_list = cast("list[object]", hand_built)
        if len(mapped_list) != len(hand_built_list):
            # Element-wise pairing is meaningless once lengths differ, so
            # report the mismatch once and stop descending.
            out.append(f"{path}: LEN {len(mapped_list)} vs {len(hand_built_list)}")
            return out
        for i, (m_item, h_item) in enumerate(zip(mapped_list, hand_built_list)):
            out.extend(_diff_load_bearing(m_item, h_item, f"{path}[{i}]"))
        return out
    if mapped != hand_built:
        if not _is_excluded_path(path):
            out.append(f"{path}: mapped={mapped!r} handbuilt={hand_built!r}")
    return out
|
||
|
||
|
||
def test_from_elmhurst_site_notes_matches_hand_built_000474() -> None:
    """Mapper output for Summary 000474 must equal the hand-built fixture
    on every load-bearing field."""
    # Arrange — _elmhurst_worksheet_000474.build_epc() is the canonical
    # hand-built EpcPropertyData for cert U985-0001-000474; it cascades
    # to the worksheet PDF's `SAP value 62.2584` at 1e-4 (cohort SAP-
    # result pin). Routing the corresponding Summary PDF through the
    # Elmhurst mapper MUST produce a load-bearing-field-equivalent
    # EpcPropertyData; any divergence is a mapper-coverage gap.
    #
    # Tracer-bullet scope: cert 000474 only. Once GREEN, parametrize
    # over the 5 other cohort fixtures and add cert 001479 (after
    # `_elmhurst_worksheet_001479` lands at 1e-4 via Slice 62 iteration).
    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000474_PDF)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
    hand_built = _w000474.build_epc()

    # Act — a field missing on either side reads as None, so it surfaces
    # as a diff line and not as an AttributeError.
    diffs: list[str] = []
    for field_name in _LOAD_BEARING_FIELDS:
        diffs.extend(_diff_load_bearing(
            getattr(mapped, field_name, None),
            getattr(hand_built, field_name, None),
            field_name,
        ))

    # Assert
    assert not diffs, (
        f"{len(diffs)} load-bearing divergence(s) between mapped and "
        f"hand-built EpcPropertyData for cohort cert 000474:\n " +
        "\n ".join(diffs)
    )
|
||
|
||
|
||
def test_from_elmhurst_site_notes_matches_hand_built_000477() -> None:
    """Mapper output for Summary 000477 must equal the hand-built fixture
    on every load-bearing field."""
    # Arrange — _elmhurst_worksheet_000477.build_epc() is the canonical
    # hand-built EpcPropertyData for cert U985-0001-000477 (single-bp
    # mid-terrace, age band B, RIR with stud walls + party gables, no
    # extension); it cascades to the worksheet PDF's `SAP value 65.0057`
    # at 1e-4. Routing the Summary PDF through the Elmhurst mapper MUST
    # produce a load-bearing-field-equivalent EpcPropertyData; any
    # divergence is a mapper-coverage gap to close as its own slice.
    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000477_PDF)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
    hand_built = _w000477.build_epc()

    # Act — a field missing on either side reads as None, so it surfaces
    # as a diff line and not as an AttributeError.
    diffs: list[str] = []
    for field_name in _LOAD_BEARING_FIELDS:
        diffs.extend(_diff_load_bearing(
            getattr(mapped, field_name, None),
            getattr(hand_built, field_name, None),
            field_name,
        ))

    # Assert
    assert not diffs, (
        f"{len(diffs)} load-bearing divergence(s) between mapped and "
        f"hand-built EpcPropertyData for cohort cert 000477:\n " +
        "\n ".join(diffs)
    )
|
||
|
||
|
||
def test_from_elmhurst_site_notes_matches_hand_built_000480() -> None:
    """Mapper output for Summary 000480 must equal the hand-built fixture
    on every load-bearing field."""
    # Arrange — _elmhurst_worksheet_000480.build_epc() is the canonical
    # hand-built EpcPropertyData for cert U985-0001-000480 (mid-terrace
    # with main + 1 extension + 19.83 m² RIR, gas combi); it cascades
    # to the worksheet PDF's `SAP value 61.2986` at 1e-4. Routing the
    # Summary PDF through the Elmhurst mapper MUST produce a load-
    # bearing-field-equivalent EpcPropertyData; any divergence is a
    # mapper-coverage gap to close as its own slice.
    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000480_PDF)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
    hand_built = _w000480.build_epc()

    # Act — a field missing on either side reads as None, so it surfaces
    # as a diff line and not as an AttributeError.
    diffs: list[str] = []
    for field_name in _LOAD_BEARING_FIELDS:
        diffs.extend(_diff_load_bearing(
            getattr(mapped, field_name, None),
            getattr(hand_built, field_name, None),
            field_name,
        ))

    # Assert
    assert not diffs, (
        f"{len(diffs)} load-bearing divergence(s) between mapped and "
        f"hand-built EpcPropertyData for cohort cert 000480:\n " +
        "\n ".join(diffs)
    )
|
||
|
||
|
||
def test_from_elmhurst_site_notes_matches_hand_built_000487() -> None:
    """Mapper output for Summary 000487 must equal the hand-built fixture
    on every load-bearing field."""
    # Arrange — _elmhurst_worksheet_000487.build_epc() is the canonical
    # hand-built EpcPropertyData for cert U985-0001-000487 (Enclosed
    # Mid-Terrace, main + 1 extension + 21.03 m² RIR with explicit-U
    # gable_wall_external, gas combi, 1 electric shower, 1.43 m²
    # timber-frame alt wall on the extension); it cascades to the
    # worksheet PDF's `SAP value 61.6431` at 1e-4. Routing the Summary
    # PDF through the Elmhurst mapper MUST produce a load-bearing-
    # field-equivalent EpcPropertyData; any divergence is a mapper-
    # coverage gap to close as its own slice.
    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000487_PDF)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
    hand_built = _w000487.build_epc()

    # Act — a field missing on either side reads as None, so it surfaces
    # as a diff line and not as an AttributeError.
    diffs: list[str] = []
    for field_name in _LOAD_BEARING_FIELDS:
        diffs.extend(_diff_load_bearing(
            getattr(mapped, field_name, None),
            getattr(hand_built, field_name, None),
            field_name,
        ))

    # Assert
    assert not diffs, (
        f"{len(diffs)} load-bearing divergence(s) between mapped and "
        f"hand-built EpcPropertyData for cohort cert 000487:\n " +
        "\n ".join(diffs)
    )
|
||
|
||
|
||
def test_from_elmhurst_site_notes_matches_hand_built_000490() -> None:
    """Mapper output for Summary 000490 must equal the hand-built fixture
    on every load-bearing field."""
    # Arrange — _elmhurst_worksheet_000490.build_epc() is the canonical
    # hand-built EpcPropertyData for cert U985-0001-000490 (End-Terrace,
    # main + 1 extension, gas combi + gas-secondary; sheltered_sides=1
    # per RdSAP §S5); it cascades to the worksheet PDF's `SAP value
    # 57.3979` at 1e-4. Routing the Summary PDF through the Elmhurst
    # mapper MUST produce a load-bearing-field-equivalent
    # EpcPropertyData; any divergence is a mapper-coverage gap.
    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000490_PDF)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
    hand_built = _w000490.build_epc()

    # Act — a field missing on either side reads as None, so it surfaces
    # as a diff line and not as an AttributeError.
    diffs: list[str] = []
    for field_name in _LOAD_BEARING_FIELDS:
        diffs.extend(_diff_load_bearing(
            getattr(mapped, field_name, None),
            getattr(hand_built, field_name, None),
            field_name,
        ))

    # Assert
    assert not diffs, (
        f"{len(diffs)} load-bearing divergence(s) between mapped and "
        f"hand-built EpcPropertyData for cohort cert 000490:\n " +
        "\n ".join(diffs)
    )
|
||
|
||
|
||
def test_from_elmhurst_site_notes_matches_hand_built_000516() -> None:
    """Mapper output for Summary 000516 must equal the hand-built fixture
    on every load-bearing field."""
    # Arrange — _elmhurst_worksheet_000516.build_epc() is the canonical
    # hand-built EpcPropertyData for cert U985-0001-000516 (Mid-Terrace,
    # main + 19.02 m² RIR, 5 vertical windows + 1 roof window which the
    # mapper routes to `sap_roof_windows` per `U > 3.0` discrimination);
    # it cascades to the worksheet PDF's `SAP value 62.7937` at 1e-4.
    # Routing the Summary PDF through the Elmhurst mapper MUST produce
    # a load-bearing-field-equivalent EpcPropertyData.
    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000516_PDF)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
    hand_built = _w000516.build_epc()

    # Act — a field missing on either side reads as None, so it surfaces
    # as a diff line and not as an AttributeError.
    diffs: list[str] = []
    for field_name in _LOAD_BEARING_FIELDS:
        diffs.extend(_diff_load_bearing(
            getattr(mapped, field_name, None),
            getattr(hand_built, field_name, None),
            field_name,
        ))

    # Assert
    assert not diffs, (
        f"{len(diffs)} load-bearing divergence(s) between mapped and "
        f"hand-built EpcPropertyData for cohort cert 000516:\n " +
        "\n ".join(diffs)
    )
|