This commit is contained in:
KhalimCK 2026-06-08 08:28:00 +00:00 committed by GitHub
commit 0498047682
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
37 changed files with 2950 additions and 151 deletions

View file

@ -1,6 +1,6 @@
import re
from datetime import date, datetime
from typing import List, Optional
from typing import Final, List, Optional
from datatypes.epc.surveys.elmhurst_site_notes import (
AlternativeWall,
@ -811,6 +811,19 @@ class ElmhurstSiteNotesExtractor:
r"^(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+\.\d+)(?:\s+(\S.*?))?$"
)
_MANUFACTURER_RE = re.compile(r"^(Manufacturer|Default)\s+(\d+\.\d+)$")
# "Known data" rows (BFRC / SAP Table) lodge the §11 Data-Source cell on
# its own layout line with the U-value following on the next line — and
# carry no Frame Type / Frame Factor / Glazing Gap cells. The joined
# "<source> <U>" `_MANUFACTURER_RE` shape never matches them, so they are
# anchored by this standalone form instead (cert 001431 §11 has one
# "BFRC data" window). "Manufacturer"/"Default" are kept here only for
# symmetry; in practice they always join with the U-value above.
_STANDALONE_DATA_SOURCE_RE = re.compile(
r"^(BFRC data|BFRC|SAP Table|Assessor|Manufacturer|Default)$"
)
# RdSAP 10 §3.7 default window frame factor, used for "known data" rows
# that lodge U and g directly and omit the frame-factor cell.
_DEFAULT_FRAME_FACTOR: Final[float] = 0.7
_ORIENTATION_TOKENS = frozenset({
"North", "South", "East", "West", "NE", "NW", "SE", "SW",
})
@ -848,7 +861,7 @@ class ElmhurstSiteNotesExtractor:
)
if not m:
return []
lines = m.group(1).splitlines()
lines = self._merge_split_dimension_lines(m.group(1).splitlines())
# Locate all (data_line, manufacturer_line) pairs in document
# order. Each pair is one window.
@ -898,9 +911,46 @@ class ElmhurstSiteNotesExtractor:
windows.append(window)
return windows
# A "W H" pair on its own line (e.g. "5.79 2.00") whose Area cell the
# layout preprocessor pushed onto the following line as a lone decimal
# ("11.58"). Wider Area columns in the §11 grid trigger the 2+-space
# split; narrower ones keep all three on one line (the 3-decimal anchor).
_WIDTH_HEIGHT_RE = re.compile(r"^(\d+\.\d+)\s+(\d+\.\d+)$")
_AREA_ONLY_RE = re.compile(r"^(\d+\.\d+)$")
def _merge_split_dimension_lines(self, lines: List[str]) -> List[str]:
"""Re-join a window's "W H" line with a following bare-Area line
into the canonical "W H Area" shape the data anchor expects.
Gated on Area W × H (the §11 Area is always the product), so an
unrelated lone decimal below a "W H" line a frame factor, g-value
or U-value is never absorbed. Layouts that already lodge all
three on one line are untouched (their line has 3 decimals, not 2).
"""
merged: List[str] = []
i = 0
while i < len(lines):
wh = self._WIDTH_HEIGHT_RE.match(lines[i].strip())
area = (
self._AREA_ONLY_RE.match(lines[i + 1].strip())
if wh is not None and i + 1 < len(lines) else None
)
if wh is not None and area is not None:
w, h, a = float(wh.group(1)), float(wh.group(2)), float(area.group(1))
if abs(w * h - a) <= 0.05:
merged.append(f"{wh.group(1)} {wh.group(2)} {area.group(1)}")
i += 2
continue
merged.append(lines[i])
i += 1
return merged
def _find_manufacturer_after(self, lines: List[str], data_idx: int) -> Optional[int]:
for j in range(data_idx + 1, min(data_idx + 12, len(lines))):
if self._MANUFACTURER_RE.match(lines[j].strip()):
stripped = lines[j].strip()
if self._MANUFACTURER_RE.match(stripped) or (
self._STANDALONE_DATA_SOURCE_RE.match(stripped)
):
return j
return None
@ -985,6 +1035,20 @@ class ElmhurstSiteNotesExtractor:
# would-be glazing-prefix scan.
inline_glazing_type = anchor.group(4) if anchor.lastindex and anchor.lastindex >= 4 else None
# The data-source line is either the joined "Manufacturer 4.80" shape
# (source keyword + U on one line) or a sparse standalone "BFRC data"
# / "SAP Table" shape (keyword alone, U on the next line, and no frame
# cells lodged). Resolve which up front: a sparse row has no frame
# type/factor to parse.
data_source_line = lines[manuf_idx].strip()
joined_match = self._MANUFACTURER_RE.match(data_source_line)
standalone_match = (
None if joined_match is not None
else self._STANDALONE_DATA_SOURCE_RE.match(data_source_line)
)
if joined_match is None and standalone_match is None:
return None
# frame_type and frame_factor immediately follow the data line.
# Layout-style cell joining sometimes collapses them onto a
# single "Wood 0.70" line; treat both shapes uniformly so the
@ -992,9 +1056,15 @@ class ElmhurstSiteNotesExtractor:
# field (glazing_gap / bp / location / orient).
if data_idx + 1 >= len(lines):
return None
frame_type, frame_factor, middle_start = self._parse_frame_type_and_factor(
lines, data_idx
)
if standalone_match is not None:
# Sparse "known data" row: no frame type/factor/glazing-gap cells;
# everything between W×H×A and the data-source is location/orient.
frame_type, frame_factor = None, self._DEFAULT_FRAME_FACTOR
middle_start = data_idx + 1
else:
frame_type, frame_factor, middle_start = self._parse_frame_type_and_factor(
lines, data_idx
)
if frame_factor is None or not 0.0 < frame_factor <= 1.0:
return None
@ -1017,28 +1087,40 @@ class ElmhurstSiteNotesExtractor:
(t for t in middle if t in self._ORIENTATION_TOKENS), None
)
# Manufacturer line carries data_source + u_value.
manuf_match = self._MANUFACTURER_RE.match(lines[manuf_idx].strip())
if manuf_match is None:
return None
data_source = manuf_match.group(1)
u_value = float(manuf_match.group(2))
# Data-source line carries the source keyword and U-value: joined on
# one line ("Manufacturer 4.80") or, for sparse rows, the keyword alone
# with the U-value on the next line ("BFRC data" / "1.00"). `post_idx`
# is where g_value / draught / shutters begin in either layout.
if joined_match is not None:
data_source = joined_match.group(1)
u_value = float(joined_match.group(2))
post_idx = manuf_idx + 1
else:
assert standalone_match is not None
data_source = standalone_match.group(1)
if manuf_idx + 1 >= len(lines):
return None
try:
u_value = float(lines[manuf_idx + 1].strip())
except ValueError:
return None
post_idx = manuf_idx + 2
# Post-manufacturer: g_value, draught, shutters.
if manuf_idx + 3 >= len(lines):
# Post-data-source: g_value, draught, shutters.
if post_idx + 2 >= len(lines):
return None
try:
g_value = float(lines[manuf_idx + 1].strip())
g_value = float(lines[post_idx].strip())
except ValueError:
return None
draught_proofed = lines[manuf_idx + 2].strip().lower() == "yes"
permanent_shutters = lines[manuf_idx + 3].strip()
draught_proofed = lines[post_idx + 1].strip().lower() == "yes"
permanent_shutters = lines[post_idx + 2].strip()
# Prefix / suffix tokens (variable count) carry the
# glazing-type, building-part, and orientation strings split by
# the layout preprocessor.
before = [lines[j].strip() for j in range(before_start, data_idx) if lines[j].strip()]
after = [lines[j].strip() for j in range(manuf_idx + 4, after_end) if lines[j].strip()]
after = [lines[j].strip() for j in range(post_idx + 3, after_end) if lines[j].strip()]
# Room-in-roof windows lodge their location as "Roof of Room in
# Roof" (wrapped across the prefix/suffix blocks). Detect it, pull

Binary file not shown.

Binary file not shown.

View file

@ -0,0 +1,151 @@
"""Worksheet pins for the cat-10 electric-room-heater dwelling (ref 001431).
Fixture: `sap worksheets/Recommendations Elmhurst Files/main heating/high
heat retention storage heaters/electric room heaters/before/` Summary
(site-notes input) + P960 (the `(1)..(286)` worksheet ground truth). The
dwelling lodges main `sap_main_heating_code=691` (electric room heaters),
control `2601`, an `18 Hour` meter, and water heating `sap_code=909`
(electric instantaneous, single-point at the point of use — NO cylinder,
NO solar, NO WWHRS).
Per [[feedback-worksheet-not-api-reference]] + [[feedback-zero-error-strict]]
the worksheet PDF is the 1e-4 target. Each pin below is a P960 line ref
transcribed to 4 d.p. and asserted via `abs(x - y) <= 1e-4` against the
extractor → mapper → cascade output.
Because the SAP 10.2 worksheet computes the rating block (UK-average
climate, Table 12 regulated prices) separately from the EPC block
(postcode climate, Table 32 prices), the rating-mode cascade
(`cert_to_inputs`) is pinned against the rating block and the demand-mode
cascade (`cert_to_demand_inputs`) against the EPC block.
"""
from __future__ import annotations
import re
import subprocess
from pathlib import Path
from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtractor
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
from domain.sap10_calculator.calculator import calculate_sap_from_inputs
from domain.sap10_calculator.rdsap.cert_to_inputs import (
SAP_10_2_SPEC_PRICES,
_is_timber_or_steel_frame, # pyright: ignore[reportPrivateUsage]
cert_to_inputs,
)
_FIXTURE_DIR = (
Path(__file__).parents[3]
/ "sap worksheets/Recommendations Elmhurst Files/main heating"
/ "high heat retention storage heaters/electric room heaters/before"
)
# P960 line ref (219) "Water heating fuel used" — rating block. The water
# heater is electric (efficiency (216) = 100 %), so (219) == (64) output.
_WORKSHEET_LINE_219_WATER_FUEL_KWH = 1770.2313
# P960 line ref (38)m "Ventilation heat loss calculated monthly" — rating
# block, mean of the 12 printed monthly values
# (90.1949 .. 86.1692) / 12. The dwelling is SY System Build (masonry per
# RdSAP 10 §2), so the structural infiltration (11) = 0.35 not 0.25.
_WORKSHEET_LINE_38_VENT_HEAT_LOSS_MEAN_W_PER_K = 83.3613
_ABS_TOLERANCE = 0.0001
def _summary_pdf_to_pages(pdf: Path) -> list[str]:
    """Summary PDF → one Textract-style token string per page (the same
    `pdftotext -layout` whitespace-split preprocessing the rest of the
    documents_parser chain tests use).

    Each page is rendered with `pdftotext -layout`, every layout line is
    split on runs of two or more whitespace characters, and the non-empty
    cells are joined with newlines.

    Raises:
        subprocess.CalledProcessError: if `pdfinfo` or `pdftotext` exits
            non-zero, so a broken fixture or tool fails here instead of
            producing empty pages.
        AssertionError: if `pdfinfo` prints no "Pages:" line.
    """
    page_count_text = subprocess.run(
        ["pdfinfo", str(pdf)], capture_output=True, text=True, check=True
    ).stdout
    page_count_match = re.search(r"Pages:\s+(\d+)", page_count_text)
    assert page_count_match is not None, f"no page count in {pdf}"
    page_count = int(page_count_match.group(1))

    pages: list[str] = []
    for page_index in range(1, page_count + 1):
        # -f/-l pin pdftotext to a single page so the output maps 1:1
        # onto the returned list.
        layout = subprocess.run(
            [
                "pdftotext", "-layout",
                "-f", str(page_index), "-l", str(page_index),
                str(pdf), "-",
            ],
            capture_output=True,
            text=True,
            check=True,
        ).stdout
        pages.append(
            "\n".join(
                token
                for line in layout.splitlines()
                for token in re.split(r"\s{2,}", line.strip())
                if token
            )
        )
    return pages
def test_electric_room_heater_water_fuel_matches_worksheet_line_219() -> None:
    # Arrange — the before/ Summary goes through the full extractor →
    # mapper → rating cascade. Water heating SAP code 909 is a single-point
    # electric instantaneous heater at the point of use, so per SAP 10.2 §4
    # (p.23, l.1416) there is NO distribution loss: worksheet (46)m = 0 and
    # (62)m = 0.85 × (45)m collapses to the (219) fuel.
    fixture_pdf = next(_FIXTURE_DIR.glob("Summary_*.pdf"))
    survey = ElmhurstSiteNotesExtractor(_summary_pdf_to_pages(fixture_pdf)).extract()
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)

    # Act
    cascade_inputs = cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)
    rating = calculate_sap_from_inputs(cascade_inputs)

    # Assert — pinned to the worksheet value within the 1e-4 tolerance.
    deviation = abs(rating.hot_water_kwh_per_yr - _WORKSHEET_LINE_219_WATER_FUEL_KWH)
    assert deviation <= _ABS_TOLERANCE
def test_system_build_wall_is_classified_masonry_for_structural_infiltration() -> None:
    # Arrange — the walls are SY System Build (wall_construction code 6).
    # RdSAP 10 §2 (Ventilation, "Walls" row) says "System build: treated as
    # masonry", so the dwelling must NOT pick up the 0.25 steel/timber-frame
    # structural infiltration — only code 5 (timber frame) does.
    fixture_pdf = next(_FIXTURE_DIR.glob("Summary_*.pdf"))
    survey = ElmhurstSiteNotesExtractor(_summary_pdf_to_pages(fixture_pdf)).extract()
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)
    main_wall_code = epc.sap_building_parts[0].wall_construction

    # Act
    treated_as_frame = _is_timber_or_steel_frame(epc.sap_building_parts)

    # Assert
    assert main_wall_code == 6
    assert treated_as_frame is False
def test_electric_room_heater_ventilation_heat_loss_matches_worksheet_line_38() -> None:
    # Arrange — treating SY System Build as masonry gives structural
    # infiltration (11) = 0.35, which lifts the effective air change (25)
    # and the monthly ventilation heat loss (38)m = 0.33 × (25)m × (5) to
    # the worksheet's rating-block values.
    fixture_pdf = next(_FIXTURE_DIR.glob("Summary_*.pdf"))
    survey = ElmhurstSiteNotesExtractor(_summary_pdf_to_pages(fixture_pdf)).extract()
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)

    # Act
    cascade_inputs = cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)
    rating = calculate_sap_from_inputs(cascade_inputs)
    observed_w_per_k = rating.intermediate["infiltration_w_per_k"]

    # Assert — pinned to the worksheet mean within the 1e-4 tolerance.
    deviation = abs(observed_w_per_k - _WORKSHEET_LINE_38_VENT_HEAT_LOSS_MEAN_W_PER_K)
    assert deviation <= _ABS_TOLERANCE

View file

@ -45,7 +45,11 @@ from datatypes.epc.domain.mapper import (
_elmhurst_glazing_type_code, # pyright: ignore[reportPrivateUsage]
)
from domain.sap10_calculator.calculator import calculate_sap_from_inputs
from domain.sap10_calculator.rdsap.cert_to_inputs import SAP_10_2_SPEC_PRICES, cert_to_inputs
from domain.sap10_calculator.rdsap.cert_to_inputs import (
SAP_10_2_SPEC_PRICES,
cert_to_inputs,
heat_transmission_section_from_cert,
)
from domain.sap10_ml.rdsap_uvalues import u_party_wall
from tests.domain.sap10_calculator.worksheet import (
_elmhurst_worksheet_000474 as _w000474,
@ -78,6 +82,7 @@ _SUMMARY_000884_PDF = _FIXTURES / "Summary_000884.pdf" # cert 9421 (Normal cyli
_SUMMARY_000910_PDF = _FIXTURES / "Summary_000910.pdf" # cert 0036 (Flat, party wall U=0)
_SUMMARY_000890_PDF = _FIXTURES / "Summary_000890.pdf" # cert 7800 (two electric showers)
_SUMMARY_000565_PDF = _FIXTURES / "Summary_000565.pdf" # cert 000565 (5-bp Elmhurst-only)
_SUMMARY_001431_CASE20_PDF = _FIXTURES / "Summary_001431_case20.pdf" # sim case 20 (storage heaters + RR type-2 + wrapped "Double between 2002 and 2021" glazing)
# GOV.UK EPB API JSON for cert 001479 — the API-path counterpart of the
# Summary_001479.pdf fixture. Together they drive the API ≡ Summary
@ -127,6 +132,36 @@ def _summary_pdf_to_textract_style_pages(pdf_path: Path) -> list[str]:
return pages
def test_summary_001431_case20_extracts_all_five_section11_windows() -> None:
    # Arrange — sim case 20's §11 lodges 5 windows, all labelled "Double
    # between 2002 and 2021". The label wraps onto two PDF lines, so
    # pdftotext interleaves the continuation ("and 2021") with the next
    # row's cells — a layout the window parser has to survive.
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_001431_CASE20_PDF)

    # Act
    extracted = ElmhurstSiteNotesExtractor(textract_pages).extract()

    # Assert
    window_count = len(extracted.windows)
    assert window_count == 5
def test_summary_001431_case20_fabric_heat_loss_matches_worksheet_line_33() -> None:
    # Arrange — sim case 20's room-in-roof (type 2, Detailed) lodges two
    # "Stud Wall" surfaces with a §8.1 Default U-value of 0.00; the P960
    # worksheet §3 leaves them out of fabric heat loss, giving
    # (33) = 285.9847 W/K.
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_001431_CASE20_PDF)
    survey = ElmhurstSiteNotesExtractor(textract_pages).extract()
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)

    # Act
    heat_transmission = heat_transmission_section_from_cert(epc)

    # Assert
    deviation = abs(heat_transmission.fabric_heat_loss_w_per_k - 285.9847)
    assert deviation <= 1e-4
def test_summary_000474_mapper_produces_three_building_parts() -> None:
# Arrange — cert U985-0001-000474 is a mid-terrace with 3 building
# parts (Main + 2 extensions) per the hand-built worksheet fixture
@ -4469,3 +4504,64 @@ def test_elmhurst_wall_is_basement_disambiguates_system_built_from_basement() ->
# Other constructions defer to the API code-6 heuristic.
assert _elmhurst_wall_is_basement("CA Cavity") is None
assert _elmhurst_wall_is_basement("") is None
def test_gas_boiler_main_fuel_inferred_from_mains_gas_meter_when_hw_is_electric() -> None:
    # Arrange — the boiler-2/before variant of cert 001431 lodges §14.0
    # "Main Heating SAP Code: 102" (a Table 4b gas-boiler row, 101-119)
    # with NO §14.0 "Fuel Type" string, plus a SEPARATE electric immersion
    # for hot water (§15.0 "Water Heating Fuel Type: Electricity", SAP code
    # 909). Electricity is not a gas/LPG carrier, so the §15.0 water-fuel
    # disambiguation cannot fire; the mapper used to leave main_fuel_type
    # empty and the cascade strict-raised MissingMainFuelType. The §14.2
    # Meters "Main gas: Yes" lodgement is the authoritative carrier signal:
    # a 101-119 gas boiler on mains gas burns mains gas (SAP10 main_fuel 26
    # per _ELMHURST_MAIN_FUEL_TO_SAP10 "Mains gas").
    from datatypes.epc.domain.mapper import (
        _elmhurst_gas_boiler_main_fuel,  # pyright: ignore[reportPrivateUsage]
    )

    boiler_code = 102
    immersion_fuel_code = 30  # §15.0 "Electricity" → Table 32 code 30

    # Act — hot water is electric, but the dwelling has a mains-gas meter.
    fuel = _elmhurst_gas_boiler_main_fuel(
        boiler_code, immersion_fuel_code, main_gas=True
    )

    # Assert — resolves to mains gas (26) rather than strict-raising.
    assert fuel == 26
def test_gas_boiler_main_fuel_prefers_section_15_gas_carrier_over_meter() -> None:
    # Arrange — when §15.0 DOES resolve a gas/LPG carrier (a combi heats
    # space + water from the one appliance) it remains authoritative, so a
    # bottled-LPG boiler (main_fuel 5) must not be overwritten by the
    # mains-gas meter flag.
    from datatypes.epc.domain.mapper import (
        _elmhurst_gas_boiler_main_fuel,  # pyright: ignore[reportPrivateUsage]
    )

    bottled_lpg_code = 5  # bottled LPG

    # Act
    fuel = _elmhurst_gas_boiler_main_fuel(104, bottled_lpg_code, main_gas=True)

    # Assert — the §15.0 gas/LPG carrier wins over the meter.
    assert fuel == 5
def test_gas_boiler_main_fuel_without_mains_gas_meter_still_unresolved() -> None:
    # Arrange — no mains-gas meter AND an electric §15.0: the carrier
    # genuinely cannot be determined (e.g. an LPG boiler whose §15.0 lodges
    # an electric immersion), so the helper returns None and the caller
    # strict-raises instead of guessing.
    from datatypes.epc.domain.mapper import (
        _elmhurst_gas_boiler_main_fuel,  # pyright: ignore[reportPrivateUsage]
    )

    # Act
    fuel = _elmhurst_gas_boiler_main_fuel(102, 30, main_gas=False)

    # Assert
    assert fuel is None

View file

@ -1455,6 +1455,7 @@ class EpcPropertyDataMapper:
bp.roof_construction,
bp.roof_insulation_thickness,
bp.construction_age_band,
bp.sloping_ceiling_insulation_thickness,
),
sap_room_in_roof=_api_build_room_in_roof(
bp.sap_room_in_roof,
@ -1730,6 +1731,7 @@ class EpcPropertyDataMapper:
bp.roof_construction,
bp.roof_insulation_thickness,
bp.construction_age_band,
bp.sloping_ceiling_insulation_thickness,
),
sap_room_in_roof=_api_build_room_in_roof(
bp.sap_room_in_roof,
@ -2625,31 +2627,60 @@ def _api_floor_construction_str(value: Optional[int]) -> Optional[str]:
# 1 = "To external air" — exposed floor (cantilever / passageway)
# 2 = "To unheated space" — over garage / unheated basement /
# crawlspace; cert 7536 Main lodges this
# 3 = "To unheated space" — variant lodged by cert 7536 Ext2 with
# the same top-level floors[] description
# as code 2; route to the same cascade
# signal until a fixture forces them apart
# 6 = "(another dwelling below)" — top-floor flat over a party floor;
# cert 9501 lodges this. The cascade's
# floor-as-party-floor dispatch already
# handles this via `property_type=Flat` +
# cert.floors[].description, so the
# floor_type string from this helper is
# not consumed for the (12) spec rule
# in that path — explicit None preserves
# the cert 9501 cascade match without
# silently letting unknown codes through.
# 3 = "(other premises below)" — the lowest floor sits over non-domestic
# "other premises" (heated, but at different
# times), so it is "above a partially heated
# space" per RdSAP 10 §3.12 (PDF p.25) → the
# §5.14 constant U=0.7 W/m²K. The independent
# floors[].description resolves this: all 13
# code-3 certs in the 2026 sample lodge
# "(other premises below)". `_api_build_sap
# _floor_dimensions` sets is_above_partially
# _heated_space on the floor=0 dimension;
# this string (!= "Ground floor", != "another
# dwelling below") is inert metadata.
# 6 = "(another dwelling below)" — the floor sits over another heated
# dwelling (e.g. an upper-floor flat, or a
# ground-floor flat above a basement flat),
# so it is a party floor with no heat loss
# (RdSAP 10 §3). The heat-transmission step
# reads this string to suppress the BP's
# floor area, mirroring the roof's "another
# dwelling above" party override — the
# dwelling-level exposure heuristic (keyed
# only on the dwelling_type label) defaults
# has_exposed_floor=True for a ground-floor
# flat, so the per-BP lodgement is needed to
# override it. It is != "Ground floor", so
# the §5 (12) suspended-timber rule stays
# inert (short-circuits exactly as None did).
# 7 = "Ground floor" — typical ground-floor heat loss
# 8 = "(another dwelling below)" — observed on EXTENSION building parts
# whose floor sits over a heated space
# within the SAME dwelling (an upper-storey
# extension over a heated room). RdSAP 10 §3
# gives an internal floor between heated
# storeys no floor heat loss — mechanically
# identical to a code-6 party floor, so it
# reuses that suppression string (consumed
# by heat_transmission's party-floor
# override; != "Ground floor" so §5 (12)
# stays inert). Empirically confirmed: both
# code-8 certs land within 0.5 of lodged
# (0370-2254 68.9 vs 69; 0997-1206 40.7 vs
# 41), while Ground-floor / unheated /
# external mappings miss 0997 by ~4 SAP.
#
# Codes 4/5/8+ are not yet observed in any fixture; the strict-raise
# path catches them at the extraction boundary so the next cert forces
# an explicit mapping decision.
# Codes 4/5 are not yet observed in any fixture; the strict-raise path
# catches them at the extraction boundary so the next cert forces an
# explicit mapping decision.
_API_FLOOR_HEAT_LOSS_TO_FLOOR_TYPE: Dict[int, Optional[str]] = {
1: "To external air",
2: "To unheated space",
3: "To unheated space",
6: None,
3: "(other premises below)",
6: "(another dwelling below)",
7: "Ground floor",
8: "(another dwelling below)",
}
@ -2698,6 +2729,19 @@ def _api_roof_construction_str(value: Optional[int]) -> Optional[str]:
_API_FLOOR_HEAT_LOSS_EXPOSED: Final[int] = 1
# API `floor_heat_loss` integer that signals a floor above a partially
# heated space. The independent `floors[].description` field resolves the
# code: floor_heat_loss=3 lodges "(other premises below)" (13/13 certs in
# the 2026 sample). Per RdSAP 10 §3.12 (PDF p.25) a flat's floor is "above
# a partially heated space if there are non-domestic premises below
# (heated, but at different times)" — the "other premises" wording. That
# routes the cascade to the §5.14 (PDF p.47) constant U=0.7 W/m²K via
# `u_floor_above_partially_heated_space`, distinct from code 2's "To
# unheated space" (semi-exposed → Table 20) and code 6's "(another dwelling
# below)" (party floor, no heat loss).
_API_FLOOR_HEAT_LOSS_ABOVE_PARTIALLY_HEATED: Final[int] = 3
# GOV.UK API `built_form` integer → SAP10.2 sheltered_sides count per
# RdSAP §S5. Detached has no neighbours shielding wind; terraced
# variants pick up 1-3 sheltered sides via adjacent dwellings. Cross-
@ -2990,6 +3034,9 @@ def _api_build_sap_floor_dimensions(
fixture convention.
"""
is_exposed = floor_heat_loss == _API_FLOOR_HEAT_LOSS_EXPOSED
is_above_partial = (
floor_heat_loss == _API_FLOOR_HEAT_LOSS_ABOVE_PARTIALLY_HEATED
)
out: List[SapFloorDimension] = []
for fd in fds or []:
raw_height = _measurement_value(fd.room_height)
@ -3003,6 +3050,7 @@ def _api_build_sap_floor_dimensions(
floor_insulation=fd.floor_insulation,
floor_construction=fd.floor_construction,
is_exposed_floor=is_exposed and fd.floor == 0,
is_above_partially_heated_space=is_above_partial and fd.floor == 0,
))
return out
@ -3017,11 +3065,20 @@ _RIR_TYPE_1_GABLE_HEIGHT_M: Final[float] = 2.45
# `SapRoomInRoofSurface.kind` the cascade's Detailed-RR branch routes by
# U-value. Established from cert 6035's Summary (gable_wall_type_1=1 ↔
# "Exposed" U=0.29; gable_wall_type_2=0 ↔ "Party" U=0.25):
# 0 = Party → `gable_wall` (Table 4 p.22 row 2, U=0.25)
# 1 = Exposed → `gable_wall_external` (Table 4 p.22 row 1, "as common wall")
# 0 = Party → `gable_wall` (Table 4 p.22 row 2, U=0.25)
# 1 = Exposed → `gable_wall_external` (Table 4 p.22 row 1, "as common wall")
# 2 = Sheltered → `gable_wall_sheltered` (Table 4 p.22, U = 1/(1/U_wall + 0.5))
# 3 = Connected → `connected_wall` (Table 4 p.22 row 4, U=0, area deducts)
# Codes 2/3 established from sim case 21 (a replica of API cert
# 2818-3053-3203-2655-9204: gable_wall_type_1=2 lodges "Sheltered",
# gable_wall_type_2=3 lodges "Connected"). The Summary path already routes
# the same string labels to these kinds; the cascade derives the Sheltered
# U from the wall (the API lodges no per-gable U-value).
_API_TYPE_1_GABLE_TYPE_TO_KIND: Dict[int, str] = {
0: "gable_wall",
1: "gable_wall_external",
2: "gable_wall_sheltered",
3: "connected_wall",
}
@ -3194,25 +3251,53 @@ def _api_resolve_wall_insulation_thickness(
return wall_insulation_thickness
def _api_thickness_is_numeric(value: Union[str, int, None]) -> bool:
"""True when an insulation-thickness lodgement carries a measured value
(an int, or a string whose leading characters are digits, e.g. "100mm").
Categorical sentinels ("AB" As Built, "NI" Not Insulated) and None are
NOT numeric. Mirrors the cascade's `_parse_thickness_mm` digit-prefix
rule so the two agree on what counts as an observed thickness."""
if isinstance(value, int):
return True
return isinstance(value, str) and value.strip()[:1].isdigit()
def _api_resolve_sloping_ceiling_thickness(
roof_construction: Optional[int],
roof_insulation_thickness: Union[str, int, None],
age_band: Optional[str],
sloping_ceiling_insulation_thickness: Union[str, int, None] = None,
) -> Union[str, int, None]:
"""Apply Slice 57's pre-1950 sloping-ceiling-roof rule to the API
path: when a "Pitched, sloping ceiling" roof carries no insulation
thickness lodgement on a pre-1950 dwelling (age bands A-D), set
the thickness to 0 mm so the cascade's `u_roof` returns the
uninsulated Table 16 row (U=2.30) rather than the age-band default
(e.g. U=0.40 for age C pitched-with-loft). Mirrors the Elmhurst
`_resolve_sloping_ceiling_thickness` for the API code-based path.
"""Resolve the roof-insulation thickness the cascade should see for a
"Pitched, sloping ceiling" (`roof_construction == 8`) API building part.
Observed on cert 001479 Ext2: age C, roof_construction=8 (PS),
roof_insulation_thickness=None worksheet U=2.30 (uninsulated PS
sloping ceiling); without this rule the cascade returns U=0.40."""
A code-8 roof's ceiling follows the slope, so its insulation is lodged
in the dedicated `sloping_ceiling_insulation_thickness` field, NOT
`roof_insulation_thickness` (which stays None — the loft-joist field is
meaningless for a slope-following ceiling). When that field carries a
NUMERIC thickness it wins: feeding e.g. "100mm" lets `u_roof` reach
Table 17 column (1a) "Insulated slope sloping ceiling, mineral
wool/EPS" (RdSAP 10 §5.11.3 page 44 — 100 mm → U=0.40), instead of
treating the slope as uninsulated (U=2.30). Cert 9884-3059-9202-7506
(code 8, age B, sloping 100 mm) over-stated roof heat loss ~74% before
this preference. A categorical lodgement ("AB" As Built / "NI") is NOT
a measured thickness, so it falls through to the as-built rule below
(Table 18 column (3) age-band default via `is_pitched_sloping_ceiling`,
or the description signal) rather than masking it.
Otherwise the original Slice 57 rule applies: a code-8 roof with NO
thickness lodged anywhere on a pre-1950 dwelling (age bands A-D) gets
0 mm so `u_roof` returns the uninsulated Table 16 row (U=2.30) rather
than the age-band default. Observed on cert 001479 Ext2 (age C, code 8,
both thickness fields None) → worksheet U=2.30."""
if (
roof_construction == 8 # 8 = Pitched, sloping ceiling
and _api_thickness_is_numeric(sloping_ceiling_insulation_thickness)
):
return sloping_ceiling_insulation_thickness
if roof_insulation_thickness is not None:
return roof_insulation_thickness
if roof_construction != 8: # 8 = Pitched, sloping ceiling
if roof_construction != 8:
return roof_insulation_thickness
if age_band is None or age_band.upper() not in _PRE_1950_AGE_CODES:
return roof_insulation_thickness
@ -3832,10 +3917,25 @@ def _map_elmhurst_rir_surface(
# the same Simplified RR (scalar gable fields, no roof-going
# detailed_surfaces; cert 6035) and the gables-only cert 000565.
# Detailed (§3.10) assessments DO measure these surfaces — keep them.
# An RR stud wall (internal knee wall below the slope) is a heat-loss
# surface ONLY when Elmhurst lodges a positive §8.1 Default U-value
# (cert 000565 Ext2 Detailed: 0.31 / 0.10 — real exposed knee walls).
# A Default U-value of 0.00 marks an internal stud wall the P960
# worksheet excludes from BOTH fabric heat loss (§3) and total exposed
# area (31): sim case 20's (33)=285.9847 and (31)=239.68 both omit its
# 2×4 m² studs. Drop only the U=0 (internal) ones; positive-U studs
# fall through to the Table-17 path like slopes/ceilings.
if kind == "stud_wall" and surface.default_u_value == 0.0:
return None
if is_simplified and kind in ("slope", "flat_ceiling", "stud_wall"):
return None
u_value_override: Optional[float] = None
if kind == "gable_wall" and surface.gable_type == "Sheltered":
# Summary lodges the Sheltered Default U-value directly (case 20
# 0.92 / case 21 0.71), so route to gable_wall_external and carry the
# lodged U as the override — the cascade uses it as-is. (The API path
# lodges no per-gable U, so it routes code 2 to the discrete
# `gable_wall_sheltered` kind that DERIVES 1/(1/U_wall+0.5) instead.)
kind = "gable_wall_external"
u_value_override = surface.default_u_value
elif kind == "gable_wall" and surface.gable_type == "Exposed":
@ -4116,6 +4216,14 @@ def _is_elmhurst_roof_window(
_ELMHURST_BP_ROOF_TYPES_WITH_ROOFLIGHTS
):
return True
# U > 3.0 backstop — Elmhurst routes high-U "Double pre 2002" units
# through the worksheet's (27a) Roof Windows line regardless of the
# lodged "External wall" location, which is a §11 lodging artifact
# (cert 000516's W6 is lodged "External wall" yet scored via (27a)).
# The location string is therefore NOT a reliable vertical signal:
# all six of 000516's §11 rows read "External wall", and only U
# separates the five vertical (2.8) panes from the one rooflight
# (3.1). Matching the worksheet means trusting U here, not location.
return w.u_value > _ELMHURST_ROOF_WINDOW_U_THRESHOLD
@ -4600,29 +4708,44 @@ _GAS_BOILER_SAP_MAIN_HEATING_CODES: Final[frozenset[int]] = (
# case still strict-raises `MissingMainFuelType` to force a mapper fix.
_GAS_LPG_MAIN_FUEL_CODES: Final[frozenset[int]] = frozenset({1, 5, 6, 7, 26, 27})
# SAP10 main-fuel code for mains gas (`_ELMHURST_MAIN_FUEL_TO_SAP10`
# "Mains gas"). Used when a Table 4b gas boiler's carrier can't be read
# from §14.0 / §15.0 but the §14.2 Meters "Main gas: Yes" lodgement
# confirms the dwelling is on mains gas.
_MAINS_GAS_MAIN_FUEL_CODE: Final[int] = 26
def _elmhurst_gas_boiler_main_fuel(
    sap_main_heating_code: Optional[int],
    water_heating_fuel_code: Optional[int],
    main_gas: bool = False,
) -> Optional[int]:
    """Derive a gas/LPG main-fuel code for a Table 4b gas boiler whose
    §14.0 "Fuel Type" string is absent (newer Elmhurst export form).

    For a Table 4b gas-boiler row (101-119) the carrier is resolved, in
    priority order:

    1. §15.0 "Water Heating Fuel Type" when it resolves to a gas/LPG
       carrier — the same combi/boiler heats space + water, so §15.0 names
       the boiler's carrier and disambiguates mains-gas-vs-LPG precisely.
    2. The §14.2 Meters "Main gas: Yes" flag → mains gas (code 26). This
       covers a gas boiler paired with a SEPARATE electric immersion (where
       §15.0 lodges "Electricity", not the boiler's fuel): the meter still
       proves the boiler burns mains gas.

    `sap_main_heating_code` is the lodged SAP main-heating code,
    `water_heating_fuel_code` the already-mapped §15.0 fuel code (or None),
    and `main_gas` the §14.2 "Main gas" meter flag. `main_gas` defaults to
    False so callers that pass only the first two arguments never get the
    meter fallback.

    Returns None otherwise (non-gas-boiler code, or a gas boiler with no
    mains-gas meter and a non-gas §15.0 — e.g. an LPG boiler whose carrier
    is genuinely undeterminable), leaving the caller to strict-raise.

    Spec: SAP 10.2 Table 4b "Seasonal efficiency for gas and liquid fuel
    boilers" (PDF p.168) — rows 101-119 are gas-family boilers.
    """
    # Neither fallback applies unless the main heating is a Table 4b
    # gas-boiler row; a None code is simply "not in the set".
    if sap_main_heating_code not in _GAS_BOILER_SAP_MAIN_HEATING_CODES:
        return None
    # Priority 1: §15.0 names a gas/LPG carrier, so it is the boiler's fuel.
    if water_heating_fuel_code in _GAS_LPG_MAIN_FUEL_CODES:
        return water_heating_fuel_code
    # Priority 2: the meter flag can only ever prove mains gas, not LPG.
    if main_gas:
        return _MAINS_GAS_MAIN_FUEL_CODE
    return None
@ -5341,13 +5464,15 @@ def _map_elmhurst_sap_heating(survey: ElmhurstSiteNotes) -> SapHeating:
# gas vs LPG vs biogas). The newer Elmhurst export leaves §14.0
# "Fuel Type" empty and lodges only the SAP code (e.g. 104 condensing
# combi, EES "BGW"); the §15.0 "Water Heating Fuel Type" names the
# carrier because the same combi/boiler heats space + water. Adopt it
# only when it resolves to a gas/LPG fuel, so a regular boiler paired
# with an electric immersion (where §15.0 lodges "Electricity") still
# strict-raises rather than mis-billing the gas boiler as electric.
# carrier because the same combi/boiler heats space + water. When the
# boiler instead pairs with a SEPARATE electric immersion (§15.0
# lodges "Electricity"), the §14.2 Meters "Main gas: Yes" flag is the
# authoritative carrier signal → mains gas. Without either, the gas
# boiler still strict-raises rather than being mis-billed.
if main_fuel_int is None:
main_fuel_int = _elmhurst_gas_boiler_main_fuel(
mh.main_heating_sap_code, water_heating_fuel,
main_gas=survey.meters.main_gas,
)
# Solid-fuel main heating: SAP code rows 150-160 (open / closed
# room heaters with boiler) and 600-636 (independent solid-fuel

View file

@ -765,6 +765,81 @@ class TestApiResolveWallInsulationThickness:
assert resolved == lodged_thickness
class TestApiResolveSlopingCeilingThickness:
    """Thickness resolution for a "Pitched, sloping ceiling" roof.

    A roof with `roof_construction == 8` lodges its insulation in the
    dedicated `sloping_ceiling_insulation_thickness` field, NOT in
    `roof_insulation_thickness` (which stays None — the loft-joist field
    is meaningless for a slope-following ceiling). The cascade must read
    the sloping-ceiling field so it reaches Table 17 column (1a)
    (RdSAP 10 §5.11.3 page 44) — e.g. 100 mm → U=0.40 — rather than the
    uninsulated 2.30. Cert 9884-3059-9202-7506 lodges code 8 / age B /
    sloping_ceiling 100 mm; before this fix the pre-1950 None-fallback
    forced 0 mm (U=2.30) and over-stated roof heat loss ~74%.
    """

    def test_sloping_ceiling_thickness_used_for_code_8(self) -> None:
        # Arrange
        from datatypes.epc.domain.mapper import (
            _api_resolve_sloping_ceiling_thickness,  # pyright: ignore[reportPrivateUsage]
        )

        # Act — roof code 8 with an empty loft-joist thickness and a
        # pre-1950 age band (B), while the sloping ceiling lodges 100 mm.
        thickness: object = _api_resolve_sloping_ceiling_thickness(8, None, "B", "100mm")

        # Assert — the lodged sloping-ceiling value takes precedence over
        # the pre-1950 None → 0 mm fallback.
        assert thickness == "100mm"

    def test_pre_1950_none_fallback_unchanged_without_sloping_field(
        self,
    ) -> None:
        # Arrange
        from datatypes.epc.domain.mapper import (
            _api_resolve_sloping_ceiling_thickness,  # pyright: ignore[reportPrivateUsage]
        )

        # Act — roof code 8, pre-1950 age band, and no thickness lodged in
        # either field.
        thickness: object = _api_resolve_sloping_ceiling_thickness(8, None, "C", None)

        # Assert — the existing Slice 57 behaviour is preserved: 0 mm
        # (U=2.30).
        assert thickness == 0

    def test_as_built_sloping_field_falls_through_to_pre_1950_zero(self) -> None:
        # Arrange
        from datatypes.epc.domain.mapper import (
            _api_resolve_sloping_ceiling_thickness,  # pyright: ignore[reportPrivateUsage]
        )

        # Act — roof code 8, age band B (pre-1950), sloping field lodged as
        # "AB" (As Built — a categorical marker, NOT a measured thickness).
        thickness: object = _api_resolve_sloping_ceiling_thickness(8, None, "B", "AB")

        # Assert — "AB" is not numeric, so it must NOT win; the Slice 57
        # pre-1950 None → 0 mm (U=2.30) rule still applies.
        assert thickness == 0

    def test_sloping_field_ignored_for_non_code_8(self) -> None:
        # Arrange
        from datatypes.epc.domain.mapper import (
            _api_resolve_sloping_ceiling_thickness,  # pyright: ignore[reportPrivateUsage]
        )

        # Act — roof code 5 (vaulted) is not the sloping-ceiling code 8, so
        # the sloping field must not be consumed.
        thickness: object = _api_resolve_sloping_ceiling_thickness(
            5, "200mm", "C", "100mm"
        )

        # Assert — the ordinary roof_insulation_thickness is passed through.
        assert thickness == "200mm"
# ---------------------------------------------------------------------------
# Glazing-type label cleaning — pdftotext gap-column wrap
# ---------------------------------------------------------------------------
@ -812,6 +887,92 @@ class TestElmhurstGlazingTypeWrappedGap:
assert code == 2
class TestApiFloorTypeCode:
    """Mapping of the API `floor_heat_loss` code to a floor-position string.

    `_api_floor_type_str` maps the GOV.UK API integer floor_heat_loss code
    to the floor-position string the cascade reads. Code 6 ("another
    dwelling below") must surface "(another dwelling below)" so the
    heat-transmission step can suppress that BP's floor as a party floor
    (RdSAP 10 §3) — it previously mapped to None and the floor leaked
    heat-loss area. Cert 2115-4121-4711-9361-3686 (ground-floor flat over
    another dwelling) under-rated ~23 SAP from the over-counted floor.
    """

    def test_code_6_maps_to_another_dwelling_below(self) -> None:
        # Arrange
        from datatypes.epc.domain.mapper import _api_floor_type_str  # pyright: ignore[reportPrivateUsage]

        # Act
        floor_type = _api_floor_type_str(6)

        # Assert — a party-floor signal the cascade consumes (not None).
        assert floor_type == "(another dwelling below)"

    def test_code_7_still_maps_to_ground_floor(self) -> None:
        # Arrange — regression guard: the ground-floor signal that the
        # §5 (12) suspended-timber rule keys on must stay unchanged.
        from datatypes.epc.domain.mapper import _api_floor_type_str  # pyright: ignore[reportPrivateUsage]

        # Act / Assert
        floor_type = _api_floor_type_str(7)
        assert floor_type == "Ground floor"

    def test_code_8_maps_to_no_floor_heat_loss(self) -> None:
        # Arrange — code 8 is observed on EXTENSION building parts whose
        # floor sits over a heated space within the same dwelling (an
        # upper-storey extension over a heated room). RdSAP 10 §3 treats an
        # internal floor between heated storeys as no floor heat loss —
        # mechanically identical to a code-6 party floor. Empirically
        # confirmed: routing code 8 to the no-heat-loss treatment lands
        # both code-8 certs within 0.5 of lodged (0370-2254 68.9 vs 69;
        # 0997-1206 40.7 vs 41), whereas Ground-floor / unheated / external
        # mappings miss 0997 by ~4 SAP. Reuses code 6's suppression string
        # (consumed by heat_transmission's party-floor override); it is
        # != "Ground floor", so the §5 (12) suspended-timber rule stays
        # inert. Before this, code 8 raised UnmappedApiCode, blocking the
        # cert.
        from datatypes.epc.domain.mapper import _api_floor_type_str  # pyright: ignore[reportPrivateUsage]

        # Act / Assert — no-heat-loss signal (not None, not "Ground floor").
        floor_type = _api_floor_type_str(8)
        assert floor_type == "(another dwelling below)"

    def test_code_3_maps_to_other_premises_below(self) -> None:
        # Arrange — code 3 ↔ "(other premises below)" (confirmed 9/9 on
        # single-bp certs in the 2026 API sample). RdSAP 10 §3.12 (PDF p.25)
        # classes a floor over non-domestic "other premises" (heated at
        # different times) as "above a partially heated space" → §5.14
        # constant U=0.7. The string is != "Ground floor" / "(another
        # dwelling below)", so it is inert metadata; the U-routing is driven
        # by the `is_above_partially_heated_space` floor-dimension flag.
        from datatypes.epc.domain.mapper import _api_floor_type_str  # pyright: ignore[reportPrivateUsage]

        # Act / Assert
        floor_type = _api_floor_type_str(3)
        assert floor_type == "(other premises below)"

    def test_code_3_sets_above_partially_heated_space_on_lowest_floor(self) -> None:
        # Arrange — the floor-dimension builder flags floor_heat_loss=3 →
        # is_above_partially_heated_space on the lowest storey (floor==0)
        # only, so the cascade routes that floor to U=0.7 (§5.14) and the
        # heat-transmission step keeps its area even on a flat whose
        # dwelling-level exposure defaults has_exposed_floor=False.
        from datatypes.epc.domain.mapper import _api_build_sap_floor_dimensions  # pyright: ignore[reportPrivateUsage]
        from datatypes.epc.schema.rdsap_schema_21_0_1 import (
            SapFloorDimension as ApiSapFloorDimension,
        )

        # Two otherwise identical storeys: the lowest (0) and one above (1).
        storeys = [
            ApiSapFloorDimension(
                floor=level,
                room_height=2.5,
                total_floor_area=50.0,
                party_wall_length=0.0,
                heat_loss_perimeter=28.0,
            )
            for level in (0, 1)
        ]

        # Act
        built = _api_build_sap_floor_dimensions(storeys, floor_heat_loss=3)

        # Assert — lowest floor flagged, upper storey not.
        assert built[0].is_above_partially_heated_space is True
        assert built[1].is_above_partially_heated_space is False
class TestApiFloorConstructionCode:
"""`_api_floor_construction_str` maps the GOV.UK API integer
floor_construction code to the description string the cascade's

View file

@ -254,6 +254,13 @@ class SapBuildingPart:
wall_insulation_thermal_conductivity: Optional[Union[str, int]] = None
floor_insulation_thickness: Optional[str] = None
flat_roof_insulation_thickness: Optional[Union[str, int]] = None
# Lodged insulation thickness (e.g. "100mm") for a "Pitched, sloping
# ceiling" roof (roof_construction == 8), whose ceiling follows the
# slope so the insulation is NOT at the loft joists. Previously
# undeclared → dropped by `from_dict`, leaving the cascade to treat
# the slope as uninsulated (Table 16 / Table 18 fallback). Consumed by
# `_api_resolve_sloping_ceiling_thickness` → Table 17 column (1a).
sloping_ceiling_insulation_thickness: Optional[Union[str, int]] = None
@dataclass

View file

@ -292,6 +292,13 @@ class SapBuildingPart:
wall_insulation_thermal_conductivity: Optional[Union[str, int]] = None
floor_insulation_thickness: Optional[str] = None
flat_roof_insulation_thickness: Optional[Union[str, int]] = None
# Lodged insulation thickness (e.g. "100mm") for a "Pitched, sloping
# ceiling" roof (roof_construction == 8), whose ceiling follows the
# slope so the insulation is NOT at the loft joists. Previously
# undeclared → dropped by `from_dict`, leaving the cascade to treat
# the slope as uninsulated (Table 16 / Table 18 fallback). Consumed by
# `_api_resolve_sloping_ceiling_thickness` → Table 17 column (1a).
sloping_ceiling_insulation_thickness: Optional[Union[str, int]] = None
@dataclass

View file

@ -0,0 +1,150 @@
# Handover — API SAP accuracy (session 2): fabric + tariff fixes, and why we now need worksheets
**Branch:** `feature/per-cert-mapper-validation` (long-lived working branch — **NEVER PR to
main**; the user pushes/PRs when ready). **HEAD `4d1a58b8`**, local-only ahead of origin.
**READ ALSO:** `docs/HANDOVER_COST_DECOMPOSITION.md` (the decomposition method + price
calibration), and the auto-memory `project_per_cert_mapper_validation_state` (full slice log
+ deproven approaches).
## THE GOAL (unchanged, and we are FAR from it)
100% of API records with a lodged SAP must compute within **0.5 SAP** of the API's
`energy_rating_current`. `scripts/eval_api_sap_accuracy.py` headline (905 computed certs):
| metric | session-2 start | now (`4d1a58b8`) |
|--------|-----------------|------------------|
| **% \|err\| < 0.5** | 43.8% | **45.0%** |
| % \|err\| < 1.0 | | 59.4% |
| % \|err\| < 2.0 | | 77.6% |
| mean \|err\| | 2.01 | 1.757 |
| **mean signed** | −0.31 | **+0.019** |
| p99 \|err\| | — | 17.2 |
| max \|err\| | — | 61.4 |
**Be honest about where this is: 45% within 0.5 is poor.** The headline barely moved
(+1.2pp) across 6 fixes because each clean cause is small (10-30 certs). What DID change
decisively is the **signed bias: −0.31 → +0.02**. The systematic under-rating that defined
the sample at session start is gone — the remaining error is **bidirectional scatter**, ~55%
of certs are >0.5 off in BOTH directions, and there is **no single lever left that moves the
headline by more than ~0.3pp.** Further progress is per-cause, and increasingly needs
worksheet ground truth (see "Why we need worksheets" below).
## WHAT SHIPPED THIS SESSION (7 commits, all green, pyright net-zero)
1. `98f71d25` **decomposition tool** `scripts/decompose_api_cost_error.py` — calibrates the
consumer price from accurate gas certs (gas £0.0809, elec £0.2839/kWh), predicts each
component cost, clusters by (component × direction). **CAVEAT: it uses the STANDARD elec
price, so it MIS-FLAGS off-peak-heated certs as `heat:high`.** For electric certs compare
against the cascade's own cost intermediates (`SapResult.intermediate['main_heating_cost_gbp']`
etc.), not the decomposition.
2. `bb830741` **sloping-ceiling** — `roof_construction=8` carries `sloping_ceiling_insulation_thickness`
("100mm"); the mapper dropped it. Now fed → Table 17 col (1a). 9884 −5.5 → +0.06.
3. `6b045146` **gas-boiler fuel from §14.2 mains-gas meter** (Summary/Elmhurst path) — a
Table-4b gas boiler with a SEPARATE electric immersion (§15 "Electricity") used to raise
`MissingMainFuelType`; now falls back to the "Main gas: Yes" meter flag → mains gas.
4. `3aed8f85` **floor "another dwelling below" (code 6)** — party floor, no heat loss
(mirror of the roof's "another dwelling above" override). 2115 floor 47.85→0 W/K, 23→4.
5. `a64e857b` **roof "Unknown insulation" → Table 18** (§5.11.4) — "NI"=Not Indicated
(undetermined), not zero; routes to age-band default not 2.30. Cluster mean|err| 7.8→1.8.
6. `678aa7af` **main-roof U ignores Room-in-Roof "no insulation" leak** — `_joined_descriptions`
concatenated ALL roofs[], so an RR "no insulation" contaminated the main-roof U. Now drops
"Roof room(s)" entries for the main-roof U (RR shell unaffected; golden 6035 safe).
7. `4d1a58b8` **Unknown-meter + storage/CPSU → off-peak tariff** (§12) — storage heaters
charge overnight; an Unknown (code-3) meter no longer bills their charge at standard
13.19p. `rdsap_tariff_for_cert` infers off-peak for Rule-1 CPSU/Rule-2 storage only; and
`_fuel_cost` now uses `_rdsap_tariff` (not raw `tariff_from_meter_type`). 7336 26 → 0.16.
## DEPROVEN — do NOT retry (empirically failed this session)
- **roof `'ND'` (Not Determined) → Table 18.** `'ND'` is on ~305/905 certs and the lodged
calc genuinely uses the description's high U for many; routing all 'ND' to age-default broke
9 certs (some 0 → +15) for zero net gain. The description is load-bearing even with 'ND'.
(The narrow "**unknown**" word IS a clean signal — that's slice `a64e857b`.)
- **broad "all §12 Rule-3 electric → off-peak on Unknown meters".** Net-NEGATIVE (44.9→44.8,
bias flipped +0.16). Room-heater dwellings (code 691) over-credit when forced off-peak
(their electric-immersion HW goes off-peak). Direct-boiler 191 alone is +0.1 but requires a
191-vs-691 split that is NOT spec-grounded (both are Rule 3) — a population data-fit; left
unshipped on purpose (the user's principle: RdSAP is deterministic, no overfitting).
- **RR shell U Table-17-50mm** (from session 1, still true): golden 6035 disproves it.
## THE REMAINING CLUSTER MAP (where the error lives now)
Run `scripts/decompose_api_cost_error.py` for the live table. As of `4d1a58b8`:
| cluster | n | within 0.5 | note |
|---------|---|-----------|------|
| `heat:high` | 319 | 39% | we over-state heating energy (or off-peak mis-priced) |
| `heat:low` | 229 | 47% | we under-state heating energy |
| `hw:low` | 161 | 50% | |
| `hw:high` | 120 | 43% | |
| `balanced` | 76 | 55% | |
By dwelling type / system (from `_results.csv`):
- **Flats (prop 2): 283 certs, 31% within 0.5** — still the worst segment by far (houses 50%,
bungalows 59%). Signed 0.24. The fabric/tariff fixes helped but flats remain hardest.
- **Heat pumps (cat 4): 20 certs, 45% within 0.5, mean signed +1.43, mean|err| 3.81** — a
distinct OVER-rating cluster, UNTOUCHED this session. These have PCDB indices (e.g. 9472
+15.0 idx 104351, 2789 +13.4 idx 104632, 4135 +10.0 idx 106465). Likely an Appendix-N /
PCDB efficiency or HW-from-HP issue. **Good next target — it's a coherent over-rate cluster,
and HPs may be pinnable from a worksheet.**
- **Top single offenders** (see eval TOP-40): 2100 −61 (n_bps=2, electric, prop 0), 2958 +32
(single-bp electric), 0390 −29 (flat, "Flat no insulation"+ND roof — the deproven path),
2080 −25 (electric direct-boiler flat — mixed cause), 7921 −23 (gas, PCDB idx 16814).
## WHY WE NEED WORKSHEETS NOW (the user has accepted this)
The decomposition method got us the directional bias (under-rating → balanced). It is now
**exhausted for the bidirectional scatter** because:
1. For **electric/off-peak certs** the consumer-price `*_cost_current` fields diverge from the
SAP Table-12 prices the rating actually uses — the lodged total can EXCEED ours while the
lodged SAP is HIGHER. So we cannot back-calculate a reliable kWh/cost target.
2. The remaining causes (HW immersion off-peak charge-vs-on-demand split; HP Appendix-N
efficiency + HP-DHW; per-cert fabric like 2100's 61) are **sub-component values that the
±10% calibration cannot resolve** — they need a line-ref pin.
**What to generate (in priority order):** Elmhurst worksheets (P960 + Summary) for —
- **A heat pump cat-4 cert that over-rates**, e.g. `9472-3052-6202-0766-7200` (+15.0, idx
104351) or `2789-8331-7179-3314-1150` (+13.4). Pin §9b HP efficiency (Appendix N / Table
4a), the (206)/(207) seasonal eff, and HW-from-HP. This is the cleanest coherent cluster.
- **A meter-3 electric flat with electric-immersion HW**, e.g. `2474-3059-4202-4496-3200`
(−13.3, cat-2 direct-boiler 191) or `2080` (−25.5). Pin EXACTLY how RdSAP bills the
electric-immersion HW (§4 + Table 12a) and direct-acting heating on an off-peak tariff —
this resolves whether Rule-3 electric on Unknown meters should be off-peak (the unshipped
191 question) and the HW-off-peak split.
- (Optional) **2100-5421-0922-1622-3463** (61, the worst) — 2 building parts, electric; a
worksheet would localise whether it's a §3 geometry or heating blowup.
The faithful-reproduction rule still holds: **use the cert's OWN data** (its API JSON is in
`/tmp/epc_2026_sample/<cert>.json`; generate the Elmhurst worksheet from the same property),
NOT a template-edited 001431. Template edits drift (session-1 lesson).
## TOOLS & CONVENTIONS
- `PYTHONPATH=/workspaces/model python scripts/eval_api_sap_accuracy.py` — headline + TOP-40
+ per-cert `/tmp/epc_2026_sample/_results.csv`.
- `PYTHONPATH=/workspaces/model python scripts/decompose_api_cost_error.py` — component
clusters + `_cost_decomposition.csv` (remember the off-peak caveat above).
- Sample: ~1009 cached API JSONs at `/tmp/epc_2026_sample` (override `EPC_SAMPLE_CACHE`).
- **Conventions (non-negotiable):** one cause = one slice = one commit; **spec citation
(page+line)** in the message; AAA test headers; `abs(x-y)<=tol` not `pytest.approx`;
SAP 10.2 only; **no tolerance-widening / xfail**; pyright strict **net-zero** (baseline-
compare via `git stash`); **stage files BY NAME** (the tree carries unrelated `scripts/`
+ "sap worksheets/" changes — never `git add -A`); RdSAP is **deterministic** — every fix
must be a spec rule, not a population data-fit (the user is firm on this);
`Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>`.
- **REGRESSION after any calculator change:** `tests/domain/sap10_calculator/`,
`backend/documents_parser/tests/`, `datatypes/epc/`, and the golden fixtures (esp. **6035**).
- **Pre-existing failures to IGNORE** (fail on the stashed baseline too, NOT yours):
`test_from_rdsap_schema.py::…::test_total_floor_area`, and the 2 stone-wall U tests in
`domain/sap10_ml/tests/test_rdsap_uvalues.py` (`…stone_granite_thin_wall_age_a_120mm…`,
`…stone_sandstone…`) — likely fallout from the §5.7 wall-U slice `27375d93`; worth a
separate fix but not yours to count against net-zero.
## ARCHITECTURE NOTES THAT COST TIME (so you don't re-discover them)
- The API cost path uses `inputs.fuel_cost` (the Table-32/12a **precompute**, `_fuel_cost`),
NOT the scalar `space_heating_fuel_cost_gbp_per_kwh`. `calculator.py:540` picks the
precompute when populated, ELSE the legacy scalar fields. `_fuel_cost` returns a ZERO
sentinel for any off-peak tariff → the calculator then falls back to the legacy scalar
fields (which DO carry the off-peak rate from `_space_heating_fuel_cost_gbp_per_kwh`). So a
tariff change only bites if it flips `_fuel_cost`'s tariff off STANDARD.
- `_table_12a_system_for_main` maps cat-10 room heaters → `OTHER_DIRECT_ACTING_ELECTRIC` but
leaves storage (401-409, correct: → None → 100% low rate) and **direct-boiler 191 / CPSU as
TODO** (→ None → pure low rate, which OVER-credits 191 on off-peak). Wiring 191/CPSU rows is
a prerequisite if you ever revisit Rule-3-on-Unknown.
- Fuel codes stored on `SapResult` are the RAW API enum (26 = mains gas), not Table-12 codes
— translate via `table_12.API_FUEL_TO_TABLE_12` (the decomposition script does this).

View file

@ -0,0 +1,168 @@
# Handover — API SAP accuracy (session 3): raises cleared, now profile-driven
**Branch:** `feature/per-cert-mapper-validation` (long-lived working branch — **NEVER PR to
main**; the user pushes/PRs when ready). **HEAD `a8e5563a`+** (the profiler commit), local-only
ahead of origin.
**READ ALSO:** the auto-memory `project_per_cert_mapper_validation_state` (full slice log +
deproven approaches + the meter/shower data-fidelity findings), and the earlier
`docs/HANDOVER_API_ACCURACY_S2.md` (session-2 method).
## THE GOAL (unchanged)
100% of API records with a lodged SAP compute within **0.5 SAP** of the API's
`energy_rating_current`. Headline gauge:
`PYTHONPATH=/workspaces/model python scripts/eval_api_sap_accuracy.py`.
| metric | now (`a8e5563a`) |
|--------|------------------|
| **% \|err\| < 0.5** | **45.1%** |
| % \|err\| < 1.0 | 59.4% |
| mean \|err\| | 1.702 |
| mean signed | 0.006 (balanced) |
| computed / raises | **909 / 0** |
| unsupported_schema | 100 (deferred — see below) |
45% is still poor. The systematic bias is gone; remaining error is per-cert scatter + the
profile-surfaced buckets below.
## WHAT SHIPPED THIS SESSION (7 slices, all green, pyright net-zero)
1. `e41a0bc0` **PCDB heat pump w/o SAP code → Table 12a ASHP_APP_N SH split** (0.80 high-rate).
2. `2bc73fb0` **HP-DHW (WHC 901/902/914 + PCDB HP) → Table 12a WH 0.70 split.** Together (1)+(2)
killed the cat-4 heat-pump over-rating bias (+1.43 → +0.06).
3. `449d8c5b` **direct-acting electric boiler (191) → zero primary circuit loss** (SAP Table 3
p.160 zero list names it verbatim).
4. `f4048588` **wall_insulation_thermal_conductivity ignored → §5.8 default λ=0.04.** (See KEY
INSIGHT below — the gov field is RdSAP *output*, not an input.)
5. `1c5675a0` **floor_heat_loss=8 → no floor heat loss** (extension floor over a heated space;
RdSAP §3, like code 6).
6. `a8e5563a` **main_heating_category=9 (warm air) → Table 11 secondary fraction 0.10.**
(4)(5)(6) cleared **all 4 raises** — eval now has zero raises.
7. `(profiler)` **`scripts/profile_api_error.py`** — the new diagnostic (below).
## SESSION-4 UPDATE (HEAD `8741fbdf`) — read before re-working the leads below
- **Lead #1 `floor_codes=3` RESOLVED — the code IS authoritative.** The diagnostic that cracked
it: join each **single-BP** cert's `floor_heat_loss` code to its independent
`floors[].description` (the multi-BP tally was contaminated because a cert's `floors[]` summary
is LOSSY — it drops some BPs' descriptions). Single-BP gives a perfect 1:1 enum: code 1↔"To
external air"(exposed), 2↔"To unheated space"(semi-exposed), **3↔"(other premises below)"
(9/9)**, 6↔"(another dwelling below)"(party), 7↔Solid/Suspended(ground). Per RdSAP §3.12
(p.25) code 3 = "above a partially heated space" (non-domestic premises below) → §5.14 constant
**U=0.7** (NOT Table-20 semi-exposed, NOT ground). SHIPPED `8741fbdf`.
- **SHIPPED `b40e0f67`:** exposed-floor-on-flats (code 1) area fix — §3.12. A flat's code-1
floor was area-zeroed by `_dwelling_exposure`; now the per-BP `is_exposed_floor` overrides the
flat suppression upward (mirrors the "another dwelling below" party override).
- **SHIPPED `8741fbdf`:** code 3 → `is_above_partially_heated_space` (U=0.7) + area override.
**RE-PINNED golden 7536-3827** — its Ext2(bp3) code-3 floor was mis-read as "ground U=1.12" by
a prior agent (the lossy floors[] dropped its description), who declared the residual an
"irreducible register-rounding artifact, DO NOT chase". It was this bug: U 1.12→0.70, PE/CO2
residuals moved toward 0. **LESSON: "irreducible residual" golden notes are suspect — a real
mapper bug can hide there.** Eval (both slices): 45.1→45.3%, mean|err| 1.702→1.659, <1.0
59.5→60.2%. User is generating a fresh `0380-2087-8190-2996-3075` worksheet to independently
confirm U=0.7 (0380 now 0.63) — validate when it lands.
- **Leads re-checked, NOT clean:** `immersion_type=2` (+1.86) is high-scatter (mean|err| 3.71,
bidirectional). `main_control=2107` (+1.63) is correctly mapped ("Programmer, TRVs and bypass"
type 2 Table 4c(2)) — over-rate is diffuse gas-boiler/flat-fabric, not a dispatch bug.
`roof_codes=1` broad bucket is mean 0.15 (the 1.78 was top-floor-electric-flat outliers
29/25). Remaining gains need per-cert worksheets (start code-3) or the unsupported-schema ticket.
## KEY INSIGHT (load-bearing, from the user)
**The gov EPC API JSON is the published OUTPUT of RdSAP software (Elmhurst), not its input.**
So any API field Elmhurst doesn't expose as an *input* is register metadata the RdSAP10 method
does **not** consume — route it to the spec default, don't try to "use" it. This is exactly why
`wall_insulation_thermal_conductivity` (slice 4) → always λ=0.04. Apply the same lens to any
new "extra" API field before wiring it.
## THE NEW DIAGNOSTIC — `scripts/profile_api_error.py` (run this first)
`PYTHONPATH=/workspaces/model python scripts/profile_api_error.py` joins each computed cert's
signed error with a rich feature set from its **raw API JSON** (not the mapped EpcPropertyData),
and ranks (feature, value) buckets by error carried + by |mean signed| bias. This is how to find
"silly API-path handling" gaps. `--min-n N` sets the bucket floor.
### PRIORITISED LEADS (from the run at `a8e5563a` — verify with the profiler, they'll shift)
Cleanest "API-path handling" candidates first (small, biased buckets = likely a mapper/dispatch
bug, not noise):
1. **`floor_codes=3` → mean signed +5.37 (n=10).** We map API `floor_heat_loss=3` → "To unheated
space" (same as code 2). The +5.37 over-rate says that's wrong — code 3 likely isn't "unheated
space" (or its U is wrong). Pull the n=10 certs, check what code 3 really is (ask the user the
Elmhurst floor dropdown — the API=output lens). **Highest bias, smallest scope = start here.**
2. **Control-code biases:** `main_control=2306` −2.96 (n=11), `2602` +2.49 (n=14), `2107` +1.65
(n=38), `2402` +1.14 (n=10), `2307` +0.74 (n=11). Several control codes carry systematic bias
→ Table 4c/4e control dispatch gaps. `2107`/`2602` are the biggest. Check
`_CONTROL_TYPE_BY_CODE` + the Table 4c efficiency-adjustment / Table 4e control coverage.
3. **`immersion_type=2` (dual immersion) → +2.00 (n=43, mean|err| 3.85).** RdSAP §12 lists "dual
electric immersion" as an off-peak trigger; the cascade does NOT consume `immersion_heating_type`
for tariff (verified — only comments reference it). Wiring the §12 dual-immersion → off-peak
rule for Unknown meters is a clean spec slice. (1=single, 2=dual per the Elmhurst Summary.)
4. **`roof_codes=1` → −1.78 (n=27)** (flat roof under-rate) and **`roof_insulation_thickness=None`
→ −1.18 (n=52)** — flat-roof / no-thickness roof handling.
5. **`main_data_source=2` / `has_pcdb_main=False` → 28% within 0.5, mean|err| 3.17 (n≈242).**
Non-PCDB heating systems (SAP-table efficiency) are a big under-rating cluster. Likely
Table 4b default-efficiency or fabric, but worth a look — it's 1/4 of the sample.
### Big scattered segments (need worksheets, NOT clean single fixes)
- **`whc=903` (electric immersion HW): 13% within 0.5, n=84** — looks like the worst bucket but
it's the electric **storage(cat-7)+room-heater(cat-10)** segment compounding (worst certs span
−29…+32, bidirectional). Not one bug.
- **`mains_gas=N` (electric): 21% within 0.5, mean|err| 4.27 (n=145)** — the hardest segment;
per-cert fabric/tariff scatter.
- **Flats (`property_type=2`): 31% within 0.5 (n=283)** — still the worst dwelling type.
- **cat-7 storage (+0.75) / cat-10 room heaters (+0.75)** — both net over-rate; bidirectional.
## DEPROVEN — do NOT retry (empirically failed in earlier sessions; details in memory)
- Routing **roof `'ND'` → Table 18** (description is load-bearing even with 'ND').
- Broad **"all Unknown(meter 3) electric → off-peak"** (over-credits room heaters). NOTE: the
meter-3 under-rate is partly an **irreducible data-fidelity artifact** — the register stores
meter_type=3 ("Unknown") on certs whose lodged rating actually used an off-peak meter (cert
2474: lodged 78 needs 18-hour, but API says Unknown → spec-faithful ~68). Don't chase those to
the lodged value.
- **RR shell U Table-17-50mm** (golden 6035 disproves it).
- **Shower enum is settled (non-bug):** API `shower_outlet_type` 1=non-electric(mixer)/2=electric
(cohort 2636/0330 validate at 1e-4); types 3/4/5 are finer gov-output sub-types (type 3 is all
on unsupported schema 19.1.0; type 4 already accurate). `shower_wwhrs` 1/2/3/4 = none / inst-
WWHRS-1 / inst-WWHRS-2 / storage. Low headline value — not worth pursuing.
## THE 100 unsupported_schema CERTS (deferred — bigger ticket)
SAP-Schema-19.1.0 (and other pre-21). The user is planning a separate big piece: map old schemas
→ new + **predict missing fields from similar-looking properties** (needs an EPC-prediction
method). That needs its own grilling session — do NOT start it here.
## WORKSHEET WORKFLOW (the user generates them on request)
For per-cert scatter that needs ground truth, ask the user to generate **P960 + Summary**
worksheets from the cert's OWN API JSON (`/tmp/epc_2026_sample/<cert>.json`). **Describe the cert
field-by-field first** (the user reproduces in Elmhurst; their repros are approximate — confirm
SAP matches lodged before pinning). Worksheets land under `sap worksheets/golden fixture
debugging/simulated case NN/` or `sap worksheets/additional with api 2/<cert>/`. Pin the cascade
to the P960 §3/§4/§9a/§10a line refs at abs=1e-4. **Caveat:** the user's repros often diverge
(wrong system / approximate inputs) — validate the BEHAVIOUR (e.g. λ, no-heat-loss) empirically
against the lodged SAP, don't blindly pin to a non-faithful repro.
## TOOLS & CONVENTIONS (non-negotiable)
- `scripts/eval_api_sap_accuracy.py` — headline + TOP-40 + `_results.csv`.
- `scripts/profile_api_error.py` — raw-API characteristic profiling (NEW, run first).
- `scripts/decompose_api_cost_error.py` — per-component cost decomposition (off-peak caveat: uses
STANDARD elec price, mis-flags off-peak certs).
- ~1009 cached API JSONs at `/tmp/epc_2026_sample` (`EPC_SAMPLE_CACHE` overrides).
- **one cause = one slice = one commit**; **spec citation (page+line)** in the message; AAA test
headers (`# Arrange/# Act/# Assert`); `abs(x-y)<=tol` not `pytest.approx`; **SAP 10.2 only**;
**no tolerance-widening / xfail**; RdSAP is **deterministic** — every fix is a spec rule, not a
population data-fit (the user is firm); pyright strict **net-zero** (baseline-compare via
`git stash`); **stage files BY NAME** (tree carries unrelated `scripts/` + `sap worksheets/`
changes — never `git add -A`); `Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>`.
- **REGRESSION after any calc/mapper change:** `tests/domain/sap10_calculator/`,
`backend/documents_parser/tests/`, `datatypes/epc/`, golden fixtures (esp. **6035**).
- **Pre-existing failures to IGNORE** (fail on the stashed baseline too): `test_total_floor_area`
and the 2 stone-wall U tests in `domain/sap10_ml/tests/test_rdsap_uvalues.py`.
## ARCHITECTURE NOTES (so you don't re-discover them)
- API path: `EpcPropertyDataMapper.from_api_response(doc)` → `cert_to_inputs(epc, prices=
SAP_10_2_SPEC_PRICES)` → `calculate_sap_from_inputs(...).sap_score_continuous`.
- Cost path uses `inputs.fuel_cost` (Table-32/12a precompute); `_fuel_cost` returns a ZERO
sentinel for off-peak → calculator falls back to the legacy scalar `_space_heating_fuel_cost_
gbp_per_kwh` (which DOES carry the off-peak rate). SapResult fuel codes are RAW API enums —
translate via `table_12.API_FUEL_TO_TABLE_12`.
- Heating efficiency: `_main_heating_detail_efficiency` → PCDB Table 105 winter eff (if PCDB
index) else `seasonal_efficiency(code, cat, fuel)` (Table 4a/4b, in `domain/sap10_ml/
sap_efficiencies.py`). Warm-air Table 4a code→eff map already covers 501-520.
- `sap10_ml/` is marked for eventual migration to `sap10_calculator/` but is still the live
u-value/efficiency path.

View file

@ -4,13 +4,13 @@ from typing import Final
from domain.fuel_rates.fuel import Fuel
from domain.sap10_calculator.exceptions import UnmappedSapCode
from domain.sap10_calculator.tables.table_32 import to_table_32_code
# SAP 10.2 / Table 32 fuel code -> canonical billing Fuel (ADR-0014). Bounded to
# the ~47 Table 32 fuel codes (the keys of `table_12.UNIT_PRICE_P_PER_KWH`) — the
# carrier, NOT the PCDB product, so a thousand PCDB heat pumps all share one code.
# Input is a normalised Table 32 fuel code (the calculator sets `main_fuel_type`
# to Table 32 codes); an unmapped code raises `UnmappedSapCode` rather than
# guessing — a bounded, self-surfacing backlog [[reference-unmapped-sap-code]].
# Table 32 fuel code -> canonical billing Fuel (ADR-0014). Bounded to the ~47
# Table 32 fuel codes (the keys of `UNIT_PRICE_P_PER_KWH`) — the carrier, NOT the
# PCDB product, so a thousand PCDB heat pumps all share one code. An unmapped code
# raises `UnmappedSapCode` rather than guessing — a bounded, self-surfacing
# backlog [[reference-unmapped-sap-code]].
_CODE_TO_FUEL: Final[dict[int, Fuel]] = {
**dict.fromkeys([1, 7], Fuel.MAINS_GAS), # mains gas, grid biogas
**dict.fromkeys([2, 3, 5, 9], Fuel.LPG),
@ -29,13 +29,26 @@ _CODE_TO_FUEL: Final[dict[int, Fuel]] = {
def sap_code_to_fuel(code: int) -> Fuel:
    """Map one of the calculator's per-end-use fuel codes to its billing Fuel.

    The code may be a raw gov-API `main_fuel_type` enum or an already-Table-32
    code depending on the source mapper (until [[adr-0015]] normalizes the
    cert), so it is first run through the calculator's own
    ``to_table_32_code`` — T32-first, then API-translate — the **same**
    normalization the calculator's pricing/CO2 helpers use, so the bill's
    carrier matches what the calculator billed. The normalized Table-32 code
    is then dispatched to a billing Fuel via ``_CODE_TO_FUEL``.

    Args:
        code: Fuel code as carried on the calculator's per-end-use fields.

    Returns:
        The ``Fuel`` that ``_CODE_TO_FUEL`` holds for the normalized code.

    Raises:
        UnmappedSapCode: The code has no single billing carrier — e.g. dual
            fuel (10) or the grid-export codes (36/60), which are not an end
            use's input fuel. The exception carries the ORIGINAL ``code``,
            not the normalized one.
    """
    # Normalize to a Table-32 code; fall back to the raw code for billing
    # fuels the price table does not carry (the 41-58 heat-network range —
    # `to_table_32_code` returns None there, but they still resolve to
    # HEAT_NETWORK and so to UnpricedFuel, which is stricter — and correct —
    # than the calculator's lossy default-to-mains-gas for an unpriced code).
    normalized = to_table_32_code(code)
    lookup_code = normalized if normalized is not None else code
    fuel = _CODE_TO_FUEL.get(lookup_code)
    if fuel is None:
        raise UnmappedSapCode("fuel_code", code)
    return fuel

View file

@ -336,6 +336,18 @@ class CalculatorInputs:
# this field. cert_to_inputs sets this via `additional_standing_
# charges_gbp(main_fuel_code, water_heating_fuel_code, tariff)`.
standing_charges_gbp: float = 0.0
# Per-end-use fuel codes (RdSAP10 Table 32 / SAP 10.2 Table 12 fuel
# code column) for ADR-0014 BillDerivation fuel attribution. Output-
# only — these do NOT feed ECF / cost / CO2 / primary energy /
# sap_score (the rating cascade already prices each end-use via the
# per-end-use cost/CO2/PE factor fields above). They tell the bill
# adapter WHICH fuel carrier each end-use burns. None when the
# corresponding system is absent (no main / no 2nd main / no
# secondary) or the water-heating fuel is not resolvable.
main_heating_fuel_code: Optional[int] = None
main_2_heating_fuel_code: Optional[int] = None
secondary_heating_fuel_code: Optional[int] = None
hot_water_fuel_code: Optional[int] = None
@dataclass(frozen=True)
@ -385,6 +397,20 @@ class SapResult:
# gas-cooker split, if ever needed, is a separate follow-up).
appliances_kwh_per_yr: float
cooking_kwh_per_yr: float
# Per-end-use fuel codes (RdSAP10 Table 32 / SAP 10.2 Table 12 fuel
# code column) + annual PV export for ADR-0014 BillDerivation. Output-
# only metadata — these do NOT contribute to ecf / total_fuel_cost_gbp
# / co2_kg_per_yr / primary_energy_kwh_per_yr / sap_score. They tell
# the bill adapter WHICH fuel carrier each end-use burns; the fuel
# codes are None when the corresponding system is absent or the water-
# heating fuel is not resolvable. `pv_exported_kwh_per_yr` is the
# annual kWh exported to the grid (SAP 10.2 Appendix M1 §3-4 split),
# 0.0 when there is no PV.
main_heating_fuel_code: Optional[int]
main_2_heating_fuel_code: Optional[int]
secondary_heating_fuel_code: Optional[int]
hot_water_fuel_code: Optional[int]
pv_exported_kwh_per_yr: float
primary_energy_kwh_per_yr: float
primary_energy_kwh_per_m2: float
monthly: tuple[MonthlyEntry, ...]
@ -798,6 +824,11 @@ def calculate_sap_from_inputs(inputs: CalculatorInputs) -> SapResult:
lighting_kwh_per_yr=inputs.lighting_kwh_per_yr,
appliances_kwh_per_yr=inputs.appliances_kwh_per_yr,
cooking_kwh_per_yr=inputs.cooking_kwh_per_yr,
main_heating_fuel_code=inputs.main_heating_fuel_code,
main_2_heating_fuel_code=inputs.main_2_heating_fuel_code,
secondary_heating_fuel_code=inputs.secondary_heating_fuel_code,
hot_water_fuel_code=inputs.hot_water_fuel_code,
pv_exported_kwh_per_yr=inputs.pv_exported_kwh_per_yr or 0.0,
primary_energy_kwh_per_yr=primary_energy_kwh,
primary_energy_kwh_per_m2=primary_energy_per_m2,
monthly=monthly,

View file

@ -103,6 +103,7 @@ from domain.sap10_calculator.tables.table_12a import (
rdsap_tariff_for_cert,
space_heating_high_rate_fraction,
tariff_from_meter_type,
water_heating_high_rate_fraction,
)
from domain.sap10_calculator.tables.table_32 import (
additional_standing_charges_gbp,
@ -662,6 +663,11 @@ _INSTANTANEOUS_WATER_CODES: Final[frozenset[int]] = frozenset({907, 909})
# zero-loss list, so primary loss is zero whenever this code is lodged.
_WHC_ELECTRIC_IMMERSION: Final[int] = 903
# SAP 10.2 Table 4a "direct-acting electric boiler" (RdSAP 10 §12 p.62).
# Named in the SAP 10.2 Table 3 (PDF p.160) primary-loss zero list, so a
# 191 main feeding a cylinder incurs no primary circuit loss.
_DIRECT_ACTING_ELECTRIC_BOILER_CODE: Final[int] = 191
# Water-heating codes for a dedicated "boiler/circulator for water
# heating only" — SAP 10.2 Table 4a hot-water section (PDF p.166):
# 911 gas, 912 liquid fuel, 913 solid fuel boiler/circulator; 921-931
@ -786,6 +792,12 @@ _SECONDARY_HEATING_FRACTION_BY_CATEGORY: Final[dict[int, float]] = {
5: 0.10,
6: 0.10,
7: 0.15,
9: 0.10, # Warm-air systems (NOT heat pump): a gas/oil warm-air unit
# is an "All gas, liquid and solid fuel systems" row (0.10),
# and electric warm air is "Other electric systems" (also
# 0.10) — so 0.10 regardless of fuel (SAP 10.2 Table 11
# p.188). Cert 0380 (warm air mains gas, code 506, +
# electric room-heater secondary) raised here before.
10: 0.20,
}
_SECONDARY_HEATING_FRACTION_DEFAULT: Final[float] = 0.10
@ -1393,13 +1405,19 @@ def _climate_source(
def _is_timber_or_steel_frame(parts: list[SapBuildingPart]) -> bool:
"""RdSAP 10 §5: wall_construction codes 5 (timber frame) and 6 (system
build steel frame) get the lower 0.25 structural ACH; everything else
is treated as 0.35 masonry."""
"""RdSAP 10 §2 (Ventilation, "Walls" row): "Structural infiltration:
0.25 for steel or timber frame or 0.35 for masonry construction ...
System build: treated as masonry." So only wall_construction code 5
(timber frame) takes the lower 0.25 structural ACH; code 6 (system
build) is explicitly masonry (0.35), as is everything else.
(Park homes also take the timber-frame value per the same spec row,
but that is a dwelling-type flag, not a wall_construction code, and is
out of scope here.)"""
if not parts:
return False
wc = parts[0].wall_construction
return isinstance(wc, int) and wc in (5, 6)
return isinstance(wc, int) and wc == 5
def _living_area_fraction_default(habitable_rooms_count: Optional[int]) -> float:
@ -2134,12 +2152,27 @@ def _table_12a_system_for_main(
# all callers already pre-gate on electric, this is belt-and-braces.
if main.main_heating_category == 10 and _is_electric_main(main):
return Table12aSystem.OTHER_DIRECT_ACTING_ELECTRIC
# A PCDB Table 362 record IS a heat pump by definition (the Appendix-N
# efficiency cascade keys off it), whether or not a Table-4a SAP code
# (211-227 / 521-524) was ALSO lodged. API-path heat pumps resolve via
# the PCDB index alone (data_source=1, sap_main_heating_code None), so
# the code-range gate below misses them and they fell through to None
# → the "100% off-peak low-rate" fallback, OVER-crediting the cat-4
# cluster on Dual meters (cert 9472 +15.0 SAP). Route any PCDB heat
# pump to ASHP_APP_N: SAP 10.2 Table 12a Grid 1 (PDF p.191) gives the
# ASHP/GSHP Appendix-N rows the same 0.80 SH high-rate fraction at
# 7-hour and 10-hour, so ASHP_APP_N is the canonical Appendix-N row
# for the space-heating cost split.
if has_pcdb_hp:
return Table12aSystem.ASHP_APP_N
# ASHP — Table 4a rows 211-217 (earlier generations) + 221-227
# (2013+) cover the air-source space. Warm-air ASHPs are 521-524.
# Reached only when no PCDB record is present (handled above), so the
# "from database" variant never applies here → ASHP_OTHER.
if code is not None and (
211 <= code <= 217 or 221 <= code <= 227 or 521 <= code <= 524
):
return Table12aSystem.ASHP_APP_N if has_pcdb_hp else Table12aSystem.ASHP_OTHER
return Table12aSystem.ASHP_OTHER
return None
@ -2210,6 +2243,7 @@ def _hot_water_fuel_cost_gbp_per_kwh(
tariff: Tariff,
prices: PriceTable,
*,
water_heating_code: Optional[int] = None,
inherit_main_for_community_heating: bool = False,
) -> float:
"""Hot water bills at the *water-heating* fuel's rate. When the
@ -2218,8 +2252,16 @@ def _hot_water_fuel_cost_gbp_per_kwh(
water fuel is a non-electric fuel (gas / oil / LPG), tariff is
not consulted — those fuels are single-rate per Table 32. For
cert 000565 HW routes to gas combi via WHC 914 — tariff branch
not taken. TODO: Table 12a Grid 1 WH high-rate-fraction split for
electric WH on off-peak (currently uses 100% low rate).
not taken.
HP-DHW exception: when DHW is heated by the main system (WHC in
{901, 902, 914}) and that main is a PCDB Table 362 heat pump, the
HW bills per SAP 10.2 Table 12a Grid 1 WH column (PDF p.191) — the
ASHP/GSHP-from-database row carries a 0.70 high-rate fraction at
7-hour and 10-hour, NOT 100% off-peak low rate. Electric IMMERSION
(WHC 903) is a different Table 12a row (off-peak immersion 0.17 /
Table 13) and stays on the 100%-low-rate fallback until that slice
lands.
`inherit_main_for_community_heating`: per S0380.173, when WHC
{901, 902, 914} AND main is a heat network, ignore the cert-
@ -2232,6 +2274,18 @@ def _hot_water_fuel_cost_gbp_per_kwh(
return _fuel_cost_gbp_per_kwh(main, prices)
water_electric = _is_electric_water(water_heating_fuel)
if water_electric and tariff is not Tariff.STANDARD:
if (
water_heating_code in _WATER_INHERIT_FROM_MAIN_CODES
and main is not None
and main.main_heating_index_number is not None
and heat_pump_record(main.main_heating_index_number) is not None
):
high_rate, low_rate = _tariff_high_low_rates_p_per_kwh(tariff)
high_frac = water_heating_high_rate_fraction(
Table12aSystem.ASHP_APP_N, tariff
)
blended = high_frac * high_rate + (1.0 - high_frac) * low_rate
return blended * _PENCE_TO_GBP
return _off_peak_low_rate_gbp_per_kwh(tariff)
if water_heating_fuel is not None:
return prices.unit_price_p_per_kwh(water_heating_fuel) * _PENCE_TO_GBP
@ -3042,14 +3096,24 @@ def _main_heating_co2_factor_kg_per_kwh(
if monthly is None:
return _co2_factor_kg_per_kwh(main)
return monthly
codes = _TARIFF_HIGH_LOW_FUEL_CODES_TABLE_12.get(tariff)
system = _table_12a_system_for_main(main)
if system is None:
# An electric main on a dual tariff with no Table 12a Grid 1 row is
# an off-peak STORAGE system (storage heaters / electric storage
# boiler / CPSU): it charges 100% off-peak per the Table 12a design
# intent, so its monthly CO2 factor is the dual-rate LOW code
# cascade — NOT the flat annual factor. case-20 storage on E7:
# code 31 → (261) 0.1357, vs the 0.136 annual fallback.
if codes is not None:
low_only = _effective_monthly_co2_factor(main_fuel_monthly_kwh, codes[1])
if low_only is not None:
return low_only
return _co2_factor_kg_per_kwh(main)
try:
high_frac = space_heating_high_rate_fraction(system, tariff)
except NotImplementedError:
return _co2_factor_kg_per_kwh(main)
codes = _TARIFF_HIGH_LOW_FUEL_CODES_TABLE_12.get(tariff)
if codes is None:
return _co2_factor_kg_per_kwh(main)
high_code, low_code = codes
@ -3516,6 +3580,18 @@ def _secondary_heating_co2_factor_kg_per_kwh(
not the 0.136 electricity flat that the pre-S0380.70 hardcoded
`_STANDARD_ELECTRICITY_FUEL_CODE` path produced."""
code = _secondary_fuel_code(epc)
if code == _STANDARD_ELECTRICITY_FUEL_CODE:
# Secondary electric heaters are direct-acting (used on demand,
# daytime) → on-peak. On a dual-rate meter they draw HIGH-rate
# electricity, so the monthly Table 12d CO2 cascade keys on the
# tariff's HIGH code, not the standard all-day code 30 — mirroring
# the cost side billing secondary at the high rate (e.g. 15.29 p on
# E7). case-20 secondary on E7: code 32 → (263) 0.1616, vs the
# 0.15405 a code-30 weighting gives. STANDARD-tariff certs have no
# dual codes → code 30 unchanged.
dual_codes = _TARIFF_HIGH_LOW_FUEL_CODES_TABLE_12.get(_rdsap_tariff(epc))
if dual_codes is not None:
code = dual_codes[0]
monthly = _effective_monthly_co2_factor(secondary_fuel_monthly_kwh, code)
if monthly is not None:
return monthly
@ -5242,6 +5318,16 @@ def _primary_loss_applies(
# kWh/yr — zero before this branch.
if water_heating_code in _WATER_HEATING_BOILER_CIRCULATOR_CODES:
return True
# SAP 10.2 Table 3 (PDF p.160) zero-loss list names "Direct-acting
# electric boiler" verbatim. RdSAP 10 §12 (p.62) classifies SAP code
# 191 as the direct-acting electric boiler: its cylinder is immersion-
# heated with no primary pipework, so no primary loss — even though it
# lodges as main_heating_category 2 ("Boiler and radiators, electric")
# and would otherwise hit the cat-{1,2} boiler branch below. Checked
# before that branch so the electric-flat segment (cert 2474: WHC 901
# + code 191 + cylinder) no longer accrues ~1177 kWh/yr phantom loss.
if main.sap_main_heating_code == _DIRECT_ACTING_ELECTRIC_BOILER_CODE:
return False
if main.main_heating_category == 4:
if hp_record is None:
# No PCDB record → assume separate-vessel (conservative; the
@ -5653,6 +5739,7 @@ def _water_heating_worksheet_and_gains(
primary_loss_monthly_kwh_override=primary_loss_override,
has_electric_shower=has_electric_shower,
electric_shower_count=electric_shower_count,
is_instantaneous_at_point_of_use=is_instantaneous,
)
solar_hw_override = _solar_hw_monthly_override(
epc=epc,
@ -5670,6 +5757,7 @@ def _water_heating_worksheet_and_gains(
solar_water_heating_monthly_kwh_override=solar_hw_override,
has_electric_shower=has_electric_shower,
electric_shower_count=electric_shower_count,
is_instantaneous_at_point_of_use=is_instantaneous,
)
return wh_result, wh_result.heat_gains_monthly_kwh
@ -6084,8 +6172,14 @@ def _fuel_cost(
is the natural extension point for the Table 12a `_SH_HIGH_RATE_
FRACTION` lookup + `Table12aSystem` mapping (deferred per slice 3
docs `Q11` follow-ups)."""
meter_type = epc.sap_energy_source.meter_type
tariff = tariff_from_meter_type(meter_type)
# Use the §12-Rules-aware tariff (not the raw meter→tariff): it routes
# an "Unknown" (code 3) meter with an electric storage / heat-pump /
# room-heater main to its off-peak tariff (storage heaters can't run on
# a single rate), so the off-peak branch below fires and the legacy
# scalar fields bill the overnight charge at the low rate instead of
# the standard 13.19 p/kWh. A non-electric Unknown-meter dwelling still
# resolves STANDARD here, keeping the full §10a precompute.
tariff = _rdsap_tariff(epc)
if tariff is not Tariff.STANDARD:
# Off-peak path defers to the legacy scalar fuel-cost fields on
# CalculatorInputs (the pre-§10a `_space_heating_fuel_cost_gbp_
@ -6912,6 +7006,7 @@ def cert_to_inputs(
_water_heating_main(epc),
_rdsap_tariff(epc),
prices,
water_heating_code=epc.sap_heating.water_heating_code,
inherit_main_for_community_heating=_community_hw_inherit,
)
hw_co2_factor = _hot_water_co2_factor_kg_per_kwh(
@ -6987,6 +7082,25 @@ def cert_to_inputs(
# E_cook = 138 + 28×N, already summed in `cooking_monthly_kwh`.
appliances_kwh_per_yr=sum(appliances_monthly_kwh),
cooking_kwh_per_yr=sum(cooking_monthly_kwh),
# Per-end-use fuel codes (RdSAP10 Table 32 / SAP 10.2 Table 12 fuel
# code column) for ADR-0014 BillDerivation fuel attribution.
# Output-only — they tell the bill adapter WHICH carrier each end-
# use burns and do NOT feed cost / CO2 / PE / sap_score (those are
# already priced via the per-end-use factor fields below). Resolved
# via the same helpers the cost/CO2 cascade uses: `_main_fuel_code`
# (None when no main system), `_secondary_fuel_code`, and
# `_water_heating_fuel_code` (None when the WHC fuel is not
# resolvable). Main 2 is the second `main_heating_details` entry,
# if any (None when the cert has a single main system).
main_heating_fuel_code=_main_fuel_code(main),
main_2_heating_fuel_code=_main_fuel_code(
epc.sap_heating.main_heating_details[1]
if epc.sap_heating
and len(epc.sap_heating.main_heating_details) > 1
else None
),
secondary_heating_fuel_code=_secondary_fuel_code(epc),
hot_water_fuel_code=_water_heating_fuel_code(epc),
space_heating_fuel_cost_gbp_per_kwh=_space_heating_fuel_cost_gbp_per_kwh(
main, _rdsap_tariff(epc), prices
),

View file

@ -263,6 +263,21 @@ _RULE_3_TEN_HOUR_CODES: Final[frozenset[int]] = frozenset(
)
def _meter_is_unknown(meter_type: object) -> bool:
"""True when the meter is the RdSAP "Unknown" sentinel (code 3 / the
"unknown" / "" / "3" string aliases) the assessor did not record the
tariff. Distinct from Single (code 2), an explicit single-rate
lodgement. Mirrors `_is_off_peak_meter`'s code extraction so the main-
heating tariff inference stays consistent with the HW/secondary path."""
if isinstance(meter_type, bool):
return False
if isinstance(meter_type, int):
return meter_type == 3
if isinstance(meter_type, str):
return meter_type.strip().lower() in {"unknown", "3", ""}
return False
def rdsap_tariff_for_cert(
meter_type: object,
*,
@ -297,23 +312,46 @@ def rdsap_tariff_for_cert(
TEN_HOUR, matching the worksheet's "10 Hour Off Peak" lodging.
"""
base = tariff_from_meter_type(meter_type)
# Non-Dual meters resolve straight from the meter type.
if base is not Tariff.SEVEN_HOUR:
return base
main_codes = {
c for c in (main_1_sap_code, main_2_sap_code) if c is not None
}
# Rule 1
if main_codes & _RULE_1_CPSU_CODES:
return Tariff.TEN_HOUR
# Rule 2 — checked BEFORE rule 3 per §12 ordering (storage takes
# precedence over the broader Rule 3 electric set).
if main_codes & _RULE_2_STORAGE_CODES:
return Tariff.SEVEN_HOUR
# Rule 3
if main_codes & _RULE_3_TEN_HOUR_CODES:
return Tariff.TEN_HOUR
if main_1_is_heat_pump_database or main_2_is_heat_pump_database:
return Tariff.TEN_HOUR
# Rule 4 — default
return Tariff.SEVEN_HOUR
def _rules_1_to_3() -> Optional[Tariff]:
"""§12 Rules 1-3 — the explicit electric-system tariff matches.
Returns None when no electric storage / CPSU / heat-pump / room-
heater main is present (i.e. Rule 4 territory)."""
# Rule 1
if main_codes & _RULE_1_CPSU_CODES:
return Tariff.TEN_HOUR
# Rule 2 — checked BEFORE rule 3 per §12 ordering (storage takes
# precedence over the broader Rule 3 electric set).
if main_codes & _RULE_2_STORAGE_CODES:
return Tariff.SEVEN_HOUR
# Rule 3
if main_codes & _RULE_3_TEN_HOUR_CODES:
return Tariff.TEN_HOUR
if main_1_is_heat_pump_database or main_2_is_heat_pump_database:
return Tariff.TEN_HOUR
return None
# Dual meter — §12 Rules 1-4, where Rule 4 is the 7-hour default.
if base is Tariff.SEVEN_HOUR:
return _rules_1_to_3() or Tariff.SEVEN_HOUR
# "Unknown" meter (code 3): the assessor didn't record the tariff, but
# an electric CPSU (Rule 1) or STORAGE (Rule 2) main is physical
# evidence the dwelling is on an off-peak tariff — these charge
# overnight at the low rate and cannot run economically on a single
# rate, so the tariff is implied. Direct-acting electric / room heaters
# / heat pumps (Rule 3) are NOT off-peak evidence (they run on demand
# and exist on single-rate meters too), so they keep STANDARD here
# rather than being mis-billed 100% at the off-peak low rate. A
# non-electric main also keeps STANDARD (no Rule 4 default — Unknown
# must not force off-peak on a gas dwelling).
if _meter_is_unknown(meter_type):
if main_codes & _RULE_1_CPSU_CODES:
return Tariff.TEN_HOUR
if main_codes & _RULE_2_STORAGE_CODES:
return Tariff.SEVEN_HOUR
return Tariff.STANDARD
# Single (code 2) or any other explicit non-off-peak meter.
return base

View file

@ -194,7 +194,7 @@ _OFF_PEAK_STANDING_CODE: Final[dict[Tariff, int]] = {
}
def _to_table_32_code(fuel_code: Optional[int]) -> Optional[int]:
def to_table_32_code(fuel_code: Optional[int]) -> Optional[int]:
"""Normalise a fuel code (Table 32 or API enum) to its Table 32 form."""
if fuel_code is None:
return None
@ -204,7 +204,7 @@ def _to_table_32_code(fuel_code: Optional[int]) -> Optional[int]:
def _is_gas_code(fuel_code: Optional[int]) -> bool:
    """Return True when ``fuel_code`` normalises to a member of ``_GAS_FUEL_CODES``.

    The code is first passed through ``to_table_32_code``; a code that
    normalises to None is never treated as gas.
    """
    code = to_table_32_code(fuel_code)
    return code is not None and code in _GAS_FUEL_CODES
@ -219,9 +219,9 @@ def is_electric_fuel_code(fuel_code: Optional[int]) -> bool:
silently mis-classifies as electric. The S0380.135 EES-code
Table 32 mapper lookups set `main_fuel_type` to Table 32 codes
(BDI 10 = dual fuel), so the literal-set checks fail loudly here
unless normalised through `_to_table_32_code` first.
unless normalised through `to_table_32_code` first.
"""
code = _to_table_32_code(fuel_code)
code = to_table_32_code(fuel_code)
return code is not None and code in _ELECTRIC_FUEL_CODES
@ -235,7 +235,7 @@ def is_liquid_fuel_code(fuel_code: Optional[int]) -> bool:
LPG is treated as GAS by Table 4f (separate "Gas boiler" row,
45 kWh/yr) — `is_liquid_fuel_code` returns False for LPG codes.
"""
code = _to_table_32_code(fuel_code)
code = to_table_32_code(fuel_code)
return code is not None and code in _LIQUID_FUEL_CODES

View file

@ -127,6 +127,11 @@ _WINDOW_CURTAIN_RESISTANCE_M2K_PER_W: Final[float] = 0.04
# rounding policy — applied to gross wall / roof / floor / party / window
# / door / alt-wall / RR sub-area inputs to the §3 cascade.
_AREA_ROUND_DP: Final[int] = 2
# RdSAP 10 Table 4 (p.22) — a "Sheltered" room-in-roof gable adds this
# external surface resistance to the storey-below main wall: U_sheltered =
# 1/(1/U_wall + 0.5). Back-solved from Elmhurst Default U-values: sim case
# 21 (U_wall 1.10 → 0.71) and sim case 20 (U_wall 1.70 → 0.92).
_SHELTERED_GABLE_ADDED_RESISTANCE_M2K_W: Final[float] = 0.5
# RdSAP 10 §3.8 "Roof area" — pitched-sloping-ceiling roofs use the
# inclined surface area (floor area divided by cos(30°)) rather than
# the horizontal projection.
@ -339,6 +344,32 @@ def _joined_descriptions(elements: list[Any]) -> Optional[str]:
return " | ".join(parts)
def _joined_main_roof_descriptions(roofs: list[Any]) -> Optional[str]:
"""Join roof descriptions for the MAIN (non-RR) roof U-value, dropping
"Roof room(s)" entries.
A room-in-roof carries its own §3.9/§3.10 shell area + U-value cascade
(Table 17 / Table 18 col 4), so a "Roof room(s), no insulation
(assumed)" lodgement must NOT leak into the main pitched/flat roof's
`u_roof`. Without this filter a multi-roof cert like "Pitched,
insulated (assumed) | Roof room(s), no insulation (assumed)" applies
the RR's "no insulation" 2.30 to the WHOLE main roof, ~3x over-stating
its heat loss (the 4700-family regular-roof-U leak).
Falls back to the unfiltered join when every roof entry is a Room-in-
Roof (pure-RR dwelling) so that case keeps its prior behaviour."""
if not roofs:
return None
parts = [
d
for e in roofs
if (d := getattr(e, "description", "")) and "roof room" not in d.lower()
]
if not parts:
return _joined_descriptions(roofs)
return " | ".join(parts)
def _part_geometry(part: SapBuildingPart) -> dict[str, float]:
if not part.sap_floor_dimensions:
# A part with no floor dimensions has no derivable RR shell or
@ -554,7 +585,7 @@ def heat_transmission_from_cert(
return HeatTransmission(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
country = Country.from_code(epc.country_code)
roof_description = _joined_descriptions(epc.roofs)
roof_description = _joined_main_roof_descriptions(epc.roofs)
wall_description = _joined_descriptions(epc.walls)
floor_description = _joined_descriptions(epc.floors)
@ -932,8 +963,30 @@ def heat_transmission_from_cert(
rw_area_part if _bp_rr_roof_absorbs_rooflight(part, geom) else 0.0
)
roof_area = max(0.0, gross_roof_area - (rw_area_part - rw_area_on_rr))
# Per-BP floor exposure: a floor lodged "(another dwelling below)"
# (API floor_heat_loss=6) sits over another heated dwelling, so it
# is a party floor with no heat loss (RdSAP 10 §3) — suppress that
# BP's floor even when the dwelling-level `has_exposed_floor` flag
# is True. The flag is keyed only on the dwelling_type label, which
# defaults a "Ground-floor flat" to an exposed floor; the per-BP
# lodgement is authoritative. Mirrors the roof's "another dwelling
# above" override above. Cert 2115-4121-4711-9361-3686.
part_floor_is_party = "another dwelling below" in (part.floor_type or "").lower()
# A floor lodged as a heat-loss floor — *exposed* (API
# floor_heat_loss=1 → `is_exposed_floor`, "an exposed floor if there
# is an open space below") or *above a partially heated space* (API
# floor_heat_loss=3, "(other premises below)" → `is_above_partial`)
# per RdSAP 10 §3.12 (PDF p.25) — carries heat loss even when the
# dwelling-level flat heuristic (`_dwelling_exposure`) defaults a
# mid-/top-floor flat to has_exposed_floor=False on the assumption its
# floor sits over another *heated* dwelling. The per-BP lodgement is
# authoritative: it overrides the suppression upward, mirroring how
# the "another dwelling below" party signal overrides it downward.
part_has_exposed_floor = (
exposure.has_exposed_floor or is_exposed_floor or is_above_partial
) and not part_floor_is_party
floor_area_total = _round_half_up(
geom["ground_floor_area_m2"] if exposure.has_exposed_floor else 0.0,
geom["ground_floor_area_m2"] if part_has_exposed_floor else 0.0,
_AREA_ROUND_DP,
)
@ -1081,6 +1134,23 @@ def heat_transmission_from_cert(
rr_detailed_area += area
walls += u_gable * area
rr_walls_in_a_rr_area += area
elif kind == "gable_wall_sheltered":
# RdSAP 10 Table 4 (p.22) "Sheltered" gable: the storey-
# below main-wall U (`uw`) with an added R=0.5 m²K/W
# sheltered external resistance → U = 1/(1/uw + 0.5).
# The API path carries only the gable_wall_type=2 code
# (no lodged U) so the cascade derives it; the Summary
# path's lodged Default U-value rides through as a
# `surf.u_value` override. Validated against sim case 21
# (uw=1.10 → 0.71) and sim case 20 (uw=1.70 → 0.92).
u_sheltered = (
surf.u_value if surf.u_value is not None
else 1.0 / (1.0 / uw + _SHELTERED_GABLE_ADDED_RESISTANCE_M2K_W)
)
if area >= 0:
rr_detailed_area += area
walls += u_sheltered * area
rr_walls_in_a_rr_area += area
elif kind == "common_wall":
# RdSAP 10 §3.9.2 Simplified Type 2 + Table 4 p.22
# "Common wall": billed as external wall at the

View file

@ -843,6 +843,7 @@ def water_heating_from_cert(
electric_shower_monthly_kwh_override: Optional[tuple[float, ...]] = None,
has_electric_shower: bool = False,
electric_shower_count: int = 0,
is_instantaneous_at_point_of_use: bool = False,
) -> WaterHeatingResult:
"""SAP 10.2 §4 orchestrator — chain every line ref from (42) through
(65) for a combi-gas dwelling with optional PCDB-backed combi loss.
@ -912,7 +913,7 @@ def water_heating_from_cert(
)
distribution = distribution_loss_monthly_kwh(
monthly_energy_content_kwh=energy_content,
is_instantaneous_at_point_of_use=False,
is_instantaneous_at_point_of_use=is_instantaneous_at_point_of_use,
)
combi = (
combi_loss_monthly_kwh_override

View file

@ -177,42 +177,27 @@ WALL_INSULATION_CAVITY_PLUS_INTERNAL: Final[int] = 7
# (cavity + external/internal insulation).
_WALL_INSULATION_LAMBDA_W_PER_MK: Final[float] = 0.04
# RdSAP 10 §5.8 (page 41) — when documentary evidence lodges the insulation
# thermal conductivity, the R-value calc uses it instead of the 0.04 default.
# The spec offers three λ: 0.04 (mineral wool / EPS, the default), 0.03 (XPS),
# 0.025 (PUR / PIR / phenolic). The GOV.UK API surfaces a coded value
# (`wall_insulation_thermal_conductivity`); code 1 = the default 0.04 (the
# only code observed — cert 2130 Ext1, whose documentary-evidence path does
# not fire as no wall thickness is lodged, so the value is captured but
# unused there). Other codes raise until a worksheet-backed fixture confirms
# their λ — the same incremental-coverage discipline as the glazing-type map.
_WALL_INSULATION_CONDUCTIVITY_CODE_TO_LAMBDA: Final[dict[int, float]] = {
1: 0.04,
}
def _resolve_wall_insulation_lambda_w_per_mk(
    conductivity: "str | int | None",
) -> float:
    """Insulation λ (W/m·K) for the §5.8 documentary-evidence R-value calc.

    The RdSAP10 reduced-data method does NOT consume the gov-API
    `wall_insulation_thermal_conductivity` field: the Elmhurst RdSAP10
    tool exposes no conductivity input (a wall is Type + Insulation +
    thickness only), so the SAP 10.2 §5.8 (p.41) default λ=0.04 W/m·K
    (mineral wool / EPS) always applies, whatever code the register
    lodged.

    SAP 10.2 §5.8 also lists 0.03 (XPS) / 0.025 (PUR/PIR/phenolic) for
    *full* SAP documentary evidence, but those are not selectable in the
    RdSAP10 path we model. Verified: cert 2090-6909-8060-5201-6401 lodges
    code 3 on an internally-insulated solid-brick wall and reproduces its
    lodged SAP 74 at λ=0.04 (continuous 73.97; 0.04/0.03/0.025 all round
    to 74). Before this change the helper mapped only code 1 and raised on
    others, blocking the cert with `unmapped ... code 3`.

    Args:
        conductivity: The lodged conductivity code/text. Retained for
            call-site compatibility only; it is never read.

    Returns:
        ``_WALL_INSULATION_LAMBDA_W_PER_MK`` for every input.
    """
    return _WALL_INSULATION_LAMBDA_W_PER_MK
# RdSAP10 §5.8 final note + Table 14 page 41: "For drylining including
# laths and plaster use Rinsulation = 0.17 m²K/W." Applied additively to
@ -648,6 +633,35 @@ def u_wall(
return float(
Decimal(str(u_unrounded)).quantize(Decimal("0.01"), rounding=ROUND_HALF_UP)
)
# RdSAP 10 §5.7 Table 13 (PDF p.40) — uninsulated ("as built") solid
# brick wall U₀ by lodged wall thickness, age bands A-E. Table 6
# footnote (b) on the "Solid brick as built" row (PDF p.40):
# "Or from 5.7 if wall thickness is other than 200mm to 280mm" — the
# thickness table supersedes the flat 1.7 Table-6 default whenever a
# documentary wall thickness is lodged. 200-280 mm gives 1.7 either
# way, so the table is applied unconditionally here:
# ≤200 → 2.5, 200-280 → 1.7, 280-420 → 1.4, >420 → 1.1.
# The §5.8 + Table 14 dry-lining R is added on top only when the wall
# is dry-lined (§5.7 closing sentence: "Apply the adjustment according
# to Table 14 ... if wall is insulated or/and dry-lined including lath
# and plaster"). The insulated External/Internal case is handled by
# the branch above; this is the as-built (and dry-lined-only) path.
# Worksheet sim case 21: solid brick 440 mm (>420) as-built, Dry-lining
# No → U=1.10 (§3 (29a)). Cross-check sim case 20: 220 mm → 1.70.
if (
wall_type == WALL_SOLID_BRICK
and band in _STONE_AGE_A_TO_E
and wall_thickness_mm is not None
):
u0 = _u_brick_thin_wall_age_a_to_e(wall_thickness_mm)
if dry_lined:
u_unrounded = 1.0 / (1.0 / u0 + _DRY_LINING_RESISTANCE_M2K_PER_W)
return float(
Decimal(str(u_unrounded)).quantize(
Decimal("0.01"), rounding=ROUND_HALF_UP
)
)
return u0
if wall_type == WALL_CAVITY and wall_insulation_type in (
WALL_INSULATION_CAVITY_PLUS_EXTERNAL,
WALL_INSULATION_CAVITY_PLUS_INTERNAL,
@ -808,6 +822,24 @@ def u_roof(
# ("Average thermal transmittance X W/m²K"); spec §5.11 opening
# clause defers to the assessor's value when present.
return measured
if (
age_band is not None
and description is not None
and "unknown" in description.lower()
and (insulation_thickness_mm is None or insulation_thickness_mm == 0)
):
# RdSAP 10 §5.11.4 (page 44): "U-values in Table 18 are used when
# thickness of insulation cannot be determined." A roof lodged
# "Unknown loft insulation" carries thickness "NI" (Not Indicated,
# parsed to 0) or "ND" (None) — the thickness is UNDETERMINED, not
# zero — so it takes the Table 18 age-band default (column (1)
# pitched / column (3) flat), NOT the uninsulated 2.30 the Table 16
# row-0 lookup would give for a parsed-0 thickness. Distinct from a
# genuine "no insulation" lodgement, which keeps 2.30 (below). The
# discriminator is the deterministic "Unknown" text RdSAP renders
# for an undetermined-thickness observation.
table_18 = _FLAT_ROOF_BY_AGE if is_flat_roof else _ROOF_BY_AGE
return table_18.get(age_band.upper(), 0.4)
if (
is_sloping_ceiling
and age_band is not None

View file

@ -155,6 +155,7 @@ def make_building_part(
roof_construction: Optional[int] = 4,
floor_dimensions: Optional[list[SapFloorDimension]] = None,
sap_room_in_roof: Optional[SapRoomInRoof] = None,
floor_type: Optional[str] = None,
) -> SapBuildingPart:
"""Build a SapBuildingPart with sensible SAP10 defaults."""
return SapBuildingPart(
@ -169,6 +170,7 @@ def make_building_part(
if floor_dimensions is not None
else [make_floor_dimension()],
sap_room_in_roof=sap_room_in_roof,
floor_type=floor_type,
)

View file

@ -821,6 +821,46 @@ def test_u_roof_ni_thickness_with_no_insulation_description_stays_at_2_30() -> N
assert result == pytest.approx(2.30, abs=0.01)
def test_u_roof_unknown_loft_insulation_uses_table18_default_per_section_5_11_4() -> None:
    """An "Unknown loft insulation" pitched roof with parsed-0 thickness gets 0.40 at age A."""
    # Arrange — "Pitched, Unknown loft insulation" lodges
    # roof_insulation_thickness 'NI' (Not Indicated, parsed to 0) — the
    # thickness is UNDETERMINED, not zero. RdSAP 10 §5.11.4 (page 44):
    # "U-values in Table 18 are used when thickness of insulation cannot
    # be determined." So a pitched roof takes the Table 18 column (1)
    # age-band default (age A = 0.40), NOT the uninsulated 2.30 the
    # Table 16 row-0 lookup gives for a parsed-0 thickness. Cert
    # 9836-5829-1500-0803-7206 (top-floor flat, age A).
    # Act
    result = u_roof(
        country=Country.ENG,
        age_band="A",
        insulation_thickness_mm=0,  # parsed from "NI"
        description="Pitched, Unknown loft insulation",
    )
    # Assert
    assert abs(result - 0.40) <= 0.01
def test_u_roof_unknown_flat_insulation_uses_table18_flat_column() -> None:
    """An "Unknown" flat roof with thickness None gets 0.35 at age H."""
    # Arrange — an "Unknown" flat-roof lodgement with no determinable
    # thickness (None) takes Table 18 column (3) "Flat roof" age-band
    # default (age H = 0.35), per §5.11.4 — not 2.30.
    # Act
    result = u_roof(
        country=Country.ENG,
        age_band="H",
        insulation_thickness_mm=None,
        description="Flat, Unknown insulation",
        is_flat_roof=True,
    )
    # Assert
    assert abs(result - 0.35) <= 0.01
def test_u_roof_age_band_j_pitched_returns_table18_value() -> None:
# Arrange — Table 18, pitched insulation between joists, age J -> 0.16 W/m^2K.
@ -1914,15 +1954,26 @@ def test_resolve_wall_insulation_lambda_code_1_is_default_mineral_wool() -> None
assert abs(lam_str - 0.04) <= 1e-9
def test_resolve_wall_insulation_lambda_unmapped_code_raises() -> None:
# Arrange — an unmapped code must raise (incremental-coverage gate)
# rather than silently mis-factor the R-value.
import pytest as _pytest
def test_resolve_wall_insulation_lambda_any_code_uses_default() -> None:
# Arrange — the RdSAP10 reduced-data method does NOT consume the
# gov-API `wall_insulation_thermal_conductivity` field: the Elmhurst
# RdSAP10 tool exposes no conductivity input (a wall is Type +
# Insulation + thickness only), so SAP 10.2 §5.8 (p.41) default
# λ=0.04 W/m·K always applies regardless of the lodged code. Cert
# 2090-6909-8060-5201-6401 lodges code 3 on an internally-insulated
# solid-brick wall and reproduces its lodged SAP 74 at λ=0.04
# (continuous 73.97; 0.04/0.03/0.025 all round to 74). Pre-this the
# helper mapped only code 1 and RAISED on 2/3, blocking the cert.
from domain.sap10_ml.rdsap_uvalues import (
_resolve_wall_insulation_lambda_w_per_mk,
)
# Act / Assert
with _pytest.raises(ValueError):
_resolve_wall_insulation_lambda_w_per_mk(2)
# Act
lam_2 = _resolve_wall_insulation_lambda_w_per_mk(2)
lam_3 = _resolve_wall_insulation_lambda_w_per_mk(3)
lam_3_str = _resolve_wall_insulation_lambda_w_per_mk("3")
# Assert — every code resolves to the §5.8 default 0.04, never raises.
assert abs(lam_2 - 0.04) <= 1e-9
assert abs(lam_3 - 0.04) <= 1e-9
assert abs(lam_3_str - 0.04) <= 1e-9

View file

@ -0,0 +1,282 @@
"""Decompose each API cert's SAP error into per-component energy/cost deltas.
WHAT THIS IS FOR
----------------
`eval_api_sap_accuracy.py` tells us *which* certs are wrong (SAP err vs lodged
`energy_rating_current`). This script tells us *which component* is wrong and by
how much, without generating an Elmhurst worksheet.
THE METHOD (calibrate-then-compare)
-----------------------------------
The API response carries the lodged per-component consumer costs
(`heating_cost_current`, `hot_water_cost_current`, `lighting_cost_current`).
Those use the EPC's *consumer* price basis, not SAP Table-12. So:
1. Calibrate the effective consumer price empirically on the certs we already
get right (|SAP err| < CAL_TOL): for gas heating certs
`gas_price = median(heating_cost / our_heating_kWh)`; for lighting (always
electric) `elec_price = median(lighting_cost / our_lighting_kWh)`.
2. For every cert: `predicted_cost = our_component_kWh × calibrated_price`.
`delta = predicted - lodged`. The component with the biggest |delta| is the
broken one; the sign gives the direction (predicted > lodged => we
over-estimate that component's energy => we under-rate SAP).
`back_calc_kWh = lodged_cost / price` is a numeric energy target to fix to.
3. Accuracy is ~±10%: good enough for component triage + fix targets, NOT 1e-4.
OUTPUT
------
- Calibrated prices + how many certs fed each calibration.
- Cluster table: (component x direction) counts + mean |SAP err| + mean delta£.
- The worst certs per cluster (to pick the next slice).
- A full per-cert CSV at <cache>/_cost_decomposition.csv.
USAGE
-----
PYTHONPATH=/workspaces/model python scripts/decompose_api_cost_error.py
Reads the same cache as `eval_api_sap_accuracy.py` (default `/tmp/epc_2026_sample`,
overridable via `EPC_SAMPLE_CACHE`).
"""
import os
import csv
import json
import math
import statistics
from collections import Counter, defaultdict
from pathlib import Path
from typing import Any, Optional, cast
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
from domain.sap10_calculator.calculator import SapResult, calculate_sap_from_inputs
from domain.sap10_calculator.rdsap.cert_to_inputs import SAP_10_2_SPEC_PRICES, cert_to_inputs
from domain.sap10_calculator.tables.table_12 import API_FUEL_TO_TABLE_12
CACHE = Path(os.environ.get("EPC_SAMPLE_CACHE", "/tmp/epc_2026_sample"))
# Certs feed the price calibration only when they are this accurate already.
CAL_TOL = 0.4
# A cert is flagged "broken on component X" only when |delta£| clears this floor,
# so tiny noise certs land in a "balanced" bucket rather than a spurious cluster.
DELTA_FLOOR_GBP = 40.0
# Table-12 fuel-code groups for assigning a calibrated consumer price.
GAS_CODE = 1 # mains gas
ELEC_CODES = frozenset({30, 31, 32, 33, 34, 35, 38, 40, 41}) # std/off-peak/HP
def _fuel_kind(code: Optional[int]) -> str:
"""Classify a fuel code for pricing: gas / elec / other.
The calculator stores the *raw API* fuel enum (e.g. 26 = mains gas), so
translate through `API_FUEL_TO_TABLE_12` first; Table-12 codes (30+) are
not keys in that map and pass through unchanged.
"""
if code is None:
return "other"
t12 = API_FUEL_TO_TABLE_12.get(code, code)
if t12 == GAS_CODE:
return "gas"
if t12 in ELEC_CODES:
return "elec"
return "other"
def _lodged_cost(doc: dict[str, Any], key: str) -> Optional[float]:
obj: Any = doc.get(key)
if isinstance(obj, dict):
val: Any = cast(dict[str, Any], obj).get("value")
if isinstance(val, (int, float)):
return float(val)
return None
def _heating_kwh(res: SapResult) -> float:
    """Total space-heating delivered fuel: main + main-2 + secondary systems."""
    main_kwh = res.main_heating_fuel_kwh_per_yr
    main_2_kwh = res.main_2_heating_fuel_kwh_per_yr
    secondary_kwh = res.secondary_heating_fuel_kwh_per_yr
    return main_kwh + main_2_kwh + secondary_kwh
def main() -> None:
    """Run the calibrate-then-compare decomposition over every cached cert.

    For each cert JSON in `CACHE` this computes our SAP result, then
    calibrates gas / electricity consumer prices on the certs whose
    |SAP err| is below `CAL_TOL`, derives per-component predicted-minus-lodged
    cost deltas, prints the cluster report and writes the per-cert CSV.
    Certs that cannot be processed are tallied in `cat` and skipped.
    """
    files = sorted(CACHE.glob("????-????-????-????-????.json"))
    records: list[dict[str, Any]] = []
    # Outcome tally per cert (bad_json / no_lodged_sap / raise / computed ...).
    cat: Counter[str] = Counter()
    for f in files:
        cert = f.stem
        try:
            doc: dict[str, Any] = json.loads(f.read_text())
        except Exception:
            cat["bad_json"] += 1
            continue
        lodged_sap = doc.get("energy_rating_current")
        if lodged_sap is None:
            cat["no_lodged_sap"] += 1
            continue
        try:
            epc = EpcPropertyDataMapper.from_api_response(doc)
        except ValueError as e:
            # A ValueError mentioning "Unsupported EPC schema" gets its own
            # bucket; any other mapper failure is a generic "raise".
            cat["unsupported_schema" if "Unsupported EPC schema" in str(e) else "raise"] += 1
            continue
        except Exception:
            cat["raise"] += 1
            continue
        try:
            res = calculate_sap_from_inputs(cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES))
        except Exception:
            cat["calc_raise"] += 1
            continue
        if not math.isfinite(res.sap_score_continuous):
            cat["non_finite"] += 1
            continue
        cat["computed"] += 1
        records.append({
            "cert": cert,
            # Signed error: our continuous SAP minus the lodged rating.
            "sap_err": res.sap_score_continuous - lodged_sap,
            "heat_kwh": _heating_kwh(res),
            "hw_kwh": res.hot_water_kwh_per_yr,
            "light_kwh": res.lighting_kwh_per_yr,
            "heat_fuel": _fuel_kind(res.main_heating_fuel_code),
            "hw_fuel": _fuel_kind(res.hot_water_fuel_code),
            "lodged_heat": _lodged_cost(doc, "heating_cost_current"),
            "lodged_hw": _lodged_cost(doc, "hot_water_cost_current"),
            "lodged_light": _lodged_cost(doc, "lighting_cost_current"),
            "mains_gas": _mains_gas(doc),
            "roof_construction": _roof_construction(doc),
        })
    # --- Calibrate consumer prices on the already-accurate certs ------------
    gas_samples: list[float] = []
    elec_samples: list[float] = []
    for r in records:
        if abs(r["sap_err"]) >= CAL_TOL:
            continue
        # Truthiness on the lodged cost skips both missing (None) and zero costs.
        if r["heat_fuel"] == "gas" and r["lodged_heat"] and r["heat_kwh"] > 0:
            gas_samples.append(r["lodged_heat"] / r["heat_kwh"])
        if r["lodged_light"] and r["light_kwh"] > 0:
            elec_samples.append(r["lodged_light"] / r["light_kwh"])
    # Hard-coded fallbacks apply only when no cert qualified for calibration.
    # NOTE(review): origin of the 0.0809 / 0.2839 defaults is not shown here — confirm.
    gas_price = statistics.median(gas_samples) if gas_samples else 0.0809
    elec_price = statistics.median(elec_samples) if elec_samples else 0.2839
    # NOTE(review): "other" fuels are priced at the gas rate — presumably a
    # deliberate approximation; confirm.
    price_by_kind = {"gas": gas_price, "elec": elec_price, "other": gas_price}
    print("=" * 74)
    print("CALIBRATED CONSUMER PRICES (median over |SAP err| < %.2f certs)" % CAL_TOL)
    print(f" gas £{gas_price:.4f}/kWh (n={len(gas_samples)})")
    print(f" elec £{elec_price:.4f}/kWh (n={len(elec_samples)})")
    # --- Per-cert component deltas ------------------------------------------
    for r in records:
        gp = price_by_kind[r["heat_fuel"]]
        hwp = price_by_kind[r["hw_fuel"]]
        r["pred_heat"] = r["heat_kwh"] * gp
        r["pred_hw"] = r["hw_kwh"] * hwp
        # Lighting is always priced at the electricity rate.
        r["pred_light"] = r["light_kwh"] * elec_price
        r["d_heat"] = _delta(r["pred_heat"], r["lodged_heat"])
        r["d_hw"] = _delta(r["pred_hw"], r["lodged_hw"])
        r["d_light"] = _delta(r["pred_light"], r["lodged_light"])
        # back-calculated energy targets (what the lodged cost implies)
        r["tgt_heat_kwh"] = (r["lodged_heat"] / gp) if r["lodged_heat"] else None
        r["tgt_hw_kwh"] = (r["lodged_hw"] / hwp) if r["lodged_hw"] else None
        # dominant broken component
        comp, delta = _dominant(r)
        r["broken"] = comp
        r["broken_delta"] = delta
        # Cluster label: "balanced", or "<component>:high|low" by delta sign.
        r["cluster"] = (
            "balanced" if comp is None
            else f"{comp}:{'high' if delta > 0 else 'low'}"
        )
    _print_clusters(records)
    _write_csv(records)
    print("\nCategories:", dict(cat))
    print(f"Full per-cert CSV -> {CACHE / '_cost_decomposition.csv'}")
def _mains_gas(doc: dict[str, Any]) -> Any:
es: Any = doc.get("sap_energy_source") or {}
return es.get("mains_gas")
def _roof_construction(doc: dict[str, Any]) -> Optional[int]:
bps: Any = doc.get("sap_building_parts") or []
if bps and isinstance(bps[0], dict):
rc: Any = bps[0].get("roof_construction")
return rc if isinstance(rc, int) else None
return None
def _delta(pred: float, lodged: Optional[float]) -> Optional[float]:
return None if lodged is None else pred - lodged
def _dominant(r: dict[str, Any]) -> tuple[Optional[str], float]:
"""The component with the largest |delta£| above the floor, with its delta."""
candidates = [
("heat", r["d_heat"]),
("hw", r["d_hw"]),
("light", r["d_light"]),
]
scored = [(c, d) for c, d in candidates if d is not None and abs(d) >= DELTA_FLOOR_GBP]
if not scored:
return None, 0.0
comp, delta = max(scored, key=lambda cd: abs(cd[1]))
return comp, delta
def _print_clusters(records: list[dict[str, Any]]) -> None:
    """Print the per-cluster summary table and the worst certs of the heat clusters.

    Each record must already carry the "cluster", "sap_err" and "broken_delta"
    keys; the worst-cert listing also reads "cert", "heat_kwh",
    "tgt_heat_kwh" and "roof_construction".
    """
    by_cluster: dict[str, list[dict[str, Any]]] = defaultdict(list)
    for r in records:
        by_cluster[r["cluster"]].append(r)
    print("=" * 74)
    print(f"CLUSTERS by (component x direction) [delta floor £{DELTA_FLOOR_GBP:.0f}]")
    print(f" {'cluster':14s} {'n':>4s} {'mean|sapErr|':>12s} {'meanΔ£':>8s} {'within0.5':>9s}")
    # Largest clusters first.
    order = sorted(by_cluster.items(), key=lambda kv: -len(kv[1]))
    for name, rs in order:
        n = len(rs)
        mean_abs = sum(abs(r["sap_err"]) for r in rs) / n
        mean_delta = sum(r["broken_delta"] for r in rs) / n
        # Percentage of the cluster already within 0.5 SAP points.
        within = 100.0 * sum(1 for r in rs if abs(r["sap_err"]) < 0.5) / n
        print(f" {name:14s} {n:>4d} {mean_abs:>12.2f} {mean_delta:>+8.0f} {within:>8.1f}%")
    # The fabric/heating clusters are the fix targets — show their worst certs.
    for name in ("heat:high", "heat:low"):
        rs = by_cluster.get(name, [])
        if not rs:
            continue
        print("-" * 74)
        print(f"WORST in {name} (broken_delta = predicted - lodged £):")
        print(f" {'cert':22s} {'sapErr':>7s} {'Δ£':>6s} {'ourkWh':>7s} {'tgtkWh':>7s} roof")
        # Up to 15 certs, ordered by descending |SAP err|.
        worst = sorted(rs, key=lambda r: -abs(r["sap_err"]))[:15]
        for r in worst:
            tgt = r["tgt_heat_kwh"]
            print(f" {r['cert']:22s} {r['sap_err']:+7.2f} {r['broken_delta']:+6.0f} "
                  f"{r['heat_kwh']:7.0f} {('%7.0f' % tgt) if tgt else ' -'} "
                  f"{str(r['roof_construction'])}")
def _write_csv(records: list[dict[str, Any]]) -> None:
    """Write one CSV row per record to `<CACHE>/_cost_decomposition.csv`.

    Rows are ordered by descending |sap_err|; each cell goes through `_fmt`,
    and a column missing from a record is written as an empty cell.
    """
    fieldnames = [
        "cert", "cluster", "broken", "broken_delta", "sap_err",
        "heat_kwh", "tgt_heat_kwh", "d_heat", "lodged_heat", "pred_heat",
        "hw_kwh", "tgt_hw_kwh", "d_hw", "lodged_hw", "pred_hw",
        "light_kwh", "d_light", "lodged_light", "pred_light",
        "heat_fuel", "hw_fuel", "mains_gas", "roof_construction",
    ]
    with open(CACHE / "_cost_decomposition.csv", "w", newline="") as handle:
        writer = csv.DictWriter(handle, fieldnames=fieldnames, extrasaction="ignore")
        writer.writeheader()
        ranked = sorted(records, key=lambda rec: -abs(rec["sap_err"]))
        for rec in ranked:
            row = {}
            for column in fieldnames:
                row[column] = _fmt(rec.get(column))
            writer.writerow(row)
def _fmt(v: Any) -> Any:
return round(v, 2) if isinstance(v, float) else v
if __name__ == "__main__":
main()

View file

@ -0,0 +1,290 @@
"""Render a human-readable "Elmhurst SAP input sheet" for one or more certs.
WHAT THIS IS FOR
----------------
The debugging companion to `eval_api_sap_accuracy.py`: once that script names a
worst-offender cert, this dumps everything the mapper hands the calculator —
the *codes the calculator actually sees* (`from_api_response` ->
`EpcPropertyData`) — in the same readable layout as the worked
`sap worksheets/golden fixture debugging/6035_elmhurst_input_sheet.md`, plus
the lodged reference outputs the worksheet must reproduce and our own
continuous SAP next to the lodged value. You read it side-by-side with the
real Elmhurst Summary / P960 worksheet PDF to localise where we diverge.
USAGE
-----
PYTHONPATH=/workspaces/model python scripts/elmhurst_input_sheet.py <cert> [<cert> ...]
# write each sheet to a file instead of stdout:
PYTHONPATH=/workspaces/model python scripts/elmhurst_input_sheet.py --out-dir "sap worksheets/golden fixture debugging" <cert>
Certs are read from the cache built by `fetch_2026_epc_sample.py` (default
`/tmp/epc_2026_sample`, overridable via `EPC_SAMPLE_CACHE`). A bare cert
number resolves to `<cache>/<cert>.json`; an explicit path is also accepted.
"""
from __future__ import annotations
import json
import math
import os
import sys
from pathlib import Path
from typing import Any, Optional
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
from domain.sap10_calculator.calculator import calculate_sap_from_inputs
from domain.sap10_calculator.rdsap.cert_to_inputs import SAP_10_2_SPEC_PRICES, cert_to_inputs
CACHE = Path(os.environ.get("EPC_SAMPLE_CACHE", "/tmp/epc_2026_sample"))
def _num(v: Any) -> Any:
"""Unwrap a Measurement (`.value`) or pass an int/float/str through."""
return getattr(v, "value", v)
def _resolve(cert_arg: str) -> Path:
p = Path(cert_arg)
if p.suffix == ".json" and p.exists():
return p
cached = CACHE / f"{cert_arg}.json"
if cached.exists():
return cached
raise FileNotFoundError(
f"No cached JSON for {cert_arg!r} (looked at {cached}). "
f"Run scripts/fetch_2026_epc_sample.py or set EPC_SAMPLE_CACHE."
)
def _our_sap(epc: Any) -> str:
"""Our continuous SAP, or the exception that blocks it."""
try:
result = calculate_sap_from_inputs(cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES))
cont: float = result.sap_score_continuous
return f"{cont:.4f}" if math.isfinite(cont) else f"non-finite ({cont})"
except Exception as e: # debugging tool — surface, don't swallow
return f"RAISED {type(e).__name__}: {e}"
def render(cert: str, doc: dict[str, Any]) -> str:
    """Render the Elmhurst-style input sheet for one cert as text.

    Args:
        cert: Certificate identifier shown in the sheet heading.
        doc: Raw API response for the cert. It is mapped through
            `EpcPropertyDataMapper.from_api_response`, and the lodged
            reference outputs are read from it directly.

    Returns:
        The sheet as newline-joined lines with a trailing newline.
    """
    epc = EpcPropertyDataMapper.from_api_response(doc)
    out: list[str] = []
    w = out.append
    # --- header --------------------------------------------------------
    w(f"# Cert {cert} — Elmhurst SAP input sheet\n")
    addr = ", ".join(
        str(x) for x in (epc.address_line_1, epc.post_town, epc.postcode) if x
    )
    w(f"Address: {addr}")
    w(
        f"Dwelling: {epc.dwelling_type} built_form={epc.built_form} "
        f"property_type={epc.property_type}"
    )
    w(
        # Trailing space keeps the TFA value from fusing with the next field.
        f"TFA: {epc.total_floor_area_m2} "
        f"habitable_rooms={epc.habitable_rooms_count} "
        f"heated_rooms={epc.heated_rooms_count}"
    )
    w(
        f"Extensions: {epc.extensions_count} region_code={epc.region_code} "
        f"measurement_type={epc.measurement_type}"
    )
    w(
        f"Pressure test: {epc.pressure_test if epc.pressure_test is not None else '(not tested)'} "
        f"door_count={epc.door_count}"
    )
    w(
        f"Conservatory: type={epc.conservatory_type} "
        f"heated_sep_consv={str(epc.has_heated_separate_conservatory).lower()}"
    )
    # --- our vs lodged (debug aid) -------------------------------------
    lodged = doc.get("energy_rating_current")
    our = _our_sap(epc)
    delta = ""
    try:
        # `our` is only numeric when the calculation succeeded and was finite.
        if lodged is not None and not our.startswith(("RAISED", "non-finite")):
            delta = f" Δ={float(our) - float(lodged):+.4f} (we lodged)"
    except ValueError:
        pass
    w(f"\n## SAP: OURS={our} LODGED={lodged}{delta}")
    # --- element descriptions (lodged) ---------------------------------
    w("\n## Element descriptions (lodged)")
    for label, elems in (
        ("WALL", epc.walls), ("ROOF", epc.roofs), ("FLOOR", epc.floors),
    ):
        for el in elems or []:
            w(f" {label}: {el.description}")
    if epc.window:
        w(f" WINDOW: {epc.window.description}")
    for el in epc.main_heating or []:
        w(f" MAIN HEATING: {el.description}")
    if epc.hot_water:
        w(f" HOT WATER: {epc.hot_water.description}")
    if epc.lighting:
        w(f" LIGHTING: {epc.lighting.description}")
    if epc.secondary_heating:
        w(f" SECONDARY: {epc.secondary_heating.description}")
    # --- building parts / dimensions -----------------------------------
    # NB direct attribute access (not getattr-with-default) so a future
    # domain rename fails loudly here rather than silently printing None
    # over real data. Field names are the `epc_property_data` domain
    # types the mapper emits (NOT the `schema` dataclasses).
    w("\n## Building parts / dimensions")
    for bp in epc.sap_building_parts or []:
        w(f"### {bp.identifier} (part {bp.building_part_number}, age {bp.construction_age_band})")
        w(
            f" wall_construction={bp.wall_construction} "
            f"insulation_type={bp.wall_insulation_type} "
            f"ins_thick={bp.wall_insulation_thickness} "
            f"wall_thickness={bp.wall_thickness_mm}mm "
            f"measured={bp.wall_thickness_measured} "
            f"dry_lined={bp.wall_dry_lined}"
        )
        w(f" party_wall_construction={bp.party_wall_construction}")
        w(
            f" roof_construction={bp.roof_construction} ({bp.roof_construction_type}) "
            f"ins_location={bp.roof_insulation_location} "
            f"ins_thick={bp.roof_insulation_thickness}"
        )
        w(
            f" floor_heat_loss={bp.floor_heat_loss} ({bp.floor_type}) "
            f"floor_ins_thick={bp.floor_insulation_thickness}"
        )
        rir = bp.sap_room_in_roof
        if rir is not None:
            w(
                f" ROOM-IN-ROOF: floor_area={_num(rir.floor_area)} "
                f"age={rir.construction_age_band} "
                f"gable1={rir.gable_1_length_m}x{rir.gable_1_height_m}m "
                f"gable2={rir.gable_2_length_m}x{rir.gable_2_height_m}m "
                f"common_wall={rir.common_wall_length_m}m"
            )
        for fd in bp.sap_floor_dimensions or []:
            w(
                f" floor {fd.floor}: area={fd.total_floor_area_m2} "
                f"height={fd.room_height_m} "
                f"HLP={fd.heat_loss_perimeter_m} "
                f"party_wall_len={fd.party_wall_length_m} "
                f"floor_constr={fd.floor_construction} floor_ins={fd.floor_insulation} "
                f"exposed={fd.is_exposed_floor}"
            )
    # --- windows -------------------------------------------------------
    windows = epc.sap_windows or []
    w(f"\n## Windows ({len(windows)})")
    for i, win in enumerate(windows):
        w(
            f" W{i}: {win.window_width}x{win.window_height}m "
            f"orient={win.orientation} "
            f"glazing_type={win.glazing_type} "
            f"gap={win.glazing_gap} "
            f"frame={win.frame_material} "
            f"draught={win.draught_proofed} "
            f"loc(bp)={win.window_location} "
            f"wall_type={win.window_wall_type} "
            f"frame_factor={win.frame_factor}"
        )
    # --- doors / heating / water / vent --------------------------------
    w("\n## Doors / heating / water / vent")
    w(
        f" door_count={epc.door_count} "
        f"insulated_door_count={epc.insulated_door_count}"
    )
    sh = epc.sap_heating
    for mh in sh.main_heating_details or []:
        w(
            f" MAIN: sap_code={mh.sap_main_heating_code} "
            f"fuel={mh.main_fuel_type} "
            f"category={mh.main_heating_category} "
            f"emitter={mh.heat_emitter_type} "
            f"emit_temp={mh.emitter_temperature} "
            f"control={mh.main_heating_control} "
            f"fghrs={mh.has_fghrs} "
            f"fan_flue={mh.fan_flue_present} "
            f"flue_type={mh.boiler_flue_type} "
            f"pump_age={mh.central_heating_pump_age} "
            f"data_source={mh.main_heating_data_source} "
            f"idx={mh.main_heating_index_number} "
            f"fraction={mh.main_heating_fraction}"
        )
    w(
        f" WATER: code={sh.water_heating_code} "
        f"fuel={sh.water_heating_fuel} "
        f"cylinder_size={sh.cylinder_size} "
        f"has_cyl={str(epc.has_hot_water_cylinder).lower()} "
        f"cyl_ins_type={sh.cylinder_insulation_type} "
        f"cyl_ins_thick={sh.cylinder_insulation_thickness_mm} "
        f"immersion={sh.immersion_heating_type} "
        f"solar_wh={str(epc.solar_water_heating).lower()} "
        f"secondary_fuel={sh.secondary_fuel_type} "
        f"secondary_type={sh.secondary_heating_type}"
    )
    es = epc.sap_energy_source
    # The raw PV block is read from the API document, which may carry
    # `sap_energy_source` as null or another non-object; report null then
    # instead of raising.
    raw_energy_source = doc.get("sap_energy_source")
    pv_raw = (
        raw_energy_source.get("photovoltaic_supply")
        if isinstance(raw_energy_source, dict)
        else None
    )
    w(
        f" ENERGY SOURCE: mains_gas={es.mains_gas} "
        f"meter_type={es.meter_type} "
        f"wind_turbines={es.wind_turbines_count} "
        f"pv_raw={json.dumps(pv_raw)}"
    )
    w(
        f" VENT: fixed_AC={str(epc.has_fixed_air_conditioning).lower()} "
        f"LIGHTING: led={epc.led_fixed_lighting_bulbs_count} "
        f"cfl={epc.cfl_fixed_lighting_bulbs_count} "
        f"incandescent={epc.incandescent_fixed_lighting_bulbs_count}"
    )
    # --- lodged reference outputs (the target) -------------------------
    w("\n## Lodged reference outputs (the target a worksheet must reproduce)")

    def _d(k: str) -> Any:
        # Raw lodged value straight from the API document (None when absent).
        return doc.get(k)

    w(
        f" energy_rating_current={_d('energy_rating_current')} "
        f"env_impact_current={_d('environmental_impact_current')}"
    )
    w(
        f" energy_consumption_current={_d('energy_consumption_current')} "
        f"co2_emissions_current={_d('co2_emissions_current')} "
        f"(per_floor_area={_d('co2_emissions_current_per_floor_area')})"
    )
    w(
        f" heating_cost_current={_d('heating_cost_current')} "
        f"hot_water_cost_current={_d('hot_water_cost_current')} "
        f"lighting_cost_current={_d('lighting_cost_current')}"
    )
    return "\n".join(out) + "\n"
def main(argv: list[str]) -> int:
    """CLI entry point: render one sheet per cert argument.

    Args:
        argv: Command-line arguments without the program name. Positional
            arguments are cert numbers or JSON paths. `--out-dir <dir>`
            writes each sheet to `<dir>/<cert>_elmhurst_input_sheet.md`
            instead of printing it. Other `--` options are ignored.

    Returns:
        0 on success; 2 when no cert was given or `--out-dir` has no value.
    """
    out_dir: Optional[Path] = None
    args: list[str] = []
    # Consume the option value by position rather than by comparing strings:
    # Path() normalises values such as "out/" or "./out", so a string match
    # would leave the directory in the cert list.
    pending = iter(argv)
    for token in pending:
        if token == "--out-dir":
            value = next(pending, None)
            if value is None:
                print("--out-dir requires a directory argument", file=sys.stderr)
                return 2
            out_dir = Path(value)
        elif not token.startswith("--"):
            args.append(token)
    if not args:
        print(__doc__)
        return 2
    for cert_arg in args:
        path = _resolve(cert_arg)
        cert = path.stem
        doc = json.loads(path.read_text(encoding="utf-8"))
        sheet = render(cert, doc)
        if out_dir is not None:
            out_dir.mkdir(parents=True, exist_ok=True)
            dest = out_dir / f"{cert}_elmhurst_input_sheet.md"
            # The sheet contains non-ASCII characters; do not rely on the
            # platform default encoding.
            dest.write_text(sheet, encoding="utf-8")
            print(f"wrote {dest}")
        else:
            print(sheet)
    return 0
if __name__ == "__main__":
raise SystemExit(main(sys.argv[1:]))

View file

@ -0,0 +1,188 @@
"""Profile API-path SAP error against RAW API-response characteristics.
WHAT THIS IS FOR
----------------
`eval_api_sap_accuracy.py` tells us HOW big the error is; this tells us
WHICH raw-API characteristics the error correlates with so we can find
systematic "API-path handling" gaps (a field dropped/mis-mapped on the
`from_api_response` -> `cert_to_inputs` path) rather than per-cert noise.
It deliberately profiles against the RAW JSON (`/tmp/epc_2026_sample/
<cert>.json`), NOT the mapped `EpcPropertyData`, so a feature that the
mapper silently drops still shows up here as an error-correlated bucket.
METHOD
------
1. Read `<cache>/_results.csv` (written by eval) cert -> signed err.
2. For each computed cert, extract a rich feature set from its raw JSON.
3. For every (feature, value) bucket: n, % within 0.5, mean signed,
mean |err|. Rank buckets by "wasted accuracy" = n_outside_0.5 ×
mean|err| so the biggest systematic levers float to the top.
4. Also dump the worst |err| certs with their full raw feature profile.
USAGE
-----
PYTHONPATH=/workspaces/model python scripts/profile_api_error.py
PYTHONPATH=/workspaces/model python scripts/profile_api_error.py --min-n 12
"""
from __future__ import annotations
import csv
import json
import os
import statistics as stats
import sys
from collections import defaultdict
from pathlib import Path
from typing import Any, Optional
CACHE = Path(os.environ.get("EPC_SAMPLE_CACHE", "/tmp/epc_2026_sample"))
def _g(d: dict[str, Any], *path: str) -> Any:
"""Nested-get; returns None on any missing link."""
cur: Any = d
for k in path:
if not isinstance(cur, dict):
return None
cur = cur.get(k)
return cur
def features(doc: dict[str, Any]) -> dict[str, Any]:
    """Pull the raw-API characteristics to profile the SAP error against.

    Values are taken verbatim from the raw document so that unmapped or
    unusual codes form their own bucket instead of being normalised away.
    Per-cert heating and wall/roof fields come from the FIRST main-heating
    entry and the FIRST building part; floor and roof codes are the sorted,
    comma-joined distinct values across all building parts.
    """

    def _outlet(entry: Any) -> dict[str, Any]:
        # A shower entry may wrap its fields under "shower_outlet"; a non-dict
        # entry contributes nothing.
        return entry.get("shower_outlet", entry) if isinstance(entry, dict) else {}

    heating = doc.get("sap_heating") or {}
    energy = doc.get("sap_energy_source") or {}
    main_details = heating.get("main_heating_details") or [{}]
    first_main = main_details[0]
    parts = doc.get("sap_building_parts") or []
    first_part = parts[0] if parts else {}
    pv_supply = energy.get("photovoltaic_supply")
    if isinstance(pv_supply, dict):
        pv_present = bool(pv_supply.get("pv_arrays"))
    else:
        pv_present = bool(pv_supply)
    outlets = heating.get("shower_outlets") or []
    if isinstance(outlets, dict):
        outlets = [outlets]
    outlet_types = {_outlet(s).get("shower_outlet_type") for s in outlets}
    outlet_types.discard(None)
    shower_type_list = sorted(outlet_types)
    # any building part lodging a non-ground floor_heat_loss
    floor_set = {bp.get("floor_heat_loss") for bp in parts}
    floor_set.discard(None)
    floor_list = sorted(floor_set)
    roof_set = {bp.get("roof_construction") for bp in parts}
    roof_set.discard(None)
    roof_list = sorted(roof_set)
    return {
        "dwelling_type": doc.get("dwelling_type"),
        "property_type": doc.get("property_type"),
        "built_form": doc.get("built_form"),
        "age_band": doc.get("construction_age_band"),
        "mains_gas": energy.get("mains_gas"),
        "meter_type": energy.get("meter_type"),
        "main_heat_cat": first_main.get("main_heating_category"),
        "main_sap_code": first_main.get("sap_main_heating_code"),
        "main_control": first_main.get("main_heating_control"),
        "main_data_source": first_main.get("main_heating_data_source"),
        "has_pcdb_main": first_main.get("main_heating_index_number") is not None,
        "main_fuel": first_main.get("main_fuel_type"),
        "has_secondary": (doc.get("secondary_heating") or {}).get("description") not in (None, "None"),
        "whc": heating.get("water_heating_code"),
        "water_fuel": heating.get("water_heating_fuel"),
        "has_cylinder": doc.get("has_hot_water_cylinder"),
        "immersion_type": heating.get("immersion_heating_type"),
        "n_building_parts": len(parts),
        "floor_codes": ",".join(str(c) for c in floor_list),
        "roof_codes": ",".join(str(c) for c in roof_list),
        "wall_construction": first_part.get("wall_construction"),
        "wall_insulation_type": first_part.get("wall_insulation_type"),
        "roof_insulation_thickness": first_part.get("roof_insulation_thickness"),
        "has_pv": pv_present,
        "has_wwhrs": any(
            _outlet(s).get("shower_wwhrs") not in (None, 1) for s in outlets
        ),
        "shower_types": ",".join(str(t) for t in shower_type_list),
        "conservatory": doc.get("conservatory_type"),
        "mech_vent": doc.get("mechanical_ventilation"),
        "is_flat": doc.get("property_type") == 2,
    }
def _errors_by_feature_value(
    rows: list[tuple[str, float, dict[str, Any]]], feature: str
) -> dict[str, list[float]]:
    """Group the signed errors in `rows` by the stringified value of `feature`."""
    groups: dict[str, list[float]] = defaultdict(list)
    for _, err, feats in rows:
        groups[str(feats.get(feature))].append(err)
    return groups


def main() -> None:
    """Profile the signed SAP errors in `<CACHE>/_results.csv` against raw-API features.

    Prints the overall accuracy line, the buckets carrying the most error
    (ranked by n_outside_0.5 × mean|err|) and the most biased buckets (ranked
    by |mean signed err|). Buckets with fewer than `--min-n` certs
    (default 10) are ignored. Exits with a message when the results file is
    missing, when `--min-n` has no value, or when no cert could be profiled.
    """
    min_n = 10
    if "--min-n" in sys.argv:
        value_idx = sys.argv.index("--min-n") + 1
        if value_idx >= len(sys.argv):
            sys.exit("--min-n requires an integer value")
        min_n = int(sys.argv[value_idx])
    results_path = CACHE / "_results.csv"
    if not results_path.exists():
        sys.exit(f"no {results_path}; run eval_api_sap_accuracy.py first")
    errs: dict[str, float] = {}
    # Context manager so the results file is closed once it has been read.
    with results_path.open(newline="") as fh:
        for r in csv.DictReader(fh):
            errs[r["cert"]] = float(r["err"])
    # cert -> features
    rows: list[tuple[str, float, dict[str, Any]]] = []
    for cert, err in errs.items():
        f = CACHE / f"{cert}.json"
        if not f.exists():
            continue
        try:
            doc = json.loads(f.read_text())
        except (OSError, ValueError):
            # Unreadable or malformed cache entry: skip it, best-effort.
            continue
        if not isinstance(doc, dict):
            # features() needs a JSON object; anything else cannot be profiled.
            continue
        rows.append((cert, err, features(doc)))
    n_all = len(rows)
    if n_all == 0:
        # Nothing to average or bucket; stop before the divisions below.
        sys.exit(f"no profilable certs: none of the {len(errs)} certs in "
                 f"{results_path} has a readable JSON object in {CACHE}")
    base_within = sum(1 for _, e, _ in rows if abs(e) < 0.5) / n_all * 100
    print(f"profiled {n_all} computed certs | overall within-0.5 = {base_within:.1f}% "
          f"| mean signed {stats.mean(e for _, e, _ in rows):+.3f} "
          f"| mean|err| {stats.mean(abs(e) for _, e, _ in rows):.3f}")
    print("=" * 100)
    # per-feature bucket analysis: build both rankings in a single pass
    feat_names = list(rows[0][2].keys())
    bucket_lines: list[tuple[float, str]] = []
    biased: list[tuple[float, str]] = []
    for fn in feat_names:
        for val, es in _errors_by_feature_value(rows, fn).items():
            n = len(es)
            if n < min_n:
                continue
            w05 = sum(1 for e in es if abs(e) < 0.5)
            within = w05 / n * 100
            signed = stats.mean(es)
            mabs = stats.mean(abs(e) for e in es)
            n_out = n - w05
            waste = n_out * mabs  # ranking: how much total error this bucket carries
            line = (f" {fn:22s}={val:<22.22s} n={n:4d} within0.5={within:4.0f}% "
                    f"signed={signed:+6.2f} mean|err|={mabs:5.2f} [waste={waste:6.0f}]")
            bucket_lines.append((waste, line))
            biased.append((abs(signed),
                           f" {fn:22s}={val:<22.22s} n={n:4d} signed={signed:+6.2f} "
                           f"mean|err|={mabs:5.2f}"))
    print("TOP ERROR-CARRYING BUCKETS (ranked by n_outside_0.5 × mean|err|; min-n="
          f"{min_n}):")
    for _, line in sorted(bucket_lines, key=lambda x: -x[0])[:45]:
        print(line)
    print("=" * 100)
    print("MOST BIASED BUCKETS (|mean signed| — systematic over/under-rate, min-n="
          f"{min_n}):")
    for _, line in sorted(biased, key=lambda x: -x[0])[:25]:
        print(line)
if __name__ == "__main__":
main()

View file

@ -49,6 +49,11 @@ def _sap_result(
lighting_kwh_per_yr=0.0,
appliances_kwh_per_yr=0.0,
cooking_kwh_per_yr=0.0,
main_heating_fuel_code=None,
main_2_heating_fuel_code=None,
secondary_heating_fuel_code=None,
hot_water_fuel_code=None,
pv_exported_kwh_per_yr=0.0,
primary_energy_kwh_per_yr=0.0,
primary_energy_kwh_per_m2=primary_energy_kwh_per_m2,
monthly=(),

View file

@ -35,6 +35,22 @@ def test_table_32_codes_map_to_their_billing_fuel(code: int, fuel: Fuel) -> None
assert sap_code_to_fuel(code) == fuel
@pytest.mark.parametrize(
    ("api_code", "fuel"),
    [
        (26, Fuel.MAINS_GAS),  # gov-API mains-gas enum -> Table 32 code 1
        (0, Fuel.ELECTRICITY),  # API "electricity" -> Table 32 code 30
        (25, Fuel.HEAT_NETWORK),  # API community heat -> Table 32 code 41
        (14, Fuel.COAL),  # API house coal -> Table 32 code 11
    ],
)
def test_raw_api_fuel_codes_normalize_before_mapping(api_code: int, fuel: Fuel) -> None:
    """Each raw gov-API fuel code in the parameter list resolves to its billing `Fuel`."""
    # Arrange — the calculator may carry a raw gov-API fuel code (not yet a Table
    # 32 code); sap_code_to_fuel normalizes via the calculator's own helper first.
    # Act / Assert
    assert sap_code_to_fuel(api_code) == fuel
def test_an_unmapped_code_raises_rather_than_guessing() -> None:
# Arrange — code 10 (dual fuel) has no single billing fuel.
# Act / Assert

View file

@ -2164,6 +2164,36 @@ def test_secondary_electric_off_peak_bills_at_table_12a_direct_acting_high_rate(
assert abs(secondary_rate_gbp_per_kwh - 0.1529) <= 1e-6
def test_sap_table_3_primary_loss_zero_for_direct_acting_electric_boiler() -> None:
    """A SAP-191 direct-acting electric boiler incurs no primary circuit loss.

    SAP 10.2 Table 3 (PDF p.160) names "Direct-acting electric boiler"
    verbatim in the primary-loss zero list (alongside electric immersion,
    combi, CPSU, integral-vessel heat pump). RdSAP 10 §12 (p.62)
    classifies SAP code 191 as the "direct-acting electric boiler", so a
    191 main feeding a cylinder (WHC 901, "from main system") incurs NO
    primary circuit loss — the DHW is immersion-heated, with no primary
    pipework.

    Regression guard: the cat-{1,2} branch in `_primary_loss_applies`
    mis-fired here (main_heating_category=2), returning True and adding
    ~1177 kWh/yr of phantom primary loss to the cat-2 electric-flat
    segment (cert 2474 worksheet (59) = 0).
    """
    # Arrange
    direct_acting_boiler = MainHeatingDetail(
        has_fghrs=False,
        main_fuel_type=29,  # electricity
        heat_emitter_type=1,
        emitter_temperature="NA",
        main_heating_control=2106,
        main_heating_category=2,  # "Boiler and radiators, electric"
        sap_main_heating_code=191,  # direct-acting electric boiler
    )

    # Act — cylinder present, WHC 901 (HW from the electric boiler).
    loss_applies = _primary_loss_applies(
        direct_acting_boiler,
        True,
        None,
        water_heating_code=901,
    )

    # Assert — direct-acting electric boiler → Table 3 zero list → no loss.
    assert loss_applies is False
def test_sap_table_3_primary_loss_applies_to_dedicated_water_heating_boiler_circulator() -> None:
# Arrange — SAP 10.2 Table 3 (PDF p.160) row 1: primary circuit loss
# applies when "hot water is heated by a heat generator (e.g. boiler)
@ -2515,6 +2545,24 @@ def test_elmhurst_simplified_rir_drops_placeholder_roof_surfaces() -> None:
assert kinds == ["gable_wall", "gable_wall_external"]
def test_api_type_1_gable_kind_maps_sheltered_and_connected_codes() -> None:
    """API gable_wall_type codes 0-3 map to their room-in-roof gable kinds.

    RdSAP 10 Table 4 (p.22) room-in-roof gable variants. Codes 2/3 were
    established from sim case 21 (a replica of API cert 2818-3053-...:
    gable_wall_type_1=2 lodges "Sheltered", gable_wall_type_2=3 lodges
    "Connected"). Before this, codes 2/3 raised UnmappedApiCode (14 certs
    in the 2026 API sample). Sheltered routes to the discrete kind whose
    U the cascade derives (1/(1/U_wall+0.5)); Connected is U=0.
    """
    # Arrange
    from datatypes.epc.domain.mapper import (
        _api_type_1_gable_kind,  # pyright: ignore[reportPrivateUsage]
    )

    expected_kind_by_code = {
        0: "gable_wall",
        1: "gable_wall_external",
        2: "gable_wall_sheltered",
        3: "connected_wall",
    }

    # Act / Assert — checked in ascending code order.
    for api_code, expected_kind in expected_kind_by_code.items():
        assert _api_type_1_gable_kind(api_code) == expected_kind
def test_elmhurst_detailed_rir_keeps_roof_surfaces() -> None:
# Arrange — a Detailed (§3.10) assessment DOES measure slope / flat
# ceiling, so they must be retained (regression guard so the
@ -3194,6 +3242,83 @@ def test_space_heating_electric_room_heater_off_peak_bills_at_direct_acting_high
assert abs(gas_rate - 0.0550) > 1e-6
def test_hot_water_from_pcdb_heat_pump_bills_at_app_n_wh_high_rate() -> None:
    """DHW from a PCDB heat pump bills on the Table 12a database-HP WH row.

    When DHW is heated by the main heat pump (WHC 901/902/914 "from main
    system") and that main carries a PCDB Table 362 record, SAP 10.2
    Table 12a Grid 1 WH column (PDF p.191) bills it on the
    ASHP/GSHP-from-database row: 0.70 high-rate fraction at 7-hour and
    10-hour. `_hot_water_fuel_cost_gbp_per_kwh` previously billed any
    electric off-peak HW at 100% low rate (its TODO), over-crediting the
    HP-DHW cat-4 cluster. Electric IMMERSION (WHC 903) is a different
    Table 12a row (off-peak immersion 0.17 / Table 13) and must stay on
    the 100%-low-rate fallback here.
    """
    # Arrange
    from domain.sap10_calculator.rdsap.cert_to_inputs import (
        _hot_water_fuel_cost_gbp_per_kwh,  # pyright: ignore[reportPrivateUsage]
    )
    from domain.sap10_calculator.tables.table_12a import Tariff

    heat_pump_main = MainHeatingDetail(
        has_fghrs=False,
        main_fuel_type=29,  # electricity (heat pump), API enum
        heat_emitter_type=1,
        emitter_temperature=0,
        main_heating_control=2210,
        main_heating_category=4,
        sap_main_heating_code=None,
        main_heating_index_number=104351,  # PCDB Table 362 heat pump
    )

    # Act — DHW from the main HP (WHC 901) vs a separate electric
    # immersion (WHC 903), both on a 10-hour off-peak tariff.
    hp_dhw_rate = _hot_water_fuel_cost_gbp_per_kwh(
        29,
        heat_pump_main,
        Tariff.TEN_HOUR,
        SAP_10_2_SPEC_PRICES,
        water_heating_code=901,
    )
    immersion_rate = _hot_water_fuel_cost_gbp_per_kwh(
        29,
        heat_pump_main,
        Tariff.TEN_HOUR,
        SAP_10_2_SPEC_PRICES,
        water_heating_code=903,
    )

    # Assert — HP-DHW: 0.70 × 14.68 p + 0.30 × 7.50 p = 12.526 p; immersion
    # stays at the 10-hour low rate 7.50 p (£0.0750).
    assert abs(hp_dhw_rate - 0.12526) <= 1e-6
    assert abs(immersion_rate - 0.0750) <= 1e-6
def test_space_heating_pcdb_heat_pump_without_sap_code_bills_at_app_n_high_rate() -> None:
    """A PCDB-only heat pump (no SAP code) bills space heating as ASHP_APP_N.

    An API-path heat pump resolves via its PCDB Table 362 index alone
    (data_source=1, no Table-4a SAP code lodged), so
    `sap_main_heating_code` is None. SAP 10.2 Table 12a Grid 1 (PDF
    p.191) puts an Appendix-N heat pump on the ASHP/GSHP "from database"
    row: SH high-rate fraction 0.80 at both 7-hour and 10-hour.

    The code-range gate in `_table_12a_system_for_main` (211-227 /
    521-524) missed the PCDB-only heat pump, so it fell through to the
    "100% low-rate" fallback (10-hour low 7.50 p, £0.0750), under-charging
    space heating by ~5.74 p/kWh and OVER-rating the cat-4 heat-pump
    cluster (1,000-cert API sample: 20 certs, mean signed +1.43; cert
    9472 +15.0). The fix routes any main with a PCDB heat-pump record to
    ASHP_APP_N regardless of SAP code. Mirror of the cat-10 room-heater
    fix.
    """
    # Arrange
    from domain.sap10_calculator.tables.table_12a import Tariff

    heat_pump_main = MainHeatingDetail(
        has_fghrs=False,
        main_fuel_type=29,  # electricity (heat pump), API enum
        heat_emitter_type=1,
        emitter_temperature=0,
        main_heating_control=2210,
        main_heating_category=4,  # heat pump
        sap_main_heating_code=None,  # API path: PCDB index only, no SAP code
        main_heating_index_number=104351,  # Vaillant aroTHERM, PCDB Table 362
    )

    # Act — 10-hour off-peak tariff (RdSAP §12 Rule 3 routes heat pumps here).
    ten_hour_rate = _space_heating_fuel_cost_gbp_per_kwh(
        heat_pump_main,
        Tariff.TEN_HOUR,
        prices=SAP_10_2_SPEC_PRICES,
    )

    # Assert — ASHP_APP_N 10-hour: 0.80 × 14.68 p + 0.20 × 7.50 p = 13.244 p.
    assert abs(ten_hour_rate - 0.13244) <= 1e-6
def test_heat_network_dlf_full_table_12c_age_band_coverage() -> None:
# Arrange — SAP 10.2 Table 12c (page 193) heat-network Distribution
# Loss Factor by dwelling age band A..M. None → K-or-newer
@ -3227,6 +3352,13 @@ def test_secondary_heating_fraction_for_category_full_table_11_coverage() -> Non
assert _secondary_heating_fraction_for_category(5) == 0.10
assert _secondary_heating_fraction_for_category(6) == 0.10
assert _secondary_heating_fraction_for_category(7) == 0.15
# Category 9 = warm-air systems (NOT heat pump). A gas/oil warm-air
# unit is an "All gas, liquid and solid fuel systems" row (0.10);
# electric warm air is "Other electric systems" (also 0.10) — so 0.10
# regardless of fuel (SAP 10.2 Table 11 p.188). Cert 0380-2197-2590-
# 2996-2715 (warm air mains gas, code 506, + electric room-heater
# secondary) previously raised UnmappedSapCode here, blocking it.
assert _secondary_heating_fraction_for_category(9) == 0.10
assert _secondary_heating_fraction_for_category(10) == 0.20
# Absent
assert _secondary_heating_fraction_for_category(None) == 0.10

View file

@ -370,9 +370,24 @@ _EXPECTATIONS: tuple[_GoldenExpectation, ...] = (
cert_number="7536-3827-0600-0600-0276",
actual_sap=68,
expected_sap_resid=+1,
expected_pe_resid_kwh_per_m2=-6.1952,
expected_co2_resid_tonnes_per_yr=-0.1639,
expected_pe_resid_kwh_per_m2=-5.6414,
expected_co2_resid_tonnes_per_yr=-0.1492,
notes=(
"FLOOR-CODE-3 SLICE (re-pinned): the prior 'residual is "
"irreducible register-rounding, DO NOT chase' conclusion below "
"was WRONG. Ext2 (bp3) lodges floor_heat_loss=3 = '(other "
"premises below)' — confirmed authoritative 9/9 on single-bp "
"certs (code 1↔'To external air', 2↔'To unheated space', "
"3↔'(other premises below)', 6↔'(another dwelling below)', "
"7↔Solid/Suspended). Per RdSAP 10 §3.12 (PDF p.25) that is "
"'above a partially heated space if there are non-domestic "
"premises below' → the §5.14 constant U=0.7 W/m²K, NOT the "
"ground-floor 1.12 the case-15/17 repro assumed (the cert's "
"lossy floors[] summary dropped bp3's description, so the prior "
"agent mis-read code 3 as 'ground'). Fix routes code 3 → "
"is_above_partially_heated_space: Ext2 floor U 1.12 → 0.70, "
"PE -6.1952 → -5.6414, CO2 -0.1639 → -0.1492 (both toward 0), "
"SAP integer 69 unchanged → resid +1. HISTORICAL NOTES BELOW. "
"Detached + 2 extensions, TFA 152. Multi-age bps (Main=D, "
"Ext1=L, Ext2=F). Slice 59 (per-bp window apportionment) and "
"Slice 60 (dwelling-wide thermal bridging y from primary bp's "

View file

@ -131,6 +131,57 @@ def _baseline_inputs() -> CalculatorInputs:
)
def test_fuel_codes_and_pv_export_thread_unchanged_onto_sap_result() -> None:
    """Per-end-use fuel codes + PV export reach SapResult untouched.

    ADR-0014 BillDerivation attributes each end-use to a fuel carrier, so
    the per-end-use fuel codes (RdSAP10 Table 32 / SAP 10.2 Table 12 fuel
    code column) and the annual PV export kWh must surface on SapResult.
    These are output-only metadata — they must thread byte-identical from
    CalculatorInputs through `calculate_sap_from_inputs` onto SapResult and
    NOT be recomputed or perturbed. `pv_exported_kwh_per_yr` collapses a
    None CalculatorInputs value to 0.0.
    """
    # Arrange — four fuel codes + PV export set to distinct known values
    # on the baseline: mains gas (1) main, LPG (2) main-2, standard
    # electricity (30) secondary, mains gas (1) hot water.
    baseline = _baseline_inputs()
    inputs = replace(
        baseline,
        main_heating_fuel_code=1,
        main_2_heating_fuel_code=2,
        secondary_heating_fuel_code=30,
        hot_water_fuel_code=1,
        pv_exported_kwh_per_yr=850.0,
    )

    # Act
    result = calculate_sap_from_inputs(inputs)

    # Assert — codes threaded unchanged (main, main-2, secondary, hot water).
    threaded_codes = (
        result.main_heating_fuel_code,
        result.main_2_heating_fuel_code,
        result.secondary_heating_fuel_code,
        result.hot_water_fuel_code,
    )
    assert threaded_codes == (1, 2, 30, 1)
    # PV export carried through.
    assert abs(result.pv_exported_kwh_per_yr - 850.0) <= 1e-9
def test_pv_export_collapses_none_input_to_zero_on_sap_result() -> None:
    """`pv_exported_kwh_per_yr` is 0.0 (not None) on SapResult for no-PV.

    CalculatorInputs.pv_exported_kwh_per_yr is Optional[float] (None on
    certs without a PV split); SapResult.pv_exported_kwh_per_yr is a plain
    float, so the assembly collapses None to 0.0 for the bill adapter.
    """
    # Arrange — force the "no PV split" shape explicitly.
    no_pv_inputs = replace(_baseline_inputs(), pv_exported_kwh_per_yr=None)

    # Act
    exported_kwh = calculate_sap_from_inputs(no_pv_inputs).pv_exported_kwh_per_yr

    # Assert
    assert exported_kwh == 0.0
def test_calculator_consumes_solar_gains_monthly_w_field_for_per_month_solar() -> None:
# Arrange — replace the baseline inputs' solar with an explicit known
# 12-tuple. The §6 orchestrator produces this upstream; the calculator

View file

@ -46,6 +46,53 @@ def test_dual_meter_electric_room_heater_resolves_to_ten_hour_tariff() -> None:
assert rdsap_tariff_for_cert(1, main_1_sap_code=601) is Tariff.SEVEN_HOUR
def test_unknown_meter_infers_off_peak_from_electric_storage_main() -> None:
    """An Unknown meter with a storage/CPSU main infers the §12 off-peak tariff.

    RdSAP 10 §12 (PDF p.62). An "Unknown" meter (code 3) was not recorded
    by the assessor, but an electric STORAGE main (SAP 401-409, Rule 2) or
    CPSU (192, Rule 1) is physical evidence the dwelling is on an off-peak
    tariff — these charge overnight at the low rate and cannot run
    economically on a single rate. So infer the §12 off-peak tariff rather
    than billing the overnight charge at the standard rate. Certs
    7336/2080 (cat-7 storage, meter 3) under-rated ~25 SAP from
    standard-rate space heating.
    """
    # Arrange — storage (Rule 2) → 7-hour; CPSU (Rule 1) → 10-hour.
    unknown_meter = 3
    expected_tariff_by_sap_code = {
        402: Tariff.SEVEN_HOUR,
        192: Tariff.TEN_HOUR,
    }

    # Act / Assert
    for sap_code, expected_tariff in expected_tariff_by_sap_code.items():
        resolved = rdsap_tariff_for_cert(unknown_meter, main_1_sap_code=sap_code)
        assert resolved is expected_tariff
def test_unknown_meter_does_not_infer_off_peak_for_room_heater_or_heat_pump() -> None:
    """On-demand electric mains are not evidence of an off-peak tariff.

    Direct-acting electric room heaters (Rule 3, SAP 691) and heat pumps
    run ON DEMAND and exist on single-rate meters too, so they are NOT
    evidence of an off-peak tariff. On an Unknown meter they keep
    STANDARD — billing them 100% at the off-peak low rate would
    over-credit (room heaters draw mostly at the high rate).
    """
    # Act
    room_heater_tariff = rdsap_tariff_for_cert(3, main_1_sap_code=691)
    heat_pump_tariff = rdsap_tariff_for_cert(3, main_1_is_heat_pump_database=True)

    # Assert
    assert room_heater_tariff is Tariff.STANDARD
    assert heat_pump_tariff is Tariff.STANDARD
def test_unknown_meter_with_non_electric_main_stays_standard() -> None:
    """An Unknown meter without a storage/CPSU main stays on STANDARD.

    An "Unknown" meter on a GAS-heated dwelling (SAP 102) has no off-peak
    evidence, so it must NOT pick up the Rule-4 Dual default (7-hour); it
    stays STANDARD. (The off-peak inference fires only when a Rule 1/2
    storage/CPSU system is present.)
    """
    # Act / Assert — gas main first, then no SAP code at all.
    for sap_code in (102, None):
        resolved = rdsap_tariff_for_cert(3, main_1_sap_code=sap_code)
        assert resolved is Tariff.STANDARD
def test_single_meter_with_storage_stays_standard() -> None:
    """An explicit Single meter is never overridden by a storage main.

    Code 2 (Single) is an EXPLICIT single-rate lodgement, not "unknown",
    so it is NOT overridden even with a storage main: the off-peak
    inference is only for the Unknown (code 3) sentinel.
    """
    # Act
    resolved = rdsap_tariff_for_cert(2, main_1_sap_code=402)

    # Assert
    assert resolved is Tariff.STANDARD
def test_tariff_enum_has_five_members() -> None:
"""Table 12a columns: standard (no off-peak split), 7-hour, 10-hour,
18-hour, 24-hour. Worksheet-shape fidelity: TEN_HOUR is included for

View file

@ -0,0 +1,111 @@
"""Mapper-driven cascade pin against the Elmhurst P960-0001-001431
"simulated case 20" worksheet a storage-heater dwelling with a
Detailed (type-2) room-in-roof, a loose-jacket hot-water cylinder, and a
multi-building-part shell.
Like 000565 / the _rr cases, this fixture does NOT hand-build the
EpcPropertyData: it routes the Summary PDF through
ElmhurstSiteNotesExtractor + from_elmhurst_site_notes so the SAP-result
pin grid exercises the WHOLE extractor + mapper + calculator pipeline.
This case was generated to validate three fronts in one worksheet:
- Detailed room-in-roof gables: a "Sheltered" gable (U=0.92) and a
"Connected" gable (U=0.00, excluded). The cascade already pins both.
- Window §11 layout where "Double between 2002 and 2021" wraps and the
Area cell splits onto its own line (fixed in the extractor — see
test_summary_001431_case20_extracts_all_five_section11_windows).
- Detailed-RR "Stud Wall" surfaces lodged at Default U-value 0.00 —
internal knee walls the worksheet excludes from §3 and (31) (fixed in
the mapper — drop only the U=0 studs, keep positive-U ones).
Source: user-simulated PDFs at `sap worksheets/golden fixture debugging/
simulated case 20/`. The Summary is mirrored into the tracked
`backend/documents_parser/tests/fixtures/Summary_001431_case20.pdf` so the
test runs without depending on the unstaged workspace.
Cert shape: Main + Extension 1, solid brick as-built (Main 220 mm / Ext1
240 mm), 2 storeys + Detailed room-in-roof on the Main, suspended
uninsulated ground floor (Main) + above-partially-heated floor (Ext1),
electric storage heaters (SAP code 402, control 2402 automatic charge
control, Economy-7 dual meter), portable electric secondary heaters (SAP
code 693), mains-gas water heating (code 911) with a loose-jacket
cylinder + thermostat, one instantaneous electric shower, no PV.
Worksheet pin targets (P960-0001-001431 block 1 — existing dwelling SAP):
- SAP rating 44 (258); continuous 43.6322; ECF 4.0397 (257)
- Total fuel cost £1810.1556 (255)
- Total CO2 3815.4060 kg/year (272)
- Space heating 19873.6555 kWh/year ((98c))
- Main 1 fuel 16892.6072 kWh/year (211)
- Secondary fuel 2981.0483 kWh/year (215)
- Hot water fuel 4326.0619 kWh/year (219)
- Lighting 246.3083 kWh/year (232)
- Pumps/fans 0.0 kWh/year (231)
Per [[feedback-zero-error-strict]] + [[feedback-e2e-validation-
philosophy]]: pins are abs=1e-4 against the worksheet PDF. The pin
values live in `test_e2e_elmhurst_sap_score._FIXTURE_PINS`.
"""
from __future__ import annotations
import re
import subprocess
from pathlib import Path
from typing import Final
from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtractor
from datatypes.epc.domain.epc_property_data import EpcPropertyData
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
# parents[0]=worksheet/, [1]=sap10_calculator/, [2]=domain/, [3]=tests/,
# [4]=repo root.
_SUMMARY_PDF: Final[Path] = (
Path(__file__).resolve().parents[4]
/ "backend" / "documents_parser" / "tests" / "fixtures"
/ "Summary_001431_case20.pdf"
)
def _summary_pdf_to_textract_style_pages(pdf_path: Path) -> list[str]:
    """Convert a Summary PDF into the per-page text format the
    ElmhurstSiteNotesExtractor expects (one label / value token per line).

    Mirror of the helper in `test_summary_pdf_mapper_chain.py` / the other
    `_elmhurst_worksheet_*` fixtures.

    Each page is rendered with `pdftotext -layout`; every non-blank layout
    line is split on runs of two or more whitespace characters and each
    resulting cell becomes its own token. Blank layout lines are kept as
    empty tokens.

    Args:
        pdf_path: Path of the Summary PDF to convert.

    Returns:
        One newline-joined token string per PDF page, in page order.

    Raises:
        FileNotFoundError: `pdfinfo` / `pdftotext` is not on PATH.
        subprocess.CalledProcessError: either tool exits non-zero.
        RuntimeError: the page count cannot be read from `pdfinfo`.
    """
    # Decode as UTF-8 explicitly rather than with the locale codec, so the
    # result does not depend on the host locale. Only the ASCII "Pages:"
    # line is needed here, so undecodable metadata bytes are replaced
    # instead of aborting the run.
    info = subprocess.run(
        ["pdfinfo", str(pdf_path)],
        capture_output=True,
        text=True,
        encoding="utf-8",
        errors="replace",
        check=True,
    ).stdout
    # Anchor to line start so a metadata value that happens to contain
    # "Pages: N" (e.g. in the Title) cannot be mistaken for the page count.
    m = re.search(r"^Pages:\s+(\d+)", info, flags=re.MULTILINE)
    if m is None:
        raise RuntimeError(f"Could not parse page count from {pdf_path}")
    page_count = int(m.group(1))

    pages: list[str] = []
    for page_no in range(1, page_count + 1):
        # "-enc UTF-8" pins the tool's output encoding to the codec used
        # to decode it below.
        layout = subprocess.run(
            [
                "pdftotext", "-layout", "-enc", "UTF-8",
                "-f", str(page_no), "-l", str(page_no),
                str(pdf_path), "-",
            ],
            capture_output=True,
            text=True,
            encoding="utf-8",
            check=True,
        ).stdout
        tokens: list[str] = []
        for line in layout.splitlines():
            stripped = line.strip()
            if not stripped:
                # Preserve blank lines as empty tokens.
                tokens.append("")
                continue
            # Two-or-more spaces separate layout columns; single spaces
            # stay inside a cell.
            tokens.extend(p for p in re.split(r"\s{2,}", stripped) if p)
        pages.append("\n".join(tokens))
    return pages
def build_epc() -> EpcPropertyData:
    """Build the case-20 EpcPropertyData via the real extractor + mapper.

    Nothing is hand-built here — the Summary PDF is converted to page
    text, parsed by ElmhurstSiteNotesExtractor and mapped with
    `from_elmhurst_site_notes`, so the extractor and mapper are part of
    the test target.
    """
    extractor = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(_SUMMARY_PDF)
    )
    return EpcPropertyDataMapper.from_elmhurst_site_notes(extractor.extract())

View file

@ -0,0 +1,129 @@
"""Mapper-driven cascade pin against the Elmhurst P960-0001-001431
"simulated case 21" worksheet a replica of API cert
2818-3053-3203-2655-9204: a mid-terrace, age-band-B dwelling whose Main
wall is **solid brick, as built, 440 mm** (room-in-roof above).
Like 000565 / the _rr cases / case 20, this fixture does NOT hand-build
the EpcPropertyData: it routes the Summary PDF through
ElmhurstSiteNotesExtractor + from_elmhurst_site_notes so the SAP-result
pin grid exercises the WHOLE extractor + mapper + calculator pipeline.
This case validates the RdSAP 10 §5.7 Table 13 (PDF p.40) "uninsulated
brick wall by thickness" path for an **as-built** wall. A 440 mm solid
brick wall is >420 mm → U = 1.10 (not the 220 mm bucket default 1.70).
Table 6 footnote (b) on the "Solid brick as built" row makes this
explicit: "Or from 5.7 if wall thickness is other than 200mm to 280mm".
The wall is lodged "Dry-lining No", so no §5.8 / Table 14 adjustment is
applied — U is the raw Table 13 value.
The fix flows through to the Sheltered room-in-roof gable, which is
1/(1/1.10 + 0.5) = 0.71 (worksheet §3 Gable Wall 1), down from the
pre-fix 0.92 that a 1.70 wall U produced (case 20's 220 mm wall).
Source: user-simulated PDFs at `sap worksheets/golden fixture debugging/
simulated case 21/`. The Summary is mirrored into the tracked
`backend/documents_parser/tests/fixtures/Summary_001431_case21.pdf` so
the test runs without depending on the unstaged workspace.
Cert shape: Main mid-terrace, solid brick as-built 440 mm, age band B,
2 storeys + Detailed room-in-roof on the Main (Sheltered + Connected
gables), suspended uninsulated ground floor, mains-gas boiler (SAP code
119, 84% efficiency, control 2113), mains-gas multi-point instantaneous
water heater (code 908, 65% efficiency), Dual/E7 electricity meter, no
secondary heating, no PV.
This fixture is pinned on the **§3 heat-loss line refs only**
((31)/(33)/(36)/(37)) — the values the wall-U-by-thickness fix directly
controls. Following the same rationale as simulated case 6 (see
`test_section_3_roof_windows_case6_match_pdf`), it is NOT added to the
full §1-§13 SAP cascade grid because its water heater — code 908,
multi-point gas **instantaneous** serving several taps — exposes a
separate, unrelated §4 water-heating gap (the cascade over-computes
(219) vs the worksheet's 1859.1534). That is its own cause / own slice;
folding it in here would force a tolerance widening this slice does not
own. The §3 pins below fully exercise the wall-U fix end-to-end through
the real extractor + mapper.
Worksheet §3 pin targets (P960-0001-001431 page 2, "3. Heat losses"):
- (31) Total net area of external elements = 155.1000
- (33) Fabric heat loss Σ(A×U) = 175.6208 W/K
- (36) Thermal bridges (0.150 × exposed) = 23.2650 W/K
- (37) Total fabric heat loss (33)+(36) = 198.8858 W/K
- §3 element refs: External walls Main U = 1.1000 (§5.7 Table 13, 440 mm
> 420 mm); Roof room Main Gable Wall 1 (Sheltered) = 0.71 =
1/(1/1.10 + 0.5); Common Walls = 1.10.
Per [[feedback-zero-error-strict]] + [[feedback-e2e-validation-
philosophy]]: pins are abs=1e-4 against the worksheet PDF.
"""
from __future__ import annotations
import re
import subprocess
from pathlib import Path
from typing import Final
from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtractor
from datatypes.epc.domain.epc_property_data import EpcPropertyData
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
# parents[0]=worksheet/, [1]=sap10_calculator/, [2]=domain/, [3]=tests/,
# [4]=repo root.
_SUMMARY_PDF: Final[Path] = (
Path(__file__).resolve().parents[4]
/ "backend" / "documents_parser" / "tests" / "fixtures"
/ "Summary_001431_case21.pdf"
)
# §3 heat-loss line refs from the P960 worksheet (page 2, "3. Heat
# losses"). These are the dimensions the wall-U-by-thickness fix drives:
# a 440 mm (>420) solid brick as-built wall takes RdSAP 10 §5.7 Table 13
# U=1.10, lifting fabric heat loss to 175.6208 (pre-fix the 220 mm bucket
# default 1.70 over-stated it).
LINE_31_TOTAL_EXTERNAL_AREA_M2: Final[float] = 155.1000
LINE_33_FABRIC_HEAT_LOSS_W_PER_K: Final[float] = 175.6208
LINE_36_THERMAL_BRIDGING_W_PER_K: Final[float] = 23.2650
LINE_37_TOTAL_FABRIC_HEAT_LOSS_W_PER_K: Final[float] = 198.8858
def _summary_pdf_to_textract_style_pages(pdf_path: Path) -> list[str]:
    """Convert a Summary PDF into the per-page text format the
    ElmhurstSiteNotesExtractor expects (one label / value token per line).

    Mirror of the helper in the other `_elmhurst_worksheet_*` fixtures.

    Each page is rendered with `pdftotext -layout`; every non-blank layout
    line is split on runs of two or more whitespace characters and each
    resulting cell becomes its own token. Blank layout lines are kept as
    empty tokens.

    Args:
        pdf_path: Path of the Summary PDF to convert.

    Returns:
        One newline-joined token string per PDF page, in page order.

    Raises:
        FileNotFoundError: `pdfinfo` / `pdftotext` is not on PATH.
        subprocess.CalledProcessError: either tool exits non-zero.
        RuntimeError: the page count cannot be read from `pdfinfo`.
    """
    # Decode as UTF-8 explicitly rather than with the locale codec, so the
    # result does not depend on the host locale. Only the ASCII "Pages:"
    # line is needed here, so undecodable metadata bytes are replaced
    # instead of aborting the run.
    info = subprocess.run(
        ["pdfinfo", str(pdf_path)],
        capture_output=True,
        text=True,
        encoding="utf-8",
        errors="replace",
        check=True,
    ).stdout
    # Anchor to line start so a metadata value that happens to contain
    # "Pages: N" (e.g. in the Title) cannot be mistaken for the page count.
    m = re.search(r"^Pages:\s+(\d+)", info, flags=re.MULTILINE)
    if m is None:
        raise RuntimeError(f"Could not parse page count from {pdf_path}")
    page_count = int(m.group(1))

    pages: list[str] = []
    for page_no in range(1, page_count + 1):
        # "-enc UTF-8" pins the tool's output encoding to the codec used
        # to decode it below.
        layout = subprocess.run(
            [
                "pdftotext", "-layout", "-enc", "UTF-8",
                "-f", str(page_no), "-l", str(page_no),
                str(pdf_path), "-",
            ],
            capture_output=True,
            text=True,
            encoding="utf-8",
            check=True,
        ).stdout
        tokens: list[str] = []
        for line in layout.splitlines():
            stripped = line.strip()
            if not stripped:
                # Preserve blank lines as empty tokens.
                tokens.append("")
                continue
            # Two-or-more spaces separate layout columns; single spaces
            # stay inside a cell.
            tokens.extend(p for p in re.split(r"\s{2,}", stripped) if p)
        pages.append("\n".join(tokens))
    return pages
def build_epc() -> EpcPropertyData:
    """Build the case-21 EpcPropertyData via the real extractor + mapper.

    Nothing is hand-built here — the Summary PDF is converted to page
    text, parsed by ElmhurstSiteNotesExtractor and mapped with
    `from_elmhurst_site_notes`, so the extractor and mapper are part of
    the test target.
    """
    extractor = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(_SUMMARY_PDF)
    )
    return EpcPropertyDataMapper.from_elmhurst_site_notes(extractor.extract())

View file

@ -44,6 +44,7 @@ from tests.domain.sap10_calculator.worksheet import (
_elmhurst_worksheet_001431_case5 as _w001431_case5,
_elmhurst_worksheet_001431_case6 as _w001431_case6,
_elmhurst_worksheet_001431_case7 as _w001431_case7,
_elmhurst_worksheet_001431_case20 as _w001431_case20,
)
from tests.domain.sap10_calculator.worksheet._elmhurst_fixtures import (
ALL_FIXTURES as _ELMHURST_FIXTURES,
@ -278,6 +279,20 @@ _FIXTURE_PINS: Final[dict[str, FixtureCascadePins]] = {
lighting_kwh_per_yr=357.6571,
pumps_fans_kwh_per_yr=356.0,
),
# Mapper-driven — Summary_001431_case20.pdf → extractor → mapper →
# calculator. Storage heaters (SAP 402 / control 2402, Economy-7) +
# Detailed room-in-roof (Sheltered + Connected gables, U=0 stud walls)
# + loose-jacket cylinder. Pins are worksheet Block 1 line refs.
"001431_case20": FixtureCascadePins(
sap_score=44, sap_score_continuous=43.6322, ecf=4.0397,
total_fuel_cost_gbp=1810.1556, co2_kg_per_yr=3815.4060,
space_heating_kwh_per_yr=19873.6555,
main_heating_fuel_kwh_per_yr=16892.6072,
secondary_heating_fuel_kwh_per_yr=2981.0483,
hot_water_kwh_per_yr=4326.0619,
lighting_kwh_per_yr=246.3083,
pumps_fans_kwh_per_yr=0.0,
),
}
@ -296,6 +311,7 @@ _FIXTURE_MODULES: Final[dict[str, ModuleType]] = {
"001431_case5": _w001431_case5,
"001431_case6": _w001431_case6,
"001431_case7": _w001431_case7,
"001431_case20": _w001431_case20,
}
@ -394,3 +410,29 @@ def test_appliances_and_cooking_kwh_threaded_onto_sap_result() -> None:
assert result.appliances_kwh_per_yr == inputs.appliances_kwh_per_yr
assert result.cooking_kwh_per_yr == inputs.cooking_kwh_per_yr
assert abs(result.cooking_kwh_per_yr - expected_cooking_kwh) <= 1e-9
def test_main_heating_fuel_code_threaded_onto_sap_result_for_mains_gas_cert() -> None:
    """Per-end-use fuel codes reach SapResult for a real mains-gas cert.

    ADR-0014 BillDerivation attributes each end-use to a fuel carrier.
    Cert 000516 is a mains-gas combi lodged with mains-gas fuel code 26,
    so the cascade must surface fuel code 26 on
    `SapResult.main_heating_fuel_code` and thread it unchanged from
    CalculatorInputs. Output-only metadata — it does NOT feed
    cost / CO2 / PE / sap_score (those are pinned elsewhere in this file).
    """
    # Arrange — a mains-gas combi cert.
    mains_gas_epc = _FIXTURE_MODULES["000516"].build_epc()

    # Act
    inputs = cert_to_inputs(mains_gas_epc)
    result = Sap10Calculator().calculate(mains_gas_epc)

    # Assert — mains-gas main fuel code threaded unchanged.
    assert inputs.main_heating_fuel_code == 26
    assert result.main_heating_fuel_code == 26
    # Single main system (no Main 2).
    assert result.main_2_heating_fuel_code is None
    # Secondary defaults to standard electricity (30).
    assert result.secondary_heating_fuel_code == 30
    assert result.hot_water_fuel_code == 26

View file

@ -36,12 +36,51 @@ from domain.sap10_calculator.worksheet.heat_transmission import (
heat_transmission_from_cert,
)
from domain.sap10_calculator.worksheet.heat_transmission import (
_joined_main_roof_descriptions, # pyright: ignore[reportPrivateUsage]
_part_geometry, # pyright: ignore[reportPrivateUsage]
_round_half_up, # pyright: ignore[reportPrivateUsage]
_window_bp_index, # pyright: ignore[reportPrivateUsage]
)
class _Desc:
    """Test double for a roof element: exposes only a `description` attribute."""

    def __init__(self, description: str) -> None:
        # Stored verbatim; nothing else is modelled on this stand-in.
        self.description: str = description
def test_joined_main_roof_descriptions_drops_room_in_roof_entries() -> None:
    """Room-in-Roof entries are excluded from the main-roof description.

    A multi-roof cert: main pitched roof (insulated) plus a Room-in-Roof
    lodged uninsulated. The RR has its own shell U cascade, so the
    main-roof U-value description must NOT inherit the RR's "no
    insulation" marker (which would force the whole main roof to U=2.30).
    Cert 8536-0624-4600-0934-1292.
    """
    # Arrange
    main_roof = _Desc("Pitched, insulated (assumed)")
    room_in_roof = _Desc("Roof room(s), no insulation (assumed)")

    # Act
    joined = _joined_main_roof_descriptions([main_roof, room_in_roof])

    # Assert — only the non-RR primary roof remains.
    assert joined == "Pitched, insulated (assumed)"
def test_joined_main_roof_descriptions_keeps_pure_rr_fallback() -> None:
    """A pure room-in-roof dwelling falls back to the unfiltered join.

    Every roof entry is an RR, so filtering would leave nothing; prior
    behaviour is preserved by returning the unfiltered description.
    """
    # Arrange
    only_room_in_roof = [_Desc("Roof room(s), no insulation (assumed)")]

    # Act
    joined = _joined_main_roof_descriptions(only_room_in_roof)

    # Assert
    assert joined == "Roof room(s), no insulation (assumed)"
def test_part_geometry_floorless_part_honours_full_key_contract() -> None:
# Arrange — a building part lodged with NO sap_floor_dimensions (e.g.
# a party-wall-only or RR-only extension; observed on 5 certs in a
@ -921,6 +960,114 @@ def test_ground_floor_flat_exposure_keeps_floor_drops_roof() -> None:
assert ground.roof_w_per_k == 0.0
def test_floor_over_another_dwelling_below_zeroes_floor_despite_exposed_flag() -> None:
    """A per-BP "another dwelling below" floor carries no heat loss.

    A "Ground-floor flat" lodged with floor_heat_loss=6 ("another dwelling
    below") sits over a heated dwelling (e.g. a basement flat), so its
    floor is a party floor (U=0, no heat loss) even though the
    dwelling-level exposure heuristic — keyed only on the "Ground-floor
    flat" label — defaults has_exposed_floor=True. The per-BP `floor_type`
    lodgement is authoritative and must suppress that BP's floor,
    mirroring the roof's "another dwelling above" party override.
    RdSAP 10 §3 — party floors between dwellings are not heat-loss
    elements. Cert 2115-4121-4711-9361-3686.
    """
    # Arrange
    ground_storey = make_floor_dimension(
        total_floor_area_m2=60.0,
        room_height_m=2.5,
        party_wall_length_m=0.0,
        heat_loss_perimeter_m=30.0,
        floor=0,
    )
    main = make_building_part(
        construction_age_band="G",
        wall_construction=4,
        wall_insulation_type=4,
        party_wall_construction=1,
        roof_construction=4,
        floor_type="(another dwelling below)",
        floor_dimensions=[ground_storey],
    )
    epc = make_minimal_sap10_epc(
        total_floor_area_m2=60.0,
        country_code="ENG",
        sap_building_parts=[main],
    )
    # Dwelling-level exposure still flags the floor as exposed.
    dwelling_exposure = DwellingExposure(
        has_exposed_floor=True,
        has_exposed_roof=False,
    )

    # Act
    result = heat_transmission_from_cert(epc, exposure=dwelling_exposure)

    # Assert — the per-BP "another dwelling below" override wins → no floor loss.
    assert result.floor_w_per_k == 0.0
    assert result.walls_w_per_k > 0
def test_exposed_floor_on_flat_carries_heat_loss_despite_unexposed_flag() -> None:
    """A per-BP exposed floor on a flat is a heat-loss floor.

    A top-/mid-floor flat whose lowest floor is lodged as an exposed floor
    (API floor_heat_loss=1, "an exposed floor if there is an open space
    below" per RdSAP 10 §3.12, PDF p.25 — e.g. a flat cantilevered over a
    passageway) IS a heat-loss floor on Table 20. The dwelling-level
    exposure heuristic, keyed only on the flat label, defaults
    has_exposed_floor=False on the assumption the floor sits over another
    heated dwelling; the per-BP `is_exposed_floor` lodgement is
    authoritative and must override that suppression upward, mirroring the
    "another dwelling below" party override (which suppresses downward).
    """
    # Arrange
    lowest_storey = make_floor_dimension(
        total_floor_area_m2=18.0,
        room_height_m=2.88,
        party_wall_length_m=0.0,
        heat_loss_perimeter_m=8.68,
        floor=0,
    )
    main = make_building_part(
        construction_age_band="B",
        wall_construction=4,
        wall_insulation_type=4,
        party_wall_construction=1,
        roof_construction=4,
        floor_type="To external air",
        floor_dimensions=[lowest_storey],
    )
    main.sap_floor_dimensions[0].is_exposed_floor = True
    epc = make_minimal_sap10_epc(
        total_floor_area_m2=18.0,
        country_code="ENG",
        sap_building_parts=[main],
    )
    # Dwelling-level exposure flags the floor as NOT exposed (flat).
    dwelling_exposure = DwellingExposure(
        has_exposed_floor=False,
        has_exposed_roof=True,
    )

    # Act
    result = heat_transmission_from_cert(epc, exposure=dwelling_exposure)

    # Assert — the per-BP exposed-floor lodgement wins → Table 20 floor loss
    # (1.20 W/m²K × 18 m² = 21.6 W/K), not the suppressed 0.0.
    floor_loss_w_per_k = result.floor_w_per_k
    assert floor_loss_w_per_k == pytest.approx(21.6, abs=0.1)
def test_above_partially_heated_floor_on_flat_carries_07_loss_despite_unexposed_flag() -> None:
    """A floor above a partially heated space keeps its U=0.7 loss on a flat.

    Scenario: a mid-/top-floor flat whose lowest floor is lodged "above a
    partially heated space" (API floor_heat_loss=3, "(other premises
    below)"), i.e. over non-domestic premises heated at different times.
    RdSAP 10 §3.12 + §5.14 (PDF p.25/47) give such a floor the constant
    U = 0.7 W/m²K.

    As in the exposed-floor case, the dwelling-level flat heuristic
    defaults ``has_exposed_floor=False`` (assuming a heated dwelling
    below); the per-BP ``is_above_partially_heated_space`` lodgement is
    authoritative and overrides that suppression upward.
    """
    # Arrange — a single 50 m² storey lodged "(other premises below)", then
    # flagged at per-BP level as sitting above a partially heated space.
    lowest_storey = make_floor_dimension(
        total_floor_area_m2=50.0,
        room_height_m=2.5,
        party_wall_length_m=0.0,
        heat_loss_perimeter_m=28.0,
        floor=0,
    )
    building_part = make_building_part(
        construction_age_band="B",
        wall_construction=4,
        wall_insulation_type=4,
        party_wall_construction=1,
        roof_construction=4,
        floor_type="(other premises below)",
        floor_dimensions=[lowest_storey],
    )
    building_part.sap_floor_dimensions[0].is_above_partially_heated_space = True
    cert = make_minimal_sap10_epc(
        total_floor_area_m2=50.0,
        country_code="ENG",
        sap_building_parts=[building_part],
    )
    # The dwelling-level view says the floor is NOT exposed (it is a flat).
    flat_exposure = DwellingExposure(has_exposed_floor=False, has_exposed_roof=True)

    # Act
    transmission = heat_transmission_from_cert(cert, exposure=flat_exposure)

    # Assert — §5.14 constant U=0.7 × 50 m² = 35.0 W/K, not the suppressed 0.0.
    deviation_w_per_k = abs(transmission.floor_w_per_k - 35.0)
    assert deviation_w_per_k <= 0.1
def test_ground_floor_flat_extension_with_flat_roof_exposes_extension_roof_only() -> None:
"""Per-BP roof exposure: an extension on a ground-floor flat can have
its own external (e.g. single-storey) roof even though the dwelling-

View file

@ -43,6 +43,7 @@ from tests.domain.sap10_calculator.worksheet import (
_elmhurst_worksheet_000490 as _w000490,
_elmhurst_worksheet_000516 as _w000516,
_elmhurst_worksheet_001431_case6 as _w001431_case6,
_elmhurst_worksheet_001431_case21 as _w001431_case21,
)
@ -283,6 +284,50 @@ def test_section_3_roof_windows_case6_match_pdf() -> None:
)
def test_section_3_wall_u_by_thickness_case21_match_pdf() -> None:
    """Pin the §3 heat-loss lines for simulated case 21 against the worksheet.

    Case 21 is a replica of API cert 2818 whose Main wall is solid brick,
    **as built, 440 mm**. RdSAP 10 §5.7 Table 13 (PDF p.40) defaults an
    uninsulated brick wall by thickness: >420 mm gives U = 1.10 (not the
    220 mm bucket default of 1.70). Table 6 footnote (b) on the "Solid
    brick as built" row makes this explicit: "Or from 5.7 if wall
    thickness is other than 200mm to 280mm". The lower wall U flows
    through (33) and the Sheltered room-in-roof gable
    (1/(1/1.10 + 0.5) = 0.71).

    Only the §3 line refs are pinned (the case is not added to
    `_FIXTURES`), for the same rationale as case 6: its instantaneous
    multi-point gas water heater (code 908) exposes a separate §4 (219)
    gap, so the full §10/§12 SAP cascade is non-comparable. See the
    fixture module docstring.
    """
    # Arrange
    worksheet = _w001431_case21
    cert = worksheet.build_epc()

    # Act
    section = heat_transmission_section_from_cert(cert)

    # Assert — one pin per §3 worksheet line, in worksheet order.
    _pin(
        section.total_external_element_area_m2,
        worksheet.LINE_31_TOTAL_EXTERNAL_AREA_M2,
        "§3 (31) case21",
    )
    _pin(
        section.fabric_heat_loss_w_per_k,
        worksheet.LINE_33_FABRIC_HEAT_LOSS_W_PER_K,
        "§3 (33) case21",
    )
    _pin(
        section.thermal_bridging_w_per_k,
        worksheet.LINE_36_THERMAL_BRIDGING_W_PER_K,
        "§3 (36) case21",
    )
    _pin(
        section.total_w_per_k,
        worksheet.LINE_37_TOTAL_FABRIC_HEAT_LOSS_W_PER_K,
        "§3 (37) case21",
    )
def test_case6_main_2_emitter_and_control_extracted() -> None:
"""Simulated case 6's §14.1 Main Heating2 lodges its OWN emitter
("Underfloor Heating") and control ("SAP code 2110, ...") the two