Slice 95: API mapper TFA from per-bp dims + window area 2dp rounding — cert 001479 to 1e-4

The end-to-end production cascade `from_api_response → cert_to_inputs →
calculate_sap_from_inputs` now hits cert 001479's worksheet continuous
SAP 69.0094 at abs < 1e-4 (was +0.000584). Two fixes:

1. API mapper: `from_rdsap_schema_21_0_{0,1}` computes `total_floor_
   area_m2` as Σ per-bp `sap_floor_dimensions[*].total_floor_area.value`
   (cert 001479: 30.45+30.77+5.37+1.92 = 68.51), not the lodged scalar
   (rounded integer 69). `water_heating_from_cert` reads `epc.total_
   floor_area_m2` directly for occupancy N (Appendix J), which propagates
   to HW kWh (+6.31 → ~0), Appendix L lighting (+0.98 → 0), and internal
   gains (+25.72 W·months → 0).

2. Cascade window area rounding per RdSAP 10 §15 "Rounding of data"
   (p.66): "All element areas (gross) including window areas: 2 d.p."
   `solar_gains.py` and `internal_gains.py` now round `w * h` to 2 d.p.
   to match the existing `heat_transmission.py` pattern (line 344).
   Closes the residual solar gains delta (+1.50 W·months → 0) that
   became dominant once TFA was fixed.

Re-pinned 5 golden cert residuals where TFA + area rounding shifted
output: 0240 (SAP -14→-15, PE +14.6650→+17.8450, CO2 +0.8060→+1.0097),
6035 (PE +48.2971→+49.5139, CO2 +1.1016→+1.1423), 8135 (PE -2.4194→
-2.4072, CO2 -0.0198→-0.0195), 2130 (PE -38.1521→-38.1666), 0390
(PE +1.6837→+1.6962, CO2 +0.0637→+0.0639).

New test: `test_api_001479_full_chain_sap_matches_worksheet_pdf_
exactly` formalises Layer 4 of the validation stack as a 1e-4 gate.

Pyright net-zero (mapper.py 33).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Khalim Conn-Kowlessar 2026-05-26 09:30:41 +00:00
parent 985a59e1f9
commit f502db8c74
5 changed files with 101 additions and 16 deletions

View file

@ -29,6 +29,7 @@ Textract directly.
from __future__ import annotations from __future__ import annotations
import dataclasses import dataclasses
import json
import re import re
import subprocess import subprocess
from pathlib import Path from pathlib import Path
@ -56,6 +57,16 @@ _SUMMARY_000490_PDF = _FIXTURES / "Summary_000490.pdf"
_SUMMARY_000516_PDF = _FIXTURES / "Summary_000516.pdf" _SUMMARY_000516_PDF = _FIXTURES / "Summary_000516.pdf"
_SUMMARY_001479_PDF = _FIXTURES / "Summary_001479.pdf" _SUMMARY_001479_PDF = _FIXTURES / "Summary_001479.pdf"
# GOV.UK EPB API JSON for cert 001479 — the API-path counterpart of the
# Summary_001479.pdf fixture. Together they drive the API ≡ Summary
# parity workstream; Layer 4 of the validation stack is "API cascade SAP
# matches worksheet continuous SAP at 1e-4".
# NOTE(review): `parents[3]` climbs four directory levels from this file
# before descending into the domain package's golden fixtures — presumably
# that lands on the repository root; re-check the index if this test
# module is ever moved.
_API_001479_JSON = (
Path(__file__).parents[3]
/ "packages/domain/src/domain/sap/rdsap/tests/fixtures/golden"
/ "0535-9020-6509-0821-6222.json"
)
def _summary_pdf_to_textract_style_pages(pdf_path: Path) -> list[str]: def _summary_pdf_to_textract_style_pages(pdf_path: Path) -> list[str]:
"""Convert a Summary PDF into the per-page text format the existing """Convert a Summary PDF into the per-page text format the existing
@ -390,6 +401,30 @@ def test_summary_001479_full_chain_sap_matches_worksheet_pdf_exactly() -> None:
assert abs(result.sap_score_continuous - worksheet_unrounded_sap) < 1e-4 assert abs(result.sap_score_continuous - worksheet_unrounded_sap) < 1e-4
def test_api_001479_full_chain_sap_matches_worksheet_pdf_exactly() -> None:
    # Arrange — cert 001479 has both an Elmhurst Summary PDF and a GOV.UK
    # EPB API JSON (ref 0535-9020-6509-0821-6222). The Summary cascade
    # already pins at worksheet's 69.0094 ± 1e-4 above; this test is the
    # Layer 4 production-path gate: API JSON → from_api_response →
    # cert_to_inputs → calculate_sap_from_inputs must also hit 69.0094
    # at 1e-4. Identical inputs must produce identical outputs; the
    # calculator is deterministic, so any drift is a mapper coverage gap.
    #
    # The fixture is decoded explicitly as UTF-8: `read_text()` without an
    # encoding falls back to the platform locale, which would make this
    # gate machine-dependent if the JSON contains any non-ASCII text.
    doc = json.loads(_API_001479_JSON.read_text(encoding="utf-8"))
    epc = EpcPropertyDataMapper.from_api_response(doc)

    # Act
    result = calculate_sap_from_inputs(
        cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)
    )

    # Assert — 1e-4 pin against the worksheet's continuous SAP. ±0.5 is
    # the API-only fallback (project memory `feedback_api_tolerance_1e_
    # minus_4`); when the worksheet is available, identical-inputs-must-
    # produce-identical-outputs is the bar.
    worksheet_unrounded_sap = 69.0094
    assert abs(result.sap_score_continuous - worksheet_unrounded_sap) < 1e-4
# ============================================================================ # ============================================================================
# Mapper-vs-hand-built EpcPropertyData diff tests # Mapper-vs-hand-built EpcPropertyData diff tests
# ============================================================================ # ============================================================================

View file

@ -1146,6 +1146,7 @@ class EpcPropertyDataMapper:
def from_rdsap_schema_21_0_0(schema: RdSapSchema21_0_0) -> EpcPropertyData: def from_rdsap_schema_21_0_0(schema: RdSapSchema21_0_0) -> EpcPropertyData:
es = schema.sap_energy_source es = schema.sap_energy_source
pv_supply, pv_arrays = _map_schema_21_pv(es.photovoltaic_supply) pv_supply, pv_arrays = _map_schema_21_pv(es.photovoltaic_supply)
_per_bp_tfa = _total_floor_area_from_building_parts(schema.sap_building_parts)
return EpcPropertyData( return EpcPropertyData(
uprn=schema.uprn, uprn=schema.uprn,
assessment_type=schema.assessment_type, assessment_type=schema.assessment_type,
@ -1163,7 +1164,9 @@ class EpcPropertyDataMapper:
inspection_date=date.fromisoformat(schema.inspection_date), inspection_date=date.fromisoformat(schema.inspection_date),
completion_date=date.fromisoformat(schema.completion_date), completion_date=date.fromisoformat(schema.completion_date),
registration_date=date.fromisoformat(schema.registration_date), registration_date=date.fromisoformat(schema.registration_date),
total_floor_area_m2=float(schema.total_floor_area), total_floor_area_m2=(
_per_bp_tfa if _per_bp_tfa is not None else float(schema.total_floor_area)
),
solar_water_heating=schema.solar_water_heating == "Y", solar_water_heating=schema.solar_water_heating == "Y",
has_hot_water_cylinder=schema.has_hot_water_cylinder == "true", has_hot_water_cylinder=schema.has_hot_water_cylinder == "true",
has_fixed_air_conditioning=schema.has_fixed_air_conditioning == "true", has_fixed_air_conditioning=schema.has_fixed_air_conditioning == "true",
@ -1418,6 +1421,7 @@ class EpcPropertyDataMapper:
def from_rdsap_schema_21_0_1(schema: RdSapSchema21_0_1) -> EpcPropertyData: def from_rdsap_schema_21_0_1(schema: RdSapSchema21_0_1) -> EpcPropertyData:
es = schema.sap_energy_source es = schema.sap_energy_source
pv_supply, pv_arrays = _map_schema_21_pv(es.photovoltaic_supply) pv_supply, pv_arrays = _map_schema_21_pv(es.photovoltaic_supply)
_per_bp_tfa = _total_floor_area_from_building_parts(schema.sap_building_parts)
return EpcPropertyData( return EpcPropertyData(
# General # General
uprn=schema.uprn, uprn=schema.uprn,
@ -1436,7 +1440,9 @@ class EpcPropertyDataMapper:
inspection_date=date.fromisoformat(schema.inspection_date), inspection_date=date.fromisoformat(schema.inspection_date),
completion_date=date.fromisoformat(schema.completion_date), completion_date=date.fromisoformat(schema.completion_date),
registration_date=date.fromisoformat(schema.registration_date), registration_date=date.fromisoformat(schema.registration_date),
total_floor_area_m2=float(schema.total_floor_area), total_floor_area_m2=(
_per_bp_tfa if _per_bp_tfa is not None else float(schema.total_floor_area)
),
# Property flags # Property flags
solar_water_heating=schema.solar_water_heating == "Y", solar_water_heating=schema.solar_water_heating == "Y",
has_hot_water_cylinder=schema.has_hot_water_cylinder == "true", has_hot_water_cylinder=schema.has_hot_water_cylinder == "true",
@ -1887,6 +1893,28 @@ def _measurement_value(field: Any) -> float:
return float(field) return float(field)
def _total_floor_area_from_building_parts(building_parts: Any) -> Optional[float]:
"""Sum per-bp `sap_floor_dimensions[*].total_floor_area` to recover the
precise TFA. The GOV.UK EPB API JSON's top-level `total_floor_area`
is rounded to the integer (cert 001479: 30.45+30.77+5.37+1.92 = 68.51
lodged 69), but the worksheet computes continuous SAP from the
unrounded geometry. `epc.total_floor_area_m2` is read directly by
`water_heating_from_cert` to derive occupancy N (Appendix J), which
drives HW, lighting (Appendix L), and internal-gains kWh so the
rounded scalar shifts SAP by ~+0.0006 on cert 001479. Returns None
when no per-bp dims are lodged so callers fall back to the scalar."""
if not building_parts:
return None
total = 0.0
found = False
for bp in building_parts:
floor_dims: Any = bp.sap_floor_dimensions or []
for fd in floor_dims:
total += _measurement_value(fd.total_floor_area)
found = True
return total if found else None
def _first_pv_battery( def _first_pv_battery(
schema_pv_batteries: Any, schema_pv_batteries: Any,
) -> Optional[PvBatteries]: ) -> Optional[PvBatteries]:

View file

@ -74,9 +74,9 @@ _EXPECTATIONS: tuple[_GoldenExpectation, ...] = (
_GoldenExpectation( _GoldenExpectation(
cert_number="0240-0200-5706-2365-8010", cert_number="0240-0200-5706-2365-8010",
actual_sap=73, actual_sap=73,
expected_sap_resid=-14, expected_sap_resid=-15,
expected_pe_resid_kwh_per_m2=+14.6650, expected_pe_resid_kwh_per_m2=+17.8450,
expected_co2_resid_tonnes_per_yr=+0.8060, expected_co2_resid_tonnes_per_yr=+1.0097,
notes=( notes=(
"Detached house, TFA 202, age J, oil boiler, Table 4b code 130. " "Detached house, TFA 202, age J, oil boiler, Table 4b code 130. "
"API response lodges sap_room_in_roof.room_in_roof_type_1 with " "API response lodges sap_room_in_roof.room_in_roof_type_1 with "
@ -119,8 +119,8 @@ _EXPECTATIONS: tuple[_GoldenExpectation, ...] = (
cert_number="6035-7729-2309-0879-2296", cert_number="6035-7729-2309-0879-2296",
actual_sap=70, actual_sap=70,
expected_sap_resid=-6, expected_sap_resid=-6,
expected_pe_resid_kwh_per_m2=+48.2971, expected_pe_resid_kwh_per_m2=+49.5139,
expected_co2_resid_tonnes_per_yr=+1.1016, expected_co2_resid_tonnes_per_yr=+1.1423,
notes=( notes=(
"Mid-terrace, TFA 128, age A, gas combi Table 4b code 104. " "Mid-terrace, TFA 128, age A, gas combi Table 4b code 104. "
"Slice 59 per-bp window apportionment tightens all 3 " "Slice 59 per-bp window apportionment tightens all 3 "
@ -147,8 +147,8 @@ _EXPECTATIONS: tuple[_GoldenExpectation, ...] = (
cert_number="8135-1728-8500-0511-3296", cert_number="8135-1728-8500-0511-3296",
actual_sap=72, actual_sap=72,
expected_sap_resid=+0, expected_sap_resid=+0,
expected_pe_resid_kwh_per_m2=-2.4194, expected_pe_resid_kwh_per_m2=-2.4072,
expected_co2_resid_tonnes_per_yr=-0.0198, expected_co2_resid_tonnes_per_yr=-0.0195,
notes=( notes=(
"Semi-detached, TFA 102, age C, gas PCDB-listed. Cert lodges " "Semi-detached, TFA 102, age C, gas PCDB-listed. Cert lodges "
"blocked_chimneys_count=1. Slice 59 per-bp window apportionment " "blocked_chimneys_count=1. Slice 59 per-bp window apportionment "
@ -160,7 +160,7 @@ _EXPECTATIONS: tuple[_GoldenExpectation, ...] = (
cert_number="2130-1033-4050-5007-8395", cert_number="2130-1033-4050-5007-8395",
actual_sap=82, actual_sap=82,
expected_sap_resid=+1, expected_sap_resid=+1,
expected_pe_resid_kwh_per_m2=-38.1521, expected_pe_resid_kwh_per_m2=-38.1666,
expected_co2_resid_tonnes_per_yr=+0.3047, expected_co2_resid_tonnes_per_yr=+0.3047,
notes=( notes=(
"End-terrace + 1 extension, TFA 64, gas combi PCDB index 17505, " "End-terrace + 1 extension, TFA 64, gas combi PCDB index 17505, "
@ -180,8 +180,8 @@ _EXPECTATIONS: tuple[_GoldenExpectation, ...] = (
cert_number="0390-2254-6420-2126-5561", cert_number="0390-2254-6420-2126-5561",
actual_sap=65, actual_sap=65,
expected_sap_resid=+0, expected_sap_resid=+0,
expected_pe_resid_kwh_per_m2=+1.6837, expected_pe_resid_kwh_per_m2=+1.6962,
expected_co2_resid_tonnes_per_yr=+0.0637, expected_co2_resid_tonnes_per_yr=+0.0639,
notes=( notes=(
"End-terrace + 1 extension, TFA 80, gas combi PCDB index 18119, " "End-terrace + 1 extension, TFA 80, gas combi PCDB index 18119, "
"no PV, no secondary, postcode LN12 (PCDB Table 172 match). " "no PV, no secondary, postcode LN12 (PCDB Table 172 match). "

View file

@ -25,11 +25,19 @@ from __future__ import annotations
from dataclasses import dataclass from dataclasses import dataclass
from enum import Enum from enum import Enum
from math import cos, exp, pi from math import cos, exp, floor, pi
from typing import Final, Optional from typing import Final, Optional
from datatypes.epc.domain.epc_property_data import EpcPropertyData, SapWindow from datatypes.epc.domain.epc_property_data import EpcPropertyData, SapWindow
def _round_area_2dp(value: float) -> float:
"""Half-away-from-zero rounding to 2 d.p. matching heat_transmission.
RdSAP 10 §15 "Rounding of data" (p.66): "All element areas (gross)
including window areas: 2 d.p." Inlined rather than imported so this
module doesn't reach into heat_transmission's private helpers."""
return floor(value * 100.0 + 0.5) / 100.0
_DAYS_PER_YEAR: Final[float] = 365.0 _DAYS_PER_YEAR: Final[float] = 365.0
_APPLIANCES_E_A_COEFF: Final[float] = 207.8 _APPLIANCES_E_A_COEFF: Final[float] = 207.8
_APPLIANCES_E_A_EXPONENT: Final[float] = 0.4714 _APPLIANCES_E_A_EXPONENT: Final[float] = 0.4714
@ -571,8 +579,11 @@ def _daylight_factor_from_cert(
if tfa <= 0.0 or (not epc.sap_windows and rooflight_total_area_m2 <= 0.0): if tfa <= 0.0 or (not epc.sap_windows and rooflight_total_area_m2 <= 0.0):
return 1.433 return 1.433
z_l = _Z_L_BY_OVERSHADING[overshading] z_l = _Z_L_BY_OVERSHADING[overshading]
# RdSAP 10 §15 "Rounding of data" (p.66): "All element areas (gross)
# including window areas: 2 d.p." — mirrors solar_gains and heat_
# transmission so G_L sees the same area as the fabric cascade.
wall_g_l_numerator = sum( wall_g_l_numerator = sum(
float(w.window_width) * float(w.window_height) _round_area_2dp(float(w.window_width) * float(w.window_height))
* _g_light(w) * _frame_factor(w) * z_l * _g_light(w) * _frame_factor(w) * z_l
for w in epc.sap_windows for w in epc.sap_windows
) )

View file

@ -31,7 +31,7 @@ from __future__ import annotations
from dataclasses import dataclass from dataclasses import dataclass
from enum import Enum from enum import Enum
from math import cos, radians, sin from math import cos, floor, radians, sin
from typing import Final from typing import Final
from datatypes.epc.domain.epc_property_data import EpcPropertyData, SapWindow from datatypes.epc.domain.epc_property_data import EpcPropertyData, SapWindow
@ -43,6 +43,14 @@ from domain.sap.climate.appendix_u import (
from domain.sap.worksheet.internal_gains import OvershadingCategory from domain.sap.worksheet.internal_gains import OvershadingCategory
def _round_area_2dp(value: float) -> float:
"""Half-away-from-zero rounding to 2 d.p. matching heat_transmission.
RdSAP 10 §15 "Rounding of data" (p.66): "All element areas (gross)
including window areas: 2 d.p." Inlined rather than imported so this
module doesn't reach into heat_transmission's private helpers."""
return floor(value * 100.0 + 0.5) / 100.0
# Table 6d first column — winter solar access factor Z for heating gains. # Table 6d first column — winter solar access factor Z for heating gains.
# Distinct from the lighting Z_L (third column, §5) and cooling Z (second # Distinct from the lighting Z_L (third column, §5) and cooling Z (second
# column, out of scope). SAP 10.2 spec p178. # column, out of scope). SAP 10.2 spec p178.
@ -304,7 +312,10 @@ def _vertical_window_gain_monthly_w(
) -> tuple[float, ...]: ) -> tuple[float, ...]:
"""Compute the 12-tuple of monthly solar gain (W) for one vertical wall """Compute the 12-tuple of monthly solar gain (W) for one vertical wall
window. Pitch = 90° always; Table 6b/6c lookups derive g and FF.""" window. Pitch = 90° always; Table 6b/6c lookups derive g and FF."""
area = float(w.window_width) * float(w.window_height) # RdSAP 10 §15 "Rounding of data" (p.66): "All element areas (gross)
# including window areas: 2 d.p." — matches heat_transmission's per-
# window area rounding so solar gains and conduction agree on area.
area = _round_area_2dp(float(w.window_width) * float(w.window_height))
g_perp = _g_perpendicular(w) g_perp = _g_perpendicular(w)
ff = _frame_factor(w) ff = _frame_factor(w)
return tuple( return tuple(