From f502db8c746218a83f0ca18bc4a1128de526ad67 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 26 May 2026 09:30:41 +0000 Subject: [PATCH] =?UTF-8?q?Slice=2095:=20API=20mapper=20TFA=20from=20per-b?= =?UTF-8?q?p=20dims=20+=20window=20area=202dp=20rounding=20=E2=80=94=20cer?= =?UTF-8?q?t=20001479=20to=201e-4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The end-to-end production cascade `from_api_response → cert_to_inputs → calculate_sap_from_inputs` now hits cert 001479's worksheet continuous SAP 69.0094 at abs < 1e-4 (was +0.000584). Two fixes: 1. API mapper: `from_rdsap_schema_21_0_{0,1}` computes `total_floor_area_m2` as Σ per-bp `sap_floor_dimensions[*].total_floor_area.value` (cert 001479: 30.45+30.77+5.37+1.92 = 68.51), not the lodged scalar (rounded integer 69). `water_heating_from_cert` reads `epc.total_floor_area_m2` directly for occupancy N (Appendix J), which propagates to HW kWh (+6.31 → ~0), Appendix L lighting (+0.98 → 0), and internal gains (+25.72 W·months → 0). 2. Cascade window area rounding per RdSAP 10 §15 "Rounding of data" (p.66): "All element areas (gross) including window areas: 2 d.p." `solar_gains.py` and `internal_gains.py` now round `w * h` to 2 d.p. to match the existing `heat_transmission.py` pattern (line 344). Closes the residual solar gains delta (+1.50 W·months → 0) that became dominant once TFA was fixed. Re-pinned 5 golden cert residuals where TFA + area rounding shifted output: 0240 (SAP -14→-15, PE +14.6650→+17.8450, CO2 +0.8060→+1.0097), 6035 (PE +48.2971→+49.5139, CO2 +1.1016→+1.1423), 8135 (PE -2.4194→-2.4072, CO2 -0.0198→-0.0195), 2130 (PE -38.1521→-38.1666), 0390 (PE +1.6837→+1.6962, CO2 +0.0637→+0.0639). New test: `test_api_001479_full_chain_sap_matches_worksheet_pdf_exactly` formalises Layer 4 of the validation stack as a 1e-4 gate. Pyright net-zero (mapper.py 33). 
Co-Authored-By: Claude Opus 4.7 --- .../tests/test_summary_pdf_mapper_chain.py | 35 +++++++++++++++++++ datatypes/epc/domain/mapper.py | 32 +++++++++++++++-- .../sap/rdsap/tests/test_golden_fixtures.py | 20 +++++------ .../domain/sap/worksheet/internal_gains.py | 15 ++++++-- .../src/domain/sap/worksheet/solar_gains.py | 15 ++++++-- 5 files changed, 101 insertions(+), 16 deletions(-) diff --git a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py index fe3c02e3..c46964b2 100644 --- a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py +++ b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py @@ -29,6 +29,7 @@ Textract directly. from __future__ import annotations import dataclasses +import json import re import subprocess from pathlib import Path @@ -56,6 +57,16 @@ _SUMMARY_000490_PDF = _FIXTURES / "Summary_000490.pdf" _SUMMARY_000516_PDF = _FIXTURES / "Summary_000516.pdf" _SUMMARY_001479_PDF = _FIXTURES / "Summary_001479.pdf" +# GOV.UK EPB API JSON for cert 001479 — the API-path counterpart of the +# Summary_001479.pdf fixture. Together they drive the API ≡ Summary +# parity workstream; Layer 4 of the validation stack is "API cascade SAP +# matches worksheet continuous SAP at 1e-4". +_API_001479_JSON = ( + Path(__file__).parents[3] + / "packages/domain/src/domain/sap/rdsap/tests/fixtures/golden" + / "0535-9020-6509-0821-6222.json" +) + def _summary_pdf_to_textract_style_pages(pdf_path: Path) -> list[str]: """Convert a Summary PDF into the per-page text format the existing @@ -390,6 +401,30 @@ def test_summary_001479_full_chain_sap_matches_worksheet_pdf_exactly() -> None: assert abs(result.sap_score_continuous - worksheet_unrounded_sap) < 1e-4 +def test_api_001479_full_chain_sap_matches_worksheet_pdf_exactly() -> None: + # Arrange — cert 001479 has both an Elmhurst Summary PDF and a GOV.UK + # EPB API JSON (ref 0535-9020-6509-0821-6222). 
The Summary cascade + # already pins at worksheet's 69.0094 ± 1e-4 above; this test is the + # Layer 4 production-path gate: API JSON → from_api_response → + # cert_to_inputs → calculate_sap_from_inputs must also hit 69.0094 + # at 1e-4. Identical inputs must produce identical outputs; the + # calculator is deterministic, so any drift is a mapper coverage gap. + doc = json.loads(_API_001479_JSON.read_text()) + epc = EpcPropertyDataMapper.from_api_response(doc) + + # Act + result = calculate_sap_from_inputs( + cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES) + ) + + # Assert — 1e-4 pin against the worksheet's continuous SAP. ±0.5 is + # the API-only fallback (project memory `feedback_api_tolerance_1e_ + # minus_4`); when the worksheet is available, identical-inputs-must- + # produce-identical-outputs is the bar. + worksheet_unrounded_sap = 69.0094 + assert abs(result.sap_score_continuous - worksheet_unrounded_sap) < 1e-4 + + # ============================================================================ # Mapper-vs-hand-built EpcPropertyData diff tests # ============================================================================ diff --git a/datatypes/epc/domain/mapper.py b/datatypes/epc/domain/mapper.py index 42030e85..c3427666 100644 --- a/datatypes/epc/domain/mapper.py +++ b/datatypes/epc/domain/mapper.py @@ -1146,6 +1146,7 @@ class EpcPropertyDataMapper: def from_rdsap_schema_21_0_0(schema: RdSapSchema21_0_0) -> EpcPropertyData: es = schema.sap_energy_source pv_supply, pv_arrays = _map_schema_21_pv(es.photovoltaic_supply) + _per_bp_tfa = _total_floor_area_from_building_parts(schema.sap_building_parts) return EpcPropertyData( uprn=schema.uprn, assessment_type=schema.assessment_type, @@ -1163,7 +1164,9 @@ class EpcPropertyDataMapper: inspection_date=date.fromisoformat(schema.inspection_date), completion_date=date.fromisoformat(schema.completion_date), registration_date=date.fromisoformat(schema.registration_date), - 
total_floor_area_m2=float(schema.total_floor_area), + total_floor_area_m2=( + _per_bp_tfa if _per_bp_tfa is not None else float(schema.total_floor_area) + ), solar_water_heating=schema.solar_water_heating == "Y", has_hot_water_cylinder=schema.has_hot_water_cylinder == "true", has_fixed_air_conditioning=schema.has_fixed_air_conditioning == "true", @@ -1418,6 +1421,7 @@ class EpcPropertyDataMapper: def from_rdsap_schema_21_0_1(schema: RdSapSchema21_0_1) -> EpcPropertyData: es = schema.sap_energy_source pv_supply, pv_arrays = _map_schema_21_pv(es.photovoltaic_supply) + _per_bp_tfa = _total_floor_area_from_building_parts(schema.sap_building_parts) return EpcPropertyData( # General uprn=schema.uprn, @@ -1436,7 +1440,9 @@ class EpcPropertyDataMapper: inspection_date=date.fromisoformat(schema.inspection_date), completion_date=date.fromisoformat(schema.completion_date), registration_date=date.fromisoformat(schema.registration_date), - total_floor_area_m2=float(schema.total_floor_area), + total_floor_area_m2=( + _per_bp_tfa if _per_bp_tfa is not None else float(schema.total_floor_area) + ), # Property flags solar_water_heating=schema.solar_water_heating == "Y", has_hot_water_cylinder=schema.has_hot_water_cylinder == "true", @@ -1887,6 +1893,28 @@ def _measurement_value(field: Any) -> float: return float(field) +def _total_floor_area_from_building_parts(building_parts: Any) -> Optional[float]: + """Sum per-bp `sap_floor_dimensions[*].total_floor_area` to recover the + precise TFA. The GOV.UK EPB API JSON's top-level `total_floor_area` + is rounded to the integer (cert 001479: 30.45+30.77+5.37+1.92 = 68.51 + → lodged 69), but the worksheet computes continuous SAP from the + unrounded geometry. `epc.total_floor_area_m2` is read directly by + `water_heating_from_cert` to derive occupancy N (Appendix J), which + drives HW, lighting (Appendix L), and internal-gains kWh — so the + rounded scalar shifts SAP by ~+0.0006 on cert 001479. 
Returns None + when no per-bp dims are lodged so callers fall back to the scalar.""" + if not building_parts: + return None + total = 0.0 + found = False + for bp in building_parts: + floor_dims: Any = bp.sap_floor_dimensions or [] + for fd in floor_dims: + total += _measurement_value(fd.total_floor_area) + found = True + return total if found else None + + def _first_pv_battery( schema_pv_batteries: Any, ) -> Optional[PvBatteries]: diff --git a/packages/domain/src/domain/sap/rdsap/tests/test_golden_fixtures.py b/packages/domain/src/domain/sap/rdsap/tests/test_golden_fixtures.py index aeb100ca..97a654ca 100644 --- a/packages/domain/src/domain/sap/rdsap/tests/test_golden_fixtures.py +++ b/packages/domain/src/domain/sap/rdsap/tests/test_golden_fixtures.py @@ -74,9 +74,9 @@ _EXPECTATIONS: tuple[_GoldenExpectation, ...] = ( _GoldenExpectation( cert_number="0240-0200-5706-2365-8010", actual_sap=73, - expected_sap_resid=-14, - expected_pe_resid_kwh_per_m2=+14.6650, - expected_co2_resid_tonnes_per_yr=+0.8060, + expected_sap_resid=-15, + expected_pe_resid_kwh_per_m2=+17.8450, + expected_co2_resid_tonnes_per_yr=+1.0097, notes=( "Detached house, TFA 202, age J, oil boiler, Table 4b code 130. " "API response lodges sap_room_in_roof.room_in_roof_type_1 with " @@ -119,8 +119,8 @@ _EXPECTATIONS: tuple[_GoldenExpectation, ...] = ( cert_number="6035-7729-2309-0879-2296", actual_sap=70, expected_sap_resid=-6, - expected_pe_resid_kwh_per_m2=+48.2971, - expected_co2_resid_tonnes_per_yr=+1.1016, + expected_pe_resid_kwh_per_m2=+49.5139, + expected_co2_resid_tonnes_per_yr=+1.1423, notes=( "Mid-terrace, TFA 128, age A, gas combi Table 4b code 104. " "Slice 59 per-bp window apportionment tightens all 3 " @@ -147,8 +147,8 @@ _EXPECTATIONS: tuple[_GoldenExpectation, ...] 
= ( cert_number="8135-1728-8500-0511-3296", actual_sap=72, expected_sap_resid=+0, - expected_pe_resid_kwh_per_m2=-2.4194, - expected_co2_resid_tonnes_per_yr=-0.0198, + expected_pe_resid_kwh_per_m2=-2.4072, + expected_co2_resid_tonnes_per_yr=-0.0195, notes=( "Semi-detached, TFA 102, age C, gas PCDB-listed. Cert lodges " "blocked_chimneys_count=1. Slice 59 per-bp window apportionment " @@ -160,7 +160,7 @@ _EXPECTATIONS: tuple[_GoldenExpectation, ...] = ( cert_number="2130-1033-4050-5007-8395", actual_sap=82, expected_sap_resid=+1, - expected_pe_resid_kwh_per_m2=-38.1521, + expected_pe_resid_kwh_per_m2=-38.1666, expected_co2_resid_tonnes_per_yr=+0.3047, notes=( "End-terrace + 1 extension, TFA 64, gas combi PCDB index 17505, " @@ -180,8 +180,8 @@ _EXPECTATIONS: tuple[_GoldenExpectation, ...] = ( cert_number="0390-2254-6420-2126-5561", actual_sap=65, expected_sap_resid=+0, - expected_pe_resid_kwh_per_m2=+1.6837, - expected_co2_resid_tonnes_per_yr=+0.0637, + expected_pe_resid_kwh_per_m2=+1.6962, + expected_co2_resid_tonnes_per_yr=+0.0639, notes=( "End-terrace + 1 extension, TFA 80, gas combi PCDB index 18119, " "no PV, no secondary, postcode LN12 (PCDB Table 172 match). " diff --git a/packages/domain/src/domain/sap/worksheet/internal_gains.py b/packages/domain/src/domain/sap/worksheet/internal_gains.py index f90a002c..2f6771a0 100644 --- a/packages/domain/src/domain/sap/worksheet/internal_gains.py +++ b/packages/domain/src/domain/sap/worksheet/internal_gains.py @@ -25,11 +25,19 @@ from __future__ import annotations from dataclasses import dataclass from enum import Enum -from math import cos, exp, pi +from math import cos, exp, floor, pi from typing import Final, Optional from datatypes.epc.domain.epc_property_data import EpcPropertyData, SapWindow + +def _round_area_2dp(value: float) -> float: + """Half-away-from-zero rounding to 2 d.p. matching heat_transmission. + RdSAP 10 §15 "Rounding of data" (p.66): "All element areas (gross) + including window areas: 2 d.p." 
Inlined rather than imported so this + module doesn't reach into heat_transmission's private helpers.""" + return floor(value * 100.0 + 0.5) / 100.0 + _DAYS_PER_YEAR: Final[float] = 365.0 _APPLIANCES_E_A_COEFF: Final[float] = 207.8 _APPLIANCES_E_A_EXPONENT: Final[float] = 0.4714 @@ -571,8 +579,11 @@ def _daylight_factor_from_cert( if tfa <= 0.0 or (not epc.sap_windows and rooflight_total_area_m2 <= 0.0): return 1.433 z_l = _Z_L_BY_OVERSHADING[overshading] + # RdSAP 10 §15 "Rounding of data" (p.66): "All element areas (gross) + # including window areas: 2 d.p." — mirrors solar_gains and heat_ + # transmission so G_L sees the same area as the fabric cascade. wall_g_l_numerator = sum( - float(w.window_width) * float(w.window_height) + _round_area_2dp(float(w.window_width) * float(w.window_height)) * _g_light(w) * _frame_factor(w) * z_l for w in epc.sap_windows ) diff --git a/packages/domain/src/domain/sap/worksheet/solar_gains.py b/packages/domain/src/domain/sap/worksheet/solar_gains.py index a623b899..66af6e98 100644 --- a/packages/domain/src/domain/sap/worksheet/solar_gains.py +++ b/packages/domain/src/domain/sap/worksheet/solar_gains.py @@ -31,7 +31,7 @@ from __future__ import annotations from dataclasses import dataclass from enum import Enum -from math import cos, radians, sin +from math import cos, floor, radians, sin from typing import Final from datatypes.epc.domain.epc_property_data import EpcPropertyData, SapWindow @@ -43,6 +43,14 @@ from domain.sap.climate.appendix_u import ( from domain.sap.worksheet.internal_gains import OvershadingCategory +def _round_area_2dp(value: float) -> float: + """Half-away-from-zero rounding to 2 d.p. matching heat_transmission. + RdSAP 10 §15 "Rounding of data" (p.66): "All element areas (gross) + including window areas: 2 d.p." 
Inlined rather than imported so this + module doesn't reach into heat_transmission's private helpers.""" + return floor(value * 100.0 + 0.5) / 100.0 + + # Table 6d first column — winter solar access factor Z for heating gains. # Distinct from the lighting Z_L (third column, §5) and cooling Z (second # column, out of scope). SAP 10.2 spec p178. @@ -304,7 +312,10 @@ def _vertical_window_gain_monthly_w( ) -> tuple[float, ...]: """Compute the 12-tuple of monthly solar gain (W) for one vertical wall window. Pitch = 90° always; Table 6b/6c lookups derive g⊥ and FF.""" - area = float(w.window_width) * float(w.window_height) + # RdSAP 10 §15 "Rounding of data" (p.66): "All element areas (gross) + # including window areas: 2 d.p." — matches heat_transmission's per- + # window area rounding so solar gains and conduction agree on area. + area = _round_area_2dp(float(w.window_width) * float(w.window_height)) g_perp = _g_perpendicular(w) ff = _frame_factor(w) return tuple(