# Golden GOV.UK EPB API JSON for cert 0330-2249-8150-2326-4121, resolved
# relative to this test module (three directory levels up, then into the
# RdSAP golden-fixture directory).
_API_0330_JSON = (
    Path(__file__).parents[3]
    / "domain/sap10_calculator/rdsap/tests/fixtures/golden"
    / "0330-2249-8150-2326-4121.json"
)


def test_api_0330_full_chain_sap_matches_worksheet_pdf_exactly() -> None:
    """Pin the API-path continuous SAP for cert 0330 to the worksheet value.

    Loads the golden API JSON, maps it through
    `EpcPropertyDataMapper.from_api_response`, runs the calculator and
    asserts the continuous SAP score is within 1e-4 of 61.5993.
    """
    # Arrange — cert 0330-2249-8150-2326-4121 (second boiler validation
    # cert: mains-gas Vaillant PCDB idx 10241, mid-terrace 2-bp dwelling,
    # TFA 90.56 m²) has both an Elmhurst Summary PDF and a GOV.UK EPB API
    # JSON. The Summary path lands at 1e-4 vs worksheet SAP 61.5993
    # above; this Layer 4 production gate asserts the API path matches
    # the worksheet to the same 1e-4 tolerance — same forcing function
    # as cert 001479's Layer 4 test, applied to the second boiler cert.
    #
    # Slices 96-99 (flat-roof Table 18 col (3) U-values + glazing_type=2
    # surfacing + shower-outlets list normalisation + window-area
    # rounding alignment) jointly closed the API path from
    # Δ +2.1453 → Δ -0.000011 vs worksheet 61.5993.
    #
    # The fixture is decoded explicitly as UTF-8 so the test does not
    # depend on the host's locale default encoding.
    doc = json.loads(_API_0330_JSON.read_text(encoding="utf-8"))
    epc = EpcPropertyDataMapper.from_api_response(doc)

    # Act
    result = calculate_sap_from_inputs(
        cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)
    )

    # Assert — 1e-4 pin against the worksheet's continuous SAP.
    worksheet_unrounded_sap = 61.5993
    assert abs(result.sap_score_continuous - worksheet_unrounded_sap) < 1e-4
# RdSAP shower-outlet integer codes observed across the golden cohort
# (no spec reference found — derived empirically: cert 0330 lodges code
# 2 + Summary surfaces "Electric shower"; cert 0240 lodges multiple
# code-1 outlets on a conventional oil-boiler + cylinder dwelling
# matching "Mixer shower" expectation).
_API_SHOWER_OUTLET_CODE_MIXER: Final[int] = 1
_API_SHOWER_OUTLET_CODE_ELECTRIC: Final[int] = 2


def _normalize_shower_outlets(data: Dict[str, Any]) -> Dict[str, Any]:
    """Return `data` with every `sap_heating.shower_outlets` list element
    in the wrapped `{"shower_outlet": {...}}` shape.

    Real-API certs lodge each outlet as a bare dict
    `{"shower_outlet_type": ..., "shower_wwhrs": ...}` directly in the
    list — older fixtures wrap each element as
    `{"shower_outlet": {"shower_outlet_type": ..., "shower_wwhrs": ...}}`.
    Without normalisation, `from_dict` parses the bare shape as
    `ShowerOutlets(shower_outlet=None)`, silently dropping the
    `shower_outlet_type` / `shower_wwhrs` payload.

    Any list element that is not already a dict carrying a
    `"shower_outlet"` key is wrapped, so the result is uniform
    regardless of which other elements share the list.

    The input is returned as-is (same object) when `sap_heating` is not
    a dict, when `shower_outlets` is not a non-empty list (this includes
    the single-object form), or when every element is already wrapped.
    Otherwise a shallow copy of `data` with a rebuilt `sap_heating` is
    returned; the caller's dicts are never mutated.
    """
    sap_heating = data.get("sap_heating")
    if not isinstance(sap_heating, dict):
        return data
    outlets = sap_heating.get("shower_outlets")
    if not isinstance(outlets, list) or not outlets:
        return data

    def _is_wrapped(item: Any) -> bool:
        # An element counts as wrapped only when it is a dict that
        # already has the "shower_outlet" key.
        return isinstance(item, dict) and "shower_outlet" in item

    if all(_is_wrapped(item) for item in outlets):
        return data
    wrapped_outlets: List[Dict[str, Any]] = [
        item if _is_wrapped(item) else {"shower_outlet": item}
        for item in outlets
    ]
    # Rebuild only the two dict levels that change; everything else is
    # shared with the caller's document.
    return {
        **data,
        "sap_heating": {**sap_heating, "shower_outlets": wrapped_outlets},
    }


def _count_shower_outlets_by_type(
    schema_shower_outlets: Any, target_type: int,
) -> Optional[int]:
    """Count the outlets whose `shower_outlet_type` equals `target_type`.

    Accepts either a single wrapper object exposing `.shower_outlet` or
    a list of such wrappers. Assumes the raw document was passed through
    `_normalize_shower_outlets` first, so every list element is the
    wrapped `ShowerOutlets(shower_outlet=ShowerOutlet)` shape.

    Returns None when the field is None or an empty list (the cascade
    reads None as "use the spec default" rather than 0 — RdSAP modal
    lodging assumption). A wrapper whose `shower_outlet` is None counts
    as zero matches, so a single payload-less wrapper yields 0, not
    None.
    """
    if schema_shower_outlets is None:
        return None
    if isinstance(schema_shower_outlets, list):
        wrappers = cast("list[Any]", schema_shower_outlets)
        if not wrappers:
            return None
    else:
        # Single-object form: treat it as a one-element list so both
        # shapes share the same matching rule.
        wrappers = [schema_shower_outlets]
    return sum(
        1
        for wrapper in wrappers
        if wrapper.shower_outlet is not None
        and wrapper.shower_outlet.shower_outlet_type == target_type
    )
+ shower_outlets: Optional[Union[ShowerOutlets, List[ShowerOutlets]]] = None cylinder_insulation_type: Optional[int] = None cylinder_thermostat: Optional[str] = None secondary_fuel_type: Optional[int] = None diff --git a/datatypes/epc/schema/rdsap_schema_21_0_1.py b/datatypes/epc/schema/rdsap_schema_21_0_1.py index 8fdadb72..06ed6bdc 100644 --- a/datatypes/epc/schema/rdsap_schema_21_0_1.py +++ b/datatypes/epc/schema/rdsap_schema_21_0_1.py @@ -67,7 +67,11 @@ class SapHeating: has_fixed_air_conditioning: str instantaneous_wwhrs: Optional[InstantaneousWwhrs] = None # Real-API certs carry shower_outlets as a list, not the synthetic single-object form; - # accept both shapes so older fixtures keep parsing. + # accept both shapes so older fixtures keep parsing. List elements + # are normalised to the wrapped `{"shower_outlet": {...}}` shape in + # `EpcPropertyDataMapper.from_api_response` before `from_dict` + # parses them — the real-API bare-element shape (no wrapper) is + # equivalent but requires the doc rewrite to land losslessly. shower_outlets: Optional[Union[ShowerOutlets, List[ShowerOutlets]]] = None # SAP10 hot-water demand inputs. number_baths: Optional[int] = None diff --git a/domain/sap10_calculator/rdsap/tests/test_golden_fixtures.py b/domain/sap10_calculator/rdsap/tests/test_golden_fixtures.py index e347b3de..e2637995 100644 --- a/domain/sap10_calculator/rdsap/tests/test_golden_fixtures.py +++ b/domain/sap10_calculator/rdsap/tests/test_golden_fixtures.py @@ -97,8 +97,8 @@ _EXPECTATIONS: tuple[_GoldenExpectation, ...] = ( cert_number="0300-2747-7640-2526-2135", actual_sap=78, expected_sap_resid=+0, - expected_pe_resid_kwh_per_m2=+7.5229, - expected_co2_resid_tonnes_per_yr=-0.2726, + expected_pe_resid_kwh_per_m2=+8.4391, + expected_co2_resid_tonnes_per_yr=-0.2341, notes=( "Large semi-detached, TFA 526, age D, gas boiler PCDB-listed " "(no Table 4b code). Cert lodges open_flues_count=1 + " @@ -110,7 +110,11 @@ _EXPECTATIONS: tuple[_GoldenExpectation, ...] 
= ( "Slice 96 (RdSAP 10 §5.11 Table 18 column (3) flat-roof " "defaults) lifted Ext1's flat-roof U from the pitched-column-1 " "0.40 fall-through to the spec-correct 2.30 (age D), " - "tightening SAP residual +1 → 0." + "tightening SAP residual +1 → 0. Slice 98 (schema 21.0.x " + "shower_outlets list normalisation + explicit electric/" + "mixer counts) surfaces this cert's 1 electric + 1 mixer " + "outlets vs the previous default 0+1: PE +7.52 → +8.44, " + "CO2 -0.27 → -0.23." ), ), _GoldenExpectation( @@ -179,8 +183,8 @@ _EXPECTATIONS: tuple[_GoldenExpectation, ...] = ( cert_number="2130-1033-4050-5007-8395", actual_sap=82, expected_sap_resid=+1, - expected_pe_resid_kwh_per_m2=-38.1666, - expected_co2_resid_tonnes_per_yr=+0.3047, + expected_pe_resid_kwh_per_m2=-38.1790, + expected_co2_resid_tonnes_per_yr=+0.3046, notes=( "End-terrace + 1 extension, TFA 64, gas combi PCDB index 17505, " "postcode DE22 (PCDB Table 172 match), PV: 2× 2.04 kWp arrays " diff --git a/domain/sap10_calculator/worksheet/heat_transmission.py b/domain/sap10_calculator/worksheet/heat_transmission.py index 9dd20874..07a330d5 100644 --- a/domain/sap10_calculator/worksheet/heat_transmission.py +++ b/domain/sap10_calculator/worksheet/heat_transmission.py @@ -428,16 +428,20 @@ def heat_transmission_from_cert( # single-bp test contract. window_area_by_bp = [0.0] * len(parts) if epc.sap_windows: - window_area_by_bp_unrounded = [0.0] * len(parts) + # RdSAP 10 §15: per-window area enters the SAP calc at 2 d.p. + # The worksheet's line (27) Σ-area column sums the per-window- + # rounded values (12.23 = 2.48 + 0.79 + ... rounded per row), + # NOT the unrounded total rounded once (which yields 12.22 for + # cert 0330 — Δ +0.015 W/K on net wall area = +0.0012 SAP). + # The cascade's windows_w_per_k_total branch already rounds + # per-window; align the wall-net-deduction branch with it for + # cascade-internal consistency. 
for w in epc.sap_windows: idx = _window_bp_index(w.window_location, len(parts)) - window_area_by_bp_unrounded[idx] += ( - float(w.window_width) * float(w.window_height) + window_area_by_bp[idx] += _round_half_up( + float(w.window_width) * float(w.window_height), + _AREA_ROUND_DP, ) - window_area_by_bp = [ - _round_half_up(a, _AREA_ROUND_DP) - for a in window_area_by_bp_unrounded - ] elif window_total_area_m2 > 0.0: window_area_by_bp[0] = _round_half_up( window_total_area_m2, _AREA_ROUND_DP,