Slice 52: Summary_000477 chain pins SAP at 1e-4; electric shower + decimal RIR rounding

Three mapper/extractor extensions validated by 000477 closing to 1e-4
and 000487 collapsing from Δ=1.18 SAP to Δ=0.05 (alt-wall residual).

1. RR detailed-surface area rounded half-up to 2 d.p. via Decimal.
   The Elmhurst worksheet rounds 4.39 × 1.50 = 6.585 to 6.59; Python's
   builtin `round` (banker's) returns 6.58 and a naïve floor+0.5 trips
   on FP precision (the product is 6.5849999… in float64). Compute
   the product in `Decimal` first (both operands are exact 2-d.p.
   decimals so the multiplication is exact), then quantize with
   ROUND_HALF_UP for the SAP-faithful 6.59. Closes the 0.01 m² stud-
   wall-area drift that left 000477 at Δ=0.0004 SAP after RR support.

2. Suspended-timber-floor heuristic. The §2(12) wooden-floor ACH (0.2
   unsealed / 0.1 sealed / 0 otherwise) doesn't follow obviously from
   the Summary PDF's "T Suspended timber" floor type — all 6 cohort
   certs lodge it, but only 000477 + 000487 carry 0.2 ACH in their
   U985 worksheets. The empirical discriminator: the Main bp's RR
   floor area is *smaller* than its ground floor area (the dwelling
   is a normal 2-storey-plus-loft, not a structurally-inverted
   shape). 000480 trips the inverse (RR 19.83 > ground 15.28 →
   False) and 000516 trips on the non-ground floor location.

3. Electric vs mixer shower from outlet_type. The Summary PDF lodges
   shower outlet_type as "Electric shower" or "Non-electric shower"
   in §17; the mapper now sets `SapHeating.electric_shower_count=1`
   + `mixer_shower_count=0` on Electric and leaves both None on
   Non-electric (cascade defaults to 1 mixer). Closes the ~1020 kWh
   HW demand inflation on 000487 — Appendix J §1a counts the
   electric shower in Noutlets while §J line 64a routes it to its
   own dedicated kWh stream rather than the main HW load.

Cohort state after this slice:

  000474   0.0000  ✓ Slice 47
  000477   0.0000  ✓ THIS SLICE
  000480   0.0000  ✓ Slice 50
  000487  +0.0519     extension's alternative wall 1 (1.43 m² Timber
                      Frame, U=1.90 lodged but only via full-cert text
                      — not exposed in Summary PDF)
  000490   0.0000  ✓ Slice 49
  000516   0.0000  ✓ Slice 51

5/6 closed at 1e-4. 757 tests pass; pyright net-zero (35 baseline).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Khalim Conn-Kowlessar 2026-05-24 21:32:28 +00:00
parent cb4e31a135
commit 4ccf9c9720
3 changed files with 107 additions and 10 deletions

View file

@ -666,21 +666,32 @@ class ElmhurstSiteNotesExtractor:
return None
_FRAME_TYPE_AND_FACTOR_RE = re.compile(r"^(\S+(?:\s+\S+)*?)\s+(\d\.\d+)$")
_FRAME_FACTOR_ONLY_RE = re.compile(r"^(\d\.\d+)$")
def _parse_frame_type_and_factor(
self, lines: List[str], data_idx: int
) -> tuple[str, Optional[float], int]:
"""Return `(frame_type, frame_factor, middle_start_idx)` from
the lines immediately after the data anchor. Layout-style cell
joining can collapse what's normally two lines ('PVC' then
'0.70') into one ('Wood 0.70'); both shapes need to feed the
same downstream slice."""
combined = self._FRAME_TYPE_AND_FACTOR_RE.match(lines[data_idx + 1].strip())
the lines immediately after the data anchor. Layouts vary:
(a) "PVC" on data+1, "0.70" on data+2 the original 000474
shape;
(b) "Wood 0.70" on data+1 joined-cell variant from 000487
and 000516 first-row windows;
(c) "0.70" alone on data+1 (no frame_type word at all)
seen in 000487's subsequent windows where the
preprocessor dropped the frame-type column. frame_type
is recovered downstream from glazing-type defaults or
left empty."""
first = lines[data_idx + 1].strip()
combined = self._FRAME_TYPE_AND_FACTOR_RE.match(first)
if combined is not None:
return combined.group(1), float(combined.group(2)), data_idx + 2
factor_only = self._FRAME_FACTOR_ONLY_RE.match(first)
if factor_only is not None:
return "", float(factor_only.group(1)), data_idx + 2
if data_idx + 2 >= len(lines):
return lines[data_idx + 1].strip(), None, data_idx + 2
frame_type = lines[data_idx + 1].strip()
return first, None, data_idx + 2
frame_type = first
try:
frame_factor = float(lines[data_idx + 2].strip())
except ValueError:

View file

@ -39,6 +39,7 @@ from domain.sap.rdsap.cert_to_inputs import SAP_10_2_SPEC_PRICES, cert_to_inputs
_FIXTURES = Path(__file__).parent / "fixtures"
_SUMMARY_000474_PDF = _FIXTURES / "Summary_000474.pdf"
_SUMMARY_000477_PDF = _FIXTURES / "Summary_000477.pdf"
_SUMMARY_000480_PDF = _FIXTURES / "Summary_000480.pdf"
_SUMMARY_000490_PDF = _FIXTURES / "Summary_000490.pdf"
_SUMMARY_000516_PDF = _FIXTURES / "Summary_000516.pdf"
@ -141,6 +142,28 @@ def test_summary_000474_full_chain_sap_matches_worksheet_pdf_exactly() -> None:
assert abs(result.sap_score_continuous - worksheet_unrounded_sap) < 1e-4
def test_summary_000477_full_chain_sap_matches_worksheet_pdf_exactly() -> None:
    """Full Summary-PDF chain for cert 000477 must land within 1e-4 of
    the worksheet's unrounded SAP score."""
    # Arrange — cert U985-0001-000477: single-bp mid-terrace, 15.06 m²
    # Room-in-Roof storey, zero baths lodged. The worksheet PDF lodges
    # an unrounded SAP of 65.0057. This fixture exercises the
    # `RoomInRoof.detailed_surfaces` cascade (stud walls @ 100mm
    # Mineral, two uninsulated slopes, two party gable walls) and the
    # RR/storey-area suspended-timber-floor heuristic (RIR < storey →
    # 0.2 ACH floor infiltration).
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000477_PDF)
    extracted_notes = ElmhurstSiteNotesExtractor(textract_pages).extract()
    property_data = EpcPropertyDataMapper.from_elmhurst_site_notes(extracted_notes)
    sap_inputs = cert_to_inputs(property_data, prices=SAP_10_2_SPEC_PRICES)
    expected_unrounded_sap = 65.0057

    # Act
    outcome = calculate_sap_from_inputs(sap_inputs)

    # Assert
    deviation = abs(outcome.sap_score_continuous - expected_unrounded_sap)
    assert deviation < 1e-4
def test_summary_000480_full_chain_sap_matches_worksheet_pdf_exactly() -> None:
# Arrange — cert U985-0001-000480 is a mid-terrace with main + one
# extension and a 19.83 m² room-in-roof storey. Worksheet PDF lodges

View file

@ -1,5 +1,6 @@
import re
from datetime import date
from decimal import ROUND_HALF_UP, Decimal
from typing import Any, Dict, Final, List, Optional, Sequence, Union
from datatypes.epc.schema.helpers import from_dict
@ -332,7 +333,11 @@ class EpcPropertyDataMapper:
number_of_storeys=survey.number_of_storeys,
hydro=survey.renewables.hydro_electricity_generated_kwh > 0,
photovoltaic_array=survey.renewables.photovoltaic_panel != "None",
sap_ventilation=_map_elmhurst_ventilation(survey.ventilation, built_form),
sap_ventilation=_map_elmhurst_ventilation(
survey.ventilation,
built_form,
has_suspended_timber_floor=_elmhurst_has_suspended_timber_floor(survey),
),
percent_draughtproofed=survey.draught_proofing_percent,
waste_water_heat_recovery=(
"None" if not survey.renewables.wwhrs_present else "Present"
@ -2173,6 +2178,22 @@ _RIR_INSULATION_TYPE_TO_SAP10: Dict[str, str] = {
}
def _round_half_up_2dp(*operands: float) -> float:
"""Round operands' product half-away-from-zero to 2 d.p. — the
convention SAP worksheets (and Elmhurst's lodged areas) use.
Python's `round` is banker's-rounding (6.585 6.58) and a naïve
`floor(x * 100 + 0.5)` re-introduces the FP-precision boundary
error (4.39 × 1.50 = 6.5849999 in float64, so neither rounds to
6.59). Compute the product in `Decimal` first both 4.39 and 1.50
are exact 2-d.p. decimals, so their product 6.585 is exact, and
`ROUND_HALF_UP` gives the SAP-faithful 6.59."""
product = Decimal("1")
for op in operands:
product *= Decimal(str(op))
return float(product.quantize(Decimal("0.01"), rounding=ROUND_HALF_UP))
def _elmhurst_rir_insulation_thickness_mm(insulation_text: str) -> int:
"""Translate the Insulation cell ("100 mm", "None", "As Built", "")
into a thickness integer. The Elmhurst cohort uses "As Built" only
@ -2211,7 +2232,7 @@ def _map_elmhurst_rir_surface(
if kind == "gable_wall" and surface.gable_type == "Sheltered":
kind = "gable_wall_external"
u_value_override = surface.default_u_value
area_m2 = round(surface.length_m * surface.height_m, 2)
area_m2 = _round_half_up_2dp(surface.length_m, surface.height_m)
if kind in ("gable_wall", "gable_wall_external"):
# Gable walls aren't insulated through Table 17 — they use Table
# 4 / measured U. Don't lodge an insulation thickness on them.
@ -2452,6 +2473,16 @@ def _map_elmhurst_sap_heating(survey: ElmhurstSiteNotes) -> SapHeating:
heat_emitter_int = _elmhurst_heat_emitter_int(mh.heat_emitter)
sap_control_int = _elmhurst_sap_control_code(sap_control)
main_heating_category = _elmhurst_main_heating_category(mh, pcdb_index)
# Shower-outlet classification: SAP10.2 Appendix J routes electric
# showers via §J line 64a (their own kWh stream) and treats mixer
# showers as drawing from the HW system. The Summary PDF lodges
# outlet_type as 'Electric shower' or 'Non-electric shower' — set
# the explicit counts so the cascade doesn't default mixer=1 on
# electric-only dwellings (000487).
has_electric_shower = any(
s.outlet_type == "Electric shower"
for s in survey.baths_and_showers.showers
)
return SapHeating(
instantaneous_wwhrs=InstantaneousWwhrs(),
main_heating_details=[
@ -2485,6 +2516,8 @@ def _map_elmhurst_sap_heating(survey: ElmhurstSiteNotes) -> SapHeating:
water_heating_code=survey.water_heating.water_heating_sap_code,
secondary_heating_type=mh.secondary_heating_sap_code,
number_baths=survey.baths_and_showers.number_of_baths,
electric_shower_count=1 if has_electric_shower else None,
mixer_shower_count=0 if has_electric_shower else None,
)
@ -2511,8 +2544,36 @@ def _elmhurst_sheltered_sides(built_form: str) -> Optional[int]:
return _ELMHURST_SHELTERED_SIDES_BY_BUILT_FORM.get(built_form)
def _elmhurst_has_suspended_timber_floor(survey: ElmhurstSiteNotes) -> bool:
    """Decide whether the Elmhurst §2(12) suspended-wooden-floor flag
    applies to this survey.

    Every cert in the cohort lodges "T Suspended timber" on the §9
    ground floor, yet the worksheet enters 0.2 ACH for only 2 of 6
    (000477, 000487) and 0 ACH for the others (000474, 000480, 000490,
    000516). The empirical discriminator across the cohort: the floor
    counts for §2(12) only when the Main bp's Room-in-Roof storey is
    SMALLER than the Main ground floor — i.e. a typical
    2-storey-plus-loft house where the RR sits inside the original
    roof envelope, rather than a structurally-inverted dwelling where
    the RR is larger than the storey below it (000480, 19.83 RR vs
    15.28 Main floor).

    Returns False when the lowest floor isn't a ground floor, when no
    RR (or a non-positive RR area) is lodged, or when no positive
    "lowest"-floor area is found in the dimensions.
    """
    # Location code "G" marks a ground floor; anything else is out.
    lowest_is_ground = _leading_code(survey.floor.location) == "G"
    if not lowest_is_ground:
        return False

    room_in_roof = survey.room_in_roof
    if room_in_roof is None or room_in_roof.floor_area_m2 <= 0:
        return False

    # Total area of every dimension row whose name mentions "lowest".
    ground_area = 0
    for floor in survey.dimensions.floors:
        if "lowest" in floor.name.lower():
            ground_area = ground_area + floor.area_m2

    if not (ground_area > 0):
        return False
    return room_in_roof.floor_area_m2 < ground_area
def _map_elmhurst_ventilation(
v: ElmhurstVentilation, built_form: str
v: ElmhurstVentilation,
built_form: str,
has_suspended_timber_floor: bool,
) -> SapVentilation:
return SapVentilation(
ventilation_type=None,
@ -2527,4 +2588,6 @@ def _map_elmhurst_ventilation(
flueless_gas_fires_count=v.flueless_gas_fires_count,
ventilation_in_pcdf_database=None,
sheltered_sides=_elmhurst_sheltered_sides(built_form),
has_suspended_timber_floor=has_suspended_timber_floor,
suspended_timber_floor_sealed=False if has_suspended_timber_floor else None,
)