slice S-B24: parse measured U from full-SAP wall description

Full SAP assessments (~15% of corpus, 4 403 of 30 000 scanned bulk-zip
certs) lodge a measured/calculated wall U-value per BS EN ISO 6946 in
walls[i].description, e.g. "Average thermal transmittance 0.18 W/m²K".
These certs typically have wall_construction, wall_insulation_type and
construction_age_band all None, which the cascade defaults previously
resolved to U = 1.5 (uninsulated cavity at band E). RdSAP 10 §5.3:
"U values are obtained from … the construction type, date of
construction and, where applicable, thickness of additional insulation"
— but a measured value supersedes the cascade.

Corpus U-value distribution among parsed:
  median 0.21, mean 0.225, range 0.06-1.84
  80% at U ≈ 0.2 (Part L-compliant new-builds)
  10% at U ≈ 0.1 (passivhaus / very low)
  7%  at U ≈ 0.3 (older retrofitted full-SAP)
  3%  in the tail (conversions, edge cases)

Per affected cert (100 m² new-build at U 1.5 → 0.21):
  walls_w_per_k drops 129 → 21 W/K
  PEUI drops ≈ 120 kWh/m²

Implementation:
- _measured_u_from_description() regex-parses the phrase from the wall
  description; returns None on no-match or non-numeric so the cascade
  fall-through is preserved.
- u_wall checks the measured value FIRST, before any cascade logic.
- No range cap — calculator mirrors what the assessor lodged, per the
  "deterministic except for input errors" principle. Parse failure
  falls through cleanly.

Parity probe at 300 certs, seed=7: headlines unchanged. Direct check
on the sample: 0/300 certs carry an "Average thermal transmittance"
description. The v18a parquet filters full-SAP certs out somewhere
upstream, so this slice is invisible in the parquet-based probe. The
slice's correctness is proved by:
- 3 unit tests in test_rdsap_uvalues.py (tracer + parse-failure
  fallback + regression that an ordinary filled-cavity description
  still routes through the cascade)
- 1 end-to-end test in test_heat_transmission.py exercising a
  synthetic full-SAP cert through heat_transmission_from_cert
- All 274 domain tests passing, no regressions

Follow-up tooling: measuring this slice's corpus impact needs a
bulk-zip-based parity probe that is not restricted to the parquet's
subset. That is a separate investigation.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Khalim Conn-Kowlessar 2026-05-18 20:50:39 +00:00
parent 136f149d46
commit 15613309df
3 changed files with 140 additions and 0 deletions

View file

@ -13,11 +13,38 @@ evidence" rule in spec section 6.2.3.
from __future__ import annotations
import re
from enum import Enum
from math import log, pi
from typing import Final, Optional
# Wall descriptions on full-SAP (as opposed to RdSAP) certificates can carry
# the assessor's measured/calculated BS EN ISO 6946 U-value as free text in
# `walls[i].description`, e.g. "Average thermal transmittance 0.18 W/m²K".
# The pattern captures the run of digits/dots that sits between the
# "thermal transmittance" phrase and the unit's leading "W"; the capture is
# not guaranteed to be a well-formed number (it can be "." or "1.2.3").
_THERMAL_TRANSMITTANCE_RE: Final[re.Pattern[str]] = re.compile(
    r"thermal\s+transmittance\s+([\d.]+)\s*W", re.IGNORECASE
)


def _measured_u_from_description(description: Optional[str]) -> Optional[float]:
    """Extract a lodged wall U-value (W/m²K) from a description string.

    Args:
        description: Free-text wall description, or None when the cert
            carries no description.

    Returns:
        The number following a case-insensitive "thermal transmittance"
        phrase as a float, or None when the description is None, the
        phrase is absent, or the captured text is not a valid float. The
        value is returned as lodged; no range check is applied.
    """
    found = (
        _THERMAL_TRANSMITTANCE_RE.search(description)
        if description is not None
        else None
    )
    if found is None:
        return None

    numeric_text = found.group(1)
    try:
        value = float(numeric_text)
    except ValueError:
        # The character class admits strings such as "." or "1.2.3";
        # treat those as "nothing lodged" instead of raising.
        return None
    return value
# ---------------------------------------------------------------------------
# Country
# ---------------------------------------------------------------------------
@ -280,6 +307,9 @@ def u_wall(
thickness-bucketed cascade the two encode different things (filled-
cavity is a construction state, not an added-insulation thickness).
"""
measured = _measured_u_from_description(description)
if measured is not None:
return measured
if country is None and age_band is None and construction is None and insulation_thickness_mm is None and not insulation_present:
return 1.5
ctry = country if country is not None else Country.ENG

View file

@ -39,6 +39,69 @@ from domain.ml.rdsap_uvalues import (
# ----- Walls -----
def test_u_wall_description_with_measured_transmittance_returns_parsed_value() -> None:
    # Arrange — a wall in the full-SAP style: the description carries the
    # lodged U-value ("Average thermal transmittance 0.18 W/m²K") while
    # every cascade input passed here (country, age band, construction,
    # insulation thickness) is None.
    lodged_u = 0.18
    wall_inputs = dict(
        country=None,
        age_band=None,
        construction=None,
        insulation_thickness_mm=None,
        description="Average thermal transmittance 0.18 W/m²K",
    )

    # Act
    parsed_u = u_wall(**wall_inputs)

    # Assert — the figure parsed from the description comes back, not a
    # cascade default.
    assert parsed_u == pytest.approx(lodged_u, abs=0.001)
def test_u_wall_description_with_malformed_transmittance_falls_through_to_cascade() -> None:
    # Arrange — the description contains the "thermal transmittance"
    # phrase, but the value slot holds only a stray dot. That must neither
    # raise nor short-circuit: the construction cascade should still
    # decide the result ("trust the cert when parseable, never raise").
    malformed_description = "Average thermal transmittance . W/m²K"
    cascade_inputs = dict(
        country=Country.ENG,
        age_band="G",
        construction=WALL_CAVITY,
        insulation_thickness_mm=0,
    )

    # Act
    cascade_u = u_wall(description=malformed_description, **cascade_inputs)

    # Assert — Table 6 cavity-as-built row at band G = 0.60 W/m²K.
    table6_cavity_band_g = 0.60
    assert cascade_u == pytest.approx(table6_cavity_band_g, abs=0.001)
def test_u_wall_description_without_transmittance_phrase_routes_through_cascade() -> None:
    # Arrange — an ordinary surveyor-text description ("Cavity wall,
    # filled cavity") has no "thermal transmittance" phrase, so the
    # measured-U shortcut must stay out of the way and the construction
    # cascade must still pick the value.
    surveyor_text = "Cavity wall, filled cavity"
    filled_cavity_inputs = dict(
        country=Country.ENG,
        age_band="E",
        construction=WALL_CAVITY,
        insulation_thickness_mm=0,
        insulation_present=True,
        wall_insulation_type=WALL_INSULATION_FILLED_CAVITY,
    )

    # Act
    cascade_u = u_wall(description=surveyor_text, **filled_cavity_inputs)

    # Assert — the Filled-cavity row value is returned, not anything
    # parsed out of the description.
    filled_cavity_band_e = 0.7
    assert cascade_u == pytest.approx(filled_cavity_band_e, abs=0.001)
def test_u_wall_filled_cavity_england_age_band_e_returns_table6_value() -> None:
# Arrange — RdSAP 10 Table 6 (England) row "Filled cavity", age band E
# (1967-1975) -> 0.7 W/m^2K. The cert records this as the triple

View file

@ -17,6 +17,8 @@ envelope.py test pack so cases match production cert shape.
import pytest
from datatypes.epc.domain.epc_property_data import EnergyElement
from domain.ml.tests._fixtures import (
make_building_part,
make_floor_dimension,
@ -29,6 +31,51 @@ from domain.sap.worksheet.heat_transmission import (
)
def test_walls_description_measured_transmittance_overrides_construction_cascade() -> None:
    # Arrange — the cert lodges its wall U-value as free text in
    # walls[i].description ("Average thermal transmittance 0.18 W/m²K"),
    # the way full-SAP (not RdSAP) certs do. The building part here still
    # carries an age band, wall_construction and wall_insulation_type, so
    # a passing assertion shows the lodged value winning over whatever
    # those cascade inputs would otherwise resolve to.
    #
    # Geometry: 100 m² ground floor, 40 m heat-loss perimeter, 2.5 m room
    # height, single storey → gross wall = 40 × 2.5 = 100 m², hence
    # walls_w_per_k expected = 0.18 × 100 = 18 W/K.
    lodged_description = "Average thermal transmittance 0.18 W/m²K"
    ground_floor = make_floor_dimension(
        total_floor_area_m2=100.0,
        room_height_m=2.5,
        party_wall_length_m=0.0,
        heat_loss_perimeter_m=40.0,
        floor=0,
    )
    dwelling = make_building_part(
        identifier="Main Dwelling",
        construction_age_band="E",
        wall_construction=4,
        wall_insulation_type=4,
        party_wall_construction=1,
        roof_construction=4,
        floor_dimensions=[ground_floor],
    )
    cert = make_minimal_sap10_epc(
        total_floor_area_m2=100.0,
        country_code="ENG",
        sap_building_parts=[dwelling],
    )
    lodged_wall = EnergyElement(
        description=lodged_description,
        energy_efficiency_rating=5,
        environmental_efficiency_rating=5,
    )
    cert.walls = [lodged_wall]

    # Act
    transmission = heat_transmission_from_cert(cert)

    # Assert
    expected_walls_w_per_k = 18.0
    assert transmission.walls_w_per_k == pytest.approx(expected_walls_w_per_k, abs=0.5)
def test_band_e_filled_cavity_uses_table6_filled_row_in_walls_w_per_k() -> None:
# Arrange — RdSAP 10 Table 6 (England) "Filled cavity" row at band E
# (1967-1975) = 0.7 W/m^2K. Cert encodes this as