From 15613309df6b794d0845af32b2e906f9742758b4 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 18 May 2026 20:50:39 +0000 Subject: [PATCH] slice S-B24: parse measured U from full-SAP wall description MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Full SAP assessments (~15% of corpus, 4 403 of 30 000 scanned bulk-zip certs) lodge a measured/calculated wall U-value per BS EN ISO 6946 in walls[i].description, e.g. "Average thermal transmittance 0.18 W/m²K". These certs typically have wall_construction, wall_insulation_type and construction_age_band all None, which the cascade defaults previously resolved to U = 1.5 (uninsulated cavity at band E). RdSAP 10 §5.3: "U values are obtained from … the construction type, date of construction and, where applicable, thickness of additional insulation" — but a measured value supersedes the cascade. Corpus U-value distribution among parsed: median 0.21, mean 0.225, range 0.06-1.84; 80% at U ≈ 0.2 (Part L-compliant new-builds); 10% at U ≈ 0.1 (passivhaus / very low); 7% at U ≈ 0.3 (older retrofitted full-SAP); 3% in the tail (conversions, edge cases). Per affected cert (100 m² new-build at U 1.5 → 0.21): walls_w_per_k drops 129 → 21 W/K; PEUI drops ≈ 120 kWh/m². Implementation: - _measured_u_from_description() regex-parses the phrase from the wall description; returns None on no-match or non-numeric so the cascade fall-through is preserved. - u_wall checks the measured value FIRST, before any cascade logic. - No range cap — calculator mirrors what the assessor lodged, per the "deterministic except for input errors" principle. Parse failure falls through cleanly. Parity probe at 300 certs, seed=7: headlines unchanged. Direct check on the sample: 0/300 certs carry an "Average thermal transmittance" description. The v18a parquet filters full-SAP certs out somewhere upstream, so this slice is invisible in the parquet-based probe. 
The slice's correctness is proved by: - 3 new unit tests in test_rdsap_uvalues.py (tracer + parse-failure fallback + regression on an ordinary filled-cavity description, which still routes through the cascade correctly) - 1 end-to-end test in test_heat_transmission.py exercising a synthetic full-SAP cert through heat_transmission_from_cert - All 274 domain tests passing, no regressions Follow-up tooling: a bulk-zip-based parity probe that doesn't filter to the parquet's subset is needed to measure this slice's corpus impact. Separate dig. Co-Authored-By: Claude Opus 4.7 --- .../domain/src/domain/ml/rdsap_uvalues.py | 30 +++++++++ .../src/domain/ml/tests/test_rdsap_uvalues.py | 63 +++++++++++++++++++ .../worksheet/tests/test_heat_transmission.py | 47 ++++++++++++++ 3 files changed, 140 insertions(+) diff --git a/packages/domain/src/domain/ml/rdsap_uvalues.py b/packages/domain/src/domain/ml/rdsap_uvalues.py index 9ebdec9f..1929ba3c 100644 --- a/packages/domain/src/domain/ml/rdsap_uvalues.py +++ b/packages/domain/src/domain/ml/rdsap_uvalues.py @@ -13,11 +13,38 @@ evidence" rule in spec section 6.2.3. from __future__ import annotations +import re from enum import Enum from math import log, pi from typing import Final, Optional +# Full-SAP (not RdSAP) assessments lodge a measured/calculated wall +# U-value per BS EN ISO 6946 in `walls[i].description`, e.g. +# "Average thermal transmittance 0.18 W/m²K". When present, the measured +# value supersedes any default-table cascade. +_THERMAL_TRANSMITTANCE_RE: Final[re.Pattern[str]] = re.compile( + r"thermal\s+transmittance\s+([\d.]+)\s*W", re.IGNORECASE +) + + +def _measured_u_from_description(description: Optional[str]) -> Optional[float]: + """Return the measured W/m²K value lodged in a wall description, or + None if no "Average thermal transmittance X W/m²K" phrase is present + (or if parsing fails). On full-SAP certs the assessor enters the + BS EN ISO 6946 result directly here in lieu of using the cascade. 
+ """ + if description is None: + return None + match = _THERMAL_TRANSMITTANCE_RE.search(description) + if match is None: + return None + try: + return float(match.group(1)) + except ValueError: + return None + + # --------------------------------------------------------------------------- # Country # --------------------------------------------------------------------------- @@ -280,6 +307,9 @@ def u_wall( thickness-bucketed cascade — the two encode different things (filled- cavity is a construction state, not an added-insulation thickness). """ + measured = _measured_u_from_description(description) + if measured is not None: + return measured if country is None and age_band is None and construction is None and insulation_thickness_mm is None and not insulation_present: return 1.5 ctry = country if country is not None else Country.ENG diff --git a/packages/domain/src/domain/ml/tests/test_rdsap_uvalues.py b/packages/domain/src/domain/ml/tests/test_rdsap_uvalues.py index 96fcf460..78a1601f 100644 --- a/packages/domain/src/domain/ml/tests/test_rdsap_uvalues.py +++ b/packages/domain/src/domain/ml/tests/test_rdsap_uvalues.py @@ -39,6 +39,69 @@ from domain.ml.rdsap_uvalues import ( # ----- Walls ----- +def test_u_wall_description_with_measured_transmittance_returns_parsed_value() -> None: + # Arrange — full SAP (not RdSAP) assessments lodge a measured/calculated + # U-value per BS EN ISO 6946 in the wall description string, e.g. + # "Average thermal transmittance 0.18 W/m²K". These certs typically + # have wall_construction, wall_insulation_type, and age_band all None + # because the cascade defaults don't apply — the assessor's measured + # value takes precedence (RdSAP 10 §5.3). Affects ~15% of corpus. 
+ + # Act + result = u_wall( + country=None, + age_band=None, + construction=None, + insulation_thickness_mm=None, + description="Average thermal transmittance 0.18 W/m²K", + ) + + # Assert + assert result == pytest.approx(0.18, abs=0.001) + + +def test_u_wall_description_with_malformed_transmittance_falls_through_to_cascade() -> None: + # Arrange — a description containing the phrase but a malformed value + # (e.g. just a stray dot) should NOT short-circuit to a parse failure; + # it should fall through to the construction cascade and return a + # spec-defined value. This is the calculator's "trust the cert when + # parseable, never raise" contract. + + # Act + result = u_wall( + country=Country.ENG, + age_band="G", + construction=WALL_CAVITY, + insulation_thickness_mm=0, + description="Average thermal transmittance . W/m²K", + ) + + # Assert — Table 6 cavity-as-built row at band G = 0.60 W/m²K. + assert result == pytest.approx(0.60, abs=0.001) + + +def test_u_wall_description_without_transmittance_phrase_routes_through_cascade() -> None: + # Arrange — the measured-U dispatcher must only fire when the + # description contains the "thermal transmittance" phrase. The + # ordinary surveyor-text descriptions (e.g. "Cavity wall, filled + # cavity") must still route through the construction cascade. + + # Act + result = u_wall( + country=Country.ENG, + age_band="E", + construction=WALL_CAVITY, + insulation_thickness_mm=0, + insulation_present=True, + wall_insulation_type=WALL_INSULATION_FILLED_CAVITY, + description="Cavity wall, filled cavity", + ) + + # Assert — should return the Filled-cavity row value, not anything + # parsed out of the description. + assert result == pytest.approx(0.7, abs=0.001) + + def test_u_wall_filled_cavity_england_age_band_e_returns_table6_value() -> None: # Arrange — RdSAP 10 Table 6 (England) row "Filled cavity", age band E # (1967-1975) -> 0.7 W/m^2K. 
The cert records this as the triple diff --git a/packages/domain/src/domain/sap/worksheet/tests/test_heat_transmission.py b/packages/domain/src/domain/sap/worksheet/tests/test_heat_transmission.py index 2222b1ba..da0437c0 100644 --- a/packages/domain/src/domain/sap/worksheet/tests/test_heat_transmission.py +++ b/packages/domain/src/domain/sap/worksheet/tests/test_heat_transmission.py @@ -17,6 +17,8 @@ envelope.py test pack so cases match production cert shape. import pytest +from datatypes.epc.domain.epc_property_data import EnergyElement + from domain.ml.tests._fixtures import ( make_building_part, make_floor_dimension, @@ -29,6 +31,51 @@ from domain.sap.worksheet.heat_transmission import ( ) +def test_walls_description_measured_transmittance_overrides_construction_cascade() -> None: + # Arrange — a full-SAP (not RdSAP) cert lodges the wall U-value + # directly in walls[i].description ("Average thermal transmittance + # 0.18 W/m²K") rather than via the construction/insulation triple. + # Such certs typically have wall_construction, wall_insulation_type, + # and age_band all None, which the cascade would otherwise fall back + # to U=1.5. With the measured value lodged, the calculator must use + # it directly. + # Geometry: 100 m² ground floor, 40 m perimeter, 2.5 m height, + # single storey → gross_wall = 100 m². walls_w_per_k expected = + # 0.18 × 100 = 18 W/K. 
+ main = make_building_part( + identifier="Main Dwelling", + construction_age_band="E", + wall_construction=4, + wall_insulation_type=4, + party_wall_construction=1, + roof_construction=4, + floor_dimensions=[ + make_floor_dimension( + total_floor_area_m2=100.0, room_height_m=2.5, + party_wall_length_m=0.0, heat_loss_perimeter_m=40.0, floor=0, + ), + ], + ) + epc = make_minimal_sap10_epc( + total_floor_area_m2=100.0, + country_code="ENG", + sap_building_parts=[main], + ) + epc.walls = [ + EnergyElement( + description="Average thermal transmittance 0.18 W/m²K", + energy_efficiency_rating=5, + environmental_efficiency_rating=5, + ), + ] + + # Act + result = heat_transmission_from_cert(epc) + + # Assert + assert result.walls_w_per_k == pytest.approx(18.0, abs=0.5) + + def test_band_e_filled_cavity_uses_table6_filled_row_in_walls_w_per_k() -> None: # Arrange — RdSAP 10 Table 6 (England) "Filled cavity" row at band E # (1967-1975) = 0.7 W/m^2K. Cert encodes this as