Merge pull request #1264 from Hestia-Homes/feature/hyde_make_it_more_accurate_with_tests

Feature/hyde make it more accurate with tests
This commit is contained in:
Jun-te Kim 2026-06-23 14:31:45 +01:00 committed by GitHub
commit 3c45a30890
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 60 additions and 25 deletions

View file

@ -67,10 +67,19 @@ prediction, and the skipped certs surface for follow-up:
cert numbers, so the gaps can be closed deliberately. +3 tests; 0 new pyright.
Live-verified on BN11 4EP: cohort now builds 35 + records the 1 skip.
**✅ DONE — 2 safe generic mapper fixes (+regression tests, 0 new pyright):**
**✅ DONE — generic mapper fixes (+regression tests, 0 new pyright):**
- `_normalize_sap_schema_16_x`: `setdefault("insulated_door_count", 0)` (the
original prod crash) AND `setdefault("multiple_glazed_proportion", 100)` (16.3
cert 0418-3986-7250-2884-7970; ML-only field the SAP calc ignores; modal 100).
original prod crash).
- ⚠️ REVERTED `setdefault("multiple_glazed_proportion", 100)` — it made an
otherwise-unmappable 16.3 cert (0418-3986-7250-2884-7970) join the EPC-prediction
**frozen-fixture donor pool** and tipped near-tie similarity matches, regressing
the component-accuracy gate (`has_hot_water_cylinder` 30→29/36; `door_count`
residual 23→25/36 — both ≫ the gate's 1e-3 float tolerance). The field is ML-only
(SAP calc never reads it) and the cohort skip-and-report path handles the cert,
so we leave it fail-loud instead of synthesising a value. Re-baselining the gate
was rejected (loosens a tighten-only gate for a real regression). For Khalim: if
16.x certs missing `multiple_glazed_proportion` should map, derive it (single→0 /
double→100) rather than a flat default, and re-measure the gate.
- `_with_recorded_performance` co2/consumption/rating: `float(co2)` → crashed on
certs lodging `co2_emissions_current` as a Measurement dict `{'value':3.5,
'quantity':'tonnes per year'}` (16.x cert 2308-4997-7262-0137-9930, surfaced as

View file

@ -3313,13 +3313,14 @@ def _normalize_sap_schema_16_x(data: Dict[str, Any]) -> Dict[str, Any]:
# still fail loud — that is correct, the fabric data is insufficient.
d.setdefault("insulated_door_count", 0)
# Some 16.x certs (e.g. 16.3 cert 0418-3986-7250-2884-7970) omit
# `multiple_glazed_proportion` while still lodging `multiple_glazing_type` —
# RdSapSchema17_1 requires it. It is an ML-feature field the SAP-10 calculator
# never reads (only `sap10_ml/transform`), so the value is non-load-bearing for
# the score; default the modal 100 ("fully" the lodged glazing type) to keep the
# cert mappable. Mirrors the `insulated_door_count` default above.
d.setdefault("multiple_glazed_proportion", 100)
# NB: we deliberately do NOT default `multiple_glazed_proportion` here. A 16.x
# cert that omits it (e.g. 16.3 cert 0418-3986-7250-2884-7970) is left to fail
# the RdSapSchema17_1 parse and be handled by the cohort skip-and-report path —
# synthesising a value (tried `100`) drew the otherwise-unmappable cert into the
# EPC-prediction donor pool and tipped near-tie similarity matches, regressing
# the frozen-fixture component-accuracy gate (has_hot_water_cylinder 30→29/36,
# door_count residual 23→25/36). The field is ML-only (the SAP calc never reads
# it), so there is no calc cost to leaving such certs unmapped. See worklist.
# 16.2 lodges glazing in BOTH `multiple_glazing_type` (frequently the "ND"
# not-defined sentinel) AND the windows[].description. When the numeric field

View file

@ -324,7 +324,7 @@ class TestFromSapSchema17_1CodeBasedHeating:
class TestFromSapSchema17_1Ventilation:
"""Slice D5-vent: full-SAP sap_ventilation → measured air permeability (AP4),
"""Slice D5-vent: full-SAP sap_ventilation → measured air permeability (AP50),
ventilation_type MechanicalVentilationKind, sheltered sides, wet rooms and
the MEV PCDB index."""
@ -333,9 +333,13 @@ class TestFromSapSchema17_1Ventilation:
schema = from_dict(SapSchema17_1, load("sap_17_1.json"))
return EpcPropertyDataMapper.from_sap_schema_17_1(schema)
def test_measured_air_permeability_fed_as_ap4(self, sample: EpcPropertyData) -> None:
def test_measured_air_permeability_fed_as_ap50(self, sample: EpcPropertyData) -> None:
# The lodged `air_permeability` is a q50 Blower-Door result, so it feeds the
# engine's AP50 path `(18) = AP50/20 + (8)` — NOT the AP4/Pulse formula
# `0.263 × AP4^0.924` (the air-permeability AP50 fix, uprn_10093116528).
assert sample.sap_ventilation is not None
assert sample.sap_ventilation.air_permeability_ap4_m3_h_m2 == 2.6
assert sample.sap_ventilation.air_permeability_ap50_m3_h_m2 == 2.6
assert sample.sap_ventilation.air_permeability_ap4_m3_h_m2 is None
def test_ventilation_type_6_is_extract(self, sample: EpcPropertyData) -> None:
# ventilation_type 6 = MEV decentralised → EXTRACT_OR_PIV_OUTSIDE.
@ -538,21 +542,20 @@ class TestFromSapSchema16_2:
assert isinstance(epc, EpcPropertyData)
assert epc.insulated_door_count == 0
def test_16_x_missing_multiple_glazed_proportion_still_maps(self) -> None:
# Some 16.x certs (e.g. 16.3 cert 0418-3986-7250-2884-7970) lodge
# `multiple_glazing_type` but omit `multiple_glazed_proportion`, which
# RdSapSchema17_1 requires — previously raised "missing required field
# 'multiple_glazed_proportion'", aborting the prediction cohort. The
# normaliser defaults the modal 100 so the cert maps. (The field is an
# ML-only feature the SAP calculator never reads, and from_rdsap_schema_17_1
# does not carry it onto EpcPropertyData — the point here is mappability.)
def test_16_x_missing_multiple_glazed_proportion_fails_loud(self) -> None:
# A 16.x cert omitting `multiple_glazed_proportion` (e.g. 16.3 cert
# 0418-3986-7250-2884-7970) is deliberately NOT defaulted — it fails the
# RdSapSchema17_1 parse and is handled by the cohort skip-and-report path.
# Synthesising a value drew the otherwise-unmappable cert into the
# EPC-prediction donor pool and regressed the component-accuracy gate;
# the field is ML-only so there is no calc cost to leaving it unmapped.
data = load("sap_16_3.json")
del data["multiple_glazed_proportion"]
assert "multiple_glazed_proportion" not in data
epc = EpcPropertyDataMapper.from_api_response(data)
assert isinstance(epc, EpcPropertyData)
with pytest.raises(
ValueError, match="missing required field 'multiple_glazed_proportion'"
):
EpcPropertyDataMapper.from_api_response(data)
def test_recorded_co2_as_measurement_dict_is_coerced_not_crashed(self) -> None:
# Some certs (e.g. 16.x cert 2308-4997-7262-0137-9930) lodge

View file

@ -581,6 +581,28 @@ _EXPECTATIONS: Final[tuple[RealCertExpectation, ...]] = (
cert_num="9978-7098-7226-2633-3994",
sap_score=75,
),
# UPRN 10002468137 → cert 0215-2818-7357-9703-2145. RdSAP-Schema-17.1,
# all-electric high-heat-retention storage heaters on Economy 7, solid-
# brick uninsulated end-terrace. Ground truth is Elmhurst RdSAP10 = 60,
# reproduced on identical inputs (summary + full SAP 10.2 worksheet saved
# alongside: elmhurst_summary.pdf / elmhurst_worksheet.pdf). The engine
# produces 62 — a +2 over-rating localised to OFF-PEAK WATER HEATING:
# the worksheet (lines 243-246) prices the 7-hour off-peak immersion at a
# Table 13 split (19.36% @ 15.29p high + 80.64% @ 5.5p low), but the engine
# prices 100% at the 5.5p low rate, under-costing the bill (£595.68 vs
# £629.67) → lower ECF (2.69 vs 2.84) → SAP 62 not 60. (Space heating 100%
# off-peak IS correct for storage heaters — the worksheet agrees.) Strict
# xfail until the off-peak water-heating rate split is implemented.
RealCertExpectation(
schema="RdSAP-Schema-17.1",
sample="uprn_10002468137",
cert_num="0215-2818-7357-9703-2145",
sap_score=60,
known_bug_xfail=(
"off-peak (7-hour) water-heating high/low rate split not applied — "
"engine prices 100% at the low rate; see elmhurst_worksheet.pdf (243-246)"
),
),
)