diff --git a/.claude/skills/expand-sap-accuracy-corpus/worklist.md b/.claude/skills/expand-sap-accuracy-corpus/worklist.md index e5c6b8fa..310582bc 100644 --- a/.claude/skills/expand-sap-accuracy-corpus/worklist.md +++ b/.claude/skills/expand-sap-accuracy-corpus/worklist.md @@ -67,10 +67,19 @@ prediction, and the skipped certs surface for follow-up: cert numbers, so the gaps can be closed deliberately. +3 tests; 0 new pyright. Live-verified on BN11 4EP: cohort now builds 35 + records the 1 skip. -**✅ DONE — 2 safe generic mapper fixes (+regression tests, 0 new pyright):** +**✅ DONE — generic mapper fixes (+regression tests, 0 new pyright):** - `_normalize_sap_schema_16_x`: `setdefault("insulated_door_count", 0)` (the - original prod crash) AND `setdefault("multiple_glazed_proportion", 100)` (16.3 - cert 0418-3986-7250-2884-7970; ML-only field the SAP calc ignores; modal 100). + original prod crash). +- ⚠️ REVERTED `setdefault("multiple_glazed_proportion", 100)` — it made an + otherwise-unmappable 16.3 cert (0418-3986-7250-2884-7970) join the EPC-prediction + **frozen-fixture donor pool** and tipped near-tie similarity matches, regressing + the component-accuracy gate (`has_hot_water_cylinder` 30→29/36; `door_count` + residual 23→25/36 — both ≫ the gate's 1e-3 float tolerance). The field is ML-only + (SAP calc never reads it) and the cohort skip-and-report path handles the cert, + so we leave it fail-loud instead of synthesising a value. Re-baselining the gate + was rejected (loosens a tighten-only gate for a real regression). For Khalim: if + 16.x certs missing `multiple_glazed_proportion` should map, derive it (single→0 / + double→100) rather than a flat default, and re-measure the gate. - `_with_recorded_performance` co2/consumption/rating: `float(co2)` → crashed on certs lodging `co2_emissions_current` as a Measurement dict `{'value':3.5, 'quantity':'tonnes per year'}` (16.x cert 2308-4997-7262-0137-9930, surfaced as diff --git a/datatypes/epc/domain/mapper.py b/datatypes/epc/domain/mapper.py index ce0dbcc5..2b2045b6 100644 --- a/datatypes/epc/domain/mapper.py +++ b/datatypes/epc/domain/mapper.py @@ -3313,13 +3313,14 @@ def _normalize_sap_schema_16_x(data: Dict[str, Any]) -> Dict[str, Any]: # still fail loud — that is correct, the fabric data is insufficient. d.setdefault("insulated_door_count", 0) - # Some 16.x certs (e.g. 16.3 cert 0418-3986-7250-2884-7970) omit - # `multiple_glazed_proportion` while still lodging `multiple_glazing_type` — - # RdSapSchema17_1 requires it. It is an ML-feature field the SAP-10 calculator - # never reads (only `sap10_ml/transform`), so the value is non-load-bearing for - # the score; default the modal 100 ("fully" the lodged glazing type) to keep the - # cert mappable. Mirrors the `insulated_door_count` default above. - d.setdefault("multiple_glazed_proportion", 100) + # NB: we deliberately do NOT default `multiple_glazed_proportion` here. A 16.x + # cert that omits it (e.g. 16.3 cert 0418-3986-7250-2884-7970) is left to fail + # the RdSapSchema17_1 parse and be handled by the cohort skip-and-report path — + # synthesising a value (tried `100`) drew the otherwise-unmappable cert into the + # EPC-prediction donor pool and tipped near-tie similarity matches, regressing + # the frozen-fixture component-accuracy gate (has_hot_water_cylinder 30→29/36, + # door_count residual 23→25/36). The field is ML-only (the SAP calc never reads + # it), so there is no calc cost to leaving such certs unmapped. See worklist. # 16.2 lodges glazing in BOTH `multiple_glazing_type` (frequently the "ND" # not-defined sentinel) AND the windows[].description. When the numeric field diff --git a/datatypes/epc/domain/tests/test_from_sap_schema.py b/datatypes/epc/domain/tests/test_from_sap_schema.py index 85f8566e..d1bf36ed 100644 --- a/datatypes/epc/domain/tests/test_from_sap_schema.py +++ b/datatypes/epc/domain/tests/test_from_sap_schema.py @@ -324,7 +324,7 @@ class TestFromSapSchema17_1CodeBasedHeating: class TestFromSapSchema17_1Ventilation: - """Slice D5-vent: full-SAP sap_ventilation → measured air permeability (AP4), + """Slice D5-vent: full-SAP sap_ventilation → measured air permeability (AP50), ventilation_type → MechanicalVentilationKind, sheltered sides, wet rooms and the MEV PCDB index.""" @@ -333,9 +333,13 @@ class TestFromSapSchema17_1Ventilation: schema = from_dict(SapSchema17_1, load("sap_17_1.json")) return EpcPropertyDataMapper.from_sap_schema_17_1(schema) - def test_measured_air_permeability_fed_as_ap4(self, sample: EpcPropertyData) -> None: + def test_measured_air_permeability_fed_as_ap50(self, sample: EpcPropertyData) -> None: + # The lodged `air_permeability` is a q50 Blower-Door result, so it feeds the + # engine's AP50 path `(18) = AP50/20 + (8)` — NOT the AP4/Pulse formula + # `0.263 × AP4^0.924` (the air-permeability AP50 fix, uprn_10093116528). assert sample.sap_ventilation is not None - assert sample.sap_ventilation.air_permeability_ap4_m3_h_m2 == 2.6 + assert sample.sap_ventilation.air_permeability_ap50_m3_h_m2 == 2.6 + assert sample.sap_ventilation.air_permeability_ap4_m3_h_m2 is None def test_ventilation_type_6_is_extract(self, sample: EpcPropertyData) -> None: # ventilation_type 6 = MEV decentralised → EXTRACT_OR_PIV_OUTSIDE. @@ -538,21 +542,20 @@ class TestFromSapSchema16_2: assert isinstance(epc, EpcPropertyData) assert epc.insulated_door_count == 0 - def test_16_x_missing_multiple_glazed_proportion_still_maps(self) -> None: - # Some 16.x certs (e.g. 16.3 cert 0418-3986-7250-2884-7970) lodge - # `multiple_glazing_type` but omit `multiple_glazed_proportion`, which - # RdSapSchema17_1 requires — previously raised "missing required field - # 'multiple_glazed_proportion'", aborting the prediction cohort. The - # normaliser defaults the modal 100 so the cert maps. (The field is an - # ML-only feature the SAP calculator never reads, and from_rdsap_schema_17_1 - # does not carry it onto EpcPropertyData — the point here is mappability.) + def test_16_x_missing_multiple_glazed_proportion_fails_loud(self) -> None: + # A 16.x cert omitting `multiple_glazed_proportion` (e.g. 16.3 cert + # 0418-3986-7250-2884-7970) is deliberately NOT defaulted — it fails the + # RdSapSchema17_1 parse and is handled by the cohort skip-and-report path. + # Synthesising a value drew the otherwise-unmappable cert into the + # EPC-prediction donor pool and regressed the component-accuracy gate; + # the field is ML-only so there is no calc cost to leaving it unmapped. data = load("sap_16_3.json") del data["multiple_glazed_proportion"] - assert "multiple_glazed_proportion" not in data - epc = EpcPropertyDataMapper.from_api_response(data) - - assert isinstance(epc, EpcPropertyData) + with pytest.raises( + ValueError, match="missing required field 'multiple_glazed_proportion'" + ): + EpcPropertyDataMapper.from_api_response(data) def test_recorded_co2_as_measurement_dict_is_coerced_not_crashed(self) -> None: # Some certs (e.g. 16.x cert 2308-4997-7262-0137-9930) lodge diff --git a/tests/domain/sap10_calculator/test_real_cert_sap_accuracy.py b/tests/domain/sap10_calculator/test_real_cert_sap_accuracy.py index aa8344b2..56989c71 100644 --- a/tests/domain/sap10_calculator/test_real_cert_sap_accuracy.py +++ b/tests/domain/sap10_calculator/test_real_cert_sap_accuracy.py @@ -581,6 +581,28 @@ _EXPECTATIONS: Final[tuple[RealCertExpectation, ...]] = ( cert_num="9978-7098-7226-2633-3994", sap_score=75, ), + # UPRN 10002468137 → cert 0215-2818-7357-9703-2145. RdSAP-Schema-17.1, + # all-electric high-heat-retention storage heaters on Economy 7, solid- + # brick uninsulated end-terrace. Ground truth is Elmhurst RdSAP10 = 60, + # reproduced on identical inputs (summary + full SAP 10.2 worksheet saved + # alongside: elmhurst_summary.pdf / elmhurst_worksheet.pdf). The engine + # produces 62 — a +2 over-rating localised to OFF-PEAK WATER HEATING: + # the worksheet (lines 243-246) prices the 7-hour off-peak immersion at a + # Table 13 split (19.36% @ 15.29p high + 80.64% @ 5.5p low), but the engine + # prices 100% at the 5.5p low rate, under-costing the bill (£595.68 vs + # £629.67) → lower ECF (2.69 vs 2.84) → SAP 62 not 60. (Space heating 100% + # off-peak IS correct for storage heaters — the worksheet agrees.) Strict + # xfail until the off-peak water-heating rate split is implemented. + RealCertExpectation( + schema="RdSAP-Schema-17.1", + sample="uprn_10002468137", + cert_num="0215-2818-7357-9703-2145", + sap_score=60, + known_bug_xfail=( + "off-peak (7-hour) water-heating high/low rate split not applied — " + "engine prices 100% at the low rate; see elmhurst_worksheet.pdf (243-246)" + ), + ), )