Merge pull request #1264 from Hestia-Homes/feature/hyde_make_it_more_accurate_with_tests

Feature/hyde make it more accurate with tests
2026-06-30 13:10:47 +00:00 · 2026-06-23 14:31:45 +01:00 · 2026-06-23 14:31:45 +01:00 · 3c45a30890
commit 3c45a30890
parent 22b92f968e 2b0cb180bf
4 changed files with 60 additions and 25 deletions
--- a/.claude/skills/expand-sap-accuracy-corpus/worklist.md
+++ b/.claude/skills/expand-sap-accuracy-corpus/worklist.md
@ -67,10 +67,19 @@ prediction, and the skipped certs surface for follow-up:
  cert numbers, so the gaps can be closed deliberately. +3 tests; 0 new pyright.
  Live-verified on BN11 4EP: cohort now builds 35 + records the 1 skip.

-**✅ DONE — 2 safe generic mapper fixes (+regression tests, 0 new pyright):**
+**✅ DONE — generic mapper fixes (+regression tests, 0 new pyright):**
 - `_normalize_sap_schema_16_x`: `setdefault("insulated_door_count", 0)` (the
-  original prod crash) AND `setdefault("multiple_glazed_proportion", 100)` (16.3
-  cert 0418-3986-7250-2884-7970; ML-only field the SAP calc ignores; modal 100).
+  original prod crash).
+- ⚠️ REVERTED `setdefault("multiple_glazed_proportion", 100)` — it made an
+  otherwise-unmappable 16.3 cert (0418-3986-7250-2884-7970) join the EPC-prediction
+  **frozen-fixture donor pool** and tipped near-tie similarity matches, regressing
+  the component-accuracy gate (`has_hot_water_cylinder` 30→29/36; `door_count`
+  residual 23→25/36 — both ≫ the gate's 1e-3 float tolerance). The field is ML-only
+  (SAP calc never reads it) and the cohort skip-and-report path handles the cert,
+  so we leave it fail-loud instead of synthesising a value. Re-baselining the gate
+  was rejected (loosens a tighten-only gate for a real regression). For Khalim: if
+  16.x certs missing `multiple_glazed_proportion` should map, derive it (single→0 /
+  double→100) rather than a flat default, and re-measure the gate.
 - `_with_recorded_performance` co2/consumption/rating: `float(co2)` → crashed on
  certs lodging `co2_emissions_current` as a Measurement dict `{'value':3.5,
  'quantity':'tonnes per year'}` (16.x cert 2308-4997-7262-0137-9930, surfaced as
--- a/datatypes/epc/domain/mapper.py
+++ b/datatypes/epc/domain/mapper.py
@ -3313,13 +3313,14 @@ def _normalize_sap_schema_16_x(data: Dict[str, Any]) -> Dict[str, Any]:
    # still fail loud — that is correct, the fabric data is insufficient.
    d.setdefault("insulated_door_count", 0)

-    # Some 16.x certs (e.g. 16.3 cert 0418-3986-7250-2884-7970) omit
-    # `multiple_glazed_proportion` while still lodging `multiple_glazing_type` —
-    # RdSapSchema17_1 requires it. It is an ML-feature field the SAP-10 calculator
-    # never reads (only `sap10_ml/transform`), so the value is non-load-bearing for
-    # the score; default the modal 100 ("fully" the lodged glazing type) to keep the
-    # cert mappable. Mirrors the `insulated_door_count` default above.
-    d.setdefault("multiple_glazed_proportion", 100)
+    # NB: we deliberately do NOT default `multiple_glazed_proportion` here. A 16.x
+    # cert that omits it (e.g. 16.3 cert 0418-3986-7250-2884-7970) is left to fail
+    # the RdSapSchema17_1 parse and be handled by the cohort skip-and-report path —
+    # synthesising a value (tried `100`) drew the otherwise-unmappable cert into the
+    # EPC-prediction donor pool and tipped near-tie similarity matches, regressing
+    # the frozen-fixture component-accuracy gate (has_hot_water_cylinder 30→29/36,
+    # door_count residual 23→25/36). The field is ML-only (the SAP calc never reads
+    # it), so there is no calc cost to leaving such certs unmapped. See worklist.

    # 16.2 lodges glazing in BOTH `multiple_glazing_type` (frequently the "ND"
    # not-defined sentinel) AND the windows[].description. When the numeric field
--- a/datatypes/epc/domain/tests/test_from_sap_schema.py
+++ b/datatypes/epc/domain/tests/test_from_sap_schema.py
@ -324,7 +324,7 @@ class TestFromSapSchema17_1CodeBasedHeating:


 class TestFromSapSchema17_1Ventilation:
-    """Slice D5-vent: full-SAP sap_ventilation → measured air permeability (AP4),
+    """Slice D5-vent: full-SAP sap_ventilation → measured air permeability (AP50),
    ventilation_type → MechanicalVentilationKind, sheltered sides, wet rooms and
    the MEV PCDB index."""

@ -333,9 +333,13 @@ class TestFromSapSchema17_1Ventilation:
        schema = from_dict(SapSchema17_1, load("sap_17_1.json"))
        return EpcPropertyDataMapper.from_sap_schema_17_1(schema)

-    def test_measured_air_permeability_fed_as_ap4(self, sample: EpcPropertyData) -> None:
+    def test_measured_air_permeability_fed_as_ap50(self, sample: EpcPropertyData) -> None:
+        # The lodged `air_permeability` is a q50 Blower-Door result, so it feeds the
+        # engine's AP50 path `(18) = AP50/20 + (8)` — NOT the AP4/Pulse formula
+        # `0.263 × AP4^0.924` (the air-permeability AP50 fix, uprn_10093116528).
        assert sample.sap_ventilation is not None
-        assert sample.sap_ventilation.air_permeability_ap4_m3_h_m2 == 2.6
+        assert sample.sap_ventilation.air_permeability_ap50_m3_h_m2 == 2.6
+        assert sample.sap_ventilation.air_permeability_ap4_m3_h_m2 is None

    def test_ventilation_type_6_is_extract(self, sample: EpcPropertyData) -> None:
        # ventilation_type 6 = MEV decentralised → EXTRACT_OR_PIV_OUTSIDE.
@ -538,21 +542,20 @@ class TestFromSapSchema16_2:
        assert isinstance(epc, EpcPropertyData)
        assert epc.insulated_door_count == 0

-    def test_16_x_missing_multiple_glazed_proportion_still_maps(self) -> None:
-        # Some 16.x certs (e.g. 16.3 cert 0418-3986-7250-2884-7970) lodge
-        # `multiple_glazing_type` but omit `multiple_glazed_proportion`, which
-        # RdSapSchema17_1 requires — previously raised "missing required field
-        # 'multiple_glazed_proportion'", aborting the prediction cohort. The
-        # normaliser defaults the modal 100 so the cert maps. (The field is an
-        # ML-only feature the SAP calculator never reads, and from_rdsap_schema_17_1
-        # does not carry it onto EpcPropertyData — the point here is mappability.)
+    def test_16_x_missing_multiple_glazed_proportion_fails_loud(self) -> None:
+        # A 16.x cert omitting `multiple_glazed_proportion` (e.g. 16.3 cert
+        # 0418-3986-7250-2884-7970) is deliberately NOT defaulted — it fails the
+        # RdSapSchema17_1 parse and is handled by the cohort skip-and-report path.
+        # Synthesising a value drew the otherwise-unmappable cert into the
+        # EPC-prediction donor pool and regressed the component-accuracy gate;
+        # the field is ML-only so there is no calc cost to leaving it unmapped.
        data = load("sap_16_3.json")
        del data["multiple_glazed_proportion"]
-        assert "multiple_glazed_proportion" not in data

-        epc = EpcPropertyDataMapper.from_api_response(data)
-
-        assert isinstance(epc, EpcPropertyData)
+        with pytest.raises(
+            ValueError, match="missing required field 'multiple_glazed_proportion'"
+        ):
+            EpcPropertyDataMapper.from_api_response(data)

    def test_recorded_co2_as_measurement_dict_is_coerced_not_crashed(self) -> None:
        # Some certs (e.g. 16.x cert 2308-4997-7262-0137-9930) lodge
--- a/tests/domain/sap10_calculator/test_real_cert_sap_accuracy.py
+++ b/tests/domain/sap10_calculator/test_real_cert_sap_accuracy.py
@ -581,6 +581,28 @@ _EXPECTATIONS: Final[tuple[RealCertExpectation, ...]] = (
        cert_num="9978-7098-7226-2633-3994",
        sap_score=75,
    ),
+    # UPRN 10002468137 → cert 0215-2818-7357-9703-2145. RdSAP-Schema-17.1,
+    # all-electric high-heat-retention storage heaters on Economy 7, solid-
+    # brick uninsulated end-terrace. Ground truth is Elmhurst RdSAP10 = 60,
+    # reproduced on identical inputs (summary + full SAP 10.2 worksheet saved
+    # alongside: elmhurst_summary.pdf / elmhurst_worksheet.pdf). The engine
+    # produces 62 — a +2 over-rating localised to OFF-PEAK WATER HEATING:
+    # the worksheet (lines 243-246) prices the 7-hour off-peak immersion at a
+    # Table 13 split (19.36% @ 15.29p high + 80.64% @ 5.5p low), but the engine
+    # prices 100% at the 5.5p low rate, under-costing the bill (£595.68 vs
+    # £629.67) → lower ECF (2.69 vs 2.84) → SAP 62 not 60. (Space heating 100%
+    # off-peak IS correct for storage heaters — the worksheet agrees.) Strict
+    # xfail until the off-peak water-heating rate split is implemented.
+    RealCertExpectation(
+        schema="RdSAP-Schema-17.1",
+        sample="uprn_10002468137",
+        cert_num="0215-2818-7357-9703-2145",
+        sap_score=60,
+        known_bug_xfail=(
+            "off-peak (7-hour) water-heating high/low rate split not applied — "
+            "engine prices 100% at the low rate; see elmhurst_worksheet.pdf (243-246)"
+        ),
+    ),
 )