mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-30 13:10:47 +00:00
Merge pull request #1264 from Hestia-Homes/feature/hyde_make_it_more_accurate_with_tests
Feature/hyde make it more accurate with tests
This commit is contained in:
commit
3c45a30890
4 changed files with 60 additions and 25 deletions
|
|
@ -67,10 +67,19 @@ prediction, and the skipped certs surface for follow-up:
|
|||
cert numbers, so the gaps can be closed deliberately. +3 tests; 0 new pyright.
|
||||
Live-verified on BN11 4EP: cohort now builds 35 + records the 1 skip.
|
||||
|
||||
**✅ DONE — 2 safe generic mapper fixes (+regression tests, 0 new pyright):**
|
||||
**✅ DONE — generic mapper fixes (+regression tests, 0 new pyright):**
|
||||
- `_normalize_sap_schema_16_x`: `setdefault("insulated_door_count", 0)` (the
|
||||
original prod crash) AND `setdefault("multiple_glazed_proportion", 100)` (16.3
|
||||
cert 0418-3986-7250-2884-7970; ML-only field the SAP calc ignores; modal 100).
|
||||
original prod crash).
|
||||
- ⚠️ REVERTED `setdefault("multiple_glazed_proportion", 100)` — it made an
|
||||
otherwise-unmappable 16.3 cert (0418-3986-7250-2884-7970) join the EPC-prediction
|
||||
**frozen-fixture donor pool** and tipped near-tie similarity matches, regressing
|
||||
the component-accuracy gate (`has_hot_water_cylinder` 30→29/36; `door_count`
|
||||
residual 23→25/36 — both ≫ the gate's 1e-3 float tolerance). The field is ML-only
|
||||
(SAP calc never reads it) and the cohort skip-and-report path handles the cert,
|
||||
so we leave it fail-loud instead of synthesising a value. Re-baselining the gate
|
||||
was rejected (loosens a tighten-only gate for a real regression). For Khalim: if
|
||||
16.x certs missing `multiple_glazed_proportion` should map, derive it (single→0 /
|
||||
double→100) rather than a flat default, and re-measure the gate.
|
||||
- `_with_recorded_performance` co2/consumption/rating: `float(co2)` → crashed on
|
||||
certs lodging `co2_emissions_current` as a Measurement dict `{'value':3.5,
|
||||
'quantity':'tonnes per year'}` (16.x cert 2308-4997-7262-0137-9930, surfaced as
|
||||
|
|
|
|||
|
|
@ -3313,13 +3313,14 @@ def _normalize_sap_schema_16_x(data: Dict[str, Any]) -> Dict[str, Any]:
|
|||
# still fail loud — that is correct, the fabric data is insufficient.
|
||||
d.setdefault("insulated_door_count", 0)
|
||||
|
||||
# Some 16.x certs (e.g. 16.3 cert 0418-3986-7250-2884-7970) omit
|
||||
# `multiple_glazed_proportion` while still lodging `multiple_glazing_type` —
|
||||
# RdSapSchema17_1 requires it. It is an ML-feature field the SAP-10 calculator
|
||||
# never reads (only `sap10_ml/transform`), so the value is non-load-bearing for
|
||||
# the score; default the modal 100 ("fully" the lodged glazing type) to keep the
|
||||
# cert mappable. Mirrors the `insulated_door_count` default above.
|
||||
d.setdefault("multiple_glazed_proportion", 100)
|
||||
# NB: we deliberately do NOT default `multiple_glazed_proportion` here. A 16.x
|
||||
# cert that omits it (e.g. 16.3 cert 0418-3986-7250-2884-7970) is left to fail
|
||||
# the RdSapSchema17_1 parse and be handled by the cohort skip-and-report path —
|
||||
# synthesising a value (tried `100`) drew the otherwise-unmappable cert into the
|
||||
# EPC-prediction donor pool and tipped near-tie similarity matches, regressing
|
||||
# the frozen-fixture component-accuracy gate (has_hot_water_cylinder 30→29/36,
|
||||
# door_count residual 23→25/36). The field is ML-only (the SAP calc never reads
|
||||
# it), so there is no calc cost to leaving such certs unmapped. See worklist.
|
||||
|
||||
# 16.2 lodges glazing in BOTH `multiple_glazing_type` (frequently the "ND"
|
||||
# not-defined sentinel) AND the windows[].description. When the numeric field
|
||||
|
|
|
|||
|
|
@ -324,7 +324,7 @@ class TestFromSapSchema17_1CodeBasedHeating:
|
|||
|
||||
|
||||
class TestFromSapSchema17_1Ventilation:
|
||||
"""Slice D5-vent: full-SAP sap_ventilation → measured air permeability (AP4),
|
||||
"""Slice D5-vent: full-SAP sap_ventilation → measured air permeability (AP50),
|
||||
ventilation_type → MechanicalVentilationKind, sheltered sides, wet rooms and
|
||||
the MEV PCDB index."""
|
||||
|
||||
|
|
@ -333,9 +333,13 @@ class TestFromSapSchema17_1Ventilation:
|
|||
schema = from_dict(SapSchema17_1, load("sap_17_1.json"))
|
||||
return EpcPropertyDataMapper.from_sap_schema_17_1(schema)
|
||||
|
||||
def test_measured_air_permeability_fed_as_ap4(self, sample: EpcPropertyData) -> None:
|
||||
def test_measured_air_permeability_fed_as_ap50(self, sample: EpcPropertyData) -> None:
|
||||
# The lodged `air_permeability` is a q50 Blower-Door result, so it feeds the
|
||||
# engine's AP50 path `(18) = AP50/20 + (8)` — NOT the AP4/Pulse formula
|
||||
# `0.263 × AP4^0.924` (the air-permeability AP50 fix, uprn_10093116528).
|
||||
assert sample.sap_ventilation is not None
|
||||
assert sample.sap_ventilation.air_permeability_ap4_m3_h_m2 == 2.6
|
||||
assert sample.sap_ventilation.air_permeability_ap50_m3_h_m2 == 2.6
|
||||
assert sample.sap_ventilation.air_permeability_ap4_m3_h_m2 is None
|
||||
|
||||
def test_ventilation_type_6_is_extract(self, sample: EpcPropertyData) -> None:
|
||||
# ventilation_type 6 = MEV decentralised → EXTRACT_OR_PIV_OUTSIDE.
|
||||
|
|
@ -538,21 +542,20 @@ class TestFromSapSchema16_2:
|
|||
assert isinstance(epc, EpcPropertyData)
|
||||
assert epc.insulated_door_count == 0
|
||||
|
||||
def test_16_x_missing_multiple_glazed_proportion_still_maps(self) -> None:
|
||||
# Some 16.x certs (e.g. 16.3 cert 0418-3986-7250-2884-7970) lodge
|
||||
# `multiple_glazing_type` but omit `multiple_glazed_proportion`, which
|
||||
# RdSapSchema17_1 requires — previously raised "missing required field
|
||||
# 'multiple_glazed_proportion'", aborting the prediction cohort. The
|
||||
# normaliser defaults the modal 100 so the cert maps. (The field is an
|
||||
# ML-only feature the SAP calculator never reads, and from_rdsap_schema_17_1
|
||||
# does not carry it onto EpcPropertyData — the point here is mappability.)
|
||||
def test_16_x_missing_multiple_glazed_proportion_fails_loud(self) -> None:
|
||||
# A 16.x cert omitting `multiple_glazed_proportion` (e.g. 16.3 cert
|
||||
# 0418-3986-7250-2884-7970) is deliberately NOT defaulted — it fails the
|
||||
# RdSapSchema17_1 parse and is handled by the cohort skip-and-report path.
|
||||
# Synthesising a value drew the otherwise-unmappable cert into the
|
||||
# EPC-prediction donor pool and regressed the component-accuracy gate;
|
||||
# the field is ML-only so there is no calc cost to leaving it unmapped.
|
||||
data = load("sap_16_3.json")
|
||||
del data["multiple_glazed_proportion"]
|
||||
assert "multiple_glazed_proportion" not in data
|
||||
|
||||
epc = EpcPropertyDataMapper.from_api_response(data)
|
||||
|
||||
assert isinstance(epc, EpcPropertyData)
|
||||
with pytest.raises(
|
||||
ValueError, match="missing required field 'multiple_glazed_proportion'"
|
||||
):
|
||||
EpcPropertyDataMapper.from_api_response(data)
|
||||
|
||||
def test_recorded_co2_as_measurement_dict_is_coerced_not_crashed(self) -> None:
|
||||
# Some certs (e.g. 16.x cert 2308-4997-7262-0137-9930) lodge
|
||||
|
|
|
|||
|
|
@ -581,6 +581,28 @@ _EXPECTATIONS: Final[tuple[RealCertExpectation, ...]] = (
|
|||
cert_num="9978-7098-7226-2633-3994",
|
||||
sap_score=75,
|
||||
),
|
||||
# UPRN 10002468137 → cert 0215-2818-7357-9703-2145. RdSAP-Schema-17.1,
|
||||
# all-electric high-heat-retention storage heaters on Economy 7, solid-
|
||||
# brick uninsulated end-terrace. Ground truth is Elmhurst RdSAP10 = 60,
|
||||
# reproduced on identical inputs (summary + full SAP 10.2 worksheet saved
|
||||
# alongside: elmhurst_summary.pdf / elmhurst_worksheet.pdf). The engine
|
||||
# produces 62 — a +2 over-rating localised to OFF-PEAK WATER HEATING:
|
||||
# the worksheet (lines 243-246) prices the 7-hour off-peak immersion at a
|
||||
# Table 13 split (19.36% @ 15.29p high + 80.64% @ 5.5p low), but the engine
|
||||
# prices 100% at the 5.5p low rate, under-costing the bill (£595.68 vs
|
||||
# £629.67) → lower ECF (2.69 vs 2.84) → SAP 62 not 60. (Space heating 100%
|
||||
# off-peak IS correct for storage heaters — the worksheet agrees.) Strict
|
||||
# xfail until the off-peak water-heating rate split is implemented.
|
||||
RealCertExpectation(
|
||||
schema="RdSAP-Schema-17.1",
|
||||
sample="uprn_10002468137",
|
||||
cert_num="0215-2818-7357-9703-2145",
|
||||
sap_score=60,
|
||||
known_bug_xfail=(
|
||||
"off-peak (7-hour) water-heating high/low rate split not applied — "
|
||||
"engine prices 100% at the low rate; see elmhurst_worksheet.pdf (243-246)"
|
||||
),
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue