mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-30 13:10:47 +00:00
feat(epc-prediction): Tier-1 ratcheting Component Accuracy gate (ADR-0030)
The committed CI gate: run the calculator-free leave-one-out scorer over the frozen anonymised fixture (36 SAP-10.2 targets) and assert each per-component classification rate / geometry residual is no worse than a committed baseline. Prediction is deterministic + the fixture frozen, so the numbers reproduce exactly — a failure is a real regression, never sample noise. - 19 rate floors + 5 residual ceilings, seeded at the currently-measured values; they only ever tighten (no-widening ethos on an aggregate). - Calculator-FREE — component floors are the real gate; the end-to-end SAP/carbon/PE guards stay out (their floor is the separate API-path calculator workstream). - Skips with a message when the fixture is absent. 25 parametrized assertions, all green. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
008c1922c4
commit
e3a2720e5c
1 changed files with 106 additions and 0 deletions
106
tests/domain/epc_prediction/test_component_accuracy_gate.py
Normal file
106
tests/domain/epc_prediction/test_component_accuracy_gate.py
Normal file
|
|
@ -0,0 +1,106 @@
|
|||
"""Tier-1 ratcheting Component Accuracy gate (ADR-0030).
|
||||
|
||||
Runs the calculator-free leave-one-out scorer over the committed, anonymised
|
||||
fixture and asserts each per-component classification rate / geometry residual is
|
||||
no worse than a committed baseline. Because the prediction is deterministic and
|
||||
the fixture is frozen, every run reproduces the same numbers exactly — so a
|
||||
failure means a real *regression* in prediction quality, never sample noise.
|
||||
|
||||
The floors / ceilings are the currently-measured values and only ever **tighten**
|
||||
(the repo's no-tolerance-widening ethos applied to an aggregate): when prediction
|
||||
improves, ratchet the relevant floor up in the same change. The end-to-end
|
||||
SAP / carbon / PE guards are deliberately *not* here — they need the calculator,
|
||||
whose API-path residual is a separate workstream; the component floors are the
|
||||
real gate (ADR-0030).
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from domain.epc_prediction.validation import (
|
||||
ComponentAccuracy,
|
||||
evaluate_component_accuracy,
|
||||
)
|
||||
from harness.epc_prediction_corpus import load_corpus
|
||||
|
||||
# Directory holding the committed EPC Prediction fixture; the ``accuracy``
# fixture below looks for ``_index.json`` inside it. With this file living at
# tests/domain/epc_prediction/, ``parents[3]`` is the repository root.
_FIXTURE = Path(__file__).parents[3] / "tests" / "fixtures" / "epc_prediction"
|
||||
|
||||
# Minimum classification hit-rate per component (ratchet floors). Tighten — never
# loosen — as prediction improves. Values are the measured rates over the frozen
# 36-target fixture; a 1e-3 tolerance absorbs float rounding only.
#
# Each key is the component name handed to ``ComponentAccuracy.rate`` by
# ``test_classification_rate_does_not_regress``; every entry becomes one
# parametrized case (19 in total).
_RATE_FLOORS: dict[str, float] = {
    "wall_construction": 0.8889,
    "wall_insulation_type": 0.7778,
    "construction_age_band": 0.6389,
    "roof_construction": 0.7222,
    "floor_construction": 0.7500,
    "heating_main_fuel": 0.9722,
    "heating_main_category": 0.8889,
    "heating_main_control": 0.7500,
    "water_heating_fuel": 0.9167,
    "water_heating_code": 0.8889,
    "has_hot_water_cylinder": 0.8889,
    "cylinder_insulation_type": 0.1667,
    # A 0.0 floor cannot fail the rate comparison (assuming rates are never
    # negative — TODO confirm); until it is ratcheted up, only the
    # "had no applicable targets" check guards this component.
    "secondary_heating_type": 0.0000,
    "roof_insulation_thickness": 0.1471,
    "floor_insulation": 0.9062,
    "has_room_in_roof": 0.8333,
    "modal_glazing_type": 0.5000,
    "has_pv": 1.0000,
    "solar_water_heating": 1.0000,
}
|
||||
|
||||
# Maximum mean absolute residual per numeric component (ratchet ceilings).
#
# Each key is the component name handed to ``ComponentAccuracy.mean_abs_residual``
# by ``test_residual_does_not_regress``; every entry becomes one parametrized
# case (5 in total). Units are whatever the scorer reports for each component —
# NOTE(review): not visible from this file, confirm against the scorer.
_RESIDUAL_CEILINGS: dict[str, float] = {
    "floor_area": 12.2175,
    "window_count": 3.8889,
    "total_window_area": 4.4067,
    "building_parts": 0.3333,
    "door_count": 0.6389,
}
|
||||
|
||||
# Slack applied to both gates: subtracted from every rate floor and added to
# every residual ceiling before comparing. The baselines above are written to
# four decimal places, so this only absorbs their rounding.
_TOLERANCE = 1e-3
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def accuracy() -> ComponentAccuracy:
    """Score the committed fixture once and share the result across the module.

    The corpus under ``_FIXTURE`` is loaded and passed to
    ``evaluate_component_accuracy``. When the fixture's ``_index.json`` is not
    present, every dependent test is skipped with a message naming the
    expected location instead of failing.
    """
    index_file = _FIXTURE / "_index.json"
    fixture_present = index_file.exists()
    if not fixture_present:
        # pytest.skip raises, so nothing below runs without a fixture.
        pytest.skip(f"no EPC Prediction fixture at {_FIXTURE}")

    corpus = load_corpus(_FIXTURE)
    return evaluate_component_accuracy(corpus)
|
||||
|
||||
|
||||
def test_fixture_yields_the_expected_target_count(
    accuracy: ComponentAccuracy,
) -> None:
    """Guard the size of the scored target set.

    The frozen fixture is expected to keep producing all of its SAP-10.2
    targets; fewer than the recorded count points at a change to the fixture
    or to the target filter rather than to prediction quality.
    """
    # Arrange
    minimum_targets = 36

    # Act
    target_count = accuracy.targets

    # Assert — the count may grow, but must never drop below the baseline.
    assert target_count >= minimum_targets
|
||||
|
||||
|
||||
@pytest.mark.parametrize("component,floor", sorted(_RATE_FLOORS.items()))
def test_classification_rate_does_not_regress(
    accuracy: ComponentAccuracy, component: str, floor: float
) -> None:
    """Check one component's classification rate against its ratchet floor.

    Runs once per ``_RATE_FLOORS`` entry (in sorted key order). Fails when the
    scorer reports no rate for the component, or when the rate falls more than
    ``_TOLERANCE`` below the committed floor.
    """
    # Arrange — the lowest rate still accepted once rounding slack is applied.
    lowest_accepted = floor - _TOLERANCE

    # Act
    observed = accuracy.rate(component)

    # Assert — a missing rate is reported separately from a regressed one.
    assert observed is not None, f"{component} had no applicable targets"
    assert observed >= lowest_accepted, (
        f"{component} classification regressed: {observed:.4f} < floor {floor:.4f}"
    )
|
||||
|
||||
|
||||
@pytest.mark.parametrize("component,ceiling", sorted(_RESIDUAL_CEILINGS.items()))
def test_residual_does_not_regress(
    accuracy: ComponentAccuracy, component: str, ceiling: float
) -> None:
    """Check one numeric component's mean absolute residual against its ceiling.

    Runs once per ``_RESIDUAL_CEILINGS`` entry (in sorted key order). Fails
    when the scorer reports no residual for the component, or when the mean
    absolute residual exceeds the committed ceiling by more than ``_TOLERANCE``.
    """
    # Arrange — the highest residual still accepted once rounding slack is applied.
    highest_accepted = ceiling + _TOLERANCE

    # Act
    observed = accuracy.mean_abs_residual(component)

    # Assert — a missing residual is reported separately from a regressed one.
    assert observed is not None, f"{component} had no residuals"
    assert observed <= highest_accepted, (
        f"{component} residual regressed: {observed:.4f} > ceiling {ceiling:.4f}"
    )
|
||||
Loading…
Add table
Reference in a new issue