mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-30 13:10:47 +00:00
feat(epc-prediction): leave-one-out validation harness (ADR-0029)
Pure compare_prediction (TDD): wall-construction classification hit + signed residuals on floor area, window count, total window area, building-parts count. Plus validate_epc_prediction.py (IO plumbing): drops each cert from its postcode cohort, predicts from the rest on guaranteed inputs only, aggregates the metrics, and reports SAP three ways (pred-calc vs lodged / vs calc-on-actual / vs the neighbour-mean baseline). Smoke run: wall 90.9%, floor-area mean|·| 42.6 m2 (a real signal — template-copied floor area is noisy), SAP pred-calc edges baseline. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
5e6d2cff16
commit
f3ad6343a3
3 changed files with 326 additions and 0 deletions
60
domain/epc_prediction/prediction_comparison.py
Normal file
60
domain/epc_prediction/prediction_comparison.py
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
"""Per-Property prediction comparison for the EPC Prediction validation harness
|
||||
(ADR-0029).
|
||||
|
||||
`compare_prediction` scores a predicted `EpcPropertyData` against the actual one
on the accuracy signals the leave-one-out harness aggregates: a classification
match on the main wall construction (the other key categoricals — roof / floor
construction + insulation, construction age band — are not scored yet) and
signed residuals on the geometry (window area + count, building-parts count,
floor area). Pure — the SAP residual is computed in the runner, which has the
calculator and the lodged SAP.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
|
||||
from datatypes.epc.domain.epc_property_data import EpcPropertyData, SapBuildingPart
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class PredictionComparison:
    """Accuracy record for a single Property's prediction.

    Holds one classification flag plus four geometry residuals. Every residual
    is signed and computed as predicted − actual, so a negative value means the
    prediction came in under the actual figure.
    """

    # True when predicted and actual agree on the main part's wall construction.
    wall_construction_correct: bool
    # Total floor area difference.
    floor_area_residual: float
    # Difference in the number of building parts.
    building_parts_residual: int
    # Difference in the number of windows.
    window_count_residual: int
    # Difference in summed window area (width × height per window).
    total_window_area_residual: float
|
||||
|
||||
|
||||
def _main(epc: EpcPropertyData) -> SapBuildingPart:
|
||||
return epc.sap_building_parts[0]
|
||||
|
||||
|
||||
def _total_window_area(epc: EpcPropertyData) -> float:
|
||||
return sum(w.window_width * w.window_height for w in epc.sap_windows)
|
||||
|
||||
|
||||
def compare_prediction(
    predicted: EpcPropertyData, actual: EpcPropertyData
) -> PredictionComparison:
    """Score one predicted picture against the actual one.

    Returns a PredictionComparison holding the main-wall classification hit and
    the geometry residuals; every residual is signed as predicted − actual.
    """
    wall_matches = (
        _main(predicted).wall_construction == _main(actual).wall_construction
    )
    floor_area_delta = predicted.total_floor_area_m2 - actual.total_floor_area_m2
    parts_delta = len(predicted.sap_building_parts) - len(actual.sap_building_parts)
    window_count_delta = len(predicted.sap_windows) - len(actual.sap_windows)
    window_area_delta = _total_window_area(predicted) - _total_window_area(actual)

    return PredictionComparison(
        wall_construction_correct=wall_matches,
        floor_area_residual=floor_area_delta,
        building_parts_residual=parts_delta,
        window_count_residual=window_count_delta,
        total_window_area_residual=window_area_delta,
    )
|
||||
165
scripts/validate_epc_prediction.py
Normal file
165
scripts/validate_epc_prediction.py
Normal file
|
|
@ -0,0 +1,165 @@
|
|||
"""Leave-one-out accuracy harness for EPC Prediction (ADR-0029).
|
||||
|
||||
Runs entirely against the frozen postcode-clustered corpus
|
||||
(`fetch_epc_prediction_corpus.py`). For every cert that has neighbours, it
|
||||
drops that cert from its postcode cohort, predicts it from the rest using only
|
||||
its *guaranteed* inputs (property type + built form), and compares the predicted
|
||||
`EpcPropertyData` to the actual one.
|
||||
|
||||
Reports the ADR-0029 metrics:
|
||||
- classification rate: main wall construction (extend as coverage grows);
|
||||
- geometry residuals: floor area, window count + total window area, building
|
||||
parts (mean signed + mean absolute);
|
||||
- SAP reported three ways — predicted-then-calculated vs (a) the actual lodged
|
||||
SAP, (b) the calculator on the actual components, (c) the neighbour-mean SAP
|
||||
baseline (the number predict-then-calculate must beat).
|
||||
|
||||
USAGE
|
||||
-----
|
||||
PYTHONPATH=. python scripts/validate_epc_prediction.py
|
||||
|
||||
Corpus dir: $EPC_PREDICTION_CORPUS (default /tmp/epc_prediction_corpus).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import statistics
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from datatypes.epc.domain.epc_property_data import EpcPropertyData
|
||||
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
|
||||
from domain.epc_prediction.comparable_properties import (
|
||||
Comparable,
|
||||
PredictionTarget,
|
||||
select_comparables,
|
||||
)
|
||||
from domain.epc_prediction.epc_prediction import EpcPrediction
|
||||
from domain.epc_prediction.prediction_comparison import compare_prediction
|
||||
from domain.sap10_calculator.calculator import Sap10Calculator
|
||||
|
||||
# Root directory of the corpus; overridable via $EPC_PREDICTION_CORPUS.
CORPUS = Path(os.environ.get("EPC_PREDICTION_CORPUS", "/tmp/epc_prediction_corpus"))
|
||||
|
||||
|
||||
def _load_cohort(postcode: str, certs: list[str]) -> list[Comparable]:
    """Map a postcode's cached cert payloads to Comparables.

    Certs with no cached file, and certs whose payload fails to read, parse or
    map (unsupported schema, malformed), are skipped so that one bad cert
    cannot abort the sweep.

    Args:
        postcode: corpus sub-directory holding the cached payloads.
        certs: certificate numbers; each is looked up as ``<cert>.json``.

    Returns:
        One Comparable per cert that mapped cleanly, in ``certs`` order.
    """
    cohort: list[Comparable] = []
    for cert in certs:
        path = CORPUS / postcode / f"{cert}.json"
        if not path.exists():
            continue
        try:
            # JSON is UTF-8 by specification; don't depend on the locale default.
            payload = json.loads(path.read_text(encoding="utf-8"))
            epc = EpcPropertyDataMapper.from_api_response(payload)
        except Exception:  # noqa: BLE001 — a bad cert must not abort the sweep
            continue
        cohort.append(Comparable(epc=epc, certificate_number=cert))
    return cohort
|
||||
|
||||
|
||||
def _sap(calculator: Sap10Calculator, epc: EpcPropertyData) -> Optional[float]:
|
||||
try:
|
||||
return calculator.calculate(epc).sap_score_continuous
|
||||
except Exception: # noqa: BLE001 — some pictures don't score; count as misses
|
||||
return None
|
||||
|
||||
|
||||
def main() -> None:
    """Run the leave-one-out sweep over the corpus and print the accuracy report.

    For each postcode in the corpus index, every loadable cert is held out in
    turn, predicted from the rest of its cohort, and compared with its actual
    picture. Classification hits, geometry residuals and the three absolute SAP
    errors are accumulated across the sweep and printed at the end.

    Raises:
        SystemExit: if the corpus directory has no ``_index.json``.
    """
    index_path = CORPUS / "_index.json"
    if not index_path.exists():
        raise SystemExit(f"no corpus at {CORPUS} — run fetch_epc_prediction_corpus.py")
    # JSON is UTF-8 by specification; don't depend on the locale default.
    index: dict[str, list[str]] = json.loads(index_path.read_text(encoding="utf-8"))

    calculator = Sap10Calculator()
    predictor = EpcPrediction()

    wall_hits = wall_total = 0
    floor_res: list[float] = []
    window_count_res: list[int] = []
    window_area_res: list[float] = []
    parts_res: list[int] = []
    sap_vs_lodged: list[float] = []
    sap_vs_calc_actual: list[float] = []
    sap_vs_neighbour_mean: list[float] = []
    # Held-out certs are either predicted or skipped for one of two reasons;
    # both skip reasons are tallied so the summary accounts for every cert.
    predicted_n = skipped_no_cohort = skipped_no_comparables = 0

    for postcode, certs in index.items():
        cohort = _load_cohort(postcode, certs)
        if len(cohort) < 2:
            # A lone cert has nothing to be predicted from.
            skipped_no_cohort += len(cohort)
            continue
        for i, held_out in enumerate(cohort):
            others = [c for j, c in enumerate(cohort) if j != i]
            actual = held_out.epc
            # Only the guaranteed inputs of the held-out cert go into the target.
            target = PredictionTarget(
                postcode=postcode,
                property_type=actual.property_type or "",
                built_form=actual.built_form,
            )
            comparables = select_comparables(target, others)
            if not comparables.members:
                # Neighbours exist but none were selected as comparable.
                skipped_no_comparables += 1
                continue
            predicted = predictor.predict(target, comparables)
            predicted_n += 1

            cmp = compare_prediction(predicted, actual)
            wall_total += 1
            wall_hits += int(cmp.wall_construction_correct)
            floor_res.append(cmp.floor_area_residual)
            window_count_res.append(cmp.window_count_residual)
            window_area_res.append(cmp.total_window_area_residual)
            parts_res.append(cmp.building_parts_residual)

            # (a) predicted-then-calculated vs the lodged SAP.
            sap_pred = _sap(calculator, predicted)
            lodged = actual.energy_rating_current
            if sap_pred is not None and lodged is not None:
                sap_vs_lodged.append(abs(sap_pred - lodged))

            # (b) predicted-then-calculated vs the calculator on the actual picture.
            sap_actual = _sap(calculator, actual)
            if sap_pred is not None and sap_actual is not None:
                sap_vs_calc_actual.append(abs(sap_pred - sap_actual))

            # (c) baseline: mean lodged SAP of the selected comparables vs lodged.
            # NOTE(review): the baseline is collected even when the prediction
            # failed to score, so its n can exceed the pred-calc n — read the
            # two MAEs with that in mind.
            neighbour_lodged = [
                c.epc.energy_rating_current
                for c in comparables.members
                if c.epc.energy_rating_current is not None
            ]
            if neighbour_lodged and lodged is not None:
                baseline = statistics.mean(neighbour_lodged)
                sap_vs_neighbour_mean.append(abs(baseline - lodged))

    print(f"corpus: {CORPUS}")
    print(
        f"predicted {predicted_n} held-out certs ({skipped_no_cohort} had no cohort, "
        f"{skipped_no_comparables} had no comparables)\n"
    )
    if wall_total:
        print(f"CLASSIFICATION wall_construction: {wall_hits}/{wall_total} = "
              f"{wall_hits / wall_total:.1%}")
    _residual("floor_area (m2)", floor_res)
    _residual("window_count", [float(x) for x in window_count_res])
    _residual("total_window_area (m2)", window_area_res)
    _residual("building_parts", [float(x) for x in parts_res])
    print()
    _sap_line("SAP |pred-calc − lodged|", sap_vs_lodged)
    _sap_line("SAP |pred-calc − calc(actual)|", sap_vs_calc_actual)
    _sap_line("SAP |neighbour-mean − lodged| (baseline)", sap_vs_neighbour_mean)
|
||||
|
||||
|
||||
def _residual(label: str, values: list[float]) -> None:
|
||||
if not values:
|
||||
print(f"RESIDUAL {label}: (none)")
|
||||
return
|
||||
mean_signed = statistics.mean(values)
|
||||
mean_abs = statistics.mean(abs(v) for v in values)
|
||||
print(f"RESIDUAL {label}: mean {mean_signed:+.2f} | mean|·| {mean_abs:.2f} "
|
||||
f"(n={len(values)})")
|
||||
|
||||
|
||||
def _sap_line(label: str, values: list[float]) -> None:
|
||||
if not values:
|
||||
print(f"{label}: (none)")
|
||||
return
|
||||
print(f"{label}: MAE {statistics.mean(values):.2f} | "
|
||||
f"median {statistics.median(values):.2f} (n={len(values)})")
|
||||
|
||||
|
||||
# Script entry point: run the sweep only when executed directly, not on import.
if __name__ == "__main__":
    main()
|
||||
101
tests/domain/epc_prediction/test_prediction_comparison.py
Normal file
101
tests/domain/epc_prediction/test_prediction_comparison.py
Normal file
|
|
@ -0,0 +1,101 @@
|
|||
"""Behaviour of the per-Property prediction comparison (ADR-0029): given a
|
||||
predicted EpcPropertyData and the actual one, report the accuracy signals the
|
||||
validation harness aggregates — classification matches on the key categoricals
|
||||
and residuals on the geometry. Pure; SAP residual is computed in the runner
|
||||
(it needs the calculator + lodged SAP).
|
||||
"""
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from datatypes.epc.domain.epc_property_data import (
|
||||
EpcPropertyData,
|
||||
SapBuildingPart,
|
||||
SapWindow,
|
||||
)
|
||||
from domain.epc_prediction.prediction_comparison import compare_prediction
|
||||
|
||||
|
||||
def _epc(
    *,
    wall_construction: int = 1,
    floor_area: float = 80.0,
    building_parts: int = 1,
    windows: Optional[list[tuple[float, float]]] = None,
) -> EpcPropertyData:
    """Build a bare EpcPropertyData carrying only the fields the comparison reads.

    Every object is created with ``object.__new__`` (bypassing ``__init__``) and
    then given just the attributes under test. ``windows`` is a list of
    (width, height) pairs; None means no windows.
    """

    def _part() -> SapBuildingPart:
        # Every part shares the same wall construction code.
        built: SapBuildingPart = object.__new__(SapBuildingPart)
        built.wall_construction = wall_construction
        return built

    def _window(width: float, height: float) -> SapWindow:
        built: SapWindow = object.__new__(SapWindow)
        built.window_width = width
        built.window_height = height
        return built

    picture: EpcPropertyData = object.__new__(EpcPropertyData)
    picture.total_floor_area_m2 = floor_area
    picture.sap_building_parts = [_part() for _ in range(building_parts)]
    picture.sap_windows = [_window(width, height) for width, height in windows or []]
    return picture
|
||||
|
||||
|
||||
def test_flags_a_correct_main_wall_construction_classification() -> None:
    """Matching main-wall codes on both sides count as a classification hit."""
    # Arrange — both pictures carry wall construction 1 (cavity).
    cavity = 1
    predicted_picture = _epc(wall_construction=cavity)
    actual_picture = _epc(wall_construction=cavity)

    # Act
    result = compare_prediction(predicted_picture, actual_picture)

    # Assert
    assert result.wall_construction_correct is True
|
||||
|
||||
|
||||
def test_flags_an_incorrect_main_wall_construction_classification() -> None:
    """Differing main-wall codes count as a classification miss."""
    # Arrange — cavity (1) predicted where the actual is solid brick (2).
    cavity, solid_brick = 1, 2
    predicted_picture = _epc(wall_construction=cavity)
    actual_picture = _epc(wall_construction=solid_brick)

    # Act
    result = compare_prediction(predicted_picture, actual_picture)

    # Assert
    assert result.wall_construction_correct is False
|
||||
|
||||
|
||||
def test_reports_the_floor_area_residual_as_predicted_minus_actual() -> None:
    """The floor-area residual is signed: predicted − actual."""
    # Arrange — 90 m² predicted against 100 m² actual, i.e. 10 m² under.
    predicted_picture = _epc(floor_area=90.0)
    actual_picture = _epc(floor_area=100.0)

    # Act
    result = compare_prediction(predicted_picture, actual_picture)

    # Assert — the residual should be −10, within float tolerance.
    assert abs(result.floor_area_residual + 10.0) <= 1e-9
|
||||
|
||||
|
||||
def test_reports_the_building_parts_count_residual() -> None:
    """The building-parts residual is the signed difference in part counts."""
    # Arrange — one part predicted; the actual has two (main + extension).
    predicted_picture = _epc(building_parts=1)
    actual_picture = _epc(building_parts=2)

    # Act
    result = compare_prediction(predicted_picture, actual_picture)

    # Assert — predicted − actual = 1 − 2.
    assert result.building_parts_residual == -1
|
||||
|
||||
|
||||
def test_reports_window_count_and_total_area_residuals() -> None:
    """Window count and summed window area are both reported as predicted − actual."""
    # Arrange — two predicted windows (1 m² + 2 m²) against one actual (1 m²).
    predicted_windows = [(1.0, 1.0), (2.0, 1.0)]
    actual_windows = [(1.0, 1.0)]
    predicted_picture = _epc(windows=predicted_windows)
    actual_picture = _epc(windows=actual_windows)

    # Act
    result = compare_prediction(predicted_picture, actual_picture)

    # Assert — one window and 2 m² over.
    assert result.window_count_residual == 1
    assert abs(result.total_window_area_residual - 2.0) <= 1e-9
|
||||
Loading…
Add table
Reference in a new issue