From f3ad6343a3ba0b8f815c342d74883d96c289c66a Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 13 Jun 2026 23:55:05 +0000 Subject: [PATCH] feat(epc-prediction): leave-one-out validation harness (ADR-0029) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pure compare_prediction (TDD): wall-construction classification hit + signed residuals on floor area, window count, total window area, building-parts count. Plus validate_epc_prediction.py (IO plumbing): drops each cert from its postcode cohort, predicts from the rest on guaranteed inputs only, aggregates the metrics, and reports SAP three ways (pred-calc vs lodged / vs calc-on-actual / vs the neighbour-mean baseline). Smoke run: wall 90.9%, floor-area mean|·| 42.6 m2 (a real signal — template-copied floor area is noisy), SAP pred-calc edges baseline. Co-Authored-By: Claude Opus 4.8 --- .../epc_prediction/prediction_comparison.py | 60 +++++++ scripts/validate_epc_prediction.py | 165 ++++++++++++++++++ .../test_prediction_comparison.py | 101 +++++++++++ 3 files changed, 326 insertions(+) create mode 100644 domain/epc_prediction/prediction_comparison.py create mode 100644 scripts/validate_epc_prediction.py create mode 100644 tests/domain/epc_prediction/test_prediction_comparison.py diff --git a/domain/epc_prediction/prediction_comparison.py b/domain/epc_prediction/prediction_comparison.py new file mode 100644 index 00000000..995ae5ca --- /dev/null +++ b/domain/epc_prediction/prediction_comparison.py @@ -0,0 +1,60 @@ +"""Per-Property prediction comparison for the EPC Prediction validation harness +(ADR-0029). + +`compare_prediction` scores a predicted `EpcPropertyData` against the actual one +on the accuracy signals the leave-one-out harness aggregates: classification +matches on the key categoricals (wall / roof / floor construction + insulation, +construction age band) and residuals on the geometry (window area + count, +building-parts count, floor area). Pure — the SAP residual is computed in the +runner, which has the calculator and the lodged SAP. +""" + +from __future__ import annotations + +from dataclasses import dataclass + +from datatypes.epc.domain.epc_property_data import EpcPropertyData, SapBuildingPart + + +@dataclass(frozen=True) +class PredictionComparison: + """One Property's prediction accuracy: classification hits + geometry + residuals (predicted − actual).""" + + wall_construction_correct: bool + floor_area_residual: float + building_parts_residual: int + window_count_residual: int + total_window_area_residual: float + + +def _main(epc: EpcPropertyData) -> SapBuildingPart: + return epc.sap_building_parts[0] + + +def _total_window_area(epc: EpcPropertyData) -> float: + return sum(w.window_width * w.window_height for w in epc.sap_windows) + + +def compare_prediction( + predicted: EpcPropertyData, actual: EpcPropertyData +) -> PredictionComparison: + """Compare a predicted picture against the actual one, field by field. All + residuals are signed, predicted − actual.""" + return PredictionComparison( + wall_construction_correct=( + _main(predicted).wall_construction == _main(actual).wall_construction + ), + floor_area_residual=( + predicted.total_floor_area_m2 - actual.total_floor_area_m2 + ), + building_parts_residual=( + len(predicted.sap_building_parts) - len(actual.sap_building_parts) + ), + window_count_residual=( + len(predicted.sap_windows) - len(actual.sap_windows) + ), + total_window_area_residual=( + _total_window_area(predicted) - _total_window_area(actual) + ), + ) diff --git a/scripts/validate_epc_prediction.py b/scripts/validate_epc_prediction.py new file mode 100644 index 00000000..fee28d9e --- /dev/null +++ b/scripts/validate_epc_prediction.py @@ -0,0 +1,165 @@ +"""Leave-one-out accuracy harness for EPC Prediction (ADR-0029). + +Runs entirely against the frozen postcode-clustered corpus +(`fetch_epc_prediction_corpus.py`). For every cert that has neighbours, it +drops that cert from its postcode cohort, predicts it from the rest using only +its *guaranteed* inputs (property type + built form), and compares the predicted +`EpcPropertyData` to the actual one. + +Reports the ADR-0029 metrics: + - classification rate: main wall construction (extend as coverage grows); + - geometry residuals: floor area, window count + total window area, building + parts (mean signed + mean absolute); + - SAP reported three ways — predicted-then-calculated vs (a) the actual lodged + SAP, (b) the calculator on the actual components, (c) the neighbour-mean SAP + baseline (the number predict-then-calculate must beat). + +USAGE +----- + PYTHONPATH=. python scripts/validate_epc_prediction.py + +Corpus dir: $EPC_PREDICTION_CORPUS (default /tmp/epc_prediction_corpus). +""" + +from __future__ import annotations + +import json +import os +import statistics +from pathlib import Path +from typing import Optional + +from datatypes.epc.domain.epc_property_data import EpcPropertyData +from datatypes.epc.domain.mapper import EpcPropertyDataMapper +from domain.epc_prediction.comparable_properties import ( + Comparable, + PredictionTarget, + select_comparables, +) +from domain.epc_prediction.epc_prediction import EpcPrediction +from domain.epc_prediction.prediction_comparison import compare_prediction +from domain.sap10_calculator.calculator import Sap10Calculator + +CORPUS = Path(os.environ.get("EPC_PREDICTION_CORPUS", "/tmp/epc_prediction_corpus")) + + +def _load_cohort(postcode: str, certs: list[str]) -> list[Comparable]: + """Map a postcode's cached cert payloads to Comparables, skipping any the + mapper rejects (unsupported schema, malformed).""" + cohort: list[Comparable] = [] + for cert in certs: + path = CORPUS / postcode / f"{cert}.json" + if not path.exists(): + continue + try: + epc = EpcPropertyDataMapper.from_api_response(json.loads(path.read_text())) + except Exception: # noqa: BLE001 — a bad cert must not abort the sweep + continue + cohort.append(Comparable(epc=epc, certificate_number=cert)) + return cohort + + +def _sap(calculator: Sap10Calculator, epc: EpcPropertyData) -> Optional[float]: + try: + return calculator.calculate(epc).sap_score_continuous + except Exception: # noqa: BLE001 — some pictures don't score; count as misses + return None + + +def main() -> None: + index_path = CORPUS / "_index.json" + if not index_path.exists(): + raise SystemExit(f"no corpus at {CORPUS} — run fetch_epc_prediction_corpus.py") + index: dict[str, list[str]] = json.loads(index_path.read_text()) + + calculator = Sap10Calculator() + predictor = EpcPrediction() + + wall_hits = wall_total = 0 + floor_res: list[float] = [] + window_count_res: list[int] = [] + window_area_res: list[float] = [] + parts_res: list[int] = [] + sap_vs_lodged: list[float] = [] + sap_vs_calc_actual: list[float] = [] + sap_vs_neighbour_mean: list[float] = [] + predicted_n = skipped_no_cohort = 0 + + for postcode, certs in index.items(): + cohort = _load_cohort(postcode, certs) + if len(cohort) < 2: + skipped_no_cohort += len(cohort) + continue + for i, held_out in enumerate(cohort): + others = [c for j, c in enumerate(cohort) if j != i] + actual = held_out.epc + target = PredictionTarget( + postcode=postcode, + property_type=actual.property_type or "", + built_form=actual.built_form, + ) + comparables = select_comparables(target, others) + if not comparables.members: + continue + predicted = predictor.predict(target, comparables) + predicted_n += 1 + + cmp = compare_prediction(predicted, actual) + wall_total += 1 + wall_hits += int(cmp.wall_construction_correct) + floor_res.append(cmp.floor_area_residual) + window_count_res.append(cmp.window_count_residual) + window_area_res.append(cmp.total_window_area_residual) + parts_res.append(cmp.building_parts_residual) + + sap_pred = _sap(calculator, predicted) + lodged = actual.energy_rating_current + if sap_pred is not None and lodged is not None: + sap_vs_lodged.append(abs(sap_pred - lodged)) + sap_actual = _sap(calculator, actual) + if sap_pred is not None and sap_actual is not None: + sap_vs_calc_actual.append(abs(sap_pred - sap_actual)) + neighbour_lodged = [ + c.epc.energy_rating_current + for c in comparables.members + if c.epc.energy_rating_current is not None + ] + if neighbour_lodged and lodged is not None: + baseline = statistics.mean(neighbour_lodged) + sap_vs_neighbour_mean.append(abs(baseline - lodged)) + + print(f"corpus: {CORPUS}") + print(f"predicted {predicted_n} held-out certs ({skipped_no_cohort} had no cohort)\n") + if wall_total: + print(f"CLASSIFICATION wall_construction: {wall_hits}/{wall_total} = " + f"{wall_hits / wall_total:.1%}") + _residual("floor_area (m2)", floor_res) + _residual("window_count", [float(x) for x in window_count_res]) + _residual("total_window_area (m2)", window_area_res) + _residual("building_parts", [float(x) for x in parts_res]) + print() + _sap_line("SAP |pred-calc − lodged|", sap_vs_lodged) + _sap_line("SAP |pred-calc − calc(actual)|", sap_vs_calc_actual) + _sap_line("SAP |neighbour-mean − lodged| (baseline)", sap_vs_neighbour_mean) + + +def _residual(label: str, values: list[float]) -> None: + if not values: + print(f"RESIDUAL {label}: (none)") + return + mean_signed = statistics.mean(values) + mean_abs = statistics.mean(abs(v) for v in values) + print(f"RESIDUAL {label}: mean {mean_signed:+.2f} | mean|·| {mean_abs:.2f} " + f"(n={len(values)})") + + +def _sap_line(label: str, values: list[float]) -> None: + if not values: + print(f"{label}: (none)") + return + print(f"{label}: MAE {statistics.mean(values):.2f} | " + f"median {statistics.median(values):.2f} (n={len(values)})") + + +if __name__ == "__main__": + main() diff --git a/tests/domain/epc_prediction/test_prediction_comparison.py b/tests/domain/epc_prediction/test_prediction_comparison.py new file mode 100644 index 00000000..0ce19384 --- /dev/null +++ b/tests/domain/epc_prediction/test_prediction_comparison.py @@ -0,0 +1,101 @@ +"""Behaviour of the per-Property prediction comparison (ADR-0029): given a +predicted EpcPropertyData and the actual one, report the accuracy signals the +validation harness aggregates — classification matches on the key categoricals +and residuals on the geometry. Pure; SAP residual is computed in the runner +(it needs the calculator + lodged SAP). +""" + +from typing import Optional + +from datatypes.epc.domain.epc_property_data import ( + EpcPropertyData, + SapBuildingPart, + SapWindow, +) +from domain.epc_prediction.prediction_comparison import compare_prediction + + +def _epc( + *, + wall_construction: int = 1, + floor_area: float = 80.0, + building_parts: int = 1, + windows: Optional[list[tuple[float, float]]] = None, +) -> EpcPropertyData: + epc: EpcPropertyData = object.__new__(EpcPropertyData) + epc.total_floor_area_m2 = floor_area + parts: list[SapBuildingPart] = [] + for _ in range(building_parts): + part: SapBuildingPart = object.__new__(SapBuildingPart) + part.wall_construction = wall_construction + parts.append(part) + epc.sap_building_parts = parts + sap_windows: list[SapWindow] = [] + for width, height in windows or []: + w: SapWindow = object.__new__(SapWindow) + w.window_width = width + w.window_height = height + sap_windows.append(w) + epc.sap_windows = sap_windows + return epc + + +def test_flags_a_correct_main_wall_construction_classification() -> None: + # Arrange — predicted and actual agree on cavity (1). + predicted = _epc(wall_construction=1) + actual = _epc(wall_construction=1) + + # Act + comparison = compare_prediction(predicted, actual) + + # Assert + assert comparison.wall_construction_correct is True + + +def test_flags_an_incorrect_main_wall_construction_classification() -> None: + # Arrange — predicted cavity (1), actual solid brick (2). + predicted = _epc(wall_construction=1) + actual = _epc(wall_construction=2) + + # Act + comparison = compare_prediction(predicted, actual) + + # Assert + assert comparison.wall_construction_correct is False + + +def test_reports_the_floor_area_residual_as_predicted_minus_actual() -> None: + # Arrange — predicted 90 m², actual 100 m² (a 10 m² under-prediction). + predicted = _epc(floor_area=90.0) + actual = _epc(floor_area=100.0) + + # Act + comparison = compare_prediction(predicted, actual) + + # Assert — signed residual, predicted − actual. + assert abs(comparison.floor_area_residual - (-10.0)) <= 1e-9 + + +def test_reports_the_building_parts_count_residual() -> None: + # Arrange — predicted a single part; the actual has a main + an extension. + predicted = _epc(building_parts=1) + actual = _epc(building_parts=2) + + # Act + comparison = compare_prediction(predicted, actual) + + # Assert — predicted − actual. + assert comparison.building_parts_residual == -1 + + +def test_reports_window_count_and_total_area_residuals() -> None: + # Arrange — predicted 2 windows (3 m² total); actual 1 window (1 m²). + predicted = _epc(windows=[(1.0, 1.0), (2.0, 1.0)]) + actual = _epc(windows=[(1.0, 1.0)]) + + # Act + comparison = compare_prediction(predicted, actual) + + # Assert + assert comparison.window_count_residual == 1 + assert abs(comparison.total_window_area_residual - 2.0) <= 1e-9