feat(epc-prediction): SAP-10.2 target filter + carbon/PE end-to-end (ADR-0030)

Make the leave-one-out runner ADR-0030-compliant:
- Hold out only SAP 10.2 targets (sap_version == 10.2) — the source cohort
  keeps every vintage (components are methodology-agnostic).
- Label Component Accuracy as the PRIMARY, calculator-independent section.
- End-to-end vs API-lodged (SECONDARY, calculator-FLOORED): add CO2 (tonnes)
  and PEI (kWh/m2) alongside SAP, using the canonical performance.py mapping
  (co2_kg/1000; primary_energy_kwh_per_m2).
- Add the attribution readout |calc(actual) - lodged| for SAP — the calculator
  floor, i.e. the lowest error the end-to-end figures can reach.
- Drop the neighbour-mean-of-lodged-SAP baseline (mixes SAP versions —
  rejected by ADR-0030).

On the 181 SAP-10.2 targets, component accuracy is better than in the
all-vintage view (age band hit rate 60.9% -> 78.5%; floor_area mean absolute
residual 12.7 -> 8.4). End-to-end SAP MAE is 6.34 against a calc(actual) floor
of 3.25, so roughly half of the end-to-end error is the known API-path
calculator residual, not prediction error.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Khalim Conn-Kowlessar 2026-06-14 09:04:24 +00:00
parent 275a30a825
commit 65cb094abe

View file

@ -39,7 +39,13 @@ from domain.epc_prediction.comparable_properties import (
)
from domain.epc_prediction.epc_prediction import EpcPrediction
from domain.epc_prediction.prediction_comparison import compare_prediction
from domain.sap10_calculator.calculator import Sap10Calculator
from domain.sap10_calculator.calculator import Sap10Calculator, SapResult
# Target-cert spec gate: only SAP 10.2 certs (schema 21.0.x) carry full-fidelity
# lodged components + a same-spec lodged figure to check against (ADR-0030). The
# source cohort keeps all vintages — components are methodology-agnostic.
_SAP_10_2: float = 10.2
_KG_PER_TONNE: float = 1000.0
CORPUS = Path(os.environ.get("EPC_PREDICTION_CORPUS", "/tmp/epc_prediction_corpus"))
@ -101,13 +107,21 @@ def _recency(comparable: Comparable) -> tuple[date, str]:
)
def _sap(calculator: Sap10Calculator, epc: EpcPropertyData) -> Optional[float]:
def _result(
calculator: Sap10Calculator, epc: EpcPropertyData
) -> Optional[SapResult]:
try:
return calculator.calculate(epc).sap_score_continuous
return calculator.calculate(epc)
except Exception: # noqa: BLE001 — some pictures don't score; count as misses
return None
def _co2_tonnes(result: SapResult) -> float:
    """Return the calculated annual CO2 of `result` expressed in tonnes.

    The calculator reports kilograms per year, whereas the lodged
    `co2_emissions_current` figure is on the tonnes scale (see
    domain/property_baseline/performance.py), so the value is rescaled
    before the two are compared.
    """
    kilograms_per_year = result.co2_kg_per_yr
    return kilograms_per_year / _KG_PER_TONNE
def main() -> None:
index_path = CORPUS / "_index.json"
if not index_path.exists():
@ -126,10 +140,14 @@ def main() -> None:
window_area_res: list[float] = []
parts_res: list[int] = []
door_res: list[int] = []
# End-to-end (calculator-FLOORED) vs API-lodged — secondary guard, ADR-0030.
sap_vs_lodged: list[float] = []
sap_vs_calc_actual: list[float] = []
sap_vs_neighbour_mean: list[float] = []
predicted_n = skipped_no_cohort = 0
co2_vs_lodged: list[float] = []
pei_vs_lodged: list[float] = []
# Attribution readout: how far the calculator alone is from lodged on the
# ACTUAL components — the floor the end-to-end numbers can reach.
sap_calc_actual_vs_lodged: list[float] = []
predicted_n = skipped_non_102 = skipped_no_cohort = 0
for postcode, certs in index.items():
cohort = _load_cohort(postcode, certs)
@ -138,6 +156,11 @@ def main() -> None:
skipped_no_cohort += len(targets)
continue
for held_out in targets:
# Only SAP 10.2 certs are valid validation targets (ADR-0030); the
# source cohort (`others`) keeps every vintage.
if held_out.epc.sap_version != _SAP_10_2:
skipped_non_102 += 1
continue
# Exclude every cert of the held-out address (not just the held cert)
# so a re-lodgement of the same property cannot leak into the cohort.
others = [
@ -166,24 +189,36 @@ def main() -> None:
parts_res.append(cmp.building_parts_residual)
door_res.append(cmp.door_count_residual)
sap_pred = _sap(calculator, predicted)
lodged = actual.energy_rating_current
if sap_pred is not None and lodged is not None:
sap_vs_lodged.append(abs(sap_pred - lodged))
sap_actual = _sap(calculator, actual)
if sap_pred is not None and sap_actual is not None:
sap_vs_calc_actual.append(abs(sap_pred - sap_actual))
neighbour_lodged = [
c.epc.energy_rating_current
for c in comparables.members
if c.epc.energy_rating_current is not None
]
if neighbour_lodged and lodged is not None:
baseline = statistics.mean(neighbour_lodged)
sap_vs_neighbour_mean.append(abs(baseline - lodged))
pred_result = _result(calculator, predicted)
actual_result = _result(calculator, actual)
lodged_sap = actual.energy_rating_current
lodged_co2 = actual.co2_emissions_current
lodged_pei = actual.energy_consumption_current
if pred_result is not None:
if lodged_sap is not None:
sap_vs_lodged.append(
abs(pred_result.sap_score_continuous - lodged_sap)
)
if lodged_co2 is not None:
co2_vs_lodged.append(
abs(_co2_tonnes(pred_result) - lodged_co2)
)
if lodged_pei is not None:
pei_vs_lodged.append(
abs(pred_result.primary_energy_kwh_per_m2 - lodged_pei)
)
if actual_result is not None and lodged_sap is not None:
sap_calc_actual_vs_lodged.append(
abs(actual_result.sap_score_continuous - lodged_sap)
)
print(f"corpus: {CORPUS}")
print(f"predicted {predicted_n} held-out certs ({skipped_no_cohort} had no cohort)\n")
print(
f"predicted {predicted_n} SAP-10.2 held-out targets "
f"({skipped_non_102} non-10.2 targets skipped, "
f"{skipped_no_cohort} had no cohort)\n"
)
print("--- Component Accuracy (PRIMARY, calculator-independent) ---")
for name, (hits, total) in categoricals.items():
if total:
print(f"CLASSIFICATION {name}: {hits}/{total} = {hits / total:.1%}")
@ -194,9 +229,11 @@ def main() -> None:
_residual("building_parts", [float(x) for x in parts_res])
_residual("door_count", [float(x) for x in door_res])
print()
_sap_line("SAP |pred-calc lodged|", sap_vs_lodged)
_sap_line("SAP |pred-calc calc(actual)|", sap_vs_calc_actual)
_sap_line("SAP |neighbour-mean lodged| (baseline)", sap_vs_neighbour_mean)
print("--- End-to-end vs API-lodged (SECONDARY, calculator-FLOORED) ---")
_sap_line("SAP |pred lodged|", sap_vs_lodged)
_sap_line("CO2 (t) |pred lodged|", co2_vs_lodged)
_sap_line("PEI (kWh/m2) |pred lodged|", pei_vs_lodged)
_sap_line(" floor: SAP |calc(actual) lodged|", sap_calc_actual_vs_lodged)
def _tally(counter: list[int], hit: Optional[bool]) -> None: