feat(epc-prediction): SAP-10.2 target filter + carbon/PE end-to-end (ADR-0030)

Make the leave-one-out runner ADR-0030-compliant:

- Hold out only SAP 10.2 targets (sap_version == 10.2) — the source cohort keeps every vintage (components are methodology-agnostic).
- Label Component Accuracy as the PRIMARY, calculator-independent section.
- End-to-end vs API-lodged (SECONDARY, calculator-FLOORED): add CO2 (tonnes) and PEI (kWh/m2) alongside SAP, using the canonical performance.py mapping (co2_kg/1000; primary_energy_kwh_per_m2).
- Add the attribution readout calc(actual) vs lodged SAP — the calculator floor the end-to-end can reach.
- Drop the neighbour-mean-of-lodged-SAP baseline (mixes SAP versions — rejected by ADR-0030).

On the 181 SAP-10.2 targets: component rates are higher than the all-vintage view (age band 60.9 -> 78.5%, floor_area mean |residual| 12.7 -> 8.4). End-to-end SAP MAE is 6.34 vs the calc(actual) floor of 3.25 — ~half the gap is the known API-path calculator residual, not prediction error.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-30 13:10:47 +00:00 · 2026-06-14 09:04:24 +00:00 · 2026-06-14 09:04:24 +00:00 · 65cb094abe
commit 65cb094abe
parent 275a30a825
1 changed files with 62 additions and 25 deletions
--- a/scripts/validate_epc_prediction.py
+++ b/scripts/validate_epc_prediction.py
@ -39,7 +39,13 @@ from domain.epc_prediction.comparable_properties import (
 )
 from domain.epc_prediction.epc_prediction import EpcPrediction
 from domain.epc_prediction.prediction_comparison import compare_prediction
-from domain.sap10_calculator.calculator import Sap10Calculator
+from domain.sap10_calculator.calculator import Sap10Calculator, SapResult
+
+# Target-cert spec gate: only SAP 10.2 certs (schema 21.0.x) carry full-fidelity
+# lodged components + a same-spec lodged figure to check against (ADR-0030). The
+# source cohort keeps all vintages — components are methodology-agnostic.
+_SAP_10_2: float = 10.2
+_KG_PER_TONNE: float = 1000.0

 CORPUS = Path(os.environ.get("EPC_PREDICTION_CORPUS", "/tmp/epc_prediction_corpus"))

@ -101,13 +107,21 @@ def _recency(comparable: Comparable) -> tuple[date, str]:
    )


-def _sap(calculator: Sap10Calculator, epc: EpcPropertyData) -> Optional[float]:
+def _result(
+    calculator: Sap10Calculator, epc: EpcPropertyData
+) -> Optional[SapResult]:
    try:
-        return calculator.calculate(epc).sap_score_continuous
+        return calculator.calculate(epc)
    except Exception:  # noqa: BLE001 — some pictures don't score; count as misses
        return None


+def _co2_tonnes(result: SapResult) -> float:
+    """Calculated annual CO2 in tonnes, matching the lodged `co2_emissions_current`
+    scale (see domain/property_baseline/performance.py)."""
+    return result.co2_kg_per_yr / _KG_PER_TONNE
+
+
 def main() -> None:
    index_path = CORPUS / "_index.json"
    if not index_path.exists():
@ -126,10 +140,14 @@ def main() -> None:
    window_area_res: list[float] = []
    parts_res: list[int] = []
    door_res: list[int] = []
+    # End-to-end (calculator-FLOORED) vs API-lodged — secondary guard, ADR-0030.
    sap_vs_lodged: list[float] = []
-    sap_vs_calc_actual: list[float] = []
-    sap_vs_neighbour_mean: list[float] = []
-    predicted_n = skipped_no_cohort = 0
+    co2_vs_lodged: list[float] = []
+    pei_vs_lodged: list[float] = []
+    # Attribution readout: how far the calculator alone is from lodged on the
+    # ACTUAL components — the floor the end-to-end numbers can reach.
+    sap_calc_actual_vs_lodged: list[float] = []
+    predicted_n = skipped_non_102 = skipped_no_cohort = 0

    for postcode, certs in index.items():
        cohort = _load_cohort(postcode, certs)
@ -138,6 +156,11 @@ def main() -> None:
            skipped_no_cohort += len(targets)
            continue
        for held_out in targets:
+            # Only SAP 10.2 certs are valid validation targets (ADR-0030); the
+            # source cohort (`others`) keeps every vintage.
+            if held_out.epc.sap_version != _SAP_10_2:
+                skipped_non_102 += 1
+                continue
            # Exclude every cert of the held-out address (not just the held cert)
            # so a re-lodgement of the same property cannot leak into the cohort.
            others = [
@ -166,24 +189,36 @@ def main() -> None:
            parts_res.append(cmp.building_parts_residual)
            door_res.append(cmp.door_count_residual)

-            sap_pred = _sap(calculator, predicted)
-            lodged = actual.energy_rating_current
-            if sap_pred is not None and lodged is not None:
-                sap_vs_lodged.append(abs(sap_pred - lodged))
-            sap_actual = _sap(calculator, actual)
-            if sap_pred is not None and sap_actual is not None:
-                sap_vs_calc_actual.append(abs(sap_pred - sap_actual))
-            neighbour_lodged = [
-                c.epc.energy_rating_current
-                for c in comparables.members
-                if c.epc.energy_rating_current is not None
-            ]
-            if neighbour_lodged and lodged is not None:
-                baseline = statistics.mean(neighbour_lodged)
-                sap_vs_neighbour_mean.append(abs(baseline - lodged))
+            pred_result = _result(calculator, predicted)
+            actual_result = _result(calculator, actual)
+            lodged_sap = actual.energy_rating_current
+            lodged_co2 = actual.co2_emissions_current
+            lodged_pei = actual.energy_consumption_current
+            if pred_result is not None:
+                if lodged_sap is not None:
+                    sap_vs_lodged.append(
+                        abs(pred_result.sap_score_continuous - lodged_sap)
+                    )
+                if lodged_co2 is not None:
+                    co2_vs_lodged.append(
+                        abs(_co2_tonnes(pred_result) - lodged_co2)
+                    )
+                if lodged_pei is not None:
+                    pei_vs_lodged.append(
+                        abs(pred_result.primary_energy_kwh_per_m2 - lodged_pei)
+                    )
+            if actual_result is not None and lodged_sap is not None:
+                sap_calc_actual_vs_lodged.append(
+                    abs(actual_result.sap_score_continuous - lodged_sap)
+                )

    print(f"corpus: {CORPUS}")
-    print(f"predicted {predicted_n} held-out certs ({skipped_no_cohort} had no cohort)\n")
+    print(
+        f"predicted {predicted_n} SAP-10.2 held-out targets "
+        f"({skipped_non_102} non-10.2 targets skipped, "
+        f"{skipped_no_cohort} had no cohort)\n"
+    )
+    print("--- Component Accuracy (PRIMARY, calculator-independent) ---")
    for name, (hits, total) in categoricals.items():
        if total:
            print(f"CLASSIFICATION  {name}: {hits}/{total} = {hits / total:.1%}")
@ -194,9 +229,11 @@ def main() -> None:
    _residual("building_parts", [float(x) for x in parts_res])
    _residual("door_count", [float(x) for x in door_res])
    print()
-    _sap_line("SAP |pred-calc − lodged|", sap_vs_lodged)
-    _sap_line("SAP |pred-calc − calc(actual)|", sap_vs_calc_actual)
-    _sap_line("SAP |neighbour-mean − lodged| (baseline)", sap_vs_neighbour_mean)
+    print("--- End-to-end vs API-lodged (SECONDARY, calculator-FLOORED) ---")
+    _sap_line("SAP |pred − lodged|", sap_vs_lodged)
+    _sap_line("CO2 (t) |pred − lodged|", co2_vs_lodged)
+    _sap_line("PEI (kWh/m2) |pred − lodged|", pei_vs_lodged)
+    _sap_line("  floor: SAP |calc(actual) − lodged|", sap_calc_actual_vs_lodged)


 def _tally(counter: list[int], hit: Optional[bool]) -> None: