From 65cb094abeb24871d42a86016b25777ed62366d3 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar
Date: Sun, 14 Jun 2026 09:04:24 +0000
Subject: [PATCH] feat(epc-prediction): SAP-10.2 target filter + carbon/PE end-to-end (ADR-0030)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Make the leave-one-out runner ADR-0030-compliant:

- Hold out only SAP 10.2 targets (sap_version == 10.2) — the source
  cohort keeps every vintage (components are methodology-agnostic).
- Label Component Accuracy as the PRIMARY, calculator-independent
  section.
- End-to-end vs API-lodged (SECONDARY, calculator-FLOORED): add CO2
  (tonnes) and PEI (kWh/m2) alongside SAP, using the canonical
  performance.py mapping (co2_kg/1000; primary_energy_kwh_per_m2).
- Add the attribution readout calc(actual) vs lodged SAP — the
  calculator floor the end-to-end can reach.
- Drop the neighbour-mean-of-lodged-SAP baseline (mixes SAP versions —
  rejected by ADR-0030).

On the 181 SAP-10.2 targets: component rates are higher than the
all-vintage view (age band 60.9 -> 78.5%, floor_area mean|.| 12.7 ->
8.4). End-to-end SAP MAE 6.34 vs the calc(actual) floor of 3.25 — ~half
the gap is the known API-path calculator residual, not prediction error.
Co-Authored-By: Claude Opus 4.8 --- scripts/validate_epc_prediction.py | 87 +++++++++++++++++++++--------- 1 file changed, 62 insertions(+), 25 deletions(-) diff --git a/scripts/validate_epc_prediction.py b/scripts/validate_epc_prediction.py index 1bc97c4e..18ee4bbb 100644 --- a/scripts/validate_epc_prediction.py +++ b/scripts/validate_epc_prediction.py @@ -39,7 +39,13 @@ from domain.epc_prediction.comparable_properties import ( ) from domain.epc_prediction.epc_prediction import EpcPrediction from domain.epc_prediction.prediction_comparison import compare_prediction -from domain.sap10_calculator.calculator import Sap10Calculator +from domain.sap10_calculator.calculator import Sap10Calculator, SapResult + +# Target-cert spec gate: only SAP 10.2 certs (schema 21.0.x) carry full-fidelity +# lodged components + a same-spec lodged figure to check against (ADR-0030). The +# source cohort keeps all vintages — components are methodology-agnostic. +_SAP_10_2: float = 10.2 +_KG_PER_TONNE: float = 1000.0 CORPUS = Path(os.environ.get("EPC_PREDICTION_CORPUS", "/tmp/epc_prediction_corpus")) @@ -101,13 +107,21 @@ def _recency(comparable: Comparable) -> tuple[date, str]: ) -def _sap(calculator: Sap10Calculator, epc: EpcPropertyData) -> Optional[float]: +def _result( + calculator: Sap10Calculator, epc: EpcPropertyData +) -> Optional[SapResult]: try: - return calculator.calculate(epc).sap_score_continuous + return calculator.calculate(epc) except Exception: # noqa: BLE001 — some pictures don't score; count as misses return None +def _co2_tonnes(result: SapResult) -> float: + """Calculated annual CO2 in tonnes, matching the lodged `co2_emissions_current` + scale (see domain/property_baseline/performance.py).""" + return result.co2_kg_per_yr / _KG_PER_TONNE + + def main() -> None: index_path = CORPUS / "_index.json" if not index_path.exists(): @@ -126,10 +140,14 @@ def main() -> None: window_area_res: list[float] = [] parts_res: list[int] = [] door_res: list[int] = [] + # End-to-end 
(calculator-FLOORED) vs API-lodged — secondary guard, ADR-0030. sap_vs_lodged: list[float] = [] - sap_vs_calc_actual: list[float] = [] - sap_vs_neighbour_mean: list[float] = [] - predicted_n = skipped_no_cohort = 0 + co2_vs_lodged: list[float] = [] + pei_vs_lodged: list[float] = [] + # Attribution readout: how far the calculator alone is from lodged on the + # ACTUAL components — the floor the end-to-end numbers can reach. + sap_calc_actual_vs_lodged: list[float] = [] + predicted_n = skipped_non_102 = skipped_no_cohort = 0 for postcode, certs in index.items(): cohort = _load_cohort(postcode, certs) @@ -138,6 +156,11 @@ def main() -> None: skipped_no_cohort += len(targets) continue for held_out in targets: + # Only SAP 10.2 certs are valid validation targets (ADR-0030); the + # source cohort (`others`) keeps every vintage. + if held_out.epc.sap_version != _SAP_10_2: + skipped_non_102 += 1 + continue # Exclude every cert of the held-out address (not just the held cert) # so a re-lodgement of the same property cannot leak into the cohort. 
others = [ @@ -166,24 +189,36 @@ def main() -> None: parts_res.append(cmp.building_parts_residual) door_res.append(cmp.door_count_residual) - sap_pred = _sap(calculator, predicted) - lodged = actual.energy_rating_current - if sap_pred is not None and lodged is not None: - sap_vs_lodged.append(abs(sap_pred - lodged)) - sap_actual = _sap(calculator, actual) - if sap_pred is not None and sap_actual is not None: - sap_vs_calc_actual.append(abs(sap_pred - sap_actual)) - neighbour_lodged = [ - c.epc.energy_rating_current - for c in comparables.members - if c.epc.energy_rating_current is not None - ] - if neighbour_lodged and lodged is not None: - baseline = statistics.mean(neighbour_lodged) - sap_vs_neighbour_mean.append(abs(baseline - lodged)) + pred_result = _result(calculator, predicted) + actual_result = _result(calculator, actual) + lodged_sap = actual.energy_rating_current + lodged_co2 = actual.co2_emissions_current + lodged_pei = actual.energy_consumption_current + if pred_result is not None: + if lodged_sap is not None: + sap_vs_lodged.append( + abs(pred_result.sap_score_continuous - lodged_sap) + ) + if lodged_co2 is not None: + co2_vs_lodged.append( + abs(_co2_tonnes(pred_result) - lodged_co2) + ) + if lodged_pei is not None: + pei_vs_lodged.append( + abs(pred_result.primary_energy_kwh_per_m2 - lodged_pei) + ) + if actual_result is not None and lodged_sap is not None: + sap_calc_actual_vs_lodged.append( + abs(actual_result.sap_score_continuous - lodged_sap) + ) print(f"corpus: {CORPUS}") - print(f"predicted {predicted_n} held-out certs ({skipped_no_cohort} had no cohort)\n") + print( + f"predicted {predicted_n} SAP-10.2 held-out targets " + f"({skipped_non_102} non-10.2 targets skipped, " + f"{skipped_no_cohort} had no cohort)\n" + ) + print("--- Component Accuracy (PRIMARY, calculator-independent) ---") for name, (hits, total) in categoricals.items(): if total: print(f"CLASSIFICATION {name}: {hits}/{total} = {hits / total:.1%}") @@ -194,9 +229,11 @@ def main() 
-> None: _residual("building_parts", [float(x) for x in parts_res]) _residual("door_count", [float(x) for x in door_res]) print() - _sap_line("SAP |pred-calc − lodged|", sap_vs_lodged) - _sap_line("SAP |pred-calc − calc(actual)|", sap_vs_calc_actual) - _sap_line("SAP |neighbour-mean − lodged| (baseline)", sap_vs_neighbour_mean) + print("--- End-to-end vs API-lodged (SECONDARY, calculator-FLOORED) ---") + _sap_line("SAP |pred − lodged|", sap_vs_lodged) + _sap_line("CO2 (t) |pred − lodged|", co2_vs_lodged) + _sap_line("PEI (kWh/m2) |pred − lodged|", pei_vs_lodged) + _sap_line(" floor: SAP |calc(actual) − lodged|", sap_calc_actual_vs_lodged) def _tally(counter: list[int], hit: Optional[bool]) -> None: