From 7f48495ed5828f19d0a220b9b6a425059b559133 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 15 Jun 2026 13:55:20 +0000
Subject: [PATCH] feat(epc-prediction): surface CO2 + PEI calculator floors in
 the report (#1228)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The validation report showed only the SAP calculator floor (calc(actual) vs
lodged), so the headline PEI MAE (~40 kWh/m2) read as prediction error when
much of it is the calculator's own API-path residual. Adds the CO2 + PEI
floors alongside SAP.

Diagnostic (150pc/514): PEI floor MAE is 15.73 kWh/m2 (calc(actual) vs
lodged) against a SAP floor MAE of 1.57 pts; the calc(actual)/lodged PEI
ratio is ~1.06 (mean +10.7, i.e. a ~6% over-estimate). That RULES OUT the
suspected gross unit/definition mismatch (a unit bug would show up as
~2x/3.6x, not 1.06) and reframes #1228: the PEI gap is a modest calculator
bias (~16 floor, calc-branch) plus a larger prediction-sensitivity term
(~24, roughly the remainder of the ~40 headline) — PEI is far more
prediction-sensitive than SAP. CO2 floor 0.20 t. Script-only; no gate
impact.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 scripts/validate_epc_prediction.py | 29 +++++++++++++++++++++++------
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/scripts/validate_epc_prediction.py b/scripts/validate_epc_prediction.py
index 797389d2..04433607 100644
--- a/scripts/validate_epc_prediction.py
+++ b/scripts/validate_epc_prediction.py
@@ -83,7 +83,15 @@ def main() -> None:
     sap_vs_lodged: list[float] = []
     co2_vs_lodged: list[float] = []
     pei_vs_lodged: list[float] = []
-    sap_calc_actual_vs_lodged: list[float] = []  # the floor the end-to-end reaches
+    # Calculator floors — calc(actual) vs lodged — per metric. Each is the error
+    # the end-to-end cannot beat (the API-path mapper/calculator residual, a
+    # separate workstream), so it attributes how much of a metric's pred-vs-lodged
+    # gap is the calculator vs the prediction. PEI carries a far larger floor than
+    # SAP (~16 kWh/m2 vs ~1.6 SAP points), so the headline PEI MAE must not be
+    # read as pure prediction error (issue #1228).
+    sap_floor: list[float] = []
+    co2_floor: list[float] = []
+    pei_floor: list[float] = []
     for predicted, actual in iter_predictions(cohorts):
         pred_result = _result(calculator, predicted)
         actual_result = _result(calculator, actual)
@@ -101,17 +109,26 @@ def main() -> None:
                 pei_vs_lodged.append(
                     abs(pred_result.primary_energy_kwh_per_m2 - lodged_pei)
                 )
-        if actual_result is not None and lodged_sap is not None:
-            sap_calc_actual_vs_lodged.append(
-                abs(actual_result.sap_score_continuous - lodged_sap)
-            )
+        if actual_result is not None:
+            if lodged_sap is not None:
+                sap_floor.append(
+                    abs(actual_result.sap_score_continuous - lodged_sap)
+                )
+            if lodged_co2 is not None:
+                co2_floor.append(abs(_co2_tonnes(actual_result) - lodged_co2))
+            if lodged_pei is not None:
+                pei_floor.append(
+                    abs(actual_result.primary_energy_kwh_per_m2 - lodged_pei)
+                )
 
     print()
     print("--- End-to-end vs API-lodged (SECONDARY, calculator-FLOORED) ---")
     _sap_line("SAP |pred − lodged|", sap_vs_lodged)
     _sap_line("CO2 (t) |pred − lodged|", co2_vs_lodged)
     _sap_line("PEI (kWh/m2) |pred − lodged|", pei_vs_lodged)
-    _sap_line("  floor: SAP |calc(actual) − lodged|", sap_calc_actual_vs_lodged)
+    _sap_line("  floor: SAP |calc(actual) − lodged|", sap_floor)
+    _sap_line("  floor: CO2 |calc(actual) − lodged|", co2_floor)
+    _sap_line("  floor: PEI |calc(actual) − lodged|", pei_floor)
 
 
 def _residual(label: str, values: list[float]) -> None: