diff --git a/scripts/validate_epc_prediction.py b/scripts/validate_epc_prediction.py index 797389d2..04433607 100644 --- a/scripts/validate_epc_prediction.py +++ b/scripts/validate_epc_prediction.py @@ -83,7 +83,15 @@ def main() -> None: sap_vs_lodged: list[float] = [] co2_vs_lodged: list[float] = [] pei_vs_lodged: list[float] = [] - sap_calc_actual_vs_lodged: list[float] = [] # the floor the end-to-end reaches + # Calculator floors — calc(actual) vs lodged — per metric. Each is the error + # the end-to-end cannot beat (the API-path mapper/calculator residual, a + # separate workstream), so it attributes how much of a metric's pred-vs-lodged + # gap is the calculator vs the prediction. PEI carries a far larger floor than + # SAP (~16 vs ~1.6 kWh/m2 / pts), so the headline PEI MAE must not be read as + # pure prediction error (issue #1228). + sap_floor: list[float] = [] + co2_floor: list[float] = [] + pei_floor: list[float] = [] for predicted, actual in iter_predictions(cohorts): pred_result = _result(calculator, predicted) actual_result = _result(calculator, actual) @@ -101,17 +109,26 @@ def main() -> None: pei_vs_lodged.append( abs(pred_result.primary_energy_kwh_per_m2 - lodged_pei) ) - if actual_result is not None and lodged_sap is not None: - sap_calc_actual_vs_lodged.append( - abs(actual_result.sap_score_continuous - lodged_sap) - ) + if actual_result is not None: + if lodged_sap is not None: + sap_floor.append( + abs(actual_result.sap_score_continuous - lodged_sap) + ) + if lodged_co2 is not None: + co2_floor.append(abs(_co2_tonnes(actual_result) - lodged_co2)) + if lodged_pei is not None: + pei_floor.append( + abs(actual_result.primary_energy_kwh_per_m2 - lodged_pei) + ) print() print("--- End-to-end vs API-lodged (SECONDARY, calculator-FLOORED) ---") _sap_line("SAP |pred − lodged|", sap_vs_lodged) _sap_line("CO2 (t) |pred − lodged|", co2_vs_lodged) _sap_line("PEI (kWh/m2) |pred − lodged|", pei_vs_lodged) - _sap_line(" floor: SAP |calc(actual) − lodged|", sap_calc_actual_vs_lodged) + _sap_line(" floor: SAP |calc(actual) − lodged|", sap_floor) + _sap_line(" floor: CO2 |calc(actual) − lodged|", co2_floor) + _sap_line(" floor: PEI |calc(actual) − lodged|", pei_floor) def _residual(label: str, values: list[float]) -> None: