def _floor_area_error(cohorts: list[list[Comparable]]) -> None:
    """Print floor-area prediction accuracy for the given cohorts.

    Reports the mean absolute error (m²), the mean absolute percentage error
    (as a share of the actual area) and the median actual area, so the
    absolute error can be read relative to how big dwellings are. As noted by
    the original author, the predicted area is the cohort median, set
    independently of the geo/similarity weighting that drives the
    categoricals.

    A (predicted, actual) pair is left out of the statistics when the actual
    area is missing or zero (the percentage error would divide by zero) or
    when the predicted area is missing. If no pair remains, a "(none)" line is
    printed instead of the statistics.

    Args:
        cohorts: Passed straight through to ``iter_predictions``, which yields
            the (predicted, actual) pairs to score.
    """
    # One (absolute error, actual area) entry per usable prediction, so the
    # error is computed once and shared by MAE and MAPE.
    scored = []
    for predicted, actual in iter_predictions(cohorts):
        predicted_area = predicted.total_floor_area_m2
        actual_area = actual.total_floor_area_m2
        # A falsy actual (None or 0) cannot anchor a percentage error.
        if not actual_area:
            continue
        # NOTE(review): assumes a prediction may come back without a floor
        # area; skipping it keeps one bad row from aborting the whole report
        # with a TypeError. Confirm against iter_predictions.
        if predicted_area is None:
            continue
        scored.append((abs(predicted_area - actual_area), actual_area))

    if not scored:
        print("RESIDUAL floor_area: (none)")
        return

    mae = statistics.mean(error for error, _ in scored)
    mape = statistics.mean(error / area for error, area in scored)
    typical = statistics.median(area for _, area in scored)
    print(
        f"RESIDUAL floor_area: MAE {mae:.2f} m2 | MAPE {mape:.1%} | "
        f"typical (median actual) {typical:.0f} m2 (n={len(scored)})"
    )