feat(epc-prediction): report floor-area MAE + MAPE vs typical size

Adds a floor_area line giving MAE (m2), MAPE (% of actual), and the typical
(median actual) size, so the absolute error reads relative to dwelling size.
Corpus: MAE 10.48 m2 / MAPE 13.2% / typical 61 m2.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Khalim Conn-Kowlessar 2026-06-15 15:07:22 +00:00
parent aea2d7150f
commit d8f015fb0e

View file

@ -29,6 +29,7 @@ from pathlib import Path
from typing import Optional
from datatypes.epc.domain.epc_property_data import EpcPropertyData
from domain.epc_prediction.comparable_properties import Comparable
from domain.epc_prediction.validation import (
evaluate_component_accuracy,
iter_predictions,
@ -70,6 +71,7 @@ def main() -> None:
if total:
print(f"CLASSIFICATION {name}: {hits}/{total} = {hits / total:.1%}")
print()
_floor_area_error(cohorts)
_residual("floor_area (m2)", accuracy.residuals.get("floor_area", []))
_residual("window_count", accuracy.residuals.get("window_count", []))
_residual(
@ -131,6 +133,28 @@ def main() -> None:
_sap_line(" floor: PEI |calc(actual) lodged|", pei_floor)
def _floor_area_error(cohorts: list[list[Comparable]]) -> None:
    """Print a single floor-area accuracy line for the given cohorts.

    For every (predicted, actual) pair produced by ``iter_predictions`` the
    ``total_floor_area_m2`` of both sides is compared. The printed line holds:

    * MAE  - mean absolute error, in m²;
    * MAPE - mean absolute error as a fraction of the actual area;
    * the median actual area, as a "typical size" yardstick for the MAE;
    * n    - the number of pairs that contributed.

    Pairs whose actual area is falsy (``None`` or ``0``) are left out, since
    the percentage error divides by the actual. When nothing is left, a
    ``(none)`` placeholder line is printed instead.

    NOTE(review): the original author's note says the predicted area is the
    cohort median, set independently of the geo/similarity weighting that
    drives the categoricals - that is not visible from this function; confirm
    against the prediction code.
    """
    samples = []
    for predicted, actual in iter_predictions(cohorts):
        predicted_m2 = predicted.total_floor_area_m2
        actual_m2 = actual.total_floor_area_m2
        # A missing or zero actual cannot serve as the MAPE denominator.
        if not actual_m2:
            continue
        samples.append((predicted_m2, actual_m2))

    if not samples:
        print("RESIDUAL floor_area: (none)")
        return

    abs_errors = [abs(p - a) for p, a in samples]
    actual_sizes = [a for _, a in samples]

    mae = statistics.mean(abs_errors)
    mape = statistics.mean(
        err / size for err, size in zip(abs_errors, actual_sizes)
    )
    typical = statistics.median(actual_sizes)
    count = len(samples)

    print(
        f"RESIDUAL floor_area: MAE {mae:.2f} m2 | MAPE {mape:.1%} | "
        f"typical (median actual) {typical:.0f} m2 (n={count})"
    )
def _residual(label: str, values: list[float]) -> None:
if not values:
print(f"RESIDUAL {label}: (none)")