mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Three reusable scripts (each with a purpose/usage docstring) for wide-scale
testing of the calculator's API front-end against the GOV.UK EPB register —
the toolkit behind the 1000-cert study (docs/HANDOVER_API_SAMPLE_ACCURACY.md):
fetch_2026_epc_sample.py — sample cert numbers across a date window
(random pages) + download full schema-21 JSON
to a cache; resumable, 429/5xx backoff.
eval_api_sap_accuracy.py — % within 0.5 SAP, error histogram, worst-40,
and the mapper/calculator raise breakdown.
analyse_api_sap_clusters.py — error grouped by property + heating type to
locate clusters (electric heating, flats, PV).
Cache dir defaults to /tmp/epc_2026_sample, overridable via EPC_SAMPLE_CACHE.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
169 lines
6.6 KiB
Python
169 lines
6.6 KiB
Python
"""Score the SAP10 calculator's API path against a cached EPC sample.

WHAT THIS IS FOR
----------------
Measures how well the API front-end (`from_api_response` → `cert_to_inputs`
→ continuous SAP) reproduces each cert's lodged rounded SAP
(`energy_rating_current`) across the sample built by
`fetch_2026_epc_sample.py`. This is the headline accuracy gauge for raw-API
behaviour on an unbiased population.

Each cert lands in one bucket:
  - computed           — ran end-to-end; SAP error recorded.
  - unsupported_schema — pre-21 schema the mapper doesn't support (skip).
  - raise:<Exc>        — mapper raised (UnmappedApiCode etc.) — a gap to fix.
  - calc_raise:<Exc>   — calculator raised (UnmappedSapCode etc.) — a gap.

OUTPUT
------
  - Category counts + the raise breakdown with example certs (what to fix).
  - For computed certs: % within 0.5 / 1 / 2 / 5 SAP, median/mean/p90/p99/max
    |err|, the signed mean (over- vs under-rating), abs-err histogram.
  - The 40 worst offenders with diagnostic columns (to prioritise).
  - A full per-cert CSV at <cache>/_results.csv for ad-hoc slicing.

USAGE
-----
    PYTHONPATH=/workspaces/model python scripts/eval_api_sap_accuracy.py

Reads the cache written by `fetch_2026_epc_sample.py` (default
`/tmp/epc_2026_sample`, overridable via `EPC_SAMPLE_CACHE`).
"""
|
|
import os
|
|
import json
|
|
import csv
|
|
import math
|
|
from collections import Counter, defaultdict
|
|
from pathlib import Path
|
|
|
|
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
|
|
from domain.sap10_calculator.calculator import calculate_sap_from_inputs
|
|
from domain.sap10_calculator.rdsap.cert_to_inputs import SAP_10_2_SPEC_PRICES, cert_to_inputs
|
|
|
|
# Directory holding the cached cert JSON files; the EPC_SAMPLE_CACHE
# environment variable, when set, takes precedence over the /tmp default.
CACHE = Path(os.getenv("EPC_SAMPLE_CACHE", "/tmp/epc_2026_sample"))
|
|
|
|
|
|
def diag(doc):
    """Collect a few raw-JSON fields that help explain a cert's error at a glance.

    Args:
        doc: Parsed certificate JSON (a dict).

    Returns:
        A flat dict with the keys ``schema``, ``prop_type``, ``built_form``,
        ``age_band``, ``mains_gas``, ``main_heat_cat``, ``main_heat_idx``,
        ``n_bps`` and ``lodged_band`` (in that order). Fields absent from
        ``doc`` come back as ``None``; ``n_bps`` comes back as ``0``.
    """
    # Sub-sections may be missing, null, or (defensively) the wrong shape;
    # anything that is not a dict is treated as empty so that a diagnostic
    # helper can never abort the evaluation run.
    energy_source = doc.get("sap_energy_source")
    if not isinstance(energy_source, dict):
        energy_source = {}

    heating = doc.get("sap_heating")
    if not isinstance(heating, dict):
        heating = {}

    # Only the first main-heating entry is reported.
    details = heating.get("main_heating_details")
    first_main = details[0] if isinstance(details, (list, tuple)) and details else None
    if not isinstance(first_main, dict):
        first_main = {}

    return {
        "schema": doc.get("schema_type"),
        "prop_type": doc.get("property_type"),
        "built_form": doc.get("built_form"),
        "age_band": doc.get("construction_age_band"),
        "mains_gas": energy_source.get("mains_gas"),
        "main_heat_cat": first_main.get("main_heating_category"),
        "main_heat_idx": first_main.get("main_heating_index_number"),
        "n_bps": len(doc.get("sap_building_parts") or []),
        "lodged_band": doc.get("current_energy_efficiency_band"),
    }
|
|
|
|
|
|
def _lodged_sap(doc):
    """Return the cert's lodged rounded SAP as an int, or None if absent/unusable."""
    raw = doc.get("energy_rating_current")
    if isinstance(raw, bool):
        return None
    if isinstance(raw, int):
        return raw
    try:
        as_float = float(raw)
    except (TypeError, ValueError):
        return None
    # Accept "72" or 72.0; reject NaN/inf and fractional values rather than
    # silently truncating them.
    return int(as_float) if as_float.is_integer() else None


def _write_results_csv(rows, path):
    """Write one CSV row per computed cert; columns follow the first row's keys."""
    with open(path, "w", newline="", encoding="utf-8") as fh:
        writer = csv.DictWriter(fh, fieldnames=list(rows[0].keys()))
        writer.writeheader()
        writer.writerows(rows)


def _print_accuracy_summary(rows):
    """Print threshold percentages, summary statistics and the abs-err histogram."""
    n = len(rows)
    abs_errs = sorted(r["abs_err"] for r in rows)

    def pct(thr):
        return 100.0 * sum(1 for e in abs_errs if e < thr) / n

    print("=" * 70)
    print(f"COMPUTED: {n} certs (continuous SAP vs lodged rounded)")
    print(f" % |err| < 0.5 : {pct(0.5):.1f}% <-- headline")
    print(f" % |err| < 1.0 : {pct(1.0):.1f}%")
    print(f" % |err| < 2.0 : {pct(2.0):.1f}%")
    print(f" % |err| < 5.0 : {pct(5.0):.1f}%")
    # Median and percentiles are plain index lookups into the sorted list
    # (no interpolation); for even n the median is the upper middle value.
    print(f" median |err| : {abs_errs[n // 2]:.3f}")
    print(f" mean |err| : {sum(abs_errs) / n:.3f}")
    print(f" p90 |err| : {abs_errs[int(n * 0.90)]:.3f}")
    print(f" p99 |err| : {abs_errs[int(n * 0.99)]:.3f}")
    print(f" max |err| : {abs_errs[-1]:.3f}")
    signed = [r["err"] for r in rows]
    print(f" mean signed err: {sum(signed) / n:+.3f} (we - lodged; +ve = we over-rate)")
    print(" abs-err buckets:")
    for lo, hi in [(0, 0.5), (0.5, 1), (1, 2), (2, 5), (5, 10), (10, 1e9)]:
        c = sum(1 for e in abs_errs if lo <= e < hi)
        print(f" [{lo:>4}, {hi:>4}) : {c:4d} ({100 * c / n:4.1f}%)")


def _print_worst_offenders(rows):
    """Print the 40 computed certs with the largest absolute SAP error."""
    print("=" * 70)
    print("TOP 40 LARGEST |err| (prioritise these):")
    worst = sorted(rows, key=lambda r: -r["abs_err"])[:40]
    print(f" {'cert':22s} {'err':>7s} {'our':>6s} {'lodg':>4s} prop bf age gas cat/idx bps")
    for r in worst:
        print(f" {r['cert']:22s} {r['err']:+7.2f} {r['our_cont']:6.1f} {r['lodged']:4d} "
              f"{str(r['prop_type']):>4s} {str(r['built_form']):>2s} {str(r['age_band'])[:3]:>3s} "
              f"{str(r['mains_gas']):>3s} {str(r['main_heat_cat']):>3s}/{str(r['main_heat_idx']):>6s} "
              f"{r['n_bps']}")


def _print_raise_examples(exc_examples):
    """Print the 20 most frequent raise signatures, each with one example cert."""
    if not exc_examples:
        return
    print("=" * 70)
    print("RAISE/ERROR EXAMPLES (mapper/calculator gaps — also prioritise):")
    for k, v in sorted(exc_examples.items(), key=lambda kv: -len(kv[1]))[:20]:
        print(f" [{len(v):3d}] {k} e.g. {v[0]}")


def main():
    """Score every cached cert and print the accuracy report.

    Walks the cert JSON files in ``CACHE`` (names matching
    ``????-????-????-????-????.json``), runs each through the mapper and the
    calculator, and buckets it as one of: ``bad_json``, ``unsupported_schema``,
    ``raise:<Exc>``, ``no_lodged_sap``, ``calc_raise:<Exc>``, ``non_finite``
    or ``computed``. Computed certs are written to ``CACHE / "_results.csv"``
    and summarised on stdout. The raise examples are printed even when no
    cert could be computed.
    """
    files = sorted(CACHE.glob("????-????-????-????-????.json"))
    rows = []
    cat = Counter()
    exc_examples = defaultdict(list)

    for f in files:
        cert = f.stem
        try:
            doc = json.loads(f.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # ValueError covers both JSONDecodeError and UnicodeDecodeError.
            cat["bad_json"] += 1
            continue
        if not isinstance(doc, dict):
            # Valid JSON but not a cert object; every later step needs a dict.
            cat["bad_json"] += 1
            continue

        try:
            epc = EpcPropertyDataMapper.from_api_response(doc)
        except ValueError as e:
            if "Unsupported EPC schema" in str(e):
                cat["unsupported_schema"] += 1
            else:
                cat["raise:ValueError"] += 1
                exc_examples["ValueError:" + str(e)[:60]].append(cert)
            continue
        except Exception as e:
            # Deliberately broad: any mapper failure is a gap to classify,
            # not a reason to abort the whole run.
            ename = type(e).__name__
            cat[f"raise:{ename}"] += 1
            exc_examples[f"{ename}:{str(e)[:60]}"].append(cert)
            continue

        lodged = _lodged_sap(doc)
        if lodged is None:
            cat["no_lodged_sap"] += 1
            continue

        try:
            cont = calculate_sap_from_inputs(
                cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)
            ).sap_score_continuous
        except Exception as e:
            # Deliberately broad, same reasoning as for the mapper above.
            ename = type(e).__name__
            cat[f"calc_raise:{ename}"] += 1
            exc_examples[f"calc:{ename}:{str(e)[:50]}"].append(cert)
            continue

        if not math.isfinite(cont):
            cat["non_finite"] += 1
            continue

        err = cont - lodged
        cat["computed"] += 1
        rows.append({
            "cert": cert, "our_cont": round(cont, 4), "lodged": lodged,
            "err": round(err, 4), "abs_err": round(abs(err), 4), **diag(doc),
        })

    if rows:
        _write_results_csv(rows, CACHE / "_results.csv")

    print("=" * 70)
    print(f"SAMPLE: {len(files)} cached certs | categories:")
    for k, v in cat.most_common():
        print(f" {k:28s} {v}")

    if not rows:
        # Nothing computed is exactly when the raise breakdown matters most.
        _print_raise_examples(exc_examples)
        return

    _print_accuracy_summary(rows)
    _print_worst_offenders(rows)
    _print_raise_examples(exc_examples)
    print(f"\nFull per-cert CSV -> {CACHE / '_results.csv'}")
|
|
|
|
|
|
# Run the evaluation only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|