mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
CertResult now carries its Plan (with flat baseline/post-SAP/measures properties), and `format_cohort_csv` renders one browsable row per cert (SAP transition, band, measures, cost, bill saving, valuation %, error). `scripts/run_modelling_cohort.py` is turnkey: no args runs the committed golden cohort, prints a sense-check table for the first measure-bearing certs (a capped preview so a large dump doesn't flood the terminal), the summary, and writes modelling_cohort.csv (gitignored). Point it at the EPC dump when it lands. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
142 lines
4.9 KiB
Python
142 lines
4.9 KiB
Python
"""Run a cohort of API-shaped EPC JSONs through Modelling, offline.
|
|
|
|
Parses each file with `EpcPropertyDataMapper.from_api_response` (the EPC-API
|
|
shape) and runs it through `run_modelling` — no database, no network, no
|
|
Baseline gate. A cert that raises (e.g. an unpriced fuel, an unmapped code) is
|
|
captured as an error rather than aborting the sweep, so one bad cert never
|
|
stops the inspection. Point it at your EPC dump and read the summary / CSV.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
from typing import Iterable, Optional
|
|
|
|
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
|
|
from domain.modelling.plan import Plan
|
|
from harness.console import DEFAULT_CATALOGUE, run_modelling
|
|
|
|
|
|
@dataclass(frozen=True)
class CertResult:
    """Result of modelling a single cert.

    Holds the cert's `name` together with either the `Plan` that modelling
    produced (kept whole so it can be inspected in full) or the text of the
    error that was captured instead. The read-only properties flatten the
    Plan into scalars for tables/CSV and fall back to a neutral value when
    there is no Plan.
    """

    name: str
    plan: Optional[Plan] = None
    error: Optional[str] = None

    @property
    def measures(self) -> int:
        """Number of measures in the Plan; 0 when there is no Plan."""
        if self.plan is None:
            return 0
        return len(self.plan.measures)

    @property
    def baseline_sap(self) -> Optional[float]:
        """The Plan's `baseline.sap_continuous`; None when there is no Plan."""
        if self.plan is None:
            return None
        return self.plan.baseline.sap_continuous

    @property
    def post_sap(self) -> Optional[float]:
        """The Plan's `post_sap_continuous`; None when there is no Plan."""
        if self.plan is None:
            return None
        return self.plan.post_sap_continuous
|
|
|
|
|
|
def run_cohort(
    json_paths: Iterable[Path],
    *,
    goal_band: str = "C",
    catalogue_path: Path = DEFAULT_CATALOGUE,
) -> list[CertResult]:
    """Model every API-shaped EPC JSON in `json_paths` and collect the outcomes.

    Each file is read, parsed with `EpcPropertyDataMapper.from_api_response`
    and passed to `run_modelling` with `print_table=False`.

    Args:
        json_paths: Files holding one EPC-API-shaped JSON document each.
        goal_band: Forwarded to `run_modelling` as `goal_band`.
        catalogue_path: Forwarded to `run_modelling` as `catalogue_path`.

    Returns:
        One `CertResult` per input path, in input order, named after the
        path's stem. A cert whose read, parse or modelling raises an
        `Exception` is recorded with `error="<ExceptionType>: <message>"`
        and no plan, so the sweep always runs to completion.
    """
    results: list[CertResult] = []
    for path in json_paths:
        try:
            # Hand json.loads the raw bytes: it detects UTF-8/16/32 (and a
            # BOM) itself, so decoding no longer depends on the platform's
            # locale encoding the way a bare `read_text()` does.
            payload = json.loads(path.read_bytes())
            epc = EpcPropertyDataMapper.from_api_response(payload)
            plan = run_modelling(
                epc,
                goal_band=goal_band,
                catalogue_path=catalogue_path,
                print_table=False,
            )
        except Exception as error:  # noqa: BLE001 — one bad cert must not stop the sweep
            results.append(
                CertResult(name=path.stem, error=f"{type(error).__name__}: {error}")
            )
        else:
            results.append(CertResult(name=path.stem, plan=plan))
    return results
|
|
|
|
|
|
def format_cohort_summary(results: list[CertResult]) -> str:
    """Summarise a cohort run as a short multi-line text report.

    Reports the cohort size, how many certs ran without error, how many of
    those produced at least one measure, how many errored, the distribution
    of measure counts among the certs that ran, and — when there were any
    errors — each distinct error message with its count, most frequent first.
    """
    measure_counts = [item.measures for item in results if item.error is None]
    failures = [item.error for item in results if item.error is not None]

    # Tally how many successful certs ended up with each measure count.
    histogram: dict[int, int] = {}
    for n_measures in measure_counts:
        histogram.setdefault(n_measures, 0)
        histogram[n_measures] += 1

    # Tally identical error messages together.
    failure_tally: dict[str, int] = {}
    for message in failures:
        failure_tally[message] = 1 + failure_tally.get(message, 0)

    produced_measures = len([n for n in measure_counts if n > 0])
    ordered_histogram = {key: histogram[key] for key in sorted(histogram)}

    report = [
        f"cohort size : {len(results)}",
        f"ran offline : {len(measure_counts)}",
        f"w/ measures : {produced_measures}",
        f"errors : {len(failures)}",
        f"measure-count distribution: {ordered_histogram}",
    ]
    if not failure_tally:
        return "\n".join(report)

    report.append("error kinds:")
    # The sort is stable, so equally frequent messages keep first-seen order.
    ranked = sorted(failure_tally.items(), key=lambda pair: pair[1], reverse=True)
    for kind, count in ranked:
        report.append(f" {count:3d} {kind}")
    return "\n".join(report)
|
|
|
|
|
|
# Header row of the cohort CSV: the column names, comma-joined, in the order
# `format_cohort_csv` emits its cells.
_CSV_HEADER = ",".join(
    (
        "cert",
        "baseline_sap",
        "post_sap",
        "post_band",
        "measures",
        "measure_types",
        "cost_of_works",
        "bill_savings",
        "valuation_avg_pct",
        "error",
    )
)
|
|
|
|
|
|
def _csv_cell(value: object) -> str:
    """Render one value as a CSV cell that can never break its row.

    Args:
        value: The cell content; anything that is not None or a float is
            rendered with `str()`.

    Returns:
        "" for None, a float rounded to two decimals, otherwise the value's
        text with every comma replaced by ';' (so the unquoted row keeps its
        column count) and every line break collapsed to a single space (so a
        multi-line exception message cannot spill one cert over several CSV
        lines).
    """
    if value is None:
        return ""
    if isinstance(value, float):
        return f"{value:.2f}"
    # splitlines() recognises \n, \r\n, \r and the other line boundaries;
    # rejoining with a space keeps the message readable on one physical line.
    single_line = " ".join(str(value).splitlines())
    return single_line.replace(",", ";")
|
|
|
|
|
|
def format_cohort_csv(results: list[CertResult]) -> str:
    """Render the cohort as CSV text: the header row, then one row per cert.

    Each row follows the column order of `_CSV_HEADER`. For a result with no
    Plan the Plan-derived cells are left empty (the measure count is the
    result's own `measures` value); the error cell is empty for a result
    with no error. Every cell is rendered through `_csv_cell`. Rows are
    joined with "\n" and there is no trailing newline.
    """
    lines = [_CSV_HEADER]
    for result in results:
        plan = result.plan
        if plan is None:
            # No Plan: leave every Plan-derived column blank.
            kinds = ""
            band = cost = saving = valuation_pct = None
        else:
            kinds = ";".join(item.measure_type for item in plan.measures)
            band = plan.post_epc_rating.value
            cost = plan.cost_of_works
            saving = plan.energy_bill_savings
            valuation_pct = plan.valuation.average_pct

        row = (
            result.name,
            result.baseline_sap,
            result.post_sap,
            band,
            result.measures,
            kinds,
            cost,
            saving,
            valuation_pct,
            result.error,
        )
        lines.append(",".join(map(_csv_cell, row)))
    return "\n".join(lines)
|