mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
feat(modelling): turnkey offline cohort script (tables + CSV)
CertResult now carries its Plan (with flat baseline/post-SAP/measures properties), and `format_cohort_csv` renders one browsable row per cert (SAP transition, band, measures, cost, bill saving, valuation %, error). `scripts/run_modelling_cohort.py` is turnkey: no args runs the committed golden cohort, prints a sense-check table for the first measure-bearing certs (a capped preview so a large dump doesn't flood the terminal), the summary, and writes modelling_cohort.csv (gitignored). Point it at the EPC dump when it lands. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
d8ef40c745
commit
8b5ab1c59e
4 changed files with 126 additions and 47 deletions
2
.gitignore
vendored
2
.gitignore
vendored
|
|
@ -298,4 +298,4 @@ pyrightconfig.json
|
|||
backlog/*
|
||||
|
||||
# Local Claude config files
|
||||
.claude/*
|
||||
.claude/*
modelling_cohort.csv
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ Parses each file with `EpcPropertyDataMapper.from_api_response` (the EPC-API
|
|||
shape) and runs it through `run_modelling` — no database, no network, no
|
||||
Baseline gate. A cert that raises (e.g. an unpriced fuel, an unmapped code) is
|
||||
captured as an error rather than aborting the sweep, so one bad cert never
|
||||
stops the inspection. Point it at your EPC dump and read the summary.
|
||||
stops the inspection. Point it at your EPC dump and read the summary / CSV.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
|
@ -15,19 +15,30 @@ from pathlib import Path
|
|||
from typing import Iterable, Optional
|
||||
|
||||
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
|
||||
from domain.modelling.plan import Plan
|
||||
from harness.console import DEFAULT_CATALOGUE, run_modelling
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class CertResult:
|
||||
"""The outcome of modelling one cert: its measure count and SAP transition,
|
||||
or the error it raised (then `measures` is 0 and the SAPs are None)."""
|
||||
"""The outcome of modelling one cert: its `Plan` (for full inspection), or
|
||||
the error it raised. The flat properties summarise the Plan for tables/CSV."""
|
||||
|
||||
name: str
|
||||
measures: int
|
||||
baseline_sap: Optional[float]
|
||||
post_sap: Optional[float]
|
||||
error: Optional[str]
|
||||
plan: Optional[Plan] = None
|
||||
error: Optional[str] = None
|
||||
|
||||
@property
|
||||
def measures(self) -> int:
|
||||
return 0 if self.plan is None else len(self.plan.measures)
|
||||
|
||||
@property
|
||||
def baseline_sap(self) -> Optional[float]:
|
||||
return None if self.plan is None else self.plan.baseline.sap_continuous
|
||||
|
||||
@property
|
||||
def post_sap(self) -> Optional[float]:
|
||||
return None if self.plan is None else self.plan.post_sap_continuous
|
||||
|
||||
|
||||
def run_cohort(
|
||||
|
|
@ -48,24 +59,10 @@ def run_cohort(
|
|||
catalogue_path=catalogue_path,
|
||||
print_table=False,
|
||||
)
|
||||
results.append(
|
||||
CertResult(
|
||||
name=path.stem,
|
||||
measures=len(plan.measures),
|
||||
baseline_sap=plan.baseline.sap_continuous,
|
||||
post_sap=plan.post_sap_continuous,
|
||||
error=None,
|
||||
)
|
||||
)
|
||||
results.append(CertResult(name=path.stem, plan=plan))
|
||||
except Exception as error: # noqa: BLE001 — one bad cert must not stop the sweep
|
||||
results.append(
|
||||
CertResult(
|
||||
name=path.stem,
|
||||
measures=0,
|
||||
baseline_sap=None,
|
||||
post_sap=None,
|
||||
error=f"{type(error).__name__}: {error}",
|
||||
)
|
||||
CertResult(name=path.stem, error=f"{type(error).__name__}: {error}")
|
||||
)
|
||||
return results
|
||||
|
||||
|
|
@ -100,3 +97,46 @@ def format_cohort_summary(results: list[CertResult]) -> str:
|
|||
for kind, count in sorted(error_kinds.items(), key=lambda item: -item[1])
|
||||
)
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
_CSV_HEADER = (
|
||||
"cert,baseline_sap,post_sap,post_band,measures,measure_types,"
|
||||
"cost_of_works,bill_savings,valuation_avg_pct,error"
|
||||
)
|
||||
|
||||
|
||||
def _csv_cell(value: object) -> str:
|
||||
"""Render a CSV cell, rounding floats and keeping the row comma-safe
|
||||
(measure types are ';'-joined; an error message's commas are stripped)."""
|
||||
if value is None:
|
||||
return ""
|
||||
if isinstance(value, float):
|
||||
return f"{value:.2f}"
|
||||
return str(value).replace(",", ";")
|
||||
|
||||
|
||||
def format_cohort_csv(results: list[CertResult]) -> str:
    """Render the cohort as CSV text: the header line, then one line per cert.

    Columns follow `_CSV_HEADER`. A cert without a Plan (one that errored)
    leaves every plan-derived column empty and reports its error in the last
    column. Measure types are ';'-joined so they stay inside a single cell.
    The returned text has no trailing newline.
    """
    lines = [_CSV_HEADER]
    for cert in results:
        plan = cert.plan
        if plan is None:
            band = cost = saving = valuation_pct = None
            types_cell = ""
        else:
            band = plan.post_epc_rating.value
            cost = plan.cost_of_works
            saving = plan.energy_bill_savings
            valuation_pct = plan.valuation.average_pct
            types_cell = ";".join(item.measure_type for item in plan.measures)

        # Same column order as _CSV_HEADER.
        row = (
            cert.name,
            cert.baseline_sap,
            cert.post_sap,
            band,
            cert.measures,
            types_cell,
            cost,
            saving,
            valuation_pct,
            cert.error,
        )
        lines.append(",".join(map(_csv_cell, row)))
    return "\n".join(lines)
|
||||
|
|
|
|||
|
|
@ -1,14 +1,18 @@
|
|||
"""Run an EPC-JSON dump through Modelling offline and print a summary.
|
||||
"""Run an EPC-JSON dump through Modelling offline — print tables + write a CSV.
|
||||
|
||||
The files must be API-shaped EPC JSON (identical to the EPC API response — what
|
||||
`from_api_response` parses). No database, no network. Run from the worktree root
|
||||
so imports resolve to this checkout, not /workspaces/model:
|
||||
so imports resolve to this checkout, not /workspaces/model.
|
||||
|
||||
python -m scripts.run_modelling_cohort <dir-of-api-json> [goal_band]
|
||||
# no args -> the committed golden cohort (57 real API certs)
|
||||
python -m scripts.run_modelling_cohort
|
||||
|
||||
e.g. against the committed golden cohort:
|
||||
# your dump, optional goal band (default C)
|
||||
python -m scripts.run_modelling_cohort path/to/dump C
|
||||
|
||||
python -m scripts.run_modelling_cohort tests/domain/sap10_calculator/rdsap/fixtures/golden
|
||||
Prints a sense-check table for the first measure-bearing certs (a preview, so a
|
||||
huge dump doesn't flood the terminal), the cohort summary, and writes the full
|
||||
per-cert results to modelling_cohort.csv for browsing in a spreadsheet.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
|
@ -19,33 +23,48 @@ from pathlib import Path
|
|||
_REPO_ROOT = Path(__file__).resolve().parents[1]
|
||||
sys.path.insert(0, str(_REPO_ROOT)) # worktree root first — avoid the import trap
|
||||
|
||||
from harness.cohort import format_cohort_summary, run_cohort # noqa: E402
|
||||
from harness.cohort import ( # noqa: E402
|
||||
format_cohort_csv,
|
||||
format_cohort_summary,
|
||||
run_cohort,
|
||||
)
|
||||
from harness.plan_table import format_plan_table # noqa: E402
|
||||
|
||||
_DEFAULT_DIR = _REPO_ROOT / "tests/domain/sap10_calculator/rdsap/fixtures/golden"
|
||||
_PREVIEW_TABLES = 10
|
||||
_CSV_PATH = Path("modelling_cohort.csv")
|
||||
|
||||
|
||||
def main() -> None:
|
||||
if len(sys.argv) < 2:
|
||||
print(
|
||||
"usage: python -m scripts.run_modelling_cohort "
|
||||
"<dir-of-api-json> [goal_band]"
|
||||
)
|
||||
raise SystemExit(2)
|
||||
|
||||
directory = Path(sys.argv[1])
|
||||
goal_band = sys.argv[2] if len(sys.argv) > 2 else "C"
|
||||
args = sys.argv[1:]
|
||||
directory = Path(args[0]) if args else _DEFAULT_DIR
|
||||
goal_band = args[1] if len(args) > 1 else "C"
|
||||
paths = sorted(directory.glob("*.json"))
|
||||
if not paths:
|
||||
print(f"no *.json files under {directory}")
|
||||
raise SystemExit(1)
|
||||
|
||||
print(
|
||||
f"modelling {len(paths)} EPC JSON(s) from {directory} "
|
||||
f"(goal band {goal_band}), offline — no database...\n"
|
||||
)
|
||||
results = run_cohort(paths, goal_band=goal_band)
|
||||
print(format_cohort_summary(results))
|
||||
print("\ncerts with measures:")
|
||||
|
||||
shown = 0
|
||||
for result in results:
|
||||
if result.measures and result.baseline_sap is not None and result.post_sap is not None:
|
||||
print(
|
||||
f" {result.name} SAP {result.baseline_sap:.1f} -> "
|
||||
f"{result.post_sap:.1f} ({result.measures} measures)"
|
||||
)
|
||||
if result.plan is not None and result.measures and shown < _PREVIEW_TABLES:
|
||||
print(f"=== {result.name} ===")
|
||||
print(format_plan_table(result.plan))
|
||||
print()
|
||||
shown += 1
|
||||
measure_bearing = sum(1 for result in results if result.measures)
|
||||
if measure_bearing > shown:
|
||||
print(f"... and {measure_bearing - shown} more measure-bearing certs (see CSV)\n")
|
||||
|
||||
print(format_cohort_summary(results))
|
||||
|
||||
_CSV_PATH.write_text(format_cohort_csv(results) + "\n", encoding="utf-8")
|
||||
print(f"\nwrote per-cert CSV -> {_CSV_PATH.resolve()}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
|||
|
|
@ -4,7 +4,12 @@ from __future__ import annotations
|
|||
|
||||
from pathlib import Path
|
||||
|
||||
from harness.cohort import CertResult, format_cohort_summary, run_cohort
|
||||
from harness.cohort import (
|
||||
CertResult,
|
||||
format_cohort_csv,
|
||||
format_cohort_summary,
|
||||
run_cohort,
|
||||
)
|
||||
|
||||
_GOLDEN = (
|
||||
Path(__file__).resolve().parents[1]
|
||||
|
|
@ -28,3 +33,18 @@ def test_run_cohort_models_each_api_json_offline() -> None:
|
|||
# The summary renders without raising and counts the cohort.
|
||||
summary: str = format_cohort_summary(results)
|
||||
assert "2" in summary
|
||||
|
||||
|
||||
def test_cohort_carries_each_plan_and_renders_a_csv() -> None:
    # Arrange / Act — model the first three golden certs offline.
    sample: list[Path] = sorted(_GOLDEN.glob("*.json"))[:3]
    results: list[CertResult] = run_cohort(sample)

    # Assert — a result holds exactly one of: a Plan (modelled) or an error.
    for outcome in results:
        has_plan: bool = outcome.plan is not None
        has_error: bool = outcome.error is not None
        assert has_plan != has_error

    # The CSV is the header row followed by exactly one row per cert.
    rendered: str = format_cohort_csv(results)
    rows: list[str] = rendered.splitlines()
    assert rows[0].startswith("cert,")
    assert len(rows) == len(results) + 1
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue