mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
feat(modelling): offline cohort runner over an EPC-JSON dump
`harness.cohort.run_cohort(paths)` parses each API-shaped EPC JSON with from_api_response and models it via run_modelling — no database, no network — capturing per-cert errors instead of aborting the sweep, plus `format_cohort_summary`. A thin `scripts/run_modelling_cohort.py` CLI points it at a directory. Proven over the 57 golden API certs: 56 ran offline, 15 produced measures, 1 errored (COAL has no Fuel Rates entry — a BillDerivation coverage gap, not a harness one). Ready for the EPC dump. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
98f5ee4fca
commit
d8ef40c745
3 changed files with 184 additions and 0 deletions
102
harness/cohort.py
Normal file
102
harness/cohort.py
Normal file
|
|
@ -0,0 +1,102 @@
|
|||
"""Run a cohort of API-shaped EPC JSONs through Modelling, offline.
|
||||
|
||||
Parses each file with `EpcPropertyDataMapper.from_api_response` (the EPC-API
|
||||
shape) and runs it through `run_modelling` — no database, no network, no
|
||||
Baseline gate. A cert that raises (e.g. an unpriced fuel, an unmapped code) is
|
||||
captured as an error rather than aborting the sweep, so one bad cert never
|
||||
stops the inspection. Point it at your EPC dump and read the summary.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Iterable, Optional
|
||||
|
||||
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
|
||||
from harness.console import DEFAULT_CATALOGUE, run_modelling
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class CertResult:
    """Immutable record of what happened when one cert was modelled.

    A cert that modelled cleanly carries its measure count and the SAP
    before and after the plan. A cert that raised carries the error text
    instead, with `measures` set to 0 and both SAP fields set to None.
    """

    # Stem of the JSON file the cert was read from.
    name: str
    # Number of measures in the plan; 0 when the cert errored.
    measures: int
    # SAP before the plan; None when the cert errored.
    baseline_sap: Optional[float]
    # SAP after the plan; None when the cert errored.
    post_sap: Optional[float]
    # "<ExceptionType>: <message>" when the cert errored, otherwise None.
    error: Optional[str]
|
||||
|
||||
|
||||
def run_cohort(
    json_paths: Iterable[Path],
    *,
    goal_band: str = "C",
    catalogue_path: Path = DEFAULT_CATALOGUE,
) -> list[CertResult]:
    """Model every API-JSON path in `json_paths` offline.

    Each file is parsed with `EpcPropertyDataMapper.from_api_response` and
    run through `run_modelling` with table printing disabled.

    Args:
        json_paths: Files holding API-shaped EPC JSON. Plain string paths
            are accepted as well as `Path` objects.
        goal_band: Target band forwarded to `run_modelling`.
        catalogue_path: Catalogue location forwarded to `run_modelling`.

    Returns:
        One `CertResult` per input path, in input order. A cert that raised
        anywhere between reading the file and building its result is
        reported with `measures=0`, both SAPs `None` and
        `error="<ExceptionType>: <message>"`; the exception is not re-raised.
    """
    results: list[CertResult] = []
    for raw_path in json_paths:
        # Coerce up front so `.stem` is always available, including inside
        # the error branch, even when the caller passed a plain string.
        path = Path(raw_path)
        try:
            # Hand json the raw bytes: it detects UTF-8/16/32 itself, so the
            # result does not depend on the platform's locale encoding.
            payload = json.loads(path.read_bytes())
            epc = EpcPropertyDataMapper.from_api_response(payload)
            plan = run_modelling(
                epc,
                goal_band=goal_band,
                catalogue_path=catalogue_path,
                print_table=False,
            )
            result = CertResult(
                name=path.stem,
                measures=len(plan.measures),
                baseline_sap=plan.baseline.sap_continuous,
                post_sap=plan.post_sap_continuous,
                error=None,
            )
        except Exception as error:  # noqa: BLE001 — one bad cert must not stop the sweep
            result = CertResult(
                name=path.stem,
                measures=0,
                baseline_sap=None,
                post_sap=None,
                error=f"{type(error).__name__}: {error}",
            )
        results.append(result)
    return results
|
||||
|
||||
|
||||
def format_cohort_summary(results: list[CertResult]) -> str:
    """Render the cohort outcome as a short multi-line report.

    The report lists the cohort size, how many certs ran without error, how
    many of those produced at least one measure, how many errored, and the
    measure-count distribution of the successful certs (sorted by count of
    measures). When any cert errored, each distinct error text follows with
    its frequency, most frequent first.
    """
    succeeded: list[CertResult] = []
    failed: list[CertResult] = []
    for outcome in results:
        if outcome.error is None:
            succeeded.append(outcome)
        else:
            failed.append(outcome)

    producing = 0
    histogram: dict[int, int] = {}
    for outcome in succeeded:
        histogram[outcome.measures] = histogram.get(outcome.measures, 0) + 1
        if outcome.measures > 0:
            producing += 1

    tally: dict[str, int] = {}
    for outcome in failed:
        message = outcome.error
        assert message is not None
        tally[message] = tally.get(message, 0) + 1

    ordered_histogram = {count: histogram[count] for count in sorted(histogram)}
    report = [
        f"cohort size : {len(results)}",
        f"ran offline : {len(succeeded)}",
        f"w/ measures : {producing}",
        f"errors : {len(failed)}",
        f"measure-count distribution: {ordered_histogram}",
    ]
    if tally:
        report.append("error kinds:")
        # The sort is stable, so equally frequent errors keep first-seen order.
        ranked = sorted(tally.items(), key=lambda pair: pair[1], reverse=True)
        for message, count in ranked:
            report.append(f" {count:3d} {message}")
    return "\n".join(report)
|
||||
52
scripts/run_modelling_cohort.py
Normal file
52
scripts/run_modelling_cohort.py
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
"""Run an EPC-JSON dump through Modelling offline and print a summary.
|
||||
|
||||
The files must be API-shaped EPC JSON (identical to the EPC API response — what
|
||||
`from_api_response` parses). No database, no network. Run from the worktree root
|
||||
so imports resolve to this checkout, not /workspaces/model:
|
||||
|
||||
python -m scripts.run_modelling_cohort <dir-of-api-json> [goal_band]
|
||||
|
||||
e.g. against the committed golden cohort:
|
||||
|
||||
python -m scripts.run_modelling_cohort tests/domain/sap10_calculator/rdsap/fixtures/golden
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Root of this checkout: two levels up from this file's resolved path.
_REPO_ROOT = Path(__file__).resolve().parents[1]
# Put the worktree root first on sys.path so the `harness` import below
# resolves to this checkout rather than to another copy of the project
# (the import trap the module docstring warns about).
sys.path.insert(0, str(_REPO_ROOT))
|
||||
|
||||
from harness.cohort import format_cohort_summary, run_cohort # noqa: E402
|
||||
|
||||
|
||||
def main() -> None:
    """Model every `*.json` in the directory named on the command line.

    Reads the directory from the first argument and an optional goal band
    from the second (default "C"). Prints the cohort summary, then one line
    per cert that produced measures and has both SAP values.

    Exits with status 2 when no directory argument is given, and with
    status 1 when the directory holds no `*.json` files.
    """
    cli_args = sys.argv[1:]
    if not cli_args:
        print(
            "usage: python -m scripts.run_modelling_cohort "
            "<dir-of-api-json> [goal_band]"
        )
        raise SystemExit(2)

    directory = Path(cli_args[0])
    goal_band = cli_args[1] if len(cli_args) > 1 else "C"

    # Sorted so the sweep order (and therefore the output) is deterministic.
    paths = sorted(directory.glob("*.json"))
    if not paths:
        print(f"no *.json files under {directory}")
        raise SystemExit(1)

    results = run_cohort(paths, goal_band=goal_band)
    print(format_cohort_summary(results))
    print("\ncerts with measures:")
    for result in results:
        # Skip certs that errored or that produced no measures.
        if not result.measures:
            continue
        if result.baseline_sap is None or result.post_sap is None:
            continue
        print(
            f" {result.name} SAP {result.baseline_sap:.1f} -> "
            f"{result.post_sap:.1f} ({result.measures} measures)"
        )


if __name__ == "__main__":
    main()
|
||||
30
tests/harness/test_cohort.py
Normal file
30
tests/harness/test_cohort.py
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
"""Run a directory of API-shaped EPC JSONs through Modelling, offline."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from harness.cohort import CertResult, format_cohort_summary, run_cohort
|
||||
|
||||
# Directory of golden API-shaped EPC certs, located relative to the directory
# two levels above this test file.
_GOLDEN = Path(__file__).resolve().parents[1].joinpath(
    "domain", "sap10_calculator", "rdsap", "fixtures", "golden"
)
|
||||
|
||||
|
||||
def test_run_cohort_models_each_api_json_offline() -> None:
    """`run_cohort` yields one well-formed result per cert, in input order."""
    # Arrange — two real API-shaped EPC certs (identical to the EPC response).
    paths: list[Path] = sorted(_GOLDEN.glob("*.json"))[:2]
    assert len(paths) == 2

    # Act — no database, no network.
    results: list[CertResult] = run_cohort(paths, goal_band="C")

    # Assert — one result per cert, named after its file, in input order.
    assert [result.name for result in results] == [path.stem for path in paths]
    for result in results:
        if result.error is None:
            # Modelled: the measure count is a real, non-negative count.
            assert isinstance(result.measures, int)
            assert result.measures >= 0
        else:
            # Errored: the error text is set and the outcome fields are blank.
            assert result.error
            assert result.measures == 0
            assert result.baseline_sap is None
            assert result.post_sap is None

    # The summary renders without raising and its first line counts the cohort.
    summary: str = format_cohort_summary(results)
    first_line = summary.splitlines()[0]
    assert first_line.startswith("cohort size")
    assert first_line.rstrip().endswith("2")
|
||||
Loading…
Add table
Reference in a new issue