# Model/scripts/run_modelling_cohort.py

"""Run an EPC-JSON dump through Modelling offline — print tables + write a CSV.

The files must be API-shaped EPC JSON (identical to the EPC API response — what
`from_api_response` parses). No database, no network. Run from the worktree root
so imports resolve to this checkout, not /workspaces/model.

    # no args -> the committed golden cohort (57 real API certs)
    python -m scripts.run_modelling_cohort

    # your dump, optional goal band (default C)
    python -m scripts.run_modelling_cohort path/to/dump C

Prints a sense-check table for the first measure-bearing certs (a preview, so a
huge dump doesn't flood the terminal), the cohort summary, and writes the full
per-cert results to modelling_cohort.csv for browsing in a spreadsheet.
"""

from __future__ import annotations

import sys
from pathlib import Path

# Directory one level above the directory containing this script
# (parents[1] of the resolved file path).
_REPO_ROOT = Path(__file__).resolve().parents[1]
# Must run before the `harness` imports that follow: putting this directory at
# the front of sys.path makes those imports resolve against this checkout first.
sys.path.insert(0, str(_REPO_ROOT))  # worktree root first — avoid the import trap

from harness.cohort import (  # noqa: E402
    format_cohort_csv,
    format_cohort_summary,
    run_cohort,
)
from harness.plan_table import format_plan_table  # noqa: E402

# Directory globbed for *.json when no path argument is given on the command line.
_DEFAULT_DIR = _REPO_ROOT / "tests/domain/sap10_calculator/rdsap/fixtures/golden"
# Maximum number of per-cert plan tables printed to the terminal as a preview.
_PREVIEW_TABLES = 10
# Output CSV path; relative, so it resolves against the current working directory.
_CSV_PATH = Path("modelling_cohort.csv")


def main() -> None:
    """Model a directory of EPC JSON files and report the results.

    Positional command-line arguments (both optional):

    1. directory to glob for ``*.json`` — defaults to ``_DEFAULT_DIR``;
    2. goal band handed to ``run_cohort`` — defaults to ``"C"``.

    Prints at most ``_PREVIEW_TABLES`` plan tables, then the cohort summary,
    and writes the per-cert CSV to ``_CSV_PATH``. Exits with status 1 when the
    directory yields no ``*.json`` files.
    """
    cli_args = sys.argv[1:]
    if cli_args:
        source_dir = Path(cli_args[0])
    else:
        source_dir = _DEFAULT_DIR
    band = cli_args[1] if len(cli_args) >= 2 else "C"

    # Sorted so the cohort is processed in a stable, name-ordered sequence.
    json_files = sorted(source_dir.glob("*.json"))
    if not json_files:
        print(f"no *.json files under {source_dir}")
        sys.exit(1)

    banner = (
        f"modelling {len(json_files)} EPC JSON(s) from {source_dir} "
        f"(goal band {band}), offline — no database...\n"
    )
    print(banner)
    cohort = run_cohort(json_files, goal_band=band)

    # Only certs that have both a plan and at least one measure get a table,
    # and only the first _PREVIEW_TABLES of those are printed.
    previewable = [
        cert for cert in cohort if cert.plan is not None and cert.measures
    ]
    preview = previewable[:_PREVIEW_TABLES]
    for cert in preview:
        print(f"=== {cert.name} ===")
        print(format_plan_table(cert.plan))
        print()

    # Everything with measures that was not previewed is only in the CSV.
    with_measures = [cert for cert in cohort if cert.measures]
    hidden = len(with_measures) - len(preview)
    if hidden > 0:
        print(f"... and {hidden} more measure-bearing certs (see CSV)\n")

    print(format_cohort_summary(cohort))

    csv_text = format_cohort_csv(cohort) + "\n"
    _CSV_PATH.write_text(csv_text, encoding="utf-8")
    print(f"\nwrote per-cert CSV -> {_CSV_PATH.resolve()}")


# Run the CLI only when this module is the program entry point, not on import.
if __name__ == "__main__":
    main()