# Model/scripts/run_modelling_cohort.py

"""Run an EPC-JSON dump through Modelling offline and print a summary.

The files must be API-shaped EPC JSON (identical to the EPC API response — what
`from_api_response` parses). No database, no network. Run from the worktree root
so imports resolve to this checkout, not /workspaces/model:

    python -m scripts.run_modelling_cohort <dir-of-api-json> [goal_band]

`goal_band` is optional and defaults to "C".

e.g. against the committed golden cohort:

    python -m scripts.run_modelling_cohort tests/domain/sap10_calculator/rdsap/fixtures/golden
"""

from __future__ import annotations

import sys
from pathlib import Path

# This script is invoked as ``scripts.run_modelling_cohort`` from the worktree
# root, so parents[1] of its resolved path (the directory above ``scripts/``)
# is that root.
_REPO_ROOT = Path(__file__).resolve().parents[1]
# Put the worktree root first on sys.path so the project import below resolves
# to this checkout rather than /workspaces/model (see the module docstring).
# This must run before that import, hence the E402 suppression on it.
sys.path.insert(0, str(_REPO_ROOT))

from harness.cohort import format_cohort_summary, run_cohort  # noqa: E402


def main() -> None:
    """Run the cohort named on the command line and print its summary.

    Reads ``sys.argv``: the first argument is a directory containing ``*.json``
    files (required); the optional second argument is the goal band, which
    defaults to ``"C"``. The sorted file list is handed to ``run_cohort`` and
    the text from ``format_cohort_summary`` is printed to stdout, followed by
    one line for every result that has measures and both a baseline and a
    post SAP value.

    Diagnostics (usage, bad directory, no input files) are written to stderr
    so that stdout carries only the report and stays safe to pipe or redirect.

    Raises:
        SystemExit: with status 2 when the directory argument is missing, and
            with status 1 when the path is not a directory or contains no
            ``*.json`` files.
    """
    args = sys.argv[1:]
    if not args:
        print(
            "usage: python -m scripts.run_modelling_cohort "
            "<dir-of-api-json> [goal_band]",
            file=sys.stderr,
        )
        raise SystemExit(2)

    directory = Path(args[0])
    goal_band = args[1] if len(args) > 1 else "C"

    # glob() on a missing path or a regular file just yields nothing, which
    # would otherwise be reported as "no *.json files" and hide the real typo.
    if not directory.is_dir():
        print(f"not a directory: {directory}", file=sys.stderr)
        raise SystemExit(1)

    # Sorted so the run order (and therefore the report) is deterministic.
    paths = sorted(directory.glob("*.json"))
    if not paths:
        print(f"no *.json files under {directory}", file=sys.stderr)
        raise SystemExit(1)

    results = run_cohort(paths, goal_band=goal_band)
    print(format_cohort_summary(results))
    print("\ncerts with measures:")
    for result in results:
        if not result.measures:
            continue
        # Both scores are formatted with ``:.1f`` below, so skip any result
        # where either one is missing.
        if result.baseline_sap is None or result.post_sap is None:
            continue
        print(
            f"  {result.name}  SAP {result.baseline_sap:.1f} -> "
            f"{result.post_sap:.1f}  ({result.measures} measures)"
        )


# Run the CLI only when this module is executed (directly or via ``python -m``),
# not when it is imported.
if __name__ == "__main__":
    main()