diff --git a/.gitignore b/.gitignore index 6cd39e9d..a1bd9c0b 100644 --- a/.gitignore +++ b/.gitignore @@ -298,4 +298,5 @@ pyrightconfig.json backlog/* # Local Claude config files -.claude/* \ No newline at end of file +.claude/* +modelling_cohort.csv diff --git a/harness/cohort.py b/harness/cohort.py index a3ff19cc..a56aacb0 100644 --- a/harness/cohort.py +++ b/harness/cohort.py @@ -4,7 +4,7 @@ Parses each file with `EpcPropertyDataMapper.from_api_response` (the EPC-API shape) and runs it through `run_modelling` — no database, no network, no Baseline gate. A cert that raises (e.g. an unpriced fuel, an unmapped code) is captured as an error rather than aborting the sweep, so one bad cert never -stops the inspection. Point it at your EPC dump and read the summary. +stops the inspection. Point it at your EPC dump and read the summary / CSV. """ from __future__ import annotations @@ -15,19 +15,30 @@ from pathlib import Path from typing import Iterable, Optional from datatypes.epc.domain.mapper import EpcPropertyDataMapper +from domain.modelling.plan import Plan from harness.console import DEFAULT_CATALOGUE, run_modelling @dataclass(frozen=True) class CertResult: - """The outcome of modelling one cert: its measure count and SAP transition, - or the error it raised (then `measures` is 0 and the SAPs are None).""" + """The outcome of modelling one cert: its `Plan` (for full inspection), or + the error it raised. 
The flat properties summarise the Plan for tables/CSV.""" name: str - measures: int - baseline_sap: Optional[float] - post_sap: Optional[float] - error: Optional[str] + plan: Optional[Plan] = None + error: Optional[str] = None + + @property + def measures(self) -> int: + return 0 if self.plan is None else len(self.plan.measures) + + @property + def baseline_sap(self) -> Optional[float]: + return None if self.plan is None else self.plan.baseline.sap_continuous + + @property + def post_sap(self) -> Optional[float]: + return None if self.plan is None else self.plan.post_sap_continuous def run_cohort( @@ -48,24 +59,10 @@ def run_cohort( catalogue_path=catalogue_path, print_table=False, ) - results.append( - CertResult( - name=path.stem, - measures=len(plan.measures), - baseline_sap=plan.baseline.sap_continuous, - post_sap=plan.post_sap_continuous, - error=None, - ) - ) + results.append(CertResult(name=path.stem, plan=plan)) except Exception as error: # noqa: BLE001 — one bad cert must not stop the sweep results.append( - CertResult( - name=path.stem, - measures=0, - baseline_sap=None, - post_sap=None, - error=f"{type(error).__name__}: {error}", - ) + CertResult(name=path.stem, error=f"{type(error).__name__}: {error}") ) return results @@ -100,3 +97,46 @@ def format_cohort_summary(results: list[CertResult]) -> str: for kind, count in sorted(error_kinds.items(), key=lambda item: -item[1]) ) return "\n".join(lines) + + +_CSV_HEADER = ( + "cert,baseline_sap,post_sap,post_band,measures,measure_types," + "cost_of_works,bill_savings,valuation_avg_pct,error" +) + + +def _csv_cell(value: object) -> str: + """Render a CSV cell, rounding floats and keeping the row comma-safe + (measure types are ';'-joined; an error message's commas become ';').""" + if value is None: + return "" + if isinstance(value, float): + return f"{value:.2f}" + return str(value).replace(",", ";") + + +def format_cohort_csv(results: list[CertResult]) -> str: + """One header row plus one row per cert — 
browsable/sortable in a + spreadsheet for a large dump.""" + rows = [_CSV_HEADER] + for result in results: + plan = result.plan + measure_types = ( + ";".join(measure.measure_type for measure in plan.measures) + if plan is not None + else "" + ) + cells = [ + result.name, + result.baseline_sap, + result.post_sap, + plan.post_epc_rating.value if plan is not None else None, + result.measures, + measure_types, + plan.cost_of_works if plan is not None else None, + plan.energy_bill_savings if plan is not None else None, + plan.valuation.average_pct if plan is not None else None, + result.error, + ] + rows.append(",".join(_csv_cell(cell) for cell in cells)) + return "\n".join(rows) diff --git a/scripts/run_modelling_cohort.py b/scripts/run_modelling_cohort.py index ec6a04eb..d43cc66a 100644 --- a/scripts/run_modelling_cohort.py +++ b/scripts/run_modelling_cohort.py @@ -1,14 +1,18 @@ -"""Run an EPC-JSON dump through Modelling offline and print a summary. +"""Run an EPC-JSON dump through Modelling offline — print tables + write a CSV. The files must be API-shaped EPC JSON (identical to the EPC API response — what `from_api_response` parses). No database, no network. Run from the worktree root -so imports resolve to this checkout, not /workspaces/model: +so imports resolve to this checkout, not /workspaces/model. - python -m scripts.run_modelling_cohort [goal_band] + # no args -> the committed golden cohort (57 real API certs) + python -m scripts.run_modelling_cohort -e.g. against the committed golden cohort: + # your dump, optional goal band (default C) + python -m scripts.run_modelling_cohort path/to/dump C - python -m scripts.run_modelling_cohort tests/domain/sap10_calculator/rdsap/fixtures/golden +Prints a sense-check table for the first measure-bearing certs (a preview, so a +huge dump doesn't flood the terminal), the cohort summary, and writes the full +per-cert results to modelling_cohort.csv for browsing in a spreadsheet. 
""" from __future__ import annotations @@ -19,33 +23,48 @@ from pathlib import Path _REPO_ROOT = Path(__file__).resolve().parents[1] sys.path.insert(0, str(_REPO_ROOT)) # worktree root first — avoid the import trap -from harness.cohort import format_cohort_summary, run_cohort # noqa: E402 +from harness.cohort import ( # noqa: E402 + format_cohort_csv, + format_cohort_summary, + run_cohort, +) +from harness.plan_table import format_plan_table # noqa: E402 + +_DEFAULT_DIR = _REPO_ROOT / "tests/domain/sap10_calculator/rdsap/fixtures/golden" +_PREVIEW_TABLES = 10 +_CSV_PATH = Path("modelling_cohort.csv") def main() -> None: - if len(sys.argv) < 2: - print( - "usage: python -m scripts.run_modelling_cohort " - " [goal_band]" - ) - raise SystemExit(2) - - directory = Path(sys.argv[1]) - goal_band = sys.argv[2] if len(sys.argv) > 2 else "C" + args = sys.argv[1:] + directory = Path(args[0]) if args else _DEFAULT_DIR + goal_band = args[1] if len(args) > 1 else "C" paths = sorted(directory.glob("*.json")) if not paths: print(f"no *.json files under {directory}") raise SystemExit(1) + print( + f"modelling {len(paths)} EPC JSON(s) from {directory} " + f"(goal band {goal_band}), offline — no database...\n" + ) results = run_cohort(paths, goal_band=goal_band) - print(format_cohort_summary(results)) - print("\ncerts with measures:") + + shown = 0 for result in results: - if result.measures and result.baseline_sap is not None and result.post_sap is not None: - print( - f" {result.name} SAP {result.baseline_sap:.1f} -> " - f"{result.post_sap:.1f} ({result.measures} measures)" - ) + if result.plan is not None and result.measures and shown < _PREVIEW_TABLES: + print(f"=== {result.name} ===") + print(format_plan_table(result.plan)) + print() + shown += 1 + measure_bearing = sum(1 for result in results if result.measures) + if measure_bearing > shown: + print(f"... 
and {measure_bearing - shown} more measure-bearing certs (see CSV)\n") + + print(format_cohort_summary(results)) + + _CSV_PATH.write_text(format_cohort_csv(results) + "\n", encoding="utf-8") + print(f"\nwrote per-cert CSV -> {_CSV_PATH.resolve()}") if __name__ == "__main__": diff --git a/tests/harness/test_cohort.py b/tests/harness/test_cohort.py index c040e160..beac2505 100644 --- a/tests/harness/test_cohort.py +++ b/tests/harness/test_cohort.py @@ -4,7 +4,12 @@ from __future__ import annotations from pathlib import Path -from harness.cohort import CertResult, format_cohort_summary, run_cohort +from harness.cohort import ( + CertResult, + format_cohort_csv, + format_cohort_summary, + run_cohort, +) _GOLDEN = ( Path(__file__).resolve().parents[1] @@ -28,3 +33,18 @@ def test_run_cohort_models_each_api_json_offline() -> None: # The summary renders without raising and counts the cohort. summary: str = format_cohort_summary(results) assert "2" in summary + + +def test_cohort_carries_each_plan_and_renders_a_csv() -> None: + # Arrange / Act + paths: list[Path] = sorted(_GOLDEN.glob("*.json"))[:3] + results: list[CertResult] = run_cohort(paths) + + # Assert — each cert either modelled (carries its Plan) or errored. + for result in results: + assert (result.plan is not None) != (result.error is not None) + # CSV: a header row plus one row per cert, browsable in a spreadsheet. + csv: str = format_cohort_csv(results) + lines: list[str] = csv.splitlines() + assert lines[0].startswith("cert,") + assert len(lines) == len(results) + 1