feat(modelling): cohort builder + cohort-level ParityReport

build_property_reports models a dump in order (errors captured per-cert);
parity_report_for aggregates the lodged-vs-calculated SAP across the cohort
into the existing ParityReport (MAE/RMSE/bias/worst-N), excluding certs that
couldn't be mapped or scored. Residual convention is the calculator's own
(predicted - actual), the negative of PropertyReport.sap_error.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-07-27 23:35:01 +00:00 · 2026-06-04 11:13:04 +00:00 · 2026-06-04 11:13:04 +00:00 · 5e4906dd70
commit 5e4906dd70
parent 2b04dddb06
2 changed files with 88 additions and 1 deletions
--- a/harness/report.py
+++ b/harness/report.py
@ -22,7 +22,7 @@ from __future__ import annotations
 import json
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Any, Final, Optional
+from typing import Any, Final, Iterable, Optional

 from datatypes.epc.domain.epc_property_data import (
    BuildingPartIdentifier,
@ -32,6 +32,11 @@ from datatypes.epc.domain.epc_property_data import (
 from datatypes.epc.domain.mapper import EpcPropertyDataMapper
 from domain.modelling.plan import Plan
 from domain.sap10_calculator.calculator import Sap10Calculator
+from domain.sap10_calculator.validation.parity_report import (
+    ParityCase,
+    ParityReport,
+    build_parity_report,
+)
 from harness.console import DEFAULT_CATALOGUE, run_modelling

 # A lodged-vs-calculated SAP gap beyond this many points is flagged for
@ -172,3 +177,37 @@ def build_property_report(
        plan_error=plan_error,
        measure_triggers=measure_triggers,
    )
+
+
+def build_property_reports(
+    paths: Iterable[Path],
+    *,
+    goal_band: str = "C",
+    catalogue_path: Path = DEFAULT_CATALOGUE,
+) -> list[PropertyReport]:
+    """Model every cert in `paths` and return the reports in input order.
+
+    Each path is passed to `build_property_report` with the same `goal_band`
+    and `catalogue_path`. No error handling is added here: the cohort only
+    survives a bad cert because `build_property_report` records failures on
+    the report it returns instead of raising."""
+    cohort: list[PropertyReport] = []
+    for cert_path in paths:
+        single: PropertyReport = build_property_report(
+            cert_path, goal_band=goal_band, catalogue_path=catalogue_path
+        )
+        cohort.append(single)
+    return cohort
+
+
+def parity_report_for(reports: Iterable[PropertyReport]) -> ParityReport:
+    """Fold a cohort of reports into one `ParityReport` (MAE / RMSE / bias /
+    worst-N), the cohort-level view of calculator error.
+
+    A report contributes a parity case only when both its lodged and its
+    calculated SAP are present; certs that failed to map or score are left
+    out because there is nothing to compare. Lodged SAP is passed as the
+    "actual" value and calculated SAP as the "predicted" one, so the
+    residual follows the calculator's convention (predicted − actual =
+    calculated − lodged), the negative of each report's `sap_error`."""
+    scorable: list[ParityCase] = []
+    for item in reports:
+        # Either score missing means there is no pair to compare — skip.
+        if item.lodged_sap is None or item.calculated_sap is None:
+            continue
+        scorable.append(
+            ParityCase(
+                certificate_number=item.name,
+                actual_sap=item.lodged_sap,
+                predicted_sap=item.calculated_sap,
+                is_typical=True,
+            )
+        )
+    return build_parity_report(scorable)
--- a/tests/harness/test_report.py
+++ b/tests/harness/test_report.py
@ -5,10 +5,13 @@ from __future__ import annotations
 import json
 from pathlib import Path

+from domain.sap10_calculator.validation.parity_report import ParityReport
 from harness.report import (
    MeasureTrigger,
    PropertyReport,
    build_property_report,
+    build_property_reports,
+    parity_report_for,
 )

 _GOLDEN = (
@ -110,6 +113,51 @@ def test_single_measure_cert_surfaces_only_that_measures_trigger() -> None:
    }


+def test_cohort_builder_models_each_path_capturing_errors(tmp_path: Path) -> None:
+    # Arrange — a payload the mapper rejects, queued after two golden certs.
+    broken_cert: Path = tmp_path / "broken.json"
+    broken_cert.write_text(json.dumps({"not": "an epc"}))
+    golden_names: list[str] = [_WITHIN_TOLERANCE, _DIVERGENT]
+    cohort_paths: list[Path] = [_GOLDEN / f"{name}.json" for name in golden_names]
+    cohort_paths.append(broken_cert)
+
+    # Act
+    built: list[PropertyReport] = build_property_reports(cohort_paths)
+
+    # Assert — reports come back in path order; the bad one carries its error.
+    names_in_order: list[str] = [entry.name for entry in built]
+    assert names_in_order == [*golden_names, "broken"]
+    assert built[2].calculator_error is not None
+
+
+def test_cohort_parity_report_excludes_unscorable_certs() -> None:
+    # Arrange — two scorable certs ("b" has the larger gap) plus one that
+    # errored and so has neither a lodged nor a calculated SAP.
+    smaller_gap: PropertyReport = PropertyReport(
+        name="a", lodged_sap=63, calculated_sap=62.747
+    )
+    larger_gap: PropertyReport = PropertyReport(
+        name="b", lodged_sap=73, calculated_sap=71.727
+    )
+    unscorable: PropertyReport = PropertyReport(
+        name="c", lodged_sap=None, calculated_sap=None, calculator_error="boom"
+    )
+    cohort: list[PropertyReport] = [smaller_gap, larger_gap, unscorable]
+
+    # Act
+    parity: ParityReport = parity_report_for(cohort)
+
+    # Assert — the unscorable cert is dropped, leaving two cases with b worst.
+    assert parity.case_count == 2
+    assert parity.worst_cases[0].certificate_number == "b"
+    # The residual is predicted − actual (calculated − lodged); both certs
+    # are under-predicted, so the cohort-wide bias must come out negative.
+    assert parity.global_bias < 0
+    gaps: list[float] = [abs(63 - 62.747), abs(73 - 71.727)]
+    expected_mae: float = sum(gaps) / len(gaps)
+    assert abs(parity.global_mae - expected_mae) <= 1e-9
+
+
 def test_unparseable_cert_is_captured_not_raised(tmp_path: Path) -> None:
    # Arrange — a payload the mapper rejects must not abort the report.
    bad: Path = tmp_path / "broken.json"