feat(modelling): cohort builder + cohort-level ParityReport

build_property_reports models a dump in order (errors captured per-cert);
parity_report_for aggregates the lodged-vs-calculated SAP across the cohort
into the existing ParityReport (MAE/RMSE/bias/worst-N), excluding certs that
couldn't be mapped or scored. Residual convention is the calculator's own
(predicted - actual), the negative of PropertyReport.sap_error.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Khalim Conn-Kowlessar 2026-06-04 11:13:04 +00:00
parent 2b04dddb06
commit 5e4906dd70
2 changed files with 88 additions and 1 deletions

View file

@ -22,7 +22,7 @@ from __future__ import annotations
import json
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Final, Optional
from typing import Any, Final, Iterable, Optional
from datatypes.epc.domain.epc_property_data import (
BuildingPartIdentifier,
@ -32,6 +32,11 @@ from datatypes.epc.domain.epc_property_data import (
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
from domain.modelling.plan import Plan
from domain.sap10_calculator.calculator import Sap10Calculator
from domain.sap10_calculator.validation.parity_report import (
ParityCase,
ParityReport,
build_parity_report,
)
from harness.console import DEFAULT_CATALOGUE, run_modelling
# A lodged-vs-calculated SAP gap beyond this many points is flagged for
@ -172,3 +177,37 @@ def build_property_report(
plan_error=plan_error,
measure_triggers=measure_triggers,
)
def build_property_reports(
    paths: Iterable[Path],
    *,
    goal_band: str = "C",
    catalogue_path: Path = DEFAULT_CATALOGUE,
) -> list[PropertyReport]:
    """Model a whole cohort: one `PropertyReport` per input path.

    Reports come back in the same order as `paths`. Each path is handed to
    `build_property_report` with the same `goal_band` and `catalogue_path`;
    that builder is relied on to record failures on the report itself rather
    than raise, so a single bad cert does not abort the rest of the cohort.
    """
    cohort: list[PropertyReport] = []
    for cert_path in paths:
        single: PropertyReport = build_property_report(
            cert_path,
            goal_band=goal_band,
            catalogue_path=catalogue_path,
        )
        cohort.append(single)
    return cohort
def parity_report_for(reports: Iterable[PropertyReport]) -> ParityReport:
    """Roll the cohort's lodged-vs-calculated SAP up into a `ParityReport`.

    The result is the cohort-level calculator-error view (MAE / RMSE / bias /
    worst-N) produced by `build_parity_report`.

    Reports missing either a lodged or a calculated SAP (certs that failed to
    map or score) are left out — they have no parity case to compare.

    The residual convention is the calculator's own: predicted - actual, i.e.
    calculated - lodged, which is the negative of each report's `sap_error`.
    """
    cases: list[ParityCase] = []
    for report in reports:
        # Guard: without both values there is nothing to compare.
        if report.lodged_sap is None or report.calculated_sap is None:
            continue
        cases.append(
            ParityCase(
                certificate_number=report.name,
                actual_sap=report.lodged_sap,
                predicted_sap=report.calculated_sap,
                # NOTE(review): every cohort cert is flagged typical here;
                # confirm how ParityReport uses this flag.
                is_typical=True,
            )
        )
    return build_parity_report(cases)

View file

@ -5,10 +5,13 @@ from __future__ import annotations
import json
from pathlib import Path
from domain.sap10_calculator.validation.parity_report import ParityReport
from harness.report import (
MeasureTrigger,
PropertyReport,
build_property_report,
build_property_reports,
parity_report_for,
)
_GOLDEN = (
@ -110,6 +113,51 @@ def test_single_measure_cert_surfaces_only_that_measures_trigger() -> None:
}
def test_cohort_builder_models_each_path_capturing_errors(tmp_path: Path) -> None:
    """The cohort builder yields one report per path and keeps going on a bad cert."""
    # Arrange — a payload the mapper rejects, placed after two real golden certs.
    rejected: Path = tmp_path / "broken.json"
    rejected.write_text(json.dumps({"not": "an epc"}))
    cohort_paths: list[Path] = [
        _GOLDEN / f"{_WITHIN_TOLERANCE}.json",
        _GOLDEN / f"{_DIVERGENT}.json",
        rejected,
    ]

    # Act
    built: list[PropertyReport] = build_property_reports(cohort_paths)

    # Assert — input order is preserved and the broken cert carries its error.
    built_names: list[str] = [item.name for item in built]
    assert built_names == [_WITHIN_TOLERANCE, _DIVERGENT, "broken"]
    assert built[2].calculator_error is not None
def test_cohort_parity_report_excludes_unscorable_certs() -> None:
    """Only certs with both lodged and calculated SAP become parity cases."""
    # Arrange — a within-tolerance cert, a divergent cert, and an unscorable one.
    within = PropertyReport(name="a", lodged_sap=63, calculated_sap=62.747)
    divergent = PropertyReport(name="b", lodged_sap=73, calculated_sap=71.727)
    unscorable = PropertyReport(
        name="c", lodged_sap=None, calculated_sap=None, calculator_error="boom"
    )
    cohort: list[PropertyReport] = [within, divergent, unscorable]

    # Act
    parity: ParityReport = parity_report_for(cohort)

    # Assert — only the two scorable certs form parity cases; b is the worst.
    assert parity.case_count == 2
    worst = parity.worst_cases[0]
    assert worst.certificate_number == "b"

    # ParityReport's residual is predicted - actual (calculated - lodged); we
    # under-predict both certs, so the global bias is negative.
    assert parity.global_bias < 0

    gap_a: float = abs(63 - 62.747)
    gap_b: float = abs(73 - 71.727)
    expected_mae: float = (gap_a + gap_b) / 2
    assert abs(parity.global_mae - expected_mae) <= 1e-9
def test_unparseable_cert_is_captured_not_raised(tmp_path: Path) -> None:
# Arrange — a payload the mapper rejects must not abort the report.
bad: Path = tmp_path / "broken.json"