# Patch summary (documentation only: `patch` and `git apply` skip text that
# precedes the first "diff --git" line).
#
# harness/report.py
#   - Imports `Iterable` from `typing`, plus `ParityCase`, `ParityReport` and
#     `build_parity_report` from
#     `domain.sap10_calculator.validation.parity_report`.
#   - Adds `build_property_reports(paths, *, goal_band="C",
#     catalogue_path=DEFAULT_CATALOGUE)`: calls `build_property_report` once
#     per path, forwarding both keyword arguments, and returns the results as
#     a list in input order.
#   - Adds `parity_report_for(reports)`: builds one `ParityCase` for every
#     report whose `lodged_sap` and `calculated_sap` are both not None
#     (`name` -> `certificate_number`, `lodged_sap` -> `actual_sap`,
#     `calculated_sap` -> `predicted_sap`, `is_typical=True` for every case)
#     and returns `build_parity_report(cases)`.
#
# tests/harness/test_report.py
#   - Imports `ParityReport`, `build_property_reports` and
#     `parity_report_for`.
#   - `test_cohort_builder_models_each_path_capturing_errors`: runs the cohort
#     builder over two files under `_GOLDEN` and a temp file holding
#     `{"not": "an epc"}`; asserts the report names in input order and that
#     the third report's `calculator_error` is not None.
#   - `test_cohort_parity_report_excludes_unscorable_certs`: feeds three
#     hand-built `PropertyReport`s (one with both SAP values None) to
#     `parity_report_for`; asserts `case_count == 2`, that "b" is the first
#     worst case, that `global_bias` is negative, and that `global_mae` is
#     within 1e-9 of the hand-computed mean absolute error.
#
# NOTE(review): leading whitespace on the hunk lines follows 4-space
# conventions; confirm the context lines match the target files byte-for-byte
# before applying.
diff --git a/harness/report.py b/harness/report.py
index 4a21a73b..28af881d 100644
--- a/harness/report.py
+++ b/harness/report.py
@@ -22,7 +22,7 @@ from __future__ import annotations
 import json
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Any, Final, Optional
+from typing import Any, Final, Iterable, Optional
 
 from datatypes.epc.domain.epc_property_data import (
     BuildingPartIdentifier,
@@ -32,6 +32,11 @@ from datatypes.epc.domain.epc_property_data import (
 from datatypes.epc.domain.mapper import EpcPropertyDataMapper
 from domain.modelling.plan import Plan
 from domain.sap10_calculator.calculator import Sap10Calculator
+from domain.sap10_calculator.validation.parity_report import (
+    ParityCase,
+    ParityReport,
+    build_parity_report,
+)
 from harness.console import DEFAULT_CATALOGUE, run_modelling
 
 # A lodged-vs-calculated SAP gap beyond this many points is flagged for
@@ -172,3 +177,37 @@ def build_property_report(
         plan_error=plan_error,
         measure_triggers=measure_triggers,
     )
+
+
+def build_property_reports(
+    paths: Iterable[Path],
+    *,
+    goal_band: str = "C",
+    catalogue_path: Path = DEFAULT_CATALOGUE,
+) -> list[PropertyReport]:
+    """Build one `PropertyReport` per path, in order. Errors are captured on
+    each report, never raised, so one bad cert never aborts the cohort."""
+    return [
+        build_property_report(path, goal_band=goal_band, catalogue_path=catalogue_path)
+        for path in paths
+    ]
+
+
+def parity_report_for(reports: Iterable[PropertyReport]) -> ParityReport:
+    """Aggregate the cohort's lodged-vs-calculated SAP into a `ParityReport`
+    (MAE / RMSE / bias / worst-N) for the cohort-level calculator-error view.
+    Certs that failed to map or score (no lodged or calculated SAP) are
+    excluded — they have no parity case to compare. The residual convention is
+    the calculator's own (predicted − actual = calculated − lodged), the
+    negative of each report's `sap_error`."""
+    cases: list[ParityCase] = [
+        ParityCase(
+            certificate_number=report.name,
+            actual_sap=report.lodged_sap,
+            predicted_sap=report.calculated_sap,
+            is_typical=True,
+        )
+        for report in reports
+        if report.lodged_sap is not None and report.calculated_sap is not None
+    ]
+    return build_parity_report(cases)
diff --git a/tests/harness/test_report.py b/tests/harness/test_report.py
index 4f80eee4..e60d95ff 100644
--- a/tests/harness/test_report.py
+++ b/tests/harness/test_report.py
@@ -5,10 +5,13 @@ from __future__ import annotations
 import json
 from pathlib import Path
 
+from domain.sap10_calculator.validation.parity_report import ParityReport
 from harness.report import (
     MeasureTrigger,
     PropertyReport,
     build_property_report,
+    build_property_reports,
+    parity_report_for,
 )
 
 _GOLDEN = (
@@ -110,6 +113,51 @@ def test_single_measure_cert_surfaces_only_that_measures_trigger() -> None:
     }
 
 
+def test_cohort_builder_models_each_path_capturing_errors(tmp_path: Path) -> None:
+    # Arrange — two real certs plus one the mapper rejects.
+    bad: Path = tmp_path / "broken.json"
+    bad.write_text(json.dumps({"not": "an epc"}))
+    paths: list[Path] = [
+        _GOLDEN / f"{_WITHIN_TOLERANCE}.json",
+        _GOLDEN / f"{_DIVERGENT}.json",
+        bad,
+    ]
+
+    # Act
+    reports: list[PropertyReport] = build_property_reports(paths)
+
+    # Assert — one report per path, the bad one carrying its error.
+    assert [report.name for report in reports] == [
+        _WITHIN_TOLERANCE,
+        _DIVERGENT,
+        "broken",
+    ]
+    assert reports[2].calculator_error is not None
+
+
+def test_cohort_parity_report_excludes_unscorable_certs() -> None:
+    # Arrange — a within-tolerance cert, a divergent cert, and an unscorable one.
+    reports: list[PropertyReport] = [
+        PropertyReport(name="a", lodged_sap=63, calculated_sap=62.747),
+        PropertyReport(name="b", lodged_sap=73, calculated_sap=71.727),
+        PropertyReport(
+            name="c", lodged_sap=None, calculated_sap=None, calculator_error="boom"
+        ),
+    ]
+
+    # Act
+    parity: ParityReport = parity_report_for(reports)
+
+    # Assert — only the two scorable certs form parity cases; b is the worst.
+    assert parity.case_count == 2
+    assert parity.worst_cases[0].certificate_number == "b"
+    # ParityReport's residual is predicted − actual (calculated − lodged); we
+    # under-predict both certs, so the global bias is negative.
+    assert parity.global_bias < 0
+    expected_mae: float = (abs(63 - 62.747) + abs(73 - 71.727)) / 2
+    assert abs(parity.global_mae - expected_mae) <= 1e-9
+
+
 def test_unparseable_cert_is_captured_not_raised(tmp_path: Path) -> None:
     # Arrange — a payload the mapper rejects must not abort the report.
     bad: Path = tmp_path / "broken.json"