feat(epc-prediction): per-prediction confidence signal (#1226)

Adds PredictionConfidence (cohort size + per-component agreement = the
modal value's share among neighbours that lodge one) and
EpcPrediction.confidence(), a compute-only signal so downstream can flag
low-confidence components (ADR-0029 open item: 'confidence signal').

Sanity check on the 40-postcode corpus (1068 component predictions):
agreement is strongly predictive of correctness — pooled hit-rate 21.9%
(<0.5) / 46.7% (0.5-0.7) / 73.6% (0.7-0.9) / 95.5% (>=0.9); point-biserial
corr(agreement, correct) = 0.582. Cohort size tracks too (<6 -> 68.4%,
>=20 -> 96.0%). Surfacing / persistence is a separate HITL follow-up.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Khalim Conn-Kowlessar 2026-06-14 10:35:59 +00:00
parent ffaedd8d14
commit 07051b9401
2 changed files with 118 additions and 1 deletions

View file

@ -13,6 +13,7 @@ import copy
import math
import statistics
from collections import Counter, defaultdict
from dataclasses import dataclass
from datetime import date
from typing import Iterable, Optional, Union
@ -27,6 +28,27 @@ from domain.epc_prediction.comparable_properties import (
)
@dataclass(frozen=True)
class PredictionConfidence:
    """Compute-only confidence signal attached to a prediction.

    Tracks the ADR-0029 open item "confidence signal". The signal pairs the
    size of the cohort with, per homogeneous component, the cohort's
    *agreement*: the modal value's share (0..1) among the neighbours that
    lodge a value for that component. A small or split cohort marks a
    component that downstream may want to treat cautiously (e.g. the
    per-dwelling fields with a low accuracy ceiling).

    Surfacing / persisting the signal is a separate HITL follow-up; this
    type only carries the computed numbers.
    """

    # Number of Comparable Properties the prediction drew on.
    cohort_size: int
    # Component name -> modal value's share among neighbours lodging one.
    component_agreement: dict[str, float]

    def agreement(self, component: str) -> Optional[float]:
        """Return the modal-value share recorded for ``component``.

        Returns:
            The stored share, or ``None`` when the component has no entry
            (no neighbour lodged one, so it was not applicable).
        """
        shares = self.component_agreement
        if component not in shares:
            return None
        return shares[component]
class EpcPrediction:
"""Synthesises a predicted `EpcPropertyData` from Comparable Properties."""
@ -42,6 +64,31 @@ class EpcPrediction:
self._apply_overrides(predicted, target)
return predicted
def confidence(
    self, comparables: ComparableProperties
) -> PredictionConfidence:
    """Summarise how strongly the cohort agrees, without touching the
    prediction itself (ADR-0029).

    For every name in `_MAIN_PART_CATEGORICALS` (read via `_main_part_attr`)
    and then every name in `_FLOOR_DIM_CATEGORICALS` (read via
    `_main_floor_attr`), the modal value's share among the members that
    lodge a value is recorded. Attributes for which `_modal_share` returns
    None are left out of the mapping.

    Returns:
        A `PredictionConfidence` holding the number of cohort members and
        the per-attribute agreement mapping.
    """
    cohort: tuple[Comparable, ...] = comparables.members
    shares: dict[str, float] = {}
    # Main-part attributes first, floor dimensions second: the same
    # insertion (and overwrite) order as handling them one group at a time.
    sources = (
        (_MAIN_PART_CATEGORICALS, _main_part_attr),
        (_FLOOR_DIM_CATEGORICALS, _main_floor_attr),
    )
    for names, read in sources:
        for name in names:
            observed: Optional[float] = _modal_share(
                read(member, name) for member in cohort
            )
            if observed is None:
                continue
            shares[name] = observed
    return PredictionConfidence(
        cohort_size=len(cohort), component_agreement=shares
    )
@staticmethod
def _template(comparables: ComparableProperties) -> Comparable:
"""The representative comparable whose structure seeds the prediction:
@ -157,6 +204,18 @@ def _mode(
return Counter(present).most_common(1)[0][0]
def _modal_share(
values: Iterable[Optional[Union[int, str]]],
) -> Optional[float]:
"""The most common value's share of the present (non-None) values — a 0..1
measure of how much the cohort agrees or None when none are present."""
present = [v for v in values if v is not None]
if not present:
return None
modal_count: int = Counter(present).most_common(1)[0][1]
return modal_count / len(present)
def _recency_weighted_mode(
members: tuple[Comparable, ...], attr: str
) -> Optional[Union[int, str]]:

View file

@ -19,7 +19,10 @@ from domain.epc_prediction.comparable_properties import (
ComparableProperties,
PredictionTarget,
)
from domain.epc_prediction.epc_prediction import EpcPrediction
from domain.epc_prediction.epc_prediction import (
EpcPrediction,
PredictionConfidence,
)
def _epc(
@ -223,6 +226,61 @@ def test_recency_weights_roof_insulation_mode() -> None:
assert predicted.sap_building_parts[0].roof_insulation_thickness == 300
def test_confidence_reports_cohort_size_and_unanimous_agreement() -> None:
    # Arrange — three neighbours that all lodge wall construction code 1
    # (cavity), i.e. a cohort with no disagreement.
    neighbours = _cohort(*(_epc(wall_construction=1) for _ in range(3)))

    # Act
    signal: PredictionConfidence = EpcPrediction().confidence(neighbours)

    # Assert — every neighbour is counted and the wall share is total.
    assert signal.cohort_size == 3
    assert signal.agreement("wall_construction") == 1.0
def test_confidence_agreement_is_the_modal_share_of_the_cohort() -> None:
    # Arrange — a split cohort: three cavity walls (1) against a single
    # solid-brick wall (2).
    cavity = [_epc(wall_construction=1) for _ in range(3)]
    neighbours = _cohort(*cavity, _epc(wall_construction=2))

    # Act
    signal: PredictionConfidence = EpcPrediction().confidence(neighbours)

    # Assert — the modal value holds 3 of the 4 lodged values.
    wall_share = signal.agreement("wall_construction")
    assert wall_share is not None
    assert abs(wall_share - 0.75) <= 1e-9
def test_confidence_excludes_absent_component_values_from_the_denominator() -> None:
    # Arrange — roof construction is lodged as code 2 by two neighbours and
    # not lodged at all by the third; the gap must not drag agreement to 2/3.
    lodged_codes = (2, 2, None)
    neighbours = _cohort(
        *(_epc(roof_construction=code) for code in lodged_codes)
    )

    # Act
    signal: PredictionConfidence = EpcPrediction().confidence(neighbours)

    # Assert — only the two present (and identical) values form the share,
    # yet the cohort size still counts all three neighbours.
    roof_share = signal.agreement("roof_construction")
    assert roof_share is not None
    assert abs(roof_share - 1.0) <= 1e-9
    assert signal.cohort_size == 3
def test_applies_a_known_wall_override_over_the_mode() -> None:
# Arrange — the cohort mode is cavity (1), but we KNOW the target is solid
# brick (2), a Landlord Override. The known value must win over the estimate.