diff --git a/domain/epc_prediction/epc_prediction.py b/domain/epc_prediction/epc_prediction.py
index 8d6a9b3c..251d8cc0 100644
--- a/domain/epc_prediction/epc_prediction.py
+++ b/domain/epc_prediction/epc_prediction.py
@@ -13,6 +13,7 @@ import copy
 import math
 import statistics
 from collections import Counter, defaultdict
+from dataclasses import dataclass
 from datetime import date
 from typing import Iterable, Optional, Union
 
@@ -27,6 +28,27 @@ from domain.epc_prediction.comparable_properties import (
 )
 
 
+@dataclass(frozen=True)
+class PredictionConfidence:
+    """A compute-only confidence signal for a prediction (ADR-0029 open item).
+
+    `cohort_size` is the number of Comparable Properties the prediction drew on;
+    `component_agreement` maps a homogeneous component to the cohort's *agreement*
+    — the modal value's share (0..1) of the neighbours that lodge one. A small or
+    split cohort flags a component downstream may want to treat cautiously (e.g.
+    the per-dwelling fields with a low accuracy ceiling). Surfacing / persisting
+    this is a separate HITL follow-up; here it is computed only.
+    """
+
+    cohort_size: int
+    component_agreement: dict[str, float]
+
+    def agreement(self, component: str) -> Optional[float]:
+        """The cohort's modal-value share for a component, or None when no
+        neighbour lodges one (it was not applicable)."""
+        return self.component_agreement.get(component)
+
+
 class EpcPrediction:
     """Synthesises a predicted `EpcPropertyData` from Comparable Properties."""
 
@@ -42,6 +64,31 @@ class EpcPrediction:
         self._apply_overrides(predicted, target)
         return predicted
 
+    def confidence(
+        self, comparables: ComparableProperties
+    ) -> PredictionConfidence:
+        """Compute the per-prediction confidence from the cohort: its size plus,
+        for each homogeneous categorical, the modal value's share among the
+        neighbours that lodge one (ADR-0029). Compute-only — it never alters the
+        prediction, only annotates how much the cohort agreed."""
+        members: tuple[Comparable, ...] = comparables.members
+        agreement: dict[str, float] = {}
+        for attr in _MAIN_PART_CATEGORICALS:
+            share: Optional[float] = _modal_share(
+                _main_part_attr(c, attr) for c in members
+            )
+            if share is not None:
+                agreement[attr] = share
+        for attr in _FLOOR_DIM_CATEGORICALS:
+            floor_share: Optional[float] = _modal_share(
+                _main_floor_attr(c, attr) for c in members
+            )
+            if floor_share is not None:
+                agreement[attr] = floor_share
+        return PredictionConfidence(
+            cohort_size=len(members), component_agreement=agreement
+        )
+
     @staticmethod
     def _template(comparables: ComparableProperties) -> Comparable:
         """The representative comparable whose structure seeds the prediction:
@@ -157,6 +204,18 @@ def _mode(
     return Counter(present).most_common(1)[0][0]
 
 
+def _modal_share(
+    values: Iterable[Optional[Union[int, str]]],
+) -> Optional[float]:
+    """The most common value's share of the present (non-None) values — a 0..1
+    measure of how much the cohort agrees — or None when none are present."""
+    present = [v for v in values if v is not None]
+    if not present:
+        return None
+    modal_count: int = Counter(present).most_common(1)[0][1]
+    return modal_count / len(present)
+
+
 def _recency_weighted_mode(
     members: tuple[Comparable, ...], attr: str
 ) -> Optional[Union[int, str]]:
diff --git a/tests/domain/epc_prediction/test_epc_prediction.py b/tests/domain/epc_prediction/test_epc_prediction.py
index c18e113e..fb59e317 100644
--- a/tests/domain/epc_prediction/test_epc_prediction.py
+++ b/tests/domain/epc_prediction/test_epc_prediction.py
@@ -19,7 +19,10 @@ from domain.epc_prediction.comparable_properties import (
     ComparableProperties,
     PredictionTarget,
 )
-from domain.epc_prediction.epc_prediction import EpcPrediction
+from domain.epc_prediction.epc_prediction import (
+    EpcPrediction,
+    PredictionConfidence,
+)
 
 
 def _epc(
@@ -223,6 +226,61 @@ def test_recency_weights_roof_insulation_mode() -> None:
     assert predicted.sap_building_parts[0].roof_insulation_thickness == 300
 
 
+def test_confidence_reports_cohort_size_and_unanimous_agreement() -> None:
+    # Arrange — a unanimous cohort: three neighbours, all cavity-walled (1).
+    cohort = _cohort(
+        _epc(wall_construction=1),
+        _epc(wall_construction=1),
+        _epc(wall_construction=1),
+    )
+
+    # Act
+    confidence: PredictionConfidence = EpcPrediction().confidence(cohort)
+
+    # Assert — three neighbours, total agreement on the wall construction.
+    assert confidence.cohort_size == 3
+    assert confidence.agreement("wall_construction") == 1.0
+
+
+def test_confidence_agreement_is_the_modal_share_of_the_cohort() -> None:
+    # Arrange — three of four neighbours are cavity (1), one is solid brick (2),
+    # so the cohort is split on the wall construction.
+    cohort = _cohort(
+        _epc(wall_construction=1),
+        _epc(wall_construction=1),
+        _epc(wall_construction=1),
+        _epc(wall_construction=2),
+    )
+
+    # Act
+    confidence: PredictionConfidence = EpcPrediction().confidence(cohort)
+
+    # Assert — agreement is the modal value's share of the cohort: 3 of 4.
+    share: Optional[float] = confidence.agreement("wall_construction")
+    assert share is not None
+    assert abs(share - 0.75) <= 1e-9
+
+
+def test_confidence_excludes_absent_component_values_from_the_denominator() -> None:
+    # Arrange — two neighbours lodge a roof construction (both code 2); one lodges
+    # none. The missing value must not dilute the agreement to 2/3.
+    cohort = _cohort(
+        _epc(roof_construction=2),
+        _epc(roof_construction=2),
+        _epc(roof_construction=None),
+    )
+
+    # Act
+    confidence: PredictionConfidence = EpcPrediction().confidence(cohort)
+
+    # Assert — agreement counts only the two present, unanimous values (1.0),
+    # while the cohort size still reflects all three neighbours.
+    share: Optional[float] = confidence.agreement("roof_construction")
+    assert share is not None
+    assert abs(share - 1.0) <= 1e-9
+    assert confidence.cohort_size == 3
+
+
 def test_applies_a_known_wall_override_over_the_mode() -> None:
     # Arrange — the cohort mode is cavity (1), but we KNOW the target is solid
     # brick (2), a Landlord Override. The known value must win over the estimate.