@staticmethod
def _template(comparables: ComparableProperties) -> Comparable:
    """Return the cohort member whose structure seeds the prediction.

    The chosen member is the one whose total floor area lies nearest the
    cohort's median floor area. A single neighbour's geometry is copied
    wholesale, so a size-representative template keeps the prediction off
    the cohort's size outliers (ADR-0029 decision 4: closest on size).

    When several members are equally close to the median, the one that
    comes first in ``comparables.members`` is returned. An empty cohort
    makes ``statistics.median`` raise ``statistics.StatisticsError``.
    """
    cohort = comparables.members
    floor_areas = [member.epc.total_floor_area_m2 for member in cohort]
    # For an even-sized cohort the median is the mean of the two middle
    # areas, so it need not equal any member's own floor area.
    cohort_median = statistics.median(floor_areas)

    def distance_from_median(member):
        return abs(member.epc.total_floor_area_m2 - cohort_median)

    # min() keeps the first of several equally-near members, so the result
    # is deterministic for a given member order.
    return min(cohort, key=distance_from_median)
def test_template_is_the_member_closest_to_the_cohort_median_size() -> None:
    # Arrange — three members of very different sizes, with the atypical
    # 20 m² outlier deliberately placed first. Because one neighbour's
    # geometry is copied wholesale, the template has to be the member nearest
    # the cohort's median size rather than whichever member is listed first
    # (ADR-0029 decision 4: closest on size).
    floor_areas = (20.0, 80.0, 200.0)
    outlier_first_cohort = _cohort(
        *(_epc(floor_area=area) for area in floor_areas)
    )
    target = PredictionTarget(postcode="LS6 1AA", property_type="2")

    # Act
    result: EpcPropertyData = EpcPrediction().predict(
        target, outlier_first_cohort
    )

    # Assert — the prediction carries the 80 m² median member's floor area,
    # not that of the 20 m² outlier at members[0].
    assert result.total_floor_area_m2 == 80.0