"""Comparable Properties selection for EPC Prediction (ADR-0029).

Given a `PredictionTarget` (the known inputs for an EPC-less Property) and the
raw postcode cohort of candidate `Comparable`s, `select_comparables` chooses the
reference cohort EPC Prediction synthesises from. Pure domain logic — the cohort
IO (postcode search → per-cert fetch) lives behind a repository port.
"""

from __future__ import annotations

from dataclasses import dataclass
from typing import TYPE_CHECKING, Callable, Iterable, Optional, Union

if TYPE_CHECKING:
    # Used in annotations only; with postponed annotations enabled above the
    # name is never evaluated at runtime, so this module stays importable
    # without pulling in the EPC datatypes package.
    from datatypes.epc.domain.epc_property_data import EpcPropertyData

# Default floor on the cohort: a conditioning filter (built form, a known
# override) is applied only while at least this many comparables survive it,
# else it is relaxed (ADR-0029 filter-then-relax ladder).
_DEFAULT_MINIMUM_COHORT = 5


@dataclass(frozen=True)
class Comparable:
    """One candidate neighbour: its structured `EpcPropertyData` picture plus
    register metadata that is not carried on the cert itself.

    Only the certificate number is carried so far (the comparable's identity,
    e.g. for leave-one-out exclusion). Recency and address metadata for
    weighting are not fields of this class.
    """

    epc: EpcPropertyData
    certificate_number: str


@dataclass(frozen=True)
class PredictionTarget:
    """The known inputs for the Property whose EPC we are predicting — the fields
    guaranteed at ingestion (plus any Landlord Overrides, added as they're used).
    `built_form` is often but not always known.
    """

    postcode: str
    property_type: str
    built_form: Optional[str] = None
    # A known Landlord Override (e.g. solid brick) conditions cohort selection:
    # only matching comparables are kept while enough of them remain (ADR-0029).
    wall_construction: Optional[Union[int, str]] = None


@dataclass(frozen=True)
class ComparableProperties:
    """The selected reference cohort for a `PredictionTarget`."""

    members: tuple[Comparable, ...]


def _codes_match(left: object, right: object) -> bool:
    """Return True when two lodged codes denote the same value.

    Codes typed as int-or-str can reach us in either representation, so `2`
    and `"2"` are treated as equal. Plain equality is tried first, which makes
    this a strict superset of `left == right` for non-None values. A missing
    (None) code never matches anything.
    """
    if left is None or right is None:
        return False
    return left == right or str(left).strip() == str(right).strip()


def _maybe_filter(
    cohort: list[Comparable],
    predicate: Callable[[Comparable], bool],
    *,
    active: bool,
    minimum_cohort: int,
) -> list[Comparable]:
    """Apply a conditioning filter only while it leaves at least
    `minimum_cohort` comparables; otherwise relax it (keep the pre-filter
    cohort) — the filter-then-relax ladder (ADR-0029).

    When `active` is False the filter is skipped and `cohort` is returned
    unchanged.
    """
    if not active:
        return cohort
    filtered = [c for c in cohort if predicate(c)]
    return filtered if len(filtered) >= minimum_cohort else cohort


def select_comparables(
    target: PredictionTarget,
    candidates: Iterable[Comparable],
    *,
    minimum_cohort: int = _DEFAULT_MINIMUM_COHORT,
) -> ComparableProperties:
    """Select the Comparable Properties for `target` from the raw postcode
    cohort.

    Property type is an always-hard filter (a flat is never a comparable for a
    house). Built form, then a known wall-construction override, are
    conditioning filters on the relax ladder: each is applied only if it is
    known on the target and leaves at least `minimum_cohort` comparables.

    `candidates` may be any iterable; it is consumed exactly once. Members are
    returned in their original order.
    """
    cohort = [
        c for c in candidates if c.epc.property_type == target.property_type
    ]
    cohort = _maybe_filter(
        cohort,
        lambda c: c.epc.built_form == target.built_form,
        active=target.built_form is not None,
        minimum_cohort=minimum_cohort,
    )
    cohort = _maybe_filter(
        cohort,
        # The override is typed int-or-str, so compare representation-tolerantly:
        # a bare `==` would treat "2" and 2 as different codes and silently
        # relax the override away.
        lambda c: _codes_match(
            _main_wall_construction(c), target.wall_construction
        ),
        active=target.wall_construction is not None,
        minimum_cohort=minimum_cohort,
    )
    return ComparableProperties(members=tuple(cohort))


def _main_wall_construction(comparable: Comparable) -> object:
    """The main building part's wall construction, or None when no part lodged.

    The first entry of `sap_building_parts` is taken to be the main part.
    """
    parts = comparable.epc.sap_building_parts
    return parts[0].wall_construction if parts else None
"""Behaviour of Comparable Properties selection (ADR-0029): given a prediction
target's known inputs and the raw postcode cohort, choose the comparables EPC
Prediction will synthesise from. Property type is a hard filter; built form and
known overrides are conditioning filters, applied only while enough comparables
remain (filter-then-relax ladder). Weighting of the selected cohort is not
exercised here. Pure domain logic.
"""

from typing import Optional, Union

from datatypes.epc.domain.epc_property_data import EpcPropertyData, SapBuildingPart
from domain.epc_prediction.comparable_properties import (
    Comparable,
    ComparableProperties,
    PredictionTarget,
    select_comparables,
)


def _comparable(
    *,
    property_type: str,
    certificate_number: str,
    built_form: str = "1",
    wall_construction: Optional[Union[int, str]] = None,
) -> Comparable:
    """A Comparable carrying only the fields under test (opaque EpcPropertyData
    with property_type / built_form / main wall set — the partial-instance idiom).

    `wall_construction` is always assigned, even when None, so a comparable
    built without a wall reads as "no wall lodged" rather than raising
    AttributeError when a wall-conditioned selection inspects it.
    """
    epc: EpcPropertyData = object.__new__(EpcPropertyData)
    epc.property_type = property_type
    epc.built_form = built_form
    main: SapBuildingPart = object.__new__(SapBuildingPart)
    main.wall_construction = wall_construction
    epc.sap_building_parts = [main]
    return Comparable(epc=epc, certificate_number=certificate_number)


def _certificate_numbers(result: ComparableProperties) -> set[str]:
    """The certificate numbers of the selected cohort, order-insensitive."""
    return {member.certificate_number for member in result.members}


def test_selects_only_candidates_of_the_same_property_type() -> None:
    # Arrange — a target house (property_type "2"); cohort of 2 houses + 1 flat.
    target = PredictionTarget(postcode="LS6 1AA", property_type="2")
    candidates = [
        _comparable(property_type="2", certificate_number="A"),
        _comparable(property_type="2", certificate_number="B"),
        _comparable(property_type="1", certificate_number="C"),
    ]

    # Act
    result: ComparableProperties = select_comparables(target, candidates)

    # Assert — the flat is excluded; the two houses remain.
    assert _certificate_numbers(result) == {"A", "B"}


def test_filters_to_the_known_built_form_when_enough_remain() -> None:
    # Arrange — a mid-terrace target (built_form "4"); cohort of 5 mid-terraces
    # + 2 detached, all houses. The built form is known and leaves ≥ k, so the
    # conditioning filter is applied.
    target = PredictionTarget(
        postcode="LS6 1AA", property_type="2", built_form="4"
    )
    candidates = [
        _comparable(property_type="2", built_form="4", certificate_number=f"T{i}")
        for i in range(5)
    ] + [
        _comparable(property_type="2", built_form="1", certificate_number=f"D{i}")
        for i in range(2)
    ]

    # Act
    result: ComparableProperties = select_comparables(
        target, candidates, minimum_cohort=5
    )

    # Assert — only the five mid-terraces survive.
    assert _certificate_numbers(result) == {"T0", "T1", "T2", "T3", "T4"}


def test_built_form_relaxes_when_too_few_match() -> None:
    # Arrange — only 2 mid-terraces but 6 detached houses; filtering on built
    # form would leave 2 (< k=5), so the filter relaxes.
    target = PredictionTarget(
        postcode="LS6 1AA", property_type="2", built_form="4"
    )
    candidates = [
        _comparable(property_type="2", built_form="4", certificate_number=f"T{i}")
        for i in range(2)
    ] + [
        _comparable(property_type="2", built_form="1", certificate_number=f"D{i}")
        for i in range(6)
    ]

    # Act
    result: ComparableProperties = select_comparables(
        target, candidates, minimum_cohort=5
    )

    # Assert — relaxed: every house of the right type is retained.
    assert _certificate_numbers(result) == {
        "T0", "T1", "D0", "D1", "D2", "D3", "D4", "D5"
    }


def test_known_wall_override_emphasises_matching_comparables() -> None:
    # Arrange — a mixed street: 5 solid-brick (code 2) + 3 cavity (code 1) houses.
    # We KNOW the target is solid brick (a Landlord Override), and the filter
    # leaves ≥ k, so cavity neighbours are dropped (the border-property case).
    target = PredictionTarget(
        postcode="LS6 1AA", property_type="2", wall_construction=2
    )
    candidates = [
        _comparable(property_type="2", wall_construction=2, certificate_number=f"S{i}")
        for i in range(5)
    ] + [
        _comparable(property_type="2", wall_construction=1, certificate_number=f"C{i}")
        for i in range(3)
    ]

    # Act
    result: ComparableProperties = select_comparables(
        target, candidates, minimum_cohort=5
    )

    # Assert — only the solid-brick comparables remain.
    assert _certificate_numbers(result) == {"S0", "S1", "S2", "S3", "S4"}


def test_known_wall_override_relaxes_when_too_few_match() -> None:
    # Arrange — only 2 solid-brick but 6 cavity houses; the override would leave
    # 2 (< k=5), so it relaxes to keep the full type cohort (graceful degradation).
    target = PredictionTarget(
        postcode="LS6 1AA", property_type="2", wall_construction=2
    )
    candidates = [
        _comparable(property_type="2", wall_construction=2, certificate_number=f"S{i}")
        for i in range(2)
    ] + [
        _comparable(property_type="2", wall_construction=1, certificate_number=f"C{i}")
        for i in range(6)
    ]

    # Act
    result: ComparableProperties = select_comparables(
        target, candidates, minimum_cohort=5
    )

    # Assert — relaxed: all eight houses retained (exact set, not just a count,
    # so a wrong-but-same-sized cohort cannot pass).
    assert _certificate_numbers(result) == {
        "S0", "S1", "C0", "C1", "C2", "C3", "C4", "C5"
    }


def test_comparable_without_a_lodged_wall_never_matches_the_override() -> None:
    # Arrange — 5 solid-brick houses plus 2 whose main wall is not lodged (None).
    target = PredictionTarget(
        postcode="LS6 1AA", property_type="2", wall_construction=2
    )
    candidates = [
        _comparable(property_type="2", wall_construction=2, certificate_number=f"S{i}")
        for i in range(5)
    ] + [
        _comparable(property_type="2", certificate_number=f"U{i}")
        for i in range(2)
    ]

    # Act
    result: ComparableProperties = select_comparables(
        target, candidates, minimum_cohort=5
    )

    # Assert — the unlodged walls are dropped without raising.
    assert _certificate_numbers(result) == {"S0", "S1", "S2", "S3", "S4"}