feat(epc-prediction): Comparable Properties selection ladder (ADR-0029)

Pure-domain select_comparables: property type is an always-hard filter; built
form and known Landlord Overrides (e.g. solid brick) are conditioning filters on
the filter-then-relax ladder — applied while >= minimum_cohort survive, relaxed
otherwise (the mixed-street border case degrades gracefully). PredictionTarget
(known inputs) + Comparable (epc + register metadata) + ComparableProperties
(selected cohort). Weighting (recency x similarity) follows in the synthesis slice.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Khalim Conn-Kowlessar 2026-06-13 23:44:57 +00:00
parent 80b525f0f4
commit bf6b6fac17
4 changed files with 226 additions and 0 deletions

View file

View file

@ -0,0 +1,100 @@
"""Comparable Properties selection for EPC Prediction (ADR-0029).
Given a `PredictionTarget` (the known inputs for an EPC-less Property) and the
raw postcode cohort of candidate `Comparable`s, `select_comparables` chooses the
reference cohort EPC Prediction synthesises from. Pure domain logic — the cohort
IO (postcode search, then per-cert fetch) lives behind a repository port.
"""
from __future__ import annotations
from dataclasses import dataclass
from typing import Callable, Optional, Union
from datatypes.epc.domain.epc_property_data import EpcPropertyData
# Default floor on the cohort size (the default for `minimum_cohort` in
# `select_comparables`): a conditioning filter (built form, a known override)
# is applied only while at least this many comparables survive it; otherwise
# the filter is relaxed (ADR-0029 filter-then-relax ladder).
_DEFAULT_MINIMUM_COHORT = 5
@dataclass(frozen=True)
class Comparable:
    """One candidate neighbour: its structured `EpcPropertyData` picture plus
    register metadata that is not carried on the cert itself.

    Only the certificate identity is carried at present.
    NOTE(review): recency and address metadata (for weighting) are not fields
    of this class yet — presumably they arrive with the weighting work; confirm.
    """

    # Structured EPC picture; selection reads its `property_type`,
    # `built_form` and `sap_building_parts`.
    epc: EpcPropertyData
    # Register identity of the certificate (intended for leave-one-out
    # exclusion).
    certificate_number: str
@dataclass(frozen=True)
class PredictionTarget:
    """The known inputs for the Property whose EPC we are predicting — the fields
    guaranteed at ingestion (plus any Landlord Overrides, added as they're used).

    `built_form` is often but not always known. An optional field left as
    `None` means "unknown": `select_comparables` then skips the corresponding
    conditioning filter.
    """

    # Not read by `select_comparables`; presumably the raw cohort has already
    # been fetched for this postcode upstream — verify against the caller.
    postcode: str
    # Compared with `==` against each candidate's `epc.property_type`
    # (the always-hard filter).
    property_type: str
    # Compared with `==` against each candidate's `epc.built_form` when known.
    built_form: Optional[str] = None
    # A known Landlord Override (e.g. solid brick) conditions cohort selection —
    # only matching comparables are kept while enough remain (ADR-0029).
    # Compared with `==` against the comparable's main wall construction, so an
    # int code never equals its str form.
    # NOTE(review): confirm which representation the lodged EPC data uses.
    wall_construction: Optional[Union[int, str]] = None
@dataclass(frozen=True)
class ComparableProperties:
    """The selected reference cohort for a `PredictionTarget`."""

    # The comparables that survived selection, as an immutable tuple.
    members: tuple[Comparable, ...]
def _maybe_filter(
cohort: list[Comparable],
predicate: Callable[[Comparable], bool],
*,
active: bool,
minimum_cohort: int,
) -> list[Comparable]:
"""Apply a conditioning filter only while it leaves at least
`minimum_cohort` comparables; otherwise relax it (keep the pre-filter
cohort) the filter-then-relax ladder (ADR-0029)."""
if not active:
return cohort
filtered = [c for c in cohort if predicate(c)]
return filtered if len(filtered) >= minimum_cohort else cohort
def select_comparables(
    target: PredictionTarget,
    candidates: list[Comparable],
    *,
    minimum_cohort: int = _DEFAULT_MINIMUM_COHORT,
) -> ComparableProperties:
    """Choose the Comparable Properties for `target` out of the raw postcode
    cohort.

    Property type is an always-hard filter (a flat is never a comparable for a
    house). Built form and then a known wall construction are conditioning
    filters on the relax ladder: each is applied only when the target knows the
    value and at least `minimum_cohort` comparables survive it.
    """

    def _same_built_form(candidate: Comparable) -> bool:
        return candidate.epc.built_form == target.built_form

    def _same_main_wall(candidate: Comparable) -> bool:
        return _main_wall_construction(candidate) == target.wall_construction

    # Hard filter first: property type is never relaxed.
    survivors = [
        candidate
        for candidate in candidates
        if candidate.epc.property_type == target.property_type
    ]

    # Conditioning rungs, in order; a rung is active only when the target
    # actually knows the value being matched on.
    ladder = (
        (target.built_form is not None, _same_built_form),
        (target.wall_construction is not None, _same_main_wall),
    )
    for is_known, matches in ladder:
        survivors = _maybe_filter(
            survivors,
            matches,
            active=is_known,
            minimum_cohort=minimum_cohort,
        )

    return ComparableProperties(members=tuple(survivors))
def _main_wall_construction(comparable: Comparable) -> object:
"""The main building part's wall construction, or None when no part lodged."""
parts = comparable.epc.sap_building_parts
return parts[0].wall_construction if parts else None

View file

View file

@ -0,0 +1,126 @@
"""Behaviour of Comparable Properties selection (ADR-0029): given a prediction
target's known inputs and the raw postcode cohort, choose the comparables EPC
Prediction will synthesise from. Filter-then-relax ladder: property type is an
always-hard filter; built form and known overrides are conditioning filters,
applied only while enough comparables remain. Weighting (recency × similarity)
follows in the synthesis slice and is not exercised here. Pure domain logic.
"""
from typing import Optional, Union
from datatypes.epc.domain.epc_property_data import EpcPropertyData, SapBuildingPart
from domain.epc_prediction.comparable_properties import (
Comparable,
ComparableProperties,
PredictionTarget,
select_comparables,
)
def _comparable(
    *,
    property_type: str,
    certificate_number: str,
    built_form: str = "1",
    wall_construction: Optional[Union[int, str]] = None,
) -> Comparable:
    """Build a Comparable carrying only the fields under test.

    The `EpcPropertyData` and its single `SapBuildingPart` are created with
    `object.__new__` (no `__init__` run) and given just property_type /
    built_form / main wall — the partial-instance idiom. The wall construction
    attribute is assigned only when a value is supplied.
    """
    main_part: SapBuildingPart = object.__new__(SapBuildingPart)
    if wall_construction is not None:
        main_part.wall_construction = wall_construction

    picture: EpcPropertyData = object.__new__(EpcPropertyData)
    picture.property_type = property_type
    picture.built_form = built_form
    picture.sap_building_parts = [main_part]

    return Comparable(epc=picture, certificate_number=certificate_number)
def test_selects_only_candidates_of_the_same_property_type() -> None:
    # Arrange — the target is a house (property_type "2"); the raw cohort holds
    # two houses and one flat.
    houses = [
        _comparable(property_type="2", certificate_number=number)
        for number in ("A", "B")
    ]
    flat = _comparable(property_type="1", certificate_number="C")
    target = PredictionTarget(postcode="LS6 1AA", property_type="2")

    # Act
    result: ComparableProperties = select_comparables(target, [*houses, flat])

    # Assert — the flat is dropped and both houses are kept.
    selected = {member.certificate_number for member in result.members}
    assert selected == {"A", "B"}
def test_filters_to_the_known_built_form_when_enough_remain() -> None:
    # Arrange — a mid-terrace target (built_form "4") among houses only: five
    # mid-terraces plus two detached. The built form is known and matching on
    # it still leaves >= k comparables, so the filter is applied.
    terraces = [
        _comparable(property_type="2", built_form="4", certificate_number=f"T{i}")
        for i in range(5)
    ]
    detached = [
        _comparable(property_type="2", built_form="1", certificate_number=f"D{i}")
        for i in range(2)
    ]
    target = PredictionTarget(
        postcode="LS6 1AA", property_type="2", built_form="4"
    )

    # Act
    result: ComparableProperties = select_comparables(
        target, terraces + detached, minimum_cohort=5
    )

    # Assert — the detached houses are gone; exactly the five terraces remain.
    selected = {member.certificate_number for member in result.members}
    assert selected == {"T0", "T1", "T2", "T3", "T4"}
def test_known_wall_override_emphasises_matching_comparables() -> None:
    # Arrange — a mixed street of houses: five solid brick (code 2) and three
    # cavity (code 1). The target is KNOWN to be solid brick (a Landlord
    # Override) and matching on it still leaves >= k, so the cavity neighbours
    # are dropped (the border-property case).
    solid_brick = [
        _comparable(property_type="2", wall_construction=2, certificate_number=f"S{i}")
        for i in range(5)
    ]
    cavity = [
        _comparable(property_type="2", wall_construction=1, certificate_number=f"C{i}")
        for i in range(3)
    ]
    target = PredictionTarget(
        postcode="LS6 1AA", property_type="2", wall_construction=2
    )

    # Act
    result: ComparableProperties = select_comparables(
        target, solid_brick + cavity, minimum_cohort=5
    )

    # Assert — only the solid-brick comparables are left.
    selected = {member.certificate_number for member in result.members}
    assert selected == {"S0", "S1", "S2", "S3", "S4"}
def test_known_wall_override_relaxes_when_too_few_match() -> None:
    # Arrange — just two solid-brick houses against six cavity ones. Applying
    # the override would leave 2 (< k=5), so it is relaxed and the whole
    # same-type cohort is kept (graceful degradation).
    solid_brick = [
        _comparable(property_type="2", wall_construction=2, certificate_number=f"S{i}")
        for i in range(2)
    ]
    cavity = [
        _comparable(property_type="2", wall_construction=1, certificate_number=f"C{i}")
        for i in range(6)
    ]
    target = PredictionTarget(
        postcode="LS6 1AA", property_type="2", wall_construction=2
    )

    # Act
    result: ComparableProperties = select_comparables(
        target, solid_brick + cavity, minimum_cohort=5
    )

    # Assert — relaxed: every one of the eight houses is retained.
    retained = len(result.members)
    assert retained == 8