mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-30 13:10:47 +00:00
feat(epc-prediction): Comparable Properties selection ladder (ADR-0029)
Pure-domain select_comparables: property type is an always-hard filter; built form and known Landlord Overrides (e.g. solid brick) are conditioning filters on the filter-then-relax ladder — applied while >= minimum_cohort survive, relaxed otherwise (the mixed-street border case degrades gracefully). PredictionTarget (known inputs) + Comparable (epc + register metadata) + ComparableProperties (selected cohort). Weighting (recency x similarity) follows in the synthesis slice. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
80b525f0f4
commit
bf6b6fac17
4 changed files with 226 additions and 0 deletions
0
domain/epc_prediction/__init__.py
Normal file
0
domain/epc_prediction/__init__.py
Normal file
100
domain/epc_prediction/comparable_properties.py
Normal file
100
domain/epc_prediction/comparable_properties.py
Normal file
|
|
@ -0,0 +1,100 @@
|
|||
"""Comparable Properties selection for EPC Prediction (ADR-0029).
|
||||
|
||||
Given a `PredictionTarget` (the known inputs for an EPC-less Property) and the
|
||||
raw postcode cohort of candidate `Comparable`s, `select_comparables` chooses the
|
||||
reference cohort EPC Prediction synthesises from. Pure domain logic — the cohort
|
||||
IO (postcode search → per-cert fetch) lives behind a repository port.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Callable, Optional, Union
|
||||
|
||||
from datatypes.epc.domain.epc_property_data import EpcPropertyData
|
||||
|
||||
# Default minimum cohort size for the ADR-0029 filter-then-relax ladder.
# A conditioning filter (built form, a known override) is kept only when at
# least this many comparables survive it; otherwise that filter is relaxed.
_DEFAULT_MINIMUM_COHORT: int = 5
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Comparable:
    """A single candidate neighbour considered for the reference cohort.

    Pairs the neighbour's structured `EpcPropertyData` with register metadata
    that the certificate itself does not carry. Only the certificate number is
    held so far — the identity intended for leave-one-out exclusion; recency
    and address (for weighting) are not fields of this class yet.

    Instances are immutable (frozen dataclass).
    """

    # Structured picture of the neighbour's lodged EPC.
    epc: EpcPropertyData
    # Register identity of the certificate this comparable was built from.
    certificate_number: str
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class PredictionTarget:
    """Known inputs for the Property whose EPC is being predicted.

    `postcode` and `property_type` are the fields guaranteed at ingestion and
    are therefore required. `built_form` is often, but not always, known, and
    Landlord Overrides are added here as selection starts to use them; both
    default to `None`, meaning "not known".

    Instances are immutable (frozen dataclass).
    """

    postcode: str
    property_type: str
    # None when the built form is not known for this Property.
    built_form: Optional[str] = None
    # Known Landlord Override (e.g. solid brick). When set, it conditions
    # cohort selection: matching comparables are emphasised while enough of
    # them remain (ADR-0029).
    wall_construction: Optional[Union[int, str]] = None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ComparableProperties:
    """Reference cohort chosen for one `PredictionTarget`.

    Immutable (frozen dataclass); the selected comparables are held as a tuple.
    """

    # The selected comparables, in the order selection produced them.
    members: tuple[Comparable, ...]
|
||||
|
||||
|
||||
def _maybe_filter(
    cohort: list[Comparable],
    predicate: Callable[[Comparable], bool],
    *,
    active: bool,
    minimum_cohort: int,
) -> list[Comparable]:
    """One rung of the filter-then-relax ladder (ADR-0029).

    When `active`, keep only the comparables satisfying `predicate` — but only
    if at least `minimum_cohort` of them survive. If too few survive, or the
    rung is not active, the filter is relaxed and the incoming `cohort` is
    returned unchanged (the same list object).
    """
    if active:
        survivors = list(filter(predicate, cohort))
        # Enough comparables left: the conditioning filter sticks.
        if len(survivors) >= minimum_cohort:
            return survivors
    # Inactive rung, or too few survivors: fall back to the pre-filter cohort.
    return cohort
|
||||
|
||||
|
||||
def select_comparables(
    target: PredictionTarget,
    candidates: list[Comparable],
    *,
    minimum_cohort: int = _DEFAULT_MINIMUM_COHORT,
) -> ComparableProperties:
    """Choose the Comparable Properties for `target` from its raw postcode cohort.

    Property type is an always-hard filter (a flat is never a comparable for a
    house). Built form and a known wall-construction override are conditioning
    filters on the relax ladder, tried in that order: each is applied only when
    the target's value is known and at least `minimum_cohort` comparables
    survive it; otherwise it is relaxed and the cohort passes through as it was.
    """

    def same_built_form(candidate: Comparable) -> bool:
        return candidate.epc.built_form == target.built_form

    def same_main_wall(candidate: Comparable) -> bool:
        return _main_wall_construction(candidate) == target.wall_construction

    # Hard filter: never relaxed, whatever the resulting cohort size.
    selected = [
        candidate
        for candidate in candidates
        if candidate.epc.property_type == target.property_type
    ]

    # Conditioning rungs, in ladder order; a rung is active only when the
    # corresponding target value is known.
    ladder = (
        (same_built_form, target.built_form is not None),
        (same_main_wall, target.wall_construction is not None),
    )
    for predicate, is_known in ladder:
        selected = _maybe_filter(
            selected,
            predicate,
            active=is_known,
            minimum_cohort=minimum_cohort,
        )

    return ComparableProperties(members=tuple(selected))
|
||||
|
||||
|
||||
def _main_wall_construction(comparable: Comparable) -> object:
    """Return the wall construction of the comparable's main building part.

    The first entry of `epc.sap_building_parts` is taken to be the main part.
    Returns `None` when no building part is lodged (empty or missing list).
    """
    building_parts = comparable.epc.sap_building_parts
    if not building_parts:
        return None
    return building_parts[0].wall_construction
|
||||
0
tests/domain/epc_prediction/__init__.py
Normal file
0
tests/domain/epc_prediction/__init__.py
Normal file
126
tests/domain/epc_prediction/test_comparable_properties.py
Normal file
126
tests/domain/epc_prediction/test_comparable_properties.py
Normal file
|
|
@ -0,0 +1,126 @@
|
|||
"""Behaviour of Comparable Properties selection (ADR-0029): given a prediction
|
||||
target's known inputs and the raw postcode cohort, choose the comparables EPC
Prediction will synthesise from. Filter-then-relax ladder: property type is an
always-hard filter; built form and known overrides are conditioning filters,
applied while enough comparables remain and relaxed otherwise. Weighting
(recency × similarity) follows in the synthesis slice. Pure domain logic.
|
||||
"""
|
||||
|
||||
from typing import Optional, Union
|
||||
|
||||
from datatypes.epc.domain.epc_property_data import EpcPropertyData, SapBuildingPart
|
||||
from domain.epc_prediction.comparable_properties import (
|
||||
Comparable,
|
||||
ComparableProperties,
|
||||
PredictionTarget,
|
||||
select_comparables,
|
||||
)
|
||||
|
||||
|
||||
def _comparable(
    *,
    property_type: str,
    certificate_number: str,
    built_form: str = "1",
    wall_construction: Optional[Union[int, str]] = None,
) -> Comparable:
    """Build a Comparable carrying only the fields under test.

    Uses the partial-instance idiom: `EpcPropertyData` and `SapBuildingPart`
    are created with `object.__new__`, which bypasses `__init__`, so only the
    attributes assigned here (property type, built form, one building part and
    its wall construction) are set by this helper.
    """
    epc: EpcPropertyData = object.__new__(EpcPropertyData)
    epc.property_type = property_type
    epc.built_form = built_form

    main: SapBuildingPart = object.__new__(SapBuildingPart)
    # Always assign, even when None: because __init__ is bypassed, skipping
    # the assignment can leave the attribute missing altogether, and a
    # wall-override selection reading it would then raise AttributeError
    # instead of treating the comparable as "no wall construction lodged".
    main.wall_construction = wall_construction
    epc.sap_building_parts = [main]

    return Comparable(epc=epc, certificate_number=certificate_number)
|
||||
|
||||
|
||||
def test_selects_only_candidates_of_the_same_property_type() -> None:
    """Property type is a hard filter: other property types never survive."""
    # Arrange — two houses (property_type "2") and one flat in the cohort; the
    # target is a house.
    house_a = _comparable(property_type="2", certificate_number="A")
    house_b = _comparable(property_type="2", certificate_number="B")
    flat_c = _comparable(property_type="1", certificate_number="C")
    target = PredictionTarget(postcode="LS6 1AA", property_type="2")

    # Act
    result: ComparableProperties = select_comparables(
        target, [house_a, house_b, flat_c]
    )

    # Assert — the flat is dropped; both houses are kept.
    selected_numbers = {member.certificate_number for member in result.members}
    assert selected_numbers == {"A", "B"}
|
||||
|
||||
|
||||
def test_filters_to_the_known_built_form_when_enough_remain() -> None:
    """A known built form is applied when it leaves at least k comparables."""
    # Arrange — all houses: five mid-terraces (built_form "4") and two detached.
    # The target is a mid-terrace, and filtering on built form still leaves
    # >= k comparables, so the filter is applied rather than relaxed.
    mid_terraces = [
        _comparable(property_type="2", built_form="4", certificate_number=f"T{i}")
        for i in range(5)
    ]
    detached = [
        _comparable(property_type="2", built_form="1", certificate_number=f"D{i}")
        for i in range(2)
    ]
    target = PredictionTarget(
        postcode="LS6 1AA", property_type="2", built_form="4"
    )

    # Act
    result: ComparableProperties = select_comparables(
        target, mid_terraces + detached, minimum_cohort=5
    )

    # Assert — only the five mid-terraces are selected.
    selected_numbers = {member.certificate_number for member in result.members}
    assert selected_numbers == {"T0", "T1", "T2", "T3", "T4"}
|
||||
|
||||
|
||||
def test_known_wall_override_emphasises_matching_comparables() -> None:
    """A known wall override filters the cohort when enough matches remain."""
    # Arrange — a mixed street of houses: five solid-brick (code 2) and three
    # cavity (code 1). The target is KNOWN to be solid brick via a Landlord
    # Override, and the filter leaves >= k, so the cavity neighbours are
    # dropped (the border-property case).
    solid_brick = [
        _comparable(property_type="2", wall_construction=2, certificate_number=f"S{i}")
        for i in range(5)
    ]
    cavity = [
        _comparable(property_type="2", wall_construction=1, certificate_number=f"C{i}")
        for i in range(3)
    ]
    target = PredictionTarget(
        postcode="LS6 1AA", property_type="2", wall_construction=2
    )

    # Act
    result: ComparableProperties = select_comparables(
        target, solid_brick + cavity, minimum_cohort=5
    )

    # Assert — only the solid-brick comparables are selected.
    selected_numbers = {member.certificate_number for member in result.members}
    assert selected_numbers == {"S0", "S1", "S2", "S3", "S4"}
|
||||
|
||||
|
||||
def test_known_wall_override_relaxes_when_too_few_match() -> None:
    """A known wall override is relaxed when it would leave fewer than k."""
    # Arrange — two solid-brick and six cavity houses. Applying the override
    # would leave only 2 (< k=5), so it is relaxed and the whole property-type
    # cohort is kept (graceful degradation).
    solid_brick = [
        _comparable(property_type="2", wall_construction=2, certificate_number=f"S{i}")
        for i in range(2)
    ]
    cavity = [
        _comparable(property_type="2", wall_construction=1, certificate_number=f"C{i}")
        for i in range(6)
    ]
    target = PredictionTarget(
        postcode="LS6 1AA", property_type="2", wall_construction=2
    )

    # Act
    result: ComparableProperties = select_comparables(
        target, solid_brick + cavity, minimum_cohort=5
    )

    # Assert — relaxed: all eight houses are retained.
    retained_count = len(result.members)
    assert retained_count == 8
|
||||
Loading…
Add table
Reference in a new issue