mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-30 13:10:47 +00:00
refactor(epc-prediction): PR review — rename ComparableProperty, relocate PredictionTarget
Two review points from @dancafc: 1) Rename the `Comparable` dataclass → `ComparableProperty` (it models one comparable *property*; the collection stays `ComparableProperties`). Applied across domain, repositories, orchestration, harness, scripts, and tests with a word-boundary rename so `ComparableProperties` is untouched. 2) Move `PredictionTarget` out of comparable_properties.py into prediction_target.py (where `PredictionTargetAttributes` + `build_prediction_target` already live). comparable_properties.py now imports it; no import cycle (prediction_target no longer depends on comparable_properties). Importers updated. 92 tests pass across the touched suites; pyright strict clean. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
f66e2cb020
commit
7ca1f815f6
16 changed files with 125 additions and 125 deletions
|
|
@ -1,18 +1,19 @@
|
|||
"""Comparable Properties selection for EPC Prediction (ADR-0029).
|
||||
|
||||
Given a `PredictionTarget` (the known inputs for an EPC-less Property) and the
|
||||
raw postcode cohort of candidate `Comparable`s, `select_comparables` chooses the
|
||||
reference cohort EPC Prediction synthesises from. Pure domain logic — the cohort
|
||||
IO (postcode search → per-cert fetch) lives behind a repository port.
|
||||
raw postcode cohort of candidate `ComparableProperty` objects, `select_comparables`
|
||||
chooses the reference cohort EPC Prediction synthesises from. Pure domain logic —
|
||||
the cohort IO (postcode search → per-cert fetch) lives behind a repository port.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from datetime import date
|
||||
from typing import Callable, Optional, Union
|
||||
from typing import Callable, Optional
|
||||
|
||||
from datatypes.epc.domain.epc_property_data import EpcPropertyData
|
||||
from domain.epc_prediction.prediction_target import PredictionTarget
|
||||
from domain.geospatial.coordinates import Coordinates
|
||||
|
||||
# Default floor on the cohort: a conditioning filter (built form, a known
|
||||
|
|
@ -22,7 +23,7 @@ _DEFAULT_MINIMUM_COHORT = 5
|
|||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class Comparable:
|
||||
class ComparableProperty:
|
||||
"""One candidate neighbour: its structured `EpcPropertyData` picture plus the
|
||||
register metadata not carried on the cert (identity for leave-one-out
|
||||
exclusion; recency + address for weighting + re-lodgement dedup)."""
|
||||
|
|
@ -37,38 +38,20 @@ class Comparable:
|
|||
coordinates: Optional[Coordinates] = None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class PredictionTarget:
|
||||
"""The known inputs for the Property whose EPC we are predicting — the fields
|
||||
guaranteed at ingestion (plus any Landlord Overrides, added as they're used).
|
||||
`built_form` is often but not always known.
|
||||
"""
|
||||
|
||||
postcode: str
|
||||
property_type: str
|
||||
built_form: Optional[str] = None
|
||||
# A known Landlord Override (e.g. solid brick) conditions cohort selection —
|
||||
# matching comparables are emphasised while enough remain (ADR-0029).
|
||||
wall_construction: Optional[Union[int, str]] = None
|
||||
# The target Property's own coordinates (resolved from its UPRN), against
|
||||
# which neighbours are distance-weighted. None disables geo-weighting.
|
||||
coordinates: Optional[Coordinates] = None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ComparableProperties:
|
||||
"""The selected reference cohort for a `PredictionTarget`."""
|
||||
|
||||
members: tuple[Comparable, ...]
|
||||
members: tuple[ComparableProperty, ...]
|
||||
|
||||
|
||||
def _maybe_filter(
|
||||
cohort: list[Comparable],
|
||||
predicate: Callable[[Comparable], bool],
|
||||
cohort: list[ComparableProperty],
|
||||
predicate: Callable[[ComparableProperty], bool],
|
||||
*,
|
||||
active: bool,
|
||||
minimum_cohort: int,
|
||||
) -> list[Comparable]:
|
||||
) -> list[ComparableProperty]:
|
||||
"""Apply a conditioning filter only while it leaves at least
|
||||
`minimum_cohort` comparables; otherwise relax it (keep the pre-filter
|
||||
cohort) — the filter-then-relax ladder (ADR-0029)."""
|
||||
|
|
@ -80,11 +63,11 @@ def _maybe_filter(
|
|||
|
||||
def select_comparables(
|
||||
target: PredictionTarget,
|
||||
candidates: list[Comparable],
|
||||
candidates: list[ComparableProperty],
|
||||
*,
|
||||
minimum_cohort: int = _DEFAULT_MINIMUM_COHORT,
|
||||
) -> ComparableProperties:
|
||||
"""Select the Comparable Properties for `target` from the raw postcode
|
||||
"""Select the Comparable Properties for `target` from the raw postcode
|
||||
cohort. The register lists every historical lodgement, so first dedupe each
|
||||
address to its latest cert (one comparable per real neighbour); then property
|
||||
type is an always-hard filter (a flat is never a comparable for a house) and
|
||||
|
|
@ -109,15 +92,15 @@ def select_comparables(
|
|||
|
||||
|
||||
def _dedupe_to_latest_per_address(
|
||||
candidates: list[Comparable],
|
||||
) -> list[Comparable]:
|
||||
candidates: list[ComparableProperty],
|
||||
) -> list[ComparableProperty]:
|
||||
"""Collapse the register's re-lodgements: keep one comparable per address —
|
||||
the latest by registration date (ties broken by certificate number, for
|
||||
determinism) — so a re-lodged neighbour does not count more than once.
|
||||
Candidates with no address are passed through untouched (each is its own
|
||||
neighbour). Input order is otherwise preserved."""
|
||||
latest: dict[str, Comparable] = {}
|
||||
passthrough: list[Comparable] = []
|
||||
latest: dict[str, ComparableProperty] = {}
|
||||
passthrough: list[ComparableProperty] = []
|
||||
for c in candidates:
|
||||
if c.address is None:
|
||||
passthrough.append(c)
|
||||
|
|
@ -128,7 +111,7 @@ def _dedupe_to_latest_per_address(
|
|||
return list(latest.values()) + passthrough
|
||||
|
||||
|
||||
def _recency_key(comparable: Comparable) -> tuple[date, str]:
|
||||
def _recency_key(comparable: ComparableProperty) -> tuple[date, str]:
|
||||
"""Sort key making the most recent (then highest cert number) win. A missing
|
||||
registration date sorts oldest."""
|
||||
return (
|
||||
|
|
@ -137,7 +120,7 @@ def _recency_key(comparable: Comparable) -> tuple[date, str]:
|
|||
)
|
||||
|
||||
|
||||
def _main_wall_construction(comparable: Comparable) -> object:
|
||||
def _main_wall_construction(comparable: ComparableProperty) -> object:
|
||||
"""The main building part's wall construction, or None when no part lodged."""
|
||||
parts = comparable.epc.sap_building_parts
|
||||
return parts[0].wall_construction if parts else None
|
||||
|
|
|
|||
|
|
@ -23,10 +23,10 @@ from datatypes.epc.domain.epc_property_data import (
|
|||
SapBuildingPart,
|
||||
)
|
||||
from domain.epc_prediction.comparable_properties import (
|
||||
Comparable,
|
||||
ComparableProperty,
|
||||
ComparableProperties,
|
||||
PredictionTarget,
|
||||
)
|
||||
from domain.epc_prediction.prediction_target import PredictionTarget
|
||||
from domain.geospatial.coordinates import Coordinates
|
||||
|
||||
|
||||
|
|
@ -34,7 +34,7 @@ from domain.geospatial.coordinates import Coordinates
|
|||
class PredictionConfidence:
|
||||
"""A compute-only confidence signal for a prediction (ADR-0029 open item).
|
||||
|
||||
`cohort_size` is the number of Comparable Properties the prediction drew on;
|
||||
`cohort_size` is the number of Comparable Properties the prediction drew on;
|
||||
`component_agreement` maps a homogeneous component to the cohort's *agreement*
|
||||
— the modal value's share (0..1) of the neighbours that lodge one. A small or
|
||||
split cohort flags a component downstream may want to treat cautiously (e.g.
|
||||
|
|
@ -52,7 +52,7 @@ class PredictionConfidence:
|
|||
|
||||
|
||||
class EpcPrediction:
|
||||
"""Synthesises a predicted `EpcPropertyData` from Comparable Properties."""
|
||||
"""Synthesises a predicted `EpcPropertyData` from Comparable Properties."""
|
||||
|
||||
def predict(
|
||||
self, target: PredictionTarget, comparables: ComparableProperties
|
||||
|
|
@ -62,7 +62,7 @@ class EpcPrediction:
|
|||
the cohort median (the best point estimate of the target's size, decoupled
|
||||
from the one template's own area), then set the homogeneous categoricals
|
||||
to the cohort mode."""
|
||||
template: Comparable = self._template(comparables)
|
||||
template: ComparableProperty = self._template(comparables)
|
||||
predicted: EpcPropertyData = copy.deepcopy(template.epc)
|
||||
predicted.total_floor_area_m2 = _geo_weighted_floor_area(
|
||||
comparables.members, target.coordinates
|
||||
|
|
@ -126,7 +126,7 @@ class EpcPrediction:
|
|||
for each homogeneous categorical, the modal value's share among the
|
||||
neighbours that lodge one (ADR-0029). Compute-only — it never alters the
|
||||
prediction, only annotates how much the cohort agreed."""
|
||||
members: tuple[Comparable, ...] = comparables.members
|
||||
members: tuple[ComparableProperty, ...] = comparables.members
|
||||
agreement: dict[str, float] = {}
|
||||
for attr in _MAIN_PART_CATEGORICALS:
|
||||
share: Optional[float] = _modal_share(
|
||||
|
|
@ -145,13 +145,13 @@ class EpcPrediction:
|
|||
)
|
||||
|
||||
@staticmethod
|
||||
def _template(comparables: ComparableProperties) -> Comparable:
|
||||
def _template(comparables: ComparableProperties) -> ComparableProperty:
|
||||
"""The representative comparable whose structure seeds the prediction:
|
||||
the member whose floor area is closest to the cohort median. A single
|
||||
neighbour's geometry is copied wholesale, so a size-representative
|
||||
template keeps the prediction off the cohort's size outliers (ADR-0029
|
||||
decision 4: closest on size)."""
|
||||
members: tuple[Comparable, ...] = comparables.members
|
||||
members: tuple[ComparableProperty, ...] = comparables.members
|
||||
median_area: float = statistics.median(
|
||||
c.epc.total_floor_area_m2 for c in members
|
||||
)
|
||||
|
|
@ -281,13 +281,13 @@ _GEO_WEIGHTED_CATEGORICALS: frozenset[str] = frozenset(
|
|||
|
||||
|
||||
def _main_part_attr(
|
||||
comparable: Comparable, attr: str
|
||||
comparable: ComparableProperty, attr: str
|
||||
) -> Optional[Union[int, str]]:
|
||||
parts: list[SapBuildingPart] = comparable.epc.sap_building_parts
|
||||
return getattr(parts[0], attr) if parts else None
|
||||
|
||||
|
||||
def _main_floor_attr(comparable: Comparable, attr: str) -> Optional[int]:
|
||||
def _main_floor_attr(comparable: ComparableProperty, attr: str) -> Optional[int]:
|
||||
parts: list[SapBuildingPart] = comparable.epc.sap_building_parts
|
||||
if not parts:
|
||||
return None
|
||||
|
|
@ -297,7 +297,7 @@ def _main_floor_attr(comparable: Comparable, attr: str) -> Optional[int]:
|
|||
|
||||
|
||||
def _geo_weighted_floor_area(
|
||||
members: tuple[Comparable, ...],
|
||||
members: tuple[ComparableProperty, ...],
|
||||
target_coordinates: Optional[Coordinates],
|
||||
) -> float:
|
||||
"""The cohort's geo-proximity-weighted median floor area — the point estimate
|
||||
|
|
@ -336,7 +336,7 @@ def _weighted_median(values_weights: list[tuple[float, float]]) -> float:
|
|||
return ordered[-1][0]
|
||||
|
||||
|
||||
def _age_band_index(comparable: Comparable) -> Optional[int]:
|
||||
def _age_band_index(comparable: ComparableProperty) -> Optional[int]:
|
||||
"""The main building part's construction-age-band position (A=0 … L=11), or
|
||||
None when no recognisable band is lodged."""
|
||||
band = _main_part_attr(comparable, "construction_age_band")
|
||||
|
|
@ -345,7 +345,7 @@ def _age_band_index(comparable: Comparable) -> Optional[int]:
|
|||
return None
|
||||
|
||||
|
||||
def _similarity_weights(members: tuple[Comparable, ...]) -> list[float]:
|
||||
def _similarity_weights(members: tuple[ComparableProperty, ...]) -> list[float]:
|
||||
"""A physical-similarity weight per comparable (ADR-0029 decision 5): the
|
||||
product of an exponential decay in its floor-area distance from the cohort
|
||||
median and in its age-band distance from the cohort's modal band. A neighbour
|
||||
|
|
@ -436,7 +436,7 @@ def _haversine_km(origin: Coordinates, point: Coordinates) -> float:
|
|||
|
||||
|
||||
def _geo_weights(
|
||||
target: Optional[Coordinates], members: tuple[Comparable, ...]
|
||||
target: Optional[Coordinates], members: tuple[ComparableProperty, ...]
|
||||
) -> list[float]:
|
||||
"""A geo-proximity weight per comparable — an exponential decay in haversine
|
||||
distance to the target. All-neutral (1.0) when the target has no coordinates
|
||||
|
|
@ -456,7 +456,7 @@ def _geo_weights(
|
|||
return weights
|
||||
|
||||
|
||||
def _recency_weights(members: tuple[Comparable, ...]) -> list[float]:
|
||||
def _recency_weights(members: tuple[ComparableProperty, ...]) -> list[float]:
|
||||
"""A recency weight per comparable — exponential decay in the cert's age
|
||||
relative to the newest in the cohort, so newer neighbours dominate. All-equal
|
||||
when no registration dates are lodged. Aligned with `members`."""
|
||||
|
|
@ -473,8 +473,8 @@ def _recency_weights(members: tuple[Comparable, ...]) -> list[float]:
|
|||
|
||||
|
||||
def _recency_weighted_choice(
|
||||
members: tuple[Comparable, ...],
|
||||
value_of: Callable[[Comparable], Optional[Union[int, str]]],
|
||||
members: tuple[ComparableProperty, ...],
|
||||
value_of: Callable[[ComparableProperty], Optional[Union[int, str]]],
|
||||
) -> Optional[Union[int, str]]:
|
||||
"""The recency-weighted cohort mode of a per-comparable value: each
|
||||
neighbour's vote decays exponentially with the cert's age relative to the
|
||||
|
|
@ -490,7 +490,7 @@ def _recency_weighted_choice(
|
|||
|
||||
|
||||
def _recency_weighted_mode(
|
||||
members: tuple[Comparable, ...], attr: str
|
||||
members: tuple[ComparableProperty, ...], attr: str
|
||||
) -> Optional[Union[int, str]]:
|
||||
"""`_recency_weighted_choice` over a main building-part attribute."""
|
||||
return _recency_weighted_choice(
|
||||
|
|
@ -499,7 +499,7 @@ def _recency_weighted_mode(
|
|||
|
||||
|
||||
def _comparable_modal_glazing(
|
||||
comparable: Comparable,
|
||||
comparable: ComparableProperty,
|
||||
) -> Optional[Union[int, str]]:
|
||||
"""A comparable's modal glazing type — the most common across its windows, or
|
||||
None when it lodges none. One glazing signal per neighbour, robust to a single
|
||||
|
|
@ -508,14 +508,14 @@ def _comparable_modal_glazing(
|
|||
return Counter(types).most_common(1)[0][0] if types else None
|
||||
|
||||
|
||||
def _main_heating_detail(comparable: Comparable) -> Optional[MainHeatingDetail]:
|
||||
def _main_heating_detail(comparable: ComparableProperty) -> Optional[MainHeatingDetail]:
|
||||
"""The primary heating system's detail row, or None when none is lodged."""
|
||||
details = comparable.epc.sap_heating.main_heating_details
|
||||
return details[0] if details else None
|
||||
|
||||
|
||||
def _heating_signature(
|
||||
comparable: Comparable,
|
||||
comparable: ComparableProperty,
|
||||
) -> Optional[tuple[Union[int, str], Optional[int], bool]]:
|
||||
"""The donor-matching signature — main fuel + heating category + cylinder
|
||||
presence: the coarse identity of the heating system. None when no main heating
|
||||
|
|
@ -530,7 +530,7 @@ def _heating_signature(
|
|||
)
|
||||
|
||||
|
||||
def _heating_donor(members: tuple[Comparable, ...]) -> Optional[Comparable]:
|
||||
def _heating_donor(members: tuple[ComparableProperty, ...]) -> Optional[ComparableProperty]:
|
||||
"""The coherent heating donor: the comparable whose heating signature is the
|
||||
cohort mode, breaking ties toward the most recent cert (then certificate
|
||||
number, for determinism). None when no neighbour lodges a heating system."""
|
||||
|
|
|
|||
|
|
@ -14,11 +14,28 @@ from __future__ import annotations
|
|||
from dataclasses import dataclass
|
||||
from typing import Optional, Union
|
||||
|
||||
from domain.epc_prediction.comparable_properties import PredictionTarget
|
||||
from domain.geospatial.coordinates import Coordinates
|
||||
from domain.property.property import PropertyIdentity
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class PredictionTarget:
    """Known inputs for the Property whose EPC is being predicted.

    Holds the fields guaranteed at ingestion, plus any Landlord Overrides
    (added as they come into use). `built_form` is frequently known, but not
    always, so it is optional.
    """

    postcode: str
    property_type: str
    built_form: Optional[str] = None
    # Landlord Override, when known (e.g. solid brick). It conditions cohort
    # selection: matching comparables are emphasised while enough of them
    # remain (ADR-0029).
    wall_construction: Optional[Union[int, str]] = None
    # Coordinates of the target Property itself (resolved from its UPRN);
    # neighbours are distance-weighted against them. Leaving this as None
    # disables geo-weighting.
    coordinates: Optional[Coordinates] = None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class PredictionTargetAttributes:
|
||||
"""The target Property's own attributes resolved from Landlord Overrides,
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
"""Component Accuracy aggregation for EPC Prediction (ADR-0030).
|
||||
|
||||
The leave-one-out scorer, calculator-FREE on purpose: it holds out each SAP 10.2
|
||||
target, predicts it from its (all-vintage) Comparable Properties, and aggregates
|
||||
target, predicts it from its (all-vintage) Comparable Properties, and aggregates
|
||||
the per-component classification hits + geometry residuals from
|
||||
`compare_prediction`. This is the *primary*, calculator-independent signal — the
|
||||
end-to-end SAP / carbon / PE check (which needs the calculator) is layered on top
|
||||
|
|
@ -20,12 +20,12 @@ from typing import Iterable, Iterator, Optional, Sequence
|
|||
|
||||
from datatypes.epc.domain.epc_property_data import EpcPropertyData
|
||||
from domain.epc_prediction.comparable_properties import (
|
||||
Comparable,
|
||||
PredictionTarget,
|
||||
ComparableProperty,
|
||||
select_comparables,
|
||||
)
|
||||
from domain.epc_prediction.epc_prediction import EpcPrediction
|
||||
from domain.epc_prediction.prediction_comparison import compare_prediction
|
||||
from domain.epc_prediction.prediction_target import PredictionTarget
|
||||
|
||||
# Only SAP 10.2 certs are valid held-out targets (ADR-0030) — the only vintage
|
||||
# with full-fidelity lodged components. The source cohort keeps all vintages.
|
||||
|
|
@ -68,18 +68,18 @@ class ComponentAccuracy:
|
|||
return sum(abs(v) for v in values) / len(values) if values else None
|
||||
|
||||
|
||||
def _recency_key(comparable: Comparable) -> tuple[date, str]:
|
||||
def _recency_key(comparable: ComparableProperty) -> tuple[date, str]:
|
||||
return (
|
||||
comparable.registration_date or date.min,
|
||||
comparable.certificate_number,
|
||||
)
|
||||
|
||||
|
||||
def _latest_per_address(cohort: Sequence[Comparable]) -> list[Comparable]:
|
||||
def _latest_per_address(cohort: Sequence[ComparableProperty]) -> list[ComparableProperty]:
|
||||
"""One held-out property per address — the latest cert, the best ground
|
||||
truth. Comparables with no address each stand alone."""
|
||||
latest: dict[str, Comparable] = {}
|
||||
standalone: list[Comparable] = []
|
||||
latest: dict[str, ComparableProperty] = {}
|
||||
standalone: list[ComparableProperty] = []
|
||||
for c in cohort:
|
||||
if c.address is None:
|
||||
standalone.append(c)
|
||||
|
|
@ -91,7 +91,7 @@ def _latest_per_address(cohort: Sequence[Comparable]) -> list[Comparable]:
|
|||
|
||||
|
||||
def iter_predictions(
|
||||
cohorts: Iterable[Sequence[Comparable]],
|
||||
cohorts: Iterable[Sequence[ComparableProperty]],
|
||||
*,
|
||||
target_sap_version: float = _SAP_10_2,
|
||||
) -> Iterator[tuple[EpcPropertyData, EpcPropertyData]]:
|
||||
|
|
@ -124,7 +124,7 @@ def iter_predictions(
|
|||
|
||||
|
||||
def evaluate_component_accuracy(
|
||||
cohorts: Iterable[Sequence[Comparable]],
|
||||
cohorts: Iterable[Sequence[ComparableProperty]],
|
||||
*,
|
||||
target_sap_version: float = _SAP_10_2,
|
||||
) -> ComponentAccuracy:
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
"""Load a postcode-clustered EPC corpus into Comparable cohorts (ADR-0030).
|
||||
"""Load a postcode-clustered EPC corpus into ComparableProperty cohorts (ADR-0030).
|
||||
|
||||
The IO half of the EPC Prediction validation: read each postcode's cached cert
|
||||
payloads, map them through `EpcPropertyDataMapper.from_api_response`, and build
|
||||
`Comparable`s carrying the register metadata (address + registration date) the
|
||||
`ComparableProperty`s carrying the register metadata (address + registration date) the
|
||||
leave-one-out scorer needs to dedupe re-lodgements and hold out a whole address.
|
||||
A cert the mapper rejects (unsupported schema, malformed) is skipped, never fatal.
|
||||
|
||||
|
|
@ -20,7 +20,7 @@ from pathlib import Path
|
|||
from typing import Any, Optional
|
||||
|
||||
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
|
||||
from domain.epc_prediction.comparable_properties import Comparable
|
||||
from domain.epc_prediction.comparable_properties import ComparableProperty
|
||||
from domain.geospatial.coordinates import Coordinates
|
||||
|
||||
# Identifying free-text fields blanked when freezing a payload into the committed
|
||||
|
|
@ -28,7 +28,7 @@ from domain.geospatial.coordinates import Coordinates
|
|||
_PII_BLANK_FIELDS = ("address_line_2", "address_line_3", "post_town")
|
||||
|
||||
|
||||
def load_corpus(corpus_dir: Path) -> list[list[Comparable]]:
|
||||
def load_corpus(corpus_dir: Path) -> list[list[ComparableProperty]]:
|
||||
"""Load every postcode cohort under `corpus_dir`. Returns one list of
|
||||
Comparables per postcode (the unit the leave-one-out scorer iterates)."""
|
||||
index_path = corpus_dir / "_index.json"
|
||||
|
|
@ -49,8 +49,8 @@ def _load_cohort(
|
|||
postcode: str,
|
||||
certs: list[str],
|
||||
coordinates: dict[int, Coordinates],
|
||||
) -> list[Comparable]:
|
||||
cohort: list[Comparable] = []
|
||||
) -> list[ComparableProperty]:
|
||||
cohort: list[ComparableProperty] = []
|
||||
for cert in certs:
|
||||
path = corpus_dir / postcode / f"{cert}.json"
|
||||
if not path.exists():
|
||||
|
|
@ -62,7 +62,7 @@ def _load_cohort(
|
|||
continue
|
||||
uprn = _uprn(raw)
|
||||
cohort.append(
|
||||
Comparable(
|
||||
ComparableProperty(
|
||||
epc=epc,
|
||||
certificate_number=cert,
|
||||
address=_address(raw),
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ from typing import Any, Optional, Protocol
|
|||
|
||||
from datatypes.epc.domain.epc_property_data import EpcPropertyData
|
||||
from domain.epc_prediction.comparable_properties import (
|
||||
Comparable,
|
||||
ComparableProperty,
|
||||
select_comparables,
|
||||
)
|
||||
from domain.epc_prediction.epc_prediction import EpcPrediction
|
||||
|
|
@ -30,7 +30,7 @@ class EpcFetcher(Protocol):
|
|||
class ComparablesRepo(Protocol):
|
||||
"""The cohort source for EPC Prediction (e.g. EpcComparablePropertiesRepository)."""
|
||||
|
||||
def candidates_for(self, postcode: str) -> list[Comparable]: ...
|
||||
def candidates_for(self, postcode: str) -> list[ComparableProperty]: ...
|
||||
|
||||
|
||||
class PredictionAttributesReader(Protocol):
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
"""The ComparableProperties repository port (ADR-0029 decision 3; ADR-0031).
|
||||
|
||||
Owns the cohort IO for EPC Prediction — given a target's postcode, return the
|
||||
candidate `Comparable`s (the postcode's other lodged certs, mapped to
|
||||
candidate `ComparableProperty`s (the postcode's other lodged certs, mapped to
|
||||
`EpcPropertyData` with their register metadata + resolved coordinates). The pure
|
||||
domain `select_comparables` then filters these into the reference cohort, and
|
||||
`EpcPrediction.predict` synthesises the picture. Kept a port so the orchestrator
|
||||
|
|
@ -12,13 +12,13 @@ from __future__ import annotations
|
|||
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
from domain.epc_prediction.comparable_properties import Comparable
|
||||
from domain.epc_prediction.comparable_properties import ComparableProperty
|
||||
|
||||
|
||||
class ComparablePropertiesRepository(ABC):
|
||||
@abstractmethod
|
||||
def candidates_for(self, postcode: str) -> list[Comparable]:
|
||||
"""Every candidate neighbour in `postcode` — one `Comparable` per lodged
|
||||
def candidates_for(self, postcode: str) -> list[ComparableProperty]:
|
||||
"""Every candidate neighbour in `postcode` — one `ComparableProperty` per lodged
|
||||
cert, carrying its `EpcPropertyData`, certificate number, address,
|
||||
registration date, and resolved coordinates (None when unresolvable)."""
|
||||
...
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ from typing import Optional, Protocol
|
|||
|
||||
from datatypes.epc.domain.epc_property_data import EpcPropertyData
|
||||
from datatypes.epc.search.epc_search_result import EpcSearchResult
|
||||
from domain.epc_prediction.comparable_properties import Comparable
|
||||
from domain.epc_prediction.comparable_properties import ComparableProperty
|
||||
from domain.geospatial.coordinates import Coordinates
|
||||
from repositories.comparable_properties.comparable_properties_repository import (
|
||||
ComparablePropertiesRepository,
|
||||
|
|
@ -45,7 +45,7 @@ class EpcComparablePropertiesRepository(ComparablePropertiesRepository):
|
|||
self._epc_client = epc_client
|
||||
self._geospatial = geospatial
|
||||
|
||||
def candidates_for(self, postcode: str) -> list[Comparable]:
|
||||
def candidates_for(self, postcode: str) -> list[ComparableProperty]:
|
||||
results: list[EpcSearchResult] = self._epc_client.search_by_postcode(
|
||||
postcode
|
||||
)
|
||||
|
|
@ -57,14 +57,14 @@ class EpcComparablePropertiesRepository(ComparablePropertiesRepository):
|
|||
|
||||
def _comparable(
|
||||
self, result: EpcSearchResult, coordinates: dict[int, Coordinates]
|
||||
) -> Comparable:
|
||||
) -> ComparableProperty:
|
||||
epc: EpcPropertyData = self._epc_client.get_by_certificate_number(
|
||||
result.certificate_number
|
||||
)
|
||||
resolved: Optional[Coordinates] = (
|
||||
coordinates.get(result.uprn) if result.uprn is not None else None
|
||||
)
|
||||
return Comparable(
|
||||
return ComparableProperty(
|
||||
epc=epc,
|
||||
certificate_number=result.certificate_number,
|
||||
address=result.address_line_1,
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@
|
|||
WHAT THIS IS FOR
|
||||
----------------
|
||||
EPC Prediction estimates an EPC-less Property's `EpcPropertyData` from its
|
||||
**Comparable Properties** — the other certs in its postcode. Validating that
|
||||
**Comparable Properties** — the other certs in its postcode. Validating that
|
||||
needs *geographic clusters* (many certs per postcode), not random certs, so the
|
||||
leave-one-out harness can drop one cert and predict it from its neighbours.
|
||||
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ from pathlib import Path
|
|||
from typing import Optional
|
||||
|
||||
from datatypes.epc.domain.epc_property_data import EpcPropertyData
|
||||
from domain.epc_prediction.comparable_properties import Comparable
|
||||
from domain.epc_prediction.comparable_properties import ComparableProperty
|
||||
from domain.epc_prediction.validation import (
|
||||
evaluate_component_accuracy,
|
||||
iter_predictions,
|
||||
|
|
@ -133,7 +133,7 @@ def main() -> None:
|
|||
_sap_line(" floor: PEI |calc(actual) − lodged|", pei_floor)
|
||||
|
||||
|
||||
def _floor_area_error(cohorts: list[list[Comparable]]) -> None:
|
||||
def _floor_area_error(cohorts: list[list[ComparableProperty]]) -> None:
|
||||
"""Floor-area accuracy as MAE (m²) and MAPE (% of the actual), plus the
|
||||
typical (median actual) size — so the absolute error can be read relative to
|
||||
how big dwellings are. The predicted area is the cohort median, set
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
"""Behaviour of Comparable Properties selection (ADR-0029): given a prediction
|
||||
"""Behaviour of Comparable Properties selection (ADR-0029): given a prediction
|
||||
target's known inputs and the raw postcode cohort, choose + weight the
|
||||
comparables EPC Prediction will synthesise from. Filter-then-relax ladder:
|
||||
hard filters on identity (property type, built form) + known overrides while
|
||||
|
|
@ -10,11 +10,11 @@ from typing import Optional, Union
|
|||
|
||||
from datatypes.epc.domain.epc_property_data import EpcPropertyData, SapBuildingPart
|
||||
from domain.epc_prediction.comparable_properties import (
|
||||
Comparable,
|
||||
ComparableProperty,
|
||||
ComparableProperties,
|
||||
PredictionTarget,
|
||||
select_comparables,
|
||||
)
|
||||
from domain.epc_prediction.prediction_target import PredictionTarget
|
||||
|
||||
|
||||
def _comparable(
|
||||
|
|
@ -25,8 +25,8 @@ def _comparable(
|
|||
wall_construction: Optional[Union[int, str]] = None,
|
||||
address: Optional[str] = None,
|
||||
registration_date: Optional[date] = None,
|
||||
) -> Comparable:
|
||||
"""A Comparable carrying only the fields under test (opaque EpcPropertyData
|
||||
) -> ComparableProperty:
|
||||
"""A ComparableProperty carrying only the fields under test (opaque EpcPropertyData
|
||||
with property_type / built_form / main wall set — the partial-instance idiom)."""
|
||||
epc: EpcPropertyData = object.__new__(EpcPropertyData)
|
||||
epc.property_type = property_type
|
||||
|
|
@ -35,7 +35,7 @@ def _comparable(
|
|||
if wall_construction is not None:
|
||||
main.wall_construction = wall_construction
|
||||
epc.sap_building_parts = [main]
|
||||
return Comparable(
|
||||
return ComparableProperty(
|
||||
epc=epc,
|
||||
certificate_number=certificate_number,
|
||||
address=address,
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
"""Behaviour of EPC Prediction synthesis (ADR-0029): turn the selected
|
||||
Comparable Properties into a predicted EpcPropertyData. Hybrid — copy a coherent
|
||||
Comparable Properties into a predicted EpcPropertyData. Hybrid — copy a coherent
|
||||
representative template's structure (building parts, windows, geometry), set the
|
||||
homogeneous categoricals to the recency-weighted cohort mode, apply Landlord
|
||||
Overrides on top. Pure domain logic.
|
||||
|
|
@ -18,14 +18,14 @@ from datatypes.epc.domain.epc_property_data import (
|
|||
)
|
||||
from domain.geospatial.coordinates import Coordinates
|
||||
from domain.epc_prediction.comparable_properties import (
|
||||
Comparable,
|
||||
ComparableProperty,
|
||||
ComparableProperties,
|
||||
PredictionTarget,
|
||||
)
|
||||
from domain.epc_prediction.epc_prediction import (
|
||||
EpcPrediction,
|
||||
PredictionConfidence,
|
||||
)
|
||||
from domain.epc_prediction.prediction_target import PredictionTarget
|
||||
|
||||
|
||||
def _epc(
|
||||
|
|
@ -90,7 +90,7 @@ def _epc(
|
|||
def _cohort(*epcs: EpcPropertyData) -> ComparableProperties:
|
||||
return ComparableProperties(
|
||||
members=tuple(
|
||||
Comparable(epc=e, certificate_number=str(i)) for i, e in enumerate(epcs)
|
||||
ComparableProperty(epc=e, certificate_number=str(i)) for i, e in enumerate(epcs)
|
||||
)
|
||||
)
|
||||
|
||||
|
|
@ -100,7 +100,7 @@ def _dated_cohort(
|
|||
) -> ComparableProperties:
|
||||
return ComparableProperties(
|
||||
members=tuple(
|
||||
Comparable(epc=e, certificate_number=str(i), registration_date=d)
|
||||
ComparableProperty(epc=e, certificate_number=str(i), registration_date=d)
|
||||
for i, (e, d) in enumerate(dated)
|
||||
)
|
||||
)
|
||||
|
|
@ -281,10 +281,10 @@ def test_floor_area_leans_toward_the_nearest_neighbours_size() -> None:
|
|||
far = Coordinates(longitude=1.0, latitude=1.0) # ~150 km away
|
||||
cohort = ComparableProperties(
|
||||
members=(
|
||||
Comparable(_epc(floor_area=60.0), "1", coordinates=far),
|
||||
Comparable(_epc(floor_area=60.0), "2", coordinates=far),
|
||||
Comparable(_epc(floor_area=60.0), "3", coordinates=far),
|
||||
Comparable(_epc(floor_area=120.0), "4", coordinates=here),
|
||||
ComparableProperty(_epc(floor_area=60.0), "1", coordinates=far),
|
||||
ComparableProperty(_epc(floor_area=60.0), "2", coordinates=far),
|
||||
ComparableProperty(_epc(floor_area=60.0), "3", coordinates=far),
|
||||
ComparableProperty(_epc(floor_area=120.0), "4", coordinates=here),
|
||||
)
|
||||
)
|
||||
target = PredictionTarget(
|
||||
|
|
@ -305,10 +305,10 @@ def test_floor_area_median_is_unweighted_without_target_coordinates() -> None:
|
|||
far = Coordinates(longitude=1.0, latitude=1.0)
|
||||
cohort = ComparableProperties(
|
||||
members=(
|
||||
Comparable(_epc(floor_area=60.0), "1", coordinates=far),
|
||||
Comparable(_epc(floor_area=60.0), "2", coordinates=far),
|
||||
Comparable(_epc(floor_area=60.0), "3", coordinates=far),
|
||||
Comparable(_epc(floor_area=120.0), "4", coordinates=here),
|
||||
ComparableProperty(_epc(floor_area=60.0), "1", coordinates=far),
|
||||
ComparableProperty(_epc(floor_area=60.0), "2", coordinates=far),
|
||||
ComparableProperty(_epc(floor_area=60.0), "3", coordinates=far),
|
||||
ComparableProperty(_epc(floor_area=120.0), "4", coordinates=here),
|
||||
)
|
||||
)
|
||||
target = PredictionTarget(postcode="LS6 1AA", property_type="2")
|
||||
|
|
@ -487,10 +487,10 @@ def test_geo_proximity_weights_the_nearest_neighbour() -> None:
|
|||
far = Coordinates(longitude=1.0, latitude=1.0) # ~150 km away
|
||||
cohort = ComparableProperties(
|
||||
members=(
|
||||
Comparable(_epc(wall_construction=1), "1", coordinates=far),
|
||||
Comparable(_epc(wall_construction=1), "2", coordinates=far),
|
||||
Comparable(_epc(wall_construction=1), "3", coordinates=far),
|
||||
Comparable(_epc(wall_construction=2), "4", coordinates=here),
|
||||
ComparableProperty(_epc(wall_construction=1), "1", coordinates=far),
|
||||
ComparableProperty(_epc(wall_construction=1), "2", coordinates=far),
|
||||
ComparableProperty(_epc(wall_construction=1), "3", coordinates=far),
|
||||
ComparableProperty(_epc(wall_construction=2), "4", coordinates=here),
|
||||
)
|
||||
)
|
||||
target = PredictionTarget(
|
||||
|
|
@ -511,10 +511,10 @@ def test_geo_proximity_is_off_without_target_coordinates() -> None:
|
|||
far = Coordinates(longitude=1.0, latitude=1.0)
|
||||
cohort = ComparableProperties(
|
||||
members=(
|
||||
Comparable(_epc(wall_construction=1), "1", coordinates=far),
|
||||
Comparable(_epc(wall_construction=1), "2", coordinates=far),
|
||||
Comparable(_epc(wall_construction=1), "3", coordinates=far),
|
||||
Comparable(_epc(wall_construction=2), "4", coordinates=here),
|
||||
ComparableProperty(_epc(wall_construction=1), "1", coordinates=far),
|
||||
ComparableProperty(_epc(wall_construction=1), "2", coordinates=far),
|
||||
ComparableProperty(_epc(wall_construction=1), "3", coordinates=far),
|
||||
ComparableProperty(_epc(wall_construction=2), "4", coordinates=here),
|
||||
)
|
||||
)
|
||||
target = PredictionTarget(postcode="LS6 1AA", property_type="2")
|
||||
|
|
|
|||
|
|
@ -5,8 +5,8 @@ from __future__ import annotations
|
|||
|
||||
from typing import Optional
|
||||
|
||||
from domain.epc_prediction.comparable_properties import PredictionTarget
|
||||
from domain.epc_prediction.prediction_target import (
|
||||
PredictionTarget,
|
||||
PredictionTargetAttributes,
|
||||
build_prediction_target,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ from datatypes.epc.domain.epc_property_data import (
|
|||
SapFloorDimension,
|
||||
SapHeating,
|
||||
)
|
||||
from domain.epc_prediction.comparable_properties import Comparable
|
||||
from domain.epc_prediction.comparable_properties import ComparableProperty
|
||||
from domain.epc_prediction.validation import evaluate_component_accuracy
|
||||
|
||||
|
||||
|
|
@ -26,8 +26,8 @@ def _comparable(
|
|||
sap_version: float,
|
||||
wall_construction: Union[int, str] = 1,
|
||||
registration_date: Optional[date] = None,
|
||||
) -> Comparable:
|
||||
"""A Comparable carrying a fully-populated opaque EpcPropertyData — every
|
||||
) -> ComparableProperty:
|
||||
"""A ComparableProperty carrying a fully-populated opaque EpcPropertyData — every
|
||||
field the predictor + comparison read (the partial-instance idiom)."""
|
||||
epc: EpcPropertyData = object.__new__(EpcPropertyData)
|
||||
epc.sap_version = sap_version
|
||||
|
|
@ -66,7 +66,7 @@ def _comparable(
|
|||
energy.photovoltaic_supply = None
|
||||
energy.photovoltaic_arrays = None
|
||||
epc.sap_energy_source = energy
|
||||
return Comparable(
|
||||
return ComparableProperty(
|
||||
epc=epc,
|
||||
certificate_number=certificate_number,
|
||||
address=address,
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ from typing import Any, Optional
|
|||
|
||||
from datatypes.epc.domain.epc_property_data import EpcPropertyData
|
||||
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
|
||||
from domain.epc_prediction.comparable_properties import Comparable
|
||||
from domain.epc_prediction.comparable_properties import ComparableProperty
|
||||
from domain.epc_prediction.epc_prediction import EpcPrediction
|
||||
from domain.epc_prediction.prediction_target import PredictionTargetAttributes
|
||||
from domain.geospatial.coordinates import Coordinates
|
||||
|
|
@ -76,11 +76,11 @@ class _FakeSolarFetcher:
|
|||
|
||||
|
||||
class _FakeComparablesRepo:
|
||||
def __init__(self, candidates: list[Comparable]) -> None:
|
||||
def __init__(self, candidates: list[ComparableProperty]) -> None:
|
||||
self._candidates = candidates
|
||||
self.searched: list[str] = []
|
||||
|
||||
def candidates_for(self, postcode: str) -> list[Comparable]:
|
||||
def candidates_for(self, postcode: str) -> list[ComparableProperty]:
|
||||
self.searched.append(postcode)
|
||||
return self._candidates
|
||||
|
||||
|
|
@ -93,11 +93,11 @@ class _FakeAttributesReader:
|
|||
return self._attributes
|
||||
|
||||
|
||||
def _cohort() -> list[Comparable]:
|
||||
def _cohort() -> list[ComparableProperty]:
|
||||
# Three same-type neighbours (property_type "0"), distinct addresses so the
|
||||
# dedupe keeps all three.
|
||||
return [
|
||||
Comparable(
|
||||
ComparableProperty(
|
||||
epc=_epc(),
|
||||
certificate_number=f"CERT-{i}",
|
||||
address=f"{i} Some Street",
|
||||
|
|
|
|||
|
|
@ -84,7 +84,7 @@ def test_candidates_for_assembles_a_comparable_per_cert() -> None:
|
|||
# Act
|
||||
candidates = repo.candidates_for("LS6 1AA")
|
||||
|
||||
# Assert — the register metadata is threaded onto the Comparable.
|
||||
# Assert — the register metadata is threaded onto the ComparableProperty.
|
||||
assert len(candidates) == 1
|
||||
comparable = candidates[0]
|
||||
assert comparable.certificate_number == "CERT-1"
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue