From 7ca1f815f605d44a9c9fd6b18731bcf92b58bad9 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 16 Jun 2026 13:34:44 +0000 Subject: [PATCH] =?UTF-8?q?refactor(epc-prediction):=20PR=20review=20?= =?UTF-8?q?=E2=80=94=20rename=20ComparableProperty,=20relocate=20Predictio?= =?UTF-8?q?nTarget?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two review points from @dancafc: 1) Rename the `Comparable` dataclass → `ComparableProperty` (it models one comparable *property*; the collection stays `ComparableProperties`). Applied across domain, repositories, orchestration, harness, scripts, and tests with a word-boundary rename so `ComparableProperties` is untouched. 2) Move `PredictionTarget` out of comparable_properties.py into prediction_target.py (where `PredictionTargetAttributes` + `build_prediction_target` already live). comparable_properties.py now imports it; no import cycle (prediction_target no longer depends on comparable_properties). Importers updated. 92 tests pass across the touched suites; pyright strict clean. 
Co-Authored-By: Claude Opus 4.8 --- .../epc_prediction/comparable_properties.py | 53 +++++++------------ domain/epc_prediction/epc_prediction.py | 44 +++++++-------- domain/epc_prediction/prediction_target.py | 19 ++++++- domain/epc_prediction/validation.py | 18 +++---- harness/epc_prediction_corpus.py | 14 ++--- orchestration/ingestion_orchestrator.py | 4 +- .../comparable_properties_repository.py | 8 +-- .../epc_comparable_properties_repository.py | 8 +-- scripts/fetch_epc_prediction_corpus.py | 2 +- scripts/validate_epc_prediction.py | 4 +- .../test_comparable_properties.py | 12 ++--- .../epc_prediction/test_epc_prediction.py | 42 +++++++-------- .../epc_prediction/test_prediction_target.py | 2 +- .../domain/epc_prediction/test_validation.py | 8 +-- .../test_ingestion_prediction.py | 10 ++-- ...st_epc_comparable_properties_repository.py | 2 +- 16 files changed, 125 insertions(+), 125 deletions(-) diff --git a/domain/epc_prediction/comparable_properties.py b/domain/epc_prediction/comparable_properties.py index dedbc9e9..13e33cf7 100644 --- a/domain/epc_prediction/comparable_properties.py +++ b/domain/epc_prediction/comparable_properties.py @@ -1,18 +1,19 @@ """Comparable Properties selection for EPC Prediction (ADR-0029). Given a `PredictionTarget` (the known inputs for an EPC-less Property) and the -raw postcode cohort of candidate `Comparable`s, `select_comparables` chooses the -reference cohort EPC Prediction synthesises from. Pure domain logic — the cohort -IO (postcode search → per-cert fetch) lives behind a repository port. +raw postcode cohort of candidate `ComparableProperty` objects, `select_comparables` +chooses the reference cohort EPC Prediction synthesises from. Pure domain logic — +the cohort IO (postcode search → per-cert fetch) lives behind a repository port. 
""" from __future__ import annotations from dataclasses import dataclass from datetime import date -from typing import Callable, Optional, Union +from typing import Callable, Optional from datatypes.epc.domain.epc_property_data import EpcPropertyData +from domain.epc_prediction.prediction_target import PredictionTarget from domain.geospatial.coordinates import Coordinates # Default floor on the cohort: a conditioning filter (built form, a known @@ -22,7 +23,7 @@ _DEFAULT_MINIMUM_COHORT = 5 @dataclass(frozen=True) -class Comparable: +class ComparableProperty: """One candidate neighbour: its structured `EpcPropertyData` picture plus the register metadata not carried on the cert (identity for leave-one-out exclusion; recency + address for weighting + re-lodgement dedup).""" @@ -37,38 +38,20 @@ class Comparable: coordinates: Optional[Coordinates] = None -@dataclass(frozen=True) -class PredictionTarget: - """The known inputs for the Property whose EPC we are predicting — the fields - guaranteed at ingestion (plus any Landlord Overrides, added as they're used). - `built_form` is often but not always known. - """ - - postcode: str - property_type: str - built_form: Optional[str] = None - # A known Landlord Override (e.g. solid brick) conditions cohort selection — - # matching comparables are emphasised while enough remain (ADR-0029). - wall_construction: Optional[Union[int, str]] = None - # The target Property's own coordinates (resolved from its UPRN), against - # which neighbours are distance-weighted. None disables geo-weighting. - coordinates: Optional[Coordinates] = None - - @dataclass(frozen=True) class ComparableProperties: """The selected reference cohort for a `PredictionTarget`.""" - members: tuple[Comparable, ...] + members: tuple[ComparableProperty, ...] 
def _maybe_filter( - cohort: list[Comparable], - predicate: Callable[[Comparable], bool], + cohort: list[ComparableProperty], + predicate: Callable[[ComparableProperty], bool], *, active: bool, minimum_cohort: int, -) -> list[Comparable]: +) -> list[ComparableProperty]: """Apply a conditioning filter only while it leaves at least `minimum_cohort` comparables; otherwise relax it (keep the pre-filter cohort) — the filter-then-relax ladder (ADR-0029).""" @@ -80,11 +63,11 @@ def _maybe_filter( def select_comparables( target: PredictionTarget, - candidates: list[Comparable], + candidates: list[ComparableProperty], *, minimum_cohort: int = _DEFAULT_MINIMUM_COHORT, ) -> ComparableProperties: - """Select the Comparable Properties for `target` from the raw postcode + """Select the Comparable Properties for `target` from the raw postcode cohort. The register lists every historical lodgement, so first dedupe each address to its latest cert (one comparable per real neighbour); then property type is an always-hard filter (a flat is never a comparable for a house) and @@ -109,15 +92,15 @@ def select_comparables( def _dedupe_to_latest_per_address( - candidates: list[Comparable], -) -> list[Comparable]: + candidates: list[ComparableProperty], +) -> list[ComparableProperty]: """Collapse the register's re-lodgements: keep one comparable per address — the latest by registration date (ties broken by certificate number, for determinism) — so a re-lodged neighbour does not count more than once. Candidates with no address are passed through untouched (each is its own neighbour). 
Input order is otherwise preserved.""" - latest: dict[str, Comparable] = {} - passthrough: list[Comparable] = [] + latest: dict[str, ComparableProperty] = {} + passthrough: list[ComparableProperty] = [] for c in candidates: if c.address is None: passthrough.append(c) @@ -128,7 +111,7 @@ def _dedupe_to_latest_per_address( return list(latest.values()) + passthrough -def _recency_key(comparable: Comparable) -> tuple[date, str]: +def _recency_key(comparable: ComparableProperty) -> tuple[date, str]: """Sort key making the most recent (then highest cert number) win. A missing registration date sorts oldest.""" return ( @@ -137,7 +120,7 @@ def _recency_key(comparable: Comparable) -> tuple[date, str]: ) -def _main_wall_construction(comparable: Comparable) -> object: +def _main_wall_construction(comparable: ComparableProperty) -> object: """The main building part's wall construction, or None when no part lodged.""" parts = comparable.epc.sap_building_parts return parts[0].wall_construction if parts else None diff --git a/domain/epc_prediction/epc_prediction.py b/domain/epc_prediction/epc_prediction.py index f1d07e28..bcd4f1cf 100644 --- a/domain/epc_prediction/epc_prediction.py +++ b/domain/epc_prediction/epc_prediction.py @@ -23,10 +23,10 @@ from datatypes.epc.domain.epc_property_data import ( SapBuildingPart, ) from domain.epc_prediction.comparable_properties import ( - Comparable, + ComparableProperty, ComparableProperties, - PredictionTarget, ) +from domain.epc_prediction.prediction_target import PredictionTarget from domain.geospatial.coordinates import Coordinates @@ -34,7 +34,7 @@ from domain.geospatial.coordinates import Coordinates class PredictionConfidence: """A compute-only confidence signal for a prediction (ADR-0029 open item). 
- `cohort_size` is the number of Comparable Properties the prediction drew on; + `cohort_size` is the number of Comparable Properties the prediction drew on; `component_agreement` maps a homogeneous component to the cohort's *agreement* — the modal value's share (0..1) of the neighbours that lodge one. A small or split cohort flags a component downstream may want to treat cautiously (e.g. @@ -52,7 +52,7 @@ class PredictionConfidence: class EpcPrediction: - """Synthesises a predicted `EpcPropertyData` from Comparable Properties.""" + """Synthesises a predicted `EpcPropertyData` from Comparable Properties.""" def predict( self, target: PredictionTarget, comparables: ComparableProperties @@ -62,7 +62,7 @@ class EpcPrediction: the cohort median (the best point estimate of the target's size, decoupled from the one template's own area), then set the homogeneous categoricals to the cohort mode.""" - template: Comparable = self._template(comparables) + template: ComparableProperty = self._template(comparables) predicted: EpcPropertyData = copy.deepcopy(template.epc) predicted.total_floor_area_m2 = _geo_weighted_floor_area( comparables.members, target.coordinates @@ -126,7 +126,7 @@ class EpcPrediction: for each homogeneous categorical, the modal value's share among the neighbours that lodge one (ADR-0029). Compute-only — it never alters the prediction, only annotates how much the cohort agreed.""" - members: tuple[Comparable, ...] = comparables.members + members: tuple[ComparableProperty, ...] = comparables.members agreement: dict[str, float] = {} for attr in _MAIN_PART_CATEGORICALS: share: Optional[float] = _modal_share( @@ -145,13 +145,13 @@ class EpcPrediction: ) @staticmethod - def _template(comparables: ComparableProperties) -> Comparable: + def _template(comparables: ComparableProperties) -> ComparableProperty: """The representative comparable whose structure seeds the prediction: the member whose floor area is closest to the cohort median. 
A single neighbour's geometry is copied wholesale, so a size-representative template keeps the prediction off the cohort's size outliers (ADR-0029 decision 4: closest on size).""" - members: tuple[Comparable, ...] = comparables.members + members: tuple[ComparableProperty, ...] = comparables.members median_area: float = statistics.median( c.epc.total_floor_area_m2 for c in members ) @@ -281,13 +281,13 @@ _GEO_WEIGHTED_CATEGORICALS: frozenset[str] = frozenset( def _main_part_attr( - comparable: Comparable, attr: str + comparable: ComparableProperty, attr: str ) -> Optional[Union[int, str]]: parts: list[SapBuildingPart] = comparable.epc.sap_building_parts return getattr(parts[0], attr) if parts else None -def _main_floor_attr(comparable: Comparable, attr: str) -> Optional[int]: +def _main_floor_attr(comparable: ComparableProperty, attr: str) -> Optional[int]: parts: list[SapBuildingPart] = comparable.epc.sap_building_parts if not parts: return None @@ -297,7 +297,7 @@ def _main_floor_attr(comparable: Comparable, attr: str) -> Optional[int]: def _geo_weighted_floor_area( - members: tuple[Comparable, ...], + members: tuple[ComparableProperty, ...], target_coordinates: Optional[Coordinates], ) -> float: """The cohort's geo-proximity-weighted median floor area — the point estimate @@ -336,7 +336,7 @@ def _weighted_median(values_weights: list[tuple[float, float]]) -> float: return ordered[-1][0] -def _age_band_index(comparable: Comparable) -> Optional[int]: +def _age_band_index(comparable: ComparableProperty) -> Optional[int]: """The main building part's construction-age-band position (A=0 … L=11), or None when no recognisable band is lodged.""" band = _main_part_attr(comparable, "construction_age_band") @@ -345,7 +345,7 @@ def _age_band_index(comparable: Comparable) -> Optional[int]: return None -def _similarity_weights(members: tuple[Comparable, ...]) -> list[float]: +def _similarity_weights(members: tuple[ComparableProperty, ...]) -> list[float]: """A 
physical-similarity weight per comparable (ADR-0029 decision 5): the product of an exponential decay in its floor-area distance from the cohort median and in its age-band distance from the cohort's modal band. A neighbour @@ -436,7 +436,7 @@ def _haversine_km(origin: Coordinates, point: Coordinates) -> float: def _geo_weights( - target: Optional[Coordinates], members: tuple[Comparable, ...] + target: Optional[Coordinates], members: tuple[ComparableProperty, ...] ) -> list[float]: """A geo-proximity weight per comparable — an exponential decay in haversine distance to the target. All-neutral (1.0) when the target has no coordinates @@ -456,7 +456,7 @@ def _geo_weights( return weights -def _recency_weights(members: tuple[Comparable, ...]) -> list[float]: +def _recency_weights(members: tuple[ComparableProperty, ...]) -> list[float]: """A recency weight per comparable — exponential decay in the cert's age relative to the newest in the cohort, so newer neighbours dominate. All-equal when no registration dates are lodged. 
Aligned with `members`.""" @@ -473,8 +473,8 @@ def _recency_weights(members: tuple[Comparable, ...]) -> list[float]: def _recency_weighted_choice( - members: tuple[Comparable, ...], - value_of: Callable[[Comparable], Optional[Union[int, str]]], + members: tuple[ComparableProperty, ...], + value_of: Callable[[ComparableProperty], Optional[Union[int, str]]], ) -> Optional[Union[int, str]]: """The recency-weighted cohort mode of a per-comparable value: each neighbour's vote decays exponentially with the cert's age relative to the @@ -490,7 +490,7 @@ def _recency_weighted_choice( def _recency_weighted_mode( - members: tuple[Comparable, ...], attr: str + members: tuple[ComparableProperty, ...], attr: str ) -> Optional[Union[int, str]]: """`_recency_weighted_choice` over a main building-part attribute.""" return _recency_weighted_choice( @@ -499,7 +499,7 @@ def _recency_weighted_mode( def _comparable_modal_glazing( - comparable: Comparable, + comparable: ComparableProperty, ) -> Optional[Union[int, str]]: """A comparable's modal glazing type — the most common across its windows, or None when it lodges none. One glazing signal per neighbour, robust to a single @@ -508,14 +508,14 @@ def _comparable_modal_glazing( return Counter(types).most_common(1)[0][0] if types else None -def _main_heating_detail(comparable: Comparable) -> Optional[MainHeatingDetail]: +def _main_heating_detail(comparable: ComparableProperty) -> Optional[MainHeatingDetail]: """The primary heating system's detail row, or None when none is lodged.""" details = comparable.epc.sap_heating.main_heating_details return details[0] if details else None def _heating_signature( - comparable: Comparable, + comparable: ComparableProperty, ) -> Optional[tuple[Union[int, str], Optional[int], bool]]: """The donor-matching signature — main fuel + heating category + cylinder presence: the coarse identity of the heating system. 
None when no main heating @@ -530,7 +530,7 @@ def _heating_signature( ) -def _heating_donor(members: tuple[Comparable, ...]) -> Optional[Comparable]: +def _heating_donor(members: tuple[ComparableProperty, ...]) -> Optional[ComparableProperty]: """The coherent heating donor: the comparable whose heating signature is the cohort mode, breaking ties toward the most recent cert (then certificate number, for determinism). None when no neighbour lodges a heating system.""" diff --git a/domain/epc_prediction/prediction_target.py b/domain/epc_prediction/prediction_target.py index 70800e55..faa5bd35 100644 --- a/domain/epc_prediction/prediction_target.py +++ b/domain/epc_prediction/prediction_target.py @@ -14,11 +14,28 @@ from __future__ import annotations from dataclasses import dataclass from typing import Optional, Union -from domain.epc_prediction.comparable_properties import PredictionTarget from domain.geospatial.coordinates import Coordinates from domain.property.property import PropertyIdentity +@dataclass(frozen=True) +class PredictionTarget: + """The known inputs for the Property whose EPC we are predicting — the fields + guaranteed at ingestion (plus any Landlord Overrides, added as they're used). + `built_form` is often but not always known. + """ + + postcode: str + property_type: str + built_form: Optional[str] = None + # A known Landlord Override (e.g. solid brick) conditions cohort selection — + # matching comparables are emphasised while enough remain (ADR-0029). + wall_construction: Optional[Union[int, str]] = None + # The target Property's own coordinates (resolved from its UPRN), against + # which neighbours are distance-weighted. None disables geo-weighting. 
+ coordinates: Optional[Coordinates] = None + + @dataclass(frozen=True) class PredictionTargetAttributes: """The target Property's own attributes resolved from Landlord Overrides, diff --git a/domain/epc_prediction/validation.py b/domain/epc_prediction/validation.py index 0e1234a6..d05082f6 100644 --- a/domain/epc_prediction/validation.py +++ b/domain/epc_prediction/validation.py @@ -1,7 +1,7 @@ """Component Accuracy aggregation for EPC Prediction (ADR-0030). The leave-one-out scorer, calculator-FREE on purpose: it holds out each SAP 10.2 -target, predicts it from its (all-vintage) Comparable Properties, and aggregates +target, predicts it from its (all-vintage) Comparable Properties, and aggregates the per-component classification hits + geometry residuals from `compare_prediction`. This is the *primary*, calculator-independent signal — the end-to-end SAP / carbon / PE check (which needs the calculator) is layered on top @@ -20,12 +20,12 @@ from typing import Iterable, Iterator, Optional, Sequence from datatypes.epc.domain.epc_property_data import EpcPropertyData from domain.epc_prediction.comparable_properties import ( - Comparable, - PredictionTarget, + ComparableProperty, select_comparables, ) from domain.epc_prediction.epc_prediction import EpcPrediction from domain.epc_prediction.prediction_comparison import compare_prediction +from domain.epc_prediction.prediction_target import PredictionTarget # Only SAP 10.2 certs are valid held-out targets (ADR-0030) — the only vintage # with full-fidelity lodged components. The source cohort keeps all vintages. 
@@ -68,18 +68,18 @@ class ComponentAccuracy: return sum(abs(v) for v in values) / len(values) if values else None -def _recency_key(comparable: Comparable) -> tuple[date, str]: +def _recency_key(comparable: ComparableProperty) -> tuple[date, str]: return ( comparable.registration_date or date.min, comparable.certificate_number, ) -def _latest_per_address(cohort: Sequence[Comparable]) -> list[Comparable]: +def _latest_per_address(cohort: Sequence[ComparableProperty]) -> list[ComparableProperty]: """One held-out property per address — the latest cert, the best ground truth. Comparables with no address each stand alone.""" - latest: dict[str, Comparable] = {} - standalone: list[Comparable] = [] + latest: dict[str, ComparableProperty] = {} + standalone: list[ComparableProperty] = [] for c in cohort: if c.address is None: standalone.append(c) @@ -91,7 +91,7 @@ def _latest_per_address(cohort: Sequence[Comparable]) -> list[Comparable]: def iter_predictions( - cohorts: Iterable[Sequence[Comparable]], + cohorts: Iterable[Sequence[ComparableProperty]], *, target_sap_version: float = _SAP_10_2, ) -> Iterator[tuple[EpcPropertyData, EpcPropertyData]]: @@ -124,7 +124,7 @@ def iter_predictions( def evaluate_component_accuracy( - cohorts: Iterable[Sequence[Comparable]], + cohorts: Iterable[Sequence[ComparableProperty]], *, target_sap_version: float = _SAP_10_2, ) -> ComponentAccuracy: diff --git a/harness/epc_prediction_corpus.py b/harness/epc_prediction_corpus.py index e29117da..47920324 100644 --- a/harness/epc_prediction_corpus.py +++ b/harness/epc_prediction_corpus.py @@ -1,8 +1,8 @@ -"""Load a postcode-clustered EPC corpus into Comparable cohorts (ADR-0030). +"""Load a postcode-clustered EPC corpus into ComparableProperty cohorts (ADR-0030). 
The IO half of the EPC Prediction validation: read each postcode's cached cert payloads, map them through `EpcPropertyDataMapper.from_api_response`, and build -`Comparable`s carrying the register metadata (address + registration date) the +`ComparableProperty`s carrying the register metadata (address + registration date) the leave-one-out scorer needs to dedupe re-lodgements and hold out a whole address. A cert the mapper rejects (unsupported schema, malformed) is skipped, never fatal. @@ -20,7 +20,7 @@ from pathlib import Path from typing import Any, Optional from datatypes.epc.domain.mapper import EpcPropertyDataMapper -from domain.epc_prediction.comparable_properties import Comparable +from domain.epc_prediction.comparable_properties import ComparableProperty from domain.geospatial.coordinates import Coordinates # Identifying free-text fields blanked when freezing a payload into the committed @@ -28,7 +28,7 @@ from domain.geospatial.coordinates import Coordinates _PII_BLANK_FIELDS = ("address_line_2", "address_line_3", "post_town") -def load_corpus(corpus_dir: Path) -> list[list[Comparable]]: +def load_corpus(corpus_dir: Path) -> list[list[ComparableProperty]]: """Load every postcode cohort under `corpus_dir`. 
Returns one list of Comparables per postcode (the unit the leave-one-out scorer iterates).""" index_path = corpus_dir / "_index.json" @@ -49,8 +49,8 @@ def _load_cohort( postcode: str, certs: list[str], coordinates: dict[int, Coordinates], -) -> list[Comparable]: - cohort: list[Comparable] = [] +) -> list[ComparableProperty]: + cohort: list[ComparableProperty] = [] for cert in certs: path = corpus_dir / postcode / f"{cert}.json" if not path.exists(): @@ -62,7 +62,7 @@ def _load_cohort( continue uprn = _uprn(raw) cohort.append( - Comparable( + ComparableProperty( epc=epc, certificate_number=cert, address=_address(raw), diff --git a/orchestration/ingestion_orchestrator.py b/orchestration/ingestion_orchestrator.py index 0d4a5cff..0ed58145 100644 --- a/orchestration/ingestion_orchestrator.py +++ b/orchestration/ingestion_orchestrator.py @@ -6,7 +6,7 @@ from typing import Any, Optional, Protocol from datatypes.epc.domain.epc_property_data import EpcPropertyData from domain.epc_prediction.comparable_properties import ( - Comparable, + ComparableProperty, select_comparables, ) from domain.epc_prediction.epc_prediction import EpcPrediction @@ -30,7 +30,7 @@ class EpcFetcher(Protocol): class ComparablesRepo(Protocol): """The cohort source for EPC Prediction (e.g. EpcComparablePropertiesRepository).""" - def candidates_for(self, postcode: str) -> list[Comparable]: ... + def candidates_for(self, postcode: str) -> list[ComparableProperty]: ... class PredictionAttributesReader(Protocol): diff --git a/repositories/comparable_properties/comparable_properties_repository.py b/repositories/comparable_properties/comparable_properties_repository.py index f71654f7..c327df46 100644 --- a/repositories/comparable_properties/comparable_properties_repository.py +++ b/repositories/comparable_properties/comparable_properties_repository.py @@ -1,7 +1,7 @@ """The ComparableProperties repository port (ADR-0029 decision 3; ADR-0031). 
Owns the cohort IO for EPC Prediction — given a target's postcode, return the -candidate `Comparable`s (the postcode's other lodged certs, mapped to +candidate `ComparableProperty`s (the postcode's other lodged certs, mapped to `EpcPropertyData` with their register metadata + resolved coordinates). The pure domain `select_comparables` then filters these into the reference cohort, and `EpcPrediction.predict` synthesises the picture. Kept a port so the orchestrator @@ -12,13 +12,13 @@ from __future__ import annotations from abc import ABC, abstractmethod -from domain.epc_prediction.comparable_properties import Comparable +from domain.epc_prediction.comparable_properties import ComparableProperty class ComparablePropertiesRepository(ABC): @abstractmethod - def candidates_for(self, postcode: str) -> list[Comparable]: - """Every candidate neighbour in `postcode` — one `Comparable` per lodged + def candidates_for(self, postcode: str) -> list[ComparableProperty]: + """Every candidate neighbour in `postcode` — one `ComparableProperty` per lodged cert, carrying its `EpcPropertyData`, certificate number, address, registration date, and resolved coordinates (None when unresolvable).""" ... 
diff --git a/repositories/comparable_properties/epc_comparable_properties_repository.py b/repositories/comparable_properties/epc_comparable_properties_repository.py index e806a834..3bfd92b9 100644 --- a/repositories/comparable_properties/epc_comparable_properties_repository.py +++ b/repositories/comparable_properties/epc_comparable_properties_repository.py @@ -14,7 +14,7 @@ from typing import Optional, Protocol from datatypes.epc.domain.epc_property_data import EpcPropertyData from datatypes.epc.search.epc_search_result import EpcSearchResult -from domain.epc_prediction.comparable_properties import Comparable +from domain.epc_prediction.comparable_properties import ComparableProperty from domain.geospatial.coordinates import Coordinates from repositories.comparable_properties.comparable_properties_repository import ( ComparablePropertiesRepository, @@ -45,7 +45,7 @@ class EpcComparablePropertiesRepository(ComparablePropertiesRepository): self._epc_client = epc_client self._geospatial = geospatial - def candidates_for(self, postcode: str) -> list[Comparable]: + def candidates_for(self, postcode: str) -> list[ComparableProperty]: results: list[EpcSearchResult] = self._epc_client.search_by_postcode( postcode ) @@ -57,14 +57,14 @@ class EpcComparablePropertiesRepository(ComparablePropertiesRepository): def _comparable( self, result: EpcSearchResult, coordinates: dict[int, Coordinates] - ) -> Comparable: + ) -> ComparableProperty: epc: EpcPropertyData = self._epc_client.get_by_certificate_number( result.certificate_number ) resolved: Optional[Coordinates] = ( coordinates.get(result.uprn) if result.uprn is not None else None ) - return Comparable( + return ComparableProperty( epc=epc, certificate_number=result.certificate_number, address=result.address_line_1, diff --git a/scripts/fetch_epc_prediction_corpus.py b/scripts/fetch_epc_prediction_corpus.py index 2e69ee6c..14831d02 100644 --- a/scripts/fetch_epc_prediction_corpus.py +++ b/scripts/fetch_epc_prediction_corpus.py 
@@ -4,7 +4,7 @@ WHAT THIS IS FOR ---------------- EPC Prediction estimates an EPC-less Property's `EpcPropertyData` from its -**Comparable Properties** — the other certs in its postcode. Validating that +**Comparable Properties** — the other certs in its postcode. Validating that needs *geographic clusters* (many certs per postcode), not random certs, so the leave-one-out harness can drop one cert and predict it from its neighbours. diff --git a/scripts/validate_epc_prediction.py b/scripts/validate_epc_prediction.py index 3c817078..2da9beca 100644 --- a/scripts/validate_epc_prediction.py +++ b/scripts/validate_epc_prediction.py @@ -29,7 +29,7 @@ from pathlib import Path from typing import Optional from datatypes.epc.domain.epc_property_data import EpcPropertyData -from domain.epc_prediction.comparable_properties import Comparable +from domain.epc_prediction.comparable_properties import ComparableProperty from domain.epc_prediction.validation import ( evaluate_component_accuracy, iter_predictions, @@ -133,7 +133,7 @@ def main() -> None: _sap_line(" floor: PEI |calc(actual) − lodged|", pei_floor) -def _floor_area_error(cohorts: list[list[Comparable]]) -> None: +def _floor_area_error(cohorts: list[list[ComparableProperty]]) -> None: """Floor-area accuracy as MAE (m²) and MAPE (% of the actual), plus the typical (median actual) size — so the absolute error can be read relative to how big dwellings are. 
The predicted area is the cohort median, set diff --git a/tests/domain/epc_prediction/test_comparable_properties.py b/tests/domain/epc_prediction/test_comparable_properties.py index 223ff601..10921b7a 100644 --- a/tests/domain/epc_prediction/test_comparable_properties.py +++ b/tests/domain/epc_prediction/test_comparable_properties.py @@ -1,4 +1,4 @@ -"""Behaviour of Comparable Properties selection (ADR-0029): given a prediction +"""Behaviour of Comparable Properties selection (ADR-0029): given a prediction target's known inputs and the raw postcode cohort, choose + weight the comparables EPC Prediction will synthesise from. Filter-then-relax ladder: hard filters on identity (property type, built form) + known overrides while @@ -10,11 +10,11 @@ from typing import Optional, Union from datatypes.epc.domain.epc_property_data import EpcPropertyData, SapBuildingPart from domain.epc_prediction.comparable_properties import ( - Comparable, + ComparableProperty, ComparableProperties, - PredictionTarget, select_comparables, ) +from domain.epc_prediction.prediction_target import PredictionTarget def _comparable( @@ -25,8 +25,8 @@ def _comparable( wall_construction: Optional[Union[int, str]] = None, address: Optional[str] = None, registration_date: Optional[date] = None, -) -> Comparable: - """A Comparable carrying only the fields under test (opaque EpcPropertyData +) -> ComparableProperty: + """A ComparableProperty carrying only the fields under test (opaque EpcPropertyData with property_type / built_form / main wall set — the partial-instance idiom).""" epc: EpcPropertyData = object.__new__(EpcPropertyData) epc.property_type = property_type @@ -35,7 +35,7 @@ def _comparable( if wall_construction is not None: main.wall_construction = wall_construction epc.sap_building_parts = [main] - return Comparable( + return ComparableProperty( epc=epc, certificate_number=certificate_number, address=address, diff --git a/tests/domain/epc_prediction/test_epc_prediction.py 
b/tests/domain/epc_prediction/test_epc_prediction.py index c127ec37..1f1ef42f 100644 --- a/tests/domain/epc_prediction/test_epc_prediction.py +++ b/tests/domain/epc_prediction/test_epc_prediction.py @@ -1,5 +1,5 @@ """Behaviour of EPC Prediction synthesis (ADR-0029): turn the selected -Comparable Properties into a predicted EpcPropertyData. Hybrid — copy a coherent +Comparable Properties into a predicted EpcPropertyData. Hybrid — copy a coherent representative template's structure (building parts, windows, geometry), set the homogeneous categoricals to the recency-weighted cohort mode, apply Landlord Overrides on top. Pure domain logic. @@ -18,14 +18,14 @@ from datatypes.epc.domain.epc_property_data import ( ) from domain.geospatial.coordinates import Coordinates from domain.epc_prediction.comparable_properties import ( - Comparable, + ComparableProperty, ComparableProperties, - PredictionTarget, ) from domain.epc_prediction.epc_prediction import ( EpcPrediction, PredictionConfidence, ) +from domain.epc_prediction.prediction_target import PredictionTarget def _epc( @@ -90,7 +90,7 @@ def _epc( def _cohort(*epcs: EpcPropertyData) -> ComparableProperties: return ComparableProperties( members=tuple( - Comparable(epc=e, certificate_number=str(i)) for i, e in enumerate(epcs) + ComparableProperty(epc=e, certificate_number=str(i)) for i, e in enumerate(epcs) ) ) @@ -100,7 +100,7 @@ def _dated_cohort( ) -> ComparableProperties: return ComparableProperties( members=tuple( - Comparable(epc=e, certificate_number=str(i), registration_date=d) + ComparableProperty(epc=e, certificate_number=str(i), registration_date=d) for i, (e, d) in enumerate(dated) ) ) @@ -281,10 +281,10 @@ def test_floor_area_leans_toward_the_nearest_neighbours_size() -> None: far = Coordinates(longitude=1.0, latitude=1.0) # ~150 km away cohort = ComparableProperties( members=( - Comparable(_epc(floor_area=60.0), "1", coordinates=far), - Comparable(_epc(floor_area=60.0), "2", coordinates=far), - 
Comparable(_epc(floor_area=60.0), "3", coordinates=far), - Comparable(_epc(floor_area=120.0), "4", coordinates=here), + ComparableProperty(_epc(floor_area=60.0), "1", coordinates=far), + ComparableProperty(_epc(floor_area=60.0), "2", coordinates=far), + ComparableProperty(_epc(floor_area=60.0), "3", coordinates=far), + ComparableProperty(_epc(floor_area=120.0), "4", coordinates=here), ) ) target = PredictionTarget( @@ -305,10 +305,10 @@ def test_floor_area_median_is_unweighted_without_target_coordinates() -> None: far = Coordinates(longitude=1.0, latitude=1.0) cohort = ComparableProperties( members=( - Comparable(_epc(floor_area=60.0), "1", coordinates=far), - Comparable(_epc(floor_area=60.0), "2", coordinates=far), - Comparable(_epc(floor_area=60.0), "3", coordinates=far), - Comparable(_epc(floor_area=120.0), "4", coordinates=here), + ComparableProperty(_epc(floor_area=60.0), "1", coordinates=far), + ComparableProperty(_epc(floor_area=60.0), "2", coordinates=far), + ComparableProperty(_epc(floor_area=60.0), "3", coordinates=far), + ComparableProperty(_epc(floor_area=120.0), "4", coordinates=here), ) ) target = PredictionTarget(postcode="LS6 1AA", property_type="2") @@ -487,10 +487,10 @@ def test_geo_proximity_weights_the_nearest_neighbour() -> None: far = Coordinates(longitude=1.0, latitude=1.0) # ~150 km away cohort = ComparableProperties( members=( - Comparable(_epc(wall_construction=1), "1", coordinates=far), - Comparable(_epc(wall_construction=1), "2", coordinates=far), - Comparable(_epc(wall_construction=1), "3", coordinates=far), - Comparable(_epc(wall_construction=2), "4", coordinates=here), + ComparableProperty(_epc(wall_construction=1), "1", coordinates=far), + ComparableProperty(_epc(wall_construction=1), "2", coordinates=far), + ComparableProperty(_epc(wall_construction=1), "3", coordinates=far), + ComparableProperty(_epc(wall_construction=2), "4", coordinates=here), ) ) target = PredictionTarget( @@ -511,10 +511,10 @@ def 
test_geo_proximity_is_off_without_target_coordinates() -> None: far = Coordinates(longitude=1.0, latitude=1.0) cohort = ComparableProperties( members=( - Comparable(_epc(wall_construction=1), "1", coordinates=far), - Comparable(_epc(wall_construction=1), "2", coordinates=far), - Comparable(_epc(wall_construction=1), "3", coordinates=far), - Comparable(_epc(wall_construction=2), "4", coordinates=here), + ComparableProperty(_epc(wall_construction=1), "1", coordinates=far), + ComparableProperty(_epc(wall_construction=1), "2", coordinates=far), + ComparableProperty(_epc(wall_construction=1), "3", coordinates=far), + ComparableProperty(_epc(wall_construction=2), "4", coordinates=here), ) ) target = PredictionTarget(postcode="LS6 1AA", property_type="2") diff --git a/tests/domain/epc_prediction/test_prediction_target.py b/tests/domain/epc_prediction/test_prediction_target.py index 4aebc452..ec21b422 100644 --- a/tests/domain/epc_prediction/test_prediction_target.py +++ b/tests/domain/epc_prediction/test_prediction_target.py @@ -5,8 +5,8 @@ from __future__ import annotations from typing import Optional -from domain.epc_prediction.comparable_properties import PredictionTarget from domain.epc_prediction.prediction_target import ( + PredictionTarget, PredictionTargetAttributes, build_prediction_target, ) diff --git a/tests/domain/epc_prediction/test_validation.py b/tests/domain/epc_prediction/test_validation.py index acf49a47..1e20b1b1 100644 --- a/tests/domain/epc_prediction/test_validation.py +++ b/tests/domain/epc_prediction/test_validation.py @@ -15,7 +15,7 @@ from datatypes.epc.domain.epc_property_data import ( SapFloorDimension, SapHeating, ) -from domain.epc_prediction.comparable_properties import Comparable +from domain.epc_prediction.comparable_properties import ComparableProperty from domain.epc_prediction.validation import evaluate_component_accuracy @@ -26,8 +26,8 @@ def _comparable( sap_version: float, wall_construction: Union[int, str] = 1, registration_date: 
Optional[date] = None, -) -> Comparable: - """A Comparable carrying a fully-populated opaque EpcPropertyData — every +) -> ComparableProperty: + """A ComparableProperty carrying a fully-populated opaque EpcPropertyData — every field the predictor + comparison read (the partial-instance idiom).""" epc: EpcPropertyData = object.__new__(EpcPropertyData) epc.sap_version = sap_version @@ -66,7 +66,7 @@ def _comparable( energy.photovoltaic_supply = None energy.photovoltaic_arrays = None epc.sap_energy_source = energy - return Comparable( + return ComparableProperty( epc=epc, certificate_number=certificate_number, address=address, diff --git a/tests/orchestration/test_ingestion_prediction.py b/tests/orchestration/test_ingestion_prediction.py index 0a79a79e..a3917dad 100644 --- a/tests/orchestration/test_ingestion_prediction.py +++ b/tests/orchestration/test_ingestion_prediction.py @@ -11,7 +11,7 @@ from typing import Any, Optional from datatypes.epc.domain.epc_property_data import EpcPropertyData from datatypes.epc.domain.mapper import EpcPropertyDataMapper -from domain.epc_prediction.comparable_properties import Comparable +from domain.epc_prediction.comparable_properties import ComparableProperty from domain.epc_prediction.epc_prediction import EpcPrediction from domain.epc_prediction.prediction_target import PredictionTargetAttributes from domain.geospatial.coordinates import Coordinates @@ -76,11 +76,11 @@ class _FakeSolarFetcher: class _FakeComparablesRepo: - def __init__(self, candidates: list[Comparable]) -> None: + def __init__(self, candidates: list[ComparableProperty]) -> None: self._candidates = candidates self.searched: list[str] = [] - def candidates_for(self, postcode: str) -> list[Comparable]: + def candidates_for(self, postcode: str) -> list[ComparableProperty]: self.searched.append(postcode) return self._candidates @@ -93,11 +93,11 @@ class _FakeAttributesReader: return self._attributes -def _cohort() -> list[Comparable]: +def _cohort() -> 
list[ComparableProperty]: # Three same-type neighbours (property_type "0"), distinct addresses so the # dedupe keeps all three. return [ - Comparable( + ComparableProperty( epc=_epc(), certificate_number=f"CERT-{i}", address=f"{i} Some Street", diff --git a/tests/repositories/comparable_properties/test_epc_comparable_properties_repository.py b/tests/repositories/comparable_properties/test_epc_comparable_properties_repository.py index 6b473179..662d5b75 100644 --- a/tests/repositories/comparable_properties/test_epc_comparable_properties_repository.py +++ b/tests/repositories/comparable_properties/test_epc_comparable_properties_repository.py @@ -84,7 +84,7 @@ def test_candidates_for_assembles_a_comparable_per_cert() -> None: # Act candidates = repo.candidates_for("LS6 1AA") - # Assert — the register metadata is threaded onto the Comparable. + # Assert — the register metadata is threaded onto the ComparableProperty. assert len(candidates) == 1 comparable = candidates[0] assert comparable.certificate_number == "CERT-1"