mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-30 13:10:47 +00:00
feat(modelling_e2e): price gap measures via overlay + broaden prediction to nearby postcodes
Two reconciliations to make the modelling_e2e Lambda handler production-ready. 1. Price through the off-catalogue overlay, drop the workarounds The handler priced through a plain ProductPostgresRepository and excluded secondary_heating_removal / system_tune_up / system_tune_up_zoned to dodge ProductNotFound (and a poisoning pgEnum DataError). Those measures are now priced by catalogue_with_off_catalogue_overrides (already used by the e2e runner and PostgresUnitOfWork), so the exclusions are removed and ALL measure types are considered. This also fixes gas-boiler / single-glazed properties, which Dan's handler never excluded and so still crashed (the standard system_tune_up option is built unconditionally — the considered-measures exclusion never actually gated it). 2. Broaden the EPC-Prediction cohort to nearby real postcodes (ADR-0031) A property with no lodged EPC and no same-type comparable in its own postcode (e.g. the only flat among houses) used to gate out and fail the subtask. The gov EPC API cannot search by radius/outcode, so we resolve the real unit postcodes physically nearest the target via postcodes.io (keyless; already a trusted in-repo dependency) and walk them nearest-first until enough same-type comparables surface. New PostcodesIoClient (transient-failure retry with exponential backoff, soft-failing to the seed so broadening never breaks prediction) and EpcComparablePropertiesRepository.candidates_near. Wired into the handler and e2e runner; broadening is lazy (only on gate-out) and memoised per (postcode, property_type). Validated live: property 728476 (gas boiler) prices system_tune_up at GBP295; property 718580 (lone flat in BR6 6BS) now predicts via nearby BR6 postcodes. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
c3422704f5
commit
0bd2db4f03
9 changed files with 744 additions and 10 deletions
|
|
@ -42,7 +42,10 @@ from domain.epc_prediction.comparable_properties import (
|
|||
select_comparables,
|
||||
)
|
||||
from domain.epc_prediction.epc_prediction import EpcPrediction
|
||||
from domain.epc_prediction.prediction_target import build_prediction_target
|
||||
from domain.epc_prediction.prediction_target import (
|
||||
PredictionTarget,
|
||||
build_prediction_target,
|
||||
)
|
||||
from domain.geospatial.coordinates import Coordinates
|
||||
from domain.geospatial.planning_restrictions import PlanningRestrictions
|
||||
from domain.geospatial.spatial_reference import SpatialReference
|
||||
|
|
@ -50,6 +53,7 @@ from domain.property.property import Property, PropertyIdentity
|
|||
from domain.tasks.tasks import Source
|
||||
from harness.console import run_modelling
|
||||
from infrastructure.epc_client.epc_client_service import EpcClientService
|
||||
from infrastructure.postcodes_io.postcodes_io_client import PostcodesIoClient
|
||||
from infrastructure.postgres.config import PostgresConfig
|
||||
from infrastructure.postgres.engine import make_engine
|
||||
from infrastructure.solar.google_solar_api_client import (
|
||||
|
|
@ -85,6 +89,10 @@ from utilities.logger import setup_logger
|
|||
|
||||
_engine: Optional[Engine] = None
|
||||
_cohort_cache: dict[str, list[ComparableProperty]] = {}
|
||||
# Broadened (nearby-postcode) cohorts, keyed by (seed postcode, target property
|
||||
# type): the early-stop walk depends on the type it is filling for, so two types
|
||||
# in the same postcode must not share a cached result.
|
||||
_nearby_cohort_cache: dict[tuple[str, str], list[ComparableProperty]] = {}
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
|
@ -140,13 +148,18 @@ def _predict_epc(
|
|||
attributes_reader: OverrideBackedPredictionAttributesReader,
|
||||
coordinates: Optional[Coordinates],
|
||||
cohort_for: Callable[[str], list[ComparableProperty]],
|
||||
broaden: Callable[[PredictionTarget], list[ComparableProperty]],
|
||||
predictor: EpcPrediction,
|
||||
) -> Optional[EpcPropertyData]:
|
||||
"""Synthesise an EpcPropertyData for an EPC-less property from its postcode
|
||||
cohort (EPC Prediction Path 3, ADR-0031), or None when ineligible.
|
||||
|
||||
When the property's own postcode holds no same-type comparables (a sparse
|
||||
postcode — e.g. the only flat among houses), the cohort is broadened to the
|
||||
real unit postcodes physically nearest it (``broaden``) before giving up.
|
||||
|
||||
Returns None when property_type is unresolvable (hard cohort filter cannot
|
||||
fire) or when the postcode cohort is empty after filtering.
|
||||
fire) or when even the broadened cohort is empty after filtering.
|
||||
"""
|
||||
attributes = attributes_reader.attributes_for(property_id)
|
||||
identity = PropertyIdentity(
|
||||
|
|
@ -156,6 +169,8 @@ def _predict_epc(
|
|||
if target is None:
|
||||
return None
|
||||
comparables = select_comparables(target, cohort_for(target.postcode))
|
||||
if not comparables.members:
|
||||
comparables = select_comparables(target, broaden(target))
|
||||
if not comparables.members:
|
||||
return None
|
||||
predicted = predictor.predict(target, comparables)
|
||||
|
|
@ -201,7 +216,9 @@ def handler(body: dict[str, Any], context: Any) -> None:
|
|||
|
||||
overrides_reader = PropertyOverridesPostgresReader(lambda: Session(engine))
|
||||
prediction_attrs_reader = OverrideBackedPredictionAttributesReader(overrides_reader)
|
||||
comparables_repo = EpcComparablePropertiesRepository(epc_client, geospatial)
|
||||
comparables_repo = EpcComparablePropertiesRepository(
|
||||
epc_client, geospatial, nearby_postcodes=PostcodesIoClient()
|
||||
)
|
||||
predictor = EpcPrediction()
|
||||
|
||||
def _get_cohort(postcode: str) -> list[ComparableProperty]:
|
||||
|
|
@ -211,6 +228,24 @@ def handler(body: dict[str, Any], context: Any) -> None:
|
|||
)
|
||||
return _cohort_cache[postcode]
|
||||
|
||||
def _broaden(target: PredictionTarget) -> list[ComparableProperty]:
    """Return the broadened (nearby-postcode) cohort for a gated-out target.

    The repository walks the real unit postcodes nearest the target until
    enough candidates of the target's property type have surfaced
    (ADR-0031). Results are memoised in ``_nearby_cohort_cache`` under
    (postcode, property_type), so a second miss for the same pair reuses
    the first walk. A target without a postcode gets an empty cohort.
    """
    cache_key = (target.postcode, target.property_type)
    if cache_key in _nearby_cohort_cache:
        return _nearby_cohort_cache[cache_key]

    def _same_type(candidate: ComparableProperty) -> bool:
        # The early-stop predicate: only same-type candidates count
        # towards the "enough" floor of the walk.
        return candidate.epc.property_type == target.property_type

    if target.postcode:
        cohort = comparables_repo.candidates_near(
            target.postcode,
            target.coordinates,
            enough=_same_type,
        )
    else:
        cohort = []
    _nearby_cohort_cache[cache_key] = cohort
    return cohort
|
||||
|
||||
read_session = Session(engine)
|
||||
try:
|
||||
scenario = ScenarioPostgresRepository(read_session).get_many([scenario_id])[0]
|
||||
|
|
@ -261,12 +296,14 @@ def handler(body: dict[str, Any], context: Any) -> None:
|
|||
attributes_reader=prediction_attrs_reader,
|
||||
coordinates=coordinates,
|
||||
cohort_for=_get_cohort,
|
||||
broaden=_broaden,
|
||||
predictor=predictor,
|
||||
)
|
||||
if predicted_epc is None:
|
||||
raise ValueError(
|
||||
f"no EPC for UPRN {uprn} and not predictable "
|
||||
f"(unresolved property_type or empty '{postcode}' cohort)"
|
||||
f"(unresolved property_type, or no same-type "
|
||||
f"comparables in or near '{postcode}')"
|
||||
)
|
||||
effective_epc = Property(
|
||||
identity=PropertyIdentity(
|
||||
|
|
|
|||
0
infrastructure/postcodes_io/__init__.py
Normal file
0
infrastructure/postcodes_io/__init__.py
Normal file
151
infrastructure/postcodes_io/postcodes_io_client.py
Normal file
151
infrastructure/postcodes_io/postcodes_io_client.py
Normal file
|
|
@ -0,0 +1,151 @@
|
|||
"""postcodes.io adapter — a coordinate (or seed postcode) → the real unit
|
||||
postcodes physically near it.
|
||||
|
||||
The gov EPC API only searches a *full* real postcode — no outcode/prefix, no
|
||||
radius, no lat/long (confirmed against its OpenAPI spec). So to broaden an
|
||||
EPC-Prediction cohort beyond the target's own postcode we must first discover the
|
||||
real unit postcodes around it. postcodes.io's free, keyless ``nearest`` endpoint
|
||||
does exactly that: given a point it returns the unit postcodes within a radius,
|
||||
nearest first.
|
||||
|
||||
Failure is deliberately non-fatal: any error (network, unknown seed, missing
|
||||
coordinates) returns just the seed postcode, so broadening degrades to "no
|
||||
broadening" rather than breaking prediction.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import time
|
||||
from typing import Any, Optional
|
||||
|
||||
import httpx
|
||||
|
||||
from domain.geospatial.coordinates import Coordinates
|
||||
|
||||
|
||||
class PostcodesIoClient:
    """Resolve the real unit postcodes physically near a point via postcodes.io.

    Every lookup soft-fails: transient failures are retried with capped
    exponential backoff, and any other failure (or retry exhaustion) makes
    ``nearby`` return just the seed postcode instead of raising, so
    broadening never breaks prediction.
    """

    BASE_URL = "https://api.postcodes.io"
    REQUEST_TIMEOUT = 10.0
    # Transient failures (transport errors, 429s, 5xx) are retried with
    # exponential backoff; everything else (and exhaustion) soft-fails to the
    # seed, so broadening never breaks prediction.
    MAX_RETRIES = 3
    BACKOFF_BASE = 0.5
    BACKOFF_MULTIPLIER = 2.0
    MAX_BACKOFF = 8.0

    def __init__(self, *, radius_m: int = 1000, limit: int = 30) -> None:
        """``radius_m`` bounds how far the broadened cohort reaches; ``limit``
        caps how many nearby postcodes are returned (and so the per-gate-out
        fetch cost)."""
        self._radius_m = radius_m
        self._limit = limit

    def nearby(
        self, postcode: str, coordinates: Optional[Coordinates] = None
    ) -> list[str]:
        """The real unit postcodes within ``radius_m`` of ``postcode`` — nearest
        first, the seed always included — or just ``[postcode]`` when the seed's
        coordinates cannot be resolved or the lookup fails.

        ``coordinates`` (the target's own, resolved from its UPRN) is used when
        given, sparing a postcode→centroid round-trip; otherwise postcodes.io
        resolves the seed postcode's centroid itself.

        The seed is returned exactly as passed in. Neighbours that are the
        same postcode under a different spelling (spacing or letter case) are
        dropped, so the seed is never searched twice nor counted twice against
        ``limit``."""
        point = coordinates if coordinates is not None else self._centroid_of(postcode)
        if point is None:
            return [postcode]
        seed_key = self._canonical(postcode)
        neighbours = [
            found
            for found in self._nearest_to(point)
            if self._canonical(found) != seed_key
        ]
        return ([postcode] + neighbours)[: self._limit]

    @staticmethod
    def _canonical(postcode: str) -> str:
        """Comparison key for a postcode: whitespace removed, upper-cased."""
        return "".join(postcode.split()).upper()

    def _centroid_of(self, postcode: str) -> Optional[Coordinates]:
        """Look up the seed postcode's centroid, or None when the lookup fails
        or the payload carries no latitude/longitude."""
        result = self._get(f"/postcodes/{postcode.replace(' ', '')}")
        if result is None:
            return None
        latitude: Any = result.get("latitude")
        longitude: Any = result.get("longitude")
        if latitude is None or longitude is None:
            return None
        return Coordinates(longitude=float(longitude), latitude=float(latitude))

    def _nearest_to(self, point: Coordinates) -> list[str]:
        """Run the radius search around ``point`` and return the postcodes of
        the rows that carry one, in the order the API returned them."""
        results = self._get_list(
            "/postcodes",
            {
                "lon": point.longitude,
                "lat": point.latitude,
                "radius": self._radius_m,
                "limit": self._limit,
            },
        )
        return [str(row["postcode"]) for row in results if row.get("postcode")]

    def _get(self, path: str) -> Optional[dict[str, Any]]:
        """GET ``path`` and return its ``result`` only when it is an object."""
        payload = self._call(path, None)
        return payload if isinstance(payload, dict) else None

    def _get_list(self, path: str, params: dict[str, Any]) -> list[dict[str, Any]]:
        """GET ``path`` and return the object rows of a list ``result``; any
        other payload shape yields an empty list."""
        payload = self._call(path, params)
        if not isinstance(payload, list):
            return []
        return [row for row in payload if isinstance(row, dict)]

    def _call(self, path: str, params: Optional[dict[str, Any]]) -> Any:
        """One GET against postcodes.io, retrying transient failures (transport
        errors, 429s, 5xx) with exponential backoff. Returns the parsed
        ``result`` payload, or None on a non-transient failure (e.g. an unknown
        postcode's 404) or once retries are exhausted — broadening then falls
        back to the seed alone."""
        for attempt in range(self.MAX_RETRIES + 1):
            try:
                response = httpx.get(
                    f"{self.BASE_URL}{path}",
                    params=params,
                    timeout=self.REQUEST_TIMEOUT,
                )
            except httpx.TransportError:
                if not self._sleep_before_retry(attempt, retry_after=None):
                    return None
                continue
            except (httpx.HTTPError, httpx.InvalidURL):
                # Non-transient client-side error. InvalidURL is not an
                # HTTPError subclass, so it is named explicitly: a malformed
                # seed postcode must soft-fail like everything else.
                return None
            if self._is_transient(response.status_code):
                if not self._sleep_before_retry(
                    attempt, retry_after=self._retry_after(response)
                ):
                    return None
                continue
            if not response.is_success:
                return None
            try:
                body: Any = response.json()
            except ValueError:
                return None
            return body.get("result") if isinstance(body, dict) else None
        return None

    def _sleep_before_retry(self, attempt: int, retry_after: Optional[float]) -> bool:
        """Sleep before the next attempt and report whether one remains; on the
        final attempt, return False so the caller soft-fails instead of looping.

        A usable ``retry_after`` (non-negative, not NaN) takes precedence over
        the computed backoff; either way the sleep is capped at
        ``MAX_BACKOFF``."""
        if attempt >= self.MAX_RETRIES:
            return False
        # `>= 0` is False for both negatives and NaN, either of which would
        # make time.sleep raise ValueError and break the soft-fail contract.
        if retry_after is not None and retry_after >= 0:
            delay = retry_after
        else:
            delay = self.BACKOFF_BASE * (self.BACKOFF_MULTIPLIER**attempt)
        time.sleep(min(delay, self.MAX_BACKOFF))
        return True

    @staticmethod
    def _is_transient(status_code: int) -> bool:
        """Whether a status is worth retrying: 429 or any 5xx."""
        return status_code == 429 or status_code >= 500

    @staticmethod
    def _retry_after(response: httpx.Response) -> Optional[float]:
        """The ``Retry-After`` header as a non-negative number of seconds, or
        None when it is absent or unusable (non-numeric such as an HTTP-date,
        negative, or NaN) — the caller then uses its own backoff."""
        header = response.headers.get("Retry-After")
        if header is None:
            return None
        try:
            seconds = float(header)
        except (TypeError, ValueError):
            return None
        # NaN fails every comparison, so this rejects it along with negatives.
        return seconds if seconds >= 0 else None
|
||||
|
|
@ -10,7 +10,7 @@ UPRNs share a partition). Register metadata the cert itself doesn't carry
|
|||
from __future__ import annotations
|
||||
|
||||
from datetime import date
|
||||
from typing import Optional, Protocol
|
||||
from typing import Callable, Optional, Protocol
|
||||
|
||||
from datatypes.epc.domain.epc_property_data import EpcPropertyData
|
||||
from datatypes.epc.search.epc_search_result import EpcSearchResult
|
||||
|
|
@ -20,6 +20,11 @@ from repositories.comparable_properties.comparable_properties_repository import
|
|||
ComparablePropertiesRepository,
|
||||
)
|
||||
|
||||
# The same default floor `select_comparables` uses: keep walking nearby postcodes
|
||||
# until this many candidates match, so the broadened cohort is big enough for the
|
||||
# downstream relax ladder rather than stopping at the first stray match.
|
||||
_DEFAULT_MINIMUM_COHORT = 5
|
||||
|
||||
|
||||
class CohortEpcClient(Protocol):
|
||||
"""The slice of the EPC-API client the cohort fetch needs (e.g.
|
||||
|
|
@ -38,12 +43,26 @@ class CohortGeospatial(Protocol):
|
|||
) -> dict[int, Coordinates]: ...
|
||||
|
||||
|
||||
class NearbyPostcodes(Protocol):
    """Port for discovering the real unit postcodes physically near a seed
    postcode (`PostcodesIoClient` is one implementation).

    The gov EPC API offers no radius search, so the repository depends on
    this to reach beyond the target's own postcode (ADR-0031)."""

    def nearby(
        self, postcode: str, coordinates: Optional[Coordinates] = None
    ) -> list[str]:
        """Return the unit postcodes near ``postcode``; ``coordinates`` is an
        optional, already-known location for the seed."""
        ...
|
||||
|
||||
|
||||
class EpcComparablePropertiesRepository(ComparablePropertiesRepository):
|
||||
def __init__(
|
||||
self, epc_client: CohortEpcClient, geospatial: CohortGeospatial
|
||||
self,
|
||||
epc_client: CohortEpcClient,
|
||||
geospatial: CohortGeospatial,
|
||||
nearby_postcodes: Optional[NearbyPostcodes] = None,
|
||||
) -> None:
|
||||
self._epc_client = epc_client
|
||||
self._geospatial = geospatial
|
||||
self._nearby_postcodes = nearby_postcodes
|
||||
|
||||
def candidates_for(self, postcode: str) -> list[ComparableProperty]:
|
||||
results: list[EpcSearchResult] = self._epc_client.search_by_postcode(
|
||||
|
|
@ -55,6 +74,45 @@ class EpcComparablePropertiesRepository(ComparablePropertiesRepository):
|
|||
)
|
||||
return [self._comparable(result, coordinates) for result in results]
|
||||
|
||||
def candidates_near(
    self,
    postcode: str,
    coordinates: Optional[Coordinates] = None,
    *,
    enough: Optional[Callable[[ComparableProperty], bool]] = None,
    minimum: int = _DEFAULT_MINIMUM_COHORT,
) -> list[ComparableProperty]:
    """Build the broadened cohort from the unit postcodes around ``postcode``.

    The configured ``NearbyPostcodes`` source supplies the postcodes to
    visit (ADR-0031); with none configured only the seed postcode is
    visited. Postcodes are walked in the order supplied, and a candidate
    whose certificate number was already collected from an earlier postcode
    is skipped.

    ``enough`` is an optional early-stop predicate: it is evaluated once per
    newly collected candidate, and as soon as a postcode has been fully
    processed with at least ``minimum`` candidates satisfying it, the
    remaining postcodes are not fetched. Without ``enough`` every postcode
    is visited.
    """
    if self._nearby_postcodes is None:
        walk = [postcode]
    else:
        walk = self._nearby_postcodes.nearby(postcode, coordinates)

    cohort: list[ComparableProperty] = []
    known_certificates: set[str] = set()
    satisfied = 0
    for unit_postcode in walk:
        for found in self.candidates_for(unit_postcode):
            certificate = found.certificate_number
            if certificate not in known_certificates:
                known_certificates.add(certificate)
                cohort.append(found)
                if enough is not None and enough(found):
                    satisfied += 1
        # Checked per postcode, not per candidate: a postcode that has been
        # fetched always contributes all of its candidates.
        if enough is not None and satisfied >= minimum:
            break
    return cohort
|
||||
|
||||
def _comparable(
|
||||
self, result: EpcSearchResult, coordinates: dict[int, Coordinates]
|
||||
) -> ComparableProperty:
|
||||
|
|
|
|||
|
|
@ -81,6 +81,7 @@ from domain.epc_prediction.comparable_properties import ( # noqa: E402
|
|||
)
|
||||
from domain.epc_prediction.epc_prediction import EpcPrediction # noqa: E402
|
||||
from domain.epc_prediction.prediction_target import ( # noqa: E402
|
||||
PredictionTarget,
|
||||
build_prediction_target,
|
||||
)
|
||||
from domain.geospatial.coordinates import Coordinates # noqa: E402
|
||||
|
|
@ -96,6 +97,9 @@ from domain.modelling.scenario import Scenario # noqa: E402
|
|||
from harness.console import candidate_recommendations, run_modelling # noqa: E402
|
||||
from harness.plan_table import format_plan_table # noqa: E402
|
||||
from infrastructure.epc_client.epc_client_service import EpcClientService # noqa: E402
|
||||
from infrastructure.postcodes_io.postcodes_io_client import ( # noqa: E402
|
||||
PostcodesIoClient,
|
||||
)
|
||||
from infrastructure.solar.google_solar_api_client import ( # noqa: E402
|
||||
BuildingInsightsNotFoundError,
|
||||
GoogleSolarApiClient,
|
||||
|
|
@ -400,6 +404,7 @@ def _predict_epc(
|
|||
attributes_reader: OverrideBackedPredictionAttributesReader,
|
||||
coordinates: Optional[Coordinates],
|
||||
cohort_for: Callable[[str], list[ComparableProperty]],
|
||||
broaden: Callable[[PredictionTarget], list[ComparableProperty]],
|
||||
predictor: EpcPrediction,
|
||||
) -> Optional[EpcPropertyData]:
|
||||
"""Synthesise an EpcPropertyData for an EPC-less Property from its postcode
|
||||
|
|
@ -408,7 +413,8 @@ def _predict_epc(
|
|||
|
||||
The cohort is found by POSTCODE, so a wrong postcode on the property row
|
||||
yields the wrong neighbours — a prediction is only as good as the postcode it
|
||||
is given."""
|
||||
is given. When the own postcode holds no same-type comparables, the cohort is
|
||||
broadened to the real unit postcodes physically nearest it (``broaden``)."""
|
||||
attributes = attributes_reader.attributes_for(property_id)
|
||||
identity = PropertyIdentity(
|
||||
portfolio_id=portfolio_id, postcode=postcode, address="", uprn=uprn
|
||||
|
|
@ -418,7 +424,10 @@ def _predict_epc(
|
|||
return None # property_type unresolvable — gated out of prediction
|
||||
comparables = select_comparables(target, cohort_for(target.postcode))
|
||||
if not comparables.members:
|
||||
return None # no comparable neighbours in the postcode
|
||||
# Sparse own postcode — reach out to the nearest real postcodes.
|
||||
comparables = select_comparables(target, broaden(target))
|
||||
if not comparables.members:
|
||||
return None # no comparable neighbours nearby either
|
||||
predicted = predictor.predict(target, comparables)
|
||||
# The calculator needs a MAIN building part; a cohort whose template carries
|
||||
# none (e.g. a malformed flat record) yields an unscoreable picture, so reject
|
||||
|
|
@ -684,9 +693,12 @@ def main() -> None:
|
|||
# from the live EPC API (search-by-postcode + per-cert fetch), memoised per
|
||||
# postcode so co-located missing Properties don't refetch the same cohort.
|
||||
prediction_attributes = OverrideBackedPredictionAttributesReader(overrides_reader)
|
||||
comparables_repo = EpcComparablePropertiesRepository(epc_client, geospatial)
|
||||
comparables_repo = EpcComparablePropertiesRepository(
|
||||
epc_client, geospatial, nearby_postcodes=PostcodesIoClient()
|
||||
)
|
||||
predictor = EpcPrediction()
|
||||
_cohort_cache: dict[str, list[ComparableProperty]] = {}
|
||||
_nearby_cohort_cache: dict[tuple[str, str], list[ComparableProperty]] = {}
|
||||
|
||||
def cohort_for(postcode: str) -> list[ComparableProperty]:
|
||||
if postcode not in _cohort_cache:
|
||||
|
|
@ -694,6 +706,23 @@ def main() -> None:
|
|||
comparables_repo.candidates_for(postcode) if postcode else []
|
||||
)
|
||||
return _cohort_cache[postcode]
|
||||
|
||||
def broaden(target: PredictionTarget) -> list[ComparableProperty]:
    # Nearby-postcode cohort for a target whose own postcode gated it out:
    # the repository walks the nearest real postcodes until enough
    # candidates of the target's property type surface (ADR-0031). One walk
    # per (postcode, property_type) — later misses reuse the cached list.
    memo_key = (target.postcode, target.property_type)
    if memo_key in _nearby_cohort_cache:
        return _nearby_cohort_cache[memo_key]

    def same_type(candidate: ComparableProperty) -> bool:
        return candidate.epc.property_type == target.property_type

    if target.postcode:
        nearby_cohort = comparables_repo.candidates_near(
            target.postcode,
            target.coordinates,
            enough=same_type,
        )
    else:
        # No postcode to seed the walk from.
        nearby_cohort = []
    _nearby_cohort_cache[memo_key] = nearby_cohort
    return nearby_cohort
|
||||
# One read-only session for the live `material` catalogue, reused across the
|
||||
# batch so both store and no-store runs price against the same DB rows.
|
||||
catalogue_session = Session(engine)
|
||||
|
|
@ -831,12 +860,14 @@ def main() -> None:
|
|||
attributes_reader=prediction_attributes,
|
||||
coordinates=coordinates,
|
||||
cohort_for=cohort_for,
|
||||
broaden=broaden,
|
||||
predictor=predictor,
|
||||
)
|
||||
if predicted_epc is None:
|
||||
raise ValueError(
|
||||
f"no EPC for UPRN {uprn} and not predictable "
|
||||
f"(unresolved property_type or empty '{postcode}' cohort)"
|
||||
f"(unresolved property_type, or no same-type "
|
||||
f"comparables in or near '{postcode}')"
|
||||
)
|
||||
# Property.effective_epc folds any Landlord Overrides onto the
|
||||
# synthesised EPC (cohort fills the unknown fields, the landlord's
|
||||
|
|
|
|||
|
|
@ -81,6 +81,7 @@ def _clear_cohort_cache() -> None:
|
|||
import applications.modelling_e2e.handler as h
|
||||
|
||||
h._cohort_cache.clear()
|
||||
h._nearby_cohort_cache.clear()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -396,6 +397,124 @@ def test_empty_cohort_gates_property_out_and_raises() -> None:
|
|||
MockUoW.return_value.__enter__.assert_not_called()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Broadened cohort — sparse own postcode falls back to nearby postcodes
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_empty_own_postcode_broadens_to_nearby_and_predicts() -> None:
    """A property with no lodged EPC and an own-postcode cohort that yields no
    comparables makes the handler fall back to ``candidates_near``; with
    comparables found there, the EPC is predicted and the plan is saved."""
    from datatypes.epc.domain.epc_property_data import BuildingPartIdentifier
    from domain.epc_prediction.prediction_target import PredictionTargetAttributes

    handler_module = "applications.modelling_e2e.handler"

    # Arrange
    engine = _engine_mock([PROPERTY_ID], [UPRN], [POSTCODE])
    plan = _plan_mock()
    unit_of_work = MagicMock()

    # The predicted EPC carries a MAIN building part.
    main_part = MagicMock()
    main_part.identifier = BuildingPartIdentifier.MAIN
    predicted_epc = MagicMock()
    predicted_epc.sap_building_parts = [main_part]

    # select_comparables is called twice: the own-postcode selection is empty
    # (which triggers broadening) and the nearby selection has a member.
    own_postcode_selection = MagicMock()
    own_postcode_selection.members = []
    nearby_selection = MagicMock()
    nearby_selection.members = [MagicMock()]

    with ExitStack() as stack:
        stack.enter_context(patch(f"{handler_module}.os.environ", _ENV))
        stack.enter_context(
            patch(f"{handler_module}._get_engine", return_value=engine)
        )
        epc_client_cls = stack.enter_context(
            patch(f"{handler_module}.EpcClientService")
        )
        epc_client_cls.return_value.get_by_uprn.return_value = None  # no lodged EPC
        stack.enter_context(patch(f"{handler_module}.GeospatialS3Repository"))
        stack.enter_context(patch(f"{handler_module}.GoogleSolarApiClient"))
        stack.enter_context(
            patch(f"{handler_module}._spatial_for", return_value=None)
        )
        stack.enter_context(
            patch(f"{handler_module}._solar_insights_for", return_value=None)
        )
        stack.enter_context(
            patch(f"{handler_module}.overlays_from", return_value=[])
        )
        stack.enter_context(
            patch(f"{handler_module}.PropertyOverridesPostgresReader")
        )
        attributes_reader_cls = stack.enter_context(
            patch(f"{handler_module}.OverrideBackedPredictionAttributesReader")
        )
        attributes_reader_cls.return_value.attributes_for.return_value = (
            PredictionTargetAttributes(property_type="2")
        )
        repo_cls = stack.enter_context(
            patch(f"{handler_module}.EpcComparablePropertiesRepository")
        )
        repo = repo_cls.return_value
        repo.candidates_for.return_value = []
        repo.candidates_near.return_value = [MagicMock()]
        stack.enter_context(
            patch(
                f"{handler_module}.select_comparables",
                side_effect=[own_postcode_selection, nearby_selection],
            )
        )
        predictor_cls = stack.enter_context(
            patch(f"{handler_module}.EpcPrediction")
        )
        predictor_cls.return_value.predict.return_value = predicted_epc
        scenario_repo_cls = stack.enter_context(
            patch(f"{handler_module}.ScenarioPostgresRepository")
        )
        scenario_repo_cls.return_value.get_many.return_value = [MagicMock()]
        stack.enter_context(
            patch(f"{handler_module}.catalogue_with_off_catalogue_overrides")
        )
        stack.enter_context(patch(f"{handler_module}.Session"))
        stack.enter_context(
            patch(f"{handler_module}.run_modelling", return_value=plan)
        )
        uow_cls = stack.enter_context(
            patch(f"{handler_module}.PostgresUnitOfWork")
        )
        uow_context = uow_cls.return_value
        uow_context.__enter__.return_value = unit_of_work
        uow_context.__exit__.return_value = False

        # Act
        _call_handler(_BODY)

    # Assert — broadening fired, and the broadened cohort produced a saved plan.
    repo.candidates_near.assert_called_once()
    unit_of_work.epc.save.assert_not_called()  # predicted, never lodged
    unit_of_work.plan.save.assert_called_once()
    unit_of_work.commit.assert_called_once()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Partial batch failure
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
0
tests/infrastructure/postcodes_io/__init__.py
Normal file
0
tests/infrastructure/postcodes_io/__init__.py
Normal file
223
tests/infrastructure/postcodes_io/test_postcodes_io_client.py
Normal file
223
tests/infrastructure/postcodes_io/test_postcodes_io_client.py
Normal file
|
|
@ -0,0 +1,223 @@
|
|||
"""PostcodesIoClient — coordinate/seed postcode → the real unit postcodes near
|
||||
it, via postcodes.io's keyless nearest endpoint. Failure degrades to the seed
|
||||
alone so broadening never breaks prediction."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Iterator, Optional
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
|
||||
from domain.geospatial.coordinates import Coordinates
|
||||
from infrastructure.postcodes_io.postcodes_io_client import PostcodesIoClient
|
||||
|
||||
_MODULE = "infrastructure.postcodes_io.postcodes_io_client"
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _no_sleep() -> Iterator[MagicMock]:
    """Replace the client module's ``time.sleep`` for every test, so backoff
    is recorded on the yielded mock instead of actually waiting."""
    sleep_patch = patch(f"{_MODULE}.time.sleep")
    with sleep_patch as fake_sleep:
        yield fake_sleep
|
||||
|
||||
|
||||
def _response(
    payload: Any,
    *,
    status_code: int = 200,
    headers: Optional[dict[str, str]] = None,
) -> MagicMock:
    """Build a stand-in HTTP response: ``json()`` returns ``payload``,
    ``is_success`` is true only for a 2xx ``status_code``, and ``headers``
    defaults to an empty dict."""
    fake = MagicMock()
    fake.json.return_value = payload
    fake.headers = {} if headers is None else headers
    fake.status_code = status_code
    fake.is_success = not (status_code < 200 or status_code >= 300)
    return fake
|
||||
|
||||
|
||||
def _nearest_payload(postcodes: list[str]) -> dict[str, Any]:
    """Wrap ``postcodes`` as a response body: one ``{"postcode": ...}`` row
    each, in order, under the ``result`` key."""
    rows = [{"postcode": unit_postcode} for unit_postcode in postcodes]
    return {"result": rows}
|
||||
|
||||
|
||||
def test_nearby_with_coordinates_skips_the_centroid_lookup() -> None:
    """Supplying the target's coordinates means exactly one HTTP call — the
    radius search at that point — with the seed leading the result."""
    # Arrange
    target_point = Coordinates(longitude=0.1, latitude=51.3)
    postcodes_io = PostcodesIoClient(radius_m=500, limit=10)
    radius_hit = _response(_nearest_payload(["BR6 6BS", "BR6 6BU", "BR6 6NX"]))

    with patch(f"{_MODULE}.httpx.get", return_value=radius_hit) as fake_get:
        # Act
        nearby = postcodes_io.nearby("BR6 6BS", target_point)

    # Assert — one call (the radius search), seed first, neighbours follow
    assert nearby == ["BR6 6BS", "BR6 6BU", "BR6 6NX"]
    assert fake_get.call_count == 1
    sent_params = fake_get.call_args.kwargs["params"]
    assert sent_params["lat"] == 51.3
    assert sent_params["lon"] == 0.1
|
||||
|
||||
|
||||
def test_nearby_resolves_the_seed_centroid_when_no_coordinates_given() -> None:
    """With no coordinates supplied, the client makes two HTTP calls: the
    seed's centroid lookup first, then the radius search from it."""
    # Arrange
    postcodes_io = PostcodesIoClient()
    replies = [
        _response({"result": {"latitude": 51.3, "longitude": 0.1}}),
        _response(_nearest_payload(["BR6 6BS", "BR6 6BU"])),
    ]

    with patch(f"{_MODULE}.httpx.get", side_effect=replies) as fake_get:
        # Act
        nearby = postcodes_io.nearby("BR6 6BS")

    # Assert — two calls: centroid then radius
    assert nearby == ["BR6 6BS", "BR6 6BU"]
    assert fake_get.call_count == 2
|
||||
|
||||
|
||||
def test_nearby_dedupes_the_seed_and_caps_at_limit() -> None:
    """Even when the radius search echoes the seed back, the seed appears once
    (in first place), and the list is truncated to ``limit`` entries."""
    # Arrange
    target = Coordinates(longitude=0.1, latitude=51.3)
    postcodes_io = PostcodesIoClient(limit=3)
    echoed = _response(
        _nearest_payload(["BR6 6BS", "BR6 6BU", "BR6 6NX", "BR6 6AA"])
    )

    with patch(f"{_MODULE}.httpx.get", return_value=echoed):
        # Act
        found = postcodes_io.nearby("BR6 6BS", target)

    # Assert
    assert found == ["BR6 6BS", "BR6 6BU", "BR6 6NX"]
    assert found.count("BR6 6BS") == 1
|
||||
|
||||
|
||||
def test_nearby_returns_just_the_seed_after_exhausting_retries(
    _no_sleep: MagicMock,
) -> None:
    """When every attempt hits a network error the client retries, then gives
    up on broadening: the seed alone is returned and each retry really ran."""
    # Arrange
    target = Coordinates(longitude=0.1, latitude=51.3)
    postcodes_io = PostcodesIoClient()

    with patch(f"{_MODULE}.httpx.get") as http_get:
        # A single exception as side_effect is raised on every call.
        http_get.side_effect = httpx.ConnectError("down")

        # Act
        found = postcodes_io.nearby("BR6 6BS", target)

    # Assert — one initial try + MAX_RETRIES, sleeping between each.
    expected_attempts = postcodes_io.MAX_RETRIES + 1
    assert found == ["BR6 6BS"]
    assert http_get.call_count == expected_attempts
    assert _no_sleep.call_count == postcodes_io.MAX_RETRIES
|
||||
|
||||
|
||||
def test_nearby_retries_a_transport_error_then_succeeds(_no_sleep: MagicMock) -> None:
    """One transient transport failure is retried; the successful follow-up
    response is returned in full."""
    # Arrange
    target = Coordinates(longitude=0.1, latitude=51.3)
    postcodes_io = PostcodesIoClient()
    outcomes = [
        httpx.ReadTimeout("slow"),
        _response(_nearest_payload(["BR6 6BS", "BR6 6BU"])),
    ]

    with patch(f"{_MODULE}.httpx.get", side_effect=outcomes) as http_get:
        # Act
        found = postcodes_io.nearby("BR6 6BS", target)

    # Assert
    assert found == ["BR6 6BS", "BR6 6BU"]
    assert http_get.call_count == 2
    assert _no_sleep.call_count == 1
|
||||
|
||||
|
||||
def test_nearby_retries_a_429_honouring_retry_after(_no_sleep: MagicMock) -> None:
    """A 429 response triggers a retry whose delay is taken from the server's
    Retry-After header."""
    # Arrange
    target = Coordinates(longitude=0.1, latitude=51.3)
    postcodes_io = PostcodesIoClient()
    throttled = _response(None, status_code=429, headers={"Retry-After": "2"})
    recovered = _response(_nearest_payload(["BR6 6BS", "BR6 6BU"]))

    with patch(f"{_MODULE}.httpx.get", side_effect=[throttled, recovered]) as http_get:
        # Act
        found = postcodes_io.nearby("BR6 6BS", target)

    # Assert — succeeded on the retry, having slept the advertised 2 seconds.
    assert found == ["BR6 6BS", "BR6 6BU"]
    assert http_get.call_count == 2
    _no_sleep.assert_called_once_with(2.0)
|
||||
|
||||
|
||||
def test_nearby_retries_a_server_error_then_succeeds(_no_sleep: MagicMock) -> None:
    """A 5xx is treated as transient and retried."""
    # Arrange
    client = PostcodesIoClient()
    coords = Coordinates(longitude=0.1, latitude=51.3)

    with patch(f"{_MODULE}.httpx.get") as mock_get:
        # First attempt fails with 503; the retry gets a normal payload.
        mock_get.side_effect = [
            _response(None, status_code=503),
            _response(_nearest_payload(["BR6 6BS"])),
        ]

        # Act
        result = client.nearby("BR6 6BS", coords)

    # Assert
    assert result == ["BR6 6BS"]
    assert mock_get.call_count == 2
    # One failed attempt means one pause before the retry — the same check the
    # transport-error test makes, so the 5xx path is pinned to back off too.
    assert _no_sleep.call_count == 1
|
||||
|
||||
|
||||
def test_nearby_returns_just_the_seed_when_centroid_unresolvable() -> None:
    """With no coordinates and a failing centroid lookup (unknown seed), the
    client hands back only the seed instead of raising."""
    # Arrange
    postcodes_io = PostcodesIoClient()
    not_found = _response(None, status_code=404)

    with patch(f"{_MODULE}.httpx.get", return_value=not_found) as http_get:
        # Act
        found: list[str] = postcodes_io.nearby("ZZ99 9ZZ")

    # Assert — a 404 is non-transient, so no retry was attempted.
    assert found == ["ZZ99 9ZZ"]
    assert http_get.call_count == 1
|
||||
|
||||
|
||||
def test_nearby_tolerates_a_null_nearest_result() -> None:
    """A ``result: null`` body from the radius search (postcodes.io's answer
    when a point has no neighbours) is handled as "no neighbours": only the
    seed is returned."""
    # Arrange
    target: Optional[Coordinates] = Coordinates(longitude=0.1, latitude=51.3)
    postcodes_io = PostcodesIoClient()
    null_body = _response({"result": None})

    with patch(f"{_MODULE}.httpx.get", return_value=null_body):
        # Act
        found = postcodes_io.nearby("BR6 6BS", target)

    # Assert
    assert found == ["BR6 6BS"]
|
||||
|
|
@ -126,3 +126,118 @@ def test_no_certs_in_the_postcode_yields_no_candidates() -> None:
|
|||
# Assert — no candidates, and the postcode was searched (normalisation/IO ran).
|
||||
assert candidates == []
|
||||
assert client.searched_postcode == "LS6 1AA"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Broadened cohort — candidates_near (ADR-0031 nearby-postcode broadening)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class _MultiPostcodeEpcClient:
    """Fake EPC client keyed by postcode.

    Each postcode maps to its own list of search results, and every postcode
    searched is appended to ``searched`` so tests can assert how far — and in
    what order — the broadened walk reached."""

    def __init__(self, by_postcode: dict[str, list[EpcSearchResult]]) -> None:
        self.searched: list[str] = []
        self._cohorts = by_postcode

    def search_by_postcode(self, postcode: str) -> list[EpcSearchResult]:
        # Record first, so even unknown postcodes show up in the search log.
        self.searched.append(postcode)
        try:
            return self._cohorts[postcode]
        except KeyError:
            return []

    def get_by_certificate_number(self, cert_num: str) -> EpcPropertyData:
        # The certificate number is ignored: every lookup yields the stock EPC.
        return _epc()
|
||||
|
||||
|
||||
class _FakeNearbyPostcodes:
    """Fake nearby-postcode source: always answers with the list it was built
    with, and logs each ``(postcode, coordinates)`` pair it is asked about."""

    def __init__(self, postcodes: list[str]) -> None:
        self.calls: list[tuple[str, Optional[Coordinates]]] = []
        self._ordered = postcodes

    def nearby(
        self, postcode: str, coordinates: Optional[Coordinates] = None
    ) -> list[str]:
        request = (postcode, coordinates)
        self.calls.append(request)
        return self._ordered
|
||||
|
||||
|
||||
def test_candidates_near_aggregates_and_dedupes_across_nearby_postcodes() -> None:
    """Candidates from every nearby postcode are merged, with a certificate
    that appears under two postcodes counted once."""
    # Arrange — three nearby postcodes; CERT-1 is re-lodged in two of them.
    cohorts = {
        "P0": [_result("CERT-1", uprn=1)],
        "P1": [_result("CERT-2", uprn=2), _result("CERT-1", uprn=1)],
        "P2": [_result("CERT-3", uprn=3)],
    }
    epc_client = _MultiPostcodeEpcClient(cohorts)
    nearby_source = _FakeNearbyPostcodes(["P0", "P1", "P2"])
    repository = EpcComparablePropertiesRepository(
        epc_client, _FakeGeospatial({}), nearby_postcodes=nearby_source
    )

    # Act — no early-stop predicate, so the whole nearby set is visited.
    found = repository.candidates_near("P0", None)

    # Assert — one candidate per distinct cert, all three postcodes searched.
    distinct_certs = set()
    for candidate in found:
        distinct_certs.add(candidate.certificate_number)
    assert distinct_certs == {"CERT-1", "CERT-2", "CERT-3"}
    assert epc_client.searched == ["P0", "P1", "P2"]
|
||||
|
||||
|
||||
def test_candidates_near_stops_early_once_enough_match() -> None:
    """Once ``minimum`` candidates satisfy ``enough``, the walk stops and the
    remaining nearby postcodes are never searched."""
    # Arrange — the seed postcode alone already yields enough matches; the two
    # further postcodes must not be fetched.
    seed_matches = [_result(f"MATCH-{i}", uprn=i) for i in range(5)]
    epc_client = _MultiPostcodeEpcClient(
        {
            "P0": seed_matches,
            "P1": [_result("OTHER-1", uprn=99)],
            "P2": [_result("OTHER-2", uprn=98)],
        }
    )
    nearby_source = _FakeNearbyPostcodes(["P0", "P1", "P2"])
    repository = EpcComparablePropertiesRepository(
        epc_client, _FakeGeospatial({}), nearby_postcodes=nearby_source
    )

    def is_match(candidate):
        return candidate.certificate_number.startswith("MATCH")

    # Act
    found = repository.candidates_near("P0", None, enough=is_match, minimum=5)

    # Assert — walk halted after the seed; the further postcodes were never hit.
    assert epc_client.searched == ["P0"]
    assert len(found) == 5
|
||||
|
||||
|
||||
def test_candidates_near_passes_coordinates_to_the_nearby_source() -> None:
    """The coordinates given to ``candidates_near`` are forwarded, together
    with the seed postcode, to the nearby-postcode source."""
    # Arrange
    target = Coordinates(longitude=0.1, latitude=51.3)
    nearby_source = _FakeNearbyPostcodes(["P0"])
    epc_client = _MultiPostcodeEpcClient({"P0": []})
    repository = EpcComparablePropertiesRepository(
        epc_client, _FakeGeospatial({}), nearby_postcodes=nearby_source
    )

    # Act
    repository.candidates_near("P0", target)

    # Assert — the target's own coordinates seed the radius search.
    expected_call = ("P0", target)
    assert nearby_source.calls == [expected_call]
|
||||
|
||||
|
||||
def test_candidates_near_without_a_source_uses_only_the_seed() -> None:
    """With no nearby-postcode source configured, only the seed postcode is
    searched and its candidates are returned."""
    # Arrange — no NearbyPostcodes configured (broadening unavailable).
    seed_cohort = {"P0": [_result("CERT-1", uprn=1)]}
    epc_client = _MultiPostcodeEpcClient(seed_cohort)
    repository = EpcComparablePropertiesRepository(epc_client, _FakeGeospatial({}))

    # Act
    found = repository.candidates_near("P0", None)

    # Assert — degrades to the seed postcode alone.
    found_certs = [candidate.certificate_number for candidate in found]
    assert epc_client.searched == ["P0"]
    assert found_certs == ["CERT-1"]
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue