diff --git a/applications/modelling_e2e/handler.py b/applications/modelling_e2e/handler.py index de7b5542..c3c16924 100644 --- a/applications/modelling_e2e/handler.py +++ b/applications/modelling_e2e/handler.py @@ -42,7 +42,10 @@ from domain.epc_prediction.comparable_properties import ( select_comparables, ) from domain.epc_prediction.epc_prediction import EpcPrediction -from domain.epc_prediction.prediction_target import build_prediction_target +from domain.epc_prediction.prediction_target import ( + PredictionTarget, + build_prediction_target, +) from domain.geospatial.coordinates import Coordinates from domain.geospatial.planning_restrictions import PlanningRestrictions from domain.geospatial.spatial_reference import SpatialReference @@ -50,6 +53,7 @@ from domain.property.property import Property, PropertyIdentity from domain.tasks.tasks import Source from harness.console import run_modelling from infrastructure.epc_client.epc_client_service import EpcClientService +from infrastructure.postcodes_io.postcodes_io_client import PostcodesIoClient from infrastructure.postgres.config import PostgresConfig from infrastructure.postgres.engine import make_engine from infrastructure.solar.google_solar_api_client import ( @@ -85,6 +89,10 @@ from utilities.logger import setup_logger _engine: Optional[Engine] = None _cohort_cache: dict[str, list[ComparableProperty]] = {} +# Broadened (nearby-postcode) cohorts, keyed by (seed postcode, target property +# type): the early-stop walk depends on the type it is filling for, so two types +# in the same postcode must not share a cached result. 
+_nearby_cohort_cache: dict[tuple[str, str], list[ComparableProperty]] = {} logger = setup_logger() @@ -140,13 +148,18 @@ def _predict_epc( attributes_reader: OverrideBackedPredictionAttributesReader, coordinates: Optional[Coordinates], cohort_for: Callable[[str], list[ComparableProperty]], + broaden: Callable[[PredictionTarget], list[ComparableProperty]], predictor: EpcPrediction, ) -> Optional[EpcPropertyData]: """Synthesise an EpcPropertyData for an EPC-less property from its postcode cohort (EPC Prediction Path 3, ADR-0031), or None when ineligible. + When the property's own postcode holds no same-type comparables (a sparse + postcode — e.g. the only flat among houses), the cohort is broadened to the + real unit postcodes physically nearest it (``broaden``) before giving up. + Returns None when property_type is unresolvable (hard cohort filter cannot - fire) or when the postcode cohort is empty after filtering. + fire) or when even the broadened cohort is empty after filtering. """ attributes = attributes_reader.attributes_for(property_id) identity = PropertyIdentity( @@ -156,6 +169,8 @@ def _predict_epc( if target is None: return None comparables = select_comparables(target, cohort_for(target.postcode)) + if not comparables.members: + comparables = select_comparables(target, broaden(target)) if not comparables.members: return None predicted = predictor.predict(target, comparables) @@ -201,7 +216,9 @@ def handler(body: dict[str, Any], context: Any) -> None: overrides_reader = PropertyOverridesPostgresReader(lambda: Session(engine)) prediction_attrs_reader = OverrideBackedPredictionAttributesReader(overrides_reader) - comparables_repo = EpcComparablePropertiesRepository(epc_client, geospatial) + comparables_repo = EpcComparablePropertiesRepository( + epc_client, geospatial, nearby_postcodes=PostcodesIoClient() + ) predictor = EpcPrediction() def _get_cohort(postcode: str) -> list[ComparableProperty]: @@ -211,6 +228,24 @@ def handler(body: dict[str, Any], 
def _broaden(target: PredictionTarget) -> list[ComparableProperty]:
    """Return the nearby-postcode cohort for ``target``, memoised.

    The cohort is fetched at most once per (postcode, property_type) pair and
    stored in ``_nearby_cohort_cache``; later targets with the same pair get
    the stored list back. The repository walk is asked to stop early once
    enough candidates share the target's property type (ADR-0031). A target
    with a falsy postcode is cached as an empty cohort without any fetch.
    """
    cache_key = (target.postcode, target.property_type)
    if cache_key in _nearby_cohort_cache:
        return _nearby_cohort_cache[cache_key]

    cohort: list[ComparableProperty]
    if target.postcode:
        cohort = comparables_repo.candidates_near(
            target.postcode,
            target.coordinates,
            # Early-stop predicate: count only candidates of the target's type.
            enough=lambda c: c.epc.property_type == target.property_type,
        )
    else:
        # No postcode to seed the search from — nothing to broaden to.
        cohort = []

    _nearby_cohort_cache[cache_key] = cohort
    return cohort
class PostcodesIoClient:
    """postcodes.io adapter: a point (or seed postcode) → nearby unit postcodes.

    Lookups are designed to soft-fail: transient failures (transport errors,
    429s, 5xx) are retried with exponential backoff, and anything else — or
    retry exhaustion — collapses to "just the seed postcode", so broadening
    degrades to "no broadening" rather than breaking prediction.
    """

    BASE_URL = "https://api.postcodes.io"
    REQUEST_TIMEOUT = 10.0
    # Transient failures (transport errors, 429s, 5xx) are retried with
    # exponential backoff; everything else (and exhaustion) soft-fails to the
    # seed, so broadening never breaks prediction.
    MAX_RETRIES = 3
    BACKOFF_BASE = 0.5
    BACKOFF_MULTIPLIER = 2.0
    MAX_BACKOFF = 8.0

    def __init__(self, *, radius_m: int = 1000, limit: int = 30) -> None:
        """``radius_m`` bounds how far the broadened cohort reaches; ``limit``
        caps how many nearby postcodes are returned (and so the per-gate-out
        fetch cost)."""
        self._radius_m = radius_m
        self._limit = limit

    def nearby(
        self, postcode: str, coordinates: Optional[Coordinates] = None
    ) -> list[str]:
        """Return the unit postcodes around ``postcode``, seed first.

        ``coordinates`` (the target's own, resolved from its UPRN) is used when
        given, sparing a postcode→centroid round-trip; otherwise the seed
        postcode's centroid is looked up first. The neighbours keep the order
        the radius search returned them in.

        Returns just ``[postcode]`` when the seed's centroid cannot be resolved
        or the radius search yields nothing. The seed is always the first
        element, spelled exactly as the caller passed it, and the list holds at
        most ``limit`` entries (never fewer than the seed itself).
        """
        point = coordinates if coordinates is not None else self._centroid_of(postcode)
        if point is None:
            return [postcode]

        ordered = [postcode]
        # Dedupe on a canonical key so a differently spelled seed ("br66bs" vs
        # the "BR6 6BS" the API echoes back) is not listed — and later
        # searched — twice.
        seen = {self._canonical(postcode)}
        for found in self._nearest_to(point):
            key = self._canonical(found)
            if key in seen:
                continue
            seen.add(key)
            ordered.append(found)
        # A non-positive limit must not slice the seed away.
        return ordered[: max(self._limit, 1)]

    @staticmethod
    def _canonical(postcode: str) -> str:
        """Comparison key for a postcode: upper-cased with all whitespace removed."""
        return "".join(postcode.split()).upper()

    def _centroid_of(self, postcode: str) -> Optional[Coordinates]:
        """Resolve ``postcode`` to its centroid, or None when the lookup fails
        or the payload carries no usable latitude/longitude."""
        from urllib.parse import quote

        # Percent-encode the path segment so stray characters in a dirty
        # postcode cannot reshape the request path or produce an invalid URL.
        segment = quote(postcode.replace(" ", ""), safe="")
        result = self._get(f"/postcodes/{segment}")
        if result is None:
            return None
        latitude: Any = result.get("latitude")
        longitude: Any = result.get("longitude")
        if latitude is None or longitude is None:
            return None
        try:
            lon, lat = float(longitude), float(latitude)
        except (TypeError, ValueError):
            # Malformed payload — treat as "centroid unknown", not a crash.
            return None
        return Coordinates(longitude=lon, latitude=lat)

    def _nearest_to(self, point: Coordinates) -> list[str]:
        """Run the radius search around ``point`` and return the postcodes of
        the rows that carry one, in response order."""
        results = self._get_list(
            "/postcodes",
            {
                "lon": point.longitude,
                "lat": point.latitude,
                "radius": self._radius_m,
                "limit": self._limit,
            },
        )
        return [str(row["postcode"]) for row in results if row.get("postcode")]

    def _get(self, path: str) -> Optional[dict[str, Any]]:
        """GET ``path`` and return its ``result`` only when it is an object."""
        payload = self._call(path, None)
        return payload if isinstance(payload, dict) else None

    def _get_list(self, path: str, params: dict[str, Any]) -> list[dict[str, Any]]:
        """GET ``path`` and return the object rows of a list ``result``; any
        other payload shape (including ``null``) yields an empty list."""
        payload = self._call(path, params)
        if not isinstance(payload, list):
            return []
        return [row for row in payload if isinstance(row, dict)]

    def _call(self, path: str, params: Optional[dict[str, Any]]) -> Any:
        """One GET against postcodes.io, retrying transient failures (transport
        errors, 429s, 5xx) with exponential backoff. Returns the parsed
        ``result`` payload, or None on a non-transient failure (e.g. an unknown
        postcode's 404) or once retries are exhausted — broadening then falls
        back to the seed alone."""
        for attempt in range(self.MAX_RETRIES + 1):
            try:
                response = httpx.get(
                    f"{self.BASE_URL}{path}",
                    params=params,
                    timeout=self.REQUEST_TIMEOUT,
                )
            except httpx.TransportError:
                if not self._sleep_before_retry(attempt, retry_after=None):
                    return None
                continue
            except (httpx.HTTPError, httpx.InvalidURL):
                # Non-transient client-side error. InvalidURL is listed
                # explicitly because it does not derive from HTTPError.
                return None
            if self._is_transient(response.status_code):
                if not self._sleep_before_retry(
                    attempt, retry_after=self._retry_after(response)
                ):
                    return None
                continue
            if not response.is_success:
                return None
            try:
                body: Any = response.json()
            except ValueError:
                return None
            return body.get("result") if isinstance(body, dict) else None
        return None

    def _sleep_before_retry(self, attempt: int, retry_after: Optional[float]) -> bool:
        """Sleep before the next attempt and report whether one remains; on the
        final attempt, return False so the caller soft-fails instead of looping.

        The delay is ``retry_after`` when given, otherwise exponential backoff,
        and is always clamped into ``[0, MAX_BACKOFF]``.
        """
        if attempt >= self.MAX_RETRIES:
            return False
        if retry_after is not None:
            delay = retry_after
        else:
            delay = self.BACKOFF_BASE * (self.BACKOFF_MULTIPLIER**attempt)
        # time.sleep raises ValueError on a negative (or NaN) delay; the lower
        # clamp keeps a bad delay from turning a retry into a crash.
        time.sleep(max(0.0, min(delay, self.MAX_BACKOFF)))
        return True

    @staticmethod
    def _is_transient(status_code: int) -> bool:
        """True for statuses worth retrying: 429 and any 5xx."""
        return status_code == 429 or status_code >= 500

    @staticmethod
    def _retry_after(response: httpx.Response) -> Optional[float]:
        """The ``Retry-After`` header as seconds, or None when it is absent or
        not a usable non-negative number (e.g. an HTTP-date, a negative value,
        or NaN) — the caller then falls back to exponential backoff."""
        header = response.headers.get("Retry-After")
        if header is None:
            return None
        try:
            seconds = float(header)
        except (TypeError, ValueError):
            return None
        # `not >=` (rather than `<`) also rejects NaN, which compares False.
        if not seconds >= 0.0:
            return None
        return seconds
b/repositories/comparable_properties/epc_comparable_properties_repository.py @@ -10,7 +10,7 @@ UPRNs share a partition). Register metadata the cert itself doesn't carry from __future__ import annotations from datetime import date -from typing import Optional, Protocol +from typing import Callable, Optional, Protocol from datatypes.epc.domain.epc_property_data import EpcPropertyData from datatypes.epc.search.epc_search_result import EpcSearchResult @@ -20,6 +20,11 @@ from repositories.comparable_properties.comparable_properties_repository import ComparablePropertiesRepository, ) +# The same default floor `select_comparables` uses: keep walking nearby postcodes +# until this many candidates match, so the broadened cohort is big enough for the +# downstream relax ladder rather than stopping at the first stray match. +_DEFAULT_MINIMUM_COHORT = 5 + class CohortEpcClient(Protocol): """The slice of the EPC-API client the cohort fetch needs (e.g. @@ -38,12 +43,26 @@ class CohortGeospatial(Protocol): ) -> dict[int, Coordinates]: ... +class NearbyPostcodes(Protocol): + """Resolves the real unit postcodes physically near a seed postcode (e.g. + `PostcodesIoClient`). The gov EPC API cannot search by radius, so this is how + the cohort reaches beyond the target's own postcode (ADR-0031).""" + + def nearby( + self, postcode: str, coordinates: Optional[Coordinates] = None + ) -> list[str]: ... 
def candidates_near(
    self,
    postcode: str,
    coordinates: Optional[Coordinates] = None,
    *,
    enough: Optional[Callable[[ComparableProperty], bool]] = None,
    minimum: int = _DEFAULT_MINIMUM_COHORT,
) -> list[ComparableProperty]:
    """Build the broadened cohort from the postcodes around ``postcode``.

    The postcodes to visit come from the configured ``NearbyPostcodes`` source
    (given ``postcode`` and ``coordinates``), or are just ``[postcode]`` when
    no source is configured. They are visited in the order supplied; each
    one's candidates are appended unless their certificate number was already
    collected from an earlier postcode (ADR-0031).

    When ``enough`` is given, newly collected candidates that satisfy it are
    counted, and the walk stops after the first postcode that brings the count
    to ``minimum`` or more — the remaining postcodes are not fetched. The check
    runs once per postcode, so a postcode is always consumed whole. Without
    ``enough`` every postcode is visited.
    """
    source = self._nearby_postcodes
    walk = [postcode] if source is None else source.nearby(postcode, coordinates)
    may_stop_early = enough is not None

    cohort: list[ComparableProperty] = []
    collected_certs: set[str] = set()
    matched = 0
    for unit_postcode in walk:
        for candidate in self.candidates_for(unit_postcode):
            cert = candidate.certificate_number
            if cert in collected_certs:
                # Same certificate already collected from a nearer postcode.
                continue
            collected_certs.add(cert)
            cohort.append(candidate)
            if may_stop_early and enough(candidate):
                matched += 1
        if may_stop_early and matched >= minimum:
            break
    return cohort
cohort_for: Callable[[str], list[ComparableProperty]], + broaden: Callable[[PredictionTarget], list[ComparableProperty]], predictor: EpcPrediction, ) -> Optional[EpcPropertyData]: """Synthesise an EpcPropertyData for an EPC-less Property from its postcode @@ -408,7 +413,8 @@ def _predict_epc( The cohort is found by POSTCODE, so a wrong postcode on the property row yields the wrong neighbours — a prediction is only as good as the postcode it - is given.""" + is given. When the own postcode holds no same-type comparables, the cohort is + broadened to the real unit postcodes physically nearest it (``broaden``).""" attributes = attributes_reader.attributes_for(property_id) identity = PropertyIdentity( portfolio_id=portfolio_id, postcode=postcode, address="", uprn=uprn @@ -418,7 +424,10 @@ def _predict_epc( return None # property_type unresolvable — gated out of prediction comparables = select_comparables(target, cohort_for(target.postcode)) if not comparables.members: - return None # no comparable neighbours in the postcode + # Sparse own postcode — reach out to the nearest real postcodes. + comparables = select_comparables(target, broaden(target)) + if not comparables.members: + return None # no comparable neighbours nearby either predicted = predictor.predict(target, comparables) # The calculator needs a MAIN building part; a cohort whose template carries # none (e.g. a malformed flat record) yields an unscoreable picture, so reject @@ -684,9 +693,12 @@ def main() -> None: # from the live EPC API (search-by-postcode + per-cert fetch), memoised per # postcode so co-located missing Properties don't refetch the same cohort. 
def broaden(target: PredictionTarget) -> list[ComparableProperty]:
    """Return the nearby-postcode cohort for ``target``, memoised.

    Results are stored in ``_nearby_cohort_cache`` under
    (postcode, property_type), so targets sharing both reuse one repository
    walk. The walk is asked to stop early once enough candidates share the
    target's property type (ADR-0031). A target with a falsy postcode is
    cached as an empty cohort without any fetch.
    """
    memo_key = (target.postcode, target.property_type)
    cohort = _nearby_cohort_cache.get(memo_key)
    if cohort is None:  # stored values are always lists, so None means "absent"
        if not target.postcode:
            cohort = []
        else:
            cohort = comparables_repo.candidates_near(
                target.postcode,
                target.coordinates,
                # Early-stop predicate: count only candidates of the target's type.
                enough=lambda c: c.epc.property_type == target.property_type,
            )
        _nearby_cohort_cache[memo_key] = cohort
    return cohort
catalogue_session = Session(engine) @@ -831,12 +860,14 @@ def main() -> None: attributes_reader=prediction_attributes, coordinates=coordinates, cohort_for=cohort_for, + broaden=broaden, predictor=predictor, ) if predicted_epc is None: raise ValueError( f"no EPC for UPRN {uprn} and not predictable " - f"(unresolved property_type or empty '{postcode}' cohort)" + f"(unresolved property_type, or no same-type " + f"comparables in or near '{postcode}')" ) # Property.effective_epc folds any Landlord Overrides onto the # synthesised EPC (cohort fills the unknown fields, the landlord's diff --git a/tests/applications/modelling_e2e/test_handler.py b/tests/applications/modelling_e2e/test_handler.py index 762cea60..92697508 100644 --- a/tests/applications/modelling_e2e/test_handler.py +++ b/tests/applications/modelling_e2e/test_handler.py @@ -81,6 +81,7 @@ def _clear_cohort_cache() -> None: import applications.modelling_e2e.handler as h h._cohort_cache.clear() + h._nearby_cohort_cache.clear() # --------------------------------------------------------------------------- @@ -396,6 +397,124 @@ def test_empty_cohort_gates_property_out_and_raises() -> None: MockUoW.return_value.__enter__.assert_not_called() +# --------------------------------------------------------------------------- +# Broadened cohort — sparse own postcode falls back to nearby postcodes +# --------------------------------------------------------------------------- + + +def test_empty_own_postcode_broadens_to_nearby_and_predicts() -> None: + """When the property's own postcode holds no same-type comparables, the + handler broadens to the nearby-postcode cohort (candidates_near) and, finding + comparables there, synthesises the EPC and saves the plan.""" + # Arrange + mock_engine = _engine_mock([PROPERTY_ID], [UPRN], [POSTCODE]) + mock_plan = _plan_mock() + mock_uow = MagicMock() + + mock_predicted_epc = MagicMock() + from datatypes.epc.domain.epc_property_data import BuildingPartIdentifier + + mock_part = 
MagicMock() + mock_part.identifier = BuildingPartIdentifier.MAIN + mock_predicted_epc.sap_building_parts = [mock_part] + + # First select_comparables (own postcode) is empty → broaden; the second + # (nearby cohort) finds comparables. + empty_comparables = MagicMock() + empty_comparables.members = [] + found_comparables = MagicMock() + found_comparables.members = [MagicMock()] + + with ExitStack() as stack: + stack.enter_context( + patch("applications.modelling_e2e.handler.os.environ", _ENV) + ) + stack.enter_context( + patch( + "applications.modelling_e2e.handler._get_engine", + return_value=mock_engine, + ) + ) + stack.enter_context( + patch("applications.modelling_e2e.handler.EpcClientService") + ).return_value.get_by_uprn.return_value = None # no lodged EPC + stack.enter_context( + patch("applications.modelling_e2e.handler.GeospatialS3Repository") + ) + stack.enter_context( + patch("applications.modelling_e2e.handler.GoogleSolarApiClient") + ) + stack.enter_context( + patch("applications.modelling_e2e.handler._spatial_for", return_value=None) + ) + stack.enter_context( + patch( + "applications.modelling_e2e.handler._solar_insights_for", + return_value=None, + ) + ) + stack.enter_context( + patch("applications.modelling_e2e.handler.overlays_from", return_value=[]) + ) + stack.enter_context( + patch("applications.modelling_e2e.handler.PropertyOverridesPostgresReader") + ) + from domain.epc_prediction.prediction_target import PredictionTargetAttributes + + stack.enter_context( + patch( + "applications.modelling_e2e.handler.OverrideBackedPredictionAttributesReader" + ) + ).return_value.attributes_for.return_value = PredictionTargetAttributes( + property_type="2" + ) + MockRepo = stack.enter_context( + patch( + "applications.modelling_e2e.handler.EpcComparablePropertiesRepository" + ) + ) + MockRepo.return_value.candidates_for.return_value = [] + MockRepo.return_value.candidates_near.return_value = [MagicMock()] + stack.enter_context( + patch( + 
"applications.modelling_e2e.handler.select_comparables", + side_effect=[empty_comparables, found_comparables], + ) + ) + stack.enter_context( + patch("applications.modelling_e2e.handler.EpcPrediction") + ).return_value.predict.return_value = mock_predicted_epc + stack.enter_context( + patch("applications.modelling_e2e.handler.ScenarioPostgresRepository") + ).return_value.get_many.return_value = [MagicMock()] + stack.enter_context( + patch( + "applications.modelling_e2e.handler.catalogue_with_off_catalogue_overrides" + ) + ) + stack.enter_context(patch("applications.modelling_e2e.handler.Session")) + stack.enter_context( + patch( + "applications.modelling_e2e.handler.run_modelling", + return_value=mock_plan, + ) + ) + MockUoW = stack.enter_context( + patch("applications.modelling_e2e.handler.PostgresUnitOfWork") + ) + MockUoW.return_value.__enter__.return_value = mock_uow + MockUoW.return_value.__exit__.return_value = False + + # Act + _call_handler(_BODY) + + # Assert — broadening fired, and the broadened cohort produced a saved plan. 
+ MockRepo.return_value.candidates_near.assert_called_once() + mock_uow.epc.save.assert_not_called() # predicted, never lodged + mock_uow.plan.save.assert_called_once() + mock_uow.commit.assert_called_once() + + # --------------------------------------------------------------------------- # Partial batch failure # --------------------------------------------------------------------------- diff --git a/tests/infrastructure/postcodes_io/__init__.py b/tests/infrastructure/postcodes_io/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/infrastructure/postcodes_io/test_postcodes_io_client.py b/tests/infrastructure/postcodes_io/test_postcodes_io_client.py new file mode 100644 index 00000000..708ac964 --- /dev/null +++ b/tests/infrastructure/postcodes_io/test_postcodes_io_client.py @@ -0,0 +1,223 @@ +"""PostcodesIoClient — coordinate/seed postcode → the real unit postcodes near +it, via postcodes.io's keyless nearest endpoint. Failure degrades to the seed +alone so broadening never breaks prediction.""" + +from __future__ import annotations + +from typing import Any, Iterator, Optional +from unittest.mock import MagicMock, patch + +import httpx +import pytest + +from domain.geospatial.coordinates import Coordinates +from infrastructure.postcodes_io.postcodes_io_client import PostcodesIoClient + +_MODULE = "infrastructure.postcodes_io.postcodes_io_client" + + +@pytest.fixture(autouse=True) +def _no_sleep() -> Iterator[MagicMock]: + """Never actually sleep during backoff — just record the calls.""" + with patch(f"{_MODULE}.time.sleep") as sleep: + yield sleep + + +def _response( + payload: Any, + *, + status_code: int = 200, + headers: Optional[dict[str, str]] = None, +) -> MagicMock: + resp = MagicMock() + resp.status_code = status_code + resp.is_success = 200 <= status_code < 300 + resp.headers = headers if headers is not None else {} + resp.json.return_value = payload + return resp + + +def _nearest_payload(postcodes: list[str]) -> dict[str, 
Any]: + return {"result": [{"postcode": p} for p in postcodes]} + + +def test_nearby_with_coordinates_skips_the_centroid_lookup() -> None: + """When the target's own coordinates are passed, only the radius search is + issued — no postcode→centroid round-trip — and the seed leads the result.""" + # Arrange + client = PostcodesIoClient(radius_m=500, limit=10) + coords = Coordinates(longitude=0.1, latitude=51.3) + + with patch(f"{_MODULE}.httpx.get") as mock_get: + mock_get.return_value = _response( + _nearest_payload(["BR6 6BS", "BR6 6BU", "BR6 6NX"]) + ) + + # Act + result = client.nearby("BR6 6BS", coords) + + # Assert — one call (the radius search), seed first, neighbours follow + assert result == ["BR6 6BS", "BR6 6BU", "BR6 6NX"] + assert mock_get.call_count == 1 + _, kwargs = mock_get.call_args + assert kwargs["params"]["lat"] == 51.3 + assert kwargs["params"]["lon"] == 0.1 + + +def test_nearby_resolves_the_seed_centroid_when_no_coordinates_given() -> None: + """Without coordinates the client first resolves the seed's own centroid via + postcodes.io, then runs the radius search from it.""" + # Arrange + client = PostcodesIoClient() + centroid = {"result": {"latitude": 51.3, "longitude": 0.1}} + + with patch(f"{_MODULE}.httpx.get") as mock_get: + mock_get.side_effect = [ + _response(centroid), + _response(_nearest_payload(["BR6 6BS", "BR6 6BU"])), + ] + + # Act + result = client.nearby("BR6 6BS") + + # Assert — two calls: centroid then radius + assert result == ["BR6 6BS", "BR6 6BU"] + assert mock_get.call_count == 2 + + +def test_nearby_dedupes_the_seed_and_caps_at_limit() -> None: + """The seed always leads exactly once even when the radius search echoes it, + and the result is capped at ``limit``.""" + # Arrange + client = PostcodesIoClient(limit=3) + coords = Coordinates(longitude=0.1, latitude=51.3) + + with patch(f"{_MODULE}.httpx.get") as mock_get: + mock_get.return_value = _response( + _nearest_payload(["BR6 6BS", "BR6 6BU", "BR6 6NX", "BR6 6AA"]) + ) + + 
# Act + result = client.nearby("BR6 6BS", coords) + + # Assert + assert result == ["BR6 6BS", "BR6 6BU", "BR6 6NX"] + assert result.count("BR6 6BS") == 1 + + +def test_nearby_returns_just_the_seed_after_exhausting_retries( + _no_sleep: MagicMock, +) -> None: + """A persistent network error is retried, then degrades to broadening-off: + only the seed comes back, and the retries were actually attempted.""" + # Arrange + client = PostcodesIoClient() + coords = Coordinates(longitude=0.1, latitude=51.3) + + with patch( + f"{_MODULE}.httpx.get", side_effect=httpx.ConnectError("down") + ) as mock_get: + # Act + result = client.nearby("BR6 6BS", coords) + + # Assert — one initial try + MAX_RETRIES, sleeping between each. + assert result == ["BR6 6BS"] + assert mock_get.call_count == client.MAX_RETRIES + 1 + assert _no_sleep.call_count == client.MAX_RETRIES + + +def test_nearby_retries_a_transport_error_then_succeeds(_no_sleep: MagicMock) -> None: + """A transient transport error is retried, and the subsequent success is + returned in full.""" + # Arrange + client = PostcodesIoClient() + coords = Coordinates(longitude=0.1, latitude=51.3) + + with patch(f"{_MODULE}.httpx.get") as mock_get: + mock_get.side_effect = [ + httpx.ReadTimeout("slow"), + _response(_nearest_payload(["BR6 6BS", "BR6 6BU"])), + ] + + # Act + result = client.nearby("BR6 6BS", coords) + + # Assert + assert result == ["BR6 6BS", "BR6 6BU"] + assert mock_get.call_count == 2 + assert _no_sleep.call_count == 1 + + +def test_nearby_retries_a_429_honouring_retry_after(_no_sleep: MagicMock) -> None: + """A 429 is retried, and the server's Retry-After drives the backoff delay.""" + # Arrange + client = PostcodesIoClient() + coords = Coordinates(longitude=0.1, latitude=51.3) + + with patch(f"{_MODULE}.httpx.get") as mock_get: + mock_get.side_effect = [ + _response(None, status_code=429, headers={"Retry-After": "2"}), + _response(_nearest_payload(["BR6 6BS", "BR6 6BU"])), + ] + + # Act + result = 
client.nearby("BR6 6BS", coords) + + # Assert — succeeded on the retry, having slept the advertised 2 seconds. + assert result == ["BR6 6BS", "BR6 6BU"] + assert mock_get.call_count == 2 + _no_sleep.assert_called_once_with(2.0) + + +def test_nearby_retries_a_server_error_then_succeeds(_no_sleep: MagicMock) -> None: + """A 5xx is treated as transient and retried.""" + # Arrange + client = PostcodesIoClient() + coords = Coordinates(longitude=0.1, latitude=51.3) + + with patch(f"{_MODULE}.httpx.get") as mock_get: + mock_get.side_effect = [ + _response(None, status_code=503), + _response(_nearest_payload(["BR6 6BS"])), + ] + + # Act + result = client.nearby("BR6 6BS", coords) + + # Assert + assert result == ["BR6 6BS"] + assert mock_get.call_count == 2 + + +def test_nearby_returns_just_the_seed_when_centroid_unresolvable() -> None: + """An unknown seed (no coordinates, centroid lookup fails) yields the seed + alone rather than raising.""" + # Arrange + client = PostcodesIoClient() + + with patch(f"{_MODULE}.httpx.get") as mock_get: + mock_get.return_value = _response(None, status_code=404) + + # Act + result: list[str] = client.nearby("ZZ99 9ZZ") + + # Assert — a 404 is non-transient, so no retry was attempted. 
+ assert result == ["ZZ99 9ZZ"] + assert mock_get.call_count == 1 + + +def test_nearby_tolerates_a_null_nearest_result() -> None: + """postcodes.io returns ``result: null`` when a point has no neighbours; the + client treats that as an empty neighbour set (seed only).""" + # Arrange + client = PostcodesIoClient() + coords: Optional[Coordinates] = Coordinates(longitude=0.1, latitude=51.3) + + with patch(f"{_MODULE}.httpx.get") as mock_get: + mock_get.return_value = _response({"result": None}) + + # Act + result = client.nearby("BR6 6BS", coords) + + # Assert + assert result == ["BR6 6BS"] diff --git a/tests/repositories/comparable_properties/test_epc_comparable_properties_repository.py b/tests/repositories/comparable_properties/test_epc_comparable_properties_repository.py index 662d5b75..f9c6ecdc 100644 --- a/tests/repositories/comparable_properties/test_epc_comparable_properties_repository.py +++ b/tests/repositories/comparable_properties/test_epc_comparable_properties_repository.py @@ -126,3 +126,118 @@ def test_no_certs_in_the_postcode_yields_no_candidates() -> None: # Assert — no candidates, and the postcode was searched (normalisation/IO ran). 
assert candidates == [] assert client.searched_postcode == "LS6 1AA"
+
+
+# ---------------------------------------------------------------------------
+# Broadened cohort — candidates_near (ADR-0031 nearby-postcode broadening)
+# ---------------------------------------------------------------------------
+
+
+class _MultiPostcodeEpcClient:
+    """Serves a different cohort per postcode and records every search, so the
+    broadened walk's reach and ordering can be asserted."""
+
+    def __init__(self, by_postcode: dict[str, list[EpcSearchResult]]) -> None:
+        self._by_postcode = by_postcode
+        self.searched: list[str] = []
+
+    def search_by_postcode(self, postcode: str) -> list[EpcSearchResult]:
+        self.searched.append(postcode)
+        return self._by_postcode.get(postcode, [])
+
+    def get_by_certificate_number(self, cert_num: str) -> EpcPropertyData:
+        return _epc()
+
+
+class _FakeNearbyPostcodes:
+    """Returns a fixed nearest-first list and records each (seed, coordinates) call."""
+
+    def __init__(self, postcodes: list[str]) -> None:
+        self._postcodes = postcodes
+        self.calls: list[tuple[str, Optional[Coordinates]]] = []
+
+    def nearby(
+        self, postcode: str, coordinates: Optional[Coordinates] = None
+    ) -> list[str]:
+        self.calls.append((postcode, coordinates))
+        return self._postcodes
+
+
+def test_candidates_near_aggregates_and_dedupes_across_nearby_postcodes() -> None:
+    # Arrange — three nearby postcodes; CERT-1 is re-lodged in two of them.
+    client = _MultiPostcodeEpcClient(
+        {
+            "P0": [_result("CERT-1", uprn=1)],
+            "P1": [_result("CERT-2", uprn=2), _result("CERT-1", uprn=1)],
+            "P2": [_result("CERT-3", uprn=3)],
+        }
+    )
+    nearby = _FakeNearbyPostcodes(["P0", "P1", "P2"])
+    repo = EpcComparablePropertiesRepository(
+        client, _FakeGeospatial({}), nearby_postcodes=nearby
+    )
+
+    # Act — no early-stop predicate, so the whole nearby set is visited.
+    candidates = repo.candidates_near("P0", None)
+
+    # Assert — one candidate per cert (a list, so a dupe fails); all postcodes searched.
+    certs = sorted(c.certificate_number for c in candidates)
+    assert certs == ["CERT-1", "CERT-2", "CERT-3"]
+    assert client.searched == ["P0", "P1", "P2"]
+
+
+def test_candidates_near_stops_early_once_enough_match() -> None:
+    # Arrange — the seed postcode alone already yields enough matches; the two
+    # further postcodes must not be fetched.
+    client = _MultiPostcodeEpcClient(
+        {
+            "P0": [_result(f"MATCH-{i}", uprn=i) for i in range(5)],
+            "P1": [_result("OTHER-1", uprn=99)],
+            "P2": [_result("OTHER-2", uprn=98)],
+        }
+    )
+    nearby = _FakeNearbyPostcodes(["P0", "P1", "P2"])
+    repo = EpcComparablePropertiesRepository(
+        client, _FakeGeospatial({}), nearby_postcodes=nearby
+    )
+
+    # Act
+    candidates = repo.candidates_near(
+        "P0",
+        None,
+        enough=lambda c: c.certificate_number.startswith("MATCH"),
+        minimum=5,
+    )
+
+    # Assert — walk halted after the seed; the further postcodes were never hit.
+    assert client.searched == ["P0"]
+    assert len(candidates) == 5
+
+
+def test_candidates_near_passes_coordinates_to_the_nearby_source() -> None:
+    # Arrange
+    here = Coordinates(longitude=0.1, latitude=51.3)
+    client = _MultiPostcodeEpcClient({"P0": []})
+    nearby = _FakeNearbyPostcodes(["P0"])
+    repo = EpcComparablePropertiesRepository(
+        client, _FakeGeospatial({}), nearby_postcodes=nearby
+    )
+
+    # Act
+    repo.candidates_near("P0", here)
+
+    # Assert — the target's own coordinates seed the radius search.
+    assert nearby.calls == [("P0", here)]
+
+
+def test_candidates_near_without_a_source_uses_only_the_seed() -> None:
+    # Arrange — no NearbyPostcodes configured (broadening unavailable).
+    client = _MultiPostcodeEpcClient({"P0": [_result("CERT-1", uprn=1)]})
+    repo = EpcComparablePropertiesRepository(client, _FakeGeospatial({}))
+
+    # Act
+    candidates = repo.candidates_near("P0", None)
+
+    # Assert — degrades to the seed postcode alone.
+    assert client.searched == ["P0"]
+    assert [c.certificate_number for c in candidates] == ["CERT-1"]