resolve merge conflicts

This commit is contained in:
Jun-te Kim 2026-06-23 18:09:09 +00:00
commit 390da1e491
16 changed files with 1080 additions and 63 deletions

View file

@ -44,14 +44,23 @@ from domain.epc_prediction.comparable_properties import (
select_comparables,
)
from domain.epc_prediction.epc_prediction import EpcPrediction
from domain.epc_prediction.prediction_target import build_prediction_target
from domain.epc_prediction.prediction_target import (
PredictionTarget,
build_prediction_target,
)
from domain.geospatial.coordinates import Coordinates
from domain.geospatial.planning_restrictions import PlanningRestrictions
from domain.geospatial.spatial_reference import SpatialReference
from domain.property.property import Property, PropertyIdentity
from domain.property_baseline.calculator_rebaseliner import CalculatorRebaseliner
from domain.sap10_calculator.calculator import Sap10Calculator
from domain.tasks.tasks import Source
from harness.console import run_modelling
from orchestration.property_baseline_orchestrator import (
PropertyBaselineOrchestrator,
)
from infrastructure.epc_client.epc_client_service import EpcClientService
from infrastructure.postcodes_io.postcodes_io_client import PostcodesIoClient
from infrastructure.postgres.config import PostgresConfig
from infrastructure.postgres.engine import make_engine
from infrastructure.solar.google_solar_api_client import (
@ -69,6 +78,9 @@ from repositories.geospatial.geospatial_s3_repository import (
GeospatialS3Repository,
ParquetReader,
)
from repositories.fuel_rates.fuel_rates_static_file_repository import (
FuelRatesStaticFileRepository,
)
from repositories.postgres_unit_of_work import PostgresUnitOfWork
from repositories.product.composite_product_repository import (
catalogue_with_off_catalogue_overrides,
@ -89,6 +101,10 @@ from utilities.logger import setup_logger
_engine: Optional[Engine] = None
_cohort_cache: dict[str, list[ComparableProperty]] = {}
# Broadened (nearby-postcode) cohorts, keyed by (seed postcode, target property
# type): the early-stop walk depends on the type it is filling for, so two types
# in the same postcode must not share a cached result.
_nearby_cohort_cache: dict[tuple[str, str], list[ComparableProperty]] = {}
logger = setup_logger()
@ -162,13 +178,18 @@ def _predict_epc(
attributes_reader: OverrideBackedPredictionAttributesReader,
coordinates: Optional[Coordinates],
cohort_for: Callable[[str], list[ComparableProperty]],
broaden: Callable[[PredictionTarget], list[ComparableProperty]],
predictor: EpcPrediction,
) -> Optional[EpcPropertyData]:
"""Synthesise an EpcPropertyData for an EPC-less property from its postcode
cohort (EPC Prediction Path 3, ADR-0031), or None when ineligible.
When the property's own postcode holds no same-type comparables (a sparse
postcode e.g. the only flat among houses), the cohort is broadened to the
real unit postcodes physically nearest it (``broaden``) before giving up.
Returns None when property_type is unresolvable (hard cohort filter cannot
fire) or when the postcode cohort is empty after filtering.
fire) or when even the broadened cohort is empty after filtering.
"""
attributes = attributes_reader.attributes_for(property_id)
identity = PropertyIdentity(
@ -178,6 +199,8 @@ def _predict_epc(
if target is None:
return None
comparables = select_comparables(target, cohort_for(target.postcode))
if not comparables.members:
comparables = select_comparables(target, broaden(target))
if not comparables.members:
return None
predicted = predictor.predict(target, comparables)
@ -223,7 +246,9 @@ def handler(body: dict[str, Any], context: Any) -> Optional[dict[str, Any]]:
overrides_reader = PropertyOverridesPostgresReader(lambda: Session(engine))
prediction_attrs_reader = OverrideBackedPredictionAttributesReader(overrides_reader)
comparables_repo = EpcComparablePropertiesRepository(epc_client, geospatial)
comparables_repo = EpcComparablePropertiesRepository(
epc_client, geospatial, nearby_postcodes=PostcodesIoClient()
)
predictor = EpcPrediction()
def _get_cohort(postcode: str) -> list[ComparableProperty]:
@ -233,6 +258,34 @@ def handler(body: dict[str, Any], context: Any) -> Optional[dict[str, Any]]:
)
return _cohort_cache[postcode]
def _broaden(target: PredictionTarget) -> list[ComparableProperty]:
"""The nearby-postcode cohort for a gated-out target — the real unit
postcodes nearest it, walked until enough same-type comparables surface
(ADR-0034). Memoised per (postcode, property_type) so co-located
same-type misses share one walk."""
key = (target.postcode, target.property_type)
if key not in _nearby_cohort_cache:
_nearby_cohort_cache[key] = (
comparables_repo.candidates_near(
target.postcode,
target.coordinates,
enough=lambda c: c.epc.property_type == target.property_type,
)
if target.postcode
else []
)
return _nearby_cohort_cache[key]
# Re-establishes each lodged Property's Baseline Performance from the just-
# persisted EPC (one UoW per property, committed after the Plan's). Predicted
# Properties have no lodged figures, so they get no baseline (mirrors the e2e
# runner and the ara_first_run Baseline stage).
baseline_orchestrator = PropertyBaselineOrchestrator(
unit_of_work=lambda: PostgresUnitOfWork(lambda: Session(engine)),
rebaseliner=CalculatorRebaseliner(Sap10Calculator()),
fuel_rates=FuelRatesStaticFileRepository(),
)
read_session = Session(engine)
try:
scenario = ScenarioPostgresRepository(read_session).get_many([scenario_id])[0]
@ -284,12 +337,14 @@ def handler(body: dict[str, Any], context: Any) -> Optional[dict[str, Any]]:
attributes_reader=prediction_attrs_reader,
coordinates=coordinates,
cohort_for=_get_cohort,
broaden=_broaden,
predictor=predictor,
)
if predicted_epc is None:
raise ValueError(
f"no EPC for UPRN {uprn} and not predictable "
f"(unresolved property_type or empty '{postcode}' cohort)"
f"(unresolved property_type, or no same-type "
f"comparables in or near '{postcode}')"
)
effective_epc = Property(
identity=PropertyIdentity(
@ -375,6 +430,13 @@ def handler(body: dict[str, Any], context: Any) -> Optional[dict[str, Any]]:
uow.commit()
logger.info(f"property={property_id} plan saved")
# Baseline Performance is re-established from the persisted EPC, so
# it runs after the Plan UoW commits. Only lodged Properties have
# the lodged figures the Baseline reads; predicted ones are skipped.
if epc is not None:
baseline_orchestrator.run([property_id])
logger.info(f"property={property_id} baseline saved")
except Exception as error: # noqa: BLE001
logger.error(
f"property={property_id} uprn={uprns.get(property_id)}: "

View file

@ -0,0 +1,90 @@
# EPC Prediction cohort broadens to nearby postcodes
ADR-0029 sizes an EPC-less Property from the **Comparable Properties** in its
postcode; ADR-0031 decision 5 made `property_type` the **hard** cohort filter — a
flat is never sized from houses — and gated out any Property with no same-type
comparable. This records how we **broaden** the cohort when a Property's own
postcode is too sparse to fill, rather than gating it out. Resolved against a
real failure on branch `feature/e2e-runs`: property 718580, the only flat lodged
in postcode `BR6 6BS` (the other 8 certs were houses/bungalows), gated out and
failed its modelling subtask.
## Status
Accepted. Extends ADR-0029 / ADR-0031 decision 5 (the postcode cohort).
## Context
ADR-0031 decision 5 anticipated this — "an Ordnance Survey `postcode_search`
source can supply property type more broadly … wiring it is a later enhancement
that widens the eligible population." The gating it describes is correct (never
size a flat from a mixed-type cohort), but it makes prediction only as good as
the *own* postcode's lodged stock. A genuinely-isolated type (one flat among
houses) is unpredictable with no recourse, and a no-EPC + can't-predict Property
fails its subtask — which, over SQS, retries and dead-letters even though the
input data is permanently insufficient.
The legacy engine handled this by **trimming the postcode** (`BR6 6BS``BR6 6B`
`BR6` …) and re-querying, because the old EPC API accepted partial postcodes.
The current gov EPC API (`api.get-energy-performance-data.communities.gov.uk`)
does **not**: its domestic search accepts only a *full* real postcode, `uprn`,
`council[]`, `constituency[]`, or `address` — confirmed against its OpenAPI spec.
There is no outcode/prefix, radius, or lat/long search.
## Decisions
### 1. Broaden to the real unit postcodes physically nearest the target
When the own-postcode cohort yields no same-type member, the cohort is broadened
to the real unit postcodes around the target, resolved from its coordinates via
**postcodes.io**'s keyless `nearest` endpoint (already a trusted dependency in
`scripts/fetch_epc_prediction_dense_corpus.py`). Each nearby postcode is then
searched exactly as the own postcode is — so `property_type` stays the same hard
filter and ADR-0031 decision 5 is upheld; only the *reach* of the candidate pool
widens, never the selection rule. The hard gate still fires when even the
broadened cohort has no same-type comparable: broadening widens the eligible
population, it does not force a prediction.
### 2. postcodes.io over `council[]` and a bulk centroid dataset
- **`council[]`** (the only "wider than postcode" gov-API filter) returns the
*whole* local authority — tens of thousands of certs, many pages — and its
result rows carry **no `property_type`**, so narrowing to the target's type
would mean fetching every certificate. Rejected: cost is unbounded by distance
and dominated by per-cert fetches.
- **OS Code-Point Open / ONSPD → S3** would give an offline unit-postcode→
coordinate table, but is a ~1.7M-row asset to ingest and host for a path that
fires rarely. Rejected as disproportionate; reconsider if postcodes.io's
availability becomes a liability.
- **OS Places radius API** needs an `OS_API_KEY` the modelling path does not
otherwise carry. Rejected to avoid a new secret.
postcodes.io is keyless, bounded by radius (a tight neighbourhood, not an
authority), and reuses the existing per-postcode search unchanged.
### 3. Lazy, nearest-first with early-stop, and soft-failing
Broadening runs **only on gate-out** (a no-EPC Property whose own postcode lacks
a same-type comparable) — the common cases never pay for it. Nearby postcodes are
walked **nearest first** and the walk **stops early** once enough same-type
comparables surface, so a dense area resolves in one or two searches instead of
the whole radius. Results are memoised per `(postcode, property_type)`. The
postcodes.io call is **soft-failing**: any error (after the shared transient
retry) degrades to the seed postcode alone, so a flaky or unavailable
postcodes.io can never break prediction — it just declines to broaden.
## Consequences
- A new external dependency, postcodes.io, sits in the modelling/Lambda path. It
is keyless and isolated behind `PostcodesIoClient` (shared transient-retry +
soft-fail), but its availability and rate limits are now a (bounded) operational
concern. ADR-0031 decision 5's `postcode_search` lever remains an alternative.
- The `ComparablePropertiesRepository` gains `candidates_near` alongside
`candidates_for`; the predictor/handler try the own postcode first and broaden
only on an empty cohort.
- Prediction reaches across postcode boundaries, so the geo-distance weighting
ADR-0029 already applies (a comparable's vote decays with haversine distance)
becomes load-bearing for broadened cohorts, not just a refinement.
- `property_type` gating is unchanged: a Property with no same-type comparable
*anywhere nearby* is still gated out — now a genuine "no comparable stock"
signal rather than an artefact of a sparse single postcode.

View file

@ -1,35 +0,0 @@
import time
from typing import Callable, Optional, TypeVar
import httpx
from infrastructure.epc_client.exceptions import EpcRateLimitError
T = TypeVar("T")
def call_with_retry(
fn: Callable[[], T],
max_retries: int = 5,
backoff_base: float = 1.0,
backoff_multiplier: float = 2.0,
max_backoff: float = 60.0,
) -> T:
"""Retry `fn` on transient EPC-API failures: HTTP 429 rate limits and
transport errors (read/connect timeouts, connection resets). A 429 honours
the server's `Retry-After`; transport errors back off exponentially. Non-
transient failures (other 4xx/5xx, mapping errors) propagate immediately."""
last_exc: Optional[Exception] = None
for attempt in range(max_retries + 1):
try:
return fn()
except (EpcRateLimitError, httpx.TransportError) as exc:
last_exc = exc
if attempt < max_retries:
if isinstance(exc, EpcRateLimitError) and exc.retry_after is not None:
delay = exc.retry_after
else:
delay = backoff_base * (backoff_multiplier**attempt)
time.sleep(min(delay, max_backoff))
assert last_exc is not None
raise last_exc

View file

@ -10,7 +10,7 @@ from infrastructure.epc_client.exceptions import (
EpcNotFoundError,
EpcRateLimitError,
)
from infrastructure.epc_client._retry import call_with_retry
from infrastructure.http_retry import call_with_retry
from datatypes.epc.domain.epc_property_data import EpcPropertyData
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
from datatypes.epc.search import EpcSearchResult

View file

@ -1,4 +1,4 @@
from typing import Optional
from infrastructure.http_retry import TransientHttpError
class EpcApiError(Exception):
@ -9,9 +9,7 @@ class EpcNotFoundError(EpcApiError):
"""Raised when the API returns 404."""
class EpcRateLimitError(EpcApiError):
"""Raised when the API returns 429 and all retries are exhausted."""
def __init__(self, message: str, retry_after: Optional[float] = None) -> None:
super().__init__(message)
self.retry_after = retry_after
class EpcRateLimitError(EpcApiError, TransientHttpError):
"""Raised when the API returns 429. A ``TransientHttpError`` so the shared
``call_with_retry`` retries it (honouring ``Retry-After``), while remaining an
``EpcApiError`` for callers that catch the EPC hierarchy."""

View file

@ -0,0 +1,60 @@
"""Shared transient-failure retry for the HTTP source clients.
The retry *mechanism* is generic; each client owns the *policy* of what counts as
transient. A caller signals "retry this" by raising ``TransientHttpError``
(carrying any server-advised ``retry_after``); transport-level errors (read /
connect timeouts, connection resets) are always treated as transient.
``call_with_retry`` backs off exponentially between attempts honouring
``retry_after`` when present and re-raises the last error once attempts are
exhausted, leaving the caller to decide how to surface it (the EPC client lets it
propagate; postcodes.io soft-fails to the seed postcode).
"""
from __future__ import annotations
import time
from typing import Callable, Optional, TypeVar
import httpx
T = TypeVar("T")
class TransientHttpError(Exception):
"""A failure worth retrying. ``retry_after`` is the server-advised delay (a
429's ``Retry-After``), used in place of the computed backoff when present."""
def __init__(self, message: str, retry_after: Optional[float] = None) -> None:
super().__init__(message)
self.retry_after = retry_after
def call_with_retry(
fn: Callable[[], T],
max_retries: int = 5,
backoff_base: float = 1.0,
backoff_multiplier: float = 2.0,
max_backoff: float = 60.0,
) -> T:
"""Retry ``fn`` on transient failures — ``TransientHttpError`` (e.g. a 429)
and ``httpx.TransportError`` (read/connect timeouts, connection resets)
backing off exponentially, or by the error's ``retry_after`` when it carries
one. Non-transient failures propagate immediately; the last transient error
is re-raised once ``max_retries`` is exhausted."""
last_exc: Optional[Exception] = None
for attempt in range(max_retries + 1):
try:
return fn()
except (TransientHttpError, httpx.TransportError) as exc:
last_exc = exc
if attempt < max_retries:
retry_after = (
exc.retry_after if isinstance(exc, TransientHttpError) else None
)
if retry_after is not None:
delay = retry_after
else:
delay = backoff_base * (backoff_multiplier**attempt)
time.sleep(min(delay, max_backoff))
assert last_exc is not None
raise last_exc

View file

View file

@ -0,0 +1,145 @@
"""postcodes.io adapter — a coordinate (or seed postcode) → the real unit
postcodes physically near it.
The gov EPC API only searches a *full* real postcode no outcode/prefix, no
radius, no lat/long (confirmed against its OpenAPI spec). So to broaden an
EPC-Prediction cohort beyond the target's own postcode we must first discover the
real unit postcodes around it. postcodes.io's free, keyless ``nearest`` endpoint
does exactly that: given a point it returns the unit postcodes within a radius,
nearest first.
Failure is deliberately non-fatal: any error (network, unknown seed, missing
coordinates) returns just the seed postcode, so broadening degrades to "no
broadening" rather than breaking prediction.
"""
from __future__ import annotations
from typing import Any, Optional
import httpx
from domain.geospatial.coordinates import Coordinates
from infrastructure.http_retry import TransientHttpError, call_with_retry
class PostcodesIoClient:
BASE_URL = "https://api.postcodes.io"
REQUEST_TIMEOUT = 10.0
# Transient failures (transport errors, 429s, 5xx) are retried with
# exponential backoff; everything else (and exhaustion) soft-fails to the
# seed, so broadening never breaks prediction.
MAX_RETRIES = 3
BACKOFF_BASE = 0.5
BACKOFF_MULTIPLIER = 2.0
MAX_BACKOFF = 8.0
def __init__(self, *, radius_m: int = 1000, limit: int = 30) -> None:
"""``radius_m`` bounds how far the broadened cohort reaches; ``limit``
caps how many nearby postcodes are returned (and so the per-gate-out
fetch cost)."""
self._radius_m = radius_m
self._limit = limit
def nearby(
self, postcode: str, coordinates: Optional[Coordinates] = None
) -> list[str]:
"""The real unit postcodes within ``radius_m`` of ``postcode`` — nearest
first, the seed always included or just ``[postcode]`` when the seed's
coordinates cannot be resolved or the lookup fails.
``coordinates`` (the target's own, resolved from its UPRN) is used when
given, sparing a postcodecentroid round-trip; otherwise postcodes.io
resolves the seed postcode's centroid itself."""
point = coordinates if coordinates is not None else self._centroid_of(postcode)
if point is None:
return [postcode]
found = self._nearest_to(point)
ordered = [postcode] + [p for p in found if p != postcode]
return ordered[: self._limit]
def _centroid_of(self, postcode: str) -> Optional[Coordinates]:
result = self._get(f"/postcodes/{postcode.replace(' ', '')}")
if result is None:
return None
latitude: Any = result.get("latitude")
longitude: Any = result.get("longitude")
if latitude is None or longitude is None:
return None
return Coordinates(longitude=float(longitude), latitude=float(latitude))
def _nearest_to(self, point: Coordinates) -> list[str]:
results = self._get_list(
"/postcodes",
{
"lon": point.longitude,
"lat": point.latitude,
"radius": self._radius_m,
"limit": self._limit,
},
)
return [str(row["postcode"]) for row in results if row.get("postcode")]
def _get(self, path: str) -> Optional[dict[str, Any]]:
payload = self._call(path, None)
return payload if isinstance(payload, dict) else None
def _get_list(self, path: str, params: dict[str, Any]) -> list[dict[str, Any]]:
payload = self._call(path, params)
if not isinstance(payload, list):
return []
return [row for row in payload if isinstance(row, dict)]
def _call(self, path: str, params: Optional[dict[str, Any]]) -> Any:
"""The parsed ``result`` payload for a postcodes.io GET, retried on
transient failures via the shared ``call_with_retry``, or None on a
non-transient failure (e.g. an unknown postcode's 404) or once retries are
exhausted broadening then falls back to the seed alone. The soft-fail is
the difference from the EPC client, which lets the error propagate."""
try:
return call_with_retry(
lambda: self._fetch(path, params),
max_retries=self.MAX_RETRIES,
backoff_base=self.BACKOFF_BASE,
backoff_multiplier=self.BACKOFF_MULTIPLIER,
max_backoff=self.MAX_BACKOFF,
)
except (TransientHttpError, httpx.HTTPError):
return None
def _fetch(self, path: str, params: Optional[dict[str, Any]]) -> Any:
"""One GET. Raises ``TransientHttpError`` on a 429/5xx (and lets
``httpx.TransportError`` propagate) so ``call_with_retry`` retries it;
returns None for a non-transient non-success (e.g. 404) or unparseable
body."""
response = httpx.get(
f"{self.BASE_URL}{path}",
params=params,
timeout=self.REQUEST_TIMEOUT,
)
if self._is_transient(response.status_code):
raise TransientHttpError(
f"postcodes.io {response.status_code} on {path}",
retry_after=self._retry_after(response),
)
if not response.is_success:
return None
try:
body: Any = response.json()
except ValueError:
return None
return body.get("result") if isinstance(body, dict) else None
@staticmethod
def _is_transient(status_code: int) -> bool:
return status_code == 429 or status_code >= 500
@staticmethod
def _retry_after(response: httpx.Response) -> Optional[float]:
header = response.headers.get("Retry-After")
if header is None:
return None
try:
return float(header)
except (TypeError, ValueError):
return None

View file

@ -12,7 +12,7 @@ from __future__ import annotations
import logging
from dataclasses import dataclass
from datetime import date
from typing import Optional, Protocol
from typing import Callable, Optional, Protocol
from datatypes.epc.domain.epc_property_data import EpcPropertyData
from datatypes.epc.search.epc_search_result import EpcSearchResult
@ -22,6 +22,11 @@ from repositories.comparable_properties.comparable_properties_repository import
ComparablePropertiesRepository,
)
# The same default floor `select_comparables` uses: keep walking nearby postcodes
# until this many candidates match, so the broadened cohort is big enough for the
# downstream relax ladder rather than stopping at the first stray match.
_DEFAULT_MINIMUM_COHORT = 5
class CohortEpcClient(Protocol):
"""The slice of the EPC-API client the cohort fetch needs (e.g.
@ -40,6 +45,16 @@ class CohortGeospatial(Protocol):
) -> dict[int, Coordinates]: ...
class NearbyPostcodes(Protocol):
"""Resolves the real unit postcodes physically near a seed postcode (e.g.
`PostcodesIoClient`). The gov EPC API cannot search by radius, so this is how
the cohort reaches beyond the target's own postcode (ADR-0034)."""
def nearby(
self, postcode: str, coordinates: Optional[Coordinates] = None
) -> list[str]: ...
logger = logging.getLogger(__name__)
@ -56,10 +71,14 @@ class SkippedCohortCert:
class EpcComparablePropertiesRepository(ComparablePropertiesRepository):
def __init__(
self, epc_client: CohortEpcClient, geospatial: CohortGeospatial
self,
epc_client: CohortEpcClient,
geospatial: CohortGeospatial,
nearby_postcodes: Optional[NearbyPostcodes] = None,
) -> None:
self._epc_client = epc_client
self._geospatial = geospatial
self._nearby_postcodes = nearby_postcodes
# Cohort certs skipped because they are not yet mappable. Accumulates
# across every postcode the instance serves; the caller reads it after
# the run to report the mapper gaps (see modelling_e2e handler).
@ -80,6 +99,45 @@ class EpcComparablePropertiesRepository(ComparablePropertiesRepository):
cohort.append(comparable)
return cohort
def candidates_near(
self,
postcode: str,
coordinates: Optional[Coordinates] = None,
*,
enough: Optional[Callable[[ComparableProperty], bool]] = None,
minimum: int = _DEFAULT_MINIMUM_COHORT,
) -> list[ComparableProperty]:
"""The broadened cohort: candidates drawn from the real unit postcodes
nearest ``postcode`` (ADR-0034), for when the target's own postcode holds
no same-type comparables. Postcodes are visited nearest first and each
candidate is deduped by certificate number across them.
``enough`` lets the caller stop the walk early once ``minimum``
candidates satisfy it (e.g. they match the target's property type) the
remaining, further-away postcodes are not fetched, so a dense area
resolves in one or two searches instead of the whole radius. Without a
configured ``NearbyPostcodes`` source this degrades to the seed postcode
alone."""
postcodes = (
self._nearby_postcodes.nearby(postcode, coordinates)
if self._nearby_postcodes is not None
else [postcode]
)
candidates: list[ComparableProperty] = []
seen_certs: set[str] = set()
matches = 0
for nearby_postcode in postcodes:
for candidate in self.candidates_for(nearby_postcode):
if candidate.certificate_number in seen_certs:
continue
seen_certs.add(candidate.certificate_number)
candidates.append(candidate)
if enough is not None and enough(candidate):
matches += 1
if enough is not None and matches >= minimum:
break
return candidates
# Mapper-shape errors: the cert's lodged data does not fit the schema/mapper.
# `ValueError` — missing required field / unmapped code (`UnmappedApiCode`);
# `AttributeError` — a field lodged in the wrong shape (e.g. `photovoltaic_supply`

View file

@ -81,6 +81,7 @@ from domain.epc_prediction.comparable_properties import ( # noqa: E402
)
from domain.epc_prediction.epc_prediction import EpcPrediction # noqa: E402
from domain.epc_prediction.prediction_target import ( # noqa: E402
PredictionTarget,
build_prediction_target,
)
from domain.geospatial.coordinates import Coordinates # noqa: E402
@ -96,6 +97,9 @@ from domain.modelling.scenario import Scenario # noqa: E402
from harness.console import candidate_recommendations, run_modelling # noqa: E402
from harness.plan_table import format_plan_table # noqa: E402
from infrastructure.epc_client.epc_client_service import EpcClientService # noqa: E402
from infrastructure.postcodes_io.postcodes_io_client import ( # noqa: E402
PostcodesIoClient,
)
from infrastructure.solar.google_solar_api_client import ( # noqa: E402
BuildingInsightsNotFoundError,
GoogleSolarApiClient,
@ -400,6 +404,7 @@ def _predict_epc(
attributes_reader: OverrideBackedPredictionAttributesReader,
coordinates: Optional[Coordinates],
cohort_for: Callable[[str], list[ComparableProperty]],
broaden: Callable[[PredictionTarget], list[ComparableProperty]],
predictor: EpcPrediction,
) -> Optional[EpcPropertyData]:
"""Synthesise an EpcPropertyData for an EPC-less Property from its postcode
@ -408,7 +413,8 @@ def _predict_epc(
The cohort is found by POSTCODE, so a wrong postcode on the property row
yields the wrong neighbours a prediction is only as good as the postcode it
is given."""
is given. When the own postcode holds no same-type comparables, the cohort is
broadened to the real unit postcodes physically nearest it (``broaden``)."""
attributes = attributes_reader.attributes_for(property_id)
identity = PropertyIdentity(
portfolio_id=portfolio_id, postcode=postcode, address="", uprn=uprn
@ -418,7 +424,10 @@ def _predict_epc(
return None # property_type unresolvable — gated out of prediction
comparables = select_comparables(target, cohort_for(target.postcode))
if not comparables.members:
return None # no comparable neighbours in the postcode
# Sparse own postcode — reach out to the nearest real postcodes.
comparables = select_comparables(target, broaden(target))
if not comparables.members:
return None # no comparable neighbours nearby either
predicted = predictor.predict(target, comparables)
# The calculator needs a MAIN building part; a cohort whose template carries
# none (e.g. a malformed flat record) yields an unscoreable picture, so reject
@ -684,9 +693,12 @@ def main() -> None:
# from the live EPC API (search-by-postcode + per-cert fetch), memoised per
# postcode so co-located missing Properties don't refetch the same cohort.
prediction_attributes = OverrideBackedPredictionAttributesReader(overrides_reader)
comparables_repo = EpcComparablePropertiesRepository(epc_client, geospatial)
comparables_repo = EpcComparablePropertiesRepository(
epc_client, geospatial, nearby_postcodes=PostcodesIoClient()
)
predictor = EpcPrediction()
_cohort_cache: dict[str, list[ComparableProperty]] = {}
_nearby_cohort_cache: dict[tuple[str, str], list[ComparableProperty]] = {}
def cohort_for(postcode: str) -> list[ComparableProperty]:
if postcode not in _cohort_cache:
@ -694,6 +706,23 @@ def main() -> None:
comparables_repo.candidates_for(postcode) if postcode else []
)
return _cohort_cache[postcode]
def broaden(target: PredictionTarget) -> list[ComparableProperty]:
# Broadened cohort for a gated-out target: the nearest real postcodes,
# walked until enough same-type comparables surface (ADR-0034). Memoised
# per (postcode, property_type).
key = (target.postcode, target.property_type)
if key not in _nearby_cohort_cache:
_nearby_cohort_cache[key] = (
comparables_repo.candidates_near(
target.postcode,
target.coordinates,
enough=lambda c: c.epc.property_type == target.property_type,
)
if target.postcode
else []
)
return _nearby_cohort_cache[key]
# One read-only session for the live `material` catalogue, reused across the
# batch so both store and no-store runs price against the same DB rows.
catalogue_session = Session(engine)
@ -831,12 +860,14 @@ def main() -> None:
attributes_reader=prediction_attributes,
coordinates=coordinates,
cohort_for=cohort_for,
broaden=broaden,
predictor=predictor,
)
if predicted_epc is None:
raise ValueError(
f"no EPC for UPRN {uprn} and not predictable "
f"(unresolved property_type or empty '{postcode}' cohort)"
f"(unresolved property_type, or no same-type "
f"comparables in or near '{postcode}')"
)
# Property.effective_epc folds any Landlord Overrides onto the
# synthesised EPC (cohort fills the unknown fields, the landlord's

View file

@ -8,7 +8,7 @@ is needed. One test per distinct behaviour path.
from __future__ import annotations
from contextlib import ExitStack
from typing import Any
from typing import Any, Iterator
from unittest.mock import MagicMock, call, patch
import pytest
@ -81,6 +81,17 @@ def _clear_cohort_cache() -> None:
import applications.modelling_e2e.handler as h
h._cohort_cache.clear()
h._nearby_cohort_cache.clear()
@pytest.fixture(autouse=True)
def _baseline_orchestrator() -> Iterator[MagicMock]:
"""Patch the Baseline orchestrator for every test — construction stays cheap
and ``.run`` is a no-op mock the baseline tests assert against."""
with patch(
"applications.modelling_e2e.handler.PropertyBaselineOrchestrator"
) as orchestrator:
yield orchestrator
# ---------------------------------------------------------------------------
@ -116,9 +127,12 @@ def test_trigger_body_rejects_missing_property_ids() -> None:
# ---------------------------------------------------------------------------
def test_lodged_epc_path_saves_epc_plan_and_marks_modelled() -> None:
def test_lodged_epc_path_saves_epc_plan_and_marks_modelled(
_baseline_orchestrator: MagicMock,
) -> None:
"""When get_by_uprn returns an EPC the handler saves it, saves the plan,
and marks the property as modelled all inside one UoW per property."""
marks the property as modelled all inside one UoW per property and
re-establishes its Baseline Performance after the plan commits."""
# Arrange
mock_engine = _engine_mock([PROPERTY_ID], [UPRN], [POSTCODE])
mock_epc = MagicMock()
@ -196,6 +210,8 @@ def test_lodged_epc_path_saves_epc_plan_and_marks_modelled() -> None:
PROPERTY_ID, has_recommendations=False
)
mock_uow.commit.assert_called_once()
# Baseline Performance is re-established for the lodged property.
_baseline_orchestrator.return_value.run.assert_called_once_with([PROPERTY_ID])
def test_skipped_cohort_certs_are_surfaced_in_the_outputs() -> None:
@ -296,9 +312,12 @@ def test_skipped_cohort_certs_are_surfaced_in_the_outputs() -> None:
# ---------------------------------------------------------------------------
def test_prediction_path_saves_plan_without_epc_save() -> None:
def test_prediction_path_saves_plan_without_epc_save(
_baseline_orchestrator: MagicMock,
) -> None:
"""When get_by_uprn returns None the handler synthesises an EPC via
prediction and saves the plan but never calls epc.save."""
prediction and saves the plan but never calls epc.save, and (having no
lodged figures) never establishes a Baseline."""
# Arrange
mock_engine = _engine_mock([PROPERTY_ID], [UPRN], [POSTCODE])
mock_plan = _plan_mock()
@ -398,10 +417,11 @@ def test_prediction_path_saves_plan_without_epc_save() -> None:
# Act
_call_handler(_BODY)
# Assert — epc.save NOT called (no lodged cert), plan IS saved
# Assert — epc.save NOT called (no lodged cert), plan IS saved, no baseline
mock_uow.epc.save.assert_not_called()
mock_uow.plan.save.assert_called_once()
mock_uow.commit.assert_called_once()
_baseline_orchestrator.return_value.run.assert_not_called()
# ---------------------------------------------------------------------------
@ -489,6 +509,124 @@ def test_empty_cohort_gates_property_out_and_raises() -> None:
MockUoW.return_value.__enter__.assert_not_called()
# ---------------------------------------------------------------------------
# Broadened cohort — sparse own postcode falls back to nearby postcodes
# ---------------------------------------------------------------------------
def test_empty_own_postcode_broadens_to_nearby_and_predicts() -> None:
"""When the property's own postcode holds no same-type comparables, the
handler broadens to the nearby-postcode cohort (candidates_near) and, finding
comparables there, synthesises the EPC and saves the plan."""
# Arrange
mock_engine = _engine_mock([PROPERTY_ID], [UPRN], [POSTCODE])
mock_plan = _plan_mock()
mock_uow = MagicMock()
mock_predicted_epc = MagicMock()
from datatypes.epc.domain.epc_property_data import BuildingPartIdentifier
mock_part = MagicMock()
mock_part.identifier = BuildingPartIdentifier.MAIN
mock_predicted_epc.sap_building_parts = [mock_part]
# First select_comparables (own postcode) is empty → broaden; the second
# (nearby cohort) finds comparables.
empty_comparables = MagicMock()
empty_comparables.members = []
found_comparables = MagicMock()
found_comparables.members = [MagicMock()]
with ExitStack() as stack:
stack.enter_context(
patch("applications.modelling_e2e.handler.os.environ", _ENV)
)
stack.enter_context(
patch(
"applications.modelling_e2e.handler._get_engine",
return_value=mock_engine,
)
)
stack.enter_context(
patch("applications.modelling_e2e.handler.EpcClientService")
).return_value.get_by_uprn.return_value = None # no lodged EPC
stack.enter_context(
patch("applications.modelling_e2e.handler.GeospatialS3Repository")
)
stack.enter_context(
patch("applications.modelling_e2e.handler.GoogleSolarApiClient")
)
stack.enter_context(
patch("applications.modelling_e2e.handler._spatial_for", return_value=None)
)
stack.enter_context(
patch(
"applications.modelling_e2e.handler._solar_insights_for",
return_value=None,
)
)
stack.enter_context(
patch("applications.modelling_e2e.handler.overlays_from", return_value=[])
)
stack.enter_context(
patch("applications.modelling_e2e.handler.PropertyOverridesPostgresReader")
)
from domain.epc_prediction.prediction_target import PredictionTargetAttributes
stack.enter_context(
patch(
"applications.modelling_e2e.handler.OverrideBackedPredictionAttributesReader"
)
).return_value.attributes_for.return_value = PredictionTargetAttributes(
property_type="2"
)
MockRepo = stack.enter_context(
patch(
"applications.modelling_e2e.handler.EpcComparablePropertiesRepository"
)
)
MockRepo.return_value.candidates_for.return_value = []
MockRepo.return_value.candidates_near.return_value = [MagicMock()]
stack.enter_context(
patch(
"applications.modelling_e2e.handler.select_comparables",
side_effect=[empty_comparables, found_comparables],
)
)
stack.enter_context(
patch("applications.modelling_e2e.handler.EpcPrediction")
).return_value.predict.return_value = mock_predicted_epc
stack.enter_context(
patch("applications.modelling_e2e.handler.ScenarioPostgresRepository")
).return_value.get_many.return_value = [MagicMock()]
stack.enter_context(
patch(
"applications.modelling_e2e.handler.catalogue_with_off_catalogue_overrides"
)
)
stack.enter_context(patch("applications.modelling_e2e.handler.Session"))
stack.enter_context(
patch(
"applications.modelling_e2e.handler.run_modelling",
return_value=mock_plan,
)
)
MockUoW = stack.enter_context(
patch("applications.modelling_e2e.handler.PostgresUnitOfWork")
)
MockUoW.return_value.__enter__.return_value = mock_uow
MockUoW.return_value.__exit__.return_value = False
# Act
_call_handler(_BODY)
# Assert — broadening fired, and the broadened cohort produced a saved plan.
MockRepo.return_value.candidates_near.assert_called_once()
mock_uow.epc.save.assert_not_called() # predicted, never lodged
mock_uow.plan.save.assert_called_once()
mock_uow.commit.assert_called_once()
# ---------------------------------------------------------------------------
# Partial batch failure
# ---------------------------------------------------------------------------

View file

@ -78,7 +78,7 @@ def test_429_retry_after_header_drives_sleep_duration(
_mock_response(200, cert_response),
]
with patch("httpx.get", side_effect=responses), patch(
"infrastructure.epc_client._retry.time.sleep"
"infrastructure.http_retry.time.sleep"
) as mock_sleep:
epc_service.get_by_certificate_number("CERT-001")
@ -100,7 +100,7 @@ def test_429_without_retry_after_uses_exponential_backoff(
_mock_response(200, cert_response),
]
with patch("httpx.get", side_effect=responses), patch(
"infrastructure.epc_client._retry.time.sleep"
"infrastructure.http_retry.time.sleep"
) as mock_sleep:
epc_service.get_by_certificate_number("CERT-001")
@ -121,7 +121,7 @@ def test_429_malformed_retry_after_falls_back_to_backoff(
_mock_response(200, cert_response),
]
with patch("httpx.get", side_effect=responses), patch(
"infrastructure.epc_client._retry.time.sleep"
"infrastructure.http_retry.time.sleep"
) as mock_sleep:
epc_service.get_by_certificate_number("CERT-001")
@ -140,7 +140,7 @@ def test_429_retry_after_capped_by_max_backoff(epc_service, rdsap_21_0_1_cert):
_mock_response(200, cert_response),
]
with patch("httpx.get", side_effect=responses), patch(
"infrastructure.epc_client._retry.time.sleep"
"infrastructure.http_retry.time.sleep"
) as mock_sleep:
epc_service.get_by_certificate_number("CERT-001")

View file

@ -0,0 +1,224 @@
"""PostcodesIoClient — coordinate/seed postcode → the real unit postcodes near
it, via postcodes.io's keyless nearest endpoint. Failure degrades to the seed
alone so broadening never breaks prediction."""
from __future__ import annotations
from typing import Any, Iterator, Optional
from unittest.mock import MagicMock, patch
import httpx
import pytest
from domain.geospatial.coordinates import Coordinates
from infrastructure.postcodes_io.postcodes_io_client import PostcodesIoClient
_MODULE = "infrastructure.postcodes_io.postcodes_io_client"
@pytest.fixture(autouse=True)
def _no_sleep() -> Iterator[MagicMock]:
"""Never actually sleep during backoff (the shared retry owns the sleep) —
just record the calls."""
with patch("infrastructure.http_retry.time.sleep") as sleep:
yield sleep
def _response(
payload: Any,
*,
status_code: int = 200,
headers: Optional[dict[str, str]] = None,
) -> MagicMock:
resp = MagicMock()
resp.status_code = status_code
resp.is_success = 200 <= status_code < 300
resp.headers = headers if headers is not None else {}
resp.json.return_value = payload
return resp
def _nearest_payload(postcodes: list[str]) -> dict[str, Any]:
return {"result": [{"postcode": p} for p in postcodes]}
def test_nearby_with_coordinates_skips_the_centroid_lookup() -> None:
"""When the target's own coordinates are passed, only the radius search is
issued no postcodecentroid round-trip and the seed leads the result."""
# Arrange
client = PostcodesIoClient(radius_m=500, limit=10)
coords = Coordinates(longitude=0.1, latitude=51.3)
with patch(f"{_MODULE}.httpx.get") as mock_get:
mock_get.return_value = _response(
_nearest_payload(["BR6 6BS", "BR6 6BU", "BR6 6NX"])
)
# Act
result = client.nearby("BR6 6BS", coords)
# Assert — one call (the radius search), seed first, neighbours follow
assert result == ["BR6 6BS", "BR6 6BU", "BR6 6NX"]
assert mock_get.call_count == 1
_, kwargs = mock_get.call_args
assert kwargs["params"]["lat"] == 51.3
assert kwargs["params"]["lon"] == 0.1
def test_nearby_resolves_the_seed_centroid_when_no_coordinates_given() -> None:
"""Without coordinates the client first resolves the seed's own centroid via
postcodes.io, then runs the radius search from it."""
# Arrange
client = PostcodesIoClient()
centroid = {"result": {"latitude": 51.3, "longitude": 0.1}}
with patch(f"{_MODULE}.httpx.get") as mock_get:
mock_get.side_effect = [
_response(centroid),
_response(_nearest_payload(["BR6 6BS", "BR6 6BU"])),
]
# Act
result = client.nearby("BR6 6BS")
# Assert — two calls: centroid then radius
assert result == ["BR6 6BS", "BR6 6BU"]
assert mock_get.call_count == 2
def test_nearby_dedupes_the_seed_and_caps_at_limit() -> None:
"""The seed always leads exactly once even when the radius search echoes it,
and the result is capped at ``limit``."""
# Arrange
client = PostcodesIoClient(limit=3)
coords = Coordinates(longitude=0.1, latitude=51.3)
with patch(f"{_MODULE}.httpx.get") as mock_get:
mock_get.return_value = _response(
_nearest_payload(["BR6 6BS", "BR6 6BU", "BR6 6NX", "BR6 6AA"])
)
# Act
result = client.nearby("BR6 6BS", coords)
# Assert
assert result == ["BR6 6BS", "BR6 6BU", "BR6 6NX"]
assert result.count("BR6 6BS") == 1
def test_nearby_returns_just_the_seed_after_exhausting_retries(
_no_sleep: MagicMock,
) -> None:
"""A persistent network error is retried, then degrades to broadening-off:
only the seed comes back, and the retries were actually attempted."""
# Arrange
client = PostcodesIoClient()
coords = Coordinates(longitude=0.1, latitude=51.3)
with patch(
f"{_MODULE}.httpx.get", side_effect=httpx.ConnectError("down")
) as mock_get:
# Act
result = client.nearby("BR6 6BS", coords)
# Assert — one initial try + MAX_RETRIES, sleeping between each.
assert result == ["BR6 6BS"]
assert mock_get.call_count == client.MAX_RETRIES + 1
assert _no_sleep.call_count == client.MAX_RETRIES
def test_nearby_retries_a_transport_error_then_succeeds(_no_sleep: MagicMock) -> None:
"""A transient transport error is retried, and the subsequent success is
returned in full."""
# Arrange
client = PostcodesIoClient()
coords = Coordinates(longitude=0.1, latitude=51.3)
with patch(f"{_MODULE}.httpx.get") as mock_get:
mock_get.side_effect = [
httpx.ReadTimeout("slow"),
_response(_nearest_payload(["BR6 6BS", "BR6 6BU"])),
]
# Act
result = client.nearby("BR6 6BS", coords)
# Assert
assert result == ["BR6 6BS", "BR6 6BU"]
assert mock_get.call_count == 2
assert _no_sleep.call_count == 1
def test_nearby_retries_a_429_honouring_retry_after(_no_sleep: MagicMock) -> None:
"""A 429 is retried, and the server's Retry-After drives the backoff delay."""
# Arrange
client = PostcodesIoClient()
coords = Coordinates(longitude=0.1, latitude=51.3)
with patch(f"{_MODULE}.httpx.get") as mock_get:
mock_get.side_effect = [
_response(None, status_code=429, headers={"Retry-After": "2"}),
_response(_nearest_payload(["BR6 6BS", "BR6 6BU"])),
]
# Act
result = client.nearby("BR6 6BS", coords)
# Assert — succeeded on the retry, having slept the advertised 2 seconds.
assert result == ["BR6 6BS", "BR6 6BU"]
assert mock_get.call_count == 2
_no_sleep.assert_called_once_with(2.0)
def test_nearby_retries_a_server_error_then_succeeds(_no_sleep: MagicMock) -> None:
"""A 5xx is treated as transient and retried."""
# Arrange
client = PostcodesIoClient()
coords = Coordinates(longitude=0.1, latitude=51.3)
with patch(f"{_MODULE}.httpx.get") as mock_get:
mock_get.side_effect = [
_response(None, status_code=503),
_response(_nearest_payload(["BR6 6BS"])),
]
# Act
result = client.nearby("BR6 6BS", coords)
# Assert
assert result == ["BR6 6BS"]
assert mock_get.call_count == 2
def test_nearby_returns_just_the_seed_when_centroid_unresolvable() -> None:
"""An unknown seed (no coordinates, centroid lookup fails) yields the seed
alone rather than raising."""
# Arrange
client = PostcodesIoClient()
with patch(f"{_MODULE}.httpx.get") as mock_get:
mock_get.return_value = _response(None, status_code=404)
# Act
result: list[str] = client.nearby("ZZ99 9ZZ")
# Assert — a 404 is non-transient, so no retry was attempted.
assert result == ["ZZ99 9ZZ"]
assert mock_get.call_count == 1
def test_nearby_tolerates_a_null_nearest_result() -> None:
"""postcodes.io returns ``result: null`` when a point has no neighbours; the
client treats that as an empty neighbour set (seed only)."""
# Arrange
client = PostcodesIoClient()
coords: Optional[Coordinates] = Coordinates(longitude=0.1, latitude=51.3)
with patch(f"{_MODULE}.httpx.get") as mock_get:
mock_get.return_value = _response({"result": None})
# Act
result = client.nearby("BR6 6BS", coords)
# Assert
assert result == ["BR6 6BS"]

View file

@ -0,0 +1,126 @@
"""call_with_retry — the shared transient-failure retry both HTTP source clients
use. Generic mechanism (exponential backoff, Retry-After), per-client policy
(what gets raised as transient)."""
from __future__ import annotations
from typing import Iterator
from unittest.mock import MagicMock, call, patch
import httpx
import pytest
from infrastructure.http_retry import TransientHttpError, call_with_retry
@pytest.fixture(autouse=True)
def _no_sleep() -> Iterator[MagicMock]:
with patch("infrastructure.http_retry.time.sleep") as sleep:
yield sleep
def test_returns_immediately_on_success(_no_sleep: MagicMock) -> None:
# Act
result = call_with_retry(lambda: 42)
# Assert
assert result == 42
_no_sleep.assert_not_called()
def test_retries_a_transient_error_then_succeeds(_no_sleep: MagicMock) -> None:
# Arrange — fail twice transiently, then succeed.
calls = {"n": 0}
def fn() -> str:
calls["n"] += 1
if calls["n"] < 3:
raise TransientHttpError("429")
return "ok"
# Act
result = call_with_retry(fn)
# Assert — exponential backoff between the three attempts.
assert result == "ok"
assert _no_sleep.call_args_list == [call(1.0), call(2.0)]
def test_honours_retry_after_over_the_computed_backoff(_no_sleep: MagicMock) -> None:
# Arrange
raised = {"done": False}
def fn() -> str:
if not raised["done"]:
raised["done"] = True
raise TransientHttpError("429", retry_after=7.0)
return "ok"
# Act
call_with_retry(fn)
# Assert
_no_sleep.assert_called_once_with(7.0)
def test_retries_transport_errors(_no_sleep: MagicMock) -> None:
# Arrange
attempts = {"n": 0}
def fn() -> str:
attempts["n"] += 1
if attempts["n"] == 1:
raise httpx.ReadTimeout("slow")
return "ok"
# Act
result = call_with_retry(fn)
# Assert
assert result == "ok"
assert attempts["n"] == 2
def test_reraises_the_last_transient_error_once_exhausted(
_no_sleep: MagicMock,
) -> None:
# Arrange — always transient.
def fn() -> str:
raise TransientHttpError("persistent 429")
# Act / Assert
with pytest.raises(TransientHttpError, match="persistent 429"):
call_with_retry(fn, max_retries=2)
assert _no_sleep.call_count == 2
def test_non_transient_error_propagates_without_retry(_no_sleep: MagicMock) -> None:
# Arrange
attempts = {"n": 0}
def fn() -> str:
attempts["n"] += 1
raise ValueError("not transient")
# Act / Assert
with pytest.raises(ValueError, match="not transient"):
call_with_retry(fn)
assert attempts["n"] == 1
_no_sleep.assert_not_called()
def test_retry_after_is_capped_by_max_backoff(_no_sleep: MagicMock) -> None:
# Arrange
raised = {"done": False}
def fn() -> str:
if not raised["done"]:
raised["done"] = True
raise TransientHttpError("429", retry_after=999.0)
return "ok"
# Act
call_with_retry(fn, max_backoff=60.0)
# Assert
_no_sleep.assert_called_once_with(60.0)

View file

@ -130,6 +130,126 @@ def test_no_certs_in_the_postcode_yields_no_candidates() -> None:
assert client.searched_postcode == "LS6 1AA"
# ---------------------------------------------------------------------------
# Broadened cohort — candidates_near (ADR-0034 nearby-postcode broadening)
# ---------------------------------------------------------------------------
class _MultiPostcodeEpcClient:
"""Serves a different cohort per postcode and records every search, so the
broadened walk's reach and ordering can be asserted."""
def __init__(self, by_postcode: dict[str, list[EpcSearchResult]]) -> None:
self._by_postcode = by_postcode
self.searched: list[str] = []
def search_by_postcode(self, postcode: str) -> list[EpcSearchResult]:
self.searched.append(postcode)
return self._by_postcode.get(postcode, [])
def get_by_certificate_number(self, cert_num: str) -> EpcPropertyData:
return _epc()
class _FakeNearbyPostcodes:
"""Returns a fixed nearest-first list and records the seed it was asked for."""
def __init__(self, postcodes: list[str]) -> None:
self._postcodes = postcodes
self.calls: list[tuple[str, Optional[Coordinates]]] = []
def nearby(
self, postcode: str, coordinates: Optional[Coordinates] = None
) -> list[str]:
self.calls.append((postcode, coordinates))
return self._postcodes
def test_candidates_near_aggregates_and_dedupes_across_nearby_postcodes() -> None:
# Arrange — three nearby postcodes; CERT-1 is re-lodged in two of them.
client = _MultiPostcodeEpcClient(
{
"P0": [_result("CERT-1", uprn=1)],
"P1": [_result("CERT-2", uprn=2), _result("CERT-1", uprn=1)],
"P2": [_result("CERT-3", uprn=3)],
}
)
nearby = _FakeNearbyPostcodes(["P0", "P1", "P2"])
repo = EpcComparablePropertiesRepository(
client, _FakeGeospatial({}), nearby_postcodes=nearby
)
# Act — no early-stop predicate, so the whole nearby set is visited.
candidates = repo.candidates_near("P0", None)
# Assert — one candidate per distinct cert, all three postcodes searched.
certs = {c.certificate_number for c in candidates}
assert certs == {"CERT-1", "CERT-2", "CERT-3"}
assert client.searched == ["P0", "P1", "P2"]
def test_candidates_near_stops_early_once_enough_match() -> None:
# Arrange — the seed postcode alone already yields enough matches; the two
# further postcodes must not be fetched.
client = _MultiPostcodeEpcClient(
{
"P0": [_result(f"MATCH-{i}", uprn=i) for i in range(5)],
"P1": [_result("OTHER-1", uprn=99)],
"P2": [_result("OTHER-2", uprn=98)],
}
)
nearby = _FakeNearbyPostcodes(["P0", "P1", "P2"])
repo = EpcComparablePropertiesRepository(
client, _FakeGeospatial({}), nearby_postcodes=nearby
)
# Act
candidates = repo.candidates_near(
"P0",
None,
enough=lambda c: c.certificate_number.startswith("MATCH"),
minimum=5,
)
# Assert — walk halted after the seed; the further postcodes were never hit.
assert client.searched == ["P0"]
assert len(candidates) == 5
def test_candidates_near_passes_coordinates_to_the_nearby_source() -> None:
# Arrange
here = Coordinates(longitude=0.1, latitude=51.3)
client = _MultiPostcodeEpcClient({"P0": []})
nearby = _FakeNearbyPostcodes(["P0"])
repo = EpcComparablePropertiesRepository(
client, _FakeGeospatial({}), nearby_postcodes=nearby
)
# Act
repo.candidates_near("P0", here)
# Assert — the target's own coordinates seed the radius search.
assert nearby.calls == [("P0", here)]
def test_candidates_near_without_a_source_uses_only_the_seed() -> None:
# Arrange — no NearbyPostcodes configured (broadening unavailable).
client = _MultiPostcodeEpcClient({"P0": [_result("CERT-1", uprn=1)]})
repo = EpcComparablePropertiesRepository(client, _FakeGeospatial({}))
# Act
candidates = repo.candidates_near("P0", None)
# Assert — degrades to the seed postcode alone.
assert client.searched == ["P0"]
assert [c.certificate_number for c in candidates] == ["CERT-1"]
# ---------------------------------------------------------------------------
# Unmappable cohort certs are skipped + recorded, not allowed to sink the cohort
# ---------------------------------------------------------------------------
class _FakeEpcClientRaising:
"""Serves the cohort, but raises the given exception for specific certs."""