mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-30 13:10:47 +00:00
Merge pull request #1276 from Hestia-Homes/feature/e2e-runs
Feature/e2e runs
This commit is contained in:
commit
6f0e526d3d
16 changed files with 1080 additions and 63 deletions
|
|
@ -43,14 +43,23 @@ from domain.epc_prediction.comparable_properties import (
|
|||
select_comparables,
|
||||
)
|
||||
from domain.epc_prediction.epc_prediction import EpcPrediction
|
||||
from domain.epc_prediction.prediction_target import build_prediction_target
|
||||
from domain.epc_prediction.prediction_target import (
|
||||
PredictionTarget,
|
||||
build_prediction_target,
|
||||
)
|
||||
from domain.geospatial.coordinates import Coordinates
|
||||
from domain.geospatial.planning_restrictions import PlanningRestrictions
|
||||
from domain.geospatial.spatial_reference import SpatialReference
|
||||
from domain.property.property import Property, PropertyIdentity
|
||||
from domain.property_baseline.calculator_rebaseliner import CalculatorRebaseliner
|
||||
from domain.sap10_calculator.calculator import Sap10Calculator
|
||||
from domain.tasks.tasks import Source
|
||||
from harness.console import run_modelling
|
||||
from orchestration.property_baseline_orchestrator import (
|
||||
PropertyBaselineOrchestrator,
|
||||
)
|
||||
from infrastructure.epc_client.epc_client_service import EpcClientService
|
||||
from infrastructure.postcodes_io.postcodes_io_client import PostcodesIoClient
|
||||
from infrastructure.postgres.config import PostgresConfig
|
||||
from infrastructure.postgres.engine import make_engine
|
||||
from infrastructure.solar.google_solar_api_client import (
|
||||
|
|
@ -68,6 +77,9 @@ from repositories.geospatial.geospatial_s3_repository import (
|
|||
GeospatialS3Repository,
|
||||
ParquetReader,
|
||||
)
|
||||
from repositories.fuel_rates.fuel_rates_static_file_repository import (
|
||||
FuelRatesStaticFileRepository,
|
||||
)
|
||||
from repositories.postgres_unit_of_work import PostgresUnitOfWork
|
||||
from repositories.product.composite_product_repository import (
|
||||
catalogue_with_off_catalogue_overrides,
|
||||
|
|
@ -87,6 +99,10 @@ from utilities.logger import setup_logger
|
|||
|
||||
_engine: Optional[Engine] = None
|
||||
_cohort_cache: dict[str, list[ComparableProperty]] = {}
|
||||
# Broadened (nearby-postcode) cohorts, keyed by (seed postcode, target property
|
||||
# type): the early-stop walk depends on the type it is filling for, so two types
|
||||
# in the same postcode must not share a cached result.
|
||||
_nearby_cohort_cache: dict[tuple[str, str], list[ComparableProperty]] = {}
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
|
@ -160,13 +176,18 @@ def _predict_epc(
|
|||
attributes_reader: OverrideBackedPredictionAttributesReader,
|
||||
coordinates: Optional[Coordinates],
|
||||
cohort_for: Callable[[str], list[ComparableProperty]],
|
||||
broaden: Callable[[PredictionTarget], list[ComparableProperty]],
|
||||
predictor: EpcPrediction,
|
||||
) -> Optional[EpcPropertyData]:
|
||||
"""Synthesise an EpcPropertyData for an EPC-less property from its postcode
|
||||
cohort (EPC Prediction Path 3, ADR-0031), or None when ineligible.
|
||||
|
||||
When the property's own postcode holds no same-type comparables (a sparse
|
||||
postcode — e.g. the only flat among houses), the cohort is broadened to the
|
||||
real unit postcodes physically nearest it (``broaden``) before giving up.
|
||||
|
||||
Returns None when property_type is unresolvable (hard cohort filter cannot
|
||||
fire) or when the postcode cohort is empty after filtering.
|
||||
fire) or when even the broadened cohort is empty after filtering.
|
||||
"""
|
||||
attributes = attributes_reader.attributes_for(property_id)
|
||||
identity = PropertyIdentity(
|
||||
|
|
@ -176,6 +197,8 @@ def _predict_epc(
|
|||
if target is None:
|
||||
return None
|
||||
comparables = select_comparables(target, cohort_for(target.postcode))
|
||||
if not comparables.members:
|
||||
comparables = select_comparables(target, broaden(target))
|
||||
if not comparables.members:
|
||||
return None
|
||||
predicted = predictor.predict(target, comparables)
|
||||
|
|
@ -221,7 +244,9 @@ def handler(body: dict[str, Any], context: Any) -> Optional[dict[str, Any]]:
|
|||
|
||||
overrides_reader = PropertyOverridesPostgresReader(lambda: Session(engine))
|
||||
prediction_attrs_reader = OverrideBackedPredictionAttributesReader(overrides_reader)
|
||||
comparables_repo = EpcComparablePropertiesRepository(epc_client, geospatial)
|
||||
comparables_repo = EpcComparablePropertiesRepository(
|
||||
epc_client, geospatial, nearby_postcodes=PostcodesIoClient()
|
||||
)
|
||||
predictor = EpcPrediction()
|
||||
|
||||
def _get_cohort(postcode: str) -> list[ComparableProperty]:
|
||||
|
|
@ -231,6 +256,34 @@ def handler(body: dict[str, Any], context: Any) -> Optional[dict[str, Any]]:
|
|||
)
|
||||
return _cohort_cache[postcode]
|
||||
|
||||
def _broaden(target: PredictionTarget) -> list[ComparableProperty]:
|
||||
"""The nearby-postcode cohort for a gated-out target — the real unit
|
||||
postcodes nearest it, walked until enough same-type comparables surface
|
||||
(ADR-0034). Memoised per (postcode, property_type) so co-located
|
||||
same-type misses share one walk."""
|
||||
key = (target.postcode, target.property_type)
|
||||
if key not in _nearby_cohort_cache:
|
||||
_nearby_cohort_cache[key] = (
|
||||
comparables_repo.candidates_near(
|
||||
target.postcode,
|
||||
target.coordinates,
|
||||
enough=lambda c: c.epc.property_type == target.property_type,
|
||||
)
|
||||
if target.postcode
|
||||
else []
|
||||
)
|
||||
return _nearby_cohort_cache[key]
|
||||
|
||||
# Re-establishes each lodged Property's Baseline Performance from the just-
|
||||
# persisted EPC (one UoW per property, committed after the Plan's). Predicted
|
||||
# Properties have no lodged figures, so they get no baseline (mirrors the e2e
|
||||
# runner and the ara_first_run Baseline stage).
|
||||
baseline_orchestrator = PropertyBaselineOrchestrator(
|
||||
unit_of_work=lambda: PostgresUnitOfWork(lambda: Session(engine)),
|
||||
rebaseliner=CalculatorRebaseliner(Sap10Calculator()),
|
||||
fuel_rates=FuelRatesStaticFileRepository(),
|
||||
)
|
||||
|
||||
read_session = Session(engine)
|
||||
try:
|
||||
scenario = ScenarioPostgresRepository(read_session).get_many([scenario_id])[0]
|
||||
|
|
@ -281,12 +334,14 @@ def handler(body: dict[str, Any], context: Any) -> Optional[dict[str, Any]]:
|
|||
attributes_reader=prediction_attrs_reader,
|
||||
coordinates=coordinates,
|
||||
cohort_for=_get_cohort,
|
||||
broaden=_broaden,
|
||||
predictor=predictor,
|
||||
)
|
||||
if predicted_epc is None:
|
||||
raise ValueError(
|
||||
f"no EPC for UPRN {uprn} and not predictable "
|
||||
f"(unresolved property_type or empty '{postcode}' cohort)"
|
||||
f"(unresolved property_type, or no same-type "
|
||||
f"comparables in or near '{postcode}')"
|
||||
)
|
||||
effective_epc = Property(
|
||||
identity=PropertyIdentity(
|
||||
|
|
@ -362,6 +417,13 @@ def handler(body: dict[str, Any], context: Any) -> Optional[dict[str, Any]]:
|
|||
uow.commit()
|
||||
logger.info(f"property={property_id} plan saved")
|
||||
|
||||
# Baseline Performance is re-established from the persisted EPC, so
|
||||
# it runs after the Plan UoW commits. Only lodged Properties have
|
||||
# the lodged figures the Baseline reads; predicted ones are skipped.
|
||||
if epc is not None:
|
||||
baseline_orchestrator.run([property_id])
|
||||
logger.info(f"property={property_id} baseline saved")
|
||||
|
||||
except Exception as error: # noqa: BLE001
|
||||
logger.error(
|
||||
f"property={property_id}: {type(error).__name__}: {error}",
|
||||
|
|
|
|||
|
|
@ -0,0 +1,90 @@
|
|||
# EPC Prediction cohort broadens to nearby postcodes
|
||||
|
||||
ADR-0029 sizes an EPC-less Property from the **Comparable Properties** in its
|
||||
postcode; ADR-0031 decision 5 made `property_type` the **hard** cohort filter — a
|
||||
flat is never sized from houses — and gated out any Property with no same-type
|
||||
comparable. This records how we **broaden** the cohort when a Property's own
|
||||
postcode is too sparse to fill, rather than gating it out. Resolved against a
|
||||
real failure on branch `feature/e2e-runs`: property 718580, the only flat lodged
|
||||
in postcode `BR6 6BS` (the other 8 certs were houses/bungalows), gated out and
|
||||
failed its modelling subtask.
|
||||
|
||||
## Status
|
||||
|
||||
Accepted. Extends ADR-0029 / ADR-0031 decision 5 (the postcode cohort).
|
||||
|
||||
## Context
|
||||
|
||||
ADR-0031 decision 5 anticipated this — "an Ordnance Survey `postcode_search`
|
||||
source can supply property type more broadly … wiring it is a later enhancement
|
||||
that widens the eligible population." The gating it describes is correct (never
|
||||
size a flat from a mixed-type cohort), but it makes prediction only as good as
|
||||
the *own* postcode's lodged stock. A genuinely-isolated type (one flat among
|
||||
houses) is unpredictable with no recourse, and a no-EPC + can't-predict Property
|
||||
fails its subtask — which, over SQS, retries and dead-letters even though the
|
||||
input data is permanently insufficient.
|
||||
|
||||
The legacy engine handled this by **trimming the postcode** (`BR6 6BS` → `BR6 6B`
|
||||
→ `BR6` …) and re-querying, because the old EPC API accepted partial postcodes.
|
||||
The current gov EPC API (`api.get-energy-performance-data.communities.gov.uk`)
|
||||
does **not**: its domestic search accepts only a *full* real postcode, `uprn`,
|
||||
`council[]`, `constituency[]`, or `address` — confirmed against its OpenAPI spec.
|
||||
There is no outcode/prefix, radius, or lat/long search.
|
||||
|
||||
## Decisions
|
||||
|
||||
### 1. Broaden to the real unit postcodes physically nearest the target
|
||||
|
||||
When the own-postcode cohort yields no same-type member, the cohort is broadened
|
||||
to the real unit postcodes around the target, resolved from its coordinates via
|
||||
**postcodes.io**'s keyless `nearest` endpoint (already a trusted dependency in
|
||||
`scripts/fetch_epc_prediction_dense_corpus.py`). Each nearby postcode is then
|
||||
searched exactly as the own postcode is — so `property_type` stays the same hard
|
||||
filter and ADR-0031 decision 5 is upheld; only the *reach* of the candidate pool
|
||||
widens, never the selection rule. The hard gate still fires when even the
|
||||
broadened cohort has no same-type comparable: broadening widens the eligible
|
||||
population, it does not force a prediction.
|
||||
|
||||
### 2. postcodes.io over `council[]` and a bulk centroid dataset
|
||||
|
||||
- **`council[]`** (the only "wider than postcode" gov-API filter) returns the
|
||||
*whole* local authority — tens of thousands of certs, many pages — and its
|
||||
result rows carry **no `property_type`**, so narrowing to the target's type
|
||||
would mean fetching every certificate. Rejected: cost is unbounded by distance
|
||||
and dominated by per-cert fetches.
|
||||
- **OS Code-Point Open / ONSPD → S3** would give an offline unit-postcode→
|
||||
coordinate table, but is a ~1.7M-row asset to ingest and host for a path that
|
||||
fires rarely. Rejected as disproportionate; reconsider if postcodes.io's
|
||||
availability becomes a liability.
|
||||
- **OS Places radius API** needs an `OS_API_KEY` the modelling path does not
|
||||
otherwise carry. Rejected to avoid a new secret.
|
||||
|
||||
postcodes.io is keyless, bounded by radius (a tight neighbourhood, not an
|
||||
authority), and reuses the existing per-postcode search unchanged.
|
||||
|
||||
### 3. Lazy, nearest-first with early-stop, and soft-failing
|
||||
|
||||
Broadening runs **only on gate-out** (a no-EPC Property whose own postcode lacks
|
||||
a same-type comparable) — the common cases never pay for it. Nearby postcodes are
|
||||
walked **nearest first** and the walk **stops early** once enough same-type
|
||||
comparables surface, so a dense area resolves in one or two searches instead of
|
||||
the whole radius. Results are memoised per `(postcode, property_type)`. The
|
||||
postcodes.io call is **soft-failing**: any error (after the shared transient
|
||||
retry) degrades to the seed postcode alone, so a flaky or unavailable
|
||||
postcodes.io can never break prediction — it just declines to broaden.
|
||||
|
||||
## Consequences
|
||||
|
||||
- A new external dependency, postcodes.io, sits in the modelling/Lambda path. It
|
||||
is keyless and isolated behind `PostcodesIoClient` (shared transient-retry +
|
||||
soft-fail), but its availability and rate limits are now a (bounded) operational
|
||||
concern. ADR-0031 decision 5's `postcode_search` lever remains an alternative.
|
||||
- The `ComparablePropertiesRepository` gains `candidates_near` alongside
|
||||
`candidates_for`; the predictor/handler try the own postcode first and broaden
|
||||
only on an empty cohort.
|
||||
- Prediction reaches across postcode boundaries, so the geo-distance weighting
|
||||
ADR-0029 already applies (a comparable's vote decays with haversine distance)
|
||||
becomes load-bearing for broadened cohorts, not just a refinement.
|
||||
- `property_type` gating is unchanged: a Property with no same-type comparable
|
||||
*anywhere nearby* is still gated out — now a genuine "no comparable stock"
|
||||
signal rather than an artefact of a sparse single postcode.
|
||||
|
|
@ -1,35 +0,0 @@
|
|||
import time
|
||||
from typing import Callable, Optional, TypeVar
|
||||
|
||||
import httpx
|
||||
|
||||
from infrastructure.epc_client.exceptions import EpcRateLimitError
|
||||
|
||||
T = TypeVar("T")
|
||||
|
||||
|
||||
def call_with_retry(
|
||||
fn: Callable[[], T],
|
||||
max_retries: int = 5,
|
||||
backoff_base: float = 1.0,
|
||||
backoff_multiplier: float = 2.0,
|
||||
max_backoff: float = 60.0,
|
||||
) -> T:
|
||||
"""Retry `fn` on transient EPC-API failures: HTTP 429 rate limits and
|
||||
transport errors (read/connect timeouts, connection resets). A 429 honours
|
||||
the server's `Retry-After`; transport errors back off exponentially. Non-
|
||||
transient failures (other 4xx/5xx, mapping errors) propagate immediately."""
|
||||
last_exc: Optional[Exception] = None
|
||||
for attempt in range(max_retries + 1):
|
||||
try:
|
||||
return fn()
|
||||
except (EpcRateLimitError, httpx.TransportError) as exc:
|
||||
last_exc = exc
|
||||
if attempt < max_retries:
|
||||
if isinstance(exc, EpcRateLimitError) and exc.retry_after is not None:
|
||||
delay = exc.retry_after
|
||||
else:
|
||||
delay = backoff_base * (backoff_multiplier**attempt)
|
||||
time.sleep(min(delay, max_backoff))
|
||||
assert last_exc is not None
|
||||
raise last_exc
|
||||
|
|
@ -10,7 +10,7 @@ from infrastructure.epc_client.exceptions import (
|
|||
EpcNotFoundError,
|
||||
EpcRateLimitError,
|
||||
)
|
||||
from infrastructure.epc_client._retry import call_with_retry
|
||||
from infrastructure.http_retry import call_with_retry
|
||||
from datatypes.epc.domain.epc_property_data import EpcPropertyData
|
||||
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
|
||||
from datatypes.epc.search import EpcSearchResult
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
from typing import Optional
|
||||
from infrastructure.http_retry import TransientHttpError
|
||||
|
||||
|
||||
class EpcApiError(Exception):
|
||||
|
|
@ -9,9 +9,7 @@ class EpcNotFoundError(EpcApiError):
|
|||
"""Raised when the API returns 404."""
|
||||
|
||||
|
||||
class EpcRateLimitError(EpcApiError):
|
||||
"""Raised when the API returns 429 and all retries are exhausted."""
|
||||
|
||||
def __init__(self, message: str, retry_after: Optional[float] = None) -> None:
|
||||
super().__init__(message)
|
||||
self.retry_after = retry_after
|
||||
class EpcRateLimitError(EpcApiError, TransientHttpError):
|
||||
"""Raised when the API returns 429. A ``TransientHttpError`` so the shared
|
||||
``call_with_retry`` retries it (honouring ``Retry-After``), while remaining an
|
||||
``EpcApiError`` for callers that catch the EPC hierarchy."""
|
||||
|
|
|
|||
60
infrastructure/http_retry.py
Normal file
60
infrastructure/http_retry.py
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
"""Shared transient-failure retry for the HTTP source clients.
|
||||
|
||||
The retry *mechanism* is generic; each client owns the *policy* of what counts as
|
||||
transient. A caller signals "retry this" by raising ``TransientHttpError``
|
||||
(carrying any server-advised ``retry_after``); transport-level errors (read /
|
||||
connect timeouts, connection resets) are always treated as transient.
|
||||
``call_with_retry`` backs off exponentially between attempts — honouring
|
||||
``retry_after`` when present — and re-raises the last error once attempts are
|
||||
exhausted, leaving the caller to decide how to surface it (the EPC client lets it
|
||||
propagate; postcodes.io soft-fails to the seed postcode).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import time
|
||||
from typing import Callable, Optional, TypeVar
|
||||
|
||||
import httpx
|
||||
|
||||
T = TypeVar("T")
|
||||
|
||||
|
||||
class TransientHttpError(Exception):
|
||||
"""A failure worth retrying. ``retry_after`` is the server-advised delay (a
|
||||
429's ``Retry-After``), used in place of the computed backoff when present."""
|
||||
|
||||
def __init__(self, message: str, retry_after: Optional[float] = None) -> None:
|
||||
super().__init__(message)
|
||||
self.retry_after = retry_after
|
||||
|
||||
|
||||
def call_with_retry(
|
||||
fn: Callable[[], T],
|
||||
max_retries: int = 5,
|
||||
backoff_base: float = 1.0,
|
||||
backoff_multiplier: float = 2.0,
|
||||
max_backoff: float = 60.0,
|
||||
) -> T:
|
||||
"""Retry ``fn`` on transient failures — ``TransientHttpError`` (e.g. a 429)
|
||||
and ``httpx.TransportError`` (read/connect timeouts, connection resets) —
|
||||
backing off exponentially, or by the error's ``retry_after`` when it carries
|
||||
one. Non-transient failures propagate immediately; the last transient error
|
||||
is re-raised once ``max_retries`` is exhausted."""
|
||||
last_exc: Optional[Exception] = None
|
||||
for attempt in range(max_retries + 1):
|
||||
try:
|
||||
return fn()
|
||||
except (TransientHttpError, httpx.TransportError) as exc:
|
||||
last_exc = exc
|
||||
if attempt < max_retries:
|
||||
retry_after = (
|
||||
exc.retry_after if isinstance(exc, TransientHttpError) else None
|
||||
)
|
||||
if retry_after is not None:
|
||||
delay = retry_after
|
||||
else:
|
||||
delay = backoff_base * (backoff_multiplier**attempt)
|
||||
time.sleep(min(delay, max_backoff))
|
||||
assert last_exc is not None
|
||||
raise last_exc
|
||||
0
infrastructure/postcodes_io/__init__.py
Normal file
0
infrastructure/postcodes_io/__init__.py
Normal file
145
infrastructure/postcodes_io/postcodes_io_client.py
Normal file
145
infrastructure/postcodes_io/postcodes_io_client.py
Normal file
|
|
@ -0,0 +1,145 @@
|
|||
"""postcodes.io adapter — a coordinate (or seed postcode) → the real unit
|
||||
postcodes physically near it.
|
||||
|
||||
The gov EPC API only searches a *full* real postcode — no outcode/prefix, no
|
||||
radius, no lat/long (confirmed against its OpenAPI spec). So to broaden an
|
||||
EPC-Prediction cohort beyond the target's own postcode we must first discover the
|
||||
real unit postcodes around it. postcodes.io's free, keyless ``nearest`` endpoint
|
||||
does exactly that: given a point it returns the unit postcodes within a radius,
|
||||
nearest first.
|
||||
|
||||
Failure is deliberately non-fatal: any error (network, unknown seed, missing
|
||||
coordinates) returns just the seed postcode, so broadening degrades to "no
|
||||
broadening" rather than breaking prediction.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Optional
|
||||
|
||||
import httpx
|
||||
|
||||
from domain.geospatial.coordinates import Coordinates
|
||||
from infrastructure.http_retry import TransientHttpError, call_with_retry
|
||||
|
||||
|
||||
class PostcodesIoClient:
|
||||
BASE_URL = "https://api.postcodes.io"
|
||||
REQUEST_TIMEOUT = 10.0
|
||||
# Transient failures (transport errors, 429s, 5xx) are retried with
|
||||
# exponential backoff; everything else (and exhaustion) soft-fails to the
|
||||
# seed, so broadening never breaks prediction.
|
||||
MAX_RETRIES = 3
|
||||
BACKOFF_BASE = 0.5
|
||||
BACKOFF_MULTIPLIER = 2.0
|
||||
MAX_BACKOFF = 8.0
|
||||
|
||||
def __init__(self, *, radius_m: int = 1000, limit: int = 30) -> None:
|
||||
"""``radius_m`` bounds how far the broadened cohort reaches; ``limit``
|
||||
caps how many nearby postcodes are returned (and so the per-gate-out
|
||||
fetch cost)."""
|
||||
self._radius_m = radius_m
|
||||
self._limit = limit
|
||||
|
||||
def nearby(
|
||||
self, postcode: str, coordinates: Optional[Coordinates] = None
|
||||
) -> list[str]:
|
||||
"""The real unit postcodes within ``radius_m`` of ``postcode`` — nearest
|
||||
first, the seed always included — or just ``[postcode]`` when the seed's
|
||||
coordinates cannot be resolved or the lookup fails.
|
||||
|
||||
``coordinates`` (the target's own, resolved from its UPRN) is used when
|
||||
given, sparing a postcode→centroid round-trip; otherwise postcodes.io
|
||||
resolves the seed postcode's centroid itself."""
|
||||
point = coordinates if coordinates is not None else self._centroid_of(postcode)
|
||||
if point is None:
|
||||
return [postcode]
|
||||
found = self._nearest_to(point)
|
||||
ordered = [postcode] + [p for p in found if p != postcode]
|
||||
return ordered[: self._limit]
|
||||
|
||||
def _centroid_of(self, postcode: str) -> Optional[Coordinates]:
|
||||
result = self._get(f"/postcodes/{postcode.replace(' ', '')}")
|
||||
if result is None:
|
||||
return None
|
||||
latitude: Any = result.get("latitude")
|
||||
longitude: Any = result.get("longitude")
|
||||
if latitude is None or longitude is None:
|
||||
return None
|
||||
return Coordinates(longitude=float(longitude), latitude=float(latitude))
|
||||
|
||||
def _nearest_to(self, point: Coordinates) -> list[str]:
|
||||
results = self._get_list(
|
||||
"/postcodes",
|
||||
{
|
||||
"lon": point.longitude,
|
||||
"lat": point.latitude,
|
||||
"radius": self._radius_m,
|
||||
"limit": self._limit,
|
||||
},
|
||||
)
|
||||
return [str(row["postcode"]) for row in results if row.get("postcode")]
|
||||
|
||||
def _get(self, path: str) -> Optional[dict[str, Any]]:
|
||||
payload = self._call(path, None)
|
||||
return payload if isinstance(payload, dict) else None
|
||||
|
||||
def _get_list(self, path: str, params: dict[str, Any]) -> list[dict[str, Any]]:
|
||||
payload = self._call(path, params)
|
||||
if not isinstance(payload, list):
|
||||
return []
|
||||
return [row for row in payload if isinstance(row, dict)]
|
||||
|
||||
def _call(self, path: str, params: Optional[dict[str, Any]]) -> Any:
|
||||
"""The parsed ``result`` payload for a postcodes.io GET, retried on
|
||||
transient failures via the shared ``call_with_retry``, or None on a
|
||||
non-transient failure (e.g. an unknown postcode's 404) or once retries are
|
||||
exhausted — broadening then falls back to the seed alone. The soft-fail is
|
||||
the difference from the EPC client, which lets the error propagate."""
|
||||
try:
|
||||
return call_with_retry(
|
||||
lambda: self._fetch(path, params),
|
||||
max_retries=self.MAX_RETRIES,
|
||||
backoff_base=self.BACKOFF_BASE,
|
||||
backoff_multiplier=self.BACKOFF_MULTIPLIER,
|
||||
max_backoff=self.MAX_BACKOFF,
|
||||
)
|
||||
except (TransientHttpError, httpx.HTTPError):
|
||||
return None
|
||||
|
||||
def _fetch(self, path: str, params: Optional[dict[str, Any]]) -> Any:
|
||||
"""One GET. Raises ``TransientHttpError`` on a 429/5xx (and lets
|
||||
``httpx.TransportError`` propagate) so ``call_with_retry`` retries it;
|
||||
returns None for a non-transient non-success (e.g. 404) or unparseable
|
||||
body."""
|
||||
response = httpx.get(
|
||||
f"{self.BASE_URL}{path}",
|
||||
params=params,
|
||||
timeout=self.REQUEST_TIMEOUT,
|
||||
)
|
||||
if self._is_transient(response.status_code):
|
||||
raise TransientHttpError(
|
||||
f"postcodes.io {response.status_code} on {path}",
|
||||
retry_after=self._retry_after(response),
|
||||
)
|
||||
if not response.is_success:
|
||||
return None
|
||||
try:
|
||||
body: Any = response.json()
|
||||
except ValueError:
|
||||
return None
|
||||
return body.get("result") if isinstance(body, dict) else None
|
||||
|
||||
@staticmethod
|
||||
def _is_transient(status_code: int) -> bool:
|
||||
return status_code == 429 or status_code >= 500
|
||||
|
||||
@staticmethod
|
||||
def _retry_after(response: httpx.Response) -> Optional[float]:
|
||||
header = response.headers.get("Retry-After")
|
||||
if header is None:
|
||||
return None
|
||||
try:
|
||||
return float(header)
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
|
@ -12,7 +12,7 @@ from __future__ import annotations
|
|||
import logging
|
||||
from dataclasses import dataclass
|
||||
from datetime import date
|
||||
from typing import Optional, Protocol
|
||||
from typing import Callable, Optional, Protocol
|
||||
|
||||
from datatypes.epc.domain.epc_property_data import EpcPropertyData
|
||||
from datatypes.epc.search.epc_search_result import EpcSearchResult
|
||||
|
|
@ -22,6 +22,11 @@ from repositories.comparable_properties.comparable_properties_repository import
|
|||
ComparablePropertiesRepository,
|
||||
)
|
||||
|
||||
# The same default floor `select_comparables` uses: keep walking nearby postcodes
|
||||
# until this many candidates match, so the broadened cohort is big enough for the
|
||||
# downstream relax ladder rather than stopping at the first stray match.
|
||||
_DEFAULT_MINIMUM_COHORT = 5
|
||||
|
||||
|
||||
class CohortEpcClient(Protocol):
|
||||
"""The slice of the EPC-API client the cohort fetch needs (e.g.
|
||||
|
|
@ -40,6 +45,16 @@ class CohortGeospatial(Protocol):
|
|||
) -> dict[int, Coordinates]: ...
|
||||
|
||||
|
||||
class NearbyPostcodes(Protocol):
|
||||
"""Resolves the real unit postcodes physically near a seed postcode (e.g.
|
||||
`PostcodesIoClient`). The gov EPC API cannot search by radius, so this is how
|
||||
the cohort reaches beyond the target's own postcode (ADR-0034)."""
|
||||
|
||||
def nearby(
|
||||
self, postcode: str, coordinates: Optional[Coordinates] = None
|
||||
) -> list[str]: ...
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
|
|
@ -56,10 +71,14 @@ class SkippedCohortCert:
|
|||
|
||||
class EpcComparablePropertiesRepository(ComparablePropertiesRepository):
|
||||
def __init__(
|
||||
self, epc_client: CohortEpcClient, geospatial: CohortGeospatial
|
||||
self,
|
||||
epc_client: CohortEpcClient,
|
||||
geospatial: CohortGeospatial,
|
||||
nearby_postcodes: Optional[NearbyPostcodes] = None,
|
||||
) -> None:
|
||||
self._epc_client = epc_client
|
||||
self._geospatial = geospatial
|
||||
self._nearby_postcodes = nearby_postcodes
|
||||
# Cohort certs skipped because they are not yet mappable. Accumulates
|
||||
# across every postcode the instance serves; the caller reads it after
|
||||
# the run to report the mapper gaps (see modelling_e2e handler).
|
||||
|
|
@ -80,6 +99,45 @@ class EpcComparablePropertiesRepository(ComparablePropertiesRepository):
|
|||
cohort.append(comparable)
|
||||
return cohort
|
||||
|
||||
def candidates_near(
|
||||
self,
|
||||
postcode: str,
|
||||
coordinates: Optional[Coordinates] = None,
|
||||
*,
|
||||
enough: Optional[Callable[[ComparableProperty], bool]] = None,
|
||||
minimum: int = _DEFAULT_MINIMUM_COHORT,
|
||||
) -> list[ComparableProperty]:
|
||||
"""The broadened cohort: candidates drawn from the real unit postcodes
|
||||
nearest ``postcode`` (ADR-0034), for when the target's own postcode holds
|
||||
no same-type comparables. Postcodes are visited nearest first and each
|
||||
candidate is deduped by certificate number across them.
|
||||
|
||||
``enough`` lets the caller stop the walk early — once ``minimum``
|
||||
candidates satisfy it (e.g. they match the target's property type) the
|
||||
remaining, further-away postcodes are not fetched, so a dense area
|
||||
resolves in one or two searches instead of the whole radius. Without a
|
||||
configured ``NearbyPostcodes`` source this degrades to the seed postcode
|
||||
alone."""
|
||||
postcodes = (
|
||||
self._nearby_postcodes.nearby(postcode, coordinates)
|
||||
if self._nearby_postcodes is not None
|
||||
else [postcode]
|
||||
)
|
||||
candidates: list[ComparableProperty] = []
|
||||
seen_certs: set[str] = set()
|
||||
matches = 0
|
||||
for nearby_postcode in postcodes:
|
||||
for candidate in self.candidates_for(nearby_postcode):
|
||||
if candidate.certificate_number in seen_certs:
|
||||
continue
|
||||
seen_certs.add(candidate.certificate_number)
|
||||
candidates.append(candidate)
|
||||
if enough is not None and enough(candidate):
|
||||
matches += 1
|
||||
if enough is not None and matches >= minimum:
|
||||
break
|
||||
return candidates
|
||||
|
||||
# Mapper-shape errors: the cert's lodged data does not fit the schema/mapper.
|
||||
# `ValueError` — missing required field / unmapped code (`UnmappedApiCode`);
|
||||
# `AttributeError` — a field lodged in the wrong shape (e.g. `photovoltaic_supply`
|
||||
|
|
|
|||
|
|
@ -81,6 +81,7 @@ from domain.epc_prediction.comparable_properties import ( # noqa: E402
|
|||
)
|
||||
from domain.epc_prediction.epc_prediction import EpcPrediction # noqa: E402
|
||||
from domain.epc_prediction.prediction_target import ( # noqa: E402
|
||||
PredictionTarget,
|
||||
build_prediction_target,
|
||||
)
|
||||
from domain.geospatial.coordinates import Coordinates # noqa: E402
|
||||
|
|
@ -96,6 +97,9 @@ from domain.modelling.scenario import Scenario # noqa: E402
|
|||
from harness.console import candidate_recommendations, run_modelling # noqa: E402
|
||||
from harness.plan_table import format_plan_table # noqa: E402
|
||||
from infrastructure.epc_client.epc_client_service import EpcClientService # noqa: E402
|
||||
from infrastructure.postcodes_io.postcodes_io_client import ( # noqa: E402
|
||||
PostcodesIoClient,
|
||||
)
|
||||
from infrastructure.solar.google_solar_api_client import ( # noqa: E402
|
||||
BuildingInsightsNotFoundError,
|
||||
GoogleSolarApiClient,
|
||||
|
|
@ -400,6 +404,7 @@ def _predict_epc(
|
|||
attributes_reader: OverrideBackedPredictionAttributesReader,
|
||||
coordinates: Optional[Coordinates],
|
||||
cohort_for: Callable[[str], list[ComparableProperty]],
|
||||
broaden: Callable[[PredictionTarget], list[ComparableProperty]],
|
||||
predictor: EpcPrediction,
|
||||
) -> Optional[EpcPropertyData]:
|
||||
"""Synthesise an EpcPropertyData for an EPC-less Property from its postcode
|
||||
|
|
@ -408,7 +413,8 @@ def _predict_epc(
|
|||
|
||||
The cohort is found by POSTCODE, so a wrong postcode on the property row
|
||||
yields the wrong neighbours — a prediction is only as good as the postcode it
|
||||
is given."""
|
||||
is given. When the own postcode holds no same-type comparables, the cohort is
|
||||
broadened to the real unit postcodes physically nearest it (``broaden``)."""
|
||||
attributes = attributes_reader.attributes_for(property_id)
|
||||
identity = PropertyIdentity(
|
||||
portfolio_id=portfolio_id, postcode=postcode, address="", uprn=uprn
|
||||
|
|
@ -418,7 +424,10 @@ def _predict_epc(
|
|||
return None # property_type unresolvable — gated out of prediction
|
||||
comparables = select_comparables(target, cohort_for(target.postcode))
|
||||
if not comparables.members:
|
||||
return None # no comparable neighbours in the postcode
|
||||
# Sparse own postcode — reach out to the nearest real postcodes.
|
||||
comparables = select_comparables(target, broaden(target))
|
||||
if not comparables.members:
|
||||
return None # no comparable neighbours nearby either
|
||||
predicted = predictor.predict(target, comparables)
|
||||
# The calculator needs a MAIN building part; a cohort whose template carries
|
||||
# none (e.g. a malformed flat record) yields an unscoreable picture, so reject
|
||||
|
|
@ -684,9 +693,12 @@ def main() -> None:
|
|||
# from the live EPC API (search-by-postcode + per-cert fetch), memoised per
|
||||
# postcode so co-located missing Properties don't refetch the same cohort.
|
||||
prediction_attributes = OverrideBackedPredictionAttributesReader(overrides_reader)
|
||||
comparables_repo = EpcComparablePropertiesRepository(epc_client, geospatial)
|
||||
comparables_repo = EpcComparablePropertiesRepository(
|
||||
epc_client, geospatial, nearby_postcodes=PostcodesIoClient()
|
||||
)
|
||||
predictor = EpcPrediction()
|
||||
_cohort_cache: dict[str, list[ComparableProperty]] = {}
|
||||
_nearby_cohort_cache: dict[tuple[str, str], list[ComparableProperty]] = {}
|
||||
|
||||
def cohort_for(postcode: str) -> list[ComparableProperty]:
|
||||
if postcode not in _cohort_cache:
|
||||
|
|
@ -694,6 +706,23 @@ def main() -> None:
|
|||
comparables_repo.candidates_for(postcode) if postcode else []
|
||||
)
|
||||
return _cohort_cache[postcode]
|
||||
|
||||
def broaden(target: PredictionTarget) -> list[ComparableProperty]:
|
||||
# Broadened cohort for a gated-out target: the nearest real postcodes,
|
||||
# walked until enough same-type comparables surface (ADR-0034). Memoised
|
||||
# per (postcode, property_type).
|
||||
key = (target.postcode, target.property_type)
|
||||
if key not in _nearby_cohort_cache:
|
||||
_nearby_cohort_cache[key] = (
|
||||
comparables_repo.candidates_near(
|
||||
target.postcode,
|
||||
target.coordinates,
|
||||
enough=lambda c: c.epc.property_type == target.property_type,
|
||||
)
|
||||
if target.postcode
|
||||
else []
|
||||
)
|
||||
return _nearby_cohort_cache[key]
|
||||
# One read-only session for the live `material` catalogue, reused across the
|
||||
# batch so both store and no-store runs price against the same DB rows.
|
||||
catalogue_session = Session(engine)
|
||||
|
|
@ -831,12 +860,14 @@ def main() -> None:
|
|||
attributes_reader=prediction_attributes,
|
||||
coordinates=coordinates,
|
||||
cohort_for=cohort_for,
|
||||
broaden=broaden,
|
||||
predictor=predictor,
|
||||
)
|
||||
if predicted_epc is None:
|
||||
raise ValueError(
|
||||
f"no EPC for UPRN {uprn} and not predictable "
|
||||
f"(unresolved property_type or empty '{postcode}' cohort)"
|
||||
f"(unresolved property_type, or no same-type "
|
||||
f"comparables in or near '{postcode}')"
|
||||
)
|
||||
# Property.effective_epc folds any Landlord Overrides onto the
|
||||
# synthesised EPC (cohort fills the unknown fields, the landlord's
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ is needed. One test per distinct behaviour path.
|
|||
from __future__ import annotations
|
||||
|
||||
from contextlib import ExitStack
|
||||
from typing import Any
|
||||
from typing import Any, Iterator
|
||||
from unittest.mock import MagicMock, call, patch
|
||||
|
||||
import pytest
|
||||
|
|
@ -81,6 +81,17 @@ def _clear_cohort_cache() -> None:
|
|||
import applications.modelling_e2e.handler as h
|
||||
|
||||
h._cohort_cache.clear()
|
||||
h._nearby_cohort_cache.clear()
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _baseline_orchestrator() -> Iterator[MagicMock]:
|
||||
"""Patch the Baseline orchestrator for every test — construction stays cheap
|
||||
and ``.run`` is a no-op mock the baseline tests assert against."""
|
||||
with patch(
|
||||
"applications.modelling_e2e.handler.PropertyBaselineOrchestrator"
|
||||
) as orchestrator:
|
||||
yield orchestrator
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -116,9 +127,12 @@ def test_trigger_body_rejects_missing_property_ids() -> None:
|
|||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_lodged_epc_path_saves_epc_plan_and_marks_modelled() -> None:
|
||||
def test_lodged_epc_path_saves_epc_plan_and_marks_modelled(
|
||||
_baseline_orchestrator: MagicMock,
|
||||
) -> None:
|
||||
"""When get_by_uprn returns an EPC the handler saves it, saves the plan,
|
||||
and marks the property as modelled — all inside one UoW per property."""
|
||||
marks the property as modelled — all inside one UoW per property — and
|
||||
re-establishes its Baseline Performance after the plan commits."""
|
||||
# Arrange
|
||||
mock_engine = _engine_mock([PROPERTY_ID], [UPRN], [POSTCODE])
|
||||
mock_epc = MagicMock()
|
||||
|
|
@ -196,6 +210,8 @@ def test_lodged_epc_path_saves_epc_plan_and_marks_modelled() -> None:
|
|||
PROPERTY_ID, has_recommendations=False
|
||||
)
|
||||
mock_uow.commit.assert_called_once()
|
||||
# Baseline Performance is re-established for the lodged property.
|
||||
_baseline_orchestrator.return_value.run.assert_called_once_with([PROPERTY_ID])
|
||||
|
||||
|
||||
def test_skipped_cohort_certs_are_surfaced_in_the_outputs() -> None:
|
||||
|
|
@ -296,9 +312,12 @@ def test_skipped_cohort_certs_are_surfaced_in_the_outputs() -> None:
|
|||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_prediction_path_saves_plan_without_epc_save() -> None:
|
||||
def test_prediction_path_saves_plan_without_epc_save(
|
||||
_baseline_orchestrator: MagicMock,
|
||||
) -> None:
|
||||
"""When get_by_uprn returns None the handler synthesises an EPC via
|
||||
prediction and saves the plan — but never calls epc.save."""
|
||||
prediction and saves the plan — but never calls epc.save, and (having no
|
||||
lodged figures) never establishes a Baseline."""
|
||||
# Arrange
|
||||
mock_engine = _engine_mock([PROPERTY_ID], [UPRN], [POSTCODE])
|
||||
mock_plan = _plan_mock()
|
||||
|
|
@ -398,10 +417,11 @@ def test_prediction_path_saves_plan_without_epc_save() -> None:
|
|||
# Act
|
||||
_call_handler(_BODY)
|
||||
|
||||
# Assert — epc.save NOT called (no lodged cert), plan IS saved
|
||||
# Assert — epc.save NOT called (no lodged cert), plan IS saved, no baseline
|
||||
mock_uow.epc.save.assert_not_called()
|
||||
mock_uow.plan.save.assert_called_once()
|
||||
mock_uow.commit.assert_called_once()
|
||||
_baseline_orchestrator.return_value.run.assert_not_called()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -489,6 +509,124 @@ def test_empty_cohort_gates_property_out_and_raises() -> None:
|
|||
MockUoW.return_value.__enter__.assert_not_called()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Broadened cohort — sparse own postcode falls back to nearby postcodes
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_empty_own_postcode_broadens_to_nearby_and_predicts() -> None:
|
||||
"""When the property's own postcode holds no same-type comparables, the
|
||||
handler broadens to the nearby-postcode cohort (candidates_near) and, finding
|
||||
comparables there, synthesises the EPC and saves the plan."""
|
||||
# Arrange
|
||||
mock_engine = _engine_mock([PROPERTY_ID], [UPRN], [POSTCODE])
|
||||
mock_plan = _plan_mock()
|
||||
mock_uow = MagicMock()
|
||||
|
||||
mock_predicted_epc = MagicMock()
|
||||
from datatypes.epc.domain.epc_property_data import BuildingPartIdentifier
|
||||
|
||||
mock_part = MagicMock()
|
||||
mock_part.identifier = BuildingPartIdentifier.MAIN
|
||||
mock_predicted_epc.sap_building_parts = [mock_part]
|
||||
|
||||
# First select_comparables (own postcode) is empty → broaden; the second
|
||||
# (nearby cohort) finds comparables.
|
||||
empty_comparables = MagicMock()
|
||||
empty_comparables.members = []
|
||||
found_comparables = MagicMock()
|
||||
found_comparables.members = [MagicMock()]
|
||||
|
||||
with ExitStack() as stack:
|
||||
stack.enter_context(
|
||||
patch("applications.modelling_e2e.handler.os.environ", _ENV)
|
||||
)
|
||||
stack.enter_context(
|
||||
patch(
|
||||
"applications.modelling_e2e.handler._get_engine",
|
||||
return_value=mock_engine,
|
||||
)
|
||||
)
|
||||
stack.enter_context(
|
||||
patch("applications.modelling_e2e.handler.EpcClientService")
|
||||
).return_value.get_by_uprn.return_value = None # no lodged EPC
|
||||
stack.enter_context(
|
||||
patch("applications.modelling_e2e.handler.GeospatialS3Repository")
|
||||
)
|
||||
stack.enter_context(
|
||||
patch("applications.modelling_e2e.handler.GoogleSolarApiClient")
|
||||
)
|
||||
stack.enter_context(
|
||||
patch("applications.modelling_e2e.handler._spatial_for", return_value=None)
|
||||
)
|
||||
stack.enter_context(
|
||||
patch(
|
||||
"applications.modelling_e2e.handler._solar_insights_for",
|
||||
return_value=None,
|
||||
)
|
||||
)
|
||||
stack.enter_context(
|
||||
patch("applications.modelling_e2e.handler.overlays_from", return_value=[])
|
||||
)
|
||||
stack.enter_context(
|
||||
patch("applications.modelling_e2e.handler.PropertyOverridesPostgresReader")
|
||||
)
|
||||
from domain.epc_prediction.prediction_target import PredictionTargetAttributes
|
||||
|
||||
stack.enter_context(
|
||||
patch(
|
||||
"applications.modelling_e2e.handler.OverrideBackedPredictionAttributesReader"
|
||||
)
|
||||
).return_value.attributes_for.return_value = PredictionTargetAttributes(
|
||||
property_type="2"
|
||||
)
|
||||
MockRepo = stack.enter_context(
|
||||
patch(
|
||||
"applications.modelling_e2e.handler.EpcComparablePropertiesRepository"
|
||||
)
|
||||
)
|
||||
MockRepo.return_value.candidates_for.return_value = []
|
||||
MockRepo.return_value.candidates_near.return_value = [MagicMock()]
|
||||
stack.enter_context(
|
||||
patch(
|
||||
"applications.modelling_e2e.handler.select_comparables",
|
||||
side_effect=[empty_comparables, found_comparables],
|
||||
)
|
||||
)
|
||||
stack.enter_context(
|
||||
patch("applications.modelling_e2e.handler.EpcPrediction")
|
||||
).return_value.predict.return_value = mock_predicted_epc
|
||||
stack.enter_context(
|
||||
patch("applications.modelling_e2e.handler.ScenarioPostgresRepository")
|
||||
).return_value.get_many.return_value = [MagicMock()]
|
||||
stack.enter_context(
|
||||
patch(
|
||||
"applications.modelling_e2e.handler.catalogue_with_off_catalogue_overrides"
|
||||
)
|
||||
)
|
||||
stack.enter_context(patch("applications.modelling_e2e.handler.Session"))
|
||||
stack.enter_context(
|
||||
patch(
|
||||
"applications.modelling_e2e.handler.run_modelling",
|
||||
return_value=mock_plan,
|
||||
)
|
||||
)
|
||||
MockUoW = stack.enter_context(
|
||||
patch("applications.modelling_e2e.handler.PostgresUnitOfWork")
|
||||
)
|
||||
MockUoW.return_value.__enter__.return_value = mock_uow
|
||||
MockUoW.return_value.__exit__.return_value = False
|
||||
|
||||
# Act
|
||||
_call_handler(_BODY)
|
||||
|
||||
# Assert — broadening fired, and the broadened cohort produced a saved plan.
|
||||
MockRepo.return_value.candidates_near.assert_called_once()
|
||||
mock_uow.epc.save.assert_not_called() # predicted, never lodged
|
||||
mock_uow.plan.save.assert_called_once()
|
||||
mock_uow.commit.assert_called_once()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Partial batch failure
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -78,7 +78,7 @@ def test_429_retry_after_header_drives_sleep_duration(
|
|||
_mock_response(200, cert_response),
|
||||
]
|
||||
with patch("httpx.get", side_effect=responses), patch(
|
||||
"infrastructure.epc_client._retry.time.sleep"
|
||||
"infrastructure.http_retry.time.sleep"
|
||||
) as mock_sleep:
|
||||
epc_service.get_by_certificate_number("CERT-001")
|
||||
|
||||
|
|
@ -100,7 +100,7 @@ def test_429_without_retry_after_uses_exponential_backoff(
|
|||
_mock_response(200, cert_response),
|
||||
]
|
||||
with patch("httpx.get", side_effect=responses), patch(
|
||||
"infrastructure.epc_client._retry.time.sleep"
|
||||
"infrastructure.http_retry.time.sleep"
|
||||
) as mock_sleep:
|
||||
epc_service.get_by_certificate_number("CERT-001")
|
||||
|
||||
|
|
@ -121,7 +121,7 @@ def test_429_malformed_retry_after_falls_back_to_backoff(
|
|||
_mock_response(200, cert_response),
|
||||
]
|
||||
with patch("httpx.get", side_effect=responses), patch(
|
||||
"infrastructure.epc_client._retry.time.sleep"
|
||||
"infrastructure.http_retry.time.sleep"
|
||||
) as mock_sleep:
|
||||
epc_service.get_by_certificate_number("CERT-001")
|
||||
|
||||
|
|
@ -140,7 +140,7 @@ def test_429_retry_after_capped_by_max_backoff(epc_service, rdsap_21_0_1_cert):
|
|||
_mock_response(200, cert_response),
|
||||
]
|
||||
with patch("httpx.get", side_effect=responses), patch(
|
||||
"infrastructure.epc_client._retry.time.sleep"
|
||||
"infrastructure.http_retry.time.sleep"
|
||||
) as mock_sleep:
|
||||
epc_service.get_by_certificate_number("CERT-001")
|
||||
|
||||
|
|
|
|||
0
tests/infrastructure/postcodes_io/__init__.py
Normal file
0
tests/infrastructure/postcodes_io/__init__.py
Normal file
224
tests/infrastructure/postcodes_io/test_postcodes_io_client.py
Normal file
224
tests/infrastructure/postcodes_io/test_postcodes_io_client.py
Normal file
|
|
@ -0,0 +1,224 @@
|
|||
"""PostcodesIoClient — coordinate/seed postcode → the real unit postcodes near
|
||||
it, via postcodes.io's keyless nearest endpoint. Failure degrades to the seed
|
||||
alone so broadening never breaks prediction."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Iterator, Optional
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
|
||||
from domain.geospatial.coordinates import Coordinates
|
||||
from infrastructure.postcodes_io.postcodes_io_client import PostcodesIoClient
|
||||
|
||||
_MODULE = "infrastructure.postcodes_io.postcodes_io_client"
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _no_sleep() -> Iterator[MagicMock]:
|
||||
"""Never actually sleep during backoff (the shared retry owns the sleep) —
|
||||
just record the calls."""
|
||||
with patch("infrastructure.http_retry.time.sleep") as sleep:
|
||||
yield sleep
|
||||
|
||||
|
||||
def _response(
|
||||
payload: Any,
|
||||
*,
|
||||
status_code: int = 200,
|
||||
headers: Optional[dict[str, str]] = None,
|
||||
) -> MagicMock:
|
||||
resp = MagicMock()
|
||||
resp.status_code = status_code
|
||||
resp.is_success = 200 <= status_code < 300
|
||||
resp.headers = headers if headers is not None else {}
|
||||
resp.json.return_value = payload
|
||||
return resp
|
||||
|
||||
|
||||
def _nearest_payload(postcodes: list[str]) -> dict[str, Any]:
|
||||
return {"result": [{"postcode": p} for p in postcodes]}
|
||||
|
||||
|
||||
def test_nearby_with_coordinates_skips_the_centroid_lookup() -> None:
|
||||
"""When the target's own coordinates are passed, only the radius search is
|
||||
issued — no postcode→centroid round-trip — and the seed leads the result."""
|
||||
# Arrange
|
||||
client = PostcodesIoClient(radius_m=500, limit=10)
|
||||
coords = Coordinates(longitude=0.1, latitude=51.3)
|
||||
|
||||
with patch(f"{_MODULE}.httpx.get") as mock_get:
|
||||
mock_get.return_value = _response(
|
||||
_nearest_payload(["BR6 6BS", "BR6 6BU", "BR6 6NX"])
|
||||
)
|
||||
|
||||
# Act
|
||||
result = client.nearby("BR6 6BS", coords)
|
||||
|
||||
# Assert — one call (the radius search), seed first, neighbours follow
|
||||
assert result == ["BR6 6BS", "BR6 6BU", "BR6 6NX"]
|
||||
assert mock_get.call_count == 1
|
||||
_, kwargs = mock_get.call_args
|
||||
assert kwargs["params"]["lat"] == 51.3
|
||||
assert kwargs["params"]["lon"] == 0.1
|
||||
|
||||
|
||||
def test_nearby_resolves_the_seed_centroid_when_no_coordinates_given() -> None:
|
||||
"""Without coordinates the client first resolves the seed's own centroid via
|
||||
postcodes.io, then runs the radius search from it."""
|
||||
# Arrange
|
||||
client = PostcodesIoClient()
|
||||
centroid = {"result": {"latitude": 51.3, "longitude": 0.1}}
|
||||
|
||||
with patch(f"{_MODULE}.httpx.get") as mock_get:
|
||||
mock_get.side_effect = [
|
||||
_response(centroid),
|
||||
_response(_nearest_payload(["BR6 6BS", "BR6 6BU"])),
|
||||
]
|
||||
|
||||
# Act
|
||||
result = client.nearby("BR6 6BS")
|
||||
|
||||
# Assert — two calls: centroid then radius
|
||||
assert result == ["BR6 6BS", "BR6 6BU"]
|
||||
assert mock_get.call_count == 2
|
||||
|
||||
|
||||
def test_nearby_dedupes_the_seed_and_caps_at_limit() -> None:
|
||||
"""The seed always leads exactly once even when the radius search echoes it,
|
||||
and the result is capped at ``limit``."""
|
||||
# Arrange
|
||||
client = PostcodesIoClient(limit=3)
|
||||
coords = Coordinates(longitude=0.1, latitude=51.3)
|
||||
|
||||
with patch(f"{_MODULE}.httpx.get") as mock_get:
|
||||
mock_get.return_value = _response(
|
||||
_nearest_payload(["BR6 6BS", "BR6 6BU", "BR6 6NX", "BR6 6AA"])
|
||||
)
|
||||
|
||||
# Act
|
||||
result = client.nearby("BR6 6BS", coords)
|
||||
|
||||
# Assert
|
||||
assert result == ["BR6 6BS", "BR6 6BU", "BR6 6NX"]
|
||||
assert result.count("BR6 6BS") == 1
|
||||
|
||||
|
||||
def test_nearby_returns_just_the_seed_after_exhausting_retries(
|
||||
_no_sleep: MagicMock,
|
||||
) -> None:
|
||||
"""A persistent network error is retried, then degrades to broadening-off:
|
||||
only the seed comes back, and the retries were actually attempted."""
|
||||
# Arrange
|
||||
client = PostcodesIoClient()
|
||||
coords = Coordinates(longitude=0.1, latitude=51.3)
|
||||
|
||||
with patch(
|
||||
f"{_MODULE}.httpx.get", side_effect=httpx.ConnectError("down")
|
||||
) as mock_get:
|
||||
# Act
|
||||
result = client.nearby("BR6 6BS", coords)
|
||||
|
||||
# Assert — one initial try + MAX_RETRIES, sleeping between each.
|
||||
assert result == ["BR6 6BS"]
|
||||
assert mock_get.call_count == client.MAX_RETRIES + 1
|
||||
assert _no_sleep.call_count == client.MAX_RETRIES
|
||||
|
||||
|
||||
def test_nearby_retries_a_transport_error_then_succeeds(_no_sleep: MagicMock) -> None:
|
||||
"""A transient transport error is retried, and the subsequent success is
|
||||
returned in full."""
|
||||
# Arrange
|
||||
client = PostcodesIoClient()
|
||||
coords = Coordinates(longitude=0.1, latitude=51.3)
|
||||
|
||||
with patch(f"{_MODULE}.httpx.get") as mock_get:
|
||||
mock_get.side_effect = [
|
||||
httpx.ReadTimeout("slow"),
|
||||
_response(_nearest_payload(["BR6 6BS", "BR6 6BU"])),
|
||||
]
|
||||
|
||||
# Act
|
||||
result = client.nearby("BR6 6BS", coords)
|
||||
|
||||
# Assert
|
||||
assert result == ["BR6 6BS", "BR6 6BU"]
|
||||
assert mock_get.call_count == 2
|
||||
assert _no_sleep.call_count == 1
|
||||
|
||||
|
||||
def test_nearby_retries_a_429_honouring_retry_after(_no_sleep: MagicMock) -> None:
|
||||
"""A 429 is retried, and the server's Retry-After drives the backoff delay."""
|
||||
# Arrange
|
||||
client = PostcodesIoClient()
|
||||
coords = Coordinates(longitude=0.1, latitude=51.3)
|
||||
|
||||
with patch(f"{_MODULE}.httpx.get") as mock_get:
|
||||
mock_get.side_effect = [
|
||||
_response(None, status_code=429, headers={"Retry-After": "2"}),
|
||||
_response(_nearest_payload(["BR6 6BS", "BR6 6BU"])),
|
||||
]
|
||||
|
||||
# Act
|
||||
result = client.nearby("BR6 6BS", coords)
|
||||
|
||||
# Assert — succeeded on the retry, having slept the advertised 2 seconds.
|
||||
assert result == ["BR6 6BS", "BR6 6BU"]
|
||||
assert mock_get.call_count == 2
|
||||
_no_sleep.assert_called_once_with(2.0)
|
||||
|
||||
|
||||
def test_nearby_retries_a_server_error_then_succeeds(_no_sleep: MagicMock) -> None:
|
||||
"""A 5xx is treated as transient and retried."""
|
||||
# Arrange
|
||||
client = PostcodesIoClient()
|
||||
coords = Coordinates(longitude=0.1, latitude=51.3)
|
||||
|
||||
with patch(f"{_MODULE}.httpx.get") as mock_get:
|
||||
mock_get.side_effect = [
|
||||
_response(None, status_code=503),
|
||||
_response(_nearest_payload(["BR6 6BS"])),
|
||||
]
|
||||
|
||||
# Act
|
||||
result = client.nearby("BR6 6BS", coords)
|
||||
|
||||
# Assert
|
||||
assert result == ["BR6 6BS"]
|
||||
assert mock_get.call_count == 2
|
||||
|
||||
|
||||
def test_nearby_returns_just_the_seed_when_centroid_unresolvable() -> None:
|
||||
"""An unknown seed (no coordinates, centroid lookup fails) yields the seed
|
||||
alone rather than raising."""
|
||||
# Arrange
|
||||
client = PostcodesIoClient()
|
||||
|
||||
with patch(f"{_MODULE}.httpx.get") as mock_get:
|
||||
mock_get.return_value = _response(None, status_code=404)
|
||||
|
||||
# Act
|
||||
result: list[str] = client.nearby("ZZ99 9ZZ")
|
||||
|
||||
# Assert — a 404 is non-transient, so no retry was attempted.
|
||||
assert result == ["ZZ99 9ZZ"]
|
||||
assert mock_get.call_count == 1
|
||||
|
||||
|
||||
def test_nearby_tolerates_a_null_nearest_result() -> None:
|
||||
"""postcodes.io returns ``result: null`` when a point has no neighbours; the
|
||||
client treats that as an empty neighbour set (seed only)."""
|
||||
# Arrange
|
||||
client = PostcodesIoClient()
|
||||
coords: Optional[Coordinates] = Coordinates(longitude=0.1, latitude=51.3)
|
||||
|
||||
with patch(f"{_MODULE}.httpx.get") as mock_get:
|
||||
mock_get.return_value = _response({"result": None})
|
||||
|
||||
# Act
|
||||
result = client.nearby("BR6 6BS", coords)
|
||||
|
||||
# Assert
|
||||
assert result == ["BR6 6BS"]
|
||||
126
tests/infrastructure/test_http_retry.py
Normal file
126
tests/infrastructure/test_http_retry.py
Normal file
|
|
@ -0,0 +1,126 @@
|
|||
"""call_with_retry — the shared transient-failure retry both HTTP source clients
|
||||
use. Generic mechanism (exponential backoff, Retry-After), per-client policy
|
||||
(what gets raised as transient)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Iterator
|
||||
from unittest.mock import MagicMock, call, patch
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
|
||||
from infrastructure.http_retry import TransientHttpError, call_with_retry
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _no_sleep() -> Iterator[MagicMock]:
|
||||
with patch("infrastructure.http_retry.time.sleep") as sleep:
|
||||
yield sleep
|
||||
|
||||
|
||||
def test_returns_immediately_on_success(_no_sleep: MagicMock) -> None:
|
||||
# Act
|
||||
result = call_with_retry(lambda: 42)
|
||||
|
||||
# Assert
|
||||
assert result == 42
|
||||
_no_sleep.assert_not_called()
|
||||
|
||||
|
||||
def test_retries_a_transient_error_then_succeeds(_no_sleep: MagicMock) -> None:
|
||||
# Arrange — fail twice transiently, then succeed.
|
||||
calls = {"n": 0}
|
||||
|
||||
def fn() -> str:
|
||||
calls["n"] += 1
|
||||
if calls["n"] < 3:
|
||||
raise TransientHttpError("429")
|
||||
return "ok"
|
||||
|
||||
# Act
|
||||
result = call_with_retry(fn)
|
||||
|
||||
# Assert — exponential backoff between the three attempts.
|
||||
assert result == "ok"
|
||||
assert _no_sleep.call_args_list == [call(1.0), call(2.0)]
|
||||
|
||||
|
||||
def test_honours_retry_after_over_the_computed_backoff(_no_sleep: MagicMock) -> None:
|
||||
# Arrange
|
||||
raised = {"done": False}
|
||||
|
||||
def fn() -> str:
|
||||
if not raised["done"]:
|
||||
raised["done"] = True
|
||||
raise TransientHttpError("429", retry_after=7.0)
|
||||
return "ok"
|
||||
|
||||
# Act
|
||||
call_with_retry(fn)
|
||||
|
||||
# Assert
|
||||
_no_sleep.assert_called_once_with(7.0)
|
||||
|
||||
|
||||
def test_retries_transport_errors(_no_sleep: MagicMock) -> None:
|
||||
# Arrange
|
||||
attempts = {"n": 0}
|
||||
|
||||
def fn() -> str:
|
||||
attempts["n"] += 1
|
||||
if attempts["n"] == 1:
|
||||
raise httpx.ReadTimeout("slow")
|
||||
return "ok"
|
||||
|
||||
# Act
|
||||
result = call_with_retry(fn)
|
||||
|
||||
# Assert
|
||||
assert result == "ok"
|
||||
assert attempts["n"] == 2
|
||||
|
||||
|
||||
def test_reraises_the_last_transient_error_once_exhausted(
|
||||
_no_sleep: MagicMock,
|
||||
) -> None:
|
||||
# Arrange — always transient.
|
||||
def fn() -> str:
|
||||
raise TransientHttpError("persistent 429")
|
||||
|
||||
# Act / Assert
|
||||
with pytest.raises(TransientHttpError, match="persistent 429"):
|
||||
call_with_retry(fn, max_retries=2)
|
||||
assert _no_sleep.call_count == 2
|
||||
|
||||
|
||||
def test_non_transient_error_propagates_without_retry(_no_sleep: MagicMock) -> None:
|
||||
# Arrange
|
||||
attempts = {"n": 0}
|
||||
|
||||
def fn() -> str:
|
||||
attempts["n"] += 1
|
||||
raise ValueError("not transient")
|
||||
|
||||
# Act / Assert
|
||||
with pytest.raises(ValueError, match="not transient"):
|
||||
call_with_retry(fn)
|
||||
assert attempts["n"] == 1
|
||||
_no_sleep.assert_not_called()
|
||||
|
||||
|
||||
def test_retry_after_is_capped_by_max_backoff(_no_sleep: MagicMock) -> None:
|
||||
# Arrange
|
||||
raised = {"done": False}
|
||||
|
||||
def fn() -> str:
|
||||
if not raised["done"]:
|
||||
raised["done"] = True
|
||||
raise TransientHttpError("429", retry_after=999.0)
|
||||
return "ok"
|
||||
|
||||
# Act
|
||||
call_with_retry(fn, max_backoff=60.0)
|
||||
|
||||
# Assert
|
||||
_no_sleep.assert_called_once_with(60.0)
|
||||
|
|
@ -130,6 +130,126 @@ def test_no_certs_in_the_postcode_yields_no_candidates() -> None:
|
|||
assert client.searched_postcode == "LS6 1AA"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Broadened cohort — candidates_near (ADR-0034 nearby-postcode broadening)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class _MultiPostcodeEpcClient:
|
||||
"""Serves a different cohort per postcode and records every search, so the
|
||||
broadened walk's reach and ordering can be asserted."""
|
||||
|
||||
def __init__(self, by_postcode: dict[str, list[EpcSearchResult]]) -> None:
|
||||
self._by_postcode = by_postcode
|
||||
self.searched: list[str] = []
|
||||
|
||||
def search_by_postcode(self, postcode: str) -> list[EpcSearchResult]:
|
||||
self.searched.append(postcode)
|
||||
return self._by_postcode.get(postcode, [])
|
||||
|
||||
def get_by_certificate_number(self, cert_num: str) -> EpcPropertyData:
|
||||
return _epc()
|
||||
|
||||
|
||||
class _FakeNearbyPostcodes:
|
||||
"""Returns a fixed nearest-first list and records the seed it was asked for."""
|
||||
|
||||
def __init__(self, postcodes: list[str]) -> None:
|
||||
self._postcodes = postcodes
|
||||
self.calls: list[tuple[str, Optional[Coordinates]]] = []
|
||||
|
||||
def nearby(
|
||||
self, postcode: str, coordinates: Optional[Coordinates] = None
|
||||
) -> list[str]:
|
||||
self.calls.append((postcode, coordinates))
|
||||
return self._postcodes
|
||||
|
||||
|
||||
def test_candidates_near_aggregates_and_dedupes_across_nearby_postcodes() -> None:
|
||||
# Arrange — three nearby postcodes; CERT-1 is re-lodged in two of them.
|
||||
client = _MultiPostcodeEpcClient(
|
||||
{
|
||||
"P0": [_result("CERT-1", uprn=1)],
|
||||
"P1": [_result("CERT-2", uprn=2), _result("CERT-1", uprn=1)],
|
||||
"P2": [_result("CERT-3", uprn=3)],
|
||||
}
|
||||
)
|
||||
nearby = _FakeNearbyPostcodes(["P0", "P1", "P2"])
|
||||
repo = EpcComparablePropertiesRepository(
|
||||
client, _FakeGeospatial({}), nearby_postcodes=nearby
|
||||
)
|
||||
|
||||
# Act — no early-stop predicate, so the whole nearby set is visited.
|
||||
candidates = repo.candidates_near("P0", None)
|
||||
|
||||
# Assert — one candidate per distinct cert, all three postcodes searched.
|
||||
certs = {c.certificate_number for c in candidates}
|
||||
assert certs == {"CERT-1", "CERT-2", "CERT-3"}
|
||||
assert client.searched == ["P0", "P1", "P2"]
|
||||
|
||||
|
||||
def test_candidates_near_stops_early_once_enough_match() -> None:
|
||||
# Arrange — the seed postcode alone already yields enough matches; the two
|
||||
# further postcodes must not be fetched.
|
||||
client = _MultiPostcodeEpcClient(
|
||||
{
|
||||
"P0": [_result(f"MATCH-{i}", uprn=i) for i in range(5)],
|
||||
"P1": [_result("OTHER-1", uprn=99)],
|
||||
"P2": [_result("OTHER-2", uprn=98)],
|
||||
}
|
||||
)
|
||||
nearby = _FakeNearbyPostcodes(["P0", "P1", "P2"])
|
||||
repo = EpcComparablePropertiesRepository(
|
||||
client, _FakeGeospatial({}), nearby_postcodes=nearby
|
||||
)
|
||||
|
||||
# Act
|
||||
candidates = repo.candidates_near(
|
||||
"P0",
|
||||
None,
|
||||
enough=lambda c: c.certificate_number.startswith("MATCH"),
|
||||
minimum=5,
|
||||
)
|
||||
|
||||
# Assert — walk halted after the seed; the further postcodes were never hit.
|
||||
assert client.searched == ["P0"]
|
||||
assert len(candidates) == 5
|
||||
|
||||
|
||||
def test_candidates_near_passes_coordinates_to_the_nearby_source() -> None:
|
||||
# Arrange
|
||||
here = Coordinates(longitude=0.1, latitude=51.3)
|
||||
client = _MultiPostcodeEpcClient({"P0": []})
|
||||
nearby = _FakeNearbyPostcodes(["P0"])
|
||||
repo = EpcComparablePropertiesRepository(
|
||||
client, _FakeGeospatial({}), nearby_postcodes=nearby
|
||||
)
|
||||
|
||||
# Act
|
||||
repo.candidates_near("P0", here)
|
||||
|
||||
# Assert — the target's own coordinates seed the radius search.
|
||||
assert nearby.calls == [("P0", here)]
|
||||
|
||||
|
||||
def test_candidates_near_without_a_source_uses_only_the_seed() -> None:
|
||||
# Arrange — no NearbyPostcodes configured (broadening unavailable).
|
||||
client = _MultiPostcodeEpcClient({"P0": [_result("CERT-1", uprn=1)]})
|
||||
repo = EpcComparablePropertiesRepository(client, _FakeGeospatial({}))
|
||||
|
||||
# Act
|
||||
candidates = repo.candidates_near("P0", None)
|
||||
|
||||
# Assert — degrades to the seed postcode alone.
|
||||
assert client.searched == ["P0"]
|
||||
assert [c.certificate_number for c in candidates] == ["CERT-1"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Unmappable cohort certs are skipped + recorded, not allowed to sink the cohort
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class _FakeEpcClientRaising:
|
||||
"""Serves the cohort, but raises the given exception for specific certs."""
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue