feat(epc-prediction): slice-5b ComparableProperties repo port + adapter

Build the cohort IO port ADR-0029 deferred (ADR-0031 slice-5b):
`ComparablePropertiesRepository.candidates_for(postcode) -> list[Comparable]`,
with an EPC-API + geospatial adapter that lists the postcode's lodged certs
(search_by_postcode), fetches + maps each (get_by_certificate_number), and
resolves their UPRNs to coordinates in ONE batched read. Register metadata the
cert doesn't carry (address, registration date) is threaded off the search row;
a UPRN-less or unparseable-date cert is kept, just uncoordinated / unweighted.
The domain select_comparables then filters these candidates into the cohort.

Thin CohortEpcClient / CohortGeospatial Protocols keep the adapter testable
against fakes; EpcClientService + GeospatialS3Repository satisfy them
structurally (no changes). 3 tests; pyright strict clean.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Khalim Conn-Kowlessar 2026-06-16 03:40:59 +00:00
parent 086187ddc7
commit 6979607ace
5 changed files with 234 additions and 0 deletions

View file

@ -0,0 +1,24 @@
"""The ComparableProperties repository port (ADR-0029 decision 3; ADR-0031).
Owns the cohort IO for EPC Prediction: given a target's postcode, return the
candidate `Comparable`s (the postcode's other lodged certs, mapped to
`EpcPropertyData` with their register metadata + resolved coordinates). The pure
domain `select_comparables` then filters these into the reference cohort, and
`EpcPrediction.predict` synthesises the picture. Kept a port so the orchestrator
depends on the cohort source abstractly and tests substitute a fake.
"""
from __future__ import annotations
from abc import ABC, abstractmethod
from domain.epc_prediction.comparable_properties import Comparable
class ComparablePropertiesRepository(ABC):
    """Abstract port for fetching a postcode's candidate comparables.

    Concrete adapters implement `candidates_for`; the class cannot be
    instantiated until that method is provided.
    """

    @abstractmethod
    def candidates_for(self, postcode: str) -> list[Comparable]:
        """Return every candidate neighbour lodged in `postcode`.

        The result holds one `Comparable` per lodged cert. Each carries its
        `EpcPropertyData`, certificate number, address, registration date and
        resolved coordinates (None when they cannot be resolved).
        """
        ...

View file

@ -0,0 +1,82 @@
"""EPC-API + geospatial adapter for the ComparableProperties port (ADR-0031).
Assembles a postcode's candidate cohort: the EPC search lists the postcode's
lodged certs, each is fetched + mapped to `EpcPropertyData`, and the certs' UPRNs
are resolved to coordinates in one batched geospatial read (closely-numbered
UPRNs share a partition). Register metadata the cert itself doesn't carry
(address, registration date) is threaded off the search row.
"""
from __future__ import annotations
from datetime import date
from typing import Optional, Protocol
from datatypes.epc.domain.epc_property_data import EpcPropertyData
from datatypes.epc.search.epc_search_result import EpcSearchResult
from domain.epc_prediction.comparable_properties import Comparable
from domain.geospatial.coordinates import Coordinates
from repositories.comparable_properties.comparable_properties_repository import (
ComparablePropertiesRepository,
)
class CohortEpcClient(Protocol):
    """Structural type for the EPC-API client calls the cohort fetch makes.

    `EpcClientService` is the named example of a conforming client.
    """

    def search_by_postcode(self, postcode: str) -> list[EpcSearchResult]:
        """Return the search rows for `postcode`."""
        ...

    def get_by_certificate_number(self, cert_num: str) -> EpcPropertyData:
        """Return the `EpcPropertyData` for the certificate `cert_num`."""
        ...
class CohortGeospatial(Protocol):
    """Structural type for the geospatial lookup the cohort fetch makes."""

    def coordinates_for_uprns(self, uprns: list[int]) -> dict[int, Coordinates]:
        """Resolve a batch of UPRNs, returning coordinates keyed by UPRN."""
        ...
class EpcComparablePropertiesRepository(ComparablePropertiesRepository):
    """Builds a postcode's candidate cohort from the EPC client + geospatial lookup.

    The postcode search lists the certs, each cert is fetched individually, and
    the UPRNs present on the search rows are resolved to coordinates in a single
    batched call.
    """

    def __init__(
        self, epc_client: CohortEpcClient, geospatial: CohortGeospatial
    ) -> None:
        """Store the EPC client and geospatial lookup this adapter delegates to."""
        self._epc_client = epc_client
        self._geospatial = geospatial

    def candidates_for(self, postcode: str) -> list[Comparable]:
        """Return one `Comparable` per search row for `postcode`, in search order.

        Rows without a UPRN are kept; they simply get no coordinates. The
        geospatial lookup is called at most once, and not at all when no row
        carries a UPRN.
        """
        results: list[EpcSearchResult] = self._epc_client.search_by_postcode(
            postcode
        )
        # dict.fromkeys drops repeated UPRNs while keeping first-seen order, so
        # the same UPRN is never requested twice within the batch.
        uprns: list[int] = list(
            dict.fromkeys(r.uprn for r in results if r.uprn is not None)
        )
        # Nothing to resolve -> skip the geospatial read entirely.
        coordinates: dict[int, Coordinates] = (
            self._geospatial.coordinates_for_uprns(uprns) if uprns else {}
        )
        return [self._comparable(result, coordinates) for result in results]

    def _comparable(
        self, result: EpcSearchResult, coordinates: dict[int, Coordinates]
    ) -> Comparable:
        """Fetch the cert behind `result` and combine it with the row's metadata.

        The address (`address_line_1`) and registration date come from the
        search row; coordinates come from the pre-resolved `coordinates` map
        and are None when the row has no UPRN or the UPRN is absent from it.
        """
        epc: EpcPropertyData = self._epc_client.get_by_certificate_number(
            result.certificate_number
        )
        resolved: Optional[Coordinates] = (
            coordinates.get(result.uprn) if result.uprn is not None else None
        )
        return Comparable(
            epc=epc,
            certificate_number=result.certificate_number,
            address=result.address_line_1,
            registration_date=_parse_date(result.registration_date),
            coordinates=resolved,
        )
def _parse_date(value: str) -> Optional[date]:
"""The register's ISO registration date, or None when unparseable (the
predictor falls back to an unweighted recency)."""
try:
return date.fromisoformat(value[:10])
except ValueError:
return None

View file

@ -0,0 +1,128 @@
"""EpcComparablePropertiesRepository — assembles the candidate cohort for EPC
Prediction from the postcode search, per-cert fetch, and batch coordinate
resolution (ADR-0029 cohort port, wired in ADR-0031 slice-5b)."""
from __future__ import annotations
import json
from datetime import date
from pathlib import Path
from typing import Any, Optional
from datatypes.epc.domain.epc_property_data import EpcPropertyData
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
from datatypes.epc.search.epc_search_result import EpcSearchResult
from domain.geospatial.coordinates import Coordinates
from repositories.comparable_properties.epc_comparable_properties_repository import (
EpcComparablePropertiesRepository,
)
# Directory of EPC-API JSON samples: `backend/epc_api/json_samples` beneath the
# directory three levels above the one containing this test file.
_JSON_SAMPLES = Path(__file__).resolve().parents[3] / "backend/epc_api/json_samples"
def _epc() -> EpcPropertyData:
    """Load the RdSAP-Schema-21.0.0 `epc.json` sample and map it to the domain type.

    The file is read on every call, so each caller gets a freshly mapped object.
    """
    sample_path = _JSON_SAMPLES / "RdSAP-Schema-21.0.0" / "epc.json"
    # Read explicitly as UTF-8 so the fixture parses the same regardless of the
    # platform's locale-default encoding.
    raw: dict[str, Any] = json.loads(sample_path.read_text(encoding="utf-8"))
    return EpcPropertyDataMapper.from_api_response(raw)
def _result(
    cert: str, uprn: Optional[int], registration_date: str = "2023-12-01"
) -> EpcSearchResult:
    """Build a search row for `cert` with fixed address fields.

    Only the certificate number, UPRN and registration date vary between
    calls; every other field is a constant placeholder.
    """
    return EpcSearchResult(
        # The fields the tests vary.
        certificate_number=cert,
        uprn=uprn,
        registration_date=registration_date,
        # Fixed placeholder values.
        address_line_1="1 Some Street",
        address_line_2=None,
        address_line_3=None,
        address_line_4=None,
        post_town="LEEDS",
        postcode="LS6 1AA",
        current_energy_efficiency_band="D",
    )
class _FakeEpcClient:
    """In-memory EPC client: returns a preset search result list and remembers
    the postcode it was last asked for."""

    def __init__(self, results: list[EpcSearchResult]) -> None:
        # None until `search_by_postcode` is first called.
        self.searched_postcode: Optional[str] = None
        self._results = results

    def search_by_postcode(self, postcode: str) -> list[EpcSearchResult]:
        """Record `postcode` and hand back the preset rows."""
        self.searched_postcode = postcode
        return self._results

    def get_by_certificate_number(self, cert_num: str) -> EpcPropertyData:
        """Return the sample EPC from `_epc()`, whatever the certificate number."""
        return _epc()
class _FakeGeospatial:
    """In-memory geospatial lookup over a preset UPRN→coordinate map; keeps a
    log of every batch it was asked to resolve."""

    def __init__(self, coords: dict[int, Coordinates]) -> None:
        # One entry per `coordinates_for_uprns` call, in call order.
        self.batches: list[list[int]] = []
        self._coords = coords

    def coordinates_for(self, uprn: int) -> Optional[Coordinates]:
        """Single-UPRN lookup; None when the UPRN is not in the preset map."""
        return self._coords.get(uprn)

    def coordinates_for_uprns(self, uprns: list[int]) -> dict[int, Coordinates]:
        """Log the batch, then return the preset entries whose UPRN was requested."""
        self.batches.append(uprns)
        found: dict[int, Coordinates] = {}
        for known_uprn, location in self._coords.items():
            if known_uprn in uprns:
                found[known_uprn] = location
        return found
def test_candidates_for_assembles_a_comparable_per_cert() -> None:
    # Arrange — a single lodged cert whose UPRN the geospatial fake can resolve.
    location = Coordinates(longitude=-1.55, latitude=53.81)
    epc_client = _FakeEpcClient([_result("CERT-1", uprn=12345)])
    lookup = _FakeGeospatial({12345: location})
    repository = EpcComparablePropertiesRepository(epc_client, lookup)

    # Act
    cohort = repository.candidates_for("LS6 1AA")

    # Assert — exactly one Comparable, carrying the search row's metadata, the
    # resolved coordinates object itself, and a fetched EPC.
    assert len(cohort) == 1
    only = cohort[0]
    assert only.certificate_number == "CERT-1"
    assert only.address == "1 Some Street"
    assert only.registration_date == date(2023, 12, 1)
    assert only.coordinates is location
    assert only.epc is not None
def test_a_cert_without_a_uprn_has_no_coordinates_and_coords_are_one_batch() -> None:
    # Arrange — two certs in the postcode, only the first of which has a UPRN.
    location = Coordinates(longitude=-1.55, latitude=53.81)
    with_uprn = _result("CERT-1", uprn=12345)
    without_uprn = _result("CERT-2", uprn=None)
    epc_client = _FakeEpcClient([with_uprn, without_uprn])
    lookup = _FakeGeospatial({12345: location})
    repository = EpcComparablePropertiesRepository(epc_client, lookup)

    # Act
    cohort = repository.candidates_for("LS6 1AA")

    # Assert — both certs come back; the UPRN-less one has no coordinates, and
    # the lookup saw exactly one batch containing only the UPRN that exists.
    indexed = {}
    for candidate in cohort:
        indexed[candidate.certificate_number] = candidate
    assert indexed["CERT-1"].coordinates is location
    assert indexed["CERT-2"].coordinates is None
    assert lookup.batches == [[12345]]
def test_no_certs_in_the_postcode_yields_no_candidates() -> None:
    # Arrange — the search fake has no rows and the lookup knows no UPRNs.
    epc_client = _FakeEpcClient([])
    repository = EpcComparablePropertiesRepository(epc_client, _FakeGeospatial({}))

    # Act
    cohort = repository.candidates_for("LS6 1AA")

    # Assert — an empty cohort, and the search was still issued for the
    # postcode exactly as given.
    assert cohort == []
    assert epc_client.searched_postcode == "LS6 1AA"