class ComparablePropertiesRepository(ABC):
    """Port through which EPC Prediction obtains a postcode's candidate cohort.

    Kept abstract so the orchestrator depends on the cohort source rather than
    on a concrete adapter, and so tests can substitute a fake.
    """

    @abstractmethod
    def candidates_for(self, postcode: str) -> list[Comparable]:
        """Return the candidate neighbours lodged under `postcode`.

        One `Comparable` is expected per lodged cert, carrying its
        `EpcPropertyData`, certificate number, address, registration date and
        resolved coordinates (None when they cannot be resolved).
        """
class CohortEpcClient(Protocol):
    """Structural type for the part of the EPC-API client the cohort fetch uses
    (e.g. `EpcClientService`)."""

    def search_by_postcode(self, postcode: str) -> list[EpcSearchResult]:
        """Return the search rows for the certs lodged under `postcode`."""

    def get_by_certificate_number(self, cert_num: str) -> EpcPropertyData:
        """Return the `EpcPropertyData` for the cert numbered `cert_num`."""


class CohortGeospatial(Protocol):
    """Structural type for the geospatial lookup the cohort fetch uses: a
    batched UPRN-to-coordinate resolution."""

    def coordinates_for_uprns(self, uprns: list[int]) -> dict[int, Coordinates]:
        """Return coordinates keyed by UPRN for the requested `uprns`.

        NOTE(review): presumably UPRNs that cannot be resolved are simply
        absent from the result — confirm against the real implementation.
        """
class EpcComparablePropertiesRepository(ComparablePropertiesRepository):
    """Builds a postcode's candidate cohort from the EPC API and a geospatial lookup.

    The postcode search lists the lodged certs, each cert is fetched by its
    certificate number, and the certs' UPRNs are resolved to coordinates in a
    single batched geospatial read. Address and registration date are taken
    from the search row.
    """

    def __init__(
        self, epc_client: CohortEpcClient, geospatial: CohortGeospatial
    ) -> None:
        """Store the EPC client and the geospatial lookup the cohort fetch uses."""
        self._epc_client = epc_client
        self._geospatial = geospatial

    def candidates_for(self, postcode: str) -> list[Comparable]:
        """Return one `Comparable` per search row for `postcode`, in search order.

        Rows without a UPRN are kept; they simply carry no coordinates.
        """
        results: list[EpcSearchResult] = self._epc_client.search_by_postcode(
            postcode
        )
        # Several rows may carry the same UPRN; request each one only once.
        # `dict.fromkeys` de-duplicates while keeping first-seen order.
        uprns: list[int] = list(
            dict.fromkeys(r.uprn for r in results if r.uprn is not None)
        )
        coordinates: dict[int, Coordinates] = self._geospatial.coordinates_for_uprns(
            uprns
        )
        return [self._comparable(result, coordinates) for result in results]

    def _comparable(
        self, result: EpcSearchResult, coordinates: dict[int, Coordinates]
    ) -> Comparable:
        """Fetch the cert behind `result` and combine it with the row's metadata.

        `coordinates` is the batched UPRN lookup; a row with no UPRN, or whose
        UPRN is missing from the lookup, gets `coordinates=None`.
        """
        epc: EpcPropertyData = self._epc_client.get_by_certificate_number(
            result.certificate_number
        )
        resolved: Optional[Coordinates] = (
            coordinates.get(result.uprn) if result.uprn is not None else None
        )
        return Comparable(
            epc=epc,
            certificate_number=result.certificate_number,
            address=result.address_line_1,
            registration_date=_parse_date(result.registration_date),
            coordinates=resolved,
        )


def _parse_date(value: Optional[str]) -> Optional[date]:
    """The register's ISO registration date, or None when missing or
    unparseable (the predictor falls back to an unweighted recency).

    Only the first ten characters (`YYYY-MM-DD`) are parsed, so a value with a
    trailing time component still yields its date.
    """
    # A missing or non-string value must not abort the whole cohort: slicing
    # None raises TypeError, which the ValueError handler below would not catch.
    if not isinstance(value, str):
        return None
    try:
        return date.fromisoformat(value[:10])
    except ValueError:
        return None
_JSON_SAMPLES = Path(__file__).resolve().parents[3] / "backend/epc_api/json_samples"


def _epc() -> EpcPropertyData:
    """Load the RdSAP 21.0.0 sample cert and map it to `EpcPropertyData`."""
    sample_path = _JSON_SAMPLES / "RdSAP-Schema-21.0.0" / "epc.json"
    payload: dict[str, Any] = json.loads(sample_path.read_text())
    return EpcPropertyDataMapper.from_api_response(payload)


def _result(
    cert: str, uprn: Optional[int], registration_date: str = "2023-12-01"
) -> EpcSearchResult:
    """Build a search row for `cert`; only the cert, UPRN and date vary."""
    return EpcSearchResult(
        certificate_number=cert,
        address_line_1="1 Some Street",
        address_line_2=None,
        address_line_3=None,
        address_line_4=None,
        postcode="LS6 1AA",
        post_town="LEEDS",
        uprn=uprn,
        current_energy_efficiency_band="D",
        registration_date=registration_date,
    )


class _FakeEpcClient:
    """EPC client double: returns a fixed list of search rows, remembers the
    postcode it was asked for, and serves the sample EPC for any cert."""

    def __init__(self, results: list[EpcSearchResult]) -> None:
        self.searched_postcode: Optional[str] = None
        self._results = results

    def search_by_postcode(self, postcode: str) -> list[EpcSearchResult]:
        self.searched_postcode = postcode
        return self._results

    def get_by_certificate_number(self, cert_num: str) -> EpcPropertyData:
        # The cert number is ignored: every cert maps to the same sample EPC.
        return _epc()


class _FakeGeospatial:
    """Geospatial double: knows coordinates for a fixed set of UPRNs and logs
    every batch it is asked to resolve."""

    def __init__(self, coords: dict[int, Coordinates]) -> None:
        self.batches: list[list[int]] = []
        self._coords = coords

    def coordinates_for(self, uprn: int) -> Optional[Coordinates]:
        return self._coords.get(uprn)

    def coordinates_for_uprns(self, uprns: list[int]) -> dict[int, Coordinates]:
        self.batches.append(uprns)
        resolved: dict[int, Coordinates] = {}
        for known_uprn, point in self._coords.items():
            if known_uprn in uprns:
                resolved[known_uprn] = point
        return resolved


def test_candidates_for_assembles_a_comparable_per_cert() -> None:
    # Arrange — a single cert in the postcode whose UPRN resolves to a point.
    point = Coordinates(longitude=-1.55, latitude=53.81)
    epc_client = _FakeEpcClient([_result("CERT-1", uprn=12345)])
    lookup = _FakeGeospatial({12345: point})
    repository = EpcComparablePropertiesRepository(epc_client, lookup)

    # Act
    cohort = repository.candidates_for("LS6 1AA")

    # Assert — the search row's metadata ends up on the Comparable.
    assert len(cohort) == 1
    only = cohort[0]
    assert only.certificate_number == "CERT-1"
    assert only.address == "1 Some Street"
    assert only.registration_date == date(2023, 12, 1)
    assert only.coordinates is point
    assert only.epc is not None


def test_a_cert_without_a_uprn_has_no_coordinates_and_coords_are_one_batch() -> None:
    # Arrange — two certs in the postcode, one of them lacking a UPRN.
    point = Coordinates(longitude=-1.55, latitude=53.81)
    rows = [_result("CERT-1", uprn=12345), _result("CERT-2", uprn=None)]
    epc_client = _FakeEpcClient(rows)
    lookup = _FakeGeospatial({12345: point})
    repository = EpcComparablePropertiesRepository(epc_client, lookup)

    # Act
    cohort = repository.candidates_for("LS6 1AA")

    # Assert — the UPRN-less cert survives without coordinates, and exactly
    # one batched read was made, containing only the UPRNs that exist.
    indexed = {item.certificate_number: item for item in cohort}
    assert indexed["CERT-1"].coordinates is point
    assert indexed["CERT-2"].coordinates is None
    assert lookup.batches == [[12345]]


def test_no_certs_in_the_postcode_yields_no_candidates() -> None:
    # Arrange — the postcode search comes back empty.
    epc_client = _FakeEpcClient([])
    lookup = _FakeGeospatial({})
    repository = EpcComparablePropertiesRepository(epc_client, lookup)

    # Act
    cohort = repository.candidates_for("LS6 1AA")

    # Assert — nothing is returned, yet the search itself was issued.
    assert cohort == []
    assert epc_client.searched_postcode == "LS6 1AA"