# domain/geospatial/coordinates.py
@dataclass(frozen=True)
class Coordinates:
    """Immutable WGS84 position of a Property, in decimal degrees.

    The point is looked up in the Ordnance Survey Open-UPRN reference data;
    the Ingestion orchestrator then passes it to the Google Solar fetcher.

    Fields are declared longitude first, then latitude, so positional
    construction reads ``Coordinates(longitude, latitude)``.
    """

    longitude: float
    latitude: float


# repositories/geospatial/geospatial_repository.py
class GeospatialRepository(ABC):
    """Port for looking up a Property's coordinates by UPRN.

    This is a Repo rather than a Fetcher (ADR-0011): implementations read
    stored Ordnance Survey Open-UPRN reference data and make no live API call.
    """

    @abstractmethod
    def coordinates_for(self, uprn: int) -> Optional[Coordinates]:
        """Return the coordinates held for ``uprn``.

        The result is None when the reference data does not cover the UPRN.
        """
# Callable that takes a dataset key (e.g. "spatial/filename_meta.parquet") and
# returns the parquet stored under that key as a DataFrame.
ParquetReader = Callable[[str], pd.DataFrame]

# Key of the index that maps UPRN ranges to partition files.
_META_KEY = "spatial/filename_meta.parquet"


class GeospatialS3Repository(GeospatialRepository):
    """Reads the partitioned Ordnance Survey Open-UPRN parquet dataset.

    `spatial/filename_meta.parquet` maps a UPRN range (`lower`/`upper`) to a
    partition file (`filenames`); that partition carries
    `UPRN`/`LATITUDE`/`LONGITUDE`. The parquet reader is injected so the
    dataset can be sourced from S3 in production or a fixture directory in
    tests — the Repo holds no S3/HTTP code.
    """

    def __init__(self, read_parquet: ParquetReader) -> None:
        """Store the reader used for every index and partition lookup."""
        self._read_parquet = read_parquet

    def coordinates_for(self, uprn: int) -> Optional[Coordinates]:
        """Return the coordinates stored for ``uprn``, or None if unresolved.

        The index is read on each call and filtered to the partitions whose
        inclusive ``lower``..``upper`` range contains ``uprn``. Each candidate
        partition is then searched in index order and the first row that has
        both coordinates wins.

        None is returned when no partition range covers the UPRN, when no
        covering partition contains it, or when every matching row lacks a
        latitude or longitude.
        """
        meta = self._read_parquet(_META_KEY)
        covering = meta[(meta["lower"] <= uprn) & (meta["upper"] >= uprn)]

        # Both bounds are inclusive, so a UPRN sitting on an endpoint shared by
        # two adjacent partitions matches two index rows. Search every
        # candidate instead of trusting only the first one.
        for filename in covering["filenames"]:
            partition = self._read_parquet(f"spatial/{filename}")
            matches = partition[partition["UPRN"] == uprn]
            # A row with a null coordinate cannot become a usable point; drop
            # it rather than handing NaN values to downstream consumers.
            matches = matches.dropna(subset=["LATITUDE", "LONGITUDE"])
            if matches.empty:
                continue
            row = matches.iloc[0]
            return Coordinates(
                longitude=float(row["LONGITUDE"]),
                latitude=float(row["LATITUDE"]),
            )
        return None
"""GeospatialRepo resolves a Property's coordinates from the OS Open-UPRN data.

A reference-data lookup, not a Fetcher (ADR-0011): no live OS API call. The
adapter reads the partitioned Open-UPRN parquet via an injected reader, so the
test exercises the partition lookup + filter against real fixture parquets with
no network.
"""


def _reader(base: Path) -> Callable[[str], pd.DataFrame]:
    """Build a parquet reader that resolves dataset keys beneath ``base``."""
    return lambda key: pd.read_parquet(base / key)


def _write_open_uprn(base: Path) -> None:
    """Write a one-partition Open-UPRN fixture dataset under ``base/spatial``."""
    dataset_dir = base / "spatial"
    dataset_dir.mkdir(parents=True, exist_ok=True)

    # Index: a single partition covering UPRNs 0..100000.
    index = pd.DataFrame(
        {"lower": [0], "upper": [100000], "filenames": ["0_100000.parquet"]}
    )
    index.to_parquet(dataset_dir / "filename_meta.parquet")

    # Partition: two known UPRNs with their coordinates.
    partition = pd.DataFrame(
        {
            "UPRN": [12345, 12346],
            "LATITUDE": [51.5074, 51.6000],
            "LONGITUDE": [-0.1278, -0.2000],
        }
    )
    partition.to_parquet(dataset_dir / "0_100000.parquet")


def test_coordinates_for_returns_lon_lat(tmp_path: Path) -> None:
    # Arrange
    _write_open_uprn(tmp_path)
    repository = GeospatialS3Repository(_reader(tmp_path))
    expected = Coordinates(longitude=-0.1278, latitude=51.5074)

    # Act
    actual = repository.coordinates_for(12345)

    # Assert
    assert actual == expected


def test_coordinates_for_returns_none_when_uprn_absent(tmp_path: Path) -> None:
    # Arrange
    _write_open_uprn(tmp_path)
    repository = GeospatialS3Repository(_reader(tmp_path))

    # Act — uprn inside the partition range but not present in the data
    result = repository.coordinates_for(99999)

    # Assert
    assert result is None


def test_coordinates_for_returns_none_when_no_partition_covers_uprn(
    tmp_path: Path,
) -> None:
    # Arrange
    _write_open_uprn(tmp_path)
    repository = GeospatialS3Repository(_reader(tmp_path))

    # Act — uprn beyond every partition's range
    result = repository.coordinates_for(500000)

    # Assert
    assert result is None