feat(geospatial): GeospatialRepo — OS Open-UPRN coordinate lookup (#1131)

Add Coordinates value object + GeospatialRepository port + GeospatialS3Repository
adapter. Resolves a Property's lon/lat from the partitioned Ordnance Survey
Open-UPRN parquet (filename_meta -> partition -> UPRN row). A Repo, not a
Fetcher (ADR-0011): no live OS API call. The parquet reader is injected, so it's
unit-tested against fixture parquets with no S3/network; returns None when the
UPRN is uncovered or absent. pyright strict clean.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Khalim Conn-Kowlessar 2026-05-30 19:55:46 +00:00 committed by Jun-te Kim
parent 5a3be9d672
commit 285e7f8824
7 changed files with 146 additions and 0 deletions

View file

View file

@ -0,0 +1,15 @@
from __future__ import annotations
from dataclasses import dataclass
@dataclass(frozen=True)
class Coordinates:
    """An immutable WGS84 position for a Property, in decimal degrees.

    Values are resolved from the Ordnance Survey Open-UPRN reference data and
    handed to the Google Solar fetcher by the Ingestion orchestrator.
    """

    # Field order is (longitude, latitude); positional construction relies on it.
    longitude: float
    latitude: float

View file

View file

@ -0,0 +1,17 @@
from __future__ import annotations
from abc import ABC, abstractmethod
from typing import Optional
from domain.geospatial.coordinates import Coordinates
class GeospatialRepository(ABC):
    """Port for looking up a Property's coordinates by UPRN.

    This is a Repo rather than a Fetcher (ADR-0011): implementations read
    stored Ordnance Survey Open-UPRN reference data and make no live API call.
    """

    @abstractmethod
    def coordinates_for(self, uprn: int) -> Optional[Coordinates]:
        """Return the coordinates for `uprn`, or None when it is not covered."""

View file

@ -0,0 +1,43 @@
from __future__ import annotations
from collections.abc import Callable
from typing import Optional
import pandas as pd
from domain.geospatial.coordinates import Coordinates
from repositories.geospatial.geospatial_repository import GeospatialRepository
# Signature of the injected loader: it is called with a dataset key (such as
# `_META_KEY` or `spatial/<partition filename>`) and returns a DataFrame.
ParquetReader = Callable[[str], pd.DataFrame]
# Key of the metadata table that maps UPRN ranges to partition filenames.
_META_KEY = "spatial/filename_meta.parquet"
class GeospatialS3Repository(GeospatialRepository):
    """Reads the partitioned Ordnance Survey Open-UPRN parquet dataset.

    `spatial/filename_meta.parquet` maps a UPRN range (`lower`/`upper`, both
    inclusive) to a partition file named in `filenames`; that partition
    carries `UPRN`/`LATITUDE`/`LONGITUDE`. The parquet reader is injected so
    the dataset can be sourced from S3 in production or from a fixture
    directory in tests; the Repo itself holds no S3/HTTP code.
    """

    def __init__(self, read_parquet: ParquetReader) -> None:
        """Store the callable used to load a parquet file by its dataset key."""
        self._read_parquet = read_parquet

    def coordinates_for(self, uprn: int) -> Optional[Coordinates]:
        """Return the coordinates recorded for `uprn`, or None if not found.

        The metadata table is read on every call to find the partition(s)
        whose inclusive `lower`..`upper` range contains `uprn`; each such
        partition is then searched for a row with a matching `UPRN`.

        Returns None when no range covers `uprn`, or when no covering
        partition holds a row for it.
        """
        meta = self._read_parquet(_META_KEY)
        covering = meta[(meta["lower"] <= uprn) & (meta["upper"] >= uprn)]

        # Both bounds are inclusive, so a UPRN sitting on a boundary shared by
        # two ranges matches more than one metadata row. Try each covering
        # partition in order rather than trusting only the first one.
        for filename in covering["filenames"]:
            partition = self._read_parquet(f"spatial/{str(filename)}")
            rows = partition[partition["UPRN"] == uprn]
            if rows.empty:
                continue
            row = rows.iloc[0]
            return Coordinates(
                longitude=float(row["LONGITUDE"]),
                latitude=float(row["LATITUDE"]),
            )
        return None

View file

@ -0,0 +1,71 @@
"""GeospatialRepo resolves a Property's coordinates from the OS Open-UPRN data.
A reference-data lookup, not a Fetcher (ADR-0011): no live OS API call. The
adapter reads the partitioned Open-UPRN parquet via an injected reader, so the
test exercises the partition lookup + filter against real fixture parquets with
no network.
"""
from __future__ import annotations
from collections.abc import Callable
from pathlib import Path
import pandas as pd
from domain.geospatial.coordinates import Coordinates
from repositories.geospatial.geospatial_s3_repository import GeospatialS3Repository
def _reader(base: Path) -> Callable[[str], pd.DataFrame]:
def read(key: str) -> pd.DataFrame:
return pd.read_parquet(base / key)
return read
def _write_open_uprn(base: Path) -> None:
    """Write a minimal Open-UPRN fixture (metadata + one partition) under `base`."""
    spatial_dir = base / "spatial"
    spatial_dir.mkdir(parents=True, exist_ok=True)

    # One range, 0..100000, pointing at a single partition file.
    meta_frame = pd.DataFrame(
        {"lower": [0], "upper": [100000], "filenames": ["0_100000.parquet"]}
    )
    meta_frame.to_parquet(spatial_dir / "filename_meta.parquet")

    # Two UPRNs inside that range, each with its own latitude/longitude.
    partition_frame = pd.DataFrame(
        {
            "UPRN": [12345, 12346],
            "LATITUDE": [51.5074, 51.6000],
            "LONGITUDE": [-0.1278, -0.2000],
        }
    )
    partition_frame.to_parquet(spatial_dir / "0_100000.parquet")
def test_coordinates_for_returns_lon_lat(tmp_path: Path) -> None:
    """A UPRN present in its covering partition resolves to its lon/lat."""
    # Arrange
    _write_open_uprn(tmp_path)
    read_fixture = _reader(tmp_path)
    expected = Coordinates(longitude=-0.1278, latitude=51.5074)
    # Act
    resolved = GeospatialS3Repository(read_fixture).coordinates_for(12345)
    # Assert
    assert resolved == expected
def test_coordinates_for_returns_none_when_uprn_absent(tmp_path: Path) -> None:
    """A UPRN inside a partition's range but missing from its rows gives None."""
    # Arrange
    _write_open_uprn(tmp_path)
    read_fixture = _reader(tmp_path)
    # Act: uprn inside the partition range but not present in the data
    resolved = GeospatialS3Repository(read_fixture).coordinates_for(99999)
    # Assert
    assert resolved is None
def test_coordinates_for_returns_none_when_no_partition_covers_uprn(
    tmp_path: Path,
) -> None:
    """A UPRN outside every range in the metadata table gives None."""
    # Arrange
    _write_open_uprn(tmp_path)
    read_fixture = _reader(tmp_path)
    # Act: uprn beyond every partition's range
    resolved = GeospatialS3Repository(read_fixture).coordinates_for(500000)
    # Assert
    assert resolved is None