Model/repositories/geospatial/geospatial_s3_repository.py
Khalim Conn-Kowlessar 3998ef586c feat(geospatial): GeospatialRepo — OS Open-UPRN coordinate lookup (#1131)
Add Coordinates value object + GeospatialRepository port + GeospatialS3Repository
adapter. Resolves a Property's lon/lat from the partitioned Ordnance Survey
Open-UPRN parquet (filename_meta -> partition -> UPRN row). A Repo, not a
Fetcher (ADR-0011): no live OS API call. The parquet reader is injected, so it's
unit-tested against fixture parquets with no S3/network; returns None when the
UPRN is uncovered or absent. pyright strict clean.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-05-30 19:55:46 +00:00

43 lines
1.5 KiB
Python

from __future__ import annotations
from collections.abc import Callable
from typing import Optional
import pandas as pd
from domain.geospatial.coordinates import Coordinates
from repositories.geospatial.geospatial_repository import GeospatialRepository
# Signature of the injected reader: it is called with a dataset key (a string
# such as `_META_KEY` or "spatial/<partition filename>") and returns that
# parquet's contents as a DataFrame.
ParquetReader = Callable[[str], pd.DataFrame]
# Key of the metadata parquet; it is read first on each lookup to find which
# partition file covers a given UPRN.
_META_KEY = "spatial/filename_meta.parquet"
class GeospatialS3Repository(GeospatialRepository):
    """Reads the partitioned Ordnance Survey Open-UPRN parquet dataset.

    `spatial/filename_meta.parquet` maps a UPRN range (lower/upper) to a
    partition file; that partition carries `UPRN`/`LATITUDE`/`LONGITUDE`. The
    parquet reader is injected so the dataset can be sourced from S3 in
    production or a fixture directory in tests — the Repo holds no S3/HTTP code.
    """

    def __init__(self, read_parquet: ParquetReader) -> None:
        """Store the reader used to load parquet files by dataset key."""
        self._read_parquet = read_parquet

    def coordinates_for(self, uprn: int) -> Optional[Coordinates]:
        """Return the coordinates recorded for *uprn*, or None if not found.

        The metadata parquet is read on every call and filtered to the rows
        whose inclusive `lower`..`upper` range contains *uprn*. Each matching
        partition is then read, in metadata order, until one holds a row for
        the UPRN.

        Returns:
            Coordinates built from the first matching row's `LONGITUDE` and
            `LATITUDE`, or None when no range covers the UPRN or no covering
            partition contains it.
        """
        meta = self._read_parquet(_META_KEY)
        covering = meta[(meta["lower"] <= uprn) & (meta["upper"] >= uprn)]

        # Both bounds are inclusive, so more than one range can match (e.g. a
        # UPRN sitting on a boundary shared by two partitions). Try each
        # covering partition rather than trusting only the first one; with a
        # single match this reads exactly one partition, as before.
        for filename in covering["filenames"]:
            partition = self._read_parquet(f"spatial/{filename!s}")
            rows = partition[partition["UPRN"] == uprn]
            if rows.empty:
                continue
            # If a partition lists the UPRN more than once, the first row wins.
            row = rows.iloc[0]
            return Coordinates(
                longitude=float(row["LONGITUDE"]),
                latitude=float(row["LATITUDE"]),
            )
        return None