from __future__ import annotations

from collections.abc import Callable
from typing import Optional

import pandas as pd

from domain.geospatial.coordinates import Coordinates
from repositories.geospatial.geospatial_repository import GeospatialRepository

# Callable that takes a dataset key and returns that parquet file as a DataFrame.
ParquetReader = Callable[[str], pd.DataFrame]

# Key of the index file that maps UPRN ranges to partition files.
_META_KEY = "spatial/filename_meta.parquet"


class GeospatialS3Repository(GeospatialRepository):
    """Look up coordinates in the partitioned Ordnance Survey Open-UPRN parquet dataset.

    The index file `spatial/filename_meta.parquet` assigns each partition file
    an inclusive UPRN range through its `lower`/`upper` columns, with the
    partition's file name in `filenames`. Each partition file holds the
    `UPRN`/`LATITUDE`/`LONGITUDE` columns.

    The parquet reader is injected, so the dataset can come from S3 in
    production or from a fixture directory in tests — this class itself
    contains no S3/HTTP code.
    """

    def __init__(self, read_parquet: ParquetReader) -> None:
        """Store the reader used to load parquet files by dataset key.

        Args:
            read_parquet: Callable mapping a key such as
                `spatial/filename_meta.parquet` to a DataFrame.
        """
        self._read_parquet = read_parquet

    def coordinates_for(self, uprn: int) -> Optional[Coordinates]:
        """Return the coordinates recorded for *uprn*, or ``None`` if unknown.

        The index file is read on every call. Only the first partition whose
        range covers *uprn* is consulted, and only the first matching row in
        that partition is used.

        Args:
            uprn: The UPRN to look up.

        Returns:
            The coordinates of the first matching row, or ``None`` when no
            partition range covers *uprn* or the chosen partition has no row
            for it.
        """
        index = self._read_parquet(_META_KEY)

        # Both range bounds are inclusive.
        at_or_above_lower = index["lower"] <= uprn
        at_or_below_upper = index["upper"] >= uprn
        candidates = index[at_or_above_lower & at_or_below_upper]
        if candidates.empty:
            return None

        partition_file = str(candidates["filenames"].iloc[0])
        partition = self._read_parquet(f"spatial/{partition_file}")

        matches = partition[partition["UPRN"] == uprn]
        if matches.empty:
            return None

        first = matches.iloc[0]
        longitude = float(first["LONGITUDE"])
        latitude = float(first["LATITUDE"])
        return Coordinates(longitude=longitude, latitude=latitude)