mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Add Coordinates value object + GeospatialRepository port + GeospatialS3Repository adapter. Resolves a Property's lon/lat from the partitioned Ordnance Survey Open-UPRN parquet (filename_meta -> partition -> UPRN row). A Repo, not a Fetcher (ADR-0011): no live OS API call. The parquet reader is injected, so it's unit-tested against fixture parquets with no S3/network; returns None when the UPRN is uncovered or absent. pyright strict clean. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
43 lines
1.5 KiB
Python
43 lines
1.5 KiB
Python
from __future__ import annotations
|
|
|
|
from collections.abc import Callable
|
|
from typing import Optional
|
|
|
|
import pandas as pd
|
|
|
|
from domain.geospatial.coordinates import Coordinates
|
|
from repositories.geospatial.geospatial_repository import GeospatialRepository
|
|
|
|
# Callable that loads one parquet file, identified by a string key such as
# "spatial/filename_meta.parquet", and returns its contents as a DataFrame.
ParquetReader = Callable[[str], pd.DataFrame]
|
|
|
|
# Key of the metadata parquet read first on every lookup; its `lower`/`upper`
# columns are used to pick the partition file named in `filenames`.
_META_KEY = "spatial/filename_meta.parquet"
|
|
|
|
|
|
class GeospatialS3Repository(GeospatialRepository):
    """Resolve UPRN coordinates from the partitioned OS Open-UPRN parquet set.

    Lookup is two-step: `spatial/filename_meta.parquet` lists, per partition
    file, the inclusive UPRN range (`lower`/`upper`) it covers; the matching
    partition then supplies the `UPRN`/`LATITUDE`/`LONGITUDE` row. Every read
    goes through the injected parquet reader, so this class contains no
    S3/HTTP code — production can back it with S3, tests with a fixture
    directory.
    """

    def __init__(self, read_parquet: ParquetReader) -> None:
        """Keep the callable used to load a parquet file by its key."""
        self._read_parquet = read_parquet

    def coordinates_for(self, uprn: int) -> Optional[Coordinates]:
        """Return the coordinates stored for `uprn`, or None when not found.

        None is returned both when no metadata range covers the UPRN and when
        the covering partition holds no row for it.
        """
        index = self._read_parquet(_META_KEY)
        in_range = (index["lower"] <= uprn) & (index["upper"] >= uprn)
        candidates = index[in_range]
        if candidates.empty:
            return None

        # Only the first covering partition is consulted.
        partition_name = str(candidates["filenames"].iloc[0])
        partition = self._read_parquet(f"spatial/{partition_name}")

        matches = partition[partition["UPRN"] == uprn]
        if matches.empty:
            return None

        # If the UPRN appears more than once, the first row wins.
        first = matches.iloc[0]
        longitude = float(first["LONGITUDE"])
        latitude = float(first["LATITUDE"])
        return Coordinates(longitude=longitude, latitude=latitude)
|