mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-30 13:10:47 +00:00
feat(geospatial): GeospatialRepo — OS Open-UPRN coordinate lookup (#1131)
Add Coordinates value object + GeospatialRepository port + GeospatialS3Repository adapter. Resolves a Property's lon/lat from the partitioned Ordnance Survey Open-UPRN parquet (filename_meta -> partition -> UPRN row). A Repo, not a Fetcher (ADR-0011): no live OS API call. The parquet reader is injected, so it's unit-tested against fixture parquets with no S3/network; returns None when the UPRN is uncovered or absent. pyright strict clean. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
5a3be9d672
commit
285e7f8824
7 changed files with 146 additions and 0 deletions
0
domain/geospatial/__init__.py
Normal file
0
domain/geospatial/__init__.py
Normal file
15
domain/geospatial/coordinates.py
Normal file
15
domain/geospatial/coordinates.py
Normal file
|
|
@ -0,0 +1,15 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class Coordinates:
    """Immutable WGS84 position of a Property, in decimal degrees.

    Instances are resolved from the Ordnance Survey Open-UPRN reference data
    and handed to the Google Solar fetcher by the Ingestion orchestrator.

    Attributes:
        longitude: Longitude in decimal degrees.
        latitude: Latitude in decimal degrees.
    """

    # Field order is part of the positional constructor: (longitude, latitude).
    longitude: float
    latitude: float
|
||||||
0
repositories/geospatial/__init__.py
Normal file
0
repositories/geospatial/__init__.py
Normal file
17
repositories/geospatial/geospatial_repository.py
Normal file
17
repositories/geospatial/geospatial_repository.py
Normal file
|
|
@ -0,0 +1,17 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from domain.geospatial.coordinates import Coordinates
|
||||||
|
|
||||||
|
|
||||||
|
class GeospatialRepository(ABC):
    """Port for looking up a Property's coordinates by UPRN.

    This is a Repo rather than a Fetcher (ADR-0011): implementations read
    stored Ordnance Survey Open-UPRN reference data and make no live API call.
    """

    @abstractmethod
    def coordinates_for(self, uprn: int) -> Optional[Coordinates]:
        """Return the coordinates for ``uprn``, or None when it is not covered."""
|
||||||
43
repositories/geospatial/geospatial_s3_repository.py
Normal file
43
repositories/geospatial/geospatial_s3_repository.py
Normal file
|
|
@ -0,0 +1,43 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from collections.abc import Callable
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
from domain.geospatial.coordinates import Coordinates
|
||||||
|
from repositories.geospatial.geospatial_repository import GeospatialRepository
|
||||||
|
|
||||||
|
# A reader takes a dataset key (e.g. "spatial/filename_meta.parquet") and
# returns the parquet stored under that key as a DataFrame.
ParquetReader = Callable[[str], pd.DataFrame]

# Key of the index parquet that maps UPRN ranges to partition filenames.
_META_KEY = "spatial/filename_meta.parquet"


class GeospatialS3Repository(GeospatialRepository):
    """Reads the partitioned Ordnance Survey Open-UPRN parquet dataset.

    `spatial/filename_meta.parquet` maps a UPRN range (`lower`/`upper`,
    inclusive) to a partition file (`filenames`); that partition carries
    `UPRN`/`LATITUDE`/`LONGITUDE`. The parquet reader is injected so the
    dataset can be sourced from S3 in production or a fixture directory in
    tests — the Repo holds no S3/HTTP code.
    """

    def __init__(self, read_parquet: ParquetReader) -> None:
        """Store the reader used to load every parquet by its dataset key."""
        self._read_parquet = read_parquet

    def coordinates_for(self, uprn: int) -> Optional[Coordinates]:
        """Resolve ``uprn`` to its coordinates via the partition index.

        Args:
            uprn: Unique Property Reference Number to look up.

        Returns:
            The matching ``Coordinates``, or None when no partition range
            covers the UPRN, the covering partition has no row for it, or the
            matching row(s) carry a null longitude/latitude.
        """
        meta = self._read_parquet(_META_KEY)
        covering = meta[(meta["lower"] <= uprn) & (meta["upper"] >= uprn)]
        if covering.empty:
            return None
        # If several ranges cover the UPRN, the first listed partition is used.
        filename = str(covering["filenames"].iloc[0])

        partition = self._read_parquet(f"spatial/{filename}")
        rows = partition[partition["UPRN"] == uprn]
        # A row with a null coordinate cannot yield a usable point: float()
        # would produce NaN (or raise on pd.NA), so treat it as "no coordinates".
        rows = rows.dropna(subset=["LONGITUDE", "LATITUDE"])
        if rows.empty:
            return None
        row = rows.iloc[0]
        return Coordinates(
            longitude=float(row["LONGITUDE"]),
            latitude=float(row["LATITUDE"]),
        )
|
||||||
0
tests/repositories/geospatial/__init__.py
Normal file
0
tests/repositories/geospatial/__init__.py
Normal file
71
tests/repositories/geospatial/test_geospatial_repository.py
Normal file
71
tests/repositories/geospatial/test_geospatial_repository.py
Normal file
|
|
@ -0,0 +1,71 @@
|
||||||
|
"""GeospatialRepo resolves a Property's coordinates from the OS Open-UPRN data.
|
||||||
|
|
||||||
|
A reference-data lookup, not a Fetcher (ADR-0011): no live OS API call. The
|
||||||
|
adapter reads the partitioned Open-UPRN parquet via an injected reader, so the
|
||||||
|
test exercises the partition lookup + filter against real fixture parquets with
|
||||||
|
no network.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from collections.abc import Callable
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
from domain.geospatial.coordinates import Coordinates
|
||||||
|
from repositories.geospatial.geospatial_s3_repository import GeospatialS3Repository
|
||||||
|
|
||||||
|
|
||||||
|
def _reader(base: Path) -> Callable[[str], pd.DataFrame]:
|
||||||
|
def read(key: str) -> pd.DataFrame:
|
||||||
|
return pd.read_parquet(base / key)
|
||||||
|
|
||||||
|
return read
|
||||||
|
|
||||||
|
|
||||||
|
def _write_open_uprn(base: Path) -> None:
    """Write a minimal Open-UPRN fixture dataset under ``base / "spatial"``.

    Creates the partition index (`filename_meta.parquet`) with a single range
    0..100000 pointing at `0_100000.parquet`, and that partition holding two
    UPRN rows (12345 and 12346) with their latitude/longitude.
    """
    spatial_dir = base / "spatial"
    spatial_dir.mkdir(parents=True, exist_ok=True)

    # Index: one UPRN range mapped to one partition file.
    partition_index = pd.DataFrame(
        {"lower": [0], "upper": [100000], "filenames": ["0_100000.parquet"]}
    )
    partition_index.to_parquet(spatial_dir / "filename_meta.parquet")

    # Partition: the UPRN rows the lookups are filtered against.
    uprn_rows = pd.DataFrame(
        {
            "UPRN": [12345, 12346],
            "LATITUDE": [51.5074, 51.6000],
            "LONGITUDE": [-0.1278, -0.2000],
        }
    )
    uprn_rows.to_parquet(spatial_dir / "0_100000.parquet")
|
||||||
|
|
||||||
|
|
||||||
|
def test_coordinates_for_returns_lon_lat(tmp_path: Path) -> None:
    """A UPRN present in its covering partition resolves to its lon/lat."""
    # Arrange
    _write_open_uprn(tmp_path)
    repo = GeospatialS3Repository(_reader(tmp_path))
    expected = Coordinates(longitude=-0.1278, latitude=51.5074)

    # Act
    actual = repo.coordinates_for(12345)

    # Assert
    assert actual == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_coordinates_for_returns_none_when_uprn_absent(tmp_path: Path) -> None:
    """A UPRN inside a partition's range but missing from its rows gives None."""
    # Arrange
    _write_open_uprn(tmp_path)
    repo = GeospatialS3Repository(_reader(tmp_path))

    # Act — 99999 falls within 0..100000 but has no row in the partition
    result = repo.coordinates_for(99999)

    # Assert
    assert result is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_coordinates_for_returns_none_when_no_partition_covers_uprn(
    tmp_path: Path,
) -> None:
    """A UPRN outside every indexed range gives None."""
    # Arrange
    _write_open_uprn(tmp_path)
    repo = GeospatialS3Repository(_reader(tmp_path))

    # Act — 500000 lies beyond the only indexed range (0..100000)
    result = repo.coordinates_for(500000)

    # Assert
    assert result is None
|
||||||
Loading…
Add table
Reference in a new issue