mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
feat(geospatial): GeospatialRepo — OS Open-UPRN coordinate lookup (#1131)
Add Coordinates value object + GeospatialRepository port + GeospatialS3Repository adapter. Resolves a Property's lon/lat from the partitioned Ordnance Survey Open-UPRN parquet (filename_meta -> partition -> UPRN row). A Repo, not a Fetcher (ADR-0011): no live OS API call. The parquet reader is injected, so it's unit-tested against fixture parquets with no S3/network; returns None when the UPRN is uncovered or absent. pyright strict clean. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
5a3be9d672
commit
285e7f8824
7 changed files with 146 additions and 0 deletions
0
domain/geospatial/__init__.py
Normal file
0
domain/geospatial/__init__.py
Normal file
15
domain/geospatial/coordinates.py
Normal file
15
domain/geospatial/coordinates.py
Normal file
|
|
@@ -0,0 +1,15 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Coordinates:
    """Immutable longitude/latitude pair locating a Property.

    Both values are WGS84 decimal degrees. Instances are resolved from the
    Ordnance Survey Open-UPRN reference data, and the Ingestion orchestrator
    passes them on to the Google Solar fetcher.
    """

    # Field order is part of the constructor signature: longitude first.
    longitude: float
    latitude: float
|
||||
0
repositories/geospatial/__init__.py
Normal file
0
repositories/geospatial/__init__.py
Normal file
17
repositories/geospatial/geospatial_repository.py
Normal file
17
repositories/geospatial/geospatial_repository.py
Normal file
|
|
@@ -0,0 +1,17 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Optional
|
||||
|
||||
from domain.geospatial.coordinates import Coordinates
|
||||
|
||||
|
||||
class GeospatialRepository(ABC):
    """Port for looking up a Property's coordinates by UPRN.

    Implementations read hosted Ordnance Survey Open-UPRN reference data; per
    ADR-0011 this is a Repo rather than a Fetcher, so no live API is called.
    """

    @abstractmethod
    def coordinates_for(self, uprn: int) -> Optional[Coordinates]:
        """Return the coordinates for ``uprn``, or None when it is not covered."""
|
||||
43
repositories/geospatial/geospatial_s3_repository.py
Normal file
43
repositories/geospatial/geospatial_s3_repository.py
Normal file
|
|
@@ -0,0 +1,43 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Callable
|
||||
from typing import Optional
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from domain.geospatial.coordinates import Coordinates
|
||||
from repositories.geospatial.geospatial_repository import GeospatialRepository
|
||||
|
||||
# Signature of the injected reader: takes a dataset key and returns its frame.
ParquetReader = Callable[[str], pd.DataFrame]

# Key of the index that maps UPRN ranges to partition files.
_META_KEY = "spatial/filename_meta.parquet"


class GeospatialS3Repository(GeospatialRepository):
    """Reads the partitioned Ordnance Survey Open-UPRN parquet dataset.

    `spatial/filename_meta.parquet` maps a UPRN range (lower/upper) to a
    partition file; that partition carries `UPRN`/`LATITUDE`/`LONGITUDE`. The
    parquet reader is injected so the dataset can be sourced from S3 in
    production or a fixture directory in tests — the Repo holds no S3/HTTP code.
    """

    def __init__(self, read_parquet: ParquetReader) -> None:
        """Store the reader used to load the index and the partition files.

        Args:
            read_parquet: Callable given a dataset key (for example
                ``spatial/filename_meta.parquet``) that returns its DataFrame.
        """
        self._read_parquet = read_parquet

    def coordinates_for(self, uprn: int) -> Optional[Coordinates]:
        """Return the coordinates recorded for ``uprn``.

        Args:
            uprn: The Unique Property Reference Number to look up.

        Returns:
            The coordinates of the first matching row, or None when no
            partition range covers ``uprn`` or no covering partition
            contains a row for it.
        """
        # NOTE: the index is re-read on every call; any caching is left to
        # the injected reader.
        meta = self._read_parquet(_META_KEY)

        # Range bounds are inclusive at both ends, so a UPRN sitting on a
        # boundary shared by two neighbouring ranges matches both partitions.
        covering = meta[(meta["lower"] <= uprn) & (meta["upper"] >= uprn)]

        # Search every covering partition, in index order, rather than only
        # the first: the row may live in the second of two matching files.
        for filename in covering["filenames"]:
            partition = self._read_parquet(f"spatial/{str(filename)}")
            rows = partition[partition["UPRN"] == uprn]
            if rows.empty:
                continue
            row = rows.iloc[0]
            return Coordinates(
                longitude=float(row["LONGITUDE"]),
                latitude=float(row["LATITUDE"]),
            )
        return None
|
||||
0
tests/repositories/geospatial/__init__.py
Normal file
0
tests/repositories/geospatial/__init__.py
Normal file
71
tests/repositories/geospatial/test_geospatial_repository.py
Normal file
71
tests/repositories/geospatial/test_geospatial_repository.py
Normal file
|
|
@@ -0,0 +1,71 @@
|
|||
"""GeospatialRepo resolves a Property's coordinates from the OS Open-UPRN data.
|
||||
|
||||
A reference-data lookup, not a Fetcher (ADR-0011): no live OS API call. The
|
||||
adapter reads the partitioned Open-UPRN parquet via an injected reader, so the
|
||||
test exercises the partition lookup + filter against real fixture parquets with
|
||||
no network.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Callable
|
||||
from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from domain.geospatial.coordinates import Coordinates
|
||||
from repositories.geospatial.geospatial_s3_repository import GeospatialS3Repository
|
||||
|
||||
|
||||
def _reader(base: Path) -> Callable[[str], pd.DataFrame]:
|
||||
def read(key: str) -> pd.DataFrame:
|
||||
return pd.read_parquet(base / key)
|
||||
|
||||
return read
|
||||
|
||||
|
||||
def _write_open_uprn(base: Path) -> None:
    """Write a one-partition Open-UPRN fixture dataset under ``base/spatial``.

    Produces the range index (`filename_meta.parquet`) and the single
    partition it points at, holding two UPRN rows.
    """
    spatial_dir = base / "spatial"
    spatial_dir.mkdir(parents=True, exist_ok=True)

    partition_name = "0_100000.parquet"

    # Index: one range, 0..100000, mapped to the single partition file.
    index_frame = pd.DataFrame(
        {"lower": [0], "upper": [100000], "filenames": [partition_name]}
    )
    index_frame.to_parquet(spatial_dir / "filename_meta.parquet")

    # Partition: the rows the repository filters by UPRN.
    partition_frame = pd.DataFrame(
        {
            "UPRN": [12345, 12346],
            "LATITUDE": [51.5074, 51.6000],
            "LONGITUDE": [-0.1278, -0.2000],
        }
    )
    partition_frame.to_parquet(spatial_dir / partition_name)
|
||||
|
||||
|
||||
def test_coordinates_for_returns_lon_lat(tmp_path: Path) -> None:
    """A UPRN present in its covering partition resolves to its lon/lat."""
    # Arrange
    _write_open_uprn(tmp_path)
    read_fixture = _reader(tmp_path)
    expected = Coordinates(longitude=-0.1278, latitude=51.5074)

    # Act
    actual = GeospatialS3Repository(read_fixture).coordinates_for(12345)

    # Assert
    assert actual == expected
|
||||
|
||||
|
||||
def test_coordinates_for_returns_none_when_uprn_absent(tmp_path: Path) -> None:
    """A UPRN inside a partition's range but missing from its rows yields None."""
    # Arrange
    _write_open_uprn(tmp_path)
    read_fixture = _reader(tmp_path)

    # Act — 99999 falls within 0..100000 but the partition has no such row
    result = GeospatialS3Repository(read_fixture).coordinates_for(99999)

    # Assert
    assert result is None
|
||||
|
||||
|
||||
def test_coordinates_for_returns_none_when_no_partition_covers_uprn(
    tmp_path: Path,
) -> None:
    """A UPRN outside every indexed range yields None."""
    # Arrange
    _write_open_uprn(tmp_path)
    read_fixture = _reader(tmp_path)

    # Act — 500000 lies beyond the only indexed range (0..100000)
    result = GeospatialS3Repository(read_fixture).coordinates_for(500000)

    # Assert
    assert result is None
|
||||
Loading…
Add table
Reference in a new issue