mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
153 lines
4.9 KiB
Python
153 lines
4.9 KiB
Python
# backend/ordnanceSurvey/tests/test_os_match.py
|
|
"""
|
|
Debug harness for Ordnance Survey address matching.
|
|
|
|
Mirrors backend/address2UPRN/tests/test_csv.py, but for the OS Places flow:
|
|
for each (User Input, Postcode) case it hits the live OS Places API, scores every
|
|
candidate address with AddressMatch.score (exactly as backend/ordnanceSurvey/main.py
|
|
does), and prints the ranked breakdown (top 15 candidates) so you can see *why* a match was or
|
|
wasn't found.
|
|
|
|
Run with -s to see the ranking:
|
|
|
|
pytest backend/ordnanceSurvey/tests/test_os_match.py -s
|
|
|
|
Requires ORDNANCE_SURVEY_API_KEY to be set (config Settings / env); skipped otherwise.
|
|
"""
|
|
|
|
import csv
|
|
import os
|
|
import time
|
|
from pathlib import Path
|
|
from typing import Any, Optional
|
|
|
|
import pytest
|
|
|
|
from backend.ordnanceSurvey.helpers import (
|
|
lookup_os_places,
|
|
os_places_results_to_dataframe,
|
|
)
|
|
from backend.utils.addressMatch import AddressMatch
|
|
|
|
# CSV of test cases, stored in the same directory as this test module.
FIXTURE_PATH = Path(__file__).with_name("test_data.csv")

# Pause (in seconds) applied between cases so the live OS Places API is not
# hammered.
OS_THROTTLE_SECONDS = 1.0

# A best score at or below this value counts as "no match", mirroring the
# best_score <= 0 rule in ordnanceSurvey/main.py.
MATCH_THRESHOLD = 0.0
|
|
|
|
|
|
@pytest.fixture(autouse=True)
def _throttle_os_requests():
    """
    Pause after each test case so consecutive cases do not hammer the live
    OS Places API.

    The pause is skipped when no API key is configured: in that situation the
    test case skips itself before making any request, so sleeping would only
    add OS_THROTTLE_SECONDS of dead time per parametrized case.
    """
    # Decide up front; the key lookup is the same one the test itself uses to
    # choose between running and skipping.
    will_hit_api = _api_key() is not None
    yield
    if will_hit_api:
        time.sleep(OS_THROTTLE_SECONDS)
|
|
|
|
|
|
def _api_key() -> Optional[str]:
    """
    Return the OS Places API key, or None when no usable key is configured.

    The ORDNANCE_SURVEY_API_KEY environment variable is consulted first so the
    harness does not rely on the Settings model loading cleanly; Settings is
    only tried when the variable is unset or empty. An empty value or the
    literal "changeme" is treated as "not configured".
    """
    candidate = os.getenv("ORDNANCE_SURVEY_API_KEY")

    if not candidate:
        # Best-effort fallback: any failure while importing or loading
        # Settings simply means there is no key available.
        try:
            from backend.app.config import get_settings

            candidate = get_settings().ORDNANCE_SURVEY_API_KEY
        except Exception:
            candidate = None

    usable = bool(candidate) and candidate != "changeme"
    return candidate if usable else None
|
|
|
|
|
|
def load_test_cases():
    """
    Read the CSV fixture at FIXTURE_PATH and return one pytest.param per row.

    Each param carries (User Input, Postcode, Expected UPRN). The expected
    UPRN is stripped of surrounding whitespace and is an empty string when the
    column is missing or blank. The param id is "<User Input> [<Postcode>]".
    """
    cases = []
    with open(FIXTURE_PATH, newline="", encoding="utf-8") as handle:
        for record in csv.DictReader(handle):
            typed_address = record["User Input"]
            typed_postcode = record["Postcode"]
            wanted_uprn = (record.get("Expected UPRN") or "").strip()
            cases.append(
                pytest.param(
                    typed_address,
                    typed_postcode,
                    wanted_uprn,
                    id=f'{typed_address} [{typed_postcode}]',
                )
            )
    return cases
|
|
|
|
|
|
def _scored_candidates(
    user_input: str, postcode: str, api_key: str
) -> list[dict[str, Any]]:
    """
    Look up ``postcode`` via lookup_os_places (i.e. without the DB cache) and
    score each candidate ADDRESS against ``user_input`` with
    AddressMatch.score, the same scoring call ordnanceSurvey/main.py uses.

    Asserts that the response reports status 200 and contains a "data" entry.

    Returns one dict per candidate with the keys "uprn", "address",
    "normalised" and "score", ordered from highest score to lowest.
    """
    payload: dict[str, Any] = lookup_os_places(postcode, api_key)
    assert payload.get("status") == 200, f"OS Places API failed: {payload}"
    assert "data" in payload, f"No data in OS Places response: {payload}"

    frame = os_places_results_to_dataframe(payload["data"])
    rows: list[dict[str, Any]] = frame.to_dict("records")  # type: ignore[assignment]

    def _score_row(row: dict[str, Any]) -> dict[str, Any]:
        # A missing ADDRESS is scored as the empty string; a missing UPRN is
        # reported as "?".
        candidate_text = str(row.get("ADDRESS", ""))
        return {
            "uprn": row.get("UPRN", "?"),
            "address": candidate_text,
            "normalised": AddressMatch.normalise_address(candidate_text),
            "score": AddressMatch.score(user_input, candidate_text),
        }

    return sorted(
        (_score_row(row) for row in rows),
        key=lambda item: item["score"],
        reverse=True,
    )
|
|
|
|
|
|
def _print_debug(
    user_input: str, postcode: str, scored: list[dict[str, Any]]
) -> None:
    """
    Print a human-readable report for one test case.

    The header shows the raw and normalised user input, the postcode and the
    number of candidates. It is followed by up to 15 entries of ``scored`` in
    the order given (score, UPRN, address, normalised address). When
    ``scored`` is empty a single notice is printed instead and no closing
    rule is emitted.
    """
    heavy_rule = "=" * 80
    light_rule = "-" * 80

    print(f"\n{heavy_rule}")
    print(f"User input : {user_input!r}")
    print(f"Normalised : {AddressMatch.normalise_address(user_input)!r}")
    print(f"Postcode : {postcode!r}")
    print(f"Candidates : {len(scored)}")
    print(light_rule)

    if len(scored) == 0:
        print("(no OS Places candidates returned for this postcode)")
        return

    shown = scored[:15]
    for entry in shown:
        print(f" score={entry['score']:.4f} uprn={entry['uprn']}")
        print(f" ADDRESS : {entry['address']}")
        print(f" normalised : {entry['normalised']}")
    print(heavy_rule)
|
|
|
|
|
|
@pytest.mark.integration
@pytest.mark.parametrize("user_input,postcode,expected_uprn", load_test_cases())
def test_os_match_finds_candidate(
    user_input: str,
    postcode: str,
    expected_uprn: str,
):
    """
    Check that the top-ranked OS Places candidate for a case is a match.

    Skips when no API key is available. Otherwise the candidates are scored,
    the ranking is printed, and the test requires a best score above
    MATCH_THRESHOLD. When the case supplies an expected UPRN, the best
    candidate's UPRN must equal it.
    """
    key = _api_key()
    if key is None:
        pytest.skip("ORDNANCE_SURVEY_API_KEY not set")

    ranking = _scored_candidates(user_input, postcode, key)
    _print_debug(user_input, postcode, ranking)

    if ranking:
        best = ranking[0]
        best_score = float(best["score"])
    else:
        best = None
        best_score = 0.0

    # The handler only records a match for a positive best score, so a failure
    # here is the debugging cue: the ranking printed above explains it.
    assert best is not None and best_score > MATCH_THRESHOLD, (
        f"No OS match for {user_input!r} @ {postcode!r} "
        f"(best_score={best_score:.4f}). See ranking above."
    )

    if not expected_uprn:
        return

    assert str(best["uprn"]) == expected_uprn, (
        f"Best match UPRN {best['uprn']!r} != expected {expected_uprn!r}"
    )
|