added tests

This commit is contained in:
Jun-te Kim 2026-05-15 14:27:59 +00:00
parent 0573db1151
commit 7fde0baf56
4 changed files with 98 additions and 1 deletions

View file

@ -153,7 +153,7 @@ def handler(body: dict[str, Any], context: Any, local: bool = False) -> None:
# Process each postcode group at a time
for postcode, group in grouped:
print(f"Processing postcode: {postcode} ({len(group)} rows)")
logger.info(f"Processing postcode: {postcode} ({len(group)} rows)")
valid_group = AddressMatch.is_valid_postcode(postcode)
if not valid_group:
logger.warning(f"Postcode {postcode} is invalid, skipping")

View file

View file

@ -0,0 +1,96 @@
"""
Sanity checks for the ordnanceSurvey postcode-lookup + address-match flow.
Mirrors the per-row logic in `backend.ordnanceSurvey.main.handler`:
1. Call OS Places by postcode
2. Flatten to a DataFrame of candidate addresses
3. Score each candidate against the user input with AddressMatch.score
4. Pick the best match (address, UPRN, score)
Hits the live OS Places API, so the test is skipped when no real API key
is configured.
"""
from __future__ import annotations
import logging
from dataclasses import dataclass
import pandas as pd
import pytest
from backend.app.config import get_settings
from backend.ordnanceSurvey.helpers import (
lookup_os_places,
os_places_results_to_dataframe,
)
from backend.utils.addressMatch import AddressMatch
logger = logging.getLogger(__name__)
@dataclass(frozen=True)
class AddressCase:
    """One address-lookup scenario used to parametrize the tests below.

    Instances are immutable because the dataclass is declared with
    ``frozen=True``.
    """

    # Free-text address that is scored against every candidate address.
    user_input: str
    # Postcode sent to the OS Places lookup.
    postcode: str
    # UPRN (kept as a string) that the best-scoring candidate must carry.
    expected_uprn: str
# Scenarios fed to the parametrized lookup test: each pairs a free-text
# address and postcode with the UPRN the best-scoring candidate must have.
CASES: list[AddressCase] = [
    AddressCase(
        user_input="3 College Grove Road",
        postcode="WF1 3RL",
        expected_uprn="63025389",
    ),
    # NOTE(review): "Musuem" looks like a misspelling of "Museum" — confirm
    # whether it is deliberate (to exercise fuzzy matching) before changing it.
    AddressCase(
        user_input="14 Musuem Street",
        postcode="WA1 1HU",
        expected_uprn="100012894889",
    ),
    AddressCase(
        user_input="7 Hedgerow Walk",
        postcode="WV8 1UW",
        expected_uprn="100071164800",
    ),
]
@pytest.fixture(scope="module")
def os_api_key() -> str:
    """Return the configured OS Places API key, or skip when there is none.

    The key is read from ``get_settings().ORDNANCE_SURVEY_API_KEY``. An empty
    value, or one of the literal strings ``"changeme"`` / ``"test"``, is
    treated as "not configured" and results in ``pytest.skip``.
    """
    api_key = get_settings().ORDNANCE_SURVEY_API_KEY
    is_configured = bool(api_key) and api_key not in {"changeme", "test"}
    if not is_configured:
        pytest.skip("ORDNANCE_SURVEY_API_KEY not configured for live OS Places call")
    return api_key
@pytest.mark.integration
@pytest.mark.parametrize(
    "case",
    CASES,
    ids=[f"{entry.user_input} [{entry.postcode}]" for entry in CASES],
)
def test_address_lookup(case: AddressCase, os_api_key: str) -> None:
    """Check that the best-scoring OS Places candidate has the expected UPRN.

    Looks up ``case.postcode`` with ``lookup_os_places``, converts the result
    to a DataFrame, scores every ``ADDRESS`` value against ``case.user_input``
    with ``AddressMatch.score`` and asserts that the highest-scoring row has a
    positive score, a non-null UPRN, and a UPRN equal to
    ``case.expected_uprn``.
    """

    def score_against_input(candidate_address):
        # Same argument order as the original lambda: user input first.
        return AddressMatch.score(case.user_input, candidate_address)

    os_response = lookup_os_places(case.postcode, os_api_key)
    assert os_response.get("status") == 200, f"OS Places failed: {os_response}"

    candidate_frame = os_places_results_to_dataframe(os_response["data"])
    assert not candidate_frame.empty, f"No candidates returned for {case.postcode}"

    match_scores = candidate_frame["ADDRESS"].apply(score_against_input)

    # idxmax yields the index label of the top score; that label is then used
    # to read the score, address and UPRN of the same row.
    top_label = match_scores.idxmax()
    top_score = float(match_scores[top_label])
    top_address = candidate_frame.at[top_label, "ADDRESS"]
    top_uprn = candidate_frame.at[top_label, "UPRN"]

    logger.info(
        "input=%r postcode=%s -> uprn=%s score=%.4f address=%r",
        case.user_input,
        case.postcode,
        top_uprn,
        top_score,
        top_address,
    )

    # Also print so the result is visible without --log-cli.
    summary = (
        f"\n[{case.postcode}] {case.user_input!r}\n"
        f"  best_address = {top_address!r}\n"
        f"  best_uprn    = {top_uprn}\n"
        f"  best_score   = {top_score:.4f}"
    )
    print(summary)

    assert top_score > 0, f"No viable match for {case.user_input} ({case.postcode})"
    assert pd.notna(top_uprn), "Expected a UPRN on the best match"
    # Compare as strings because the expected UPRN is stored as a string.
    assert str(top_uprn) == case.expected_uprn, (
        f"UPRN mismatch for {case.user_input} ({case.postcode}): "
        f"got {top_uprn}, expected {case.expected_uprn}"
    )

View file

@ -15,6 +15,7 @@ testpaths =
backend/export/tests
backend/magic_plan/tests
backend/onboarders/tests
backend/ordnanceSurvey/tests
backend/pashub_fetcher/tests
datatypes/epc/domain/tests
datatypes/epc/schema/tests