Create lodgement-date data using the old EPC API to verify test failures

This commit is contained in:
Khalim Conn-Kowlessar 2026-04-27 12:15:30 +00:00
parent 0955862973
commit 1af6bc6748
3 changed files with 1348 additions and 9 deletions

View file

@ -0,0 +1,81 @@
import csv
import json
import os
from pathlib import Path
from urllib.parse import urlencode
import pandas as pd
from epc_api.client import EpcClient
# CSV fixture read by main(); its "User Input", "Postcode" and
# "Manual UPRN Code" columns are the ones this script uses.
FIXTURE_PATH = Path(__file__).parent / "test_data.csv"
# JSON sidecar that main() loads when present, extends, and rewrites.
# Entries are keyed by "<User Input>|<Postcode>".
SIDECAR_PATH = Path(__file__).parent / "test_lodgement_dates.json"
def fetch_postcode_records(
    client: EpcClient, postcode: str, size: int = 500
) -> pd.DataFrame:
    """Fetch the domestic EPC search results for one postcode.

    Args:
        client: EPC API client. ``client.domestic.host`` is used as the base
            URL and ``client.domestic.call`` performs the request.
        postcode: Value sent as the ``postcode`` query parameter.
        size: Page size requested from the search endpoint (default 500).

    Returns:
        A DataFrame built from the response's ``rows`` (labelled with
        ``column-names`` when the response provides them), or an empty
        DataFrame when the response is falsy or has no ``rows`` key.
    """
    # URLs always use "/" as separator; os.path.join would insert "\" on
    # Windows, so join the path segment explicitly instead.
    base = client.domestic.host.rstrip("/")
    url = f"{base}/search?{urlencode({'size': size})}"
    resp = client.domestic.call(
        url=url, method="get", params={"postcode": postcode}
    )
    if not resp or "rows" not in resp:
        return pd.DataFrame()
    # A response without "column-names" falls back to pandas' own labelling
    # instead of raising KeyError.
    return pd.DataFrame(resp["rows"], columns=resp.get("column-names"))
def main():
    """Build the lodgement-date sidecar JSON for the UPRN fixture rows.

    Reads the fixture CSV, groups rows by postcode, queries the EPC search
    endpoint once per postcode, and records the lodgement date of the record
    whose UPRN equals the row's "Manual UPRN Code". Results are merged into
    any existing sidecar file; keys already present are left untouched.

    Raises:
        RuntimeError: If the EPC_AUTH_TOKEN environment variable is not set.
    """
    auth_token = os.getenv("EPC_AUTH_TOKEN")
    if not auth_token:
        raise RuntimeError("EPC_AUTH_TOKEN not set")
    client = EpcClient(auth_token=auth_token)

    sidecar = {}
    if SIDECAR_PATH.exists():
        sidecar = json.loads(SIDECAR_PATH.read_text(encoding="utf-8"))

    with open(FIXTURE_PATH, newline="", encoding="utf-8") as f:
        rows = list(csv.DictReader(f))

    # Group by postcode so each postcode costs a single API request.
    by_postcode: dict[str, list[dict]] = {}
    for row in rows:
        # Rows without a manually verified UPRN have nothing to match on.
        if row["Manual UPRN Code"] == "None":
            continue
        by_postcode.setdefault(row["Postcode"], []).append(row)

    for postcode, postcode_rows in by_postcode.items():
        print(f"Fetching {postcode} ({len(postcode_rows)} rows)...")
        try:
            epc_df = fetch_postcode_records(client, postcode)
        except Exception as e:
            # Best-effort script: report the failure and move on to the next
            # postcode rather than losing everything gathered so far.
            print(f" ERROR: {e}")
            continue
        if epc_df.empty:
            print(f" No results from old API for {postcode}")
            continue

        # Normalise UPRNs to plain digit strings (drops a float-style ".0").
        epc_df["uprn"] = (
            epc_df["uprn"].astype(str).str.replace(r"\.0$", "", regex=True)
        )

        for row in postcode_rows:
            key = f"{row['User Input']}|{row['Postcode']}"
            if key in sidecar:
                continue
            expected_uprn = str(row["Manual UPRN Code"]).strip()
            match = epc_df[epc_df["uprn"] == expected_uprn]
            if match.empty:
                print(f" WARN: UPRN {expected_uprn} not found in old API for {postcode}")
                sidecar[key] = {"lodgement_date": None, "found_in_old_api": False}
                continue

            # NOTE(review): this takes the first matching row as returned by
            # the API; if a UPRN has several certificates this may not be the
            # most recent lodgement - confirm the API's ordering.
            lodgement_date = match.iloc[0].get("lodgement-date")
            # A missing value comes back from pandas as NaN, which is truthy;
            # without this check the string "nan" would be stored as a date.
            if pd.isna(lodgement_date) or not lodgement_date:
                stored_date = None
            else:
                stored_date = str(lodgement_date)
            sidecar[key] = {
                "lodgement_date": stored_date,
                "found_in_old_api": True,
            }
            print(f" {row['User Input']}: {lodgement_date}")

    SIDECAR_PATH.write_text(json.dumps(sidecar, indent=2), encoding="utf-8")
    print(f"\nWritten to {SIDECAR_PATH}")


if __name__ == "__main__":
    main()

View file

@ -1,25 +1,54 @@
# tests/test_address_to_uprn_csv.py
import csv
import json
import pytest
from datetime import date
from pathlib import Path
from backend.address2UPRN.main import get_uprn
# CSV of test cases; the "User Input", "Postcode" and "Manual UPRN Code"
# columns become the parametrized arguments.
FIXTURE_PATH = Path(__file__).parent / "test_data.csv"
# Optional JSON sidecar keyed by "<User Input>|<Postcode>"; each entry's
# "lodgement_date" field is consulted when building the test cases.
SIDECAR_PATH = Path(__file__).parent / "test_lodgement_dates.json"
# Cases whose sidecar lodgement date is earlier than this date are marked
# as non-strict xfail.
NEW_API_CUTOFF = date(2012, 1, 1)
def _load_sidecar() -> dict:
    """Return the parsed sidecar JSON, or an empty dict if the file is absent."""
    if not SIDECAR_PATH.exists():
        return {}
    with SIDECAR_PATH.open() as handle:
        return json.load(handle)
def load_test_cases():
    """Build the pytest parameter list from the fixture CSV.

    Each CSV row becomes one ``pytest.param`` of
    (user input, postcode, expected UPRN). When the sidecar holds a
    lodgement date for the row that is earlier than ``NEW_API_CUTOFF``, the
    case is marked as a non-strict xfail.

    Returns:
        A list of ``pytest.param`` objects, one per CSV row, in file order.
    """
    sidecar = _load_sidecar()
    cases = []
    with open(FIXTURE_PATH, newline="", encoding="utf-8") as f:
        for row in csv.DictReader(f):
            key = f"{row['User Input']}|{row['Postcode']}"
            lodgement_date = sidecar.get(key, {}).get("lodgement_date")

            marks = []
            if lodgement_date:
                try:
                    # Only the leading YYYY-MM-DD part is parsed; any time
                    # suffix is ignored.
                    parsed = date.fromisoformat(lodgement_date[:10])
                except (TypeError, ValueError):
                    # One malformed sidecar entry must not abort collection
                    # of every test; treat it as "date unknown".
                    parsed = None
                if parsed is not None and parsed < NEW_API_CUTOFF:
                    marks.append(
                        pytest.mark.xfail(
                            reason=f"EPC lodged {lodgement_date} — predates new API coverage (Jan 2012)",
                            strict=False,
                        )
                    )

            cases.append(
                pytest.param(
                    row["User Input"],
                    row["Postcode"],
                    row["Manual UPRN Code"],
                    id=f'{row["User Input"]} [{row["Postcode"]}]',
                    marks=marks,
                )
            )
    return cases
@pytest.mark.parametrize(
@ -31,7 +60,6 @@ def test_uprn_resolution_matches_manual(
postcode: str,
expected_uprn: str,
):
uprn = get_uprn(user_input, postcode)
if uprn:
assert uprn == expected_uprn

File diff suppressed because it is too large Load diff