mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Create lodgement-date data using the old EPC API to verify test failures
This commit is contained in:
parent
0955862973
commit
1af6bc6748
3 changed files with 1348 additions and 9 deletions
81
backend/address2UPRN/tests/populate_lodgement_dates.py
Normal file
81
backend/address2UPRN/tests/populate_lodgement_dates.py
Normal file
|
|
@ -0,0 +1,81 @@
|
|||
import csv
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
from urllib.parse import urlencode
|
||||
|
||||
import pandas as pd
|
||||
from epc_api.client import EpcClient
|
||||
|
||||
# CSV fixture that main() reads with csv.DictReader; lives next to this script.
FIXTURE_PATH = Path(__file__).parent / "test_data.csv"
|
||||
# JSON sidecar that main() loads (if present) and rewrites with lodgement dates.
SIDECAR_PATH = Path(__file__).parent / "test_lodgement_dates.json"
|
||||
|
||||
|
||||
def fetch_postcode_records(client: "EpcClient", postcode: str) -> pd.DataFrame:
    """Fetch the domestic EPC search results for one postcode.

    Args:
        client: EPC API client; its ``domestic.host`` is the base URL and
            ``domestic.call`` performs the request.
        postcode: Postcode passed to the search endpoint as a query parameter.

    Returns:
        A DataFrame built from the response's ``rows`` using the response's
        ``column-names`` as columns, or an empty DataFrame when the response
        is falsy or has no ``rows`` key.
    """
    # Build the URL with "/" explicitly: os.path.join would use the platform
    # separator (a backslash on Windows), which is wrong for URLs.
    url = client.domestic.host.rstrip("/") + "/search"
    url += "?" + urlencode({"size": 500})

    resp = client.domestic.call(url=url, method="get", params={"postcode": postcode})
    if not resp or "rows" not in resp:
        return pd.DataFrame()
    return pd.DataFrame(resp["rows"], columns=resp["column-names"])
|
||||
|
||||
|
||||
def _sidecar_key(row: dict) -> str:
    """Return the sidecar key for a fixture row: ``"<User Input>|<Postcode>"``."""
    return f"{row['User Input']}|{row['Postcode']}"


def main():
    """Populate the lodgement-date sidecar file from the old EPC API.

    Reads the CSV fixture, groups rows that have a manual UPRN by postcode,
    looks each UPRN up in the postcode's search results and records the
    lodgement date (or a "not found" marker) in the JSON sidecar. Entries
    already present in the sidecar are left untouched, so the script can be
    re-run to fill in only what is missing.

    Raises:
        RuntimeError: if the ``EPC_AUTH_TOKEN`` environment variable is unset.
    """
    auth_token = os.getenv("EPC_AUTH_TOKEN")
    if not auth_token:
        raise RuntimeError("EPC_AUTH_TOKEN not set")

    client = EpcClient(auth_token=auth_token)

    sidecar = {}
    if SIDECAR_PATH.exists():
        sidecar = json.loads(SIDECAR_PATH.read_text(encoding="utf-8"))

    with open(FIXTURE_PATH, newline="", encoding="utf-8") as f:
        rows = list(csv.DictReader(f))

    by_postcode: dict[str, list[dict]] = {}
    for row in rows:
        # Rows without a manually verified UPRN have nothing to look up.
        if row["Manual UPRN Code"] == "None":
            continue
        by_postcode.setdefault(row["Postcode"], []).append(row)

    for postcode, postcode_rows in by_postcode.items():
        # Skip the network call entirely when every row for this postcode is
        # already cached; otherwise a re-run would re-fetch every postcode.
        if all(_sidecar_key(row) in sidecar for row in postcode_rows):
            continue

        print(f"Fetching {postcode} ({len(postcode_rows)} rows)...")
        try:
            epc_df = fetch_postcode_records(client, postcode)
        except Exception as e:
            # Best effort: report the failure and carry on with other postcodes.
            print(f"  ERROR: {e}")
            continue

        if epc_df.empty:
            print(f"  No results from old API for {postcode}")
            continue

        # Normalise UPRNs to plain digit strings (drop a float-style ".0").
        epc_df["uprn"] = epc_df["uprn"].astype(str).str.replace(r"\.0$", "", regex=True)

        for row in postcode_rows:
            key = _sidecar_key(row)
            if key in sidecar:
                continue

            expected_uprn = str(row["Manual UPRN Code"]).strip()
            match = epc_df[epc_df["uprn"] == expected_uprn]

            if match.empty:
                print(f"  WARN: UPRN {expected_uprn} not found in old API for {postcode}")
                sidecar[key] = {"lodgement_date": None, "found_in_old_api": False}
            else:
                lodgement_date = match.iloc[0].get("lodgement-date")
                # NaN is truthy, so test for it explicitly; otherwise the
                # string "nan" would be written to the sidecar as a date.
                has_date = bool(lodgement_date) and not pd.isna(lodgement_date)
                sidecar[key] = {
                    "lodgement_date": str(lodgement_date) if has_date else None,
                    "found_in_old_api": True,
                }
                print(f"  {row['User Input']}: {lodgement_date}")

    SIDECAR_PATH.write_text(json.dumps(sidecar, indent=2), encoding="utf-8")
    print(f"\nWritten to {SIDECAR_PATH}")


if __name__ == "__main__":
    main()
|
||||
|
|
@ -1,25 +1,54 @@
|
|||
# tests/test_address_to_uprn_csv.py
|
||||
|
||||
import csv
|
||||
import json
|
||||
import pytest
|
||||
from datetime import date
|
||||
from pathlib import Path
|
||||
from backend.address2UPRN.main import get_uprn
|
||||
|
||||
# CSV fixture of test cases, read by load_test_cases(); lives next to this module.
FIXTURE_PATH = Path(__file__).parent / "test_data.csv"
|
||||
# Optional JSON sidecar of lodgement dates, read by _load_sidecar().
SIDECAR_PATH = Path(__file__).parent / "test_lodgement_dates.json"
|
||||
# Cases whose recorded lodgement date is earlier than this are marked xfail.
NEW_API_CUTOFF = date(2012, 1, 1)
|
||||
|
||||
|
||||
def _load_sidecar() -> dict:
    """Return the parsed sidecar JSON, or an empty dict if the file is absent."""
    if not SIDECAR_PATH.exists():
        return {}
    raw = SIDECAR_PATH.read_text()
    return json.loads(raw)
|
||||
|
||||
|
||||
def load_test_cases():
    """Build the pytest parameter sets from the CSV fixture.

    Each fixture row becomes one ``pytest.param`` of
    ``(User Input, Postcode, Manual UPRN Code)``. When the sidecar records a
    lodgement date earlier than ``NEW_API_CUTOFF`` for the row, the case is
    marked as a non-strict ``xfail``.

    Returns:
        A list of ``pytest.param`` objects, one per fixture row.
    """
    sidecar = _load_sidecar()
    cases = []

    with open(FIXTURE_PATH, newline="", encoding="utf-8") as f:
        for row in csv.DictReader(f):
            key = f"{row['User Input']}|{row['Postcode']}"
            lodgement_date = sidecar.get(key, {}).get("lodgement_date")

            marks = []
            if lodgement_date:
                try:
                    # Only the leading YYYY-MM-DD part is used.
                    parsed = date.fromisoformat(lodgement_date[:10])
                except (TypeError, ValueError):
                    # A malformed sidecar value must not abort collection of
                    # the whole module; treat it as "date unknown".
                    parsed = None

                if parsed is not None and parsed < NEW_API_CUTOFF:
                    marks.append(
                        pytest.mark.xfail(
                            reason=f"EPC lodged {lodgement_date} — predates new API coverage (Jan 2012)",
                            strict=False,
                        )
                    )

            cases.append(
                pytest.param(
                    row["User Input"],
                    row["Postcode"],
                    row["Manual UPRN Code"],
                    id=f'{row["User Input"]} [{row["Postcode"]}]',
                    marks=marks,
                )
            )

    return cases
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
|
|
@ -31,7 +60,6 @@ def test_uprn_resolution_matches_manual(
|
|||
postcode: str,
|
||||
expected_uprn: str,
|
||||
):
|
||||
|
||||
uprn = get_uprn(user_input, postcode)
|
||||
if uprn:
|
||||
assert uprn == expected_uprn
|
||||
|
|
|
|||
1230
backend/address2UPRN/tests/test_lodgement_dates.json
Normal file
1230
backend/address2UPRN/tests/test_lodgement_dates.json
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Add table
Reference in a new issue