# Source path: Model/backend/address2UPRN/tests/populate_lodgement_dates.py
# Repository-viewer metadata for the original file: 81 lines, 2.6 KiB, Python.

import csv
import json
import os
from pathlib import Path
from urllib.parse import urlencode
import pandas as pd
from epc_api.client import EpcClient
# Both data files are resolved as siblings of this script file.
# CSV fixture that main() reads.
FIXTURE_PATH = Path(__file__).with_name("test_data.csv")
# JSON sidecar that main() loads (if present) and rewrites.
SIDECAR_PATH = Path(__file__).with_name("test_lodgement_dates.json")
def fetch_postcode_records(
    client: "EpcClient", postcode: str, size: int = 500
) -> pd.DataFrame:
    """Fetch the domestic EPC search results for one postcode as a DataFrame.

    Args:
        client: EPC API client. Its ``domestic`` attribute supplies the base
            ``host`` URL and performs the request through ``call``.
        postcode: Value sent as the ``postcode`` request parameter.
        size: Value sent as the ``size`` query parameter. Defaults to 500,
            the value that used to be hard-coded.

    Returns:
        A DataFrame built from the response's ``rows``, labelled with the
        response's ``column-names`` when that key is present. An empty
        DataFrame when the response is falsy or has no ``rows`` key.
    """
    # Build the URL with plain string handling: os.path.join is a filesystem
    # helper and would insert a backslash separator on Windows.
    base_url = client.domestic.host.rstrip("/")
    url = f"{base_url}/search?{urlencode({'size': size})}"
    resp = client.domestic.call(url=url, method="get", params={"postcode": postcode})
    if not resp or "rows" not in resp:
        return pd.DataFrame()
    # Tolerate a payload without "column-names" instead of raising KeyError;
    # pandas then infers the column labels from the rows.
    return pd.DataFrame(resp["rows"], columns=resp.get("column-names"))
def _sidecar_key(row: dict) -> str:
    """Build the sidecar key identifying one fixture row."""
    return f"{row['User Input']}|{row['Postcode']}"


def _lodgement_entry(epc_df: pd.DataFrame, expected_uprn: str) -> dict:
    """Build the sidecar entry for one UPRN from a postcode's EPC records."""
    match = epc_df[epc_df["uprn"] == expected_uprn]
    if match.empty:
        return {"lodgement_date": None, "found_in_old_api": False}
    # NOTE(review): when several records share the UPRN, the first row as
    # returned by the API is used; confirm that is the intended certificate.
    lodgement_date = match.iloc[0].get("lodgement-date")
    # NaN/NA are truthy (or refuse bool()), so test for them explicitly;
    # otherwise a missing date would be stored as the string "nan".
    is_missing = pd.isna(lodgement_date) or not lodgement_date
    return {
        "lodgement_date": None if is_missing else str(lodgement_date),
        "found_in_old_api": True,
    }


def main():
    """Record the lodgement date of each fixture row's expected UPRN.

    Reads the CSV at ``FIXTURE_PATH``, groups its rows by ``Postcode``,
    fetches the EPC records for each postcode and stores, per
    ``"<User Input>|<Postcode>"`` key, the ``lodgement-date`` of the record
    whose ``uprn`` equals the row's ``Manual UPRN Code``. Results are merged
    into the JSON file at ``SIDECAR_PATH``; keys already present there are
    left untouched, and postcodes with no outstanding rows are not fetched.

    Raises:
        RuntimeError: If the ``EPC_AUTH_TOKEN`` environment variable is
            unset or empty.
    """
    auth_token = os.getenv("EPC_AUTH_TOKEN")
    if not auth_token:
        raise RuntimeError("EPC_AUTH_TOKEN not set")
    client = EpcClient(auth_token=auth_token)

    sidecar = {}
    if SIDECAR_PATH.exists():
        sidecar = json.loads(SIDECAR_PATH.read_text(encoding="utf-8"))

    by_postcode: dict[str, list[dict]] = {}
    with open(FIXTURE_PATH, newline="", encoding="utf-8") as f:
        for row in csv.DictReader(f):
            if row["Manual UPRN Code"] == "None":
                continue
            # Rows already recorded need no lookup; dropping them here avoids
            # an API call for postcodes that are fully covered.
            if _sidecar_key(row) in sidecar:
                continue
            by_postcode.setdefault(row["Postcode"], []).append(row)

    try:
        for postcode, postcode_rows in by_postcode.items():
            print(f"Fetching {postcode} ({len(postcode_rows)} rows)...")
            try:
                epc_df = fetch_postcode_records(client, postcode)
            except Exception as e:
                # Deliberate best-effort: one failing postcode must not
                # abort the remaining lookups.
                print(f" ERROR: {e}")
                continue
            if epc_df.empty:
                print(f" No results from old API for {postcode}")
                continue
            if "uprn" not in epc_df.columns:
                print(f" ERROR: no 'uprn' column in old API results for {postcode}")
                continue
            # Normalise UPRNs to strings without a trailing ".0" so they
            # compare equal to the fixture's text values.
            epc_df["uprn"] = (
                epc_df["uprn"].astype(str).str.replace(r"\.0$", "", regex=True)
            )
            for row in postcode_rows:
                key = _sidecar_key(row)
                # Still needed for duplicate fixture rows: first one wins.
                if key in sidecar:
                    continue
                expected_uprn = str(row["Manual UPRN Code"]).strip()
                entry = _lodgement_entry(epc_df, expected_uprn)
                sidecar[key] = entry
                if entry["found_in_old_api"]:
                    print(f" {row['User Input']}: {entry['lodgement_date']}")
                else:
                    print(
                        f" WARN: UPRN {expected_uprn} not found in old API for {postcode}"
                    )
    finally:
        # Persist whatever was collected even if the run is interrupted, so
        # a later run can resume from the sidecar instead of starting over.
        SIDECAR_PATH.write_text(json.dumps(sidecar, indent=2), encoding="utf-8")
        print(f"\nWritten to {SIDECAR_PATH}")
# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()