"""Record lodgement dates for the fixture CSV rows in a sidecar JSON file.

Rows of ``FIXTURE_PATH`` are grouped by their ``Postcode`` column, each
postcode is looked up through ``client.domestic``, and the ``lodgement-date``
of the record whose ``uprn`` equals the row's ``Manual UPRN Code`` is stored
in ``SIDECAR_PATH`` under the key ``"<User Input>|<Postcode>"``.
"""

import csv
import json
import math
import os
from pathlib import Path
from typing import Optional
from urllib.parse import urlencode

import pandas as pd

from epc_api.client import EpcClient

FIXTURE_PATH = Path(__file__).parent / "test_data.csv"
SIDECAR_PATH = Path(__file__).parent / "test_lodgement_dates.json"


def fetch_postcode_records(
    client: EpcClient, postcode: str, size: int = 500
) -> pd.DataFrame:
    """Return the search results for *postcode* as a DataFrame.

    Args:
        client: Client whose ``domestic`` endpoint is queried.
        postcode: Postcode passed to the search call as a request parameter.
        size: Value of the ``size`` query-string parameter (default 500).

    Returns:
        A DataFrame built from the response's ``rows`` with the response's
        ``column-names`` as columns, or an empty DataFrame when the response
        is falsy or has no ``rows`` key.
    """
    # Join with "/" explicitly: os.path.join uses the OS path separator and
    # would put a backslash into the URL on Windows.
    base = client.domestic.host.rstrip("/")
    url = f"{base}/search?{urlencode({'size': size})}"
    resp = client.domestic.call(url=url, method="get", params={"postcode": postcode})
    if not resp or "rows" not in resp:
        return pd.DataFrame()
    return pd.DataFrame(resp["rows"], columns=resp["column-names"])


def _sidecar_key(row: dict) -> str:
    """Return the sidecar key for a fixture row."""
    return f"{row['User Input']}|{row['Postcode']}"


def _load_sidecar(path: Path) -> dict:
    """Return the existing sidecar contents, or an empty dict if none exist."""
    if not path.exists():
        return {}
    return json.loads(path.read_text(encoding="utf-8"))


def _group_rows_by_postcode(rows) -> dict:
    """Group fixture rows by postcode, dropping rows without a manual UPRN."""
    by_postcode: dict[str, list[dict]] = {}
    for row in rows:
        # The fixture spells a missing UPRN as the literal text "None".
        if row["Manual UPRN Code"] == "None":
            continue
        by_postcode.setdefault(row["Postcode"], []).append(row)
    return by_postcode


def _lodgement_date_or_none(value) -> Optional[str]:
    """Return *value* as a string, or None when it is missing."""
    # NaN is truthy, so a bare truthiness test would store the string "nan".
    if isinstance(value, float) and math.isnan(value):
        return None
    return str(value) if value else None


def _record_postcode(
    epc_df: pd.DataFrame, postcode: str, postcode_rows: list, sidecar: dict
) -> None:
    """Add a sidecar entry for every not-yet-recorded row of one postcode."""
    # Drop a trailing ".0" so UPRNs that were parsed as floats still compare
    # equal to the fixture's text values.
    epc_df["uprn"] = epc_df["uprn"].astype(str).str.replace(r"\.0$", "", regex=True)

    for row in postcode_rows:
        key = _sidecar_key(row)
        if key in sidecar:
            continue

        expected_uprn = str(row["Manual UPRN Code"]).strip()
        match = epc_df[epc_df["uprn"] == expected_uprn]
        if match.empty:
            print(f" WARN: UPRN {expected_uprn} not found in old API for {postcode}")
            sidecar[key] = {"lodgement_date": None, "found_in_old_api": False}
            continue

        # When several records share the UPRN, the first one returned is used.
        lodgement_date = match.iloc[0].get("lodgement-date")
        sidecar[key] = {
            "lodgement_date": _lodgement_date_or_none(lodgement_date),
            "found_in_old_api": True,
        }
        print(f" {row['User Input']}: {lodgement_date}")


def main():
    """Fill the sidecar file with lodgement dates for every fixture row.

    Entries already present in the sidecar are kept as they are, and a
    postcode whose rows are all present is not fetched again. The sidecar is
    written even if processing stops early, so completed lookups are kept.

    Raises:
        RuntimeError: If the ``EPC_AUTH_TOKEN`` environment variable is unset
            or empty.
    """
    auth_token = os.getenv("EPC_AUTH_TOKEN")
    if not auth_token:
        raise RuntimeError("EPC_AUTH_TOKEN not set")
    client = EpcClient(auth_token=auth_token)

    sidecar = _load_sidecar(SIDECAR_PATH)
    with open(FIXTURE_PATH, newline="", encoding="utf-8") as f:
        by_postcode = _group_rows_by_postcode(csv.DictReader(f))

    try:
        for postcode, postcode_rows in by_postcode.items():
            # Nothing would be recorded for this postcode, so skip the call.
            if all(_sidecar_key(row) in sidecar for row in postcode_rows):
                print(f"Skipping {postcode}: all rows already in sidecar")
                continue

            print(f"Fetching {postcode} ({len(postcode_rows)} rows)...")
            try:
                epc_df = fetch_postcode_records(client, postcode)
            except Exception as e:
                # Best effort: one failing postcode must not stop the others.
                print(f" ERROR: {e}")
                continue

            if epc_df.empty:
                print(f" No results from old API for {postcode}")
                continue

            _record_postcode(epc_df, postcode, postcode_rows, sidecar)
    finally:
        # Persist whatever was collected, including on an unexpected error.
        SIDECAR_PATH.write_text(json.dumps(sidecar, indent=2), encoding="utf-8")
        print(f"\nWritten to {SIDECAR_PATH}")


if __name__ == "__main__":
    main()