mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
81 lines
2.6 KiB
Python
81 lines
2.6 KiB
Python
import csv
|
|
import json
|
|
import os
|
|
from pathlib import Path
|
|
from urllib.parse import urlencode
|
|
|
|
import pandas as pd
|
|
from epc_api.client import EpcClient
|
|
|
|
# Both data files sit in the same directory as this script.
# CSV fixture that main() reads with csv.DictReader.
FIXTURE_PATH = Path(__file__).with_name("test_data.csv")
# JSON sidecar that main() loads (when present) and rewrites.
SIDECAR_PATH = Path(__file__).with_name("test_lodgement_dates.json")
|
|
|
|
|
|
def fetch_postcode_records(
    client: EpcClient, postcode: str, size: int = 500
) -> pd.DataFrame:
    """Fetch the domestic EPC search results for a single postcode.

    Sends a GET request to the ``search`` endpoint below
    ``client.domestic.host``. ``size`` is placed in the URL query string and
    ``postcode`` is handed to the client as a request parameter.

    Args:
        client: EPC API client. Only ``client.domestic.host`` and
            ``client.domestic.call`` are used here.
        postcode: Postcode to search for.
        size: Value sent as the ``size`` query parameter (presumably the
            maximum number of rows returned -- confirm against the API docs).

    Returns:
        A DataFrame built from the response's ``rows`` using
        ``column-names`` as the column labels, or an empty DataFrame when
        the response is falsy or has no ``rows`` key.
    """
    # URLs always use "/" as the separator; os.path.join follows the OS path
    # separator instead and would produce a backslash on Windows.
    base = client.domestic.host.rstrip("/")
    query = urlencode({"size": size})
    url = f"{base}/search?{query}"

    resp = client.domestic.call(url=url, method="get", params={"postcode": postcode})
    if not resp or "rows" not in resp:
        return pd.DataFrame()
    return pd.DataFrame(resp["rows"], columns=resp["column-names"])
|
|
|
|
|
|
def _sidecar_key(row: dict) -> str:
    """Build the sidecar lookup key ("<User Input>|<Postcode>") for a fixture row."""
    return f"{row['User Input']}|{row['Postcode']}"


def _pending_rows_by_postcode(rows: list[dict], sidecar: dict) -> dict[str, list[dict]]:
    """Group the fixture rows that still need a lookup by their postcode."""
    by_postcode: dict[str, list[dict]] = {}
    for row in rows:
        # Rows whose UPRN column holds the literal string "None" are skipped.
        if row["Manual UPRN Code"] == "None":
            continue
        # Already recorded on a previous run: leaving these out means a
        # postcode with nothing left to resolve costs no API call at all.
        if _sidecar_key(row) in sidecar:
            continue
        by_postcode.setdefault(row["Postcode"], []).append(row)
    return by_postcode


def main():
    """Fill the lodgement-date sidecar file for the rows of the CSV fixture.

    Reads the fixture at ``FIXTURE_PATH``, queries the EPC API once per
    postcode that still has unrecorded rows, matches each row's
    ``Manual UPRN Code`` against the ``uprn`` column of the results, and
    stores the outcome under the key ``"<User Input>|<Postcode>"``. Entries
    already present in the sidecar are kept untouched. Postcodes whose fetch
    fails or returns nothing are reported and left unrecorded, so a later
    run tries them again.

    The sidecar is written to ``SIDECAR_PATH`` even when the run is aborted
    part-way, so results gathered so far are not lost.

    Raises:
        RuntimeError: If the ``EPC_AUTH_TOKEN`` environment variable is
            unset or empty.
    """
    auth_token = os.getenv("EPC_AUTH_TOKEN")
    if not auth_token:
        raise RuntimeError("EPC_AUTH_TOKEN not set")

    client = EpcClient(auth_token=auth_token)

    sidecar = {}
    if SIDECAR_PATH.exists():
        sidecar = json.loads(SIDECAR_PATH.read_text(encoding="utf-8"))

    with open(FIXTURE_PATH, newline="", encoding="utf-8") as f:
        rows = list(csv.DictReader(f))

    by_postcode = _pending_rows_by_postcode(rows, sidecar)

    try:
        for postcode, postcode_rows in by_postcode.items():
            print(f"Fetching {postcode} ({len(postcode_rows)} rows)...")
            try:
                epc_df = fetch_postcode_records(client, postcode)
            except Exception as e:
                # Best effort: report the failure and move on to the next postcode.
                print(f" ERROR: {e}")
                continue

            if epc_df.empty:
                print(f" No results from old API for {postcode}")
                continue

            # Compare UPRNs as strings; a trailing ".0" is stripped so values
            # that were stringified from floats (e.g. "123.0") still match.
            epc_df["uprn"] = epc_df["uprn"].astype(str).str.replace(r"\.0$", "", regex=True)

            for row in postcode_rows:
                key = _sidecar_key(row)
                # Still required: duplicate fixture rows share a key, and the
                # first one processed in this run has already recorded it.
                if key in sidecar:
                    continue

                expected_uprn = str(row["Manual UPRN Code"]).strip()
                match = epc_df[epc_df["uprn"] == expected_uprn]

                if match.empty:
                    print(f" WARN: UPRN {expected_uprn} not found in old API for {postcode}")
                    sidecar[key] = {"lodgement_date": None, "found_in_old_api": False}
                    continue

                # Only the first matching record is consulted.
                lodgement_date = match.iloc[0].get("lodgement-date")
                # NaN/NaT are truthy, so test for them explicitly; otherwise a
                # missing date would be stored as the string "nan".
                has_date = not pd.isna(lodgement_date) and bool(lodgement_date)
                sidecar[key] = {
                    "lodgement_date": str(lodgement_date) if has_date else None,
                    "found_in_old_api": True,
                }
                print(f" {row['User Input']}: {lodgement_date}")
    finally:
        # Persist whatever was collected, including after an interruption.
        SIDECAR_PATH.write_text(json.dumps(sidecar, indent=2), encoding="utf-8")
        print(f"\nWritten to {SIDECAR_PATH}")
|
|
|
|
|
|
# Run the sidecar update only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|