"""Throwaway one-off: bulk-fetch cohort-2 EPC API JSONs from gov.uk EPB. Persists the inner `data` payload (as returned by EpcClientService._fetch_certificate) to tests/domain/sap10_calculator/rdsap/fixtures/golden/.json. Skips certs whose JSON already exists. """ from __future__ import annotations import json import os import sys from pathlib import Path from typing import Any import httpx from dotenv import load_dotenv REPO_ROOT = Path(__file__).resolve().parents[1] sys.path.insert(0, str(REPO_ROOT)) from infrastructure.epc_client._retry import call_with_retry from infrastructure.epc_client.epc_client_service import EpcClientService from infrastructure.epc_client.exceptions import ( EpcApiError, EpcNotFoundError, EpcRateLimitError, ) def _fetch_raw(token: str, cert_num: str) -> dict[str, Any]: resp = httpx.get( f"{EpcClientService.BASE_URL}/api/certificate", params={"certificate_number": cert_num}, headers={"Authorization": f"Bearer {token}", "Accept": "application/json"}, timeout=EpcClientService.REQUEST_TIMEOUT, ) if resp.status_code == 404: raise EpcNotFoundError(cert_num) if resp.status_code == 429: raise EpcRateLimitError("Rate limited by EPC API") if not resp.is_success: raise EpcApiError(f"EPC API error {resp.status_code}: {resp.text}") payload: dict[str, Any] = resp.json()["data"] return payload def main() -> int: load_dotenv(REPO_ROOT / "backend" / ".env") token = os.environ["OPEN_EPC_API_TOKEN"] src = REPO_ROOT / "sap worksheets" / "additional with api 2" dst = REPO_ROOT / "domain" / "sap10_calculator" / "rdsap" / "tests" / "fixtures" / "golden" fetched = 0 skipped = 0 missing: list[str] = [] for cd in sorted(src.iterdir()): if not cd.is_dir(): continue out_path = dst / f"{cd.name}.json" if out_path.exists(): print(f"skip {cd.name}") skipped += 1 continue cert_num = cd.name try: raw = call_with_retry(lambda: _fetch_raw(token, cert_num)) except EpcNotFoundError: print(f"404 {cd.name}") missing.append(cd.name) continue out_path.write_text(json.dumps(raw, indent=2)) print(f"fetch {cd.name}") fetched += 1 print(f"\nfetched={fetched} skipped={skipped} missing={len(missing)}") if missing: print("missing:") for c in missing: print(f" {c}") return 0 if __name__ == "__main__": sys.exit(main())