Model/backend/epc_client/client.py
Khalim Conn-Kowlessar d338be867b added missing files
2026-04-25 22:41:57 +00:00

175 lines
5.9 KiB
Python

# Spec: https://raw.githubusercontent.com/communitiesuk/epb-data-warehouse/main/api/api.yml
from __future__ import annotations
from dataclasses import dataclass
from typing import Callable, Optional
import httpx
import pandas as pd
from backend.epc_client.exceptions import EpcApiError, EpcNotFoundError, EpcRateLimitError
from backend.epc_client._retry import call_with_retry
from datatypes.epc.domain.epc_property_data import EpcPropertyData
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
@dataclass
class EpcSearchResult:
    """A single row from an EPC search response.

    Instances are built by ``EpcClientService._parse_search_result`` from
    one element of the response's ``data`` list.
    """

    certificate_number: str
    # Address line 1 is always present; lines 2-4 are None when the row
    # does not carry them.
    address_line_1: str
    address_line_2: Optional[str]
    address_line_3: Optional[str]
    address_line_4: Optional[str]
    postcode: str
    post_town: str
    # None when the row has no UPRN.
    uprn: Optional[int]
    current_energy_efficiency_band: str
    # Kept as the raw string supplied by the API.
    registration_date: str

    def full_address(self) -> str:
        """Return the populated address lines joined by ``", "``.

        Lines that are ``None`` or empty are skipped, so the result is an
        empty string when no line carries any text.
        """
        lines = (
            self.address_line_1,
            self.address_line_2,
            self.address_line_3,
            self.address_line_4,
        )
        return ", ".join(filter(None, lines))
class EpcClientService:
    """Client for the EPC (energy performance certificate) data API.

    Each request is an authenticated ``httpx.get`` against ``BASE_URL``.
    The public lookup methods route their HTTP calls through
    ``call_with_retry``.
    """

    BASE_URL = "https://api.get-energy-performance-data.communities.gov.uk"
    # Lowest ``lexiscore`` an address match may have and still be accepted.
    _MIN_MATCH_SCORE = 0.6

    def __init__(self, auth_token: str) -> None:
        """Prepare the headers sent with every request.

        Args:
            auth_token: Token placed in the ``Authorization: Bearer`` header.
        """
        self._headers = {
            "Authorization": f"Bearer {auth_token}",
            "Accept": "application/json",
        }

    def get_by_certificate_number(self, cert_num: str) -> EpcPropertyData:
        """Fetch a certificate and map it to ``EpcPropertyData``.

        Args:
            cert_num: Certificate number to look up.

        Returns:
            The mapped certificate.

        Raises:
            EpcNotFoundError: The API answered 404.
            EpcRateLimitError: The API answered 429.
            EpcApiError: Any other non-success response.

        NOTE(review): which of these are retried before surfacing depends
        on ``call_with_retry``, which is not visible here.
        """
        raw = call_with_retry(lambda: self._fetch_certificate(cert_num))
        return EpcPropertyDataMapper.from_api_response(raw)

    def get_by_uprn(self, uprn: int) -> Optional[EpcPropertyData]:
        """Return the most recently registered certificate for a UPRN.

        Args:
            uprn: Unique property reference number to search for.

        Returns:
            The mapped certificate, or ``None`` when the search is empty.
        """
        results = call_with_retry(lambda: self._search(uprn=uprn))
        if not results:
            return None
        # registration_date is compared as a string; this picks the newest
        # entry provided the API uses a sortable format such as ISO-8601
        # -- TODO confirm against the spec.
        latest = max(results, key=lambda r: r.registration_date)
        return self.get_by_certificate_number(latest.certificate_number)

    def search_by_postcode(self, postcode: str) -> list[EpcSearchResult]:
        """Return every search result the API lists for ``postcode``."""
        return call_with_retry(lambda: self._search(postcode=postcode))

    def find_best_match(self, postcode: str, address: str) -> Optional[EpcPropertyData]:
        """Find the certificate whose address best matches ``address``.

        Candidates come from a postcode search. They are scored first on
        address line 1 only and, if that yields no confident unique match,
        again on the full joined address.

        Args:
            postcode: Postcode used to fetch the candidate certificates.
            address: User-supplied address to match against.

        Returns:
            The matched certificate, or ``None`` when there are no
            candidates, no round produces a confident unique match, or the
            matched certificate can no longer be fetched (404).
        """
        # Imported at call time, as in the original; the reason (import
        # cost or a cycle) is not visible from this file.
        from backend.utils.addressMatch import get_uprn_candidates

        candidates = self.search_by_postcode(postcode)
        if not candidates:
            return None

        # Round 1 scores on addressLine1 only; round 2 on all lines joined.
        for use_full_address in (False, True):
            cert_num = self._pick_best_cert(
                candidates,
                address,
                use_full_address=use_full_address,
                fn=get_uprn_candidates,
            )
            if cert_num:
                return self._safe_get(cert_num)
        return None

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _raise_for_error(resp: httpx.Response) -> None:
        """Raise the matching client exception for a failed response.

        Callers deal with 404 themselves before calling this, because its
        meaning differs per endpoint.

        Raises:
            EpcRateLimitError: Status 429.
            EpcApiError: Any other non-success status.
        """
        if resp.status_code == 429:
            raise EpcRateLimitError("Rate limited by EPC API")
        if not resp.is_success:
            raise EpcApiError(f"EPC API error {resp.status_code}: {resp.text}")

    def _fetch_certificate(self, cert_num: str) -> dict:
        """GET one certificate and return the ``data`` member of the body.

        Raises:
            EpcNotFoundError: Status 404.
            EpcRateLimitError: Status 429.
            EpcApiError: Any other non-success status.
        """
        resp = httpx.get(
            f"{self.BASE_URL}/api/certificate",
            params={"certificate_number": cert_num},
            headers=self._headers,
        )
        if resp.status_code == 404:
            raise EpcNotFoundError(cert_num)
        self._raise_for_error(resp)
        return resp.json()["data"]

    def _search(
        self,
        postcode: Optional[str] = None,
        uprn: Optional[int] = None,
    ) -> list[EpcSearchResult]:
        """Query the domestic search endpoint.

        Args:
            postcode: Sent as the ``postcode`` parameter when non-empty.
            uprn: Sent as the ``uprn`` parameter when not ``None``.

        Returns:
            Parsed results; an empty list on 404 or when the body carries
            no rows.

        Raises:
            EpcRateLimitError: Status 429.
            EpcApiError: Any other non-success status.
        """
        params: dict[str, str | int] = {}
        if postcode:
            params["postcode"] = postcode
        if uprn is not None:
            params["uprn"] = uprn
        resp = httpx.get(
            f"{self.BASE_URL}/api/domestic/search",
            params=params,
            headers=self._headers,
        )
        if resp.status_code == 404:
            # A search with no hits is not an error for callers.
            return []
        self._raise_for_error(resp)
        # ``or []`` also covers an explicit ``"data": null`` payload, which
        # ``.get("data", [])`` would pass through as None.
        rows = resp.json().get("data") or []
        return [self._parse_search_result(r) for r in rows]

    @staticmethod
    def _parse_search_result(row: dict) -> EpcSearchResult:
        """Convert one search row (camelCase keys) to ``EpcSearchResult``.

        Address lines 2-4 and ``uprn`` are optional; every other key is
        required and a missing one raises ``KeyError``.
        """
        return EpcSearchResult(
            certificate_number=row["certificateNumber"],
            address_line_1=row["addressLine1"],
            address_line_2=row.get("addressLine2"),
            address_line_3=row.get("addressLine3"),
            address_line_4=row.get("addressLine4"),
            postcode=row["postcode"],
            post_town=row["postTown"],
            uprn=row.get("uprn"),
            current_energy_efficiency_band=row["currentEnergyEfficiencyBand"],
            registration_date=row["registrationDate"],
        )

    def _pick_best_cert(
        self,
        candidates: list[EpcSearchResult],
        user_address: str,
        use_full_address: bool,
        fn: Callable[..., pd.DataFrame],
    ) -> Optional[str]:
        """Pick the certificate number of the single best address match.

        Args:
            candidates: Search results to choose from.
            user_address: Address supplied by the user.
            use_full_address: Score on ``full_address()`` when true,
                otherwise on ``address_line_1`` only.
            fn: Scorer called as ``fn(df, user_address=...)`` with a frame
                of ``address``, ``uprn`` and ``certificate_number``
                columns. Its result is read for ``lexiscore``,
                ``lexirank`` and ``certificate_number``.

        Returns:
            The certificate number of the unique ``lexirank == 1`` row when
            its ``lexiscore`` reaches ``_MIN_MATCH_SCORE``; otherwise
            ``None``.
        """
        if not candidates:
            # Nothing to score; avoid handing the scorer a column-less frame.
            return None

        frame = pd.DataFrame([
            {
                "address": r.full_address() if use_full_address else r.address_line_1,
                "uprn": str(r.uprn) if r.uprn is not None else "",
                "certificate_number": r.certificate_number,
            }
            for r in candidates
        ])
        scored = fn(frame, user_address=user_address)
        if scored.empty:
            return None

        top = scored[scored["lexirank"] == 1]
        if len(top) != 1:
            # No winner, or a tie for first place: too ambiguous to accept.
            return None

        winner = top.iloc[0]
        # Apply the threshold to the row that is actually returned rather
        # than to whichever row the scorer happened to place first.
        if winner["lexiscore"] < self._MIN_MATCH_SCORE:
            return None
        return str(winner["certificate_number"])

    def _safe_get(self, cert_num: str) -> Optional[EpcPropertyData]:
        """Fetch a certificate, returning ``None`` instead of raising on 404.

        Other errors from ``get_by_certificate_number`` propagate.
        """
        try:
            return self.get_by_certificate_number(cert_num)
        except EpcNotFoundError:
            return None