mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
188 lines
6.1 KiB
Python
188 lines
6.1 KiB
Python
# Spec: https://raw.githubusercontent.com/communitiesuk/epb-data-warehouse/main/api/api.yml
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass
|
|
from typing import Callable, Optional
|
|
|
|
import httpx
|
|
import pandas as pd
|
|
|
|
from backend.epc_client.exceptions import (
|
|
EpcApiError,
|
|
EpcNotFoundError,
|
|
EpcRateLimitError,
|
|
)
|
|
from backend.epc_client._retry import call_with_retry
|
|
from datatypes.epc.domain.epc_property_data import EpcPropertyData
|
|
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
|
|
|
|
|
|
@dataclass
class EpcSearchResult:
    """One row returned by the EPC search endpoint."""

    certificate_number: str
    # Address lines 2-4 are optional; line 1 is always present.
    address_line_1: str
    address_line_2: Optional[str]
    address_line_3: Optional[str]
    address_line_4: Optional[str]
    postcode: str
    post_town: str
    uprn: Optional[int]
    current_energy_efficiency_band: str
    # Kept as the raw string supplied when the result is built.
    registration_date: str

    @property
    def full_address(self) -> str:
        """Return the non-empty address lines joined with ``", "``."""
        address_lines = (
            self.address_line_1,
            self.address_line_2,
            self.address_line_3,
            self.address_line_4,
        )
        # filter(None, ...) drops both None and empty strings.
        return ", ".join(filter(None, address_lines))
|
|
|
|
|
|
class EpcClientService:
    """Client for the energy performance certificate (EPC) API.

    Every request is sent to ``BASE_URL`` with a bearer token. Public methods
    look certificates up by certificate number, by UPRN, or by postcode
    combined with an address-similarity match.
    """

    BASE_URL = "https://api.get-energy-performance-data.communities.gov.uk"
    # The best candidate's "lexiscore" must reach this value to be accepted.
    _MIN_MATCH_SCORE = 0.6

    def __init__(self, auth_token: str) -> None:
        """Build the headers sent with every request.

        Args:
            auth_token: Token sent as ``Authorization: Bearer <token>``.
        """
        self._headers = {
            "Authorization": f"Bearer {auth_token}",
            "Accept": "application/json",
        }

    def get_by_certificate_number(self, cert_num: str) -> EpcPropertyData:
        """Fetch one certificate and map it to ``EpcPropertyData``.

        The fetch runs through ``call_with_retry``; how that helper treats
        the errors raised by ``_fetch_certificate`` is not visible here.

        Args:
            cert_num: Certificate number to look up.

        Returns:
            The mapped certificate data.
        """
        raw = call_with_retry(lambda: self._fetch_certificate(cert_num))
        return EpcPropertyDataMapper.from_api_response(raw)

    def get_by_uprn(self, uprn: int) -> Optional[EpcPropertyData]:
        """Return the most recently registered certificate for a UPRN.

        Args:
            uprn: Property reference number to search for.

        Returns:
            The mapped certificate, or ``None`` when the search is empty.
        """
        results = call_with_retry(lambda: self._search(uprn=uprn))
        if not results:
            return None
        # Dates are compared as strings — presumably ISO-8601, so that
        # lexicographic order equals chronological order; confirm against API.
        latest = max(results, key=lambda r: r.registration_date)
        return self.get_by_certificate_number(latest.certificate_number)

    def search_by_postcode(self, postcode: str) -> list[EpcSearchResult]:
        """Return all search results for ``postcode`` (possibly empty)."""
        return call_with_retry(lambda: self._search(postcode=postcode))

    def find_best_match(self, postcode: str, address: str) -> Optional[EpcPropertyData]:
        """Find the certificate in ``postcode`` that best matches ``address``.

        Matching runs in up to two rounds: first against address line 1 only,
        then against all address lines joined. The first round that yields a
        single acceptable candidate decides the result.

        Args:
            postcode: Postcode used to fetch the candidate certificates.
            address: User-supplied address to match candidates against.

        Returns:
            The matched certificate data, or ``None`` when there are no
            candidates, no round produces a unique match, or the matched
            certificate can no longer be found.
        """
        # Imported at call time, as in the original code (reason not visible).
        from backend.utils.addressMatch import get_uprn_candidates

        candidates = self.search_by_postcode(postcode)
        if not candidates:
            return None

        # Round 1: address line 1 only. Round 2: all address lines joined.
        for use_full_address in (False, True):
            cert_num = self._pick_best_cert(
                candidates,
                address,
                use_full_address=use_full_address,
                fn=get_uprn_candidates,
            )
            if cert_num:
                return self._safe_get(cert_num)

        return None

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _request(self, path: str, params: dict) -> httpx.Response:
        """GET ``BASE_URL + path`` and raise on rate limiting or failure.

        A 404 response is returned to the caller untouched because the two
        endpoints interpret "not found" differently.

        Raises:
            EpcRateLimitError: The API answered 429.
            EpcApiError: Any other unsuccessful status except 404.
        """
        resp = httpx.get(
            f"{self.BASE_URL}{path}",
            params=params,
            headers=self._headers,
        )
        if resp.status_code == 429:
            raise EpcRateLimitError("Rate limited by EPC API")
        if resp.status_code != 404 and not resp.is_success:
            raise EpcApiError(f"EPC API error {resp.status_code}: {resp.text}")
        return resp

    def _fetch_certificate(self, cert_num: str) -> dict:
        """Return the ``"data"`` payload of the certificate endpoint.

        Raises:
            EpcNotFoundError: The API answered 404.
            EpcRateLimitError: The API answered 429.
            EpcApiError: Any other unsuccessful response.
        """
        resp = self._request("/api/certificate", {"certificate_number": cert_num})
        if resp.status_code == 404:
            raise EpcNotFoundError(cert_num)
        return resp.json()["data"]

    def _search(
        self,
        postcode: Optional[str] = None,
        uprn: Optional[int] = None,
    ) -> list[EpcSearchResult]:
        """Query the domestic search endpoint.

        Args:
            postcode: Added as a filter when non-empty.
            uprn: Added as a filter when not ``None``.

        Returns:
            Parsed search rows; an empty list when the API answers 404 or the
            response carries no ``"data"`` key.

        Raises:
            EpcRateLimitError: The API answered 429.
            EpcApiError: Any other unsuccessful response.
        """
        params: dict[str, str | int] = {}
        if postcode:
            params["postcode"] = postcode
        if uprn is not None:
            params["uprn"] = uprn

        resp = self._request("/api/domestic/search", params)
        if resp.status_code == 404:
            return []

        rows = resp.json().get("data", [])
        return [self._parse_search_result(r) for r in rows]

    @staticmethod
    def _parse_search_result(row: dict) -> EpcSearchResult:
        """Convert one camelCase API row into an ``EpcSearchResult``.

        Address lines 2-4 and ``uprn`` may be absent; every other key is
        required and a missing one raises ``KeyError``.
        """
        return EpcSearchResult(
            certificate_number=row["certificateNumber"],
            address_line_1=row["addressLine1"],
            address_line_2=row.get("addressLine2"),
            address_line_3=row.get("addressLine3"),
            address_line_4=row.get("addressLine4"),
            postcode=row["postcode"],
            post_town=row["postTown"],
            uprn=row.get("uprn"),
            current_energy_efficiency_band=row["currentEnergyEfficiencyBand"],
            registration_date=row["registrationDate"],
        )

    def _pick_best_cert(
        self,
        candidates: list[EpcSearchResult],
        user_address: str,
        use_full_address: bool,
        fn: Callable[..., pd.DataFrame],
    ) -> Optional[str]:
        """Pick the certificate number of the single best-scoring candidate.

        Args:
            candidates: Search results to score.
            user_address: Address to compare each candidate against.
            use_full_address: Score on all address lines joined when true,
                otherwise on address line 1 only.
            fn: Scorer called as ``fn(df, user_address=...)`` with columns
                ``address``, ``uprn`` and ``certificate_number``. Its result
                must carry ``lexiscore``, ``lexirank`` and
                ``certificate_number`` columns.

        Returns:
            The winning certificate number, or ``None`` when there is nothing
            to score, the best score is below ``_MIN_MATCH_SCORE``, or rank 1
            is not held by exactly one row.
        """
        if not candidates:
            return None

        df = pd.DataFrame(
            [
                {
                    # ``full_address`` is a property: read it, do not call it.
                    "address": (
                        r.full_address if use_full_address else r.address_line_1
                    ),
                    "uprn": str(r.uprn) if r.uprn is not None else "",
                    "certificate_number": r.certificate_number,
                }
                for r in candidates
            ]
        )

        scored = fn(df, user_address=user_address)
        if scored.empty:
            return None

        # Row 0 is treated as the best match — presumably ``fn`` returns rows
        # sorted best-first; confirm against the scorer.
        best_score = scored.iloc[0]["lexiscore"]
        if best_score < self._MIN_MATCH_SCORE:
            return None

        # A tie for first place is ambiguous, so no candidate is chosen.
        top = scored[scored["lexirank"] == 1]
        if len(top) != 1:
            return None

        return str(top.iloc[0]["certificate_number"])

    def _safe_get(self, cert_num: str) -> Optional[EpcPropertyData]:
        """Fetch a certificate, returning ``None`` on ``EpcNotFoundError``."""
        try:
            return self.get_by_certificate_number(cert_num)
        except EpcNotFoundError:
            return None
|