save match building number

This commit is contained in:
Jun-te Kim 2026-05-12 13:41:59 +00:00
parent 5cd21d8522
commit 46ec68e5db
2 changed files with 26 additions and 10 deletions

View file

@ -40,8 +40,8 @@ class EpcClientService:
return call_with_retry(lambda: self._search(postcode=postcode)) return call_with_retry(lambda: self._search(postcode=postcode))
# ------------------------------------------------------------------ # ------------------------------------------------------------------
# Private helpers # Private helpers
# ------------------------------------------------------------------ # ------------------------------------------------------------------
def _fetch_certificate(self, cert_num: str) -> dict[str, Any]: def _fetch_certificate(self, cert_num: str) -> dict[str, Any]:
resp = httpx.get( resp = httpx.get(
@ -52,7 +52,7 @@ class EpcClientService:
if resp.status_code == 404: if resp.status_code == 404:
raise EpcNotFoundError(cert_num) raise EpcNotFoundError(cert_num)
if resp.status_code == 429: if resp.status_code == 429:
raise EpcRateLimitError("Rate limited by EPC API") raise EpcRateLimpolars vs pandas code examplepolars vs pandas code exampleitError("Rate limited by EPC API")
if not resp.is_success: if not resp.is_success:
raise EpcApiError(f"EPC API error {resp.status_code}: {resp.text}") raise EpcApiError(f"EPC API error {resp.status_code}: {resp.text}")
return resp.json()["data"] return resp.json()["data"]

View file

@ -101,6 +101,16 @@ class AddressMatch:
tokens.append(replacement) tokens.append(replacement)
return " ".join(tokens) return " ".join(tokens)
@staticmethod
def _match_building_number(token: str, next_token: Optional[str]) -> Optional[str]:
    """Return the building number that starts at ``token``, if there is one.

    Args:
        token: The candidate token to inspect.
        next_token: The token that follows ``token``, or ``None`` when
            ``token`` is the last one.

    Returns:
        * ``token + next_token`` when ``token`` is digits only and
          ``next_token`` is exactly one lowercase ``a``-``z`` letter
          (re-joins a number that was split from its letter suffix);
        * ``token`` unchanged when it is digits only with no such follower,
          or when it is already digits followed by one lowercase letter;
        * ``None`` for any other token.
    """
    if re.fullmatch(r"\d+", token):
        # Bare number: pick up a single-letter suffix from the next token.
        follows_with_letter = next_token is not None and re.fullmatch(
            r"[a-z]", next_token
        )
        suffix = next_token if follows_with_letter else ""
        return token + suffix
    # Number that already carries its letter suffix, e.g. "82a".
    return token if re.fullmatch(r"\d+[a-z]", token) else None
@staticmethod @staticmethod
def levenshtein(a: str, b: str) -> float: def levenshtein(a: str, b: str) -> float:
""" """
@ -146,13 +156,9 @@ class AddressMatch:
# first remaining number is building number; recombine with a # first remaining number is building number; recombine with a
# single-letter suffix when normalisation has split "82a" → "82 a" # single-letter suffix when normalisation has split "82a" → "82 a"
for i, t in enumerate(cleaned): for i, t in enumerate(cleaned):
if re.fullmatch(r"\d+[a-z]", t): nxt = cleaned[i + 1] if i + 1 < len(cleaned) else None
return t if (match := AddressMatch._match_building_number(t, nxt)) is not None:
if re.fullmatch(r"\d+", t): return match
nxt = cleaned[i + 1] if i + 1 < len(cleaned) else None
if nxt is not None and re.fullmatch(r"[a-z]", nxt):
return t + nxt
return t
return None return None
@ -259,3 +265,13 @@ def get_uprn_candidates(
out[uprn_column] = out[uprn_column].astype(str).str.replace(r"\.0$", "", regex=True) out[uprn_column] = out[uprn_column].astype(str).str.replace(r"\.0$", "", regex=True)
out["lexirank"] = out["lexiscore"].rank(method="dense", ascending=False).astype(int) out["lexirank"] = out["lexiscore"].rank(method="dense", ascending=False).astype(int)
return out.sort_values(["lexirank", "lexiscore"], ascending=[True, False]) return out.sort_values(["lexirank", "lexiscore"], ascending=[True, False])
def df_has_single_uprn(df: pd.DataFrame, uprn: str, column: str = "uprn") -> bool:
    """Return True if every non-null value in ``column`` equals ``uprn``.

    Both the column values and ``uprn`` are compared as strings after
    stripping surrounding whitespace and a trailing ``.0``. A numeric column
    that contains nulls is stored as float, so its values stringify as
    ``"123.0"``; without this normalisation they would never equal ``"123"``.

    Args:
        df: Frame to inspect.
        uprn: The UPRN every non-null row is expected to carry.
        column: Name of the column holding UPRNs.

    Returns:
        ``False`` when ``column`` is missing, when it holds no non-null
        values, or when more than one distinct value is present; otherwise
        whether the single distinct value equals ``uprn``.
    """
    if column not in df.columns:
        return False

    distinct = (
        df[column]
        .dropna()
        .astype(str)
        .str.strip()
        .str.replace(r"\.0$", "", regex=True)
        .unique()
    )
    # Normalise the target exactly like the column values.
    target = re.sub(r"\.0$", "", str(uprn).strip())

    # An empty ``distinct`` fails the length check, so no separate guard is needed.
    return len(distinct) == 1 and distinct[0] == target