mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
save match building number
This commit is contained in:
parent
5cd21d8522
commit
46ec68e5db
2 changed files with 26 additions and 10 deletions
|
|
@ -40,8 +40,8 @@ class EpcClientService:
|
|||
return call_with_retry(lambda: self._search(postcode=postcode))
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Private helpers
|
||||
# ------------------------------------------------------------------
|
||||
# Private helpers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _fetch_certificate(self, cert_num: str) -> dict[str, Any]:
|
||||
resp = httpx.get(
|
||||
|
|
@ -52,7 +52,7 @@ class EpcClientService:
|
|||
if resp.status_code == 404:
|
||||
raise EpcNotFoundError(cert_num)
|
||||
if resp.status_code == 429:
|
||||
raise EpcRateLimitError("Rate limited by EPC API")
|
||||
raise EpcRateLimitError("Rate limited by EPC API")
|
||||
if not resp.is_success:
|
||||
raise EpcApiError(f"EPC API error {resp.status_code}: {resp.text}")
|
||||
return resp.json()["data"]
|
||||
|
|
|
|||
|
|
@ -101,6 +101,16 @@ class AddressMatch:
|
|||
tokens.append(replacement)
|
||||
return " ".join(tokens)
|
||||
|
||||
@staticmethod
def _match_building_number(token: str, next_token: Optional[str]) -> Optional[str]:
    """Return the building number that starts at ``token``, if there is one.

    Args:
        token: The token being examined.
        next_token: The token that follows it, or ``None`` when ``token``
            is the last one.

    Returns:
        ``token`` unchanged when it is digits followed by one lowercase
        letter (e.g. ``"82a"``); ``token + next_token`` when ``token`` is
        all digits and ``next_token`` is a single lowercase letter
        (e.g. ``"82"`` + ``"a"``); ``token`` alone when it is all digits
        and no such suffix follows; otherwise ``None``.
    """
    # Number with its letter suffix already attached.
    if re.fullmatch(r"\d+[a-z]", token) is not None:
        return token

    # Anything that is not a bare number is not a building number.
    if re.fullmatch(r"\d+", token) is None:
        return None

    # Bare number: re-attach a single-letter suffix that was split off.
    suffix_follows = (
        next_token is not None
        and re.fullmatch(r"[a-z]", next_token) is not None
    )
    return token + next_token if suffix_follows else token
|
||||
|
||||
@staticmethod
|
||||
def levenshtein(a: str, b: str) -> float:
|
||||
"""
|
||||
|
|
@ -146,13 +156,9 @@ class AddressMatch:
|
|||
# first remaining number is building number; recombine with a
|
||||
# single-letter suffix when normalisation has split "82a" → "82 a"
|
||||
for i, t in enumerate(cleaned):
|
||||
if re.fullmatch(r"\d+[a-z]", t):
|
||||
return t
|
||||
if re.fullmatch(r"\d+", t):
|
||||
nxt = cleaned[i + 1] if i + 1 < len(cleaned) else None
|
||||
if nxt is not None and re.fullmatch(r"[a-z]", nxt):
|
||||
return t + nxt
|
||||
return t
|
||||
nxt = cleaned[i + 1] if i + 1 < len(cleaned) else None
|
||||
if (match := AddressMatch._match_building_number(t, nxt)) is not None:
|
||||
return match
|
||||
|
||||
return None
|
||||
|
||||
|
|
@ -259,3 +265,13 @@ def get_uprn_candidates(
|
|||
out[uprn_column] = out[uprn_column].astype(str).str.replace(r"\.0$", "", regex=True)
|
||||
out["lexirank"] = out["lexiscore"].rank(method="dense", ascending=False).astype(int)
|
||||
return out.sort_values(["lexirank", "lexiscore"], ascending=[True, False])
|
||||
|
||||
|
||||
def df_has_single_uprn(df: pd.DataFrame, uprn: str, column: str = "uprn") -> bool:
    """Return True if all non-null UPRNs in ``df`` match the given ``uprn``.

    Values are compared as stripped strings. A trailing ``".0"`` is removed
    from both sides first: a numeric column that contains nulls is held as
    float by pandas, so its values stringify as e.g. ``"100012345.0"`` and
    would otherwise never equal ``"100012345"``.

    Args:
        df: Frame to inspect.
        uprn: The UPRN every non-null value must equal.
        column: Name of the column holding UPRNs.

    Returns:
        ``False`` when ``column`` is missing or has no non-null values;
        otherwise whether exactly one distinct UPRN is present and it
        equals ``uprn``.
    """
    if column not in df.columns:
        return False

    distinct = (
        df[column]
        .dropna()
        .astype(str)
        .str.strip()
        # Drop the float artefact left by NaN-upcast numeric columns.
        .str.replace(r"\.0$", "", regex=True)
        .unique()
    )
    if len(distinct) != 1:
        return False

    # Normalise the target the same way as the column values.
    target = str(uprn).strip()
    if target.endswith(".0"):
        target = target[:-2]
    return bool(distinct[0] == target)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue