mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
save match building number
This commit is contained in:
parent
5cd21d8522
commit
46ec68e5db
2 changed files with 26 additions and 10 deletions
|
|
@ -40,8 +40,8 @@ class EpcClientService:
|
||||||
return call_with_retry(lambda: self._search(postcode=postcode))
|
return call_with_retry(lambda: self._search(postcode=postcode))
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# Private helpers
|
# Private helpers
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
def _fetch_certificate(self, cert_num: str) -> dict[str, Any]:
|
def _fetch_certificate(self, cert_num: str) -> dict[str, Any]:
|
||||||
resp = httpx.get(
|
resp = httpx.get(
|
||||||
|
|
@ -52,7 +52,7 @@ class EpcClientService:
|
||||||
if resp.status_code == 404:
|
if resp.status_code == 404:
|
||||||
raise EpcNotFoundError(cert_num)
|
raise EpcNotFoundError(cert_num)
|
||||||
if resp.status_code == 429:
|
if resp.status_code == 429:
|
||||||
raise EpcRateLimitError("Rate limited by EPC API")
|
raise EpcRateLimitError("Rate limited by EPC API")
|
||||||
if not resp.is_success:
|
if not resp.is_success:
|
||||||
raise EpcApiError(f"EPC API error {resp.status_code}: {resp.text}")
|
raise EpcApiError(f"EPC API error {resp.status_code}: {resp.text}")
|
||||||
return resp.json()["data"]
|
return resp.json()["data"]
|
||||||
|
|
|
||||||
|
|
@ -101,6 +101,16 @@ class AddressMatch:
|
||||||
tokens.append(replacement)
|
tokens.append(replacement)
|
||||||
return " ".join(tokens)
|
return " ".join(tokens)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _match_building_number(token: str, next_token: Optional[str]) -> Optional[str]:
|
||||||
|
if re.fullmatch(r"\d+[a-z]", token):
|
||||||
|
return token
|
||||||
|
if re.fullmatch(r"\d+", token):
|
||||||
|
if next_token is not None and re.fullmatch(r"[a-z]", next_token):
|
||||||
|
return token + next_token
|
||||||
|
return token
|
||||||
|
return None
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def levenshtein(a: str, b: str) -> float:
|
def levenshtein(a: str, b: str) -> float:
|
||||||
"""
|
"""
|
||||||
|
|
@ -146,13 +156,9 @@ class AddressMatch:
|
||||||
# first remaining number is building number; recombine with a
|
# first remaining number is building number; recombine with a
|
||||||
# single-letter suffix when normalisation has split "82a" → "82 a"
|
# single-letter suffix when normalisation has split "82a" → "82 a"
|
||||||
for i, t in enumerate(cleaned):
|
for i, t in enumerate(cleaned):
|
||||||
if re.fullmatch(r"\d+[a-z]", t):
|
nxt = cleaned[i + 1] if i + 1 < len(cleaned) else None
|
||||||
return t
|
if (match := AddressMatch._match_building_number(t, nxt)) is not None:
|
||||||
if re.fullmatch(r"\d+", t):
|
return match
|
||||||
nxt = cleaned[i + 1] if i + 1 < len(cleaned) else None
|
|
||||||
if nxt is not None and re.fullmatch(r"[a-z]", nxt):
|
|
||||||
return t + nxt
|
|
||||||
return t
|
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
@ -259,3 +265,13 @@ def get_uprn_candidates(
|
||||||
out[uprn_column] = out[uprn_column].astype(str).str.replace(r"\.0$", "", regex=True)
|
out[uprn_column] = out[uprn_column].astype(str).str.replace(r"\.0$", "", regex=True)
|
||||||
out["lexirank"] = out["lexiscore"].rank(method="dense", ascending=False).astype(int)
|
out["lexirank"] = out["lexiscore"].rank(method="dense", ascending=False).astype(int)
|
||||||
return out.sort_values(["lexirank", "lexiscore"], ascending=[True, False])
|
return out.sort_values(["lexirank", "lexiscore"], ascending=[True, False])
|
||||||
|
|
||||||
|
|
||||||
|
def df_has_single_uprn(df: pd.DataFrame, uprn: str, column: str = "uprn") -> bool:
|
||||||
|
"""Returns True if all non-null UPRNs in df match the given uprn."""
|
||||||
|
if column not in df.columns:
|
||||||
|
return False
|
||||||
|
uprns = df[column].dropna().astype(str).str.strip().unique()
|
||||||
|
if len(uprns) == 0:
|
||||||
|
return False
|
||||||
|
return len(uprns) == 1 and uprns[0] == str(uprn)
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue