diff --git a/backend/epc_client/epc_client_service.py b/backend/epc_client/epc_client_service.py index d00a164f..777e8d14 100644 --- a/backend/epc_client/epc_client_service.py +++ b/backend/epc_client/epc_client_service.py @@ -40,8 +40,8 @@ class EpcClientService: return call_with_retry(lambda: self._search(postcode=postcode)) # ------------------------------------------------------------------ - # Private helpers - # ------------------------------------------------------------------ + # Private helpers + # ------------------------------------------------------------------ def _fetch_certificate(self, cert_num: str) -> dict[str, Any]: resp = httpx.get( @@ -52,7 +52,7 @@ class EpcClientService: if resp.status_code == 404: raise EpcNotFoundError(cert_num) if resp.status_code == 429: - raise EpcRateLimitError("Rate limited by EPC API") + raise EpcRateLimitError("Rate limited by EPC API") if not resp.is_success: raise EpcApiError(f"EPC API error {resp.status_code}: {resp.text}") return resp.json()["data"] diff --git a/backend/utils/addressMatch.py b/backend/utils/addressMatch.py index 7618e9ac..69be6f59 100644 --- a/backend/utils/addressMatch.py +++ b/backend/utils/addressMatch.py @@ -101,6 +101,16 @@ class AddressMatch: tokens.append(replacement) return " ".join(tokens) + @staticmethod + def _match_building_number(token: str, next_token: Optional[str]) -> Optional[str]: + if re.fullmatch(r"\d+[a-z]", token): + return token + if re.fullmatch(r"\d+", token): + if next_token is not None and re.fullmatch(r"[a-z]", next_token): + return token + next_token + return token + return None + @staticmethod def levenshtein(a: str, b: str) -> float: """ @@ -146,13 +156,9 @@ class AddressMatch: # first remaining number is building number; recombine with a # single-letter suffix when normalisation has split "82a" → "82 a" for i, t 
in enumerate(cleaned): - if re.fullmatch(r"\d+[a-z]", t): - return t - if re.fullmatch(r"\d+", t): - nxt = cleaned[i + 1] if i + 1 < len(cleaned) else None - if nxt is not None and re.fullmatch(r"[a-z]", nxt): - return t + nxt - return t + nxt = cleaned[i + 1] if i + 1 < len(cleaned) else None + if (match := AddressMatch._match_building_number(t, nxt)) is not None: + return match return None @@ -259,3 +265,13 @@ def get_uprn_candidates( out[uprn_column] = out[uprn_column].astype(str).str.replace(r"\.0$", "", regex=True) out["lexirank"] = out["lexiscore"].rank(method="dense", ascending=False).astype(int) return out.sort_values(["lexirank", "lexiscore"], ascending=[True, False]) + + +def df_has_single_uprn(df: pd.DataFrame, uprn: str, column: str = "uprn") -> bool: + """Returns True if all non-null UPRNs in df match the given uprn.""" + if column not in df.columns: + return False + uprns = df[column].dropna().astype(str).str.strip().unique() + if len(uprns) == 0: + return False + return len(uprns) == 1 and uprns[0] == str(uprn)