one place to have df_has_single_uprn

This commit is contained in:
Jun-te Kim 2026-05-12 10:51:27 +00:00
parent b364df89ad
commit bec5c4f3c3
3 changed files with 2 additions and 12 deletions

View file

@ -62,6 +62,7 @@ bash .devcontainer/backend/install-claude-skills.sh
## Type Safety ## Type Safety
All new code must pass `pyright` with zero errors under `typeCheckingMode = strict`. All new code must pass `pyright` with zero errors under `typeCheckingMode = strict`.
Use `Optional[X]` rather than `X | None` when annotating optional values.
Annotate all function return types. Use `dict[str, Any]` for untyped external API Annotate all function return types. Use `dict[str, Any]` for untyped external API
payloads — never bare `dict`. Add `pandas-stubs` when introducing pandas to a module. payloads — never bare `dict`. Add `pandas-stubs` when introducing pandas to a module.

View file

@ -17,9 +17,8 @@ from datetime import datetime
from backend.utils.addressMatch import ( from backend.utils.addressMatch import (
AddressMatch, AddressMatch,
get_uprn_candidates, get_uprn_candidates,
df_has_single_uprn,
score_addresses,
) )
from backend.address2UPRN.scoring import df_has_single_uprn
from datatypes.epc.domain.historic_epc_matching import ( from datatypes.epc.domain.historic_epc_matching import (
match_addresses_for_postcode, match_addresses_for_postcode,
) )

View file

@ -259,13 +259,3 @@ def get_uprn_candidates(
out[uprn_column] = out[uprn_column].astype(str).str.replace(r"\.0$", "", regex=True) out[uprn_column] = out[uprn_column].astype(str).str.replace(r"\.0$", "", regex=True)
out["lexirank"] = out["lexiscore"].rank(method="dense", ascending=False).astype(int) out["lexirank"] = out["lexiscore"].rank(method="dense", ascending=False).astype(int)
return out.sort_values(["lexirank", "lexiscore"], ascending=[True, False]) return out.sort_values(["lexirank", "lexiscore"], ascending=[True, False])
def df_has_single_uprn(df: pd.DataFrame, uprn: str, column: str = "uprn") -> bool:
    """Return True if every non-null UPRN in ``df[column]`` equals ``uprn``.

    Values are compared as strings after stripping surrounding whitespace and
    a trailing ``.0``. The ``.0`` handling matters because pandas upcasts an
    integer column that contains nulls to float, so ``123`` would otherwise be
    rendered as ``"123.0"`` and never match ``"123"``.

    Args:
        df: Frame to inspect.
        uprn: The UPRN that every non-null value must equal.
        column: Name of the column holding UPRNs.

    Returns:
        False when ``column`` is missing or holds no non-null values;
        otherwise True only if exactly one distinct UPRN is present and it
        equals ``uprn``.
    """
    if column not in df.columns:
        return False

    # Normalise the target the same way as the column values below.
    target = str(uprn).strip()
    if target.endswith(".0"):
        target = target[:-2]

    uprns = (
        df[column]
        .dropna()
        .astype(str)
        .str.strip()
        # Undo the float rendering of numeric UPRNs ("123.0" -> "123").
        .str.replace(r"\.0$", "", regex=True)
        .unique()
    )
    # An empty result (no non-null UPRNs) falls through to False here.
    return bool(len(uprns) == 1 and uprns[0] == target)