mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
56 lines
1.4 KiB
Python
56 lines
1.4 KiB
Python
import pandas as pd
|
|
|
|
from backend.utils.addressMatch import AddressMatch
|
|
|
|
|
|
def all_uprns_match(
    df: pd.DataFrame,
    target_uprn: str,
    column: str = "uprn",
) -> bool:
    """
    Report whether every non-null UPRN in ``df[column]`` equals ``target_uprn``.

    Values are compared as text after trimming surrounding whitespace and
    removing a trailing ``.0``; the same normalisation is applied to
    ``target_uprn``.

    Args:
        df: Frame holding the UPRN values to check.
        target_uprn: The UPRN every row is expected to carry.
        column: Name of the column in ``df`` that holds the UPRNs.

    Returns:
        True only when the column exists and its non-null values collapse to
        exactly one distinct UPRN equal to ``target_uprn``. False when the
        column is missing, has no non-null values, or holds several distinct
        values.
    """
    if column not in df.columns:
        return False

    # pandas stores a numeric column that contains missing values as float, so
    # a UPRN such as 100012345 is rendered as "100012345.0" by astype(str).
    # Drop that artefact (as get_uprn_candidates does) before comparing.
    uprns = (
        df[column]
        .dropna()
        .astype(str)
        .str.strip()
        .str.replace(r"\.0$", "", regex=True)
        .unique()
    )

    # Zero distinct values (nothing to compare) and several distinct values
    # (conflicting UPRNs) are both a mismatch.
    if len(uprns) != 1:
        return False

    target = str(target_uprn).strip()
    if target.endswith(".0"):
        target = target[:-2]

    return bool(uprns[0] == target)
|
|
|
|
|
|
def get_uprn_candidates(
    df: pd.DataFrame,
    user_address: str,
    address_column: str = "address",
    uprn_column: str = "uprn",
) -> pd.DataFrame:
    """
    Score and rank each row of ``df`` against ``user_address``.

    Works on a copy of ``df`` and adds two columns:

    * ``lexiscore`` - ``AddressMatch.levenshtein`` of the normalised user
      address and the row's address value.
    * ``lexirank`` - dense rank of ``lexiscore`` where the largest score gets
      rank 1 and tied scores share a rank.

    The UPRN column is rewritten as text with any trailing ``.0`` removed.
    Rows are returned best rank first. No UPRN is selected here; choosing one
    is left to the caller.

    NOTE(review): missing UPRNs come out as the text ``"nan"`` because of the
    ``astype(str)`` conversion - confirm callers expect that.
    NOTE(review): only the user address is normalised in this function;
    presumably ``AddressMatch.levenshtein`` copes with raw candidate
    addresses - verify against the helper.

    Raises:
        ValueError: if ``address_column`` or ``uprn_column`` is not a column
            of ``df``.
    """
    available = df.columns
    if address_column not in available:
        raise ValueError(f"Missing column: {address_column}")
    if uprn_column not in available:
        raise ValueError(f"Missing column: {uprn_column}")

    ranked = df.copy()
    query = AddressMatch.normalise_address(user_address)

    def score(candidate):
        # Compare one candidate address against the normalised query.
        return AddressMatch.levenshtein(query, candidate)

    ranked["lexiscore"] = ranked[address_column].apply(score)

    # Float-typed UPRNs stringify as "<digits>.0"; drop that suffix.
    uprn_text = ranked[uprn_column].astype(str)
    ranked[uprn_column] = uprn_text.str.replace(r"\.0$", "", regex=True)

    # Dense ranking: ties share a rank and ranks have no gaps.
    dense_rank = ranked["lexiscore"].rank(method="dense", ascending=False)
    ranked["lexirank"] = dense_rank.astype(int)

    return ranked.sort_values(
        by=["lexirank", "lexiscore"],
        ascending=[True, False],
    )
|