import pandas as pd

from backend.utils.addressMatch import AddressMatch

# Trailing ".0" left behind when a float-typed identifier is rendered as text
# (e.g. 100023336956.0 -> "100023336956.0").
_FLOAT_SUFFIX_PATTERN = r"\.0$"


def df_has_single_uprn(df: pd.DataFrame, uprn: str, column: str = "uprn") -> bool:
    """
    Check whether every non-null UPRN in ``df`` equals the given ``uprn``.

    Values are compared as text after stripping surrounding whitespace and a
    trailing ".0", so a float-typed column (``123.0``) matches ``"123"``.
    This is the same ".0" clean-up that ``get_uprn_candidates`` applies to its
    UPRN column.

    Args:
        df: Frame to inspect.
        uprn: The UPRN that all rows are expected to carry.
        column: Name of the column holding UPRNs.

    Returns:
        True if the column exists, has at least one non-null value, and all
        non-null values normalise to ``uprn``. False otherwise (including when
        the column is missing or contains only nulls).
    """
    if column not in df.columns:
        return False

    distinct = (
        df[column]
        .dropna()
        .astype(str)
        .str.strip()
        .str.replace(_FLOAT_SUFFIX_PATTERN, "", regex=True)
        .unique()
    )

    # Normalise the expected value the same way as the column values.
    target = str(uprn).strip()
    if target.endswith(".0"):
        target = target[: -len(".0")]

    # An empty result (no non-null UPRNs) fails the length check as well.
    return bool(len(distinct) == 1 and distinct[0] == target)


def get_uprn_candidates(
    df: pd.DataFrame,
    user_address: str,
    address_column: str = "address",
    uprn_column: str = "uprn",
) -> pd.DataFrame:
    """
    Annotate EPC results with lexicographical similarity scores and ranks.

    Works on a copy of ``df``; the input frame is not modified. The copy gains
    two columns:

    * ``lexiscore`` - result of ``AddressMatch.levenshtein`` between the
      normalised ``user_address`` and each row's address.
    * ``lexirank`` - dense integer rank of ``lexiscore``, where the highest
      score receives rank 1.

    The UPRN column is rewritten as text with any trailing ".0" removed.

    DOES NOT choose or return a UPRN.

    Args:
        df: Candidate rows; must contain ``address_column`` and ``uprn_column``.
        user_address: Address supplied by the user, normalised before scoring.
        address_column: Name of the column holding candidate addresses.
        uprn_column: Name of the column holding UPRNs.

    Returns:
        The annotated copy, sorted by ascending ``lexirank`` (i.e. descending
        ``lexiscore``).

    Raises:
        ValueError: If ``address_column`` or ``uprn_column`` is missing.
    """
    if address_column not in df.columns:
        raise ValueError(f"Missing column: {address_column}")
    if uprn_column not in df.columns:
        raise ValueError(f"Missing column: {uprn_column}")

    out = df.copy()

    user_norm = AddressMatch.normalise_address(user_address)

    # NOTE(review): only the user's address is normalised here; the row address
    # is passed through as-is. Confirm whether AddressMatch.levenshtein
    # normalises its arguments itself.
    out["lexiscore"] = out[address_column].apply(
        lambda x: AddressMatch.levenshtein(user_norm, x)
    )

    # NOTE(review): depending on the pandas version, missing UPRNs may be
    # rendered as the text "nan" by astype(str) - verify downstream handling.
    out[uprn_column] = (
        out[uprn_column].astype(str).str.replace(_FLOAT_SUFFIX_PATTERN, "", regex=True)
    )

    # Highest score ranks first. NOTE(review): this presumes a larger
    # AddressMatch.levenshtein value means a better match - TODO confirm.
    out["lexirank"] = out["lexiscore"].rank(method="dense", ascending=False).astype(int)

    return out.sort_values(
        ["lexirank", "lexiscore"],
        ascending=[True, False],
    )