Remove the duplicate `get_uprn_candidates` function and rename the remaining copy to the more descriptive `rank_by_address_similarity`

This commit is contained in:
Jun-te Kim 2026-05-12 13:46:02 +00:00
parent b0e935d497
commit 27f2ef5e83
4 changed files with 7 additions and 35 deletions

View file

@ -14,11 +14,8 @@ from utils.s3 import (
)
from datetime import datetime
from backend.utils.addressMatch import (
AddressMatch,
get_uprn_candidates,
)
from backend.address2UPRN.scoring import all_uprns_match
from backend.utils.addressMatch import AddressMatch
from backend.address2UPRN.scoring import all_uprns_match, rank_by_address_similarity
from datatypes.epc.domain.historic_epc_matching import (
match_addresses_for_postcode,
)
@ -82,7 +79,7 @@ def get_uprn_with_epc_df(
if epc_df.empty:
return None
scored_df = get_uprn_candidates(
scored_df = rank_by_address_similarity(
epc_df,
user_address=user_inputed_address,
)
@ -174,7 +171,7 @@ def resolve_uprns_for_postcode_group(
for _, row in group_df.iterrows():
user_address = str(row[address_col]).strip()
scored_df = get_uprn_candidates(
scored_df = rank_by_address_similarity(
epc_df,
user_address=user_address,
)

View file

@ -19,7 +19,7 @@ def all_uprns_match(
return len(uprns) == 1 and uprns[0] == str(target_uprn)
def get_uprn_candidates(
def rank_by_address_similarity(
df: pd.DataFrame,
user_address: str,
address_column: str = "address",

View file

@ -242,31 +242,6 @@ def score_addresses(
return df[address_column].apply(lambda x: AddressMatch.score(user_address, x))
def get_uprn_candidates(
    df: pd.DataFrame,
    user_address: str,
    address_column: str = "address",
    uprn_column: str = "uprn",
) -> pd.DataFrame:
    """
    Score each row's address against ``user_address`` and rank the rows.

    The input frame is not modified; all work happens on a copy. The copy
    gains two columns:

    * ``lexiscore`` - the value of ``AddressMatch.levenshtein`` for the
      normalised user address and the row's address.
    * ``lexirank`` - dense integer rank of ``lexiscore``, where rank 1 is
      the highest score.

    The UPRN column is converted to strings with any trailing ``.0``
    removed.

    Args:
        df: Candidate rows to score.
        user_address: Address to compare every row against.
        address_column: Name of the column holding the candidate addresses.
        uprn_column: Name of the column holding the UPRNs.

    Returns:
        The annotated copy, ordered by ascending ``lexirank`` and then by
        descending ``lexiscore``.

    Raises:
        ValueError: If ``address_column`` or ``uprn_column`` is not a column
            of ``df``.
    """
    # Address column is checked first so the reported column is unchanged.
    for required in (address_column, uprn_column):
        if required not in df.columns:
            raise ValueError(f"Missing column: {required}")

    ranked = df.copy()
    query = AddressMatch.normalise_address(user_address)

    def _score(candidate):
        # NOTE(review): only the user address is normalised here; candidate
        # values are passed through as stored - confirm that is intended.
        return AddressMatch.levenshtein(query, candidate)

    ranked["lexiscore"] = ranked[address_column].apply(_score)

    # Strip a trailing ".0" from the stringified UPRN (presumably left by
    # float-typed UPRN columns - verify against callers).
    uprn_text = ranked[uprn_column].astype(str)
    ranked[uprn_column] = uprn_text.str.replace(r"\.0$", "", regex=True)

    # Dense ranking: tied scores share a rank and no rank numbers are skipped.
    dense_rank = ranked["lexiscore"].rank(method="dense", ascending=False)
    ranked["lexirank"] = dense_rank.astype(int)

    return ranked.sort_values(
        by=["lexirank", "lexiscore"],
        ascending=[True, False],
    )
def df_has_single_uprn(df: pd.DataFrame, uprn: str, column: str = "uprn") -> bool:
"""Returns True if all non-null UPRNs in df match the given uprn."""
if column not in df.columns:

View file

@ -4,7 +4,7 @@ from typing import Optional
import pandas as pd
from botocore.exceptions import ClientError
from backend.address2UPRN.scoring import get_uprn_candidates
from backend.address2UPRN.scoring import rank_by_address_similarity
from backend.utils.addressMatch import AddressMatch
from datatypes.epc.domain.historic_epc import HistoricEpc
from utils.pandas_utils import pandas_cell_to_str
@ -85,7 +85,7 @@ def match_addresses_for_postcode(
) from e
raise
scored = get_uprn_candidates(
scored = rank_by_address_similarity(
df,
user_address=user_address,
address_column=address_column,