test first with just 5

This commit is contained in:
Jun-te Kim 2026-02-12 15:03:38 +00:00
parent 1bf322005c
commit 3bdd4a4a97
3 changed files with 20 additions and 37 deletions

View file

@ -3,6 +3,8 @@ FROM python:3.11.10-bullseye
ARG USER=vscode
ARG DEBIAN_FRONTEND=noninteractive
ARG DOCKER_GID=1003
# 1) Toolchain + utilities for building libpostal
RUN apt-get update && apt-get install -y --no-install-recommends \

View file

@ -6,7 +6,8 @@
"workspaceFolder": "/workspaces/model",
"postStartCommand": "bash .devcontainer/backend/post-install.sh",
"mounts": [
"source=${localEnv:HOME},target=/home/vscode,type=bind"
"source=${localEnv:HOME},target=/home/vscode,type=bind",
"source=/var/run/docker.sock,target=/var/run/docker.sock,type=bind"
],
"customizations": {
"vscode": {

View file

@ -329,9 +329,6 @@ def get_uprn_candidates(
def get_uprn_with_epc_df(
user_inputed_address: str,
epc_df: pd.DataFrame,
return_address=False,
return_EPC=False,
return_score=True,
):
"""
Return a (uprn, address, score) tuple, or None when no match is found, using a pre-fetched EPC dataframe.
@ -371,8 +368,6 @@ def get_uprn_with_epc_df(
return None
address = top_rank_df["address"].values[0]
lexiscore = float(top_rank_df["lexiscore"].values[0])
epc = top_rank_df["current-energy-efficiency"].values[0]
score = float(top_rank_df["lexiscore"].values[0])
# logger.info(f"Address found to be: {address}, with lexiscore {lexiscore}")
@ -382,20 +377,7 @@ def get_uprn_with_epc_df(
if found_uprn == "":
return None
if return_address:
if return_EPC is False:
return found_uprn, address
else:
if return_score is False:
return found_uprn, address, epc
else:
return (
found_uprn,
address,
epc,
score,
)
return found_uprn
return (found_uprn, address, score)
def get_uprn(
@ -688,7 +670,11 @@ def handler(event, context, local=False):
# Create user_input column by concatenating Address 1, Address 2 and Address 3
df["user_input"] = (
df["Address 1"].fillna("") + " " + df["Address 2"].fillna("")
df["Address 1"].fillna("")
+ " "
+ df["Address 2"].fillna("")
+ " "
+ df["Address 3"].fillna("")
).str.strip()
logger.info(f"Created user_input column from Address 1 and Address 2")
@ -743,14 +729,11 @@ def handler(event, context, local=False):
result = get_uprn_with_epc_df(
user_inputed_address=user_input,
epc_df=epc_df,
return_address=True,
return_EPC=True,
return_score=True,
)
# Parse result tuple if successful
if result:
uprn, found_address, epc, score = result
uprn, found_address, score = result
uprns_found += 1
logger.info(
f"Found UPRN for {user_input} in {postcode}: {uprn} (score: {score})"
@ -759,10 +742,9 @@ def handler(event, context, local=False):
results_data.append(
{
**row, # Include all original data
"found_uprn": uprn,
"found_address": found_address,
"epc_rating": epc,
"lexiscore": score,
"uprn": uprn,
"domna_found_address": found_address,
"domna_lexiscore": score,
}
)
else:
@ -772,10 +754,9 @@ def handler(event, context, local=False):
results_data.append(
{
**row, # Include all original data
"found_uprn": None,
"found_address": None,
"epc_rating": None,
"lexiscore": None,
"uprn": None,
"domna_found_address": None,
"domna_lexiscore": None,
}
)
@ -789,10 +770,9 @@ def handler(event, context, local=False):
results_data.append(
{
**row,
"found_uprn": None,
"found_address": None,
"epc_rating": None,
"score": None,
"uprn": None,
"domna_found_address": None,
"domna_lexiscore": None,
"error": str(e),
}
)