mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
dan's pr
This commit is contained in:
parent
1b9c26a2b6
commit
4d013f3295
3 changed files with 18 additions and 20 deletions
|
|
@ -14,7 +14,7 @@ from utils.s3 import (
|
|||
)
|
||||
from datetime import datetime
|
||||
|
||||
from backend.utils.addressMatch import addressMatch
|
||||
from backend.utils.addressMatch import AddressMatch
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
|
@ -35,7 +35,7 @@ def score_addresses(
|
|||
if column not in df.columns:
|
||||
raise ValueError(f"Missing column: {column}")
|
||||
|
||||
return df[column].apply(lambda x: addressMatch.score(user_address, x))
|
||||
return df[column].apply(lambda x: AddressMatch.score(user_address, x))
|
||||
|
||||
|
||||
def get_epc_data_with_postcode(postcode, size=500, attempt=1, max_attempts=3):
|
||||
|
|
@ -127,10 +127,10 @@ def get_uprn_candidates(
|
|||
|
||||
out = df.copy()
|
||||
|
||||
user_norm = addressMatch.normalise_address(user_address)
|
||||
user_norm = AddressMatch.normalise_address(user_address)
|
||||
|
||||
out["lexiscore"] = out[address_column].apply(
|
||||
lambda x: addressMatch.levenshtein(user_norm, x)
|
||||
lambda x: AddressMatch.levenshtein(user_norm, x)
|
||||
)
|
||||
|
||||
# Normalise UPRN to string
|
||||
|
|
@ -455,7 +455,7 @@ def handler(event, context, local=False):
|
|||
)
|
||||
|
||||
# Validate postcode before processing
|
||||
if not addressMatch.is_valid_postcode(postcode):
|
||||
if not AddressMatch.is_valid_postcode(postcode):
|
||||
logger.warning(f"Postcode {postcode} is invalid, skipping")
|
||||
continue
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
from typing import Any
|
||||
from typing import Any, Optional
|
||||
import json
|
||||
from utils.logger import setup_logger
|
||||
import logging
|
||||
|
|
@ -8,7 +8,7 @@ from utils.s3 import (
|
|||
read_csv_from_s3 as read_csv_from_s3_dict,
|
||||
parse_s3_uri,
|
||||
)
|
||||
from backend.utils.addressMatch import addressMatch
|
||||
from backend.utils.addressMatch import AddressMatch
|
||||
from backend.app.db.connection import get_db_session
|
||||
from backend.app.db.models.postcode_search import PostcodeSearchModel
|
||||
from backend.utils.ordnance_survey import (
|
||||
|
|
@ -124,7 +124,7 @@ def handler(body: dict[str, Any], context: Any, local: bool = False) -> None:
|
|||
|
||||
s3_uri: str = body.get("s3_uri", "")
|
||||
lexiscore_threshold: float = body.get("lexiscore_threshold", 0.5)
|
||||
lexiscore_column: str = body.get("lexiscore_column", None)
|
||||
lexiscore_column: Optional[str] = body.get("lexiscore_column", None)
|
||||
task_id: str = body.get("task_id", "")
|
||||
sub_task_id: str = body.get("sub_task_id", "")
|
||||
|
||||
|
|
@ -158,7 +158,7 @@ def handler(body: dict[str, Any], context: Any, local: bool = False) -> None:
|
|||
# Process each postcode group at a time
|
||||
for postcode, group in grouped:
|
||||
print(f"Processing postcode: {postcode} ({len(group)} rows)")
|
||||
valid_group = addressMatch.is_valid_postcode(postcode)
|
||||
valid_group = AddressMatch.is_valid_postcode(postcode)
|
||||
if not valid_group:
|
||||
logger.warning(f"Postcode {postcode} is invalid, skipping")
|
||||
for idx in group.index:
|
||||
|
|
@ -203,7 +203,7 @@ def handler(body: dict[str, Any], context: Any, local: bool = False) -> None:
|
|||
|
||||
# Score against OS Places addresses
|
||||
scores = postcode_cache["ADDRESS"].apply(
|
||||
lambda addr: addressMatch.score(ordnancy_survey_user_input, addr)
|
||||
lambda addr: AddressMatch.score(ordnancy_survey_user_input, addr)
|
||||
)
|
||||
best_idx = scores.idxmax()
|
||||
best_score = scores[best_idx]
|
||||
|
|
@ -215,12 +215,10 @@ def handler(body: dict[str, Any], context: Any, local: bool = False) -> None:
|
|||
df.at[idx, "ordnance_survey_lexiscore"] = best_score
|
||||
|
||||
# Save results locally
|
||||
df.to_csv("ordnance_survey_results.csv", index=False)
|
||||
print(f"Results saved to ordnance_survey_results.csv ({len(df)} rows)")
|
||||
if local:
|
||||
df.to_csv("ordnance_survey_results.csv", index=False)
|
||||
print(f"Results saved to ordnance_survey_results.csv ({len(df)} rows)")
|
||||
|
||||
# Save results to S3
|
||||
if task_id and sub_task_id:
|
||||
try:
|
||||
save_results_to_s3(df, task_id, sub_task_id)
|
||||
except Exception as s3_error:
|
||||
logger.error(f"Failed to save results to S3: {s3_error}")
|
||||
save_results_to_s3(df, task_id, sub_task_id)
|
||||
|
|
|
|||
|
|
@ -4,13 +4,13 @@ from difflib import SequenceMatcher
|
|||
import requests
|
||||
|
||||
|
||||
class addressMatch:
|
||||
class AddressMatch:
|
||||
def __init__(self):
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def score(a: str, b: str) -> float:
|
||||
score: float = addressMatch.levenshtein(a, b)
|
||||
score: float = AddressMatch.levenshtein(a, b)
|
||||
|
||||
return score
|
||||
|
||||
|
|
@ -143,8 +143,8 @@ class addressMatch:
|
|||
|
||||
return None
|
||||
|
||||
a_norm = addressMatch.normalise_address(a)
|
||||
b_norm = addressMatch.normalise_address(b)
|
||||
a_norm = AddressMatch.normalise_address(a)
|
||||
b_norm = AddressMatch.normalise_address(b)
|
||||
|
||||
# --- hard signal: numbers ---
|
||||
nums_a = extract_numbers(a_norm)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue