diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index 33cb6ff9..4c22db44 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -14,7 +14,7 @@ from utils.s3 import ( ) from datetime import datetime -from backend.utils.addressMatch import addressMatch +from backend.utils.addressMatch import AddressMatch logger = setup_logger() @@ -35,7 +35,7 @@ def score_addresses( if column not in df.columns: raise ValueError(f"Missing column: {column}") - return df[column].apply(lambda x: addressMatch.score(user_address, x)) + return df[column].apply(lambda x: AddressMatch.score(user_address, x)) def get_epc_data_with_postcode(postcode, size=500, attempt=1, max_attempts=3): @@ -127,10 +127,10 @@ def get_uprn_candidates( out = df.copy() - user_norm = addressMatch.normalise_address(user_address) + user_norm = AddressMatch.normalise_address(user_address) out["lexiscore"] = out[address_column].apply( - lambda x: addressMatch.levenshtein(user_norm, x) + lambda x: AddressMatch.levenshtein(user_norm, x) ) # Normalise UPRN to string @@ -455,7 +455,7 @@ def handler(event, context, local=False): ) # Validate postcode before processing - if not addressMatch.is_valid_postcode(postcode): + if not AddressMatch.is_valid_postcode(postcode): logger.warning(f"Postcode {postcode} is invalid, skipping") continue diff --git a/backend/ordnanceSurvey/main.py b/backend/ordnanceSurvey/main.py index bf8cfdaf..5c8373a1 100644 --- a/backend/ordnanceSurvey/main.py +++ b/backend/ordnanceSurvey/main.py @@ -1,4 +1,4 @@ -from typing import Any +from typing import Any, Optional import json from utils.logger import setup_logger import logging @@ -8,7 +8,7 @@ from utils.s3 import ( read_csv_from_s3 as read_csv_from_s3_dict, parse_s3_uri, ) -from backend.utils.addressMatch import addressMatch +from backend.utils.addressMatch import AddressMatch from backend.app.db.connection import get_db_session from backend.app.db.models.postcode_search import PostcodeSearchModel from backend.utils.ordnance_survey import ( @@ -124,7 +124,7 @@ def handler(body: dict[str, Any], context: Any, local: bool = False) -> None: s3_uri: str = body.get("s3_uri", "") lexiscore_threshold: float = body.get("lexiscore_threshold", 0.5) - lexiscore_column: str = body.get("lexiscore_column", None) + lexiscore_column: Optional[str] = body.get("lexiscore_column", None) task_id: str = body.get("task_id", "") sub_task_id: str = body.get("sub_task_id", "") @@ -158,7 +158,7 @@ def handler(body: dict[str, Any], context: Any, local: bool = False) -> None: # Process each postcode group at a time for postcode, group in grouped: print(f"Processing postcode: {postcode} ({len(group)} rows)") - valid_group = addressMatch.is_valid_postcode(postcode) + valid_group = AddressMatch.is_valid_postcode(postcode) if not valid_group: logger.warning(f"Postcode {postcode} is invalid, skipping") for idx in group.index: @@ -203,7 +203,7 @@ def handler(body: dict[str, Any], context: Any, local: bool = False) -> None: # Score against OS Places addresses scores = postcode_cache["ADDRESS"].apply( - lambda addr: addressMatch.score(ordnancy_survey_user_input, addr) + lambda addr: AddressMatch.score(ordnancy_survey_user_input, addr) ) best_idx = scores.idxmax() best_score = scores[best_idx] @@ -215,12 +215,10 @@ def handler(body: dict[str, Any], context: Any, local: bool = False) -> None: df.at[idx, "ordnance_survey_lexiscore"] = best_score # Save results locally - df.to_csv("ordnance_survey_results.csv", index=False) - print(f"Results saved to ordnance_survey_results.csv ({len(df)} rows)") + if local: + df.to_csv("ordnance_survey_results.csv", index=False) + print(f"Results saved to ordnance_survey_results.csv ({len(df)} rows)") # Save results to S3 if task_id and sub_task_id: - try: - save_results_to_s3(df, task_id, sub_task_id) - except Exception as s3_error: - logger.error(f"Failed to save results to S3: {s3_error}") + save_results_to_s3(df, task_id, sub_task_id) diff --git a/backend/utils/addressMatch.py b/backend/utils/addressMatch.py index b09c1672..411bb07c 100644 --- a/backend/utils/addressMatch.py +++ b/backend/utils/addressMatch.py @@ -4,13 +4,13 @@ from difflib import SequenceMatcher import requests -class addressMatch: +class AddressMatch: def __init__(self): return None @staticmethod def score(a: str, b: str) -> float: - score: float = addressMatch.levenshtein(a, b) + score: float = AddressMatch.levenshtein(a, b) return score @@ -143,8 +143,8 @@ class addressMatch: return None - a_norm = addressMatch.normalise_address(a) - b_norm = addressMatch.normalise_address(b) + a_norm = AddressMatch.normalise_address(a) + b_norm = AddressMatch.normalise_address(b) # --- hard signal: numbers --- nums_a = extract_numbers(a_norm)