diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index 9c19eca9..389816cc 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -15,7 +15,7 @@ from utils.s3 import ( from datetime import datetime from backend.utils.addressMatch import AddressMatch -from backend.address2UPRN.scoring import all_uprns_match, rank_by_address_similarity +from backend.address2UPRN.scoring import all_uprns_match, rank_address_similarity from datatypes.epc.domain.historic_epc_matching import ( match_addresses_for_postcode, ) @@ -79,7 +79,7 @@ def get_uprn_with_epc_df( if epc_df.empty: return None - scored_df = rank_by_address_similarity( + scored_df = rank_address_similarity( epc_df, user_address=user_inputed_address, ) @@ -171,7 +171,7 @@ def resolve_uprns_for_postcode_group( for _, row in group_df.iterrows(): user_address = str(row[address_col]).strip() - scored_df = rank_by_address_similarity( + scored_df = rank_address_similarity( epc_df, user_address=user_address, ) diff --git a/backend/address2UPRN/scoring.py b/backend/address2UPRN/scoring.py index 2a681ad2..dcb86d49 100644 --- a/backend/address2UPRN/scoring.py +++ b/backend/address2UPRN/scoring.py @@ -19,8 +19,8 @@ def all_uprns_match( return len(uprns) == 1 and uprns[0] == str(target_uprn) -def rank_by_address_similarity( - df: pd.DataFrame, +def rank_address_similarity( + address_list_df: pd.DataFrame, user_address: str, address_column: str = "address", uprn_column: str = "uprn", @@ -32,13 +32,13 @@ def rank_by_address_similarity( DOES NOT choose or return a UPRN. """ - if address_column not in df.columns: + if address_column not in address_list_df.columns: raise ValueError(f"Missing column: {address_column}") - if uprn_column not in df.columns: + if uprn_column not in address_list_df.columns: raise ValueError(f"Missing column: {uprn_column}") - out = df.copy() + out = address_list_df.copy() user_norm = AddressMatch.normalise_address(user_address) diff --git a/datatypes/epc/domain/historic_epc_matching.py b/datatypes/epc/domain/historic_epc_matching.py index 6ea2118b..86c44b59 100644 --- a/datatypes/epc/domain/historic_epc_matching.py +++ b/datatypes/epc/domain/historic_epc_matching.py @@ -4,7 +4,7 @@ from typing import Optional import pandas as pd from botocore.exceptions import ClientError -from backend.address2UPRN.scoring import rank_by_address_similarity +from backend.address2UPRN.scoring import rank_address_similarity from backend.utils.addressMatch import AddressMatch from datatypes.epc.domain.historic_epc import HistoricEpc from utils.pandas_utils import pandas_cell_to_str @@ -85,7 +85,7 @@ def match_addresses_for_postcode( ) from e raise - scored = rank_by_address_similarity( + scored = rank_address_similarity( df, user_address=user_address, address_column=address_column, diff --git a/etl/hubspot/hubspotClient.py b/etl/hubspot/hubspotClient.py index 92a6c7e1..4c9cb1e6 100644 --- a/etl/hubspot/hubspotClient.py +++ b/etl/hubspot/hubspotClient.py @@ -1,6 +1,7 @@ import os import time from enum import Enum +from http import HTTPStatus from typing import Optional, cast, Callable, Any from hubspot.client import Client # type: ignore[reportMissingTypeStubs] @@ -86,19 +87,27 @@ class HubspotClient: def _call_with_retry(self, fn: Callable[[], Any], max_retries: int = 2) -> Any: """ - Call fn(), retrying up to max_retries times on 429 rate-limit errors. + Call fn(), retrying up to max_retries times on 429 rate-limit errors + or transient 5xx server errors. Waits the minimal amount: the remaining interval window reported by HubSpot headers. Falls back to the full interval (10s) if headers are absent. Note: each HubSpot sub-module (deals, companies, etc.) ships its own ApiException - class with no shared base beyond Exception, so we detect 429s via duck-typing. + class with no shared base beyond Exception, so we detect retryable statuses via duck-typing. """ + retryable_statuses = { + HTTPStatus.TOO_MANY_REQUESTS, + HTTPStatus.INTERNAL_SERVER_ERROR, + HTTPStatus.BAD_GATEWAY, + HTTPStatus.SERVICE_UNAVAILABLE, + HTTPStatus.GATEWAY_TIMEOUT, + } for attempt in range(max_retries + 1): try: return fn() except Exception as e: status = getattr(e, "status", None) - if status != 429 or attempt == max_retries: + if status not in retryable_statuses or attempt == max_retries: raise headers = getattr(e, "headers", None) or {} interval_ms = int( @@ -106,7 +115,7 @@ class HubspotClient: ) wait_s = interval_ms / 1000.0 self.logger.warning( - f"HubSpot 429 (attempt {attempt + 1}/{max_retries}), " + f"HubSpot {status} (attempt {attempt + 1}/{max_retries}), " f"waiting {wait_s:.1f}s before retry." ) time.sleep(wait_s)