mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
rank address similarity
This commit is contained in:
parent
8b27a5173b
commit
dfc100f78b
4 changed files with 23 additions and 14 deletions
|
|
@ -15,7 +15,7 @@ from utils.s3 import (
|
|||
from datetime import datetime
|
||||
|
||||
from backend.utils.addressMatch import AddressMatch
|
||||
from backend.address2UPRN.scoring import all_uprns_match, rank_by_address_similarity
|
||||
from backend.address2UPRN.scoring import all_uprns_match, rank_address_similarity
|
||||
from datatypes.epc.domain.historic_epc_matching import (
|
||||
match_addresses_for_postcode,
|
||||
)
|
||||
|
|
@ -79,7 +79,7 @@ def get_uprn_with_epc_df(
|
|||
if epc_df.empty:
|
||||
return None
|
||||
|
||||
scored_df = rank_by_address_similarity(
|
||||
scored_df = rank_address_similarity(
|
||||
epc_df,
|
||||
user_address=user_inputed_address,
|
||||
)
|
||||
|
|
@ -171,7 +171,7 @@ def resolve_uprns_for_postcode_group(
|
|||
for _, row in group_df.iterrows():
|
||||
user_address = str(row[address_col]).strip()
|
||||
|
||||
scored_df = rank_by_address_similarity(
|
||||
scored_df = rank_address_similarity(
|
||||
epc_df,
|
||||
user_address=user_address,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -19,8 +19,8 @@ def all_uprns_match(
|
|||
return len(uprns) == 1 and uprns[0] == str(target_uprn)
|
||||
|
||||
|
||||
def rank_by_address_similarity(
|
||||
df: pd.DataFrame,
|
||||
def rank_address_similarity(
|
||||
address_list_df: pd.DataFrame,
|
||||
user_address: str,
|
||||
address_column: str = "address",
|
||||
uprn_column: str = "uprn",
|
||||
|
|
@ -32,13 +32,13 @@ def rank_by_address_similarity(
|
|||
DOES NOT choose or return a UPRN.
|
||||
"""
|
||||
|
||||
if address_column not in df.columns:
|
||||
if address_column not in address_list_df.columns:
|
||||
raise ValueError(f"Missing column: {address_column}")
|
||||
|
||||
if uprn_column not in df.columns:
|
||||
if uprn_column not in address_list_df.columns:
|
||||
raise ValueError(f"Missing column: {uprn_column}")
|
||||
|
||||
out = df.copy()
|
||||
out = address_list_df.copy()
|
||||
|
||||
user_norm = AddressMatch.normalise_address(user_address)
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ from typing import Optional
|
|||
import pandas as pd
|
||||
from botocore.exceptions import ClientError
|
||||
|
||||
from backend.address2UPRN.scoring import rank_by_address_similarity
|
||||
from backend.address2UPRN.scoring import rank_address_similarity
|
||||
from backend.utils.addressMatch import AddressMatch
|
||||
from datatypes.epc.domain.historic_epc import HistoricEpc
|
||||
from utils.pandas_utils import pandas_cell_to_str
|
||||
|
|
@ -85,7 +85,7 @@ def match_addresses_for_postcode(
|
|||
) from e
|
||||
raise
|
||||
|
||||
scored = rank_by_address_similarity(
|
||||
scored = rank_address_similarity(
|
||||
df,
|
||||
user_address=user_address,
|
||||
address_column=address_column,
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
import os
|
||||
import time
|
||||
from enum import Enum
|
||||
from http import HTTPStatus
|
||||
from typing import Optional, cast, Callable, Any
|
||||
|
||||
from hubspot.client import Client # type: ignore[reportMissingTypeStubs]
|
||||
|
|
@ -86,19 +87,27 @@ class HubspotClient:
|
|||
|
||||
def _call_with_retry(self, fn: Callable[[], Any], max_retries: int = 2) -> Any:
|
||||
"""
|
||||
Call fn(), retrying up to max_retries times on 429 rate-limit errors.
|
||||
Call fn(), retrying up to max_retries times on 429 rate-limit errors
|
||||
or transient 5xx server errors.
|
||||
Waits the minimal amount: the remaining interval window reported by HubSpot headers.
|
||||
Falls back to the full interval (10s) if headers are absent.
|
||||
|
||||
Note: each HubSpot sub-module (deals, companies, etc.) ships its own ApiException
|
||||
class with no shared base beyond Exception, so we detect 429s via duck-typing.
|
||||
class with no shared base beyond Exception, so we detect retryable statuses via duck-typing.
|
||||
"""
|
||||
retryable_statuses = {
|
||||
HTTPStatus.TOO_MANY_REQUESTS,
|
||||
HTTPStatus.INTERNAL_SERVER_ERROR,
|
||||
HTTPStatus.BAD_GATEWAY,
|
||||
HTTPStatus.SERVICE_UNAVAILABLE,
|
||||
HTTPStatus.GATEWAY_TIMEOUT,
|
||||
}
|
||||
for attempt in range(max_retries + 1):
|
||||
try:
|
||||
return fn()
|
||||
except Exception as e:
|
||||
status = getattr(e, "status", None)
|
||||
if status != 429 or attempt == max_retries:
|
||||
if status not in retryable_statuses or attempt == max_retries:
|
||||
raise
|
||||
headers = getattr(e, "headers", None) or {}
|
||||
interval_ms = int(
|
||||
|
|
@ -106,7 +115,7 @@ class HubspotClient:
|
|||
)
|
||||
wait_s = interval_ms / 1000.0
|
||||
self.logger.warning(
|
||||
f"HubSpot 429 (attempt {attempt + 1}/{max_retries}), "
|
||||
f"HubSpot {status} (attempt {attempt + 1}/{max_retries}), "
|
||||
f"waiting {wait_s:.1f}s before retry."
|
||||
)
|
||||
time.sleep(wait_s)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue