mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
rank address similarity
This commit is contained in:
parent
8b27a5173b
commit
dfc100f78b
4 changed files with 23 additions and 14 deletions
|
|
@ -15,7 +15,7 @@ from utils.s3 import (
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
from backend.utils.addressMatch import AddressMatch
|
from backend.utils.addressMatch import AddressMatch
|
||||||
from backend.address2UPRN.scoring import all_uprns_match, rank_by_address_similarity
|
from backend.address2UPRN.scoring import all_uprns_match, rank_address_similarity
|
||||||
from datatypes.epc.domain.historic_epc_matching import (
|
from datatypes.epc.domain.historic_epc_matching import (
|
||||||
match_addresses_for_postcode,
|
match_addresses_for_postcode,
|
||||||
)
|
)
|
||||||
|
|
@ -79,7 +79,7 @@ def get_uprn_with_epc_df(
|
||||||
if epc_df.empty:
|
if epc_df.empty:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
scored_df = rank_by_address_similarity(
|
scored_df = rank_address_similarity(
|
||||||
epc_df,
|
epc_df,
|
||||||
user_address=user_inputed_address,
|
user_address=user_inputed_address,
|
||||||
)
|
)
|
||||||
|
|
@ -171,7 +171,7 @@ def resolve_uprns_for_postcode_group(
|
||||||
for _, row in group_df.iterrows():
|
for _, row in group_df.iterrows():
|
||||||
user_address = str(row[address_col]).strip()
|
user_address = str(row[address_col]).strip()
|
||||||
|
|
||||||
scored_df = rank_by_address_similarity(
|
scored_df = rank_address_similarity(
|
||||||
epc_df,
|
epc_df,
|
||||||
user_address=user_address,
|
user_address=user_address,
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -19,8 +19,8 @@ def all_uprns_match(
|
||||||
return len(uprns) == 1 and uprns[0] == str(target_uprn)
|
return len(uprns) == 1 and uprns[0] == str(target_uprn)
|
||||||
|
|
||||||
|
|
||||||
def rank_by_address_similarity(
|
def rank_address_similarity(
|
||||||
df: pd.DataFrame,
|
address_list_df: pd.DataFrame,
|
||||||
user_address: str,
|
user_address: str,
|
||||||
address_column: str = "address",
|
address_column: str = "address",
|
||||||
uprn_column: str = "uprn",
|
uprn_column: str = "uprn",
|
||||||
|
|
@ -32,13 +32,13 @@ def rank_by_address_similarity(
|
||||||
DOES NOT choose or return a UPRN.
|
DOES NOT choose or return a UPRN.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if address_column not in df.columns:
|
if address_column not in address_list_df.columns:
|
||||||
raise ValueError(f"Missing column: {address_column}")
|
raise ValueError(f"Missing column: {address_column}")
|
||||||
|
|
||||||
if uprn_column not in df.columns:
|
if uprn_column not in address_list_df.columns:
|
||||||
raise ValueError(f"Missing column: {uprn_column}")
|
raise ValueError(f"Missing column: {uprn_column}")
|
||||||
|
|
||||||
out = df.copy()
|
out = address_list_df.copy()
|
||||||
|
|
||||||
user_norm = AddressMatch.normalise_address(user_address)
|
user_norm = AddressMatch.normalise_address(user_address)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,7 @@ from typing import Optional
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from botocore.exceptions import ClientError
|
from botocore.exceptions import ClientError
|
||||||
|
|
||||||
from backend.address2UPRN.scoring import rank_by_address_similarity
|
from backend.address2UPRN.scoring import rank_address_similarity
|
||||||
from backend.utils.addressMatch import AddressMatch
|
from backend.utils.addressMatch import AddressMatch
|
||||||
from datatypes.epc.domain.historic_epc import HistoricEpc
|
from datatypes.epc.domain.historic_epc import HistoricEpc
|
||||||
from utils.pandas_utils import pandas_cell_to_str
|
from utils.pandas_utils import pandas_cell_to_str
|
||||||
|
|
@ -85,7 +85,7 @@ def match_addresses_for_postcode(
|
||||||
) from e
|
) from e
|
||||||
raise
|
raise
|
||||||
|
|
||||||
scored = rank_by_address_similarity(
|
scored = rank_address_similarity(
|
||||||
df,
|
df,
|
||||||
user_address=user_address,
|
user_address=user_address,
|
||||||
address_column=address_column,
|
address_column=address_column,
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
|
from http import HTTPStatus
|
||||||
from typing import Optional, cast, Callable, Any
|
from typing import Optional, cast, Callable, Any
|
||||||
|
|
||||||
from hubspot.client import Client # type: ignore[reportMissingTypeStubs]
|
from hubspot.client import Client # type: ignore[reportMissingTypeStubs]
|
||||||
|
|
@ -86,19 +87,27 @@ class HubspotClient:
|
||||||
|
|
||||||
def _call_with_retry(self, fn: Callable[[], Any], max_retries: int = 2) -> Any:
|
def _call_with_retry(self, fn: Callable[[], Any], max_retries: int = 2) -> Any:
|
||||||
"""
|
"""
|
||||||
Call fn(), retrying up to max_retries times on 429 rate-limit errors.
|
Call fn(), retrying up to max_retries times on 429 rate-limit errors
|
||||||
|
or transient 5xx server errors.
|
||||||
Waits the minimal amount: the remaining interval window reported by HubSpot headers.
|
Waits the minimal amount: the remaining interval window reported by HubSpot headers.
|
||||||
Falls back to the full interval (10s) if headers are absent.
|
Falls back to the full interval (10s) if headers are absent.
|
||||||
|
|
||||||
Note: each HubSpot sub-module (deals, companies, etc.) ships its own ApiException
|
Note: each HubSpot sub-module (deals, companies, etc.) ships its own ApiException
|
||||||
class with no shared base beyond Exception, so we detect 429s via duck-typing.
|
class with no shared base beyond Exception, so we detect retryable statuses via duck-typing.
|
||||||
"""
|
"""
|
||||||
|
retryable_statuses = {
|
||||||
|
HTTPStatus.TOO_MANY_REQUESTS,
|
||||||
|
HTTPStatus.INTERNAL_SERVER_ERROR,
|
||||||
|
HTTPStatus.BAD_GATEWAY,
|
||||||
|
HTTPStatus.SERVICE_UNAVAILABLE,
|
||||||
|
HTTPStatus.GATEWAY_TIMEOUT,
|
||||||
|
}
|
||||||
for attempt in range(max_retries + 1):
|
for attempt in range(max_retries + 1):
|
||||||
try:
|
try:
|
||||||
return fn()
|
return fn()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
status = getattr(e, "status", None)
|
status = getattr(e, "status", None)
|
||||||
if status != 429 or attempt == max_retries:
|
if status not in retryable_statuses or attempt == max_retries:
|
||||||
raise
|
raise
|
||||||
headers = getattr(e, "headers", None) or {}
|
headers = getattr(e, "headers", None) or {}
|
||||||
interval_ms = int(
|
interval_ms = int(
|
||||||
|
|
@ -106,7 +115,7 @@ class HubspotClient:
|
||||||
)
|
)
|
||||||
wait_s = interval_ms / 1000.0
|
wait_s = interval_ms / 1000.0
|
||||||
self.logger.warning(
|
self.logger.warning(
|
||||||
f"HubSpot 429 (attempt {attempt + 1}/{max_retries}), "
|
f"HubSpot {status} (attempt {attempt + 1}/{max_retries}), "
|
||||||
f"waiting {wait_s:.1f}s before retry."
|
f"waiting {wait_s:.1f}s before retry."
|
||||||
)
|
)
|
||||||
time.sleep(wait_s)
|
time.sleep(wait_s)
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue