mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
changed to utils
This commit is contained in:
parent
7ef5dc4922
commit
fb758b76bf
4 changed files with 18 additions and 16 deletions
|
|
@ -1,5 +1,5 @@
|
|||
from dataclasses import dataclass
|
||||
from typing import Any, Optional
|
||||
from typing import Optional
|
||||
|
||||
import pandas as pd
|
||||
from botocore.exceptions import ClientError
|
||||
|
|
@ -7,6 +7,7 @@ from botocore.exceptions import ClientError
|
|||
from backend.address2UPRN.scoring import get_uprn_candidates
|
||||
from backend.utils.addressMatch import AddressMatch
|
||||
from datatypes.epc.domain.historic_epc import HistoricEpc
|
||||
from utils.pandas_utils import pandas_cell_to_str
|
||||
from utils.s3 import parse_s3_uri, read_csv_gz_from_s3
|
||||
|
||||
DEFAULT_S3_ROOT = "s3://retrofit-data-dev/historical_epc"
|
||||
|
|
@ -14,20 +15,9 @@ DEFAULT_S3_ROOT = "s3://retrofit-data-dev/historical_epc"
|
|||
_EXTRA_COLS = {"lexiscore", "lexirank"}
|
||||
|
||||
|
||||
def _cell_to_str(v: Any) -> str:
|
||||
if v is None or (isinstance(v, float) and pd.isna(v)):
|
||||
return ""
|
||||
s = str(v).replace("\xa0", " ")
|
||||
# get_uprn_candidates runs .astype(str) on UPRN, turning NaN into "nan".
|
||||
# Treat that as missing so unambiguous_uprn truthiness checks work.
|
||||
if s.lower() == "nan":
|
||||
return ""
|
||||
return s
|
||||
|
||||
|
||||
def _row_to_historic_epc(row: pd.Series) -> HistoricEpc:
|
||||
kwargs = {
|
||||
col.lower(): _cell_to_str(val)
|
||||
col.lower(): pandas_cell_to_str(val)
|
||||
for col, val in row.items()
|
||||
if col.lower() not in _EXTRA_COLS
|
||||
}
|
||||
|
|
|
|||
|
|
@ -211,7 +211,7 @@ class TestUnambiguousUprn:
|
|||
])
|
||||
result = match_addresses_for_postcode("47 Gordon Road", "AB33 8AL")
|
||||
top = result.top()
|
||||
# _cell_to_str must turn NaN/"nan" into "" (not the literal string "nan"),
|
||||
# pandas_cell_to_str must turn NaN/"nan" into "" (not the literal string "nan"),
|
||||
# so unambiguous_uprn's truthiness check correctly drops the row.
|
||||
assert top.record.uprn == ""
|
||||
|
||||
|
|
|
|||
14
utils/pandas_utils.py
Normal file
14
utils/pandas_utils.py
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
from typing import Any
|
||||
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def pandas_cell_to_str(v: Any) -> str:
    """Convert a single pandas cell value to a clean string.

    Missing values are normalised to the empty string so that callers can
    use a plain truthiness check to detect "no value".

    Args:
        v: The raw cell value (any type).

    Returns:
        ``""`` when ``v`` is a missing-value marker (``None``, NaN,
        ``pd.NA``, ``pd.NaT``) or stringifies to ``"nan"`` in any letter
        case; otherwise ``str(v)`` with non-breaking spaces (U+00A0)
        replaced by ordinary spaces.
    """
    # pd.isna returns an array for list-likes, so only ask it about scalars.
    # This also catches pd.NA / pd.NaT, which would otherwise stringify to
    # the truthy literals "<NA>" / "NaT".
    if pd.api.types.is_scalar(v) and pd.isna(v):
        return ""
    s = str(v).replace("\xa0", " ")
    # get_uprn_candidates runs .astype(str) on UPRN, turning NaN into "nan".
    # Treat that as missing so unambiguous_uprn truthiness checks work.
    if s.lower() == "nan":
        return ""
    return s
|
||||
|
|
@ -6,8 +6,6 @@ from io import BytesIO, StringIO
|
|||
from urllib.parse import unquote
|
||||
from utils.logger import setup_logger
|
||||
from botocore.exceptions import NoCredentialsError, PartialCredentialsError
|
||||
from typing import Any
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue