mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-30 13:10:47 +00:00
changed to utils
This commit is contained in:
parent
7ef5dc4922
commit
fb758b76bf
4 changed files with 18 additions and 16 deletions
|
|
@ -1,5 +1,5 @@
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import Any, Optional
|
from typing import Optional
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from botocore.exceptions import ClientError
|
from botocore.exceptions import ClientError
|
||||||
|
|
@ -7,6 +7,7 @@ from botocore.exceptions import ClientError
|
||||||
from backend.address2UPRN.scoring import get_uprn_candidates
|
from backend.address2UPRN.scoring import get_uprn_candidates
|
||||||
from backend.utils.addressMatch import AddressMatch
|
from backend.utils.addressMatch import AddressMatch
|
||||||
from datatypes.epc.domain.historic_epc import HistoricEpc
|
from datatypes.epc.domain.historic_epc import HistoricEpc
|
||||||
|
from utils.pandas_utils import pandas_cell_to_str
|
||||||
from utils.s3 import parse_s3_uri, read_csv_gz_from_s3
|
from utils.s3 import parse_s3_uri, read_csv_gz_from_s3
|
||||||
|
|
||||||
DEFAULT_S3_ROOT = "s3://retrofit-data-dev/historical_epc"
|
DEFAULT_S3_ROOT = "s3://retrofit-data-dev/historical_epc"
|
||||||
|
|
@ -14,20 +15,9 @@ DEFAULT_S3_ROOT = "s3://retrofit-data-dev/historical_epc"
|
||||||
_EXTRA_COLS = {"lexiscore", "lexirank"}
|
_EXTRA_COLS = {"lexiscore", "lexirank"}
|
||||||
|
|
||||||
|
|
||||||
def _cell_to_str(v: Any) -> str:
|
|
||||||
if v is None or (isinstance(v, float) and pd.isna(v)):
|
|
||||||
return ""
|
|
||||||
s = str(v).replace("\xa0", " ")
|
|
||||||
# get_uprn_candidates runs .astype(str) on UPRN, turning NaN into "nan".
|
|
||||||
# Treat that as missing so unambiguous_uprn truthiness checks work.
|
|
||||||
if s.lower() == "nan":
|
|
||||||
return ""
|
|
||||||
return s
|
|
||||||
|
|
||||||
|
|
||||||
def _row_to_historic_epc(row: pd.Series) -> HistoricEpc:
|
def _row_to_historic_epc(row: pd.Series) -> HistoricEpc:
|
||||||
kwargs = {
|
kwargs = {
|
||||||
col.lower(): _cell_to_str(val)
|
col.lower(): pandas_cell_to_str(val)
|
||||||
for col, val in row.items()
|
for col, val in row.items()
|
||||||
if col.lower() not in _EXTRA_COLS
|
if col.lower() not in _EXTRA_COLS
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -211,7 +211,7 @@ class TestUnambiguousUprn:
|
||||||
])
|
])
|
||||||
result = match_addresses_for_postcode("47 Gordon Road", "AB33 8AL")
|
result = match_addresses_for_postcode("47 Gordon Road", "AB33 8AL")
|
||||||
top = result.top()
|
top = result.top()
|
||||||
# _cell_to_str must turn NaN/"nan" into "" (not the literal string "nan"),
|
# pandas_cell_to_str must turn NaN/"nan" into "" (not the literal string "nan"),
|
||||||
# so unambiguous_uprn's truthiness check correctly drops the row.
|
# so unambiguous_uprn's truthiness check correctly drops the row.
|
||||||
assert top.record.uprn == ""
|
assert top.record.uprn == ""
|
||||||
|
|
||||||
|
|
|
||||||
14
utils/pandas_utils.py
Normal file
14
utils/pandas_utils.py
Normal file
|
|
@ -0,0 +1,14 @@
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
|
def pandas_cell_to_str(v: Any) -> str:
    """Convert a single pandas cell value to a plain string.

    Missing values are normalised to the empty string so that callers can
    rely on simple truthiness checks (``if record.uprn:``).

    Args:
        v: The raw cell value (e.g. an item yielded by ``Series.items()``).

    Returns:
        ``""`` when the value is missing: ``None``, any scalar that
        ``pd.isna`` reports as missing (float NaN, ``pd.NA``, ``pd.NaT``),
        or the literal text ``"nan"`` in any letter case. Otherwise
        ``str(v)`` with non-breaking spaces (``"\\xa0"``) replaced by
        ordinary spaces.
    """
    # Only scalars are tested with pd.isna: for list-likes it returns an
    # element-wise array whose truth value is ambiguous. Checking every
    # scalar (not just floats) also catches pd.NA / pd.NaT, which would
    # otherwise stringify to the truthy "<NA>" / "NaT".
    if v is None or (pd.api.types.is_scalar(v) and pd.isna(v)):
        return ""
    s = str(v).replace("\xa0", " ")
    # get_uprn_candidates runs .astype(str) on UPRN, turning NaN into "nan".
    # Treat that as missing so unambiguous_uprn truthiness checks work.
    if s.lower() == "nan":
        return ""
    return s
|
||||||
|
|
@ -6,8 +6,6 @@ from io import BytesIO, StringIO
|
||||||
from urllib.parse import unquote
|
from urllib.parse import unquote
|
||||||
from utils.logger import setup_logger
|
from utils.logger import setup_logger
|
||||||
from botocore.exceptions import NoCredentialsError, PartialCredentialsError
|
from botocore.exceptions import NoCredentialsError, PartialCredentialsError
|
||||||
from typing import Any
|
|
||||||
|
|
||||||
logger = setup_logger()
|
logger = setup_logger()
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue