This commit is contained in:
Khalim Conn-Kowlessar 2024-01-02 18:39:47 +00:00
parent 2156f6b076
commit f7cd9d0d10
2 changed files with 23 additions and 2 deletions

View file

@ -516,6 +516,11 @@ class SearchEpc:
estimation_data = epc_data[[key, "weight", "lodgement-datetime"]].copy()
estimation_data = estimation_data[~pd.isnull(estimation_data[key])]
estimation_data = estimation_data[~estimation_data[key].isin(Definitions.DATA_ANOMALY_MATCHES)]
if vartype == "Int64":
# Edge case: some values arrive as float-formatted strings (e.g. '1.0'), and casting those straight to
# Int64 raises "invalid literal for int() with base 10: '1.0'", so we cast to float first and then to Int64
estimation_data[key] = estimation_data[key].astype(float).astype(vartype)
else:
estimation_data[key] = estimation_data[key].astype(vartype)
if estimation_data.shape[0] == 0:

View file

@ -7,6 +7,7 @@ from tqdm import tqdm
from dotenv import load_dotenv
from utils.logger import setup_logger
from backend.SearchEpc import SearchEpc, vartypes
from etl.epc.settings import BUILT_FORM_REMAP
ENV_FILE = Path(__file__).parent / "backend" / ".env"
@ -20,6 +21,12 @@ EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
load_dotenv(ENV_FILE)
# Keys that app() removes from the string vartypes because they aren't
# important for the comparison.
CATETORICALS_TO_IGNORE = [
    "postcode",
    "constituency",
    "local-authority",
    "built-form",
    "property-type",
    "address1",
    "constituency-label",
    "building-reference-number",
    "address2",
    "posttown",
    "transaction-type",
    "lmk-key",
    "address3",
    "local-authority-label",
    "county",
]
def check_numeric_performance(estimated_value, actual_value):
# If we don't have anything to compare against, return None
@ -49,6 +56,10 @@ def app():
if len(numerical_vartypes) + len(str_var_types) != len(vartypes):
raise ValueError("Not all vartypes have been accounted for")
# Drop some keys that aren't important
for k in CATETORICALS_TO_IGNORE:
str_var_types.pop(k, None)
directories = [entry for entry in DATA_DIRECTORY.iterdir() if entry.is_dir()]
directory_sample = choices(directories, k=N_DIRECTORIES)
@ -80,11 +91,16 @@ def app():
searcher = SearchEpc(address1, postcode, auth_token=EPC_AUTH_TOKEN, os_api_key="")
searcher.uprn = epc["uprn"]
# Perform the same remapping for built-form as in the Property class for this test, in case we get (e.g.)
# Enclosed End-Terrace
built_form = BUILT_FORM_REMAP.get(epc["built-form"], epc["built-form"])
estimated_epc = searcher.estimate_epc(
property_type=epc["property-type"], built_form=epc["built-form"], lmks_to_drop=lmks_to_drop
property_type=epc["property-type"], built_form=built_form, lmks_to_drop=lmks_to_drop
)
# We now compare the difference between the estimated and original
# TODO: We can convert windows and lighting to numeric versions and estimate how close we are
numeric_performance = {
key: check_numeric_performance(estimated_epc[key], epc[key]) for key, value in
numerical_vartypes.items()