multiple bugs fixed

This commit is contained in:
Khalim Conn-Kowlessar 2024-01-02 23:06:55 +00:00
parent de10fe7e6d
commit 014f684de3
2 changed files with 20 additions and 11 deletions

View file

@ -196,10 +196,11 @@ class SearchEpc:
parsed_house_number = parsed_house_number[0][0] if parsed_house_number else None
if parsed_house_number is None:
# Because usaddress isn't optimal for parsing addresses with 'Flat' as a prefix, we also add a custom
# approach
# Pattern to look for 'Flat' followed by a number, or just a number at the beginning
pattern = r'(?i)(?:flat\s*(\d+))|^\s*(\d+)'
# Because usaddress isn't optimal for parsing addresses with some prefixes such as 'Flat',
# we fall back to a custom regex when it does not yield a house number.
# Pattern (case-insensitive) to look for 'Flat' or 'Apartment' followed by a number, or just a number at
# the beginning of the address
pattern = r'(?i)(?:flat|apartment)\s*(\d+)|^\s*(\d+)'
match = re.search(pattern, address)
@ -468,10 +469,15 @@ class SearchEpc:
estimation_property_type = epc_property_type if property_type == "" else property_type
epc_data = epc_data[
(epc_data["built-form"] == estimation_built_form) & (
epc_data["property-type"] == estimation_property_type)
]
# We handle some edge cases experienced with maisonettes - if built form is detached, just filter
# on maisonette
if (estimation_property_type == "Maisonette") & (estimation_built_form == "Detached"):
epc_data = epc_data[epc_data["property-type"] == estimation_property_type]
else:
epc_data = epc_data[
(epc_data["built-form"] == estimation_built_form) & (
epc_data["property-type"] == estimation_property_type)
]
if not epc_data.empty:
return epc_data # Return the filtered data if it's not empty

View file

@ -7,6 +7,7 @@ from tqdm import tqdm
from dotenv import load_dotenv
from utils.logger import setup_logger
from backend.SearchEpc import SearchEpc, vartypes
from BaseUtility import Definitions
from etl.epc.settings import BUILT_FORM_REMAP
ENV_FILE = Path(__file__).parent / "backend" / ".env"
@ -14,8 +15,8 @@ ENV_FILE = Path(__file__).parent / "backend" / ".env"
logger = setup_logger()
DATA_DIRECTORY = Path(__file__).parent / "local_data" / "all-domestic-certificates"
DIR_SAMPLE_SIZE = 50
N_DIRECTORIES = 25
DIR_SAMPLE_SIZE = 500
N_DIRECTORIES = 50
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
@ -94,7 +95,9 @@ def app():
# Perform the same remapping for built-form as in the Property class for this test, in case we get (e.g.)
# Enclosed End-Terrace
built_form = BUILT_FORM_REMAP.get(epc["built-form"], epc["built-form"])
if (epc["property-type"] == "Maisonette") & (built_form == "Detached"):
if ((epc["property-type"] == "Maisonette") & (built_form == "Detached")) or (
built_form in Definitions.DATA_ANOMALY_MATCHES
):
built_form = ""
estimated_epc = searcher.estimate_epc(