diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py index d7e1ae58..ae34c49a 100644 --- a/backend/SearchEpc.py +++ b/backend/SearchEpc.py @@ -196,10 +196,11 @@ class SearchEpc: parsed_house_number = parsed_house_number[0][0] if parsed_house_number else None if parsed_house_number is None: - # Because usaddress isn't optimal for parsing addresses with 'Flat' as a prefix, we also add a custom - # approach - # Pattern to look for 'Flat' followed by a number, or just a number at the beginning - pattern = r'(?i)(?:flat\s*(\d+))|^\s*(\d+)' + # Because usaddress isn't optimal for parsing addresses with some prefixes such as 'Flat', + # we also add a custom approach + + # Pattern to look for 'Flat' or 'Apartment' followed by a number, or just a number at the beginning + pattern = r'(?i)(?:flat|apartment)\s*(\d+)|^\s*(\d+)' match = re.search(pattern, address) @@ -468,10 +469,15 @@ class SearchEpc: estimation_property_type = epc_property_type if property_type == "" else property_type - epc_data = epc_data[ - (epc_data["built-form"] == estimation_built_form) & ( - epc_data["property-type"] == estimation_property_type) - ] + # We handle some edge cases experienced with maisonettes - if built form is detached, just filter + # on maisonette + if (estimation_property_type == "Maisonette") & (estimation_built_form == "Detached"): + epc_data = epc_data[epc_data["property-type"] == estimation_property_type] + else: + epc_data = epc_data[ + (epc_data["built-form"] == estimation_built_form) & ( + epc_data["property-type"] == estimation_property_type) + ] if not epc_data.empty: return epc_data # Return the filtered data if it's not empty diff --git a/etl/testing_data/estimate_epc.py b/etl/testing_data/estimate_epc.py index 9ecd7236..7dc669f9 100644 --- a/etl/testing_data/estimate_epc.py +++ b/etl/testing_data/estimate_epc.py @@ -7,6 +7,7 @@ from tqdm import tqdm from dotenv import load_dotenv from utils.logger import setup_logger from backend.SearchEpc import SearchEpc, vartypes 
+from BaseUtility import Definitions from etl.epc.settings import BUILT_FORM_REMAP ENV_FILE = Path(__file__).parent / "backend" / ".env" @@ -14,8 +15,8 @@ ENV_FILE = Path(__file__).parent / "backend" / ".env" logger = setup_logger() DATA_DIRECTORY = Path(__file__).parent / "local_data" / "all-domestic-certificates" -DIR_SAMPLE_SIZE = 50 -N_DIRECTORIES = 25 +DIR_SAMPLE_SIZE = 500 +N_DIRECTORIES = 50 EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN") @@ -94,7 +95,9 @@ def app(): # Perform the same remapping for built-form as in the Property class for this test, in case we get (e.g.) # Enclosed End-Terrace built_form = BUILT_FORM_REMAP.get(epc["built-form"], epc["built-form"]) - if (epc["property-type"] == "Maisonette") & (built_form == "Detached"): + if ((epc["property-type"] == "Maisonette") & (built_form == "Detached")) or ( + built_form in Definitions.DATA_ANOMALY_MATCHES + ): built_form = "" estimated_epc = searcher.estimate_epc(