building out multi-unit flagging

This commit is contained in:
Khalim Conn-Kowlessar 2025-02-19 14:21:29 +00:00
parent cb0194c3b9
commit 0a643d80ad
2 changed files with 16 additions and 11 deletions

View file

@ -1,5 +1,4 @@
import os
import usaddress
import re
import pandas as pd
from utils.logger import setup_logger
from backend.SearchEpc import SearchEpc
@ -42,6 +41,9 @@ class AssetList:
DOMNA_PROPERTY_ID = "domna_property_id"
# Regular expression for identifying if the address might point to multiple units.
# It matches two alphanumeric runs joined by a single hyphen (e.g. "1-4" or "A-C"),
# delimited by word boundaries; each side of the hyphen is captured as a group.
# NOTE(review): any hyphenated token matches, including ordinary hyphenated words
# such as "Stratford-upon" - confirm this level of false positives is acceptable.
MULTI_UNIT_REGEX = re.compile(r'\b([A-Za-z0-9]+)-([A-Za-z0-9]+)\b')
def __init__(
self,
local_filepath,
@ -139,6 +141,14 @@ class AssetList:
cleaned = cleaned.rstrip(", ").strip(",").strip()
return cleaned
@classmethod
def _identify_multi_address(cls, address):
# We check if the address is comma separated
if "," in address:
address1_section = address.split(",")[0]
# We look for string in the form (x-y)
return bool(cls.MULTI_UNIT_REGEX.search(address1_section))
def standardise(self):
"""
This function is used to standardise the asset list
@ -205,4 +215,6 @@ class AssetList:
]
]
# We identify addresses which are likely to be multi-addresses (e.g. rooms x-y)
raise NotImplementedError

View file

@ -1,12 +1,5 @@
from asset_list.AssetList import AssetList
from backend.SearchEpc import
def test_address1_extraction():
    """Placeholder test: it only sets up a sample address and makes no assertions yet."""
    sample_address = 'Block (Rooms 1-4), 23 Clifton Hill, Newtown, Exeter, EX1 2DL'
    # The extraction call is still disabled:
    # AssetList._extract_address1(
    #     sample_address,
    # )
    return None
def test_multi_unit_address_flagging():
    """An address whose first section contains a range such as "1-4" should be flagged."""
    address = 'Block (Rooms 1-4), 23 Clifton Hill, Newtown, Exeter, EX1 2DL'
    is_multi_unit = AssetList._identify_multi_address(address)
    assert is_multi_unit