mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
refactored test to deal with flats better
This commit is contained in:
parent
9aae5bf482
commit
1934c889b0
2 changed files with 15 additions and 18 deletions
|
|
@ -168,8 +168,8 @@ FLAT 8 599 HARROW ROAD,W10 4RA,None
|
|||
"Apartment 18 Block D, 32, Hornsey Road",N7 7AT,10012792383
|
||||
24b Honley Road,SE6 2HZ,None
|
||||
FLAT B 158 LEAHURST ROAD,SE13 5NL,100021976974
|
||||
2 COLLEGE HOUSE,CM7 1JS,100091449870
|
||||
3 COLLEGE HOUSE,CM7 1JS,100091449871
|
||||
2 COLLEGE HOUSE,CM7 1JS,None
|
||||
3 COLLEGE HOUSE,CM7 1JS,None
|
||||
1 Anita Street,M4 5DU,None
|
||||
2 Anita Street,M4 5DU,77123061
|
||||
5 Anita Street,M4 5DU,77123081
|
||||
|
|
@ -279,6 +279,7 @@ FLAT B 158 LEAHURST ROAD,SE13 5NL,100021976974
|
|||
80a Victoria Square,M4 5DZ,77211231
|
||||
81a Victoria Square,M4 5DZ,77211232
|
||||
82 Victoria Square,M4 5DZ,None
|
||||
82a Victoria Square,M4 5DZ,77211233
|
||||
83a Victoria Square,M4 5DZ,77211234
|
||||
84a Victoria Square,M4 5DZ,None
|
||||
85a Victoria Square,M4 5DZ,77211236
|
||||
|
|
|
|||
|
|
|
@ -127,6 +127,7 @@ class AddressMatch:
|
|||
Assumes formats like:
|
||||
- '42 moreton road'
|
||||
- 'flat 3 42 moreton road'
|
||||
- '82 a victoria square' (recombined to '82a')
|
||||
"""
|
||||
tokens = s.split()
|
||||
|
||||
|
|
@ -142,9 +143,15 @@ class AddressMatch:
|
|||
continue
|
||||
cleaned.append(t)
|
||||
|
||||
# first remaining number is building number
|
||||
for t in cleaned:
|
||||
if re.fullmatch(r"\d+[a-z]?", t):
|
||||
# first remaining number is building number; recombine with a
|
||||
# single-letter suffix when normalisation has split "82a" → "82 a"
|
||||
for i, t in enumerate(cleaned):
|
||||
if re.fullmatch(r"\d+[a-z]", t):
|
||||
return t
|
||||
if re.fullmatch(r"\d+", t):
|
||||
nxt = cleaned[i + 1] if i + 1 < len(cleaned) else None
|
||||
if nxt is not None and re.fullmatch(r"[a-z]", nxt):
|
||||
return t + nxt
|
||||
return t
|
||||
|
||||
return None
|
||||
|
|
@ -181,24 +188,13 @@ class AddressMatch:
|
|||
# Slash-format like "3/137a" is an implicit flat reference
|
||||
# (flat 3 of 137a) even without a "flat" keyword.
|
||||
has_implicit_flat_user = bool(re.search(r"\d+\s*/\s*\d+", a_norm))
|
||||
# If the user named a street, their leading number is a house number,
|
||||
# not a flat number — so an EPC "Flat N, …" candidate is a wrong unit.
|
||||
# Without a street token (e.g. "2 College House"), the user may be
|
||||
# implicitly naming a flat in a named building; don't apply the guard.
|
||||
STREET_TYPE_TOKENS = {
|
||||
"road", "street", "lane", "avenue", "close", "way",
|
||||
"crescent", "court", "drive", "place", "terrace", "mews",
|
||||
"gardens", "square", "grove", "park", "walk", "row",
|
||||
"green", "hill", "rise", "parade", "broadway",
|
||||
}
|
||||
user_tokens = set(a_norm.split())
|
||||
has_street_type_user = bool(user_tokens & STREET_TYPE_TOKENS)
|
||||
|
||||
# EPC says it's a flat but user gave no flat indication
|
||||
# (neither keyword nor slash-format). Unlikely to be the right unit.
|
||||
if (
|
||||
has_flat_token_epc
|
||||
and not has_flat_token_user
|
||||
and not has_implicit_flat_user
|
||||
and has_street_type_user
|
||||
):
|
||||
return 0.0
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue