mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Slice 1/6 of the postcode_splitter refactor (Hestia-Homes/Model#1100). Introduces the pure-domain foundation under domain/, with no AWS, Postgres, or pandas dependencies. UserAddress is a frozen dataclass that sanitises its postcode in __post_init__ via the canonical sanitise_postcode helper, and iter_postcode_grouped_batches preserves the legacy splitter's batching invariants (group by postcode in insertion order, never split a group, dispatch oversize single-postcode groups whole, and flush the final batch). Updates UBIQUITOUS_LANGUAGE.md so the User Address term covers both the dataclass sense (preferred in domain code) and the raw upstream-string sense. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
28 lines
913 B
Python
28 lines
913 B
Python
from domain.postcodes.sanitise import sanitise_postcode
|
|
|
|
|
|
def test_sanitise_uppercases() -> None:
    """A lower-case postcode is expected to come back upper-cased."""
    result = sanitise_postcode("sw1a1aa")
    assert result == "SW1A1AA"
|
|
|
|
|
|
def test_sanitise_strips_internal_spaces() -> None:
    """A space inside the postcode is expected to be removed."""
    raw = "sw1a 1aa"
    expected = "SW1A1AA"
    assert sanitise_postcode(raw) == expected
|
|
|
|
|
|
def test_sanitise_strips_leading_and_trailing_whitespace() -> None:
    """Padding spaces around the postcode are expected to be dropped."""
    padded = " sw1a 1aa "
    cleaned = sanitise_postcode(padded)
    assert cleaned == "SW1A1AA"
|
|
|
|
|
|
def test_sanitise_strips_tabs_and_newlines() -> None:
    """Tabs and newlines are expected to be removed like ordinary spaces."""
    # Stray whitespace characters sometimes sneak in during CSV ingestion,
    # so the canonical form has to swallow them the same way it swallows
    # literal spaces.
    noisy = "sw1a\t1aa\n"
    cleaned = sanitise_postcode(noisy)
    assert cleaned == "SW1A1AA"
|
|
|
|
|
|
def test_sanitise_already_canonical_is_idempotent() -> None:
    """Sanitising canonical input, or sanitising twice, yields the same value."""
    canonical = "SW1A1AA"

    # Input that is already canonical must pass through untouched.
    assert sanitise_postcode(canonical) == canonical

    # Feeding the helper its own output must not change the result.
    first_pass = sanitise_postcode("sw1a 1aa")
    assert sanitise_postcode(first_pass) == canonical
|
|
|
|
|
|
def test_sanitise_empty_string() -> None:
    """An empty string is expected to sanitise to an empty string."""
    sanitised = sanitise_postcode("")
    assert sanitised == ""
|