From 8bb90a5aa5beb495de799c481c2faa7899e6c5de Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Wed, 20 May 2026 12:57:03 +0000
Subject: [PATCH] sanitisation of postcode

---
 backend/bulk_address2uprn_combiner/main.py    | 14 +-----
 backend/ordnanceSurvey/main.py                | 12 +----
 domain/addresses/postcode_batching.py         |  8 ++--
 domain/addresses/user_address.py              | 20 ++++----
 domain/postcode.py                            | 40 ++++++++++++++++
 domain/postcodes/__init__.py                  |  0
 domain/postcodes/sanitise.py                  | 23 ---------
 .../user_address_csv_s3_repository.py         |  5 +-
 .../user_address/user_address_repository.py   |  4 +-
 .../addresses/test_postcode_batching.py       | 17 ++++---
 tests/domain/addresses/test_user_address.py   | 48 +++++++++++--------
 tests/domain/postcodes/__init__.py            |  0
 tests/domain/postcodes/test_sanitise.py       | 28 -----------
 tests/domain/test_postcode.py                 | 48 +++++++++++++++++++
 .../test_user_address_csv_s3_repository.py    | 11 +++--
 15 files changed, 153 insertions(+), 125 deletions(-)
 create mode 100644 domain/postcode.py
 delete mode 100644 domain/postcodes/__init__.py
 delete mode 100644 domain/postcodes/sanitise.py
 delete mode 100644 tests/domain/postcodes/__init__.py
 delete mode 100644 tests/domain/postcodes/test_sanitise.py
 create mode 100644 tests/domain/test_postcode.py

diff --git a/backend/bulk_address2uprn_combiner/main.py b/backend/bulk_address2uprn_combiner/main.py
index 37136e52..44f0b3f9 100644
--- a/backend/bulk_address2uprn_combiner/main.py
+++ b/backend/bulk_address2uprn_combiner/main.py
@@ -2,7 +2,7 @@ import os
 import boto3
 import pandas as pd
 from io import BytesIO
-from typing import Any, Optional
+from typing import Any
 from uuid import UUID
 from datetime import datetime, timezone
 
@@ -12,7 +12,6 @@ from backend.app.db.functions.bulk_address_uploads_functions import (
     set_combined_output_s3_uri,
     set_combining_status,
 )
-from orchestration.task_orchestrator import TaskOrchestrator
 
 logger = setup_logger()
 
@@ -36,16 +35,7 @@ def download_csv(s3_client, bucket: str, key: str) -> pd.DataFrame:
 
 
 @subtask_handler()
-def handler(
-    body: dict[str, Any],
-    context: Any,
-    orchestrator: Optional[TaskOrchestrator] = None,
-) -> str:
-    # `orchestrator` is injected by the new utilities.aws_lambda.subtask_handler
-    # decorator; unused here but accepted so the contract is uniform across
-    # callers (see issue #1103).
-    del orchestrator
-
+def handler(body: dict[str, Any], context: Any) -> str:
     task_id_str: str = body.get("task_id", "")
 
     if not task_id_str:
diff --git a/backend/ordnanceSurvey/main.py b/backend/ordnanceSurvey/main.py
index 18c4e2f2..6e82b468 100644
--- a/backend/ordnanceSurvey/main.py
+++ b/backend/ordnanceSurvey/main.py
@@ -16,7 +16,6 @@ from backend.ordnanceSurvey.helpers import (
     os_places_results_to_dataframe,
 )
 from backend.app.config import get_settings
-from orchestration.task_orchestrator import TaskOrchestrator
 from sqlalchemy import select
 from datetime import datetime
 import uuid
@@ -106,16 +105,7 @@ def save_results_to_s3(
 
 
 @subtask_handler()  # This assumes task_id and subtask_id is defined in event.Records.body
-def handler(
-    body: dict[str, Any],
-    context: Any,
-    orchestrator: Optional[TaskOrchestrator] = None,
-    local: bool = False,
-) -> None:
-    # `orchestrator` is injected by the new utilities.aws_lambda.subtask_handler
-    # decorator; unused here but accepted so the contract is uniform across
-    # callers (see issue #1103).
-    del orchestrator
+def handler(body: dict[str, Any], context: Any, local: bool = False) -> None:
 
     # delete this line after test
     # local = True
diff --git a/domain/addresses/postcode_batching.py b/domain/addresses/postcode_batching.py
index 209e0784..b73dc1bb 100644
--- a/domain/addresses/postcode_batching.py
+++ b/domain/addresses/postcode_batching.py
@@ -22,6 +22,7 @@ from __future__ import annotations
 from collections.abc import Iterable, Iterator
 
 from domain.addresses.user_address import UserAddress
+from domain.postcode import Postcode
 
 
 def iter_postcode_grouped_batches(
@@ -75,13 +76,14 @@ def iter_postcode_grouped_batches(
 
 def _group_by_postcode_in_order(
     addresses: Iterable[UserAddress],
-) -> dict[str, list[UserAddress]]:
+) -> dict[Postcode, list[UserAddress]]:
     """Group addresses by ``postcode`` preserving first-seen order.
 
     Python dicts retain insertion order since 3.7, so a plain dict suffices
-    for the same effect as pandas ``groupby(..., sort=False)``.
+    for the same effect as pandas ``groupby(..., sort=False)``. ``Postcode``
+    is a frozen value object, hence hashable and usable as the dict key.
     """
-    groups: dict[str, list[UserAddress]] = {}
+    groups: dict[Postcode, list[UserAddress]] = {}
     for address in addresses:
         groups.setdefault(address.postcode, []).append(address)
     return groups
diff --git a/domain/addresses/user_address.py b/domain/addresses/user_address.py
index 120a3659..672b2c54 100644
--- a/domain/addresses/user_address.py
+++ b/domain/addresses/user_address.py
@@ -1,9 +1,9 @@
 """The :class:`UserAddress` value object.
 
 A frozen dataclass capturing the splitter's domain entity: the raw input
-address line, a sanitised postcode, and an optional internal reference from
-the customer dataset. Postcode sanitisation runs in ``__post_init__`` so no
-caller can construct an instance with an un-normalised postcode.
+address line, a :class:`~domain.postcode.Postcode`, and an optional internal
+reference from the customer dataset. The postcode is a value object that is
+canonical by construction, so no caller can hold an un-normalised postcode.
 """
 
 from __future__ import annotations
@@ -11,7 +11,7 @@ from __future__ import annotations
 from dataclasses import dataclass, field
 from typing import Optional
 
-from domain.postcodes.sanitise import sanitise_postcode
+from domain.postcode import Postcode
 
 
 def _empty_source_row() -> dict[str, str]:
@@ -25,9 +25,9 @@ class UserAddress:
 
     Attributes:
         user_address: The free-text address string as supplied upstream.
-        postcode: The postcode; always stored in canonical form
-            (uppercased, whitespace stripped). Sanitisation is enforced by
-            :meth:`__post_init__`.
+        postcode: The postcode as a :class:`~domain.postcode.Postcode` value
+            object -- canonical (uppercased, whitespace stripped) by
+            construction.
         internal_reference: Optional customer-side identifier preserved for
             traceability through the matching pipeline.
         source_row: The complete original CSV row this address was parsed
@@ -39,12 +39,8 @@ class UserAddress:
     """
 
     user_address: str
-    postcode: str
+    postcode: Postcode
     internal_reference: Optional[str] = None
     source_row: dict[str, str] = field(
         default_factory=_empty_source_row, compare=False
     )
-
-    def __post_init__(self) -> None:
-        # Frozen dataclass: bypass the descriptor with object.__setattr__.
-        object.__setattr__(self, "postcode", sanitise_postcode(self.postcode))
diff --git a/domain/postcode.py b/domain/postcode.py
new file mode 100644
index 00000000..514e1a39
--- /dev/null
+++ b/domain/postcode.py
@@ -0,0 +1,40 @@
+"""The :class:`Postcode` value object.
+
+A frozen value object that owns postcode sanitisation. Constructing a
+``Postcode`` always yields the canonical form -- uppercase with all
+whitespace removed -- so no part of the domain can hold an un-normalised
+postcode. This matches the legacy splitter's
+``df["postcode"].str.upper().str.replace(" ", "")``.
+
+``Postcode`` is the single sanitisation point: anywhere a postcode crosses a
+domain boundary it should be wrapped in one, and ``str(postcode)`` gives the
+canonical string back for serialisation.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+
+@dataclass(frozen=True)
+class Postcode:
+    """A postcode held in canonical form.
+
+    The ``value`` passed to the constructor is sanitised eagerly in
+    :meth:`__post_init__` -- uppercased, with all whitespace (spaces, tabs,
+    newlines) removed -- so every ``Postcode`` instance is canonical by
+    construction. Two postcodes that differ only in surface whitespace or
+    case therefore compare equal.
+
+    Attributes:
+        value: The canonical postcode string (e.g. ``"SW1A1AA"``).
+    """
+
+    value: str
+
+    def __post_init__(self) -> None:
+        # Frozen dataclass: bypass the descriptor with object.__setattr__.
+        object.__setattr__(self, "value", "".join(self.value.split()).upper())
+
+    def __str__(self) -> str:
+        return self.value
diff --git a/domain/postcodes/__init__.py b/domain/postcodes/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/domain/postcodes/sanitise.py b/domain/postcodes/sanitise.py
deleted file mode 100644
index 94b0dcf7..00000000
--- a/domain/postcodes/sanitise.py
+++ /dev/null
@@ -1,23 +0,0 @@
-"""Canonical postcode sanitisation for the domain layer.
-
-The legacy postcode_splitter normalises postcodes inline with
-``df["postcode"].str.upper().str.replace(" ", "")``. This module promotes
-that operation to a pure, reusable function so the same canonical form is
-applied wherever a postcode crosses a domain boundary -- including
-:class:`domain.addresses.user_address.UserAddress` construction and future
-migrations.
-"""
-
-from __future__ import annotations
-
-
-def sanitise_postcode(s: str) -> str:
-    """Return the canonical form of a postcode.
-
-    The canonical form is uppercase with all whitespace removed. This matches
-    the legacy splitter's ``str.upper().str.replace(" ", "")`` for the
-    overwhelmingly common case of space-separated postcodes (e.g. ``"sw1a 1aa"``
-    becomes ``"SW1A1AA"``) while also tolerating tabs/newlines that can creep
-    in from CSV ingestion.
-    """
-    return "".join(s.split()).upper()
diff --git a/repositories/user_address/user_address_csv_s3_repository.py b/repositories/user_address/user_address_csv_s3_repository.py
index 7cd10bac..2432d8e9 100644
--- a/repositories/user_address/user_address_csv_s3_repository.py
+++ b/repositories/user_address/user_address_csv_s3_repository.py
@@ -20,6 +20,7 @@ from datetime import datetime, timezone
 from typing import Optional
 
 from domain.addresses.user_address import UserAddress
+from domain.postcode import Postcode
 from infrastructure.csv_s3_client import CsvS3Client
 from repositories.user_address.user_address_repository import UserAddressRepository
 
@@ -77,7 +78,7 @@ class UserAddressCsvS3Repository(UserAddressRepository):
             addresses.append(
                 UserAddress(
                     user_address=user_address,
-                    postcode=postcode,
+                    postcode=Postcode(postcode),
                     internal_reference=internal_reference,
                     source_row=row,
                 )
@@ -96,7 +97,7 @@ class UserAddressCsvS3Repository(UserAddressRepository):
         Returns the full ``s3://bucket/key`` URI.
         """
         rows: list[dict[str, str]] = [
-            {**addr.source_row, _POSTCODE_CLEAN_COLUMN: addr.postcode}
+            {**addr.source_row, _POSTCODE_CLEAN_COLUMN: str(addr.postcode)}
             for addr in addresses
         ]
         filename = (
diff --git a/repositories/user_address/user_address_repository.py b/repositories/user_address/user_address_repository.py
index d8c12855..ab9b6671 100644
--- a/repositories/user_address/user_address_repository.py
+++ b/repositories/user_address/user_address_repository.py
@@ -17,8 +17,8 @@ class UserAddressRepository(ABC):
 
     Implementations choose the underlying storage (S3 CSV, Postgres,
     in-memory, ...) but must preserve the canonical column semantics:
-    the address text, postcode (sanitised by ``UserAddress.__post_init__``),
-    and an optional internal reference.
+    the address text, postcode (a :class:`~domain.postcode.Postcode` value
+    object), and an optional internal reference.
     """
 
     @abstractmethod
diff --git a/tests/domain/addresses/test_postcode_batching.py b/tests/domain/addresses/test_postcode_batching.py
index 2dac46cc..6e52b581 100644
--- a/tests/domain/addresses/test_postcode_batching.py
+++ b/tests/domain/addresses/test_postcode_batching.py
@@ -2,12 +2,15 @@ import pytest
 
 from domain.addresses.postcode_batching import iter_postcode_grouped_batches
 from domain.addresses.user_address import UserAddress
+from domain.postcode import Postcode
 
 
 def _addrs(postcode: str, n: int) -> list[UserAddress]:
     """Build ``n`` addresses sharing a postcode, with distinct address lines."""
     return [
-        UserAddress(user_address=f"{i} {postcode} Street", postcode=postcode)
+        UserAddress(
+            user_address=f"{i} {postcode} Street", postcode=Postcode(postcode)
+        )
         for i in range(n)
     ]
 
@@ -38,8 +41,8 @@ def test_flush_on_overflow_before_adding_next_postcode() -> None:
     addrs = _addrs("AA1 1AA", 3) + _addrs("BB2 2BB", 3)
     batches = list(iter_postcode_grouped_batches(addrs, max_batch_size=5))
     assert len(batches) == 2
-    assert [a.postcode for a in batches[0]] == ["AA11AA"] * 3
-    assert [a.postcode for a in batches[1]] == ["BB22BB"] * 3
+    assert [str(a.postcode) for a in batches[0]] == ["AA11AA"] * 3
+    assert [str(a.postcode) for a in batches[1]] == ["BB22BB"] * 3
 
 
 def test_single_postcode_group_exceeding_cap_is_dispatched_whole() -> None:
@@ -61,9 +64,9 @@ def test_oversize_group_flushes_existing_buffer_first() -> None:
         iter_postcode_grouped_batches(small + big + tail, max_batch_size=5)
     )
     assert len(batches) == 3
-    assert [a.postcode for a in batches[0]] == ["AA11AA", "AA11AA"]
-    assert [a.postcode for a in batches[1]] == ["BB22BB"] * 7
-    assert [a.postcode for a in batches[2]] == ["CC33CC"]
+    assert [str(a.postcode) for a in batches[0]] == ["AA11AA", "AA11AA"]
+    assert [str(a.postcode) for a in batches[1]] == ["BB22BB"] * 7
+    assert [str(a.postcode) for a in batches[2]] == ["CC33CC"]
 
 
 def test_final_flush_yields_remaining_buffer() -> None:
@@ -80,7 +83,7 @@ def test_postcode_grouping_preserves_first_seen_order() -> None:
     b1, b2 = _addrs("AA1 1AA", 2)
     batches = list(iter_postcode_grouped_batches([a1, b1, a2, b2]))
     assert len(batches) == 1
-    assert [a.postcode for a in batches[0]] == [
+    assert [str(a.postcode) for a in batches[0]] == [
         "ZZ99ZZ",
         "ZZ99ZZ",
         "AA11AA",
diff --git a/tests/domain/addresses/test_user_address.py b/tests/domain/addresses/test_user_address.py
index 4d8322da..fa44ad61 100644
--- a/tests/domain/addresses/test_user_address.py
+++ b/tests/domain/addresses/test_user_address.py
@@ -3,69 +3,77 @@ import dataclasses
 import pytest
 
 from domain.addresses.user_address import UserAddress
+from domain.postcode import Postcode
 
 
-def test_user_address_sanitises_postcode_on_construction() -> None:
-    addr = UserAddress(user_address="1 The Street", postcode="sw1a 1aa")
-    assert addr.postcode == "SW1A1AA"
+def test_user_address_holds_postcode_value_object() -> None:
+    addr = UserAddress(user_address="1 The Street", postcode=Postcode("sw1a 1aa"))
+    assert addr.postcode == Postcode("SW1A1AA")
 
 
 def test_user_address_preserves_user_address_verbatim() -> None:
     # The free-text user_address string is intentionally NOT normalised --
-    # only the postcode is canonicalised at the boundary.
-    addr = UserAddress(user_address="  1 The   Street  ", postcode="sw1a 1aa")
+    # only the postcode is canonicalised, and that happens inside Postcode.
+    addr = UserAddress(
+        user_address="  1 The   Street  ", postcode=Postcode("SW1A1AA")
+    )
     assert addr.user_address == "  1 The   Street  "
 
 
 def test_user_address_internal_reference_defaults_to_none() -> None:
-    addr = UserAddress(user_address="1 The Street", postcode="SW1A1AA")
+    addr = UserAddress(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
     assert addr.internal_reference is None
 
 
 def test_user_address_internal_reference_accepted() -> None:
     addr = UserAddress(
         user_address="1 The Street",
-        postcode="SW1A1AA",
+        postcode=Postcode("SW1A1AA"),
         internal_reference="cust-42",
     )
     assert addr.internal_reference == "cust-42"
 
 
 def test_user_address_is_frozen() -> None:
-    addr = UserAddress(user_address="1 The Street", postcode="SW1A1AA")
+    addr = UserAddress(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
     with pytest.raises(dataclasses.FrozenInstanceError):
-        addr.postcode = "OTHER"  # type: ignore[misc]
+        addr.postcode = Postcode("OTHER")  # type: ignore[misc]
 
 
-def test_user_address_equality_uses_sanitised_postcode() -> None:
-    # Two instances constructed with different surface forms of the same
-    # postcode must compare equal because sanitisation runs eagerly.
-    a = UserAddress(user_address="1 The Street", postcode="sw1a 1aa")
-    b = UserAddress(user_address="1 The Street", postcode="SW1A1AA")
+def test_user_address_equality_uses_canonical_postcode() -> None:
+    # Postcode sanitises eagerly, so addresses built from different surface
+    # forms of the same postcode compare equal.
+    a = UserAddress(user_address="1 The Street", postcode=Postcode("sw1a 1aa"))
+    b = UserAddress(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
     assert a == b
 
 
 def test_user_address_source_row_defaults_to_empty_dict() -> None:
-    addr = UserAddress(user_address="1 The Street", postcode="SW1A1AA")
+    addr = UserAddress(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
     assert addr.source_row == {}
 
 
 def test_user_address_carries_source_row() -> None:
     row = {"Address 1": "1 The Street", "postcode": "SW1A 1AA", "SAP Score": "72"}
     addr = UserAddress(
-        user_address="1 The Street", postcode="SW1A 1AA", source_row=row
+        user_address="1 The Street",
+        postcode=Postcode("SW1A 1AA"),
+        source_row=row,
     )
     assert addr.source_row == row
 
 
 def test_user_address_equality_ignores_source_row() -> None:
     # source_row is excluded from equality (and hashing): identity stays
-    # defined by the parsed fields, so two addresses parsed from rows with
-    # different incidental columns still compare equal.
+    # defined by the parsed fields.
     a = UserAddress(
-        user_address="1 The Street", postcode="SW1A1AA", source_row={"x": "1"}
+        user_address="1 The Street",
+        postcode=Postcode("SW1A1AA"),
+        source_row={"x": "1"},
     )
     b = UserAddress(
-        user_address="1 The Street", postcode="SW1A1AA", source_row={"y": "2"}
+        user_address="1 The Street",
+        postcode=Postcode("SW1A1AA"),
+        source_row={"y": "2"},
     )
     assert a == b
diff --git a/tests/domain/postcodes/__init__.py b/tests/domain/postcodes/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/domain/postcodes/test_sanitise.py b/tests/domain/postcodes/test_sanitise.py
deleted file mode 100644
index edd1679c..00000000
--- a/tests/domain/postcodes/test_sanitise.py
+++ /dev/null
@@ -1,28 +0,0 @@
-from domain.postcodes.sanitise import sanitise_postcode
-
-
-def test_sanitise_uppercases() -> None:
-    assert sanitise_postcode("sw1a1aa") == "SW1A1AA"
-
-
-def test_sanitise_strips_internal_spaces() -> None:
-    assert sanitise_postcode("sw1a 1aa") == "SW1A1AA"
-
-
-def test_sanitise_strips_leading_and_trailing_whitespace() -> None:
-    assert sanitise_postcode("  sw1a 1aa  ") == "SW1A1AA"
-
-
-def test_sanitise_strips_tabs_and_newlines() -> None:
-    # CSV ingestion occasionally introduces stray whitespace characters; the
-    # canonical form must absorb them just like literal spaces.
-    assert sanitise_postcode("sw1a\t1aa\n") == "SW1A1AA"
-
-
-def test_sanitise_already_canonical_is_idempotent() -> None:
-    assert sanitise_postcode("SW1A1AA") == "SW1A1AA"
-    assert sanitise_postcode(sanitise_postcode("sw1a 1aa")) == "SW1A1AA"
-
-
-def test_sanitise_empty_string() -> None:
-    assert sanitise_postcode("") == ""
diff --git a/tests/domain/test_postcode.py b/tests/domain/test_postcode.py
new file mode 100644
index 00000000..89d5cdc8
--- /dev/null
+++ b/tests/domain/test_postcode.py
@@ -0,0 +1,48 @@
+import dataclasses
+
+import pytest
+
+from domain.postcode import Postcode
+
+
+def test_postcode_uppercases() -> None:
+    assert Postcode("sw1a1aa").value == "SW1A1AA"
+
+
+def test_postcode_strips_internal_spaces() -> None:
+    assert Postcode("sw1a 1aa").value == "SW1A1AA"
+
+
+def test_postcode_strips_leading_and_trailing_whitespace() -> None:
+    assert Postcode("  sw1a 1aa  ").value == "SW1A1AA"
+
+
+def test_postcode_strips_tabs_and_newlines() -> None:
+    # CSV ingestion occasionally introduces stray whitespace characters; the
+    # canonical form must absorb them just like literal spaces.
+    assert Postcode("sw1a\t1aa\n").value == "SW1A1AA"
+
+
+def test_postcode_construction_is_idempotent() -> None:
+    once = Postcode("sw1a 1aa")
+    assert Postcode(once.value).value == "SW1A1AA"
+
+
+def test_postcode_empty_string() -> None:
+    assert Postcode("").value == ""
+
+
+def test_postcode_str_returns_canonical_value() -> None:
+    assert str(Postcode("sw1a 1aa")) == "SW1A1AA"
+
+
+def test_postcode_equality_ignores_surface_form() -> None:
+    # Differing case / whitespace sanitise to the same canonical value, so
+    # the value objects compare equal.
+    assert Postcode("sw1a 1aa") == Postcode("SW1A1AA")
+
+
+def test_postcode_is_frozen() -> None:
+    postcode = Postcode("SW1A1AA")
+    with pytest.raises(dataclasses.FrozenInstanceError):
+        postcode.value = "OTHER"  # type: ignore[misc]
diff --git a/tests/repositories/user_address/test_user_address_csv_s3_repository.py b/tests/repositories/user_address/test_user_address_csv_s3_repository.py
index 48733b55..c1acee32 100644
--- a/tests/repositories/user_address/test_user_address_csv_s3_repository.py
+++ b/tests/repositories/user_address/test_user_address_csv_s3_repository.py
@@ -4,6 +4,7 @@ import pytest
 from moto import mock_aws
 
 from domain.addresses.user_address import UserAddress
+from domain.postcode import Postcode
 from infrastructure.csv_s3_client import CsvS3Client
 from repositories.user_address.user_address_csv_s3_repository import (
     UserAddressCsvS3Repository,
@@ -47,7 +48,7 @@ def test_load_batch_parses_address_postcode_and_reference(
     assert len(addresses) == 1
     address = addresses[0]
     assert address.user_address == "1 High Street, Flat 2, Townville"
-    assert address.postcode == "SW1A1AA"
+    assert address.postcode == Postcode("SW1A1AA")
     assert address.internal_reference == "REF-001"
 
 
@@ -69,7 +70,7 @@ def test_load_batch_uses_only_address_1_when_others_missing(
 
     assert len(addresses) == 1
     assert addresses[0].user_address == "10 Cardiff Road"
-    assert addresses[0].postcode == "CF101AA"
+    assert addresses[0].postcode == Postcode("CF101AA")
     assert addresses[0].internal_reference == "REF-002"
 
 
@@ -91,7 +92,7 @@ def test_load_batch_handles_missing_internal_reference(
 
     assert len(addresses) == 1
     assert addresses[0].user_address == "5 Park Lane"
-    assert addresses[0].postcode == "M11AA"
+    assert addresses[0].postcode == Postcode("M11AA")
     assert addresses[0].internal_reference is None
 
 
@@ -154,7 +155,7 @@ def test_save_batch_returns_uri_under_path_prefix(
     addresses = [
         UserAddress(
             user_address="1 High Street",
-            postcode="SW1A 1AA",
+            postcode=Postcode("SW1A 1AA"),
             source_row={"Address 1": "1 High Street", "postcode": "SW1A 1AA"},
         ),
     ]
@@ -199,7 +200,7 @@ def test_save_batch_uses_unique_filename_per_call(
     addresses = [
         UserAddress(
             user_address="1 High Street",
-            postcode="SW1A 1AA",
+            postcode=Postcode("SW1A 1AA"),
             source_row={"Address 1": "1 High Street", "postcode": "SW1A 1AA"},
         ),
     ]