From 0dee917094057da947dd0ff3ec9b28833d48cd9b Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Fri, 22 May 2026 08:27:59 +0000 Subject: [PATCH] unsanitised address list instead of raw address list --- applications/SAL/handler.py | 20 ++++----- applications/postcode_splitter/handler.py | 8 ++-- domain/addresses/postcode_batching.py | 6 +-- ...{raw_address.py => unsanitised_address.py} | 4 +- .../postcode_splitter_orchestrator.py | 12 ++--- orchestration/sal_orchestrator.py | 20 ++++----- .../__init__.py | 0 ...nitised_address_list_csv_s3_repository.py} | 14 +++--- .../unsanitised_address_list_repository.py} | 4 +- .../addresses/test_postcode_batching.py | 4 +- ...address.py => test_unsanitised_address.py} | 44 +++++++++---------- ...lord_description_overrides_orchestrator.py | 34 +++++++------- .../test_postcode_splitter_orchestrator.py | 10 ++--- .../__init__.py | 0 .../conftest.py | 0 ...nitised_address_list_csv_s3_repository.py} | 36 +++++++-------- 16 files changed, 107 insertions(+), 109 deletions(-) rename domain/addresses/{raw_address.py => unsanitised_address.py} (84%) rename repositories/{raw_address => unsanitised_address}/__init__.py (100%) rename repositories/{raw_address/raw_address_list_csv_s3_repository.py => unsanitised_address/unsanitised_address_list_csv_s3_repository.py} (84%) rename repositories/{raw_address/raw_address_list_repository.py => unsanitised_address/unsanitised_address_list_repository.py} (70%) rename tests/domain/addresses/{test_raw_address.py => test_unsanitised_address.py} (51%) rename tests/repositories/{raw_address => unsanitised_address}/__init__.py (100%) rename tests/repositories/{raw_address => unsanitised_address}/conftest.py (100%) rename tests/repositories/{raw_address/test_raw_address_list_csv_s3_repository.py => unsanitised_address/test_unsanitised_address_list_csv_s3_repository.py} (86%) diff --git a/applications/SAL/handler.py b/applications/SAL/handler.py index 69f4c04d..fbed3b83 100644 --- 
a/applications/SAL/handler.py +++ b/applications/SAL/handler.py @@ -4,10 +4,10 @@ from orchestration.sal_orchestrator import ( SALOrchestrator, ) from infrastructure.csv_s3_client import CsvS3Client -from repositories.raw_address.raw_address_list_csv_s3_repository import ( - RawAddressListCsvS3Repository, +from repositories.unsanitised_address.unsanitised_address_list_csv_s3_repository import ( + UnsanitisedAddressListCsvS3Repository, ) -from domain.addresses.raw_address import AddressList +from domain.addresses.unsanitised_address import AddressList def handler( @@ -24,18 +24,16 @@ def handler( boto_s3: Any = boto3_client("s3") csv_client = CsvS3Client(boto_s3, bucket) - raw_address_repo = RawAddressListCsvS3Repository(csv_client, bucket) + unsanitised_address_repo = UnsanitisedAddressListCsvS3Repository(csv_client, bucket) - orchestrator = SALOrchestrator( - raw_address_repo=raw_address_repo, + sal = SALOrchestrator( + unsanitised_address_repo=unsanitised_address_repo, ) - list_of_raw_address: AddressList = orchestrator.get_raw_addresses( - input_s3_uri=s3_uri - ) + addressList: AddressList = sal.get_unsanitised_addresses(input_s3_uri=s3_uri) - col_to_desc_map = orchestrator.get_col_to_description_mappings( - list_of_raw_address=list_of_raw_address + col_to_desc_map = sal.get_col_to_description_mappings( + list_of_unsanitised_address=addressList ) # Read csv of user input diff --git a/applications/postcode_splitter/handler.py b/applications/postcode_splitter/handler.py index 071ff6f9..6614ecda 100644 --- a/applications/postcode_splitter/handler.py +++ b/applications/postcode_splitter/handler.py @@ -12,8 +12,8 @@ from infrastructure.address2uprn_queue_client import Address2UprnQueueClient from infrastructure.csv_s3_client import CsvS3Client from orchestration.postcode_splitter_orchestrator import PostcodeSplitterOrchestrator from orchestration.task_orchestrator import TaskOrchestrator -from repositories.raw_address.raw_address_list_csv_s3_repository import ( - 
RawAddressListCsvS3Repository, +from repositories.unsanitised_address.unsanitised_address_list_csv_s3_repository import ( + UnsanitisedAddressListCsvS3Repository, ) from utilities.aws_lambda.subtask_handler import subtask_handler @@ -36,12 +36,12 @@ def handler( boto_sqs: Any = boto3_client("sqs") csv_client = CsvS3Client(boto_s3, bucket) - user_address_repo = RawAddressListCsvS3Repository(csv_client, bucket) + unsanitised_address_repo = UnsanitisedAddressListCsvS3Repository(csv_client, bucket) queue_client = Address2UprnQueueClient(boto_sqs, queue_url) splitter = PostcodeSplitterOrchestrator( task_orchestrator=task_orchestrator, - user_address_repo=user_address_repo, + unsanitised_address_repo=unsanitised_address_repo, queue_client=queue_client, ) diff --git a/domain/addresses/postcode_batching.py b/domain/addresses/postcode_batching.py index dd7203b1..18135dbd 100644 --- a/domain/addresses/postcode_batching.py +++ b/domain/addresses/postcode_batching.py @@ -2,12 +2,12 @@ from __future__ import annotations from collections.abc import Iterable, Iterator -from domain.addresses.raw_address import AddressList, RawAddress +from domain.addresses.unsanitised_address import AddressList, UnsanitisedAddress from domain.postcode import Postcode def iter_postcode_grouped_batches( - addresses: Iterable[RawAddress], + addresses: Iterable[UnsanitisedAddress], *, max_batch_size: int = 500, ) -> Iterator[AddressList]: @@ -43,7 +43,7 @@ def iter_postcode_grouped_batches( def _group_by_postcode_in_order( - addresses: Iterable[RawAddress], + addresses: Iterable[UnsanitisedAddress], ) -> dict[Postcode, AddressList]: groups: dict[Postcode, AddressList] = {} for address in addresses: diff --git a/domain/addresses/raw_address.py b/domain/addresses/unsanitised_address.py similarity index 84% rename from domain/addresses/raw_address.py rename to domain/addresses/unsanitised_address.py index f9a2789e..a33f0d88 100644 --- a/domain/addresses/raw_address.py +++ 
b/domain/addresses/unsanitised_address.py @@ -11,7 +11,7 @@ def _empty_source_row() -> dict[str, str]: @dataclass(frozen=True) -class RawAddress: +class UnsanitisedAddress: address: str postcode: Postcode org_reference: Optional[str] = None @@ -21,4 +21,4 @@ class RawAddress: # A batch of raw, pre-standardisation addresses as supplied by a landlord. -AddressList = NewType("AddressList", list[RawAddress]) +AddressList = NewType("AddressList", list[UnsanitisedAddress]) diff --git a/orchestration/postcode_splitter_orchestrator.py b/orchestration/postcode_splitter_orchestrator.py index 20145524..d8d81c65 100644 --- a/orchestration/postcode_splitter_orchestrator.py +++ b/orchestration/postcode_splitter_orchestrator.py @@ -5,8 +5,8 @@ from uuid import UUID from infrastructure.address2uprn_queue_client import Address2UprnQueueClient from orchestration.task_orchestrator import TaskOrchestrator from domain.addresses.postcode_batching import iter_postcode_grouped_batches -from repositories.raw_address.raw_address_list_repository import ( - RawAddressListRepository, +from repositories.unsanitised_address.unsanitised_address_list_repository import ( + UnsanitisedAddressListRepository, ) @@ -14,12 +14,12 @@ class PostcodeSplitterOrchestrator: def __init__( self, task_orchestrator: TaskOrchestrator, - raw_address_repo: RawAddressListRepository, + unsanitised_address_repo: UnsanitisedAddressListRepository, queue_client: Address2UprnQueueClient, max_batch_size: int = 500, ) -> None: self._task_orchestrator = task_orchestrator - self._raw_address_repo = raw_address_repo + self._unsanitised_address_repo = unsanitised_address_repo self._queue_client = queue_client self._max_batch_size = max_batch_size @@ -30,7 +30,7 @@ class PostcodeSplitterOrchestrator: parent_subtask_id: UUID, input_s3_uri: str, ) -> list[UUID]: - addresses = self._raw_address_repo.load_batch(input_s3_uri) + addresses = self._unsanitised_address_repo.load_batch(input_s3_uri) path_prefix = ( 
f"ara_postcode_splitter_batches/{parent_task_id}/{parent_subtask_id}" ) @@ -39,7 +39,7 @@ class PostcodeSplitterOrchestrator: for batch in iter_postcode_grouped_batches( addresses, max_batch_size=self._max_batch_size ): - batch_uri = self._raw_address_repo.save_batch(batch, path_prefix) + batch_uri = self._unsanitised_address_repo.save_batch(batch, path_prefix) child = self._task_orchestrator.create_child_subtask( parent_task_id, inputs={ diff --git a/orchestration/sal_orchestrator.py b/orchestration/sal_orchestrator.py index f55947e7..1eb768de 100644 --- a/orchestration/sal_orchestrator.py +++ b/orchestration/sal_orchestrator.py @@ -1,25 +1,25 @@ -from repositories.raw_address.raw_address_list_repository import ( - RawAddressListRepository, +from repositories.unsanitised_address.unsanitised_address_list_repository import ( + UnsanitisedAddressListRepository, ) -from domain.addresses.raw_address import AddressList +from domain.addresses.unsanitised_address import AddressList class SALOrchestrator: - def __init__(self, raw_address_repo: RawAddressListRepository) -> None: - self._raw_address_repo = raw_address_repo + def __init__(self, unsanitised_address_repo: UnsanitisedAddressListRepository) -> None: + self._unsanitised_address_repo = unsanitised_address_repo - def get_raw_addresses( + def get_unsanitised_addresses( self, input_s3_uri: str, ) -> AddressList: - return self._raw_address_repo.load_batch(input_s3_uri) + return self._unsanitised_address_repo.load_batch(input_s3_uri) def get_col_to_description_mappings( - self, list_of_raw_address: AddressList + self, list_of_unsanitised_address: AddressList ) -> dict[str, set[str]]: mappings: dict[str, set[str]] = {} - for raw_address in list_of_raw_address: - for key, value in raw_address.additional_info.items(): + for unsanitised_address in list_of_unsanitised_address: + for key, value in unsanitised_address.additional_info.items(): # Lower-case so case-only typos collapse to one variant. 
mappings.setdefault(key, set()).add(value.lower()) return mappings diff --git a/repositories/raw_address/__init__.py b/repositories/unsanitised_address/__init__.py similarity index 100% rename from repositories/raw_address/__init__.py rename to repositories/unsanitised_address/__init__.py diff --git a/repositories/raw_address/raw_address_list_csv_s3_repository.py b/repositories/unsanitised_address/unsanitised_address_list_csv_s3_repository.py similarity index 84% rename from repositories/raw_address/raw_address_list_csv_s3_repository.py rename to repositories/unsanitised_address/unsanitised_address_list_csv_s3_repository.py index a636b17b..6c382df0 100644 --- a/repositories/raw_address/raw_address_list_csv_s3_repository.py +++ b/repositories/unsanitised_address/unsanitised_address_list_csv_s3_repository.py @@ -4,11 +4,11 @@ import uuid from datetime import datetime, timezone from typing import Optional -from domain.addresses.raw_address import AddressList, RawAddress +from domain.addresses.unsanitised_address import AddressList, UnsanitisedAddress from domain.postcode import Postcode from infrastructure.csv_s3_client import CsvS3Client -from repositories.raw_address.raw_address_list_repository import ( - RawAddressListRepository, +from repositories.unsanitised_address.unsanitised_address_list_repository import ( + UnsanitisedAddressListRepository, ) _ADDRESS_COLUMNS: tuple[str, str, str] = ("Address 1", "Address 2", "Address 3") @@ -17,7 +17,7 @@ _INTERNAL_REFERENCE_COLUMN: str = "Internal Reference" _POSTCODE_CLEAN_COLUMN: str = "postcode_clean" -class RawAddressListCsvS3Repository(RawAddressListRepository): +class UnsanitisedAddressListCsvS3Repository(UnsanitisedAddressListRepository): def __init__(self, csv_client: CsvS3Client, bucket: str) -> None: self._csv_client = csv_client self._bucket = bucket @@ -36,13 +36,13 @@ class RawAddressListCsvS3Repository(RawAddressListRepository): for col in _ADDRESS_COLUMNS if col in row and row[col].strip() ] - raw_address = 
", ".join(parts) + unsanitised_address = ", ".join(parts) postcode = row.get(_POSTCODE_COLUMN, "") raw_ref = row.get(_INTERNAL_REFERENCE_COLUMN, "").strip() internal_reference: Optional[str] = raw_ref or None addresses.append( - RawAddress( - address=raw_address, + UnsanitisedAddress( + address=unsanitised_address, postcode=Postcode(postcode), org_reference=internal_reference, additional_info=row, diff --git a/repositories/raw_address/raw_address_list_repository.py b/repositories/unsanitised_address/unsanitised_address_list_repository.py similarity index 70% rename from repositories/raw_address/raw_address_list_repository.py rename to repositories/unsanitised_address/unsanitised_address_list_repository.py index 8abb96be..2f842fcd 100644 --- a/repositories/raw_address/raw_address_list_repository.py +++ b/repositories/unsanitised_address/unsanitised_address_list_repository.py @@ -2,10 +2,10 @@ from __future__ import annotations from abc import ABC, abstractmethod -from domain.addresses.raw_address import AddressList +from domain.addresses.unsanitised_address import AddressList -class RawAddressListRepository(ABC): +class UnsanitisedAddressListRepository(ABC): @abstractmethod def load_batch(self, s3_uri: str) -> AddressList: ... 
diff --git a/tests/domain/addresses/test_postcode_batching.py b/tests/domain/addresses/test_postcode_batching.py index c7bb2d00..443e43df 100644 --- a/tests/domain/addresses/test_postcode_batching.py +++ b/tests/domain/addresses/test_postcode_batching.py @@ -1,14 +1,14 @@ import pytest from domain.addresses.postcode_batching import iter_postcode_grouped_batches -from domain.addresses.raw_address import AddressList, RawAddress +from domain.addresses.unsanitised_address import AddressList, UnsanitisedAddress from domain.postcode import Postcode def _addrs(postcode: str, n: int) -> AddressList: return AddressList( [ - RawAddress(address=f"{i} {postcode} Street", postcode=Postcode(postcode)) + UnsanitisedAddress(address=f"{i} {postcode} Street", postcode=Postcode(postcode)) for i in range(n) ] ) diff --git a/tests/domain/addresses/test_raw_address.py b/tests/domain/addresses/test_unsanitised_address.py similarity index 51% rename from tests/domain/addresses/test_raw_address.py rename to tests/domain/addresses/test_unsanitised_address.py index 0309b45e..aa6d0071 100644 --- a/tests/domain/addresses/test_raw_address.py +++ b/tests/domain/addresses/test_unsanitised_address.py @@ -2,36 +2,36 @@ import dataclasses import pytest -from domain.addresses.raw_address import RawAddress +from domain.addresses.unsanitised_address import UnsanitisedAddress from domain.postcode import Postcode -def test_raw_address_holds_postcode_value_object() -> None: +def test_unsanitised_address_holds_postcode_value_object() -> None: # act - addr = RawAddress(address="1 The Street", postcode=Postcode("sw1a 1aa")) + addr = UnsanitisedAddress(address="1 The Street", postcode=Postcode("sw1a 1aa")) # assert assert addr.postcode == Postcode("SW1A1AA") -def test_raw_address_preserves_raw_address_verbatim() -> None: - # The free-text raw_address string is intentionally NOT normalised -- +def test_unsanitised_address_preserves_unsanitised_address_verbatim() -> None: + # The free-text unsanitised_address 
string is intentionally NOT normalised -- # only the postcode is canonicalised, and that happens inside Postcode. # act - addr = RawAddress(address=" 1 The Street ", postcode=Postcode("SW1A1AA")) + addr = UnsanitisedAddress(address=" 1 The Street ", postcode=Postcode("SW1A1AA")) # assert assert addr.address == " 1 The Street " -def test_raw_address_internal_reference_defaults_to_none() -> None: +def test_unsanitised_address_internal_reference_defaults_to_none() -> None: # act - addr = RawAddress(address="1 The Street", postcode=Postcode("SW1A1AA")) + addr = UnsanitisedAddress(address="1 The Street", postcode=Postcode("SW1A1AA")) # assert assert addr.org_reference is None -def test_raw_address_internal_reference_accepted() -> None: +def test_unsanitised_address_internal_reference_accepted() -> None: # act - addr = RawAddress( + addr = UnsanitisedAddress( address="1 The Street", postcode=Postcode("SW1A1AA"), org_reference="cust-42", @@ -40,36 +40,36 @@ def test_raw_address_internal_reference_accepted() -> None: assert addr.org_reference == "cust-42" -def test_raw_address_is_frozen() -> None: +def test_unsanitised_address_is_frozen() -> None: # arrange - addr = RawAddress(address="1 The Street", postcode=Postcode("SW1A1AA")) + addr = UnsanitisedAddress(address="1 The Street", postcode=Postcode("SW1A1AA")) # act / assert with pytest.raises(dataclasses.FrozenInstanceError): addr.postcode = Postcode("OTHER") # type: ignore[misc] -def test_raw_address_equality_uses_canonical_postcode() -> None: +def test_unsanitised_address_equality_uses_canonical_postcode() -> None: # Postcode sanitises eagerly, so addresses built from different surface # forms of the same postcode compare equal. 
# arrange - a = RawAddress(address="1 The Street", postcode=Postcode("sw1a 1aa")) - b = RawAddress(address="1 The Street", postcode=Postcode("SW1A1AA")) + a = UnsanitisedAddress(address="1 The Street", postcode=Postcode("sw1a 1aa")) + b = UnsanitisedAddress(address="1 The Street", postcode=Postcode("SW1A1AA")) # act / assert assert a == b -def test_raw_address_source_row_defaults_to_empty_dict() -> None: +def test_unsanitised_address_source_row_defaults_to_empty_dict() -> None: # act - addr = RawAddress(address="1 The Street", postcode=Postcode("SW1A1AA")) + addr = UnsanitisedAddress(address="1 The Street", postcode=Postcode("SW1A1AA")) # assert assert addr.additional_info == {} -def test_raw_address_carries_source_row() -> None: +def test_unsanitised_address_carries_source_row() -> None: # arrange row = {"Address 1": "1 The Street", "postcode": "SW1A 1AA", "SAP Score": "72"} # act - addr = RawAddress( + addr = UnsanitisedAddress( address="1 The Street", postcode=Postcode("SW1A 1AA"), additional_info=row, @@ -78,16 +78,16 @@ def test_raw_address_carries_source_row() -> None: assert addr.additional_info == row -def test_raw_address_equality_ignores_source_row() -> None: +def test_unsanitised_address_equality_ignores_source_row() -> None: # source_row is excluded from equality (and hashing): identity stays # defined by the parsed fields. 
# arrange - a = RawAddress( + a = UnsanitisedAddress( address="1 The Street", postcode=Postcode("SW1A1AA"), additional_info={"x": "1"}, ) - b = RawAddress( + b = UnsanitisedAddress( address="1 The Street", postcode=Postcode("SW1A1AA"), additional_info={"y": "2"}, diff --git a/tests/orchestration/test_landlord_description_overrides_orchestrator.py b/tests/orchestration/test_landlord_description_overrides_orchestrator.py index 133d5b39..7e2c5167 100644 --- a/tests/orchestration/test_landlord_description_overrides_orchestrator.py +++ b/tests/orchestration/test_landlord_description_overrides_orchestrator.py @@ -1,16 +1,16 @@ from __future__ import annotations -from domain.addresses.raw_address import AddressList, RawAddress +from domain.addresses.unsanitised_address import AddressList, UnsanitisedAddress from domain.postcode import Postcode from orchestration.sal_orchestrator import ( SALOrchestrator, ) -from repositories.raw_address.raw_address_list_repository import ( - RawAddressListRepository, +from repositories.unsanitised_address.unsanitised_address_list_repository import ( + UnsanitisedAddressListRepository, ) -class _StubRawAddressRepository(RawAddressListRepository): +class _StubUnsanitisedAddressRepository(UnsanitisedAddressListRepository): """``get_col_to_description_mappings`` never touches the repo.""" def load_batch(self, s3_uri: str) -> AddressList: @@ -20,8 +20,8 @@ class _StubRawAddressRepository(RawAddressListRepository): raise NotImplementedError() -def _make_raw_address(landlord_additional_info: dict[str, str]) -> RawAddress: - return RawAddress( +def _make_unsanitised_address(landlord_additional_info: dict[str, str]) -> UnsanitisedAddress: + return UnsanitisedAddress( address="1 High St", postcode=Postcode("AA1 1AA"), additional_info=landlord_additional_info, @@ -29,16 +29,16 @@ def _make_raw_address(landlord_additional_info: dict[str, str]) -> RawAddress: def _orchestrator() -> SALOrchestrator: - return 
SALOrchestrator(raw_address_repo=_StubRawAddressRepository()) + return SALOrchestrator(unsanitised_address_repo=_StubUnsanitisedAddressRepository()) def test_collects_every_value_per_shared_key() -> None: # arrange: every address carries the same keys, all values distinct. addresses = AddressList( [ - _make_raw_address({"description": "cosy", "condition": "new"}), - _make_raw_address({"description": "spacious", "condition": "worn"}), - _make_raw_address({"description": "bright", "condition": "fair"}), + _make_unsanitised_address({"description": "cosy", "condition": "new"}), + _make_unsanitised_address({"description": "spacious", "condition": "worn"}), + _make_unsanitised_address({"description": "bright", "condition": "fair"}), ] ) @@ -56,9 +56,9 @@ def test_repeated_values_collapse_to_one_variant() -> None: # arrange: two addresses share the same wall description. addresses = AddressList( [ - _make_raw_address({"description": "cosy"}), - _make_raw_address({"description": "cosy"}), - _make_raw_address({"description": "bright"}), + _make_unsanitised_address({"description": "cosy"}), + _make_unsanitised_address({"description": "cosy"}), + _make_unsanitised_address({"description": "bright"}), ] ) @@ -73,9 +73,9 @@ def test_case_only_variants_collapse_to_one() -> None: # arrange: the same description typed with inconsistent casing. 
addresses = AddressList( [ - _make_raw_address({"description": "Cosy"}), - _make_raw_address({"description": "cosy"}), - _make_raw_address({"description": "COSY"}), + _make_unsanitised_address({"description": "Cosy"}), + _make_unsanitised_address({"description": "cosy"}), + _make_unsanitised_address({"description": "COSY"}), ] ) @@ -96,7 +96,7 @@ def test_empty_address_list_yields_empty_mapping() -> None: def test_single_address_yields_single_value_per_key() -> None: # arrange - addresses = AddressList([_make_raw_address({"description": "cosy"})]) + addresses = AddressList([_make_unsanitised_address({"description": "cosy"})]) # act mappings = _orchestrator().get_col_to_description_mappings(addresses) diff --git a/tests/orchestration/test_postcode_splitter_orchestrator.py b/tests/orchestration/test_postcode_splitter_orchestrator.py index 1540112f..4317156c 100644 --- a/tests/orchestration/test_postcode_splitter_orchestrator.py +++ b/tests/orchestration/test_postcode_splitter_orchestrator.py @@ -18,8 +18,8 @@ from orchestration.postcode_splitter_orchestrator import PostcodeSplitterOrchest from orchestration.task_orchestrator import TaskOrchestrator from repositories.tasks.subtask_postgres_repository import SubTaskPostgresRepository from repositories.tasks.task_postgres_repository import TaskPostgresRepository -from repositories.raw_address.raw_address_list_csv_s3_repository import ( - RawAddressListCsvS3Repository, +from repositories.unsanitised_address.unsanitised_address_list_csv_s3_repository import ( + UnsanitisedAddressListCsvS3Repository, ) BUCKET = "splitter-bucket" @@ -64,7 +64,7 @@ class Harness: csv_client: CsvS3Client boto_sqs: Any queue_url: str - repo: RawAddressListCsvS3Repository + repo: UnsanitisedAddressListCsvS3Repository @pytest.fixture @@ -78,7 +78,7 @@ def harness(db_engine: Engine) -> Iterator[Harness]: queue_url = cast(str, queue["QueueUrl"]) csv_client = CsvS3Client(boto_s3, BUCKET) - repo = RawAddressListCsvS3Repository(csv_client, BUCKET) + 
repo = UnsanitisedAddressListCsvS3Repository(csv_client, BUCKET) queue_client = Address2UprnQueueClient(boto_sqs, queue_url) # DB: ephemeral PostgreSQL TaskOrchestrator @@ -91,7 +91,7 @@ def harness(db_engine: Engine) -> Iterator[Harness]: splitter = PostcodeSplitterOrchestrator( task_orchestrator=task_orchestrator, - raw_address_repo=repo, + unsanitised_address_repo=repo, queue_client=queue_client, max_batch_size=3, ) diff --git a/tests/repositories/raw_address/__init__.py b/tests/repositories/unsanitised_address/__init__.py similarity index 100% rename from tests/repositories/raw_address/__init__.py rename to tests/repositories/unsanitised_address/__init__.py diff --git a/tests/repositories/raw_address/conftest.py b/tests/repositories/unsanitised_address/conftest.py similarity index 100% rename from tests/repositories/raw_address/conftest.py rename to tests/repositories/unsanitised_address/conftest.py diff --git a/tests/repositories/raw_address/test_raw_address_list_csv_s3_repository.py b/tests/repositories/unsanitised_address/test_unsanitised_address_list_csv_s3_repository.py similarity index 86% rename from tests/repositories/raw_address/test_raw_address_list_csv_s3_repository.py rename to tests/repositories/unsanitised_address/test_unsanitised_address_list_csv_s3_repository.py index 8870b29a..ff26f08a 100644 --- a/tests/repositories/raw_address/test_raw_address_list_csv_s3_repository.py +++ b/tests/repositories/unsanitised_address/test_unsanitised_address_list_csv_s3_repository.py @@ -3,11 +3,11 @@ from collections.abc import Iterator import pytest from moto import mock_aws -from domain.addresses.raw_address import AddressList, RawAddress +from domain.addresses.unsanitised_address import AddressList, UnsanitisedAddress from domain.postcode import Postcode from infrastructure.csv_s3_client import CsvS3Client -from repositories.raw_address.raw_address_list_csv_s3_repository import ( - RawAddressListCsvS3Repository, +from 
repositories.unsanitised_address.unsanitised_address_list_csv_s3_repository import ( + UnsanitisedAddressListCsvS3Repository, ) from tests.infrastructure import make_boto_client @@ -15,22 +15,22 @@ BUCKET = "user-address-bucket" @pytest.fixture -def repo() -> Iterator[RawAddressListCsvS3Repository]: +def repo() -> Iterator[UnsanitisedAddressListCsvS3Repository]: with mock_aws(): boto_client = make_boto_client("s3") boto_client.create_bucket(Bucket=BUCKET) csv_client = CsvS3Client(boto_client, BUCKET) - yield RawAddressListCsvS3Repository(csv_client, BUCKET) + yield UnsanitisedAddressListCsvS3Repository(csv_client, BUCKET) def _upload_csv( - repo: RawAddressListCsvS3Repository, rows: list[dict[str, str]], key: str + repo: UnsanitisedAddressListCsvS3Repository, rows: list[dict[str, str]], key: str ) -> str: return repo._csv_client.save_rows(rows, key) # pyright: ignore[reportPrivateUsage] def test_load_batch_parses_address_postcode_and_reference( - repo: RawAddressListCsvS3Repository, + repo: UnsanitisedAddressListCsvS3Repository, ) -> None: # arrange rows = [ @@ -56,7 +56,7 @@ def test_load_batch_parses_address_postcode_and_reference( def test_load_batch_uses_only_address_1_when_others_missing( - repo: RawAddressListCsvS3Repository, + repo: UnsanitisedAddressListCsvS3Repository, ) -> None: # arrange rows = [ @@ -81,7 +81,7 @@ def test_load_batch_uses_only_address_1_when_others_missing( def test_load_batch_handles_missing_internal_reference( - repo: RawAddressListCsvS3Repository, + repo: UnsanitisedAddressListCsvS3Repository, ) -> None: # arrange rows = [ @@ -106,10 +106,10 @@ def test_load_batch_handles_missing_internal_reference( def test_load_batch_captures_full_source_row( - repo: RawAddressListCsvS3Repository, + repo: UnsanitisedAddressListCsvS3Repository, ) -> None: # A raw EPC-export-shaped row: the splitter must preserve every column, - # not just the ones it parses into RawAddress fields. + # not just the ones it parses into UnsanitisedAddress fields. 
# arrange row = { "Asset Reference": "511", @@ -128,7 +128,7 @@ def test_load_batch_captures_full_source_row( def test_load_batch_raises_when_postcode_column_absent( - repo: RawAddressListCsvS3Repository, + repo: UnsanitisedAddressListCsvS3Repository, ) -> None: # arrange rows = [{"Address 1": "1 High Street", "Property Type": "Flat"}] @@ -140,7 +140,7 @@ def test_load_batch_raises_when_postcode_column_absent( def test_save_batch_passes_through_all_columns_and_appends_postcode_clean( - repo: RawAddressListCsvS3Repository, + repo: UnsanitisedAddressListCsvS3Repository, ) -> None: # arrange row = { @@ -169,12 +169,12 @@ def test_save_batch_passes_through_all_columns_and_appends_postcode_clean( def test_save_batch_returns_uri_under_path_prefix( - repo: RawAddressListCsvS3Repository, + repo: UnsanitisedAddressListCsvS3Repository, ) -> None: # arrange addresses = AddressList( [ - RawAddress( + UnsanitisedAddress( address="1 High Street", postcode=Postcode("SW1A 1AA"), additional_info={ @@ -194,7 +194,7 @@ def test_save_batch_returns_uri_under_path_prefix( def test_save_then_reload_round_trip_preserves_columns( - repo: RawAddressListCsvS3Repository, + repo: UnsanitisedAddressListCsvS3Repository, ) -> None: # arrange rows = [ @@ -227,12 +227,12 @@ def test_save_then_reload_round_trip_preserves_columns( def test_save_batch_uses_unique_filename_per_call( - repo: RawAddressListCsvS3Repository, + repo: UnsanitisedAddressListCsvS3Repository, ) -> None: # arrange addresses = AddressList( [ - RawAddress( + UnsanitisedAddress( address="1 High Street", postcode=Postcode("SW1A 1AA"), additional_info={