mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
unsanitised address list instead of raw address list
This commit is contained in:
parent
91bb4b6571
commit
0dee917094
16 changed files with 107 additions and 109 deletions
|
|
@ -4,10 +4,10 @@ from orchestration.sal_orchestrator import (
|
|||
SALOrchestrator,
|
||||
)
|
||||
from infrastructure.csv_s3_client import CsvS3Client
|
||||
from repositories.raw_address.raw_address_list_csv_s3_repository import (
|
||||
RawAddressListCsvS3Repository,
|
||||
from repositories.unsanitised_address.unsanitised_address_list_csv_s3_repository import (
|
||||
UnsanitisedAddressListCsvS3Repository,
|
||||
)
|
||||
from domain.addresses.raw_address import AddressList
|
||||
from domain.addresses.unsanitised_address import AddressList
|
||||
|
||||
|
||||
def handler(
|
||||
|
|
@ -24,18 +24,16 @@ def handler(
|
|||
boto_s3: Any = boto3_client("s3")
|
||||
|
||||
csv_client = CsvS3Client(boto_s3, bucket)
|
||||
raw_address_repo = RawAddressListCsvS3Repository(csv_client, bucket)
|
||||
unsanitised_address_repo = UnsanitisedAddressListCsvS3Repository(csv_client, bucket)
|
||||
|
||||
orchestrator = SALOrchestrator(
|
||||
raw_address_repo=raw_address_repo,
|
||||
sal = SALOrchestrator(
|
||||
unsanitised_address_repo=unsanitised_address_repo,
|
||||
)
|
||||
|
||||
list_of_raw_address: AddressList = orchestrator.get_raw_addresses(
|
||||
input_s3_uri=s3_uri
|
||||
)
|
||||
addressList: AddressList = sal.get_unsanitised_addresses(input_s3_uri=s3_uri)
|
||||
|
||||
col_to_desc_map = orchestrator.get_col_to_description_mappings(
|
||||
list_of_raw_address=list_of_raw_address
|
||||
col_to_desc_map = sal.get_col_to_description_mappings(
|
||||
list_of_unsanitised_address=addressList
|
||||
)
|
||||
|
||||
# Read csv of user input
|
||||
|
|
|
|||
|
|
@ -12,8 +12,8 @@ from infrastructure.address2uprn_queue_client import Address2UprnQueueClient
|
|||
from infrastructure.csv_s3_client import CsvS3Client
|
||||
from orchestration.postcode_splitter_orchestrator import PostcodeSplitterOrchestrator
|
||||
from orchestration.task_orchestrator import TaskOrchestrator
|
||||
from repositories.raw_address.raw_address_list_csv_s3_repository import (
|
||||
RawAddressListCsvS3Repository,
|
||||
from repositories.unsanitised_address.unsanitised_address_list_csv_s3_repository import (
|
||||
UnsanitisedAddressListCsvS3Repository,
|
||||
)
|
||||
from utilities.aws_lambda.subtask_handler import subtask_handler
|
||||
|
||||
|
|
@ -36,12 +36,12 @@ def handler(
|
|||
boto_sqs: Any = boto3_client("sqs")
|
||||
|
||||
csv_client = CsvS3Client(boto_s3, bucket)
|
||||
user_address_repo = RawAddressListCsvS3Repository(csv_client, bucket)
|
||||
unsanitised_address_repo = UnsanitisedAddressListCsvS3Repository(csv_client, bucket)
|
||||
queue_client = Address2UprnQueueClient(boto_sqs, queue_url)
|
||||
|
||||
splitter = PostcodeSplitterOrchestrator(
|
||||
task_orchestrator=task_orchestrator,
|
||||
user_address_repo=user_address_repo,
|
||||
unsanitised_address_repo=unsanitised_address_repo,
|
||||
queue_client=queue_client,
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -2,12 +2,12 @@ from __future__ import annotations
|
|||
|
||||
from collections.abc import Iterable, Iterator
|
||||
|
||||
from domain.addresses.raw_address import AddressList, RawAddress
|
||||
from domain.addresses.unsanitised_address import AddressList, UnsanitisedAddress
|
||||
from domain.postcode import Postcode
|
||||
|
||||
|
||||
def iter_postcode_grouped_batches(
|
||||
addresses: Iterable[RawAddress],
|
||||
addresses: Iterable[UnsanitisedAddress],
|
||||
*,
|
||||
max_batch_size: int = 500,
|
||||
) -> Iterator[AddressList]:
|
||||
|
|
@ -43,7 +43,7 @@ def iter_postcode_grouped_batches(
|
|||
|
||||
|
||||
def _group_by_postcode_in_order(
|
||||
addresses: Iterable[RawAddress],
|
||||
addresses: Iterable[UnsanitisedAddress],
|
||||
) -> dict[Postcode, AddressList]:
|
||||
groups: dict[Postcode, AddressList] = {}
|
||||
for address in addresses:
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ def _empty_source_row() -> dict[str, str]:
|
|||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class RawAddress:
|
||||
class UnsanitisedAddress:
|
||||
address: str
|
||||
postcode: Postcode
|
||||
org_reference: Optional[str] = None
|
||||
|
|
@ -21,4 +21,4 @@ class RawAddress:
|
|||
|
||||
|
||||
# A batch of raw, pre-standardisation addresses as supplied by a landlord.
|
||||
AddressList = NewType("AddressList", list[RawAddress])
|
||||
AddressList = NewType("AddressList", list[UnsanitisedAddress])
|
||||
|
|
@ -5,8 +5,8 @@ from uuid import UUID
|
|||
from infrastructure.address2uprn_queue_client import Address2UprnQueueClient
|
||||
from orchestration.task_orchestrator import TaskOrchestrator
|
||||
from domain.addresses.postcode_batching import iter_postcode_grouped_batches
|
||||
from repositories.raw_address.raw_address_list_repository import (
|
||||
RawAddressListRepository,
|
||||
from repositories.unsanitised_address.unsanitised_address_list_repository import (
|
||||
UnsanitisedAddressListRepository,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -14,12 +14,12 @@ class PostcodeSplitterOrchestrator:
|
|||
def __init__(
|
||||
self,
|
||||
task_orchestrator: TaskOrchestrator,
|
||||
raw_address_repo: RawAddressListRepository,
|
||||
unsanitised_address_repo: UnsanitisedAddressListRepository,
|
||||
queue_client: Address2UprnQueueClient,
|
||||
max_batch_size: int = 500,
|
||||
) -> None:
|
||||
self._task_orchestrator = task_orchestrator
|
||||
self._raw_address_repo = raw_address_repo
|
||||
self._unsanitised_address_repo = unsanitised_address_repo
|
||||
self._queue_client = queue_client
|
||||
self._max_batch_size = max_batch_size
|
||||
|
||||
|
|
@ -30,7 +30,7 @@ class PostcodeSplitterOrchestrator:
|
|||
parent_subtask_id: UUID,
|
||||
input_s3_uri: str,
|
||||
) -> list[UUID]:
|
||||
addresses = self._raw_address_repo.load_batch(input_s3_uri)
|
||||
addresses = self._unsanitised_address_repo.load_batch(input_s3_uri)
|
||||
path_prefix = (
|
||||
f"ara_postcode_splitter_batches/{parent_task_id}/{parent_subtask_id}"
|
||||
)
|
||||
|
|
@ -39,7 +39,7 @@ class PostcodeSplitterOrchestrator:
|
|||
for batch in iter_postcode_grouped_batches(
|
||||
addresses, max_batch_size=self._max_batch_size
|
||||
):
|
||||
batch_uri = self._raw_address_repo.save_batch(batch, path_prefix)
|
||||
batch_uri = self._unsanitised_address_repo.save_batch(batch, path_prefix)
|
||||
child = self._task_orchestrator.create_child_subtask(
|
||||
parent_task_id,
|
||||
inputs={
|
||||
|
|
|
|||
|
|
@ -1,25 +1,25 @@
|
|||
from repositories.raw_address.raw_address_list_repository import (
|
||||
RawAddressListRepository,
|
||||
from repositories.unsanitised_address.unsanitised_address_list_repository import (
|
||||
UnsanitisedAddressListRepository,
|
||||
)
|
||||
from domain.addresses.raw_address import AddressList
|
||||
from domain.addresses.unsanitised_address import AddressList
|
||||
|
||||
|
||||
class SALOrchestrator:
|
||||
def __init__(self, raw_address_repo: RawAddressListRepository) -> None:
|
||||
self._raw_address_repo = raw_address_repo
|
||||
def __init__(self, unsanitised_address_repo: UnsanitisedAddressListRepository) -> None:
|
||||
self._unsanitised_address_repo = unsanitised_address_repo
|
||||
|
||||
def get_raw_addresses(
|
||||
def get_unsanitised_addresses(
|
||||
self,
|
||||
input_s3_uri: str,
|
||||
) -> AddressList:
|
||||
return self._raw_address_repo.load_batch(input_s3_uri)
|
||||
return self._unsanitised_address_repo.load_batch(input_s3_uri)
|
||||
|
||||
def get_col_to_description_mappings(
|
||||
self, list_of_raw_address: AddressList
|
||||
self, list_of_unsanitised_address: AddressList
|
||||
) -> dict[str, set[str]]:
|
||||
mappings: dict[str, set[str]] = {}
|
||||
for raw_address in list_of_raw_address:
|
||||
for key, value in raw_address.additional_info.items():
|
||||
for unsanitised_address in list_of_unsanitised_address:
|
||||
for key, value in unsanitised_address.additional_info.items():
|
||||
# Lower-case so case-only typos collapse to one variant.
|
||||
mappings.setdefault(key, set()).add(value.lower())
|
||||
return mappings
|
||||
|
|
|
|||
|
|
@ -4,11 +4,11 @@ import uuid
|
|||
from datetime import datetime, timezone
|
||||
from typing import Optional
|
||||
|
||||
from domain.addresses.raw_address import AddressList, RawAddress
|
||||
from domain.addresses.unsanitised_address import AddressList, UnsanitisedAddress
|
||||
from domain.postcode import Postcode
|
||||
from infrastructure.csv_s3_client import CsvS3Client
|
||||
from repositories.raw_address.raw_address_list_repository import (
|
||||
RawAddressListRepository,
|
||||
from repositories.unsanitised_address.unsanitised_address_list_repository import (
|
||||
UnsanitisedAddressListRepository,
|
||||
)
|
||||
|
||||
_ADDRESS_COLUMNS: tuple[str, str, str] = ("Address 1", "Address 2", "Address 3")
|
||||
|
|
@ -17,7 +17,7 @@ _INTERNAL_REFERENCE_COLUMN: str = "Internal Reference"
|
|||
_POSTCODE_CLEAN_COLUMN: str = "postcode_clean"
|
||||
|
||||
|
||||
class RawAddressListCsvS3Repository(RawAddressListRepository):
|
||||
class UnsanitisedAddressListCsvS3Repository(UnsanitisedAddressListRepository):
|
||||
def __init__(self, csv_client: CsvS3Client, bucket: str) -> None:
|
||||
self._csv_client = csv_client
|
||||
self._bucket = bucket
|
||||
|
|
@ -36,13 +36,13 @@ class RawAddressListCsvS3Repository(RawAddressListRepository):
|
|||
for col in _ADDRESS_COLUMNS
|
||||
if col in row and row[col].strip()
|
||||
]
|
||||
raw_address = ", ".join(parts)
|
||||
unsanitised_address = ", ".join(parts)
|
||||
postcode = row.get(_POSTCODE_COLUMN, "")
|
||||
raw_ref = row.get(_INTERNAL_REFERENCE_COLUMN, "").strip()
|
||||
internal_reference: Optional[str] = raw_ref or None
|
||||
addresses.append(
|
||||
RawAddress(
|
||||
address=raw_address,
|
||||
UnsanitisedAddress(
|
||||
address=unsanitised_address,
|
||||
postcode=Postcode(postcode),
|
||||
org_reference=internal_reference,
|
||||
additional_info=row,
|
||||
|
|
@ -2,10 +2,10 @@ from __future__ import annotations
|
|||
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
from domain.addresses.raw_address import AddressList
|
||||
from domain.addresses.unsanitised_address import AddressList
|
||||
|
||||
|
||||
class RawAddressListRepository(ABC):
|
||||
class UnsanitisedAddressListRepository(ABC):
|
||||
@abstractmethod
|
||||
def load_batch(self, s3_uri: str) -> AddressList: ...
|
||||
|
||||
|
|
@ -1,14 +1,14 @@
|
|||
import pytest
|
||||
|
||||
from domain.addresses.postcode_batching import iter_postcode_grouped_batches
|
||||
from domain.addresses.raw_address import AddressList, RawAddress
|
||||
from domain.addresses.unsanitised_address import AddressList, UnsanitisedAddress
|
||||
from domain.postcode import Postcode
|
||||
|
||||
|
||||
def _addrs(postcode: str, n: int) -> AddressList:
|
||||
return AddressList(
|
||||
[
|
||||
RawAddress(address=f"{i} {postcode} Street", postcode=Postcode(postcode))
|
||||
UnsanitisedAddress(address=f"{i} {postcode} Street", postcode=Postcode(postcode))
|
||||
for i in range(n)
|
||||
]
|
||||
)
|
||||
|
|
|
|||
|
|
@ -2,36 +2,36 @@ import dataclasses
|
|||
|
||||
import pytest
|
||||
|
||||
from domain.addresses.raw_address import RawAddress
|
||||
from domain.addresses.unsanitised_address import UnsanitisedAddress
|
||||
from domain.postcode import Postcode
|
||||
|
||||
|
||||
def test_raw_address_holds_postcode_value_object() -> None:
|
||||
def test_unsanitised_address_holds_postcode_value_object() -> None:
|
||||
# act
|
||||
addr = RawAddress(address="1 The Street", postcode=Postcode("sw1a 1aa"))
|
||||
addr = UnsanitisedAddress(address="1 The Street", postcode=Postcode("sw1a 1aa"))
|
||||
# assert
|
||||
assert addr.postcode == Postcode("SW1A1AA")
|
||||
|
||||
|
||||
def test_raw_address_preserves_raw_address_verbatim() -> None:
|
||||
# The free-text raw_address string is intentionally NOT normalised --
|
||||
def test_unsanitised_address_preserves_unsanitised_address_verbatim() -> None:
|
||||
# The free-text unsanitised_address string is intentionally NOT normalised --
|
||||
# only the postcode is canonicalised, and that happens inside Postcode.
|
||||
# act
|
||||
addr = RawAddress(address=" 1 The Street ", postcode=Postcode("SW1A1AA"))
|
||||
addr = UnsanitisedAddress(address=" 1 The Street ", postcode=Postcode("SW1A1AA"))
|
||||
# assert
|
||||
assert addr.address == " 1 The Street "
|
||||
|
||||
|
||||
def test_raw_address_internal_reference_defaults_to_none() -> None:
|
||||
def test_unsanitised_address_internal_reference_defaults_to_none() -> None:
|
||||
# act
|
||||
addr = RawAddress(address="1 The Street", postcode=Postcode("SW1A1AA"))
|
||||
addr = UnsanitisedAddress(address="1 The Street", postcode=Postcode("SW1A1AA"))
|
||||
# assert
|
||||
assert addr.org_reference is None
|
||||
|
||||
|
||||
def test_raw_address_internal_reference_accepted() -> None:
|
||||
def test_unsanitised_address_internal_reference_accepted() -> None:
|
||||
# act
|
||||
addr = RawAddress(
|
||||
addr = UnsanitisedAddress(
|
||||
address="1 The Street",
|
||||
postcode=Postcode("SW1A1AA"),
|
||||
org_reference="cust-42",
|
||||
|
|
@ -40,36 +40,36 @@ def test_raw_address_internal_reference_accepted() -> None:
|
|||
assert addr.org_reference == "cust-42"
|
||||
|
||||
|
||||
def test_raw_address_is_frozen() -> None:
|
||||
def test_unsanitised_address_is_frozen() -> None:
|
||||
# arrange
|
||||
addr = RawAddress(address="1 The Street", postcode=Postcode("SW1A1AA"))
|
||||
addr = UnsanitisedAddress(address="1 The Street", postcode=Postcode("SW1A1AA"))
|
||||
# act / assert
|
||||
with pytest.raises(dataclasses.FrozenInstanceError):
|
||||
addr.postcode = Postcode("OTHER") # type: ignore[misc]
|
||||
|
||||
|
||||
def test_raw_address_equality_uses_canonical_postcode() -> None:
|
||||
def test_unsanitised_address_equality_uses_canonical_postcode() -> None:
|
||||
# Postcode sanitises eagerly, so addresses built from different surface
|
||||
# forms of the same postcode compare equal.
|
||||
# arrange
|
||||
a = RawAddress(address="1 The Street", postcode=Postcode("sw1a 1aa"))
|
||||
b = RawAddress(address="1 The Street", postcode=Postcode("SW1A1AA"))
|
||||
a = UnsanitisedAddress(address="1 The Street", postcode=Postcode("sw1a 1aa"))
|
||||
b = UnsanitisedAddress(address="1 The Street", postcode=Postcode("SW1A1AA"))
|
||||
# act / assert
|
||||
assert a == b
|
||||
|
||||
|
||||
def test_raw_address_source_row_defaults_to_empty_dict() -> None:
|
||||
def test_unsanitised_address_source_row_defaults_to_empty_dict() -> None:
|
||||
# act
|
||||
addr = RawAddress(address="1 The Street", postcode=Postcode("SW1A1AA"))
|
||||
addr = UnsanitisedAddress(address="1 The Street", postcode=Postcode("SW1A1AA"))
|
||||
# assert
|
||||
assert addr.additional_info == {}
|
||||
|
||||
|
||||
def test_raw_address_carries_source_row() -> None:
|
||||
def test_unsanitised_address_carries_source_row() -> None:
|
||||
# arrange
|
||||
row = {"Address 1": "1 The Street", "postcode": "SW1A 1AA", "SAP Score": "72"}
|
||||
# act
|
||||
addr = RawAddress(
|
||||
addr = UnsanitisedAddress(
|
||||
address="1 The Street",
|
||||
postcode=Postcode("SW1A 1AA"),
|
||||
additional_info=row,
|
||||
|
|
@ -78,16 +78,16 @@ def test_raw_address_carries_source_row() -> None:
|
|||
assert addr.additional_info == row
|
||||
|
||||
|
||||
def test_raw_address_equality_ignores_source_row() -> None:
|
||||
def test_unsanitised_address_equality_ignores_source_row() -> None:
|
||||
# source_row is excluded from equality (and hashing): identity stays
|
||||
# defined by the parsed fields.
|
||||
# arrange
|
||||
a = RawAddress(
|
||||
a = UnsanitisedAddress(
|
||||
address="1 The Street",
|
||||
postcode=Postcode("SW1A1AA"),
|
||||
additional_info={"x": "1"},
|
||||
)
|
||||
b = RawAddress(
|
||||
b = UnsanitisedAddress(
|
||||
address="1 The Street",
|
||||
postcode=Postcode("SW1A1AA"),
|
||||
additional_info={"y": "2"},
|
||||
|
|
@ -1,16 +1,16 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from domain.addresses.raw_address import AddressList, RawAddress
|
||||
from domain.addresses.unsanitised_address import AddressList, UnsanitisedAddress
|
||||
from domain.postcode import Postcode
|
||||
from orchestration.sal_orchestrator import (
|
||||
SALOrchestrator,
|
||||
)
|
||||
from repositories.raw_address.raw_address_list_repository import (
|
||||
RawAddressListRepository,
|
||||
from repositories.unsanitised_address.unsanitised_address_list_repository import (
|
||||
UnsanitisedAddressListRepository,
|
||||
)
|
||||
|
||||
|
||||
class _StubRawAddressRepository(RawAddressListRepository):
|
||||
class _StubUnsanitisedAddressRepository(UnsanitisedAddressListRepository):
|
||||
"""``get_col_to_description_mappings`` never touches the repo."""
|
||||
|
||||
def load_batch(self, s3_uri: str) -> AddressList:
|
||||
|
|
@ -20,8 +20,8 @@ class _StubRawAddressRepository(RawAddressListRepository):
|
|||
raise NotImplementedError()
|
||||
|
||||
|
||||
def _make_raw_address(landlord_additional_info: dict[str, str]) -> RawAddress:
|
||||
return RawAddress(
|
||||
def _make_unsanitised_address(landlord_additional_info: dict[str, str]) -> UnsanitisedAddress:
|
||||
return UnsanitisedAddress(
|
||||
address="1 High St",
|
||||
postcode=Postcode("AA1 1AA"),
|
||||
additional_info=landlord_additional_info,
|
||||
|
|
@ -29,16 +29,16 @@ def _make_raw_address(landlord_additional_info: dict[str, str]) -> RawAddress:
|
|||
|
||||
|
||||
def _orchestrator() -> SALOrchestrator:
|
||||
return SALOrchestrator(raw_address_repo=_StubRawAddressRepository())
|
||||
return SALOrchestrator(unsanitised_address_repo=_StubUnsanitisedAddressRepository())
|
||||
|
||||
|
||||
def test_collects_every_value_per_shared_key() -> None:
|
||||
# arrange: every address carries the same keys, all values distinct.
|
||||
addresses = AddressList(
|
||||
[
|
||||
_make_raw_address({"description": "cosy", "condition": "new"}),
|
||||
_make_raw_address({"description": "spacious", "condition": "worn"}),
|
||||
_make_raw_address({"description": "bright", "condition": "fair"}),
|
||||
_make_unsanitised_address({"description": "cosy", "condition": "new"}),
|
||||
_make_unsanitised_address({"description": "spacious", "condition": "worn"}),
|
||||
_make_unsanitised_address({"description": "bright", "condition": "fair"}),
|
||||
]
|
||||
)
|
||||
|
||||
|
|
@ -56,9 +56,9 @@ def test_repeated_values_collapse_to_one_variant() -> None:
|
|||
# arrange: two addresses share the same wall description.
|
||||
addresses = AddressList(
|
||||
[
|
||||
_make_raw_address({"description": "cosy"}),
|
||||
_make_raw_address({"description": "cosy"}),
|
||||
_make_raw_address({"description": "bright"}),
|
||||
_make_unsanitised_address({"description": "cosy"}),
|
||||
_make_unsanitised_address({"description": "cosy"}),
|
||||
_make_unsanitised_address({"description": "bright"}),
|
||||
]
|
||||
)
|
||||
|
||||
|
|
@ -73,9 +73,9 @@ def test_case_only_variants_collapse_to_one() -> None:
|
|||
# arrange: the same description typed with inconsistent casing.
|
||||
addresses = AddressList(
|
||||
[
|
||||
_make_raw_address({"description": "Cosy"}),
|
||||
_make_raw_address({"description": "cosy"}),
|
||||
_make_raw_address({"description": "COSY"}),
|
||||
_make_unsanitised_address({"description": "Cosy"}),
|
||||
_make_unsanitised_address({"description": "cosy"}),
|
||||
_make_unsanitised_address({"description": "COSY"}),
|
||||
]
|
||||
)
|
||||
|
||||
|
|
@ -96,7 +96,7 @@ def test_empty_address_list_yields_empty_mapping() -> None:
|
|||
|
||||
def test_single_address_yields_single_value_per_key() -> None:
|
||||
# arrange
|
||||
addresses = AddressList([_make_raw_address({"description": "cosy"})])
|
||||
addresses = AddressList([_make_unsanitised_address({"description": "cosy"})])
|
||||
|
||||
# act
|
||||
mappings = _orchestrator().get_col_to_description_mappings(addresses)
|
||||
|
|
|
|||
|
|
@ -18,8 +18,8 @@ from orchestration.postcode_splitter_orchestrator import PostcodeSplitterOrchest
|
|||
from orchestration.task_orchestrator import TaskOrchestrator
|
||||
from repositories.tasks.subtask_postgres_repository import SubTaskPostgresRepository
|
||||
from repositories.tasks.task_postgres_repository import TaskPostgresRepository
|
||||
from repositories.raw_address.raw_address_list_csv_s3_repository import (
|
||||
RawAddressListCsvS3Repository,
|
||||
from repositories.unsanitised_address.unsanitised_address_list_csv_s3_repository import (
|
||||
UnsanitisedAddressListCsvS3Repository,
|
||||
)
|
||||
|
||||
BUCKET = "splitter-bucket"
|
||||
|
|
@ -64,7 +64,7 @@ class Harness:
|
|||
csv_client: CsvS3Client
|
||||
boto_sqs: Any
|
||||
queue_url: str
|
||||
repo: RawAddressListCsvS3Repository
|
||||
repo: UnsanitisedAddressListCsvS3Repository
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
|
|
@ -78,7 +78,7 @@ def harness(db_engine: Engine) -> Iterator[Harness]:
|
|||
queue_url = cast(str, queue["QueueUrl"])
|
||||
|
||||
csv_client = CsvS3Client(boto_s3, BUCKET)
|
||||
repo = RawAddressListCsvS3Repository(csv_client, BUCKET)
|
||||
repo = UnsanitisedAddressListCsvS3Repository(csv_client, BUCKET)
|
||||
queue_client = Address2UprnQueueClient(boto_sqs, queue_url)
|
||||
|
||||
# DB: ephemeral PostgreSQL TaskOrchestrator
|
||||
|
|
@ -91,7 +91,7 @@ def harness(db_engine: Engine) -> Iterator[Harness]:
|
|||
|
||||
splitter = PostcodeSplitterOrchestrator(
|
||||
task_orchestrator=task_orchestrator,
|
||||
raw_address_repo=repo,
|
||||
unsanitised_address_repo=repo,
|
||||
queue_client=queue_client,
|
||||
max_batch_size=3,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -3,11 +3,11 @@ from collections.abc import Iterator
|
|||
import pytest
|
||||
from moto import mock_aws
|
||||
|
||||
from domain.addresses.raw_address import AddressList, RawAddress
|
||||
from domain.addresses.unsanitised_address import AddressList, UnsanitisedAddress
|
||||
from domain.postcode import Postcode
|
||||
from infrastructure.csv_s3_client import CsvS3Client
|
||||
from repositories.raw_address.raw_address_list_csv_s3_repository import (
|
||||
RawAddressListCsvS3Repository,
|
||||
from repositories.unsanitised_address.unsanitised_address_list_csv_s3_repository import (
|
||||
UnsanitisedAddressListCsvS3Repository,
|
||||
)
|
||||
from tests.infrastructure import make_boto_client
|
||||
|
||||
|
|
@ -15,22 +15,22 @@ BUCKET = "user-address-bucket"
|
|||
|
||||
|
||||
@pytest.fixture
|
||||
def repo() -> Iterator[RawAddressListCsvS3Repository]:
|
||||
def repo() -> Iterator[UnsanitisedAddressListCsvS3Repository]:
|
||||
with mock_aws():
|
||||
boto_client = make_boto_client("s3")
|
||||
boto_client.create_bucket(Bucket=BUCKET)
|
||||
csv_client = CsvS3Client(boto_client, BUCKET)
|
||||
yield RawAddressListCsvS3Repository(csv_client, BUCKET)
|
||||
yield UnsanitisedAddressListCsvS3Repository(csv_client, BUCKET)
|
||||
|
||||
|
||||
def _upload_csv(
|
||||
repo: RawAddressListCsvS3Repository, rows: list[dict[str, str]], key: str
|
||||
repo: UnsanitisedAddressListCsvS3Repository, rows: list[dict[str, str]], key: str
|
||||
) -> str:
|
||||
return repo._csv_client.save_rows(rows, key) # pyright: ignore[reportPrivateUsage]
|
||||
|
||||
|
||||
def test_load_batch_parses_address_postcode_and_reference(
|
||||
repo: RawAddressListCsvS3Repository,
|
||||
repo: UnsanitisedAddressListCsvS3Repository,
|
||||
) -> None:
|
||||
# arrange
|
||||
rows = [
|
||||
|
|
@ -56,7 +56,7 @@ def test_load_batch_parses_address_postcode_and_reference(
|
|||
|
||||
|
||||
def test_load_batch_uses_only_address_1_when_others_missing(
|
||||
repo: RawAddressListCsvS3Repository,
|
||||
repo: UnsanitisedAddressListCsvS3Repository,
|
||||
) -> None:
|
||||
# arrange
|
||||
rows = [
|
||||
|
|
@ -81,7 +81,7 @@ def test_load_batch_uses_only_address_1_when_others_missing(
|
|||
|
||||
|
||||
def test_load_batch_handles_missing_internal_reference(
|
||||
repo: RawAddressListCsvS3Repository,
|
||||
repo: UnsanitisedAddressListCsvS3Repository,
|
||||
) -> None:
|
||||
# arrange
|
||||
rows = [
|
||||
|
|
@ -106,10 +106,10 @@ def test_load_batch_handles_missing_internal_reference(
|
|||
|
||||
|
||||
def test_load_batch_captures_full_source_row(
|
||||
repo: RawAddressListCsvS3Repository,
|
||||
repo: UnsanitisedAddressListCsvS3Repository,
|
||||
) -> None:
|
||||
# A raw EPC-export-shaped row: the splitter must preserve every column,
|
||||
# not just the ones it parses into RawAddress fields.
|
||||
# not just the ones it parses into UnsanitisedAddress fields.
|
||||
# arrange
|
||||
row = {
|
||||
"Asset Reference": "511",
|
||||
|
|
@ -128,7 +128,7 @@ def test_load_batch_captures_full_source_row(
|
|||
|
||||
|
||||
def test_load_batch_raises_when_postcode_column_absent(
|
||||
repo: RawAddressListCsvS3Repository,
|
||||
repo: UnsanitisedAddressListCsvS3Repository,
|
||||
) -> None:
|
||||
# arrange
|
||||
rows = [{"Address 1": "1 High Street", "Property Type": "Flat"}]
|
||||
|
|
@ -140,7 +140,7 @@ def test_load_batch_raises_when_postcode_column_absent(
|
|||
|
||||
|
||||
def test_save_batch_passes_through_all_columns_and_appends_postcode_clean(
|
||||
repo: RawAddressListCsvS3Repository,
|
||||
repo: UnsanitisedAddressListCsvS3Repository,
|
||||
) -> None:
|
||||
# arrange
|
||||
row = {
|
||||
|
|
@ -169,12 +169,12 @@ def test_save_batch_passes_through_all_columns_and_appends_postcode_clean(
|
|||
|
||||
|
||||
def test_save_batch_returns_uri_under_path_prefix(
|
||||
repo: RawAddressListCsvS3Repository,
|
||||
repo: UnsanitisedAddressListCsvS3Repository,
|
||||
) -> None:
|
||||
# arrange
|
||||
addresses = AddressList(
|
||||
[
|
||||
RawAddress(
|
||||
UnsanitisedAddress(
|
||||
address="1 High Street",
|
||||
postcode=Postcode("SW1A 1AA"),
|
||||
additional_info={
|
||||
|
|
@ -194,7 +194,7 @@ def test_save_batch_returns_uri_under_path_prefix(
|
|||
|
||||
|
||||
def test_save_then_reload_round_trip_preserves_columns(
|
||||
repo: RawAddressListCsvS3Repository,
|
||||
repo: UnsanitisedAddressListCsvS3Repository,
|
||||
) -> None:
|
||||
# arrange
|
||||
rows = [
|
||||
|
|
@ -227,12 +227,12 @@ def test_save_then_reload_round_trip_preserves_columns(
|
|||
|
||||
|
||||
def test_save_batch_uses_unique_filename_per_call(
|
||||
repo: RawAddressListCsvS3Repository,
|
||||
repo: UnsanitisedAddressListCsvS3Repository,
|
||||
) -> None:
|
||||
# arrange
|
||||
addresses = AddressList(
|
||||
[
|
||||
RawAddress(
|
||||
UnsanitisedAddress(
|
||||
address="1 High Street",
|
||||
postcode=Postcode("SW1A 1AA"),
|
||||
additional_info={
|
||||
Loading…
Add table
Reference in a new issue