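Rename raw address repo to raw address list repo (RawAddressRepository → RawAddressListRepository, RawAddressCsvS3Repository → RawAddressListCsvS3Repository)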

This commit is contained in:
Jun-te Kim 2026-05-22 08:17:37 +00:00
parent 5b677dedbe
commit 84098e28ff
8 changed files with 29 additions and 29 deletions

View file

@ -1,11 +1,11 @@
from typing import Any
import boto3
from orchestration.landlord_description_overrides_orchestrator import (
from orchestration.sal_orchestrator import (
SALOrchestrator,
)
from infrastructure.csv_s3_client import CsvS3Client
from repositories.raw_address.raw_address_csv_s3_repository import (
RawAddressCsvS3Repository,
RawAddressListCsvS3Repository,
)
from domain.addresses.raw_address import AddressList
@ -24,7 +24,7 @@ def handler(
boto_s3: Any = boto3_client("s3")
csv_client = CsvS3Client(boto_s3, bucket)
raw_address_repo = RawAddressCsvS3Repository(csv_client, bucket)
raw_address_repo = RawAddressListCsvS3Repository(csv_client, bucket)
orchestrator = SALOrchestrator(
raw_address_repo=raw_address_repo,

View file

@ -5,14 +5,14 @@ from uuid import UUID
from infrastructure.address2uprn_queue_client import Address2UprnQueueClient
from orchestration.task_orchestrator import TaskOrchestrator
from domain.addresses.postcode_batching import iter_postcode_grouped_batches
from repositories.raw_address.raw_address_repository import RawAddressRepository
from repositories.raw_address.raw_address_repository import RawAddressListRepository
class PostcodeSplitterOrchestrator:
def __init__(
self,
task_orchestrator: TaskOrchestrator,
raw_address_repo: RawAddressRepository,
raw_address_repo: RawAddressListRepository,
queue_client: Address2UprnQueueClient,
max_batch_size: int = 500,
) -> None:

View file

@ -1,9 +1,9 @@
from repositories.raw_address.raw_address_repository import RawAddressRepository
from repositories.raw_address.raw_address_repository import RawAddressListRepository
from domain.addresses.raw_address import AddressList
class SALOrchestrator:
def __init__(self, raw_address_repo: RawAddressRepository) -> None:
def __init__(self, raw_address_repo: RawAddressListRepository) -> None:
self._raw_address_repo = raw_address_repo
def get_raw_addresses(

View file

@ -7,7 +7,7 @@ from typing import Optional
from domain.addresses.raw_address import AddressList, RawAddress
from domain.postcode import Postcode
from infrastructure.csv_s3_client import CsvS3Client
from repositories.raw_address.raw_address_repository import RawAddressRepository
from repositories.raw_address.raw_address_repository import RawAddressListRepository
_ADDRESS_COLUMNS: tuple[str, str, str] = ("Address 1", "Address 2", "Address 3")
_POSTCODE_COLUMN: str = "postcode"
@ -15,7 +15,7 @@ _INTERNAL_REFERENCE_COLUMN: str = "Internal Reference"
_POSTCODE_CLEAN_COLUMN: str = "postcode_clean"
class RawAddressCsvS3Repository(RawAddressRepository):
class RawAddressListCsvS3Repository(RawAddressListRepository):
def __init__(self, csv_client: CsvS3Client, bucket: str) -> None:
self._csv_client = csv_client
self._bucket = bucket

View file

@ -5,7 +5,7 @@ from abc import ABC, abstractmethod
from domain.addresses.raw_address import AddressList
class RawAddressRepository(ABC):
class RawAddressListRepository(ABC):
@abstractmethod
def load_batch(self, s3_uri: str) -> AddressList: ...

View file

@ -2,13 +2,13 @@ from __future__ import annotations
from domain.addresses.raw_address import AddressList, RawAddress
from domain.postcode import Postcode
from orchestration.landlord_description_overrides_orchestrator import (
from orchestration.sal_orchestrator import (
SALOrchestrator,
)
from repositories.raw_address.raw_address_repository import RawAddressRepository
from repositories.raw_address.raw_address_repository import RawAddressListRepository
class _StubRawAddressRepository(RawAddressRepository):
class _StubRawAddressRepository(RawAddressListRepository):
"""``get_col_to_description_mappings`` never touches the repo."""
def load_batch(self, s3_uri: str) -> AddressList:

View file

@ -19,7 +19,7 @@ from orchestration.task_orchestrator import TaskOrchestrator
from repositories.tasks.subtask_postgres_repository import SubTaskPostgresRepository
from repositories.tasks.task_postgres_repository import TaskPostgresRepository
from repositories.raw_address.raw_address_csv_s3_repository import (
RawAddressCsvS3Repository,
RawAddressListCsvS3Repository,
)
BUCKET = "splitter-bucket"
@ -64,7 +64,7 @@ class Harness:
csv_client: CsvS3Client
boto_sqs: Any
queue_url: str
repo: RawAddressCsvS3Repository
repo: RawAddressListCsvS3Repository
@pytest.fixture
@ -78,7 +78,7 @@ def harness(db_engine: Engine) -> Iterator[Harness]:
queue_url = cast(str, queue["QueueUrl"])
csv_client = CsvS3Client(boto_s3, BUCKET)
repo = RawAddressCsvS3Repository(csv_client, BUCKET)
repo = RawAddressListCsvS3Repository(csv_client, BUCKET)
queue_client = Address2UprnQueueClient(boto_sqs, queue_url)
# DB: ephemeral PostgreSQL TaskOrchestrator

View file

@ -7,7 +7,7 @@ from domain.addresses.raw_address import AddressList, RawAddress
from domain.postcode import Postcode
from infrastructure.csv_s3_client import CsvS3Client
from repositories.raw_address.raw_address_csv_s3_repository import (
RawAddressCsvS3Repository,
RawAddressListCsvS3Repository,
)
from tests.infrastructure import make_boto_client
@ -15,22 +15,22 @@ BUCKET = "user-address-bucket"
@pytest.fixture
def repo() -> Iterator[RawAddressCsvS3Repository]:
def repo() -> Iterator[RawAddressListCsvS3Repository]:
with mock_aws():
boto_client = make_boto_client("s3")
boto_client.create_bucket(Bucket=BUCKET)
csv_client = CsvS3Client(boto_client, BUCKET)
yield RawAddressCsvS3Repository(csv_client, BUCKET)
yield RawAddressListCsvS3Repository(csv_client, BUCKET)
def _upload_csv(
repo: RawAddressCsvS3Repository, rows: list[dict[str, str]], key: str
repo: RawAddressListCsvS3Repository, rows: list[dict[str, str]], key: str
) -> str:
return repo._csv_client.save_rows(rows, key) # pyright: ignore[reportPrivateUsage]
def test_load_batch_parses_address_postcode_and_reference(
repo: RawAddressCsvS3Repository,
repo: RawAddressListCsvS3Repository,
) -> None:
# arrange
rows = [
@ -56,7 +56,7 @@ def test_load_batch_parses_address_postcode_and_reference(
def test_load_batch_uses_only_address_1_when_others_missing(
repo: RawAddressCsvS3Repository,
repo: RawAddressListCsvS3Repository,
) -> None:
# arrange
rows = [
@ -81,7 +81,7 @@ def test_load_batch_uses_only_address_1_when_others_missing(
def test_load_batch_handles_missing_internal_reference(
repo: RawAddressCsvS3Repository,
repo: RawAddressListCsvS3Repository,
) -> None:
# arrange
rows = [
@ -106,7 +106,7 @@ def test_load_batch_handles_missing_internal_reference(
def test_load_batch_captures_full_source_row(
repo: RawAddressCsvS3Repository,
repo: RawAddressListCsvS3Repository,
) -> None:
# A raw EPC-export-shaped row: the splitter must preserve every column,
# not just the ones it parses into RawAddress fields.
@ -128,7 +128,7 @@ def test_load_batch_captures_full_source_row(
def test_load_batch_raises_when_postcode_column_absent(
repo: RawAddressCsvS3Repository,
repo: RawAddressListCsvS3Repository,
) -> None:
# arrange
rows = [{"Address 1": "1 High Street", "Property Type": "Flat"}]
@ -140,7 +140,7 @@ def test_load_batch_raises_when_postcode_column_absent(
def test_save_batch_passes_through_all_columns_and_appends_postcode_clean(
repo: RawAddressCsvS3Repository,
repo: RawAddressListCsvS3Repository,
) -> None:
# arrange
row = {
@ -169,7 +169,7 @@ def test_save_batch_passes_through_all_columns_and_appends_postcode_clean(
def test_save_batch_returns_uri_under_path_prefix(
repo: RawAddressCsvS3Repository,
repo: RawAddressListCsvS3Repository,
) -> None:
# arrange
addresses = AddressList(
@ -194,7 +194,7 @@ def test_save_batch_returns_uri_under_path_prefix(
def test_save_then_reload_round_trip_preserves_columns(
repo: RawAddressCsvS3Repository,
repo: RawAddressListCsvS3Repository,
) -> None:
# arrange
rows = [
@ -227,7 +227,7 @@ def test_save_then_reload_round_trip_preserves_columns(
def test_save_batch_uses_unique_filename_per_call(
repo: RawAddressCsvS3Repository,
repo: RawAddressListCsvS3Repository,
) -> None:
# arrange
addresses = AddressList(