mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
raw address list repo
This commit is contained in:
parent
5b677dedbe
commit
84098e28ff
8 changed files with 29 additions and 29 deletions
|
|
@ -1,11 +1,11 @@
|
|||
from typing import Any
|
||||
import boto3
|
||||
from orchestration.landlord_description_overrides_orchestrator import (
|
||||
from orchestration.sal_orchestrator import (
|
||||
SALOrchestrator,
|
||||
)
|
||||
from infrastructure.csv_s3_client import CsvS3Client
|
||||
from repositories.raw_address.raw_address_csv_s3_repository import (
|
||||
RawAddressCsvS3Repository,
|
||||
RawAddressListCsvS3Repository,
|
||||
)
|
||||
from domain.addresses.raw_address import AddressList
|
||||
|
||||
|
|
@ -24,7 +24,7 @@ def handler(
|
|||
boto_s3: Any = boto3_client("s3")
|
||||
|
||||
csv_client = CsvS3Client(boto_s3, bucket)
|
||||
raw_address_repo = RawAddressCsvS3Repository(csv_client, bucket)
|
||||
raw_address_repo = RawAddressListCsvS3Repository(csv_client, bucket)
|
||||
|
||||
orchestrator = SALOrchestrator(
|
||||
raw_address_repo=raw_address_repo,
|
||||
|
|
|
|||
|
|
@ -5,14 +5,14 @@ from uuid import UUID
|
|||
from infrastructure.address2uprn_queue_client import Address2UprnQueueClient
|
||||
from orchestration.task_orchestrator import TaskOrchestrator
|
||||
from domain.addresses.postcode_batching import iter_postcode_grouped_batches
|
||||
from repositories.raw_address.raw_address_repository import RawAddressRepository
|
||||
from repositories.raw_address.raw_address_repository import RawAddressListRepository
|
||||
|
||||
|
||||
class PostcodeSplitterOrchestrator:
|
||||
def __init__(
|
||||
self,
|
||||
task_orchestrator: TaskOrchestrator,
|
||||
raw_address_repo: RawAddressRepository,
|
||||
raw_address_repo: RawAddressListRepository,
|
||||
queue_client: Address2UprnQueueClient,
|
||||
max_batch_size: int = 500,
|
||||
) -> None:
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
from repositories.raw_address.raw_address_repository import RawAddressRepository
|
||||
from repositories.raw_address.raw_address_repository import RawAddressListRepository
|
||||
from domain.addresses.raw_address import AddressList
|
||||
|
||||
|
||||
class SALOrchestrator:
|
||||
def __init__(self, raw_address_repo: RawAddressRepository) -> None:
|
||||
def __init__(self, raw_address_repo: RawAddressListRepository) -> None:
|
||||
self._raw_address_repo = raw_address_repo
|
||||
|
||||
def get_raw_addresses(
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ from typing import Optional
|
|||
from domain.addresses.raw_address import AddressList, RawAddress
|
||||
from domain.postcode import Postcode
|
||||
from infrastructure.csv_s3_client import CsvS3Client
|
||||
from repositories.raw_address.raw_address_repository import RawAddressRepository
|
||||
from repositories.raw_address.raw_address_repository import RawAddressListRepository
|
||||
|
||||
_ADDRESS_COLUMNS: tuple[str, str, str] = ("Address 1", "Address 2", "Address 3")
|
||||
_POSTCODE_COLUMN: str = "postcode"
|
||||
|
|
@ -15,7 +15,7 @@ _INTERNAL_REFERENCE_COLUMN: str = "Internal Reference"
|
|||
_POSTCODE_CLEAN_COLUMN: str = "postcode_clean"
|
||||
|
||||
|
||||
class RawAddressCsvS3Repository(RawAddressRepository):
|
||||
class RawAddressListCsvS3Repository(RawAddressListRepository):
|
||||
def __init__(self, csv_client: CsvS3Client, bucket: str) -> None:
|
||||
self._csv_client = csv_client
|
||||
self._bucket = bucket
|
||||
|
|
@ -5,7 +5,7 @@ from abc import ABC, abstractmethod
|
|||
from domain.addresses.raw_address import AddressList
|
||||
|
||||
|
||||
class RawAddressRepository(ABC):
|
||||
class RawAddressListRepository(ABC):
|
||||
@abstractmethod
|
||||
def load_batch(self, s3_uri: str) -> AddressList: ...
|
||||
|
||||
|
|
@ -2,13 +2,13 @@ from __future__ import annotations
|
|||
|
||||
from domain.addresses.raw_address import AddressList, RawAddress
|
||||
from domain.postcode import Postcode
|
||||
from orchestration.landlord_description_overrides_orchestrator import (
|
||||
from orchestration.sal_orchestrator import (
|
||||
SALOrchestrator,
|
||||
)
|
||||
from repositories.raw_address.raw_address_repository import RawAddressRepository
|
||||
from repositories.raw_address.raw_address_repository import RawAddressListRepository
|
||||
|
||||
|
||||
class _StubRawAddressRepository(RawAddressRepository):
|
||||
class _StubRawAddressRepository(RawAddressListRepository):
|
||||
"""``get_col_to_description_mappings`` never touches the repo."""
|
||||
|
||||
def load_batch(self, s3_uri: str) -> AddressList:
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ from orchestration.task_orchestrator import TaskOrchestrator
|
|||
from repositories.tasks.subtask_postgres_repository import SubTaskPostgresRepository
|
||||
from repositories.tasks.task_postgres_repository import TaskPostgresRepository
|
||||
from repositories.raw_address.raw_address_csv_s3_repository import (
|
||||
RawAddressCsvS3Repository,
|
||||
RawAddressListCsvS3Repository,
|
||||
)
|
||||
|
||||
BUCKET = "splitter-bucket"
|
||||
|
|
@ -64,7 +64,7 @@ class Harness:
|
|||
csv_client: CsvS3Client
|
||||
boto_sqs: Any
|
||||
queue_url: str
|
||||
repo: RawAddressCsvS3Repository
|
||||
repo: RawAddressListCsvS3Repository
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
|
|
@ -78,7 +78,7 @@ def harness(db_engine: Engine) -> Iterator[Harness]:
|
|||
queue_url = cast(str, queue["QueueUrl"])
|
||||
|
||||
csv_client = CsvS3Client(boto_s3, BUCKET)
|
||||
repo = RawAddressCsvS3Repository(csv_client, BUCKET)
|
||||
repo = RawAddressListCsvS3Repository(csv_client, BUCKET)
|
||||
queue_client = Address2UprnQueueClient(boto_sqs, queue_url)
|
||||
|
||||
# DB: ephemeral PostgreSQL TaskOrchestrator
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ from domain.addresses.raw_address import AddressList, RawAddress
|
|||
from domain.postcode import Postcode
|
||||
from infrastructure.csv_s3_client import CsvS3Client
|
||||
from repositories.raw_address.raw_address_csv_s3_repository import (
|
||||
RawAddressCsvS3Repository,
|
||||
RawAddressListCsvS3Repository,
|
||||
)
|
||||
from tests.infrastructure import make_boto_client
|
||||
|
||||
|
|
@ -15,22 +15,22 @@ BUCKET = "user-address-bucket"
|
|||
|
||||
|
||||
@pytest.fixture
|
||||
def repo() -> Iterator[RawAddressCsvS3Repository]:
|
||||
def repo() -> Iterator[RawAddressListCsvS3Repository]:
|
||||
with mock_aws():
|
||||
boto_client = make_boto_client("s3")
|
||||
boto_client.create_bucket(Bucket=BUCKET)
|
||||
csv_client = CsvS3Client(boto_client, BUCKET)
|
||||
yield RawAddressCsvS3Repository(csv_client, BUCKET)
|
||||
yield RawAddressListCsvS3Repository(csv_client, BUCKET)
|
||||
|
||||
|
||||
def _upload_csv(
|
||||
repo: RawAddressCsvS3Repository, rows: list[dict[str, str]], key: str
|
||||
repo: RawAddressListCsvS3Repository, rows: list[dict[str, str]], key: str
|
||||
) -> str:
|
||||
return repo._csv_client.save_rows(rows, key) # pyright: ignore[reportPrivateUsage]
|
||||
|
||||
|
||||
def test_load_batch_parses_address_postcode_and_reference(
|
||||
repo: RawAddressCsvS3Repository,
|
||||
repo: RawAddressListCsvS3Repository,
|
||||
) -> None:
|
||||
# arrange
|
||||
rows = [
|
||||
|
|
@ -56,7 +56,7 @@ def test_load_batch_parses_address_postcode_and_reference(
|
|||
|
||||
|
||||
def test_load_batch_uses_only_address_1_when_others_missing(
|
||||
repo: RawAddressCsvS3Repository,
|
||||
repo: RawAddressListCsvS3Repository,
|
||||
) -> None:
|
||||
# arrange
|
||||
rows = [
|
||||
|
|
@ -81,7 +81,7 @@ def test_load_batch_uses_only_address_1_when_others_missing(
|
|||
|
||||
|
||||
def test_load_batch_handles_missing_internal_reference(
|
||||
repo: RawAddressCsvS3Repository,
|
||||
repo: RawAddressListCsvS3Repository,
|
||||
) -> None:
|
||||
# arrange
|
||||
rows = [
|
||||
|
|
@ -106,7 +106,7 @@ def test_load_batch_handles_missing_internal_reference(
|
|||
|
||||
|
||||
def test_load_batch_captures_full_source_row(
|
||||
repo: RawAddressCsvS3Repository,
|
||||
repo: RawAddressListCsvS3Repository,
|
||||
) -> None:
|
||||
# A raw EPC-export-shaped row: the splitter must preserve every column,
|
||||
# not just the ones it parses into RawAddress fields.
|
||||
|
|
@ -128,7 +128,7 @@ def test_load_batch_captures_full_source_row(
|
|||
|
||||
|
||||
def test_load_batch_raises_when_postcode_column_absent(
|
||||
repo: RawAddressCsvS3Repository,
|
||||
repo: RawAddressListCsvS3Repository,
|
||||
) -> None:
|
||||
# arrange
|
||||
rows = [{"Address 1": "1 High Street", "Property Type": "Flat"}]
|
||||
|
|
@ -140,7 +140,7 @@ def test_load_batch_raises_when_postcode_column_absent(
|
|||
|
||||
|
||||
def test_save_batch_passes_through_all_columns_and_appends_postcode_clean(
|
||||
repo: RawAddressCsvS3Repository,
|
||||
repo: RawAddressListCsvS3Repository,
|
||||
) -> None:
|
||||
# arrange
|
||||
row = {
|
||||
|
|
@ -169,7 +169,7 @@ def test_save_batch_passes_through_all_columns_and_appends_postcode_clean(
|
|||
|
||||
|
||||
def test_save_batch_returns_uri_under_path_prefix(
|
||||
repo: RawAddressCsvS3Repository,
|
||||
repo: RawAddressListCsvS3Repository,
|
||||
) -> None:
|
||||
# arrange
|
||||
addresses = AddressList(
|
||||
|
|
@ -194,7 +194,7 @@ def test_save_batch_returns_uri_under_path_prefix(
|
|||
|
||||
|
||||
def test_save_then_reload_round_trip_preserves_columns(
|
||||
repo: RawAddressCsvS3Repository,
|
||||
repo: RawAddressListCsvS3Repository,
|
||||
) -> None:
|
||||
# arrange
|
||||
rows = [
|
||||
|
|
@ -227,7 +227,7 @@ def test_save_then_reload_round_trip_preserves_columns(
|
|||
|
||||
|
||||
def test_save_batch_uses_unique_filename_per_call(
|
||||
repo: RawAddressCsvS3Repository,
|
||||
repo: RawAddressListCsvS3Repository,
|
||||
) -> None:
|
||||
# arrange
|
||||
addresses = AddressList(
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue