mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Wires slice 1-5 primitives into a deployable splitter:
- orchestration/postcode_splitter_orchestrator.py: PostcodeSplitterOrchestrator
loads addresses via UserAddressRepository, groups by postcode via
iter_postcode_grouped_batches, persists each batch under
ara_postcode_splitter_batches/{task_id}/{subtask_id}/, creates a WAITING
child SubTask, and publishes an address2UPRN SQS message per batch.
- applications/postcode_splitter/: Lambda entrypoint. handler.py is decorated
with @subtask_handler() so the parent SubTask lifecycle is decorator-owned;
PostcodeSplitterTriggerBody validates the body. Dockerfile is the
python:3.11 Lambda base with the DDD-shaped source layers and no pandas.
- tests/orchestration/test_postcode_splitter_orchestrator.py: integration
test using moto S3 + moto SQS + in-memory SQLite that exercises the full
wiring against a fixture CSV spanning three postcode groups (one
oversize) and asserts child count, persisted inputs, queue bodies, and
dispatch order.
backend/postcode_splitter/ and .github/workflows/deploy_terraform.yml are
intentionally unchanged: the dockerfile_path flip is deferred until the
companion backend/address2UPRN/ migration is also ready.
89 lines
3.3 KiB
Python
89 lines
3.3 KiB
Python
"""Use-case orchestrator for the postcode splitter Lambda.

Wires the slice-1 domain (``iter_postcode_grouped_batches``), the slice-3
``UserAddressRepository``, the slice-2 ``Address2UprnQueueClient``, and the
slice-4 ``TaskOrchestrator.create_child_subtask`` primitive together.

``split_and_dispatch`` loads the input batch, groups it into per-postcode
chunks, writes each chunk back to S3 under a deterministic prefix, creates a
WAITING child ``SubTask`` for it, and publishes the address-to-UPRN fan-out
message that downstream consumers pick up.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from uuid import UUID
|
|
|
|
from infrastructure.address2uprn_queue_client import Address2UprnQueueClient
|
|
from orchestration.task_orchestrator import TaskOrchestrator
|
|
from domain.addresses.postcode_batching import iter_postcode_grouped_batches
|
|
from repositories.user_address.user_address_repository import UserAddressRepository
|
|
|
|
|
|
class PostcodeSplitterOrchestrator:
    """Split an uploaded address batch into postcode-grouped child SubTasks.

    The orchestrator owns the algorithm; the IO collaborators
    (:class:`UserAddressRepository`, :class:`Address2UprnQueueClient`) and
    the :class:`TaskOrchestrator` lifecycle primitive are injected so the
    same wiring can be exercised against moto/SQLite in tests and against
    real AWS in the Lambda entrypoint.
    """

    def __init__(
        self,
        task_orchestrator: TaskOrchestrator,
        user_address_repo: UserAddressRepository,
        queue_client: Address2UprnQueueClient,
        max_batch_size: int = 500,
    ) -> None:
        """Store the injected collaborators and the batch-size limit.

        Args:
            task_orchestrator: Used to create one child ``SubTask`` per batch.
            user_address_repo: Used to load the input batch and to save each
                postcode-grouped batch.
            queue_client: Used to publish one message per saved batch.
            max_batch_size: Forwarded unchanged to
                ``iter_postcode_grouped_batches`` on every call to
                :meth:`split_and_dispatch`.
        """
        self._task_orchestrator = task_orchestrator
        self._user_address_repo = user_address_repo
        self._queue_client = queue_client
        # NOTE(review): the value is not validated here; presumably
        # ``iter_postcode_grouped_batches`` rejects non-positive sizes -
        # confirm against that helper.
        self._max_batch_size = max_batch_size

    def split_and_dispatch(
        self,
        *,
        parent_task_id: UUID,
        parent_subtask_id: UUID,
        input_s3_uri: str,
    ) -> list[UUID]:
        """Split ``input_s3_uri`` into postcode batches and dispatch each.

        For each yielded batch:

        1. Persist it under
           ``ara_postcode_splitter_batches/{parent_task_id}/{parent_subtask_id}``.
        2. Create a WAITING child ``SubTask`` with
           ``inputs={"task_id": str(parent_task_id), "s3_uri": batch_uri}``.
        3. Publish an ``address2UPRN`` SQS message referencing the new child.

        Args:
            parent_task_id: Id of the owning task; used in the storage prefix,
                in the child inputs, and in the published message.
            parent_subtask_id: Id of the splitter's own SubTask; used only to
                build the storage prefix.
            input_s3_uri: Location handed to the repository's ``load_batch``.

        Returns:
            The list of child ``SubTask`` ids, in dispatch order.
        """
        addresses = self._user_address_repo.load_batch(input_s3_uri)
        path_prefix = (
            f"ara_postcode_splitter_batches/{parent_task_id}/{parent_subtask_id}"
        )

        child_ids: list[UUID] = []
        for batch in iter_postcode_grouped_batches(
            addresses, max_batch_size=self._max_batch_size
        ):
            # Save first so the child SubTask and the queue message both
            # reference the URI returned by the repository.
            batch_uri = self._user_address_repo.save_batch(batch, path_prefix)
            child = self._task_orchestrator.create_child_subtask(
                parent_task_id,
                inputs={
                    "task_id": str(parent_task_id),
                    "s3_uri": batch_uri,
                },
            )
            # NOTE(review): nothing here catches a failure from ``publish``,
            # so an exception propagates after the child has already been
            # created and before its id is recorded - confirm the caller
            # handles a child that never received a message.
            self._queue_client.publish(
                parent_task_id=parent_task_id,
                child_subtask_id=child.id,
                s3_uri=batch_uri,
            )
            child_ids.append(child.id)

        return child_ids
|