mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Wires slice 1-5 primitives into a deployable splitter:
- orchestration/postcode_splitter_orchestrator.py: PostcodeSplitterOrchestrator
loads addresses via UserAddressRepository, groups by postcode via
iter_postcode_grouped_batches, persists each batch under
ara_postcode_splitter_batches/{task_id}/{subtask_id}/, creates a WAITING
child SubTask, and publishes an address2UPRN SQS message per batch.
- applications/postcode_splitter/: Lambda entrypoint. handler.py is decorated
with @subtask_handler() so the parent SubTask lifecycle is decorator-owned;
PostcodeSplitterTriggerBody validates the body. Dockerfile is the
python:3.11 Lambda base with the DDD-shaped source layers and no pandas.
- tests/orchestration/test_postcode_splitter_orchestrator.py: integration
test using moto S3 + moto SQS + in-memory SQLite that exercises the full
wiring against a fixture CSV spanning three postcode groups (one
oversize) and asserts child count, persisted inputs, queue bodies, and
dispatch order.
backend/postcode_splitter/ and .github/workflows/deploy_terraform.yml are
intentionally unchanged: the dockerfile_path flip is deferred until the
companion backend/address2UPRN/ migration is also ready.
70 lines
2.6 KiB
Python
70 lines
2.6 KiB
Python
"""Lambda entrypoint for the postcode splitter slice.
|
|
|
|
The :func:`handler` function is decorated with ``@subtask_handler()`` so the
decorator owns the parent ``SubTask`` lifecycle (start/complete/fail) and
injects the decorator-owned :class:`TaskOrchestrator` as the third positional
argument. After validating the trigger body, the handler itself does only
two things:

1. Build a :class:`PostcodeSplitterOrchestrator` from env-driven config.
2. Delegate to ``split_and_dispatch`` and return the resulting child ids so
   they land in ``SubTask.outputs["result"]``.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
from typing import Any
|
|
|
|
import boto3
|
|
|
|
from applications.postcode_splitter.postcode_splitter_trigger_body import (
|
|
PostcodeSplitterTriggerBody,
|
|
)
|
|
from infrastructure.address2uprn_queue_client import Address2UprnQueueClient
|
|
from infrastructure.csv_s3_client import CsvS3Client
|
|
from orchestration.postcode_splitter_orchestrator import PostcodeSplitterOrchestrator
|
|
from orchestration.task_orchestrator import TaskOrchestrator
|
|
from repositories.user_address.user_address_csv_s3_repository import (
|
|
UserAddressCsvS3Repository,
|
|
)
|
|
from utilities.aws_lambda.subtask_handler import subtask_handler
|
|
|
|
|
|
@subtask_handler()
def handler(
    body: dict[str, Any], context: Any, task_orchestrator: TaskOrchestrator
) -> dict[str, list[str]]:
    """Split the parent SubTask's input by postcode and fan out child work.

    The raw ``body`` is validated as a :class:`PostcodeSplitterTriggerBody`
    before anything else happens. The bucket name and the address2UPRN queue
    URL are then read from the ``S3_BUCKET_NAME`` and
    ``ADDRESS2UPRN_QUEUE_URL`` environment variables (a missing variable
    raises ``KeyError``) and used to wire the S3- and SQS-backed
    collaborators into a :class:`PostcodeSplitterOrchestrator`.

    ``context`` is accepted as part of the handler signature but is not used
    in this body.

    Returns:
        A dict whose ``"child_subtask_ids"`` entry lists the stringified ids
        returned by ``split_and_dispatch``. Per the decorator contract this
        value lands in ``SubTask.outputs["result"]``.
    """
    request = PostcodeSplitterTriggerBody.model_validate(body)

    bucket_name = os.environ["S3_BUCKET_NAME"]
    address2uprn_queue_url = os.environ["ADDRESS2UPRN_QUEUE_URL"]

    # The installed stubs overload boto3.client per service; holding the
    # factory as Any keeps the strict-mode type checker from inspecting it.
    make_client: Any = boto3.client  # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
    s3: Any = make_client("s3")
    sqs: Any = make_client("sqs")

    # Keyword arguments are evaluated top to bottom, so the CSV client and
    # repository are built before the queue client.
    orchestrator = PostcodeSplitterOrchestrator(
        task_orchestrator=task_orchestrator,
        user_address_repo=UserAddressCsvS3Repository(
            CsvS3Client(s3, bucket_name),
            bucket_name,
        ),
        queue_client=Address2UprnQueueClient(sqs, address2uprn_queue_url),
    )

    dispatched = orchestrator.split_and_dispatch(
        parent_task_id=request.task_id,
        parent_subtask_id=request.sub_task_id,
        input_s3_uri=request.s3_uri,
    )

    return {"child_subtask_ids": list(map(str, dispatched))}
|