From d0cf3d14ad5116d0b2926aceb23d642408ca71bc Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 20 May 2026 13:21:11 +0000 Subject: [PATCH] get rid of comments --- applications/postcode_splitter/handler.py | 18 ------- .../local_handler/invoke_local_lambda.py | 9 ---- .../postcode_splitter_trigger_body.py | 21 --------- domain/addresses/postcode_batching.py | 38 --------------- domain/addresses/user_address.py | 30 +----------- domain/postcode.py | 25 ---------- infrastructure/address2uprn_queue_client.py | 7 --- infrastructure/csv_s3_client.py | 18 ------- infrastructure/s3_client.py | 9 ---- infrastructure/s3_uri.py | 18 ------- infrastructure/sqs_client.py | 8 ---- .../postcode_splitter_orchestrator.py | 34 -------------- orchestration/task_orchestrator.py | 6 --- .../user_address_csv_s3_repository.py | 47 ------------------- .../user_address/user_address_repository.py | 19 +------- .../addresses/test_postcode_batching.py | 1 - tests/infrastructure/__init__.py | 7 --- tests/infrastructure/conftest.py | 4 -- .../test_postcode_splitter_orchestrator.py | 10 ---- tests/repositories/user_address/conftest.py | 4 -- .../aws_lambda/test_subtask_handler.py | 25 +--------- utilities/aws_lambda/subtask_handler.py | 24 +--------- 22 files changed, 6 insertions(+), 376 deletions(-) diff --git a/applications/postcode_splitter/handler.py b/applications/postcode_splitter/handler.py index 005227a9..9fb3ca6a 100644 --- a/applications/postcode_splitter/handler.py +++ b/applications/postcode_splitter/handler.py @@ -1,15 +1,3 @@ -"""Lambda entrypoint for the postcode splitter slice. - -The :func:`handler` function is decorated with ``@subtask_handler()`` so the -decorator owns the parent ``SubTask`` lifecycle (start/complete/fail) and -injects the decorator-owned :class:`TaskOrchestrator` as the third positional -argument. The handler itself does only two things: - -1. Build a :class:`PostcodeSplitterOrchestrator` from env-driven config. -2. Delegate to ``split_and_dispatch`` and return its result so it lands in - ``SubTask.outputs["result"]``. -""" - from __future__ import annotations import os @@ -34,12 +22,6 @@ from utilities.aws_lambda.subtask_handler import subtask_handler def handler( body: dict[str, Any], context: Any, task_orchestrator: TaskOrchestrator ) -> dict[str, list[str]]: - """Validate the trigger body, build the splitter, dispatch children. - - Reads ``S3_BUCKET_NAME`` and ``ADDRESS2UPRN_QUEUE_URL`` from the - environment to construct the typed S3/SQS clients. The return value - lands in ``SubTask.outputs["result"]`` via the decorator. - """ trigger = PostcodeSplitterTriggerBody.model_validate(body) bucket = os.environ["S3_BUCKET_NAME"] diff --git a/applications/postcode_splitter/local_handler/invoke_local_lambda.py b/applications/postcode_splitter/local_handler/invoke_local_lambda.py index c0ca89ec..21fa9b9e 100755 --- a/applications/postcode_splitter/local_handler/invoke_local_lambda.py +++ b/applications/postcode_splitter/local_handler/invoke_local_lambda.py @@ -1,13 +1,4 @@ #!/usr/bin/env python3 -"""POST a single SQS-shaped event at the locally-running splitter Lambda. - -The container built by docker-compose runs the AWS Lambda Runtime Interface -Emulator, which accepts invocations on the URL below. Replace the three -placeholder values with a real parent Task id, the splitter's own SubTask id -(both must already exist in the Postgres pointed at by .env.local), and the -s3://... URI of an uploaded address CSV. -""" - import json import requests diff --git a/applications/postcode_splitter/postcode_splitter_trigger_body.py b/applications/postcode_splitter/postcode_splitter_trigger_body.py index bc983abc..4c33f4a4 100644 --- a/applications/postcode_splitter/postcode_splitter_trigger_body.py +++ b/applications/postcode_splitter/postcode_splitter_trigger_body.py @@ -1,30 +1,9 @@ -"""Trigger payload model for the postcode splitter Lambda. - -The decorator (``@subtask_handler``) already validates ``task_id`` and -``sub_task_id`` via :class:`SubtaskTriggerBody`; this model layers on the -splitter-specific ``s3_uri`` field while keeping ``extra="allow"`` so any -upstream-passthrough keys (e.g. ``portfolio_id``) survive untouched. -""" - from uuid import UUID from pydantic import BaseModel, ConfigDict class PostcodeSplitterTriggerBody(BaseModel): - """Validated body for the postcode splitter Lambda. - - Attributes: - task_id: Parent ``Task`` id; used as the ``task_id`` input on each - child ``SubTask`` and as the ``parent_task_id`` on the fan-out - SQS messages. - sub_task_id: The splitter's own ``SubTask`` id; used as the path - segment under ``ara_postcode_splitter_batches/{task_id}/{...}`` - so per-invocation outputs cannot collide. - s3_uri: ``s3://bucket/key`` URI of the uploaded address CSV the - splitter must read. - """ - model_config = ConfigDict(extra="allow") task_id: UUID diff --git a/domain/addresses/postcode_batching.py b/domain/addresses/postcode_batching.py index b73dc1bb..44e4d967 100644 --- a/domain/addresses/postcode_batching.py +++ b/domain/addresses/postcode_batching.py @@ -1,22 +1,3 @@ -"""Pure-Python postcode-grouped batching. - -This module preserves the batching invariants from the legacy postcode -splitter (``backend/postcode_splitter/main.py``) without touching pandas, -S3, or SQS: - - * Addresses are grouped by **Postcode** in *insertion order* -- the first - Postcode seen produces the first group. - * A Postcode group is never split across two batches. - * If a single Postcode group is larger than ``max_batch_size``, it is - flushed as its own oversize batch (any buffered groups go out first, - untouched). - * Adding a group that would push the buffer past ``max_batch_size`` first - flushes the existing buffer, then starts a new buffer with the group. - * Whatever remains in the buffer after the input is exhausted is flushed - as the final batch. - * Empty input yields no batches. -""" - from __future__ import annotations from collections.abc import Iterable, Iterator @@ -30,19 +11,6 @@ def iter_postcode_grouped_batches( *, max_batch_size: int = 500, ) -> Iterator[list[UserAddress]]: - """Yield batches of ``UserAddress`` grouped by Postcode. - - Args: - addresses: An iterable of :class:`UserAddress`. Order is preserved - within each Postcode group, and groups are yielded in the order - their first member was seen. - max_batch_size: The soft upper bound on batch size, in number of - addresses. A single Postcode group larger than this cap is - dispatched whole (the cap is never used to split a group). - - Yields: - Lists of ``UserAddress``. Each list is non-empty. - """ if max_batch_size < 1: raise ValueError("max_batch_size must be >= 1") @@ -77,12 +45,6 @@ def iter_postcode_grouped_batches( def _group_by_postcode_in_order( addresses: Iterable[UserAddress], ) -> dict[Postcode, list[UserAddress]]: - """Group addresses by ``postcode`` preserving first-seen order. - - Python dicts retain insertion order since 3.7, so a plain dict suffices - for the same effect as pandas ``groupby(..., sort=False)``. ``Postcode`` - is a frozen value object, hence hashable and usable as the dict key. - """ groups: dict[Postcode, list[UserAddress]] = {} for address in addresses: groups.setdefault(address.postcode, []).append(address) diff --git a/domain/addresses/user_address.py b/domain/addresses/user_address.py index 672b2c54..9a28751b 100644 --- a/domain/addresses/user_address.py +++ b/domain/addresses/user_address.py @@ -1,11 +1,3 @@ -"""The :class:`UserAddress` value object. - -A frozen dataclass capturing the splitter's domain entity: the raw input -address line, a :class:`~domain.postcode.Postcode`, and an optional internal -reference from the customer dataset. The postcode is a value object that is -canonical by construction, so no caller can hold an un-normalised postcode. -""" - from __future__ import annotations from dataclasses import dataclass, field @@ -15,32 +7,12 @@ from domain.postcode import Postcode def _empty_source_row() -> dict[str, str]: - """Typed default factory for :attr:`UserAddress.source_row`.""" return {} @dataclass(frozen=True) class UserAddress: - """A user-supplied address paired with its canonical postcode. - - Attributes: - user_address: The free-text address string as supplied upstream. - postcode: The postcode as a :class:`~domain.postcode.Postcode` value - object -- canonical (uppercased, whitespace stripped) by - construction. - internal_reference: Optional customer-side identifier preserved for - traceability through the matching pipeline. - source_row: The complete original CSV row this address was parsed - from, column name -> cell value. The splitter is a pass-through - router: it groups rows by postcode but must not drop the other - columns the downstream address2uprn stage relies on, so the raw - row travels alongside the parsed fields. Excluded from equality - and hashing -- identity stays defined by the parsed fields above. - """ - user_address: str postcode: Postcode internal_reference: Optional[str] = None - source_row: dict[str, str] = field( - default_factory=_empty_source_row, compare=False - ) + source_row: dict[str, str] = field(default_factory=_empty_source_row, compare=False) diff --git a/domain/postcode.py b/domain/postcode.py index 514e1a39..8e4e7c79 100644 --- a/domain/postcode.py +++ b/domain/postcode.py @@ -1,16 +1,3 @@ -"""The :class:`Postcode` value object. - -A frozen value object that owns postcode sanitisation. Constructing a -``Postcode`` always yields the canonical form -- uppercase with all -whitespace removed -- so no part of the domain can hold an un-normalised -postcode. This matches the legacy splitter's -``df["postcode"].str.upper().str.replace(" ", "")``. - -``Postcode`` is the single sanitisation point: anywhere a postcode crosses a -domain boundary it should be wrapped in one, and ``str(postcode)`` gives the -canonical string back for serialisation. -""" - from __future__ import annotations from dataclasses import dataclass @@ -18,18 +5,6 @@ from dataclasses import dataclass @dataclass(frozen=True) class Postcode: - """A postcode held in canonical form. - - The ``value`` passed to the constructor is sanitised eagerly in - :meth:`__post_init__` -- uppercased, with all whitespace (spaces, tabs, - newlines) removed -- so every ``Postcode`` instance is canonical by - construction. Two postcodes that differ only in surface whitespace or - case therefore compare equal. - - Attributes: - value: The canonical postcode string (e.g. ``"SW1A1AA"``). - """ - value: str def __post_init__(self) -> None: diff --git a/infrastructure/address2uprn_queue_client.py b/infrastructure/address2uprn_queue_client.py index d81e2dd1..314e981f 100644 --- a/infrastructure/address2uprn_queue_client.py +++ b/infrastructure/address2uprn_queue_client.py @@ -4,12 +4,6 @@ from infrastructure.sqs_client import SqsClient class Address2UprnQueueClient(SqsClient): - """SQS client that publishes Address-to-UPRN fan-out messages. - - The body shape is fixed by the downstream consumer: - ``{"task_id": str, "sub_task_id": str, "s3_uri": str}`` - """ - def publish( self, *, @@ -17,7 +11,6 @@ class Address2UprnQueueClient(SqsClient): child_subtask_id: UUID, s3_uri: str, ) -> str: - """Send a typed Address-to-UPRN message. Returns the SQS ``MessageId``.""" return self.send( { "task_id": str(parent_task_id), diff --git a/infrastructure/csv_s3_client.py b/infrastructure/csv_s3_client.py index 0a576b81..055d1ce3 100644 --- a/infrastructure/csv_s3_client.py +++ b/infrastructure/csv_s3_client.py @@ -6,20 +6,7 @@ from infrastructure.s3_uri import parse_s3_uri class CsvS3Client(S3Client): - """:class:`S3Client` subclass that round-trips CSV row dictionaries. - - Rows are represented as ``list[dict[str, str]]`` — the same shape used by - :func:`csv.DictReader`/``DictWriter`` — which keeps the API trivially - compatible with existing CSV helpers in ``utils/s3.py``. - """ - def read_rows(self, s3_uri: str) -> list[dict[str, str]]: - """Fetch the object at ``s3_uri`` and decode it as a CSV. - - The bucket portion of the URI is validated against this client's - configured bucket so cross-bucket reads fail loudly rather than - silently fetching from the wrong place. - """ bucket, key = parse_s3_uri(s3_uri) if bucket != self.bucket: raise ValueError( @@ -31,11 +18,6 @@ class CsvS3Client(S3Client): return [dict(row) for row in reader] def save_rows(self, rows: list[dict[str, str]], key: str) -> str: - """Serialise ``rows`` to CSV under ``key`` and return the ``s3://`` URI. - - An empty ``rows`` list is rejected because we cannot otherwise infer - a header row. - """ if not rows: raise ValueError("Cannot save an empty rows list: header is unknown") buffer = StringIO() diff --git a/infrastructure/s3_client.py b/infrastructure/s3_client.py index 9e772881..a789fcc2 100644 --- a/infrastructure/s3_client.py +++ b/infrastructure/s3_client.py @@ -2,13 +2,6 @@ from typing import Any class S3Client: - """Thin typed wrapper around a boto3 S3 client bound to a single bucket. - - The class is deliberately small: it exposes only the byte-level - operations needed by the wider infrastructure layer. Serialisation - (CSV, JSON, etc.) lives in subclasses such as :class:`CsvS3Client`. - """ - def __init__(self, boto_s3_client: Any, bucket: str) -> None: self._client = boto_s3_client self._bucket = bucket @@ -18,7 +11,6 @@ class S3Client: return self._bucket def get_object(self, key: str) -> bytes: - """Return the raw bytes stored at ``key`` in this client's bucket.""" response: dict[str, Any] = self._client.get_object( Bucket=self._bucket, Key=key ) @@ -26,6 +18,5 @@ class S3Client: return body def put_object(self, key: str, body: bytes) -> str: - """Write ``body`` to ``key`` and return the canonical ``s3://`` URI.""" self._client.put_object(Bucket=self._bucket, Key=key, Body=body) return f"s3://{self._bucket}/{key}" diff --git a/infrastructure/s3_uri.py b/infrastructure/s3_uri.py index bf97100e..1dd5d967 100644 --- a/infrastructure/s3_uri.py +++ b/infrastructure/s3_uri.py @@ -1,25 +1,7 @@ -"""Parse S3 URIs into ``(bucket, key)`` pairs. - -A pure-stdlib helper for the infrastructure layer. It deliberately pulls in -neither pandas, boto3, nor the legacy ``utils`` package, so slim Lambda images -that only need URI parsing do not drag the wider data stack along. - -Two input shapes are supported: - -* canonical S3 URIs --- ``s3://bucket/key`` -* AWS S3 console URLs --- ``https://.../s3/object/bucket?prefix=key`` -""" - from urllib.parse import unquote def parse_s3_uri(s3_uri: str) -> tuple[str, str]: - """Return the ``(bucket, key)`` pair addressed by ``s3_uri``. - - Raises: - ValueError: if ``s3_uri`` is neither a well-formed ``s3://`` URI nor - an AWS console URL carrying a ``prefix`` query parameter. - """ if s3_uri.startswith("s3://"): parts = s3_uri[len("s3://") :].split("/", 1) if len(parts) < 2 or not parts[0] or not parts[1]: diff --git a/infrastructure/sqs_client.py b/infrastructure/sqs_client.py index fb053680..6fe8dd2e 100644 --- a/infrastructure/sqs_client.py +++ b/infrastructure/sqs_client.py @@ -3,13 +3,6 @@ from typing import Any class SqsClient: - """Thin typed wrapper around a boto3 SQS client bound to one queue URL. - - The body is JSON-serialised here so callers can pass plain dictionaries - instead of constructing message strings themselves. Typed publish - helpers (e.g. :class:`Address2UprnQueueClient`) build on this contract. - """ - def __init__(self, boto_sqs_client: Any, queue_url: str) -> None: self._client = boto_sqs_client self._queue_url = queue_url @@ -19,7 +12,6 @@ class SqsClient: return self._queue_url def send(self, body: dict[str, Any]) -> str: - """JSON-serialise ``body`` and send it. Returns the SQS ``MessageId``.""" response: dict[str, Any] = self._client.send_message( QueueUrl=self._queue_url, MessageBody=json.dumps(body), diff --git a/orchestration/postcode_splitter_orchestrator.py b/orchestration/postcode_splitter_orchestrator.py index 6afa2538..36f4b515 100644 --- a/orchestration/postcode_splitter_orchestrator.py +++ b/orchestration/postcode_splitter_orchestrator.py @@ -1,15 +1,3 @@ -"""Use-case orchestrator for the postcode splitter Lambda. - -Wires the slice-1 domain (``iter_postcode_grouped_batches``), the slice-3 -``UserAddressRepository``, the slice-2 ``Address2UprnQueueClient``, and the -slice-4 ``TaskOrchestrator.create_child_subtask`` primitive together. - -``split_and_dispatch`` loads the input batch, groups it into per-postcode -chunks, writes each chunk back to S3 under a deterministic prefix, creates a -WAITING child ``SubTask`` for it, and publishes the address-to-UPRN fan-out -message that downstream consumers pick up. -""" - from __future__ import annotations from uuid import UUID @@ -21,15 +9,6 @@ from repositories.user_address.user_address_repository import UserAddressReposit class PostcodeSplitterOrchestrator: - """Split an uploaded address batch into postcode-grouped child SubTasks. - - The orchestrator owns the algorithm; the IO collaborators - (:class:`UserAddressRepository`, :class:`Address2UprnQueueClient`) and - the :class:`TaskOrchestrator` lifecycle primitive are injected so the - same wiring can be exercised against moto/SQLite in tests and against - real AWS in the Lambda entrypoint. - """ - def __init__( self, task_orchestrator: TaskOrchestrator, @@ -49,19 +28,6 @@ class PostcodeSplitterOrchestrator: parent_subtask_id: UUID, input_s3_uri: str, ) -> list[UUID]: - """Split ``input_s3_uri`` into postcode batches and dispatch each. - - For each yielded batch: - - 1. Persist it under - ``ara_postcode_splitter_batches/{parent_task_id}/{parent_subtask_id}``. - 2. Create a WAITING child ``SubTask`` with - ``inputs={"task_id": str(parent_task_id), "s3_uri": batch_uri}``. - 3. Publish an ``address2UPRN`` SQS message referencing the new child. - - Returns: - The list of child ``SubTask`` ids, in dispatch order. - """ addresses = self._user_address_repo.load_batch(input_s3_uri) path_prefix = ( f"ara_postcode_splitter_batches/{parent_task_id}/{parent_subtask_id}" diff --git a/orchestration/task_orchestrator.py b/orchestration/task_orchestrator.py index 82d95db1..ebb71a32 100644 --- a/orchestration/task_orchestrator.py +++ b/orchestration/task_orchestrator.py @@ -54,12 +54,6 @@ class TaskOrchestrator: *, inputs: Optional[dict[str, Any]] = None, ) -> SubTask: - """Add a new WAITING SubTask under an existing parent Task. - - Skips `_cascade`: a new WAITING child against an IN_PROGRESS parent - leaves the parent's status unchanged per `Task.recalculate_from_subtasks`, - so calling it here would be a no-op. - """ subtask = SubTask.create(task_id=parent_task_id, inputs=inputs) self._subtasks.create(subtask) return subtask diff --git a/repositories/user_address/user_address_csv_s3_repository.py b/repositories/user_address/user_address_csv_s3_repository.py index 2432d8e9..9b93b638 100644 --- a/repositories/user_address/user_address_csv_s3_repository.py +++ b/repositories/user_address/user_address_csv_s3_repository.py @@ -1,18 +1,3 @@ -"""CSV-on-S3 adapter for :class:`UserAddressRepository`. - -Reads upload CSVs that carry a ``postcode`` column (plus optional -``Address 1``/``Address 2``/``Address 3`` and ``Internal Reference``), and -writes batch CSVs that pass *every* original column through unchanged with -one column appended -- ``postcode_clean`` (uppercase, whitespace-stripped) -- -which the downstream address2uprn stage groups on. - -The splitter is a pass-through router: it must not reshape or drop columns, -because address2uprn has not been migrated and still consumes the legacy -splitter's full-row output. The frontend pre-applies the user's column -mapping at upload time, so this adapter does NOT consult any -``BulkAddressUpload.column_mapping``. -""" - from __future__ import annotations import uuid @@ -31,33 +16,11 @@ _POSTCODE_CLEAN_COLUMN: str = "postcode_clean" class UserAddressCsvS3Repository(UserAddressRepository): - """Persist :class:`UserAddress` batches as CSV objects in S3. - - The repo owns the unique-filename-within-prefix convention - (``{ISO datetime}_{8-char uuid}.csv``); callers own the directory - hierarchy supplied as ``path_prefix``. - """ - def __init__(self, csv_client: CsvS3Client, bucket: str) -> None: self._csv_client = csv_client self._bucket = bucket def load_batch(self, s3_uri: str) -> list[UserAddress]: - """Load upload CSV rows into :class:`UserAddress` objects. - - Each row's complete column set is preserved on - :attr:`UserAddress.source_row` so :meth:`save_batch` can pass it - through untouched. The parsed convenience fields are also populated: - ``Address 1``/``Address 2``/``Address 3`` are concatenated with - ``", "`` (skipping missing/empty parts) into ``user_address``, and - ``Internal Reference`` is threaded to - :attr:`UserAddress.internal_reference` (``None`` when missing/empty). - - Raises: - ValueError: if the CSV has rows but no ``postcode`` column -- - without it the splitter cannot group, and silently emitting - empty postcodes would corrupt every downstream batch. - """ rows = self._csv_client.read_rows(s3_uri) if rows and _POSTCODE_COLUMN not in rows[0]: raise ValueError( @@ -86,16 +49,6 @@ class UserAddressCsvS3Repository(UserAddressRepository): return addresses def save_batch(self, addresses: list[UserAddress], path_prefix: str) -> str: - """Write a pass-through batch CSV under a unique key. - - Each output row is the address's original ``source_row`` with a - ``postcode_clean`` column appended (the canonical postcode the - downstream address2uprn stage groups on). No original column is - dropped or reshaped. - - The key is ``{path_prefix}/{ISO-8601 datetime}_{8-char uuid}.csv``. - Returns the full ``s3://bucket/key`` URI. - """ rows: list[dict[str, str]] = [ {**addr.source_row, _POSTCODE_CLEAN_COLUMN: str(addr.postcode)} for addr in addresses diff --git a/repositories/user_address/user_address_repository.py b/repositories/user_address/user_address_repository.py index ab9b6671..170f34dd 100644 --- a/repositories/user_address/user_address_repository.py +++ b/repositories/user_address/user_address_repository.py @@ -1,10 +1,3 @@ -"""Abstract repository for :class:`UserAddress` batches. - -Persistence-agnostic interface for loading and saving batches of -:class:`domain.addresses.user_address.UserAddress`. Concrete adapters -- -e.g. :class:`UserAddressCsvS3Repository` -- live alongside this module. -""" - from __future__ import annotations from abc import ABC, abstractmethod @@ -13,18 +6,10 @@ from domain.addresses.user_address import UserAddress class UserAddressRepository(ABC): - """Load and persist batches of :class:`UserAddress`. - - Implementations choose the underlying storage (S3 CSV, Postgres, - in-memory, ...) but must preserve the canonical column semantics: - the address text, postcode (a :class:`~domain.postcode.Postcode` value - object), and an optional internal reference. - """ - @abstractmethod def load_batch(self, s3_uri: str) -> list[UserAddress]: - """Read a batch of addresses from ``s3_uri`` and return domain objects.""" + ... @abstractmethod def save_batch(self, addresses: list[UserAddress], path_prefix: str) -> str: - """Persist ``addresses`` under ``path_prefix`` and return the URI written.""" + ... diff --git a/tests/domain/addresses/test_postcode_batching.py b/tests/domain/addresses/test_postcode_batching.py index 6e52b581..c69722ba 100644 --- a/tests/domain/addresses/test_postcode_batching.py +++ b/tests/domain/addresses/test_postcode_batching.py @@ -6,7 +6,6 @@ from domain.postcode import Postcode def _addrs(postcode: str, n: int) -> list[UserAddress]: - """Build ``n`` addresses sharing a postcode, with distinct address lines.""" return [ UserAddress( user_address=f"{i} {postcode} Street", postcode=Postcode(postcode) diff --git a/tests/infrastructure/__init__.py b/tests/infrastructure/__init__.py index 3478bda9..f5ad62d0 100644 --- a/tests/infrastructure/__init__.py +++ b/tests/infrastructure/__init__.py @@ -6,12 +6,5 @@ REGION = "us-east-1" def make_boto_client(service_name: str) -> Any: - """Construct a boto3 client typed as ``Any``. - - boto3's overloaded ``client`` signature uses ``Literal[...]`` per service - in the installed stubs, which forces every call site to satisfy - ``reportArgumentType`` and ``reportUnknownMemberType`` under strict - pyright. Centralising the cast keeps each test file clean. - """ factory: Any = boto3.client # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType] return factory(service_name, region_name=REGION) diff --git a/tests/infrastructure/conftest.py b/tests/infrastructure/conftest.py index 7ed2fdd6..25c1ac3b 100644 --- a/tests/infrastructure/conftest.py +++ b/tests/infrastructure/conftest.py @@ -7,10 +7,6 @@ import pytest @pytest.fixture(autouse=True) def _aws_creds() -> Iterator[None]: # pyright: ignore[reportUnusedFunction] - """Stub AWS creds so botocore doesn't probe the host environment. - - Applied automatically to every test in ``tests/infrastructure/``. - """ keys = ( "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", diff --git a/tests/orchestration/test_postcode_splitter_orchestrator.py b/tests/orchestration/test_postcode_splitter_orchestrator.py index 79c60974..4ee2315e 100644 --- a/tests/orchestration/test_postcode_splitter_orchestrator.py +++ b/tests/orchestration/test_postcode_splitter_orchestrator.py @@ -1,13 +1,3 @@ -"""Integration test: PostcodeSplitterOrchestrator wired end-to-end. - -Combines moto S3 + moto SQS + an in-memory SQLite session for the -``TaskOrchestrator`` so the full slice-6 wiring is exercised through real -infrastructure adapters (not mocks). The fixture CSV spans three postcodes -with one oversize group, which forces both the buffer-flush-then-oversize -branch and the final-flush branch of -``iter_postcode_grouped_batches`` — three batches in total. -""" - from __future__ import annotations import json diff --git a/tests/repositories/user_address/conftest.py b/tests/repositories/user_address/conftest.py index 1859ff0a..25c1ac3b 100644 --- a/tests/repositories/user_address/conftest.py +++ b/tests/repositories/user_address/conftest.py @@ -7,10 +7,6 @@ import pytest @pytest.fixture(autouse=True) def _aws_creds() -> Iterator[None]: # pyright: ignore[reportUnusedFunction] - """Stub AWS creds so botocore doesn't probe the host environment. - - Applied automatically to every test in ``tests/repositories/user_address/``. - """ keys = ( "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", diff --git a/tests/utilities/aws_lambda/test_subtask_handler.py b/tests/utilities/aws_lambda/test_subtask_handler.py index 771a49f8..9cf68f28 100644 --- a/tests/utilities/aws_lambda/test_subtask_handler.py +++ b/tests/utilities/aws_lambda/test_subtask_handler.py @@ -1,11 +1,3 @@ -"""Tests for the @subtask_handler decorator. - -Covers the contract that the decorator owns the parent SubTask lifecycle and -injects the decorator-owned TaskOrchestrator as a third positional argument -to the wrapped function — so the handler can compose its own use-case -orchestrator that shares the session. -""" - import logging from collections.abc import Generator, Iterator from contextlib import contextmanager @@ -14,8 +6,6 @@ from typing import Any from uuid import UUID import pytest - -_LOGGER_NAME = "utilities.aws_lambda.subtask_handler" from sqlmodel import Session, SQLModel, create_engine from domain.tasks.subtasks import SubTaskStatus @@ -25,6 +15,8 @@ from repositories.tasks.subtask_postgres_repository import SubTaskPostgresReposi from repositories.tasks.task_postgres_repository import TaskPostgresRepository from utilities.aws_lambda.subtask_handler import subtask_handler +_LOGGER_NAME = "utilities.aws_lambda.subtask_handler" + @dataclass class Harness: @@ -58,8 +50,6 @@ def _direct_event(task_id: UUID, subtask_id: UUID) -> dict[str, Any]: def test_subtask_handler_injects_orchestrator_as_third_positional_argument( harness: Harness, ) -> None: - """The wrapped function receives the decorator-owned TaskOrchestrator - so it can share the session with its own use-case orchestrator.""" _, subtask = harness.orchestrator.create_task_with_subtask( task_source="manual:test" ) @@ -123,9 +113,6 @@ def test_subtask_handler_marks_parent_failed_and_reraises_on_error( def test_subtask_handler_injected_orchestrator_can_create_child_subtask( harness: Harness, ) -> None: - """Smoke check the share-the-session promise: the injected orchestrator - is the same one the decorator owns, so a handler can use it to create - child SubTasks under the same session.""" task, subtask = harness.orchestrator.create_task_with_subtask( task_source="manual:test" ) @@ -150,8 +137,6 @@ def test_subtask_handler_injected_orchestrator_can_create_child_subtask( def test_subtask_handler_logs_subtask_lifecycle_on_success( harness: Harness, caplog: pytest.LogCaptureFixture ) -> None: - """Start and completion are logged at INFO so a successful invocation - leaves a CloudWatch breadcrumb (not just the Lambda runtime lines).""" task, subtask = harness.orchestrator.create_task_with_subtask( task_source="manual:test" ) @@ -172,8 +157,6 @@ def test_subtask_handler_logs_subtask_lifecycle_on_success( def test_subtask_handler_logs_exception_on_failure( harness: Harness, caplog: pytest.LogCaptureFixture ) -> None: - """A failing subtask is logged at ERROR with the traceback attached, - before the exception propagates for the Lambda runtime to surface.""" task, subtask = harness.orchestrator.create_task_with_subtask( task_source="manual:test" ) @@ -198,8 +181,6 @@ def test_subtask_handler_logs_exception_on_failure( def test_subtask_handler_records_cloudwatch_url_on_subtask( harness: Harness, monkeypatch: pytest.MonkeyPatch ) -> None: - """With the AWS Lambda runtime's log env vars present, a CloudWatch deep - link is built and persisted on the SubTask.""" monkeypatch.setenv("AWS_REGION", "eu-west-2") monkeypatch.setenv( "AWS_LAMBDA_LOG_GROUP_NAME", "/aws/lambda/postcode-splitter" @@ -232,8 +213,6 @@ def test_subtask_handler_records_cloudwatch_url_on_subtask( def test_subtask_handler_leaves_cloudwatch_url_unset_outside_lambda( harness: Harness, monkeypatch: pytest.MonkeyPatch ) -> None: - """Outside a real Lambda (e.g. the local RIE) the runtime log env vars - are absent, so cloud_logs_url is left unset rather than storing junk.""" for var in ( "AWS_REGION", "AWS_LAMBDA_LOG_GROUP_NAME", diff --git a/utilities/aws_lambda/subtask_handler.py b/utilities/aws_lambda/subtask_handler.py index 40f116ad..592ffebf 100644 --- a/utilities/aws_lambda/subtask_handler.py +++ b/utilities/aws_lambda/subtask_handler.py @@ -1,15 +1,7 @@ """@subtask_handler decorator for Lambdas that operate on existing SubTasks. Translates an AWS Lambda invocation (SQS-shaped or direct) into -TaskOrchestrator.run_subtask(...) calls, emitting an INFO log line for each -subtask's start and completion and a logged exception on failure. Those lines -land in CloudWatch via the Lambda runtime's stdout/stderr capture. - -Each subtask also records ``cloud_logs_url`` -- a deep link to this -invocation's CloudWatch log stream -- so an operator can jump from a SubTask -row straight to its logs. It is built from the environment variables the AWS -Lambda runtime sets, so it is populated only on real Lambda invocations and -left unset under the local RIE (which does not export them). +TaskOrchestrator.run_subtask(...) calls. """ import json @@ -94,24 +86,10 @@ def _records(event: dict[str, Any]) -> list[dict[str, Any]]: def _console_encode(value: str) -> str: - """Encode a value for a CloudWatch console deep link. - - The console expects URL-encoding with the percent signs themselves - re-encoded as ``$25`` -- e.g. ``/`` becomes ``%2F`` becomes ``$252F``. - """ return quote(value, safe="").replace("%", "$25") def _cloudwatch_url() -> Optional[str]: - """Build a CloudWatch console URL for this invocation's log stream. - - Sourced entirely from the environment variables the AWS Lambda runtime - sets -- ``AWS_REGION``, ``AWS_LAMBDA_LOG_GROUP_NAME`` and - ``AWS_LAMBDA_LOG_STREAM_NAME``. Returns None when any is absent, which is - the case outside a real Lambda (the local RIE does not export them) -- so - ``SubTask.cloud_logs_url`` is left unset rather than storing a link that - points nowhere. - """ region = os.environ.get("AWS_REGION") log_group = os.environ.get("AWS_LAMBDA_LOG_GROUP_NAME") log_stream = os.environ.get("AWS_LAMBDA_LOG_STREAM_NAME")