diff --git a/.dockerignore b/.dockerignore
index 0c7d7749..90436ffc 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -6,7 +6,7 @@ backend/.idea/*
 backend/.env
 recommendations/tests/*
 model_data/tests/*
-infrastructure/*
+deployment/*
 data_collection/*
 node_modules/*
 conservation_areas/*
diff --git a/.github/workflows/_build_image.yml b/.github/workflows/_build_image.yml
index 3435c92d..e7ad9424 100644
--- a/.github/workflows/_build_image.yml
+++ b/.github/workflows/_build_image.yml
@@ -40,6 +40,8 @@ on:
         required: false
       EPC_AUTH_TOKEN:
         required: false
+      OPEN_EPC_API_TOKEN:
+        required: false
 
 jobs:
   build:
@@ -50,6 +52,7 @@ jobs:
       DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }}
       DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }}
       EPC_AUTH_TOKEN: ${{ secrets.EPC_AUTH_TOKEN }}
+      OPEN_EPC_API_TOKEN: ${{ secrets.OPEN_EPC_API_TOKEN }}
 
     outputs:
       image_digest: ${{ steps.digest.outputs.image_digest }}
diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml
index bd014e3d..7f2eb890 100644
--- a/.github/workflows/deploy_terraform.yml
+++ b/.github/workflows/deploy_terraform.yml
@@ -62,20 +62,20 @@ jobs:
       - uses: hashicorp/setup-terraform@v3
 
       - name: Terraform Init
-        working-directory: infrastructure/terraform/shared
+        working-directory: deployment/terraform/shared
         run: terraform init -reconfigure
 
       - name: Terraform Workspace
-        working-directory: infrastructure/terraform/shared
+        working-directory: deployment/terraform/shared
         run: terraform workspace select ${STAGE} || terraform workspace new ${STAGE}
 
       - name: Terraform Plan
-        working-directory: infrastructure/terraform/shared
+        working-directory: deployment/terraform/shared
         run: terraform plan -var-file=${STAGE}.tfvars -out=tfplan
 
       - name: Terraform Apply
         if: env.TERRAFORM_APPLY == 'true'
-        working-directory: infrastructure/terraform/shared
+        working-directory: deployment/terraform/shared
         run: terraform apply -auto-approve tfplan
 
   # ============================================================
@@ -101,7 +101,7 @@ jobs:
     uses: ./.github/workflows/_deploy_lambda.yml
     with:
       lambda_name: ara_engine
-      lambda_path: infrastructure/terraform/lambda/engine
+      lambda_path: deployment/terraform/lambda/engine
       stage: ${{ needs.determine_stage.outputs.stage }}
       ecr_repo: engine-${{ needs.determine_stage.outputs.stage }}
       image_digest: ${{ needs.ara_engine_image.outputs.image_digest }}
@@ -133,6 +133,7 @@ jobs:
         DEV_DB_PORT=$DEV_DB_PORT
         DEV_DB_NAME=$DEV_DB_NAME
         EPC_AUTH_TOKEN=$EPC_AUTH_TOKEN
+        OPEN_EPC_API_TOKEN=$OPEN_EPC_API_TOKEN
     secrets:
       AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
       AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
@@ -141,6 +142,7 @@ jobs:
       DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }}
       DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }}
       EPC_AUTH_TOKEN: ${{ secrets.DEV_EPC_AUTH_TOKEN }}
+      OPEN_EPC_API_TOKEN: ${{ secrets.DEV_OPEN_EPC_API_TOKEN }}
 
   # ============================================================
   # Deploy Address 2 UPRN Lambda
@@ -150,7 +152,7 @@ jobs:
     uses: ./.github/workflows/_deploy_lambda.yml
     with:
       lambda_name: address2uprn
-      lambda_path: infrastructure/terraform/lambda/address2UPRN
+      lambda_path: deployment/terraform/lambda/address2UPRN
       stage: ${{ needs.determine_stage.outputs.stage }}
       ecr_repo: address2uprn-${{ needs.determine_stage.outputs.stage }}
       image_digest: ${{ needs.address2uprn_image.outputs.image_digest }}
@@ -169,7 +171,7 @@ jobs:
     uses: ./.github/workflows/_build_image.yml
     with:
       ecr_repo: postcode_splitter-${{ needs.determine_stage.outputs.stage }}
-      dockerfile_path: backend/postcode_splitter/handler/Dockerfile
+      dockerfile_path: applications/postcode_splitter/Dockerfile
       build_context: .
       build_args: |
         DEV_DB_HOST=$DEV_DB_HOST
@@ -191,7 +193,7 @@ jobs:
     uses: ./.github/workflows/_deploy_lambda.yml
     with:
       lambda_name: postcodeSplitter
-      lambda_path: infrastructure/terraform/lambda/postcodeSplitter
+      lambda_path: deployment/terraform/lambda/postcodeSplitter
       stage: ${{ needs.determine_stage.outputs.stage }}
       ecr_repo: postcode_splitter-${{ needs.determine_stage.outputs.stage }}
       image_digest: ${{ needs.postcodeSplitter_image.outputs.image_digest }}
@@ -231,7 +233,7 @@ jobs:
     uses: ./.github/workflows/_deploy_lambda.yml
     with:
       lambda_name: bulk_address2uprn_combiner
-      lambda_path: infrastructure/terraform/lambda/bulk_address2uprn_combiner
+      lambda_path: deployment/terraform/lambda/bulk_address2uprn_combiner
       stage: ${{ needs.determine_stage.outputs.stage }}
       ecr_repo: bulk_address2uprn_combiner-${{ needs.determine_stage.outputs.stage }}
       image_digest: ${{ needs.bulk_address2uprn_combiner_image.outputs.image_digest }}
@@ -271,7 +273,7 @@ jobs:
     uses: ./.github/workflows/_deploy_lambda.yml
     with:
       lambda_name: condition-etl
-      lambda_path: infrastructure/terraform/lambda/condition-etl
+      lambda_path: deployment/terraform/lambda/condition-etl
       stage: ${{ needs.determine_stage.outputs.stage }}
       ecr_repo: condition-etl-${{ needs.determine_stage.outputs.stage }}
       image_digest: ${{ needs.condition_etl_image.outputs.image_digest }}
@@ -311,7 +313,7 @@ jobs:
     uses: ./.github/workflows/_deploy_lambda.yml
     with:
       lambda_name: categorisation
-      lambda_path: infrastructure/terraform/lambda/categorisation
+      lambda_path: deployment/terraform/lambda/categorisation
       stage: ${{ needs.determine_stage.outputs.stage }}
       ecr_repo: categorisation-${{ needs.determine_stage.outputs.stage }}
       image_digest: ${{ needs.categorisation_image.outputs.image_digest }}
@@ -351,7 +353,7 @@ jobs:
     uses: ./.github/workflows/_deploy_lambda.yml
     with:
       lambda_name: ordnanceSurvey
-      lambda_path: infrastructure/terraform/lambda/ordnanceSurvey
+      lambda_path: deployment/terraform/lambda/ordnanceSurvey
       stage: ${{ needs.determine_stage.outputs.stage }}
       ecr_repo: ordnance-${{ needs.determine_stage.outputs.stage }}
       image_digest: ${{ needs.ordnanceSurvey_image.outputs.image_digest }}
@@ -386,7 +388,7 @@ jobs:
     uses: ./.github/workflows/_deploy_lambda.yml
     with:
       lambda_name: pashub_to_ara
-      lambda_path: infrastructure/terraform/lambda/pashub_to_ara
+      lambda_path: deployment/terraform/lambda/pashub_to_ara
       stage: ${{ needs.determine_stage.outputs.stage }}
       ecr_repo: pashub_to_ara-${{ needs.determine_stage.outputs.stage }}
       image_digest: ${{ needs.pashub_to_ara_image.outputs.image_digest }}
@@ -419,7 +421,7 @@ jobs:
     uses: ./.github/workflows/_deploy_lambda.yml
     with:
       lambda_name: ara_fast_api
-      lambda_path: infrastructure/terraform/lambda/fast-api
+      lambda_path: deployment/terraform/lambda/fast-api
       stage: ${{ needs.determine_stage.outputs.stage }}
       terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }}
     secrets:
@@ -458,17 +460,17 @@ jobs:
       - uses: hashicorp/setup-terraform@v3
 
       - name: Terraform Init
-        working-directory: infrastructure/terraform/cdn_certificate
+        working-directory: deployment/terraform/cdn_certificate
         run: terraform init -reconfigure
 
       - name: Terraform Workspace
-        working-directory: infrastructure/terraform/cdn_certificate
+        working-directory: deployment/terraform/cdn_certificate
         run: |
           terraform workspace select $STAGE \
             || terraform workspace new $STAGE
 
       - name: Terraform Plan
-        working-directory: infrastructure/terraform/cdn_certificate
+        working-directory: deployment/terraform/cdn_certificate
         run: |
           terraform plan \
             -var="stage=${STAGE}" \
@@ -476,7 +478,7 @@ jobs:
 
       - name: Terraform Apply
         if: env.TERRAFORM_APPLY == 'true'
-        working-directory: infrastructure/terraform/cdn_certificate
+        working-directory: deployment/terraform/cdn_certificate
         run: terraform apply -auto-approve tfplan
 
 
@@ -503,17 +505,17 @@ jobs:
       - uses: hashicorp/setup-terraform@v3
 
       - name: Terraform Init
-        working-directory: infrastructure/terraform/cdn
+        working-directory: deployment/terraform/cdn
         run: terraform init -reconfigure
 
       - name: Terraform Workspace
-        working-directory: infrastructure/terraform/cdn
+        working-directory: deployment/terraform/cdn
         run: |
           terraform workspace select $STAGE \
             || terraform workspace new $STAGE
 
       - name: Terraform Plan
-        working-directory: infrastructure/terraform/cdn
+        working-directory: deployment/terraform/cdn
         run: |
           terraform plan \
             -var="stage=${STAGE}" \
@@ -521,7 +523,7 @@ jobs:
 
       - name: Terraform Apply
         if: env.TERRAFORM_APPLY == 'true'
-        working-directory: infrastructure/terraform/cdn
+        working-directory: deployment/terraform/cdn
         run: terraform apply -auto-approve tfplan
 
   # ============================================================
@@ -562,7 +564,7 @@ jobs:
     uses: ./.github/workflows/_deploy_lambda.yml
     with:
       lambda_name: magic_plan
-      lambda_path: infrastructure/terraform/lambda/magic_plan
+      lambda_path: deployment/terraform/lambda/magic_plan
       stage: ${{ needs.determine_stage.outputs.stage }}
       ecr_repo: magic-plan-${{ needs.determine_stage.outputs.stage }}
       image_digest: ${{ needs.magic_plan_image.outputs.image_digest }}
@@ -585,7 +587,7 @@ jobs:
     uses: ./.github/workflows/_deploy_lambda.yml
     with:
       lambda_name: hubspot-etl-to-ara
-      lambda_path: infrastructure/terraform/lambda/hubspot_deal_etl
+      lambda_path: deployment/terraform/lambda/hubspot_deal_etl
       stage: ${{ needs.determine_stage.outputs.stage }}
       ecr_repo: hubspot-etl-${{ needs.determine_stage.outputs.stage }}
       image_digest: ${{ needs.hubspot_etl_image.outputs.image_digest }}
diff --git a/.github/workflows/lambda_smoke_tests.yml b/.github/workflows/lambda_smoke_tests.yml
index 5ff5420a..b562f91e 100644
--- a/.github/workflows/lambda_smoke_tests.yml
+++ b/.github/workflows/lambda_smoke_tests.yml
@@ -36,6 +36,13 @@ jobs:
       build_context: .
       service_name: postcode-splitter
 
+  postcode_splitter_ddd_smoke_test:
+    uses: ./.github/workflows/_smoke_test_lambda.yml
+    with:
+      dockerfile_path: applications/postcode_splitter/Dockerfile
+      build_context: .
+      service_name: postcode-splitter-ddd
+
   # ============================================================
   # Bulk Address2UPRN Combiner
   # ============================================================
diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml
index fa4fdf2a..15d4cfe9 100644
--- a/.github/workflows/unit_tests.yml
+++ b/.github/workflows/unit_tests.yml
@@ -60,3 +60,15 @@ jobs:
             -e DB_PASSWORD=test \
             -e DB_PORT=5432 \
             model-test pytest -vv -m 'not integration'
+
+      # The DDD rewrite (tests/) defines SQLModel table classes that map to the
+      # same physical tables as the legacy backend models. Both sets share the
+      # one global SQLModel.metadata, so they cannot be imported into the same
+      # pytest process. It runs as a separate invocation until the legacy
+      # models are retired. Its DB is spawned in-process by pytest-postgresql,
+      # so no DB service or env is required.
+      - name: Run DDD tests
+        run: |
+          docker run --rm \
+            --network host \
+            model-test pytest -vv tests/
diff --git a/.gitignore b/.gitignore
index 888d527a..9e5df0c7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -121,6 +121,7 @@ celerybeat.pid
 
 # Environments
 .env
+.env.local
 .venv
 env/
 venv/
diff --git a/AGENTS.md b/AGENTS.md
deleted file mode 100644
index aa0426a0..00000000
--- a/AGENTS.md
+++ /dev/null
@@ -1,29 +0,0 @@
-
-<!-- BACKLOG.MD MCP GUIDELINES START -->
-
-<CRITICAL_INSTRUCTION>
-
-## BACKLOG WORKFLOW INSTRUCTIONS
-
-This project uses Backlog.md MCP for all task and project management activities.
-
-**CRITICAL GUIDANCE**
-
-- If your client supports MCP resources, read `backlog://workflow/overview` to understand when and how to use Backlog for this project.
-- If your client only supports tools or the above request fails, call `backlog.get_backlog_instructions()` to load the tool-oriented overview. Use the `instruction` selector when you need `task-creation`, `task-execution`, or `task-finalization`.
-
-- **First time working here?** Read the overview resource IMMEDIATELY to learn the workflow
-- **Already familiar?** You should have the overview cached ("## Backlog.md Overview (MCP)")
-- **When to read it**: BEFORE creating tasks, or when you're unsure whether to track work
-
-These guides cover:
-- Decision framework for when to create tasks
-- Search-first workflow to avoid duplicates
-- Links to detailed guides for task creation, execution, and finalization
-- MCP tools reference
-
-You MUST read the overview resource to understand the complete workflow. The information is NOT summarized here.
-
-</CRITICAL_INSTRUCTION>
-
-<!-- BACKLOG.MD MCP GUIDELINES END -->
diff --git a/CLAUDE.md b/CLAUDE.md
index f88a59d5..2dabf532 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -1,33 +1,4 @@
 
-<!-- BACKLOG.MD MCP GUIDELINES START -->
-
-<CRITICAL_INSTRUCTION>
-
-## BACKLOG WORKFLOW INSTRUCTIONS
-
-This project uses Backlog.md MCP for all task and project management activities.
-
-**CRITICAL GUIDANCE**
-
-- If your client supports MCP resources, read `backlog://workflow/overview` to understand when and how to use Backlog for this project.
-- If your client only supports tools or the above request fails, call `backlog.get_backlog_instructions()` to load the tool-oriented overview. Use the `instruction` selector when you need `task-creation`, `task-execution`, or `task-finalization`.
-
-- **First time working here?** Read the overview resource IMMEDIATELY to learn the workflow
-- **Already familiar?** You should have the overview cached ("## Backlog.md Overview (MCP)")
-- **When to read it**: BEFORE creating tasks, or when you're unsure whether to track work
-
-These guides cover:
-- Decision framework for when to create tasks
-- Search-first workflow to avoid duplicates
-- Links to detailed guides for task creation, execution, and finalization
-- MCP tools reference
-
-You MUST read the overview resource to understand the complete workflow. The information is NOT summarized here.
-
-</CRITICAL_INSTRUCTION>
-
-<!-- BACKLOG.MD MCP GUIDELINES END -->
-
 ## Available Skills
 
 Five Claude Code skills are installed in this repo's dev container. Each maps to a phase of the feature lifecycle.
diff --git a/Dockerfile.test.dockerignore b/Dockerfile.test.dockerignore
index 4f79c6ee..ed05c399 100644
--- a/Dockerfile.test.dockerignore
+++ b/Dockerfile.test.dockerignore
@@ -4,7 +4,7 @@ model_data/local_data/
 backend/node_modules/
 backend/.idea/
 backend/.env
-infrastructure/
+deployment/
 data_collection/
 node_modules/
 conservation_areas/
diff --git a/UBIQUITOUS_LANGUAGE.md b/UBIQUITOUS_LANGUAGE.md
index 1765cbc8..c3074c02 100644
--- a/UBIQUITOUS_LANGUAGE.md
+++ b/UBIQUITOUS_LANGUAGE.md
@@ -23,7 +23,7 @@ Invoke `/ubiquitous-language` in any session to extract new terms from the conve
 |------|------------|------------------|
 | **UPRN** | Unique Property Reference Number — the government-issued permanent identifier for a physical address in the UK. | "property ID", "address ID", "code" |
 | **Postcode** | A UK postal code used to group nearby addresses; the primary search key for finding EPC records. | "zip code", "postal code" |
-| **User Address** | A free-text address string provided by a user or imported from a customer dataset, before any normalisation or matching. | "user input", "raw address", "user_inputed_address" |
+| **User Address** | A structured dataclass (`domain.addresses.user_address.UserAddress`) capturing a customer-supplied address: a free-text `user_address` line, a canonical `postcode` (sanitised on construction), and an optional `internal_reference`. The bare string sense -- the raw free-text address line as it arrives from upstream ingestion, before being wrapped -- remains valid when discussing CSV columns, API payloads, or other upstream contexts; in domain code, prefer the dataclass. | "user input", "raw address", "user_inputed_address" |
 | **Dwelling** | A single residential unit that can hold an EPC — a house, flat, or maisonette. | "property", "unit", "home" |
 
 ## Address Matching
@@ -72,7 +72,7 @@ Invoke `/ubiquitous-language` in any session to extract new terms from the conve
 
 ## Flagged ambiguities
 
-- **"address"** appears as both the raw **User Address** (free-text from customer data) and a structured field on an **EPC Search Result** (normalised address lines). Always qualify: "user address" vs "EPC address" or "address line 1".
+- **"address"** appears as both the raw **User Address** (free-text from customer data, or the structured `UserAddress` dataclass that wraps it) and a structured field on an **EPC Search Result** (normalised address lines). Always qualify: "user address" vs "EPC address" or "address line 1". Within `domain/`, **User Address** specifically means the `UserAddress` dataclass; in upstream ingestion contexts (CSV columns, SQS payloads) it can still mean the raw free-text string.
 - **"score"** is used for the `AddressMatch.score()` function output, the `lexiscore` DataFrame column, and informally in conversation. Prefer **Lexiscore** in domain discussions; reserve "score" for method-level code comments.
 - **"user_inputed_address"** in `backend/address2UPRN/main.py` is a misspelling and a synonym for **User Address** — the canonical term. New code should use `user_address`.
 - **"EPC"** is overloaded as both the document (an Energy Performance Certificate) and the rating band letter. Use **EPC** for the document and **EPC Band** for the letter.
diff --git a/applications/__init__.py b/applications/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/applications/postcode_splitter/Dockerfile b/applications/postcode_splitter/Dockerfile
new file mode 100644
index 00000000..aea1f914
--- /dev/null
+++ b/applications/postcode_splitter/Dockerfile
@@ -0,0 +1,34 @@
+FROM public.ecr.aws/lambda/python:3.11
+
+# Postgres host/port/database are baked into the image at build time from
+# the deploy workflow's --build-arg values (GitHub Actions DEV_DB_* secrets),
+# mirroring backend/postcode_splitter/handler/Dockerfile. They map onto the
+# POSTGRES_* names PostgresConfig.from_env reads. Username/password are NOT
+# baked in -- Terraform injects those as Lambda env vars from Secrets Manager.
+ARG DEV_DB_HOST
+ARG DEV_DB_PORT
+ARG DEV_DB_NAME
+
+ENV POSTGRES_HOST=${DEV_DB_HOST}
+ENV POSTGRES_PORT=${DEV_DB_PORT}
+ENV POSTGRES_DATABASE=${DEV_DB_NAME}
+
+WORKDIR /var/task
+
+COPY applications/postcode_splitter/requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy the layered source the handler imports from. The new splitter pulls
+# only DDD-shaped packages — no pandas, no legacy backend/.
+COPY domain/ domain/
+COPY infrastructure/ infrastructure/
+COPY orchestration/ orchestration/
+COPY repositories/ repositories/
+COPY utilities/ utilities/
+COPY applications/ applications/
+
+# Place the handler at the Lambda task root so the runtime can resolve
+# ``main.handler`` without an extra package prefix.
+COPY applications/postcode_splitter/handler.py /var/task/main.py
+
+CMD ["main.handler"]
diff --git a/applications/postcode_splitter/__init__.py b/applications/postcode_splitter/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/applications/postcode_splitter/handler.py b/applications/postcode_splitter/handler.py
new file mode 100644
index 00000000..9fb3ca6a
--- /dev/null
+++ b/applications/postcode_splitter/handler.py
@@ -0,0 +1,52 @@
+from __future__ import annotations
+
+import os
+from typing import Any
+
+import boto3
+
+from applications.postcode_splitter.postcode_splitter_trigger_body import (
+    PostcodeSplitterTriggerBody,
+)
+from infrastructure.address2uprn_queue_client import Address2UprnQueueClient
+from infrastructure.csv_s3_client import CsvS3Client
+from orchestration.postcode_splitter_orchestrator import PostcodeSplitterOrchestrator
+from orchestration.task_orchestrator import TaskOrchestrator
+from repositories.user_address.user_address_csv_s3_repository import (
+    UserAddressCsvS3Repository,
+)
+from utilities.aws_lambda.subtask_handler import subtask_handler
+
+
+@subtask_handler()
+def handler(
+    body: dict[str, Any], context: Any, task_orchestrator: TaskOrchestrator
+) -> dict[str, list[str]]:
+    """Validate the trigger, wire S3/SQS collaborators and return child subtask ids."""
+    request = PostcodeSplitterTriggerBody.model_validate(body)
+
+    # Both settings are mandatory: a missing variable raises KeyError right here.
+    s3_bucket_name = os.environ["S3_BUCKET_NAME"]
+    address2uprn_queue_url = os.environ["ADDRESS2UPRN_QUEUE_URL"]
+
+    # The installed stubs overload boto3.client per service; annotating the
+    # factory as Any makes the strict-mode checker treat it as opaque.
+    make_client: Any = boto3.client  # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
+    s3: Any = make_client("s3")
+    sqs: Any = make_client("sqs")
+
+    csv_reader = CsvS3Client(s3, s3_bucket_name)
+    addresses = UserAddressCsvS3Repository(csv_reader, s3_bucket_name)
+    dispatch_queue = Address2UprnQueueClient(sqs, address2uprn_queue_url)
+    orchestrator = PostcodeSplitterOrchestrator(
+        task_orchestrator=task_orchestrator,
+        user_address_repo=addresses,
+        queue_client=dispatch_queue,
+    )
+    spawned = orchestrator.split_and_dispatch(
+        parent_task_id=request.task_id,
+        parent_subtask_id=request.sub_task_id,
+        input_s3_uri=request.s3_uri,
+    )
+
+    return {"child_subtask_ids": [str(child_id) for child_id in spawned]}
diff --git a/applications/postcode_splitter/local_handler/.env.local.example b/applications/postcode_splitter/local_handler/.env.local.example
new file mode 100644
index 00000000..28fa8390
--- /dev/null
+++ b/applications/postcode_splitter/local_handler/.env.local.example
@@ -0,0 +1,34 @@
+# Local-test environment for the postcode_splitter Lambda.
+#
+#   cp .env.local.example .env.local   then fill in the values below.
+#
+# .env.local is gitignored. The container hits REAL AWS and a REAL Postgres,
+# so every value here points at infrastructure that actually exists.
+#
+# NOTE: the new DDD code uses different env var names than the repo root
+# .env. The mapping (root .env name -> var here) is given per section.
+# Keep comments on their own lines — docker-compose's env_file parser folds a
+# trailing "# ..." into the value.
+
+# --- Postgres (orchestration/default_orchestrator -> PostgresConfig.from_env) ---
+# POSTGRES_HOST <- DB_HOST, PORT <- DB_PORT, USERNAME <- DB_USERNAME,
+# PASSWORD <- DB_PASSWORD, DATABASE <- DB_NAME.
+POSTGRES_HOST=
+POSTGRES_PORT=5432
+POSTGRES_USERNAME=
+POSTGRES_PASSWORD=
+POSTGRES_DATABASE=
+# POSTGRES_DRIVER=psycopg2   (optional; defaults to psycopg2)
+
+# --- Handler config (applications/postcode_splitter/handler.py) ---
+# S3_BUCKET_NAME: bucket holding the input address CSV (root .env: DATA_BUCKET).
+# ADDRESS2UPRN_QUEUE_URL: SQS queue the splitter fans batches out to; not in
+# the root .env (Terraform sets it in prod).
+S3_BUCKET_NAME=
+ADDRESS2UPRN_QUEUE_URL=
+
+# --- AWS credentials for boto3 (S3 + SQS clients) ---
+AWS_ACCESS_KEY_ID=
+AWS_SECRET_ACCESS_KEY=
+AWS_DEFAULT_REGION=eu-west-2
+# AWS_SESSION_TOKEN=   (only if using temporary/SSO credentials)
diff --git a/applications/postcode_splitter/local_handler/docker-compose.yml b/applications/postcode_splitter/local_handler/docker-compose.yml
new file mode 100644
index 00000000..68af1c40
--- /dev/null
+++ b/applications/postcode_splitter/local_handler/docker-compose.yml
@@ -0,0 +1,9 @@
+services:
+  postcode-splitter:
+    build:
+      context: ../../../
+      dockerfile: applications/postcode_splitter/Dockerfile
+    ports:
+      - "9001:8080"
+    env_file:
+      - .env.local
diff --git a/applications/postcode_splitter/local_handler/invoke_local_lambda.py b/applications/postcode_splitter/local_handler/invoke_local_lambda.py
new file mode 100755
index 00000000..5f4b1d36
--- /dev/null
+++ b/applications/postcode_splitter/local_handler/invoke_local_lambda.py
@@ -0,0 +1,28 @@
+#!/usr/bin/env python3
+import json
+import requests
+
+HOST = "localhost"
+PORT = "9001"
+
+# Invocation endpoint of the container started by docker-compose.yml in this
+# directory (host port 9001 is mapped to the container's 8080).
+LAMBDA_URL = f"http://{HOST}:{PORT}/2015-03-31/functions/function/invocations"
+
+# (connect, read) timeouts in seconds so a wedged container cannot hang this
+# script forever; the read limit is long because the call blocks on the handler.
+REQUEST_TIMEOUT = (5, 900)
+
+# The trigger body travels JSON-encoded inside a single "Records" entry.
+trigger_body = {
+    "task_id": "e295d89b-a7c5-4a9a-8b4e-b405fab1f298",
+    "sub_task_id": "f4a9944f-41f0-4a33-8669-5016ec574068",
+    "s3_uri": "s3://retrofit-data-dev/bulk_onboarding_inputs/hyde2 (1).csv",
+}
+payload = {"Records": [{"body": json.dumps(trigger_body)}]}
+
+response = requests.post(LAMBDA_URL, json=payload, timeout=REQUEST_TIMEOUT)
+
+print("Status code:", response.status_code)
+print("Response:")
+print(response.text)
diff --git a/applications/postcode_splitter/local_handler/run_local.sh b/applications/postcode_splitter/local_handler/run_local.sh
new file mode 100755
index 00000000..345b60ee
--- /dev/null
+++ b/applications/postcode_splitter/local_handler/run_local.sh
@@ -0,0 +1,12 @@
+#!/usr/bin/env bash
+# Rebuild and start the local postcode-splitter container from this directory.
+set -euo pipefail
+cd "$(dirname "$0")"
+if [[ ! -f .env.local ]]; then
+  # First run: seed the env file from the template, then stop for manual edits.
+  cp .env.local.example .env.local
+  echo "Created .env.local from the template — fill it in, then re-run." >&2
+  exit 1
+fi
+docker compose build --no-cache
+docker compose up --force-recreate
diff --git a/applications/postcode_splitter/postcode_splitter_trigger_body.py b/applications/postcode_splitter/postcode_splitter_trigger_body.py
new file mode 100644
index 00000000..4c33f4a4
--- /dev/null
+++ b/applications/postcode_splitter/postcode_splitter_trigger_body.py
@@ -0,0 +1,11 @@
+from uuid import UUID
+
+from pydantic import BaseModel, ConfigDict
+
+
+class PostcodeSplitterTriggerBody(BaseModel):
+    """Validated trigger payload for the splitter handler; unknown keys are allowed."""
+    model_config = ConfigDict(extra="allow")
+    task_id: UUID
+    sub_task_id: UUID
+    s3_uri: str
diff --git a/applications/postcode_splitter/requirements.txt b/applications/postcode_splitter/requirements.txt
new file mode 100644
index 00000000..6a85a255
--- /dev/null
+++ b/applications/postcode_splitter/requirements.txt
@@ -0,0 +1,4 @@
+boto3
+pydantic
+sqlmodel
+psycopg2-binary
diff --git a/asset_list/app.py b/asset_list/app.py
index 7413c7cb..424f4df6 100644
--- a/asset_list/app.py
+++ b/asset_list/app.py
@@ -79,23 +79,23 @@ def app():
     """
 
     data_folder = "/workspaces/model/asset_list"
-    data_filename = "input.xlsx"
-    sheet_name = "Handovers"
-    postcode_column = "POSTCODE"
-    address1_column = "Full Addres"
+    data_filename = "hyde.xlsx"
+    sheet_name = "AddressProfilingResults"
+    postcode_column = "Postcode"
+    address1_column = "Address"
     address1_method = None
-    fulladdress_column = "Full Addres"
+    fulladdress_column = "Postcode"
     address_cols_to_concat = []
     missing_postcodes_method = None
     landlord_year_built = None
-    landlord_os_uprn = "domna_found_uprn"
-    landlord_property_type = "PROPERTY TYPE"  # Good to include if landlord gave
-    landlord_built_form = "Type Description"  # Good to include if landlord gave
+    landlord_os_uprn = None
+    landlord_property_type = "Property Type"  # Good to include if landlord gave
+    landlord_built_form = None  # Good to include if landlord gave
     landlord_wall_construction = None
     landlord_roof_construction = None
     landlord_heating_system = None
     landlord_existing_pv = None
-    landlord_property_id = "PROP REF"
+    landlord_property_id = "Organisation Reference"
     landlord_sap = None
     outcomes_filename = None
     outcomes_sheetname = None
@@ -469,8 +469,3 @@ def app():
                     writer, sheet_name="Duplicate Properties", index=False
                 )
 
-
-
-
-for key,value in dict.items():
-    lsakjfldsa
\ No newline at end of file
diff --git a/backend/address2UPRN/handler/Dockerfile b/backend/address2UPRN/handler/Dockerfile
index 07159357..7d174152 100644
--- a/backend/address2UPRN/handler/Dockerfile
+++ b/backend/address2UPRN/handler/Dockerfile
@@ -6,11 +6,13 @@ ARG DEV_DB_HOST
 ARG DEV_DB_PORT
 ARG DEV_DB_NAME
 ARG EPC_AUTH_TOKEN
+ARG OPEN_EPC_API_TOKEN
 
 ENV DB_HOST=${DEV_DB_HOST}
 ENV DB_PORT=${DEV_DB_PORT}
 ENV DB_NAME=${DEV_DB_NAME}
 ENV EPC_AUTH_TOKEN=${EPC_AUTH_TOKEN}
+ENV OPEN_EPC_API_TOKEN=${OPEN_EPC_API_TOKEN}
 
 
 # Set working directory (Lambda task root)
diff --git a/backend/address2UPRN/handler/requirements.txt b/backend/address2UPRN/handler/requirements.txt
index 6ef41b2d..02aaefba 100644
--- a/backend/address2UPRN/handler/requirements.txt
+++ b/backend/address2UPRN/handler/requirements.txt
@@ -8,4 +8,5 @@ boto3==1.35.44
 sqlmodel
 sqlalchemy==2.0.36
 psycopg2-binary==2.9.10
-pydantic-settings==2.6.0
\ No newline at end of file
+pydantic-settings==2.6.0
+httpx
\ No newline at end of file
diff --git a/backend/epc_client/epc_client_service.py b/backend/epc_client/epc_client_service.py
index 86caeea3..72dbf142 100644
--- a/backend/epc_client/epc_client_service.py
+++ b/backend/epc_client/epc_client_service.py
@@ -47,8 +47,14 @@ class EpcClientService:
         latest = max(results, key=lambda r: r.registration_date)
         return self.get_by_certificate_number(latest.certificate_number)
 
+    @staticmethod
+    def _normalise_postcode(postcode: str) -> str:
+        """Return the postcode uppercased, with every whitespace character removed."""
+        return "".join(postcode.split()).upper()
+
     def search_by_postcode(self, postcode: str) -> list[EpcSearchResult]:
-        return call_with_retry(lambda: self._search(postcode=postcode))
+        normalised = self._normalise_postcode(postcode)
+        return call_with_retry(lambda: self._search(postcode=normalised))
 
     # ------------------------------------------------------------------
     # Private helperEpcRateLimpolarss
diff --git a/infrastructure/terraform/README.md b/deployment/terraform/README.md
similarity index 100%
rename from infrastructure/terraform/README.md
rename to deployment/terraform/README.md
diff --git a/infrastructure/terraform/cdn/main.tf b/deployment/terraform/cdn/main.tf
similarity index 100%
rename from infrastructure/terraform/cdn/main.tf
rename to deployment/terraform/cdn/main.tf
diff --git a/infrastructure/terraform/cdn/provider.tf b/deployment/terraform/cdn/provider.tf
similarity index 100%
rename from infrastructure/terraform/cdn/provider.tf
rename to deployment/terraform/cdn/provider.tf
diff --git a/infrastructure/terraform/cdn/variables.tf b/deployment/terraform/cdn/variables.tf
similarity index 100%
rename from infrastructure/terraform/cdn/variables.tf
rename to deployment/terraform/cdn/variables.tf
diff --git a/infrastructure/terraform/cdn_certificate/main.tf b/deployment/terraform/cdn_certificate/main.tf
similarity index 100%
rename from infrastructure/terraform/cdn_certificate/main.tf
rename to deployment/terraform/cdn_certificate/main.tf
diff --git a/infrastructure/terraform/cdn_certificate/outputs.tf b/deployment/terraform/cdn_certificate/outputs.tf
similarity index 100%
rename from infrastructure/terraform/cdn_certificate/outputs.tf
rename to deployment/terraform/cdn_certificate/outputs.tf
diff --git a/infrastructure/terraform/cdn_certificate/provider.tf b/deployment/terraform/cdn_certificate/provider.tf
similarity index 100%
rename from infrastructure/terraform/cdn_certificate/provider.tf
rename to deployment/terraform/cdn_certificate/provider.tf
diff --git a/infrastructure/terraform/cdn_certificate/variables.tf b/deployment/terraform/cdn_certificate/variables.tf
similarity index 100%
rename from infrastructure/terraform/cdn_certificate/variables.tf
rename to deployment/terraform/cdn_certificate/variables.tf
diff --git a/infrastructure/terraform/lambda/_template/README.md b/deployment/terraform/lambda/_template/README.md
similarity index 96%
rename from infrastructure/terraform/lambda/_template/README.md
rename to deployment/terraform/lambda/_template/README.md
index 5bb10627..f2a8638a 100644
--- a/infrastructure/terraform/lambda/_template/README.md
+++ b/deployment/terraform/lambda/_template/README.md
@@ -10,7 +10,7 @@
 ### 2. Add infrastructure prerequisites (shared stack)
 - Add a new ECR repository in:
 
-  infrastructure/terraform/shared/main.tf
+  deployment/terraform/shared/main.tf
 
 - Create a PR to deploy this to main then dev in order to deploy the shared stack
 
diff --git a/infrastructure/terraform/lambda/_template/main.tf b/deployment/terraform/lambda/_template/main.tf
similarity index 100%
rename from infrastructure/terraform/lambda/_template/main.tf
rename to deployment/terraform/lambda/_template/main.tf
diff --git a/infrastructure/terraform/lambda/_template/provider.tf b/deployment/terraform/lambda/_template/provider.tf
similarity index 100%
rename from infrastructure/terraform/lambda/_template/provider.tf
rename to deployment/terraform/lambda/_template/provider.tf
diff --git a/infrastructure/terraform/lambda/_template/variables.tf b/deployment/terraform/lambda/_template/variables.tf
similarity index 100%
rename from infrastructure/terraform/lambda/_template/variables.tf
rename to deployment/terraform/lambda/_template/variables.tf
diff --git a/infrastructure/terraform/lambda/address2UPRN/main.tf b/deployment/terraform/lambda/address2UPRN/main.tf
similarity index 100%
rename from infrastructure/terraform/lambda/address2UPRN/main.tf
rename to deployment/terraform/lambda/address2UPRN/main.tf
diff --git a/infrastructure/terraform/lambda/address2UPRN/outputs.tf b/deployment/terraform/lambda/address2UPRN/outputs.tf
similarity index 100%
rename from infrastructure/terraform/lambda/address2UPRN/outputs.tf
rename to deployment/terraform/lambda/address2UPRN/outputs.tf
diff --git a/infrastructure/terraform/lambda/address2UPRN/provider.tf b/deployment/terraform/lambda/address2UPRN/provider.tf
similarity index 100%
rename from infrastructure/terraform/lambda/address2UPRN/provider.tf
rename to deployment/terraform/lambda/address2UPRN/provider.tf
diff --git a/infrastructure/terraform/lambda/address2UPRN/variables.tf b/deployment/terraform/lambda/address2UPRN/variables.tf
similarity index 100%
rename from infrastructure/terraform/lambda/address2UPRN/variables.tf
rename to deployment/terraform/lambda/address2UPRN/variables.tf
diff --git a/infrastructure/terraform/lambda/bulk_address2uprn_combiner/main.tf b/deployment/terraform/lambda/bulk_address2uprn_combiner/main.tf
similarity index 100%
rename from infrastructure/terraform/lambda/bulk_address2uprn_combiner/main.tf
rename to deployment/terraform/lambda/bulk_address2uprn_combiner/main.tf
diff --git a/infrastructure/terraform/lambda/bulk_address2uprn_combiner/outputs.tf b/deployment/terraform/lambda/bulk_address2uprn_combiner/outputs.tf
similarity index 100%
rename from infrastructure/terraform/lambda/bulk_address2uprn_combiner/outputs.tf
rename to deployment/terraform/lambda/bulk_address2uprn_combiner/outputs.tf
diff --git a/infrastructure/terraform/lambda/bulk_address2uprn_combiner/provider.tf b/deployment/terraform/lambda/bulk_address2uprn_combiner/provider.tf
similarity index 100%
rename from infrastructure/terraform/lambda/bulk_address2uprn_combiner/provider.tf
rename to deployment/terraform/lambda/bulk_address2uprn_combiner/provider.tf
diff --git a/infrastructure/terraform/lambda/bulk_address2uprn_combiner/variables.tf b/deployment/terraform/lambda/bulk_address2uprn_combiner/variables.tf
similarity index 100%
rename from infrastructure/terraform/lambda/bulk_address2uprn_combiner/variables.tf
rename to deployment/terraform/lambda/bulk_address2uprn_combiner/variables.tf
diff --git a/infrastructure/terraform/lambda/categorisation/main.tf b/deployment/terraform/lambda/categorisation/main.tf
similarity index 100%
rename from infrastructure/terraform/lambda/categorisation/main.tf
rename to deployment/terraform/lambda/categorisation/main.tf
diff --git a/infrastructure/terraform/lambda/categorisation/outputs.tf b/deployment/terraform/lambda/categorisation/outputs.tf
similarity index 100%
rename from infrastructure/terraform/lambda/categorisation/outputs.tf
rename to deployment/terraform/lambda/categorisation/outputs.tf
diff --git a/infrastructure/terraform/lambda/categorisation/provider.tf b/deployment/terraform/lambda/categorisation/provider.tf
similarity index 100%
rename from infrastructure/terraform/lambda/categorisation/provider.tf
rename to deployment/terraform/lambda/categorisation/provider.tf
diff --git a/infrastructure/terraform/lambda/categorisation/variables.tf b/deployment/terraform/lambda/categorisation/variables.tf
similarity index 100%
rename from infrastructure/terraform/lambda/categorisation/variables.tf
rename to deployment/terraform/lambda/categorisation/variables.tf
diff --git a/infrastructure/terraform/lambda/condition-etl/main.tf b/deployment/terraform/lambda/condition-etl/main.tf
similarity index 100%
rename from infrastructure/terraform/lambda/condition-etl/main.tf
rename to deployment/terraform/lambda/condition-etl/main.tf
diff --git a/infrastructure/terraform/lambda/condition-etl/provider.tf b/deployment/terraform/lambda/condition-etl/provider.tf
similarity index 100%
rename from infrastructure/terraform/lambda/condition-etl/provider.tf
rename to deployment/terraform/lambda/condition-etl/provider.tf
diff --git a/infrastructure/terraform/lambda/condition-etl/variables.tf b/deployment/terraform/lambda/condition-etl/variables.tf
similarity index 100%
rename from infrastructure/terraform/lambda/condition-etl/variables.tf
rename to deployment/terraform/lambda/condition-etl/variables.tf
diff --git a/infrastructure/terraform/lambda/ecmk_to_ara/main.tf b/deployment/terraform/lambda/ecmk_to_ara/main.tf
similarity index 100%
rename from infrastructure/terraform/lambda/ecmk_to_ara/main.tf
rename to deployment/terraform/lambda/ecmk_to_ara/main.tf
diff --git a/infrastructure/terraform/lambda/ecmk_to_ara/provider.tf b/deployment/terraform/lambda/ecmk_to_ara/provider.tf
similarity index 100%
rename from infrastructure/terraform/lambda/ecmk_to_ara/provider.tf
rename to deployment/terraform/lambda/ecmk_to_ara/provider.tf
diff --git a/infrastructure/terraform/lambda/ecmk_to_ara/variables.tf b/deployment/terraform/lambda/ecmk_to_ara/variables.tf
similarity index 100%
rename from infrastructure/terraform/lambda/ecmk_to_ara/variables.tf
rename to deployment/terraform/lambda/ecmk_to_ara/variables.tf
diff --git a/infrastructure/terraform/lambda/engine/main.tf b/deployment/terraform/lambda/engine/main.tf
similarity index 100%
rename from infrastructure/terraform/lambda/engine/main.tf
rename to deployment/terraform/lambda/engine/main.tf
diff --git a/infrastructure/terraform/lambda/engine/outputs.tf b/deployment/terraform/lambda/engine/outputs.tf
similarity index 100%
rename from infrastructure/terraform/lambda/engine/outputs.tf
rename to deployment/terraform/lambda/engine/outputs.tf
diff --git a/infrastructure/terraform/lambda/engine/provider.tf b/deployment/terraform/lambda/engine/provider.tf
similarity index 100%
rename from infrastructure/terraform/lambda/engine/provider.tf
rename to deployment/terraform/lambda/engine/provider.tf
diff --git a/infrastructure/terraform/lambda/engine/variables.tf b/deployment/terraform/lambda/engine/variables.tf
similarity index 100%
rename from infrastructure/terraform/lambda/engine/variables.tf
rename to deployment/terraform/lambda/engine/variables.tf
diff --git a/infrastructure/terraform/lambda/fast-api/main.tf b/deployment/terraform/lambda/fast-api/main.tf
similarity index 100%
rename from infrastructure/terraform/lambda/fast-api/main.tf
rename to deployment/terraform/lambda/fast-api/main.tf
diff --git a/infrastructure/terraform/lambda/fast-api/outputs.tf b/deployment/terraform/lambda/fast-api/outputs.tf
similarity index 100%
rename from infrastructure/terraform/lambda/fast-api/outputs.tf
rename to deployment/terraform/lambda/fast-api/outputs.tf
diff --git a/infrastructure/terraform/lambda/fast-api/provider.tf b/deployment/terraform/lambda/fast-api/provider.tf
similarity index 100%
rename from infrastructure/terraform/lambda/fast-api/provider.tf
rename to deployment/terraform/lambda/fast-api/provider.tf
diff --git a/infrastructure/terraform/lambda/fast-api/variables.tf b/deployment/terraform/lambda/fast-api/variables.tf
similarity index 100%
rename from infrastructure/terraform/lambda/fast-api/variables.tf
rename to deployment/terraform/lambda/fast-api/variables.tf
diff --git a/infrastructure/terraform/lambda/hubspot_deal_etl/main.tf b/deployment/terraform/lambda/hubspot_deal_etl/main.tf
similarity index 100%
rename from infrastructure/terraform/lambda/hubspot_deal_etl/main.tf
rename to deployment/terraform/lambda/hubspot_deal_etl/main.tf
diff --git a/infrastructure/terraform/lambda/hubspot_deal_etl/provider.tf b/deployment/terraform/lambda/hubspot_deal_etl/provider.tf
similarity index 100%
rename from infrastructure/terraform/lambda/hubspot_deal_etl/provider.tf
rename to deployment/terraform/lambda/hubspot_deal_etl/provider.tf
diff --git a/infrastructure/terraform/lambda/hubspot_deal_etl/variables.tf b/deployment/terraform/lambda/hubspot_deal_etl/variables.tf
similarity index 100%
rename from infrastructure/terraform/lambda/hubspot_deal_etl/variables.tf
rename to deployment/terraform/lambda/hubspot_deal_etl/variables.tf
diff --git a/infrastructure/terraform/lambda/magic_plan/main.tf b/deployment/terraform/lambda/magic_plan/main.tf
similarity index 100%
rename from infrastructure/terraform/lambda/magic_plan/main.tf
rename to deployment/terraform/lambda/magic_plan/main.tf
diff --git a/infrastructure/terraform/lambda/magic_plan/outputs.tf b/deployment/terraform/lambda/magic_plan/outputs.tf
similarity index 100%
rename from infrastructure/terraform/lambda/magic_plan/outputs.tf
rename to deployment/terraform/lambda/magic_plan/outputs.tf
diff --git a/infrastructure/terraform/lambda/magic_plan/provider.tf b/deployment/terraform/lambda/magic_plan/provider.tf
similarity index 100%
rename from infrastructure/terraform/lambda/magic_plan/provider.tf
rename to deployment/terraform/lambda/magic_plan/provider.tf
diff --git a/infrastructure/terraform/lambda/magic_plan/variables.tf b/deployment/terraform/lambda/magic_plan/variables.tf
similarity index 100%
rename from infrastructure/terraform/lambda/magic_plan/variables.tf
rename to deployment/terraform/lambda/magic_plan/variables.tf
diff --git a/infrastructure/terraform/lambda/ordnanceSurvey/main.tf b/deployment/terraform/lambda/ordnanceSurvey/main.tf
similarity index 100%
rename from infrastructure/terraform/lambda/ordnanceSurvey/main.tf
rename to deployment/terraform/lambda/ordnanceSurvey/main.tf
diff --git a/infrastructure/terraform/lambda/ordnanceSurvey/provider.tf b/deployment/terraform/lambda/ordnanceSurvey/provider.tf
similarity index 100%
rename from infrastructure/terraform/lambda/ordnanceSurvey/provider.tf
rename to deployment/terraform/lambda/ordnanceSurvey/provider.tf
diff --git a/infrastructure/terraform/lambda/ordnanceSurvey/variables.tf b/deployment/terraform/lambda/ordnanceSurvey/variables.tf
similarity index 100%
rename from infrastructure/terraform/lambda/ordnanceSurvey/variables.tf
rename to deployment/terraform/lambda/ordnanceSurvey/variables.tf
diff --git a/infrastructure/terraform/lambda/pashub_to_ara/main.tf b/deployment/terraform/lambda/pashub_to_ara/main.tf
similarity index 100%
rename from infrastructure/terraform/lambda/pashub_to_ara/main.tf
rename to deployment/terraform/lambda/pashub_to_ara/main.tf
diff --git a/infrastructure/terraform/lambda/pashub_to_ara/outputs.tf b/deployment/terraform/lambda/pashub_to_ara/outputs.tf
similarity index 100%
rename from infrastructure/terraform/lambda/pashub_to_ara/outputs.tf
rename to deployment/terraform/lambda/pashub_to_ara/outputs.tf
diff --git a/infrastructure/terraform/lambda/pashub_to_ara/provider.tf b/deployment/terraform/lambda/pashub_to_ara/provider.tf
similarity index 100%
rename from infrastructure/terraform/lambda/pashub_to_ara/provider.tf
rename to deployment/terraform/lambda/pashub_to_ara/provider.tf
diff --git a/infrastructure/terraform/lambda/pashub_to_ara/variables.tf b/deployment/terraform/lambda/pashub_to_ara/variables.tf
similarity index 100%
rename from infrastructure/terraform/lambda/pashub_to_ara/variables.tf
rename to deployment/terraform/lambda/pashub_to_ara/variables.tf
diff --git a/infrastructure/terraform/lambda/postcodeSplitter/main.tf b/deployment/terraform/lambda/postcodeSplitter/main.tf
similarity index 76%
rename from infrastructure/terraform/lambda/postcodeSplitter/main.tf
rename to deployment/terraform/lambda/postcodeSplitter/main.tf
index 94c5cd4e..721cb2ea 100644
--- a/infrastructure/terraform/lambda/postcodeSplitter/main.tf
+++ b/deployment/terraform/lambda/postcodeSplitter/main.tf
@@ -38,22 +38,8 @@ module "lambda" {
     {
       STAGE     = var.stage
       LOG_LEVEL = "info"
-      DB_USERNAME = local.db_credentials.db_assessment_model_username
-      DB_PASSWORD = local.db_credentials.db_assessment_model_password
-      GOOGLE_SOLAR_API_KEY = "test"
-      SAP_PREDICTIONS_BUCKET = "test"
-      CARBON_PREDICTIONS_BUCKET = "test"
-      HEAT_PREDICTIONS_BUCKET = "test"
-      HEATING_KWH_PREDICTIONS_BUCKET = "test"
-      HOTWATER_KWH_PREDICTIONS_BUCKET = "test"
-      API_KEY = "test"
-      ENVIRONMENT = "test"
-      SECRET_KEY = "test"
-      PLAN_TRIGGER_BUCKET = "test"
-      DATA_BUCKET = "test"
-      EPC_AUTH_TOKEN = "test"
-      ENGINE_SQS_URL = "test"
-      ENERGY_ASSESSMENTS_BUCKET = "test"
+      POSTGRES_USERNAME = local.db_credentials.db_assessment_model_username
+      POSTGRES_PASSWORD = local.db_credentials.db_assessment_model_password
       ADDRESS2UPRN_QUEUE_URL = data.terraform_remote_state.address2uprn.outputs.address2uprn_queue_url
       S3_BUCKET_NAME = data.terraform_remote_state.shared.outputs.retrofit_sap_data_bucket_name
     },
diff --git a/infrastructure/terraform/lambda/postcodeSplitter/outputs.tf b/deployment/terraform/lambda/postcodeSplitter/outputs.tf
similarity index 100%
rename from infrastructure/terraform/lambda/postcodeSplitter/outputs.tf
rename to deployment/terraform/lambda/postcodeSplitter/outputs.tf
diff --git a/infrastructure/terraform/lambda/postcodeSplitter/provider.tf b/deployment/terraform/lambda/postcodeSplitter/provider.tf
similarity index 100%
rename from infrastructure/terraform/lambda/postcodeSplitter/provider.tf
rename to deployment/terraform/lambda/postcodeSplitter/provider.tf
diff --git a/infrastructure/terraform/lambda/postcodeSplitter/variables.tf b/deployment/terraform/lambda/postcodeSplitter/variables.tf
similarity index 100%
rename from infrastructure/terraform/lambda/postcodeSplitter/variables.tf
rename to deployment/terraform/lambda/postcodeSplitter/variables.tf
diff --git a/infrastructure/terraform/modules/acm_certificate/main.tf b/deployment/terraform/modules/acm_certificate/main.tf
similarity index 100%
rename from infrastructure/terraform/modules/acm_certificate/main.tf
rename to deployment/terraform/modules/acm_certificate/main.tf
diff --git a/infrastructure/terraform/modules/acm_certificate/outputs.tf b/deployment/terraform/modules/acm_certificate/outputs.tf
similarity index 100%
rename from infrastructure/terraform/modules/acm_certificate/outputs.tf
rename to deployment/terraform/modules/acm_certificate/outputs.tf
diff --git a/infrastructure/terraform/modules/acm_certificate/variables.tf b/deployment/terraform/modules/acm_certificate/variables.tf
similarity index 100%
rename from infrastructure/terraform/modules/acm_certificate/variables.tf
rename to deployment/terraform/modules/acm_certificate/variables.tf
diff --git a/infrastructure/terraform/modules/cloudfront/main.tf b/deployment/terraform/modules/cloudfront/main.tf
similarity index 100%
rename from infrastructure/terraform/modules/cloudfront/main.tf
rename to deployment/terraform/modules/cloudfront/main.tf
diff --git a/infrastructure/terraform/modules/cloudfront/variables.tf b/deployment/terraform/modules/cloudfront/variables.tf
similarity index 100%
rename from infrastructure/terraform/modules/cloudfront/variables.tf
rename to deployment/terraform/modules/cloudfront/variables.tf
diff --git a/infrastructure/terraform/modules/container_registry/main.tf b/deployment/terraform/modules/container_registry/main.tf
similarity index 100%
rename from infrastructure/terraform/modules/container_registry/main.tf
rename to deployment/terraform/modules/container_registry/main.tf
diff --git a/infrastructure/terraform/modules/container_registry/outputs.tf b/deployment/terraform/modules/container_registry/outputs.tf
similarity index 100%
rename from infrastructure/terraform/modules/container_registry/outputs.tf
rename to deployment/terraform/modules/container_registry/outputs.tf
diff --git a/infrastructure/terraform/modules/container_registry/variables.tf b/deployment/terraform/modules/container_registry/variables.tf
similarity index 100%
rename from infrastructure/terraform/modules/container_registry/variables.tf
rename to deployment/terraform/modules/container_registry/variables.tf
diff --git a/infrastructure/terraform/modules/ecr/main.tf b/deployment/terraform/modules/ecr/main.tf
similarity index 100%
rename from infrastructure/terraform/modules/ecr/main.tf
rename to deployment/terraform/modules/ecr/main.tf
diff --git a/infrastructure/terraform/modules/ecr/outputs.tf b/deployment/terraform/modules/ecr/outputs.tf
similarity index 100%
rename from infrastructure/terraform/modules/ecr/outputs.tf
rename to deployment/terraform/modules/ecr/outputs.tf
diff --git a/infrastructure/terraform/modules/ecr/variables.tf b/deployment/terraform/modules/ecr/variables.tf
similarity index 100%
rename from infrastructure/terraform/modules/ecr/variables.tf
rename to deployment/terraform/modules/ecr/variables.tf
diff --git a/infrastructure/terraform/modules/general_iam_policy/main.tf b/deployment/terraform/modules/general_iam_policy/main.tf
similarity index 100%
rename from infrastructure/terraform/modules/general_iam_policy/main.tf
rename to deployment/terraform/modules/general_iam_policy/main.tf
diff --git a/infrastructure/terraform/modules/general_iam_policy/outputs.tf b/deployment/terraform/modules/general_iam_policy/outputs.tf
similarity index 100%
rename from infrastructure/terraform/modules/general_iam_policy/outputs.tf
rename to deployment/terraform/modules/general_iam_policy/outputs.tf
diff --git a/infrastructure/terraform/modules/general_iam_policy/variables.tf b/deployment/terraform/modules/general_iam_policy/variables.tf
similarity index 100%
rename from infrastructure/terraform/modules/general_iam_policy/variables.tf
rename to deployment/terraform/modules/general_iam_policy/variables.tf
diff --git a/infrastructure/terraform/modules/lambda_execution_role/main.tf b/deployment/terraform/modules/lambda_execution_role/main.tf
similarity index 100%
rename from infrastructure/terraform/modules/lambda_execution_role/main.tf
rename to deployment/terraform/modules/lambda_execution_role/main.tf
diff --git a/infrastructure/terraform/modules/lambda_execution_role/outputs.tf b/deployment/terraform/modules/lambda_execution_role/outputs.tf
similarity index 100%
rename from infrastructure/terraform/modules/lambda_execution_role/outputs.tf
rename to deployment/terraform/modules/lambda_execution_role/outputs.tf
diff --git a/infrastructure/terraform/modules/lambda_execution_role/variables.tf b/deployment/terraform/modules/lambda_execution_role/variables.tf
similarity index 100%
rename from infrastructure/terraform/modules/lambda_execution_role/variables.tf
rename to deployment/terraform/modules/lambda_execution_role/variables.tf
diff --git a/infrastructure/terraform/modules/lambda_service/main.tf b/deployment/terraform/modules/lambda_service/main.tf
similarity index 100%
rename from infrastructure/terraform/modules/lambda_service/main.tf
rename to deployment/terraform/modules/lambda_service/main.tf
diff --git a/infrastructure/terraform/modules/lambda_service/outputs.tf b/deployment/terraform/modules/lambda_service/outputs.tf
similarity index 100%
rename from infrastructure/terraform/modules/lambda_service/outputs.tf
rename to deployment/terraform/modules/lambda_service/outputs.tf
diff --git a/infrastructure/terraform/modules/lambda_service/variables.tf b/deployment/terraform/modules/lambda_service/variables.tf
similarity index 100%
rename from infrastructure/terraform/modules/lambda_service/variables.tf
rename to deployment/terraform/modules/lambda_service/variables.tf
diff --git a/infrastructure/terraform/modules/lambda_service_zip/main.tf b/deployment/terraform/modules/lambda_service_zip/main.tf
similarity index 100%
rename from infrastructure/terraform/modules/lambda_service_zip/main.tf
rename to deployment/terraform/modules/lambda_service_zip/main.tf
diff --git a/infrastructure/terraform/modules/lambda_service_zip/variables.tf b/deployment/terraform/modules/lambda_service_zip/variables.tf
similarity index 100%
rename from infrastructure/terraform/modules/lambda_service_zip/variables.tf
rename to deployment/terraform/modules/lambda_service_zip/variables.tf
diff --git a/infrastructure/terraform/modules/lambda_sqs_trigger/main.tf b/deployment/terraform/modules/lambda_sqs_trigger/main.tf
similarity index 100%
rename from infrastructure/terraform/modules/lambda_sqs_trigger/main.tf
rename to deployment/terraform/modules/lambda_sqs_trigger/main.tf
diff --git a/infrastructure/terraform/modules/lambda_sqs_trigger/variables.tf b/deployment/terraform/modules/lambda_sqs_trigger/variables.tf
similarity index 100%
rename from infrastructure/terraform/modules/lambda_sqs_trigger/variables.tf
rename to deployment/terraform/modules/lambda_sqs_trigger/variables.tf
diff --git a/infrastructure/terraform/modules/lambda_with_api_gateway/main.tf b/deployment/terraform/modules/lambda_with_api_gateway/main.tf
similarity index 100%
rename from infrastructure/terraform/modules/lambda_with_api_gateway/main.tf
rename to deployment/terraform/modules/lambda_with_api_gateway/main.tf
diff --git a/infrastructure/terraform/modules/lambda_with_api_gateway/outputs.tf b/deployment/terraform/modules/lambda_with_api_gateway/outputs.tf
similarity index 100%
rename from infrastructure/terraform/modules/lambda_with_api_gateway/outputs.tf
rename to deployment/terraform/modules/lambda_with_api_gateway/outputs.tf
diff --git a/infrastructure/terraform/modules/lambda_with_api_gateway/variables.tf b/deployment/terraform/modules/lambda_with_api_gateway/variables.tf
similarity index 96%
rename from infrastructure/terraform/modules/lambda_with_api_gateway/variables.tf
rename to deployment/terraform/modules/lambda_with_api_gateway/variables.tf
index 95e5acd9..b5d0515a 100644
--- a/infrastructure/terraform/modules/lambda_with_api_gateway/variables.tf
+++ b/deployment/terraform/modules/lambda_with_api_gateway/variables.tf
@@ -11,7 +11,7 @@ variable "zip_excludes" {
     "**/*.pyc",
     "**/.pytest_cache/**",
     "**/tests/**",
-    "**/infrastructure/**"
+    "**/deployment/**"
   ]
 }
 
diff --git a/infrastructure/terraform/modules/lambda_with_sqs/main.tf b/deployment/terraform/modules/lambda_with_sqs/main.tf
similarity index 100%
rename from infrastructure/terraform/modules/lambda_with_sqs/main.tf
rename to deployment/terraform/modules/lambda_with_sqs/main.tf
diff --git a/infrastructure/terraform/modules/lambda_with_sqs/outputs.tf b/deployment/terraform/modules/lambda_with_sqs/outputs.tf
similarity index 100%
rename from infrastructure/terraform/modules/lambda_with_sqs/outputs.tf
rename to deployment/terraform/modules/lambda_with_sqs/outputs.tf
diff --git a/infrastructure/terraform/modules/lambda_with_sqs/variables.tf b/deployment/terraform/modules/lambda_with_sqs/variables.tf
similarity index 100%
rename from infrastructure/terraform/modules/lambda_with_sqs/variables.tf
rename to deployment/terraform/modules/lambda_with_sqs/variables.tf
diff --git a/infrastructure/terraform/modules/route53/main.tf b/deployment/terraform/modules/route53/main.tf
similarity index 100%
rename from infrastructure/terraform/modules/route53/main.tf
rename to deployment/terraform/modules/route53/main.tf
diff --git a/infrastructure/terraform/modules/route53/variables.tf b/deployment/terraform/modules/route53/variables.tf
similarity index 100%
rename from infrastructure/terraform/modules/route53/variables.tf
rename to deployment/terraform/modules/route53/variables.tf
diff --git a/infrastructure/terraform/modules/s3/main.tf b/deployment/terraform/modules/s3/main.tf
similarity index 100%
rename from infrastructure/terraform/modules/s3/main.tf
rename to deployment/terraform/modules/s3/main.tf
diff --git a/infrastructure/terraform/modules/s3/outputs.tf b/deployment/terraform/modules/s3/outputs.tf
similarity index 100%
rename from infrastructure/terraform/modules/s3/outputs.tf
rename to deployment/terraform/modules/s3/outputs.tf
diff --git a/infrastructure/terraform/modules/s3/variables.tf b/deployment/terraform/modules/s3/variables.tf
similarity index 100%
rename from infrastructure/terraform/modules/s3/variables.tf
rename to deployment/terraform/modules/s3/variables.tf
diff --git a/infrastructure/terraform/modules/s3_iam_policy/main.tf b/deployment/terraform/modules/s3_iam_policy/main.tf
similarity index 100%
rename from infrastructure/terraform/modules/s3_iam_policy/main.tf
rename to deployment/terraform/modules/s3_iam_policy/main.tf
diff --git a/infrastructure/terraform/modules/s3_iam_policy/outputs.tf b/deployment/terraform/modules/s3_iam_policy/outputs.tf
similarity index 100%
rename from infrastructure/terraform/modules/s3_iam_policy/outputs.tf
rename to deployment/terraform/modules/s3_iam_policy/outputs.tf
diff --git a/infrastructure/terraform/modules/s3_iam_policy/variables.tf b/deployment/terraform/modules/s3_iam_policy/variables.tf
similarity index 100%
rename from infrastructure/terraform/modules/s3_iam_policy/variables.tf
rename to deployment/terraform/modules/s3_iam_policy/variables.tf
diff --git a/infrastructure/terraform/modules/s3_presignable_bucket/main.tf b/deployment/terraform/modules/s3_presignable_bucket/main.tf
similarity index 100%
rename from infrastructure/terraform/modules/s3_presignable_bucket/main.tf
rename to deployment/terraform/modules/s3_presignable_bucket/main.tf
diff --git a/infrastructure/terraform/modules/s3_presignable_bucket/outputs.tf b/deployment/terraform/modules/s3_presignable_bucket/outputs.tf
similarity index 100%
rename from infrastructure/terraform/modules/s3_presignable_bucket/outputs.tf
rename to deployment/terraform/modules/s3_presignable_bucket/outputs.tf
diff --git a/infrastructure/terraform/modules/s3_presignable_bucket/variables.tf b/deployment/terraform/modules/s3_presignable_bucket/variables.tf
similarity index 100%
rename from infrastructure/terraform/modules/s3_presignable_bucket/variables.tf
rename to deployment/terraform/modules/s3_presignable_bucket/variables.tf
diff --git a/infrastructure/terraform/modules/ses/main.tf b/deployment/terraform/modules/ses/main.tf
similarity index 100%
rename from infrastructure/terraform/modules/ses/main.tf
rename to deployment/terraform/modules/ses/main.tf
diff --git a/infrastructure/terraform/modules/ses/outputs.tf b/deployment/terraform/modules/ses/outputs.tf
similarity index 100%
rename from infrastructure/terraform/modules/ses/outputs.tf
rename to deployment/terraform/modules/ses/outputs.tf
diff --git a/infrastructure/terraform/modules/ses/variables.tf b/deployment/terraform/modules/ses/variables.tf
similarity index 100%
rename from infrastructure/terraform/modules/ses/variables.tf
rename to deployment/terraform/modules/ses/variables.tf
diff --git a/infrastructure/terraform/modules/sqs_queue/main.tf b/deployment/terraform/modules/sqs_queue/main.tf
similarity index 100%
rename from infrastructure/terraform/modules/sqs_queue/main.tf
rename to deployment/terraform/modules/sqs_queue/main.tf
diff --git a/infrastructure/terraform/modules/sqs_queue/outputs.tf b/deployment/terraform/modules/sqs_queue/outputs.tf
similarity index 100%
rename from infrastructure/terraform/modules/sqs_queue/outputs.tf
rename to deployment/terraform/modules/sqs_queue/outputs.tf
diff --git a/infrastructure/terraform/modules/sqs_queue/variables.tf b/deployment/terraform/modules/sqs_queue/variables.tf
similarity index 100%
rename from infrastructure/terraform/modules/sqs_queue/variables.tf
rename to deployment/terraform/modules/sqs_queue/variables.tf
diff --git a/infrastructure/terraform/modules/tf_state_bucket/main.tf b/deployment/terraform/modules/tf_state_bucket/main.tf
similarity index 100%
rename from infrastructure/terraform/modules/tf_state_bucket/main.tf
rename to deployment/terraform/modules/tf_state_bucket/main.tf
diff --git a/infrastructure/terraform/modules/tf_state_bucket/outputs.tf b/deployment/terraform/modules/tf_state_bucket/outputs.tf
similarity index 100%
rename from infrastructure/terraform/modules/tf_state_bucket/outputs.tf
rename to deployment/terraform/modules/tf_state_bucket/outputs.tf
diff --git a/infrastructure/terraform/modules/tf_state_bucket/variables.tf b/deployment/terraform/modules/tf_state_bucket/variables.tf
similarity index 100%
rename from infrastructure/terraform/modules/tf_state_bucket/variables.tf
rename to deployment/terraform/modules/tf_state_bucket/variables.tf
diff --git a/infrastructure/terraform/shared/dev.tfvars b/deployment/terraform/shared/dev.tfvars
similarity index 100%
rename from infrastructure/terraform/shared/dev.tfvars
rename to deployment/terraform/shared/dev.tfvars
diff --git a/infrastructure/terraform/shared/main.tf b/deployment/terraform/shared/main.tf
similarity index 100%
rename from infrastructure/terraform/shared/main.tf
rename to deployment/terraform/shared/main.tf
diff --git a/infrastructure/terraform/shared/secrets.tf b/deployment/terraform/shared/secrets.tf
similarity index 100%
rename from infrastructure/terraform/shared/secrets.tf
rename to deployment/terraform/shared/secrets.tf
diff --git a/infrastructure/terraform/shared/variables.tf b/deployment/terraform/shared/variables.tf
similarity index 100%
rename from infrastructure/terraform/shared/variables.tf
rename to deployment/terraform/shared/variables.tf
diff --git a/domain/__init__.py b/domain/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/domain/addresses/__init__.py b/domain/addresses/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/domain/addresses/postcode_batching.py b/domain/addresses/postcode_batching.py
new file mode 100644
index 00000000..44e4d967
--- /dev/null
+++ b/domain/addresses/postcode_batching.py
@@ -0,0 +1,51 @@
+from __future__ import annotations
+
+from collections.abc import Iterable, Iterator
+
+from domain.addresses.user_address import UserAddress
+from domain.postcode import Postcode
+
+
+def iter_postcode_grouped_batches(
+    addresses: Iterable[UserAddress],
+    *,
+    max_batch_size: int = 500,
+) -> Iterator[list[UserAddress]]:
+    if max_batch_size < 1:
+        raise ValueError("max_batch_size must be >= 1")
+
+    groups = _group_by_postcode_in_order(addresses)
+
+    buffer: list[UserAddress] = []
+    for group in groups.values():
+        group_len = len(group)
+
+        # Full or oversize single-Postcode group (may exceed max_batch_size):
+        # flush buffer first, then yield the group unsplit as its own batch.
+        # Mirrors the legacy ``if group_len >= batch_size`` branch.
+        if group_len >= max_batch_size:
+            if buffer:
+                yield buffer
+                buffer = []
+            yield group
+            continue
+
+        # Adding this group would overflow: flush buffer before appending.
+        if len(buffer) + group_len > max_batch_size:
+            yield buffer
+            buffer = []
+
+        buffer.extend(group)
+
+    # Final flush.
+    if buffer:
+        yield buffer
+
+
+def _group_by_postcode_in_order(
+    addresses: Iterable[UserAddress],
+) -> dict[Postcode, list[UserAddress]]:
+    groups: dict[Postcode, list[UserAddress]] = {}
+    for address in addresses:
+        groups.setdefault(address.postcode, []).append(address)
+    return groups
diff --git a/domain/addresses/user_address.py b/domain/addresses/user_address.py
new file mode 100644
index 00000000..9a28751b
--- /dev/null
+++ b/domain/addresses/user_address.py
@@ -0,0 +1,18 @@
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Optional
+
+from domain.postcode import Postcode
+
+
+def _empty_source_row() -> dict[str, str]:  # default factory for UserAddress.source_row: a fresh empty dict per call
+    return {}
+
+
+@dataclass(frozen=True)
+class UserAddress:  # immutable record: an address string, its Postcode and optional extras
+    user_address: str
+    postcode: Postcode
+    internal_reference: Optional[str] = None  # None when no reference is supplied
+    source_row: dict[str, str] = field(default_factory=_empty_source_row, compare=False)  # compare=False: ignored by ==
diff --git a/domain/postcode.py b/domain/postcode.py
new file mode 100644
index 00000000..8e4e7c79
--- /dev/null
+++ b/domain/postcode.py
@@ -0,0 +1,15 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+
+@dataclass(frozen=True)
+class Postcode:  # value object; ``value`` is stored with all whitespace removed and upper-cased
+    value: str
+
+    def __post_init__(self) -> None:
+        # Normalise after init. The dataclass is frozen, so plain assignment would raise; use object.__setattr__.
+        object.__setattr__(self, "value", "".join(self.value.split()).upper())
+
+    def __str__(self) -> str:
+        return self.value
diff --git a/domain/tasks/__init__.py b/domain/tasks/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/domain/tasks/subtasks.py b/domain/tasks/subtasks.py
new file mode 100644
index 00000000..bd49a6ec
--- /dev/null
+++ b/domain/tasks/subtasks.py
@@ -0,0 +1,72 @@
+from dataclasses import dataclass
+from datetime import datetime, timezone
+from enum import Enum
+from typing import Any, Optional
+from uuid import UUID, uuid4
+
+
+class SubTaskStatus(str, Enum):
+    """Lifecycle states a SubTask can be in."""
+
+    WAITING = "waiting"
+    IN_PROGRESS = "in progress"
+    COMPLETE = "complete"
+    FAILED = "failed"
+
+
+@dataclass
+class SubTask:
+    """A unit of work attached to a parent task via ``task_id``."""
+
+    id: UUID
+    task_id: UUID
+    status: SubTaskStatus = SubTaskStatus.WAITING
+    inputs: Optional[dict[str, Any]] = None
+    outputs: Optional[dict[str, Any]] = None
+    cloud_logs_url: Optional[str] = None
+    job_started: Optional[datetime] = None
+    job_completed: Optional[datetime] = None
+
+    @classmethod
+    def create(
+        cls, *, task_id: UUID, inputs: Optional[dict[str, Any]] = None
+    ) -> "SubTask":
+        """Return a new WAITING subtask with a random id."""
+        fresh_id = uuid4()
+        return cls(
+            id=fresh_id,
+            task_id=task_id,
+            status=SubTaskStatus.WAITING,
+            inputs=inputs,
+        )
+
+    def start(self, cloud_logs_url: Optional[str] = None) -> None:
+        """Mark the subtask IN_PROGRESS.
+
+        ``job_started`` is stamped only if not already set, and an existing
+        ``cloud_logs_url`` is kept when none is passed. Raises ValueError if
+        the subtask is neither WAITING nor IN_PROGRESS.
+        """
+        startable = (SubTaskStatus.WAITING, SubTaskStatus.IN_PROGRESS)
+        if self.status not in startable:
+            raise ValueError(f"cannot start subtask in status {self.status}")
+        if self.job_started is None:
+            self.job_started = datetime.now(timezone.utc)
+        if cloud_logs_url is not None:
+            self.cloud_logs_url = cloud_logs_url
+        self.status = SubTaskStatus.IN_PROGRESS
+
+    def complete(self, result: Any = None) -> None:
+        """Mark COMPLETE at the current UTC time; a non-None result is stored
+        as ``outputs["result"]``."""
+        self.job_completed = datetime.now(timezone.utc)
+        self.status = SubTaskStatus.COMPLETE
+        if result is not None:
+            self.outputs = {"result": result}
+
+    def fail(self, error: BaseException) -> None:
+        """Mark FAILED at the current UTC time and record ``str(error)`` as
+        ``outputs["error"]``."""
+        self.status = SubTaskStatus.FAILED
+        self.job_completed = datetime.now(timezone.utc)
+        self.outputs = {"error": str(error)}
diff --git a/domain/tasks/tasks.py b/domain/tasks/tasks.py
new file mode 100644
index 00000000..177258d6
--- /dev/null
+++ b/domain/tasks/tasks.py
@@ -0,0 +1,94 @@
+from dataclasses import dataclass
+from datetime import datetime, timezone
+from enum import Enum
+from typing import Optional
+from uuid import UUID, uuid4
+
+from domain.tasks.subtasks import SubTaskStatus
+
+
+class TaskStatus(str, Enum):  # lifecycle states of a Task; members are also plain strings
+    WAITING = "waiting"
+    IN_PROGRESS = "in progress"
+    COMPLETE = "complete"
+    FAILED = "failed"
+
+
+class Source(str, Enum):  # NOTE(review): presumably names the kind of id held in Task.source_id — confirm with callers
+    PORTFOLIO = "portfolio_id"
+    HUBSPOT_DEAL = "hubspot_deal_id"
+
+
+@dataclass
+class Task:  # a job record: status, origin fields and start/finish timestamps
+    id: UUID
+    task_source: str
+    status: TaskStatus = TaskStatus.WAITING
+    service: Optional[str] = None
+    source: Optional[Source] = None
+    source_id: Optional[str] = None
+    job_started: Optional[datetime] = None
+    job_completed: Optional[datetime] = None
+
+    @classmethod
+    def create(
+        cls,
+        *,
+        task_source: str,
+        service: Optional[str] = None,
+        source: Optional[Source] = None,
+        source_id: Optional[str] = None,
+    ) -> "Task":  # new WAITING task with a random id
+        if not task_source.strip():  # rejects empty and whitespace-only values
+            raise ValueError("task_source must be non-empty")
+        return cls(
+            id=uuid4(),
+            task_source=task_source,
+            service=service,
+            source=source,
+            source_id=source_id,
+            status=TaskStatus.WAITING,
+            job_started=datetime.now(timezone.utc),  # stamped at creation, so start() leaves it unchanged
+        )
+
+    def start(self) -> None:  # -> IN_PROGRESS; ValueError if the task is COMPLETE or FAILED
+        if self.status not in (TaskStatus.WAITING, TaskStatus.IN_PROGRESS):
+            raise ValueError(f"cannot start task in status {self.status}")
+        if self.job_started is None:  # only stamp a start time that is still missing
+            self.job_started = datetime.now(timezone.utc)
+        self.status = TaskStatus.IN_PROGRESS
+
+    def complete(self) -> None:  # -> COMPLETE from any status, stamping job_completed (UTC)
+        self.status = TaskStatus.COMPLETE
+        self.job_completed = datetime.now(timezone.utc)
+
+    def fail(self) -> None:  # -> FAILED from any status, stamping job_completed (UTC)
+        self.status = TaskStatus.FAILED
+        self.job_completed = datetime.now(timezone.utc)
+
+    def recalculate_from_subtasks(self, statuses: list[SubTaskStatus]) -> None:
+        """Recompute Task.status from its SubTasks' statuses.
+
+        Rule (preserved from legacy _update_task_progress):
+          - any FAILED       → FAILED
+          - all COMPLETE     → COMPLETE
+          - any IN_PROGRESS  → IN_PROGRESS
+          - otherwise        → WAITING
+
+        Empty list is a no-op (newly-created task with no subtasks).
+        """
+        if not statuses:
+            return
+        now = datetime.now(timezone.utc)
+        if SubTaskStatus.FAILED in statuses:  # checked first: one failure outranks everything else
+            self.status = TaskStatus.FAILED
+            self.job_completed = now
+        elif all(s is SubTaskStatus.COMPLETE for s in statuses):
+            self.status = TaskStatus.COMPLETE
+            self.job_completed = now
+        elif SubTaskStatus.IN_PROGRESS in statuses:
+            self.status = TaskStatus.IN_PROGRESS
+            self.job_completed = None  # a non-terminal status clears any earlier completion time
+        else:
+            self.status = TaskStatus.WAITING  # e.g. a mix of COMPLETE and WAITING subtasks
+            self.job_completed = None
diff --git a/infrastructure/__init__.py b/infrastructure/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/infrastructure/address2uprn_queue_client.py b/infrastructure/address2uprn_queue_client.py
new file mode 100644
index 00000000..314e981f
--- /dev/null
+++ b/infrastructure/address2uprn_queue_client.py
@@ -0,0 +1,20 @@
+from uuid import UUID
+
+from infrastructure.sqs_client import SqsClient
+
+
+class Address2UprnQueueClient(SqsClient):  # SqsClient with a typed publish() for address batch messages
+    def publish(
+        self,
+        *,
+        parent_task_id: UUID,
+        child_subtask_id: UUID,
+        s3_uri: str,
+    ) -> str:  # returns whatever SqsClient.send returns
+        return self.send(
+            {
+                "task_id": str(parent_task_id),  # UUIDs are sent as strings
+                "sub_task_id": str(child_subtask_id),
+                "s3_uri": s3_uri,
+            }
+        )
diff --git a/infrastructure/csv_s3_client.py b/infrastructure/csv_s3_client.py
new file mode 100644
index 00000000..8af8de73
--- /dev/null
+++ b/infrastructure/csv_s3_client.py
@@ -0,0 +1,52 @@
+import csv
+from io import StringIO
+
+from infrastructure.s3_client import S3Client
+from infrastructure.s3_uri import parse_s3_uri
+
+
+class CsvS3Client(S3Client):
+    """S3Client that reads and writes CSV objects as lists of row dicts."""
+
+    def read_rows(self, s3_uri: str) -> list[dict[str, str]]:
+        """Download the CSV at ``s3_uri`` and return one dict per data row.
+
+        Decoding tries UTF-8 (a BOM is tolerated) first, then Windows-1252.
+
+        Raises:
+            ValueError: if the URI's bucket is not this client's bucket.
+        """
+        bucket, key = parse_s3_uri(s3_uri)
+        if bucket != self.bucket:
+            raise ValueError(
+                f"s3_uri bucket {bucket!r} does not match client bucket {self.bucket!r}"
+            )
+        raw = self.get_object(key)
+        try:
+            text = raw.decode("utf-8-sig")
+        except UnicodeDecodeError:
+            # Some uploads are Windows-1252 (e.g. £ as byte 0xA3), not UTF-8.
+            text = raw.decode("cp1252")
+
+        # newline="" makes StringIO split lines on \r, \n and \r\n without
+        # translating them, which is what the csv module expects. The default
+        # splits on \n only, so CR-terminated files would be rejected.
+        reader = csv.DictReader(StringIO(text, newline=""))
+        return [dict(row) for row in reader]
+
+    def save_rows(self, rows: list[dict[str, str]], key: str) -> str:
+        """Write ``rows`` as a UTF-8 CSV to ``key``; return ``put_object``'s result.
+
+        The header is taken from the first row's keys.
+
+        Raises:
+            ValueError: if ``rows`` is empty.
+        """
+        if not rows:
+            raise ValueError("Cannot save an empty rows list: header is unknown")
+        buffer = StringIO()
+        fieldnames = list(rows[0].keys())
+        writer = csv.DictWriter(buffer, fieldnames=fieldnames)
+        writer.writeheader()
+        writer.writerows(rows)
+        return self.put_object(key, buffer.getvalue().encode("utf-8"))
diff --git a/infrastructure/postgres/__init__.py b/infrastructure/postgres/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/infrastructure/postgres/config.py b/infrastructure/postgres/config.py
new file mode 100644
index 00000000..c39c6f30
--- /dev/null
+++ b/infrastructure/postgres/config.py
@@ -0,0 +1,50 @@
+from dataclasses import dataclass
+from typing import Mapping
+from urllib.parse import quote
+
+
+@dataclass(frozen=True)
+class PostgresConfig:
+    """Connection and pool settings for a PostgreSQL SQLAlchemy engine."""
+
+    host: str
+    port: int
+    username: str
+    password: str
+    database: str
+    driver: str = "psycopg2"
+    pool_size: int = 3
+    max_overflow: int = 5
+    pool_pre_ping: bool = True
+    pool_recycle: int = 300
+
+    def url(self) -> str:
+        """Return the ``postgresql+<driver>://`` connection URL.
+
+        Username and password are percent-encoded so that characters such as
+        ``@``, ``:`` or ``/`` inside a credential are not read as URL
+        delimiters.
+        """
+        username = quote(self.username, safe="")
+        password = quote(self.password, safe="")
+        return (
+            f"postgresql+{self.driver}://"
+            f"{username}:{password}@{self.host}:{self.port}/{self.database}"
+        )
+
+    @classmethod
+    def from_env(cls, env: Mapping[str, str]) -> "PostgresConfig":
+        """Build a config from the ``POSTGRES_*`` entries of ``env``.
+
+        ``POSTGRES_DRIVER`` is optional; pool settings keep their defaults.
+        Raises KeyError for a missing required entry and ValueError when
+        ``POSTGRES_PORT`` is not an integer.
+        """
+        return cls(
+            host=env["POSTGRES_HOST"],
+            port=int(env["POSTGRES_PORT"]),
+            username=env["POSTGRES_USERNAME"],
+            password=env["POSTGRES_PASSWORD"],
+            database=env["POSTGRES_DATABASE"],
+            driver=env.get("POSTGRES_DRIVER", "psycopg2"),
+        )
diff --git a/infrastructure/postgres/engine.py b/infrastructure/postgres/engine.py
new file mode 100644
index 00000000..0de9efcb
--- /dev/null
+++ b/infrastructure/postgres/engine.py
@@ -0,0 +1,18 @@
+from sqlalchemy.engine import Engine
+from sqlmodel import Session, create_engine
+
+from infrastructure.postgres.config import PostgresConfig
+
+
+def make_engine(config: PostgresConfig) -> Engine:  # build an Engine from config.url() plus the config's pool settings
+    return create_engine(
+        config.url(),
+        pool_size=config.pool_size,
+        max_overflow=config.max_overflow,
+        pool_pre_ping=config.pool_pre_ping,
+        pool_recycle=config.pool_recycle,
+    )
+
+
+def make_session(engine: Engine) -> Session:  # new Session on ``engine``; NOTE(review): closing it is presumably the caller's job
+    return Session(engine)
diff --git a/infrastructure/postgres/subtask_table.py b/infrastructure/postgres/subtask_table.py
new file mode 100644
index 00000000..dec34fbf
--- /dev/null
+++ b/infrastructure/postgres/subtask_table.py
@@ -0,0 +1,21 @@
+from datetime import datetime, timezone
+from typing import ClassVar, Optional
+from uuid import UUID, uuid4
+
+from sqlmodel import Field, SQLModel
+
+
+class SubTaskRow(SQLModel, table=True):  # SQLModel table mapping for rows of "sub_task"
+    __tablename__: ClassVar[str] = "sub_task"  # pyright: ignore[reportIncompatibleVariableOverride]
+
+    id: UUID = Field(default_factory=uuid4, primary_key=True, index=True)
+    task_id: UUID = Field(foreign_key="tasks.id")  # parent row in the "tasks" table
+    job_started: Optional[datetime] = None
+    job_completed: Optional[datetime] = None
+    status: str = Field(default="waiting")  # stored as plain text, not a DB enum
+    inputs: Optional[str] = None  # text column; NOTE(review): presumably JSON-encoded — confirm with the repository
+    outputs: Optional[str] = None  # text column; same assumption as ``inputs``
+    cloud_logs_url: Optional[str] = None
+    updated_at: datetime = Field(
+        default_factory=lambda: datetime.now(timezone.utc)  # evaluated per row at construction time
+    )
diff --git a/infrastructure/postgres/task_table.py b/infrastructure/postgres/task_table.py
new file mode 100644
index 00000000..32e5450b
--- /dev/null
+++ b/infrastructure/postgres/task_table.py
@@ -0,0 +1,36 @@
+from datetime import datetime, timezone
+from typing import ClassVar, Optional
+from uuid import UUID, uuid4
+
+from sqlalchemy import Column
+from sqlalchemy import Enum as SAEnum
+from sqlmodel import Field, SQLModel
+
+from domain.tasks.tasks import Source
+
+
+class TaskRow(SQLModel, table=True):  # SQLModel table mapping for rows of "tasks"
+    __tablename__: ClassVar[str] = "tasks"  # pyright: ignore[reportIncompatibleVariableOverride]
+
+    id: UUID = Field(default_factory=uuid4, primary_key=True, index=True)
+    task_source: str
+    job_started: Optional[datetime] = None
+    job_completed: Optional[datetime] = None
+    status: str = Field(default="waiting")  # stored as plain text, not a DB enum
+    service: Optional[str] = None
+    updated_at: datetime = Field(
+        default_factory=lambda: datetime.now(timezone.utc)  # evaluated per row at construction time
+    )
+
+    # Explicit sa_column: a database enum type named "source" whose labels are
+    # the Source members' values (via values_callable) rather than their names.
+    source: Optional[Source] = Field(
+        default=None,
+        sa_column=Column(
+            SAEnum(
+                Source,
+                name="source",
+                values_callable=lambda cls: [m.value for m in cls],  # pyright: ignore[reportUnknownLambdaType, reportUnknownMemberType, reportUnknownVariableType]
+            ),
+            nullable=True,
+        ),
+    )
+    source_id: Optional[str] = None
diff --git a/infrastructure/s3_client.py b/infrastructure/s3_client.py
new file mode 100644
index 00000000..a789fcc2
--- /dev/null
+++ b/infrastructure/s3_client.py
@@ -0,0 +1,24 @@
+from typing import Any
+
+
+class S3Client:
+    """Thin wrapper binding a boto-style S3 client to a single bucket."""
+
+    def __init__(self, boto_s3_client: Any, bucket: str) -> None:
+        self._client = boto_s3_client
+        self._bucket = bucket
+
+    @property
+    def bucket(self) -> str:
+        """Name of the bucket every call on this client targets."""
+        return self._bucket
+
+    def get_object(self, key: str) -> bytes:
+        """Fetch ``key`` from the bucket and return the whole body."""
+        fetched: dict[str, Any] = self._client.get_object(Bucket=self._bucket, Key=key)
+        return fetched["Body"].read()
+
+    def put_object(self, key: str, body: bytes) -> str:
+        """Store ``body`` under ``key`` and return the object's ``s3://`` URI."""
+        self._client.put_object(Bucket=self._bucket, Key=key, Body=body)
+        return "s3://{}/{}".format(self._bucket, key)
diff --git a/infrastructure/s3_uri.py b/infrastructure/s3_uri.py
new file mode 100644
index 00000000..1dd5d967
--- /dev/null
+++ b/infrastructure/s3_uri.py
@@ -0,0 +1,40 @@
+from urllib.parse import unquote
+
+
+def parse_s3_uri(s3_uri: str) -> tuple[str, str]:
+    """Split an S3 location into ``(bucket, key)``.
+
+    Accepts ``s3://bucket/key`` URIs and console-style URLs of the form
+    ``.../s3/object/<bucket>?...&prefix=<url-encoded key>``.
+
+    Raises:
+        ValueError: if the input matches neither form, or the bucket or key
+            is missing or empty.
+    """
+    if s3_uri.startswith("s3://"):
+        parts = s3_uri[len("s3://") :].split("/", 1)
+        if len(parts) < 2 or not parts[0] or not parts[1]:
+            raise ValueError("S3 URI must include both a bucket and a key")
+        return parts[0], parts[1]
+
+    if "?" not in s3_uri:
+        raise ValueError(f"Not an s3:// URI and has no query string: {s3_uri!r}")
+    base, query = s3_uri.split("?", 1)
+
+    if "/s3/object/" not in base:
+        raise ValueError(f"Console URL has no '/s3/object/' segment: {s3_uri!r}")
+    bucket = base.split("/s3/object/", 1)[1]
+
+    params: dict[str, str] = {}
+    for item in query.split("&"):
+        if "=" in item:
+            name, value = item.split("=", 1)
+            params[name] = value
+    key = unquote(params.get("prefix", ""))
+    # Apply the same completeness check as the s3:// branch: without it a URL
+    # lacking ``prefix`` (or a bucket) would hand the caller an empty string.
+    if not bucket or not key:
+        raise ValueError(
+            f"Console URL must include both a bucket and a prefix: {s3_uri!r}"
+        )
+    return bucket, key
diff --git a/infrastructure/sqs_client.py b/infrastructure/sqs_client.py
new file mode 100644
index 00000000..6fe8dd2e
--- /dev/null
+++ b/infrastructure/sqs_client.py
@@ -0,0 +1,23 @@
+import json
+from typing import Any
+
+
+class SqsClient:
+    """Thin wrapper binding a boto-style SQS client to a single queue URL."""
+
+    def __init__(self, boto_sqs_client: Any, queue_url: str) -> None:
+        self._client = boto_sqs_client
+        self._queue_url = queue_url
+
+    @property
+    def queue_url(self) -> str:
+        """URL of the queue every message from this client is sent to."""
+        return self._queue_url
+
+    def send(self, body: dict[str, Any]) -> str:
+        """Send ``body`` as a JSON message and return the reply's ``MessageId``."""
+        payload = json.dumps(body)
+        reply: dict[str, Any] = self._client.send_message(
+            QueueUrl=self._queue_url, MessageBody=payload
+        )
+        return reply["MessageId"]
diff --git a/orchestration/__init__.py b/orchestration/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/orchestration/postcode_splitter_orchestrator.py b/orchestration/postcode_splitter_orchestrator.py
new file mode 100644
index 00000000..36f4b515
--- /dev/null
+++ b/orchestration/postcode_splitter_orchestrator.py
@@ -0,0 +1,55 @@
+from __future__ import annotations
+
+from uuid import UUID
+
+from infrastructure.address2uprn_queue_client import Address2UprnQueueClient
+from orchestration.task_orchestrator import TaskOrchestrator
+from domain.addresses.postcode_batching import iter_postcode_grouped_batches
+from repositories.user_address.user_address_repository import UserAddressRepository
+
+
+class PostcodeSplitterOrchestrator:  # splits an address file into postcode-grouped batches and queues one child subtask per batch
+    def __init__(
+        self,
+        task_orchestrator: TaskOrchestrator,
+        user_address_repo: UserAddressRepository,
+        queue_client: Address2UprnQueueClient,
+        max_batch_size: int = 500,
+    ) -> None:
+        self._task_orchestrator = task_orchestrator
+        self._user_address_repo = user_address_repo
+        self._queue_client = queue_client
+        self._max_batch_size = max_batch_size  # forwarded to iter_postcode_grouped_batches
+
+    def split_and_dispatch(
+        self,
+        *,
+        parent_task_id: UUID,
+        parent_subtask_id: UUID,
+        input_s3_uri: str,
+    ) -> list[UUID]:  # ids of the child subtasks created, in dispatch order
+        addresses = self._user_address_repo.load_batch(input_s3_uri)
+        path_prefix = (
+            f"ara_postcode_splitter_batches/{parent_task_id}/{parent_subtask_id}"  # parent_subtask_id is used only here
+        )
+
+        child_ids: list[UUID] = []
+        for batch in iter_postcode_grouped_batches(
+            addresses, max_batch_size=self._max_batch_size
+        ):
+            # Per batch, in this order: save the batch file, create the child subtask, then publish the message.
+            batch_uri = self._user_address_repo.save_batch(batch, path_prefix)
+            child = self._task_orchestrator.create_child_subtask(
+                parent_task_id,
+                inputs={
+                    "task_id": str(parent_task_id),
+                    "s3_uri": batch_uri,
+                },
+            )
+            self._queue_client.publish(
+                parent_task_id=parent_task_id,
+                child_subtask_id=child.id,
+                s3_uri=batch_uri,
+            )
+            child_ids.append(child.id)  # recorded only after a successful publish
+
+        return child_ids
diff --git a/orchestration/task_orchestrator.py b/orchestration/task_orchestrator.py
new file mode 100644
index 00000000..ebb71a32
--- /dev/null
+++ b/orchestration/task_orchestrator.py
@@ -0,0 +1,106 @@
+from typing import Any, Callable, Optional
+from uuid import UUID
+
+from domain.tasks.subtasks import SubTask
+from domain.tasks.tasks import Source, Task
+from repositories.tasks.subtask_repository import SubTaskRepository
+from repositories.tasks.task_repository import TaskRepository
+from utilities.private import private
+
+
+class TaskOrchestrator:
+    """Coordinates Task + SubTask lifecycle.
+
+    Exposes primitives (start/complete/fail_subtask) for handlers that want
+    fine-grained control, and a high-level run_subtask wrapper that owns the
+    try/except so it can replace the body of the legacy subtask_handler
+    decorator in backend/utils/subtasks.py.
+
+    Each primitive saves the SubTask, then recomputes the parent Task's
+    status from all its children.
+    """
+
+    def __init__(
+        self,
+        task_repo: TaskRepository,
+        subtask_repo: SubTaskRepository,
+    ) -> None:
+        self._tasks = task_repo
+        self._subtasks = subtask_repo
+
+    def create_task_with_subtask(
+        self,
+        *,
+        task_source: str,
+        inputs: Optional[dict[str, Any]] = None,
+        service: Optional[str] = None,
+        source: Optional[Source] = None,
+        source_id: Optional[str] = None,
+    ) -> tuple[Task, SubTask]:  # persists a new Task, then its first SubTask; returns both
+        task = Task.create(
+            task_source=task_source,
+            service=service,
+            source=source,
+            source_id=source_id,
+        )
+        self._tasks.create(task)
+        subtask = SubTask.create(task_id=task.id, inputs=inputs)  # ``inputs`` goes to the subtask, not the task
+        self._subtasks.create(subtask)
+        return task, subtask
+
+    def create_child_subtask(
+        self,
+        parent_task_id: UUID,
+        *,
+        inputs: Optional[dict[str, Any]] = None,
+    ) -> SubTask:  # persists a new WAITING subtask under an existing task; does not cascade
+        subtask = SubTask.create(task_id=parent_task_id, inputs=inputs)
+        self._subtasks.create(subtask)
+        return subtask
+
+    def start_subtask(
+        self, subtask_id: UUID, cloud_logs_url: Optional[str] = None
+    ) -> SubTask:  # load, start, save, then refresh the parent task's status
+        subtask = self._subtasks.get(subtask_id)
+        subtask.start(cloud_logs_url)
+        self._subtasks.save(subtask)
+        self._cascade(subtask.task_id)
+        return subtask
+
+    def complete_subtask(
+        self, subtask_id: UUID, result: Any = None
+    ) -> SubTask:  # load, complete, save, then refresh the parent task's status
+        subtask = self._subtasks.get(subtask_id)
+        subtask.complete(result)
+        self._subtasks.save(subtask)
+        self._cascade(subtask.task_id)
+        return subtask
+
+    def fail_subtask(self, subtask_id: UUID, error: BaseException) -> SubTask:  # load, fail, save, then refresh the parent
+        subtask = self._subtasks.get(subtask_id)
+        subtask.fail(error)
+        self._subtasks.save(subtask)
+        self._cascade(subtask.task_id)
+        return subtask
+
+    def run_subtask(
+        self,
+        subtask_id: UUID,
+        work: Callable[[], Any],
+        cloud_logs_url: Optional[str] = None,
+    ) -> Any:  # returns work()'s result; exceptions from work() are recorded and re-raised
+        self.start_subtask(subtask_id, cloud_logs_url)
+        try:
+            result = work()
+        except Exception as e:  # Exception only: KeyboardInterrupt/SystemExit pass through unrecorded
+            self.fail_subtask(subtask_id, e)
+            raise
+        self.complete_subtask(subtask_id, result)  # outside the try: an error here is not recorded as a failure
+        return result
+
+    @private
+    def _cascade(self, task_id: UUID) -> None:  # recompute and save the parent Task's status from all its subtasks
+        statuses = [s.status for s in self._subtasks.list_by_task(task_id)]
+        task = self._tasks.get(task_id)
+        task.recalculate_from_subtasks(statuses)
+        self._tasks.save(task)
diff --git a/repositories/__init__.py b/repositories/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/repositories/tasks/__init__.py b/repositories/tasks/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/repositories/tasks/subtask_postgres_repository.py b/repositories/tasks/subtask_postgres_repository.py
new file mode 100644
index 00000000..affc280e
--- /dev/null
+++ b/repositories/tasks/subtask_postgres_repository.py
@@ -0,0 +1,102 @@
+import json
+from datetime import datetime, timezone
+from typing import Any, Optional
+from uuid import UUID
+
+from sqlmodel import Session, select
+
+from domain.tasks.subtasks import SubTask, SubTaskStatus
+from infrastructure.postgres.subtask_table import SubTaskRow
+from repositories.tasks.subtask_repository import SubTaskRepository
+from utilities.private import private
+
+
+class SubTaskPostgresRepository(SubTaskRepository):
+    """SubTaskRepository backed by SubTaskRow through a SQLModel Session.
+
+    ``inputs`` and ``outputs`` are persisted as JSON text.
+    """
+
+    def __init__(self, session: Session) -> None:
+        self._session = session
+
+    def create(self, subtask: SubTask) -> SubTask:
+        """Insert ``subtask``, commit, and return it as re-read from the row."""
+        row = self._to_row(subtask)
+        self._session.add(row)
+        self._session.commit()
+        self._session.refresh(row)
+        return self._to_domain(row)
+
+    def get(self, subtask_id: UUID) -> SubTask:
+        """Load a subtask by id; raises ValueError if no row exists."""
+        row = self._session.get(SubTaskRow, subtask_id)
+        if row is None:
+            raise ValueError(f"SubTask {subtask_id} not found")
+        return self._to_domain(row)
+
+    def save(self, subtask: SubTask) -> None:
+        """Copy the subtask's mutable fields onto its existing row and commit.
+
+        Raises ValueError if the row does not exist.
+        """
+        row = self._session.get(SubTaskRow, subtask.id)
+        if row is None:
+            raise ValueError(f"SubTask {subtask.id} not found")
+        row.status = subtask.status.value
+        row.job_started = subtask.job_started
+        row.job_completed = subtask.job_completed
+        row.inputs = _dumps_or_none(subtask.inputs)
+        row.outputs = _dumps_or_none(subtask.outputs)
+        row.cloud_logs_url = subtask.cloud_logs_url
+        row.updated_at = datetime.now(timezone.utc)
+        self._session.add(row)
+        self._session.commit()
+
+    def list_by_task(self, task_id: UUID) -> list[SubTask]:
+        """Return every subtask whose ``task_id`` matches."""
+        rows = self._session.exec(
+            select(SubTaskRow).where(SubTaskRow.task_id == task_id)
+        ).all()
+        return [self._to_domain(r) for r in rows]
+
+    @private
+    def _to_row(self, subtask: SubTask) -> SubTaskRow:
+        return SubTaskRow(
+            id=subtask.id,
+            task_id=subtask.task_id,
+            status=subtask.status.value,
+            inputs=_dumps_or_none(subtask.inputs),
+            outputs=_dumps_or_none(subtask.outputs),
+            cloud_logs_url=subtask.cloud_logs_url,
+            job_started=subtask.job_started,
+            job_completed=subtask.job_completed,
+        )
+
+    @private
+    def _to_domain(self, row: SubTaskRow) -> SubTask:
+        return SubTask(
+            id=row.id,
+            task_id=row.task_id,
+            status=SubTaskStatus(row.status.lower()),
+            inputs=_loads_or_none(row.inputs),
+            outputs=_loads_or_none(row.outputs),
+            cloud_logs_url=row.cloud_logs_url,
+            job_started=row.job_started,
+            job_completed=row.job_completed,
+        )
+
+
+def _dumps_or_none(value: Optional[dict[str, Any]]) -> Optional[str]:
+    """Serialise a payload to JSON text, or None for a missing payload.
+
+    ``default=str`` stops values json cannot encode natively (UUID, datetime,
+    ...) from raising TypeError: a subtask result is arbitrary (``Any``), and
+    an encoding failure here would abort the status write.
+    """
+    return json.dumps(value, default=str) if value is not None else None
+
+
+def _loads_or_none(s: Optional[str]) -> Optional[dict[str, Any]]:
+    """Parse stored JSON text; None or empty text yields None."""
+    return json.loads(s) if s else None
diff --git a/repositories/tasks/subtask_repository.py b/repositories/tasks/subtask_repository.py
new file mode 100644
index 00000000..adb36f99
--- /dev/null
+++ b/repositories/tasks/subtask_repository.py
@@ -0,0 +1,18 @@
+from abc import ABC, abstractmethod
+from uuid import UUID
+
+from domain.tasks.subtasks import SubTask
+
+
+class SubTaskRepository(ABC):  # abstract persistence interface for SubTask objects
+    @abstractmethod
+    def create(self, subtask: SubTask) -> SubTask: ...  # store a new subtask and return it
+
+    @abstractmethod
+    def get(self, subtask_id: UUID) -> SubTask: ...  # look a subtask up by id
+
+    @abstractmethod
+    def save(self, subtask: SubTask) -> None: ...  # persist changes to an existing subtask
+
+    @abstractmethod
+    def list_by_task(self, task_id: UUID) -> list[SubTask]: ...  # all subtasks of one task
diff --git a/repositories/tasks/task_postgres_repository.py b/repositories/tasks/task_postgres_repository.py
new file mode 100644
index 00000000..d23fe91c
--- /dev/null
+++ b/repositories/tasks/task_postgres_repository.py
@@ -0,0 +1,77 @@
+"""
+Postgres implementation of TaskRepository.
+
+NOTE: this repository owns only the `tasks` table. Unlike the legacy
+backend.app.db.functions.tasks.Tasks.TasksInterface.create_task, it does NOT
+auto-create a child SubTask. Do not rewire existing Lambda callers to this
+repo until the SubTask aggregate + TaskOrchestrator slice lands — they would
+silently lose their initial SubTask row.
+"""
+
+from datetime import datetime, timezone
+from uuid import UUID
+
+from sqlmodel import Session
+
+from domain.tasks.tasks import Task, TaskStatus
+from infrastructure.postgres.task_table import TaskRow
+from repositories.tasks.task_repository import TaskRepository
+from utilities.private import private
+
+
+class TaskPostgresRepository(TaskRepository):  # TaskRepository backed by TaskRow through a SQLModel Session
+    def __init__(self, session: Session) -> None:
+        self._session = session
+
+    def create(self, task: Task) -> Task:  # insert and commit, then return the task as re-read from the row
+        row = self._to_row(task)
+        self._session.add(row)
+        self._session.commit()
+        self._session.refresh(row)
+        return self._to_domain(row)
+
+    def get(self, task_id: UUID) -> Task:  # raises ValueError when no row has this id
+        row = self._session.get(TaskRow, task_id)
+        if row is None:
+            raise ValueError(f"Task {task_id} not found")
+        return self._to_domain(row)
+
+    def save(self, task: Task) -> None:  # update an existing row; ValueError if it is missing
+        row = self._session.get(TaskRow, task.id)
+        if row is None:
+            raise ValueError(f"Task {task.id} not found")
+        # task_source is not copied: it is written only when the row is created.
+        row.status = task.status.value
+        row.job_started = task.job_started
+        row.job_completed = task.job_completed
+        row.service = task.service
+        row.source = task.source
+        row.source_id = task.source_id
+        row.updated_at = datetime.now(timezone.utc)
+        self._session.add(row)
+        self._session.commit()
+
+    @private
+    def _to_row(self, task: Task) -> TaskRow:  # domain Task -> table row; status stored as its string value
+        return TaskRow(
+            id=task.id,
+            task_source=task.task_source,
+            status=task.status.value,
+            service=task.service,
+            source=task.source,
+            source_id=task.source_id,
+            job_started=task.job_started,
+            job_completed=task.job_completed,
+        )
+
+    @private
+    def _to_domain(self, row: TaskRow) -> Task:  # table row -> domain Task
+        return Task(
+            id=row.id,
+            task_source=row.task_source,
+            status=TaskStatus(row.status.lower()),  # lower(): accept a stored status in any letter case
+            service=row.service,
+            source=row.source,
+            source_id=row.source_id,
+            job_started=row.job_started,
+            job_completed=row.job_completed,
+        )
diff --git a/repositories/tasks/task_repository.py b/repositories/tasks/task_repository.py
new file mode 100644
index 00000000..8bdce0cc
--- /dev/null
+++ b/repositories/tasks/task_repository.py
@@ -0,0 +1,15 @@
+from abc import ABC, abstractmethod
+from uuid import UUID
+
+from domain.tasks.tasks import Task
+
+
+class TaskRepository(ABC):  # abstract persistence interface for Task objects
+    @abstractmethod
+    def create(self, task: Task) -> Task: ...  # store a new task and return it
+
+    @abstractmethod
+    def get(self, task_id: UUID) -> Task: ...  # look a task up by id
+
+    @abstractmethod
+    def save(self, task: Task) -> None: ...  # persist changes to an existing task
diff --git a/repositories/user_address/__init__.py b/repositories/user_address/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/repositories/user_address/user_address_csv_s3_repository.py b/repositories/user_address/user_address_csv_s3_repository.py
new file mode 100644
index 00000000..058fd5a5
--- /dev/null
+++ b/repositories/user_address/user_address_csv_s3_repository.py
@@ -0,0 +1,75 @@
+from __future__ import annotations
+
+import uuid
+from datetime import datetime, timezone
+from typing import Optional
+
+from domain.addresses.user_address import UserAddress
+from domain.postcode import Postcode
+from infrastructure.csv_s3_client import CsvS3Client
+from repositories.user_address.user_address_repository import UserAddressRepository
+
+_ADDRESS_COLUMNS: tuple[str, str, str] = ("Address 1", "Address 2", "Address 3")
+_POSTCODE_COLUMN: str = "postcode"
+_INTERNAL_REFERENCE_COLUMN: str = "Internal Reference"
+_POSTCODE_CLEAN_COLUMN: str = "postcode_clean"
+
+
+class UserAddressCsvS3Repository(UserAddressRepository):
+    """UserAddressRepository that keeps address batches as CSV objects in S3."""
+
+    def __init__(self, csv_client: CsvS3Client, bucket: str) -> None:
+        self._csv_client = csv_client
+        self._bucket = bucket
+
+    def load_batch(self, s3_uri: str) -> list[UserAddress]:
+        """Read the CSV at ``s3_uri`` into UserAddress objects, one per row.
+
+        The address is the non-blank "Address 1..3" cells joined with ", ".
+        Cells may be ``None`` when a row has fewer fields than the header
+        (``csv.DictReader`` fills the gap with None); they are treated as blank.
+
+        Raises:
+            ValueError: if the file has rows but no "postcode" column.
+        """
+        rows = self._csv_client.read_rows(s3_uri)
+        if rows and _POSTCODE_COLUMN not in rows[0]:
+            raise ValueError(
+                f"Input CSV {s3_uri} has no {_POSTCODE_COLUMN!r} column; "
+                f"columns present: {sorted(rows[0])}"
+            )
+        addresses: list[UserAddress] = []
+        for row in rows:
+            cells = ((row.get(col) or "").strip() for col in _ADDRESS_COLUMNS)
+            user_address = ", ".join(cell for cell in cells if cell)
+            postcode = row.get(_POSTCODE_COLUMN) or ""
+            raw_ref = (row.get(_INTERNAL_REFERENCE_COLUMN) or "").strip()
+            internal_reference: Optional[str] = raw_ref or None
+            addresses.append(
+                UserAddress(
+                    user_address=user_address,
+                    postcode=Postcode(postcode),
+                    internal_reference=internal_reference,
+                    source_row=row,
+                )
+            )
+        return addresses
+
+    def save_batch(self, addresses: list[UserAddress], path_prefix: str) -> str:
+        """Write ``addresses`` as a CSV under ``path_prefix``.
+
+        Each output row is the address's original ``source_row`` plus a
+        "postcode_clean" column holding ``str(address.postcode)``. Returns
+        whatever ``CsvS3Client.save_rows`` returns for the new object.
+        """
+        rows: list[dict[str, str]] = [
+            {**addr.source_row, _POSTCODE_CLEAN_COLUMN: str(addr.postcode)}
+            for addr in addresses
+        ]
+
+        # TODO: [New Starter Task] file_name generation can be standardised and
+        # made easier to read and test in a future implementation. Build that!
+        filename = (
+            f"{datetime.now(timezone.utc).isoformat()}_{uuid.uuid4().hex[:8]}.csv"
+        )
+        key = f"{path_prefix.rstrip('/')}/{filename}"
+        return self._csv_client.save_rows(rows, key)
diff --git a/repositories/user_address/user_address_repository.py b/repositories/user_address/user_address_repository.py
new file mode 100644
index 00000000..b2c0f866
--- /dev/null
+++ b/repositories/user_address/user_address_repository.py
@@ -0,0 +1,13 @@
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+
+from domain.addresses.user_address import UserAddress
+
+
+class UserAddressRepository(ABC):
+    @abstractmethod
+    def load_batch(self, s3_uri: str) -> list[UserAddress]: ...
+
+    @abstractmethod
+    def save_batch(self, addresses: list[UserAddress], path_prefix: str) -> str: ...
diff --git a/run_backlog.sh b/run_backlog.sh
deleted file mode 100644
index 398e921c..00000000
--- a/run_backlog.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/bash
-backlog browser --port 6421
diff --git a/test.requirements.txt b/test.requirements.txt
index 7fdd7dc4..26125034 100644
--- a/test.requirements.txt
+++ b/test.requirements.txt
@@ -9,4 +9,5 @@ hubspot-api-client
 fuzzywuzzy
 pymupdf
 playwright==1.58.0
-msal
\ No newline at end of file
+msal
+moto[s3,sqs]
\ No newline at end of file
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 00000000..0a246372
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,48 @@
+"""Shared pytest fixtures for the ``tests/`` tree.
+
+Provides an ephemeral PostgreSQL engine for tests that exercise SQLModel
+repositories. PostgreSQL has no true in-memory mode; ``pytest-postgresql``
+starts a real, throwaway server in a temp directory (the process is started
+once per session and a fresh database is created/dropped per test). That is
+the closest equivalent to "in-memory" and matches production behaviour far
+better than SQLite (enums, JSONB, constraint semantics, etc.).
+"""
+
+from __future__ import annotations
+
+import glob
+from collections.abc import Iterator
+from typing import Any
+
+import pytest
+from psycopg import Connection
+from pytest_postgresql import factories
+from sqlalchemy import Engine
+from sqlmodel import SQLModel, create_engine
+
+# NOTE: ``create_all`` only builds tables already registered on
+# SQLModel.metadata, i.e. those whose SQLModel row modules have been imported.
+# TODO(review): this file imports no row modules - confirm the tests do.
+
+
+# pg_ctl ships under a versioned path and is not on PATH in the dev container.
+_PG_CTL = next(iter(sorted(glob.glob("/usr/lib/postgresql/*/bin/pg_ctl"))), "pg_ctl")
+
+postgresql_proc = factories.postgresql_proc(
+    executable=_PG_CTL
+)  # pyright: ignore[reportUnknownMemberType]
+postgresql = factories.postgresql("postgresql_proc")
+
+
+@pytest.fixture
+def db_engine(postgresql: Connection[Any]) -> Iterator[Engine]:
+    """A SQLModel engine bound to a fresh, ephemeral PostgreSQL database."""
+    info = postgresql.info  # connection details of the per-test database
+    url = f"postgresql+psycopg://{info.user}:@{info.host}:{info.port}/{info.dbname}"
+    engine = create_engine(url)
+    SQLModel.metadata.create_all(engine)  # tables registered on the metadata
+    try:
+        yield engine  # the test body runs while the generator is suspended here
+    finally:
+        SQLModel.metadata.drop_all(engine)  # teardown runs even if the test fails
+        engine.dispose()
diff --git a/tests/domain/__init__.py b/tests/domain/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/domain/addresses/__init__.py b/tests/domain/addresses/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/domain/addresses/test_postcode_batching.py b/tests/domain/addresses/test_postcode_batching.py
new file mode 100644
index 00000000..8ffcf1b5
--- /dev/null
+++ b/tests/domain/addresses/test_postcode_batching.py
@@ -0,0 +1,118 @@
+import pytest
+
+from domain.addresses.postcode_batching import iter_postcode_grouped_batches
+from domain.addresses.user_address import UserAddress
+from domain.postcode import Postcode
+
+
+def _addrs(postcode: str, n: int) -> list[UserAddress]:
+    """Return ``n`` addresses "<i> <postcode> Street" that share ``postcode``."""
+    built: list[UserAddress] = []
+    for idx in range(n):
+        street = f"{idx} {postcode} Street"
+        built.append(UserAddress(user_address=street, postcode=Postcode(postcode)))
+    return built
+
+
+def test_empty_input_yields_no_batches() -> None:
+    # act / assert
+    assert list(iter_postcode_grouped_batches([])) == []
+
+
+def test_single_batch_under_cap() -> None:
+    # arrange
+    addrs = _addrs("AA1 1AA", 3) + _addrs("BB2 2BB", 2)
+    # act
+    batches = list(iter_postcode_grouped_batches(addrs, max_batch_size=500))
+    # assert
+    assert len(batches) == 1
+    assert batches[0] == addrs
+
+
+def test_multiple_postcodes_packed_into_one_batch_up_to_cap() -> None:
+    # Two groups whose total exactly equals the cap pack into a single
+    # batch -- no premature flush.
+    # arrange
+    addrs = _addrs("AA1 1AA", 3) + _addrs("BB2 2BB", 2)
+    # act
+    batches = list(iter_postcode_grouped_batches(addrs, max_batch_size=5))
+    # assert
+    assert len(batches) == 1
+    assert len(batches[0]) == 5
+
+
+def test_flush_on_overflow_before_adding_next_postcode() -> None:
+    # Cap is 5. First group fills 3 slots; second group of 3 would overflow,
+    # so the buffer is flushed first and the next group starts a fresh batch.
+    # arrange
+    addrs = _addrs("AA1 1AA", 3) + _addrs("BB2 2BB", 3)
+    # act
+    batches = list(iter_postcode_grouped_batches(addrs, max_batch_size=5))
+    # assert
+    assert len(batches) == 2
+    assert [str(a.postcode) for a in batches[0]] == ["AA11AA"] * 3
+    assert [str(a.postcode) for a in batches[1]] == ["BB22BB"] * 3
+
+
+def test_single_postcode_group_exceeding_cap_is_dispatched_whole() -> None:
+    # An oversize single-postcode group goes out as one batch larger than
+    # the cap -- the cap never splits a postcode.
+    # arrange
+    addrs = _addrs("AA1 1AA", 7)
+    # act
+    batches = list(iter_postcode_grouped_batches(addrs, max_batch_size=5))
+    # assert
+    assert len(batches) == 1
+    assert len(batches[0]) == 7
+
+
+def test_oversize_group_flushes_existing_buffer_first() -> None:
+    # Mirrors the legacy ``if buffer: flush`` branch when an oversize group
+    # is encountered: buffered work must not be lost or interleaved.
+    # arrange
+    small = _addrs("AA1 1AA", 2)
+    big = _addrs("BB2 2BB", 7)
+    tail = _addrs("CC3 3CC", 1)
+    # act
+    batches = list(
+        iter_postcode_grouped_batches(small + big + tail, max_batch_size=5)
+    )
+    # assert
+    assert len(batches) == 3
+    assert [str(a.postcode) for a in batches[0]] == ["AA11AA", "AA11AA"]
+    assert [str(a.postcode) for a in batches[1]] == ["BB22BB"] * 7
+    assert [str(a.postcode) for a in batches[2]] == ["CC33CC"]
+
+
+def test_final_flush_yields_remaining_buffer() -> None:
+    # No overflow ever happens, but the trailing buffer must still come out.
+    # arrange
+    addrs = _addrs("AA1 1AA", 2) + _addrs("BB2 2BB", 2)
+    # act
+    batches = list(iter_postcode_grouped_batches(addrs, max_batch_size=500))
+    # assert
+    assert batches == [addrs]
+
+
+def test_postcode_grouping_preserves_first_seen_order() -> None:
+    # Interleaved input must still group by postcode and emit in first-seen
+    # order -- never alphabetical.
+    # arrange
+    a1, a2 = _addrs("ZZ9 9ZZ", 2)
+    b1, b2 = _addrs("AA1 1AA", 2)
+    # act
+    batches = list(iter_postcode_grouped_batches([a1, b1, a2, b2]))
+    # assert
+    assert len(batches) == 1
+    assert [str(a.postcode) for a in batches[0]] == [
+        "ZZ99ZZ",
+        "ZZ99ZZ",
+        "AA11AA",
+        "AA11AA",
+    ]
+
+
+def test_invalid_max_batch_size_raises() -> None:
+    # act / assert
+    with pytest.raises(ValueError, match="max_batch_size"):
+        list(iter_postcode_grouped_batches([], max_batch_size=0))
diff --git a/tests/domain/addresses/test_user_address.py b/tests/domain/addresses/test_user_address.py
new file mode 100644
index 00000000..8d092df3
--- /dev/null
+++ b/tests/domain/addresses/test_user_address.py
@@ -0,0 +1,98 @@
+import dataclasses
+
+import pytest
+
+from domain.addresses.user_address import UserAddress
+from domain.postcode import Postcode
+
+
+def test_user_address_holds_postcode_value_object() -> None:
+    # act
+    addr = UserAddress(user_address="1 The Street", postcode=Postcode("sw1a 1aa"))
+    # assert
+    assert addr.postcode == Postcode("SW1A1AA")
+
+
+def test_user_address_preserves_user_address_verbatim() -> None:
+    # The free-text user_address string is intentionally NOT normalised --
+    # only the postcode is canonicalised, and that happens inside Postcode.
+    # act
+    addr = UserAddress(
+        user_address="  1 The   Street  ", postcode=Postcode("SW1A1AA")
+    )
+    # assert
+    assert addr.user_address == "  1 The   Street  "
+
+
+def test_user_address_internal_reference_defaults_to_none() -> None:
+    # act
+    addr = UserAddress(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
+    # assert
+    assert addr.internal_reference is None
+
+
+def test_user_address_internal_reference_accepted() -> None:
+    # act
+    addr = UserAddress(
+        user_address="1 The Street",
+        postcode=Postcode("SW1A1AA"),
+        internal_reference="cust-42",
+    )
+    # assert
+    assert addr.internal_reference == "cust-42"
+
+
+def test_user_address_is_frozen() -> None:
+    # arrange
+    addr = UserAddress(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
+    # act / assert
+    with pytest.raises(dataclasses.FrozenInstanceError):
+        addr.postcode = Postcode("OTHER")  # type: ignore[misc]
+
+
+def test_user_address_equality_uses_canonical_postcode() -> None:
+    # Postcode sanitises eagerly, so addresses built from different surface
+    # forms of the same postcode compare equal.
+    # arrange
+    a = UserAddress(user_address="1 The Street", postcode=Postcode("sw1a 1aa"))
+    b = UserAddress(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
+    # act / assert
+    assert a == b
+
+
+def test_user_address_source_row_defaults_to_empty_dict() -> None:
+    # act
+    addr = UserAddress(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
+    # assert
+    assert addr.source_row == {}
+
+
+def test_user_address_carries_source_row() -> None:
+    # arrange
+    row = {"Address 1": "1 The Street", "postcode": "SW1A 1AA", "SAP Score": "72"}
+    # act
+    addr = UserAddress(
+        user_address="1 The Street",
+        postcode=Postcode("SW1A 1AA"),
+        source_row=row,
+    )
+    # assert
+    assert addr.source_row == row
+
+
+def test_user_address_equality_ignores_source_row() -> None:
+    # source_row is excluded from equality (and hashing): identity stays
+    # defined by the parsed fields.
+    # arrange
+    a = UserAddress(
+        user_address="1 The Street",
+        postcode=Postcode("SW1A1AA"),
+        source_row={"x": "1"},
+    )
+    b = UserAddress(
+        user_address="1 The Street",
+        postcode=Postcode("SW1A1AA"),
+        source_row={"y": "2"},
+    )
+    # act / assert
+    assert a == b
diff --git a/tests/domain/tasks/__init__.py b/tests/domain/tasks/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/domain/tasks/test_subtasks.py b/tests/domain/tasks/test_subtasks.py
new file mode 100644
index 00000000..8cee4496
--- /dev/null
+++ b/tests/domain/tasks/test_subtasks.py
@@ -0,0 +1,95 @@
+from uuid import uuid4
+
+import pytest
+
+from domain.tasks.subtasks import SubTask, SubTaskStatus
+
+
+def test_create_subtask_starts_waiting() -> None:
+    # arrange
+    task_id = uuid4()
+
+    # act
+    st = SubTask.create(task_id=task_id, inputs={"foo": "bar"})
+
+    # assert
+    assert st.task_id == task_id
+    assert st.status is SubTaskStatus.WAITING
+    assert st.inputs == {"foo": "bar"}
+    assert st.outputs is None
+    assert st.job_started is None
+    assert st.job_completed is None
+
+
+def test_start_transitions_to_in_progress_and_sets_cloud_logs_url() -> None:
+    # arrange
+    st = SubTask.create(task_id=uuid4())
+
+    # act
+    st.start(cloud_logs_url="https://example/log")
+
+    # assert
+    assert st.status is SubTaskStatus.IN_PROGRESS
+    assert st.cloud_logs_url == "https://example/log"
+    assert st.job_started is not None
+
+
+def test_start_is_idempotent_from_in_progress() -> None:
+    # arrange
+    st = SubTask.create(task_id=uuid4())
+    st.start()
+    first_start = st.job_started
+
+    # act
+    st.start(cloud_logs_url="https://other")
+
+    # assert
+    assert st.status is SubTaskStatus.IN_PROGRESS
+    assert st.job_started == first_start  # not overwritten
+    assert st.cloud_logs_url == "https://other"
+
+
+def test_start_rejects_from_terminal_status() -> None:
+    # arrange
+    st = SubTask.create(task_id=uuid4())
+    st.complete()
+    # act / assert
+    with pytest.raises(ValueError):
+        st.start()
+
+
+def test_complete_marks_outputs_and_job_completed() -> None:
+    # arrange
+    st = SubTask.create(task_id=uuid4())
+    st.start()
+
+    # act
+    st.complete({"uprn": "123"})
+
+    # assert
+    assert st.status is SubTaskStatus.COMPLETE
+    assert st.outputs == {"result": {"uprn": "123"}}
+    assert st.job_completed is not None
+
+
+def test_complete_without_result_leaves_outputs_unset() -> None:
+    # arrange
+    st = SubTask.create(task_id=uuid4())
+    # act
+    st.complete()
+    # assert
+    assert st.outputs is None
+
+
+def test_fail_records_error_in_outputs() -> None:
+    # arrange
+    st = SubTask.create(task_id=uuid4())
+    err = RuntimeError("boom")
+
+    # act
+    st.fail(err)
+
+    # assert
+    assert st.status is SubTaskStatus.FAILED
+    assert st.outputs == {"error": "boom"}
+    assert st.job_completed is not None
diff --git a/tests/domain/tasks/test_tasks.py b/tests/domain/tasks/test_tasks.py
new file mode 100644
index 00000000..ba82412b
--- /dev/null
+++ b/tests/domain/tasks/test_tasks.py
@@ -0,0 +1,131 @@
+import pytest
+
+from domain.tasks.subtasks import SubTaskStatus
+from domain.tasks.tasks import Source, Task, TaskStatus
+
+
+def test_create_task_starts_waiting() -> None:
+    # arrange / act
+    t = Task.create(
+        task_source="manual:test", source=Source.PORTFOLIO, source_id="abc-123"
+    )
+
+    # assert
+    assert t.status is TaskStatus.WAITING
+    assert t.source is Source.PORTFOLIO
+    assert t.source_id == "abc-123"
+    assert t.job_started is not None
+    assert t.job_completed is None
+
+
+def test_create_task_rejects_blank_task_source() -> None:
+    # act / assert
+    with pytest.raises(ValueError, match="task_source"):
+        Task.create(task_source="   ")
+
+
+def test_start_transitions_to_in_progress() -> None:
+    # arrange
+    t = Task.create(task_source="manual:test")
+    # act
+    t.start()
+    # assert
+    assert t.status is TaskStatus.IN_PROGRESS
+
+
+def test_complete_marks_job_completed() -> None:
+    # arrange
+    t = Task.create(task_source="manual:test")
+    t.start()
+    # act
+    t.complete()
+    # assert
+    assert t.status is TaskStatus.COMPLETE
+    assert t.job_completed is not None
+
+
+def test_fail_marks_job_completed() -> None:
+    # arrange
+    t = Task.create(task_source="manual:test")
+    # act
+    t.fail()
+    # assert
+    assert t.status is TaskStatus.FAILED
+    assert t.job_completed is not None
+
+
+def test_start_rejects_from_terminal_status() -> None:
+    # arrange
+    t = Task.create(task_source="manual:test")
+    t.complete()
+    # act / assert
+    with pytest.raises(ValueError):
+        t.start()
+
+
+def test_recalculate_with_empty_statuses_is_noop() -> None:
+    # arrange
+    t = Task.create(task_source="manual:test")
+    original_status = t.status
+    original_completed = t.job_completed
+
+    # act
+    t.recalculate_from_subtasks([])
+
+    # assert
+    assert t.status is original_status
+    assert t.job_completed is original_completed
+
+
+def test_recalculate_all_waiting_keeps_waiting() -> None:
+    # arrange: drive the task to COMPLETE so recalculation has state to undo
+    t = Task.create(task_source="manual:test")
+    t.start()  # WAITING -> IN_PROGRESS
+    t.complete()  # IN_PROGRESS -> COMPLETE, which also sets job_completed
+
+    # act
+    t.recalculate_from_subtasks([SubTaskStatus.WAITING, SubTaskStatus.WAITING])
+
+    # assert: all-WAITING subtasks reset the task to WAITING, clear job_completed
+    assert t.status is TaskStatus.WAITING
+    assert t.job_completed is None
+
+
+def test_recalculate_any_in_progress_marks_in_progress() -> None:
+    # arrange
+    t = Task.create(task_source="manual:test")
+
+    # act
+    t.recalculate_from_subtasks(
+        [SubTaskStatus.WAITING, SubTaskStatus.IN_PROGRESS, SubTaskStatus.COMPLETE]
+    )
+
+    # assert
+    assert t.status is TaskStatus.IN_PROGRESS
+    assert t.job_completed is None
+
+
+def test_recalculate_all_complete_marks_complete() -> None:
+    # arrange
+    t = Task.create(task_source="manual:test")
+
+    # act
+    t.recalculate_from_subtasks([SubTaskStatus.COMPLETE, SubTaskStatus.COMPLETE])
+
+    # assert
+    assert t.status is TaskStatus.COMPLETE
+    assert t.job_completed is not None
+
+
+def test_recalculate_any_failed_marks_failed_even_with_others() -> None:
+    # arrange
+    t = Task.create(task_source="manual:test")
+
+    # act
+    t.recalculate_from_subtasks(
+        [SubTaskStatus.IN_PROGRESS, SubTaskStatus.COMPLETE, SubTaskStatus.FAILED]
+    )
+
+    # assert
+    assert t.status is TaskStatus.FAILED
+    assert t.job_completed is not None
diff --git a/tests/domain/test_postcode.py b/tests/domain/test_postcode.py
new file mode 100644
index 00000000..f7ce9015
--- /dev/null
+++ b/tests/domain/test_postcode.py
@@ -0,0 +1,59 @@
+import dataclasses
+
+import pytest
+
+from domain.postcode import Postcode
+
+
+def test_postcode_uppercases() -> None:
+    # act / assert
+    assert Postcode("sw1a1aa").value == "SW1A1AA"
+
+
+def test_postcode_strips_internal_spaces() -> None:
+    # act / assert
+    assert Postcode("sw1a 1aa").value == "SW1A1AA"
+
+
+def test_postcode_strips_leading_and_trailing_whitespace() -> None:
+    # act / assert
+    assert Postcode("  sw1a 1aa  ").value == "SW1A1AA"
+
+
+def test_postcode_strips_tabs_and_newlines() -> None:
+    # CSV ingestion occasionally introduces stray whitespace characters; the
+    # canonical form must absorb them just like literal spaces.
+    # act / assert
+    assert Postcode("sw1a\t1aa\n").value == "SW1A1AA"
+
+
+def test_postcode_construction_is_idempotent() -> None:
+    # arrange
+    once = Postcode("sw1a 1aa")
+    # act / assert
+    assert Postcode(once.value).value == "SW1A1AA"
+
+
+def test_postcode_empty_string() -> None:
+    # act / assert
+    assert Postcode("").value == ""
+
+
+def test_postcode_str_returns_canonical_value() -> None:
+    # act / assert
+    assert str(Postcode("sw1a 1aa")) == "SW1A1AA"
+
+
+def test_postcode_equality_ignores_surface_form() -> None:
+    # Differing case / whitespace sanitise to the same canonical value, so
+    # the value objects compare equal.
+    # act / assert
+    assert Postcode("sw1a 1aa") == Postcode("SW1A1AA")
+
+
+def test_postcode_is_frozen() -> None:
+    # arrange
+    postcode = Postcode("SW1A1AA")
+    # act / assert
+    with pytest.raises(dataclasses.FrozenInstanceError):
+        postcode.value = "OTHER"  # type: ignore[misc]
diff --git a/tests/infrastructure/__init__.py b/tests/infrastructure/__init__.py
new file mode 100644
index 00000000..f5ad62d0
--- /dev/null
+++ b/tests/infrastructure/__init__.py
@@ -0,0 +1,10 @@
+from typing import Any
+
+import boto3
+
+REGION = "us-east-1"
+
+
+def make_boto_client(service_name: str) -> Any:
+    factory: Any = boto3.client  # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
+    return factory(service_name, region_name=REGION)
diff --git a/tests/infrastructure/conftest.py b/tests/infrastructure/conftest.py
new file mode 100644
index 00000000..25c1ac3b
--- /dev/null
+++ b/tests/infrastructure/conftest.py
@@ -0,0 +1,28 @@
+import os
+from collections.abc import Iterator
+from typing import Optional
+
+import pytest
+
+
+@pytest.fixture(autouse=True)
+def _aws_creds() -> Iterator[None]:  # pyright: ignore[reportUnusedFunction]
+    """Export dummy AWS credentials for each test and restore the old env after."""
+    fake_env = {
+        "AWS_ACCESS_KEY_ID": "testing",
+        "AWS_SECRET_ACCESS_KEY": "testing",
+        "AWS_SESSION_TOKEN": "testing",
+        "AWS_DEFAULT_REGION": "us-east-1",
+    }
+    saved: dict[str, Optional[str]] = {name: os.environ.get(name) for name in fake_env}
+    os.environ.update(fake_env)
+    try:
+        yield
+    finally:
+        # Put back every variable exactly as it was: delete the ones that
+        # were unset before the test, re-assign the ones that had a value.
+        for name, old_value in saved.items():
+            if old_value is not None:
+                os.environ[name] = old_value
+            else:
+                os.environ.pop(name, None)
diff --git a/tests/infrastructure/test_address2uprn_queue_client.py b/tests/infrastructure/test_address2uprn_queue_client.py
new file mode 100644
index 00000000..c8e89ece
--- /dev/null
+++ b/tests/infrastructure/test_address2uprn_queue_client.py
@@ -0,0 +1,71 @@
+import json
+from collections.abc import Iterator
+from typing import Any, cast
+from uuid import uuid4
+
+import pytest
+from moto import mock_aws
+
+from infrastructure.address2uprn_queue_client import Address2UprnQueueClient
+from tests.infrastructure import make_boto_client
+
+
+@pytest.fixture
+def queue_setup() -> Iterator[tuple[Address2UprnQueueClient, Any, str]]:
+    with mock_aws():
+        boto_client = make_boto_client("sqs")
+        queue: dict[str, Any] = boto_client.create_queue(
+            QueueName="address2uprn-queue"
+        )
+        queue_url = cast(str, queue["QueueUrl"])
+        yield (
+            Address2UprnQueueClient(boto_client, queue_url),
+            boto_client,
+            queue_url,
+        )
+
+
+def test_publish_returns_message_id(
+    queue_setup: tuple[Address2UprnQueueClient, Any, str],
+) -> None:
+    # arrange
+    client, _boto, _url = queue_setup
+    # act
+    message_id = client.publish(
+        parent_task_id=uuid4(),
+        child_subtask_id=uuid4(),
+        s3_uri="s3://my-bucket/path/to/chunk.csv",
+    )
+    # assert
+    assert isinstance(message_id, str)
+    assert message_id
+
+
+def test_publish_body_uses_typed_shape(
+    queue_setup: tuple[Address2UprnQueueClient, Any, str],
+) -> None:
+    # arrange
+    client, boto_client, queue_url = queue_setup
+    parent_id = uuid4()
+    child_id = uuid4()
+    s3_uri = "s3://my-bucket/path/to/chunk.csv"
+
+    # act
+    client.publish(
+        parent_task_id=parent_id,
+        child_subtask_id=child_id,
+        s3_uri=s3_uri,
+    )
+
+    # assert
+    received: dict[str, Any] = boto_client.receive_message(
+        QueueUrl=queue_url, MaxNumberOfMessages=1
+    )
+    messages: list[dict[str, Any]] = received["Messages"]
+    assert len(messages) == 1
+    body = json.loads(messages[0]["Body"])
+    assert body == {
+        "task_id": str(parent_id),
+        "sub_task_id": str(child_id),
+        "s3_uri": s3_uri,
+    }
diff --git a/tests/infrastructure/test_csv_s3_client.py b/tests/infrastructure/test_csv_s3_client.py
new file mode 100644
index 00000000..30e27164
--- /dev/null
+++ b/tests/infrastructure/test_csv_s3_client.py
@@ -0,0 +1,51 @@
+from collections.abc import Iterator
+
+import pytest
+from moto import mock_aws
+
+from infrastructure.csv_s3_client import CsvS3Client
+from tests.infrastructure import make_boto_client
+
+BUCKET = "csv-bucket"
+
+
+@pytest.fixture
+def csv_client() -> Iterator[CsvS3Client]:
+    with mock_aws():
+        boto_client = make_boto_client("s3")
+        boto_client.create_bucket(Bucket=BUCKET)
+        yield CsvS3Client(boto_client, BUCKET)
+
+
+def test_save_rows_returns_s3_uri(csv_client: CsvS3Client) -> None:
+    # arrange
+    rows = [{"address": "1 High St", "postcode": "AB1 2CD"}]
+    # act
+    uri = csv_client.save_rows(rows, "uploads/addresses.csv")
+    # assert
+    assert uri == f"s3://{BUCKET}/uploads/addresses.csv"
+
+
+def test_round_trip_preserves_rows(csv_client: CsvS3Client) -> None:
+    # arrange
+    rows = [
+        {"address": "1 High St", "postcode": "AB1 2CD"},
+        {"address": "2 Low St", "postcode": "XY9 8ZW"},
+    ]
+    # act
+    uri = csv_client.save_rows(rows, "uploads/addresses.csv")
+    fetched = csv_client.read_rows(uri)
+    # assert
+    assert fetched == rows
+
+
+def test_save_rows_rejects_empty_list(csv_client: CsvS3Client) -> None:
+    # act / assert
+    with pytest.raises(ValueError, match="empty"):
+        csv_client.save_rows([], "uploads/empty.csv")
+
+
+def test_read_rows_rejects_wrong_bucket(csv_client: CsvS3Client) -> None:
+    # act / assert
+    with pytest.raises(ValueError, match="does not match client bucket"):
+        csv_client.read_rows("s3://other-bucket/uploads/addresses.csv")
diff --git a/tests/infrastructure/test_s3_client.py b/tests/infrastructure/test_s3_client.py
new file mode 100644
index 00000000..67db4f58
--- /dev/null
+++ b/tests/infrastructure/test_s3_client.py
@@ -0,0 +1,36 @@
+from collections.abc import Iterator
+
+import pytest
+from moto import mock_aws
+
+from infrastructure.s3_client import S3Client
+from tests.infrastructure import make_boto_client
+
+BUCKET = "test-bucket"
+
+
+@pytest.fixture
+def s3_client() -> Iterator[S3Client]:
+    with mock_aws():
+        boto_client = make_boto_client("s3")
+        boto_client.create_bucket(Bucket=BUCKET)
+        yield S3Client(boto_client, BUCKET)
+
+
+def test_put_object_returns_s3_uri(s3_client: S3Client) -> None:
+    # act
+    uri = s3_client.put_object("folder/data.bin", b"payload")
+    # assert
+    assert uri == f"s3://{BUCKET}/folder/data.bin"
+
+
+def test_get_object_returns_bytes_written_by_put_object(s3_client: S3Client) -> None:
+    # arrange
+    s3_client.put_object("round/trip.bin", b"hello world")
+    # act / assert
+    assert s3_client.get_object("round/trip.bin") == b"hello world"
+
+
+def test_bucket_property_exposes_configured_bucket(s3_client: S3Client) -> None:
+    # act / assert
+    assert s3_client.bucket == BUCKET
diff --git a/tests/infrastructure/test_s3_uri.py b/tests/infrastructure/test_s3_uri.py
new file mode 100644
index 00000000..32fd710f
--- /dev/null
+++ b/tests/infrastructure/test_s3_uri.py
@@ -0,0 +1,40 @@
+import pytest
+
+from infrastructure.s3_uri import parse_s3_uri
+
+
+def test_parses_simple_s3_uri() -> None:
+    # act / assert
+    assert parse_s3_uri("s3://my-bucket/file.csv") == ("my-bucket", "file.csv")
+
+
+def test_parses_s3_uri_with_nested_key() -> None:
+    # act
+    bucket, key = parse_s3_uri("s3://my-bucket/nested/path/to/file.csv")
+    # assert
+    assert (bucket, key) == ("my-bucket", "nested/path/to/file.csv")
+
+
+def test_rejects_s3_uri_without_key() -> None:
+    # act / assert
+    with pytest.raises(ValueError, match="bucket and a key"):
+        parse_s3_uri("s3://my-bucket")
+
+
+def test_rejects_s3_uri_with_empty_key() -> None:
+    # act / assert
+    with pytest.raises(ValueError, match="bucket and a key"):
+        parse_s3_uri("s3://my-bucket/")
+
+
+def test_parses_console_url_prefix() -> None:
+    # arrange
+    url = "https://eu-west-2.console.aws.amazon.com/s3/object/my-bucket?prefix=nested%2Ffile.csv"
+    # act / assert
+    assert parse_s3_uri(url) == ("my-bucket", "nested/file.csv")
+
+
+def test_rejects_unparseable_string() -> None:
+    # act / assert
+    with pytest.raises(ValueError):
+        parse_s3_uri("not-a-uri-at-all")
diff --git a/tests/infrastructure/test_sqs_client.py b/tests/infrastructure/test_sqs_client.py
new file mode 100644
index 00000000..44186bbb
--- /dev/null
+++ b/tests/infrastructure/test_sqs_client.py
@@ -0,0 +1,44 @@
+import json
+from collections.abc import Iterator
+from typing import Any, cast
+
+import pytest
+from moto import mock_aws
+
+from infrastructure.sqs_client import SqsClient
+from tests.infrastructure import make_boto_client
+
+
+@pytest.fixture
+def sqs_setup() -> Iterator[tuple[SqsClient, Any, str]]:
+    with mock_aws():
+        boto_client = make_boto_client("sqs")
+        queue: dict[str, Any] = boto_client.create_queue(QueueName="test-queue")
+        queue_url = cast(str, queue["QueueUrl"])
+        yield SqsClient(boto_client, queue_url), boto_client, queue_url
+
+
+def test_send_returns_message_id(sqs_setup: tuple[SqsClient, Any, str]) -> None:
+    # arrange
+    client, _boto, _url = sqs_setup
+    # act
+    message_id = client.send({"hello": "world"})
+    # assert
+    assert isinstance(message_id, str)
+    assert message_id
+
+
+def test_send_json_serialises_body(sqs_setup: tuple[SqsClient, Any, str]) -> None:
+    # arrange
+    client, boto_client, queue_url = sqs_setup
+    body = {"hello": "world", "count": 3}
+    # act
+    client.send(body)
+
+    # assert
+    received: dict[str, Any] = boto_client.receive_message(
+        QueueUrl=queue_url, MaxNumberOfMessages=1
+    )
+    messages: list[dict[str, Any]] = received["Messages"]
+    assert len(messages) == 1
+    assert json.loads(messages[0]["Body"]) == body
diff --git a/tests/orchestration/__init__.py b/tests/orchestration/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/orchestration/test_postcode_splitter_orchestrator.py b/tests/orchestration/test_postcode_splitter_orchestrator.py
new file mode 100644
index 00000000..a718ffbc
--- /dev/null
+++ b/tests/orchestration/test_postcode_splitter_orchestrator.py
@@ -0,0 +1,299 @@
+from __future__ import annotations
+
+import json
+import os
+from collections.abc import Iterator
+from dataclasses import dataclass
+from typing import Any, cast
+
+import boto3
+import pytest
+from moto import mock_aws
+from sqlalchemy import Engine
+from sqlmodel import Session
+
+from infrastructure.address2uprn_queue_client import Address2UprnQueueClient
+from infrastructure.csv_s3_client import CsvS3Client
+from orchestration.postcode_splitter_orchestrator import PostcodeSplitterOrchestrator
+from orchestration.task_orchestrator import TaskOrchestrator
+from repositories.tasks.subtask_postgres_repository import SubTaskPostgresRepository
+from repositories.tasks.task_postgres_repository import TaskPostgresRepository
+from repositories.user_address.user_address_csv_s3_repository import (
+    UserAddressCsvS3Repository,
+)
+
+BUCKET = "splitter-bucket"  # bucket the harness creates inside the mocked AWS session
+REGION = "us-east-1"  # region for the boto clients and for AWS_DEFAULT_REGION
+
+
+def _make_boto_client(service_name: str) -> Any:  # boto3 client, returned untyped
+    factory: Any = boto3.client  # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
+    return factory(service_name, region_name=REGION)  # pinned to REGION
+
+
+@pytest.fixture(autouse=True)
+def _aws_creds() -> Iterator[None]:  # pyright: ignore[reportUnusedFunction]
+    """Export dummy AWS credentials for every test, then restore the old values."""
+    fake_env = {
+        "AWS_ACCESS_KEY_ID": "testing",
+        "AWS_SECRET_ACCESS_KEY": "testing",
+        "AWS_SESSION_TOKEN": "testing",
+        "AWS_DEFAULT_REGION": REGION,
+    }
+    # Snapshot first: a variable that was unset must be removed again afterwards.
+    saved: dict[str, Any] = {name: os.environ.get(name) for name in fake_env}
+    os.environ.update(fake_env)
+    try:
+        yield
+    finally:
+        # Put every variable back exactly as it was found.
+        for name, old_value in saved.items():
+            if old_value is not None:
+                os.environ[name] = old_value
+            else:
+                os.environ.pop(name, None)
+
+
+@dataclass
+class Harness:  # everything the splitter tests need, built by the ``harness`` fixture
+    splitter: PostcodeSplitterOrchestrator  # system under test (max_batch_size=3)
+    task_orchestrator: TaskOrchestrator  # used to create the parent task/subtask
+    subtasks: SubTaskPostgresRepository  # used to read persisted children back
+    csv_client: CsvS3Client  # uploads the fixture CSV and re-reads saved batches
+    boto_sqs: Any  # raw SQS client, used to drain the queue
+    queue_url: str  # URL of the "address2uprn-queue" queue
+    repo: UserAddressCsvS3Repository  # same instance the splitter was given
+
+
+@pytest.fixture
+def harness(db_engine: Engine) -> Iterator[Harness]:
+    """Yield a splitter wired to moto-backed S3/SQS and a ``db_engine`` session."""
+    with mock_aws():
+        # AWS side: one bucket for CSVs, one queue for dispatched batches.
+        s3 = _make_boto_client("s3")
+        s3.create_bucket(Bucket=BUCKET)
+        sqs = _make_boto_client("sqs")
+        created: dict[str, Any] = sqs.create_queue(QueueName="address2uprn-queue")
+        url = cast(str, created["QueueUrl"])
+
+        csv_store = CsvS3Client(s3, BUCKET)
+        address_repo = UserAddressCsvS3Repository(csv_store, BUCKET)
+        dispatch_queue = Address2UprnQueueClient(sqs, url)
+
+        # Database side: both repositories share the one session.
+        with Session(db_engine) as session:
+            task_repo = TaskPostgresRepository(session=session)
+            subtask_repo = SubTaskPostgresRepository(session=session)
+            orchestrator = TaskOrchestrator(
+                task_repo=task_repo, subtask_repo=subtask_repo
+            )
+
+            # A cap of 3 is what makes the four-row BB2 2BB fixture group oversize.
+            yield Harness(
+                splitter=PostcodeSplitterOrchestrator(
+                    task_orchestrator=orchestrator,
+                    user_address_repo=address_repo,
+                    queue_client=dispatch_queue,
+                    max_batch_size=3,
+                ),
+                task_orchestrator=orchestrator,
+                subtasks=subtask_repo,
+                csv_client=csv_store,
+                boto_sqs=sqs,
+                queue_url=url,
+                repo=address_repo,
+            )
+
+
+def _upload_fixture_csv(csv_client: CsvS3Client) -> str:
+    """Upload the seven-row fixture CSV and return what ``save_rows`` returns.
+
+    Three postcode groups:
+      AA1 1AA × 2 (within cap)
+      BB2 2BB × 4 (oversize: > max_batch_size=3)
+      CC3 3CC × 1 (final flush)
+    Expected batching with cap=3 and the algorithm in
+    ``iter_postcode_grouped_batches``:
+      batch 1: [AA1 1AA × 2]           (flushed because oversize follows)
+      batch 2: [BB2 2BB × 4]           (oversize own batch)
+      batch 3: [CC3 3CC × 1]           (final flush)
+    """
+    # (postcode, street, reference prefix, number of rows), in file order.
+    repeated_groups = (
+        ("AA1 1AA", "High St", "AA", 2),
+        ("BB2 2BB", "Long Road", "BB", 4),
+    )
+    rows: list[dict[str, str]] = []
+    for postcode, street, prefix, count in repeated_groups:
+        for i in range(1, count + 1):
+            rows.append(
+                {
+                    "Address 1": f"{i} {street}",
+                    "Address 2": "",
+                    "Address 3": "",
+                    "postcode": postcode,
+                    "Internal Reference": f"{prefix}-{i}",
+                }
+            )
+    # The single-row group that closes the file.
+    rows.append(
+        {
+            "Address 1": "1 Final Way",
+            "Address 2": "",
+            "Address 3": "",
+            "postcode": "CC3 3CC",
+            "Internal Reference": "CC-1",
+        }
+    )
+    return csv_client.save_rows(rows, "uploads/input.csv")
+
+
+def _drain_queue(boto_sqs: Any, queue_url: str) -> list[dict[str, Any]]:
+    """Receive, decode and delete messages until a receive returns none."""
+    drained: list[dict[str, Any]] = []
+    while True:
+        response: dict[str, Any] = boto_sqs.receive_message(
+            QueueUrl=queue_url, MaxNumberOfMessages=10, WaitTimeSeconds=0
+        )
+        batch = cast(list[dict[str, Any]], response.get("Messages", []))
+        if not batch:
+            return drained
+        for msg in batch:
+            drained.append(cast(dict[str, Any], json.loads(msg["Body"])))
+            boto_sqs.delete_message(
+                QueueUrl=queue_url, ReceiptHandle=msg["ReceiptHandle"]
+            )
+
+
+def test_split_and_dispatch_creates_three_children_for_fixture(
+    harness: Harness,
+) -> None:
+    # arrange: a parent task/subtask and the uploaded three-postcode fixture CSV
+    parent_task, parent_subtask = (
+        harness.task_orchestrator.create_task_with_subtask(
+            task_source="manual:postcode-splitter-int"
+        )
+    )
+    input_uri = _upload_fixture_csv(harness.csv_client)
+
+    # act
+    child_ids = harness.splitter.split_and_dispatch(
+        parent_task_id=parent_task.id,
+        parent_subtask_id=parent_subtask.id,
+        input_s3_uri=input_uri,
+    )
+
+    # assert: one child per expected batch (see _upload_fixture_csv)
+    assert len(child_ids) == 3
+    # All child ids are unique and each is persisted under the parent task.
+    # (The children's status is not asserted in this test.)
+    assert len(set(child_ids)) == 3
+    for cid in child_ids:
+        child = harness.subtasks.get(cid)
+        assert child.task_id == parent_task.id
+
+
+def test_split_and_dispatch_persists_child_inputs_with_task_id_and_s3_uri(
+    harness: Harness,
+) -> None:
+    """Child inputs hold the parent task id and a batch URI under its prefix."""
+    # arrange
+    orchestrator = harness.task_orchestrator
+    parent_task, parent_subtask = orchestrator.create_task_with_subtask(
+        task_source="manual:postcode-splitter-int"
+    )
+    source_uri = _upload_fixture_csv(harness.csv_client)
+
+    # act
+    child_ids = harness.splitter.split_and_dispatch(
+        parent_task_id=parent_task.id,
+        parent_subtask_id=parent_subtask.id,
+        input_s3_uri=source_uri,
+    )
+
+    # assert
+    expected_prefix = (
+        f"s3://{BUCKET}/ara_postcode_splitter_batches/"
+        f"{parent_task.id}/{parent_subtask.id}/"
+    )
+    for child_id in child_ids:
+        inputs = harness.subtasks.get(child_id).inputs
+        assert inputs is not None
+        assert inputs["task_id"] == str(parent_task.id)
+        batch_uri = inputs["s3_uri"]
+        assert isinstance(batch_uri, str)
+        assert batch_uri.startswith(expected_prefix)
+        assert batch_uri.endswith(".csv")
+
+
+def test_split_and_dispatch_publishes_one_message_per_child_with_matching_ids(
+    harness: Harness,
+) -> None:
+    """One queue message per child, each mirroring that child's stored inputs."""
+    # arrange
+    parent_task, parent_subtask = (
+        harness.task_orchestrator.create_task_with_subtask(
+            task_source="manual:postcode-splitter-int"
+        )
+    )
+    input_uri = _upload_fixture_csv(harness.csv_client)
+
+    # act
+    child_ids = harness.splitter.split_and_dispatch(
+        parent_task_id=parent_task.id,
+        parent_subtask_id=parent_subtask.id,
+        input_s3_uri=input_uri,
+    )
+
+    # assert
+    messages = _drain_queue(harness.boto_sqs, harness.queue_url)
+    assert len(messages) == len(child_ids)
+
+    # Index the messages by their "sub_task_id" field; every field of a
+    # message must agree with the corresponding persisted SubTask inputs.
+    by_sub_task_id = {message["sub_task_id"]: message for message in messages}
+    assert set(by_sub_task_id) == {str(child_id) for child_id in child_ids}
+    for child_id in child_ids:
+        stored_inputs = harness.subtasks.get(child_id).inputs
+        assert stored_inputs is not None
+        assert by_sub_task_id[str(child_id)] == {
+            "task_id": str(parent_task.id),
+            "sub_task_id": str(child_id),
+            "s3_uri": stored_inputs["s3_uri"],
+        }
+
+
+def test_split_and_dispatch_returns_child_ids_in_dispatch_order(
+    harness: Harness,
+) -> None:
+    """Child ids come back in batch order: AA first, then BB, then CC."""
+    # arrange
+    orchestrator = harness.task_orchestrator
+    parent_task, parent_subtask = orchestrator.create_task_with_subtask(
+        task_source="manual:postcode-splitter-int"
+    )
+    input_uri = _upload_fixture_csv(harness.csv_client)
+
+    # act
+    child_ids = harness.splitter.split_and_dispatch(
+        parent_task_id=parent_task.id,
+        parent_subtask_id=parent_subtask.id,
+        input_s3_uri=input_uri,
+    )
+
+    # assert
+    # Read back the batch CSV recorded on each child, in returned order, and
+    # collect the distinct postcode_clean values found in it: AA batch
+    # first, BB oversize batch second, CC final-flush batch third.
+    def batch_postcodes(child_id: Any) -> set[str]:
+        inputs = harness.subtasks.get(child_id).inputs
+        assert inputs is not None
+        rows = harness.csv_client.read_rows(inputs["s3_uri"])
+        return {row["postcode_clean"] for row in rows}
+
+    seen = [batch_postcodes(child_id) for child_id in child_ids]
+    assert seen == [
+        {"AA11AA"},
+        {"BB22BB"},
+        {"CC33CC"},
+    ]
diff --git a/tests/orchestration/test_task_orchestrator.py b/tests/orchestration/test_task_orchestrator.py
new file mode 100644
index 00000000..ae89991d
--- /dev/null
+++ b/tests/orchestration/test_task_orchestrator.py
@@ -0,0 +1,197 @@
+from collections.abc import Iterator
+from dataclasses import dataclass
+
+import pytest
+from sqlalchemy import Engine
+from sqlmodel import Session
+
+from domain.tasks.subtasks import SubTask, SubTaskStatus
+from domain.tasks.tasks import Source, TaskStatus
+from orchestration.task_orchestrator import TaskOrchestrator
+from repositories.tasks.subtask_postgres_repository import SubTaskPostgresRepository
+from repositories.tasks.task_postgres_repository import TaskPostgresRepository
+
+
+@dataclass
+class Harness:  # bundle yielded by the ``harness`` fixture
+    orchestrator: TaskOrchestrator  # system under test
+    tasks: TaskPostgresRepository  # used to read task state back
+    subtasks: SubTaskPostgresRepository  # used to read and create subtasks directly
+
+
+@pytest.fixture
+def harness(db_engine: Engine) -> Iterator[Harness]:
+    """Yield an orchestrator plus its two repositories on one shared session."""
+    with Session(db_engine) as session:
+        task_repo = TaskPostgresRepository(session=session)
+        subtask_repo = SubTaskPostgresRepository(session=session)
+        orchestrator = TaskOrchestrator(task_repo=task_repo, subtask_repo=subtask_repo)
+        yield Harness(
+            orchestrator=orchestrator, tasks=task_repo, subtasks=subtask_repo
+        )
+
+
+def test_create_task_with_subtask_creates_both_in_waiting(
+    harness: Harness,
+) -> None:
+    """A new task and its first subtask both start WAITING and are linked."""
+    # act
+    new_task, new_subtask = harness.orchestrator.create_task_with_subtask(
+        task_source="manual:test",
+        inputs={"foo": "bar"},
+        source=Source.PORTFOLIO,
+        source_id="abc",
+    )
+    # assert
+    assert new_subtask.task_id == new_task.id
+    assert new_task.status is TaskStatus.WAITING
+    assert new_subtask.status is SubTaskStatus.WAITING
+    assert new_subtask.inputs == {"foo": "bar"}
+
+
+def test_start_subtask_cascades_to_in_progress(harness: Harness) -> None:
+    """Starting the only subtask moves it and its task to IN_PROGRESS."""
+    # arrange
+    orchestrator = harness.orchestrator
+    task, subtask = orchestrator.create_task_with_subtask(task_source="manual:test")
+
+    # act
+    started = orchestrator.start_subtask(
+        subtask.id, cloud_logs_url="https://example/log"
+    )
+
+    # assert
+    assert started.status is SubTaskStatus.IN_PROGRESS
+    assert started.cloud_logs_url == "https://example/log"
+    assert harness.tasks.get(task.id).status is TaskStatus.IN_PROGRESS
+
+
+def test_complete_subtask_cascades_to_complete(harness: Harness) -> None:
+    """Completing the only subtask stores its result and completes the task."""
+    # arrange
+    orchestrator = harness.orchestrator
+    task, subtask = orchestrator.create_task_with_subtask(task_source="manual:test")
+    orchestrator.start_subtask(subtask.id)
+
+    # act
+    orchestrator.complete_subtask(subtask.id, {"value": 42})
+
+    # assert
+    # The raw result is read back wrapped under a "result" key.
+    assert harness.subtasks.get(subtask.id).outputs == {"result": {"value": 42}}
+    finished_task = harness.tasks.get(task.id)
+    assert finished_task.status is TaskStatus.COMPLETE
+    assert finished_task.job_completed is not None
+
+
+def test_fail_subtask_cascades_to_failed(harness: Harness) -> None:
+    """Failing the only subtask records the error text and fails the task."""
+    # arrange
+    task, subtask = harness.orchestrator.create_task_with_subtask(
+        task_source="manual:test"
+    )
+    failure = RuntimeError("boom")
+
+    # act
+    harness.orchestrator.fail_subtask(subtask.id, failure)
+
+    # assert
+    assert harness.subtasks.get(subtask.id).outputs == {"error": "boom"}
+    assert harness.tasks.get(task.id).status is TaskStatus.FAILED
+
+
+def test_failed_subtask_locks_task_failed_even_with_others_complete(
+    harness: Harness,
+) -> None:
+    """One failed subtask fails the task even though its sibling completed."""
+    # arrange
+    orchestrator = harness.orchestrator
+    task, completed = orchestrator.create_task_with_subtask(task_source="manual:test")
+    failing = SubTask.create(task_id=task.id)
+    harness.subtasks.create(failing)
+
+    # act
+    orchestrator.complete_subtask(completed.id)
+    orchestrator.fail_subtask(failing.id, RuntimeError("nope"))
+
+    # assert
+    assert harness.tasks.get(task.id).status is TaskStatus.FAILED
+
+
+def test_mixed_complete_and_in_progress_keeps_task_in_progress(
+    harness: Harness,
+) -> None:
+    # arrange: one task with two subtasks; the second is added through the repo
+    task, first = harness.orchestrator.create_task_with_subtask(
+        task_source="manual:test"
+    )
+    second = SubTask.create(task_id=task.id)
+    harness.subtasks.create(second)
+
+    # act: complete the first subtask, then start the second
+    harness.orchestrator.complete_subtask(first.id)
+    harness.orchestrator.start_subtask(second.id)
+
+    # assert: the task reads back as IN_PROGRESS, not COMPLETE
+    assert harness.tasks.get(task.id).status is TaskStatus.IN_PROGRESS
+
+
+def test_run_subtask_happy_path_returns_result_and_cascades_complete(
+    harness: Harness,
+) -> None:
+    # arrange: a task with a single subtask
+    task, subtask = harness.orchestrator.create_task_with_subtask(
+        task_source="manual:test"
+    )
+
+    # act: the work callable simply returns a dict
+    result = harness.orchestrator.run_subtask(subtask.id, work=lambda: {"answer": 42})
+
+    # assert: the dict comes back unchanged; subtask and task are both COMPLETE
+    assert result == {"answer": 42}
+    assert harness.subtasks.get(subtask.id).status is SubTaskStatus.COMPLETE
+    assert harness.tasks.get(task.id).status is TaskStatus.COMPLETE
+
+
+def test_create_child_subtask_adds_waiting_child_without_changing_parent_status(
+    harness: Harness,
+) -> None:
+    """Adding a child subtask persists it as WAITING and leaves the task as is."""
+    # arrange
+    orchestrator = harness.orchestrator
+    task, first = orchestrator.create_task_with_subtask(task_source="manual:test")
+    orchestrator.start_subtask(first.id)
+    assert harness.tasks.get(task.id).status is TaskStatus.IN_PROGRESS
+
+    # act
+    child = orchestrator.create_child_subtask(
+        task.id, inputs={"split": "a"}
+    )
+
+    # assert
+    stored = harness.subtasks.get(child.id)
+    assert stored.id != first.id
+    assert stored.task_id == task.id
+    assert stored.status is SubTaskStatus.WAITING
+    assert stored.inputs == {"split": "a"}
+    # Cascade is a no-op: parent stays IN_PROGRESS.
+    assert harness.tasks.get(task.id).status is TaskStatus.IN_PROGRESS
+
+
+def test_run_subtask_failing_work_marks_failed_and_reraises(
+    harness: Harness,
+) -> None:
+    """A raising work callable marks subtask and task FAILED and re-raises."""
+    # arrange
+    orchestrator = harness.orchestrator
+    task, subtask = orchestrator.create_task_with_subtask(task_source="manual:test")
+
+    def failing_work() -> None:
+        raise RuntimeError("boom")
+
+    # act / assert
+    with pytest.raises(RuntimeError, match="boom"):
+        orchestrator.run_subtask(subtask.id, work=failing_work)
+
+    assert harness.subtasks.get(subtask.id).status is SubTaskStatus.FAILED
+    assert harness.tasks.get(task.id).status is TaskStatus.FAILED
diff --git a/tests/repositories/__init__.py b/tests/repositories/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/repositories/tasks/__init__.py b/tests/repositories/tasks/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/repositories/tasks/postgres/__init__.py b/tests/repositories/tasks/postgres/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/repositories/tasks/postgres/test_subtask_postgres_repository.py b/tests/repositories/tasks/postgres/test_subtask_postgres_repository.py
new file mode 100644
index 00000000..9cec52ea
--- /dev/null
+++ b/tests/repositories/tasks/postgres/test_subtask_postgres_repository.py
@@ -0,0 +1,100 @@
+from collections.abc import Iterator
+from uuid import UUID, uuid4
+
+import pytest
+from sqlalchemy import Engine
+from sqlmodel import Session
+
+from domain.tasks.subtasks import SubTask, SubTaskStatus
+from domain.tasks.tasks import Task
+from repositories.tasks.subtask_postgres_repository import SubTaskPostgresRepository
+from repositories.tasks.task_postgres_repository import TaskPostgresRepository
+
+
+@pytest.fixture
+def session(db_engine: Engine) -> Iterator[Session]:  # one Session per test
+    with Session(db_engine) as s:  # the ``with`` block exits once the test is done
+        yield s
+
+
+def _persisted_task_id(session: Session) -> UUID:
+    """Persist a fresh Task and return its id, for use as a SubTask's parent."""
+    parent = Task.create(task_source="manual:test")
+    TaskPostgresRepository(session=session).create(parent)
+    return parent.id
+
+
+def test_create_and_get_round_trip_preserves_inputs(session: Session) -> None:
+    """A created SubTask reads back with the same id, task, inputs, no outputs."""
+    # arrange
+    subtask_repo = SubTaskPostgresRepository(session=session)
+    parent_id = _persisted_task_id(session)
+    original = SubTask.create(task_id=parent_id, inputs={"address": "68 Glendon Way"})
+
+    # act
+    subtask_repo.create(original)
+    loaded = subtask_repo.get(original.id)
+
+    # assert
+    assert (loaded.id, loaded.task_id) == (original.id, parent_id)
+    assert loaded.status is SubTaskStatus.WAITING
+    assert loaded.inputs == {"address": "68 Glendon Way"}
+    assert loaded.outputs is None
+
+
+def test_save_persists_status_and_outputs(session: Session) -> None:
+    # arrange: a persisted SubTask under a persisted parent Task
+    repo = SubTaskPostgresRepository(session=session)
+    st = SubTask.create(task_id=_persisted_task_id(session))
+    repo.create(st)
+
+    # act (phase 1): start the subtask and save it
+    st.start(cloud_logs_url="https://example/log")
+    repo.save(st)
+    # assert: the stored copy is IN_PROGRESS
+    assert repo.get(st.id).status is SubTaskStatus.IN_PROGRESS
+
+    # act (phase 2): complete the same subtask and save it again
+    st.complete({"uprn": "123"})
+    repo.save(st)
+    # assert: status, wrapped result, earlier log URL and completion time are stored
+    done = repo.get(st.id)
+    assert done.status is SubTaskStatus.COMPLETE
+    assert done.outputs == {"result": {"uprn": "123"}}
+    assert done.cloud_logs_url == "https://example/log"
+    assert done.job_completed is not None
+
+
+def test_list_by_task_filters_by_task_id(session: Session) -> None:
+    """``list_by_task`` returns only the subtasks belonging to the given task."""
+    # arrange
+    repo = SubTaskPostgresRepository(session=session)
+    task_a = _persisted_task_id(session)
+    task_b = _persisted_task_id(session)
+    # Two subtasks under task_a, one under task_b.
+    for owner in (task_a, task_a, task_b):
+        repo.create(SubTask.create(task_id=owner))
+
+    # act
+    a_results = repo.list_by_task(task_a)
+    b_results = repo.list_by_task(task_b)
+
+    # assert
+    # Each comparison checks both the count and the owning task of every result.
+    assert [s.task_id for s in a_results] == [task_a, task_a]
+    assert [s.task_id for s in b_results] == [task_b]
+
+
+def test_list_by_task_returns_empty_for_unknown_task(session: Session) -> None:
+    """Listing subtasks for a task id that was never stored yields ``[]``."""
+    unknown_task_id = uuid4()
+    listed = SubTaskPostgresRepository(session=session).list_by_task(unknown_task_id)
+    assert listed == []
+
+
+def test_get_missing_raises(session: Session) -> None:
+    """``get`` raises a "not found" ``ValueError`` for an unknown subtask id."""
+    missing_id = uuid4()
+    subtask_repo = SubTaskPostgresRepository(session=session)
+    with pytest.raises(ValueError, match="not found"):
+        subtask_repo.get(missing_id)
diff --git a/tests/repositories/tasks/postgres/test_task_postgres_repository.py b/tests/repositories/tasks/postgres/test_task_postgres_repository.py
new file mode 100644
index 00000000..8a49a861
--- /dev/null
+++ b/tests/repositories/tasks/postgres/test_task_postgres_repository.py
@@ -0,0 +1,77 @@
+from collections.abc import Iterator
+from uuid import uuid4
+
+import pytest
+from sqlalchemy import Engine
+from sqlmodel import Session
+
+from domain.tasks.tasks import Source, Task, TaskStatus
+from infrastructure.postgres.task_table import TaskRow
+from repositories.tasks.task_postgres_repository import TaskPostgresRepository
+
+
+@pytest.fixture
+def session(db_engine: Engine) -> Iterator[Session]:  # one Session per test
+    with Session(db_engine) as s:  # the ``with`` block exits once the test is done
+        yield s
+
+
+def test_create_and_get_round_trip(session: Session) -> None:
+    """A created Task reads back with its id, WAITING status and source fields."""
+    # arrange
+    task_repo = TaskPostgresRepository(session=session)
+    original = Task.create(
+        task_source="manual:test", source=Source.PORTFOLIO, source_id="abc-123"
+    )
+    # act
+    task_repo.create(original)
+    loaded = task_repo.get(original.id)
+
+    # assert
+    assert loaded.id == original.id
+    assert loaded.status is TaskStatus.WAITING
+    assert loaded.source is Source.PORTFOLIO
+    assert loaded.source_id == "abc-123"
+
+
+def test_save_persists_status_transition(session: Session) -> None:
+    # arrange: a persisted Task
+    repo = TaskPostgresRepository(session=session)
+    t = Task.create(task_source="manual:test")
+    repo.create(t)
+
+    # act (phase 1): start the task and save it
+    t.start()
+    repo.save(t)
+    # assert: the stored copy is IN_PROGRESS
+    assert repo.get(t.id).status is TaskStatus.IN_PROGRESS
+
+    # act (phase 2): complete the same task and save it again
+    t.complete()
+    repo.save(t)
+    # assert: the stored copy is COMPLETE and has a completion time
+    done = repo.get(t.id)
+    assert done.status is TaskStatus.COMPLETE
+    assert done.job_completed is not None
+
+
+def test_get_missing_raises(session: Session) -> None:
+    """``get`` raises a "not found" ``ValueError`` for an unknown task id."""
+    task_repo = TaskPostgresRepository(session=session)
+    absent_id = uuid4()
+    with pytest.raises(ValueError, match="not found"):
+        task_repo.get(absent_id)
+
+
+def test_get_normalises_legacy_capitalised_status(session: Session) -> None:
+    """A row whose status is the legacy "In Progress" loads as IN_PROGRESS."""
+    # arrange
+    # Existing rows written by backend code use "In Progress" (capitalised),
+    # so the row is inserted directly rather than through the repository.
+    legacy_row = TaskRow(task_source="manual:test", status="In Progress")
+    session.add(legacy_row)
+    session.commit()
+    # act
+    loaded = TaskPostgresRepository(session=session).get(legacy_row.id)
+    # assert
+    assert loaded.status is TaskStatus.IN_PROGRESS
diff --git a/tests/repositories/user_address/__init__.py b/tests/repositories/user_address/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/repositories/user_address/conftest.py b/tests/repositories/user_address/conftest.py
new file mode 100644
index 00000000..25c1ac3b
--- /dev/null
+++ b/tests/repositories/user_address/conftest.py
@@ -0,0 +1,28 @@
+import os
+from collections.abc import Iterator
+from typing import Optional
+
+import pytest
+
+
+@pytest.fixture(autouse=True)
+def _aws_creds() -> Iterator[None]:  # pyright: ignore[reportUnusedFunction]
+    """Run each test with placeholder AWS settings; undo the changes afterwards."""
+    placeholders = {
+        "AWS_ACCESS_KEY_ID": "testing",
+        "AWS_SECRET_ACCESS_KEY": "testing",
+        "AWS_SESSION_TOKEN": "testing",
+        "AWS_DEFAULT_REGION": "us-east-1",
+    }
+    original: dict[str, Optional[str]] = {
+        name: os.environ.get(name) for name in placeholders
+    }
+    os.environ.update(placeholders)
+    try:
+        yield
+    finally:
+        for name, value in original.items():
+            if value is None:
+                os.environ.pop(name, None)
+            else:
+                os.environ[name] = value
diff --git a/tests/repositories/user_address/test_user_address_csv_s3_repository.py b/tests/repositories/user_address/test_user_address_csv_s3_repository.py
new file mode 100644
index 00000000..9ffb250a
--- /dev/null
+++ b/tests/repositories/user_address/test_user_address_csv_s3_repository.py
@@ -0,0 +1,237 @@
+from collections.abc import Iterator
+
+import pytest
+from moto import mock_aws
+
+from domain.addresses.user_address import UserAddress
+from domain.postcode import Postcode
+from infrastructure.csv_s3_client import CsvS3Client
+from repositories.user_address.user_address_csv_s3_repository import (
+    UserAddressCsvS3Repository,
+)
+from tests.infrastructure import make_boto_client
+
+BUCKET = "user-address-bucket"  # bucket the ``repo`` fixture creates in mocked S3
+
+
+@pytest.fixture
+def repo() -> Iterator[UserAddressCsvS3Repository]:
+    """Yield a repository backed by a freshly created bucket in moto's mock S3."""
+    with mock_aws():
+        s3 = make_boto_client("s3")
+        s3.create_bucket(Bucket=BUCKET)
+        yield UserAddressCsvS3Repository(CsvS3Client(s3, BUCKET), BUCKET)
+
+
+def _upload_csv(  # test helper: store ``rows`` under ``key`` via the repo's own CSV client
+    repo: UserAddressCsvS3Repository, rows: list[dict[str, str]], key: str
+) -> str:  # whatever ``save_rows`` returns; the tests pass it on to ``load_batch``
+    return repo._csv_client.save_rows(rows, key)  # pyright: ignore[reportPrivateUsage]
+
+
+def test_load_batch_parses_address_postcode_and_reference(
+    repo: UserAddressCsvS3Repository,
+) -> None:
+    """All three address lines, the postcode and the reference are parsed."""
+    # arrange
+    row = {
+        "Address 1": "1 High Street",
+        "Address 2": "Flat 2",
+        "Address 3": "Townville",
+        "postcode": "sw1a 1aa",
+        "Internal Reference": "REF-001",
+    }
+    uri = _upload_csv(repo, [row], "uploads/full.csv")
+
+    # act
+    loaded = repo.load_batch(uri)
+
+    # assert
+    assert len(loaded) == 1
+    parsed = loaded[0]
+    assert parsed.user_address == "1 High Street, Flat 2, Townville"
+    # The lower-case, spaced input compares equal to Postcode("SW1A1AA").
+    assert parsed.postcode == Postcode("SW1A1AA")
+    assert parsed.internal_reference == "REF-001"
+
+
+def test_load_batch_uses_only_address_1_when_others_missing(
+    repo: UserAddressCsvS3Repository,
+) -> None:
+    """Empty "Address 2"/"Address 3" cells leave just "Address 1" as the address."""
+    # arrange
+    row = {
+        "Address 1": "10 Cardiff Road",
+        "Address 2": "",
+        "Address 3": "",
+        "postcode": "CF10 1AA",
+        "Internal Reference": "REF-002",
+    }
+    uri = _upload_csv(repo, [row], "uploads/address1-only.csv")
+
+    # act
+    loaded = repo.load_batch(uri)
+
+    # assert
+    assert len(loaded) == 1
+    only = loaded[0]
+    assert only.user_address == "10 Cardiff Road"
+    assert only.postcode == Postcode("CF101AA")
+    assert only.internal_reference == "REF-002"
+
+
+def test_load_batch_handles_missing_internal_reference(
+    repo: UserAddressCsvS3Repository,
+) -> None:
+    # arrange: a single row whose "Internal Reference" cell is the empty string
+    rows = [
+        {
+            "Address 1": "5 Park Lane",
+            "Address 2": "",
+            "Address 3": "",
+            "postcode": "M1 1AA",
+            "Internal Reference": "",
+        }
+    ]
+    uri = _upload_csv(repo, rows, "uploads/no-ref.csv")
+
+    # act
+    addresses = repo.load_batch(uri)
+
+    # assert: the empty reference cell is loaded as None, not ""
+    assert len(addresses) == 1
+    assert addresses[0].user_address == "5 Park Lane"
+    assert addresses[0].postcode == Postcode("M11AA")
+    assert addresses[0].internal_reference is None
+
+
+def test_load_batch_captures_full_source_row(
+    repo: UserAddressCsvS3Repository,
+) -> None:
+    """``source_row`` keeps every CSV column, not only the parsed ones."""
+    # arrange
+    # A raw EPC-export-shaped row; every column of it must be preserved.
+    epc_row = {
+        "Asset Reference": "511",
+        "Address 1": "9 Abingdon Road Padiham Lancashire BB12 7BX",
+        "postcode": "BB12 7BX",
+        "Property Type": "House: End Terrace",
+        "SAP Score": "69",
+    }
+    uri = _upload_csv(repo, [epc_row], "uploads/epc.csv")
+
+    # act
+    loaded = repo.load_batch(uri)
+
+    # assert
+    assert loaded[0].source_row == epc_row
+
+
+def test_load_batch_raises_when_postcode_column_absent(
+    repo: UserAddressCsvS3Repository,
+) -> None:
+    """A CSV without a "postcode" column is rejected with ``ValueError``."""
+    # arrange
+    postcode_less_row = {"Address 1": "1 High Street", "Property Type": "Flat"}
+    uri = _upload_csv(repo, [postcode_less_row], "uploads/no-postcode.csv")
+    # act / assert
+    with pytest.raises(ValueError, match="no 'postcode' column"):
+        repo.load_batch(uri)
+
+
+def test_save_batch_passes_through_all_columns_and_appends_postcode_clean(
+    repo: UserAddressCsvS3Repository,
+) -> None:
+    """Saved rows keep every source column and gain a ``postcode_clean`` one."""
+    # arrange
+    source = {
+        "Asset Reference": "511",
+        "Address 1": "9 Abingdon Road Padiham Lancashire BB12 7BX",
+        "postcode": " BB12 7BX",
+        "Property Type": "House: End Terrace",
+    }
+    uri = _upload_csv(repo, [source], "uploads/epc.csv")
+    loaded = repo.load_batch(uri)
+
+    # act
+    saved_uri = repo.save_batch(loaded, "tasks/passthrough")
+    saved_rows = repo._csv_client.read_rows(saved_uri)  # pyright: ignore[reportPrivateUsage]
+
+    # assert
+    assert len(saved_rows) == 1
+    saved = saved_rows[0]
+    # Every original column survives, byte-for-byte.
+    assert {column: saved[column] for column in source} == source
+    # Plus the one appended column the downstream address2uprn stage groups on.
+    assert saved["postcode_clean"] == "BB127BX"
+
+
+def test_save_batch_returns_uri_under_path_prefix(
+    repo: UserAddressCsvS3Repository,
+) -> None:
+    """The saved batch lands under the given prefix in BUCKET as a ``.csv``."""
+    # arrange
+    path_prefix = "tasks/abc/batches"
+    address = UserAddress(
+        user_address="1 High Street",
+        postcode=Postcode("SW1A 1AA"),
+        source_row={"Address 1": "1 High Street", "postcode": "SW1A 1AA"},
+    )
+
+    # act
+    uri = repo.save_batch([address], path_prefix)
+
+    # assert
+    assert uri.startswith(f"s3://{BUCKET}/{path_prefix}/")
+    assert uri.endswith(".csv")
+
+
+def test_save_then_reload_round_trip_preserves_columns(
+    repo: UserAddressCsvS3Repository,
+) -> None:
+    # arrange: two rows, the second with an empty "Internal Reference" cell
+    rows = [
+        {
+            "Address 1": "1 High Street",
+            "postcode": "SW1A 1AA",
+            "Internal Reference": "REF-001",
+        },
+        {
+            "Address 1": "2 Low Street",
+            "postcode": "XY9 8ZW",
+            "Internal Reference": "",
+        },
+    ]
+    uri = _upload_csv(repo, rows, "uploads/round-trip.csv")
+    addresses = repo.load_batch(uri)
+
+    # act: save the loaded batch, then read the saved CSV back as raw rows
+    saved_uri = repo.save_batch(addresses, "tasks/round-trip")
+    saved_rows = repo._csv_client.read_rows(saved_uri)  # pyright: ignore[reportPrivateUsage]
+
+    # assert
+    # Original columns come back verbatim, in the original row order;
+    # postcode_clean is the only addition and is stripped before comparing.
+    assert [
+        {k: v for k, v in r.items() if k != "postcode_clean"} for r in saved_rows
+    ] == rows
+    assert [r["postcode_clean"] for r in saved_rows] == ["SW1A1AA", "XY98ZW"]
+
+
+def test_save_batch_uses_unique_filename_per_call(
+    repo: UserAddressCsvS3Repository,
+) -> None:
+    """Two saves of the same batch under one prefix return different URIs."""
+    # arrange
+    batch = [
+        UserAddress(
+            user_address="1 High Street",
+            postcode=Postcode("SW1A 1AA"),
+            source_row={"Address 1": "1 High Street", "postcode": "SW1A 1AA"},
+        ),
+    ]
+
+    # act
+    saved_uris = [repo.save_batch(batch, "tasks/uniqueness") for _ in range(2)]
+
+    # assert
+    assert saved_uris[0] != saved_uris[1]
diff --git a/tests/utilities/__init__.py b/tests/utilities/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/utilities/aws_lambda/__init__.py b/tests/utilities/aws_lambda/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/utilities/aws_lambda/test_subtask_handler.py b/tests/utilities/aws_lambda/test_subtask_handler.py
new file mode 100644
index 00000000..d671adc4
--- /dev/null
+++ b/tests/utilities/aws_lambda/test_subtask_handler.py
@@ -0,0 +1,255 @@
+import logging
+from collections.abc import Generator, Iterator
+from contextlib import contextmanager
+from dataclasses import dataclass
+from typing import Any
+from uuid import UUID
+
+import pytest
+from sqlalchemy import Engine
+from sqlmodel import Session
+
+from domain.tasks.subtasks import SubTaskStatus
+from domain.tasks.tasks import TaskStatus
+from orchestration.task_orchestrator import TaskOrchestrator
+from repositories.tasks.subtask_postgres_repository import SubTaskPostgresRepository
+from repositories.tasks.task_postgres_repository import TaskPostgresRepository
+from utilities.aws_lambda.subtask_handler import subtask_handler
+
+_LOGGER_NAME = "utilities.aws_lambda.subtask_handler"
+
+
+@dataclass
+class Harness:  # bundle the `harness` fixture hands to every test in this module
+    orchestrator: TaskOrchestrator
+    tasks: TaskPostgresRepository  # read back by tests to check Task status
+    subtasks: SubTaskPostgresRepository  # read back by tests to check SubTask rows
+
+    @contextmanager
+    def factory(self) -> Generator[TaskOrchestrator, None, None]:
+        yield self.orchestrator  # same instance each call; used as orchestrator_cm
+
+
+@pytest.fixture
+def harness(db_engine: Engine) -> Iterator[Harness]:
+    """Yield a Harness whose orchestrator and repositories share one Session."""
+    with Session(db_engine) as db_session:
+        task_repo = TaskPostgresRepository(session=db_session)
+        subtask_repo = SubTaskPostgresRepository(session=db_session)
+        orchestrator = TaskOrchestrator(task_repo=task_repo, subtask_repo=subtask_repo)
+        yield Harness(
+            orchestrator=orchestrator, tasks=task_repo, subtasks=subtask_repo
+        )
+
+
+def _direct_event(task_id: UUID, subtask_id: UUID) -> dict[str, Any]:
+    return dict(task_id=str(task_id), sub_task_id=str(subtask_id))  # no "Records" key
+
+
+def test_subtask_handler_injects_orchestrator_as_third_positional_argument(
+    harness: Harness,
+) -> None:
+    """The decorated function is invoked as func(body, context, orchestrator)."""
+    # arrange
+    orch = harness.orchestrator
+    _, subtask = orch.create_task_with_subtask(task_source="manual:test")
+
+    captured: dict[str, Any] = {}
+
+    # Record every argument the decorator hands to the wrapped function.
+    @subtask_handler(orchestrator_cm=harness.factory)
+    def handler(
+        body: dict[str, Any], context: Any, orchestrator: TaskOrchestrator
+    ) -> None:
+        captured.update(body=body, context=context, orchestrator=orchestrator)
+
+    # act
+    event = _direct_event(subtask.task_id, subtask.id)
+    handler(event, context="ctx-sentinel")
+
+    # assert
+    assert captured["orchestrator"] is harness.orchestrator
+    assert captured["context"] == "ctx-sentinel"
+    assert captured["body"]["sub_task_id"] == str(subtask.id)
+
+
+def test_subtask_handler_completes_parent_subtask_on_success(
+    harness: Harness,
+) -> None:
+    """A handler that returns normally leaves SubTask and Task both COMPLETE."""
+    # arrange
+    orch = harness.orchestrator
+    task, subtask = orch.create_task_with_subtask(task_source="manual:test")
+
+    @subtask_handler(orchestrator_cm=harness.factory)
+    def handler(
+        body: dict[str, Any], context: Any, orchestrator: TaskOrchestrator
+    ) -> None:
+        pass  # returning without doing any work is all this test needs
+
+    # act
+    handler(_direct_event(task.id, subtask.id), context=None)
+
+    # assert
+    assert harness.subtasks.get(subtask.id).status is SubTaskStatus.COMPLETE
+    assert harness.tasks.get(task.id).status is TaskStatus.COMPLETE
+
+
+def test_subtask_handler_marks_parent_failed_and_reraises_on_error(
+    harness: Harness,
+) -> None:
+    """A raising handler propagates its error and fails both SubTask and Task."""
+    # arrange
+    orch = harness.orchestrator
+    task, subtask = orch.create_task_with_subtask(task_source="manual:test")
+
+    @subtask_handler(orchestrator_cm=harness.factory)
+    def handler(
+        body: dict[str, Any], context: Any, orchestrator: TaskOrchestrator
+    ) -> None:
+        raise RuntimeError("boom")
+
+    # act / assert
+    with pytest.raises(RuntimeError, match="boom"):
+        handler(_direct_event(task.id, subtask.id), context=None)
+
+    assert harness.subtasks.get(subtask.id).status is SubTaskStatus.FAILED
+    assert harness.tasks.get(task.id).status is TaskStatus.FAILED
+
+
+def test_subtask_handler_injected_orchestrator_can_create_child_subtask(
+    harness: Harness,
+) -> None:
+    """The injected orchestrator can persist a child SubTask for the same Task."""
+    # arrange
+    orch = harness.orchestrator
+    task, subtask = orch.create_task_with_subtask(task_source="manual:test")
+
+    created_child_ids: list[UUID] = []
+
+    @subtask_handler(orchestrator_cm=harness.factory)
+    def handler(
+        body: dict[str, Any], context: Any, orchestrator: TaskOrchestrator
+    ) -> None:
+        child = orchestrator.create_child_subtask(task.id, inputs={"split": 1})
+        created_child_ids.append(child.id)
+
+    # act
+    handler(_direct_event(task.id, subtask.id), context=None)
+
+    # assert
+    assert len(created_child_ids) == 1
+    stored_child = harness.subtasks.get(created_child_ids[0])
+    assert stored_child.task_id == task.id
+    assert stored_child.status is SubTaskStatus.WAITING
+
+
+def test_subtask_handler_logs_subtask_lifecycle_on_success(
+    harness: Harness, caplog: pytest.LogCaptureFixture
+) -> None:
+    """A successful run logs both the "Running" and the "completed" message."""
+    # arrange
+    orch = harness.orchestrator
+    task, subtask = orch.create_task_with_subtask(task_source="manual:test")
+
+    @subtask_handler(orchestrator_cm=harness.factory)
+    def handler(
+        body: dict[str, Any], context: Any, orchestrator: TaskOrchestrator
+    ) -> None:
+        pass  # only the decorator's own log lines are under test
+
+    # act
+    with caplog.at_level(logging.INFO, logger=_LOGGER_NAME):
+        handler(_direct_event(task.id, subtask.id), context=None)
+
+    # assert
+    assert f"Running subtask {subtask.id}" in caplog.text
+    assert f"Subtask {subtask.id} completed" in caplog.text
+
+
+def test_subtask_handler_logs_exception_on_failure(
+    harness: Harness, caplog: pytest.LogCaptureFixture
+) -> None:
+    """A failing handler is logged at ERROR level together with its traceback."""
+    # arrange
+    orch = harness.orchestrator
+    task, subtask = orch.create_task_with_subtask(task_source="manual:test")
+
+    @subtask_handler(orchestrator_cm=harness.factory)
+    def handler(
+        body: dict[str, Any], context: Any, orchestrator: TaskOrchestrator
+    ) -> None:
+        raise RuntimeError("boom")
+
+    # act / assert
+    with caplog.at_level(logging.INFO, logger=_LOGGER_NAME):
+        with pytest.raises(RuntimeError, match="boom"):
+            handler(_direct_event(task.id, subtask.id), context=None)
+
+    error_records = [rec for rec in caplog.records if rec.levelno == logging.ERROR]
+    assert any(
+        f"Subtask {subtask.id} failed" in rec.getMessage() for rec in error_records
+    )
+    assert any(rec.exc_info is not None for rec in error_records)
+
+
+def test_subtask_handler_records_cloudwatch_url_on_subtask(
+    harness: Harness, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    """Lambda log env vars produce a console-encoded URL saved on the SubTask."""
+    # arrange
+    lambda_env = {
+        "AWS_REGION": "eu-west-2",
+        "AWS_LAMBDA_LOG_GROUP_NAME": "/aws/lambda/postcode-splitter",
+        "AWS_LAMBDA_LOG_STREAM_NAME": "2026/05/20/[$LATEST]abc123",
+    }
+    for name, value in lambda_env.items():
+        monkeypatch.setenv(name, value)
+    orch = harness.orchestrator
+    task, subtask = orch.create_task_with_subtask(task_source="manual:test")
+
+    @subtask_handler(orchestrator_cm=harness.factory)
+    def handler(
+        body: dict[str, Any], context: Any, orchestrator: TaskOrchestrator
+    ) -> None:
+        pass  # the URL is recorded by the decorator, not by the handler
+
+    # act
+    handler(_direct_event(task.id, subtask.id), context=None)
+
+    # assert
+    saved_url = harness.subtasks.get(subtask.id).cloud_logs_url
+    assert saved_url is not None
+    assert saved_url.startswith(
+        "https://eu-west-2.console.aws.amazon.com/cloudwatch/home"
+    )
+    # Group and stream names are console-encoded: "/" -> "%2F" -> "$252F".
+    assert "$252Faws$252Flambda$252Fpostcode-splitter" in saved_url
+    assert "$255B$2524LATEST$255D" in saved_url
+
+
+def test_subtask_handler_leaves_cloudwatch_url_unset_outside_lambda(
+    harness: Harness, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    """With the Lambda log variables unset, no CloudWatch URL is stored."""
+    # arrange
+    # raising=False: the variables may already be absent on the test machine.
+    monkeypatch.delenv("AWS_REGION", raising=False)
+    monkeypatch.delenv("AWS_LAMBDA_LOG_GROUP_NAME", raising=False)
+    monkeypatch.delenv("AWS_LAMBDA_LOG_STREAM_NAME", raising=False)
+    orch = harness.orchestrator
+    task, subtask = orch.create_task_with_subtask(task_source="manual:test")
+
+    @subtask_handler(orchestrator_cm=harness.factory)
+    def handler(
+        body: dict[str, Any], context: Any, orchestrator: TaskOrchestrator
+    ) -> None:
+        pass  # nothing to do; only the stored URL is inspected
+
+    # act
+    event = _direct_event(task.id, subtask.id)
+    handler(event, context=None)
+
+    # assert
+    stored_subtask = harness.subtasks.get(subtask.id)
+    assert stored_subtask.cloud_logs_url is None
diff --git a/utilities/__init__.py b/utilities/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/utilities/aws_lambda/__init__.py b/utilities/aws_lambda/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/utilities/aws_lambda/default_orchestrator.py b/utilities/aws_lambda/default_orchestrator.py
new file mode 100644
index 00000000..f78886b9
--- /dev/null
+++ b/utilities/aws_lambda/default_orchestrator.py
@@ -0,0 +1,26 @@
+import os
+from collections.abc import Generator
+from contextlib import contextmanager
+
+from sqlmodel import Session
+
+from infrastructure.postgres.config import PostgresConfig
+from infrastructure.postgres.engine import make_engine
+from orchestration.task_orchestrator import TaskOrchestrator
+from repositories.tasks.subtask_postgres_repository import SubTaskPostgresRepository
+from repositories.tasks.task_postgres_repository import TaskPostgresRepository
+
+
+@contextmanager
+def default_orchestrator() -> Generator[TaskOrchestrator, None, None]:
+    """Context manager yielding a TaskOrchestrator backed by one Postgres session.
+
+    Connection settings are read from os.environ through PostgresConfig.from_env;
+    both repositories share the session, which is closed when the context exits.
+    """
+    # NOTE(review): make_engine runs on every call - confirm it caches/pools.
+    config = PostgresConfig.from_env(dict(os.environ))
+    with Session(make_engine(config)) as session:
+        task_repo = TaskPostgresRepository(session=session)
+        subtask_repo = SubTaskPostgresRepository(session=session)
+        yield TaskOrchestrator(task_repo=task_repo, subtask_repo=subtask_repo)
diff --git a/utilities/aws_lambda/subtask_handler.py b/utilities/aws_lambda/subtask_handler.py
new file mode 100644
index 00000000..592ffebf
--- /dev/null
+++ b/utilities/aws_lambda/subtask_handler.py
@@ -0,0 +1,102 @@
+"""@subtask_handler decorator for Lambdas that operate on existing SubTasks.
+
+Translates an AWS Lambda invocation (SQS-shaped or direct) into
+TaskOrchestrator.run_subtask(...) calls.
+"""
+
+import json
+import logging
+import os
+from contextlib import AbstractContextManager
+from functools import wraps
+from typing import Any, Callable, Optional, cast
+from urllib.parse import quote
+
+from utilities.aws_lambda.default_orchestrator import default_orchestrator
+from utilities.aws_lambda.subtask_trigger_body import SubtaskTriggerBody
+from orchestration.task_orchestrator import TaskOrchestrator
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+
+OrchestratorCM = Callable[[], AbstractContextManager[TaskOrchestrator]]
+
+
+def subtask_handler(
+    *,
+    orchestrator_cm: Optional[OrchestratorCM] = None,
+) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
+    """Decorator: run the wrapped function as the work of an existing SubTask.
+
+    Every record body must validate as a SubtaskTriggerBody (task_id and
+    sub_task_id). The function is then called as func(body, context,
+    orchestrator) inside orchestrator.run_subtask(...), which owns the start/
+    complete/fail lifecycle and cascades status into the parent Task. A failure
+    is logged and re-raised once the SubTask has been marked FAILED.
+    """
+    open_orchestrator = orchestrator_cm or default_orchestrator
+
+    def decorator(func: Callable[..., Any]) -> Callable[..., Any]:
+        @wraps(func)
+        def wrapper(event: dict[str, Any], context: Any) -> None:
+            logs_url = _cloudwatch_url()
+            with open_orchestrator() as orchestrator:
+                for record in _records(event):
+                    body = _parse_body(record)
+                    subtask_id = SubtaskTriggerBody.model_validate(body).sub_task_id
+
+                    def work(
+                        body: dict[str, Any] = body, o: TaskOrchestrator = orchestrator
+                    ) -> Any:
+                        return func(body, context, o)
+
+                    logger.info("Running subtask %s", subtask_id)
+                    try:
+                        orchestrator.run_subtask(
+                            subtask_id, work=work, cloud_logs_url=logs_url
+                        )
+                    except Exception:
+                        logger.exception("Subtask %s failed", subtask_id)
+                        raise
+                    logger.info("Subtask %s completed", subtask_id)
+
+        return wrapper
+
+    return decorator
+
+
+def _parse_body(record: dict[str, Any]) -> dict[str, Any]:
+    """Extract the record's payload as a dict, falling back to {} when unusable."""
+    # A record without a "body" key (direct invocation) is its own payload.
+    payload = record.get("body", record)
+    if isinstance(payload, str):
+        try:
+            payload = json.loads(payload)
+        except json.JSONDecodeError:
+            return {}  # malformed JSON counts as an empty body, not an error
+    # JSON arrays/scalars and any other non-dict body are unusable as well.
+    return cast(dict[str, Any], payload) if isinstance(payload, dict) else {}
+
+
+def _records(event: dict[str, Any]) -> list[dict[str, Any]]:
+    """List the dict entries of event["Records"], or [event] when it is not a list."""
+    batch = event.get("Records")
+    entries = cast(list[Any], batch) if isinstance(batch, list) else None
+    return [event] if entries is None else [e for e in entries if isinstance(e, dict)]
+
+
+def _console_encode(value: str) -> str:
+    return "$25".join(quote(value, safe="").split("%"))  # "/" -> "%2F" -> "$252F"
+
+
+def _cloudwatch_url() -> Optional[str]:
+    """Return the CloudWatch console URL of this invocation's log stream, if known."""
+    region = os.environ.get("AWS_REGION")
+    group = os.environ.get("AWS_LAMBDA_LOG_GROUP_NAME")
+    stream = os.environ.get("AWS_LAMBDA_LOG_STREAM_NAME")
+    # All three variables are required; a missing or empty one means no URL.
+    if not region or not group or not stream:
+        return None
+    base = f"https://{region}.console.aws.amazon.com/cloudwatch/home?region={region}"
+    fragment = f"#logsV2:log-groups/log-group/{_console_encode(group)}"
+    return f"{base}{fragment}/log-events/{_console_encode(stream)}"
diff --git a/utilities/aws_lambda/subtask_trigger_body.py b/utilities/aws_lambda/subtask_trigger_body.py
new file mode 100644
index 00000000..a6b539e5
--- /dev/null
+++ b/utilities/aws_lambda/subtask_trigger_body.py
@@ -0,0 +1,17 @@
+from uuid import UUID
+
+from pydantic import BaseModel, ConfigDict
+
+
+class SubtaskTriggerBody(BaseModel):
+    """Identifiers that every subtask_handler event body must carry.
+
+    `extra="allow"` keeps additional keys valid, so the rest of the work payload
+    can travel in the same body; the decorated function receives the original
+    body dict and does its own model_validate for use-case-specific fields.
+    """
+
+    model_config = ConfigDict(extra="allow")
+
+    task_id: UUID  # required; validated but not otherwise read by subtask_handler
+    sub_task_id: UUID  # the SubTask that subtask_handler passes to run_subtask
diff --git a/utilities/aws_lambda/task_handler.py b/utilities/aws_lambda/task_handler.py
new file mode 100644
index 00000000..82c7198e
--- /dev/null
+++ b/utilities/aws_lambda/task_handler.py
@@ -0,0 +1,98 @@
+"""@task_handler decorator for Lambdas that own the entire pipeline.
+
+Translates an AWS Lambda invocation (SQS-shaped or direct) into
+TaskOrchestrator.create_task_with_subtask(...) + run_subtask(...).
+"""
+
+import json
+from contextlib import AbstractContextManager
+from functools import wraps
+from typing import Any, Callable, Optional, cast
+
+from utilities.aws_lambda.default_orchestrator import default_orchestrator
+from domain.tasks.tasks import Source
+from orchestration.task_orchestrator import TaskOrchestrator
+
+OrchestratorCM = Callable[[], AbstractContextManager[TaskOrchestrator]]
+
+
+def task_handler(
+    *,
+    task_source: str,
+    source: Source,
+    orchestrator_cm: Optional[OrchestratorCM] = None,
+) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
+    """Run the wrapped function as the body of a freshly-created Task + SubTask.
+
+    For each record, creates a new Task + initial SubTask with the record body
+    as inputs, then runs func(body, context) inside orchestrator.run_subtask(...).
+    `source_id` is str(body[source.value]), or None when that key is missing
+    (silently, by design — preserved from legacy ADR-0001).
+
+    Batch events (event["Records"] is a list, the same rule _records applies)
+    follow SQS partial-batch-failure semantics: the messageId of each record
+    whose work raised is returned in {"batchItemFailures": [...]} instead of
+    propagating. Direct invocations return the list of results and re-raise.
+    """
+    factory = orchestrator_cm or default_orchestrator
+
+    def decorator(func: Callable[..., Any]) -> Callable[..., Any]:
+        @wraps(func)
+        def wrapper(event: dict[str, Any], context: Any) -> Any:
+            # Classify the event once, exactly as _records does: a payload that
+            # merely contains a non-list "Records" key is still a direct call.
+            is_batch = isinstance(event.get("Records"), list)
+            with factory() as orchestrator:
+                results: list[Any] = []
+                failures: list[dict[str, Any]] = []
+
+                for record in _records(event):
+                    body = _parse_body(record)
+                    raw_source_id = body.get(source.value)
+                    source_id = None if raw_source_id is None else str(raw_source_id)
+
+                    _, subtask = orchestrator.create_task_with_subtask(
+                        task_source=task_source,
+                        inputs=body,
+                        source=source,
+                        source_id=source_id,
+                    )
+
+                    try:
+                        result = orchestrator.run_subtask(
+                            subtask.id,
+                            work=lambda body=body: func(body, context),
+                        )
+                        results.append(result)
+                    except Exception:
+                        if not is_batch:
+                            raise
+                        # Partial batch response: list only the failed message.
+                        message_id = record.get("messageId", "")
+                        failures.append({"itemIdentifier": message_id})
+
+                return {"batchItemFailures": failures} if is_batch else results
+
+        return wrapper
+
+    return decorator
+
+
+def _parse_body(record: dict[str, Any]) -> dict[str, Any]:
+    """Return the record body as a dict, or {} when it is not a JSON object/dict."""
+    body = record.get("body", record)  # no "body" key: the record is the body
+    if isinstance(body, dict):
+        return cast(dict[str, Any], body)
+    try:
+        # Only text is decoded; every other non-dict body ends up as {}.
+        decoded = json.loads(body) if isinstance(body, str) else None
+    except json.JSONDecodeError:
+        return {}  # undecodable text yields an empty body instead of an error
+    return cast(dict[str, Any], decoded) if isinstance(decoded, dict) else {}
+
+
+def _records(event: dict[str, Any]) -> list[dict[str, Any]]:
+    """Return the dict items of a list-valued event["Records"], else [event]."""
+    listed = event.get("Records")
+    items = cast(list[Any], listed) if isinstance(listed, list) else None
+    return [event] if items is None else [it for it in items if isinstance(it, dict)]
diff --git a/utilities/private.py b/utilities/private.py
new file mode 100644
index 00000000..77a70578
--- /dev/null
+++ b/utilities/private.py
@@ -0,0 +1,33 @@
+import inspect
+from typing import Any, Callable
+
+
+class private:
+    """Descriptor that makes a method reachable only from methods of its own class.
+
+    Raises RuntimeError unless the calling frame's local `self` is an owner instance.
+    """
+
+    func: Callable[..., Any]
+    name: str
+    owner: type
+
+    def __init__(self, func: Callable[..., Any]) -> None:
+        self.func = func
+        self.name = getattr(func, "__name__", "<anonymous>")
+
+    def __set_name__(self, owner: type, name: str) -> None:
+        self.owner = owner  # the class whose body applied the decorator
+
+    def __get__(self, instance: Any, owner: type) -> Callable[..., Any]:
+        frame = inspect.currentframe()
+        caller = frame.f_back if frame is not None else None
+        try:
+            if caller is None:
+                raise RuntimeError("cannot inspect caller frame")
+            if not isinstance(caller.f_locals.get("self"), self.owner):
+                message = f"{self.owner.__name__}.{self.name} is private; "
+                raise RuntimeError(message + f"called from {caller.f_code.co_name}")
+            return getattr(self.func, "__get__")(instance, owner)
+        finally:
+            del frame, caller  # frames held in locals create reference cycles