diff --git a/.devcontainer/backend/devcontainer.json b/.devcontainer/backend/devcontainer.json
index 24949770..0a78dadf 100644
--- a/.devcontainer/backend/devcontainer.json
+++ b/.devcontainer/backend/devcontainer.json
@@ -5,7 +5,7 @@
"remoteUser": "vscode",
"workspaceFolder": "/workspaces/model",
"initializeCommand": "docker network create shared-dev 2>/dev/null || true; test -d \"$HOME/.config/gh\" || test -n \"$GITHUB_TOKEN\" || { echo >&2 'error: no GitHub auth found. Run `gh auth login && gh auth setup-git` on the host, or export GITHUB_TOKEN, then retry.'; exit 1; }",
- "postCreateCommand": "gh repo clone Hestia-Homes/agentic-toolkit /tmp/agentic-toolkit -- --branch 0.0.5 --depth 1 && bash /tmp/agentic-toolkit/setup.sh",
+ "postCreateCommand": "gh repo clone Hestia-Homes/agentic-toolkit /tmp/agentic-toolkit -- --branch 0.0.7 --depth 1 && bash /tmp/agentic-toolkit/setup.sh",
"postStartCommand": "bash .devcontainer/backend/post-install.sh",
"mounts": [
"source=${localEnv:HOME},target=/workspaces/home,type=bind",
diff --git a/.dockerignore b/.dockerignore
index 0c7d7749..90436ffc 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -6,7 +6,7 @@ backend/.idea/*
backend/.env
recommendations/tests/*
model_data/tests/*
-infrastructure/*
+deployment/*
data_collection/*
node_modules/*
conservation_areas/*
diff --git a/.github/workflows/_build_image.yml b/.github/workflows/_build_image.yml
index 3435c92d..e7ad9424 100644
--- a/.github/workflows/_build_image.yml
+++ b/.github/workflows/_build_image.yml
@@ -40,6 +40,8 @@ on:
required: false
EPC_AUTH_TOKEN:
required: false
+ OPEN_EPC_API_TOKEN:
+ required: false
jobs:
build:
@@ -50,6 +52,7 @@ jobs:
DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }}
DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }}
EPC_AUTH_TOKEN: ${{ secrets.EPC_AUTH_TOKEN }}
+ OPEN_EPC_API_TOKEN: ${{ secrets.OPEN_EPC_API_TOKEN }}
outputs:
image_digest: ${{ steps.digest.outputs.image_digest }}
diff --git a/.github/workflows/_deploy_lambda.yml b/.github/workflows/_deploy_lambda.yml
index 1cc7d462..0d702155 100644
--- a/.github/workflows/_deploy_lambda.yml
+++ b/.github/workflows/_deploy_lambda.yml
@@ -80,6 +80,10 @@ on:
required: false
TF_VAR_pashub_password:
required: false
+ TF_VAR_pashub_coordination_email:
+ required: false
+ TF_VAR_pashub_coordination_password:
+ required: false
TF_VAR_hubspot_api_key:
required: false
@@ -154,6 +158,8 @@ jobs:
TF_VAR_social_housing_wave_3_sharepoint_id: ${{ secrets.TF_VAR_social_housing_wave_3_sharepoint_id }}
TF_VAR_pashub_email: ${{ secrets.TF_VAR_pashub_email }}
TF_VAR_pashub_password: ${{ secrets.TF_VAR_pashub_password }}
+ TF_VAR_pashub_coordination_email: ${{ secrets.TF_VAR_pashub_coordination_email }}
+ TF_VAR_pashub_coordination_password: ${{ secrets.TF_VAR_pashub_coordination_password }}
TF_VAR_hubspot_api_key: ${{ secrets.TF_VAR_hubspot_api_key }}
TF_VAR_magicplan_customer_id: ${{ secrets.TF_VAR_magicplan_customer_id }}
TF_VAR_magicplan_api_key: ${{ secrets.TF_VAR_magicplan_api_key }}
@@ -202,6 +208,8 @@ jobs:
TF_VAR_social_housing_wave_3_sharepoint_id: ${{ secrets.TF_VAR_social_housing_wave_3_sharepoint_id }}
TF_VAR_pashub_email: ${{ secrets.TF_VAR_pashub_email }}
TF_VAR_pashub_password: ${{ secrets.TF_VAR_pashub_password }}
+ TF_VAR_pashub_coordination_email: ${{ secrets.TF_VAR_pashub_coordination_email }}
+ TF_VAR_pashub_coordination_password: ${{ secrets.TF_VAR_pashub_coordination_password }}
TF_VAR_hubspot_api_key: ${{ secrets.TF_VAR_hubspot_api_key }}
TF_VAR_magicplan_customer_id: ${{ secrets.TF_VAR_magicplan_customer_id }}
TF_VAR_magicplan_api_key: ${{ secrets.TF_VAR_magicplan_api_key }}
diff --git a/.github/workflows/_smoke_test_lambda.yml b/.github/workflows/_smoke_test_lambda.yml
new file mode 100644
index 00000000..3fcf0de4
--- /dev/null
+++ b/.github/workflows/_smoke_test_lambda.yml
@@ -0,0 +1,85 @@
+name: Lambda smoke test
+
+on:
+ workflow_call:
+ inputs:
+ dockerfile_path:
+ required: true
+ type: string
+ build_context:
+ required: false
+ default: "."
+ type: string
+ service_name:
+ required: true
+ type: string
+
+jobs:
+  # Builds the Lambda image, starts it behind the Runtime Interface Emulator
+  # (RIE), sends one invocation and fails if the response reports an import
+  # error in the handler.
+  smoke-test:
+    runs-on: ubuntu-latest
+    # Workflow inputs reach the scripts as environment variables rather than
+    # being interpolated into the shell source, so a value containing spaces
+    # or shell metacharacters cannot change the commands that run.
+    env:
+      DOCKERFILE_PATH: ${{ inputs.dockerfile_path }}
+      BUILD_CONTEXT: ${{ inputs.build_context }}
+      IMAGE_TAG: ${{ inputs.service_name }}-smoke-test:latest
+      CONTAINER_NAME: ${{ inputs.service_name }}-smoke-test
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Download AWS Lambda RIE
+        run: |
+          mkdir -p ~/.aws-lambda-rie
+          curl -fsSL -o ~/.aws-lambda-rie/aws-lambda-rie \
+            https://github.com/aws/aws-lambda-runtime-interface-emulator/releases/latest/download/aws-lambda-rie
+          chmod +x ~/.aws-lambda-rie/aws-lambda-rie
+
+      - name: Build Lambda image
+        run: |
+          docker build \
+            --platform linux/amd64 \
+            -f "$DOCKERFILE_PATH" \
+            -t "$IMAGE_TAG" \
+            "$BUILD_CONTEXT"
+
+      - name: Start Lambda container
+        run: |
+          ENTRY=$(docker inspect --format='{{range .Config.Entrypoint}}{{.}} {{end}}' "$IMAGE_TAG")
+          CMD_ARGS=$(docker inspect --format='{{range .Config.Cmd}}{{.}} {{end}}' "$IMAGE_TAG")
+
+          if echo "$ENTRY" | grep -q "lambda-entrypoint.sh"; then
+            # AWS base image — RIE is bundled
+            docker run -d --name "$CONTAINER_NAME" \
+              -p 9000:8080 \
+              "$IMAGE_TAG"
+          else
+            # Custom base — mount RIE from runner and re-wire entrypoint.
+            # $ENTRY and $CMD_ARGS stay unquoted on purpose: each holds
+            # space-separated words that must become separate arguments.
+            docker run -d --name "$CONTAINER_NAME" \
+              -v "$HOME/.aws-lambda-rie:/aws-lambda-rie" \
+              -p 9000:8080 \
+              --entrypoint /aws-lambda-rie/aws-lambda-rie \
+              "$IMAGE_TAG" \
+              $ENTRY $CMD_ARGS
+          fi
+
+      - name: Invoke Lambda and check for import errors
+        run: |
+          response=$(curl -s --retry-connrefused --retry 15 --retry-delay 1 \
+            -X POST \
+            http://localhost:9000/2015-03-31/functions/function/invocations \
+            -H "Content-Type: application/json" \
+            -d '{"Records":[{"body":"{}"}]}')
+
+          echo "Response: $response"
+
+          if [ -z "$response" ]; then
+            echo "No response from Lambda RIE"
+            exit 1
+          fi
+
+          if echo "$response" | grep -qE 'ImportModuleError|ModuleNotFoundError|ImportError'; then
+            echo "Import error detected in handler"
+            exit 1
+          fi
+
+      - name: Dump container logs
+        if: always()
+        run: docker logs "$CONTAINER_NAME"
+
+      - name: Tear down container
+        if: always()
+        run: docker rm -f "$CONTAINER_NAME"
diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml
index e0343974..7f2eb890 100644
--- a/.github/workflows/deploy_terraform.yml
+++ b/.github/workflows/deploy_terraform.yml
@@ -62,20 +62,20 @@ jobs:
- uses: hashicorp/setup-terraform@v3
- name: Terraform Init
- working-directory: infrastructure/terraform/shared
+ working-directory: deployment/terraform/shared
run: terraform init -reconfigure
- name: Terraform Workspace
- working-directory: infrastructure/terraform/shared
+ working-directory: deployment/terraform/shared
run: terraform workspace select ${STAGE} || terraform workspace new ${STAGE}
- name: Terraform Plan
- working-directory: infrastructure/terraform/shared
+ working-directory: deployment/terraform/shared
run: terraform plan -var-file=${STAGE}.tfvars -out=tfplan
- name: Terraform Apply
if: env.TERRAFORM_APPLY == 'true'
- working-directory: infrastructure/terraform/shared
+ working-directory: deployment/terraform/shared
run: terraform apply -auto-approve tfplan
# ============================================================
@@ -101,7 +101,7 @@ jobs:
uses: ./.github/workflows/_deploy_lambda.yml
with:
lambda_name: ara_engine
- lambda_path: infrastructure/terraform/lambda/engine
+ lambda_path: deployment/terraform/lambda/engine
stage: ${{ needs.determine_stage.outputs.stage }}
ecr_repo: engine-${{ needs.determine_stage.outputs.stage }}
image_digest: ${{ needs.ara_engine_image.outputs.image_digest }}
@@ -133,6 +133,7 @@ jobs:
DEV_DB_PORT=$DEV_DB_PORT
DEV_DB_NAME=$DEV_DB_NAME
EPC_AUTH_TOKEN=$EPC_AUTH_TOKEN
+ OPEN_EPC_API_TOKEN=$OPEN_EPC_API_TOKEN
secrets:
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
@@ -141,6 +142,7 @@ jobs:
DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }}
DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }}
EPC_AUTH_TOKEN: ${{ secrets.DEV_EPC_AUTH_TOKEN }}
+ OPEN_EPC_API_TOKEN: ${{ secrets.DEV_OPEN_EPC_API_TOKEN }}
# ============================================================
# Deploy Address 2 UPRN Lambda
@@ -150,7 +152,7 @@ jobs:
uses: ./.github/workflows/_deploy_lambda.yml
with:
lambda_name: address2uprn
- lambda_path: infrastructure/terraform/lambda/address2UPRN
+ lambda_path: deployment/terraform/lambda/address2UPRN
stage: ${{ needs.determine_stage.outputs.stage }}
ecr_repo: address2uprn-${{ needs.determine_stage.outputs.stage }}
image_digest: ${{ needs.address2uprn_image.outputs.image_digest }}
@@ -169,7 +171,7 @@ jobs:
uses: ./.github/workflows/_build_image.yml
with:
ecr_repo: postcode_splitter-${{ needs.determine_stage.outputs.stage }}
- dockerfile_path: backend/postcode_splitter/handler/Dockerfile
+ dockerfile_path: applications/postcode_splitter/Dockerfile
build_context: .
build_args: |
DEV_DB_HOST=$DEV_DB_HOST
@@ -191,7 +193,7 @@ jobs:
uses: ./.github/workflows/_deploy_lambda.yml
with:
lambda_name: postcodeSplitter
- lambda_path: infrastructure/terraform/lambda/postcodeSplitter
+ lambda_path: deployment/terraform/lambda/postcodeSplitter
stage: ${{ needs.determine_stage.outputs.stage }}
ecr_repo: postcode_splitter-${{ needs.determine_stage.outputs.stage }}
image_digest: ${{ needs.postcodeSplitter_image.outputs.image_digest }}
@@ -231,7 +233,7 @@ jobs:
uses: ./.github/workflows/_deploy_lambda.yml
with:
lambda_name: bulk_address2uprn_combiner
- lambda_path: infrastructure/terraform/lambda/bulk_address2uprn_combiner
+ lambda_path: deployment/terraform/lambda/bulk_address2uprn_combiner
stage: ${{ needs.determine_stage.outputs.stage }}
ecr_repo: bulk_address2uprn_combiner-${{ needs.determine_stage.outputs.stage }}
image_digest: ${{ needs.bulk_address2uprn_combiner_image.outputs.image_digest }}
@@ -271,7 +273,7 @@ jobs:
uses: ./.github/workflows/_deploy_lambda.yml
with:
lambda_name: condition-etl
- lambda_path: infrastructure/terraform/lambda/condition-etl
+ lambda_path: deployment/terraform/lambda/condition-etl
stage: ${{ needs.determine_stage.outputs.stage }}
ecr_repo: condition-etl-${{ needs.determine_stage.outputs.stage }}
image_digest: ${{ needs.condition_etl_image.outputs.image_digest }}
@@ -311,7 +313,7 @@ jobs:
uses: ./.github/workflows/_deploy_lambda.yml
with:
lambda_name: categorisation
- lambda_path: infrastructure/terraform/lambda/categorisation
+ lambda_path: deployment/terraform/lambda/categorisation
stage: ${{ needs.determine_stage.outputs.stage }}
ecr_repo: categorisation-${{ needs.determine_stage.outputs.stage }}
image_digest: ${{ needs.categorisation_image.outputs.image_digest }}
@@ -351,7 +353,7 @@ jobs:
uses: ./.github/workflows/_deploy_lambda.yml
with:
lambda_name: ordnanceSurvey
- lambda_path: infrastructure/terraform/lambda/ordnanceSurvey
+ lambda_path: deployment/terraform/lambda/ordnanceSurvey
stage: ${{ needs.determine_stage.outputs.stage }}
ecr_repo: ordnance-${{ needs.determine_stage.outputs.stage }}
image_digest: ${{ needs.ordnanceSurvey_image.outputs.image_digest }}
@@ -386,7 +388,7 @@ jobs:
uses: ./.github/workflows/_deploy_lambda.yml
with:
lambda_name: pashub_to_ara
- lambda_path: infrastructure/terraform/lambda/pashub_to_ara
+ lambda_path: deployment/terraform/lambda/pashub_to_ara
stage: ${{ needs.determine_stage.outputs.stage }}
ecr_repo: pashub_to_ara-${{ needs.determine_stage.outputs.stage }}
image_digest: ${{ needs.pashub_to_ara_image.outputs.image_digest }}
@@ -407,6 +409,8 @@ jobs:
TF_VAR_social_housing_wave_3_sharepoint_id: ${{ secrets.SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID }}
TF_VAR_pashub_email: ${{ secrets.PASHUB_EMAIL }}
TF_VAR_pashub_password: ${{ secrets.PASHUB_PASSWORD }}
+ TF_VAR_pashub_coordination_email: ${{ secrets.PASHUB_COORDINATION_EMAIL }}
+ TF_VAR_pashub_coordination_password: ${{ secrets.PASHUB_COORDINATION_PASSWORD }}
# ============================================================
@@ -417,7 +421,7 @@ jobs:
uses: ./.github/workflows/_deploy_lambda.yml
with:
lambda_name: ara_fast_api
- lambda_path: infrastructure/terraform/lambda/fast-api
+ lambda_path: deployment/terraform/lambda/fast-api
stage: ${{ needs.determine_stage.outputs.stage }}
terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }}
secrets:
@@ -456,17 +460,17 @@ jobs:
- uses: hashicorp/setup-terraform@v3
- name: Terraform Init
- working-directory: infrastructure/terraform/cdn_certificate
+ working-directory: deployment/terraform/cdn_certificate
run: terraform init -reconfigure
- name: Terraform Workspace
- working-directory: infrastructure/terraform/cdn_certificate
+ working-directory: deployment/terraform/cdn_certificate
run: |
terraform workspace select $STAGE \
|| terraform workspace new $STAGE
- name: Terraform Plan
- working-directory: infrastructure/terraform/cdn_certificate
+ working-directory: deployment/terraform/cdn_certificate
run: |
terraform plan \
-var="stage=${STAGE}" \
@@ -474,7 +478,7 @@ jobs:
- name: Terraform Apply
if: env.TERRAFORM_APPLY == 'true'
- working-directory: infrastructure/terraform/cdn_certificate
+ working-directory: deployment/terraform/cdn_certificate
run: terraform apply -auto-approve tfplan
@@ -501,17 +505,17 @@ jobs:
- uses: hashicorp/setup-terraform@v3
- name: Terraform Init
- working-directory: infrastructure/terraform/cdn
+ working-directory: deployment/terraform/cdn
run: terraform init -reconfigure
- name: Terraform Workspace
- working-directory: infrastructure/terraform/cdn
+ working-directory: deployment/terraform/cdn
run: |
terraform workspace select $STAGE \
|| terraform workspace new $STAGE
- name: Terraform Plan
- working-directory: infrastructure/terraform/cdn
+ working-directory: deployment/terraform/cdn
run: |
terraform plan \
-var="stage=${STAGE}" \
@@ -519,7 +523,7 @@ jobs:
- name: Terraform Apply
if: env.TERRAFORM_APPLY == 'true'
- working-directory: infrastructure/terraform/cdn
+ working-directory: deployment/terraform/cdn
run: terraform apply -auto-approve tfplan
# ============================================================
@@ -560,7 +564,7 @@ jobs:
uses: ./.github/workflows/_deploy_lambda.yml
with:
lambda_name: magic_plan
- lambda_path: infrastructure/terraform/lambda/magic_plan
+ lambda_path: deployment/terraform/lambda/magic_plan
stage: ${{ needs.determine_stage.outputs.stage }}
ecr_repo: magic-plan-${{ needs.determine_stage.outputs.stage }}
image_digest: ${{ needs.magic_plan_image.outputs.image_digest }}
@@ -583,7 +587,7 @@ jobs:
uses: ./.github/workflows/_deploy_lambda.yml
with:
lambda_name: hubspot-etl-to-ara
- lambda_path: infrastructure/terraform/lambda/hubspot_deal_etl
+ lambda_path: deployment/terraform/lambda/hubspot_deal_etl
stage: ${{ needs.determine_stage.outputs.stage }}
ecr_repo: hubspot-etl-${{ needs.determine_stage.outputs.stage }}
image_digest: ${{ needs.hubspot_etl_image.outputs.image_digest }}
diff --git a/.github/workflows/lambda_smoke_tests.yml b/.github/workflows/lambda_smoke_tests.yml
new file mode 100644
index 00000000..b562f91e
--- /dev/null
+++ b/.github/workflows/lambda_smoke_tests.yml
@@ -0,0 +1,114 @@
+name: Lambda Smoke Tests
+
+on:
+ pull_request:
+ branches:
+ - main
+
+jobs:
+ # ============================================================
+ # Ara Engine
+ # ============================================================
+ ara_engine_smoke_test:
+ uses: ./.github/workflows/_smoke_test_lambda.yml
+ with:
+ dockerfile_path: backend/docker/engine.Dockerfile
+ build_context: .
+ service_name: ara-engine
+
+ # ============================================================
+ # Address 2 UPRN
+ # ============================================================
+ address2uprn_smoke_test:
+ uses: ./.github/workflows/_smoke_test_lambda.yml
+ with:
+ dockerfile_path: backend/address2UPRN/handler/Dockerfile
+ build_context: .
+ service_name: address2uprn
+
+ # ============================================================
+ # Postcode Splitter
+ # ============================================================
+ postcode_splitter_smoke_test:
+ uses: ./.github/workflows/_smoke_test_lambda.yml
+ with:
+ dockerfile_path: backend/postcode_splitter/handler/Dockerfile
+ build_context: .
+ service_name: postcode-splitter
+
+ postcode_splitter_ddd_smoke_test:
+ uses: ./.github/workflows/_smoke_test_lambda.yml
+ with:
+ dockerfile_path: applications/postcode_splitter/Dockerfile
+ build_context: .
+ service_name: postcode-splitter-ddd
+
+ # ============================================================
+ # Bulk Address2UPRN Combiner
+ # ============================================================
+ bulk_address2uprn_combiner_smoke_test:
+ uses: ./.github/workflows/_smoke_test_lambda.yml
+ with:
+ dockerfile_path: backend/bulk_address2uprn_combiner/handler/Dockerfile
+ build_context: .
+ service_name: bulk-address2uprn-combiner
+
+ # ============================================================
+ # Condition ETL
+ # ============================================================
+ condition_etl_smoke_test:
+ uses: ./.github/workflows/_smoke_test_lambda.yml
+ with:
+ dockerfile_path: backend/condition/handler/Dockerfile
+ build_context: .
+ service_name: condition-etl
+
+ # ============================================================
+ # Categorisation
+ # ============================================================
+ categorisation_smoke_test:
+ uses: ./.github/workflows/_smoke_test_lambda.yml
+ with:
+ dockerfile_path: backend/categorisation/handler/Dockerfile
+ build_context: .
+ service_name: categorisation
+
+ # ============================================================
+ # Ordnance Survey
+ # ============================================================
+ ordnance_survey_smoke_test:
+ uses: ./.github/workflows/_smoke_test_lambda.yml
+ with:
+ dockerfile_path: backend/ordnanceSurvey/handler/Dockerfile
+ build_context: .
+ service_name: ordnance-survey
+
+ # ============================================================
+ # Pas Hub Fetcher
+ # ============================================================
+ pashub_smoke_test:
+ uses: ./.github/workflows/_smoke_test_lambda.yml
+ with:
+ dockerfile_path: backend/pashub_fetcher/handler/Dockerfile
+ build_context: .
+ service_name: pashub
+
+ # ============================================================
+ # MagicPlan
+ # ============================================================
+ magic_plan_smoke_test:
+ uses: ./.github/workflows/_smoke_test_lambda.yml
+ with:
+ dockerfile_path: backend/magic_plan/handler/Dockerfile
+ build_context: .
+ service_name: magic-plan
+
+ # ============================================================
+ # HubSpot Scraper
+ # ============================================================
+ hubspot_scraper_smoke_test:
+ uses: ./.github/workflows/_smoke_test_lambda.yml
+ with:
+ dockerfile_path: etl/hubspot/scripts/scraper/handler/Dockerfile
+ build_context: .
+ service_name: hubspot-scraper
diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml
index fa4fdf2a..15d4cfe9 100644
--- a/.github/workflows/unit_tests.yml
+++ b/.github/workflows/unit_tests.yml
@@ -60,3 +60,15 @@ jobs:
-e DB_PASSWORD=test \
-e DB_PORT=5432 \
model-test pytest -vv -m 'not integration'
+
+ # The DDD rewrite (tests/) defines SQLModel table classes that map to the
+ # same physical tables as the legacy backend models. Both sets share the
+ # one global SQLModel.metadata, so they cannot be imported into the same
+ # pytest process. It runs as a separate invocation until the legacy
+ # models are retired. Its DB is spawned in-process by pytest-postgresql,
+ # so no DB service or env is required.
+ - name: Run DDD tests
+ run: |
+ docker run --rm \
+ --network host \
+ model-test pytest -vv tests/
diff --git a/.gitignore b/.gitignore
index 3e193d24..6cd39e9d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -121,6 +121,7 @@ celerybeat.pid
# Environments
.env
+.env.local
.venv
env/
venv/
diff --git a/AGENTS.md b/AGENTS.md
deleted file mode 100644
index aa0426a0..00000000
--- a/AGENTS.md
+++ /dev/null
@@ -1,29 +0,0 @@
-
-
-
-
-
-## BACKLOG WORKFLOW INSTRUCTIONS
-
-This project uses Backlog.md MCP for all task and project management activities.
-
-**CRITICAL GUIDANCE**
-
-- If your client supports MCP resources, read `backlog://workflow/overview` to understand when and how to use Backlog for this project.
-- If your client only supports tools or the above request fails, call `backlog.get_backlog_instructions()` to load the tool-oriented overview. Use the `instruction` selector when you need `task-creation`, `task-execution`, or `task-finalization`.
-
-- **First time working here?** Read the overview resource IMMEDIATELY to learn the workflow
-- **Already familiar?** You should have the overview cached ("## Backlog.md Overview (MCP)")
-- **When to read it**: BEFORE creating tasks, or when you're unsure whether to track work
-
-These guides cover:
-- Decision framework for when to create tasks
-- Search-first workflow to avoid duplicates
-- Links to detailed guides for task creation, execution, and finalization
-- MCP tools reference
-
-You MUST read the overview resource to understand the complete workflow. The information is NOT summarized here.
-
-
-
-
diff --git a/CLAUDE.md b/CLAUDE.md
index faa857ce..857c7083 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -1,33 +1,4 @@
-
-
-
-
-## BACKLOG WORKFLOW INSTRUCTIONS
-
-This project uses Backlog.md MCP for all task and project management activities.
-
-**CRITICAL GUIDANCE**
-
-- If your client supports MCP resources, read `backlog://workflow/overview` to understand when and how to use Backlog for this project.
-- If your client only supports tools or the above request fails, call `backlog.get_backlog_instructions()` to load the tool-oriented overview. Use the `instruction` selector when you need `task-creation`, `task-execution`, or `task-finalization`.
-
-- **First time working here?** Read the overview resource IMMEDIATELY to learn the workflow
-- **Already familiar?** You should have the overview cached ("## Backlog.md Overview (MCP)")
-- **When to read it**: BEFORE creating tasks, or when you're unsure whether to track work
-
-These guides cover:
-- Decision framework for when to create tasks
-- Search-first workflow to avoid duplicates
-- Links to detailed guides for task creation, execution, and finalization
-- MCP tools reference
-
-You MUST read the overview resource to understand the complete workflow. The information is NOT summarized here.
-
-
-
-
-
## Available Skills
Five Claude Code skills are installed in this repo's dev container. Each maps to a phase of the feature lifecycle.
diff --git a/CONTEXT.md b/CONTEXT.md
index 34afd5fe..3c02e7e8 100644
--- a/CONTEXT.md
+++ b/CONTEXT.md
@@ -58,7 +58,7 @@ A UK postal code used to group nearby addresses; the primary search key for find
_Avoid_: zip code, postal code
**User Address**:
-A free-text address string provided by a user or imported from a customer dataset, before any normalisation or matching.
+A structured dataclass (`domain.addresses.user_address.UserAddress`) capturing a customer-supplied address: a free-text `user_address` line, a canonical `postcode` (sanitised on construction), and an optional `internal_reference`. The bare string sense — the raw free-text address line as it arrives from upstream ingestion, before being wrapped — remains valid when discussing CSV columns, API payloads, or other upstream contexts; in domain code, prefer the dataclass.
_Avoid_: user input, raw address, user_inputed_address
**Comparable Properties**:
@@ -297,7 +297,7 @@ _Avoid_: API key, auth token, secret
- **"energy assessment"** in the existing codebase (`energy_assessment_functions`, `energy_assessments_by_uprn`) refers to what is now canonically called **Site Notes**. New code uses **Site Notes**.
- **"patch"** / `patch_epc` in the existing codebase has been merged into **Landlord Overrides**; the original concept is deprecated.
- **"already_installed measures"** in the existing codebase is likely subsumed by **Landlord Overrides** ("we have a heat pump now" → override the heating fields). Final call deferred to implementation.
-- **"address"** appears as both the raw **User Address** (free-text) and a structured field on an **EPC Search Result** (normalised lines). Always qualify: "user address" vs "EPC address" or "address line 1".
+- **"address"** appears as both a **User Address** (the raw free-text line from customer data, or the structured `UserAddress` dataclass that wraps it) and a structured field on an **EPC Search Result** (normalised lines). Always qualify: "user address" vs "EPC address" or "address line 1". Within `domain/`, **User Address** specifically means the `UserAddress` dataclass; in upstream ingestion contexts (CSV columns, SQS payloads) it can still mean the raw string.
- **"score"** is used for `AddressMatch.score()` output, the `lexiscore` column, and informally. Prefer **Lexiscore** in domain discussions; reserve "score" for method-level code comments.
- **"user_inputed_address"** in `backend/address2UPRN/main.py` is a misspelling and a synonym for **User Address** — the canonical term. New code should use `user_address`.
- **"EPC"** is overloaded as both the document and the rating band letter. Use **EPC** for the document, **EPC Band** for the letter.
diff --git a/Dockerfile.test.dockerignore b/Dockerfile.test.dockerignore
index 4f79c6ee..ed05c399 100644
--- a/Dockerfile.test.dockerignore
+++ b/Dockerfile.test.dockerignore
@@ -4,7 +4,7 @@ model_data/local_data/
backend/node_modules/
backend/.idea/
backend/.env
-infrastructure/
+deployment/
data_collection/
node_modules/
conservation_areas/
diff --git a/applications/__init__.py b/applications/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/applications/postcode_splitter/Dockerfile b/applications/postcode_splitter/Dockerfile
new file mode 100644
index 00000000..aea1f914
--- /dev/null
+++ b/applications/postcode_splitter/Dockerfile
@@ -0,0 +1,34 @@
+FROM public.ecr.aws/lambda/python:3.11
+
+# Database host, port and name arrive as --build-arg values from the deploy
+# workflow (GitHub Actions DEV_DB_* secrets) and are baked into the image,
+# the same way backend/postcode_splitter/handler/Dockerfile does it. They are
+# re-exported under the POSTGRES_* names that PostgresConfig.from_env reads.
+# Username and password are deliberately NOT baked in: Terraform injects
+# those as Lambda env vars from Secrets Manager.
+ARG DEV_DB_HOST
+ARG DEV_DB_PORT
+ARG DEV_DB_NAME
+
+ENV POSTGRES_HOST=${DEV_DB_HOST} \
+    POSTGRES_PORT=${DEV_DB_PORT} \
+    POSTGRES_DATABASE=${DEV_DB_NAME}
+
+WORKDIR /var/task
+
+# Install dependencies before copying sources so this layer is reused when
+# only application code changes.
+COPY applications/postcode_splitter/requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Layered source packages the handler imports from. The new splitter depends
+# only on DDD-shaped packages — no pandas, no legacy backend/.
+COPY domain/ domain/
+COPY infrastructure/ infrastructure/
+COPY orchestration/ orchestration/
+COPY repositories/ repositories/
+COPY utilities/ utilities/
+COPY applications/ applications/
+
+# The handler module is also copied to the task root as main.py so the Lambda
+# runtime can resolve ``main.handler`` without a package prefix.
+COPY applications/postcode_splitter/handler.py /var/task/main.py
+
+CMD ["main.handler"]
diff --git a/applications/postcode_splitter/__init__.py b/applications/postcode_splitter/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/applications/postcode_splitter/handler.py b/applications/postcode_splitter/handler.py
new file mode 100644
index 00000000..9fb3ca6a
--- /dev/null
+++ b/applications/postcode_splitter/handler.py
@@ -0,0 +1,52 @@
+from __future__ import annotations
+
+import os
+from typing import Any
+
+import boto3
+
+from applications.postcode_splitter.postcode_splitter_trigger_body import (
+ PostcodeSplitterTriggerBody,
+)
+from infrastructure.address2uprn_queue_client import Address2UprnQueueClient
+from infrastructure.csv_s3_client import CsvS3Client
+from orchestration.postcode_splitter_orchestrator import PostcodeSplitterOrchestrator
+from orchestration.task_orchestrator import TaskOrchestrator
+from repositories.user_address.user_address_csv_s3_repository import (
+ UserAddressCsvS3Repository,
+)
+from utilities.aws_lambda.subtask_handler import subtask_handler
+
+
+@subtask_handler()
+def handler(
+ body: dict[str, Any], context: Any, task_orchestrator: TaskOrchestrator
+) -> dict[str, list[str]]:
+ trigger = PostcodeSplitterTriggerBody.model_validate(body)
+
+ bucket = os.environ["S3_BUCKET_NAME"]
+ queue_url = os.environ["ADDRESS2UPRN_QUEUE_URL"]
+
+ # boto3.client is overloaded per-service in the installed stubs; cast
+ # to Any so the strict-mode checker treats it as opaque.
+ boto3_client: Any = boto3.client # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
+ boto_s3: Any = boto3_client("s3")
+ boto_sqs: Any = boto3_client("sqs")
+
+ csv_client = CsvS3Client(boto_s3, bucket)
+ user_address_repo = UserAddressCsvS3Repository(csv_client, bucket)
+ queue_client = Address2UprnQueueClient(boto_sqs, queue_url)
+
+ splitter = PostcodeSplitterOrchestrator(
+ task_orchestrator=task_orchestrator,
+ user_address_repo=user_address_repo,
+ queue_client=queue_client,
+ )
+
+ child_ids = splitter.split_and_dispatch(
+ parent_task_id=trigger.task_id,
+ parent_subtask_id=trigger.sub_task_id,
+ input_s3_uri=trigger.s3_uri,
+ )
+
+ return {"child_subtask_ids": [str(cid) for cid in child_ids]}
diff --git a/applications/postcode_splitter/local_handler/.env.local.example b/applications/postcode_splitter/local_handler/.env.local.example
new file mode 100644
index 00000000..28fa8390
--- /dev/null
+++ b/applications/postcode_splitter/local_handler/.env.local.example
@@ -0,0 +1,34 @@
+# Local-test environment for the postcode_splitter Lambda.
+#
+# cp .env.local.example .env.local then fill in the values below.
+#
+# .env.local is gitignored. The container hits REAL AWS and a REAL Postgres,
+# so every value here points at infrastructure that actually exists.
+#
+# NOTE: the new DDD code uses different env var names than the repo root
+# .env. The mapping (root .env name -> var here) is given per section.
+# Keep comments on their own lines — docker-compose's env_file parser folds a
+# trailing "# ..." into the value.
+
+# --- Postgres (orchestration/default_orchestrator -> PostgresConfig.from_env) ---
+# POSTGRES_HOST <- DB_HOST, PORT <- DB_PORT, USERNAME <- DB_USERNAME,
+# PASSWORD <- DB_PASSWORD, DATABASE <- DB_NAME.
+POSTGRES_HOST=
+POSTGRES_PORT=5432
+POSTGRES_USERNAME=
+POSTGRES_PASSWORD=
+POSTGRES_DATABASE=
+# Optional; defaults to psycopg2 when unset.
+# POSTGRES_DRIVER=psycopg2
+
+# --- Handler config (applications/postcode_splitter/handler.py) ---
+# S3_BUCKET_NAME: bucket holding the input address CSV (root .env: DATA_BUCKET).
+# ADDRESS2UPRN_QUEUE_URL: SQS queue the splitter fans batches out to; not in
+# the root .env (Terraform sets it in prod).
+S3_BUCKET_NAME=
+ADDRESS2UPRN_QUEUE_URL=
+
+# --- AWS credentials for boto3 (S3 + SQS clients) ---
+AWS_ACCESS_KEY_ID=
+AWS_SECRET_ACCESS_KEY=
+AWS_DEFAULT_REGION=eu-west-2
+# Only needed when using temporary/SSO credentials.
+# AWS_SESSION_TOKEN=
diff --git a/applications/postcode_splitter/local_handler/docker-compose.yml b/applications/postcode_splitter/local_handler/docker-compose.yml
new file mode 100644
index 00000000..68af1c40
--- /dev/null
+++ b/applications/postcode_splitter/local_handler/docker-compose.yml
@@ -0,0 +1,9 @@
+services:
+ postcode-splitter:
+ build:
+ context: ../../../
+ dockerfile: applications/postcode_splitter/Dockerfile
+ ports:
+ - "9001:8080"
+ env_file:
+ - .env.local
diff --git a/applications/postcode_splitter/local_handler/invoke_local_lambda.py b/applications/postcode_splitter/local_handler/invoke_local_lambda.py
new file mode 100755
index 00000000..5f4b1d36
--- /dev/null
+++ b/applications/postcode_splitter/local_handler/invoke_local_lambda.py
@@ -0,0 +1,28 @@
+#!/usr/bin/env python3
+"""POST a sample Records/body event to the local postcode-splitter Lambda container."""
+import json
+
+import requests
+
+HOST = "localhost"
+PORT = "9001"
+
+# (connect, read) seconds: fail fast if the container is not up, but allow a
+# long invocation (900 s is Lambda's maximum); requests never times out by default.
+TIMEOUT = (5, 900)
+
+LAMBDA_URL = f"http://{HOST}:{PORT}/2015-03-31/functions/function/invocations"
+
+# The handler input travels as the JSON-encoded "body" of a single record.
+message = {
+    "task_id": "e295d89b-a7c5-4a9a-8b4e-b405fab1f298",
+    "sub_task_id": "f4a9944f-41f0-4a33-8669-5016ec574068",
+    "s3_uri": "s3://retrofit-data-dev/bulk_onboarding_inputs/hyde2 (1).csv",
+}
+payload = {"Records": [{"body": json.dumps(message)}]}
+
+response = requests.post(LAMBDA_URL, json=payload, timeout=TIMEOUT)
+
+print("Status code:", response.status_code)
+print("Response:")
+print(response.text)
diff --git a/applications/postcode_splitter/local_handler/run_local.sh b/applications/postcode_splitter/local_handler/run_local.sh
new file mode 100755
index 00000000..345b60ee
--- /dev/null
+++ b/applications/postcode_splitter/local_handler/run_local.sh
@@ -0,0 +1,12 @@
+#!/usr/bin/env bash
+set -euo pipefail
+cd "$(dirname "$0")"
+
+if [ ! -f .env.local ]; then
+ cp .env.local.example .env.local
+ echo "Created .env.local from the template — fill it in, then re-run." >&2
+ exit 1
+fi
+
+docker compose build --no-cache
+docker compose up --force-recreate
diff --git a/applications/postcode_splitter/postcode_splitter_trigger_body.py b/applications/postcode_splitter/postcode_splitter_trigger_body.py
new file mode 100644
index 00000000..4c33f4a4
--- /dev/null
+++ b/applications/postcode_splitter/postcode_splitter_trigger_body.py
@@ -0,0 +1,11 @@
+from uuid import UUID
+
+from pydantic import BaseModel, ConfigDict
+
+
+class PostcodeSplitterTriggerBody(BaseModel):
+    """Trigger payload: task and sub-task UUIDs plus an S3 URI string; extra keys are accepted."""
+    model_config = ConfigDict(extra="allow")
+    task_id: UUID
+    sub_task_id: UUID
+    s3_uri: str
diff --git a/applications/postcode_splitter/requirements.txt b/applications/postcode_splitter/requirements.txt
new file mode 100644
index 00000000..6a85a255
--- /dev/null
+++ b/applications/postcode_splitter/requirements.txt
@@ -0,0 +1,4 @@
+boto3
+pydantic
+sqlmodel
+psycopg2-binary
diff --git a/asset_list/app.py b/asset_list/app.py
index 7413c7cb..424f4df6 100644
--- a/asset_list/app.py
+++ b/asset_list/app.py
@@ -79,23 +79,23 @@ def app():
"""
data_folder = "/workspaces/model/asset_list"
- data_filename = "input.xlsx"
- sheet_name = "Handovers"
- postcode_column = "POSTCODE"
- address1_column = "Full Addres"
+ data_filename = "hyde.xlsx"
+ sheet_name = "AddressProfilingResults"
+ postcode_column = "Postcode"
+ address1_column = "Address"
address1_method = None
- fulladdress_column = "Full Addres"
+ fulladdress_column = "Postcode"
address_cols_to_concat = []
missing_postcodes_method = None
landlord_year_built = None
- landlord_os_uprn = "domna_found_uprn"
- landlord_property_type = "PROPERTY TYPE" # Good to include if landlord gave
- landlord_built_form = "Type Description" # Good to include if landlord gave
+ landlord_os_uprn = None
+ landlord_property_type = "Property Type" # Good to include if landlord gave
+ landlord_built_form = None # Good to include if landlord gave
landlord_wall_construction = None
landlord_roof_construction = None
landlord_heating_system = None
landlord_existing_pv = None
- landlord_property_id = "PROP REF"
+ landlord_property_id = "Organisation Reference"
landlord_sap = None
outcomes_filename = None
outcomes_sheetname = None
@@ -469,8 +469,3 @@ def app():
writer, sheet_name="Duplicate Properties", index=False
)
-
-
-
-for key,value in dict.items():
- lsakjfldsa
\ No newline at end of file
diff --git a/backend/address2UPRN/handler/Dockerfile b/backend/address2UPRN/handler/Dockerfile
index 07159357..7d174152 100644
--- a/backend/address2UPRN/handler/Dockerfile
+++ b/backend/address2UPRN/handler/Dockerfile
@@ -6,11 +6,13 @@ ARG DEV_DB_HOST
ARG DEV_DB_PORT
ARG DEV_DB_NAME
ARG EPC_AUTH_TOKEN
+ARG OPEN_EPC_API_TOKEN
ENV DB_HOST=${DEV_DB_HOST}
ENV DB_PORT=${DEV_DB_PORT}
ENV DB_NAME=${DEV_DB_NAME}
ENV EPC_AUTH_TOKEN=${EPC_AUTH_TOKEN}
+ENV OPEN_EPC_API_TOKEN=${OPEN_EPC_API_TOKEN}
# Set working directory (Lambda task root)
diff --git a/backend/address2UPRN/handler/requirements.txt b/backend/address2UPRN/handler/requirements.txt
index 6ef41b2d..02aaefba 100644
--- a/backend/address2UPRN/handler/requirements.txt
+++ b/backend/address2UPRN/handler/requirements.txt
@@ -8,4 +8,5 @@ boto3==1.35.44
sqlmodel
sqlalchemy==2.0.36
psycopg2-binary==2.9.10
-pydantic-settings==2.6.0
\ No newline at end of file
+pydantic-settings==2.6.0
+httpx
\ No newline at end of file
diff --git a/backend/address2UPRN/tests/test_csv.py b/backend/address2UPRN/tests/test_csv.py
index 73d94388..5c97e691 100644
--- a/backend/address2UPRN/tests/test_csv.py
+++ b/backend/address2UPRN/tests/test_csv.py
@@ -12,12 +12,21 @@ FIXTURE_PATH = Path(__file__).parent / "test_data.csv"
# Each parametrized case fires at least one EPC request; without throttling,
# GitHub-hosted runners burst fast enough to hit 429s.
EPC_THROTTLE_SECONDS = 1.0
+EPC_LONG_PAUSE_EVERY = 100
+EPC_LONG_PAUSE_SECONDS = 5.0
+
+_epc_request_count = 0
@pytest.fixture(autouse=True)
def _throttle_epc_requests():
+ global _epc_request_count
yield
- time.sleep(EPC_THROTTLE_SECONDS)
+ _epc_request_count += 1
+ if _epc_request_count % EPC_LONG_PAUSE_EVERY == 0:
+ time.sleep(EPC_LONG_PAUSE_SECONDS)
+ else:
+ time.sleep(EPC_THROTTLE_SECONDS)
def load_test_cases():
diff --git a/backend/address2UPRN/tests/test_data.csv b/backend/address2UPRN/tests/test_data.csv
index 408edc29..1c1ce58a 100644
--- a/backend/address2UPRN/tests/test_data.csv
+++ b/backend/address2UPRN/tests/test_data.csv
@@ -364,4 +364,7 @@ FLAT B 158 LEAHURST ROAD,SE13 5NL,100021976974
164a Victoria Square,M4 5FA,77211315
165a Victoria Square,M4 5FA,77211316
166a Victoria Square,M4 5FA,None
-"FLAT 3; 42 MORETON ROAD, SOUTH CROYDON, SURREY",CR2 7DL,None
\ No newline at end of file
+"FLAT 3; 42 MORETON ROAD, SOUTH CROYDON, SURREY",CR2 7DL,None
+71A Stoneleigh Avenue,NE12 8NP,None
+71B Stoneleigh Avenue,NE12 8NP,None
+71 Stoneleigh Avenue,NE12 8NP,47086009
\ No newline at end of file
diff --git a/backend/app/config.py b/backend/app/config.py
index bdfc9ace..fcfb6d5b 100644
--- a/backend/app/config.py
+++ b/backend/app/config.py
@@ -86,6 +86,8 @@ class Settings(BaseSettings):
# Pas Hub
PASHUB_EMAIL: Optional[str] = None
PASHUB_PASSWORD: Optional[str] = None
+ PASHUB_COORDINATION_EMAIL: Optional[str] = None
+ PASHUB_COORDINATION_PASSWORD: Optional[str] = None
# Optional AWS creds (only required in local)
AWS_ACCESS_KEY_ID: Optional[str] = None
diff --git a/backend/app/db/functions/magic_plan_functions.py b/backend/app/db/functions/magic_plan_functions.py
index 9400f36f..143e4172 100644
--- a/backend/app/db/functions/magic_plan_functions.py
+++ b/backend/app/db/functions/magic_plan_functions.py
@@ -14,15 +14,15 @@ from backend.app.db.models.magic_plan import (
)
-def save_plan(session: Session, plan: Plan) -> None:
- plan_id: int = _upsert_plan(session, plan)
+def save_plan(session: Session, plan: Plan, uploaded_file_id: int) -> None:
+ plan_id: int = _upsert_plan(session, plan, uploaded_file_id)
_delete_children(session, plan_id)
floor_ids: list[int] = _insert_floors(session, plan.floors, plan_id)
room_ids: list[int] = _insert_rooms(session, plan.floors, floor_ids)
_insert_windows_and_doors(session, plan.floors, room_ids)
-def _upsert_plan(session: Session, plan: Plan) -> int:
+def _upsert_plan(session: Session, plan: Plan, uploaded_file_id: int) -> int:
stmt = (
pg_insert(MagicPlanPlanModel)
.values(
@@ -30,6 +30,7 @@ def _upsert_plan(session: Session, plan: Plan) -> int:
name=plan.name,
address=plan.address,
postcode=plan.postcode,
+ uploaded_file_id=uploaded_file_id,
)
.on_conflict_do_update(
index_elements=["magic_plan_uid"],
@@ -37,6 +38,7 @@ def _upsert_plan(session: Session, plan: Plan) -> int:
"name": plan.name,
"address": plan.address,
"postcode": plan.postcode,
+ "uploaded_file_id": uploaded_file_id,
},
)
.returning(col(MagicPlanPlanModel.id))
diff --git a/backend/app/db/functions/tests/test_magic_plan_functions.py b/backend/app/db/functions/tests/test_magic_plan_functions.py
index e58d0528..0b93685c 100644
--- a/backend/app/db/functions/tests/test_magic_plan_functions.py
+++ b/backend/app/db/functions/tests/test_magic_plan_functions.py
@@ -36,7 +36,7 @@ def _count(session: Session, model: type[SQLModel]) -> int:
def test_plan_row_present_after_save(db_session: Session, domain_plan: Plan) -> None:
# Act
- save_plan(db_session, domain_plan)
+ save_plan(db_session, domain_plan, 1)
# Assert
assert _count(db_session, MagicPlanPlanModel) == 1
@@ -45,7 +45,7 @@ def test_floor_count_matches_domain(db_session: Session, domain_plan: Plan) -> N
# Arrange
expected = len(domain_plan.floors)
# Act
- save_plan(db_session, domain_plan)
+ save_plan(db_session, domain_plan, 1)
# Assert
assert _count(db_session, MagicPlanFloorModel) == expected
@@ -54,7 +54,7 @@ def test_room_count_matches_domain(db_session: Session, domain_plan: Plan) -> No
# Arrange
expected = sum(len(f.rooms) for f in domain_plan.floors)
# Act
- save_plan(db_session, domain_plan)
+ save_plan(db_session, domain_plan, 1)
# Assert
assert _count(db_session, MagicPlanRoomModel) == expected
@@ -63,7 +63,7 @@ def test_window_count_matches_domain(db_session: Session, domain_plan: Plan) ->
# Arrange
expected = sum(len(r.windows) for f in domain_plan.floors for r in f.rooms)
# Act
- save_plan(db_session, domain_plan)
+ save_plan(db_session, domain_plan, 1)
# Assert
assert _count(db_session, MagicPlanWindowModel) == expected
@@ -72,15 +72,15 @@ def test_door_count_matches_domain(db_session: Session, domain_plan: Plan) -> No
# Arrange
expected = sum(len(r.doors) for f in domain_plan.floors for r in f.rooms)
# Act
- save_plan(db_session, domain_plan)
+ save_plan(db_session, domain_plan, 1)
# Assert
assert _count(db_session, MagicPlanDoorModel) == expected
def test_save_plan_idempotent(db_session: Session, domain_plan: Plan) -> None:
# Act — call twice within the same session
- save_plan(db_session, domain_plan)
- save_plan(db_session, domain_plan)
+ save_plan(db_session, domain_plan, 1)
+ save_plan(db_session, domain_plan, 1)
# Assert — same row counts as a single call
assert _count(db_session, MagicPlanPlanModel) == 1
assert _count(db_session, MagicPlanFloorModel) == len(domain_plan.floors)
@@ -93,3 +93,23 @@ def test_save_plan_idempotent(db_session: Session, domain_plan: Plan) -> None:
assert _count(db_session, MagicPlanDoorModel) == sum(
len(r.doors) for f in domain_plan.floors for r in f.rooms
)
+
+
+def test_uploaded_file_id_stored_after_save(db_session: Session, domain_plan: Plan) -> None:
+ # Act
+ save_plan(db_session, domain_plan, 1)
+ # Assert
+ row = db_session.execute(select(MagicPlanPlanModel)).scalar_one()
+ assert row.uploaded_file_id == 1
+
+
+def test_save_plan_updates_uploaded_file_id_on_reingest(
+ db_session: Session, domain_plan: Plan
+) -> None:
+ # Arrange
+ save_plan(db_session, domain_plan, 1)
+ # Act
+ save_plan(db_session, domain_plan, 2)
+ # Assert
+ row = db_session.execute(select(MagicPlanPlanModel)).scalar_one()
+ assert row.uploaded_file_id == 2
diff --git a/backend/app/db/models/magic_plan.py b/backend/app/db/models/magic_plan.py
index 38e9de18..77ca52fd 100644
--- a/backend/app/db/models/magic_plan.py
+++ b/backend/app/db/models/magic_plan.py
@@ -11,6 +11,7 @@ class MagicPlanPlanModel(SQLModel, table=True):
name: Optional[str] = None
address: Optional[str] = None
postcode: Optional[str] = None
+ uploaded_file_id: Optional[int] = Field(default=None)
class MagicPlanFloorModel(SQLModel, table=True):
diff --git a/backend/app/db/models/uploaded_file.py b/backend/app/db/models/uploaded_file.py
index c629f574..b6a73d5d 100644
--- a/backend/app/db/models/uploaded_file.py
+++ b/backend/app/db/models/uploaded_file.py
@@ -18,10 +18,14 @@ class FileTypeEnum(enum.Enum):
ECMK_RD_SAP_SITE_NOTE = "ecmk_rd_sap_site_note"
ECMK_SURVEY_XML = "ecmk_survey_xml"
MAGIC_PLAN_JSON = "magic_plan_json"
+ IMPROVEMENT_OPTION_EVALUATION = "improvement_option_evaluation"
+ MEDIUM_TERM_IMPROVEMENT_PLAN = "medium_term_improvement_plan"
+ RETROFIT_DESIGN_DOC = "retrofit_design_doc"
class FileSourceEnum(enum.Enum):
PAS_HUB = "pas hub"
+ COORDINATION_HUB = "coordination_hub"
SHAREPOINT = "sharepoint"
HUBSPOT = "hubspot"
ECMK = "ecmk"
diff --git a/backend/condition/handler/Dockerfile b/backend/condition/handler/Dockerfile
index 71556895..fa130573 100644
--- a/backend/condition/handler/Dockerfile
+++ b/backend/condition/handler/Dockerfile
@@ -32,6 +32,7 @@ COPY utils/ utils/
COPY backend/condition/ backend/condition/
COPY backend/app/db/models/condition.py backend/app/db/models/condition.py
+COPY backend/app/db/base.py backend/app/db/base.py
COPY backend/app/db/connection.py backend/app/db/connection.py
COPY backend/app/config.py backend/app/config.py
diff --git a/backend/epc_client/epc_client_service.py b/backend/epc_client/epc_client_service.py
index 86caeea3..72dbf142 100644
--- a/backend/epc_client/epc_client_service.py
+++ b/backend/epc_client/epc_client_service.py
@@ -47,8 +47,14 @@ class EpcClientService:
latest = max(results, key=lambda r: r.registration_date)
return self.get_by_certificate_number(latest.certificate_number)
+    @staticmethod
+    def _normalise_postcode(postcode: str) -> str:
+        """Return the postcode uppercased with all whitespace (spaces, tabs, newlines, NBSP) removed."""
+        return "".join(postcode.split()).upper()
+
def search_by_postcode(self, postcode: str) -> list[EpcSearchResult]:
- return call_with_retry(lambda: self._search(postcode=postcode))
+ normalised = self._normalise_postcode(postcode)
+ return call_with_retry(lambda: self._search(postcode=normalised))
# ------------------------------------------------------------------
# Private helperEpcRateLimpolarss
diff --git a/backend/magic_plan/magic_plan_service.py b/backend/magic_plan/magic_plan_service.py
index 22e19ddf..8a75c716 100644
--- a/backend/magic_plan/magic_plan_service.py
+++ b/backend/magic_plan/magic_plan_service.py
@@ -1,7 +1,7 @@
import gzip
import json
from datetime import datetime, timezone
-from typing import Optional
+from typing import Optional, cast
from datatypes.magicplan.api.response import MagicPlanPlan, PlanSummary
from datatypes.magicplan.domain.mapper import map_plan
@@ -55,8 +55,9 @@ class MagicPlanService:
)
with db_session() as session:
- save_plan(session, plan)
session.add(uploaded_file)
+ session.flush()
+ save_plan(session, plan, cast(int, uploaded_file.id))
return plan
diff --git a/backend/magic_plan/tests/test_magic_plan_service.py b/backend/magic_plan/tests/test_magic_plan_service.py
index 158cf4d6..a2302ab4 100644
--- a/backend/magic_plan/tests/test_magic_plan_service.py
+++ b/backend/magic_plan/tests/test_magic_plan_service.py
@@ -271,3 +271,38 @@ def test_run_creates_uploaded_file_record(
assert uploaded_file.s3_upload_timestamp is not None
assert uploaded_file.uprn == 100023336956
assert uploaded_file.hubspot_deal_id == "deal-789"
+
+
+def test_run_passes_flushed_uploaded_file_id_to_save_plan(
+ mock_client: MagicMock,
+ plan_summary: PlanSummary,
+) -> None:
+ # Arrange
+ mock_client.get_plans.return_value = [plan_summary]
+ service = _make_service(mock_client)
+ mock_session = MagicMock()
+ added_objects: list = []
+
+ mock_session.add.side_effect = added_objects.append
+
+ def simulate_flush() -> None:
+ for obj in added_objects:
+ if isinstance(obj, UploadedFile):
+ obj.id = 42
+
+ mock_session.flush.side_effect = simulate_flush
+
+ with patch(
+ "backend.magic_plan.magic_plan_service.find_matching_plan",
+ return_value=plan_summary,
+ ), patch("backend.magic_plan.magic_plan_service.save_plan") as mock_save, patch(
+ "backend.magic_plan.magic_plan_service.db_session"
+ ) as mock_db, patch(
+ "backend.magic_plan.magic_plan_service.save_data_to_s3"
+ ):
+ mock_db.return_value.__enter__.return_value = mock_session
+ # Act
+ service.run(_make_request())
+
+ # Assert
+ assert mock_save.call_args[0][2] == 42
diff --git a/backend/pashub_fetcher/core_files.py b/backend/pashub_fetcher/core_files.py
index 4da10661..e63511eb 100644
--- a/backend/pashub_fetcher/core_files.py
+++ b/backend/pashub_fetcher/core_files.py
@@ -14,9 +14,12 @@ class CoreFiles(Enum):
PAR_PHOTOPACK = "PAR Photo Pack"
PAS2023_PROPERTY = "PAS 2023 Property Assessment Report"
PAS2023_OCCUPANCY = "PAS 2023 Occupancy Assessment Report"
+ IMPROVEMENT_OPTION_EVALUATION = "Improvement Option Evaluation"
+ MEDIUM_TERM_IMPROVEMENT_PLAN = "Medium Term Improvement Plan"
+ RETROFIT_DESIGN_DOC = "Retrofit Design Doc"
-CORE_TO_FILETYPE_MAP = {
+_CORE_FILE_TO_FILE_TYPE: dict[CoreFiles, str] = {
CoreFiles.PHOTOPACK: FileTypeEnum.PHOTO_PACK.value,
CoreFiles.SITENOTE: FileTypeEnum.SITE_NOTE.value,
CoreFiles.RDSAP_SITENOTE: FileTypeEnum.RD_SAP_SITE_NOTE.value,
@@ -26,11 +29,49 @@ CORE_TO_FILETYPE_MAP = {
CoreFiles.PAR_PHOTOPACK: FileTypeEnum.PAR_PHOTO_PACK.value,
CoreFiles.PAS2023_PROPERTY: FileTypeEnum.PAS_2023_PROPERTY.value,
CoreFiles.PAS2023_OCCUPANCY: FileTypeEnum.PAS_2023_OCCUPANCY.value,
+ CoreFiles.IMPROVEMENT_OPTION_EVALUATION: FileTypeEnum.IMPROVEMENT_OPTION_EVALUATION.value,
+ CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN: FileTypeEnum.MEDIUM_TERM_IMPROVEMENT_PLAN.value,
+ CoreFiles.RETROFIT_DESIGN_DOC: FileTypeEnum.RETROFIT_DESIGN_DOC.value,
}
-def infer_file_type(filename: str) -> Optional[str]:
- for core_file, file_type in CORE_TO_FILETYPE_MAP.items():
+def get_core_file_type(
+ filename: str, evidence_category: Optional[str] = None
+) -> Optional[CoreFiles]:
+ # Identify retrofit design doc using evidence category as the name is possibly unreliable.
+ # We might change to always use evidence category, but needs more investigation
+ if evidence_category is not None and evidence_category.lower() == "retrofit design":
+ return CoreFiles.RETROFIT_DESIGN_DOC
+
+ if CoreFiles.IMPROVEMENT_OPTION_EVALUATION.value in filename:
+ return CoreFiles.IMPROVEMENT_OPTION_EVALUATION
+
+ if CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN.value in filename:
+ return CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN
+
+ if evidence_category is None and "-OSM-" in filename and "DR-N-A" in filename:
+ return CoreFiles.RETROFIT_DESIGN_DOC
+
+ _prefix_skip = {
+ CoreFiles.RETROFIT_DESIGN_DOC,
+ CoreFiles.IMPROVEMENT_OPTION_EVALUATION,
+ CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN,
+ }
+
+ for core_file in CoreFiles:
+ if core_file in _prefix_skip:
+ continue
+
if filename.startswith(core_file.value):
- return file_type
+ return core_file
+
return None
+
+
+def get_file_type_string(filename: str) -> Optional[str]:
+    """Return the FileTypeEnum value string for *filename*, or None when it is unrecognised."""
+    matched = get_core_file_type(filename)
+    if matched is not None:
+        return _CORE_FILE_TO_FILE_TYPE[matched]
+    # No CoreFiles member matched this filename.
+    return None
diff --git a/backend/pashub_fetcher/handler/handler.py b/backend/pashub_fetcher/handler/handler.py
index cd0c8113..626ce59d 100644
--- a/backend/pashub_fetcher/handler/handler.py
+++ b/backend/pashub_fetcher/handler/handler.py
@@ -1,9 +1,11 @@
-from typing import Any, Dict, List
+from typing import Any, Callable, Dict, List, Optional
from backend.app.config import get_settings
-from backend.pashub_fetcher.pashub_client import PashubClient, UnauthorizedError
+from backend.pashub_fetcher.pashub_client import PashubClient
from backend.pashub_fetcher.pashub_service import PashubService
-from backend.pashub_fetcher.pashub_to_ara_trigger_request import PashubToAraTriggerRequest
+from backend.pashub_fetcher.pashub_to_ara_trigger_request import (
+ PashubToAraTriggerRequest,
+)
from backend.pashub_fetcher.token_getter import get_token_from_local_storage
from backend.app.db.models.tasks import SourceEnum
from backend.utils.subtasks import task_handler
@@ -28,38 +30,41 @@ def handler(body: Dict[str, Any], context: Any) -> List[str]:
settings = get_settings()
- pas_hub_email = settings.PASHUB_EMAIL
- pas_hub_password = settings.PASHUB_PASSWORD
+ pashub_email = settings.PASHUB_EMAIL
+ pashub_password = settings.PASHUB_PASSWORD
- if (not pas_hub_email) or (not pas_hub_password):
+ coordination_hub_email = settings.PASHUB_COORDINATION_EMAIL
+ coordination_hub_password = settings.PASHUB_COORDINATION_PASSWORD
+ coordination_client_factory: Optional[Callable[[], PashubClient]] = None
+
+ if (not pashub_email) or (not pashub_password):
raise ValueError("Pas Hub credentials not provided")
sharepoint_client = DomnaSharepointClient(
sharepoint_location=DomnaSites.SOCIAL_HOUSING_WAVE_3
)
+ if coordination_hub_email and coordination_hub_password:
+ _coord_email, _coord_password = (
+ coordination_hub_email,
+ coordination_hub_password,
+ )
+ coordination_client_factory = lambda: get_pashub_client(
+ _coord_email, _coord_password
+ )
+
logger.debug("Validating request body")
payload = PashubToAraTriggerRequest.model_validate(body)
logger.debug("Successfully validated request body")
service = PashubService(
- pashub_client=get_pashub_client(pas_hub_email, pas_hub_password),
+ pashub_client=get_pashub_client(pashub_email, pashub_password),
sharepoint_client=sharepoint_client,
s3_bucket=S3_BUCKET,
+ coordination_client_factory=coordination_client_factory,
)
- try:
- files: List[str] = service.run(payload)
- except UnauthorizedError:
- logger.warning("Token expired - refreshing")
-
- service = PashubService(
- pashub_client=get_pashub_client(pas_hub_email, pas_hub_password),
- sharepoint_client=sharepoint_client,
- s3_bucket=S3_BUCKET,
- )
-
- files = service.run(payload)
+ files: List[str] = service.run(payload)
logger.info(f"Saved {len(files)} files")
diff --git a/backend/pashub_fetcher/pashub_client.py b/backend/pashub_fetcher/pashub_client.py
index 20b8590d..79d81838 100644
--- a/backend/pashub_fetcher/pashub_client.py
+++ b/backend/pashub_fetcher/pashub_client.py
@@ -5,12 +5,11 @@ from datetime import datetime
import requests
-from backend.pashub_fetcher.core_files import CoreFiles
+from backend.pashub_fetcher.core_files import CoreFiles, get_core_file_type
from backend.pashub_fetcher.evidence_file_data import EvidenceFileData
from backend.pashub_fetcher.evidence_metadata import EvidenceMetadata
from utils.logger import setup_logger
-
logger = setup_logger()
@@ -75,6 +74,10 @@ class PashubClient:
logger.info(f"Getting UPRN for job ID {job_id}")
url = f"{self.base}/jobs/{job_id}"
+        # Log header names only: the values may include the auth token and must not reach the logs.
+        header_names = sorted(self.session.headers)
+        logger.debug(f"About to make API request with session headers: {header_names}")
+
r = self.session.get(url)
if r.status_code == 401:
raise UnauthorizedError("Token expired or invalid")
@@ -83,15 +86,12 @@ class PashubClient:
try:
return r.json()["uprn"]
- except Exception:
+ except Exception as e:
+ logger.warning(
+ f"Failed to get UPRN for Job ID {job_id} with exception: {e}"
+ )
return None
- def _get_core_file_type(self, file: EvidenceFileData) -> Optional[CoreFiles]:
- for core_file in CoreFiles:
- if file.file_name.startswith(core_file.value):
- return core_file
- return None
-
def _select_latest_core_files(
self,
files: List[EvidenceFileData],
@@ -99,7 +99,9 @@ class PashubClient:
grouped: Dict[CoreFiles, List[EvidenceFileData]] = defaultdict(list)
for file in files:
- core_type = self._get_core_file_type(file)
+ core_type: Optional[CoreFiles] = get_core_file_type(
+ file.file_name, file.evidence_category
+ )
if not core_type:
continue
grouped[core_type].append(file)
@@ -107,6 +109,9 @@ class PashubClient:
latest_files: Dict[CoreFiles, EvidenceFileData] = {}
for core_type, group in grouped.items():
+ if core_type == CoreFiles.RETROFIT_DESIGN_DOC and len(group) > 1:
+ osm_candidates = [f for f in group if "-OSM-" in f.file_name]
+ group = osm_candidates if osm_candidates else group
latest = max(group, key=lambda f: datetime.fromisoformat(f.created_utc))
latest_files[core_type] = latest
diff --git a/backend/pashub_fetcher/pashub_service.py b/backend/pashub_fetcher/pashub_service.py
index 316902f4..f7f6ccd9 100644
--- a/backend/pashub_fetcher/pashub_service.py
+++ b/backend/pashub_fetcher/pashub_service.py
@@ -1,6 +1,6 @@
import os
from datetime import datetime, timezone
-from typing import List, NamedTuple, Optional, cast
+from typing import Callable, List, NamedTuple, Optional, cast
from backend.app.db.connection import db_session
from backend.app.db.models.uploaded_file import (
@@ -10,8 +10,8 @@ from backend.app.db.models.uploaded_file import (
)
from backend.documents_parser.db_writer import save_epc_property_data
from backend.documents_parser.parser import parse_site_notes_pdf
-from backend.pashub_fetcher.core_files import infer_file_type
-from backend.pashub_fetcher.pashub_client import PashubClient
+from backend.pashub_fetcher.core_files import get_file_type_string
+from backend.pashub_fetcher.pashub_client import PashubClient, UnauthorizedError
from backend.pashub_fetcher.pashub_to_ara_trigger_request import (
PashubToAraTriggerRequest,
)
@@ -36,17 +36,37 @@ class PashubService:
pashub_client: PashubClient,
sharepoint_client: DomnaSharepointClient,
s3_bucket: str,
+ coordination_client_factory: Optional[Callable[[], PashubClient]] = None,
) -> None:
self._pashub_client = pashub_client
self._sharepoint_client = sharepoint_client
self._s3_bucket = s3_bucket
+ self._coordination_client_factory = coordination_client_factory
+ self._coordination_client: Optional[PashubClient] = None
+
+    def _get_coordination_client(self) -> PashubClient:
+        if self._coordination_client_factory is None:  # no factory was passed to __init__
+            raise UnauthorizedError("No coordination client factory configured")
+        if self._coordination_client is None:  # first call: build once, then reuse
+            self._coordination_client = self._coordination_client_factory()
+        return self._coordination_client
def run(self, request: PashubToAraTriggerRequest) -> List[str]:
job_id = request.pashub_job_id
+ active_client = self._pashub_client
+
+ if request.uprn:
+ uprn: Optional[str] = request.uprn
+ else:
+ try:
+ uprn = active_client.get_uprn_by_job_id(job_id)
+ except UnauthorizedError:
+ logger.info(
+ f"PasHub credentials unauthorized for job {job_id}; retrying with CoordinationHub credentials"
+ )
+ active_client = self._get_coordination_client()
+ uprn = active_client.get_uprn_by_job_id(job_id)
- uprn: Optional[str] = request.uprn or self._pashub_client.get_uprn_by_job_id(
- job_id
- )
hubspot_deal_id: Optional[str] = request.hubspot_deal_id
if uprn:
@@ -54,14 +74,25 @@ class PashubService:
else:
logger.info(f"No UPRN found for job {job_id}")
- job_files: List[str] = self._pashub_client.get_core_evidence_files_by_job_id(
- job_id
- )
+ try:
+ job_files: List[str] = active_client.get_core_evidence_files_by_job_id(
+ job_id
+ )
+ except UnauthorizedError:
+ if active_client is not self._pashub_client:
+ raise
+ active_client = self._get_coordination_client()
+ job_files = active_client.get_core_evidence_files_by_job_id(job_id)
if uprn or hubspot_deal_id:
logger.info("Uploading files to s3")
+ file_source = (
+ FileSourceEnum.PAS_HUB
+ if active_client is self._pashub_client
+ else FileSourceEnum.COORDINATION_HUB
+ )
upload_records = self._upload_to_s3_and_update_db(
- job_files, uprn, hubspot_deal_id
+ job_files, uprn, hubspot_deal_id, file_source
)
self._save_site_notes(upload_records)
@@ -83,6 +114,7 @@ class PashubService:
job_files: List[str],
uprn: Optional[str],
hubspot_deal_id: Optional[str],
+ file_source: FileSourceEnum,
) -> List[_FileUploadRecord]:
if not uprn and not hubspot_deal_id:
return []
@@ -108,8 +140,8 @@ class PashubService:
s3_upload_timestamp=datetime.now(timezone.utc),
uprn=int(uprn) if uprn else None,
hubspot_deal_id=hubspot_deal_id,
- file_source=FileSourceEnum.PAS_HUB.value,
- file_type=infer_file_type(filename),
+ file_source=file_source.value,
+ file_type=get_file_type_string(filename),
)
file_paths.append(file_path)
uploaded_files.append(uploaded_file)
diff --git a/backend/pashub_fetcher/pashub_to_ara_trigger_request.py b/backend/pashub_fetcher/pashub_to_ara_trigger_request.py
index 518a8dc3..715a09f8 100644
--- a/backend/pashub_fetcher/pashub_to_ara_trigger_request.py
+++ b/backend/pashub_fetcher/pashub_to_ara_trigger_request.py
@@ -1,11 +1,10 @@
+import re
from typing import Optional
from pydantic import BaseModel
class PashubToAraTriggerRequest(BaseModel):
- pashub_link: (
- str # e.g. https://pashub.net/jobs/12345-abcd-1234-abcd-12345abcde/details
- )
+ pashub_link: str # e.g. https://pashub.net/jobs/{id}/details, /jobs/{id}/evidence/view, /jobs/{id}
address: Optional[str] = None
sharepoint_link: Optional[str] = None
@@ -17,4 +16,7 @@ class PashubToAraTriggerRequest(BaseModel):
@property
def pashub_job_id(self) -> str:
- return self.pashub_link.split("/")[-2]
+ match = re.search(r"/jobs/([^/]+)", self.pashub_link)
+ if not match:
+ raise ValueError(f"No job ID found in PasHub link: {self.pashub_link}")
+ return match.group(1)
diff --git a/backend/pashub_fetcher/tests/test_core_files.py b/backend/pashub_fetcher/tests/test_core_files.py
new file mode 100644
index 00000000..3c1d11b8
--- /dev/null
+++ b/backend/pashub_fetcher/tests/test_core_files.py
@@ -0,0 +1,185 @@
+from backend.pashub_fetcher.core_files import (
+ CoreFiles,
+ get_core_file_type,
+ get_file_type_string,
+)
+
+
+def test_file_type_for_photopack():
+ assert get_file_type_string("Photopack_123456_V1.pdf") == "photo_pack"
+
+
+def test_file_type_for_sitenote():
+ assert get_file_type_string("SiteNote_123456_V1.pdf") == "site_note"
+
+
+def test_file_type_for_rdsap_sitenote():
+ assert (
+ get_file_type_string("RdSAP_SiteNote_9510890_V1_Assessmet.pdf")
+ == "rd_sap_site_note"
+ )
+
+
+def test_file_type_for_pas2023_ventilation():
+ assert (
+ get_file_type_string("PAS 2023 Ventilation Assessment Report_123456.pdf")
+ == "pas_2023_ventilation"
+ )
+
+
+def test_file_type_for_pas2023_condition():
+ assert (
+ get_file_type_string("PAS 2023 Condition Report_123456.pdf")
+ == "pas_2023_condition"
+ )
+
+
+def test_file_type_for_pas_significance():
+ assert get_file_type_string("PAS Significance_123456.pdf") == "pas_significance"
+
+
+def test_file_type_for_par_photopack():
+ assert (
+ get_file_type_string("PAR Photo Pack_95101890_V2_Assessment.pdf")
+ == "par_photo_pack"
+ )
+
+
+def test_file_type_for_pas2023_property():
+ assert (
+ get_file_type_string("PAS 2023 Property Assessment Report_123456.pdf")
+ == "pas_2023_property"
+ )
+
+
+def test_file_type_for_pas2023_occupancy():
+ assert (
+ get_file_type_string("PAS 2023 Occupancy Assessment Report_123456.pdf")
+ == "pas_2023_occupancy"
+ )
+
+
+def test_file_type_for_improvement_option_evaluation():
+ # filename: "{job_id} - {postcode} - Improvement Option Evaluation.pdf"
+ assert (
+ get_file_type_string("6000802 - NG4 4HD - Improvement Option Evaluation.pdf")
+ == "improvement_option_evaluation"
+ )
+
+
+def test_file_type_for_medium_term_improvement_plan():
+ # filename: "{job_id} - {postcode} - Medium Term Improvement Plan IOE.pdf"
+ assert (
+ get_file_type_string(
+ "60800802 - NG4 4HD - Medium Term Improvement Plan IOE.pdf"
+ )
+ == "medium_term_improvement_plan"
+ )
+
+
+def test_file_type_for_retrofit_design_doc():
+ assert (
+ get_file_type_string("2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf")
+ == "retrofit_design_doc"
+ )
+ assert (
+ get_file_type_string("2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf")
+ == "retrofit_design_doc"
+ )
+
+
+# ---------------------------------------------------------------------------
+# get_core_file_type
+# ---------------------------------------------------------------------------
+
+
+def test_core_file_for_evidence_category_match_is_case_insensitive() -> None:
+ # Arrange
+ filename = "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf"
+
+ # Act
+ result = get_core_file_type(filename, evidence_category="Retrofit Design")
+
+ # Assert
+ assert result == CoreFiles.RETROFIT_DESIGN_DOC
+
+
+def test_core_file_for_evidence_category_returns_retrofit_design_doc() -> None:
+ # Arrange
+ filename = "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf"
+
+ # Act
+ result = get_core_file_type(filename, evidence_category="retrofit design")
+
+ # Assert
+ assert result == CoreFiles.RETROFIT_DESIGN_DOC
+
+
+def test_core_file_for_ioe_substring_returns_improvement_option_evaluation() -> None:
+ # Arrange
+ filename = "6000802 - NG4 4HD - Improvement Option Evaluation.pdf"
+
+ # Act
+ result = get_core_file_type(filename)
+
+ # Assert
+ assert result == CoreFiles.IMPROVEMENT_OPTION_EVALUATION
+
+
+def test_core_file_for_mtip_substring_returns_medium_term_improvement_plan() -> None:
+ # Arrange
+ filename = "60800802 - NG4 4HD - Medium Term Improvement Plan IOE.pdf"
+
+ # Act
+ result = get_core_file_type(filename)
+
+ # Assert
+ assert result == CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN
+
+
+def test_core_file_for_osm_pattern_returns_retrofit_design_doc_without_evidence_category() -> (
+ None
+):
+ # Arrange
+ filename = "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf"
+
+ # Act
+ result = get_core_file_type(filename)
+
+ # Assert
+ assert result == CoreFiles.RETROFIT_DESIGN_DOC
+
+
+def test_core_file_for_prefix_returns_photopack() -> None:
+ # Arrange
+ filename = "Photopack_123456_V1.pdf"
+
+ # Act
+ result = get_core_file_type(filename)
+
+ # Assert
+ assert result == CoreFiles.PHOTOPACK
+
+
+def test_core_file_for_unknown_filename_returns_none() -> None:
+ # Arrange
+ filename = "unknown_document_123.pdf"
+
+ # Act
+ result = get_core_file_type(filename)
+
+ # Assert
+ assert result is None
+
+
+def test_core_file_for_osm_fallback_does_not_fire_when_evidence_category_present() -> (
+ None
+):
+ # Arrange — OSM+DR-N-A filename but evidence_category is something other than retrofit design
+ filename = "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf"
+
+ # Act
+ result = get_core_file_type(filename, evidence_category="some other category")
+
+ # Assert
+ assert result is None
diff --git a/backend/pashub_fetcher/tests/test_pashub_client.py b/backend/pashub_fetcher/tests/test_pashub_client.py
new file mode 100644
index 00000000..34260c73
--- /dev/null
+++ b/backend/pashub_fetcher/tests/test_pashub_client.py
@@ -0,0 +1,117 @@
+# pyright: reportPrivateUsage=false
+from typing import Optional
+
+from backend.pashub_fetcher.core_files import CoreFiles
+from backend.pashub_fetcher.evidence_file_data import EvidenceFileData
+from backend.pashub_fetcher.pashub_client import PashubClient
+
+
+def make_client() -> PashubClient:
+    """Build a PashubClient with a fixed dummy token for these unit tests."""
+    client = PashubClient(token="test-token")
+    return client
+
+
+def make_file(
+    file_name: str = "unknown.pdf",
+    evidence_category: Optional[str] = None,
+    created_utc: str = "2024-01-01T00:00:00",
+) -> EvidenceFileData:
+    """Build an EvidenceFileData fixture where only name, category and creation time vary."""
+    evidence_file = EvidenceFileData(
+        # Caller-controlled fields.
+        file_name=file_name,
+        evidence_category=evidence_category,
+        created_utc=created_utc,
+        # Fixed for every fixture in this module.
+        file_id="id-1",
+        file_size=1024,
+        file_extension="pdf",
+    )
+    return evidence_file
+
+
+# ---------------------------------------------------------------------------
+# _select_latest_core_files
+# ---------------------------------------------------------------------------
+
+
+def test_select_latest_core_files_returns_single_retrofit_design_doc() -> None:
+    # Arrange: one retrofit-design file is the only candidate
+    only_candidate = make_file(
+        file_name="2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf",
+        evidence_category="retrofit design",
+        created_utc="2024-06-01T00:00:00",
+    )
+    client = make_client()
+
+    # Act
+    selected = client._select_latest_core_files([only_candidate])
+
+    # Assert
+    chosen_doc = selected[CoreFiles.RETROFIT_DESIGN_DOC]
+    assert chosen_doc.file_name == "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf"
+
+
+def test_select_latest_core_files_osm_candidate_wins_over_non_osm() -> None:
+    # Arrange - the non-OSM file has the later timestamp but is expected to lose
+    older_osm_file = make_file(
+        file_name="2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf",
+        evidence_category="retrofit design",
+        created_utc="2024-01-01T00:00:00",
+    )
+    newer_non_osm_file = make_file(
+        file_name="Retrofit Design Doc non-osm variant.pdf",
+        evidence_category="retrofit design",
+        created_utc="2024-06-01T00:00:00",
+    )
+    client = make_client()
+
+    # Act
+    selected = client._select_latest_core_files([older_osm_file, newer_non_osm_file])
+
+    # Assert
+    chosen_doc = selected[CoreFiles.RETROFIT_DESIGN_DOC]
+    assert chosen_doc.file_name == "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf"
+
+
+def test_select_latest_core_files_picks_latest_when_both_candidates_have_osm() -> None:
+    # Arrange: two OSM-named retrofit-design files that differ in creation time
+    january_file = make_file(
+        file_name="2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf",
+        evidence_category="retrofit design",
+        created_utc="2024-01-01T00:00:00",
+    )
+    june_file = make_file(
+        file_name="2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf",
+        evidence_category="retrofit design",
+        created_utc="2024-06-01T00:00:00",
+    )
+    client = make_client()
+
+    # Act
+    selected = client._select_latest_core_files([january_file, june_file])
+
+    # Assert: the file with the later created_utc is chosen
+    chosen_doc = selected[CoreFiles.RETROFIT_DESIGN_DOC]
+    assert chosen_doc.file_name == "2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf"
+
+
+def test_select_latest_core_files_falls_back_to_latest_when_no_osm_candidates() -> None:
+    # Arrange: two retrofit-design files, neither with an OSM-style name
+    first_version = make_file(
+        file_name="retrofit_design_v1.pdf",
+        evidence_category="retrofit design",
+        created_utc="2024-01-01T00:00:00",
+    )
+    second_version = make_file(
+        file_name="retrofit_design_v2.pdf",
+        evidence_category="retrofit design",
+        created_utc="2024-06-01T00:00:00",
+    )
+    client = make_client()
+
+    # Act
+    selected = client._select_latest_core_files([first_version, second_version])
+
+    # Assert: the file with the later created_utc is chosen
+    chosen_doc = selected[CoreFiles.RETROFIT_DESIGN_DOC]
+    assert chosen_doc.file_name == "retrofit_design_v2.pdf"
diff --git a/backend/pashub_fetcher/tests/test_pashub_service.py b/backend/pashub_fetcher/tests/test_pashub_service.py
index 2aff416b..1f750117 100644
--- a/backend/pashub_fetcher/tests/test_pashub_service.py
+++ b/backend/pashub_fetcher/tests/test_pashub_service.py
@@ -1,8 +1,10 @@
-from typing import Optional
+import pytest
+from typing import Any, Callable, Optional
from unittest.mock import MagicMock, call, patch
-from backend.pashub_fetcher.pashub_client import PashubClient
+from backend.app.db.models.uploaded_file import FileSourceEnum
+from backend.pashub_fetcher.pashub_client import PashubClient, UnauthorizedError
from backend.pashub_fetcher.pashub_service import PashubService
from backend.pashub_fetcher.pashub_to_ara_trigger_request import (
PashubToAraTriggerRequest,
@@ -31,11 +33,13 @@ def make_service(
pashub_client: Optional[PashubClient] = None,
sharepoint_client: Optional[DomnaSharepointClient] = None,
s3_bucket: str = "test-bucket",
+ coordination_client_factory: Optional[Callable[[], PashubClient]] = None,
) -> PashubService:
return PashubService(
pashub_client=pashub_client or MagicMock(spec=PashubClient),
sharepoint_client=sharepoint_client or MagicMock(spec=DomnaSharepointClient),
s3_bucket=s3_bucket,
+ coordination_client_factory=coordination_client_factory,
)
@@ -144,10 +148,11 @@ def test_run_persists_uploaded_file_records_to_db() -> None:
service.run(make_request(uprn="12345"))
fake_session.add_all.assert_called_once()
- added: list = fake_session.add_all.call_args[0][0]
+ added: list[Any] = fake_session.add_all.call_args[0][0]
assert len(added) == 1
assert added[0].s3_file_bucket == "test-bucket"
assert added[0].uprn == 12345
+ assert added[0].file_source == FileSourceEnum.PAS_HUB.value
# ---------------------------------------------------------------------------
@@ -225,6 +230,135 @@ def test_run_parses_and_saves_site_notes_for_rd_sap_site_note_file() -> None:
# ---------------------------------------------------------------------------
+# ---------------------------------------------------------------------------
+# run(): coordination fallback
+# ---------------------------------------------------------------------------
+
+
+def test_run_uses_coordination_client_when_pas_401_on_uprn_lookup() -> None:
+    # Primary client is rejected as soon as the UPRN lookup is attempted.
+    primary = MagicMock(spec=PashubClient)
+    primary.get_uprn_by_job_id.side_effect = UnauthorizedError()
+
+    # Coordination client answers both the UPRN lookup and the file listing.
+    coordination = MagicMock(spec=PashubClient)
+    coordination.get_uprn_by_job_id.return_value = "99999"
+    coordination.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"]
+    client_factory = MagicMock(return_value=coordination)
+
+    service = make_service(
+        pashub_client=primary,
+        coordination_client_factory=client_factory,
+    )
+
+    with (
+        patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"),
+        patch("backend.pashub_fetcher.pashub_service.db_session"),
+        patch("backend.pashub_fetcher.pashub_service.os.remove"),
+    ):
+        returned_paths = service.run(make_request())
+
+    assert returned_paths == ["/tmp/a.pdf"]
+    coordination.get_uprn_by_job_id.assert_called_once()
+    coordination.get_core_evidence_files_by_job_id.assert_called_once()
+    # The factory must be invoked exactly once for the whole run.
+    client_factory.assert_called_once()
+
+
+def test_run_uses_coordination_client_when_pas_401_on_file_listing() -> None:
+    # Primary client is rejected when listing core evidence files.
+    primary = MagicMock(spec=PashubClient)
+    primary.get_core_evidence_files_by_job_id.side_effect = UnauthorizedError()
+
+    coordination = MagicMock(spec=PashubClient)
+    coordination.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"]
+    client_factory = MagicMock(return_value=coordination)
+
+    service = make_service(
+        pashub_client=primary,
+        coordination_client_factory=client_factory,
+    )
+
+    with (
+        patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"),
+        patch("backend.pashub_fetcher.pashub_service.db_session"),
+        patch("backend.pashub_fetcher.pashub_service.os.remove"),
+    ):
+        # The request already carries a UPRN.
+        returned_paths = service.run(make_request(uprn="12345"))
+
+    assert returned_paths == ["/tmp/a.pdf"]
+    coordination.get_core_evidence_files_by_job_id.assert_called_once()
+    # No UPRN lookup should have been made on the primary client.
+    primary.get_uprn_by_job_id.assert_not_called()
+
+
+def test_run_raises_unauthorized_when_pas_401_and_no_factory() -> None:
+    # No coordination factory is supplied, so the primary client's 401 must propagate.
+    primary = MagicMock(spec=PashubClient)
+    primary.get_uprn_by_job_id.side_effect = UnauthorizedError()
+    service = make_service(pashub_client=primary)
+    request = make_request()
+
+    with pytest.raises(UnauthorizedError):
+        service.run(request)
+
+
+def test_run_raises_unauthorized_when_both_clients_401() -> None:
+    # Both the primary and the coordination client reject the UPRN lookup.
+    primary = MagicMock(spec=PashubClient)
+    coordination = MagicMock(spec=PashubClient)
+    for rejected_client in (primary, coordination):
+        rejected_client.get_uprn_by_job_id.side_effect = UnauthorizedError()
+
+    client_factory = MagicMock(return_value=coordination)
+    service = make_service(
+        pashub_client=primary,
+        coordination_client_factory=client_factory,
+    )
+    request = make_request()
+
+    with pytest.raises(UnauthorizedError):
+        service.run(request)
+
+
+def test_run_persists_coordination_hub_file_source_when_pas_401_on_uprn_lookup() -> None:
+    # Primary client fails the UPRN lookup; the coordination client serves the whole run.
+    primary = MagicMock(spec=PashubClient)
+    primary.get_uprn_by_job_id.side_effect = UnauthorizedError()
+
+    coordination = MagicMock(spec=PashubClient)
+    coordination.get_uprn_by_job_id.return_value = "99999"
+    coordination.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"]
+    client_factory = MagicMock(return_value=coordination)
+
+    service = make_service(
+        pashub_client=primary,
+        coordination_client_factory=client_factory,
+    )
+    session_double = MagicMock()
+
+    with (
+        patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"),
+        patch("backend.pashub_fetcher.pashub_service.db_session") as mock_db,
+        patch("backend.pashub_fetcher.pashub_service.os.remove"),
+    ):
+        # db_session() is used as a context manager; hand back the session double.
+        mock_db.return_value.__enter__.return_value = session_double
+        service.run(make_request())
+
+    session_double.add_all.assert_called_once()
+    persisted: list[Any] = session_double.add_all.call_args.args[0]
+    assert persisted[0].file_source == FileSourceEnum.COORDINATION_HUB.value
+
+
+def test_run_persists_coordination_hub_file_source_when_pas_401_on_file_listing() -> None:
+    # Primary client fails only at the file listing; the request already has a UPRN.
+    primary = MagicMock(spec=PashubClient)
+    primary.get_core_evidence_files_by_job_id.side_effect = UnauthorizedError()
+
+    coordination = MagicMock(spec=PashubClient)
+    coordination.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"]
+    client_factory = MagicMock(return_value=coordination)
+
+    service = make_service(
+        pashub_client=primary,
+        coordination_client_factory=client_factory,
+    )
+    session_double = MagicMock()
+
+    with (
+        patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"),
+        patch("backend.pashub_fetcher.pashub_service.db_session") as mock_db,
+        patch("backend.pashub_fetcher.pashub_service.os.remove"),
+    ):
+        # db_session() is used as a context manager; hand back the session double.
+        mock_db.return_value.__enter__.return_value = session_double
+        service.run(make_request(uprn="12345"))
+
+    session_double.add_all.assert_called_once()
+    persisted: list[Any] = session_double.add_all.call_args.args[0]
+    assert persisted[0].file_source == FileSourceEnum.COORDINATION_HUB.value
+
+
def test_run_warns_and_continues_when_site_notes_parsing_fails() -> None:
mock_client = MagicMock(spec=PashubClient)
mock_client.get_uprn_by_job_id.return_value = None
diff --git a/backend/pashub_fetcher/tests/test_pashub_to_ara_trigger_request.py b/backend/pashub_fetcher/tests/test_pashub_to_ara_trigger_request.py
new file mode 100644
index 00000000..56187350
--- /dev/null
+++ b/backend/pashub_fetcher/tests/test_pashub_to_ara_trigger_request.py
@@ -0,0 +1,51 @@
+import pytest
+
+from backend.pashub_fetcher.pashub_to_ara_trigger_request import (
+ PashubToAraTriggerRequest,
+)
+
+
+def make_request(pashub_link: str) -> PashubToAraTriggerRequest:
+    """Build a trigger request that carries only the given PasHub link."""
+    request = PashubToAraTriggerRequest(pashub_link=pashub_link)
+    return request
+
+
+def test_pashub_job_id_extracts_id_from_details_link() -> None:
+    # Arrange: job link ending in "/details"
+    details_request = make_request("https://pashub.net/jobs/job-id-123/details")
+
+    # Act
+    job_id = details_request.pashub_job_id
+
+    # Assert
+    assert job_id == "job-id-123"
+
+
+def test_pashub_job_id_raises_for_invalid_link() -> None:
+    # Arrange: a PasHub URL with no "/jobs/<id>" segment
+    dashboard_request = make_request("https://pashub.net/rcs-dashboard")
+
+    # Act / Assert: reading the property is what raises
+    with pytest.raises(ValueError):
+        _ = dashboard_request.pashub_job_id
+
+
+def test_pashub_job_id_extracts_id_from_bare_job_link() -> None:
+    # Arrange: job link with nothing after the job id
+    bare_request = make_request("https://pashub.net/jobs/job-id-123")
+
+    # Act
+    job_id = bare_request.pashub_job_id
+
+    # Assert
+    assert job_id == "job-id-123"
+
+
+def test_pashub_job_id_extracts_id_from_evidence_view_link() -> None:
+    # Arrange: job link ending in "/evidence/view"
+    evidence_request = make_request("https://pashub.net/jobs/job-id-123/evidence/view")
+
+    # Act
+    job_id = evidence_request.pashub_job_id
+
+    # Assert
+    assert job_id == "job-id-123"
diff --git a/backend/pashub_fetcher/trigger_pashub_sqs_from_file.py b/backend/pashub_fetcher/trigger_pashub_sqs_from_file.py
new file mode 100644
index 00000000..f4c03afc
--- /dev/null
+++ b/backend/pashub_fetcher/trigger_pashub_sqs_from_file.py
@@ -0,0 +1,137 @@
+import json
+import logging
+import os
+from typing import Any, Optional, cast
+
+import boto3
+from openpyxl import load_workbook
+
+from backend.app.config import get_settings
+from backend.pashub_fetcher.pashub_to_ara_trigger_request import (
+ PashubToAraTriggerRequest,
+)
+
+# Log bare messages (no level/name prefix) at INFO and above.
+logging.basicConfig(level=logging.INFO, format="%(message)s")
+logger: logging.Logger = logging.getLogger(__name__)
+
+# When True, main() only logs what it would send and makes no send_message call.
+DRY_RUN: bool = False
+
+# Deal ids (the spreadsheet's "Record ID" column) to trigger. main() applies this
+# filter only when the set is non-empty; an empty set leaves every row selected.
+DEAL_ID_FILTER: frozenset[str] = frozenset(
+    {
+        "379452094688",
+        "379466504437",
+        "379660170452",
+        "380016925932",
+        "379848065216",
+        "379466504434",
+        "379452094690",
+        "379965924567",
+        "380016925923",
+        "379792072898",
+        "379654754502",
+        "379560262861",
+        "379969670369",
+        "379248717001",
+        "379971468493",
+        "379999888607",
+        "379606372580",
+        "379969603797",
+        "379967743213",
+        "379263155434",
+        "379855267025",
+        "379889899719",
+        "379071064307",
+        "379867925741",
+    }
+)
+
+# The export workbook is resolved relative to this script's own directory.
+EXCEL_PATH: str = os.path.join(
+    os.path.dirname(__file__),
+    "united-infrastructure-exports-all-deals-2026-05-14.xlsx",
+)
+
+
+def _record_id_to_str(raw: Any) -> Optional[str]:
+    """Render a "Record ID" cell as plain text, or None for an empty cell.
+
+    A numeric cell can be read back as a float; "379452094688.0" would never
+    match the digit-only strings in DEAL_ID_FILTER, so integer-valued floats
+    are converted to int before being stringified.
+    """
+    if raw is None:
+        return None
+    if isinstance(raw, float) and raw.is_integer():
+        raw = int(raw)
+    return str(raw).strip()
+
+
+def _optional_text(raw: Any) -> Optional[str]:
+    """Return the stripped string form of a cell value, or None for an empty cell."""
+    return None if raw is None else str(raw).strip()
+
+
+def _build_requests(excel_path: str) -> list[PashubToAraTriggerRequest]:
+    """Read the first worksheet of *excel_path* into trigger requests.
+
+    Row 1 is treated as the header row and must contain the columns
+    "PasHub link", "Record ID", "Deal Name" and "Deal Stage". Every later row
+    with a non-blank PasHub link yields one request; other rows are skipped.
+
+    Args:
+        excel_path: Path of the workbook to load.
+
+    Returns:
+        The requests in worksheet row order.
+
+    Raises:
+        KeyError: If a required header is missing from row 1.
+    """
+    wb = load_workbook(excel_path, data_only=True)
+    ws = wb.worksheets[0]
+
+    header_row = next(ws.iter_rows(min_row=1, max_row=1, values_only=True), ())
+    # Header text -> 0-based index into each row tuple; a repeated header
+    # keeps its last column.
+    headers: dict[str, int] = {
+        str(header_val).strip(): index
+        for index, header_val in enumerate(header_row)
+        if header_val is not None
+    }
+
+    required = ("PasHub link", "Record ID", "Deal Name", "Deal Stage")
+    missing = [name for name in required if name not in headers]
+    if missing:
+        raise KeyError(
+            f"Missing required column(s) in {excel_path}: {', '.join(missing)}"
+        )
+
+    pashub_idx: int = headers["PasHub link"]
+    record_id_idx: int = headers["Record ID"]
+    deal_name_idx: int = headers["Deal Name"]
+    deal_stage_idx: int = headers["Deal Stage"]
+
+    requests: list[PashubToAraTriggerRequest] = []
+
+    for row in ws.iter_rows(min_row=2, values_only=True):
+        pashub_link_raw = row[pashub_idx]
+        if not pashub_link_raw:
+            continue
+
+        pashub_link: str = str(pashub_link_raw).strip()
+        if not pashub_link:
+            # A whitespace-only cell is not a usable link.
+            continue
+
+        requests.append(
+            PashubToAraTriggerRequest(
+                pashub_link=pashub_link,
+                hubspot_deal_id=_record_id_to_str(row[record_id_idx]),
+                address=_optional_text(row[deal_name_idx]),
+                deal_stage=_optional_text(row[deal_stage_idx]),
+            )
+        )
+
+    return requests
+
+
+def main() -> None:
+    """Send one SQS message per selected spreadsheet row.
+
+    Requests are built from EXCEL_PATH and, when DEAL_ID_FILTER is non-empty,
+    narrowed to the deals it lists. With DRY_RUN set, each request is only
+    logged; no SQS client is created and no settings are read, so a dry run
+    needs neither AWS configuration nor a queue URL.
+    """
+    trigger_requests: list[PashubToAraTriggerRequest] = _build_requests(EXCEL_PATH)
+
+    if DEAL_ID_FILTER:
+        trigger_requests = [
+            r for r in trigger_requests if r.hubspot_deal_id in DEAL_ID_FILTER
+        ]
+
+    sqs: Any = None
+    queue_url: str = ""
+    if not DRY_RUN:
+        # Created only for a real send; see the docstring for why.
+        sqs = cast(Any, boto3.client("sqs"))  # type: ignore[reportUnknownMemberType]
+        queue_url = get_settings().PASHUB_TO_ARA_SQS_URL
+
+    # Loop-invariant, so computed once.
+    action: str = "DRY RUN" if DRY_RUN else "SENDING"
+
+    count: int = 0
+    for request in trigger_requests:
+        logger.info(
+            "[%s] deal_id=%s pashub_link=%s",
+            action,
+            request.hubspot_deal_id,
+            request.pashub_link,
+        )
+
+        if not DRY_RUN:
+            response: dict[str, Any] = sqs.send_message(
+                QueueUrl=queue_url,
+                MessageBody=json.dumps(request.model_dump()),
+            )
+            message_id: str = response["MessageId"]
+            logger.info(" MessageId: %s", message_id)
+
+        count += 1
+
+    label: str = "would send" if DRY_RUN else "sent"
+    print(f"{count} messages {label}")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/infrastructure/terraform/README.md b/deployment/terraform/README.md
similarity index 100%
rename from infrastructure/terraform/README.md
rename to deployment/terraform/README.md
diff --git a/infrastructure/terraform/cdn/main.tf b/deployment/terraform/cdn/main.tf
similarity index 100%
rename from infrastructure/terraform/cdn/main.tf
rename to deployment/terraform/cdn/main.tf
diff --git a/infrastructure/terraform/cdn/provider.tf b/deployment/terraform/cdn/provider.tf
similarity index 100%
rename from infrastructure/terraform/cdn/provider.tf
rename to deployment/terraform/cdn/provider.tf
diff --git a/infrastructure/terraform/cdn/variables.tf b/deployment/terraform/cdn/variables.tf
similarity index 100%
rename from infrastructure/terraform/cdn/variables.tf
rename to deployment/terraform/cdn/variables.tf
diff --git a/infrastructure/terraform/cdn_certificate/main.tf b/deployment/terraform/cdn_certificate/main.tf
similarity index 100%
rename from infrastructure/terraform/cdn_certificate/main.tf
rename to deployment/terraform/cdn_certificate/main.tf
diff --git a/infrastructure/terraform/cdn_certificate/outputs.tf b/deployment/terraform/cdn_certificate/outputs.tf
similarity index 100%
rename from infrastructure/terraform/cdn_certificate/outputs.tf
rename to deployment/terraform/cdn_certificate/outputs.tf
diff --git a/infrastructure/terraform/cdn_certificate/provider.tf b/deployment/terraform/cdn_certificate/provider.tf
similarity index 100%
rename from infrastructure/terraform/cdn_certificate/provider.tf
rename to deployment/terraform/cdn_certificate/provider.tf
diff --git a/infrastructure/terraform/cdn_certificate/variables.tf b/deployment/terraform/cdn_certificate/variables.tf
similarity index 100%
rename from infrastructure/terraform/cdn_certificate/variables.tf
rename to deployment/terraform/cdn_certificate/variables.tf
diff --git a/infrastructure/terraform/lambda/_template/README.md b/deployment/terraform/lambda/_template/README.md
similarity index 96%
rename from infrastructure/terraform/lambda/_template/README.md
rename to deployment/terraform/lambda/_template/README.md
index 5bb10627..f2a8638a 100644
--- a/infrastructure/terraform/lambda/_template/README.md
+++ b/deployment/terraform/lambda/_template/README.md
@@ -10,7 +10,7 @@
### 2. Add infrastructure prerequisites (shared stack)
- Add a new ECR repository in:
- infrastructure/terraform/shared/main.tf
+ deployment/terraform/shared/main.tf
- Create a PR to deploy this to main then dev in order to deploy the shared stack
diff --git a/infrastructure/terraform/lambda/_template/main.tf b/deployment/terraform/lambda/_template/main.tf
similarity index 100%
rename from infrastructure/terraform/lambda/_template/main.tf
rename to deployment/terraform/lambda/_template/main.tf
diff --git a/infrastructure/terraform/lambda/_template/provider.tf b/deployment/terraform/lambda/_template/provider.tf
similarity index 100%
rename from infrastructure/terraform/lambda/_template/provider.tf
rename to deployment/terraform/lambda/_template/provider.tf
diff --git a/infrastructure/terraform/lambda/_template/variables.tf b/deployment/terraform/lambda/_template/variables.tf
similarity index 100%
rename from infrastructure/terraform/lambda/_template/variables.tf
rename to deployment/terraform/lambda/_template/variables.tf
diff --git a/infrastructure/terraform/lambda/address2UPRN/main.tf b/deployment/terraform/lambda/address2UPRN/main.tf
similarity index 100%
rename from infrastructure/terraform/lambda/address2UPRN/main.tf
rename to deployment/terraform/lambda/address2UPRN/main.tf
diff --git a/infrastructure/terraform/lambda/address2UPRN/outputs.tf b/deployment/terraform/lambda/address2UPRN/outputs.tf
similarity index 100%
rename from infrastructure/terraform/lambda/address2UPRN/outputs.tf
rename to deployment/terraform/lambda/address2UPRN/outputs.tf
diff --git a/infrastructure/terraform/lambda/address2UPRN/provider.tf b/deployment/terraform/lambda/address2UPRN/provider.tf
similarity index 100%
rename from infrastructure/terraform/lambda/address2UPRN/provider.tf
rename to deployment/terraform/lambda/address2UPRN/provider.tf
diff --git a/infrastructure/terraform/lambda/address2UPRN/variables.tf b/deployment/terraform/lambda/address2UPRN/variables.tf
similarity index 100%
rename from infrastructure/terraform/lambda/address2UPRN/variables.tf
rename to deployment/terraform/lambda/address2UPRN/variables.tf
diff --git a/infrastructure/terraform/lambda/bulk_address2uprn_combiner/main.tf b/deployment/terraform/lambda/bulk_address2uprn_combiner/main.tf
similarity index 100%
rename from infrastructure/terraform/lambda/bulk_address2uprn_combiner/main.tf
rename to deployment/terraform/lambda/bulk_address2uprn_combiner/main.tf
diff --git a/infrastructure/terraform/lambda/bulk_address2uprn_combiner/outputs.tf b/deployment/terraform/lambda/bulk_address2uprn_combiner/outputs.tf
similarity index 100%
rename from infrastructure/terraform/lambda/bulk_address2uprn_combiner/outputs.tf
rename to deployment/terraform/lambda/bulk_address2uprn_combiner/outputs.tf
diff --git a/infrastructure/terraform/lambda/bulk_address2uprn_combiner/provider.tf b/deployment/terraform/lambda/bulk_address2uprn_combiner/provider.tf
similarity index 100%
rename from infrastructure/terraform/lambda/bulk_address2uprn_combiner/provider.tf
rename to deployment/terraform/lambda/bulk_address2uprn_combiner/provider.tf
diff --git a/infrastructure/terraform/lambda/bulk_address2uprn_combiner/variables.tf b/deployment/terraform/lambda/bulk_address2uprn_combiner/variables.tf
similarity index 100%
rename from infrastructure/terraform/lambda/bulk_address2uprn_combiner/variables.tf
rename to deployment/terraform/lambda/bulk_address2uprn_combiner/variables.tf
diff --git a/infrastructure/terraform/lambda/categorisation/main.tf b/deployment/terraform/lambda/categorisation/main.tf
similarity index 100%
rename from infrastructure/terraform/lambda/categorisation/main.tf
rename to deployment/terraform/lambda/categorisation/main.tf
diff --git a/infrastructure/terraform/lambda/categorisation/outputs.tf b/deployment/terraform/lambda/categorisation/outputs.tf
similarity index 100%
rename from infrastructure/terraform/lambda/categorisation/outputs.tf
rename to deployment/terraform/lambda/categorisation/outputs.tf
diff --git a/infrastructure/terraform/lambda/categorisation/provider.tf b/deployment/terraform/lambda/categorisation/provider.tf
similarity index 100%
rename from infrastructure/terraform/lambda/categorisation/provider.tf
rename to deployment/terraform/lambda/categorisation/provider.tf
diff --git a/infrastructure/terraform/lambda/categorisation/variables.tf b/deployment/terraform/lambda/categorisation/variables.tf
similarity index 100%
rename from infrastructure/terraform/lambda/categorisation/variables.tf
rename to deployment/terraform/lambda/categorisation/variables.tf
diff --git a/infrastructure/terraform/lambda/condition-etl/main.tf b/deployment/terraform/lambda/condition-etl/main.tf
similarity index 100%
rename from infrastructure/terraform/lambda/condition-etl/main.tf
rename to deployment/terraform/lambda/condition-etl/main.tf
diff --git a/infrastructure/terraform/lambda/condition-etl/provider.tf b/deployment/terraform/lambda/condition-etl/provider.tf
similarity index 100%
rename from infrastructure/terraform/lambda/condition-etl/provider.tf
rename to deployment/terraform/lambda/condition-etl/provider.tf
diff --git a/infrastructure/terraform/lambda/condition-etl/variables.tf b/deployment/terraform/lambda/condition-etl/variables.tf
similarity index 100%
rename from infrastructure/terraform/lambda/condition-etl/variables.tf
rename to deployment/terraform/lambda/condition-etl/variables.tf
diff --git a/infrastructure/terraform/lambda/ecmk_to_ara/main.tf b/deployment/terraform/lambda/ecmk_to_ara/main.tf
similarity index 100%
rename from infrastructure/terraform/lambda/ecmk_to_ara/main.tf
rename to deployment/terraform/lambda/ecmk_to_ara/main.tf
diff --git a/infrastructure/terraform/lambda/ecmk_to_ara/provider.tf b/deployment/terraform/lambda/ecmk_to_ara/provider.tf
similarity index 100%
rename from infrastructure/terraform/lambda/ecmk_to_ara/provider.tf
rename to deployment/terraform/lambda/ecmk_to_ara/provider.tf
diff --git a/infrastructure/terraform/lambda/ecmk_to_ara/variables.tf b/deployment/terraform/lambda/ecmk_to_ara/variables.tf
similarity index 100%
rename from infrastructure/terraform/lambda/ecmk_to_ara/variables.tf
rename to deployment/terraform/lambda/ecmk_to_ara/variables.tf
diff --git a/infrastructure/terraform/lambda/engine/main.tf b/deployment/terraform/lambda/engine/main.tf
similarity index 100%
rename from infrastructure/terraform/lambda/engine/main.tf
rename to deployment/terraform/lambda/engine/main.tf
diff --git a/infrastructure/terraform/lambda/engine/outputs.tf b/deployment/terraform/lambda/engine/outputs.tf
similarity index 100%
rename from infrastructure/terraform/lambda/engine/outputs.tf
rename to deployment/terraform/lambda/engine/outputs.tf
diff --git a/infrastructure/terraform/lambda/engine/provider.tf b/deployment/terraform/lambda/engine/provider.tf
similarity index 100%
rename from infrastructure/terraform/lambda/engine/provider.tf
rename to deployment/terraform/lambda/engine/provider.tf
diff --git a/infrastructure/terraform/lambda/engine/variables.tf b/deployment/terraform/lambda/engine/variables.tf
similarity index 100%
rename from infrastructure/terraform/lambda/engine/variables.tf
rename to deployment/terraform/lambda/engine/variables.tf
diff --git a/infrastructure/terraform/lambda/fast-api/main.tf b/deployment/terraform/lambda/fast-api/main.tf
similarity index 100%
rename from infrastructure/terraform/lambda/fast-api/main.tf
rename to deployment/terraform/lambda/fast-api/main.tf
diff --git a/infrastructure/terraform/lambda/fast-api/outputs.tf b/deployment/terraform/lambda/fast-api/outputs.tf
similarity index 100%
rename from infrastructure/terraform/lambda/fast-api/outputs.tf
rename to deployment/terraform/lambda/fast-api/outputs.tf
diff --git a/infrastructure/terraform/lambda/fast-api/provider.tf b/deployment/terraform/lambda/fast-api/provider.tf
similarity index 100%
rename from infrastructure/terraform/lambda/fast-api/provider.tf
rename to deployment/terraform/lambda/fast-api/provider.tf
diff --git a/infrastructure/terraform/lambda/fast-api/variables.tf b/deployment/terraform/lambda/fast-api/variables.tf
similarity index 100%
rename from infrastructure/terraform/lambda/fast-api/variables.tf
rename to deployment/terraform/lambda/fast-api/variables.tf
diff --git a/infrastructure/terraform/lambda/hubspot_deal_etl/main.tf b/deployment/terraform/lambda/hubspot_deal_etl/main.tf
similarity index 100%
rename from infrastructure/terraform/lambda/hubspot_deal_etl/main.tf
rename to deployment/terraform/lambda/hubspot_deal_etl/main.tf
diff --git a/infrastructure/terraform/lambda/hubspot_deal_etl/provider.tf b/deployment/terraform/lambda/hubspot_deal_etl/provider.tf
similarity index 100%
rename from infrastructure/terraform/lambda/hubspot_deal_etl/provider.tf
rename to deployment/terraform/lambda/hubspot_deal_etl/provider.tf
diff --git a/infrastructure/terraform/lambda/hubspot_deal_etl/variables.tf b/deployment/terraform/lambda/hubspot_deal_etl/variables.tf
similarity index 100%
rename from infrastructure/terraform/lambda/hubspot_deal_etl/variables.tf
rename to deployment/terraform/lambda/hubspot_deal_etl/variables.tf
diff --git a/infrastructure/terraform/lambda/magic_plan/main.tf b/deployment/terraform/lambda/magic_plan/main.tf
similarity index 100%
rename from infrastructure/terraform/lambda/magic_plan/main.tf
rename to deployment/terraform/lambda/magic_plan/main.tf
diff --git a/infrastructure/terraform/lambda/magic_plan/outputs.tf b/deployment/terraform/lambda/magic_plan/outputs.tf
similarity index 100%
rename from infrastructure/terraform/lambda/magic_plan/outputs.tf
rename to deployment/terraform/lambda/magic_plan/outputs.tf
diff --git a/infrastructure/terraform/lambda/magic_plan/provider.tf b/deployment/terraform/lambda/magic_plan/provider.tf
similarity index 100%
rename from infrastructure/terraform/lambda/magic_plan/provider.tf
rename to deployment/terraform/lambda/magic_plan/provider.tf
diff --git a/infrastructure/terraform/lambda/magic_plan/variables.tf b/deployment/terraform/lambda/magic_plan/variables.tf
similarity index 100%
rename from infrastructure/terraform/lambda/magic_plan/variables.tf
rename to deployment/terraform/lambda/magic_plan/variables.tf
diff --git a/infrastructure/terraform/lambda/ordnanceSurvey/main.tf b/deployment/terraform/lambda/ordnanceSurvey/main.tf
similarity index 100%
rename from infrastructure/terraform/lambda/ordnanceSurvey/main.tf
rename to deployment/terraform/lambda/ordnanceSurvey/main.tf
diff --git a/infrastructure/terraform/lambda/ordnanceSurvey/provider.tf b/deployment/terraform/lambda/ordnanceSurvey/provider.tf
similarity index 100%
rename from infrastructure/terraform/lambda/ordnanceSurvey/provider.tf
rename to deployment/terraform/lambda/ordnanceSurvey/provider.tf
diff --git a/infrastructure/terraform/lambda/ordnanceSurvey/variables.tf b/deployment/terraform/lambda/ordnanceSurvey/variables.tf
similarity index 100%
rename from infrastructure/terraform/lambda/ordnanceSurvey/variables.tf
rename to deployment/terraform/lambda/ordnanceSurvey/variables.tf
diff --git a/infrastructure/terraform/lambda/pashub_to_ara/main.tf b/deployment/terraform/lambda/pashub_to_ara/main.tf
similarity index 92%
rename from infrastructure/terraform/lambda/pashub_to_ara/main.tf
rename to deployment/terraform/lambda/pashub_to_ara/main.tf
index 902d7845..eba9c874 100644
--- a/infrastructure/terraform/lambda/pashub_to_ara/main.tf
+++ b/deployment/terraform/lambda/pashub_to_ara/main.tf
@@ -49,6 +49,8 @@ module "lambda" {
SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID = var.social_housing_wave_3_sharepoint_id
PASHUB_EMAIL = var.pashub_email
PASHUB_PASSWORD = var.pashub_password
+ PASHUB_COORDINATION_EMAIL = var.pashub_coordination_email
+ PASHUB_COORDINATION_PASSWORD = var.pashub_coordination_password
}
}
diff --git a/infrastructure/terraform/lambda/pashub_to_ara/outputs.tf b/deployment/terraform/lambda/pashub_to_ara/outputs.tf
similarity index 100%
rename from infrastructure/terraform/lambda/pashub_to_ara/outputs.tf
rename to deployment/terraform/lambda/pashub_to_ara/outputs.tf
diff --git a/infrastructure/terraform/lambda/pashub_to_ara/provider.tf b/deployment/terraform/lambda/pashub_to_ara/provider.tf
similarity index 100%
rename from infrastructure/terraform/lambda/pashub_to_ara/provider.tf
rename to deployment/terraform/lambda/pashub_to_ara/provider.tf
diff --git a/infrastructure/terraform/lambda/pashub_to_ara/variables.tf b/deployment/terraform/lambda/pashub_to_ara/variables.tf
similarity index 90%
rename from infrastructure/terraform/lambda/pashub_to_ara/variables.tf
rename to deployment/terraform/lambda/pashub_to_ara/variables.tf
index 0e99d378..cdeff256 100644
--- a/infrastructure/terraform/lambda/pashub_to_ara/variables.tf
+++ b/deployment/terraform/lambda/pashub_to_ara/variables.tf
@@ -100,4 +100,16 @@ variable "pashub_email" {
variable "pashub_password" {
type = string
sensitive = true
+}
+
+# Optional: defaults to null. Passed to the lambda as the
+# PASHUB_COORDINATION_EMAIL environment variable.
+# NOTE(review): presumably the login for the PasHub coordination account - confirm.
+variable "pashub_coordination_email" {
+  type      = string
+  sensitive = true
+  default   = null
+}
+
+# Optional: defaults to null. Passed to the lambda as the
+# PASHUB_COORDINATION_PASSWORD environment variable.
+# NOTE(review): presumably pairs with pashub_coordination_email - confirm.
+variable "pashub_coordination_password" {
+  type      = string
+  sensitive = true
+  default   = null
 }
\ No newline at end of file
diff --git a/infrastructure/terraform/lambda/postcodeSplitter/main.tf b/deployment/terraform/lambda/postcodeSplitter/main.tf
similarity index 76%
rename from infrastructure/terraform/lambda/postcodeSplitter/main.tf
rename to deployment/terraform/lambda/postcodeSplitter/main.tf
index 94c5cd4e..721cb2ea 100644
--- a/infrastructure/terraform/lambda/postcodeSplitter/main.tf
+++ b/deployment/terraform/lambda/postcodeSplitter/main.tf
@@ -38,22 +38,8 @@ module "lambda" {
{
STAGE = var.stage
LOG_LEVEL = "info"
- DB_USERNAME = local.db_credentials.db_assessment_model_username
- DB_PASSWORD = local.db_credentials.db_assessment_model_password
- GOOGLE_SOLAR_API_KEY = "test"
- SAP_PREDICTIONS_BUCKET = "test"
- CARBON_PREDICTIONS_BUCKET = "test"
- HEAT_PREDICTIONS_BUCKET = "test"
- HEATING_KWH_PREDICTIONS_BUCKET = "test"
- HOTWATER_KWH_PREDICTIONS_BUCKET = "test"
- API_KEY = "test"
- ENVIRONMENT = "test"
- SECRET_KEY = "test"
- PLAN_TRIGGER_BUCKET = "test"
- DATA_BUCKET = "test"
- EPC_AUTH_TOKEN = "test"
- ENGINE_SQS_URL = "test"
- ENERGY_ASSESSMENTS_BUCKET = "test"
+ POSTGRES_USERNAME = local.db_credentials.db_assessment_model_username
+ POSTGRES_PASSWORD = local.db_credentials.db_assessment_model_password
ADDRESS2UPRN_QUEUE_URL = data.terraform_remote_state.address2uprn.outputs.address2uprn_queue_url
S3_BUCKET_NAME = data.terraform_remote_state.shared.outputs.retrofit_sap_data_bucket_name
},
diff --git a/infrastructure/terraform/lambda/postcodeSplitter/outputs.tf b/deployment/terraform/lambda/postcodeSplitter/outputs.tf
similarity index 100%
rename from infrastructure/terraform/lambda/postcodeSplitter/outputs.tf
rename to deployment/terraform/lambda/postcodeSplitter/outputs.tf
diff --git a/infrastructure/terraform/lambda/postcodeSplitter/provider.tf b/deployment/terraform/lambda/postcodeSplitter/provider.tf
similarity index 100%
rename from infrastructure/terraform/lambda/postcodeSplitter/provider.tf
rename to deployment/terraform/lambda/postcodeSplitter/provider.tf
diff --git a/infrastructure/terraform/lambda/postcodeSplitter/variables.tf b/deployment/terraform/lambda/postcodeSplitter/variables.tf
similarity index 100%
rename from infrastructure/terraform/lambda/postcodeSplitter/variables.tf
rename to deployment/terraform/lambda/postcodeSplitter/variables.tf
diff --git a/infrastructure/terraform/modules/acm_certificate/main.tf b/deployment/terraform/modules/acm_certificate/main.tf
similarity index 100%
rename from infrastructure/terraform/modules/acm_certificate/main.tf
rename to deployment/terraform/modules/acm_certificate/main.tf
diff --git a/infrastructure/terraform/modules/acm_certificate/outputs.tf b/deployment/terraform/modules/acm_certificate/outputs.tf
similarity index 100%
rename from infrastructure/terraform/modules/acm_certificate/outputs.tf
rename to deployment/terraform/modules/acm_certificate/outputs.tf
diff --git a/infrastructure/terraform/modules/acm_certificate/variables.tf b/deployment/terraform/modules/acm_certificate/variables.tf
similarity index 100%
rename from infrastructure/terraform/modules/acm_certificate/variables.tf
rename to deployment/terraform/modules/acm_certificate/variables.tf
diff --git a/infrastructure/terraform/modules/cloudfront/main.tf b/deployment/terraform/modules/cloudfront/main.tf
similarity index 100%
rename from infrastructure/terraform/modules/cloudfront/main.tf
rename to deployment/terraform/modules/cloudfront/main.tf
diff --git a/infrastructure/terraform/modules/cloudfront/variables.tf b/deployment/terraform/modules/cloudfront/variables.tf
similarity index 100%
rename from infrastructure/terraform/modules/cloudfront/variables.tf
rename to deployment/terraform/modules/cloudfront/variables.tf
diff --git a/infrastructure/terraform/modules/container_registry/main.tf b/deployment/terraform/modules/container_registry/main.tf
similarity index 100%
rename from infrastructure/terraform/modules/container_registry/main.tf
rename to deployment/terraform/modules/container_registry/main.tf
diff --git a/infrastructure/terraform/modules/container_registry/outputs.tf b/deployment/terraform/modules/container_registry/outputs.tf
similarity index 100%
rename from infrastructure/terraform/modules/container_registry/outputs.tf
rename to deployment/terraform/modules/container_registry/outputs.tf
diff --git a/infrastructure/terraform/modules/container_registry/variables.tf b/deployment/terraform/modules/container_registry/variables.tf
similarity index 100%
rename from infrastructure/terraform/modules/container_registry/variables.tf
rename to deployment/terraform/modules/container_registry/variables.tf
diff --git a/infrastructure/terraform/modules/ecr/main.tf b/deployment/terraform/modules/ecr/main.tf
similarity index 100%
rename from infrastructure/terraform/modules/ecr/main.tf
rename to deployment/terraform/modules/ecr/main.tf
diff --git a/infrastructure/terraform/modules/ecr/outputs.tf b/deployment/terraform/modules/ecr/outputs.tf
similarity index 100%
rename from infrastructure/terraform/modules/ecr/outputs.tf
rename to deployment/terraform/modules/ecr/outputs.tf
diff --git a/infrastructure/terraform/modules/ecr/variables.tf b/deployment/terraform/modules/ecr/variables.tf
similarity index 100%
rename from infrastructure/terraform/modules/ecr/variables.tf
rename to deployment/terraform/modules/ecr/variables.tf
diff --git a/infrastructure/terraform/modules/general_iam_policy/main.tf b/deployment/terraform/modules/general_iam_policy/main.tf
similarity index 100%
rename from infrastructure/terraform/modules/general_iam_policy/main.tf
rename to deployment/terraform/modules/general_iam_policy/main.tf
diff --git a/infrastructure/terraform/modules/general_iam_policy/outputs.tf b/deployment/terraform/modules/general_iam_policy/outputs.tf
similarity index 100%
rename from infrastructure/terraform/modules/general_iam_policy/outputs.tf
rename to deployment/terraform/modules/general_iam_policy/outputs.tf
diff --git a/infrastructure/terraform/modules/general_iam_policy/variables.tf b/deployment/terraform/modules/general_iam_policy/variables.tf
similarity index 100%
rename from infrastructure/terraform/modules/general_iam_policy/variables.tf
rename to deployment/terraform/modules/general_iam_policy/variables.tf
diff --git a/infrastructure/terraform/modules/lambda_execution_role/main.tf b/deployment/terraform/modules/lambda_execution_role/main.tf
similarity index 100%
rename from infrastructure/terraform/modules/lambda_execution_role/main.tf
rename to deployment/terraform/modules/lambda_execution_role/main.tf
diff --git a/infrastructure/terraform/modules/lambda_execution_role/outputs.tf b/deployment/terraform/modules/lambda_execution_role/outputs.tf
similarity index 100%
rename from infrastructure/terraform/modules/lambda_execution_role/outputs.tf
rename to deployment/terraform/modules/lambda_execution_role/outputs.tf
diff --git a/infrastructure/terraform/modules/lambda_execution_role/variables.tf b/deployment/terraform/modules/lambda_execution_role/variables.tf
similarity index 100%
rename from infrastructure/terraform/modules/lambda_execution_role/variables.tf
rename to deployment/terraform/modules/lambda_execution_role/variables.tf
diff --git a/infrastructure/terraform/modules/lambda_service/main.tf b/deployment/terraform/modules/lambda_service/main.tf
similarity index 100%
rename from infrastructure/terraform/modules/lambda_service/main.tf
rename to deployment/terraform/modules/lambda_service/main.tf
diff --git a/infrastructure/terraform/modules/lambda_service/outputs.tf b/deployment/terraform/modules/lambda_service/outputs.tf
similarity index 100%
rename from infrastructure/terraform/modules/lambda_service/outputs.tf
rename to deployment/terraform/modules/lambda_service/outputs.tf
diff --git a/infrastructure/terraform/modules/lambda_service/variables.tf b/deployment/terraform/modules/lambda_service/variables.tf
similarity index 100%
rename from infrastructure/terraform/modules/lambda_service/variables.tf
rename to deployment/terraform/modules/lambda_service/variables.tf
diff --git a/infrastructure/terraform/modules/lambda_service_zip/main.tf b/deployment/terraform/modules/lambda_service_zip/main.tf
similarity index 100%
rename from infrastructure/terraform/modules/lambda_service_zip/main.tf
rename to deployment/terraform/modules/lambda_service_zip/main.tf
diff --git a/infrastructure/terraform/modules/lambda_service_zip/variables.tf b/deployment/terraform/modules/lambda_service_zip/variables.tf
similarity index 100%
rename from infrastructure/terraform/modules/lambda_service_zip/variables.tf
rename to deployment/terraform/modules/lambda_service_zip/variables.tf
diff --git a/infrastructure/terraform/modules/lambda_sqs_trigger/main.tf b/deployment/terraform/modules/lambda_sqs_trigger/main.tf
similarity index 100%
rename from infrastructure/terraform/modules/lambda_sqs_trigger/main.tf
rename to deployment/terraform/modules/lambda_sqs_trigger/main.tf
diff --git a/infrastructure/terraform/modules/lambda_sqs_trigger/variables.tf b/deployment/terraform/modules/lambda_sqs_trigger/variables.tf
similarity index 100%
rename from infrastructure/terraform/modules/lambda_sqs_trigger/variables.tf
rename to deployment/terraform/modules/lambda_sqs_trigger/variables.tf
diff --git a/infrastructure/terraform/modules/lambda_with_api_gateway/main.tf b/deployment/terraform/modules/lambda_with_api_gateway/main.tf
similarity index 100%
rename from infrastructure/terraform/modules/lambda_with_api_gateway/main.tf
rename to deployment/terraform/modules/lambda_with_api_gateway/main.tf
diff --git a/infrastructure/terraform/modules/lambda_with_api_gateway/outputs.tf b/deployment/terraform/modules/lambda_with_api_gateway/outputs.tf
similarity index 100%
rename from infrastructure/terraform/modules/lambda_with_api_gateway/outputs.tf
rename to deployment/terraform/modules/lambda_with_api_gateway/outputs.tf
diff --git a/infrastructure/terraform/modules/lambda_with_api_gateway/variables.tf b/deployment/terraform/modules/lambda_with_api_gateway/variables.tf
similarity index 96%
rename from infrastructure/terraform/modules/lambda_with_api_gateway/variables.tf
rename to deployment/terraform/modules/lambda_with_api_gateway/variables.tf
index 95e5acd9..b5d0515a 100644
--- a/infrastructure/terraform/modules/lambda_with_api_gateway/variables.tf
+++ b/deployment/terraform/modules/lambda_with_api_gateway/variables.tf
@@ -11,7 +11,7 @@ variable "zip_excludes" {
"**/*.pyc",
"**/.pytest_cache/**",
"**/tests/**",
- "**/infrastructure/**"
+ "**/deployment/**"
]
}
diff --git a/infrastructure/terraform/modules/lambda_with_sqs/main.tf b/deployment/terraform/modules/lambda_with_sqs/main.tf
similarity index 100%
rename from infrastructure/terraform/modules/lambda_with_sqs/main.tf
rename to deployment/terraform/modules/lambda_with_sqs/main.tf
diff --git a/infrastructure/terraform/modules/lambda_with_sqs/outputs.tf b/deployment/terraform/modules/lambda_with_sqs/outputs.tf
similarity index 100%
rename from infrastructure/terraform/modules/lambda_with_sqs/outputs.tf
rename to deployment/terraform/modules/lambda_with_sqs/outputs.tf
diff --git a/infrastructure/terraform/modules/lambda_with_sqs/variables.tf b/deployment/terraform/modules/lambda_with_sqs/variables.tf
similarity index 100%
rename from infrastructure/terraform/modules/lambda_with_sqs/variables.tf
rename to deployment/terraform/modules/lambda_with_sqs/variables.tf
diff --git a/infrastructure/terraform/modules/route53/main.tf b/deployment/terraform/modules/route53/main.tf
similarity index 100%
rename from infrastructure/terraform/modules/route53/main.tf
rename to deployment/terraform/modules/route53/main.tf
diff --git a/infrastructure/terraform/modules/route53/variables.tf b/deployment/terraform/modules/route53/variables.tf
similarity index 100%
rename from infrastructure/terraform/modules/route53/variables.tf
rename to deployment/terraform/modules/route53/variables.tf
diff --git a/infrastructure/terraform/modules/s3/main.tf b/deployment/terraform/modules/s3/main.tf
similarity index 100%
rename from infrastructure/terraform/modules/s3/main.tf
rename to deployment/terraform/modules/s3/main.tf
diff --git a/infrastructure/terraform/modules/s3/outputs.tf b/deployment/terraform/modules/s3/outputs.tf
similarity index 100%
rename from infrastructure/terraform/modules/s3/outputs.tf
rename to deployment/terraform/modules/s3/outputs.tf
diff --git a/infrastructure/terraform/modules/s3/variables.tf b/deployment/terraform/modules/s3/variables.tf
similarity index 100%
rename from infrastructure/terraform/modules/s3/variables.tf
rename to deployment/terraform/modules/s3/variables.tf
diff --git a/infrastructure/terraform/modules/s3_iam_policy/main.tf b/deployment/terraform/modules/s3_iam_policy/main.tf
similarity index 100%
rename from infrastructure/terraform/modules/s3_iam_policy/main.tf
rename to deployment/terraform/modules/s3_iam_policy/main.tf
diff --git a/infrastructure/terraform/modules/s3_iam_policy/outputs.tf b/deployment/terraform/modules/s3_iam_policy/outputs.tf
similarity index 100%
rename from infrastructure/terraform/modules/s3_iam_policy/outputs.tf
rename to deployment/terraform/modules/s3_iam_policy/outputs.tf
diff --git a/infrastructure/terraform/modules/s3_iam_policy/variables.tf b/deployment/terraform/modules/s3_iam_policy/variables.tf
similarity index 100%
rename from infrastructure/terraform/modules/s3_iam_policy/variables.tf
rename to deployment/terraform/modules/s3_iam_policy/variables.tf
diff --git a/infrastructure/terraform/modules/s3_presignable_bucket/main.tf b/deployment/terraform/modules/s3_presignable_bucket/main.tf
similarity index 100%
rename from infrastructure/terraform/modules/s3_presignable_bucket/main.tf
rename to deployment/terraform/modules/s3_presignable_bucket/main.tf
diff --git a/infrastructure/terraform/modules/s3_presignable_bucket/outputs.tf b/deployment/terraform/modules/s3_presignable_bucket/outputs.tf
similarity index 100%
rename from infrastructure/terraform/modules/s3_presignable_bucket/outputs.tf
rename to deployment/terraform/modules/s3_presignable_bucket/outputs.tf
diff --git a/infrastructure/terraform/modules/s3_presignable_bucket/variables.tf b/deployment/terraform/modules/s3_presignable_bucket/variables.tf
similarity index 100%
rename from infrastructure/terraform/modules/s3_presignable_bucket/variables.tf
rename to deployment/terraform/modules/s3_presignable_bucket/variables.tf
diff --git a/infrastructure/terraform/modules/ses/main.tf b/deployment/terraform/modules/ses/main.tf
similarity index 100%
rename from infrastructure/terraform/modules/ses/main.tf
rename to deployment/terraform/modules/ses/main.tf
diff --git a/infrastructure/terraform/modules/ses/outputs.tf b/deployment/terraform/modules/ses/outputs.tf
similarity index 100%
rename from infrastructure/terraform/modules/ses/outputs.tf
rename to deployment/terraform/modules/ses/outputs.tf
diff --git a/infrastructure/terraform/modules/ses/variables.tf b/deployment/terraform/modules/ses/variables.tf
similarity index 100%
rename from infrastructure/terraform/modules/ses/variables.tf
rename to deployment/terraform/modules/ses/variables.tf
diff --git a/infrastructure/terraform/modules/sqs_queue/main.tf b/deployment/terraform/modules/sqs_queue/main.tf
similarity index 100%
rename from infrastructure/terraform/modules/sqs_queue/main.tf
rename to deployment/terraform/modules/sqs_queue/main.tf
diff --git a/infrastructure/terraform/modules/sqs_queue/outputs.tf b/deployment/terraform/modules/sqs_queue/outputs.tf
similarity index 100%
rename from infrastructure/terraform/modules/sqs_queue/outputs.tf
rename to deployment/terraform/modules/sqs_queue/outputs.tf
diff --git a/infrastructure/terraform/modules/sqs_queue/variables.tf b/deployment/terraform/modules/sqs_queue/variables.tf
similarity index 100%
rename from infrastructure/terraform/modules/sqs_queue/variables.tf
rename to deployment/terraform/modules/sqs_queue/variables.tf
diff --git a/infrastructure/terraform/modules/tf_state_bucket/main.tf b/deployment/terraform/modules/tf_state_bucket/main.tf
similarity index 100%
rename from infrastructure/terraform/modules/tf_state_bucket/main.tf
rename to deployment/terraform/modules/tf_state_bucket/main.tf
diff --git a/infrastructure/terraform/modules/tf_state_bucket/outputs.tf b/deployment/terraform/modules/tf_state_bucket/outputs.tf
similarity index 100%
rename from infrastructure/terraform/modules/tf_state_bucket/outputs.tf
rename to deployment/terraform/modules/tf_state_bucket/outputs.tf
diff --git a/infrastructure/terraform/modules/tf_state_bucket/variables.tf b/deployment/terraform/modules/tf_state_bucket/variables.tf
similarity index 100%
rename from infrastructure/terraform/modules/tf_state_bucket/variables.tf
rename to deployment/terraform/modules/tf_state_bucket/variables.tf
diff --git a/infrastructure/terraform/shared/dev.tfvars b/deployment/terraform/shared/dev.tfvars
similarity index 100%
rename from infrastructure/terraform/shared/dev.tfvars
rename to deployment/terraform/shared/dev.tfvars
diff --git a/infrastructure/terraform/shared/main.tf b/deployment/terraform/shared/main.tf
similarity index 100%
rename from infrastructure/terraform/shared/main.tf
rename to deployment/terraform/shared/main.tf
diff --git a/infrastructure/terraform/shared/secrets.tf b/deployment/terraform/shared/secrets.tf
similarity index 100%
rename from infrastructure/terraform/shared/secrets.tf
rename to deployment/terraform/shared/secrets.tf
diff --git a/infrastructure/terraform/shared/variables.tf b/deployment/terraform/shared/variables.tf
similarity index 100%
rename from infrastructure/terraform/shared/variables.tf
rename to deployment/terraform/shared/variables.tf
diff --git a/domain/__init__.py b/domain/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/domain/addresses/__init__.py b/domain/addresses/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/domain/addresses/postcode_batching.py b/domain/addresses/postcode_batching.py
new file mode 100644
index 00000000..44e4d967
--- /dev/null
+++ b/domain/addresses/postcode_batching.py
@@ -0,0 +1,51 @@
+from __future__ import annotations
+
+from collections.abc import Iterable, Iterator
+
+from domain.addresses.user_address import UserAddress
+from domain.postcode import Postcode
+
+
+def iter_postcode_grouped_batches(
+    addresses: Iterable[UserAddress],
+    *,
+    max_batch_size: int = 500,
+) -> Iterator[list[UserAddress]]:
+    """Yield address batches that never split a postcode across batches.
+
+    Addresses are grouped by postcode, groups keeping the order in which
+    their postcode was first seen, and whole groups are packed into batches
+    of at most ``max_batch_size`` addresses. A single group that reaches
+    ``max_batch_size`` on its own is emitted as a dedicated batch and may
+    therefore exceed the limit.
+
+    Raises:
+        ValueError: if ``max_batch_size`` is smaller than 1. Because this is
+            a generator, the error surfaces on first iteration.
+    """
+    if max_batch_size < 1:
+        raise ValueError("max_batch_size must be >= 1")
+
+    pending: list[UserAddress] = []
+    for members in _group_by_postcode_in_order(addresses).values():
+        size = len(members)
+
+        if size < max_batch_size:
+            # Flush first when this group would overflow the pending batch.
+            if len(pending) + size > max_batch_size:
+                yield pending
+                pending = []
+            pending.extend(members)
+            continue
+
+        # Full or oversize single-postcode group: emit whatever is pending,
+        # then the group as its own batch (mirrors the legacy
+        # ``if group_len >= batch_size`` branch).
+        if pending:
+            yield pending
+            pending = []
+        yield members
+
+    # Whatever is left over forms the final batch.
+    if pending:
+        yield pending
+
+
+def _group_by_postcode_in_order(
+    addresses: Iterable[UserAddress],
+) -> dict[Postcode, list[UserAddress]]:
+    """Bucket addresses by postcode, keyed in first-seen postcode order."""
+    by_postcode: dict[Postcode, list[UserAddress]] = {}
+    for item in addresses:
+        bucket = by_postcode.get(item.postcode)
+        if bucket is None:
+            # First address for this postcode: register a new bucket.
+            bucket = by_postcode[item.postcode] = []
+        bucket.append(item)
+    return by_postcode
diff --git a/domain/addresses/user_address.py b/domain/addresses/user_address.py
new file mode 100644
index 00000000..9a28751b
--- /dev/null
+++ b/domain/addresses/user_address.py
@@ -0,0 +1,18 @@
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Optional
+
+from domain.postcode import Postcode
+
+
+def _empty_source_row() -> dict[str, str]:
+    """Return a new empty mapping on every call."""
+    row: dict[str, str] = {}
+    return row
+
+
+@dataclass(frozen=True)
+class UserAddress:
+    """One user-supplied address string together with its postcode.
+
+    ``source_row`` is declared with ``compare=False``, so it does not take
+    part in equality between two ``UserAddress`` instances.
+    """
+
+    user_address: str
+    postcode: Postcode
+    internal_reference: Optional[str] = None
+    source_row: dict[str, str] = field(
+        compare=False,
+        default_factory=_empty_source_row,
+    )
diff --git a/domain/postcode.py b/domain/postcode.py
new file mode 100644
index 00000000..8e4e7c79
--- /dev/null
+++ b/domain/postcode.py
@@ -0,0 +1,15 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+
+@dataclass(frozen=True)
+class Postcode:
+    """Postcode value object normalised to upper case with no whitespace."""
+
+    value: str
+
+    def __post_init__(self) -> None:
+        # Drop every whitespace run, then upper-case what remains.
+        compact = "".join(self.value.split())
+        # Frozen dataclass: normal assignment is blocked, so write through
+        # object.__setattr__ instead.
+        object.__setattr__(self, "value", compact.upper())
+
+    def __str__(self) -> str:
+        return self.value
diff --git a/domain/tasks/__init__.py b/domain/tasks/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/domain/tasks/subtasks.py b/domain/tasks/subtasks.py
new file mode 100644
index 00000000..bd49a6ec
--- /dev/null
+++ b/domain/tasks/subtasks.py
@@ -0,0 +1,55 @@
+from dataclasses import dataclass
+from datetime import datetime, timezone
+from enum import Enum
+from typing import Any, Optional
+from uuid import UUID, uuid4
+
+
+class SubTaskStatus(str, Enum):
+ """Lifecycle states of a SubTask.
+
+ Members mix in ``str``, so each one compares equal to its literal value.
+ """
+ WAITING = "waiting"
+ IN_PROGRESS = "in progress"
+ COMPLETE = "complete"
+ FAILED = "failed"
+
+
+@dataclass
+class SubTask:
+    """A unit of work attached to a parent task, with lifecycle tracking."""
+
+    id: UUID
+    task_id: UUID
+    status: SubTaskStatus = SubTaskStatus.WAITING
+    inputs: Optional[dict[str, Any]] = None
+    outputs: Optional[dict[str, Any]] = None
+    cloud_logs_url: Optional[str] = None
+    job_started: Optional[datetime] = None
+    job_completed: Optional[datetime] = None
+
+    @classmethod
+    def create(
+        cls, *, task_id: UUID, inputs: Optional[dict[str, Any]] = None
+    ) -> "SubTask":
+        """Build a new WAITING subtask with a freshly generated id."""
+        new_id = uuid4()
+        return cls(
+            id=new_id,
+            task_id=task_id,
+            status=SubTaskStatus.WAITING,
+            inputs=inputs,
+        )
+
+    def start(self, cloud_logs_url: Optional[str] = None) -> None:
+        """Move the subtask to IN_PROGRESS.
+
+        Starting again while already IN_PROGRESS keeps the original
+        ``job_started`` timestamp. ``cloud_logs_url`` replaces the stored URL
+        only when a value is supplied.
+
+        Raises:
+            ValueError: if the status is neither WAITING nor IN_PROGRESS.
+        """
+        startable = (SubTaskStatus.WAITING, SubTaskStatus.IN_PROGRESS)
+        if self.status not in startable:
+            raise ValueError(f"cannot start subtask in status {self.status}")
+        if self.job_started is None:
+            self.job_started = datetime.now(timezone.utc)
+        self.status = SubTaskStatus.IN_PROGRESS
+        if cloud_logs_url is None:
+            return
+        self.cloud_logs_url = cloud_logs_url
+
+    def complete(self, result: Any = None) -> None:
+        """Mark the subtask COMPLETE and stamp ``job_completed`` in UTC.
+
+        A ``result`` of ``None`` leaves ``outputs`` untouched.
+        """
+        self.status = SubTaskStatus.COMPLETE
+        self.job_completed = datetime.now(timezone.utc)
+        if result is None:
+            return
+        self.outputs = {"result": result}
+
+    def fail(self, error: BaseException) -> None:
+        """Mark the subtask FAILED and store ``str(error)`` in ``outputs``."""
+        self.status = SubTaskStatus.FAILED
+        self.job_completed = datetime.now(timezone.utc)
+        message = str(error)
+        self.outputs = {"error": message}
diff --git a/domain/tasks/tasks.py b/domain/tasks/tasks.py
new file mode 100644
index 00000000..177258d6
--- /dev/null
+++ b/domain/tasks/tasks.py
@@ -0,0 +1,94 @@
+from dataclasses import dataclass
+from datetime import datetime, timezone
+from enum import Enum
+from typing import Optional
+from uuid import UUID, uuid4
+
+from domain.tasks.subtasks import SubTaskStatus
+
+
+class TaskStatus(str, Enum):
+ """Lifecycle states of a Task.
+
+ Members mix in ``str``, so each one compares equal to its literal value.
+ """
+ WAITING = "waiting"
+ IN_PROGRESS = "in progress"
+ COMPLETE = "complete"
+ FAILED = "failed"
+
+
+class Source(str, Enum):
+ """Kinds of origin a Task can carry in its ``source`` field.
+
+ NOTE(review): the values look like the name of the identifier held in
+ ``Task.source_id`` — confirm against callers.
+ """
+ PORTFOLIO = "portfolio_id"
+ HUBSPOT_DEAL = "hubspot_deal_id"
+
+
+@dataclass
+class Task:
+    """A tracked job whose status can be derived from its subtasks."""
+
+    id: UUID
+    task_source: str
+    status: TaskStatus = TaskStatus.WAITING
+    service: Optional[str] = None
+    source: Optional[Source] = None
+    source_id: Optional[str] = None
+    job_started: Optional[datetime] = None
+    job_completed: Optional[datetime] = None
+
+    @classmethod
+    def create(
+        cls,
+        *,
+        task_source: str,
+        service: Optional[str] = None,
+        source: Optional[Source] = None,
+        source_id: Optional[str] = None,
+    ) -> "Task":
+        """Build a new WAITING task with a freshly generated id.
+
+        ``job_started`` is stamped with the current UTC time at creation.
+
+        Raises:
+            ValueError: if ``task_source`` is empty or only whitespace.
+        """
+        if not task_source.strip():
+            raise ValueError("task_source must be non-empty")
+        return cls(
+            id=uuid4(),
+            task_source=task_source,
+            service=service,
+            source=source,
+            source_id=source_id,
+            status=TaskStatus.WAITING,
+            job_started=datetime.now(timezone.utc),
+        )
+
+    def start(self) -> None:
+        """Move the task to IN_PROGRESS, keeping any existing ``job_started``.
+
+        Raises:
+            ValueError: if the status is neither WAITING nor IN_PROGRESS.
+        """
+        if self.status not in (TaskStatus.WAITING, TaskStatus.IN_PROGRESS):
+            raise ValueError(f"cannot start task in status {self.status}")
+        if self.job_started is None:
+            self.job_started = datetime.now(timezone.utc)
+        self.status = TaskStatus.IN_PROGRESS
+
+    def complete(self) -> None:
+        """Mark the task COMPLETE and stamp ``job_completed`` in UTC."""
+        self.status = TaskStatus.COMPLETE
+        self.job_completed = datetime.now(timezone.utc)
+
+    def fail(self) -> None:
+        """Mark the task FAILED and stamp ``job_completed`` in UTC."""
+        self.status = TaskStatus.FAILED
+        self.job_completed = datetime.now(timezone.utc)
+
+    def recalculate_from_subtasks(self, statuses: list[SubTaskStatus]) -> None:
+        """Recompute Task.status from its SubTasks' statuses.
+
+        Rule (preserved from legacy _update_task_progress):
+          - any FAILED       → FAILED
+          - all COMPLETE     → COMPLETE
+          - any IN_PROGRESS  → IN_PROGRESS
+          - otherwise        → WAITING
+
+        ``job_completed`` is stamped for FAILED/COMPLETE and cleared
+        otherwise. Empty list is a no-op (newly-created task with no
+        subtasks).
+        """
+        if not statuses:
+            return
+        now = datetime.now(timezone.utc)
+        if SubTaskStatus.FAILED in statuses:
+            self.status = TaskStatus.FAILED
+            self.job_completed = now
+        # Compare by equality, like the ``in`` checks around it: SubTaskStatus
+        # mixes in ``str``, so a plain "complete" string must count as
+        # COMPLETE just as a plain "failed" string already counts as FAILED.
+        elif all(s == SubTaskStatus.COMPLETE for s in statuses):
+            self.status = TaskStatus.COMPLETE
+            self.job_completed = now
+        elif SubTaskStatus.IN_PROGRESS in statuses:
+            self.status = TaskStatus.IN_PROGRESS
+            self.job_completed = None
+        else:
+            self.status = TaskStatus.WAITING
+            self.job_completed = None
diff --git a/infrastructure/__init__.py b/infrastructure/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/infrastructure/address2uprn_queue_client.py b/infrastructure/address2uprn_queue_client.py
new file mode 100644
index 00000000..314e981f
--- /dev/null
+++ b/infrastructure/address2uprn_queue_client.py
@@ -0,0 +1,20 @@
+from uuid import UUID
+
+from infrastructure.sqs_client import SqsClient
+
+
+class Address2UprnQueueClient(SqsClient):
+    """Queue client whose messages carry a task id, subtask id and S3 URI."""
+
+    def publish(
+        self,
+        *,
+        parent_task_id: UUID,
+        child_subtask_id: UUID,
+        s3_uri: str,
+    ) -> str:
+        """Send one message and return whatever ``send`` returns.
+
+        Both UUIDs are serialised with ``str()``.
+        """
+        message = {
+            "task_id": str(parent_task_id),
+            "sub_task_id": str(child_subtask_id),
+            "s3_uri": s3_uri,
+        }
+        return self.send(message)
diff --git a/infrastructure/csv_s3_client.py b/infrastructure/csv_s3_client.py
new file mode 100644
index 00000000..8af8de73
--- /dev/null
+++ b/infrastructure/csv_s3_client.py
@@ -0,0 +1,33 @@
+import csv
+from io import StringIO
+
+from infrastructure.s3_client import S3Client
+from infrastructure.s3_uri import parse_s3_uri
+
+
+class CsvS3Client(S3Client):
+    """S3 client that reads and writes CSV objects as lists of row dicts."""
+
+    def read_rows(self, s3_uri: str) -> list[dict[str, str]]:
+        """Download the CSV at ``s3_uri`` and return one dict per data row.
+
+        The body is decoded as UTF-8 (BOM tolerated), falling back to
+        Windows-1252 when that fails.
+
+        Raises:
+            ValueError: if the URI's bucket differs from this client's bucket.
+        """
+        bucket, key = parse_s3_uri(s3_uri)
+        if self.bucket != bucket:
+            raise ValueError(
+                f"s3_uri bucket {bucket!r} does not match client bucket {self.bucket!r}"
+            )
+        payload = self.get_object(key)
+        try:
+            decoded = payload.decode("utf-8-sig")
+        except UnicodeDecodeError:
+            # Some uploads are Windows-1252 (e.g. £ as byte 0xA3), not UTF-8.
+            decoded = payload.decode("cp1252")
+
+        return [dict(record) for record in csv.DictReader(StringIO(decoded))]
+
+    def save_rows(self, rows: list[dict[str, str]], key: str) -> str:
+        """Write ``rows`` as UTF-8 CSV under ``key``.
+
+        The header comes from the keys of the first row. Returns the value
+        returned by ``put_object``.
+
+        Raises:
+            ValueError: if ``rows`` is empty, since no header can be derived.
+        """
+        if not rows:
+            raise ValueError("Cannot save an empty rows list: header is unknown")
+        header = list(rows[0])
+        out = StringIO()
+        writer = csv.DictWriter(out, fieldnames=header)
+        writer.writeheader()
+        for row in rows:
+            writer.writerow(row)
+        encoded = out.getvalue().encode("utf-8")
+        return self.put_object(key, encoded)
diff --git a/infrastructure/postgres/__init__.py b/infrastructure/postgres/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/infrastructure/postgres/config.py b/infrastructure/postgres/config.py
new file mode 100644
index 00000000..c39c6f30
--- /dev/null
+++ b/infrastructure/postgres/config.py
@@ -0,0 +1,33 @@
+from dataclasses import dataclass
+from typing import Mapping
+
+
+@dataclass(frozen=True)
+class PostgresConfig:
+    """Connection and pool settings used to build a PostgreSQL engine URL."""
+
+    host: str
+    port: int
+    username: str
+    password: str
+    database: str
+    driver: str = "psycopg2"
+    pool_size: int = 3
+    max_overflow: int = 5
+    pool_pre_ping: bool = True
+    pool_recycle: int = 300
+
+    def url(self) -> str:
+        """Return the ``postgresql+<driver>://`` connection URL.
+
+        Username and password are percent-encoded, so they must be supplied
+        raw (not already URL-encoded).
+        """
+        # Function-scope import: the module only imports dataclass/Mapping.
+        from urllib.parse import quote
+
+        # Credentials containing "@", ":", "/", "%" or spaces would otherwise
+        # be read as URL delimiters or escapes. ``safe=""`` also encodes "/",
+        # and ``quote`` (unlike ``quote_plus``) never turns a space into "+".
+        username = quote(self.username, safe="")
+        password = quote(self.password, safe="")
+        return (
+            f"postgresql+{self.driver}://"
+            f"{username}:{password}@{self.host}:{self.port}/{self.database}"
+        )
+
+    @classmethod
+    def from_env(cls, env: Mapping[str, str]) -> "PostgresConfig":
+        """Build a config from ``POSTGRES_*`` entries of ``env``.
+
+        ``POSTGRES_DRIVER`` is optional (default ``"psycopg2"``); pool
+        settings are not read from ``env`` and keep their defaults.
+
+        Raises:
+            KeyError: if a required ``POSTGRES_*`` entry is missing.
+            ValueError: if ``POSTGRES_PORT`` is not an integer.
+        """
+        return cls(
+            host=env["POSTGRES_HOST"],
+            port=int(env["POSTGRES_PORT"]),
+            username=env["POSTGRES_USERNAME"],
+            password=env["POSTGRES_PASSWORD"],
+            database=env["POSTGRES_DATABASE"],
+            driver=env.get("POSTGRES_DRIVER", "psycopg2"),
+        )
diff --git a/infrastructure/postgres/engine.py b/infrastructure/postgres/engine.py
new file mode 100644
index 00000000..0de9efcb
--- /dev/null
+++ b/infrastructure/postgres/engine.py
@@ -0,0 +1,18 @@
+from sqlalchemy.engine import Engine
+from sqlmodel import Session, create_engine
+
+from infrastructure.postgres.config import PostgresConfig
+
+
+def make_engine(config: PostgresConfig) -> Engine:
+    """Create an engine for ``config.url()`` with the config's pool settings."""
+    connection_url = config.url()
+    engine = create_engine(
+        connection_url,
+        pool_size=config.pool_size,
+        max_overflow=config.max_overflow,
+        pool_pre_ping=config.pool_pre_ping,
+        pool_recycle=config.pool_recycle,
+    )
+    return engine
+
+
+def make_session(engine: Engine) -> Session:
+    """Return a new ``Session`` bound to ``engine``."""
+    session = Session(engine)
+    return session
diff --git a/infrastructure/postgres/subtask_table.py b/infrastructure/postgres/subtask_table.py
new file mode 100644
index 00000000..dec34fbf
--- /dev/null
+++ b/infrastructure/postgres/subtask_table.py
@@ -0,0 +1,21 @@
+from datetime import datetime, timezone
+from typing import ClassVar, Optional
+from uuid import UUID, uuid4
+
+from sqlmodel import Field, SQLModel
+
+
+class SubTaskRow(SQLModel, table=True):
+ """SQLModel table mapping for the ``sub_task`` table."""
+
+ __tablename__: ClassVar[str] = "sub_task" # pyright: ignore[reportIncompatibleVariableOverride]
+
+ id: UUID = Field(default_factory=uuid4, primary_key=True, index=True)
+ # Foreign key to the ``id`` column of the ``tasks`` table.
+ task_id: UUID = Field(foreign_key="tasks.id")
+ job_started: Optional[datetime] = None
+ job_completed: Optional[datetime] = None
+ # Stored as a plain string; defaults to "waiting".
+ status: str = Field(default="waiting")
+ # NOTE(review): inputs/outputs are strings here — presumably serialised
+ # by whatever layer writes this row; confirm the encoding.
+ inputs: Optional[str] = None
+ outputs: Optional[str] = None
+ cloud_logs_url: Optional[str] = None
+ # Defaults to the current UTC time when the row object is built; nothing
+ # in this class refreshes it on later updates.
+ updated_at: datetime = Field(
+ default_factory=lambda: datetime.now(timezone.utc)
+ )
diff --git a/infrastructure/postgres/task_table.py b/infrastructure/postgres/task_table.py
new file mode 100644
index 00000000..32e5450b
--- /dev/null
+++ b/infrastructure/postgres/task_table.py
@@ -0,0 +1,36 @@
+from datetime import datetime, timezone
+from typing import ClassVar, Optional
+from uuid import UUID, uuid4
+
+from sqlalchemy import Column
+from sqlalchemy import Enum as SAEnum
+from sqlmodel import Field, SQLModel
+
+from domain.tasks.tasks import Source
+
+
+class TaskRow(SQLModel, table=True):
+ """SQLModel table mapping for the ``tasks`` table."""
+
+ __tablename__: ClassVar[str] = "tasks" # pyright: ignore[reportIncompatibleVariableOverride]
+
+ id: UUID = Field(default_factory=uuid4, primary_key=True, index=True)
+ task_source: str
+ job_started: Optional[datetime] = None
+ job_completed: Optional[datetime] = None
+ # Stored as a plain string; defaults to "waiting".
+ status: str = Field(default="waiting")
+ service: Optional[str] = None
+ # Defaults to the current UTC time when the row object is built; nothing
+ # in this class refreshes it on later updates.
+ updated_at: datetime = Field(
+ default_factory=lambda: datetime.now(timezone.utc)
+ )
+
+ # Nullable SQL enum named "source". ``values_callable`` makes the column
+ # use each member's ``.value`` rather than its name.
+ source: Optional[Source] = Field(
+ default=None,
+ sa_column=Column(
+ SAEnum(
+ Source,
+ name="source",
+ values_callable=lambda cls: [m.value for m in cls], # pyright: ignore[reportUnknownLambdaType, reportUnknownMemberType, reportUnknownVariableType]
+ ),
+ nullable=True,
+ ),
+ )
+ source_id: Optional[str] = None
diff --git a/infrastructure/s3_client.py b/infrastructure/s3_client.py
new file mode 100644
index 00000000..a789fcc2
--- /dev/null
+++ b/infrastructure/s3_client.py
@@ -0,0 +1,22 @@
+from typing import Any
+
+
+class S3Client:
+    """Thin wrapper that binds a boto-style S3 client to a single bucket."""
+
+    def __init__(self, boto_s3_client: Any, bucket: str) -> None:
+        self._bucket = bucket
+        self._client = boto_s3_client
+
+    @property
+    def bucket(self) -> str:
+        """Name of the bucket every call on this client targets."""
+        return self._bucket
+
+    def get_object(self, key: str) -> bytes:
+        """Fetch ``key`` from the bucket and return the whole body."""
+        response: dict[str, Any] = self._client.get_object(
+            Key=key, Bucket=self._bucket
+        )
+        stream = response["Body"]
+        payload: bytes = stream.read()
+        return payload
+
+    def put_object(self, key: str, body: bytes) -> str:
+        """Upload ``body`` under ``key`` and return its ``s3://`` URI."""
+        bucket = self._bucket
+        self._client.put_object(Body=body, Bucket=bucket, Key=key)
+        return f"s3://{bucket}/{key}"
diff --git a/infrastructure/s3_uri.py b/infrastructure/s3_uri.py
new file mode 100644
index 00000000..1dd5d967
--- /dev/null
+++ b/infrastructure/s3_uri.py
@@ -0,0 +1,25 @@
+from urllib.parse import unquote
+
+
+def parse_s3_uri(s3_uri: str) -> tuple[str, str]:
+    # Return (bucket, key) for an s3:// URI or console object URL; ValueError if malformed.
+    if s3_uri.startswith("s3://"):
+        parts = s3_uri[len("s3://") :].split("/", 1)
+        if len(parts) < 2 or not parts[0] or not parts[1]:
+            raise ValueError("S3 URI must include both a bucket and a key")
+        return parts[0], parts[1]
+
+    if "?" not in s3_uri:
+        raise ValueError(f"Not an s3:// URI and has no query string: {s3_uri!r}")
+    base, query = s3_uri.split("?", 1)
+    if "/s3/object/" not in base:
+        raise ValueError(f"Console URL has no '/s3/object/' segment: {s3_uri!r}")
+    bucket = base.split("/s3/object/", 1)[1]
+
+    # Keep only name=value items (last duplicate wins); `prefix` carries the key.
+    pairs = (item.split("=", 1) for item in query.split("&") if "=" in item)
+    key = unquote({name: value for name, value in pairs}.get("prefix", ""))
+    if not bucket or not key:
+        # Mirror the s3:// branch: never hand back an empty bucket or key.
+        raise ValueError(f"Console URL must include a bucket and key: {s3_uri!r}")
+    return bucket, key
diff --git a/infrastructure/sqs_client.py b/infrastructure/sqs_client.py
new file mode 100644
index 00000000..6fe8dd2e
--- /dev/null
+++ b/infrastructure/sqs_client.py
@@ -0,0 +1,20 @@
+import json
+from typing import Any
+
+
+class SqsClient:  # thin wrapper that binds an injected SQS client to a single queue URL
+ def __init__(self, boto_sqs_client: Any, queue_url: str) -> None:
+ self._client = boto_sqs_client  # typed Any; presumably a boto3 SQS client -- confirm with callers
+ self._queue_url = queue_url
+
+ @property
+ def queue_url(self) -> str:  # read-only view of the queue URL given at construction
+ return self._queue_url
+
+ def send(self, body: dict[str, Any]) -> str:  # send `body` as JSON and return the response's MessageId
+ response: dict[str, Any] = self._client.send_message(
+ QueueUrl=self._queue_url,
+ MessageBody=json.dumps(body),  # body must be JSON-serialisable or json.dumps raises
+ )
+ message_id: str = response["MessageId"]
+ return message_id
diff --git a/orchestration/__init__.py b/orchestration/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/orchestration/postcode_splitter_orchestrator.py b/orchestration/postcode_splitter_orchestrator.py
new file mode 100644
index 00000000..36f4b515
--- /dev/null
+++ b/orchestration/postcode_splitter_orchestrator.py
@@ -0,0 +1,55 @@
+from __future__ import annotations
+
+from uuid import UUID
+
+from infrastructure.address2uprn_queue_client import Address2UprnQueueClient
+from orchestration.task_orchestrator import TaskOrchestrator
+from domain.addresses.postcode_batching import iter_postcode_grouped_batches
+from repositories.user_address.user_address_repository import UserAddressRepository
+
+
+class PostcodeSplitterOrchestrator:  # splits one address batch into postcode-grouped batches and dispatches each
+ def __init__(
+ self,
+ task_orchestrator: TaskOrchestrator,
+ user_address_repo: UserAddressRepository,
+ queue_client: Address2UprnQueueClient,
+ max_batch_size: int = 500,  # forwarded unchanged to iter_postcode_grouped_batches
+ ) -> None:
+ self._task_orchestrator = task_orchestrator
+ self._user_address_repo = user_address_repo
+ self._queue_client = queue_client
+ self._max_batch_size = max_batch_size
+
+ def split_and_dispatch(
+ self,
+ *,
+ parent_task_id: UUID,
+ parent_subtask_id: UUID,  # only used to build the storage path prefix below
+ input_s3_uri: str,
+ ) -> list[UUID]:  # ids of the child subtasks, in the order they were dispatched
+ addresses = self._user_address_repo.load_batch(input_s3_uri)
+ path_prefix = (
+ f"ara_postcode_splitter_batches/{parent_task_id}/{parent_subtask_id}"
+ )
+
+ child_ids: list[UUID] = []
+ for batch in iter_postcode_grouped_batches(
+ addresses, max_batch_size=self._max_batch_size
+ ):
+ batch_uri = self._user_address_repo.save_batch(batch, path_prefix)  # persist the batch first
+ child = self._task_orchestrator.create_child_subtask(  # then register a child under the parent task
+ parent_task_id,
+ inputs={
+ "task_id": str(parent_task_id),
+ "s3_uri": batch_uri,
+ },
+ )
+ self._queue_client.publish(  # finally publish a message carrying the child id and batch URI
+ parent_task_id=parent_task_id,
+ child_subtask_id=child.id,
+ s3_uri=batch_uri,
+ )
+ child_ids.append(child.id)
+
+ return child_ids
diff --git a/orchestration/task_orchestrator.py b/orchestration/task_orchestrator.py
new file mode 100644
index 00000000..ebb71a32
--- /dev/null
+++ b/orchestration/task_orchestrator.py
@@ -0,0 +1,106 @@
+from typing import Any, Callable, Optional
+from uuid import UUID
+
+from domain.tasks.subtasks import SubTask
+from domain.tasks.tasks import Source, Task
+from repositories.tasks.subtask_repository import SubTaskRepository
+from repositories.tasks.task_repository import TaskRepository
+from utilities.private import private
+
+
+class TaskOrchestrator:
+ """Coordinates Task + SubTask lifecycle.
+
+ Exposes primitives (start/complete/fail_subtask) for handlers that want
+ fine-grained control, and a high-level run_subtask wrapper that owns the
+ try/except so it can replace the body of the legacy subtask_handler
+ decorator in backend/utils/subtasks.py.
+
+ Each primitive saves the SubTask, then recomputes the parent Task's
+ status from all its children.
+ """
+
+ def __init__(
+ self,
+ task_repo: TaskRepository,
+ subtask_repo: SubTaskRepository,
+ ) -> None:
+ self._tasks = task_repo
+ self._subtasks = subtask_repo
+
+ def create_task_with_subtask(  # creates a Task, then one SubTask pointing at it; no cascade is run
+ self,
+ *,
+ task_source: str,
+ inputs: Optional[dict[str, Any]] = None,  # passed to the SubTask, not the Task
+ service: Optional[str] = None,
+ source: Optional[Source] = None,
+ source_id: Optional[str] = None,
+ ) -> tuple[Task, SubTask]:
+ task = Task.create(
+ task_source=task_source,
+ service=service,
+ source=source,
+ source_id=source_id,
+ )
+ self._tasks.create(task)  # the repository's return value is ignored; the local object is returned
+ subtask = SubTask.create(task_id=task.id, inputs=inputs)
+ self._subtasks.create(subtask)
+ return task, subtask
+
+ def create_child_subtask(  # adds a SubTask to an existing task; the parent is not recalculated here
+ self,
+ parent_task_id: UUID,
+ *,
+ inputs: Optional[dict[str, Any]] = None,
+ ) -> SubTask:
+ subtask = SubTask.create(task_id=parent_task_id, inputs=inputs)
+ self._subtasks.create(subtask)
+ return subtask
+
+ def start_subtask(
+ self, subtask_id: UUID, cloud_logs_url: Optional[str] = None
+ ) -> SubTask:
+ subtask = self._subtasks.get(subtask_id)
+ subtask.start(cloud_logs_url)  # state change is delegated to the SubTask domain object
+ self._subtasks.save(subtask)
+ self._cascade(subtask.task_id)  # recompute the parent task after the save
+ return subtask
+
+ def complete_subtask(
+ self, subtask_id: UUID, result: Any = None
+ ) -> SubTask:
+ subtask = self._subtasks.get(subtask_id)
+ subtask.complete(result)
+ self._subtasks.save(subtask)
+ self._cascade(subtask.task_id)  # recompute the parent task after the save
+ return subtask
+
+ def fail_subtask(self, subtask_id: UUID, error: BaseException) -> SubTask:
+ subtask = self._subtasks.get(subtask_id)
+ subtask.fail(error)
+ self._subtasks.save(subtask)
+ self._cascade(subtask.task_id)  # recompute the parent task after the save
+ return subtask
+
+ def run_subtask(  # start -> work() -> complete, or fail and re-raise
+ self,
+ subtask_id: UUID,
+ work: Callable[[], Any],  # zero-argument callable whose return value becomes the result
+ cloud_logs_url: Optional[str] = None,
+ ) -> Any:
+ self.start_subtask(subtask_id, cloud_logs_url)  # outside the try: an error here is not recorded via fail_subtask
+ try:
+ result = work()
+ except Exception as e:  # BaseException subclasses such as KeyboardInterrupt are not caught
+ self.fail_subtask(subtask_id, e)
+ raise  # the original exception still reaches the caller
+ self.complete_subtask(subtask_id, result)
+ return result
+
+ @private
+ def _cascade(self, task_id: UUID) -> None:  # reload all subtask statuses and let the Task recalculate itself
+ statuses = [s.status for s in self._subtasks.list_by_task(task_id)]
+ task = self._tasks.get(task_id)
+ task.recalculate_from_subtasks(statuses)
+ self._tasks.save(task)
diff --git a/pytest.ini b/pytest.ini
index f1a4ec8f..d9535237 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -3,6 +3,28 @@ pythonpath = . packages/domain/src services/ml_training_data/src
log_cli = true
log_cli_level = INFO
addopts = --cov-report term-missing --cov=etl/epc --cov=recommendations --cov=backend --cov=etl/epc_clean --cov=etl/spatial
-testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests backend/categorisation/tests backend/export/tests etl/hubspot/tests datatypes/epc/schema/tests datatypes/epc/surveys/tests datatypes/epc/domain/tests backend/ecmk_fetcher/tests/ backend/pashub_fetcher/tests backend/documents_parser/tests backend/magic_plan/tests datatypes/magicplan/api/tests datatypes/magicplan/domain/tests backend/app/db/functions/tests packages/domain/src/domain/ml/tests
+testpaths =
+ recommendations/tests
+ backend/tests
+ backend/address2UPRN/tests
+ backend/app/db/functions/tests
+ backend/categorisation/tests
+ backend/condition/tests
+ backend/documents_parser/tests
+ backend/ecmk_fetcher/tests
+ backend/export/tests
+ backend/magic_plan/tests
+ backend/onboarders/tests
+ backend/pashub_fetcher/tests
+ datatypes/epc/domain/tests
+ datatypes/epc/schema/tests
+ datatypes/epc/surveys/tests
+ datatypes/magicplan/api/tests
+ datatypes/magicplan/domain/tests
+ etl/epc/tests
+ etl/epc_clean/tests
+ etl/hubspot/tests
+ etl/spatial/tests
+ packages/domain/src/domain/ml/tests
markers =
integration: mark a test as an integration test
diff --git a/repositories/__init__.py b/repositories/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/repositories/tasks/__init__.py b/repositories/tasks/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/repositories/tasks/subtask_postgres_repository.py b/repositories/tasks/subtask_postgres_repository.py
new file mode 100644
index 00000000..affc280e
--- /dev/null
+++ b/repositories/tasks/subtask_postgres_repository.py
@@ -0,0 +1,89 @@
+import json
+from datetime import datetime, timezone
+from typing import Any, Optional
+from uuid import UUID
+
+from sqlmodel import Session, select
+
+from domain.tasks.subtasks import SubTask, SubTaskStatus
+from infrastructure.postgres.subtask_table import SubTaskRow
+from repositories.tasks.subtask_repository import SubTaskRepository
+from utilities.private import private
+
+
+class SubTaskPostgresRepository(SubTaskRepository):  # SubTaskRepository backed by a SQLModel Session
+ def __init__(self, session: Session) -> None:
+ self._session = session
+
+ def create(self, subtask: SubTask) -> SubTask:  # insert, commit, then return the row mapped back to a SubTask
+ row = self._to_row(subtask)
+ self._session.add(row)
+ self._session.commit()
+ self._session.refresh(row)
+ return self._to_domain(row)
+
+ def get(self, subtask_id: UUID) -> SubTask:  # raises ValueError when no row has this id
+ row = self._session.get(SubTaskRow, subtask_id)
+ if row is None:
+ raise ValueError(f"SubTask {subtask_id} not found")
+ return self._to_domain(row)
+
+ def save(self, subtask: SubTask) -> None:  # update an existing row; raises ValueError when it is missing
+ row = self._session.get(SubTaskRow, subtask.id)
+ if row is None:
+ raise ValueError(f"SubTask {subtask.id} not found")
+ row.status = subtask.status.value
+ row.job_started = subtask.job_started
+ row.job_completed = subtask.job_completed
+ row.inputs = (
+ json.dumps(subtask.inputs) if subtask.inputs is not None else None  # stored as a JSON string, or None
+ )
+ row.outputs = (
+ json.dumps(subtask.outputs) if subtask.outputs is not None else None  # stored as a JSON string, or None
+ )
+ row.cloud_logs_url = subtask.cloud_logs_url
+ row.updated_at = datetime.now(timezone.utc)  # stamped on every save; task_id is not rewritten
+ self._session.add(row)
+ self._session.commit()
+
+ def list_by_task(self, task_id: UUID) -> list[SubTask]:  # no ORDER BY is applied to the query
+ rows = self._session.exec(
+ select(SubTaskRow).where(SubTaskRow.task_id == task_id)
+ ).all()
+ return [self._to_domain(r) for r in rows]
+
+ @private
+ def _to_row(self, subtask: SubTask) -> SubTaskRow:  # domain object -> table row
+ return SubTaskRow(
+ id=subtask.id,
+ task_id=subtask.task_id,
+ status=subtask.status.value,
+ inputs=(
+ json.dumps(subtask.inputs) if subtask.inputs is not None else None
+ ),
+ outputs=(
+ json.dumps(subtask.outputs)
+ if subtask.outputs is not None
+ else None
+ ),
+ cloud_logs_url=subtask.cloud_logs_url,
+ job_started=subtask.job_started,
+ job_completed=subtask.job_completed,
+ )
+
+ @private
+ def _to_domain(self, row: SubTaskRow) -> SubTask:  # table row -> domain object
+ return SubTask(
+ id=row.id,
+ task_id=row.task_id,
+ status=SubTaskStatus(row.status.lower()),  # stored status is lower-cased before the enum lookup
+ inputs=_loads_or_none(row.inputs),
+ outputs=_loads_or_none(row.outputs),
+ cloud_logs_url=row.cloud_logs_url,
+ job_started=row.job_started,
+ job_completed=row.job_completed,
+ )
+
+
+def _loads_or_none(s: Optional[str]) -> Optional[dict[str, Any]]:  # decode a stored JSON column value
+ return json.loads(s) if s else None  # None and "" both yield None; anything else is parsed as JSON
diff --git a/repositories/tasks/subtask_repository.py b/repositories/tasks/subtask_repository.py
new file mode 100644
index 00000000..adb36f99
--- /dev/null
+++ b/repositories/tasks/subtask_repository.py
@@ -0,0 +1,18 @@
+from abc import ABC, abstractmethod
+from uuid import UUID
+
+from domain.tasks.subtasks import SubTask
+
+
+class SubTaskRepository(ABC):  # abstract persistence interface for SubTask objects
+ @abstractmethod
+ def create(self, subtask: SubTask) -> SubTask: ...  # persist a new SubTask and return a SubTask
+
+ @abstractmethod
+ def get(self, subtask_id: UUID) -> SubTask: ...  # look a SubTask up by id
+
+ @abstractmethod
+ def save(self, subtask: SubTask) -> None: ...  # persist changes to an existing SubTask
+
+ @abstractmethod
+ def list_by_task(self, task_id: UUID) -> list[SubTask]: ...  # all SubTasks belonging to one task
diff --git a/repositories/tasks/task_postgres_repository.py b/repositories/tasks/task_postgres_repository.py
new file mode 100644
index 00000000..d23fe91c
--- /dev/null
+++ b/repositories/tasks/task_postgres_repository.py
@@ -0,0 +1,77 @@
+"""
+Postgres implementation of TaskRepository.
+
+NOTE: this repository owns only the `tasks` table. Unlike the legacy
+backend.app.db.functions.tasks.Tasks.TasksInterface.create_task, it does NOT
+auto-create a child SubTask. Do not rewire existing Lambda callers to this
+repo until the SubTask aggregate + TaskOrchestrator slice lands — they would
+silently lose their initial SubTask row.
+"""
+
+from datetime import datetime, timezone
+from uuid import UUID
+
+from sqlmodel import Session
+
+from domain.tasks.tasks import Task, TaskStatus
+from infrastructure.postgres.task_table import TaskRow
+from repositories.tasks.task_repository import TaskRepository
+from utilities.private import private
+
+
+class TaskPostgresRepository(TaskRepository):  # TaskRepository backed by a SQLModel Session
+ def __init__(self, session: Session) -> None:
+ self._session = session
+
+ def create(self, task: Task) -> Task:  # insert, commit, then return the row mapped back to a Task
+ row = self._to_row(task)
+ self._session.add(row)
+ self._session.commit()
+ self._session.refresh(row)
+ return self._to_domain(row)
+
+ def get(self, task_id: UUID) -> Task:  # raises ValueError when no row has this id
+ row = self._session.get(TaskRow, task_id)
+ if row is None:
+ raise ValueError(f"Task {task_id} not found")
+ return self._to_domain(row)
+
+ def save(self, task: Task) -> None:  # update an existing row; raises ValueError when it is missing
+ row = self._session.get(TaskRow, task.id)
+ if row is None:
+ raise ValueError(f"Task {task.id} not found")
+ row.status = task.status.value  # task_source is the one column this method never rewrites
+ row.job_started = task.job_started
+ row.job_completed = task.job_completed
+ row.service = task.service
+ row.source = task.source
+ row.source_id = task.source_id
+ row.updated_at = datetime.now(timezone.utc)  # stamped on every save
+ self._session.add(row)
+ self._session.commit()
+
+ @private
+ def _to_row(self, task: Task) -> TaskRow:  # domain object -> table row
+ return TaskRow(
+ id=task.id,
+ task_source=task.task_source,
+ status=task.status.value,
+ service=task.service,
+ source=task.source,
+ source_id=task.source_id,
+ job_started=task.job_started,
+ job_completed=task.job_completed,
+ )
+
+ @private
+ def _to_domain(self, row: TaskRow) -> Task:  # table row -> domain object
+ return Task(
+ id=row.id,
+ task_source=row.task_source,
+ status=TaskStatus(row.status.lower()),  # stored status is lower-cased before the enum lookup
+ service=row.service,
+ source=row.source,
+ source_id=row.source_id,
+ job_started=row.job_started,
+ job_completed=row.job_completed,
+ )
diff --git a/repositories/tasks/task_repository.py b/repositories/tasks/task_repository.py
new file mode 100644
index 00000000..8bdce0cc
--- /dev/null
+++ b/repositories/tasks/task_repository.py
@@ -0,0 +1,15 @@
+from abc import ABC, abstractmethod
+from uuid import UUID
+
+from domain.tasks.tasks import Task
+
+
+class TaskRepository(ABC):  # abstract persistence interface for Task objects
+ @abstractmethod
+ def create(self, task: Task) -> Task: ...  # persist a new Task and return a Task
+
+ @abstractmethod
+ def get(self, task_id: UUID) -> Task: ...  # look a Task up by id
+
+ @abstractmethod
+ def save(self, task: Task) -> None: ...  # persist changes to an existing Task
diff --git a/repositories/user_address/__init__.py b/repositories/user_address/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/repositories/user_address/user_address_csv_s3_repository.py b/repositories/user_address/user_address_csv_s3_repository.py
new file mode 100644
index 00000000..058fd5a5
--- /dev/null
+++ b/repositories/user_address/user_address_csv_s3_repository.py
@@ -0,0 +1,63 @@
+from __future__ import annotations
+
+import uuid
+from datetime import datetime, timezone
+from typing import Optional
+
+from domain.addresses.user_address import UserAddress
+from domain.postcode import Postcode
+from infrastructure.csv_s3_client import CsvS3Client
+from repositories.user_address.user_address_repository import UserAddressRepository
+
+_ADDRESS_COLUMNS: tuple[str, str, str] = ("Address 1", "Address 2", "Address 3")  # joined with ", " into user_address
+_POSTCODE_COLUMN: str = "postcode"  # required input column (checked on the first row)
+_INTERNAL_REFERENCE_COLUMN: str = "Internal Reference"  # optional input column
+_POSTCODE_CLEAN_COLUMN: str = "postcode_clean"  # extra column written by save_batch
+
+
+class UserAddressCsvS3Repository(UserAddressRepository):  # UserAddressRepository reading/writing CSV rows via CsvS3Client
+ def __init__(self, csv_client: CsvS3Client, bucket: str) -> None:
+ self._csv_client = csv_client
+ self._bucket = bucket  # stored, but not read by either method of this class
+
+ def load_batch(self, s3_uri: str) -> list[UserAddress]:  # one UserAddress per CSV row
+ rows = self._csv_client.read_rows(s3_uri)
+ if rows and _POSTCODE_COLUMN not in rows[0]:  # only the first row's keys are inspected
+ raise ValueError(
+ f"Input CSV {s3_uri} has no {_POSTCODE_COLUMN!r} column; "
+ f"columns present: {sorted(rows[0])}"
+ )
+ addresses: list[UserAddress] = []
+ for row in rows:
+ parts = [
+ row[col].strip()
+ for col in _ADDRESS_COLUMNS
+ if col in row and row[col].strip()  # missing or blank address columns are skipped
+ ]
+ user_address = ", ".join(parts)
+ postcode = row.get(_POSTCODE_COLUMN, "")
+ raw_ref = row.get(_INTERNAL_REFERENCE_COLUMN, "").strip()
+ internal_reference: Optional[str] = raw_ref or None  # blank reference becomes None
+ addresses.append(
+ UserAddress(
+ user_address=user_address,
+ postcode=Postcode(postcode),
+ internal_reference=internal_reference,
+ source_row=row,  # the raw row is kept so save_batch can write it back out
+ )
+ )
+ return addresses
+
+ def save_batch(self, addresses: list[UserAddress], path_prefix: str) -> str:  # returns whatever save_rows returns
+ rows: list[dict[str, str]] = [
+ {**addr.source_row, _POSTCODE_CLEAN_COLUMN: str(addr.postcode)}  # original columns plus postcode_clean
+ for addr in addresses
+ ]
+
+ # TODO: [New Starter Task] filename generation could be standardised and
+ # made easier to read and test in a future implementation. Build that!
+ filename = (
+ f"{datetime.now(timezone.utc).isoformat()}_{uuid.uuid4().hex[:8]}.csv"  # UTC ISO timestamp + 8 hex chars
+ )
+ key = f"{path_prefix.rstrip('/')}/{filename}"  # trailing slashes on the prefix are dropped first
+ return self._csv_client.save_rows(rows, key)
diff --git a/repositories/user_address/user_address_repository.py b/repositories/user_address/user_address_repository.py
new file mode 100644
index 00000000..b2c0f866
--- /dev/null
+++ b/repositories/user_address/user_address_repository.py
@@ -0,0 +1,13 @@
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+
+from domain.addresses.user_address import UserAddress
+
+
+class UserAddressRepository(ABC):  # abstract storage interface for batches of UserAddress
+ @abstractmethod
+ def load_batch(self, s3_uri: str) -> list[UserAddress]: ...  # read every address stored at s3_uri
+
+ @abstractmethod
+ def save_batch(self, addresses: list[UserAddress], path_prefix: str) -> str: ...  # write a batch under path_prefix; returns a str
diff --git a/run_backlog.sh b/run_backlog.sh
deleted file mode 100644
index 398e921c..00000000
--- a/run_backlog.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/bash
-backlog browser --port 6421
diff --git a/scripts/rename_sharepoint_files.py b/scripts/rename_sharepoint_files.py
new file mode 100644
index 00000000..881b96ef
--- /dev/null
+++ b/scripts/rename_sharepoint_files.py
@@ -0,0 +1,128 @@
+"""
+Rename files in SharePoint property folders to the canonical format:
+ {UPRN}_{Street} {Postcode}_{Document Name}.ext
+
+Set DRY_RUN = False when ready to commit. Run from repo root.
+Required env vars: SHAREPOINT_CLIENT_ID, SHAREPOINT_CLIENT_SECRET,
+ SHAREPOINT_TENANT_ID, SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID
+"""
+
+import csv
+import os
+from typing import Optional
+
+from backend.pashub_fetcher.sharepoint_subfolders import SharepointSubfolders
+from utils.logger import setup_logger
+from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
+from utils.sharepoint.domna_sites import DomnaSites
+
+DRY_RUN: bool = True  # while True, main() only logs the renames it would perform
+CSV_PATH: str = "scripts/sero_address_list.csv"  # relative path; the module docstring says to run from repo root
+
+BASE_PATH = (  # folder that main() expects to contain one "{address}, {postcode}" folder per row
+ "Osmosis-ACD Projects/Sero-Clarion Housing/"
+ "Sero Project Documents/Property Folders"
+)
+ASSESSMENT_SUBFOLDER = "A. Assessment"  # final path segment of the folder main() lists
+
+logger = setup_logger()
+
+
+def build_canonical_filename(
+ uprn: str, address: str, postcode: str, original_name: str
+) -> Optional[str]:
+ """
+ Returns the canonical filename, or None if the file is already renamed.
+
+ Already-renamed: name starts with "{uprn}_".
+ Strips any existing address prefix (address+postcode first, then address alone)
+ before inserting the canonical prefix.
+ """
+ if original_name.startswith(f"{uprn}_"):
+ return None
+
+ stem, ext = os.path.splitext(original_name)  # ext keeps its leading dot, or is "" when there is none
+ stem_lower = stem.lower()
+
+ street = address.split(",")[0].strip()  # text before the first comma of the address
+ prefixes = [  # tried in this order; the first match wins
+ f"{address} {postcode}",
+ address,
+ f"{street} {postcode}",
+ street,
+ ]
+
+ doc_name = stem
+ for prefix in prefixes:
+ if stem_lower.startswith(prefix.lower()):  # case-insensitive prefix match
+ doc_name = stem[len(prefix) :]  # slice the original-case stem, not the lowered copy
+ break
+
+ if doc_name.startswith(" - "):  # drop one leading " - " or " _ " separator
+ doc_name = doc_name[3:]
+ elif doc_name.startswith(" _ "):
+ doc_name = doc_name[3:]
+ doc_name = doc_name.strip()
+
+ street_post = f"{street} {postcode}"
+ if doc_name:
+ return f"{uprn}_{street_post}_{doc_name}{ext}"
+ return f"{uprn}_{street_post}{ext}"  # nothing left after stripping: omit the document-name part
+
+
+def main() -> None:  # for each CSV row, list the property's assessment folder and rename (or log) its files
+ sp_client = DomnaSharepointClient(DomnaSites.SOCIAL_HOUSING_WAVE_3)
+
+ with open(CSV_PATH, newline="", encoding="utf-8-sig") as f:  # utf-8-sig drops a leading BOM if present
+ reader = csv.DictReader(f)
+ required = {"UPRN", "Address", "Postcode"}
+ if not reader.fieldnames or not required.issubset(set(reader.fieldnames)):
+ raise ValueError(
+ f"CSV missing required columns. Expected {required}, got {reader.fieldnames}"
+ )
+
+ for row in reader:
+ uprn = row["UPRN"].strip()
+ address = row["Address"].strip()
+ postcode = row["Postcode"].strip()
+ folder_path = (
+ f"{BASE_PATH}/{address}, {postcode}"
+ f"/{SharepointSubfolders.ASSESSMENT.value}/{ASSESSMENT_SUBFOLDER}"
+ )
+
+ try:
+ contents = sp_client.get_folders_in_path(folder_path)
+ except ValueError:  # NOTE(review): presumably raised by the client for a missing folder -- confirm
+ logger.warning(f"Missing folder for UPRN {uprn}: {folder_path}")
+ continue
+
+ for item in contents.get("value", []):
+ if "file" not in item:  # skip entries without a "file" key (presumably folders)
+ continue
+
+ original_name: str = item["name"]
+ new_name = build_canonical_filename(
+ uprn, address, postcode, original_name
+ )
+
+ if new_name is None:  # name already starts with "{uprn}_"
+ continue
+
+ if DRY_RUN:
+ logger.info(
+ f'[DRY RUN] Renaming: "{original_name}" → "{new_name}" (UPRN: {uprn})'
+ )
+ else:
+ try:
+ sp_client.rename_file(item["id"], new_name)
+ logger.info(
+ f'Renamed: "{original_name}" → "{new_name}" (UPRN: {uprn})'
+ )
+ except Exception as e:  # log the failure and carry on with the next file
+ logger.error(
+ f'Failed to rename "{original_name}" → "{new_name}" (UPRN: {uprn}): {e}'
+ )
+
+if __name__ == "__main__":  # run only when executed as a script, not on import
+ main()
diff --git a/scripts/sero_address_list.csv b/scripts/sero_address_list.csv
new file mode 100644
index 00000000..8c9401c9
--- /dev/null
+++ b/scripts/sero_address_list.csv
@@ -0,0 +1,51 @@
+UPRN,Address,Postcode
+U1035052,"1 Sudbury Crescent, Bromley",BR1 4PY
+U1027449,"11 Station Road, Bromley",BR1 3LP
+U1021310,"126 Faringdon Avenue, Bromley",BR2 8BU
+U1010811,"13 Gilbert Road, Bromley",BR1 3QP
+U1024017,"13 Manor Way, Bromley",BR2 8ES
+U1042232,"154 Southover, Bromley",BR1 4RZ
+U1009369,"17 Minster Road, Bromley",BR1 4DY
+U1022305,"18a Lansdowne Road, Bromley",BR1 3LZ
+U1033165,"2 Laburnum Way, Bromley",BR2 8BZ
+U1035326,"2 Whitebeam Avenue, Bromley",BR2 8DL
+U1037872,"20 Sudbury Crescent, Bromley",BR1 4PZ
+U1007432,"21 Detling Road, Bromley",BR1 4SH
+U1005123,"24 Bonville Road, Bromley",BR1 4QA
+U1034810,"24 Newbury Road, Bromley",BR2 0QW
+U1020351,"27 Laburnum Way, Bromley",BR2 8BY
+U1009511,"27 Newbury Road, Bromley",BR2 0QN
+U1034985,"272 Southborough Lane, Bromley",BR2 8AS
+U1037954,"28 Treewall Gardens, Bromley",BR1 5BT
+U1038103,"29 Whitebeam Avenue, Bromley",BR2 8DJ
+U1013358,"3 Bird In Hand Lane, Bromley",BR1 2NA
+U1024709,"3 Parkfield Way, Bromley",BR2 8AE
+U1031058,"303 Keedonwood Road, Bromley",BR1 4QR
+U1014077,"32 Aylesbury Road, Bromley",BR2 0QP
+U1019564,"32 Brook Lane, Bromley",BR1 4PU
+U1020237,"33 Hornbeam Way, Bromley",BR2 8DB
+U1027493,"35 Sudbury Crescent, Bromley",BR1 4PY
+U1042298,"39 Sudbury Crescent, Bromley",BR1 4PY
+U1024698,"4 Palace View, Bromley",BR1 3EL
+U1052186,"4 Ravensleigh Gardens, Bromley",BR1 5SN
+U1042153,"4 Scotts Road, Bromley",BR1 3QD
+U1037814,"42 Stanley Road, Bromley",BR2 9JH
+U1014078,"43 Aylesbury Road, Bromley",BR2 0QR
+U1007701,"46 Harwood Avenue, Bromley",BR1 3DU
+U1036758,"46 Newbury Road, Bromley",BR2 0QW
+U1025820,"46 Princes Plain, Bromley",BR2 8LE
+U1022991,"5 Link Way, Bromley",BR2 8JH
+U1024484,"55 Mounthurst Road, Bromley",BR2 7PG
+U1014793,"59 Headcorn Road, Bromley",BR1 4SQ
+U1037465,"6 Princes Plain, Bromley",BR2 8LE
+U1009202,"63 Mead Way, Bromley",BR2 9ER
+U1021353,"66 George Lane, Bromley",BR2 7LQ
+U1042733,"68 Whitebeam Avenue, Bromley",BR2 8DL
+U1030962,"7 Ravensleigh Gardens, Bromley",BR1 5SN
+U1031294,"70 London Lane, Bromley",BR1 4HE
+U1037450,"70 Pontefract Road, Bromley",BR1 4RB
+U1014589,"71 Empress Drive, Chislehurst",BR7 5BQ
+U1052429,"76 Southover, Bromley",BR1 4RY
+U1020199,"78 Hillside Road, Bromley",BR2 0ST
+U1024511,"81 Nightingale Lane, Bromley",BR1 2SA
+U1009194,"84 Mays Hill Road, Bromley",BR2 0HT
diff --git a/test.requirements.txt b/test.requirements.txt
index 7fdd7dc4..26125034 100644
--- a/test.requirements.txt
+++ b/test.requirements.txt
@@ -9,4 +9,5 @@ hubspot-api-client
fuzzywuzzy
pymupdf
playwright==1.58.0
-msal
\ No newline at end of file
+msal
+moto[s3,sqs]
\ No newline at end of file
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 00000000..0a246372
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,48 @@
+"""Shared pytest fixtures for the ``tests/`` tree.
+
+Provides an ephemeral PostgreSQL engine for tests that exercise SQLModel
+repositories. PostgreSQL has no true in-memory mode; ``pytest-postgresql``
+starts a real, throwaway server in a temp directory (the process is started
+once per session and a fresh database is created/dropped per test). That is
+the closest equivalent to "in-memory" and matches production behaviour far
+better than SQLite (enums, JSONB, constraint semantics, etc.).
+"""
+
+from __future__ import annotations
+
+import glob
+from collections.abc import Iterator
+from typing import Any
+
+import pytest
+from psycopg import Connection
+from pytest_postgresql import factories
+from sqlalchemy import Engine
+from sqlmodel import SQLModel, create_engine
+
+# Importing the SQLModel row modules registers their tables on SQLModel.metadata
+# so ``create_all`` builds the full schema; such imports look unused but aren't.
+# NOTE(review): no row-module import follows this comment -- confirm intended.
+
+
+# pg_ctl ships under a versioned path and is not on PATH in the dev container.
+_PG_CTL = next(iter(sorted(glob.glob("/usr/lib/postgresql/*/bin/pg_ctl"))), "pg_ctl")  # first match in string sort order, else bare "pg_ctl"
+
+postgresql_proc = factories.postgresql_proc(
+ executable=_PG_CTL
+) # pyright: ignore[reportUnknownMemberType]
+postgresql = factories.postgresql("postgresql_proc")  # refers to the process fixture above by name
+
+
+@pytest.fixture
+def db_engine(postgresql: Connection[Any]) -> Iterator[Engine]:
+ """A SQLModel engine bound to a fresh, ephemeral PostgreSQL database."""
+ info = postgresql.info
+ url = f"postgresql+psycopg://{info.user}:@{info.host}:{info.port}/{info.dbname}"  # password part is left empty
+ engine = create_engine(url)
+ SQLModel.metadata.create_all(engine)  # creates every table registered on SQLModel.metadata at this point
+ try:
+ yield engine
+ finally:  # runs even when the test using the fixture fails
+ SQLModel.metadata.drop_all(engine)
+ engine.dispose()
diff --git a/tests/domain/__init__.py b/tests/domain/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/domain/addresses/__init__.py b/tests/domain/addresses/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/domain/addresses/test_postcode_batching.py b/tests/domain/addresses/test_postcode_batching.py
new file mode 100644
index 00000000..8ffcf1b5
--- /dev/null
+++ b/tests/domain/addresses/test_postcode_batching.py
@@ -0,0 +1,118 @@
+import pytest
+
+from domain.addresses.postcode_batching import iter_postcode_grouped_batches
+from domain.addresses.user_address import UserAddress
+from domain.postcode import Postcode
+
+
+def _addrs(postcode: str, n: int) -> list[UserAddress]:  # helper: n distinct addresses sharing one postcode
+ return [
+ UserAddress(
+ user_address=f"{i} {postcode} Street", postcode=Postcode(postcode)
+ )
+ for i in range(n)  # i makes each user_address string unique
+ ]
+
+
+def test_empty_input_yields_no_batches() -> None:
+ # act / assert
+ assert list(iter_postcode_grouped_batches([])) == []  # called without max_batch_size, so its default applies
+
+
+def test_single_batch_under_cap() -> None:
+ # arrange
+ addrs = _addrs("AA1 1AA", 3) + _addrs("BB2 2BB", 2)  # 5 addresses across two postcodes
+ # act
+ batches = list(iter_postcode_grouped_batches(addrs, max_batch_size=500))
+ # assert
+ assert len(batches) == 1
+ assert batches[0] == addrs  # same addresses, same order
+
+
+def test_multiple_postcodes_packed_into_one_batch_up_to_cap() -> None:
+ # Two groups whose total exactly equals the cap pack into a single
+ # batch -- no premature flush.
+ # arrange
+ addrs = _addrs("AA1 1AA", 3) + _addrs("BB2 2BB", 2)  # 3 + 2 == cap of 5
+ # act
+ batches = list(iter_postcode_grouped_batches(addrs, max_batch_size=5))
+ # assert
+ assert len(batches) == 1
+ assert len(batches[0]) == 5
+
+
+def test_flush_on_overflow_before_adding_next_postcode() -> None:
+ # Cap is 5. First group fills 3 slots; second group of 3 would overflow,
+ # so the buffer is flushed first and the next group starts a fresh batch.
+ # arrange
+ addrs = _addrs("AA1 1AA", 3) + _addrs("BB2 2BB", 3)  # 3 + 3 > cap of 5
+ # act
+ batches = list(iter_postcode_grouped_batches(addrs, max_batch_size=5))
+ # assert
+ assert len(batches) == 2
+ assert [str(a.postcode) for a in batches[0]] == ["AA11AA"] * 3  # expected str(postcode) has no space
+ assert [str(a.postcode) for a in batches[1]] == ["BB22BB"] * 3
+
+
+def test_single_postcode_group_exceeding_cap_is_dispatched_whole() -> None:
+ # An oversize single-postcode group goes out as one batch larger than
+ # the cap -- the cap never splits a postcode.
+ # arrange
+ addrs = _addrs("AA1 1AA", 7)  # 7 > cap of 5, all one postcode
+ # act
+ batches = list(iter_postcode_grouped_batches(addrs, max_batch_size=5))
+ # assert
+ assert len(batches) == 1
+ assert len(batches[0]) == 7
+
+
+def test_oversize_group_flushes_existing_buffer_first() -> None:
+ # Mirrors the legacy ``if buffer: flush`` branch when an oversize group
+ # is encountered: buffered work must not be lost or interleaved.
+ # arrange
+ small = _addrs("AA1 1AA", 2)
+ big = _addrs("BB2 2BB", 7)  # larger than the cap on its own
+ tail = _addrs("CC3 3CC", 1)
+ # act
+ batches = list(
+ iter_postcode_grouped_batches(small + big + tail, max_batch_size=5)
+ )
+ # assert
+ assert len(batches) == 3  # small, big and tail each end up in their own batch
+ assert [str(a.postcode) for a in batches[0]] == ["AA11AA", "AA11AA"]
+ assert [str(a.postcode) for a in batches[1]] == ["BB22BB"] * 7
+ assert [str(a.postcode) for a in batches[2]] == ["CC33CC"]
+
+
+def test_final_flush_yields_remaining_buffer() -> None:
+ # No overflow ever happens, but the trailing buffer must still come out.
+ # arrange
+ addrs = _addrs("AA1 1AA", 2) + _addrs("BB2 2BB", 2)  # 4 addresses, far below the cap of 500
+ # act
+ batches = list(iter_postcode_grouped_batches(addrs, max_batch_size=500))
+ # assert
+ assert batches == [addrs]
+
+
+def test_postcode_grouping_preserves_first_seen_order() -> None:
+ # Interleaved input must still group by postcode and emit in first-seen
+ # order -- never alphabetical.
+ # arrange
+ a1, a2 = _addrs("ZZ9 9ZZ", 2)
+ b1, b2 = _addrs("AA1 1AA", 2)
+ # act
+ batches = list(iter_postcode_grouped_batches([a1, b1, a2, b2]))  # postcodes interleaved: ZZ, AA, ZZ, AA
+ # assert
+ assert len(batches) == 1
+ assert [str(a.postcode) for a in batches[0]] == [  # ZZ group first because it was seen first
+ "ZZ99ZZ",
+ "ZZ99ZZ",
+ "AA11AA",
+ "AA11AA",
+ ]
+
+
+def test_invalid_max_batch_size_raises() -> None:
+ # act / assert
+ with pytest.raises(ValueError, match="max_batch_size"):  # the error message must mention the parameter name
+ list(iter_postcode_grouped_batches([], max_batch_size=0))  # list() drives the iterator so the check runs
diff --git a/tests/domain/addresses/test_user_address.py b/tests/domain/addresses/test_user_address.py
new file mode 100644
index 00000000..8d092df3
--- /dev/null
+++ b/tests/domain/addresses/test_user_address.py
@@ -0,0 +1,98 @@
+import dataclasses
+
+import pytest
+
+from domain.addresses.user_address import UserAddress
+from domain.postcode import Postcode
+
+
+def test_user_address_holds_postcode_value_object() -> None:
+ # act
+ addr = UserAddress(user_address="1 The Street", postcode=Postcode("sw1a 1aa"))
+ # assert
+ assert addr.postcode == Postcode("SW1A1AA")
+
+
+def test_user_address_preserves_user_address_verbatim() -> None:
+ # The free-text user_address string is intentionally NOT normalised --
+ # only the postcode is canonicalised, and that happens inside Postcode.
+ # act
+ addr = UserAddress(
+ user_address=" 1 The Street ", postcode=Postcode("SW1A1AA")
+ )
+ # assert
+ assert addr.user_address == " 1 The Street "
+
+
+def test_user_address_internal_reference_defaults_to_none() -> None:
+ # act
+ addr = UserAddress(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
+ # assert
+ assert addr.internal_reference is None
+
+
+def test_user_address_internal_reference_accepted() -> None:
+ # act
+ addr = UserAddress(
+ user_address="1 The Street",
+ postcode=Postcode("SW1A1AA"),
+ internal_reference="cust-42",
+ )
+ # assert
+ assert addr.internal_reference == "cust-42"
+
+
+def test_user_address_is_frozen() -> None:
+ # arrange
+ addr = UserAddress(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
+ # act / assert
+ with pytest.raises(dataclasses.FrozenInstanceError):
+ addr.postcode = Postcode("OTHER") # type: ignore[misc]
+
+
+def test_user_address_equality_uses_canonical_postcode() -> None:
+ # Postcode sanitises eagerly, so addresses built from different surface
+ # forms of the same postcode compare equal.
+ # arrange
+ a = UserAddress(user_address="1 The Street", postcode=Postcode("sw1a 1aa"))
+ b = UserAddress(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
+ # act / assert
+ assert a == b
+
+
+def test_user_address_source_row_defaults_to_empty_dict() -> None:
+ # act
+ addr = UserAddress(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
+ # assert
+ assert addr.source_row == {}
+
+
+def test_user_address_carries_source_row() -> None:
+ # arrange
+ row = {"Address 1": "1 The Street", "postcode": "SW1A 1AA", "SAP Score": "72"}
+ # act
+ addr = UserAddress(
+ user_address="1 The Street",
+ postcode=Postcode("SW1A 1AA"),
+ source_row=row,
+ )
+ # assert
+ assert addr.source_row == row
+
+
+def test_user_address_equality_ignores_source_row() -> None:
+ # source_row is excluded from equality (and hashing): identity stays
+ # defined by the parsed fields.
+ # arrange
+ a = UserAddress(
+ user_address="1 The Street",
+ postcode=Postcode("SW1A1AA"),
+ source_row={"x": "1"},
+ )
+ b = UserAddress(
+ user_address="1 The Street",
+ postcode=Postcode("SW1A1AA"),
+ source_row={"y": "2"},
+ )
+ # act / assert
+ assert a == b
diff --git a/tests/domain/tasks/__init__.py b/tests/domain/tasks/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/domain/tasks/test_subtasks.py b/tests/domain/tasks/test_subtasks.py
new file mode 100644
index 00000000..8cee4496
--- /dev/null
+++ b/tests/domain/tasks/test_subtasks.py
@@ -0,0 +1,95 @@
+from uuid import uuid4
+
+import pytest
+
+from domain.tasks.subtasks import SubTask, SubTaskStatus
+
+
+def test_create_subtask_starts_waiting() -> None:
+ # arrange
+ task_id = uuid4()
+
+ # act
+ st = SubTask.create(task_id=task_id, inputs={"foo": "bar"})
+
+ # assert
+ assert st.task_id == task_id
+ assert st.status is SubTaskStatus.WAITING
+ assert st.inputs == {"foo": "bar"}
+ assert st.outputs is None
+ assert st.job_started is None
+ assert st.job_completed is None
+
+
+def test_start_transitions_to_in_progress_and_sets_cloud_logs_url() -> None:
+ # arrange
+ st = SubTask.create(task_id=uuid4())
+
+ # act
+ st.start(cloud_logs_url="https://example/log")
+
+ # assert
+ assert st.status is SubTaskStatus.IN_PROGRESS
+ assert st.cloud_logs_url == "https://example/log"
+ assert st.job_started is not None
+
+
+def test_start_is_idempotent_from_in_progress() -> None:
+ # arrange
+ st = SubTask.create(task_id=uuid4())
+ st.start()
+ first_start = st.job_started
+
+ # act
+ st.start(cloud_logs_url="https://other")
+
+ # assert
+ assert st.status is SubTaskStatus.IN_PROGRESS
+ assert st.job_started == first_start # not overwritten
+ assert st.cloud_logs_url == "https://other"
+
+
+def test_start_rejects_from_terminal_status() -> None:
+ # arrange
+ st = SubTask.create(task_id=uuid4())
+ st.complete()
+ # act / assert
+ with pytest.raises(ValueError):
+ st.start()
+
+
+def test_complete_marks_outputs_and_job_completed() -> None:
+ # arrange
+ st = SubTask.create(task_id=uuid4())
+ st.start()
+
+ # act
+ st.complete({"uprn": "123"})
+
+ # assert
+ assert st.status is SubTaskStatus.COMPLETE
+ assert st.outputs == {"result": {"uprn": "123"}}
+ assert st.job_completed is not None
+
+
+def test_complete_without_result_leaves_outputs_unset() -> None:
+ # arrange
+ st = SubTask.create(task_id=uuid4())
+ # act
+ st.complete()
+ # assert
+ assert st.outputs is None
+
+
+def test_fail_records_error_in_outputs() -> None:
+ # arrange
+ st = SubTask.create(task_id=uuid4())
+ err = RuntimeError("boom")
+
+ # act
+ st.fail(err)
+
+ # assert
+ assert st.status is SubTaskStatus.FAILED
+ assert st.outputs == {"error": "boom"}
+ assert st.job_completed is not None
diff --git a/tests/domain/tasks/test_tasks.py b/tests/domain/tasks/test_tasks.py
new file mode 100644
index 00000000..ba82412b
--- /dev/null
+++ b/tests/domain/tasks/test_tasks.py
@@ -0,0 +1,131 @@
+import pytest
+
+from domain.tasks.subtasks import SubTaskStatus
+from domain.tasks.tasks import Source, Task, TaskStatus
+
+
+def test_create_task_starts_waiting() -> None:
+ # arrange / act
+ t = Task.create(
+ task_source="manual:test", source=Source.PORTFOLIO, source_id="abc-123"
+ )
+
+ # assert
+ assert t.status is TaskStatus.WAITING
+ assert t.source is Source.PORTFOLIO
+ assert t.source_id == "abc-123"
+ assert t.job_started is not None
+ assert t.job_completed is None
+
+
+def test_create_task_rejects_blank_task_source() -> None:
+ # act / assert
+ with pytest.raises(ValueError, match="task_source"):
+ Task.create(task_source=" ")
+
+
+def test_start_transitions_to_in_progress() -> None:
+ # arrange
+ t = Task.create(task_source="manual:test")
+ # act
+ t.start()
+ # assert
+ assert t.status is TaskStatus.IN_PROGRESS
+
+
+def test_complete_marks_job_completed() -> None:
+ # arrange
+ t = Task.create(task_source="manual:test")
+ t.start()
+ # act
+ t.complete()
+ # assert
+ assert t.status is TaskStatus.COMPLETE
+ assert t.job_completed is not None
+
+
+def test_fail_marks_job_completed() -> None:
+ # arrange
+ t = Task.create(task_source="manual:test")
+ # act
+ t.fail()
+ # assert
+ assert t.status is TaskStatus.FAILED
+ assert t.job_completed is not None
+
+
+def test_start_rejects_from_terminal_status() -> None:
+ # arrange
+ t = Task.create(task_source="manual:test")
+ t.complete()
+ # act / assert
+ with pytest.raises(ValueError):
+ t.start()
+
+
+def test_recalculate_with_empty_statuses_is_noop() -> None:
+ # arrange
+ t = Task.create(task_source="manual:test")
+ original_status = t.status
+ original_completed = t.job_completed
+
+ # act
+ t.recalculate_from_subtasks([])
+
+ # assert
+ assert t.status is original_status
+ assert t.job_completed is original_completed
+
+
+def test_recalculate_all_waiting_keeps_waiting() -> None:
+ # arrange
+ t = Task.create(task_source="manual:test")
+ t.start() # task moved to IN_PROGRESS earlier
+ t.complete() # then COMPLETE, with job_completed set
+
+ # act
+ t.recalculate_from_subtasks([SubTaskStatus.WAITING, SubTaskStatus.WAITING])
+
+ # assert
+ assert t.status is TaskStatus.WAITING
+ assert t.job_completed is None
+
+
+def test_recalculate_any_in_progress_marks_in_progress() -> None:
+ # arrange
+ t = Task.create(task_source="manual:test")
+
+ # act
+ t.recalculate_from_subtasks(
+ [SubTaskStatus.WAITING, SubTaskStatus.IN_PROGRESS, SubTaskStatus.COMPLETE]
+ )
+
+ # assert
+ assert t.status is TaskStatus.IN_PROGRESS
+ assert t.job_completed is None
+
+
+def test_recalculate_all_complete_marks_complete() -> None:
+ # arrange
+ t = Task.create(task_source="manual:test")
+
+ # act
+ t.recalculate_from_subtasks([SubTaskStatus.COMPLETE, SubTaskStatus.COMPLETE])
+
+ # assert
+ assert t.status is TaskStatus.COMPLETE
+ assert t.job_completed is not None
+
+
+def test_recalculate_any_failed_marks_failed_even_with_others() -> None:
+ # arrange
+ t = Task.create(task_source="manual:test")
+
+ # act
+ t.recalculate_from_subtasks(
+ [SubTaskStatus.IN_PROGRESS, SubTaskStatus.COMPLETE, SubTaskStatus.FAILED]
+ )
+
+ # assert
+ assert t.status is TaskStatus.FAILED
+ assert t.job_completed is not None
diff --git a/tests/domain/test_postcode.py b/tests/domain/test_postcode.py
new file mode 100644
index 00000000..f7ce9015
--- /dev/null
+++ b/tests/domain/test_postcode.py
@@ -0,0 +1,59 @@
+import dataclasses
+
+import pytest
+
+from domain.postcode import Postcode
+
+
+def test_postcode_uppercases() -> None:
+ # act / assert
+ assert Postcode("sw1a1aa").value == "SW1A1AA"
+
+
+def test_postcode_strips_internal_spaces() -> None:
+ # act / assert
+ assert Postcode("sw1a 1aa").value == "SW1A1AA"
+
+
+def test_postcode_strips_leading_and_trailing_whitespace() -> None:
+ # act / assert
+ assert Postcode(" sw1a 1aa ").value == "SW1A1AA"
+
+
+def test_postcode_strips_tabs_and_newlines() -> None:
+ # CSV ingestion occasionally introduces stray whitespace characters; the
+ # canonical form must absorb them just like literal spaces.
+ # act / assert
+ assert Postcode("sw1a\t1aa\n").value == "SW1A1AA"
+
+
+def test_postcode_construction_is_idempotent() -> None:
+ # arrange
+ once = Postcode("sw1a 1aa")
+ # act / assert
+ assert Postcode(once.value).value == "SW1A1AA"
+
+
+def test_postcode_empty_string() -> None:
+ # act / assert
+ assert Postcode("").value == ""
+
+
+def test_postcode_str_returns_canonical_value() -> None:
+ # act / assert
+ assert str(Postcode("sw1a 1aa")) == "SW1A1AA"
+
+
+def test_postcode_equality_ignores_surface_form() -> None:
+ # Differing case / whitespace sanitise to the same canonical value, so
+ # the value objects compare equal.
+ # act / assert
+ assert Postcode("sw1a 1aa") == Postcode("SW1A1AA")
+
+
+def test_postcode_is_frozen() -> None:
+ # arrange
+ postcode = Postcode("SW1A1AA")
+ # act / assert
+ with pytest.raises(dataclasses.FrozenInstanceError):
+ postcode.value = "OTHER" # type: ignore[misc]
diff --git a/tests/infrastructure/__init__.py b/tests/infrastructure/__init__.py
new file mode 100644
index 00000000..f5ad62d0
--- /dev/null
+++ b/tests/infrastructure/__init__.py
@@ -0,0 +1,10 @@
+from typing import Any
+
+import boto3
+
+REGION = "us-east-1"
+
+
+def make_boto_client(service_name: str) -> Any:
+ factory: Any = boto3.client # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
+ return factory(service_name, region_name=REGION)
diff --git a/tests/infrastructure/conftest.py b/tests/infrastructure/conftest.py
new file mode 100644
index 00000000..25c1ac3b
--- /dev/null
+++ b/tests/infrastructure/conftest.py
@@ -0,0 +1,28 @@
+import os
+from collections.abc import Iterator
+from typing import Optional
+
+import pytest
+
+
+@pytest.fixture(autouse=True)
+def _aws_creds() -> Iterator[None]: # pyright: ignore[reportUnusedFunction]
+ keys = (
+ "AWS_ACCESS_KEY_ID",
+ "AWS_SECRET_ACCESS_KEY",
+ "AWS_SESSION_TOKEN",
+ "AWS_DEFAULT_REGION",
+ )
+ prev: dict[str, Optional[str]] = {k: os.environ.get(k) for k in keys}
+ os.environ["AWS_ACCESS_KEY_ID"] = "testing"
+ os.environ["AWS_SECRET_ACCESS_KEY"] = "testing"
+ os.environ["AWS_SESSION_TOKEN"] = "testing"
+ os.environ["AWS_DEFAULT_REGION"] = "us-east-1"
+ try:
+ yield
+ finally:
+ for k, v in prev.items():
+ if v is None:
+ os.environ.pop(k, None)
+ else:
+ os.environ[k] = v
diff --git a/tests/infrastructure/test_address2uprn_queue_client.py b/tests/infrastructure/test_address2uprn_queue_client.py
new file mode 100644
index 00000000..c8e89ece
--- /dev/null
+++ b/tests/infrastructure/test_address2uprn_queue_client.py
@@ -0,0 +1,71 @@
+import json
+from collections.abc import Iterator
+from typing import Any, cast
+from uuid import uuid4
+
+import pytest
+from moto import mock_aws
+
+from infrastructure.address2uprn_queue_client import Address2UprnQueueClient
+from tests.infrastructure import make_boto_client
+
+
+@pytest.fixture
+def queue_setup() -> Iterator[tuple[Address2UprnQueueClient, Any, str]]:
+ with mock_aws():
+ boto_client = make_boto_client("sqs")
+ queue: dict[str, Any] = boto_client.create_queue(
+ QueueName="address2uprn-queue"
+ )
+ queue_url = cast(str, queue["QueueUrl"])
+ yield (
+ Address2UprnQueueClient(boto_client, queue_url),
+ boto_client,
+ queue_url,
+ )
+
+
+def test_publish_returns_message_id(
+ queue_setup: tuple[Address2UprnQueueClient, Any, str],
+) -> None:
+ # arrange
+ client, _boto, _url = queue_setup
+ # act
+ message_id = client.publish(
+ parent_task_id=uuid4(),
+ child_subtask_id=uuid4(),
+ s3_uri="s3://my-bucket/path/to/chunk.csv",
+ )
+ # assert
+ assert isinstance(message_id, str)
+ assert message_id
+
+
+def test_publish_body_uses_typed_shape(
+ queue_setup: tuple[Address2UprnQueueClient, Any, str],
+) -> None:
+ # arrange
+ client, boto_client, queue_url = queue_setup
+ parent_id = uuid4()
+ child_id = uuid4()
+ s3_uri = "s3://my-bucket/path/to/chunk.csv"
+
+ # act
+ client.publish(
+ parent_task_id=parent_id,
+ child_subtask_id=child_id,
+ s3_uri=s3_uri,
+ )
+
+ # assert
+ received: dict[str, Any] = boto_client.receive_message(
+ QueueUrl=queue_url, MaxNumberOfMessages=1
+ )
+ messages: list[dict[str, Any]] = received["Messages"]
+ assert len(messages) == 1
+ body = json.loads(messages[0]["Body"])
+ assert body == {
+ "task_id": str(parent_id),
+ "sub_task_id": str(child_id),
+ "s3_uri": s3_uri,
+ }
diff --git a/tests/infrastructure/test_csv_s3_client.py b/tests/infrastructure/test_csv_s3_client.py
new file mode 100644
index 00000000..30e27164
--- /dev/null
+++ b/tests/infrastructure/test_csv_s3_client.py
@@ -0,0 +1,51 @@
+from collections.abc import Iterator
+
+import pytest
+from moto import mock_aws
+
+from infrastructure.csv_s3_client import CsvS3Client
+from tests.infrastructure import make_boto_client
+
+BUCKET = "csv-bucket"
+
+
+@pytest.fixture
+def csv_client() -> Iterator[CsvS3Client]:
+ with mock_aws():
+ boto_client = make_boto_client("s3")
+ boto_client.create_bucket(Bucket=BUCKET)
+ yield CsvS3Client(boto_client, BUCKET)
+
+
+def test_save_rows_returns_s3_uri(csv_client: CsvS3Client) -> None:
+ # arrange
+ rows = [{"address": "1 High St", "postcode": "AB1 2CD"}]
+ # act
+ uri = csv_client.save_rows(rows, "uploads/addresses.csv")
+ # assert
+ assert uri == f"s3://{BUCKET}/uploads/addresses.csv"
+
+
+def test_round_trip_preserves_rows(csv_client: CsvS3Client) -> None:
+ # arrange
+ rows = [
+ {"address": "1 High St", "postcode": "AB1 2CD"},
+ {"address": "2 Low St", "postcode": "XY9 8ZW"},
+ ]
+ # act
+ uri = csv_client.save_rows(rows, "uploads/addresses.csv")
+ fetched = csv_client.read_rows(uri)
+ # assert
+ assert fetched == rows
+
+
+def test_save_rows_rejects_empty_list(csv_client: CsvS3Client) -> None:
+ # act / assert
+ with pytest.raises(ValueError, match="empty"):
+ csv_client.save_rows([], "uploads/empty.csv")
+
+
+def test_read_rows_rejects_wrong_bucket(csv_client: CsvS3Client) -> None:
+ # act / assert
+ with pytest.raises(ValueError, match="does not match client bucket"):
+ csv_client.read_rows("s3://other-bucket/uploads/addresses.csv")
diff --git a/tests/infrastructure/test_s3_client.py b/tests/infrastructure/test_s3_client.py
new file mode 100644
index 00000000..67db4f58
--- /dev/null
+++ b/tests/infrastructure/test_s3_client.py
@@ -0,0 +1,36 @@
+from collections.abc import Iterator
+
+import pytest
+from moto import mock_aws
+
+from infrastructure.s3_client import S3Client
+from tests.infrastructure import make_boto_client
+
+BUCKET = "test-bucket"
+
+
+@pytest.fixture
+def s3_client() -> Iterator[S3Client]:
+ with mock_aws():
+ boto_client = make_boto_client("s3")
+ boto_client.create_bucket(Bucket=BUCKET)
+ yield S3Client(boto_client, BUCKET)
+
+
+def test_put_object_returns_s3_uri(s3_client: S3Client) -> None:
+ # act
+ uri = s3_client.put_object("folder/data.bin", b"payload")
+ # assert
+ assert uri == f"s3://{BUCKET}/folder/data.bin"
+
+
+def test_get_object_returns_bytes_written_by_put_object(s3_client: S3Client) -> None:
+ # arrange
+ s3_client.put_object("round/trip.bin", b"hello world")
+ # act / assert
+ assert s3_client.get_object("round/trip.bin") == b"hello world"
+
+
+def test_bucket_property_exposes_configured_bucket(s3_client: S3Client) -> None:
+ # act / assert
+ assert s3_client.bucket == BUCKET
diff --git a/tests/infrastructure/test_s3_uri.py b/tests/infrastructure/test_s3_uri.py
new file mode 100644
index 00000000..32fd710f
--- /dev/null
+++ b/tests/infrastructure/test_s3_uri.py
@@ -0,0 +1,40 @@
+import pytest
+
+from infrastructure.s3_uri import parse_s3_uri
+
+
+def test_parses_simple_s3_uri() -> None:
+ # act / assert
+ assert parse_s3_uri("s3://my-bucket/file.csv") == ("my-bucket", "file.csv")
+
+
+def test_parses_s3_uri_with_nested_key() -> None:
+ # act
+ bucket, key = parse_s3_uri("s3://my-bucket/nested/path/to/file.csv")
+ # assert
+ assert (bucket, key) == ("my-bucket", "nested/path/to/file.csv")
+
+
+def test_rejects_s3_uri_without_key() -> None:
+ # act / assert
+ with pytest.raises(ValueError, match="bucket and a key"):
+ parse_s3_uri("s3://my-bucket")
+
+
+def test_rejects_s3_uri_with_empty_key() -> None:
+ # act / assert
+ with pytest.raises(ValueError, match="bucket and a key"):
+ parse_s3_uri("s3://my-bucket/")
+
+
+def test_parses_console_url_prefix() -> None:
+ # arrange
+ url = "https://eu-west-2.console.aws.amazon.com/s3/object/my-bucket?prefix=nested%2Ffile.csv"
+ # act / assert
+ assert parse_s3_uri(url) == ("my-bucket", "nested/file.csv")
+
+
+def test_rejects_unparseable_string() -> None:
+ # act / assert
+ with pytest.raises(ValueError):
+ parse_s3_uri("not-a-uri-at-all")
diff --git a/tests/infrastructure/test_sqs_client.py b/tests/infrastructure/test_sqs_client.py
new file mode 100644
index 00000000..44186bbb
--- /dev/null
+++ b/tests/infrastructure/test_sqs_client.py
@@ -0,0 +1,44 @@
+import json
+from collections.abc import Iterator
+from typing import Any, cast
+
+import pytest
+from moto import mock_aws
+
+from infrastructure.sqs_client import SqsClient
+from tests.infrastructure import make_boto_client
+
+
+@pytest.fixture
+def sqs_setup() -> Iterator[tuple[SqsClient, Any, str]]:
+ with mock_aws():
+ boto_client = make_boto_client("sqs")
+ queue: dict[str, Any] = boto_client.create_queue(QueueName="test-queue")
+ queue_url = cast(str, queue["QueueUrl"])
+ yield SqsClient(boto_client, queue_url), boto_client, queue_url
+
+
+def test_send_returns_message_id(sqs_setup: tuple[SqsClient, Any, str]) -> None:
+ # arrange
+ client, _boto, _url = sqs_setup
+ # act
+ message_id = client.send({"hello": "world"})
+ # assert
+ assert isinstance(message_id, str)
+ assert message_id
+
+
+def test_send_json_serialises_body(sqs_setup: tuple[SqsClient, Any, str]) -> None:
+ # arrange
+ client, boto_client, queue_url = sqs_setup
+ body = {"hello": "world", "count": 3}
+ # act
+ client.send(body)
+
+ # assert
+ received: dict[str, Any] = boto_client.receive_message(
+ QueueUrl=queue_url, MaxNumberOfMessages=1
+ )
+ messages: list[dict[str, Any]] = received["Messages"]
+ assert len(messages) == 1
+ assert json.loads(messages[0]["Body"]) == body
diff --git a/tests/orchestration/__init__.py b/tests/orchestration/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/orchestration/test_postcode_splitter_orchestrator.py b/tests/orchestration/test_postcode_splitter_orchestrator.py
new file mode 100644
index 00000000..a718ffbc
--- /dev/null
+++ b/tests/orchestration/test_postcode_splitter_orchestrator.py
@@ -0,0 +1,299 @@
+from __future__ import annotations
+
+import json
+import os
+from collections.abc import Iterator
+from dataclasses import dataclass
+from typing import Any, cast
+
+import boto3
+import pytest
+from moto import mock_aws
+from sqlalchemy import Engine
+from sqlmodel import Session
+
+from infrastructure.address2uprn_queue_client import Address2UprnQueueClient
+from infrastructure.csv_s3_client import CsvS3Client
+from orchestration.postcode_splitter_orchestrator import PostcodeSplitterOrchestrator
+from orchestration.task_orchestrator import TaskOrchestrator
+from repositories.tasks.subtask_postgres_repository import SubTaskPostgresRepository
+from repositories.tasks.task_postgres_repository import TaskPostgresRepository
+from repositories.user_address.user_address_csv_s3_repository import (
+ UserAddressCsvS3Repository,
+)
+
+BUCKET = "splitter-bucket"
+REGION = "us-east-1"
+
+
+def _make_boto_client(service_name: str) -> Any:
+ factory: Any = boto3.client # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
+ return factory(service_name, region_name=REGION)
+
+
+@pytest.fixture(autouse=True)
+def _aws_creds() -> Iterator[None]: # pyright: ignore[reportUnusedFunction]
+ keys = (
+ "AWS_ACCESS_KEY_ID",
+ "AWS_SECRET_ACCESS_KEY",
+ "AWS_SESSION_TOKEN",
+ "AWS_DEFAULT_REGION",
+ )
+ prev: dict[str, Any] = {k: os.environ.get(k) for k in keys}
+ os.environ["AWS_ACCESS_KEY_ID"] = "testing"
+ os.environ["AWS_SECRET_ACCESS_KEY"] = "testing"
+ os.environ["AWS_SESSION_TOKEN"] = "testing"
+ os.environ["AWS_DEFAULT_REGION"] = REGION
+ try:
+ yield
+ finally:
+ for k, v in prev.items():
+ if v is None:
+ os.environ.pop(k, None)
+ else:
+ os.environ[k] = v
+
+
+@dataclass
+class Harness:
+ splitter: PostcodeSplitterOrchestrator
+ task_orchestrator: TaskOrchestrator
+ subtasks: SubTaskPostgresRepository
+ csv_client: CsvS3Client
+ boto_sqs: Any
+ queue_url: str
+ repo: UserAddressCsvS3Repository
+
+
+@pytest.fixture
+def harness(db_engine: Engine) -> Iterator[Harness]:
+ with mock_aws():
+ # Infra: S3 + SQS
+ boto_s3 = _make_boto_client("s3")
+ boto_s3.create_bucket(Bucket=BUCKET)
+ boto_sqs = _make_boto_client("sqs")
+ queue: dict[str, Any] = boto_sqs.create_queue(QueueName="address2uprn-queue")
+ queue_url = cast(str, queue["QueueUrl"])
+
+ csv_client = CsvS3Client(boto_s3, BUCKET)
+ repo = UserAddressCsvS3Repository(csv_client, BUCKET)
+ queue_client = Address2UprnQueueClient(boto_sqs, queue_url)
+
+ # DB: TaskOrchestrator backed by the ephemeral PostgreSQL engine
+ with Session(db_engine) as session:
+ task_repo = TaskPostgresRepository(session=session)
+ subtask_repo = SubTaskPostgresRepository(session=session)
+ task_orchestrator = TaskOrchestrator(
+ task_repo=task_repo, subtask_repo=subtask_repo
+ )
+
+ splitter = PostcodeSplitterOrchestrator(
+ task_orchestrator=task_orchestrator,
+ user_address_repo=repo,
+ queue_client=queue_client,
+ max_batch_size=3,
+ )
+
+ yield Harness(
+ splitter=splitter,
+ task_orchestrator=task_orchestrator,
+ subtasks=subtask_repo,
+ csv_client=csv_client,
+ boto_sqs=boto_sqs,
+ queue_url=queue_url,
+ repo=repo,
+ )
+
+
+def _upload_fixture_csv(csv_client: CsvS3Client) -> str:
+ # Three postcode groups:
+ # AA1 1AA × 2 (within cap)
+ # BB2 2BB × 4 (oversize: > max_batch_size=3)
+ # CC3 3CC × 1 (final flush)
+ # Expected batching with cap=3 and the algorithm in
+ # ``iter_postcode_grouped_batches``:
+ # batch 1: [AA1 1AA × 2] (flushed because oversize follows)
+ # batch 2: [BB2 2BB × 4] (oversize own batch)
+ # batch 3: [CC3 3CC × 1] (final flush)
+ rows: list[dict[str, str]] = []
+ rows.extend(
+ {
+ "Address 1": f"{i} High St",
+ "Address 2": "",
+ "Address 3": "",
+ "postcode": "AA1 1AA",
+ "Internal Reference": f"AA-{i}",
+ }
+ for i in range(1, 3)
+ )
+ rows.extend(
+ {
+ "Address 1": f"{i} Long Road",
+ "Address 2": "",
+ "Address 3": "",
+ "postcode": "BB2 2BB",
+ "Internal Reference": f"BB-{i}",
+ }
+ for i in range(1, 5)
+ )
+ rows.append(
+ {
+ "Address 1": "1 Final Way",
+ "Address 2": "",
+ "Address 3": "",
+ "postcode": "CC3 3CC",
+ "Internal Reference": "CC-1",
+ }
+ )
+ return csv_client.save_rows(rows, "uploads/input.csv")
+
+
+def _drain_queue(boto_sqs: Any, queue_url: str) -> list[dict[str, Any]]:
+ bodies: list[dict[str, Any]] = []
+ while True:
+ received: dict[str, Any] = boto_sqs.receive_message(
+ QueueUrl=queue_url, MaxNumberOfMessages=10, WaitTimeSeconds=0
+ )
+ messages = cast(list[dict[str, Any]], received.get("Messages", []))
+ if not messages:
+ break
+ for message in messages:
+ bodies.append(cast(dict[str, Any], json.loads(message["Body"])))
+ boto_sqs.delete_message(
+ QueueUrl=queue_url, ReceiptHandle=message["ReceiptHandle"]
+ )
+ return bodies
+
+
+def test_split_and_dispatch_creates_three_children_for_fixture(
+ harness: Harness,
+) -> None:
+ # arrange
+ parent_task, parent_subtask = (
+ harness.task_orchestrator.create_task_with_subtask(
+ task_source="manual:postcode-splitter-int"
+ )
+ )
+ input_uri = _upload_fixture_csv(harness.csv_client)
+
+ # act
+ child_ids = harness.splitter.split_and_dispatch(
+ parent_task_id=parent_task.id,
+ parent_subtask_id=parent_subtask.id,
+ input_s3_uri=input_uri,
+ )
+
+ # assert
+ assert len(child_ids) == 3
+ # All child ids are unique and persisted as children of the parent task
+ # (the child status itself is not asserted here).
+ assert len(set(child_ids)) == 3
+ for cid in child_ids:
+ child = harness.subtasks.get(cid)
+ assert child.task_id == parent_task.id
+
+
+def test_split_and_dispatch_persists_child_inputs_with_task_id_and_s3_uri(
+ harness: Harness,
+) -> None:
+ # arrange
+ parent_task, parent_subtask = (
+ harness.task_orchestrator.create_task_with_subtask(
+ task_source="manual:postcode-splitter-int"
+ )
+ )
+ input_uri = _upload_fixture_csv(harness.csv_client)
+
+ # act
+ child_ids = harness.splitter.split_and_dispatch(
+ parent_task_id=parent_task.id,
+ parent_subtask_id=parent_subtask.id,
+ input_s3_uri=input_uri,
+ )
+
+ # assert
+ for cid in child_ids:
+ child = harness.subtasks.get(cid)
+ assert child.inputs is not None
+ assert child.inputs["task_id"] == str(parent_task.id)
+ batch_uri = child.inputs["s3_uri"]
+ assert isinstance(batch_uri, str)
+ prefix = (
+ f"s3://{BUCKET}/ara_postcode_splitter_batches/"
+ f"{parent_task.id}/{parent_subtask.id}/"
+ )
+ assert batch_uri.startswith(prefix)
+ assert batch_uri.endswith(".csv")
+
+
+def test_split_and_dispatch_publishes_one_message_per_child_with_matching_ids(
+ harness: Harness,
+) -> None:
+ # arrange
+ parent_task, parent_subtask = (
+ harness.task_orchestrator.create_task_with_subtask(
+ task_source="manual:postcode-splitter-int"
+ )
+ )
+ input_uri = _upload_fixture_csv(harness.csv_client)
+
+ # act
+ child_ids = harness.splitter.split_and_dispatch(
+ parent_task_id=parent_task.id,
+ parent_subtask_id=parent_subtask.id,
+ input_s3_uri=input_uri,
+ )
+
+ # assert
+ bodies = _drain_queue(harness.boto_sqs, harness.queue_url)
+ assert len(bodies) == len(child_ids)
+
+ # Match queue messages against persisted child inputs by sub_task_id;
+ # the message body's task_id/s3_uri must agree with the SubTask inputs.
+ bodies_by_child = {body["sub_task_id"]: body for body in bodies}
+ assert set(bodies_by_child.keys()) == {str(cid) for cid in child_ids}
+ for cid in child_ids:
+ child = harness.subtasks.get(cid)
+ body = bodies_by_child[str(cid)]
+ assert child.inputs is not None
+ assert body == {
+ "task_id": str(parent_task.id),
+ "sub_task_id": str(cid),
+ "s3_uri": child.inputs["s3_uri"],
+ }
+
+
+def test_split_and_dispatch_returns_child_ids_in_dispatch_order(
+ harness: Harness,
+) -> None:
+ # arrange
+ parent_task, parent_subtask = (
+ harness.task_orchestrator.create_task_with_subtask(
+ task_source="manual:postcode-splitter-int"
+ )
+ )
+ input_uri = _upload_fixture_csv(harness.csv_client)
+
+ # act
+ child_ids = harness.splitter.split_and_dispatch(
+ parent_task_id=parent_task.id,
+ parent_subtask_id=parent_subtask.id,
+ input_s3_uri=input_uri,
+ )
+
+ # assert
+ # Re-load each child's saved batch and inspect the postcode_clean column
+ # to confirm the dispatch order matches the postcode-batching algorithm:
+ # AA-batch first, BB oversize batch second, CC final-flush third.
+ postcodes_per_batch: list[set[str]] = []
+ for cid in child_ids:
+ child = harness.subtasks.get(cid)
+ assert child.inputs is not None
+ rows = harness.csv_client.read_rows(child.inputs["s3_uri"])
+ postcodes_per_batch.append({row["postcode_clean"] for row in rows})
+
+ assert postcodes_per_batch == [
+ {"AA11AA"},
+ {"BB22BB"},
+ {"CC33CC"},
+ ]
diff --git a/tests/orchestration/test_task_orchestrator.py b/tests/orchestration/test_task_orchestrator.py
new file mode 100644
index 00000000..ae89991d
--- /dev/null
+++ b/tests/orchestration/test_task_orchestrator.py
@@ -0,0 +1,197 @@
+from collections.abc import Iterator
+from dataclasses import dataclass
+
+import pytest
+from sqlalchemy import Engine
+from sqlmodel import Session
+
+from domain.tasks.subtasks import SubTask, SubTaskStatus
+from domain.tasks.tasks import Source, TaskStatus
+from orchestration.task_orchestrator import TaskOrchestrator
+from repositories.tasks.subtask_postgres_repository import SubTaskPostgresRepository
+from repositories.tasks.task_postgres_repository import TaskPostgresRepository
+
+
+@dataclass
+class Harness:
+ orchestrator: TaskOrchestrator
+ tasks: TaskPostgresRepository
+ subtasks: SubTaskPostgresRepository
+
+
+@pytest.fixture
+def harness(db_engine: Engine) -> Iterator[Harness]:
+ with Session(db_engine) as session:
+ tasks = TaskPostgresRepository(session=session)
+ subtasks = SubTaskPostgresRepository(session=session)
+ yield Harness(
+ orchestrator=TaskOrchestrator(task_repo=tasks, subtask_repo=subtasks),
+ tasks=tasks,
+ subtasks=subtasks,
+ )
+
+
+def test_create_task_with_subtask_creates_both_in_waiting(
+ harness: Harness,
+) -> None:
+ # act
+ task, subtask = harness.orchestrator.create_task_with_subtask(
+ task_source="manual:test",
+ inputs={"foo": "bar"},
+ source=Source.PORTFOLIO,
+ source_id="abc",
+ )
+
+ # assert
+ assert task.status is TaskStatus.WAITING
+ assert subtask.status is SubTaskStatus.WAITING
+ assert subtask.task_id == task.id
+ assert subtask.inputs == {"foo": "bar"}
+
+
+def test_start_subtask_cascades_to_in_progress(harness: Harness) -> None:
+ # arrange
+ task, subtask = harness.orchestrator.create_task_with_subtask(
+ task_source="manual:test"
+ )
+
+ # act
+ started = harness.orchestrator.start_subtask(
+ subtask.id, cloud_logs_url="https://example/log"
+ )
+
+ # assert
+ assert started.status is SubTaskStatus.IN_PROGRESS
+ assert started.cloud_logs_url == "https://example/log"
+ assert harness.tasks.get(task.id).status is TaskStatus.IN_PROGRESS
+
+
+def test_complete_subtask_cascades_to_complete(harness: Harness) -> None:
+ # arrange
+ task, subtask = harness.orchestrator.create_task_with_subtask(
+ task_source="manual:test"
+ )
+ harness.orchestrator.start_subtask(subtask.id)
+
+ # act
+ harness.orchestrator.complete_subtask(subtask.id, {"value": 42})
+
+ # assert
+ done_subtask = harness.subtasks.get(subtask.id)
+ done_task = harness.tasks.get(task.id)
+ assert done_subtask.outputs == {"result": {"value": 42}}
+ assert done_task.status is TaskStatus.COMPLETE
+ assert done_task.job_completed is not None
+
+
+def test_fail_subtask_cascades_to_failed(harness: Harness) -> None:
+ # arrange
+ task, subtask = harness.orchestrator.create_task_with_subtask(
+ task_source="manual:test"
+ )
+
+ # act
+ harness.orchestrator.fail_subtask(subtask.id, RuntimeError("boom"))
+
+ # assert
+ failed_subtask = harness.subtasks.get(subtask.id)
+ failed_task = harness.tasks.get(task.id)
+ assert failed_subtask.outputs == {"error": "boom"}
+ assert failed_task.status is TaskStatus.FAILED
+
+
+def test_failed_subtask_locks_task_failed_even_with_others_complete(
+ harness: Harness,
+) -> None:
+ # arrange
+ task, first = harness.orchestrator.create_task_with_subtask(
+ task_source="manual:test"
+ )
+ second = SubTask.create(task_id=task.id)
+ harness.subtasks.create(second)
+
+ # act
+ harness.orchestrator.complete_subtask(first.id)
+ harness.orchestrator.fail_subtask(second.id, RuntimeError("nope"))
+
+ # assert
+ assert harness.tasks.get(task.id).status is TaskStatus.FAILED
+
+
+def test_mixed_complete_and_in_progress_keeps_task_in_progress(
+ harness: Harness,
+) -> None:
+ # arrange
+ task, first = harness.orchestrator.create_task_with_subtask(
+ task_source="manual:test"
+ )
+ second = SubTask.create(task_id=task.id)
+ harness.subtasks.create(second)
+
+ # act
+ harness.orchestrator.complete_subtask(first.id)
+ harness.orchestrator.start_subtask(second.id)
+
+ # assert
+ assert harness.tasks.get(task.id).status is TaskStatus.IN_PROGRESS
+
+
+def test_run_subtask_happy_path_returns_result_and_cascades_complete(
+ harness: Harness,
+) -> None:
+ # arrange
+ task, subtask = harness.orchestrator.create_task_with_subtask(
+ task_source="manual:test"
+ )
+
+ # act
+ result = harness.orchestrator.run_subtask(subtask.id, work=lambda: {"answer": 42})
+
+ # assert
+ assert result == {"answer": 42}
+ assert harness.subtasks.get(subtask.id).status is SubTaskStatus.COMPLETE
+ assert harness.tasks.get(task.id).status is TaskStatus.COMPLETE
+
+
+def test_create_child_subtask_adds_waiting_child_without_changing_parent_status(
+ harness: Harness,
+) -> None:
+ # arrange
+ task, first = harness.orchestrator.create_task_with_subtask(
+ task_source="manual:test"
+ )
+ harness.orchestrator.start_subtask(first.id)
+ assert harness.tasks.get(task.id).status is TaskStatus.IN_PROGRESS
+
+ # act
+ child = harness.orchestrator.create_child_subtask(
+ task.id, inputs={"split": "a"}
+ )
+
+ # assert
+ persisted_child = harness.subtasks.get(child.id)
+ assert persisted_child.task_id == task.id
+ assert persisted_child.status is SubTaskStatus.WAITING
+ assert persisted_child.inputs == {"split": "a"}
+ assert persisted_child.id != first.id
+ # Cascade is a no-op: parent stays IN_PROGRESS.
+ assert harness.tasks.get(task.id).status is TaskStatus.IN_PROGRESS
+
+
+def test_run_subtask_failing_work_marks_failed_and_reraises(
+ harness: Harness,
+) -> None:
+ # arrange
+ task, subtask = harness.orchestrator.create_task_with_subtask(
+ task_source="manual:test"
+ )
+
+ def boom() -> None:
+ raise RuntimeError("boom")
+
+ # act / assert
+ with pytest.raises(RuntimeError, match="boom"):
+ harness.orchestrator.run_subtask(subtask.id, work=boom)
+
+ assert harness.subtasks.get(subtask.id).status is SubTaskStatus.FAILED
+ assert harness.tasks.get(task.id).status is TaskStatus.FAILED
diff --git a/tests/repositories/__init__.py b/tests/repositories/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/repositories/tasks/__init__.py b/tests/repositories/tasks/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/repositories/tasks/postgres/__init__.py b/tests/repositories/tasks/postgres/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/repositories/tasks/postgres/test_subtask_postgres_repository.py b/tests/repositories/tasks/postgres/test_subtask_postgres_repository.py
new file mode 100644
index 00000000..9cec52ea
--- /dev/null
+++ b/tests/repositories/tasks/postgres/test_subtask_postgres_repository.py
@@ -0,0 +1,100 @@
+from collections.abc import Iterator
+from uuid import UUID, uuid4
+
+import pytest
+from sqlalchemy import Engine
+from sqlmodel import Session
+
+from domain.tasks.subtasks import SubTask, SubTaskStatus
+from domain.tasks.tasks import Task
+from repositories.tasks.subtask_postgres_repository import SubTaskPostgresRepository
+from repositories.tasks.task_postgres_repository import TaskPostgresRepository
+
+
+@pytest.fixture
+def session(db_engine: Engine) -> Iterator[Session]:
+ with Session(db_engine) as s:  # db_engine is a fixture defined outside this file
+ yield s
+
+
+def _persisted_task_id(session: Session) -> UUID:
+ """Persist a parent Task row (so SubTask FK constraints hold) and return its id."""
+ task = Task.create(task_source="manual:test")
+ TaskPostgresRepository(session=session).create(task)
+ return task.id
+
+
+def test_create_and_get_round_trip_preserves_inputs(session: Session) -> None:
+ # arrange: a new SubTask with inputs, attached to an already-persisted Task
+ repo = SubTaskPostgresRepository(session=session)
+ task_id = _persisted_task_id(session)
+ st = SubTask.create(task_id=task_id, inputs={"address": "68 Glendon Way"})
+
+ # act: write the SubTask, then read it back by id
+ repo.create(st)
+ fetched = repo.get(st.id)
+
+ # assert: ids and inputs survive; a new SubTask is WAITING with no outputs
+ assert fetched.id == st.id
+ assert fetched.task_id == task_id
+ assert fetched.status is SubTaskStatus.WAITING
+ assert fetched.inputs == {"address": "68 Glendon Way"}
+ assert fetched.outputs is None
+
+
+def test_save_persists_status_and_outputs(session: Session) -> None:
+ # arrange: a persisted SubTask still in its initial state
+ repo = SubTaskPostgresRepository(session=session)
+ st = SubTask.create(task_id=_persisted_task_id(session))
+ repo.create(st)
+
+ # act: first transition - start, recording a log URL
+ st.start(cloud_logs_url="https://example/log")
+ repo.save(st)
+ # assert: the stored row now reads IN_PROGRESS
+ assert repo.get(st.id).status is SubTaskStatus.IN_PROGRESS
+
+ # act: second transition - complete with a result payload
+ st.complete({"uprn": "123"})
+ repo.save(st)
+ # assert: COMPLETE, payload stored under "result", URL kept, completion time set
+ done = repo.get(st.id)
+ assert done.status is SubTaskStatus.COMPLETE
+ assert done.outputs == {"result": {"uprn": "123"}}
+ assert done.cloud_logs_url == "https://example/log"
+ assert done.job_completed is not None
+
+
+def test_list_by_task_filters_by_task_id(session: Session) -> None:
+ # arrange: two SubTasks under task_a and one under task_b
+ repo = SubTaskPostgresRepository(session=session)
+ task_a = _persisted_task_id(session)
+ task_b = _persisted_task_id(session)
+ repo.create(SubTask.create(task_id=task_a))
+ repo.create(SubTask.create(task_id=task_a))
+ repo.create(SubTask.create(task_id=task_b))
+
+ # act: list each task's SubTasks separately
+ a_results = repo.list_by_task(task_a)
+ b_results = repo.list_by_task(task_b)
+
+ # assert: counts match and no SubTask shows up under the other task
+ assert len(a_results) == 2
+ assert len(b_results) == 1
+ assert all(s.task_id == task_a for s in a_results)
+ assert all(s.task_id == task_b for s in b_results)
+
+
+def test_list_by_task_returns_empty_for_unknown_task(session: Session) -> None:
+ # arrange: nothing is created for the random task id used below
+ repo = SubTaskPostgresRepository(session=session)
+ # act / assert: an unknown task id yields an empty list rather than an error
+ assert repo.list_by_task(uuid4()) == []
+
+
+def test_get_missing_raises(session: Session) -> None:
+ # arrange: no SubTask is created, so the random id below matches no row
+ repo = SubTaskPostgresRepository(session=session)
+ # act / assert: get() raises ValueError whose message contains "not found"
+ with pytest.raises(ValueError, match="not found"):
+ repo.get(uuid4())
diff --git a/tests/repositories/tasks/postgres/test_task_postgres_repository.py b/tests/repositories/tasks/postgres/test_task_postgres_repository.py
new file mode 100644
index 00000000..8a49a861
--- /dev/null
+++ b/tests/repositories/tasks/postgres/test_task_postgres_repository.py
@@ -0,0 +1,77 @@
+from collections.abc import Iterator
+from uuid import uuid4
+
+import pytest
+from sqlalchemy import Engine
+from sqlmodel import Session
+
+from domain.tasks.tasks import Source, Task, TaskStatus
+from infrastructure.postgres.task_table import TaskRow
+from repositories.tasks.task_postgres_repository import TaskPostgresRepository
+
+
+@pytest.fixture
+def session(db_engine: Engine) -> Iterator[Session]:
+ with Session(db_engine) as s:  # db_engine is a fixture defined outside this file
+ yield s
+
+
+def test_create_and_get_round_trip(session: Session) -> None:
+ # arrange: a Task carrying an explicit source and source_id
+ repo = TaskPostgresRepository(session=session)
+ t = Task.create(
+ task_source="manual:test", source=Source.PORTFOLIO, source_id="abc-123"
+ )
+
+ # act: write the Task, then read it back by id
+ repo.create(t)
+ fetched = repo.get(t.id)
+
+ # assert: id, source and source_id survive; a new Task is WAITING
+ assert fetched.id == t.id
+ assert fetched.status is TaskStatus.WAITING
+ assert fetched.source is Source.PORTFOLIO
+ assert fetched.source_id == "abc-123"
+
+
+def test_save_persists_status_transition(session: Session) -> None:
+ # arrange: a persisted Task still in its initial state
+ repo = TaskPostgresRepository(session=session)
+ t = Task.create(task_source="manual:test")
+ repo.create(t)
+
+ # act: first transition - start
+ t.start()
+ repo.save(t)
+ # assert: the stored row now reads IN_PROGRESS
+ assert repo.get(t.id).status is TaskStatus.IN_PROGRESS
+
+ # act: second transition - complete
+ t.complete()
+ repo.save(t)
+ # assert: COMPLETE is stored together with a completion timestamp
+ done = repo.get(t.id)
+ assert done.status is TaskStatus.COMPLETE
+ assert done.job_completed is not None
+
+
+def test_get_missing_raises(session: Session) -> None:
+ # arrange: no Task is created, so the random id below matches no row
+ repo = TaskPostgresRepository(session=session)
+ # act / assert: get() raises ValueError whose message contains "not found"
+ with pytest.raises(ValueError, match="not found"):
+ repo.get(uuid4())
+
+
+def test_get_normalises_legacy_capitalised_status(session: Session) -> None:
+ # Existing rows written by backend code use "In Progress" (capitalised).
+ # arrange: insert a TaskRow directly so the raw legacy status string is stored
+ repo = TaskPostgresRepository(session=session)
+ row = TaskRow(task_source="manual:test", status="In Progress")
+ session.add(row)
+ session.commit()
+
+ # act: read the row back through the repository
+ fetched = repo.get(row.id)
+ # assert: the legacy string comes back as the TaskStatus.IN_PROGRESS member
+ assert fetched.status is TaskStatus.IN_PROGRESS
diff --git a/tests/repositories/user_address/__init__.py b/tests/repositories/user_address/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/repositories/user_address/conftest.py b/tests/repositories/user_address/conftest.py
new file mode 100644
index 00000000..25c1ac3b
--- /dev/null
+++ b/tests/repositories/user_address/conftest.py
@@ -0,0 +1,28 @@
+import os
+from collections.abc import Iterator
+from typing import Optional
+
+import pytest
+
+
+@pytest.fixture(autouse=True)
+def _aws_creds() -> Iterator[None]: # pyright: ignore[reportUnusedFunction]
+ keys = (  # environment variables this fixture overrides and later restores
+ "AWS_ACCESS_KEY_ID",
+ "AWS_SECRET_ACCESS_KEY",
+ "AWS_SESSION_TOKEN",
+ "AWS_DEFAULT_REGION",
+ )
+ prev: dict[str, Optional[str]] = {k: os.environ.get(k) for k in keys}  # None = unset
+ os.environ["AWS_ACCESS_KEY_ID"] = "testing"  # placeholder values for the test run
+ os.environ["AWS_SECRET_ACCESS_KEY"] = "testing"
+ os.environ["AWS_SESSION_TOKEN"] = "testing"
+ os.environ["AWS_DEFAULT_REGION"] = "us-east-1"
+ try:
+ yield
+ finally:  # put the environment back even when the test fails
+ for k, v in prev.items():
+ if v is None:
+ os.environ.pop(k, None)  # was unset before the test: remove it again
+ else:
+ os.environ[k] = v
diff --git a/tests/repositories/user_address/test_user_address_csv_s3_repository.py b/tests/repositories/user_address/test_user_address_csv_s3_repository.py
new file mode 100644
index 00000000..9ffb250a
--- /dev/null
+++ b/tests/repositories/user_address/test_user_address_csv_s3_repository.py
@@ -0,0 +1,237 @@
+from collections.abc import Iterator
+
+import pytest
+from moto import mock_aws
+
+from domain.addresses.user_address import UserAddress
+from domain.postcode import Postcode
+from infrastructure.csv_s3_client import CsvS3Client
+from repositories.user_address.user_address_csv_s3_repository import (
+ UserAddressCsvS3Repository,
+)
+from tests.infrastructure import make_boto_client
+
+BUCKET = "user-address-bucket"
+
+
+@pytest.fixture
+def repo() -> Iterator[UserAddressCsvS3Repository]:
+ with mock_aws():  # the moto mock stays active until the test using repo finishes
+ boto_client = make_boto_client("s3")
+ boto_client.create_bucket(Bucket=BUCKET)  # the bucket the repository points at
+ csv_client = CsvS3Client(boto_client, BUCKET)
+ yield UserAddressCsvS3Repository(csv_client, BUCKET)
+
+
+def _upload_csv(
+ repo: UserAddressCsvS3Repository, rows: list[dict[str, str]], key: str
+) -> str:  # returns save_rows' result, which the tests pass to load_batch as the URI
+ return repo._csv_client.save_rows(rows, key) # pyright: ignore[reportPrivateUsage]
+
+
+def test_load_batch_parses_address_postcode_and_reference(
+ repo: UserAddressCsvS3Repository,
+) -> None:
+ # arrange: one row with all three address parts and a lower-case, spaced postcode
+ rows = [
+ {
+ "Address 1": "1 High Street",
+ "Address 2": "Flat 2",
+ "Address 3": "Townville",
+ "postcode": "sw1a 1aa",
+ "Internal Reference": "REF-001",
+ }
+ ]
+ uri = _upload_csv(repo, rows, "uploads/full.csv")
+
+ # act: load the uploaded CSV through the repository
+ addresses = repo.load_batch(uri)
+
+ # assert: parts are joined with ", " and the postcode equals Postcode("SW1A1AA")
+ assert len(addresses) == 1
+ address = addresses[0]
+ assert address.user_address == "1 High Street, Flat 2, Townville"
+ assert address.postcode == Postcode("SW1A1AA")
+ assert address.internal_reference == "REF-001"
+
+
+def test_load_batch_uses_only_address_1_when_others_missing(
+ repo: UserAddressCsvS3Repository,
+) -> None:
+ # arrange: the Address 2 and Address 3 columns are present but empty
+ rows = [
+ {
+ "Address 1": "10 Cardiff Road",
+ "Address 2": "",
+ "Address 3": "",
+ "postcode": "CF10 1AA",
+ "Internal Reference": "REF-002",
+ }
+ ]
+ uri = _upload_csv(repo, rows, "uploads/address1-only.csv")
+
+ # act: load the uploaded CSV through the repository
+ addresses = repo.load_batch(uri)
+
+ # assert: the empty parts add nothing (no separators) to the joined address
+ assert len(addresses) == 1
+ assert addresses[0].user_address == "10 Cardiff Road"
+ assert addresses[0].postcode == Postcode("CF101AA")
+ assert addresses[0].internal_reference == "REF-002"
+
+
+def test_load_batch_handles_missing_internal_reference(
+ repo: UserAddressCsvS3Repository,
+) -> None:
+ # arrange: the Internal Reference column is present but empty
+ rows = [
+ {
+ "Address 1": "5 Park Lane",
+ "Address 2": "",
+ "Address 3": "",
+ "postcode": "M1 1AA",
+ "Internal Reference": "",
+ }
+ ]
+ uri = _upload_csv(repo, rows, "uploads/no-ref.csv")
+
+ # act: load the uploaded CSV through the repository
+ addresses = repo.load_batch(uri)
+
+ # assert: the empty reference is loaded as None, not as an empty string
+ assert len(addresses) == 1
+ assert addresses[0].user_address == "5 Park Lane"
+ assert addresses[0].postcode == Postcode("M11AA")
+ assert addresses[0].internal_reference is None
+
+
+def test_load_batch_captures_full_source_row(
+ repo: UserAddressCsvS3Repository,
+) -> None:
+ # A raw EPC-export-shaped row: the splitter must preserve every column,
+ # not just the ones it parses into UserAddress fields.
+ # arrange: extra columns, and no Address 2/3 or Internal Reference columns
+ row = {
+ "Asset Reference": "511",
+ "Address 1": "9 Abingdon Road Padiham Lancashire BB12 7BX",
+ "postcode": "BB12 7BX",
+ "Property Type": "House: End Terrace",
+ "SAP Score": "69",
+ }
+ uri = _upload_csv(repo, [row], "uploads/epc.csv")
+
+ # act: load the uploaded CSV through the repository
+ addresses = repo.load_batch(uri)
+
+ # assert: source_row equals the uploaded row, extra columns included
+ assert addresses[0].source_row == row
+
+
+def test_load_batch_raises_when_postcode_column_absent(
+ repo: UserAddressCsvS3Repository,
+) -> None:
+ # arrange: a CSV whose only columns are Address 1 and Property Type
+ rows = [{"Address 1": "1 High Street", "Property Type": "Flat"}]
+ uri = _upload_csv(repo, rows, "uploads/no-postcode.csv")
+
+ # act / assert: loading fails with a ValueError that names the missing column
+ with pytest.raises(ValueError, match="no 'postcode' column"):
+ repo.load_batch(uri)
+
+
+def test_save_batch_passes_through_all_columns_and_appends_postcode_clean(
+ repo: UserAddressCsvS3Repository,
+) -> None:
+ # arrange: load a row whose postcode value carries a leading space
+ row = {
+ "Asset Reference": "511",
+ "Address 1": "9 Abingdon Road Padiham Lancashire BB12 7BX",
+ "postcode": " BB12 7BX",
+ "Property Type": "House: End Terrace",
+ }
+ uri = _upload_csv(repo, [row], "uploads/epc.csv")
+ addresses = repo.load_batch(uri)
+
+ # act: save the loaded batch, then read the written CSV back
+ saved_uri = repo.save_batch(addresses, "tasks/passthrough")
+ saved_rows = repo._csv_client.read_rows(saved_uri) # pyright: ignore[reportPrivateUsage]
+
+ # assert: one row comes back
+ assert len(saved_rows) == 1
+ saved = saved_rows[0]
+ # Every original column survives, byte-for-byte.
+ for column, value in row.items():
+ assert saved[column] == value
+ # Plus the one appended column the downstream address2uprn stage groups on.
+ assert saved["postcode_clean"] == "BB127BX"
+
+
+def test_save_batch_returns_uri_under_path_prefix(
+ repo: UserAddressCsvS3Repository,
+) -> None:
+ # arrange: a UserAddress built directly, without going through load_batch
+ addresses = [
+ UserAddress(
+ user_address="1 High Street",
+ postcode=Postcode("SW1A 1AA"),
+ source_row={"Address 1": "1 High Street", "postcode": "SW1A 1AA"},
+ ),
+ ]
+
+ # act: save under an explicit path prefix
+ uri = repo.save_batch(addresses, "tasks/abc/batches")
+
+ # assert: the URI is an s3:// .csv object in BUCKET below that prefix
+ assert uri.startswith(f"s3://{BUCKET}/tasks/abc/batches/")
+ assert uri.endswith(".csv")
+
+
+def test_save_then_reload_round_trip_preserves_columns(
+ repo: UserAddressCsvS3Repository,
+) -> None:
+ # arrange: two uploaded rows, the second with an empty Internal Reference
+ rows = [
+ {
+ "Address 1": "1 High Street",
+ "postcode": "SW1A 1AA",
+ "Internal Reference": "REF-001",
+ },
+ {
+ "Address 1": "2 Low Street",
+ "postcode": "XY9 8ZW",
+ "Internal Reference": "",
+ },
+ ]
+ uri = _upload_csv(repo, rows, "uploads/round-trip.csv")
+ addresses = repo.load_batch(uri)
+
+ # act: save what was loaded, then read the saved CSV back
+ saved_uri = repo.save_batch(addresses, "tasks/round-trip")
+ saved_rows = repo._csv_client.read_rows(saved_uri) # pyright: ignore[reportPrivateUsage]
+
+ # assert: row order is preserved as well (the lists are compared positionally)
+ # Original columns come back verbatim; postcode_clean is the only addition.
+ assert [
+ {k: v for k, v in r.items() if k != "postcode_clean"} for r in saved_rows
+ ] == rows
+ assert [r["postcode_clean"] for r in saved_rows] == ["SW1A1AA", "XY98ZW"]
+
+
+def test_save_batch_uses_unique_filename_per_call(
+ repo: UserAddressCsvS3Repository,
+) -> None:
+ # arrange: a single in-memory UserAddress to save twice
+ addresses = [
+ UserAddress(
+ user_address="1 High Street",
+ postcode=Postcode("SW1A 1AA"),
+ source_row={"Address 1": "1 High Street", "postcode": "SW1A 1AA"},
+ ),
+ ]
+
+ # act: save the same batch twice under the same prefix
+ uri_1 = repo.save_batch(addresses, "tasks/uniqueness")
+ uri_2 = repo.save_batch(addresses, "tasks/uniqueness")
+
+ # assert: each call returns a different URI
+ assert uri_1 != uri_2
diff --git a/tests/utilities/__init__.py b/tests/utilities/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/utilities/aws_lambda/__init__.py b/tests/utilities/aws_lambda/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/utilities/aws_lambda/test_subtask_handler.py b/tests/utilities/aws_lambda/test_subtask_handler.py
new file mode 100644
index 00000000..d671adc4
--- /dev/null
+++ b/tests/utilities/aws_lambda/test_subtask_handler.py
@@ -0,0 +1,255 @@
+import logging
+from collections.abc import Generator, Iterator
+from contextlib import contextmanager
+from dataclasses import dataclass
+from typing import Any
+from uuid import UUID
+
+import pytest
+from sqlalchemy import Engine
+from sqlmodel import Session
+
+from domain.tasks.subtasks import SubTaskStatus
+from domain.tasks.tasks import TaskStatus
+from orchestration.task_orchestrator import TaskOrchestrator
+from repositories.tasks.subtask_postgres_repository import SubTaskPostgresRepository
+from repositories.tasks.task_postgres_repository import TaskPostgresRepository
+from utilities.aws_lambda.subtask_handler import subtask_handler
+
+_LOGGER_NAME = "utilities.aws_lambda.subtask_handler"
+
+
+@dataclass
+class Harness:
+ orchestrator: TaskOrchestrator
+ tasks: TaskPostgresRepository  # used by tests to read back persisted Task state
+ subtasks: SubTaskPostgresRepository  # used by tests to read back SubTask state
+
+ @contextmanager
+ def factory(self) -> Generator[TaskOrchestrator, None, None]:
+ yield self.orchestrator  # passed as orchestrator_cm; no setup or teardown
+
+
+@pytest.fixture
+def harness(db_engine: Engine) -> Iterator[Harness]:
+ with Session(db_engine) as session:  # one session shared by both repositories
+ tasks = TaskPostgresRepository(session=session)
+ subtasks = SubTaskPostgresRepository(session=session)
+ yield Harness(
+ orchestrator=TaskOrchestrator(task_repo=tasks, subtask_repo=subtasks),
+ tasks=tasks,  # the same instances the orchestrator writes through
+ subtasks=subtasks,
+ )
+
+
+def _direct_event(task_id: UUID, subtask_id: UUID) -> dict[str, Any]:
+ return {"task_id": str(task_id), "sub_task_id": str(subtask_id)}  # no "Records" key
+
+
+def test_subtask_handler_injects_orchestrator_as_third_positional_argument(
+ harness: Harness,
+) -> None:
+ # arrange: a persisted subtask and a handler that records its three arguments
+ _, subtask = harness.orchestrator.create_task_with_subtask(
+ task_source="manual:test"
+ )
+
+ received: dict[str, Any] = {}
+
+ @subtask_handler(orchestrator_cm=harness.factory)
+ def handler(
+ body: dict[str, Any], context: Any, orchestrator: TaskOrchestrator
+ ) -> None:
+ received["body"] = body
+ received["context"] = context
+ received["orchestrator"] = orchestrator
+
+ # act: invoke with a direct (non-Records) event and a sentinel context
+ handler(_direct_event(subtask.task_id, subtask.id), context="ctx-sentinel")
+
+ # assert: the factory's orchestrator is injected; context and body pass through
+ assert received["orchestrator"] is harness.orchestrator
+ assert received["context"] == "ctx-sentinel"
+ assert received["body"]["sub_task_id"] == str(subtask.id)
+
+
+def test_subtask_handler_completes_parent_subtask_on_success(
+ harness: Harness,
+) -> None:
+ # arrange: a persisted task/subtask and a handler that returns normally
+ task, subtask = harness.orchestrator.create_task_with_subtask(
+ task_source="manual:test"
+ )
+
+ @subtask_handler(orchestrator_cm=harness.factory)
+ def handler(
+ body: dict[str, Any], context: Any, orchestrator: TaskOrchestrator
+ ) -> None:
+ return None
+
+ # act: invoke with a direct event naming that subtask
+ handler(_direct_event(task.id, subtask.id), context=None)
+
+ # assert: both the subtask and its parent task are persisted as COMPLETE
+ assert harness.subtasks.get(subtask.id).status is SubTaskStatus.COMPLETE
+ assert harness.tasks.get(task.id).status is TaskStatus.COMPLETE
+
+
+def test_subtask_handler_marks_parent_failed_and_reraises_on_error(
+ harness: Harness,
+) -> None:
+ # arrange: a persisted task/subtask and a handler that always raises
+ task, subtask = harness.orchestrator.create_task_with_subtask(
+ task_source="manual:test"
+ )
+
+ @subtask_handler(orchestrator_cm=harness.factory)
+ def handler(
+ body: dict[str, Any], context: Any, orchestrator: TaskOrchestrator
+ ) -> None:
+ raise RuntimeError("boom")
+
+ # act / assert: the handler's own exception reaches the caller of the wrapper
+ with pytest.raises(RuntimeError, match="boom"):
+ handler(_direct_event(task.id, subtask.id), context=None)
+
+ assert harness.subtasks.get(subtask.id).status is SubTaskStatus.FAILED
+ assert harness.tasks.get(task.id).status is TaskStatus.FAILED  # parent task too
+
+
+def test_subtask_handler_injected_orchestrator_can_create_child_subtask(
+ harness: Harness,
+) -> None:
+ # arrange: a handler that creates a child subtask via the injected orchestrator
+ task, subtask = harness.orchestrator.create_task_with_subtask(
+ task_source="manual:test"
+ )
+
+ child_ids: list[UUID] = []
+
+ @subtask_handler(orchestrator_cm=harness.factory)
+ def handler(
+ body: dict[str, Any], context: Any, orchestrator: TaskOrchestrator
+ ) -> None:
+ child = orchestrator.create_child_subtask(task.id, inputs={"split": 1})
+ child_ids.append(child.id)
+
+ # act: invoke with a direct event naming the original subtask
+ handler(_direct_event(task.id, subtask.id), context=None)
+
+ # assert: exactly one child exists, persisted under the same task as WAITING
+ assert len(child_ids) == 1
+ persisted_child = harness.subtasks.get(child_ids[0])
+ assert persisted_child.task_id == task.id
+ assert persisted_child.status is SubTaskStatus.WAITING
+
+
+def test_subtask_handler_logs_subtask_lifecycle_on_success(
+ harness: Harness, caplog: pytest.LogCaptureFixture
+) -> None:
+ # arrange: a persisted task/subtask and a handler that returns normally
+ task, subtask = harness.orchestrator.create_task_with_subtask(
+ task_source="manual:test"
+ )
+
+ @subtask_handler(orchestrator_cm=harness.factory)
+ def handler(
+ body: dict[str, Any], context: Any, orchestrator: TaskOrchestrator
+ ) -> None:
+ return None
+
+ # act: run the handler while capturing INFO records from its module logger
+ with caplog.at_level(logging.INFO, logger=_LOGGER_NAME):
+ handler(_direct_event(task.id, subtask.id), context=None)
+
+ # assert: both the start and the completion message name the subtask id
+ assert f"Running subtask {subtask.id}" in caplog.text
+ assert f"Subtask {subtask.id} completed" in caplog.text
+
+
+def test_subtask_handler_logs_exception_on_failure(
+ harness: Harness, caplog: pytest.LogCaptureFixture
+) -> None:
+ # arrange: a persisted task/subtask and a handler that always raises
+ task, subtask = harness.orchestrator.create_task_with_subtask(
+ task_source="manual:test"
+ )
+
+ @subtask_handler(orchestrator_cm=harness.factory)
+ def handler(
+ body: dict[str, Any], context: Any, orchestrator: TaskOrchestrator
+ ) -> None:
+ raise RuntimeError("boom")
+
+ # act / assert: the error still propagates while log records are captured
+ with caplog.at_level(logging.INFO, logger=_LOGGER_NAME):
+ with pytest.raises(RuntimeError, match="boom"):
+ handler(_direct_event(task.id, subtask.id), context=None)
+
+ failures = [r for r in caplog.records if r.levelno == logging.ERROR]
+ assert any(
+ f"Subtask {subtask.id} failed" in r.getMessage() for r in failures
+ )
+ assert any(r.exc_info is not None for r in failures)  # exception info attached
+
+
+def test_subtask_handler_records_cloudwatch_url_on_subtask(
+ harness: Harness, monkeypatch: pytest.MonkeyPatch
+) -> None:
+ # arrange: set the three environment variables the log URL is built from
+ monkeypatch.setenv("AWS_REGION", "eu-west-2")
+ monkeypatch.setenv(
+ "AWS_LAMBDA_LOG_GROUP_NAME", "/aws/lambda/postcode-splitter"
+ )
+ monkeypatch.setenv(
+ "AWS_LAMBDA_LOG_STREAM_NAME", "2026/05/20/[$LATEST]abc123"
+ )
+ task, subtask = harness.orchestrator.create_task_with_subtask(
+ task_source="manual:test"
+ )
+
+ @subtask_handler(orchestrator_cm=harness.factory)
+ def handler(
+ body: dict[str, Any], context: Any, orchestrator: TaskOrchestrator
+ ) -> None:
+ return None
+
+ # act: invoke with a direct event naming that subtask
+ handler(_direct_event(task.id, subtask.id), context=None)
+
+ # assert: the persisted URL targets the region's CloudWatch console
+ saved_url = harness.subtasks.get(subtask.id).cloud_logs_url
+ assert saved_url is not None
+ assert saved_url.startswith(
+ "https://eu-west-2.console.aws.amazon.com/cloudwatch/home"
+ )
+ # Log group / stream are console-encoded ("/" -> "$252F").
+ assert "$252Faws$252Flambda$252Fpostcode-splitter" in saved_url
+ assert "$255B$2524LATEST$255D" in saved_url
+
+
+def test_subtask_handler_leaves_cloudwatch_url_unset_outside_lambda(
+ harness: Harness, monkeypatch: pytest.MonkeyPatch
+) -> None:
+ # arrange: make sure none of the three log-URL variables is set
+ for var in (
+ "AWS_REGION",
+ "AWS_LAMBDA_LOG_GROUP_NAME",
+ "AWS_LAMBDA_LOG_STREAM_NAME",
+ ):
+ monkeypatch.delenv(var, raising=False)
+ task, subtask = harness.orchestrator.create_task_with_subtask(
+ task_source="manual:test"
+ )
+
+ @subtask_handler(orchestrator_cm=harness.factory)
+ def handler(
+ body: dict[str, Any], context: Any, orchestrator: TaskOrchestrator
+ ) -> None:
+ return None
+
+ # act: invoke with a direct event naming that subtask
+ handler(_direct_event(task.id, subtask.id), context=None)
+
+ # assert: the subtask still completes its run with no log URL recorded
+ assert harness.subtasks.get(subtask.id).cloud_logs_url is None
diff --git a/utilities/__init__.py b/utilities/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/utilities/aws_lambda/__init__.py b/utilities/aws_lambda/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/utilities/aws_lambda/default_orchestrator.py b/utilities/aws_lambda/default_orchestrator.py
new file mode 100644
index 00000000..f78886b9
--- /dev/null
+++ b/utilities/aws_lambda/default_orchestrator.py
@@ -0,0 +1,26 @@
+import os
+from collections.abc import Generator
+from contextlib import contextmanager
+
+from sqlmodel import Session
+
+from infrastructure.postgres.config import PostgresConfig
+from infrastructure.postgres.engine import make_engine
+from orchestration.task_orchestrator import TaskOrchestrator
+from repositories.tasks.subtask_postgres_repository import SubTaskPostgresRepository
+from repositories.tasks.task_postgres_repository import TaskPostgresRepository
+
+
+@contextmanager
+def default_orchestrator() -> Generator[TaskOrchestrator, None, None]:
+ """Yield a TaskOrchestrator whose two repositories share one new session.
+
+ Connection params come from os.environ via PostgresConfig.from_env. Each call
+ runs make_engine and opens a Session that is closed on context exit.
+ """
+ # NOTE(review): the engine is never disposed here - confirm make_engine reuses it.
+ engine = make_engine(PostgresConfig.from_env(dict(os.environ)))
+ with Session(engine) as session:
+ yield TaskOrchestrator(
+ task_repo=TaskPostgresRepository(session=session),
+ subtask_repo=SubTaskPostgresRepository(session=session),
+ )
diff --git a/utilities/aws_lambda/subtask_handler.py b/utilities/aws_lambda/subtask_handler.py
new file mode 100644
index 00000000..592ffebf
--- /dev/null
+++ b/utilities/aws_lambda/subtask_handler.py
@@ -0,0 +1,102 @@
+"""@subtask_handler decorator for Lambdas that operate on existing SubTasks.
+
+Translates an AWS Lambda invocation (SQS-shaped or direct) into
+TaskOrchestrator.run_subtask(...) calls.
+"""
+
+import json
+import logging
+import os
+from contextlib import AbstractContextManager
+from functools import wraps
+from typing import Any, Callable, Optional, cast
+from urllib.parse import quote
+
+from utilities.aws_lambda.default_orchestrator import default_orchestrator
+from utilities.aws_lambda.subtask_trigger_body import SubtaskTriggerBody
+from orchestration.task_orchestrator import TaskOrchestrator
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+
+OrchestratorCM = Callable[[], AbstractContextManager[TaskOrchestrator]]
+
+
+def subtask_handler(
+ *,
+ orchestrator_cm: Optional[OrchestratorCM] = None,
+) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
+ """Run the wrapped function as the body of an existing SubTask.
+
+ Each record's body is validated via SubtaskTriggerBody (task_id and
+ sub_task_id required), then func(body, context, orchestrator) runs inside
+ orchestrator.run_subtask(...), which owns the start/complete/fail lifecycle
+ and cascades status into the parent Task. A failure is logged and re-raised
+ immediately, so later records are not run. The wrapper itself returns None.
+ """
+ factory = orchestrator_cm or default_orchestrator
+
+ def decorator(func: Callable[..., Any]) -> Callable[..., Any]:
+ @wraps(func)
+ def wrapper(event: dict[str, Any], context: Any) -> None:
+ cloud_logs_url = _cloudwatch_url()  # None when the log variables are unset
+ with factory() as orchestrator:
+ for record in _records(event):
+ body = _parse_body(record)
+ trigger = SubtaskTriggerBody.model_validate(body)  # outside the try below
+ logger.info("Running subtask %s", trigger.sub_task_id)
+ try:
+ orchestrator.run_subtask(
+ trigger.sub_task_id,
+ work=lambda body=body, o=orchestrator: func(  # defaults bind this record
+ body, context, o
+ ),
+ cloud_logs_url=cloud_logs_url,
+ )
+ except Exception:
+ logger.exception(
+ "Subtask %s failed", trigger.sub_task_id
+ )
+ raise  # propagate to the Lambda runtime after logging
+ logger.info("Subtask %s completed", trigger.sub_task_id)
+
+ return wrapper
+
+ return decorator
+
+
+def _parse_body(record: dict[str, Any]) -> dict[str, Any]:
+ raw = record.get("body", record)  # no "body" key: the record itself is the body
+ if isinstance(raw, str):
+ try:
+ parsed = json.loads(raw)
+ except json.JSONDecodeError:
+ return {}  # invalid JSON is treated as an empty body, not raised
+ return cast(dict[str, Any], parsed) if isinstance(parsed, dict) else {}
+ if isinstance(raw, dict):
+ return cast(dict[str, Any], raw)
+ return {}  # "body" is neither a string nor a dict
+
+
+def _records(event: dict[str, Any]) -> list[dict[str, Any]]:
+ raw_records = event.get("Records")
+ if isinstance(raw_records, list):
+ return [r for r in cast(list[Any], raw_records) if isinstance(r, dict)]  # non-dicts dropped
+ return [event]  # no list under "Records": the event is the single record
+
+
+def _console_encode(value: str) -> str:
+ return quote(value, safe="").replace("%", "$25")  # e.g. "/" -> "%2F" -> "$252F"
+
+
+def _cloudwatch_url() -> Optional[str]:
+ region = os.environ.get("AWS_REGION")
+ log_group = os.environ.get("AWS_LAMBDA_LOG_GROUP_NAME")
+ log_stream = os.environ.get("AWS_LAMBDA_LOG_STREAM_NAME")
+ if not (region and log_group and log_stream):
+ return None  # any of the three is missing or empty: no URL can be built
+ return (
+ f"https://{region}.console.aws.amazon.com/cloudwatch/home"
+ f"?region={region}#logsV2:log-groups/log-group/"
+ f"{_console_encode(log_group)}/log-events/{_console_encode(log_stream)}"
+ )
diff --git a/utilities/aws_lambda/subtask_trigger_body.py b/utilities/aws_lambda/subtask_trigger_body.py
new file mode 100644
index 00000000..a6b539e5
--- /dev/null
+++ b/utilities/aws_lambda/subtask_trigger_body.py
@@ -0,0 +1,17 @@
+from uuid import UUID
+
+from pydantic import BaseModel, ConfigDict
+
+
+class SubtaskTriggerBody(BaseModel):
+ """The minimum the subtask_handler needs to dispatch lifecycle calls.
+
+ `extra="allow"` so the rest of the work payload passes through to the
+ decorated function untouched — handlers do their own model_validate on
+ the full body for fields specific to their use case.
+ """
+
+ model_config = ConfigDict(extra="allow")
+
+ task_id: UUID  # required
+ sub_task_id: UUID  # required; the id subtask_handler passes to run_subtask
diff --git a/utilities/aws_lambda/task_handler.py b/utilities/aws_lambda/task_handler.py
new file mode 100644
index 00000000..82c7198e
--- /dev/null
+++ b/utilities/aws_lambda/task_handler.py
@@ -0,0 +1,98 @@
+"""@task_handler decorator for Lambdas that own the entire pipeline.
+
+Translates an AWS Lambda invocation (SQS-shaped or direct) into
+TaskOrchestrator.create_task_with_subtask(...) + run_subtask(...).
+"""
+
+import json
+from contextlib import AbstractContextManager
+from functools import wraps
+from typing import Any, Callable, Optional, cast
+
+from utilities.aws_lambda.default_orchestrator import default_orchestrator
+from domain.tasks.tasks import Source
+from orchestration.task_orchestrator import TaskOrchestrator
+
+OrchestratorCM = Callable[[], AbstractContextManager[TaskOrchestrator]]
+
+
+def task_handler(
+    *,
+    task_source: str,
+    source: Source,
+    orchestrator_cm: Optional[OrchestratorCM] = None,
+) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
+    """Run the wrapped function as the body of a freshly-created Task + SubTask.
+
+    For each record, creates a new Task + initial SubTask, then runs
+    func(body, context) inside orchestrator.run_subtask(...). `source_id` is
+    read from body[source.value] (silent None if absent — preserved from
+    legacy ADR-0001).
+
+    An event is Records-style only when event["Records"] is a list, the same
+    test _records applies; run_subtask failures are then reported via
+    {"batchItemFailures": [...]}. Any other event re-raises, else returns results.
+    """
+    factory = orchestrator_cm or default_orchestrator
+
+    def decorator(func: Callable[..., Any]) -> Callable[..., Any]:
+        @wraps(func)
+        def wrapper(event: dict[str, Any], context: Any) -> Any:
+            with factory() as orchestrator:
+                results: list[Any] = []
+                failures: list[dict[str, Any]] = []
+                is_batch = isinstance(event.get("Records"), list)  # as in _records()
+                for record in _records(event):
+                    body = _parse_body(record)
+                    raw_source_id = body.get(source.value)
+                    source_id = (
+                        str(raw_source_id) if raw_source_id is not None else None
+                    )
+
+                    _, subtask = orchestrator.create_task_with_subtask(
+                        task_source=task_source,
+                        inputs=body,
+                        source=source,
+                        source_id=source_id,
+                    )
+
+                    try:
+                        result = orchestrator.run_subtask(
+                            subtask.id,
+                            work=lambda body=body: func(body, context),  # bind this body
+                        )
+                        results.append(result)
+                    except Exception:
+                        if is_batch:
+                            message_id = record.get("messageId", "")
+                            failures.append({"itemIdentifier": message_id})
+                        else:
+                            raise
+
+                if is_batch:
+                    return {"batchItemFailures": failures}
+                return results
+
+        return wrapper
+
+    return decorator
+
+
+def _parse_body(record: dict[str, Any]) -> dict[str, Any]:
+ raw = record.get("body", record)  # no "body" key: the record itself is the body
+ if isinstance(raw, str):
+ try:
+ parsed = json.loads(raw)
+ except json.JSONDecodeError:
+ return {}  # invalid JSON is treated as an empty body, not raised
+ return cast(dict[str, Any], parsed) if isinstance(parsed, dict) else {}
+ if isinstance(raw, dict):
+ return cast(dict[str, Any], raw)
+ return {}  # "body" is neither a string nor a dict
+
+
+def _records(event: dict[str, Any]) -> list[dict[str, Any]]:
+ raw_records = event.get("Records")
+ if isinstance(raw_records, list):
+ return [r for r in cast(list[Any], raw_records) if isinstance(r, dict)]  # non-dicts dropped
+ return [event]  # no list under "Records": the event is the single record
diff --git a/utilities/private.py b/utilities/private.py
new file mode 100644
index 00000000..77a70578
--- /dev/null
+++ b/utilities/private.py
@@ -0,0 +1,33 @@
+import inspect
+from typing import Any, Callable
+
+
+class private:
+ """Method decorator: access raises unless the accessing frame's `self` is an owner instance."""
+
+ func: Callable[..., Any]
+ name: str
+ owner: type
+
+ def __init__(self, func: Callable[..., Any]) -> None:
+ self.func = func
+ self.name = getattr(func, "__name__", "")  # only used in the error message
+
+ def __set_name__(self, owner: type, name: str) -> None:
+ self.owner = owner  # the class this descriptor was assigned in; `name` is unused
+
+ def __get__(self, instance: Any, owner: type) -> Callable[..., Any]:
+ # Look one frame up: the code that performed the attribute access.
+ frame = inspect.currentframe()
+ if frame is None or frame.f_back is None:
+ raise RuntimeError("cannot inspect caller frame")
+ caller_frame = frame.f_back
+ caller_self = caller_frame.f_locals.get("self")  # relies on the local being named `self`
+
+ if not isinstance(caller_self, self.owner):  # subclass instances pass this check
+ raise RuntimeError(
+ f"{self.owner.__name__}.{self.name} is private; "
+ f"called from {caller_frame.f_code.co_name}"
+ )
+
+ return getattr(self.func, "__get__")(instance, owner)  # delegate binding to the function
diff --git a/utils/sharepoint/domna_sharepoint_client.py b/utils/sharepoint/domna_sharepoint_client.py
index 5e0255ac..3e9168ba 100644
--- a/utils/sharepoint/domna_sharepoint_client.py
+++ b/utils/sharepoint/domna_sharepoint_client.py
@@ -125,6 +125,15 @@ class DomnaSharepointClient:
self.logger.debug(f"Downloaded SharePoint file to: {local_path}")
return True
+ def rename_file(self, item_id: str, new_name: str) -> None:
+ sharepoint_client = SharePointClient(  # a new client is built on every call
+ tenant_id=self.sharepoint_tenant_id,
+ client_id=self.sharepoint_client_id,
+ client_secret=self.sharepoint_client_secret,
+ site_id=self.sharepoint_drive.value,
+ )
+ sharepoint_client.rename_file(item_id, new_name)  # its return value is discarded
+
def create_temp_file(self, content: BytesIO, path: str):
# Ensure the path is under /tmp/
new_path = os.path.join("/tmp/sharepoint", path)
diff --git a/utils/sharepoint/sharepoint_client.py b/utils/sharepoint/sharepoint_client.py
index 5807c3bd..38107dbf 100644
--- a/utils/sharepoint/sharepoint_client.py
+++ b/utils/sharepoint/sharepoint_client.py
@@ -335,6 +335,17 @@ class SharePointClient:
if retry == "retry":
return self.upload_file(file_name, sharepoint_parent_id, file_stream)
+ @api_call_decorator  # NOTE(review): presumably sends the request from the returned tuple - confirm
+ def rename_file(self, item_id: str, new_name: str) -> None:
+ """
+ PATCH /drives/{drive_id}/items/{item_id} with data {"name": new_name}.
+
+ Renames a file in-place. Caller should discard the return value.
+ """
+ url = f"https://graph.microsoft.com/v1.0/drives/{self.document_drive_id}/items/{item_id}"
+ data: Dict[str, Any] = {"name": new_name}  # only the name field is sent
+ return "PATCH", url, data # type: ignore[return-value]
+
@staticmethod
def download_sharepoint_file(download_url: str) -> BytesIO:
"""