added bulk address uprn route

This commit is contained in:
Jun-te Kim 2026-04-20 13:06:31 +00:00
parent ec4c870465
commit 7caa7c476a
25 changed files with 711 additions and 8 deletions

View file

@ -66,6 +66,12 @@ RUN wget -qO - https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo apt-key
RUN apt update
RUN apt install -y postgresql-14
# Install Node.js + backlog.md
RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
&& apt-get install -y nodejs \
&& npm install -g backlog.md \
&& rm -rf /var/lib/apt/lists/*
# Install Claude
USER ${USER}
RUN curl -fsSL https://claude.ai/install.sh | bash \

View file

@ -6,7 +6,7 @@
"workspaceFolder": "/workspaces/model",
"postStartCommand": "bash .devcontainer/backend/post-install.sh",
"mounts": [
// "source=${localEnv:HOME},target=/home/vscode,type=bind",
"source=${localEnv:HOME},target=/home/vscode,type=bind",
"source=${localEnv:HOME}/.aws,target=/home/vscode/.aws,type=bind,consistency=cached"
],
"customizations": {
@ -43,6 +43,13 @@
},
"containerEnv": {
"PYTHONFLAGS": "-Xfrozen_modules=off"
},
"forwardPorts": [6421],
"portsAttributes": {
"6421": {
"label": "Backlog.md",
"onAutoForward": "notify"
}
}
}

29
AGENTS.md Normal file
View file

@ -0,0 +1,29 @@
<!-- BACKLOG.MD MCP GUIDELINES START -->
<CRITICAL_INSTRUCTION>
## BACKLOG WORKFLOW INSTRUCTIONS
This project uses Backlog.md MCP for all task and project management activities.
**CRITICAL GUIDANCE**
- If your client supports MCP resources, read `backlog://workflow/overview` to understand when and how to use Backlog for this project.
- If your client only supports tools or the above request fails, call `backlog.get_backlog_instructions()` to load the tool-oriented overview. Use the `instruction` selector when you need `task-creation`, `task-execution`, or `task-finalization`.
- **First time working here?** Read the overview resource IMMEDIATELY to learn the workflow
- **Already familiar?** You should have the overview cached ("## Backlog.md Overview (MCP)")
- **When to read it**: BEFORE creating tasks, or when you're unsure whether to track work
These guides cover:
- Decision framework for when to create tasks
- Search-first workflow to avoid duplicates
- Links to detailed guides for task creation, execution, and finalization
- MCP tools reference
You MUST read the overview resource to understand the complete workflow. The information is NOT summarized here.
</CRITICAL_INSTRUCTION>
<!-- BACKLOG.MD MCP GUIDELINES END -->

29
CLAUDE.md Normal file
View file

@ -0,0 +1,29 @@
<!-- BACKLOG.MD MCP GUIDELINES START -->
<CRITICAL_INSTRUCTION>
## BACKLOG WORKFLOW INSTRUCTIONS
This project uses Backlog.md MCP for all task and project management activities.
**CRITICAL GUIDANCE**
- If your client supports MCP resources, read `backlog://workflow/overview` to understand when and how to use Backlog for this project.
- If your client only supports tools or the above request fails, call `backlog.get_backlog_instructions()` to load the tool-oriented overview. Use the `instruction` selector when you need `task-creation`, `task-execution`, or `task-finalization`.
- **First time working here?** Read the overview resource IMMEDIATELY to learn the workflow
- **Already familiar?** You should have the overview cached ("## Backlog.md Overview (MCP)")
- **When to read it**: BEFORE creating tasks, or when you're unsure whether to track work
These guides cover:
- Decision framework for when to create tasks
- Search-first workflow to avoid duplicates
- Links to detailed guides for task creation, execution, and finalization
- MCP tools reference
You MUST read the overview resource to understand the complete workflow. The information is NOT summarized here.
</CRITICAL_INSTRUCTION>
<!-- BACKLOG.MD MCP GUIDELINES END -->

View file

View file

@ -0,0 +1,127 @@
import boto3
import json
from uuid import UUID
from datetime import datetime, timezone
from fastapi import APIRouter, Depends, HTTPException, Query
from sqlmodel import select
from sqlalchemy.dialects.postgresql import insert
from backend.app.dependencies import validate_token
from backend.app.config import get_settings
from backend.app.db.connection import get_db_session
from backend.app.db.models.bulk_address_uploads import BulkAddressUpload
from backend.app.db.models.portfolio import PropertyModel, PropertyCreationStatus, PortfolioStatus
from backend.app.bulk_uploads.schema import TriggerSplitterRequest, ConfirmMatchesRequest
from utils.s3 import parse_s3_uri, read_csv_from_s3
# Router for the bulk address upload endpoints. Every route registered on it
# is served under the "/bulk-uploads" prefix and runs the validate_token
# dependency before the handler.
router = APIRouter(
    prefix="/bulk-uploads",
    tags=["bulk-uploads"],
    dependencies=[Depends(validate_token)],
)
@router.post("/trigger-splitter", status_code=202)
async def trigger_splitter(req: TriggerSplitterRequest):
    """Enqueue a postcode-splitter job on SQS for an existing task/sub-task.

    Args:
        req: Carries ``task_id``, ``sub_task_id`` and ``s3_uri``; all three are
            forwarded unchanged as the JSON message body.

    Returns:
        A dict echoing ``task_id`` and ``sub_task_id`` plus ``sqs_message_id``
        (the ``MessageId`` of the SQS response, or ``None`` if it is absent).

    Raises:
        HTTPException: 500 when creating the SQS client or sending the
            message fails.
    """
    settings = get_settings()
    message_body = json.dumps(
        {
            "task_id": req.task_id,
            "sub_task_id": req.sub_task_id,
            "s3_uri": req.s3_uri,
        }
    )
    # Only the AWS calls live in the try block, so an unrelated failure is not
    # reported to the caller as an "SQS error".
    try:
        sqs = boto3.client("sqs", settings.AWS_DEFAULT_REGION)
        response = sqs.send_message(
            QueueUrl=settings.POSTCODE_SPLITTER_SQS_URL,
            MessageBody=message_body,
        )
    except Exception as e:
        # Chain the original error so the traceback keeps the root cause.
        raise HTTPException(status_code=500, detail=f"SQS error: {e}") from e
    return {
        "task_id": req.task_id,
        "sub_task_id": req.sub_task_id,
        "sqs_message_id": response.get("MessageId"),
    }
@router.get("/{task_id}/combined-results")
async def get_combined_results(
    task_id: UUID,
    offset: int = Query(default=0, ge=0),
    limit: int = Query(default=500, ge=1, le=5000),
):
    """Return one page of the combined CSV recorded for ``task_id``.

    Args:
        task_id: Task whose ``BulkAddressUpload`` row is looked up.
        offset: Index of the first row to return (>= 0).
        limit: Maximum number of rows to return (1..5000, default 500).

    Returns:
        ``{"rows": [...], "total": <row count of the whole CSV>,
        "offset": offset, "limit": limit}``.

    Raises:
        HTTPException: 404 when no upload row exists for ``task_id``; 409 when
            the row has no ``combined_output_s3_uri`` yet.
    """
    with get_db_session() as session:
        upload = session.exec(
            select(BulkAddressUpload).where(BulkAddressUpload.task_id == task_id)
        ).first()
        if not upload:
            raise HTTPException(status_code=404, detail="Upload not found")
        # Copy the value out while the session is still open; nothing below
        # needs the ORM object, so the session is not held during the S3 read.
        combined_uri = upload.combined_output_s3_uri

    if not combined_uri:
        raise HTTPException(status_code=409, detail="Combiner not finished")

    bucket, key = parse_s3_uri(combined_uri)
    # NOTE(review): the whole CSV is read for every page request; the helper's
    # return value is sliced in memory below.
    rows = read_csv_from_s3(bucket, key)
    return {
        "rows": rows[offset : offset + limit],
        "total": len(rows),
        "offset": offset,
        "limit": limit,
    }
@router.post("/{task_id}/confirm-matches")
async def confirm_matches(task_id: UUID, req: ConfirmMatchesRequest):
    """Persist the accepted rows as properties and mark the upload confirmed.

    Each accepted row becomes one ``PropertyModel`` insert for the upload's
    portfolio. Rows that conflict on ``(portfolio_id, uprn)`` are skipped via
    ``ON CONFLICT DO NOTHING``. The upload's ``status`` is then set to
    ``"confirmed"`` and ``updated_at`` refreshed, all in a single commit.

    Args:
        task_id: Task whose ``BulkAddressUpload`` row is looked up.
        req: Request body holding ``accepted_rows``.

    Returns:
        ``{"inserted": <rows actually inserted>, "skipped": <rows not
        inserted>}``.

    Raises:
        HTTPException: 404 when no upload row exists for ``task_id``.
    """
    with get_db_session() as session:
        upload = session.exec(
            select(BulkAddressUpload).where(BulkAddressUpload.task_id == task_id)
        ).first()
        if not upload:
            raise HTTPException(status_code=404, detail="Upload not found")

        rows = []
        inserted = 0
        # Guard the empty case: SQLAlchemy does not treat an empty values list
        # as "insert nothing", so the statement must not be built at all.
        if req.accepted_rows:
            # The upload stores portfolio_id as a string; convert it once.
            portfolio_id = int(upload.portfolio_id)
            rows = [
                {
                    "uprn": row.uprn,
                    "address": (
                        f"{row.address_line_1}, {row.address_line_2}"
                        if row.address_line_2
                        else row.address_line_1
                    ),
                    "postcode": row.postcode,
                    "portfolio_id": portfolio_id,
                    "landlord_property_id": row.internal_reference,
                    "creation_status": PropertyCreationStatus.LOADING,
                    "status": PortfolioStatus.ASSESSMENT.value,
                    "has_pre_condition_report": False,
                    "has_recommendations": False,
                }
                for row in req.accepted_rows
            ]
            stmt = (
                insert(PropertyModel)
                .values(rows)
                .on_conflict_do_nothing(
                    index_elements=["portfolio_id", "uprn"],
                    index_where=PropertyModel.uprn.isnot(None),
                )
                .returning(PropertyModel.id)
            )
            # RETURNING yields only the rows that were really inserted, so the
            # count excludes those dropped by the conflict clause.
            inserted = len(session.execute(stmt).fetchall())

        skipped = len(rows) - inserted
        upload.status = "confirmed"
        upload.updated_at = datetime.now(timezone.utc)
        session.add(upload)
        session.commit()
    return {"inserted": inserted, "skipped": skipped}

View file

@ -0,0 +1,20 @@
from pydantic import BaseModel
from typing import Optional
# Request body of POST /bulk-uploads/trigger-splitter. All three fields are
# required strings; the route forwards them unchanged in the SQS message body.
class TriggerSplitterRequest(BaseModel):
    task_id: str
    sub_task_id: str
    s3_uri: str
# One accepted row, as submitted to the confirm-matches route.
class AcceptedRow(BaseModel):
    uprn: int
    address_line_1: str
    # Optional; when present the route joins it to address_line_1 with ", ".
    address_line_2: Optional[str] = None
    postcode: str
    # Optional; the route stores it as the property's landlord_property_id.
    internal_reference: Optional[str] = None
# Request body of POST /bulk-uploads/{task_id}/confirm-matches.
class ConfirmMatchesRequest(BaseModel):
    # Rows to persist; each entry is validated as an AcceptedRow.
    accepted_rows: list[AcceptedRow]

View file

@ -39,6 +39,7 @@ class Settings(BaseSettings):
ENGINE_SQS_URL: str = "changeme"
CATEGORISATION_SQS_URL: str = "changeme"
PASHUB_TO_ARA_SQS_URL: str = "changeme"
POSTCODE_SPLITTER_SQS_URL: str = "changeme"
# Third parties
EPC_AUTH_TOKEN: str = "changeme"

View file

@ -11,12 +11,19 @@ class BulkAddressUpload(SQLModel, table=True):
__tablename__ = "bulk_address_uploads"
id: UUID = Field(default_factory=uuid4, primary_key=True, index=True)
task_id: UUID = Field(foreign_key="tasks.id", index=True)
combined_csv_s3_uri: Optional[str] = Field(default=None)
portfolio_id: str = Field(nullable=False)
user_id: str = Field(nullable=False)
s3_bucket: str = Field(nullable=False)
s3_key: str = Field(nullable=False)
filename: str = Field(nullable=False)
status: str = Field(default="ready_for_processing", nullable=False)
task_id: Optional[UUID] = Field(default=None)
combined_output_s3_uri: Optional[str] = Field(default=None)
created_at: datetime = Field(default_factory=datetime.utcnow)
updated_at: datetime = Field(default_factory=datetime.utcnow)
def set_combined_csv_s3_uri(task_id: UUID, s3_uri: str) -> None:
def set_combined_output_s3_uri(task_id: UUID, s3_uri: str) -> None:
now = datetime.now(timezone.utc)
with get_db_session() as session:
row = session.exec(
@ -24,7 +31,7 @@ def set_combined_csv_s3_uri(task_id: UUID, s3_uri: str) -> None:
).first()
if not row:
raise ValueError(f"No bulk_address_uploads row for task_id {task_id}")
row.combined_csv_s3_uri = s3_uri
row.combined_output_s3_uri = s3_uri
row.updated_at = now
session.add(row)
session.commit()

View file

@ -9,6 +9,7 @@ from backend.app.portfolio import router as portfolio_router
from backend.app.whlg import router as whlg_router
from backend.app.plan import router as plan_router
from backend.app.tasks import router as tasks_router
from backend.app.bulk_uploads import router as bulk_uploads_router
from backend.app.dependencies import validate_api_key
from backend.app.config import get_settings
@ -59,6 +60,7 @@ app.include_router(portfolio_router.router, prefix="/v1")
app.include_router(plan_router.router, prefix="/v1")
app.include_router(whlg_router.router, prefix="/v1")
app.include_router(tasks_router.router, prefix="/v1")
app.include_router(bulk_uploads_router.router, prefix="/v1")
if get_settings().ENVIRONMENT == "local":
from app.local import router as local_router
@ -75,6 +77,7 @@ from mangum import Mangum
from backend.app.portfolio import router as portfolio_router
from backend.app.whlg import router as whlg_router
from backend.app.plan import router as plan_router
from backend.app.bulk_uploads import router as bulk_uploads_router
from backend.app.dependencies import validate_api_key
from backend.app.config import get_settings
@ -124,6 +127,7 @@ async def log_requests(request: Request, call_next):
app.include_router(portfolio_router.router, prefix="/v1")
app.include_router(plan_router.router, prefix="/v1")
app.include_router(whlg_router.router, prefix="/v1")
app.include_router(bulk_uploads_router.router, prefix="/v1")
if get_settings().ENVIRONMENT == "local":
from app.local import router as local_router

View file

@ -0,0 +1,77 @@
from unittest.mock import MagicMock, patch
import pytest
from fastapi.testclient import TestClient
@pytest.fixture
def client():
    """Yield a TestClient for the app, imported while get_settings is patched.

    NOTE(review): the patch replaces ``backend.app.config.get_settings`` only;
    modules that already did ``from backend.app.config import get_settings``
    before this fixture ran keep their own reference - confirm that import
    order is what the tests rely on.
    """
    fake_settings = MagicMock(
        ENVIRONMENT="local",
        AWS_DEFAULT_REGION="eu-west-1",
        POSTCODE_SPLITTER_SQS_URL="https://sqs.eu-west-1.amazonaws.com/123456789/postcode-splitter",
    )
    with patch("backend.app.config.get_settings", return_value=fake_settings):
        from backend.app.main import app

        yield TestClient(app)
@patch("backend.app.bulk_uploads.router.boto3")
def test_trigger_splitter_creates_task_and_enqueues(mock_boto3, client):
    """A valid request enqueues exactly one SQS message with the expected body.

    The route does not create tasks itself: ``task_id`` and ``sub_task_id`` are
    required request fields, so the request supplies them. The router module
    has no ``TasksInterface`` attribute, so nothing of that name is patched
    (patching a missing attribute makes ``patch`` raise AttributeError).
    """
    import json

    mock_sqs = MagicMock()
    mock_sqs.send_message.return_value = {"MessageId": "msg-789"}
    mock_boto3.client.return_value = mock_sqs

    response = client.post(
        "/v1/bulk-uploads/trigger-splitter",
        json={
            "task_id": "task-123",
            "sub_task_id": "subtask-456",
            "s3_uri": "s3://bucket/file.csv",
        },
        headers={"Authorization": "Bearer test-token"},
    )

    assert response.status_code == 202
    body = response.json()
    assert body["task_id"] == "task-123"
    assert body["sub_task_id"] == "subtask-456"
    assert body["sqs_message_id"] == "msg-789"

    # The message body must carry exactly the identifiers and URI we sent.
    mock_sqs.send_message.assert_called_once()
    call_kwargs = mock_sqs.send_message.call_args.kwargs
    payload = json.loads(call_kwargs["MessageBody"])
    assert payload["task_id"] == "task-123"
    assert payload["sub_task_id"] == "subtask-456"
    assert payload["s3_uri"] == "s3://bucket/file.csv"
@patch("backend.app.bulk_uploads.router.boto3")
def test_trigger_splitter_uses_provided_task_ids(mock_boto3, client):
    """The response echoes the caller-supplied task and sub-task ids.

    Only ``boto3`` is patched: the router module has no ``TasksInterface``
    attribute, so patching it would raise AttributeError before the test body
    runs.
    """
    mock_sqs = MagicMock()
    mock_sqs.send_message.return_value = {"MessageId": "msg-999"}
    mock_boto3.client.return_value = mock_sqs

    response = client.post(
        "/v1/bulk-uploads/trigger-splitter",
        json={
            "s3_uri": "s3://bucket/file.csv",
            "task_id": "existing-task",
            "sub_task_id": "existing-subtask",
        },
        headers={"Authorization": "Bearer test-token"},
    )

    assert response.status_code == 202
    body = response.json()
    assert body["task_id"] == "existing-task"
    assert body["sub_task_id"] == "existing-subtask"

17
backlog/config.yml Normal file
View file

@ -0,0 +1,17 @@
project_name: "model-backend"
default_status: "To Do"
statuses: ["To Do", "In Progress", "Done"]
labels: []
definition_of_done: []
date_format: yyyy-mm-dd
max_column_width: 20
default_editor: "vim"
auto_open_browser: true
# Matches the port forwarded by the devcontainer and passed by run_backlog.sh
default_port: 6421
remote_operations: false
auto_commit: false
zero_padded_ids: 3
bypass_git_hooks: false
check_active_branches: false
active_branch_days: 30
task_prefix: "task"

View file

@ -0,0 +1,38 @@
---
id: TASK-1
title: Add POST /bulk-uploads/trigger-splitter FastAPI route
status: Done
assignee: []
created_date: '2026-04-20'
updated_date: '2026-04-20 12:31'
labels:
- backend
- bulk-upload
- api
dependencies: []
priority: high
ordinal: 2000
---
## Description
<!-- SECTION:DESCRIPTION:BEGIN -->
Expose an HTTP route that the frontend can call instead of sending SQS directly. Route:
`POST /bulk-uploads/trigger-splitter`
Body: `{task_id, sub_task_id, s3_uri}` — task+subtask already created by frontend `/api/tasks` call before this is invoked.
Behaviour: validate inputs, then publish an SQS message to the postcode_splitter queue (see `backend/postcode_splitter/main.py` for expected message shape: `{task_id, sub_task_id, s3_uri}`). Use existing SubTaskInterface / TasksInterface patterns from `backend/app/tasks/router.py`.
Place under `backend/app/` next to `tasks/router.py` — likely `backend/app/bulk_uploads/router.py`.
<!-- SECTION:DESCRIPTION:END -->
## Acceptance Criteria
<!-- AC:BEGIN -->
- [ ] #1 Route returns 202 with {task_id, sub_task_id}
- [ ] #2 SQS message enqueued with correct shape for postcode_splitter Lambda
- [ ] #3 Auth via existing `validate_token` dependency
- [ ] #4 Queue URL from config, not hardcoded
- [ ] #5 Unit test with mocked boto3 sqs client
<!-- AC:END -->

View file

@ -0,0 +1,50 @@
---
id: TASK-10
title: >-
Fix bulk_address_uploads SQLModel — align columns with real schema, prevent
rogue migrations
status: Done
assignee: []
created_date: '2026-04-20'
updated_date: '2026-04-20 12:34'
labels:
- backend
- bulk-upload
- db
dependencies: []
priority: high
ordinal: 7000
---
## Description
<!-- SECTION:DESCRIPTION:BEGIN -->
`backend/app/db/models/bulk_address_uploads.py` has several bugs that cause a rogue `ALTER TABLE` and silent write failures:
**1. Wrong column name**
Model declares `combined_csv_s3_uri` — real column (drizzle-managed) is `combined_output_s3_uri`. `set_combined_csv_s3_uri()` currently writes to a non-existent column.
**2. Partial model declared as `table=True`**
Model only includes `id, task_id, combined_csv_s3_uri, status, updated_at`. Missing: `portfolio_id, user_id, s3_bucket, s3_key, filename, source_headers, column_mapping, created_at`. SQLModel `table=True` with incomplete columns causes Alembic autogenerate / `create_all` to try to ALTER or recreate the table.
**3. `status` default mismatch**
Backend: `default="pending"`. Real table default: `'ready_for_processing'`. Triggers ALTER TABLE on migration runs.
**4. `task_id` nullability mismatch**
Backend: `task_id: UUID` (NOT NULL). Frontend drizzle schema: nullable (set later, after onboarding starts).
**Fix approach:**
- Declare all real columns matching drizzle schema (see `src/app/db/schema/bulk_address_uploads.ts` in assessment-model repo as source of truth).
- Rename `combined_csv_s3_uri` → `combined_output_s3_uri` throughout.
- `task_id: Optional[UUID]`, `status` default `'ready_for_processing'`.
- Ensure Alembic env excludes this table from autogenerate — drizzle owns migrations, not backend.
<!-- SECTION:DESCRIPTION:END -->
## Acceptance Criteria
<!-- AC:BEGIN -->
- [ ] #1 `combined_output_s3_uri` column name correct throughout model + helper
- [ ] #2 All columns present and match real table schema
- [ ] #3 No rogue ALTER TABLE when backend starts or migrations run
- [ ] #4 `task_id` nullable, `status` default `'ready_for_processing'`
- [ ] #5 Integration test: combiner runs → `combined_output_s3_uri` populated → frontend reads it correctly
<!-- AC:END -->

View file

@ -0,0 +1,36 @@
---
id: TASK-2
title: 'Add POST /bulk-uploads/{task_id}/combine FastAPI route'
status: To Do
assignee: []
created_date: '2026-04-20'
updated_date: '2026-04-20 11:53'
labels:
- backend
- bulk-upload
- api
dependencies: []
priority: high
ordinal: 10000
---
## Description
<!-- SECTION:DESCRIPTION:BEGIN -->
Expose HTTP route to trigger `bulk_address2uprn_combiner`:
`POST /bulk-uploads/{task_id}/combine`
Creates a new sub_task under task_id, then pushes `{task_id, sub_task_id}` to the combiner SQS queue (see `backend/bulk_address2uprn_combiner/main.py` for consumer shape).
Idempotency: if `bulk_address_uploads.combined_output_s3_uri` already set for this task, return 200 with `{already_combined: true}` (mirror current frontend behaviour).
<!-- SECTION:DESCRIPTION:END -->
## Acceptance Criteria
<!-- AC:BEGIN -->
- [ ] #1 Route returns 202 with {task_id, sub_task_id} on new trigger
- [ ] #2 Returns 200 {already_combined: true} if combined_output_s3_uri already set
- [ ] #3 SQS message enqueued with correct shape
- [ ] #4 Queue URL from config
- [ ] #5 Auth via validate_token
<!-- AC:END -->

View file

@ -0,0 +1,35 @@
---
id: TASK-3
title: 'Add GET /bulk-uploads/{task_id}/combined-results route'
status: Done
assignee: []
created_date: '2026-04-20'
updated_date: '2026-04-20 12:08'
labels:
- backend
- bulk-upload
- api
dependencies: []
priority: high
ordinal: 1000
---
## Description
<!-- SECTION:DESCRIPTION:BEGIN -->
`GET /bulk-uploads/{task_id}/combined-results`
Behaviour: lookup `bulk_address_uploads` row by `task_id` → read `combined_output_s3_uri` → read combined CSV from S3 → return parsed JSON rows for the frontend review UI. Each row should include: input address fields, matched UPRN, matched OS address, match confidence/score.
Pagination: optional query params `?offset&limit` (default limit 500).
If `combined_output_s3_uri` not yet populated → 409 "Combiner not finished".
<!-- SECTION:DESCRIPTION:END -->
## Acceptance Criteria
<!-- AC:BEGIN -->
- [ ] #1 Returns JSON {rows: [...], total, offset, limit}
- [ ] #2 409 when combined_output_s3_uri null
- [ ] #3 Reads CSV from S3 (retrofit_sap_data bucket) with IAM already granted to backend
- [ ] #4 Row shape matches what confirm-matches frontend expects
<!-- AC:END -->

View file

@ -0,0 +1,37 @@
---
id: TASK-4
title: 'Add POST /bulk-uploads/{task_id}/confirm-matches route'
status: Done
assignee: []
created_date: '2026-04-20'
updated_date: '2026-04-20 12:31'
labels:
- backend
- bulk-upload
- api
dependencies:
- TASK-3
priority: high
ordinal: 3000
---
## Description
<!-- SECTION:DESCRIPTION:BEGIN -->
`POST /bulk-uploads/{task_id}/confirm-matches`
Body: `{accepted_rows: [{uprn, address_line_1, address_line_2, postcode, internal_reference}]}` — the rows the user accepted from the review table.
Behaviour: for each accepted row, upsert into the portfolio's `addresses` / `property` table (confirm exact model during impl — see `backend/addresses/`, `backend/backend/Property.py`). Update `bulk_address_uploads.status` to terminal (e.g. `confirmed`).
Idempotency: safe to re-call; dedupe by `task_id` + `uprn`.
<!-- SECTION:DESCRIPTION:END -->
## Acceptance Criteria
<!-- AC:BEGIN -->
- [ ] #1 Accepted rows persisted as portfolio addresses
- [ ] #2 Duplicate submits do not create duplicate address rows
- [ ] #3 bulk_address_uploads.status updated to terminal
- [ ] #4 Returns summary {inserted, skipped}
- [ ] #5 Transactional — partial failure rolls back
<!-- AC:END -->

View file

@ -0,0 +1,34 @@
---
id: TASK-5
title: Auto-chain combiner when address2uprn subtasks complete
status: To Do
assignee: []
created_date: '2026-04-20'
updated_date: '2026-04-20'
labels:
- backend
- bulk-upload
- orchestration
dependencies:
- TASK-2
priority: medium
ordinal: 5000
---
## Description
<!-- SECTION:DESCRIPTION:BEGIN -->
Today the frontend client polls task status and client-side fires `/combine`. Move that logic to backend: when the last `address2uprn` subtask for a task transitions to complete, backend auto-enqueues the combiner SQS message.
Likely hook point: `SubTaskInterface.finalize_subtask` — after setting status, check if parent task's subtasks are all terminal and if so, enqueue combiner. Or a separate reconciler run in the subtask-complete code path.
Removes frontend responsibility for orchestration and avoids "browser closed → combiner never fires" bug.
<!-- SECTION:DESCRIPTION:END -->
## Acceptance Criteria
<!-- AC:BEGIN -->
- [ ] #1 Combiner fires automatically when all splitter-spawned subtasks done
- [ ] #2 Only fires once per task (dedupe via task row / existing combined_output_s3_uri check)
- [ ] #3 Failed subtasks do NOT trigger combiner — requires manual retry
- [ ] #4 Frontend combine route (task-7) can be deleted or reduced to manual re-run
<!-- AC:END -->

View file

@ -0,0 +1,30 @@
---
id: TASK-6
title: Verify combiner writes to bulk_address_uploads.combined_output_s3_uri
status: To Do
assignee: []
created_date: '2026-04-20'
updated_date: '2026-04-20'
labels:
- backend
- bulk-upload
- db
dependencies: []
priority: high
ordinal: 6000
---
## Description
<!-- SECTION:DESCRIPTION:BEGIN -->
Frontend drizzle schema column: `bulk_address_uploads.combined_output_s3_uri`. Backend combiner (`backend/bulk_address2uprn_combiner/main.py`) calls `set_combined_csv_s3_uri(UUID(task_id), s3_uri)` from `backend.app.db.models.bulk_address_uploads`.
Confirm that helper actually writes to the **`combined_output_s3_uri`** column (not a legacy `combined_csv_s3_uri`). Name drift suggests risk. Fix if mismatched.
<!-- SECTION:DESCRIPTION:END -->
## Acceptance Criteria
<!-- AC:BEGIN -->
- [ ] #1 Confirmed column name matches frontend schema
- [ ] #2 Fix applied if mismatched
- [ ] #3 Integration test covers: run combiner → row updated → frontend schema reads correctly
<!-- AC:END -->

View file

@ -0,0 +1,35 @@
---
id: TASK-7
title: >-
Add BULK_ADDRESS2UPRN_COMBINER_QUEUE_NAME + POSTCODE_SPLITTER_QUEUE_NAME to
backend envs
status: Done
assignee: []
created_date: '2026-04-20'
updated_date: '2026-04-20 12:31'
labels:
- infra
- env
dependencies:
- TASK-1
- TASK-2
priority: high
ordinal: 4000
---
## Description
<!-- SECTION:DESCRIPTION:BEGIN -->
Since backend now enqueues to both queues (via new trigger-splitter + combine routes), its service config must have the queue names on staging + prod. Values:
- `bulk-address2uprn-combiner-queue-<stage>`
- `postcode-splitter-queue-<stage>` (if not already present)
Remove these from frontend `.env` once task-6/task-7 frontend refactor ships.
<!-- SECTION:DESCRIPTION:END -->
## Acceptance Criteria
<!-- AC:BEGIN -->
- [ ] #1 Queue names set in backend staging config
- [ ] #2 Queue names set in backend prod config
- [ ] #3 Frontend env vars removed after frontend refactor complete
<!-- AC:END -->

View file

@ -0,0 +1,34 @@
---
id: TASK-8
title: 'Grant sqs:SendMessage IAM on splitter + combiner queues to backend runtime'
status: Done
assignee: []
created_date: '2026-04-20'
updated_date: '2026-04-20 12:31'
labels:
- infra
- iam
- terraform
dependencies:
- TASK-1
- TASK-2
priority: high
ordinal: 5000
---
## Description
<!-- SECTION:DESCRIPTION:BEGIN -->
Backend runtime role needs `sqs:SendMessage` + `sqs:GetQueueUrl` on:
- postcode_splitter queue ARN
- bulk_address2uprn_combiner queue ARN
Update terraform IAM policy under `infrastructure/terraform/` for backend service. Can revoke equivalent IAM from frontend runtime once refactor ships.
<!-- SECTION:DESCRIPTION:END -->
## Acceptance Criteria
<!-- AC:BEGIN -->
- [ ] #1 Terraform updated for staging + prod backend role
- [ ] #2 Verified via `aws sqs get-queue-url` using backend creds
- [ ] #3 Frontend IAM revoked after frontend refactor complete
<!-- AC:END -->

View file

@ -0,0 +1,28 @@
---
id: TASK-9
title: Deploy bulk_address2uprn_combiner Lambda + queue via terraform to staging/prod
status: Done
assignee: []
created_date: '2026-04-20'
updated_date: '2026-04-20 12:31'
labels:
- infra
- terraform
dependencies: []
priority: high
ordinal: 6000
---
## Description
<!-- SECTION:DESCRIPTION:BEGIN -->
Lambda source at `backend/bulk_address2uprn_combiner/`. Use existing `lambda_with_sqs` terraform module. Lambda envs: `S3_BUCKET_NAME=retrofit_sap_data_bucket_name` + DB creds. Queue name convention: `bulk-address2uprn-combiner-queue-<stage>`. Lambda needs read on `ara_raw_outputs/` and write on `bulk_final_outputs/` in retrofit_sap_data bucket.
<!-- SECTION:DESCRIPTION:END -->
## Acceptance Criteria
<!-- AC:BEGIN -->
- [ ] #1 Lambda + queue exist in staging
- [ ] #2 Lambda + queue exist in prod
- [ ] #3 Lambda has correct S3 read/write permissions on retrofit_sap_data bucket
- [ ] #4 Lambda has DB write on bulk_address_uploads
<!-- AC:END -->

View file

@ -28,6 +28,15 @@ data "terraform_remote_state" "categorisation" {
}
}
data "terraform_remote_state" "postcode_splitter" {
backend = "s3"
config = {
bucket = "postcode-splitter-terraform-state"
key = "terraform.tfstate"
region = "eu-west-2"
}
}
############################################
# Load Credentials
############################################
@ -83,8 +92,9 @@ module "fastapi" {
CARBON_BASELINE_PREDICTIONS_BUCKET = data.terraform_remote_state.shared.outputs.retrofit_carbon_baseline_predictions_bucket_name
HEAT_BASELINE_PREDICTIONS_BUCKET = data.terraform_remote_state.shared.outputs.retrofit_heat_baseline_predictions_bucket_name
ENGINE_SQS_URL = data.terraform_remote_state.engine.outputs.ara_engine_queue_url
CATEGORISATION_SQS_URL = data.terraform_remote_state.categorisation.outputs.categorisation_queue_url
ENGINE_SQS_URL = data.terraform_remote_state.engine.outputs.ara_engine_queue_url
CATEGORISATION_SQS_URL = data.terraform_remote_state.categorisation.outputs.categorisation_queue_url
POSTCODE_SPLITTER_SQS_URL = data.terraform_remote_state.postcode_splitter.outputs.postcode_splitter_queue_url
}
}
@ -104,7 +114,8 @@ module "fastapi_sqs_policy" {
resources = [
data.terraform_remote_state.engine.outputs.ara_engine_queue_arn,
data.terraform_remote_state.categorisation.outputs.categorisation_queue_arn
data.terraform_remote_state.categorisation.outputs.categorisation_queue_arn,
data.terraform_remote_state.postcode_splitter.outputs.postcode_splitter_queue_arn
]
conditions = null

View file

@ -0,0 +1,9 @@
# Exposes the lambda module's queue URL as a named stack output.
output "postcode_splitter_queue_url" {
  description = "URL of the Postcode Splitter SQS queue"
  value       = module.lambda.queue_url
}
# Exposes the lambda module's queue ARN as a named stack output.
output "postcode_splitter_queue_arn" {
  description = "ARN of the Postcode Splitter SQS queue"
  value       = module.lambda.queue_arn
}

2
run_backlog.sh Normal file
View file

@ -0,0 +1,2 @@
#!/bin/bash
# Start the Backlog.md browser UI on a fixed port.
port=6421
backlog browser --port "$port"