Model/applications/bulk_upload_finaliser/handler.py
2026-06-04 11:47:42 +00:00

103 lines
3.8 KiB
Python

"""bulk_upload_finaliser Lambda (ADR-0013).
Replaces the synchronous Next.js ``/finalize`` property insert. Thin wiring: parse
the trigger, read the combiner output CSV from S3, hand the rows to the
``BulkUploadFinaliserOrchestrator`` (which owns the resolution + persist), then
write the terminal BulkUpload status directly (ADR-0005 hands terminal ownership to
the backend). ``complete`` is written in the *same* transaction as the property
insert (atomic finalise); ``failed`` is written on a fresh session on error.
PostgresConfig-only, like the landlord classifier Lambda — no legacy ``backend/``
connection — so a single DB config (POSTGRES_*) drives the whole run.
"""
import logging
import os
from typing import Any
from uuid import UUID
import boto3
from sqlalchemy.engine import Engine
from applications.bulk_upload_finaliser.bulk_upload_finaliser_trigger_body import (
BulkUploadFinaliserTriggerBody,
)
from infrastructure.postgres.config import PostgresConfig
from infrastructure.postgres.engine import commit_scope, make_engine, make_session
from infrastructure.s3.csv_s3_client import CsvS3Client
from infrastructure.s3.s3_uri import parse_s3_uri
from orchestration.bulk_upload_finaliser_orchestrator import (
BulkUploadFinaliserOrchestrator,
)
from orchestration.task_orchestrator import TaskOrchestrator
from repositories.bulk_upload.bulk_upload_status_writer_postgres import (
BulkUploadStatusWriterPostgresRepository,
)
from repositories.property.property_identity_writer_postgres import (
PropertyIdentityWriterPostgresRepository,
)
from utilities.aws_lambda.subtask_handler import subtask_handler
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
def _run(engine: Engine, trigger: BulkUploadFinaliserTriggerBody) -> int:
    """Read the combiner CSV, insert its properties and mark the upload complete.

    The CSV named by ``trigger.s3_uri`` is read in full before any database
    session is opened. The rows are converted by the orchestrator, then the
    property insert and the ``complete`` status write are issued inside one
    ``commit_scope`` so that they succeed or fail together.

    Args:
        engine: Engine used to open the single session for this run.
        trigger: Validated trigger body; ``s3_uri``, ``portfolio_id``,
            ``task_id`` and ``bulk_upload_id`` are read from it.

    Returns:
        The value returned by ``orchestrator.persist`` (logged as the number
        of properties inserted).

    Raises:
        Any exception raised by the S3 read, the row conversion or the
        transactional block is propagated; the session is closed first.
    """
    source_uri = trigger.s3_uri
    bucket_name, _ = parse_s3_uri(source_uri)
    s3_client_factory: Any = boto3.client  # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
    csv_client = CsvS3Client(s3_client_factory("s3"), bucket_name)
    csv_rows = csv_client.read_rows(source_uri)

    db_session = make_session(engine)
    try:
        property_writer = PropertyIdentityWriterPostgresRepository(db_session)
        finaliser = BulkUploadFinaliserOrchestrator(property_writer=property_writer)
        upload_status = BulkUploadStatusWriterPostgresRepository(db_session)

        # Row resolution is done up front, outside the transaction (the
        # original note describes it as pure).
        property_rows = finaliser.to_property_rows(csv_rows, trigger.portfolio_id)

        # One transaction for both writes: if either the insert or the status
        # update fails, neither is committed and the caller's failure path
        # takes over.
        with commit_scope(db_session):
            inserted_count = finaliser.persist(property_rows)
            upload_status.set_status(trigger.task_id, "complete")
    finally:
        db_session.close()

    logger.info(
        "Finalised bulk upload %s: %d rows read, %d properties inserted.",
        trigger.bulk_upload_id,
        len(csv_rows),
        inserted_count,
    )
    return inserted_count
def _mark_failed(engine: Engine, task_id: UUID) -> None:
    """Write the ``failed`` status for ``task_id`` on its own session.

    A new session is opened from ``engine`` rather than reusing the one from
    the failed run, and it is always closed afterwards.

    Args:
        engine: Engine from which the fresh session is created.
        task_id: Identifier passed to the status writer's ``set_status``.
    """
    failure_session = make_session(engine)
    try:
        with commit_scope(failure_session):
            status_writer = BulkUploadStatusWriterPostgresRepository(failure_session)
            status_writer.set_status(task_id, "failed")
    finally:
        failure_session.close()
@subtask_handler()
def handler(
    body: dict[str, Any], context: Any, task_orchestrator: TaskOrchestrator
) -> dict[str, int]:
    """Lambda entry point: finalise one bulk upload and report the insert count.

    Validates ``body`` into a ``BulkUploadFinaliserTriggerBody``, builds an
    engine from the process environment via ``PostgresConfig.from_env`` and
    delegates the work to ``_run``.

    Args:
        body: Raw trigger payload, validated with ``model_validate``.
        context: Not used by this function.
        task_orchestrator: Not used by this function (presumably injected by
            ``@subtask_handler`` — confirm against the decorator).

    Returns:
        ``{"inserted": <value returned by _run>}``.

    Raises:
        Exception: Whatever ``_run`` raised is re-raised unchanged after a
            best-effort attempt to record the ``failed`` status. Validation
            and engine-construction errors propagate before any status write.
    """
    trigger = BulkUploadFinaliserTriggerBody.model_validate(body)
    engine = make_engine(PostgresConfig.from_env(os.environ))
    try:
        try:
            inserted = _run(engine, trigger)
        except Exception:
            # Hand the BulkUpload to the terminal `failed` state so the UI
            # leaves `finalising`; the @subtask_handler also marks the SubTask
            # FAILED on the re-raise below.
            try:
                _mark_failed(engine, trigger.task_id)
            except Exception:
                # The status write is best-effort: if it fails too (e.g. the
                # database is unreachable, which may be why _run failed), log
                # it but do not let it replace the original error.
                logger.exception(
                    "Could not mark bulk upload %s as failed.",
                    trigger.bulk_upload_id,
                )
            # Re-raises the exception from _run, not the one from _mark_failed.
            raise
    finally:
        # Release the pooled connections held by this invocation's engine so
        # they do not linger in a warm container. dispose() leaves the Engine
        # usable, so this is harmless even if make_engine returns a shared one.
        engine.dispose()
    return {"inserted": inserted}