From 47dfe34ec062bfd884a451bc9b22e92f62c5c9d7 Mon Sep 17 00:00:00 2001
From: Jun-te Kim
Date: Fri, 29 May 2026 12:12:54 +0000
Subject: [PATCH] added landlord description overrides

---
Reviewer notes (free-form commentary; not part of the commit message or of
any hunk):

  What the patch does, as visible in the hunks below:
    * _deploy_lambda.yml: declares an optional TF_VAR_openai_api_key secret
      and exports it in the env of the two steps touched here.
    * deploy_terraform.yml: adds an image-build job and a lambda-deploy job
      for landlordDescriptionOverrides; the deploy job maps
      secrets.DEV_OPENAI_API_KEY onto TF_VAR_openai_api_key.
    * lambda_smoke_tests.yml: adds a smoke-test job for the new image.
    * Dockerfile: copies this application's own requirements.txt instead of
      the postcode_splitter one; requirements.txt gains `openai`.
    * New Terraform root under lambda/landlordDescriptionOverrides
      (main.tf, outputs.tf, provider.tf, variables.tf).
    * shared/main.tf: adds a state bucket, a container registry and an S3
      read policy (exported as landlord_overrides_s3_read_arn), plus two
      whitespace-only re-alignments.

  NOTE(review): this text was rebuilt from a copy whose line breaks and
    indentation had been collapsed. Indentation follows 2-space YAML and
    `terraform fmt` conventions; blank lines were placed so every hunk
    matches its header counts. Regenerate with `git format-patch` from the
    commit before relying on it.
  NOTE(review): the two whitespace-only hunks in shared/main.tf had
    identical old/new text in the collapsed copy; the new side is assumed
    to be `terraform fmt` alignment -- confirm.
  NOTE(review): the last hunk shows fewer trailing context lines than its
    header declares and the usual signature trailer is absent, so the copy
    is probably truncated.
  NOTE(review): s3:ListBucket is a bucket-level action while resource_paths
    is ["/*"]; verify that modules/s3_iam_policy also grants on the bucket
    ARN itself.
  NOTE(review): OPENAI_API_KEY is handed to the Lambda as a plain
    environment variable; presumably acceptable here since the DB
    credentials are passed the same way -- confirm.
  NOTE(review): variable "lambda_name" is declared but not referenced in
    main.tf (the name is hard-coded); presumably the shared deploy workflow
    always passes it -- confirm.

 .github/workflows/_deploy_lambda.yml          |  5 ++
 .github/workflows/deploy_terraform.yml        | 41 +++++++++++++++
 .github/workflows/lambda_smoke_tests.yml      | 10 ++++
 .../landlord_description_overrides/Dockerfile |  2 +-
 .../requirements.txt                          |  1 +
 .../landlordDescriptionOverrides/main.tf      | 50 +++++++++++++++++++
 .../landlordDescriptionOverrides/outputs.tf   |  9 ++++
 .../landlordDescriptionOverrides/provider.tf  | 16 ++++++
 .../landlordDescriptionOverrides/variables.tf | 33 ++++++++++++
 deployment/terraform/shared/main.tf           | 41 ++++++++++++---
 10 files changed, 201 insertions(+), 7 deletions(-)
 create mode 100644 deployment/terraform/lambda/landlordDescriptionOverrides/main.tf
 create mode 100644 deployment/terraform/lambda/landlordDescriptionOverrides/outputs.tf
 create mode 100644 deployment/terraform/lambda/landlordDescriptionOverrides/provider.tf
 create mode 100644 deployment/terraform/lambda/landlordDescriptionOverrides/variables.tf

diff --git a/.github/workflows/_deploy_lambda.yml b/.github/workflows/_deploy_lambda.yml
index 0d702155..70f9eabe 100644
--- a/.github/workflows/_deploy_lambda.yml
+++ b/.github/workflows/_deploy_lambda.yml
@@ -92,6 +92,9 @@ on:
 
       TF_VAR_magicplan_api_key:
         required: false
+
+      TF_VAR_openai_api_key:
+        required: false
 jobs:
   deploy:
     runs-on: ubuntu-latest
@@ -163,6 +166,7 @@ jobs:
           TF_VAR_hubspot_api_key: ${{ secrets.TF_VAR_hubspot_api_key }}
           TF_VAR_magicplan_customer_id: ${{ secrets.TF_VAR_magicplan_customer_id }}
           TF_VAR_magicplan_api_key: ${{ secrets.TF_VAR_magicplan_api_key }}
+          TF_VAR_openai_api_key: ${{ secrets.TF_VAR_openai_api_key }}
         run: |
           ECR_REPO_URL_VAR=""
           if [[ -n "${{ inputs.ecr_repo }}" ]]; then
@@ -213,6 +217,7 @@ jobs:
           TF_VAR_hubspot_api_key: ${{ secrets.TF_VAR_hubspot_api_key }}
           TF_VAR_magicplan_customer_id: ${{ secrets.TF_VAR_magicplan_customer_id }}
           TF_VAR_magicplan_api_key: ${{ secrets.TF_VAR_magicplan_api_key }}
+          TF_VAR_openai_api_key: ${{ secrets.TF_VAR_openai_api_key }}
         run: |
           EXTRA_VARS=""
           if [[ -n "${{ inputs.ecr_repo }}" ]]; then
diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml
index 7f2eb890..fc999bc0 100644
--- a/.github/workflows/deploy_terraform.yml
+++ b/.github/workflows/deploy_terraform.yml
@@ -203,6 +203,47 @@ jobs:
       AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
       AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
 
+  # ============================================================
+  # Build Landlord Description Overrides image and Push
+  # ============================================================
+  landlordDescriptionOverrides_image:
+    needs: [determine_stage, shared_terraform]
+    uses: ./.github/workflows/_build_image.yml
+    with:
+      ecr_repo: landlord_description_overrides-${{ needs.determine_stage.outputs.stage }}
+      dockerfile_path: applications/landlord_description_overrides/Dockerfile
+      build_context: .
+      build_args: |
+        DEV_DB_HOST=$DEV_DB_HOST
+        DEV_DB_PORT=$DEV_DB_PORT
+        DEV_DB_NAME=$DEV_DB_NAME
+    secrets:
+      AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
+      AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
+      AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
+      DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }}
+      DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }}
+      DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }}
+
+  # ============================================================
+  # Deploy Landlord Description Overrides Lambda
+  # ============================================================
+  landlordDescriptionOverrides_lambda:
+    needs: [landlordDescriptionOverrides_image, determine_stage]
+    uses: ./.github/workflows/_deploy_lambda.yml
+    with:
+      lambda_name: landlordDescriptionOverrides
+      lambda_path: deployment/terraform/lambda/landlordDescriptionOverrides
+      stage: ${{ needs.determine_stage.outputs.stage }}
+      ecr_repo: landlord_description_overrides-${{ needs.determine_stage.outputs.stage }}
+      image_digest: ${{ needs.landlordDescriptionOverrides_image.outputs.image_digest }}
+      terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }}
+    secrets:
+      AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
+      AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
+      AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
+      TF_VAR_openai_api_key: ${{ secrets.DEV_OPENAI_API_KEY }}
+
   # ============================================================
   # Build Bulk Address2UPRN Combiner image and Push
   # ============================================================
diff --git a/.github/workflows/lambda_smoke_tests.yml b/.github/workflows/lambda_smoke_tests.yml
index b562f91e..44288821 100644
--- a/.github/workflows/lambda_smoke_tests.yml
+++ b/.github/workflows/lambda_smoke_tests.yml
@@ -43,6 +43,16 @@ jobs:
       build_context: .
       service_name: postcode-splitter-ddd
 
+  # ============================================================
+  # Landlord Description Overrides
+  # ============================================================
+  landlord_description_overrides_smoke_test:
+    uses: ./.github/workflows/_smoke_test_lambda.yml
+    with:
+      dockerfile_path: applications/landlord_description_overrides/Dockerfile
+      build_context: .
+      service_name: landlord-description-overrides
+
   # ============================================================
   # Bulk Address2UPRN Combiner
   # ============================================================
diff --git a/applications/landlord_description_overrides/Dockerfile b/applications/landlord_description_overrides/Dockerfile
index e2456b81..c2d4faf7 100644
--- a/applications/landlord_description_overrides/Dockerfile
+++ b/applications/landlord_description_overrides/Dockerfile
@@ -15,7 +15,7 @@ ENV POSTGRES_DATABASE=${DEV_DB_NAME}
 
 WORKDIR /var/task
 
-COPY applications/postcode_splitter/requirements.txt .
+COPY applications/landlord_description_overrides/requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 
 # Copy the layered source the handler imports from. The new splitter pulls
diff --git a/applications/landlord_description_overrides/requirements.txt b/applications/landlord_description_overrides/requirements.txt
index 6a85a255..b2917847 100644
--- a/applications/landlord_description_overrides/requirements.txt
+++ b/applications/landlord_description_overrides/requirements.txt
@@ -2,3 +2,4 @@ boto3
 pydantic
 sqlmodel
 psycopg2-binary
+openai
diff --git a/deployment/terraform/lambda/landlordDescriptionOverrides/main.tf b/deployment/terraform/lambda/landlordDescriptionOverrides/main.tf
new file mode 100644
index 00000000..5a69de22
--- /dev/null
+++ b/deployment/terraform/lambda/landlordDescriptionOverrides/main.tf
@@ -0,0 +1,50 @@
+data "terraform_remote_state" "shared" {
+  backend = "s3"
+  config = {
+    bucket = "assessment-model-terraform-state"
+    key    = "env:/${var.stage}/terraform.tfstate"
+    region = "eu-west-2"
+  }
+}
+
+data "aws_secretsmanager_secret_version" "db_credentials" {
+  secret_id = "${var.stage}/assessment_model/db_credentials"
+}
+
+locals {
+  db_credentials = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string)
+}
+
+module "lambda" {
+  source = "../../modules/lambda_with_sqs"
+
+  name  = "landlord-description-overrides"
+  stage = var.stage
+
+  image_uri = local.image_uri
+
+  # The classifier calls OpenAI once per distinct description per column, so it
+  # is latency-bound. 300s leaves headroom under the queue's 1000s visibility
+  # timeout. batch_size = 1 keeps one upload per invocation, so a single bad
+  # record cannot redrive its siblings. maximum_concurrency caps fan-out to
+  # respect OpenAI rate limits.
+  timeout             = 300
+  batch_size          = 1
+  maximum_concurrency = 5
+
+  environment = merge(
+    {
+      STAGE             = var.stage
+      LOG_LEVEL         = "info"
+      POSTGRES_USERNAME = local.db_credentials.db_assessment_model_username
+      POSTGRES_PASSWORD = local.db_credentials.db_assessment_model_password
+      OPENAI_API_KEY    = var.openai_api_key
+    },
+  )
+}
+
+# Attach S3 read policy so the handler can read the original upload CSV.
+resource "aws_iam_role_policy_attachment" "landlord_overrides_s3_read" {
+  role       = module.lambda.role_name
+  policy_arn = data.terraform_remote_state.shared.outputs.landlord_overrides_s3_read_arn
+}
diff --git a/deployment/terraform/lambda/landlordDescriptionOverrides/outputs.tf b/deployment/terraform/lambda/landlordDescriptionOverrides/outputs.tf
new file mode 100644
index 00000000..7c6534db
--- /dev/null
+++ b/deployment/terraform/lambda/landlordDescriptionOverrides/outputs.tf
@@ -0,0 +1,9 @@
+output "landlord_description_overrides_queue_url" {
+  value       = module.lambda.queue_url
+  description = "URL of the Landlord Description Overrides SQS queue (wire into the FastAPI LANDLORD_OVERRIDES_SQS_URL)"
+}
+
+output "landlord_description_overrides_queue_arn" {
+  value       = module.lambda.queue_arn
+  description = "ARN of the Landlord Description Overrides SQS queue"
+}
diff --git a/deployment/terraform/lambda/landlordDescriptionOverrides/provider.tf b/deployment/terraform/lambda/landlordDescriptionOverrides/provider.tf
new file mode 100644
index 00000000..ed2fa60e
--- /dev/null
+++ b/deployment/terraform/lambda/landlordDescriptionOverrides/provider.tf
@@ -0,0 +1,16 @@
+terraform {
+  required_providers {
+    aws = {
+      source  = "hashicorp/aws"
+      version = ">= 5.0"
+    }
+  }
+
+  backend "s3" {
+    bucket = "landlord-description-overrides-terraform-state"
+    key    = "terraform.tfstate"
+    region = "eu-west-2"
+  }
+
+  required_version = ">= 1.2.0"
+}
diff --git a/deployment/terraform/lambda/landlordDescriptionOverrides/variables.tf b/deployment/terraform/lambda/landlordDescriptionOverrides/variables.tf
new file mode 100644
index 00000000..63437a5a
--- /dev/null
+++ b/deployment/terraform/lambda/landlordDescriptionOverrides/variables.tf
@@ -0,0 +1,33 @@
+variable "lambda_name" {
+  type        = string
+  description = "Logical name of the lambda (e.g. landlordDescriptionOverrides)"
+}
+
+variable "stage" {
+  description = "Deployment stage (e.g. dev, prod)"
+  type        = string
+}
+
+variable "ecr_repo_url" {
+  type        = string
+  description = "ECR repository URL (no tag, no digest)"
+}
+
+variable "image_digest" {
+  type        = string
+  description = "Image digest (sha256:...)"
+}
+
+variable "openai_api_key" {
+  type        = string
+  description = "OpenAI API key used by the ChatGPT column classifier"
+  sensitive   = true
+}
+
+locals {
+  image_uri = "${var.ecr_repo_url}@${var.image_digest}"
+}
+
+output "resolved_image_uri" {
+  value = local.image_uri
+}
diff --git a/deployment/terraform/shared/main.tf b/deployment/terraform/shared/main.tf
index 0a9e87f6..7d179203 100644
--- a/deployment/terraform/shared/main.tf
+++ b/deployment/terraform/shared/main.tf
@@ -268,11 +268,11 @@ output "retrofit_heat_baseline_predictions_bucket_name" {
 
 // We make this bucket presignable, because we want to generate download links for the frontend
 module "retrofit_energy_assessments" {
-  source = "../modules/s3_presignable_bucket"
-  bucketname = "retrofit-energy-assessments-${var.stage}"
-  allowed_origins = var.allowed_origins
-  environment = var.stage
-  enable_versioning = true
+  source            = "../modules/s3_presignable_bucket"
+  bucketname        = "retrofit-energy-assessments-${var.stage}"
+  allowed_origins   = var.allowed_origins
+  environment       = var.stage
+  enable_versioning = true
 }
 
 output "retrofit_energy_assessments_bucket_name" {
@@ -494,6 +494,35 @@ output "postcode_splitter_s3_read_arn" {
   value = module.postcode_splitter_s3_read.policy_arn
 }
 
+################################################
+# Landlord Description Overrides – Lambda
+################################################
+module "landlord_description_overrides_state_bucket" {
+  source      = "../modules/tf_state_bucket"
+  bucket_name = "landlord-description-overrides-terraform-state"
+}
+
+module "landlord_description_overrides_registry" {
+  source = "../modules/container_registry"
+  name   = "landlord_description_overrides"
+  stage  = var.stage
+}
+
+# S3 policy for the landlord classifier to read the original upload CSV.
+module "landlord_overrides_s3_read" {
+  source = "../modules/s3_iam_policy"
+
+  policy_name        = "LandlordOverridesReadS3"
+  policy_description = "Allow landlord description overrides Lambda to read from retrofit-data bucket"
+  bucket_arns        = ["arn:aws:s3:::retrofit-data-${var.stage}"]
+  actions            = ["s3:GetObject", "s3:ListBucket"]
+  resource_paths     = ["/*"]
+}
+
+output "landlord_overrides_s3_read_arn" {
+  value = module.landlord_overrides_s3_read.policy_arn
+}
+
 ################################################
 # Bulk Address2UPRN Combiner – Lambda ECR
 ################################################
@@ -729,7 +758,7 @@ module "hubspot_etl_bucket" {
 module "hubspot_etl_registry" {
   source = "../modules/container_registry"
   name   = "hubspot-etl"
-  stage = var.stage
+  stage  = var.stage
 }