From 071a67e501bb760692925e7fe30bd584b3708169 Mon Sep 17 00:00:00 2001
From: Jun-te Kim
Date: Fri, 6 Mar 2026 13:29:25 +0000
Subject: [PATCH] ordnancesurvey deployment

Add the CI jobs, Terraform configuration and S3 upload needed to deploy the
ordnanceSurvey Lambda:

- .github/workflows/deploy_terraform.yml: build the image into the
  ordnance-<stage> ECR repo and deploy the Lambda from that same repo.
- backend/ordnanceSurvey/main.py: add save_results_to_s3() and upload the
  results CSV when task_id and sub_task_id are present in the message body.
- infrastructure/terraform/lambda/ordnanceSurvey: new Lambda configuration
  built on the lambda_with_sqs module.
- infrastructure/terraform/shared/main.tf: state bucket, container registry
  and S3 read/write IAM policy for the Lambda.
- backend/address2UPRN/main.py: formatting only.

---
 .github/workflows/deploy_terraform.yml        | 39 +++++++++++++
 backend/address2UPRN/main.py                  | 12 ++--
 backend/ordnanceSurvey/main.py                | 57 ++++++++++++++++++-
 .../terraform/lambda/ordnanceSurvey/main.tf   | 57 +++++++++++++++++++
 .../lambda/ordnanceSurvey/provider.tf         | 16 ++++++
 .../lambda/ordnanceSurvey/variables.tf        | 32 +++++++++++
 infrastructure/terraform/shared/main.tf       | 34 ++++++++++-
 7 files changed, 239 insertions(+), 8 deletions(-)
 create mode 100644 infrastructure/terraform/lambda/ordnanceSurvey/main.tf
 create mode 100644 infrastructure/terraform/lambda/ordnanceSurvey/provider.tf
 create mode 100644 infrastructure/terraform/lambda/ordnanceSurvey/variables.tf

diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml
index 4c9ce44a..aac49923 100644
--- a/.github/workflows/deploy_terraform.yml
+++ b/.github/workflows/deploy_terraform.yml
@@ -242,3 +242,42 @@ jobs:
       AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
 
 
+  # ============================================================
+  # 2️⃣ Build OrdnanceSurvey image and Push
+  # ============================================================
+  ordnanceSurvey_image:
+    needs: [determine_stage, shared_terraform]
+    uses: ./.github/workflows/_build_image.yml
+    with:
+      ecr_repo: ordnance-${{ needs.determine_stage.outputs.stage }}
+      dockerfile_path: backend/ordnanceSurvey/handler/Dockerfile
+      build_context: .
+      build_args: |
+        DEV_DB_HOST=$DEV_DB_HOST
+        DEV_DB_PORT=$DEV_DB_PORT
+        DEV_DB_NAME=$DEV_DB_NAME
+    secrets:
+      AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
+      AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
+      AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
+      DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }}
+      DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }}
+      DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }}
+
+  # ============================================================
+  # 3️⃣ Deploy OrdnanceSurvey Lambda
+  # ============================================================
+  ordnanceSurvey_lambda:
+    needs: [ordnanceSurvey_image, determine_stage]
+    uses: ./.github/workflows/_deploy_lambda.yml
+    with:
+      lambda_name: ordnanceSurvey
+      lambda_path: infrastructure/terraform/lambda/ordnanceSurvey
+      stage: ${{ needs.determine_stage.outputs.stage }}
+      ecr_repo: ordnance-${{ needs.determine_stage.outputs.stage }}
+      image_digest: ${{ needs.ordnanceSurvey_image.outputs.image_digest }}
+      terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }}
+    secrets:
+      AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
+      AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
+      AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
\ No newline at end of file
diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py
index ea588a77..33cb6ff9 100644
--- a/backend/address2UPRN/main.py
+++ b/backend/address2UPRN/main.py
@@ -476,9 +476,11 @@ def handler(event, context, local=False):
         try:
             # Concatenate Address columns directly
             address2uprn_user_input = (
-                str(row.get("Address 1", "")).strip() + " " +
-                str(row.get("Address 2", "")).strip() + " " +
-                str(row.get("Address 3", "")).strip()
+                str(row.get("Address 1", "")).strip()
+                + " "
+                + str(row.get("Address 2", "")).strip()
+                + " "
+                + str(row.get("Address 3", "")).strip()
             ).strip()
 
             if not address2uprn_user_input:
@@ -489,7 +491,9 @@ def handler(event, context, local=False):
 
             # Get UPRN using the pre-fetched EPC data with all return options
             result = get_uprn_with_epc_df(
-                user_inputed_address=address2uprn_user_input, epc_df=epc_df, verbose=True
+                user_inputed_address=address2uprn_user_input,
+                epc_df=epc_df,
+                verbose=True,
             )
 
             # Parse result tuple if successful
diff --git a/backend/ordnanceSurvey/main.py b/backend/ordnanceSurvey/main.py
index 6c4f3080..0a0e2a8a 100644
--- a/backend/ordnanceSurvey/main.py
+++ b/backend/ordnanceSurvey/main.py
@@ -4,7 +4,7 @@ from utils.logger import setup_logger
 import logging
 from backend.utils.subtasks import subtask_handler
 from utils.s3 import (
-    # save_csv_to_s3,
+    save_csv_to_s3,
     read_csv_from_s3 as read_csv_from_s3_dict,
     parse_s3_uri,
 )
@@ -17,6 +17,9 @@ from backend.utils.ordnance_survey import (
 )
 from backend.app.config import get_settings
 from sqlalchemy import select
+from datetime import datetime
+import uuid
+import os
 
 import pandas as pd
 
@@ -64,6 +67,47 @@ def get_ordance_survey_record(row, cache=None):
     # process cache with row
 
 
+def save_results_to_s3(
+    results_df: pd.DataFrame, task_id: str, sub_task_id: str, bucket_name: str = None
+) -> bool:
+    """
+    Save results DataFrame to S3 as CSV in a parent folder structure.
+
+    :param results_df: The DataFrame containing results
+    :param task_id: The task ID (used for file naming)
+    :param sub_task_id: The subtask ID (used for file naming)
+    :param bucket_name: The S3 bucket name (defaults to env variable)
+    :return: True if successful, False otherwise
+    """
+    if bucket_name is None:
+        bucket_name = os.getenv("S3_BUCKET_NAME")
+
+    if not bucket_name:
+        logger.error(
+            "S3 bucket name not provided and S3_BUCKET_NAME environment variable not set"
+        )
+        return False
+
+    try:
+        # Create a filename with timestamp and UUID
+        file_name = f"{datetime.now().isoformat()}_{str(uuid.uuid4())[:8]}"
+        file_key = f"ara_ordnance_survey_outputs/{task_id}/{sub_task_id}/ordnanceSurvey/{file_name}.csv"
+
+        # Save to S3
+        success = save_csv_to_s3(results_df, bucket_name, file_key)
+
+        if success:
+            logger.info(f"Successfully saved results to s3://{bucket_name}/{file_key}")
+            return True
+        else:
+            logger.error("Failed to save results to S3")
+            return False
+
+    except Exception as e:
+        logger.error(f"Error saving results to S3: {str(e)}")
+        return False
+
+
 @subtask_handler()
 # This assumes task_id and subtask_id is defined in event.Records.body
 def handler(body: dict[str, Any], context: Any, local: bool = False) -> None:
@@ -81,6 +125,8 @@ def handler(body: dict[str, Any], context: Any, local: bool = False) -> None:
     s3_uri: str = body.get("s3_uri", "")
     lexiscore_threshold: float = body.get("lexiscore_threshold", 0.5)
     lexiscore_column: str = body.get("lexiscore_column", None)
+    task_id: str = body.get("task_id", "")
+    sub_task_id: str = body.get("sub_task_id", "")  # NOTE(review): the decorator comment says "subtask_id" - confirm the key name
 
     if s3_uri == "":
         raise RuntimeError("Missing s3_uri in message body")
@@ -168,8 +214,13 @@ def handler(body: dict[str, Any], context: Any, local: bool = False) -> None:
             df.at[idx, "ordnance_survey_uprn"] = postcode_cache.at[best_idx, "UPRN"]
             df.at[idx, "ordnance_survey_lexiscore"] = best_score
 
-    # TODO: Save new results to s3 (ask Khalim if we want to save to db)
+    # Save results locally. NOTE(review): Lambda only allows writes under /tmp - confirm this relative path works when deployed
     df.to_csv("ordnance_survey_results.csv", index=False)
     print(f"Results saved to ordnance_survey_results.csv ({len(df)} rows)")
 
-    # TODO upload to s3 once you get confirmation from Khalim or db
+    # Save results to S3 (best-effort: save_results_to_s3 logs and returns False on failure)
+    if task_id and sub_task_id:
+        save_results_to_s3(df, task_id, sub_task_id)
+    else:
+        # Make the skipped upload visible instead of silently dropping the results
+        logger.warning("Missing task_id or sub_task_id; skipping S3 upload")
diff --git a/infrastructure/terraform/lambda/ordnanceSurvey/main.tf b/infrastructure/terraform/lambda/ordnanceSurvey/main.tf
new file mode 100644
index 00000000..baa673e1
--- /dev/null
+++ b/infrastructure/terraform/lambda/ordnanceSurvey/main.tf
@@ -0,0 +1,57 @@
+data "terraform_remote_state" "shared" {
+  backend = "s3"
+  config = {
+    bucket = "assessment-model-terraform-state"
+    key    = "env:/${var.stage}/terraform.tfstate"
+    region = "eu-west-2"
+  }
+}
+data "aws_secretsmanager_secret_version" "db_credentials" {
+  secret_id = "${var.stage}/assessment_model/db_credentials"
+}
+locals {
+  db_credentials = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string)
+}
+
+module "ordnance" {
+  source = "../modules/lambda_with_sqs"
+
+  name  = "ordnanceSurvey" # logical lambda name, "address2uprn" for example
+  stage = var.stage
+
+  image_uri = local.image_uri
+
+  timeout = 900
+
+  # Optional: Set maximum_concurrency to limit concurrent SQS-triggered invocations (2-1000)
+  maximum_concurrency = var.maximum_concurrency
+
+  environment = merge(
+    {
+      STAGE                           = var.stage
+      LOG_LEVEL                       = "info"
+      DB_USERNAME                     = local.db_credentials.db_assessment_model_username
+      DB_PASSWORD                     = local.db_credentials.db_assessment_model_password
+      GOOGLE_SOLAR_API_KEY            = "test"
+      SAP_PREDICTIONS_BUCKET          = "test"
+      CARBON_PREDICTIONS_BUCKET       = "test"
+      HEAT_PREDICTIONS_BUCKET         = "test"
+      HEATING_KWH_PREDICTIONS_BUCKET  = "test"
+      HOTWATER_KWH_PREDICTIONS_BUCKET = "test"
+      API_KEY                         = "test"
+      ENVIRONMENT                     = "test"
+      SECRET_KEY                      = "test"
+      PLAN_TRIGGER_BUCKET             = "test"
+      DATA_BUCKET                     = "test"
+      ENGINE_SQS_URL                  = "test"
+      ENERGY_ASSESSMENTS_BUCKET       = "test"
+      S3_BUCKET_NAME                  = data.terraform_remote_state.shared.outputs.retrofit_sap_data_bucket_name # NOTE(review): confirm this is the bucket granted by ordnance_s3_read_and_write (retrofit-data-<stage>)
+    },
+  )
+}
+
+# Attach S3 read and write policy to the Lambda execution role
+resource "aws_iam_role_policy_attachment" "ordnanceSurvey_read_and_write" {
+  role       = module.ordnance.role_name
+  policy_arn = data.terraform_remote_state.shared.outputs.ordnance_s3_read_and_write_arn
+}
diff --git a/infrastructure/terraform/lambda/ordnanceSurvey/provider.tf b/infrastructure/terraform/lambda/ordnanceSurvey/provider.tf
new file mode 100644
index 00000000..37c412ce
--- /dev/null
+++ b/infrastructure/terraform/lambda/ordnanceSurvey/provider.tf
@@ -0,0 +1,16 @@
+terraform {
+  required_providers {
+    aws = {
+      source  = "hashicorp/aws"
+      version = "~> 4.16"
+    }
+  }
+
+  backend "s3" {
+    bucket = REPLACE_ME
+    key    = "terraform.tfstate"
+    region = "eu-west-2"
+  }
+
+  required_version = ">= 1.2.0"
+}
\ No newline at end of file
diff --git a/infrastructure/terraform/lambda/ordnanceSurvey/variables.tf b/infrastructure/terraform/lambda/ordnanceSurvey/variables.tf
new file mode 100644
index 00000000..e0061321
--- /dev/null
+++ b/infrastructure/terraform/lambda/ordnanceSurvey/variables.tf
@@ -0,0 +1,32 @@
+variable "lambda_name" {
+  type        = string
+  description = "Logical name of the lambda (e.g. address2uprn)"
+}
+
+variable "stage" {
+  description = "Deployment stage (e.g. dev, prod)"
+  type        = string
+}
+variable "ecr_repo_url" {
+  type        = string
+  description = "ECR repository URL (no tag, no digest)"
+}
+
+variable "image_digest" {
+  type        = string
+  description = "Image digest (sha256:...)"
+}
+
+variable "maximum_concurrency" {
+  type        = number
+  default     = null
+  description = "Maximum number of concurrent Lambda invocations from SQS (2-1000). null = no limit."
+}
+
+locals {
+  image_uri = "${var.ecr_repo_url}@${var.image_digest}"
+}
+
+output "resolved_image_uri" {
+  value = local.image_uri
+}
diff --git a/infrastructure/terraform/shared/main.tf b/infrastructure/terraform/shared/main.tf
index cca3394f..df519f4f 100644
--- a/infrastructure/terraform/shared/main.tf
+++ b/infrastructure/terraform/shared/main.tf
@@ -414,4 +414,36 @@ module "categorisation_registry" {
   source = "../modules/container_registry"
   name   = "categorisation"
   stage  = var.stage
-}
\ No newline at end of file
+}
+
+
+################################################
+# OrdnanceSurveyAPI – Lambda
+################################################
+module "ordnance_state_bucket" {
+  source      = "../modules/tf_state_bucket"
+  bucket_name = "ordnance-terraform-state"
+
+}
+
+module "ordnance_registry" {
+  source = "../modules/container_registry"
+  name   = "ordnance"
+  stage  = var.stage
+
+}
+
+# S3 policy for the ordnance Lambda to read and write the retrofit data bucket
+module "ordnance_s3_read_and_write" {
+  source = "../modules/s3_iam_policy"
+
+  policy_name        = "OrdnanceReadandWriteS3"
+  policy_description = "Allow ordnance Lambda to read and write from retrofit-data bucket"
+  bucket_arns        = ["arn:aws:s3:::retrofit-data-${var.stage}"]
+  actions            = ["s3:GetObject", "s3:ListBucket", "s3:PutObject"]
+  resource_paths     = ["/*"]
+}
+
+output "ordnance_s3_read_and_write_arn" {
+  value = module.ordnance_s3_read_and_write.policy_arn
+}