ordnancesurvey deployment

2026-07-27 23:35:01 +00:00 · 2026-03-06 13:29:25 +00:00 · 2026-03-06 13:29:25 +00:00 · 071a67e501
commit 071a67e501
parent 815ce01082
7 changed files with 239 additions and 8 deletions
--- a/.github/workflows/deploy_terraform.yml
+++ b/.github/workflows/deploy_terraform.yml
@ -242,3 +242,42 @@ jobs:
      AWS_REGION: ${{ secrets.DEV_AWS_REGION }}


+  # ============================================================
+  # 2️⃣ Build OrdnanceSurvey image and Push
+  # ============================================================
+  ordnanceSurvey_image:
+    needs: [determine_stage, shared_terraform]
+    uses: ./.github/workflows/_build_image.yml
+    with:
+      ecr_repo: ordnance-${{ needs.determine_stage.outputs.stage }}
+      dockerfile_path: backend/ordnanceSurvey/handler/Dockerfile
+      build_context: .
+      build_args: |
+        DEV_DB_HOST=$DEV_DB_HOST
+        DEV_DB_PORT=$DEV_DB_PORT
+        DEV_DB_NAME=$DEV_DB_NAME
+    secrets:
+      AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
+      AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
+      AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
+      DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }}
+      DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }}
+      DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }}
+
+  # ============================================================
+  # 3️⃣ Deploy OrdnanceSurvey Lambda
+  # ============================================================
+  ordnanceSurvey_lambda:
+    needs: [ordnanceSurvey_image, determine_stage]
+    uses: ./.github/workflows/_deploy_lambda.yml
+    with:
+      lambda_name: ordnanceSurvey
+      lambda_path: infrastructure/terraform/lambda/ordnanceSurvey
+      stage: ${{ needs.determine_stage.outputs.stage }}
+      # Must be the same ECR repo the ordnanceSurvey_image job pushes to; image_digest only exists there.
+      ecr_repo: ordnance-${{ needs.determine_stage.outputs.stage }}
+      image_digest: ${{ needs.ordnanceSurvey_image.outputs.image_digest }}
+      terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }}
+    secrets:
+      AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
+      AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
+      AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
--- a/backend/address2UPRN/main.py
+++ b/backend/address2UPRN/main.py
@ -476,9 +476,11 @@ def handler(event, context, local=False):
                    try:
                        # Concatenate Address columns directly
                        address2uprn_user_input = (
-                            str(row.get("Address 1", "")).strip() + " " +
-                            str(row.get("Address 2", "")).strip() + " " +
-                            str(row.get("Address 3", "")).strip()
+                            str(row.get("Address 1", "")).strip()
+                            + " "
+                            + str(row.get("Address 2", "")).strip()
+                            + " "
+                            + str(row.get("Address 3", "")).strip()
                        ).strip()

                        if not address2uprn_user_input:
@ -489,7 +491,9 @@ def handler(event, context, local=False):

                        # Get UPRN using the pre-fetched EPC data with all return options
                        result = get_uprn_with_epc_df(
-                            user_inputed_address=address2uprn_user_input, epc_df=epc_df, verbose=True
+                            user_inputed_address=address2uprn_user_input,
+                            epc_df=epc_df,
+                            verbose=True,
                        )

                        # Parse result tuple if successful
--- a/backend/ordnanceSurvey/main.py
+++ b/backend/ordnanceSurvey/main.py
@ -4,7 +4,7 @@ from utils.logger import setup_logger
 import logging
 from backend.utils.subtasks import subtask_handler
 from utils.s3 import (
-    # save_csv_to_s3,
+    save_csv_to_s3,
    read_csv_from_s3 as read_csv_from_s3_dict,
    parse_s3_uri,
 )
@ -17,6 +17,9 @@ from backend.utils.ordnance_survey import (
 )
 from backend.app.config import get_settings
 from sqlalchemy import select
+from datetime import datetime
+import uuid
+import os

 import pandas as pd

@ -64,6 +67,47 @@ def get_ordance_survey_record(row, cache=None):
    # process cache with row


+def save_results_to_s3(
+    results_df: pd.DataFrame, task_id: str, sub_task_id: str, bucket_name: str = None
+) -> bool:
+    """
+    Save results DataFrame to S3 as CSV in a parent folder structure.
+
+    :param results_df: The DataFrame containing results
+    :param task_id: The task ID (used for file naming)
+    :param sub_task_id: The subtask ID (used for file naming)
+    :param bucket_name: The S3 bucket name (defaults to env variable)
+    :return: True if successful, False otherwise
+    """
+    if bucket_name is None:
+        bucket_name = os.getenv("S3_BUCKET_NAME")
+
+    if not bucket_name:
+        logger.error(
+            "S3 bucket name not provided and S3_BUCKET_NAME environment variable not set"
+        )
+        return False
+
+    try:
+        # Create a filename with timestamp and UUID
+        file_name = f"{datetime.now().isoformat()}_{str(uuid.uuid4())[:8]}"
+        file_key = f"ara_ordnance_survey_outputs/{task_id}/{sub_task_id}/ordnanceSurvey/{file_name}.csv"
+
+        # Save to S3
+        success = save_csv_to_s3(results_df, bucket_name, file_key)
+
+        if success:
+            logger.info(f"Successfully saved results to s3://{bucket_name}/{file_key}")
+            return True
+        else:
+            logger.error(f"Failed to save results to S3")
+            return False
+
+    except Exception as e:
+        logger.error(f"Error saving results to S3: {str(e)}")
+        return False
+
+
@subtask_handler()  # This assumes task_id and subtask_id is defined in event.Records.body
 def handler(body: dict[str, Any], context: Any, local: bool = False) -> None:

@ -81,6 +125,8 @@ def handler(body: dict[str, Any], context: Any, local: bool = False) -> None:
    s3_uri: str = body.get("s3_uri", "")
    lexiscore_threshold: float = body.get("lexiscore_threshold", 0.5)
    lexiscore_column: str = body.get("lexiscore_column", None)
+    task_id: str = body.get("task_id", "")
+    sub_task_id: str = body.get("sub_task_id", "")

    if s3_uri == "":
        raise RuntimeError("Missing s3_uri in message body")
@ -168,8 +214,13 @@ def handler(body: dict[str, Any], context: Any, local: bool = False) -> None:
            df.at[idx, "ordnance_survey_uprn"] = postcode_cache.at[best_idx, "UPRN"]
            df.at[idx, "ordnance_survey_lexiscore"] = best_score

-    # TODO: Save new results to s3 (ask Khalim if we want to save to db)
+    # Save results locally
    df.to_csv("ordnance_survey_results.csv", index=False)
    print(f"Results saved to ordnance_survey_results.csv ({len(df)} rows)")

-    # TODO upload to s3 once you get confirmation from Khalim or db
+    # Save results to S3
+    if task_id and sub_task_id:
+        try:
+            save_results_to_s3(df, task_id, sub_task_id)
+        except Exception as s3_error:
+            logger.error(f"Failed to save results to S3: {s3_error}")
--- a/infrastructure/terraform/lambda/ordnanceSurvey/main.tf
+++ b/infrastructure/terraform/lambda/ordnanceSurvey/main.tf
@ -0,0 +1,57 @@
+data "terraform_remote_state" "shared" {
+  backend = "s3"
+  config = {
+    bucket = "assessment-model-terraform-state"
+    key = "env:/${var.stage}/terraform.tfstate"
+    region = "eu-west-2"
+  }
+}
+data "aws_secretsmanager_secret_version" "db_credentials" {
+  secret_id = "${var.stage}/assessment_model/db_credentials"
+}
+locals {
+  db_credentials = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string)
+}
+
+module "ordnance" {
+  source = "../modules/lambda_with_sqs"
+
+  # Must be a quoted string literal: a bare identifier is parsed as a reference
+  # and fails validation.
+  name  = "ordnanceSurvey"
+  stage = var.stage
+
+  image_uri = local.image_uri
+
+  timeout = 900
+
+  # Optional: Set maximum_concurrency to limit concurrent SQS-triggered invocations (2-1000)
+  maximum_concurrency = var.maximum_concurrency
+
+  # NOTE(review): the "test" values below look like placeholders, presumably there to
+  # satisfy required application settings - confirm before promoting beyond dev.
+  environment = merge(
+    {
+      STAGE                           = var.stage
+      LOG_LEVEL                       = "info"
+      DB_USERNAME                     = local.db_credentials.db_assessment_model_username
+      DB_PASSWORD                     = local.db_credentials.db_assessment_model_password
+      GOOGLE_SOLAR_API_KEY            = "test"
+      SAP_PREDICTIONS_BUCKET          = "test"
+      CARBON_PREDICTIONS_BUCKET       = "test"
+      HEAT_PREDICTIONS_BUCKET         = "test"
+      HEATING_KWH_PREDICTIONS_BUCKET  = "test"
+      HOTWATER_KWH_PREDICTIONS_BUCKET = "test"
+      API_KEY                         = "test"
+      ENVIRONMENT                     = "test"
+      SECRET_KEY                      = "test"
+      PLAN_TRIGGER_BUCKET             = "test"
+      DATA_BUCKET                     = "test"
+      ENGINE_SQS_URL                  = "test"
+      ENERGY_ASSESSMENTS_BUCKET       = "test"
+      S3_BUCKET_NAME                  = data.terraform_remote_state.shared.outputs.retrofit_sap_data_bucket_name
+    },
+  )
+}
+
+# Attach S3 read and write policy to the Lambda execution role
+resource "aws_iam_role_policy_attachment" "ordanceSurvey_read_and_write" {
+  role       = module.ordnance.role_name
+  policy_arn = data.terraform_remote_state.shared.outputs.ordnance_s3_read_and_write_arn
+}
--- a/infrastructure/terraform/lambda/ordnanceSurvey/provider.tf
+++ b/infrastructure/terraform/lambda/ordnanceSurvey/provider.tf
@ -0,0 +1,16 @@
+terraform {
+  required_providers {
+    aws = {
+      source  = "hashicorp/aws"
+      version = "~> 4.16"
+    }
+  }
+
+  backend "s3" {
+    bucket = REPLACE_ME
+    key    = "terraform.tfstate"
+    region = "eu-west-2"
+  }
+
+  required_version = ">= 1.2.0"
+}
--- a/infrastructure/terraform/lambda/ordnanceSurvey/variables.tf
+++ b/infrastructure/terraform/lambda/ordnanceSurvey/variables.tf
@ -0,0 +1,32 @@
+variable "lambda_name" {
+  type        = string
+  description = "Logical name of the lambda (e.g. address2uprn)"
+}
+
+variable "stage" {
+  description = "Deployment stage (e.g. dev, prod)"
+  type        = string
+}
+variable "ecr_repo_url" {
+  type        = string
+  description = "ECR repository URL (no tag, no digest)"
+}
+
+variable "image_digest" {
+  type        = string
+  description = "Image digest (sha256:...)"
+}
+
+variable "maximum_concurrency" {
+  type        = number
+  default     = null
+  description = "Maximum number of concurrent Lambda invocations from SQS (2-1000). null = no limit."
+}
+
+locals {
+  image_uri = "${var.ecr_repo_url}@${var.image_digest}"
+}
+
+output "resolved_image_uri" {
+  value = local.image_uri
+}
--- a/infrastructure/terraform/shared/main.tf
+++ b/infrastructure/terraform/shared/main.tf
@ -414,4 +414,36 @@ module "categorisation_registry" {
  source = "../modules/container_registry"
  name   = "categorisation"
  stage = var.stage
-}
+}
+
+
+################################################
+# OrdnanceSurveyAPI – Lambda
+################################################
+module "ordnance_state_bucket" {
+  source      = "../modules/tf_state_bucket"
+  bucket_name = "ordnance-terraform-state"
+
+}
+
+module "ordnance_registry" {
+  source = "../modules/container_registry"
+  name   = "ordnance"
+  stage = var.stage
+
+}
+
+# S3 policy for the ordnance Lambda to read from and write to the retrofit data bucket
+module "ordnance_s3_read_and_write" {
+  source = "../modules/s3_iam_policy"
+
+  # Policy name is specific to this Lambda so it cannot be confused with, or collide
+  # with, the Address2UPRN policy.
+  policy_name        = "OrdnanceSurveyReadandWriteS3"
+  policy_description = "Allow ordnance Lambda to read and write from retrofit-data bucket"
+  bucket_arns        = ["arn:aws:s3:::retrofit-data-${var.stage}"]
+  actions            = ["s3:GetObject", "s3:ListBucket", "s3:PutObject"]
+  resource_paths     = ["/*"]
+}
+
+output "ordnance_s3_read_and_write_arn" {
+  value = module.ordnance_s3_read_and_write.policy_arn
+}