ordnancesurvey deployment

This commit is contained in:
Jun-te Kim 2026-03-06 13:29:25 +00:00
parent 815ce01082
commit 071a67e501
7 changed files with 239 additions and 8 deletions

View file

@ -242,3 +242,42 @@ jobs:
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
# ============================================================
# 2⃣ Build OrdnanceSurvey image and push
# ============================================================
# Calls the reusable _build_image.yml workflow for the OrdnanceSurvey handler
# Dockerfile, targeting the stage-specific "ordnance-<stage>" ECR repository.
ordnanceSurvey_image:
  uses: ./.github/workflows/_build_image.yml
  needs: [determine_stage, shared_terraform]
  with:
    build_context: .
    dockerfile_path: backend/ordnanceSurvey/handler/Dockerfile
    ecr_repo: ordnance-${{ needs.determine_stage.outputs.stage }}
    # NOTE(review): these are passed as literal "$VAR" text; presumably the
    # called workflow expands them from the secrets below — confirm.
    build_args: |
      DEV_DB_HOST=$DEV_DB_HOST
      DEV_DB_PORT=$DEV_DB_PORT
      DEV_DB_NAME=$DEV_DB_NAME
  secrets:
    # AWS credentials
    AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
    AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
    AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
    # Database connection details referenced by build_args
    DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }}
    DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }}
    DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }}
# ============================================================
# 3⃣ Deploy OrdnanceSurvey Lambda
# ============================================================
# Deploys the OrdnanceSurvey Lambda through the reusable _deploy_lambda.yml
# workflow, using the image digest produced by the ordnanceSurvey_image job.
ordnanceSurvey_lambda:
  needs: [ordnanceSurvey_image, determine_stage]
  uses: ./.github/workflows/_deploy_lambda.yml
  with:
    lambda_name: ordnanceSurvey
    lambda_path: infrastructure/terraform/lambda/ordnanceSurvey
    stage: ${{ needs.determine_stage.outputs.stage }}
    # Must be the same repository ordnanceSurvey_image builds into: the
    # image_digest below only exists in "ordnance-<stage>".
    ecr_repo: ordnance-${{ needs.determine_stage.outputs.stage }}
    image_digest: ${{ needs.ordnanceSurvey_image.outputs.image_digest }}
    terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }}
  secrets:
    AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
    AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
    AWS_REGION: ${{ secrets.DEV_AWS_REGION }}

View file

@ -476,9 +476,11 @@ def handler(event, context, local=False):
try:
# Concatenate Address columns directly
address2uprn_user_input = (
str(row.get("Address 1", "")).strip() + " " +
str(row.get("Address 2", "")).strip() + " " +
str(row.get("Address 3", "")).strip()
str(row.get("Address 1", "")).strip()
+ " "
+ str(row.get("Address 2", "")).strip()
+ " "
+ str(row.get("Address 3", "")).strip()
).strip()
if not address2uprn_user_input:
@ -489,7 +491,9 @@ def handler(event, context, local=False):
# Get UPRN using the pre-fetched EPC data with all return options
result = get_uprn_with_epc_df(
user_inputed_address=address2uprn_user_input, epc_df=epc_df, verbose=True
user_inputed_address=address2uprn_user_input,
epc_df=epc_df,
verbose=True,
)
# Parse result tuple if successful

View file

@ -4,7 +4,7 @@ from utils.logger import setup_logger
import logging
from backend.utils.subtasks import subtask_handler
from utils.s3 import (
# save_csv_to_s3,
save_csv_to_s3,
read_csv_from_s3 as read_csv_from_s3_dict,
parse_s3_uri,
)
@ -17,6 +17,9 @@ from backend.utils.ordnance_survey import (
)
from backend.app.config import get_settings
from sqlalchemy import select
from datetime import datetime
import uuid
import os
import pandas as pd
@ -64,6 +67,47 @@ def get_ordance_survey_record(row, cache=None):
# process cache with row
def save_results_to_s3(
    results_df: pd.DataFrame, task_id: str, sub_task_id: str, bucket_name: str = None
) -> bool:
    """
    Save results DataFrame to S3 as CSV in a parent folder structure.

    The object key has the form
    ``ara_ordnance_survey_outputs/<task_id>/<sub_task_id>/ordnanceSurvey/<timestamp>_<uuid8>.csv``.
    Failures are logged and reported through the return value rather than
    raised, so callers must check the result.

    :param results_df: The DataFrame containing results
    :param task_id: The task ID (used for file naming)
    :param sub_task_id: The subtask ID (used for file naming)
    :param bucket_name: The S3 bucket name (defaults to the S3_BUCKET_NAME env variable)
    :return: True if successful, False otherwise
    """
    if bucket_name is None:
        bucket_name = os.getenv("S3_BUCKET_NAME")
    if not bucket_name:
        logger.error(
            "S3 bucket name not provided and S3_BUCKET_NAME environment variable not set"
        )
        return False

    # Timestamp plus a short random suffix keeps repeated runs of the same
    # subtask from overwriting each other's output.
    file_name = f"{datetime.now().isoformat()}_{str(uuid.uuid4())[:8]}"
    file_key = f"ara_ordnance_survey_outputs/{task_id}/{sub_task_id}/ordnanceSurvey/{file_name}.csv"

    try:
        # NOTE(review): the sibling import is aliased read_csv_from_s3_dict;
        # confirm save_csv_to_s3 accepts a DataFrame rather than row dicts.
        success = save_csv_to_s3(results_df, bucket_name, file_key)
    except Exception as e:
        # logger.exception records the traceback, which logger.error dropped.
        logger.exception(f"Error saving results to S3: {e}")
        return False

    if not success:
        logger.error("Failed to save results to S3")
        return False

    logger.info(f"Successfully saved results to s3://{bucket_name}/{file_key}")
    return True
@subtask_handler() # This assumes task_id and subtask_id is defined in event.Records.body
def handler(body: dict[str, Any], context: Any, local: bool = False) -> None:
@ -81,6 +125,8 @@ def handler(body: dict[str, Any], context: Any, local: bool = False) -> None:
s3_uri: str = body.get("s3_uri", "")
lexiscore_threshold: float = body.get("lexiscore_threshold", 0.5)
lexiscore_column: str = body.get("lexiscore_column", None)
task_id: str = body.get("task_id", "")
sub_task_id: str = body.get("sub_task_id", "")
if s3_uri == "":
raise RuntimeError("Missing s3_uri in message body")
@ -168,8 +214,13 @@ def handler(body: dict[str, Any], context: Any, local: bool = False) -> None:
df.at[idx, "ordnance_survey_uprn"] = postcode_cache.at[best_idx, "UPRN"]
df.at[idx, "ordnance_survey_lexiscore"] = best_score
# TODO: Save new results to s3 (ask Khalim if we want to save to db)
# Save results locally
df.to_csv("ordnance_survey_results.csv", index=False)
print(f"Results saved to ordnance_survey_results.csv ({len(df)} rows)")
# TODO upload to s3 once you get confirmation from Khalim or db
# Save results to S3
if task_id and sub_task_id:
try:
save_results_to_s3(df, task_id, sub_task_id)
except Exception as s3_error:
logger.error(f"Failed to save results to S3: {s3_error}")

View file

@ -0,0 +1,57 @@
# Remote state of the "shared" stack for this stage; its outputs supply the
# S3 bucket name and IAM policy ARN used further down in this file.
data "terraform_remote_state" "shared" {
  backend = "s3"

  config = {
    region = "eu-west-2"
    bucket = "assessment-model-terraform-state"
    key    = "env:/${var.stage}/terraform.tfstate"
  }
}
# Current version of the stage-scoped database credentials secret.
data "aws_secretsmanager_secret_version" "db_credentials" {
  secret_id = format("%s/assessment_model/db_credentials", var.stage)
}
locals {
  # Secret payload is a JSON document; decode it once so individual fields
  # can be referenced as local.db_credentials.<field>.
  db_credentials = jsondecode(
    data.aws_secretsmanager_secret_version.db_credentials.secret_string
  )
}
# OrdnanceSurvey Lambda, built from the shared lambda_with_sqs module.
module "ordnance" {
  source = "../modules/lambda_with_sqs"

  # Must be a quoted string: a bare identifier is parsed as a reference to a
  # resource type and fails validation.
  name      = "ordnanceSurvey"
  stage     = var.stage
  image_uri = local.image_uri
  timeout   = 900

  # Optional: Set maximum_concurrency to limit concurrent SQS-triggered invocations (2-1000)
  maximum_concurrency = var.maximum_concurrency

  environment = merge(
    {
      STAGE       = var.stage
      LOG_LEVEL   = "info"
      DB_USERNAME = local.db_credentials.db_assessment_model_username
      DB_PASSWORD = local.db_credentials.db_assessment_model_password

      # NOTE(review): the "test" values below look like placeholders,
      # presumably there to satisfy required application settings this
      # Lambda does not use — confirm.
      GOOGLE_SOLAR_API_KEY            = "test"
      SAP_PREDICTIONS_BUCKET          = "test"
      CARBON_PREDICTIONS_BUCKET       = "test"
      HEAT_PREDICTIONS_BUCKET         = "test"
      HEATING_KWH_PREDICTIONS_BUCKET  = "test"
      HOTWATER_KWH_PREDICTIONS_BUCKET = "test"
      API_KEY                         = "test"
      ENVIRONMENT                     = "test"
      SECRET_KEY                      = "test"
      PLAN_TRIGGER_BUCKET             = "test"
      DATA_BUCKET                     = "test"
      ENGINE_SQS_URL                  = "test"
      ENERGY_ASSESSMENTS_BUCKET       = "test"

      # Bucket the handler writes its results CSV to.
      S3_BUCKET_NAME = data.terraform_remote_state.shared.outputs.retrofit_sap_data_bucket_name
    },
  )
}
# Attach the shared S3 read-and-write policy to the Lambda execution role
resource "aws_iam_role_policy_attachment" "ordanceSurvey_read_and_write" {
  policy_arn = data.terraform_remote_state.shared.outputs.ordnance_s3_read_and_write_arn
  role       = module.ordnance.role_name
}

View file

@ -0,0 +1,16 @@
# Terraform settings for this stack: AWS provider constraint, S3 state
# backend, and minimum Terraform CLI version.
terraform {
required_providers {
aws = {
source = "hashicorp/aws"
version = "~> 4.16"
}
}
backend "s3" {
# NOTE(review): REPLACE_ME is a bare placeholder and not valid HCL as written;
# presumably the deploy workflow substitutes the real state bucket before
# `terraform init` — confirm, and confirm the substitution adds the quotes.
bucket = REPLACE_ME
key = "terraform.tfstate"
region = "eu-west-2"
}
required_version = ">= 1.2.0"
}

View file

@ -0,0 +1,32 @@
# Name of the Lambda being deployed.
variable "lambda_name" {
  description = "Logical name of the lambda (e.g. address2uprn)"
  type        = string
}
# Stage name; interpolated into state keys and secret paths.
variable "stage" {
  type        = string
  description = "Deployment stage (e.g. dev, prod)"
}
# Repository part of the image reference; combined with image_digest in locals.
variable "ecr_repo_url" {
  description = "ECR repository URL (no tag, no digest)"
  type        = string
}
# Digest part of the image reference; combined with ecr_repo_url in locals.
variable "image_digest" {
  description = "Image digest (sha256:...)"
  type        = string
}
# Optional cap on SQS-driven concurrency; left unset (null) by default.
variable "maximum_concurrency" {
  description = "Maximum number of concurrent Lambda invocations from SQS (2-1000). null = no limit."
  type        = number
  default     = null
}
locals {
  # Digest-pinned image reference: <repository URL>@<digest>.
  image_uri = join("@", [var.ecr_repo_url, var.image_digest])
}
# Exposes the digest-pinned image URI computed in locals.
output "resolved_image_uri" {
  value = local.image_uri
}

View file

@ -414,4 +414,36 @@ module "categorisation_registry" {
source = "../modules/container_registry"
name = "categorisation"
stage = var.stage
}
}
################################################
# OrdnanceSurveyAPI Lambda
################################################
# Terraform state bucket for the ordnance stack.
module "ordnance_state_bucket" {
  bucket_name = "ordnance-terraform-state"
  source      = "../modules/tf_state_bucket"
}
# Container registry for the ordnance image, one per stage.
module "ordnance_registry" {
  source = "../modules/container_registry"

  stage = var.stage
  name  = "ordnance"
}
# S3 policy for the ordnance Lambda to read from and write to the retrofit data bucket
module "ordnance_s3_read_and_write" {
  source = "../modules/s3_iam_policy"

  # Named after this Lambda. The previous "Address2UPRNReadandWriteS3" was
  # copied from another Lambda, and IAM policy names must be unique per account.
  policy_name        = "OrdnanceSurveyReadandWriteS3"
  policy_description = "Allow ordnance Lambda to read and write from retrofit-data bucket"

  # NOTE(review): the Lambda's S3_BUCKET_NAME comes from the shared output
  # retrofit_sap_data_bucket_name — confirm it resolves to this same bucket.
  bucket_arns = ["arn:aws:s3:::retrofit-data-${var.stage}"]

  # NOTE(review): s3:ListBucket applies to the bucket ARN itself, not to
  # objects under "/*" — confirm the module also grants it on the bucket.
  actions        = ["s3:GetObject", "s3:ListBucket", "s3:PutObject"]
  resource_paths = ["/*"]
}
# Exports the policy ARN produced by the ordnance_s3_read_and_write module.
output "ordnance_s3_read_and_write_arn" {
  value = module.ordnance_s3_read_and_write.policy_arn
}