mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
ordnancesurvey deployment
This commit is contained in:
parent
815ce01082
commit
071a67e501
7 changed files with 239 additions and 8 deletions
39
.github/workflows/deploy_terraform.yml
vendored
39
.github/workflows/deploy_terraform.yml
vendored
|
|
@ -242,3 +242,42 @@ jobs:
|
|||
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 2️⃣ Build OrdnanceSurvey image and push
|
||||
# ============================================================
|
||||
ordnanceSurvey_image:
|
||||
needs: [determine_stage, shared_terraform]
|
||||
uses: ./.github/workflows/_build_image.yml
|
||||
with:
|
||||
ecr_repo: ordnance-${{ needs.determine_stage.outputs.stage }}
|
||||
dockerfile_path: backend/ordnanceSurvey/handler/Dockerfile
|
||||
build_context: .
|
||||
build_args: |
|
||||
DEV_DB_HOST=$DEV_DB_HOST
|
||||
DEV_DB_PORT=$DEV_DB_PORT
|
||||
DEV_DB_NAME=$DEV_DB_NAME
|
||||
secrets:
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
|
||||
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
|
||||
DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }}
|
||||
DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }}
|
||||
DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }}
|
||||
|
||||
# ============================================================
|
||||
# 3️⃣ Deploy OrdnanceSurvey Lambda
|
||||
# ============================================================
|
||||
# Deploys the OrdnanceSurvey Lambda via the reusable _deploy_lambda workflow,
# using the image digest produced by the ordnanceSurvey_image job.
ordnanceSurvey_lambda:
  needs: [ordnanceSurvey_image, determine_stage]
  uses: ./.github/workflows/_deploy_lambda.yml
  with:
    lambda_name: ordnanceSurvey
    lambda_path: infrastructure/terraform/lambda/ordnanceSurvey
    stage: ${{ needs.determine_stage.outputs.stage }}
    # Must be the same repository ordnanceSurvey_image pushes to
    # (ordnance-<stage>); image_digest below only exists in that repository.
    ecr_repo: ordnance-${{ needs.determine_stage.outputs.stage }}
    image_digest: ${{ needs.ordnanceSurvey_image.outputs.image_digest }}
    terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }}
  secrets:
    AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
    AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
    AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
|
||||
|
|
@ -476,9 +476,11 @@ def handler(event, context, local=False):
|
|||
try:
|
||||
# Concatenate Address columns directly
|
||||
address2uprn_user_input = (
|
||||
str(row.get("Address 1", "")).strip() + " " +
|
||||
str(row.get("Address 2", "")).strip() + " " +
|
||||
str(row.get("Address 3", "")).strip()
|
||||
str(row.get("Address 1", "")).strip()
|
||||
+ " "
|
||||
+ str(row.get("Address 2", "")).strip()
|
||||
+ " "
|
||||
+ str(row.get("Address 3", "")).strip()
|
||||
).strip()
|
||||
|
||||
if not address2uprn_user_input:
|
||||
|
|
@ -489,7 +491,9 @@ def handler(event, context, local=False):
|
|||
|
||||
# Get UPRN using the pre-fetched EPC data with all return options
|
||||
result = get_uprn_with_epc_df(
|
||||
user_inputed_address=address2uprn_user_input, epc_df=epc_df, verbose=True
|
||||
user_inputed_address=address2uprn_user_input,
|
||||
epc_df=epc_df,
|
||||
verbose=True,
|
||||
)
|
||||
|
||||
# Parse result tuple if successful
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ from utils.logger import setup_logger
|
|||
import logging
|
||||
from backend.utils.subtasks import subtask_handler
|
||||
from utils.s3 import (
|
||||
# save_csv_to_s3,
|
||||
save_csv_to_s3,
|
||||
read_csv_from_s3 as read_csv_from_s3_dict,
|
||||
parse_s3_uri,
|
||||
)
|
||||
|
|
@ -17,6 +17,9 @@ from backend.utils.ordnance_survey import (
|
|||
)
|
||||
from backend.app.config import get_settings
|
||||
from sqlalchemy import select
|
||||
from datetime import datetime
|
||||
import uuid
|
||||
import os
|
||||
|
||||
import pandas as pd
|
||||
|
||||
|
|
@ -64,6 +67,47 @@ def get_ordance_survey_record(row, cache=None):
|
|||
# process cache with row
|
||||
|
||||
|
||||
def save_results_to_s3(
    results_df: pd.DataFrame, task_id: str, sub_task_id: str, bucket_name: str = None
) -> bool:
    """
    Save results DataFrame to S3 as CSV under a per-task, per-subtask prefix.

    The object key has the form
    ``ara_ordnance_survey_outputs/<task_id>/<sub_task_id>/ordnanceSurvey/<timestamp>_<uuid8>.csv``.
    Failures are logged and reported through the return value rather than raised.

    :param results_df: The DataFrame containing results
    :param task_id: The task ID (used in the key prefix)
    :param sub_task_id: The subtask ID (used in the key prefix)
    :param bucket_name: The S3 bucket name; when None, the S3_BUCKET_NAME
        environment variable is used
    :return: True if save_csv_to_s3 reported success, False if no bucket could
        be resolved, the upload reported failure, or an exception occurred
    """
    if bucket_name is None:
        bucket_name = os.getenv("S3_BUCKET_NAME")

    # Guard: nothing to upload to when neither the argument nor the env var is set.
    if not bucket_name:
        logger.error(
            "S3 bucket name not provided and S3_BUCKET_NAME environment variable not set"
        )
        return False

    try:
        # Timestamp plus a short random suffix keeps repeated runs of the same
        # subtask from overwriting each other's output.
        file_name = f"{datetime.now().isoformat()}_{str(uuid.uuid4())[:8]}"
        file_key = f"ara_ordnance_survey_outputs/{task_id}/{sub_task_id}/ordnanceSurvey/{file_name}.csv"

        if not save_csv_to_s3(results_df, bucket_name, file_key):
            logger.error("Failed to save results to S3")
            return False

        logger.info(f"Successfully saved results to s3://{bucket_name}/{file_key}")
        return True

    except Exception as e:
        # Best-effort by design: report failure to the caller instead of raising,
        # but keep the traceback in the logs so the cause can be diagnosed.
        logger.exception(f"Error saving results to S3: {str(e)}")
        return False
|
||||
|
||||
|
||||
@subtask_handler() # This assumes task_id and subtask_id is defined in event.Records.body
|
||||
def handler(body: dict[str, Any], context: Any, local: bool = False) -> None:
|
||||
|
||||
|
|
@ -81,6 +125,8 @@ def handler(body: dict[str, Any], context: Any, local: bool = False) -> None:
|
|||
s3_uri: str = body.get("s3_uri", "")
|
||||
lexiscore_threshold: float = body.get("lexiscore_threshold", 0.5)
|
||||
lexiscore_column: str = body.get("lexiscore_column", None)
|
||||
task_id: str = body.get("task_id", "")
|
||||
sub_task_id: str = body.get("sub_task_id", "")
|
||||
|
||||
if s3_uri == "":
|
||||
raise RuntimeError("Missing s3_uri in message body")
|
||||
|
|
@ -168,8 +214,13 @@ def handler(body: dict[str, Any], context: Any, local: bool = False) -> None:
|
|||
df.at[idx, "ordnance_survey_uprn"] = postcode_cache.at[best_idx, "UPRN"]
|
||||
df.at[idx, "ordnance_survey_lexiscore"] = best_score
|
||||
|
||||
# TODO: Save new results to s3 (ask Khalim if we want to save to db)
|
||||
# Save results locally
|
||||
df.to_csv("ordnance_survey_results.csv", index=False)
|
||||
print(f"Results saved to ordnance_survey_results.csv ({len(df)} rows)")
|
||||
|
||||
# TODO upload to s3 once you get confirmation from Khalim or db
|
||||
# Save results to S3
|
||||
if task_id and sub_task_id:
|
||||
try:
|
||||
save_results_to_s3(df, task_id, sub_task_id)
|
||||
except Exception as s3_error:
|
||||
logger.error(f"Failed to save results to S3: {s3_error}")
|
||||
|
|
|
|||
57
infrastructure/terraform/lambda/ordnanceSurvey/main.tf
Normal file
57
infrastructure/terraform/lambda/ordnanceSurvey/main.tf
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
data "terraform_remote_state" "shared" {
|
||||
backend = "s3"
|
||||
config = {
|
||||
bucket = "assessment-model-terraform-state"
|
||||
key = "env:/${var.stage}/terraform.tfstate"
|
||||
region = "eu-west-2"
|
||||
}
|
||||
}
|
||||
data "aws_secretsmanager_secret_version" "db_credentials" {
|
||||
secret_id = "${var.stage}/assessment_model/db_credentials"
|
||||
}
|
||||
locals {
|
||||
db_credentials = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string)
|
||||
}
|
||||
|
||||
# Instantiates the shared lambda_with_sqs module for the OrdnanceSurvey handler.
module "ordnance" {
  source = "../modules/lambda_with_sqs"

  # Must be a quoted string: a bare identifier is parsed as a reference and
  # fails validation. (Other lambdas use e.g. "address2uprn".)
  name  = "ordnanceSurvey"
  stage = var.stage

  image_uri = local.image_uri

  timeout = 900

  # Optional: Set maximum_concurrency to limit concurrent SQS-triggered invocations (2-1000)
  maximum_concurrency = var.maximum_concurrency

  # NOTE(review): the "test" values below look like placeholders, presumably
  # present only to satisfy required application settings — confirm before
  # relying on any of them at runtime.
  environment = merge(
    {
      STAGE                           = var.stage
      LOG_LEVEL                       = "info"
      DB_USERNAME                     = local.db_credentials.db_assessment_model_username
      DB_PASSWORD                     = local.db_credentials.db_assessment_model_password
      GOOGLE_SOLAR_API_KEY            = "test"
      SAP_PREDICTIONS_BUCKET          = "test"
      CARBON_PREDICTIONS_BUCKET       = "test"
      HEAT_PREDICTIONS_BUCKET         = "test"
      HEATING_KWH_PREDICTIONS_BUCKET  = "test"
      HOTWATER_KWH_PREDICTIONS_BUCKET = "test"
      API_KEY                         = "test"
      ENVIRONMENT                     = "test"
      SECRET_KEY                      = "test"
      PLAN_TRIGGER_BUCKET             = "test"
      DATA_BUCKET                     = "test"
      ENGINE_SQS_URL                  = "test"
      ENERGY_ASSESSMENTS_BUCKET       = "test"
      # Bucket the handler writes its result CSVs to (read via S3_BUCKET_NAME).
      S3_BUCKET_NAME = data.terraform_remote_state.shared.outputs.retrofit_sap_data_bucket_name
    },
  )
}
|
||||
|
||||
# Attach S3 read-and-write policy to the Lambda execution role
|
||||
resource "aws_iam_role_policy_attachment" "ordanceSurvey_read_and_write" {
|
||||
role = module.ordnance.role_name
|
||||
policy_arn = data.terraform_remote_state.shared.outputs.ordnance_s3_read_and_write_arn
|
||||
}
|
||||
16
infrastructure/terraform/lambda/ordnanceSurvey/provider.tf
Normal file
16
infrastructure/terraform/lambda/ordnanceSurvey/provider.tf
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
terraform {
|
||||
required_providers {
|
||||
aws = {
|
||||
source = "hashicorp/aws"
|
||||
version = "~> 4.16"
|
||||
}
|
||||
}
|
||||
|
||||
backend "s3" {
|
||||
bucket = REPLACE_ME
|
||||
key = "terraform.tfstate"
|
||||
region = "eu-west-2"
|
||||
}
|
||||
|
||||
required_version = ">= 1.2.0"
|
||||
}
|
||||
32
infrastructure/terraform/lambda/ordnanceSurvey/variables.tf
Normal file
32
infrastructure/terraform/lambda/ordnanceSurvey/variables.tf
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
variable "lambda_name" {
|
||||
type = string
|
||||
description = "Logical name of the lambda (e.g. address2uprn)"
|
||||
}
|
||||
|
||||
variable "stage" {
|
||||
description = "Deployment stage (e.g. dev, prod)"
|
||||
type = string
|
||||
}
|
||||
variable "ecr_repo_url" {
|
||||
type = string
|
||||
description = "ECR repository URL (no tag, no digest)"
|
||||
}
|
||||
|
||||
variable "image_digest" {
|
||||
type = string
|
||||
description = "Image digest (sha256:...)"
|
||||
}
|
||||
|
||||
variable "maximum_concurrency" {
|
||||
type = number
|
||||
default = null
|
||||
description = "Maximum number of concurrent Lambda invocations from SQS (2-1000). null = no limit."
|
||||
}
|
||||
|
||||
locals {
|
||||
image_uri = "${var.ecr_repo_url}@${var.image_digest}"
|
||||
}
|
||||
|
||||
output "resolved_image_uri" {
|
||||
value = local.image_uri
|
||||
}
|
||||
|
|
@ -414,4 +414,36 @@ module "categorisation_registry" {
|
|||
source = "../modules/container_registry"
|
||||
name = "categorisation"
|
||||
stage = var.stage
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
################################################
|
||||
# OrdnanceSurveyAPI – Lambda
|
||||
################################################
|
||||
module "ordnance_state_bucket" {
|
||||
source = "../modules/tf_state_bucket"
|
||||
bucket_name = "ordnance-terraform-state"
|
||||
|
||||
}
|
||||
|
||||
module "ordnance_registry" {
|
||||
source = "../modules/container_registry"
|
||||
name = "ordnance"
|
||||
stage = var.stage
|
||||
|
||||
}
|
||||
|
||||
# S3 policy for the ordnance Lambda to read from and write to the retrofit data bucket
|
||||
# IAM policy granting the ordnance Lambda object read/write and list access
# on the retrofit-data-<stage> bucket.
module "ordnance_s3_read_and_write" {
  source = "../modules/s3_iam_policy"

  # Named after this Lambda rather than Address2UPRN: the name should match the
  # description below, and IAM policy names must be unique within an account.
  policy_name        = "OrdnanceReadandWriteS3"
  policy_description = "Allow ordnance Lambda to read and write from retrofit-data bucket"
  bucket_arns        = ["arn:aws:s3:::retrofit-data-${var.stage}"]
  actions            = ["s3:GetObject", "s3:ListBucket", "s3:PutObject"]
  # NOTE(review): s3:ListBucket applies to the bucket ARN itself, not to
  # object paths — confirm the s3_iam_policy module also emits the bare bucket ARN.
  resource_paths = ["/*"]
}
|
||||
|
||||
output "ordnance_s3_read_and_write_arn" {
|
||||
value = module.ordnance_s3_read_and_write.policy_arn
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue