mirror of
https://github.com/Hestia-Homes/survey-extraction.git
synced 2026-06-08 11:17:29 +00:00
walthamforest_etl
This commit is contained in:
parent
382f37531c
commit
5c2a8f0755
10 changed files with 225 additions and 0 deletions
21
deployment/lambda/walthamforest_etl/docker/.dockerignore
Normal file
21
deployment/lambda/walthamforest_etl/docker/.dockerignore
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
# Ignore junk and large files
|
||||
*.pdf
|
||||
*.csv
|
||||
*.xml
|
||||
*.parquet
|
||||
*.ipynb
|
||||
*.mp4
|
||||
*.mov
|
||||
*.jpg
|
||||
*.png
|
||||
*.zip
|
||||
*.tar.gz
|
||||
__pycache__/
|
||||
*.pyc
|
||||
*.pyo
|
||||
*.pyd
|
||||
build/
|
||||
dist/
|
||||
.etl_cache/
|
||||
tests/
|
||||
docs/
|
||||
25
deployment/lambda/walthamforest_etl/docker/Dockerfile
Normal file
25
deployment/lambda/walthamforest_etl/docker/Dockerfile
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
# Container image for the walthamforest_etl Lambda.
# NOTE: the COPY paths below are relative to the build context, so the image
# has to be built with a context that contains pyproject.toml, etl/ and
# deployment/ (presumably the repository root — confirm in the CI build step).
FROM public.ecr.aws/lambda/python:3.12

# Install Poetry (you could pin a version if you like)
RUN curl -sSL https://install.python-poetry.org | python3 -

# Add Poetry to PATH
ENV PATH="/root/.local/bin:$PATH"

# Set working directory
WORKDIR /var/task

# Copy the Poetry project files together with the etl/ package sources.
# Because etl/ is copied before the install step, any change under etl/
# invalidates the dependency layer below.
COPY pyproject.toml poetry.lock README.md ./
COPY etl/ etl/

# Install main-group dependencies without creating a virtualenv
RUN poetry config virtualenvs.create false \
    && poetry install --only main --no-interaction --no-ansi

# Copy this Lambda's own handler module. The previous path pointed at
# deployment/lambda/extractor_and_loader/docker/app.py, i.e. a different
# Lambda's handler, so the walthamforest_etl app.py was never shipped.
COPY deployment/lambda/walthamforest_etl/docker/app.py ./

# Set Lambda handler (module app, function handler)
CMD ["app.handler"]
|
||||
2
deployment/lambda/walthamforest_etl/docker/app.py
Normal file
2
deployment/lambda/walthamforest_etl/docker/app.py
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
def handler(event, context):
    """Lambda entry point (placeholder implementation).

    Args:
        event: Invocation payload passed by Lambda; not read by this stub.
        context: Lambda runtime context object; not read by this stub.

    Returns:
        None. The function only prints a fixed message to stdout.
    """
    print("hello world")
|
||||
62
deployment/lambda/walthamforest_etl/docker/ecr.tf
Normal file
62
deployment/lambda/walthamforest_etl/docker/ecr.tf
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
# ECR repo
|
||||
resource "aws_ecr_repository" "walthamforest_etl" {
|
||||
name = "walthamforest_etl"
|
||||
}
|
||||
|
||||
# ECR policy to allow Lambda access
|
||||
resource "aws_ecr_repository_policy" "walthamforest_etl_ecr_access" {
|
||||
repository = aws_ecr_repository.walthamforest_etl.name
|
||||
|
||||
policy = jsonencode({
|
||||
Version = "2008-10-17",
|
||||
Statement = [{
|
||||
Sid = "AllowLambdaPull",
|
||||
Effect = "Allow",
|
||||
Principal = {
|
||||
Service = "lambda.amazonaws.com"
|
||||
},
|
||||
Action = [
|
||||
"ecr:GetDownloadUrlForLayer",
|
||||
"ecr:BatchGetImage",
|
||||
"ecr:BatchCheckLayerAvailability"
|
||||
]
|
||||
}]
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
# ECR lifecycle policy: expire untagged images 1 day after push, and keep only the 5 most recent images whose tag starts with "feature"
|
||||
# Lifecycle rules for the walthamforest_etl repository.
# Rule 1 keeps only the 5 most recent images whose tag starts with "feature".
# Rule 2 expires untagged images 1 day after they were pushed.
resource "aws_ecr_lifecycle_policy" "walthamforest_etl_loader_lifecycle" {
  repository = aws_ecr_repository.walthamforest_etl.name

  policy = jsonencode({
    "rules" : [
      {
        "rulePriority" : 2,
        # Description now matches the selection below (untagged, 1 day);
        # it previously claimed 14 days.
        # NOTE(review): if 14 days was the intent, change countNumber instead.
        "description" : "Expire untagged images older than 1 day",
        "selection" : {
          "tagStatus" : "untagged",
          "countType" : "sinceImagePushed",
          "countUnit" : "days",
          "countNumber" : 1
        },
        "action" : {
          "type" : "expire"
        }
      },
      {
        "rulePriority" : 1,
        "description" : "Keep last 5 images",
        "selection" : {
          "tagStatus" : "tagged",
          "tagPrefixList" : ["feature"],
          "countType" : "imageCountMoreThan",
          "countNumber" : 5
        },
        "action" : {
          "type" : "expire"
        }
      }
    ]
  })
}
|
||||
0
deployment/lambda/walthamforest_etl/docker/main.tf
Normal file
0
deployment/lambda/walthamforest_etl/docker/main.tf
Normal file
15
deployment/lambda/walthamforest_etl/docker/provider.tf
Normal file
15
deployment/lambda/walthamforest_etl/docker/provider.tf
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
terraform {
|
||||
required_providers {
|
||||
aws = {
|
||||
source = "hashicorp/aws"
|
||||
version = "~> 6.3.0"
|
||||
}
|
||||
}
|
||||
backend "s3" {
|
||||
bucket = "survey-extractor-tf-state"
|
||||
region = "eu-west-2"
|
||||
key = "env:/dev/lambda/ecr/walthamforest_etl.tfstate"
|
||||
}
|
||||
|
||||
required_version = ">= 1.2.0"
|
||||
}
|
||||
0
deployment/lambda/walthamforest_etl/main.tf
Normal file
0
deployment/lambda/walthamforest_etl/main.tf
Normal file
15
deployment/lambda/walthamforest_etl/provider.tf
Normal file
15
deployment/lambda/walthamforest_etl/provider.tf
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
# Terraform settings for the walthamforest_etl Lambda stack.
terraform {
  required_providers {
    aws = {
      source  = "hashicorp/aws"
      version = "~> 6.3.0"
    }
  }

  # Remote state in S3. The state key must be unique to this stack: the
  # previous value ("…/extractor_and_loader_lambda.tfstate") named another
  # Lambda's state object, so applying this stack would have read and
  # overwritten that state.
  backend "s3" {
    bucket = "survey-extractor-tf-state"
    region = "eu-west-2"
    key    = "env:/dev/lambda/eachlambda/walthamforest_etl_lambda.tfstate"
  }

  required_version = ">= 1.2.0"
}
|
||||
5
deployment/lambda/walthamforest_etl/vars.tf
Normal file
5
deployment/lambda/walthamforest_etl/vars.tf
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
variable "lambda_image_tag" {
|
||||
description = "Docker image tag (e.g. GitHub SHA)"
|
||||
type = string
|
||||
default = "local-dev-latest"
|
||||
}
|
||||
|
|
@ -0,0 +1,80 @@
|
|||
# Reference existing IAM role
|
||||
data "aws_iam_role" "lambda_exec_role" {
|
||||
name = "lambda-exec-role"
|
||||
}
|
||||
|
||||
# Reference existing ECR repository
|
||||
# Look up the ECR repository that holds this Lambda's container image.
# The name must match the repository created in docker/ecr.tf, which is
# "walthamforest_etl" (the earlier "walthamforest_etl_ecr" does not exist
# and makes the data source lookup fail).
data "aws_ecr_repository" "walthamforest_etl_ecr" {
  name = "walthamforest_etl"
}
|
||||
|
||||
# SQS queue that triggers the walthamforest_etl Lambda
|
||||
# Queue consumed by the Lambda through the event source mapping in this file.
resource "aws_sqs_queue" "walthamforest_etl_queue" {
  name = "walthamforest_etl-queue"

  # Lambda rejects an SQS event source mapping when the queue's visibility
  # timeout is shorter than the function timeout (300 s in this stack); the
  # SQS default is only 30 s. AWS recommends at least 6x the function
  # timeout, hence 6 * 300 = 1800 s.
  visibility_timeout_seconds = 1800
}
|
||||
|
||||
|
||||
# IAM policy specific to this Lambda
|
||||
resource "aws_iam_policy" "walthamforest_etl_policy" {
|
||||
name = "walthamforest_etl-loader-policy"
|
||||
|
||||
policy = jsonencode({
|
||||
Version = "2012-10-17",
|
||||
Statement = [
|
||||
{
|
||||
Effect = "Allow",
|
||||
Action = [
|
||||
"sqs:ReceiveMessage",
|
||||
"sqs:DeleteMessage",
|
||||
"sqs:GetQueueAttributes"
|
||||
],
|
||||
Resource = aws_sqs_queue.walthamforest_etl_queue.arn
|
||||
},
|
||||
{
|
||||
Effect = "Allow",
|
||||
Action = [
|
||||
"ecr:GetDownloadUrlForLayer",
|
||||
"ecr:BatchGetImage",
|
||||
"ecr:BatchCheckLayerAvailability"
|
||||
],
|
||||
Resource = data.aws_ecr_repository.walthamforest_etl_ecr.arn
|
||||
},
|
||||
{
|
||||
Effect = "Allow",
|
||||
Action = ["ecr:GetAuthorizationToken"],
|
||||
Resource = "*"
|
||||
}
|
||||
]
|
||||
})
|
||||
}
|
||||
|
||||
# Attach the Lambda-specific policy to the shared execution role.
resource "aws_iam_role_policy_attachment" "walthamforest_etl_policy_attach" {
  role = data.aws_iam_role.lambda_exec_role.name

  # Reference the policy by its Terraform resource name
  # (walthamforest_etl_policy). The earlier reference used the policy's AWS
  # name "walthamforest_etl-loader-policy", which is not a declared resource.
  policy_arn = aws_iam_policy.walthamforest_etl_policy.arn
}
|
||||
|
||||
# Lambda function
|
||||
# Container-image Lambda; the image comes from the ECR repository looked up
# above, tagged with var.lambda_image_tag.
resource "aws_lambda_function" "waltham_forest_etl" {
  function_name = "walthamforest_etl"
  role          = data.aws_iam_role.lambda_exec_role.arn
  package_type  = "Image"
  image_uri     = "${data.aws_ecr_repository.walthamforest_etl_ecr.repository_url}:${var.lambda_image_tag}"

  # Increase timeout (max 900 sec / 15 min)
  timeout = 300 # e.g. 5 minutes

  # Increase memory (default 128 MB)
  memory_size = 2048 # try 1024 or 2048 MB to start

  # SECURITY: this commented-out block previously embedded a plaintext
  # PostgreSQL URL including the database password. That credential is in
  # version-control history and should be rotated. When this block is
  # enabled, supply the URL from a secret store or a sensitive variable
  # rather than a literal.
  # environment {
  #   variables = {
  #     DATABASE_URL = "<redacted - inject at deploy time>"
  #   }
  # }
}
|
||||
|
||||
# SQS trigger
|
||||
# Invoke the Lambda for messages arriving on the walthamforest_etl queue,
# one message per invocation.
# NOTE(review): the resource label still says "extractor_and_loader"; it is
# left unchanged here so the resource address stays stable.
resource "aws_lambda_event_source_mapping" "extractor_and_loader_trigger" {
  event_source_arn = aws_sqs_queue.walthamforest_etl_queue.arn

  # The function resource is declared as "waltham_forest_etl"; the earlier
  # reference to aws_lambda_function.walthamforest_etl was undeclared.
  function_name = aws_lambda_function.waltham_forest_etl.arn
  batch_size    = 1
}
|
||||
Loading…
Add table
Reference in a new issue