diff --git a/deployment/lambda/walthamforest_etl/docker/.dockerignore b/deployment/lambda/walthamforest_etl/docker/.dockerignore
new file mode 100644
index 0000000..d587d34
--- /dev/null
+++ b/deployment/lambda/walthamforest_etl/docker/.dockerignore
@@ -0,0 +1,24 @@
+# Ignore junk and large files so they are not sent with the build context.
+# NOTE(review): Docker reads .dockerignore from the build-context root (or
+# <Dockerfile-name>.dockerignore beside the Dockerfile under BuildKit); the
+# Dockerfile here COPYs repo-root paths, so confirm this file is picked up.
+*.pdf
+*.csv
+*.xml
+*.parquet
+*.ipynb
+*.mp4
+*.mov
+*.jpg
+*.png
+*.zip
+*.tar.gz
+__pycache__/
+*.pyc
+*.pyo
+*.pyd
+build/
+dist/
+.etl_cache/
+tests/
+docs/
diff --git a/deployment/lambda/walthamforest_etl/docker/Dockerfile b/deployment/lambda/walthamforest_etl/docker/Dockerfile
new file mode 100644
index 0000000..cdd1f8a
--- /dev/null
+++ b/deployment/lambda/walthamforest_etl/docker/Dockerfile
@@ -0,0 +1,25 @@
+FROM public.ecr.aws/lambda/python:3.12
+
+# Install Poetry (unpinned; consider pinning a version for reproducible builds)
+RUN curl -sSL https://install.python-poetry.org | python3 -
+
+# Add Poetry to PATH
+ENV PATH="/root/.local/bin:$PATH"
+
+# Set working directory
+WORKDIR /var/task
+
+# Copy the Poetry manifests and the etl/ sources before installing
+COPY pyproject.toml poetry.lock README.md ./
+COPY etl/ etl/
+
+
+# Install main dependencies into the image's Python (no virtualenv is created)
+RUN poetry config virtualenvs.create false \
+ && poetry install --only main --no-interaction --no-ansi
+
+# Copy this Lambda's own handler module (path is relative to the build context)
+COPY deployment/lambda/walthamforest_etl/docker/app.py ./
+
+# Set Lambda handler
+CMD ["app.handler"]
\ No newline at end of file
diff --git a/deployment/lambda/walthamforest_etl/docker/app.py b/deployment/lambda/walthamforest_etl/docker/app.py
new file mode 100644
index 0000000..3a4a625
--- /dev/null
+++ b/deployment/lambda/walthamforest_etl/docker/app.py
@@ -0,0 +1,3 @@
+def handler(event, context):
+    """Placeholder Lambda entry point: print a greeting and return None."""
+    print("hello world")
\ No newline at end of file
diff --git a/deployment/lambda/walthamforest_etl/docker/ecr.tf b/deployment/lambda/walthamforest_etl/docker/ecr.tf
new file mode 100644
index 0000000..36bec88
--- /dev/null
+++ b/deployment/lambda/walthamforest_etl/docker/ecr.tf
@@ -0,0 +1,62 @@
+# ECR repo
+resource "aws_ecr_repository" "walthamforest_etl" {
+  name = "walthamforest_etl"
+}
+
+# ECR policy to allow Lambda access
+resource "aws_ecr_repository_policy" "walthamforest_etl_ecr_access" {
+  repository = aws_ecr_repository.walthamforest_etl.name
+
+  policy = jsonencode({
+    Version = "2008-10-17",
+    Statement = [{
+      Sid    = "AllowLambdaPull",
+      Effect = "Allow",
+      Principal = {
+        Service = "lambda.amazonaws.com"
+      },
+      Action = [
+        "ecr:GetDownloadUrlForLayer",
+        "ecr:BatchGetImage",
+        "ecr:BatchCheckLayerAvailability"
+      ]
+    }]
+  })
+}
+
+
+# ECR lifecycle policy: expire untagged images after 1 day and keep only the 5 newest "feature*" tagged images
+resource "aws_ecr_lifecycle_policy" "walthamforest_etl_loader_lifecycle" {
+  repository = aws_ecr_repository.walthamforest_etl.name
+
+  policy = jsonencode({
+    "rules": [
+      {
+        "rulePriority": 2,
+        "description": "Expire untagged images older than 1 day",
+        "selection": {
+          "tagStatus": "untagged",
+          "countType": "sinceImagePushed",
+          "countUnit": "days",
+          "countNumber": 1
+        },
+        "action": {
+          "type": "expire"
+        }
+      },
+      {
+        "rulePriority": 1,
+        "description": "Keep last 5 feature-tagged images",
+        "selection": {
+          "tagStatus": "tagged",
+          "tagPrefixList": ["feature"],
+          "countType": "imageCountMoreThan",
+          "countNumber": 5
+        },
+        "action": {
+          "type": "expire"
+        }
+      }
+    ]
+  })
+}
\ No newline at end of file
diff --git a/deployment/lambda/walthamforest_etl/docker/main.tf b/deployment/lambda/walthamforest_etl/docker/main.tf
new file mode 100644
index 0000000..e69de29
diff --git a/deployment/lambda/walthamforest_etl/docker/provider.tf b/deployment/lambda/walthamforest_etl/docker/provider.tf
new file mode 100644
index 0000000..e41dcbf
--- /dev/null
+++ b/deployment/lambda/walthamforest_etl/docker/provider.tf
@@ -0,0 +1,15 @@
+terraform {
+  required_providers {
+    aws = {
+      source  = "hashicorp/aws"
+      version = "~> 6.3.0"
+    }
+  }
+  backend "s3" {
+    bucket = "survey-extractor-tf-state"
+    region = "eu-west-2"
+    key    = "env:/dev/lambda/ecr/walthamforest_etl.tfstate"
+  }
+
+  required_version = ">= 1.2.0"
+}
diff --git a/deployment/lambda/walthamforest_etl/main.tf b/deployment/lambda/walthamforest_etl/main.tf
new file mode 100644
index 0000000..e69de29
diff --git a/deployment/lambda/walthamforest_etl/provider.tf b/deployment/lambda/walthamforest_etl/provider.tf
new file mode 100644
index 0000000..51eca0c
--- /dev/null
+++ b/deployment/lambda/walthamforest_etl/provider.tf
@@ -0,0 +1,15 @@
+terraform {
+  required_providers {
+    aws = {
+      source  = "hashicorp/aws"
+      version = "~> 6.3.0"
+    }
+  }
+  backend "s3" {
+    bucket = "survey-extractor-tf-state"
+    region = "eu-west-2"
+    key    = "env:/dev/lambda/eachlambda/walthamforest_etl_lambda.tfstate"
+  }
+
+  required_version = ">= 1.2.0"
+}
diff --git a/deployment/lambda/walthamforest_etl/vars.tf b/deployment/lambda/walthamforest_etl/vars.tf
new file mode 100644
index 0000000..ecdf359
--- /dev/null
+++ b/deployment/lambda/walthamforest_etl/vars.tf
@@ -0,0 +1,5 @@
+variable "lambda_image_tag" {
+  description = "Docker image tag (e.g. GitHub SHA)"
+  type        = string
+  default     = "local-dev-latest"
+}
\ No newline at end of file
diff --git a/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf b/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf
new file mode 100644
index 0000000..2ac9b38
--- /dev/null
+++ b/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf
@@ -0,0 +1,83 @@
+# Reference existing IAM role
+data "aws_iam_role" "lambda_exec_role" {
+  name = "lambda-exec-role"
+}
+
+# Reference existing ECR repository (docker/ecr.tf creates it as "walthamforest_etl")
+data "aws_ecr_repository" "walthamforest_etl_ecr" {
+  name = "walthamforest_etl"
+}
+
+# SQS queue that triggers the walthamforest_etl Lambda
+resource "aws_sqs_queue" "walthamforest_etl_queue" {
+  name = "walthamforest_etl-queue"
+
+  # Must be >= the Lambda timeout or the event source mapping is rejected;
+  # AWS recommends at least 6x the function timeout (6 * 300 s).
+  visibility_timeout_seconds = 1800
+}
+
+
+# IAM policy specific to this Lambda
+resource "aws_iam_policy" "walthamforest_etl_policy" {
+  name = "walthamforest_etl-loader-policy"
+
+  policy = jsonencode({
+    Version = "2012-10-17",
+    Statement = [
+      {
+        Effect = "Allow",
+        Action = [
+          "sqs:ReceiveMessage",
+          "sqs:DeleteMessage",
+          "sqs:GetQueueAttributes"
+        ],
+        Resource = aws_sqs_queue.walthamforest_etl_queue.arn
+      },
+      {
+        Effect = "Allow",
+        Action = [
+          "ecr:GetDownloadUrlForLayer",
+          "ecr:BatchGetImage",
+          "ecr:BatchCheckLayerAvailability"
+        ],
+        Resource = data.aws_ecr_repository.walthamforest_etl_ecr.arn
+      },
+      {
+        Effect   = "Allow",
+        Action   = ["ecr:GetAuthorizationToken"],
+        Resource = "*"
+      }
+    ]
+  })
+}
+
+# Attach the policy above to the existing execution role
+resource "aws_iam_role_policy_attachment" "walthamforest_etl_policy_attach" {
+  role       = data.aws_iam_role.lambda_exec_role.name
+  policy_arn = aws_iam_policy.walthamforest_etl_policy.arn
+}
+
+# Lambda function
+resource "aws_lambda_function" "waltham_forest_etl" {
+  function_name = "walthamforest_etl"
+  role          = data.aws_iam_role.lambda_exec_role.arn
+  package_type  = "Image"
+  image_uri     = "${data.aws_ecr_repository.walthamforest_etl_ecr.repository_url}:${var.lambda_image_tag}"
+
+  # Timeout in seconds (Lambda maximum is 900 s / 15 min)
+  timeout = 300
+
+  # Memory in MB (Lambda default is 128 MB)
+  memory_size = 2048
+
+  # NOTE: never hard-code DATABASE_URL (or any credential) in this file, not
+  # even in a comment; inject it at deploy time from Secrets Manager or SSM.
+}
+
+# SQS trigger
+resource "aws_lambda_event_source_mapping" "extractor_and_loader_trigger" {
+  event_source_arn = aws_sqs_queue.walthamforest_etl_queue.arn
+  function_name    = aws_lambda_function.waltham_forest_etl.arn
+  batch_size       = 1
+}