From 0daa1592d7f218d73acab5e02d5b14906f85b6aa Mon Sep 17 00:00:00 2001
From: Jun-te Kim
Date: Tue, 11 Nov 2025 14:06:35 +0000
Subject: [PATCH] added lambda files

---
Review notes (commentary only: `git am` skips everything between the
separator above and the first `diff --git` header).

What this patch adds
  * .devcontainer/ : a VS Code dev container (python:3.12-bullseye image with
    libpostal built from source, a passwordless-sudo `vscode` user, and the
    packages from asset_list/requirements.txt), its compose service, and a
    post-start script whose body is entirely commented out (a no-op today).
  * deployment/lambda/whlg_calculator/docker/ : the container image for the
    WHLG calculator Lambda (stub handler in app.py) plus the Terraform for its
    ECR repository, repository policy and lifecycle policy.
  * deployment/lambda/whlg_calculator/ : Terraform for the SQS queue, IAM
    policy + attachment, the image-based Lambda function and its SQS trigger.

Fixes applied during review (every fix is a same-line-count edit, so the hunk
headers and the diffstat below are unchanged; the `index` blob ids of the four
touched files are stale until the patch is regenerated with format-patch):
  1. docker/Dockerfile: the handler was copied from
     deployment/lambda/extractor_and_loader/docker/app.py, i.e. another
     Lambda's code. It now copies the app.py added by this patch.
  2. whlg_lambda.tf: a commented-out DATABASE_URL embedded a plaintext
     database password and hostname. It is replaced by a placeholder; the
     exposed credential should be rotated.
  3. whlg_lambda.tf: `timeout` was commented out, leaving the function on the
     provider default while the queue comment documents a 300 s function
     timeout (visibility timeout 1800 s = 6 x 300 s). `timeout = 300` is now set.
  4. whlg_lambda.tf: the IAM policy was named walthamforest_adhoc_policy_lambda
     (copy-paste); IAM policy names are unique per account, so it is renamed
     to whlg_calc_adhoc_policy_lambda.
  5. docker/ecr.tf: the comment/description claimed "older than 14 days" for a
     rule that expires untagged images after 1 day. Text now matches the rule.
     NOTE(review): confirm whether 1 day or 14 days was the intended retention.
  6. .devcontainer/Dockerfile: ADD of a plain local file replaced by COPY.

Open items (not changed here)
  * docker/Dockerfile installs Poetry unpinned via `curl | python3`; consider
    pinning the version.
  * docker/.dockerignore sits next to the Dockerfile, but the COPY paths imply
    the build context is the repository root; presumably it is not picked up
    unless the context or file name is adjusted - verify against the build job.
  * The lifecycle rule for tagged images only matches the "feature" prefix, so
    images tagged with the default `local-dev-latest` are never expired.

 .devcontainer/Dockerfile                      | 37 +++++++++
 .devcontainer/devcontainer.json               | 30 +++++++
 .devcontainer/docker-compose.yml              | 18 ++++
 .devcontainer/post-install.sh                 | 27 ++++++
 .../whlg_calculator/docker/.dockerignore      | 21 +++++
 .../lambda/whlg_calculator/docker/Dockerfile  | 25 ++++++
 .../lambda/whlg_calculator/docker/app.py      |  3 +
 .../lambda/whlg_calculator/docker/ecr.tf      | 63 ++++++++++++++
 .../lambda/whlg_calculator/docker/main.tf     |  0
 .../lambda/whlg_calculator/docker/provider.tf | 15 ++++
 deployment/lambda/whlg_calculator/main.tf     |  0
 deployment/lambda/whlg_calculator/provider.tf | 15 ++++
 deployment/lambda/whlg_calculator/vars.tf     |  5 ++
 .../lambda/whlg_calculator/whlg_lambda.tf     | 83 +++++++++++++++++++
 14 files changed, 342 insertions(+)
 create mode 100644 .devcontainer/Dockerfile
 create mode 100644 .devcontainer/devcontainer.json
 create mode 100644 .devcontainer/docker-compose.yml
 create mode 100644 .devcontainer/post-install.sh
 create mode 100644 deployment/lambda/whlg_calculator/docker/.dockerignore
 create mode 100644 deployment/lambda/whlg_calculator/docker/Dockerfile
 create mode 100644 deployment/lambda/whlg_calculator/docker/app.py
 create mode 100644 deployment/lambda/whlg_calculator/docker/ecr.tf
 create mode 100644 deployment/lambda/whlg_calculator/docker/main.tf
 create mode 100644 deployment/lambda/whlg_calculator/docker/provider.tf
 create mode 100644 deployment/lambda/whlg_calculator/main.tf
 create mode 100644 deployment/lambda/whlg_calculator/provider.tf
 create mode 100644 deployment/lambda/whlg_calculator/vars.tf
 create mode 100644 deployment/lambda/whlg_calculator/whlg_lambda.tf

diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
new file mode 100644
index 00000000..4d898973
--- /dev/null
+++ b/.devcontainer/Dockerfile
@@ -0,0 +1,37 @@
+FROM python:3.12-bullseye
+
+ARG USER=vscode
+ARG DEBIAN_FRONTEND=noninteractive
+
+# 1) Toolchain + utilities for building libpostal
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    sudo jq vim curl git ca-certificates \
+    build-essential pkg-config automake autoconf libtool \
+ && rm -rf /var/lib/apt/lists/*
+
+# 2) Build and install libpostal from source
+RUN git clone --depth 1 https://github.com/openvenues/libpostal /tmp/libpostal \
+ && cd /tmp/libpostal \
+ && ./bootstrap.sh \
+ && ./configure --datadir=/usr/local/share/libpostal \
+ && make -j"$(nproc)" \
+ && make install \
+ && ldconfig \
+ && rm -rf /tmp/libpostal
+
+# 3) Create the user and grant sudo privileges
+RUN useradd -m -s /usr/bin/bash ${USER} \
+ && echo "${USER} ALL=(ALL) NOPASSWD: ALL" >/etc/sudoers.d/${USER} \
+ && chmod 0440 /etc/sudoers.d/${USER}
+
+# 4) Python deps
+ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1
+COPY asset_list/requirements.txt requirements.txt
+RUN pip install -r requirements.txt
+
+# 5) Workdir
+WORKDIR /workspaces/model
+
+# 6) Make Python find your package
+# Add project root to PYTHONPATH for all processes
+ENV PYTHONPATH=/workspaces/model:${PYTHONPATH}
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
new file mode 100644
index 00000000..49bd6f83
--- /dev/null
+++ b/.devcontainer/devcontainer.json
@@ -0,0 +1,30 @@
+{
+  "name": "Basic Python",
+  "dockerComposeFile": "docker-compose.yml",
+  "service": "model",
+  "remoteUser": "vscode",
+  "workspaceFolder": "/workspaces/model",
+  "postStartCommand": "bash .devcontainer/post-install.sh",
+  "mounts": [
+    // Optional, just makes getting from Downloads (local env) easier
+    "source=${localEnv:HOME},target=/workspaces/home,type=bind"
+  ],
+  "customizations": {
+    "vscode": {
+      "settings": {
+        "files.defaultWorkspace": "/workspaces/model"
+      },
+      "extensions": [
+        "ms-python.python",
+        "ms-toolsai.jupyter",
+        "mechatroner.rainbow-csv",
+        "ms-toolsai.datawrangler",
+        "lindacong.vscode-book-reader",
+        "4ops.terraform",
+        "fabiospampinato.vscode-todo-plus",
+        "jgclark.vscode-todo-highlight",
+        "corentinartaud.pdfpreview"
+      ]
+    }
+  }
+}
diff --git a/.devcontainer/docker-compose.yml b/.devcontainer/docker-compose.yml
new file mode 100644
index 00000000..7f60d34d
--- /dev/null
+++ b/.devcontainer/docker-compose.yml
@@ -0,0 +1,18 @@
+version: '3.8'
+
+services:
+  model:
+    user: "${UID}:${GID}"
+    build:
+      context: ..
+      dockerfile: .devcontainer/Dockerfile
+    command: sleep infinity
+    volumes:
+      - ..:/workspaces/model
+    networks:
+      - model-net
+
+networks:
+  model-net:
+    driver: bridge
+
diff --git a/.devcontainer/post-install.sh b/.devcontainer/post-install.sh
new file mode 100644
index 00000000..d9fc3a9e
--- /dev/null
+++ b/.devcontainer/post-install.sh
@@ -0,0 +1,27 @@
+# #!/bin/bash
+# poetry install;
+
+# # Get the Poetry virtual environment path
+# VENV_PATH=$(poetry env info --path 2>/dev/null)
+
+# if [ -z "$VENV_PATH" ]; then
+#     echo "No Poetry environment found. Did you run 'poetry install'?"
+#     exit 1
+# fi
+
+# # Ensure VS Code settings directory exists
+# SETTINGS_DIR="/home/vscode/.vscode-server/data/Machine"
+# SETTINGS_FILE="$SETTINGS_DIR/settings.json"
+
+# mkdir -p "$SETTINGS_DIR"
+
+# # If settings.json doesn't exist, create a default one
+# if [ ! -f "$SETTINGS_FILE" ]; then
+#     echo "{}" > "$SETTINGS_FILE"
+# fi
+
+# # Update VS Code settings to use the Poetry virtual environment
+# jq --arg venv "$VENV_PATH/bin/python" '.["python.defaultInterpreterPath"] = $venv' \
+#     "$SETTINGS_FILE" > "$SETTINGS_FILE.tmp" && mv "$SETTINGS_FILE.tmp" "$SETTINGS_FILE"
+
+# echo "✅ Updated VS Code to use Poetry environment: $VENV_PATH"
diff --git a/deployment/lambda/whlg_calculator/docker/.dockerignore b/deployment/lambda/whlg_calculator/docker/.dockerignore
new file mode 100644
index 00000000..d587d341
--- /dev/null
+++ b/deployment/lambda/whlg_calculator/docker/.dockerignore
@@ -0,0 +1,21 @@
+# Ignore junk and large files
+*.pdf
+*.csv
+*.xml
+*.parquet
+*.ipynb
+*.mp4
+*.mov
+*.jpg
+*.png
+*.zip
+*.tar.gz
+__pycache__/
+*.pyc
+*.pyo
+*.pyd
+build/
+dist/
+.etl_cache/
+tests/
+docs/
diff --git a/deployment/lambda/whlg_calculator/docker/Dockerfile b/deployment/lambda/whlg_calculator/docker/Dockerfile
new file mode 100644
index 00000000..cdd1f8a3
--- /dev/null
+++ b/deployment/lambda/whlg_calculator/docker/Dockerfile
@@ -0,0 +1,25 @@
+FROM public.ecr.aws/lambda/python:3.12
+
+# Install Poetry (you could pin a version if you like)
+RUN curl -sSL https://install.python-poetry.org | python3 -
+
+# Add Poetry to PATH
+ENV PATH="/root/.local/bin:$PATH"
+
+# Set working directory
+WORKDIR /var/task
+
+# Copy Poetry files first to leverage Docker layer caching
+COPY pyproject.toml poetry.lock README.md ./
+COPY etl/ etl/
+
+
+# Install dependencies into /var/task
+RUN poetry config virtualenvs.create false \
+ && poetry install --only main --no-interaction --no-ansi
+
+# Copy this Lambda's handler module (paths are relative to the build context)
+COPY deployment/lambda/whlg_calculator/docker/app.py ./
+
+# Set Lambda handler
+CMD ["app.handler"]
\ No newline at end of file
diff --git a/deployment/lambda/whlg_calculator/docker/app.py b/deployment/lambda/whlg_calculator/docker/app.py
new file mode 100644
index 00000000..4dcf1a8e
--- /dev/null
+++ b/deployment/lambda/whlg_calculator/docker/app.py
@@ -0,0 +1,3 @@
+def handler(event, context):
+    print("Hello and welcome to the WHLG Calculator")
+    print("Please contact the tech team for implementation")
\ No newline at end of file
diff --git a/deployment/lambda/whlg_calculator/docker/ecr.tf b/deployment/lambda/whlg_calculator/docker/ecr.tf
new file mode 100644
index 00000000..a1501dff
--- /dev/null
+++ b/deployment/lambda/whlg_calculator/docker/ecr.tf
@@ -0,0 +1,63 @@
+# ECR repo
+resource "aws_ecr_repository" "whlg_calc_adhoc_ecr" {
+  name = "whlg_calc_adhoc_ecr"
+}
+
+# ECR policy to allow Lambda access
+resource "aws_ecr_repository_policy" "whlg_calc_adhoc_ecr_access" {
+  repository = aws_ecr_repository.whlg_calc_adhoc_ecr.name
+
+  policy = jsonencode({
+    Version = "2008-10-17",
+    Statement = [{
+      Sid    = "AllowLambdaPull",
+      Effect = "Allow",
+      Principal = {
+        Service = "lambda.amazonaws.com"
+      },
+      Action = [
+        "ecr:GetDownloadUrlForLayer",
+        "ecr:BatchGetImage",
+        "ecr:BatchCheckLayerAvailability"
+      ]
+    }]
+  })
+}
+
+
+
+# ECR lifecycle policy: expire untagged images 1 day after push; keep the 5 newest "feature"-prefixed tags
+resource "aws_ecr_lifecycle_policy" "whlg_calc_adhoc_loader_lifecycle" {
+  repository = aws_ecr_repository.whlg_calc_adhoc_ecr.name
+
+  policy = jsonencode({
+    "rules": [
+      {
+        "rulePriority": 2,
+        "description": "Expire untagged images older than 1 day",
+        "selection": {
+          "tagStatus": "untagged",
+          "countType": "sinceImagePushed",
+          "countUnit": "days",
+          "countNumber": 1
+        },
+        "action": {
+          "type": "expire"
+        }
+      },
+      {
+        "rulePriority": 1,
+        "description": "Keep last 5 images",
+        "selection": {
+          "tagStatus": "tagged",
+          "tagPrefixList": ["feature"],
+          "countType": "imageCountMoreThan",
+          "countNumber": 5
+        },
+        "action": {
+          "type": "expire"
+        }
+      }
+    ]
+  })
+}
\ No newline at end of file
diff --git a/deployment/lambda/whlg_calculator/docker/main.tf b/deployment/lambda/whlg_calculator/docker/main.tf
new file mode 100644
index 00000000..e69de29b
diff --git a/deployment/lambda/whlg_calculator/docker/provider.tf b/deployment/lambda/whlg_calculator/docker/provider.tf
new file mode 100644
index 00000000..5f0fef0f
--- /dev/null
+++ b/deployment/lambda/whlg_calculator/docker/provider.tf
@@ -0,0 +1,15 @@
+terraform {
+  required_providers {
+    aws = {
+      source  = "hashicorp/aws"
+      version = "~> 6.3.0"
+    }
+  }
+  backend "s3" {
+    bucket = "whlg-calc-tf-state"
+    region = "eu-west-2"
+    key    = "env:/dev/lambda/ecr/whlg-calc.tfstate"
+  }
+
+  required_version = ">= 1.2.0"
+}
diff --git a/deployment/lambda/whlg_calculator/main.tf b/deployment/lambda/whlg_calculator/main.tf
new file mode 100644
index 00000000..e69de29b
diff --git a/deployment/lambda/whlg_calculator/provider.tf b/deployment/lambda/whlg_calculator/provider.tf
new file mode 100644
index 00000000..df9abf1c
--- /dev/null
+++ b/deployment/lambda/whlg_calculator/provider.tf
@@ -0,0 +1,15 @@
+terraform {
+  required_providers {
+    aws = {
+      source  = "hashicorp/aws"
+      version = "~> 6.3.0"
+    }
+  }
+  backend "s3" {
+    bucket = "whlg-calc-tf-state"
+    region = "eu-west-2"
+    key    = "env:/dev/lambda/eachlambda/whlg_calc_lambda.tfstate"
+  }
+
+  required_version = ">= 1.2.0"
+}
diff --git a/deployment/lambda/whlg_calculator/vars.tf b/deployment/lambda/whlg_calculator/vars.tf
new file mode 100644
index 00000000..ecdf359d
--- /dev/null
+++ b/deployment/lambda/whlg_calculator/vars.tf
@@ -0,0 +1,5 @@
+variable "lambda_image_tag" {
+  description = "Docker image tag (e.g. GitHub SHA)"
+  type        = string
+  default     = "local-dev-latest"
+}
\ No newline at end of file
diff --git a/deployment/lambda/whlg_calculator/whlg_lambda.tf b/deployment/lambda/whlg_calculator/whlg_lambda.tf
new file mode 100644
index 00000000..0a5433a9
--- /dev/null
+++ b/deployment/lambda/whlg_calculator/whlg_lambda.tf
@@ -0,0 +1,83 @@
+# Reference existing IAM role
+data "aws_iam_role" "lambda_exec_role" {
+  name = "lambda-exec-role"
+}
+
+# Reference existing ECR repository
+data "aws_ecr_repository" "whlg_calc_adhoc_ecr" {
+  name = "whlg_calc_adhoc_ecr"
+}
+
+# SQS queue
+resource "aws_sqs_queue" "whlg_calc_adhoc_queue" {
+  name                       = "whlg_calc_adhoc-queue"
+  visibility_timeout_seconds = 1800 # 30 minutes (>= 300s and ~6x Lambda timeout)
+}
+
+
+# Custom IAM policy for the whlg_calc_adhoc Lambda: consume its SQS queue and pull its ECR image
+resource "aws_iam_policy" "whlg_calc_adhoc_policy" {
+  name = "whlg_calc_adhoc_policy_lambda"
+
+  policy = jsonencode({
+    Version = "2012-10-17",
+    Statement = [
+      {
+        Effect = "Allow",
+        Action = [
+          "sqs:ReceiveMessage",
+          "sqs:DeleteMessage",
+          "sqs:GetQueueAttributes",
+          "sqs:GetQueueUrl",
+          "sqs:ChangeMessageVisibility"
+        ],
+        Resource = aws_sqs_queue.whlg_calc_adhoc_queue.arn
+      },
+      {
+        Effect = "Allow",
+        Action = [
+          "ecr:GetDownloadUrlForLayer",
+          "ecr:BatchGetImage",
+          "ecr:BatchCheckLayerAvailability"
+        ],
+        Resource = data.aws_ecr_repository.whlg_calc_adhoc_ecr.arn
+      },
+      {
+        Effect   = "Allow",
+        Action   = ["ecr:GetAuthorizationToken"],
+        Resource = "*"
+      }
+    ]
+  })
+}
+
+resource "aws_iam_role_policy_attachment" "whlg_calc_adhoc_policy_attach" {
+  role       = data.aws_iam_role.lambda_exec_role.name
+  policy_arn = aws_iam_policy.whlg_calc_adhoc_policy.arn
+}
+
+# Lambda function
+resource "aws_lambda_function" "whlg_calc_adhoc" {
+  function_name = "whlg_calc_adhoc"
+  role          = data.aws_iam_role.lambda_exec_role.arn
+  package_type  = "Image"
+  image_uri     = "${data.aws_ecr_repository.whlg_calc_adhoc_ecr.repository_url}:${var.lambda_image_tag}"
+  # Timeout in seconds (Lambda max is 900); the queue's visibility timeout above is 6x this value
+  timeout = 300 # 5 minutes
+
+  # Increase memory (default 128 MB)
+  memory_size = 2048 # try 1024 or 2048 MB to start
+
+  # environment {
+  #   variables = {
+  #     DATABASE_URL = "postgresql://USER:PASSWORD@HOST:5432/surveyDB" # placeholder only: inject from a secret store, never commit credentials
+  #   }
+  # }
+}
+
+# SQS trigger
+resource "aws_lambda_event_source_mapping" "whlg_calc_adhoc_trigger" {
+  event_source_arn = aws_sqs_queue.whlg_calc_adhoc_queue.arn
+  function_name    = aws_lambda_function.whlg_calc_adhoc.arn
+  batch_size       = 1
+}