added lambda files

This commit is contained in:
Jun-te Kim 2025-11-11 14:06:35 +00:00
parent 771f87bb70
commit 0daa1592d7
14 changed files with 342 additions and 0 deletions

37
.devcontainer/Dockerfile Normal file
View file

@ -0,0 +1,37 @@
# Dev-container image: Python 3.12 on Debian bullseye, libpostal built from
# source, and a passwordless-sudo user for the editor to attach as.
FROM python:3.12-bullseye

# Name of the non-root user created in step 3 (override with --build-arg USER=...).
ARG USER=vscode
# Build-time only (ARG rather than ENV) so it is not baked into the runtime env.
ARG DEBIAN_FRONTEND=noninteractive

# 1) Toolchain + utilities for building libpostal
RUN apt-get update && apt-get install -y --no-install-recommends \
        autoconf \
        automake \
        build-essential \
        ca-certificates \
        curl \
        git \
        jq \
        libtool \
        pkg-config \
        sudo \
        vim \
    && rm -rf /var/lib/apt/lists/*

# 2) Build and install libpostal from source.
#    Clone, build and clean-up stay in one RUN so the source tree never
#    persists in an image layer.
RUN git clone --depth 1 https://github.com/openvenues/libpostal /tmp/libpostal \
    && cd /tmp/libpostal \
    && ./bootstrap.sh \
    && ./configure --datadir=/usr/local/share/libpostal \
    && make -j"$(nproc)" \
    && make install \
    && ldconfig \
    && rm -rf /tmp/libpostal

# 3) Create the user and grant sudo privileges.
#    /bin/bash is used as the login shell because it resolves on both
#    merged-/usr and split-/usr Debian layouts, whereas /usr/bin/bash only
#    exists on the former.
RUN useradd -m -s /bin/bash "${USER}" \
    && echo "${USER} ALL=(ALL) NOPASSWD: ALL" >"/etc/sudoers.d/${USER}" \
    && chmod 0440 "/etc/sudoers.d/${USER}"

# 4) Python deps
ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1
# COPY (not ADD): this is a plain local file, no archive extraction or URL fetch.
COPY asset_list/requirements.txt requirements.txt
RUN pip install -r requirements.txt

# 5) Workdir
WORKDIR /workspaces/model

# 6) Make Python find your package
# Add project root to PYTHONPATH for all processes.
# PYTHONPATH is not defined earlier in this Dockerfile, so appending
# ":${PYTHONPATH}" would expand to a trailing ':' (an empty path entry).
ENV PYTHONPATH=/workspaces/model

View file

@ -0,0 +1,30 @@
{
// Dev container definition: VS Code attaches to the "model" service declared
// in docker-compose.yml and opens /workspaces/model as the workspace.
"name": "Basic Python",
"dockerComposeFile": "docker-compose.yml",
"service": "model",
// User the editor connects as inside the container.
"remoteUser": "vscode",
"workspaceFolder": "/workspaces/model",
// Runs on every container start (not only on first creation).
"postStartCommand": "bash .devcontainer/post-install.sh",
"mounts": [
// Optional: bind-mounts the host's $HOME at /workspaces/home so local files
// (e.g. Downloads) are reachable from inside the container.
"source=${localEnv:HOME},target=/workspaces/home,type=bind"
],
"customizations": {
"vscode": {
"settings": {
"files.defaultWorkspace": "/workspaces/model"
},
// Extensions installed into the container's VS Code server.
"extensions": [
"ms-python.python",
"ms-toolsai.jupyter",
"mechatroner.rainbow-csv",
"ms-toolsai.datawrangler",
"lindacong.vscode-book-reader",
"4ops.terraform",
"fabiospampinato.vscode-todo-plus",
"jgclark.vscode-todo-highlight",
"corentinartaud.pdfpreview"
]
}
}
}

View file

@ -0,0 +1,18 @@
# Compose definition for the dev container: a single "model" service built
# from .devcontainer/Dockerfile with the repository root as build context.
version: '3.8'

services:
  model:
    # Run as the invoking host user (presumably so files written to the bind
    # mount keep host ownership). UID/GID are frequently not exported to the
    # environment Compose reads, in which case the bare "${UID}:${GID}" form
    # interpolates to an empty value; fall back to 1000:1000 instead.
    user: "${UID:-1000}:${GID:-1000}"
    build:
      context: ..
      dockerfile: .devcontainer/Dockerfile
    # Keep the container alive so the editor can attach to it.
    command: sleep infinity
    volumes:
      # Repository root mounted at the workspace folder.
      - ..:/workspaces/model
    networks:
      - model-net

networks:
  model-net:
    driver: bridge

View file

@ -0,0 +1,27 @@
# NOTE: every line of this script is commented out, so running it is a no-op.
# The disabled logic below would: run `poetry install`, look up the Poetry
# virtualenv path, make sure the VS Code machine settings file exists, and use
# jq to set "python.defaultInterpreterPath" to that environment's interpreter.
# #!/bin/bash
# poetry install;
# # Get the Poetry virtual environment path
# VENV_PATH=$(poetry env info --path 2>/dev/null)
# if [ -z "$VENV_PATH" ]; then
# echo "No Poetry environment found. Did you run 'poetry install'?"
# exit 1
# fi
# # Ensure VS Code settings directory exists
# SETTINGS_DIR="/home/vscode/.vscode-server/data/Machine"
# SETTINGS_FILE="$SETTINGS_DIR/settings.json"
# mkdir -p "$SETTINGS_DIR"
# # If settings.json doesn't exist, create a default one
# if [ ! -f "$SETTINGS_FILE" ]; then
# echo "{}" > "$SETTINGS_FILE"
# fi
# # Update VS Code settings to use the Poetry virtual environment
# jq --arg venv "$VENV_PATH/bin/python" '.["python.defaultInterpreterPath"] = $venv' \
# "$SETTINGS_FILE" > "$SETTINGS_FILE.tmp" && mv "$SETTINGS_FILE.tmp" "$SETTINGS_FILE"
# echo "✅ Updated VS Code to use Poetry environment: $VENV_PATH"

View file

@ -0,0 +1,21 @@
# Exclusions: bulky data and media, archives, Python bytecode, build output,
# and directories that are not needed by the consumer of this ignore file.
# NOTE(review): if this is a .dockerignore, patterns without a leading "**/"
# only match at the context root - confirm that is the intent.

# Documents, data files and notebooks
*.csv
*.ipynb
*.parquet
*.pdf
*.xml

# Images and video
*.jpg
*.mov
*.mp4
*.png

# Archives
*.tar.gz
*.zip

# Python bytecode and compiled extensions
__pycache__/
*.pyc
*.pyd
*.pyo

# Build output and caches
.etl_cache/
build/
dist/

# Project directories excluded wholesale
docs/
tests/

View file

@ -0,0 +1,25 @@
# Lambda container image: AWS Python 3.12 base image, dependencies installed
# with Poetry into the image's own interpreter, handler taken from app.py.
FROM public.ecr.aws/lambda/python:3.12

# Optional Poetry pin, e.g. --build-arg POETRY_INSTALL_VERSION=<x.y.z>.
# Left empty (the default) the installer picks its latest release, as before.
ARG POETRY_INSTALL_VERSION=""

# Make a failure anywhere in a pipeline fail the RUN step; with the default
# shell only the exit status of the last command (`python3 -`) is checked.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Install Poetry. -f makes curl exit non-zero on an HTTP error instead of
# piping an error page into the interpreter.
RUN curl -fsSL https://install.python-poetry.org \
    | python3 - ${POETRY_INSTALL_VERSION:+--version "${POETRY_INSTALL_VERSION}"}

# Add Poetry to PATH
ENV PATH="/root/.local/bin:$PATH"

# Set working directory
WORKDIR /var/task

# Everything `poetry install` reads is copied before app.py, so editing the
# handler alone does not invalidate the dependency-install layer.
COPY pyproject.toml poetry.lock README.md ./
COPY etl/ etl/

# virtualenvs.create=false makes Poetry install into the image's Python
# environment rather than a separate virtualenv.
RUN poetry config virtualenvs.create false \
    && poetry install --only main --no-interaction --no-ansi

# Copy app code
COPY deployment/lambda/extractor_and_loader/docker/app.py ./

# Set Lambda handler (module "app", function "handler")
CMD ["app.handler"]

View file

@ -0,0 +1,3 @@
def handler(event, context):
    """Lambda entry point that prints a two-line placeholder message.

    Args:
        event: Invocation payload; not read by this function.
        context: Invocation context object; not read by this function.

    Returns:
        None.
    """
    print("Hello and welcome to the WHLG Calculator")
    print("Please contact the tech team for implementation")

View file

@ -0,0 +1,63 @@
# ECR repo
# The repository policy and lifecycle policy in this file both attach to it
# through its `name` attribute.
resource "aws_ecr_repository" "whlg_calc_adhoc_ecr" {
name = "whlg_calc_adhoc_ecr"
}
# Repository policy: allows the Lambda service principal to pull images from
# the repository (layer download, image fetch, layer availability check).
resource "aws_ecr_repository_policy" "whlg_calc_adhoc_ecr_access" {
  repository = aws_ecr_repository.whlg_calc_adhoc_ecr.name

  policy = jsonencode({
    Version = "2008-10-17"
    Statement = [
      {
        Sid       = "AllowLambdaPull"
        Effect    = "Allow"
        Principal = { Service = "lambda.amazonaws.com" }
        Action = [
          "ecr:GetDownloadUrlForLayer",
          "ecr:BatchGetImage",
          "ecr:BatchCheckLayerAvailability",
        ]
      },
    ]
  })
}
# ECR lifecycle policy with two rules:
#   priority 1 - among images whose tag starts with "feature", expire all but
#                the 5 most recent
#   priority 2 - expire untagged images one day after they were pushed
# The descriptions below state what each rule actually does.
# NOTE(review): if a 14-day window for untagged images was intended, raise
# countNumber in the priority-2 rule to 14.
resource "aws_ecr_lifecycle_policy" "whlg_calc_adhoc_loader_lifecycle" {
  repository = aws_ecr_repository.whlg_calc_adhoc_ecr.name

  policy = jsonencode({
    rules = [
      {
        rulePriority = 2
        description  = "Expire untagged images older than 1 day"
        selection = {
          tagStatus   = "untagged"
          countType   = "sinceImagePushed"
          countUnit   = "days"
          countNumber = 1
        }
        action = {
          type = "expire"
        }
      },
      {
        rulePriority = 1
        description  = "Keep last 5 images tagged with the feature prefix"
        selection = {
          tagStatus     = "tagged"
          tagPrefixList = ["feature"]
          countType     = "imageCountMoreThan"
          countNumber   = 5
        }
        action = {
          type = "expire"
        }
      },
    ]
  })
}

View file

@ -0,0 +1,15 @@
# Terraform settings for the ECR stack: CLI 1.2.0 or newer, AWS provider
# 6.3.x, state stored in an S3 bucket in eu-west-2.
# NOTE(review): the "env:/dev/" key prefix resembles the S3 backend's default
# workspace key prefix - confirm hard-coding it here is intentional.
terraform {
  required_version = ">= 1.2.0"

  required_providers {
    aws = {
      version = "~> 6.3.0"
      source  = "hashicorp/aws"
    }
  }

  backend "s3" {
    bucket = "whlg-calc-tf-state"
    key    = "env:/dev/lambda/ecr/whlg-calc.tfstate"
    region = "eu-west-2"
  }
}

View file

@ -0,0 +1,15 @@
# Terraform settings for the Lambda stack: CLI 1.2.0 or newer, AWS provider
# 6.3.x, state stored in an S3 bucket in eu-west-2 under its own key.
# NOTE(review): the "env:/dev/" key prefix resembles the S3 backend's default
# workspace key prefix - confirm hard-coding it here is intentional.
terraform {
  required_version = ">= 1.2.0"

  required_providers {
    aws = {
      version = "~> 6.3.0"
      source  = "hashicorp/aws"
    }
  }

  backend "s3" {
    bucket = "whlg-calc-tf-state"
    key    = "env:/dev/lambda/eachlambda/whlg_calc_lambda.tfstate"
    region = "eu-west-2"
  }
}

View file

@ -0,0 +1,5 @@
# Tag appended to the ECR repository URL to form the Lambda image URI.
# Falls back to "local-dev-latest" when no value is supplied.
variable "lambda_image_tag" {
  type        = string
  default     = "local-dev-latest"
  description = "Docker image tag (e.g. GitHub SHA)"
}

View file

@ -0,0 +1,83 @@
# Reference existing IAM role
# Looked up by name rather than created here; it is used as the Lambda
# function's execution role and receives the policy attachment in this file.
data "aws_iam_role" "lambda_exec_role" {
name = "lambda-exec-role"
}
# Reference existing ECR repository
# Looked up by name rather than created here; its URL builds the Lambda image
# URI and its ARN scopes the ECR pull permissions in the IAM policy.
data "aws_ecr_repository" "whlg_calc_adhoc_ecr" {
name = "whlg_calc_adhoc_ecr"
}
# Queue that feeds the Lambda function through the event source mapping.
resource "aws_sqs_queue" "whlg_calc_adhoc_queue" {
  name = "whlg_calc_adhoc-queue"

  # 1800 s = 30 minutes, i.e. six times a 300-second function timeout.
  visibility_timeout_seconds = 1800
}
# Custom IAM policy for the whlg_calc_adhoc Lambda. Grants:
#   * consuming messages from the whlg_calc_adhoc queue
#   * pulling image layers from the whlg_calc_adhoc ECR repository
#   * obtaining an ECR authorization token (this action only accepts "*")
# The policy name follows the whlg_calc_adhoc naming used by every other
# resource in this stack, so it cannot collide with a policy owned by another
# project in the same account.
# NOTE(review): if the previous name was already applied, Terraform will
# replace the policy and re-point the attachment on the next apply.
resource "aws_iam_policy" "whlg_calc_adhoc_policy" {
  name = "whlg_calc_adhoc_policy_lambda"

  policy = jsonencode({
    Version = "2012-10-17",
    Statement = [
      {
        Effect = "Allow",
        Action = [
          "sqs:ReceiveMessage",
          "sqs:DeleteMessage",
          "sqs:GetQueueAttributes",
          "sqs:GetQueueUrl",
          "sqs:ChangeMessageVisibility"
        ],
        Resource = aws_sqs_queue.whlg_calc_adhoc_queue.arn
      },
      {
        Effect = "Allow",
        Action = [
          "ecr:GetDownloadUrlForLayer",
          "ecr:BatchGetImage",
          "ecr:BatchCheckLayerAvailability"
        ],
        Resource = data.aws_ecr_repository.whlg_calc_adhoc_ecr.arn
      },
      {
        Effect   = "Allow",
        Action   = ["ecr:GetAuthorizationToken"],
        Resource = "*"
      }
    ]
  })
}
# Attaches the custom policy to the pre-existing Lambda execution role.
resource "aws_iam_role_policy_attachment" "whlg_calc_adhoc_policy_attach" {
  policy_arn = aws_iam_policy.whlg_calc_adhoc_policy.arn
  role       = data.aws_iam_role.lambda_exec_role.name
}
# Lambda function, deployed as a container image pulled from the ECR
# repository at the tag given by var.lambda_image_tag.
resource "aws_lambda_function" "whlg_calc_adhoc" {
  function_name = "whlg_calc_adhoc"
  role          = data.aws_iam_role.lambda_exec_role.arn
  package_type  = "Image"
  image_uri     = "${data.aws_ecr_repository.whlg_calc_adhoc_ecr.repository_url}:${var.lambda_image_tag}"

  # Timeout in seconds (max 900 sec / 15 min). Set explicitly because the
  # queue's 1800 s visibility timeout is sized as six times a 300 s function
  # timeout; leaving it unset falls back to the provider default.
  timeout = 300

  # Increase memory (default 128 MB)
  memory_size = 2048 # try 1024 or 2048 MB to start

  # SECURITY: never commit connection strings containing credentials, not
  # even in comments. Supply DATABASE_URL from a secret store or a sensitive
  # variable instead.
  # NOTE(review): a real database password and hostname were previously
  # committed at this spot - rotate that credential.
  # environment {
  #   variables = {
  #     DATABASE_URL = "postgresql://<user>:<password>@<host>:5432/<database>"
  #   }
  # }
}
# Wires the queue to the function: messages are delivered one per invocation.
resource "aws_lambda_event_source_mapping" "whlg_calc_adhoc_trigger" {
  function_name    = aws_lambda_function.whlg_calc_adhoc.arn
  event_source_arn = aws_sqs_queue.whlg_calc_adhoc_queue.arn
  batch_size       = 1
}