From 0daa1592d7f218d73acab5e02d5b14906f85b6aa Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 11 Nov 2025 14:06:35 +0000 Subject: [PATCH 01/15] added lambda files --- .devcontainer/Dockerfile | 37 +++++++++ .devcontainer/devcontainer.json | 30 +++++++ .devcontainer/docker-compose.yml | 18 ++++ .devcontainer/post-install.sh | 27 ++++++ .../whlg_calculator/docker/.dockerignore | 21 +++++ .../lambda/whlg_calculator/docker/Dockerfile | 25 ++++++ .../lambda/whlg_calculator/docker/app.py | 3 + .../lambda/whlg_calculator/docker/ecr.tf | 63 ++++++++++++++ .../lambda/whlg_calculator/docker/main.tf | 0 .../lambda/whlg_calculator/docker/provider.tf | 15 ++++ deployment/lambda/whlg_calculator/main.tf | 0 deployment/lambda/whlg_calculator/provider.tf | 15 ++++ deployment/lambda/whlg_calculator/vars.tf | 5 ++ .../lambda/whlg_calculator/whlg_lambda.tf | 83 +++++++++++++++++++ 14 files changed, 342 insertions(+) create mode 100644 .devcontainer/Dockerfile create mode 100644 .devcontainer/devcontainer.json create mode 100644 .devcontainer/docker-compose.yml create mode 100644 .devcontainer/post-install.sh create mode 100644 deployment/lambda/whlg_calculator/docker/.dockerignore create mode 100644 deployment/lambda/whlg_calculator/docker/Dockerfile create mode 100644 deployment/lambda/whlg_calculator/docker/app.py create mode 100644 deployment/lambda/whlg_calculator/docker/ecr.tf create mode 100644 deployment/lambda/whlg_calculator/docker/main.tf create mode 100644 deployment/lambda/whlg_calculator/docker/provider.tf create mode 100644 deployment/lambda/whlg_calculator/main.tf create mode 100644 deployment/lambda/whlg_calculator/provider.tf create mode 100644 deployment/lambda/whlg_calculator/vars.tf create mode 100644 deployment/lambda/whlg_calculator/whlg_lambda.tf diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile new file mode 100644 index 00000000..4d898973 --- /dev/null +++ b/.devcontainer/Dockerfile @@ -0,0 +1,37 @@ +FROM python:3.12-bullseye + +ARG USER=vscode +ARG DEBIAN_FRONTEND=noninteractive + +# 1) Toolchain + utilities for building libpostal +RUN apt-get update && apt-get install -y --no-install-recommends \ + sudo jq vim curl git ca-certificates \ + build-essential pkg-config automake autoconf libtool \ + && rm -rf /var/lib/apt/lists/* + +# 2) Build and install libpostal from source +RUN git clone --depth 1 https://github.com/openvenues/libpostal /tmp/libpostal \ + && cd /tmp/libpostal \ + && ./bootstrap.sh \ + && ./configure --datadir=/usr/local/share/libpostal \ + && make -j"$(nproc)" \ + && make install \ + && ldconfig \ + && rm -rf /tmp/libpostal + +# 3) Create the user and grant sudo privileges +RUN useradd -m -s /usr/bin/bash ${USER} \ + && echo "${USER} ALL=(ALL) NOPASSWD: ALL" >/etc/sudoers.d/${USER} \ + && chmod 0440 /etc/sudoers.d/${USER} + +# 4) Python deps +ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1 +ADD asset_list/requirements.txt requirements.txt +RUN pip install -r requirements.txt + +# 5) Workdir +WORKDIR /workspaces/model + +# 6) Make Python find your package +# Add project root to PYTHONPATH for all processes +ENV PYTHONPATH=/workspaces/model:${PYTHONPATH} diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 00000000..49bd6f83 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,30 @@ +{ + "name": "Basic Python", + "dockerComposeFile": "docker-compose.yml", + "service": "model", + "remoteUser": "vscode", + "workspaceFolder": "/workspaces/model", + "postStartCommand": "bash .devcontainer/post-install.sh", + "mounts": [ + // Optional, just makes getting from Downloads (local env) easier + "source=${localEnv:HOME},target=/workspaces/home,type=bind" + ], + "customizations": { + "vscode": { + "settings": { + "files.defaultWorkspace": "/workspaces/model" + }, + "extensions": [ + "ms-python.python", + "ms-toolsai.jupyter", + "mechatroner.rainbow-csv", + "ms-toolsai.datawrangler", + "lindacong.vscode-book-reader", + "4ops.terraform", + "fabiospampinato.vscode-todo-plus", + "jgclark.vscode-todo-highlight", + "corentinartaud.pdfpreview" + ] + } + } +} diff --git a/.devcontainer/docker-compose.yml b/.devcontainer/docker-compose.yml new file mode 100644 index 00000000..7f60d34d --- /dev/null +++ b/.devcontainer/docker-compose.yml @@ -0,0 +1,18 @@ +version: '3.8' + +services: + model: + user: "${UID}:${GID}" + build: + context: .. + dockerfile: .devcontainer/Dockerfile + command: sleep infinity + volumes: + - ..:/workspaces/model + networks: + - model-net + +networks: + model-net: + driver: bridge + diff --git a/.devcontainer/post-install.sh b/.devcontainer/post-install.sh new file mode 100644 index 00000000..d9fc3a9e --- /dev/null +++ b/.devcontainer/post-install.sh @@ -0,0 +1,27 @@ +# #!/bin/bash +# poetry install; + +# # Get the Poetry virtual environment path +# VENV_PATH=$(poetry env info --path 2>/dev/null) + +# if [ -z "$VENV_PATH" ]; then +# echo "No Poetry environment found. Did you run 'poetry install'?" +# exit 1 +# fi + +# # Ensure VS Code settings directory exists +# SETTINGS_DIR="/home/vscode/.vscode-server/data/Machine" +# SETTINGS_FILE="$SETTINGS_DIR/settings.json" + +# mkdir -p "$SETTINGS_DIR" + +# # If settings.json doesn't exist, create a default one +# if [ ! -f "$SETTINGS_FILE" ]; then +# echo "{}" > "$SETTINGS_FILE" +# fi + +# # Update VS Code settings to use the Poetry virtual environment +# jq --arg venv "$VENV_PATH/bin/python" '.["python.defaultInterpreterPath"] = $venv' \ +# "$SETTINGS_FILE" > "$SETTINGS_FILE.tmp" && mv "$SETTINGS_FILE.tmp" "$SETTINGS_FILE" + +# echo "✅ Updated VS Code to use Poetry environment: $VENV_PATH" diff --git a/deployment/lambda/whlg_calculator/docker/.dockerignore b/deployment/lambda/whlg_calculator/docker/.dockerignore new file mode 100644 index 00000000..d587d341 --- /dev/null +++ b/deployment/lambda/whlg_calculator/docker/.dockerignore @@ -0,0 +1,21 @@ +# Ignore junk and large files +*.pdf +*.csv +*.xml +*.parquet +*.ipynb +*.mp4 +*.mov +*.jpg +*.png +*.zip +*.tar.gz +__pycache__/ +*.pyc +*.pyo +*.pyd +build/ +dist/ +.etl_cache/ +tests/ +docs/ diff --git a/deployment/lambda/whlg_calculator/docker/Dockerfile b/deployment/lambda/whlg_calculator/docker/Dockerfile new file mode 100644 index 00000000..cdd1f8a3 --- /dev/null +++ b/deployment/lambda/whlg_calculator/docker/Dockerfile @@ -0,0 +1,25 @@ +FROM public.ecr.aws/lambda/python:3.12 + +# Install Poetry (you could pin a version if you like) +RUN curl -sSL https://install.python-poetry.org | python3 - + +# Add Poetry to PATH +ENV PATH="/root/.local/bin:$PATH" + +# Set working directory +WORKDIR /var/task + +# Copy Poetry files first to leverage Docker layer caching +COPY pyproject.toml poetry.lock README.md ./ +COPY etl/ etl/ + + +# Install dependencies into /var/task +RUN poetry config virtualenvs.create false \ + && poetry install --only main --no-interaction --no-ansi + +# Copy app code +COPY deployment/lambda/extractor_and_loader/docker/app.py ./ + +# Set Lambda handler +CMD ["app.handler"] \ No newline at end of file diff --git a/deployment/lambda/whlg_calculator/docker/app.py b/deployment/lambda/whlg_calculator/docker/app.py new file mode 100644 index 00000000..4dcf1a8e --- /dev/null +++ b/deployment/lambda/whlg_calculator/docker/app.py @@ -0,0 +1,3 @@ +def handler(event, context): + print("Hello and welcome to the WHLG Calculator") + print("Please contact the tech team for implementation") \ No newline at end of file diff --git a/deployment/lambda/whlg_calculator/docker/ecr.tf b/deployment/lambda/whlg_calculator/docker/ecr.tf new file mode 100644 index 00000000..a1501dff --- /dev/null +++ b/deployment/lambda/whlg_calculator/docker/ecr.tf @@ -0,0 +1,63 @@ +# ECR repo +resource "aws_ecr_repository" "whlg_calc_adhoc_ecr" { + name = "whlg_calc_adhoc_ecr" +} + +# ECR policy to allow Lambda access +resource "aws_ecr_repository_policy" "whlg_calc_adhoc_ecr_access" { + repository = aws_ecr_repository.whlg_calc_adhoc_ecr.name + + policy = jsonencode({ + Version = "2008-10-17", + Statement = [{ + Sid = "AllowLambdaPull", + Effect = "Allow", + Principal = { + Service = "lambda.amazonaws.com" + }, + Action = [ + "ecr:GetDownloadUrlForLayer", + "ecr:BatchGetImage", + "ecr:BatchCheckLayerAvailability" + ] + }] + }) +} + + + +# ECR lifecycle policy to delete tagged images older than 14 days +resource "aws_ecr_lifecycle_policy" "whlg_calc_adhoc_loader_lifecycle" { + repository = aws_ecr_repository.whlg_calc_adhoc_ecr.name + + policy = jsonencode({ + "rules": [ + { + "rulePriority": 2, + "description": "Expire images older than 14 days", + "selection": { + "tagStatus": "untagged", + "countType": "sinceImagePushed", + "countUnit": "days", + "countNumber": 1 + }, + "action": { + "type": "expire" + } + }, + { + "rulePriority": 1, + "description": "Keep last 5 images", + "selection": { + "tagStatus": "tagged", + "tagPrefixList": ["feature"], + "countType": "imageCountMoreThan", + "countNumber": 5 + }, + "action": { + "type": "expire" + } + } + ] + }) +} \ No newline at end of file diff --git a/deployment/lambda/whlg_calculator/docker/main.tf b/deployment/lambda/whlg_calculator/docker/main.tf new file mode 100644 index 00000000..e69de29b diff --git a/deployment/lambda/whlg_calculator/docker/provider.tf b/deployment/lambda/whlg_calculator/docker/provider.tf new file mode 100644 index 00000000..5f0fef0f --- /dev/null +++ b/deployment/lambda/whlg_calculator/docker/provider.tf @@ -0,0 +1,15 @@ +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 6.3.0" + } + } + backend "s3" { + bucket = "whlg-calc-tf-state" + region = "eu-west-2" + key = "env:/dev/lambda/ecr/whlg-calc.tfstate" + } + + required_version = ">= 1.2.0" +} diff --git a/deployment/lambda/whlg_calculator/main.tf b/deployment/lambda/whlg_calculator/main.tf new file mode 100644 index 00000000..e69de29b diff --git a/deployment/lambda/whlg_calculator/provider.tf b/deployment/lambda/whlg_calculator/provider.tf new file mode 100644 index 00000000..df9abf1c --- /dev/null +++ b/deployment/lambda/whlg_calculator/provider.tf @@ -0,0 +1,15 @@ +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 6.3.0" + } + } + backend "s3" { + bucket = "whlg-calc-tf-state" + region = "eu-west-2" + key = "env:/dev/lambda/eachlambda/whlg_calc_lambda.tfstate" + } + + required_version = ">= 1.2.0" +} diff --git a/deployment/lambda/whlg_calculator/vars.tf b/deployment/lambda/whlg_calculator/vars.tf new file mode 100644 index 00000000..ecdf359d --- /dev/null +++ b/deployment/lambda/whlg_calculator/vars.tf @@ -0,0 +1,5 @@ +variable "lambda_image_tag" { + description = "Docker image tag (e.g. GitHub SHA)" + type = string + default = "local-dev-latest" +} \ No newline at end of file diff --git a/deployment/lambda/whlg_calculator/whlg_lambda.tf b/deployment/lambda/whlg_calculator/whlg_lambda.tf new file mode 100644 index 00000000..0a5433a9 --- /dev/null +++ b/deployment/lambda/whlg_calculator/whlg_lambda.tf @@ -0,0 +1,83 @@ +# Reference existing IAM role +data "aws_iam_role" "lambda_exec_role" { + name = "lambda-exec-role" +} + +# Reference existing ECR repository +data "aws_ecr_repository" "whlg_calc_adhoc_ecr" { + name = "whlg_calc_adhoc_ecr" +} + +# SQS queue +resource "aws_sqs_queue" "whlg_calc_adhoc_queue" { + name = "whlg_calc_adhoc-queue" + visibility_timeout_seconds = 1800 # 30 minutes (>= 300s and ~6x Lambda timeout) +} + + +# Custom IAM policy specific to lambda_example +resource "aws_iam_policy" "whlg_calc_adhoc_policy" { + name = "walthamforest_adhoc_policy_lambda" + + policy = jsonencode({ + Version = "2012-10-17", + Statement = [ + { + Effect = "Allow", + Action = [ + "sqs:ReceiveMessage", + "sqs:DeleteMessage", + "sqs:GetQueueAttributes", + "sqs:GetQueueUrl", + "sqs:ChangeMessageVisibility" + ], + Resource = aws_sqs_queue.whlg_calc_adhoc_queue.arn + }, + { + Effect = "Allow", + Action = [ + "ecr:GetDownloadUrlForLayer", + "ecr:BatchGetImage", + "ecr:BatchCheckLayerAvailability" + ], + Resource = data.aws_ecr_repository.whlg_calc_adhoc_ecr.arn + }, + { + Effect = "Allow", + Action = ["ecr:GetAuthorizationToken"], + Resource = "*" + } + ] + }) +} + +resource "aws_iam_role_policy_attachment" "whlg_calc_adhoc_policy_attach" { + role = data.aws_iam_role.lambda_exec_role.name + policy_arn = aws_iam_policy.whlg_calc_adhoc_policy.arn +} + +# Lambda function +resource "aws_lambda_function" "whlg_calc_adhoc" { + function_name = "whlg_calc_adhoc" + role = data.aws_iam_role.lambda_exec_role.arn + package_type = "Image" + image_uri = "${data.aws_ecr_repository.whlg_calc_adhoc_ecr.repository_url}:${var.lambda_image_tag}" + # Increase timeout (max 900 sec / 15 min) + # timeout = 300 # e.g. 5 minutes + + # Increase memory (default 128 MB) + memory_size = 2048 # try 1024 or 2048 MB to start + + # environment { + # variables = { + # DATABASE_URL = "postgresql://postgres:makingwarmhomes@terraform-20250331175522503500000002.cdgzupxvdyp0.eu-west-2.rds.amazonaws.com:5432/surveyDB" + # } + # } +} + +# SQS trigger +resource "aws_lambda_event_source_mapping" "whlg_calc_adhoc_trigger" { + event_source_arn = aws_sqs_queue.whlg_calc_adhoc_queue.arn + function_name = aws_lambda_function.whlg_calc_adhoc.arn + batch_size = 1 +} From 91db4fb86c24e1a76fcdfe9af002011794644a7c Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 11 Nov 2025 14:29:03 +0000 Subject: [PATCH 02/15] lambda --- .../actions/lambda-deploy/action.yml | 86 +++++++++++++++++++ .../actions/terraform-deploy/action.yml | 55 ++++++++++++ .github/workflows/lambda_main.yml | 33 +++++++ 3 files changed, 174 insertions(+) create mode 100644 .github/workflows/actions/lambda-deploy/action.yml create mode 100644 .github/workflows/actions/terraform-deploy/action.yml create mode 100644 .github/workflows/lambda_main.yml diff --git a/.github/workflows/actions/lambda-deploy/action.yml b/.github/workflows/actions/lambda-deploy/action.yml new file mode 100644 index 00000000..3ca0fc8d --- /dev/null +++ b/.github/workflows/actions/lambda-deploy/action.yml @@ -0,0 +1,86 @@ +name: "Build and Push Lambda Image to ECR" +description: "Reusable action for building and pushing lambda Docker image to ECR" + +inputs: + ecr_name: + description: "Lambda name / ECR repo name" + required: true + dockerfile_path: + description: "Path to Dockerfile" + required: true + ecr_tf_dir: + description: "Path to ECR terraform directory" + required: true + lambda_tf_dir: + description: "Path to Lambda terraform directory" + required: true + aws-access-key-id: + description: "AWS access key" + required: true + aws-secret-access-key: + description: "AWS secret key" + required: true + aws-region: + description: "AWS region" + required: true + git-sha: + description: "Git commit SHA" + required: true + git-ref: + description: "Git ref name" + required: true + +runs: + using: "composite" + steps: + - uses: actions/checkout@v4 + + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-access-key-id: ${{ inputs.aws-access-key-id }} + aws-secret-access-key: ${{ inputs.aws-secret-access-key }} + aws-region: ${{ inputs.aws-region }} + + - name: Log in to Amazon ECR + id: login-ecr + uses: aws-actions/amazon-ecr-login@v2 + + - name: Deploy ECR + uses: ./.github/workflows/actions/terraform-deploy + with: + working_directory: ${{ inputs.ecr_tf_dir }} + aws-access-key-id: ${{ inputs.aws-access-key-id }} + aws-secret-access-key: ${{ inputs.aws-secret-access-key }} + aws-region: ${{ inputs.aws-region }} + - name: Set Docker image tag + id: set_tag + shell: bash + run: | + SHORT_SHA=$(echo "${{ inputs.git-sha }}" | cut -c1-7) + BRANCH=$(echo "${{ inputs.git-ref }}" | tr '/' '-') + TAG="${BRANCH}-${SHORT_SHA}" + echo "IMAGE_TAG=${TAG}" >> $GITHUB_ENV + echo "tag=$TAG" >> $GITHUB_OUTPUT + + - name: Build and push Docker image + shell: bash + run: | + IMAGE_URI=${{ steps.login-ecr.outputs.registry }}/${{ inputs.ecr_name }}:${{ steps.set_tag.outputs.tag }} + echo "Building Docker image for ${{ inputs.ecr_name }}..." + docker build -t $IMAGE_URI -f ${{ inputs.dockerfile_path }} . + + echo "Pushing to ECR..." + docker push $IMAGE_URI + + - name: Deploy Lambda + uses: ./.github/workflows/actions/terraform-deploy + with: + working_directory: ${{ inputs.lambda_tf_dir }} + aws-access-key-id: ${{ inputs.aws-access-key-id }} + aws-secret-access-key: ${{ inputs.aws-secret-access-key }} + aws-region: ${{ inputs.aws-region }} + lambda-image-tag: ${{ steps.set_tag.outputs.tag }} + + + diff --git a/.github/workflows/actions/terraform-deploy/action.yml b/.github/workflows/actions/terraform-deploy/action.yml new file mode 100644 index 00000000..56133299 --- /dev/null +++ b/.github/workflows/actions/terraform-deploy/action.yml @@ -0,0 +1,55 @@ +name: "Terraform Plan Shared Config" +description: "Plans shared Terraform config for Lambdas" + +inputs: + working_directory: + description: "Directory containing Terraform config" + required: true + aws-access-key-id: + description: "AWS access key" + required: true + aws-secret-access-key: + description: "AWS secret key" + required: true + aws-region: + description: "AWS region" + required: true + lambda-image-tag: + description: "Tag of the Lambda image (e.g., GitHub SHA)" + required: false + +runs: + using: "composite" + steps: + - uses: actions/checkout@v4 + + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-access-key-id: ${{ inputs.aws-access-key-id }} + aws-secret-access-key: ${{ inputs.aws-secret-access-key }} + aws-region: ${{ inputs.aws-region }} + + - name: Setup Terraform + uses: hashicorp/setup-terraform@v3 + + - name: Terraform Init + working-directory: ${{ inputs.working_directory }} + shell: bash + run: terraform init -reconfigure + + - name: Terraform Plan + working-directory: ${{ inputs.working_directory }} + shell: bash + run: | + if [ -n "${{ inputs.lambda-image-tag }}" ]; then + terraform plan -out=tfplan -var="lambda_image_tag=${{ inputs.lambda-image-tag }}" + else + terraform plan -out=tfplan + fi + + - name: Terraform Apply + working-directory: ${{ inputs.working_directory }} + shell: bash + run: terraform apply -auto-approve tfplan + diff --git a/.github/workflows/lambda_main.yml b/.github/workflows/lambda_main.yml new file mode 100644 index 00000000..73645ac5 --- /dev/null +++ b/.github/workflows/lambda_main.yml @@ -0,0 +1,33 @@ +# Please note, this github work flows assumes that shared-terrform is deployed in aws env +# The shared-terraform files lives in https://github.com/Hestia-Homes/survey-extraction/tree/main/deployment/lambda/lambda_shared + +name: Deploy Lambdas +on: + push: + branches: [main, feautre/whlg_lambda] + +env: + AWS_REGION: eu-west-2 + +jobs: + whlg-calc: + runs-on: ubuntu-latest + permissions: + id-token: write + contents: read + + steps: + - name: Checkout repo + uses: actions/checkout@v4 + - name: Build and deploy Warm Homes Local Grant Calc (whlg-calc) + uses: ./.github/workflows/actions/lambda-deploy + with: + ecr_name: whlg_calc_adhoc_ecr + dockerfile_path: ./deployment/lambda/whlg_calculator/docker/Dockerfile + ecr_tf_dir: ./deployment/lambda/whlg_calculator/docker/ + lambda_tf_dir: ./deployment/lambda/whlg_calculator/ + aws-access-key-id: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY}} + aws-region: eu-west-2 + git-sha: ${{ github.sha }} + git-ref: ${{ github.ref_name }} \ No newline at end of file From f10473cbf3952ac76ab3e30b977aa290bb2b20ff Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 11 Nov 2025 14:30:11 +0000 Subject: [PATCH 03/15] wrong branch --- .github/workflows/lambda_main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lambda_main.yml b/.github/workflows/lambda_main.yml index 73645ac5..960adbe5 100644 --- a/.github/workflows/lambda_main.yml +++ b/.github/workflows/lambda_main.yml @@ -4,7 +4,7 @@ name: Deploy Lambdas on: push: - branches: [main, feautre/whlg_lambda] + branches: [main, feature/whlg_lambda] env: AWS_REGION: eu-west-2 From 5ab4d5a6d850c407f6678fbd83937cef8266b6f4 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Fri, 14 Nov 2025 13:36:09 +0000 Subject: [PATCH 04/15] add my code to main --- .devcontainer/Dockerfile | 23 ++++--- .devcontainer/devcontainer.json | 3 +- .devcontainer/post-install.sh | 37 ++++------ .vscode/settings.json | 19 +++++ backend/app/config.py | 10 ++- backend/app/db/connection.py | 6 ++ backend/app/db/functions/tasks/Tasks.py | 10 +++ backend/app/db/functions/whlg_functions.py | 80 ++++++++++++++++++++++ backend/app/db/models/tasks.py | 37 ++++++++++ backend/app/db/models/whlg.py | 15 ++++ backend/app/local/router.py | 5 ++ backend/app/main.py | 73 +++++++++++++++++++- backend/app/requirements/requirements.txt | 5 ++ backend/app/whlg/route.py | 47 ------------- backend/app/whlg/router.py | 78 +++++++++++++++++++++ backend/app/whlg/schema.py | 4 ++ backend/run_curl.sh | 11 +++ backend/run_local.sh | 6 ++ 18 files changed, 382 insertions(+), 87 deletions(-) create mode 100644 .vscode/settings.json create mode 100644 backend/app/db/functions/tasks/Tasks.py create mode 100644 backend/app/db/functions/whlg_functions.py create mode 100644 backend/app/db/models/tasks.py create mode 100644 backend/app/db/models/whlg.py delete mode 100644 backend/app/whlg/route.py create mode 100644 backend/app/whlg/router.py create mode 100644 backend/run_curl.sh create mode 100644 backend/run_local.sh diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 4d898973..c061c9f8 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -9,15 +9,15 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ build-essential pkg-config automake autoconf libtool \ && rm -rf /var/lib/apt/lists/* -# 2) Build and install libpostal from source -RUN git clone --depth 1 https://github.com/openvenues/libpostal /tmp/libpostal \ - && cd /tmp/libpostal \ - && ./bootstrap.sh \ - && ./configure --datadir=/usr/local/share/libpostal \ - && make -j"$(nproc)" \ - && make install \ - && ldconfig \ - && rm -rf /tmp/libpostal +# # 2) Build and install libpostal from source +# RUN git clone --depth 1 https://github.com/openvenues/libpostal /tmp/libpostal \ +# && cd /tmp/libpostal \ +# && ./bootstrap.sh \ +# && ./configure --datadir=/usr/local/share/libpostal \ +# && make -j"$(nproc)" \ +# && make install \ +# && ldconfig \ +# && rm -rf /tmp/libpostal # 3) Create the user and grant sudo privileges RUN useradd -m -s /usr/bin/bash ${USER} \ @@ -26,7 +26,10 @@ RUN useradd -m -s /usr/bin/bash ${USER} \ # 4) Python deps ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1 -ADD asset_list/requirements.txt requirements.txt +# Model +# ADD asset_list/requirements.txt requirements.txt +# FASTAPI backend +ADD backend/app/requirements/requirements.txt requirements.txt RUN pip install -r requirements.txt # 5) Workdir diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 49bd6f83..91a76c3d 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -23,7 +23,8 @@ "4ops.terraform", "fabiospampinato.vscode-todo-plus", "jgclark.vscode-todo-highlight", - "corentinartaud.pdfpreview" + "corentinartaud.pdfpreview", + "ms-python.vscode-python-envs" ] } } diff --git a/.devcontainer/post-install.sh b/.devcontainer/post-install.sh index d9fc3a9e..dc6da006 100644 --- a/.devcontainer/post-install.sh +++ b/.devcontainer/post-install.sh @@ -1,27 +1,14 @@ -# #!/bin/bash -# poetry install; +mkdir -p ~/.ipython/profile_default/startup -# # Get the Poetry virtual environment path -# VENV_PATH=$(poetry env info --path 2>/dev/null) +cat << 'EOF' > ~/.ipython/profile_default/startup/00-load-env.py +from dotenv import load_dotenv +import os -# if [ -z "$VENV_PATH" ]; then -# echo "No Poetry environment found. Did you run 'poetry install'?" -# exit 1 -# fi - -# # Ensure VS Code settings directory exists -# SETTINGS_DIR="/home/vscode/.vscode-server/data/Machine" -# SETTINGS_FILE="$SETTINGS_DIR/settings.json" - -# mkdir -p "$SETTINGS_DIR" - -# # If settings.json doesn't exist, create a default one -# if [ ! -f "$SETTINGS_FILE" ]; then -# echo "{}" > "$SETTINGS_FILE" -# fi - -# # Update VS Code settings to use the Poetry virtual environment -# jq --arg venv "$VENV_PATH/bin/python" '.["python.defaultInterpreterPath"] = $venv' \ -# "$SETTINGS_FILE" > "$SETTINGS_FILE.tmp" && mv "$SETTINGS_FILE.tmp" "$SETTINGS_FILE" - -# echo "✅ Updated VS Code to use Poetry environment: $VENV_PATH" +# Adjust path as needed +env_path = "/workspaces/model/backend/.env" +if os.path.exists(env_path): + load_dotenv(env_path) + print("✔ Loaded .env into Jupyter kernel") +else: + print("⚠ No .env file found to load") +EOF \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 00000000..27782c10 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,19 @@ +{ + "jupyter.interactiveWindow.textEditor.executeSelection": true, + "python.REPL.sendToNativeREPL": true, + "notebook.output.scrolling": true, + "terminal.integrated.defaultProfile.linux": "bash", + "editor.rulers": [67], + "terminal.integrated.profiles.linux": { + "bash": { + "path": "/bin/bash" + } + }, + + // Hot reload setting that needs to be in user settings + // "jupyter.runStartupCommands": [ + // "%load_ext autoreload", "%autoreload 2" + // ] + + +} \ No newline at end of file diff --git a/backend/app/config.py b/backend/app/config.py index b53d5223..98e1c447 100644 --- a/backend/app/config.py +++ b/backend/app/config.py @@ -1,5 +1,7 @@ from functools import lru_cache from pydantic_settings import BaseSettings +from typing import Optional + class Settings(BaseSettings): @@ -35,9 +37,13 @@ class Settings(BaseSettings): # Other S3 buckts ENERGY_ASSESSMENTS_BUCKET: str - class Config: - env_file = "backend/.env" + # Optional AWS creds (only required in local) + AWS_ACCESS_KEY_ID: Optional[str] = None + AWS_SECRET_KEY_ID: Optional[str] = None + AWS_DEFAULT_REGION: Optional[str] = None + class Config: + env_file = "backend.env" @lru_cache() def get_settings(): diff --git a/backend/app/db/connection.py b/backend/app/db/connection.py index 9efdfd25..fbec9102 100644 --- a/backend/app/db/connection.py +++ b/backend/app/db/connection.py @@ -1,5 +1,6 @@ from sqlalchemy import create_engine from backend.app.config import get_settings +from sqlmodel import Session connection_string = "postgresql+{drivername}://{username}:{password}@{server}:{port}/{dbname}" db_string = connection_string.format( @@ -12,3 +13,8 @@ db_string = connection_string.format( ) db_engine = create_engine(db_string, pool_size=5, max_overflow=5) + +def get_db_session(): + if db_engine is None: + raise RuntimeError("Database is not configured. Set DATABASE_URL in environment variables.") + return Session(db_engine) diff --git a/backend/app/db/functions/tasks/Tasks.py b/backend/app/db/functions/tasks/Tasks.py new file mode 100644 index 00000000..d3f06d33 --- /dev/null +++ b/backend/app/db/functions/tasks/Tasks.py @@ -0,0 +1,10 @@ + +class TasksInterface: + def __init__(self): + pass + + + +class SubTaskInterface: + def __init__(self) + pass \ No newline at end of file diff --git a/backend/app/db/functions/whlg_functions.py b/backend/app/db/functions/whlg_functions.py new file mode 100644 index 00000000..e318d004 --- /dev/null +++ b/backend/app/db/functions/whlg_functions.py @@ -0,0 +1,80 @@ +from backend.app.db.connection import get_db_session +from backend.app.db.models.whlg import Whlg + + +def upsert_whlg_postcode(postcode: str): + """ + Manually upsert a postcode into the WHLG table. + No unique constraint is required. + """ + + cleaned = postcode.lower().replace(" ", "") + + with get_db_session() as session: + # Check if record exists + existing = session.query(Whlg).filter(Whlg.postcode == cleaned).first() + + if existing: + return existing # nothing to update, just return it + + # Insert a new row + record = Whlg(postcode=cleaned) + session.add(record) + session.commit() + session.refresh(record) + + return record + + +# One time script to upload 400,000 records in one go with the pay +# of pandas and one insert +from backend.app.db.connection import get_db_session +from backend.app.db.models.whlg import Whlg +from sqlalchemy import select +from sqlalchemy.orm import Session + + +def upload_whlg_from_dataframe(df): + """ + FAST bulk insert of WHLG postcodes (400k+ rows). + No unique constraint needed. + """ + + if "Postcode" not in df.columns: + raise ValueError("DataFrame must contain a 'Postcode' column") + + # 1. Clean incoming postcodes + cleaned_postcodes = ( + df["Postcode"] + .astype(str) + .str.lower() + .str.replace(" ", "", regex=False) + .dropna() + .unique() + .tolist() + ) + + with get_db_session() as session: + # 2. Fetch existing postcodes once (VERY FAST) + existing = session.exec(select(Whlg.postcode)).all() + existing_set = set(existing) + + # 3. Determine which are new + new_postcodes = [ + pc for pc in cleaned_postcodes if pc not in existing_set + ] + + if not new_postcodes: + return {"inserted": 0, "skipped_existing": len(cleaned_postcodes)} + + # 4. Bulk insert new postcodes in one shot + objects = [Whlg(postcode=pc) for pc in new_postcodes] + + session.bulk_save_objects(objects) + session.commit() + + return { + "inserted": len(new_postcodes), + "skipped_existing": len(cleaned_postcodes) - len(new_postcodes), + "total_provided": len(cleaned_postcodes) + } diff --git a/backend/app/db/models/tasks.py b/backend/app/db/models/tasks.py new file mode 100644 index 00000000..ed5b3710 --- /dev/null +++ b/backend/app/db/models/tasks.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +from typing import Optional, List +from datetime import datetime +from uuid import UUID, uuid4 + +from sqlmodel import SQLModel, Field, Relationship + + +class Task(SQLModel, table=True): + __tablename__ = "tasks" + + id: UUID = Field( + default_factory=uuid4, + primary_key=True, + index=True, + ) + + taskSource: str = Field(alias="task_source") + + jobStarted: Optional[datetime] = Field( + default=None, alias="job_started" + ) + jobCompleted: Optional[datetime] = Field( + default=None, alias="job_completed" + ) + + status: str = Field(default="In Progress") + service: Optional[str] = None + + updatedAt: datetime = Field( + default_factory=datetime.utcnow, + alias="updated_at", + ) + + # Relationship + subTasks: List["SubTask"] = Relationship(back_populates="task") diff --git a/backend/app/db/models/whlg.py b/backend/app/db/models/whlg.py new file mode 100644 index 00000000..29d907e4 --- /dev/null +++ b/backend/app/db/models/whlg.py @@ -0,0 +1,15 @@ +import uuid +from typing import Optional +from sqlmodel import SQLModel, Field + + +class Whlg(SQLModel, table=True): + __tablename__ = "whlg" + + id: Optional[int] = Field( + default=None, + primary_key=True, + index=True, + ) + + postcode: str = Field(nullable=False) \ No newline at end of file diff --git a/backend/app/local/router.py b/backend/app/local/router.py index 4ebb490c..0977be04 100644 --- a/backend/app/local/router.py +++ b/backend/app/local/router.py @@ -31,6 +31,11 @@ def create_dummy_token(secret: str) -> str: return token +@router.get("/") +async def dummy_token(): + return {"hello": "world"} + + @router.get("/dummy-token") async def dummy_token(): settings = get_settings() diff --git a/backend/app/main.py b/backend/app/main.py index de6f0795..261e2f34 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -6,6 +6,7 @@ from fastapi.encoders import jsonable_encoder from starlette.exceptions import HTTPException as StarletteHTTPException from mangum import Mangum from backend.app.portfolio import router as portfolio_router +from backend.app.whlg import router as whlg_router from backend.app.plan import router as plan_router from backend.app.dependencies import validate_api_key from backend.app.config import get_settings @@ -13,7 +14,10 @@ from backend.app.config import get_settings logger = logging.getLogger("uvicorn.error") logging.basicConfig(level=logging.INFO) -app = FastAPI(dependencies=[Depends(validate_api_key)]) +if get_settings().ENVIRONMENT == "local": + app = FastAPI() +else: + app = FastAPI(dependencies=[Depends(validate_api_key)]) # Handle 422 errors (validation failures) @@ -52,10 +56,75 @@ async def log_requests(request: Request, call_next): app.include_router(portfolio_router.router, prefix="/v1") app.include_router(plan_router.router, prefix="/v1") +app.include_router(whlg_router.router, prefix="/v1") + +if get_settings().ENVIRONMENT == "local": + from app.local import router as local_router + app.include_router(local_router.router) + +handler = Mangum(app) +import logging +from fastapi.responses import JSONResponse +from fastapi import FastAPI, Depends, Request, status +from fastapi.exceptions import RequestValidationError +from fastapi.encoders import jsonable_encoder +from starlette.exceptions import HTTPException as StarletteHTTPException +from mangum import Mangum +from backend.app.portfolio import router as portfolio_router +from backend.app.whlg import router as whlg_router +from backend.app.plan import router as plan_router +from backend.app.dependencies import validate_api_key +from backend.app.config import get_settings + +logger = logging.getLogger("uvicorn.error") +logging.basicConfig(level=logging.INFO) + +if get_settings().ENVIRONMENT == "local": + app = FastAPI() +else: + app = FastAPI(dependencies=[Depends(validate_api_key)]) + + +# Handle 422 errors (validation failures) +@app.exception_handler(RequestValidationError) +async def validation_exception_handler(request: Request, exc: RequestValidationError): + logger.error(f"422 Validation Error at {request.url}") + logger.error(f"Body: {exc.body}") + logger.error(f"Validation Errors: {exc.errors()}") + return JSONResponse( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + content=jsonable_encoder({ + "detail": exc.errors(), + "body": exc.body + }), + ) + + +# Handle generic HTTP exceptions (optional, useful for catching 404, 403, etc.) +@app.exception_handler(StarletteHTTPException) +async def http_exception_handler(request: Request, exc: StarletteHTTPException): + logger.warning(f"{exc.status_code} Error at {request.url} - Detail: {exc.detail}") + return JSONResponse( + status_code=exc.status_code, + content={"detail": exc.detail}, + ) + + +# Middleware to log requests +@app.middleware("http") +async def log_requests(request: Request, call_next): + logger.info(f"Incoming request: {request.method} {request.url}") + response = await call_next(request) + logger.info(f"Response status: {response.status_code}") + return response + + +app.include_router(portfolio_router.router, prefix="/v1") +app.include_router(plan_router.router, prefix="/v1") +app.include_router(whlg_router.router, prefix="/v1") if get_settings().ENVIRONMENT == "local": from app.local import router as local_router - app.include_router(local_router.router) handler = Mangum(app) diff --git a/backend/app/requirements/requirements.txt b/backend/app/requirements/requirements.txt index a213214d..8a151e83 100644 --- a/backend/app/requirements/requirements.txt +++ b/backend/app/requirements/requirements.txt @@ -12,3 +12,8 @@ boto3==1.35.44 openpyxl==3.1.2 # Basic pytz +uvicorn[standard] +pandas +ipykernel +sqlmodel + diff --git a/backend/app/whlg/route.py b/backend/app/whlg/route.py deleted file mode 100644 index 21d417c5..00000000 --- a/backend/app/whlg/route.py +++ /dev/null @@ -1,47 +0,0 @@ -import boto3 -import json -import math -import asyncio -import random - -from datetime import datetime - -from fastapi import APIRouter, Depends -from backend.app.dependencies import validate_token -from backend.app.plan.schemas import PlanTriggerRequest -from backend.app.config import get_settings -from sqlalchemy.orm import sessionmaker -from utils.logger import setup_logger -from backend.app.db.connection import db_engine - -from backend.app.db.functions.recommendations_functions import create_scenario - -logger = setup_logger() - -router = APIRouter( - prefix="/whlg", - tags=["whlg"], - dependencies=[Depends(validate_token)], - responses={404: {"description": "Not found"}} -) - - -@router.post("/") -async def whlg_entrypoint(body): - # body needs to include postcode, UPRN [task ID?] - # - # Refer to the plan trigger route for code - # 1) Create an event schema and store it in the schemas file - # 2) Build the tasks functions - # 3) Read in the funding csx. This can be found as such: - # whlg_eligible_postcodes = read_csv_from_s3( - # bucket_name=get_settings().DATA_BUCKET, - # filepath="funding/whlg eligible postcodes.csv", - # ) - # whlg_eligible_postcodes = pd.DataFrame(whlg_eligible_postcodes) - # Check the postcode against this file - # We need to store this somewhere????!!!??!??!?!?!?!??!??!??!??!??!??!??!??!??!??! Create a new table! - # Update subtask to be complete - # Once this is complete, build the logs stuff, add the cloudwatch logs ID to the database - - print("We're gonna do stuff!") diff --git a/backend/app/whlg/router.py b/backend/app/whlg/router.py new file mode 100644 index 00000000..3957a3f4 --- /dev/null +++ b/backend/app/whlg/router.py @@ -0,0 +1,78 @@ +import boto3 +import json +import math +import asyncio +import random + +from datetime import datetime + +from fastapi import APIRouter, Depends +from backend.app.dependencies import validate_token +from backend.app.plan.schemas import PlanTriggerRequest +from backend.app.config import get_settings +from sqlalchemy.orm import sessionmaker +from utils.logger import setup_logger +from backend.app.db.connection import db_engine +from backend.app.db.functions.recommendations_functions import create_scenario +import pandas as pd +from backend.app.whlg.schema import WHLGElligibilityRequest + +from utils.s3 import read_csv_from_s3 +from sqlalchemy.dialects.postgresql import insert +from backend.app.db.connection import get_db_session +from backend.app.db.models.whlg import Whlg +from backend.app.db.functions.whlg_functions import upsert_whlg_postcode + +logger = setup_logger() + + +if get_settings().ENVIRONMENT == "local": + router = APIRouter( + prefix="/whlg", + tags=["whlg"], + ) + +else: + router = APIRouter( + prefix="/whlg", + tags=["whlg"], + dependencies=[Depends(validate_token)], + responses={404: {"description": "Not found"}} + ) + +@router.get("/") +async def whlg_entrypoint(): + # body needs to include postcode, UPRN [task ID?] + # + # Refer to the plan trigger route for code + # 1) Create an event schema and store it in the schemas file + # 2) Build the tasks functions + # 3) Read in the funding csx. This can be found as such: + # whlg_eligible_postcodes = read_csv_from_s3( + # bucket_name=get_settings().DATA_BUCKET, + # filepath="funding/whlg eligible postcodes.csv", + # ) + # whlg_eligible_postcodes = pd.DataFrame(whlg_eligible_postcodes) + # Check the postcode against this file + # We need to store this somewhere????!!!??!??!?!?!?!??!??!??!??!??!??!??!??!??!??! Create a new table! + # Update subtask to be complete + # Once this is complete, build the logs stuff, add the cloudwatch logs ID to the database + return {"hello": "from whlg"} + + +@router.post("/eligible") +async def eligiable(body: WHLGElligibilityRequest): + postcode = body.postcode or "" + postcode = postcode.lower().replace(" ", "") + + whlg_eligible_postcodes = read_csv_from_s3( + bucket_name=get_settings().DATA_BUCKET, + filepath="funding/whlg eligible postcodes.csv", + ) + whlg_eligible_postcodes = pd.DataFrame(whlg_eligible_postcodes) + whlg_eligible_postcodes['Postcode'] = whlg_eligible_postcodes['Postcode'].str.replace(' ', '', regex=False) + + is_eligible = postcode in whlg_eligible_postcodes['Postcode'].values + return {"whlg_eligible": is_eligible} + + diff --git a/backend/app/whlg/schema.py b/backend/app/whlg/schema.py index e69de29b..648ecbf3 100644 --- a/backend/app/whlg/schema.py +++ b/backend/app/whlg/schema.py @@ -0,0 +1,4 @@ +from pydantic import BaseModel, Field + +class WHLGElligibilityRequest(BaseModel): + postcode: str = Field(..., example="B93 8SY") \ No newline at end of file diff --git a/backend/run_curl.sh b/backend/run_curl.sh new file mode 100644 index 00000000..22433e39 --- /dev/null +++ b/backend/run_curl.sh @@ -0,0 +1,11 @@ +curl -X POST "http://localhost:8000/v1/whlg/eligible" \ + -H "Content-Type: application/json" \ + -d '{"postcode": "B93 8SY"}' + +curl -X POST "http://localhost:8000/v1/whlg/eligible" \ + -H "Content-Type: application/json" \ + -d '{"postcode": "BN15 0FD"}' + +curl -X POST "http://localhost:8000/v1/whlg/eligible" \ + -H "Content-Type: application/json" \ + -d '{"postcode": "DY6 0LB"}' diff --git a/backend/run_local.sh b/backend/run_local.sh new file mode 100644 index 00000000..be45a54a --- /dev/null +++ b/backend/run_local.sh @@ -0,0 +1,6 @@ +set -a +source ./.env +set +a + +uvicorn app.main:app --reload + From 05740f82a4026dc7f5de23519576dea1498b2dec Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Fri, 14 Nov 2025 13:40:29 +0000 Subject: [PATCH 05/15] uploade without workflows --- .../actions/lambda-deploy/action.yml | 86 ------------------- .../actions/terraform-deploy/action.yml | 55 ------------ .github/workflows/lambda_main.yml | 33 ------- 3 files changed, 174 deletions(-) delete mode 100644 .github/workflows/actions/lambda-deploy/action.yml delete mode 100644 .github/workflows/actions/terraform-deploy/action.yml delete mode 100644 .github/workflows/lambda_main.yml diff --git a/.github/workflows/actions/lambda-deploy/action.yml b/.github/workflows/actions/lambda-deploy/action.yml deleted file mode 100644 index 3ca0fc8d..00000000 --- a/.github/workflows/actions/lambda-deploy/action.yml +++ /dev/null @@ -1,86 +0,0 @@ -name: "Build and Push Lambda Image to ECR" -description: "Reusable action for building and pushing lambda Docker image to ECR" - -inputs: - ecr_name: - description: "Lambda name / ECR repo name" - required: true - dockerfile_path: - description: "Path to Dockerfile" - required: true - ecr_tf_dir: - description: "Path to ECR terraform directory" - required: true - lambda_tf_dir: - description: "Path to Lambda terraform directory" - required: true - aws-access-key-id: - description: "AWS access key" - required: true - aws-secret-access-key: - description: "AWS secret key" - required: true - aws-region: - description: "AWS region" - required: true - git-sha: - description: "Git commit SHA" - required: true - git-ref: - description: "Git ref name" - required: true - -runs: - using: "composite" - steps: - - uses: actions/checkout@v4 - - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v4 - with: - aws-access-key-id: ${{ inputs.aws-access-key-id }} - aws-secret-access-key: ${{ inputs.aws-secret-access-key }} - aws-region: ${{ inputs.aws-region }} - - - name: Log in to Amazon ECR - id: login-ecr - uses: aws-actions/amazon-ecr-login@v2 - - - name: Deploy ECR - uses: ./.github/workflows/actions/terraform-deploy - with: - working_directory: ${{ inputs.ecr_tf_dir }} - aws-access-key-id: ${{ inputs.aws-access-key-id }} - aws-secret-access-key: ${{ inputs.aws-secret-access-key }} - aws-region: ${{ inputs.aws-region }} - - name: Set Docker image tag - id: set_tag - shell: bash - run: | - SHORT_SHA=$(echo "${{ inputs.git-sha }}" | cut -c1-7) - BRANCH=$(echo "${{ inputs.git-ref }}" | tr '/' '-') - TAG="${BRANCH}-${SHORT_SHA}" - echo "IMAGE_TAG=${TAG}" >> $GITHUB_ENV - echo "tag=$TAG" >> $GITHUB_OUTPUT - - - name: Build and push Docker image - shell: bash - run: | - IMAGE_URI=${{ steps.login-ecr.outputs.registry }}/${{ inputs.ecr_name }}:${{ steps.set_tag.outputs.tag }} - echo "Building Docker image for ${{ inputs.ecr_name }}..." - docker build -t $IMAGE_URI -f ${{ inputs.dockerfile_path }} . - - echo "Pushing to ECR..." - docker push $IMAGE_URI - - - name: Deploy Lambda - uses: ./.github/workflows/actions/terraform-deploy - with: - working_directory: ${{ inputs.lambda_tf_dir }} - aws-access-key-id: ${{ inputs.aws-access-key-id }} - aws-secret-access-key: ${{ inputs.aws-secret-access-key }} - aws-region: ${{ inputs.aws-region }} - lambda-image-tag: ${{ steps.set_tag.outputs.tag }} - - - diff --git a/.github/workflows/actions/terraform-deploy/action.yml b/.github/workflows/actions/terraform-deploy/action.yml deleted file mode 100644 index 56133299..00000000 --- a/.github/workflows/actions/terraform-deploy/action.yml +++ /dev/null @@ -1,55 +0,0 @@ -name: "Terraform Plan Shared Config" -description: "Plans shared Terraform config for Lambdas" - -inputs: - working_directory: - description: "Directory containing Terraform config" - required: true - aws-access-key-id: - description: "AWS access key" - required: true - aws-secret-access-key: - description: "AWS secret key" - required: true - aws-region: - description: "AWS region" - required: true - lambda-image-tag: - description: "Tag of the Lambda image (e.g., GitHub SHA)" - required: false - -runs: - using: "composite" - steps: - - uses: actions/checkout@v4 - - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v4 - with: - aws-access-key-id: ${{ inputs.aws-access-key-id }} - aws-secret-access-key: ${{ inputs.aws-secret-access-key }} - aws-region: ${{ inputs.aws-region }} - - - name: Setup Terraform - uses: hashicorp/setup-terraform@v3 - - - name: Terraform Init - working-directory: ${{ inputs.working_directory }} - shell: bash - run: terraform init -reconfigure - - - name: Terraform Plan - working-directory: ${{ inputs.working_directory }} - shell: bash - run: | - if [ -n "${{ inputs.lambda-image-tag }}" ]; then - terraform plan -out=tfplan -var="lambda_image_tag=${{ inputs.lambda-image-tag }}" - else - terraform plan -out=tfplan - fi - - - name: Terraform Apply - working-directory: ${{ inputs.working_directory }} - shell: bash - run: terraform apply -auto-approve tfplan - diff --git a/.github/workflows/lambda_main.yml b/.github/workflows/lambda_main.yml deleted file mode 100644 index 960adbe5..00000000 --- a/.github/workflows/lambda_main.yml +++ /dev/null @@ -1,33 +0,0 @@ -# Please note, this github work flows assumes that shared-terrform is deployed in aws env -# The shared-terraform files lives in https://github.com/Hestia-Homes/survey-extraction/tree/main/deployment/lambda/lambda_shared - -name: Deploy Lambdas -on: - push: - branches: [main, feature/whlg_lambda] - -env: - AWS_REGION: eu-west-2 - -jobs: - whlg-calc: - runs-on: ubuntu-latest - permissions: - id-token: write - contents: read - - steps: - - name: Checkout repo - uses: actions/checkout@v4 - - name: Build and deploy Warm Homes Local Grant Calc (whlg-calc) - uses: ./.github/workflows/actions/lambda-deploy - with: - ecr_name: whlg_calc_adhoc_ecr - dockerfile_path: ./deployment/lambda/whlg_calculator/docker/Dockerfile - ecr_tf_dir: ./deployment/lambda/whlg_calculator/docker/ - lambda_tf_dir: ./deployment/lambda/whlg_calculator/ - aws-access-key-id: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY}} - aws-region: eu-west-2 - git-sha: ${{ github.sha }} - git-ref: ${{ github.ref_name }} \ No newline at end of file From c617d603a360477c36a9a463c0c262d2de800b48 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Fri, 14 Nov 2025 13:41:24 +0000 Subject: [PATCH 06/15] uploade without workflows --- .../whlg_calculator/docker/.dockerignore | 21 ----- .../lambda/whlg_calculator/docker/Dockerfile | 25 ------ .../lambda/whlg_calculator/docker/app.py | 3 - .../lambda/whlg_calculator/docker/ecr.tf | 63 -------------- .../lambda/whlg_calculator/docker/main.tf | 0 .../lambda/whlg_calculator/docker/provider.tf | 15 ---- deployment/lambda/whlg_calculator/main.tf | 0 deployment/lambda/whlg_calculator/provider.tf | 15 ---- deployment/lambda/whlg_calculator/vars.tf | 5 -- .../lambda/whlg_calculator/whlg_lambda.tf | 83 ------------------- 10 files changed, 230 deletions(-) delete mode 100644 deployment/lambda/whlg_calculator/docker/.dockerignore delete mode 100644 deployment/lambda/whlg_calculator/docker/Dockerfile delete mode 100644 deployment/lambda/whlg_calculator/docker/app.py delete mode 100644 deployment/lambda/whlg_calculator/docker/ecr.tf delete mode 100644 deployment/lambda/whlg_calculator/docker/main.tf delete mode 100644 deployment/lambda/whlg_calculator/docker/provider.tf delete mode 100644 deployment/lambda/whlg_calculator/main.tf delete mode 100644 deployment/lambda/whlg_calculator/provider.tf delete mode 100644 deployment/lambda/whlg_calculator/vars.tf delete mode 100644 deployment/lambda/whlg_calculator/whlg_lambda.tf diff --git a/deployment/lambda/whlg_calculator/docker/.dockerignore b/deployment/lambda/whlg_calculator/docker/.dockerignore deleted file mode 100644 index d587d341..00000000 --- a/deployment/lambda/whlg_calculator/docker/.dockerignore +++ /dev/null @@ -1,21 +0,0 @@ -# Ignore junk and large files -*.pdf -*.csv -*.xml -*.parquet -*.ipynb -*.mp4 -*.mov -*.jpg -*.png -*.zip -*.tar.gz -__pycache__/ -*.pyc -*.pyo -*.pyd -build/ -dist/ -.etl_cache/ -tests/ -docs/ diff --git a/deployment/lambda/whlg_calculator/docker/Dockerfile b/deployment/lambda/whlg_calculator/docker/Dockerfile deleted file mode 100644 index cdd1f8a3..00000000 --- a/deployment/lambda/whlg_calculator/docker/Dockerfile +++ /dev/null @@ -1,25 +0,0 @@ -FROM public.ecr.aws/lambda/python:3.12 - -# Install Poetry (you could pin a version if you like) -RUN curl -sSL https://install.python-poetry.org | python3 - - -# Add Poetry to PATH -ENV PATH="/root/.local/bin:$PATH" - -# Set working directory -WORKDIR /var/task - -# Copy Poetry files first to leverage Docker layer caching -COPY pyproject.toml poetry.lock README.md ./ -COPY etl/ etl/ - - -# Install dependencies into /var/task -RUN poetry config virtualenvs.create false \ - && poetry install --only main --no-interaction --no-ansi - -# Copy app code -COPY deployment/lambda/extractor_and_loader/docker/app.py ./ - -# Set Lambda handler -CMD ["app.handler"] \ No newline at end of file diff --git a/deployment/lambda/whlg_calculator/docker/app.py b/deployment/lambda/whlg_calculator/docker/app.py deleted file mode 100644 index 4dcf1a8e..00000000 --- a/deployment/lambda/whlg_calculator/docker/app.py +++ /dev/null @@ -1,3 +0,0 @@ -def handler(event, context): - print("Hello and welcome to the WHLG Calculator") - print("Please contact the tech team for implementation") \ No newline at end of file diff --git a/deployment/lambda/whlg_calculator/docker/ecr.tf b/deployment/lambda/whlg_calculator/docker/ecr.tf deleted file mode 100644 index a1501dff..00000000 --- a/deployment/lambda/whlg_calculator/docker/ecr.tf +++ /dev/null @@ -1,63 +0,0 @@ -# ECR repo -resource "aws_ecr_repository" "whlg_calc_adhoc_ecr" { - name = "whlg_calc_adhoc_ecr" -} - -# ECR policy to allow Lambda access -resource "aws_ecr_repository_policy" "whlg_calc_adhoc_ecr_access" { - repository = aws_ecr_repository.whlg_calc_adhoc_ecr.name - - policy = jsonencode({ - Version = "2008-10-17", - Statement = [{ - Sid = "AllowLambdaPull", - Effect = "Allow", - Principal = { - Service = "lambda.amazonaws.com" - }, - Action = [ - "ecr:GetDownloadUrlForLayer", - "ecr:BatchGetImage", - "ecr:BatchCheckLayerAvailability" - ] - }] - }) -} - - - -# ECR lifecycle policy to delete tagged images older than 14 days -resource "aws_ecr_lifecycle_policy" "whlg_calc_adhoc_loader_lifecycle" { - repository = aws_ecr_repository.whlg_calc_adhoc_ecr.name - - policy = jsonencode({ - "rules": [ - { - "rulePriority": 2, - "description": "Expire images older than 14 days", - "selection": { - "tagStatus": "untagged", - "countType": "sinceImagePushed", - "countUnit": "days", - "countNumber": 1 - }, - "action": { - "type": "expire" - } - }, - { - "rulePriority": 1, - "description": "Keep last 5 images", - "selection": { - "tagStatus": "tagged", - "tagPrefixList": ["feature"], - "countType": "imageCountMoreThan", - "countNumber": 5 - }, - "action": { - "type": "expire" - } - } - ] - }) -} \ No newline at end of file diff --git a/deployment/lambda/whlg_calculator/docker/main.tf b/deployment/lambda/whlg_calculator/docker/main.tf deleted file mode 100644 index e69de29b..00000000 diff --git a/deployment/lambda/whlg_calculator/docker/provider.tf b/deployment/lambda/whlg_calculator/docker/provider.tf deleted file mode 100644 index 5f0fef0f..00000000 --- a/deployment/lambda/whlg_calculator/docker/provider.tf +++ /dev/null @@ -1,15 +0,0 @@ -terraform { - required_providers { - aws = { - source = "hashicorp/aws" - version = "~> 6.3.0" - } - } - backend "s3" { - bucket = "whlg-calc-tf-state" - region = "eu-west-2" - key = "env:/dev/lambda/ecr/whlg-calc.tfstate" - } - - required_version = ">= 1.2.0" -} diff --git a/deployment/lambda/whlg_calculator/main.tf b/deployment/lambda/whlg_calculator/main.tf deleted file mode 100644 index e69de29b..00000000 diff --git a/deployment/lambda/whlg_calculator/provider.tf b/deployment/lambda/whlg_calculator/provider.tf deleted file mode 100644 index df9abf1c..00000000 --- a/deployment/lambda/whlg_calculator/provider.tf +++ /dev/null @@ -1,15 +0,0 @@ -terraform { - required_providers { - aws = { - source = "hashicorp/aws" - version = "~> 6.3.0" - } - } - backend "s3" { - bucket = "whlg-calc-tf-state" - region = "eu-west-2" - key = "env:/dev/lambda/eachlambda/whlg_calc_lambda.tfstate" - } - - required_version = ">= 1.2.0" -} diff --git a/deployment/lambda/whlg_calculator/vars.tf b/deployment/lambda/whlg_calculator/vars.tf deleted file mode 100644 index ecdf359d..00000000 --- a/deployment/lambda/whlg_calculator/vars.tf +++ /dev/null @@ -1,5 +0,0 @@ -variable "lambda_image_tag" { - description = "Docker image tag (e.g. GitHub SHA)" - type = string - default = "local-dev-latest" -} \ No newline at end of file diff --git a/deployment/lambda/whlg_calculator/whlg_lambda.tf b/deployment/lambda/whlg_calculator/whlg_lambda.tf deleted file mode 100644 index 0a5433a9..00000000 --- a/deployment/lambda/whlg_calculator/whlg_lambda.tf +++ /dev/null @@ -1,83 +0,0 @@ -# Reference existing IAM role -data "aws_iam_role" "lambda_exec_role" { - name = "lambda-exec-role" -} - -# Reference existing ECR repository -data "aws_ecr_repository" "whlg_calc_adhoc_ecr" { - name = "whlg_calc_adhoc_ecr" -} - -# SQS queue -resource "aws_sqs_queue" "whlg_calc_adhoc_queue" { - name = "whlg_calc_adhoc-queue" - visibility_timeout_seconds = 1800 # 30 minutes (>= 300s and ~6x Lambda timeout) -} - - -# Custom IAM policy specific to lambda_example -resource "aws_iam_policy" "whlg_calc_adhoc_policy" { - name = "walthamforest_adhoc_policy_lambda" - - policy = jsonencode({ - Version = "2012-10-17", - Statement = [ - { - Effect = "Allow", - Action = [ - "sqs:ReceiveMessage", - "sqs:DeleteMessage", - "sqs:GetQueueAttributes", - "sqs:GetQueueUrl", - "sqs:ChangeMessageVisibility" - ], - Resource = aws_sqs_queue.whlg_calc_adhoc_queue.arn - }, - { - Effect = "Allow", - Action = [ - "ecr:GetDownloadUrlForLayer", - "ecr:BatchGetImage", - "ecr:BatchCheckLayerAvailability" - ], - Resource = data.aws_ecr_repository.whlg_calc_adhoc_ecr.arn - }, - { - Effect = "Allow", - Action = ["ecr:GetAuthorizationToken"], - Resource = "*" - } - ] - }) -} - -resource "aws_iam_role_policy_attachment" "whlg_calc_adhoc_policy_attach" { - role = data.aws_iam_role.lambda_exec_role.name - policy_arn = aws_iam_policy.whlg_calc_adhoc_policy.arn -} - -# Lambda function -resource "aws_lambda_function" "whlg_calc_adhoc" { - function_name = "whlg_calc_adhoc" - role = data.aws_iam_role.lambda_exec_role.arn - package_type = "Image" - image_uri = "${data.aws_ecr_repository.whlg_calc_adhoc_ecr.repository_url}:${var.lambda_image_tag}" - # Increase timeout (max 900 sec / 15 min) - # timeout = 300 # e.g. 5 minutes - - # Increase memory (default 128 MB) - memory_size = 2048 # try 1024 or 2048 MB to start - - # environment { - # variables = { - # DATABASE_URL = "postgresql://postgres:makingwarmhomes@terraform-20250331175522503500000002.cdgzupxvdyp0.eu-west-2.rds.amazonaws.com:5432/surveyDB" - # } - # } -} - -# SQS trigger -resource "aws_lambda_event_source_mapping" "whlg_calc_adhoc_trigger" { - event_source_arn = aws_sqs_queue.whlg_calc_adhoc_queue.arn - function_name = aws_lambda_function.whlg_calc_adhoc.arn - batch_size = 1 -} From d98ae7db7b7713ad1fc1a9d7d7d73c1f91cedfb6 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Fri, 14 Nov 2025 14:25:28 +0000 Subject: [PATCH 07/15] added task and sub task interface --- backend/app/db/functions/tasks/Tasks.py | 182 +++++++++++++++++++++++- backend/app/db/models/tasks.py | 36 +++++ backend/app/tasks/__init__.py | 0 backend/app/tasks/router.py | 87 +++++++++++ backend/app/tasks/schema.py | 21 +++ 5 files changed, 320 insertions(+), 6 deletions(-) create mode 100644 backend/app/tasks/__init__.py create mode 100644 backend/app/tasks/router.py create mode 100644 backend/app/tasks/schema.py diff --git a/backend/app/db/functions/tasks/Tasks.py b/backend/app/db/functions/tasks/Tasks.py index d3f06d33..18900c83 100644 --- a/backend/app/db/functions/tasks/Tasks.py +++ b/backend/app/db/functions/tasks/Tasks.py @@ -1,10 +1,180 @@ +from __future__ import annotations -class TasksInterface: - def __init__(self): - pass - +# ---- Standard Library ---- +from typing import Optional, Dict, Any +from datetime import datetime, timezone +from uuid import UUID +import json + +# ---- SQLModel / SQLAlchemy ---- +from sqlmodel import Session, select + +# ---- DB Session ---- +from backend.app.db.connection import get_db_session + +# ---- Models ---- +from backend.app.db.models.tasks import Task, SubTask +# ============================================================ +# SubTask Interface +# ============================================================ class SubTaskInterface: - def __init__(self) - pass \ No newline at end of file + """ + CRUD operations for SubTask + cascading Task progress updates. + """ + + def create_subtask(self, task_id: UUID, inputs: Optional[Dict[str, Any]] = None): + now = datetime.now(timezone.utc) + + with get_db_session() as session: + task = session.get(Task, task_id) + if not task: + raise ValueError(f"Task {task_id} not found") + + subtask = SubTask( + taskId=task_id, + inputs=json.dumps(inputs) if inputs else None, + jobStarted=now, + ) + + session.add(subtask) + session.commit() + session.refresh(subtask) + + # Recalculate the parent task status + self._update_task_progress(session, task_id) + + return subtask + + def update_subtask_status(self, subtask_id: UUID, status: str): + now = datetime.now(timezone.utc) + + with get_db_session() as session: + subtask = session.get(SubTask, subtask_id) + if not subtask: + raise ValueError(f"SubTask {subtask_id} not found") + + normalized = status.lower() + + # Start time + if normalized == "in progress" and subtask.jobStarted is None: + subtask.jobStarted = now + + # Completed time + if normalized == "complete": + subtask.jobCompleted = now + + subtask.status = normalized + subtask.updatedAt = now + + session.add(subtask) + session.commit() + + # Re-evaluate the task + self._update_task_progress(session, subtask.taskId) + + session.refresh(subtask) + return subtask + + # -------------------------------------------------------- + # Task Progress Calculation + # -------------------------------------------------------- + def _update_task_progress(self, session: Session, task_id: UUID): + task = session.get(Task, task_id) + if not task: + return + + subtasks = session.exec( + select(SubTask).where(SubTask.taskId == task_id) + ).all() + + if not subtasks: + return + + statuses = [st.status.lower() for st in subtasks] + now = datetime.now(timezone.utc) + + # Priority: + # failed > in progress > complete + if "failed" in statuses: + task.status = "failed" + task.jobCompleted = now + + elif all(s == "complete" for s in statuses): + task.status = "complete" + task.jobCompleted = now + + else: + task.status = "in progress" + if task.jobStarted is None: + task.jobStarted = now + task.jobCompleted = None # still running + + task.updatedAt = now + session.add(task) + session.commit() + + +# ============================================================ +# Task Interface +# ============================================================ +class TasksInterface: + """ + High-level operations for Task records. + """ + + def create_task( + self, + *, + task_source: str, + service: Optional[str] = None, + inputs: Optional[Dict[str, Any]] = None, + ): + now = datetime.now(timezone.utc) + + # Step 1: Create the task + with get_db_session() as session: + task = Task( + taskSource=task_source, + service=service, + jobStarted=now, + ) + + session.add(task) + session.commit() + session.refresh(task) + + # Step 2: Create first subtask using SubTaskInterface + subtask_interface = SubTaskInterface() + subtask = subtask_interface.create_subtask( + task_id=task.id, + inputs=inputs + ) + + return task.id, subtask.id + + def update_task_status(self, task_id: UUID, status: str): + now = datetime.now(timezone.utc) + + with get_db_session() as session: + task = session.get(Task, task_id) + if not task: + raise ValueError(f"Task {task_id} not found") + + normalized = status.lower() + + if normalized == "in progress" and task.jobStarted is None: + task.jobStarted = now + + if normalized == "complete": + task.jobCompleted = now + + task.status = normalized + task.updatedAt = now + + session.add(task) + session.commit() + session.refresh(task) + + return task diff --git a/backend/app/db/models/tasks.py b/backend/app/db/models/tasks.py index ed5b3710..d8007dcd 100644 --- a/backend/app/db/models/tasks.py +++ b/backend/app/db/models/tasks.py @@ -35,3 +35,39 @@ class Task(SQLModel, table=True): # Relationship subTasks: List["SubTask"] = Relationship(back_populates="task") + + +class SubTask(SQLModel, table=True): + __tablename__ = "sub_task" + + id: UUID = Field( + default_factory=uuid4, + primary_key=True, + index=True, + ) + + taskId: UUID = Field( + foreign_key="tasks.id", + alias="task_id", + ) + + jobStarted: Optional[datetime] = Field( + default=None, alias="job_started" + ) + jobCompleted: Optional[datetime] = Field( + default=None, alias="job_completed" + ) + + status: str = Field(default="In Progress") + + inputs: Optional[str] = None + outputs: Optional[str] = None + cloudLogsURL: Optional[str] = Field(alias="cloud_logs_url") + + updatedAt: datetime = Field( + default_factory=datetime.utcnow, + alias="updated_at", + ) + + # Relationship + task: Optional[Task] = Relationship(back_populates="subTasks") diff --git a/backend/app/tasks/__init__.py b/backend/app/tasks/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/backend/app/tasks/router.py b/backend/app/tasks/router.py new file mode 100644 index 00000000..d324f9ba --- /dev/null +++ b/backend/app/tasks/router.py @@ -0,0 +1,87 @@ +from fastapi import APIRouter, Depends, HTTPException +from uuid import UUID + +from backend.app.dependencies import validate_token +from backend.app.tasks.schema import ( + CreateTaskRequest, + UpdateTaskStatusRequest, + CreateSubTaskRequest, + UpdateSubTaskStatusRequest, +) + +from backend.app.db.functions.tasks.Tasks import TasksInterface, SubTaskInterface +from backend.app.db.connection import get_db_session +from backend.app.db.models.tasks import Task, SubTask +from sqlmodel import select + + +router = APIRouter( + prefix="/tasks", + tags=["tasks"], + dependencies=[Depends(validate_token)], +) + + +@router.post("/", summary="Create a new task and its first subtask") +async def create_task(req: CreateTaskRequest): + tasks = TasksInterface() + task_id, subtask_id = tasks.create_task( + task_source=req.task_source, + service=req.service, + inputs=req.inputs, + ) + return {"task_id": task_id, "subtask_id": subtask_id} + + +@router.get("/{task_id}", summary="Get a task and its subtasks") +async def get_task(task_id: UUID): + with get_db_session() as session: + task = session.get(Task, task_id) + if not task: + raise HTTPException(status_code=404, detail="Task not found") + + subtasks = session.exec( + select(SubTask).where(SubTask.taskId == task_id) + ).all() + + # Deserialize JSON inputs back to dict + formatted = [] + for st in subtasks: + formatted.append({ + **st.dict(), + "inputs": json.loads(st.inputs) if st.inputs else None + }) + + return { + "task": task, + "subtasks": formatted, + } + + +@router.put("/{task_id}/status", summary="Update a task's status") +async def update_task_status(task_id: UUID, req: UpdateTaskStatusRequest): + tasks = TasksInterface() + try: + updated = tasks.update_task_status(task_id, req.status) + return {"task_id": updated.id, "status": updated.status} + except ValueError as e: + raise HTTPException(status_code=404, detail=str(e)) + +@router.post("/{task_id}/subtasks", summary="Create a new subtask under a task") +async def create_subtask(task_id: UUID, req: CreateSubTaskRequest): + subtasks = SubTaskInterface() + try: + st = subtasks.create_subtask(task_id, req.inputs) + return {"subtask_id": st.id, "task_id": task_id} + except ValueError as e: + raise HTTPException(status_code=404, detail=str(e)) + + +@router.put("/subtasks/{subtask_id}/status", summary="Update a subtask's status") +async def update_subtask_status(subtask_id: UUID, req: UpdateSubTaskStatusRequest): + subtasks = SubTaskInterface() + try: + st = subtasks.update_subtask_status(subtask_id, req.status) + return {"subtask_id": st.id, "status": st.status} + except ValueError as e: + raise HTTPException(status_code=404, detail=str(e)) diff --git a/backend/app/tasks/schema.py b/backend/app/tasks/schema.py new file mode 100644 index 00000000..66be61e7 --- /dev/null +++ b/backend/app/tasks/schema.py @@ -0,0 +1,21 @@ +from typing import Optional, Any, Dict +from uuid import UUID +from pydantic import BaseModel + + +class CreateTaskRequest(BaseModel): + task_source: str + service: Optional[str] = None + inputs: Optional[Dict[str, Any]] = None # JSON object + + +class UpdateTaskStatusRequest(BaseModel): + status: str + + +class CreateSubTaskRequest(BaseModel): + inputs: Optional[Dict[str, Any]] = None # JSON object + + +class UpdateSubTaskStatusRequest(BaseModel): + status: str From 47be3ffea37ee03f820c33fcbf0a37c974638594 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Fri, 14 Nov 2025 14:26:17 +0000 Subject: [PATCH 08/15] added tasks interface --- backend/app/main.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/backend/app/main.py b/backend/app/main.py index 261e2f34..f0ab4d86 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -8,6 +8,7 @@ from mangum import Mangum from backend.app.portfolio import router as portfolio_router from backend.app.whlg import router as whlg_router from backend.app.plan import router as plan_router +from backend.app.tasks import router as tasks_router from backend.app.dependencies import validate_api_key from backend.app.config import get_settings @@ -57,6 +58,7 @@ async def log_requests(request: Request, call_next): app.include_router(portfolio_router.router, prefix="/v1") app.include_router(plan_router.router, prefix="/v1") app.include_router(whlg_router.router, prefix="/v1") +app.include_router(tasks_router.router, prefix="/v1") if get_settings().ENVIRONMENT == "local": from app.local import router as local_router From 68a5de28e2b5b34d82e12eb3e1c869c00db4594b Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Fri, 14 Nov 2025 15:02:33 +0000 Subject: [PATCH 09/15] added tasks so Khalim can reeview it --- backend/app/db/functions/tasks/Tasks.py | 155 ++++++++++++++++++++---- backend/app/tasks/router.py | 54 ++++++++- backend/app/tasks/schema.py | 5 + 3 files changed, 189 insertions(+), 25 deletions(-) diff --git a/backend/app/db/functions/tasks/Tasks.py b/backend/app/db/functions/tasks/Tasks.py index 18900c83..06e1c6fe 100644 --- a/backend/app/db/functions/tasks/Tasks.py +++ b/backend/app/db/functions/tasks/Tasks.py @@ -24,6 +24,9 @@ class SubTaskInterface: CRUD operations for SubTask + cascading Task progress updates. """ + # -------------------------------------------------------- + # CREATE SUBTASK + # -------------------------------------------------------- def create_subtask(self, task_id: UUID, inputs: Optional[Dict[str, Any]] = None): now = datetime.now(timezone.utc) @@ -35,18 +38,22 @@ class SubTaskInterface: subtask = SubTask( taskId=task_id, inputs=json.dumps(inputs) if inputs else None, - jobStarted=now, + status="waiting", + jobStarted=None, + jobCompleted=None, ) session.add(subtask) session.commit() session.refresh(subtask) - # Recalculate the parent task status + # Recalculate parent task progress self._update_task_progress(session, task_id) - return subtask + # -------------------------------------------------------- + # UPDATE STATUS (in progress, complete, failed) + # -------------------------------------------------------- def update_subtask_status(self, subtask_id: UUID, status: str): now = datetime.now(timezone.utc) @@ -57,12 +64,12 @@ class SubTaskInterface: normalized = status.lower() - # Start time + # When job really starts if normalized == "in progress" and subtask.jobStarted is None: subtask.jobStarted = now - # Completed time - if normalized == "complete": + # Completed or failed + if normalized in ("complete", "failed"): subtask.jobCompleted = now subtask.status = normalized @@ -71,14 +78,80 @@ class SubTaskInterface: session.add(subtask) session.commit() - # Re-evaluate the task + # Recalculate task status self._update_task_progress(session, subtask.taskId) session.refresh(subtask) return subtask # -------------------------------------------------------- - # Task Progress Calculation + # UPDATE OUTPUTS + # -------------------------------------------------------- + def update_subtask_output(self, subtask_id: UUID, outputs: Dict[str, Any]): + now = datetime.now(timezone.utc) + + with get_db_session() as session: + subtask = session.get(SubTask, subtask_id) + if not subtask: + raise ValueError(f"SubTask {subtask_id} not found") + + subtask.outputs = json.dumps(outputs) + subtask.updatedAt = now + + session.add(subtask) + session.commit() + session.refresh(subtask) + return subtask + + # -------------------------------------------------------- + # UPDATE CLOUD LOGS URL + # -------------------------------------------------------- + def update_subtask_logs(self, subtask_id: UUID, cloud_logs_url: str): + now = datetime.now(timezone.utc) + + with get_db_session() as session: + subtask = session.get(SubTask, subtask_id) + if not subtask: + raise ValueError(f"SubTask {subtask_id} not found") + + subtask.cloudLogsURL = cloud_logs_url + subtask.updatedAt = now + + session.add(subtask) + session.commit() + session.refresh(subtask) + return subtask + + # -------------------------------------------------------- + # SET BOTH OUTPUT + LOGS + # -------------------------------------------------------- + def set_subtask_result( + self, + subtask_id: UUID, + outputs: Optional[Dict[str, Any]] = None, + cloud_logs_url: Optional[str] = None, + ): + now = datetime.now(timezone.utc) + + with get_db_session() as session: + subtask = session.get(SubTask, subtask_id) + if not subtask: + raise ValueError(f"SubTask {subtask_id} not found") + + if outputs is not None: + subtask.outputs = json.dumps(outputs) + + if cloud_logs_url is not None: + subtask.cloudLogsURL = cloud_logs_url + + subtask.updatedAt = now + session.add(subtask) + session.commit() + session.refresh(subtask) + return subtask + + # -------------------------------------------------------- + # TASK PROGRESS CALCULATION # -------------------------------------------------------- def _update_task_progress(self, session: Session, task_id: UUID): task = session.get(Task, task_id) @@ -89,14 +162,9 @@ class SubTaskInterface: select(SubTask).where(SubTask.taskId == task_id) ).all() - if not subtasks: - return - - statuses = [st.status.lower() for st in subtasks] + statuses = [s.status.lower() for s in subtasks] now = datetime.now(timezone.utc) - # Priority: - # failed > in progress > complete if "failed" in statuses: task.status = "failed" task.jobCompleted = now @@ -105,16 +173,61 @@ class SubTaskInterface: task.status = "complete" task.jobCompleted = now - else: + elif "in progress" in statuses: task.status = "in progress" if task.jobStarted is None: task.jobStarted = now - task.jobCompleted = None # still running + + else: + # All waiting + task.status = "waiting" + task.jobStarted = None + task.jobCompleted = None task.updatedAt = now session.add(task) session.commit() + def finalize_subtask( + self, + subtask_id: UUID, + status: str, + outputs: Optional[Dict[str, Any]], + cloud_logs_url: Optional[str] + ): + now = datetime.now(timezone.utc) + + with get_db_session() as session: + subtask = session.get(SubTask, subtask_id) + if not subtask: + raise ValueError(f"SubTask {subtask_id} not found") + + normalized = status.lower() + if normalized not in ("complete", "failed"): + raise ValueError("Status must be 'complete' or 'failed'") + + # Set outputs + if outputs is not None: + subtask.outputs = json.dumps(outputs) + + # Set logs + if cloud_logs_url is not None: + subtask.cloudLogsURL = cloud_logs_url + + # Status + timestamps + subtask.status = normalized + subtask.jobCompleted = now + subtask.updatedAt = now + + session.add(subtask) + session.commit() + + # Update parent task (complete/failed) + self._update_task_progress(session, subtask.taskId) + + session.refresh(subtask) + return subtask + # ============================================================ # Task Interface @@ -133,23 +246,24 @@ class TasksInterface: ): now = datetime.now(timezone.utc) - # Step 1: Create the task with get_db_session() as session: task = Task( taskSource=task_source, service=service, - jobStarted=now, + status="waiting", + jobStarted=None, + jobCompleted=None, ) session.add(task) session.commit() session.refresh(task) - # Step 2: Create first subtask using SubTaskInterface + # Create first subtask in waiting state subtask_interface = SubTaskInterface() subtask = subtask_interface.create_subtask( task_id=task.id, - inputs=inputs + inputs=inputs, ) return task.id, subtask.id @@ -176,5 +290,4 @@ class TasksInterface: session.add(task) session.commit() session.refresh(task) - return task diff --git a/backend/app/tasks/router.py b/backend/app/tasks/router.py index d324f9ba..2a45a303 100644 --- a/backend/app/tasks/router.py +++ b/backend/app/tasks/router.py @@ -1,5 +1,6 @@ from fastapi import APIRouter, Depends, HTTPException from uuid import UUID +import json # ← REQUIRED for json.loads from backend.app.dependencies import validate_token from backend.app.tasks.schema import ( @@ -7,9 +8,12 @@ from backend.app.tasks.schema import ( UpdateTaskStatusRequest, CreateSubTaskRequest, UpdateSubTaskStatusRequest, + FinalizeSubTaskRequest, ) +# Correct location of interfaces from backend.app.db.functions.tasks.Tasks import TasksInterface, SubTaskInterface + from backend.app.db.connection import get_db_session from backend.app.db.models.tasks import Task, SubTask from sqlmodel import select @@ -22,6 +26,9 @@ router = APIRouter( ) +# ============================================================ +# Create Task +# ============================================================ @router.post("/", summary="Create a new task and its first subtask") async def create_task(req: CreateTaskRequest): tasks = TasksInterface() @@ -33,6 +40,9 @@ async def create_task(req: CreateTaskRequest): return {"task_id": task_id, "subtask_id": subtask_id} +# ============================================================ +# Get Task + Subtasks +# ============================================================ @router.get("/{task_id}", summary="Get a task and its subtasks") async def get_task(task_id: UUID): with get_db_session() as session: @@ -44,12 +54,13 @@ async def get_task(task_id: UUID): select(SubTask).where(SubTask.taskId == task_id) ).all() - # Deserialize JSON inputs back to dict formatted = [] for st in subtasks: formatted.append({ **st.dict(), - "inputs": json.loads(st.inputs) if st.inputs else None + "inputs": json.loads(st.inputs) if st.inputs else None, + "outputs": json.loads(st.outputs) if st.outputs else None, + "cloud_logs_url": st.cloudLogsURL, }) return { @@ -58,6 +69,9 @@ async def get_task(task_id: UUID): } +# ============================================================ +# Update Task Status +# ============================================================ @router.put("/{task_id}/status", summary="Update a task's status") async def update_task_status(task_id: UUID, req: UpdateTaskStatusRequest): tasks = TasksInterface() @@ -67,17 +81,24 @@ async def update_task_status(task_id: UUID, req: UpdateTaskStatusRequest): except ValueError as e: raise HTTPException(status_code=404, detail=str(e)) + +# ============================================================ +# Create Additional Subtask +# ============================================================ @router.post("/{task_id}/subtasks", summary="Create a new subtask under a task") async def create_subtask(task_id: UUID, req: CreateSubTaskRequest): subtasks = SubTaskInterface() try: st = subtasks.create_subtask(task_id, req.inputs) - return {"subtask_id": st.id, "task_id": task_id} + return {"subtask_id": st.id, "task_id": task_id, "status": st.status} except ValueError as e: raise HTTPException(status_code=404, detail=str(e)) -@router.put("/subtasks/{subtask_id}/status", summary="Update a subtask's status") +# ============================================================ +# Update Subtask Status +# ============================================================ +@router.put("/subtask/{subtask_id}/status", summary="Update a subtask's status") async def update_subtask_status(subtask_id: UUID, req: UpdateSubTaskStatusRequest): subtasks = SubTaskInterface() try: @@ -85,3 +106,28 @@ async def update_subtask_status(subtask_id: UUID, req: UpdateSubTaskStatusReques return {"subtask_id": st.id, "status": st.status} except ValueError as e: raise HTTPException(status_code=404, detail=str(e)) + + +# === +# Sub task is complete +@router.post("/subtask/{subtask_id}/finalize", summary="Finalize a subtask with status, outputs, logs") +async def finalize_subtask(subtask_id: UUID, req: FinalizeSubTaskRequest): + subtasks = SubTaskInterface() + + try: + st = subtasks.finalize_subtask( + subtask_id=subtask_id, + status=req.status, + outputs=req.outputs, + cloud_logs_url=req.cloud_logs_url + ) + + return { + "subtask_id": st.id, + "status": st.status, + "outputs": req.outputs, + "cloud_logs_url": req.cloud_logs_url, + } + + except ValueError as e: + raise HTTPException(status_code=400, detail=str(e)) diff --git a/backend/app/tasks/schema.py b/backend/app/tasks/schema.py index 66be61e7..b1a923b3 100644 --- a/backend/app/tasks/schema.py +++ b/backend/app/tasks/schema.py @@ -19,3 +19,8 @@ class CreateSubTaskRequest(BaseModel): class UpdateSubTaskStatusRequest(BaseModel): status: str + +class FinalizeSubTaskRequest(BaseModel): + status: str # "complete" or "failed" + outputs: Optional[Dict[str, Any]] = None + cloud_logs_url: Optional[str] = None \ No newline at end of file From d5b7fb21b3e405e20d05acf498d399f602a12bff Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Fri, 14 Nov 2025 15:36:58 +0000 Subject: [PATCH 10/15] add this to dev so i can test --- backend/app/tasks/router.py | 56 +++++++++++++++++++++++++++++++++++++ backend/app/tasks/schema.py | 7 ++++- 2 files changed, 62 insertions(+), 1 deletion(-) diff --git a/backend/app/tasks/router.py b/backend/app/tasks/router.py index 2a45a303..90b62dd1 100644 --- a/backend/app/tasks/router.py +++ b/backend/app/tasks/router.py @@ -9,6 +9,7 @@ from backend.app.tasks.schema import ( CreateSubTaskRequest, UpdateSubTaskStatusRequest, FinalizeSubTaskRequest, + TaskSqsTriggerRequest ) # Correct location of interfaces @@ -131,3 +132,58 @@ async def finalize_subtask(subtask_id: UUID, req: FinalizeSubTaskRequest): except ValueError as e: raise HTTPException(status_code=400, detail=str(e)) + + +# for testing: + +import boto3 +import json +from backend.app.tasks.schema import TaskSqsTriggerRequest +from backend.app.db.functions.tasks.Tasks import TasksInterface, SubTaskInterface +from backend.app.config import get_settings + +sqs = boto3.client("sqs") + +@router.post("/trigger", summary="Create task + subtask and publish to SQS", status_code=202) +async def trigger_task(req: TaskSqsTriggerRequest): + """ + Creates a Task + SubTask, then pushes the SubTask into SQS so a Lambda can process it. + If inputs are empty, automatically replaced with {}. + """ + + settings = get_settings() + + tasks = TasksInterface() + + # ---- Normalize empty inputs ---- + inputs = req.inputs or {} # ensures {} even if null + + # ---- 1. Create Task + SubTask ---- + task_id, subtask_id = tasks.create_task( + task_source=req.task_source, + service=req.service, + inputs=inputs, + ) + + # ---- 2. Prepare SQS payload ---- + sqs_payload = { + "subtask_id": str(subtask_id), + "params": inputs, + } + + try: + response = sqs.send_message( + QueueUrl=f"https://sqs.{settings.AWS_REGION}.amazonaws.com/" + f"{settings.AWS_ACCOUNT_ID}/lambda-example-queue", + MessageBody=json.dumps(sqs_payload) + ) + except Exception as e: + raise HTTPException(status_code=500, detail=f"SQS error: {e}") + + return { + "message": "Task triggered", + "task_id": task_id, + "subtask_id": subtask_id, + "sqs_message_id": response.get("MessageId"), + "inputs_sent": inputs, + } \ No newline at end of file diff --git a/backend/app/tasks/schema.py b/backend/app/tasks/schema.py index b1a923b3..a5b4424b 100644 --- a/backend/app/tasks/schema.py +++ b/backend/app/tasks/schema.py @@ -23,4 +23,9 @@ class UpdateSubTaskStatusRequest(BaseModel): class FinalizeSubTaskRequest(BaseModel): status: str # "complete" or "failed" outputs: Optional[Dict[str, Any]] = None - cloud_logs_url: Optional[str] = None \ No newline at end of file + cloud_logs_url: Optional[str] = None + +class TaskSqsTriggerRequest(BaseModel): + task_source: str + service: Optional[str] = None + inputs: Dict[str, Any] # forwarded into SubTask.inputs + SQS message \ No newline at end of file From 30b7370ede0fbac7b8a7e226acac40c0423d4626 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Fri, 14 Nov 2025 16:32:12 +0000 Subject: [PATCH 11/15] tag numpy --- backend/app/requirements/requirements.txt | 1 + serverless.yml | 2 ++ 2 files changed, 3 insertions(+) diff --git a/backend/app/requirements/requirements.txt b/backend/app/requirements/requirements.txt index 8a151e83..e850948d 100644 --- a/backend/app/requirements/requirements.txt +++ b/backend/app/requirements/requirements.txt @@ -6,6 +6,7 @@ psycopg2-binary==2.9.10 python-jose==3.3.0 cryptography==43.0.3 mangum==0.19.0 +numpy==1.26.4 # AWS boto3==1.35.44 # Data diff --git a/serverless.yml b/serverless.yml index 6eea03eb..b1186191 100644 --- a/serverless.yml +++ b/serverless.yml @@ -39,6 +39,8 @@ custom: pythonRequirements: fileName: backend/app/requirements/requirements.txt dockerizePip: true + useDocker: false + enabled: false customDomain: domainName: api.${self:provider.environment.DOMAIN_NAME} createRoute53Record: true From 56902d48ce2060054c4c0da2473f0001c03e6cb2 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Fri, 14 Nov 2025 16:32:59 +0000 Subject: [PATCH 12/15] tag numpy --- serverless.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/serverless.yml b/serverless.yml index b1186191..6eea03eb 100644 --- a/serverless.yml +++ b/serverless.yml @@ -39,8 +39,6 @@ custom: pythonRequirements: fileName: backend/app/requirements/requirements.txt dockerizePip: true - useDocker: false - enabled: false customDomain: domainName: api.${self:provider.environment.DOMAIN_NAME} createRoute53Record: true From a7dd4b636fc7bdb572fb08052af870712b0e4ff7 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Fri, 14 Nov 2025 16:42:18 +0000 Subject: [PATCH 13/15] we like minimal --- backend/app/requirements/requirements.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/backend/app/requirements/requirements.txt b/backend/app/requirements/requirements.txt index e850948d..7d5fb26b 100644 --- a/backend/app/requirements/requirements.txt +++ b/backend/app/requirements/requirements.txt @@ -6,7 +6,6 @@ psycopg2-binary==2.9.10 python-jose==3.3.0 cryptography==43.0.3 mangum==0.19.0 -numpy==1.26.4 # AWS boto3==1.35.44 # Data @@ -14,7 +13,6 @@ openpyxl==3.1.2 # Basic pytz uvicorn[standard] -pandas ipykernel sqlmodel From 4dc233e010944b1ad7402682edc231f469b5a945 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Fri, 14 Nov 2025 16:43:26 +0000 Subject: [PATCH 14/15] more minmal --- .devcontainer/Dockerfile | 2 +- .devcontainer/requirements.txt | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 .devcontainer/requirements.txt diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index c061c9f8..56c366f4 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -29,7 +29,7 @@ ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1 # Model # ADD asset_list/requirements.txt requirements.txt # FASTAPI backend -ADD backend/app/requirements/requirements.txt requirements.txt +ADD .devcontainer/requirements.txt requirements.txt RUN pip install -r requirements.txt # 5) Workdir diff --git a/.devcontainer/requirements.txt b/.devcontainer/requirements.txt new file mode 100644 index 00000000..d8c51f19 --- /dev/null +++ b/.devcontainer/requirements.txt @@ -0,0 +1,17 @@ +# fastapi +fastapi==0.115.2 +sqlalchemy==2.0.36 +pydantic-settings==2.6.0 +psycopg2-binary==2.9.10 +python-jose==3.3.0 +cryptography==43.0.3 +mangum==0.19.0 +# AWS +boto3==1.35.44 +# Data +openpyxl==3.1.2 +# Basic +pytz +uvicorn[standard] +sqlmodel + From e73046b37b03e7f1169e2362dc16fc2fc267cdd8 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Fri, 14 Nov 2025 16:46:55 +0000 Subject: [PATCH 15/15] more minimal --- backend/app/requirements/requirements.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/backend/app/requirements/requirements.txt b/backend/app/requirements/requirements.txt index 7d5fb26b..dff7a546 100644 --- a/backend/app/requirements/requirements.txt +++ b/backend/app/requirements/requirements.txt @@ -12,7 +12,5 @@ boto3==1.35.44 openpyxl==3.1.2 # Basic pytz -uvicorn[standard] -ipykernel sqlmodel