From 5c2a8f075536a66fd06366e734ca4d14f36524b2 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 9 Sep 2025 10:46:58 +0000 Subject: [PATCH 01/29] walthamforest_etl --- .../walthamforest_etl/docker/.dockerignore | 21 +++++ .../walthamforest_etl/docker/Dockerfile | 25 ++++++ .../lambda/walthamforest_etl/docker/app.py | 2 + .../lambda/walthamforest_etl/docker/ecr.tf | 62 ++++++++++++++ .../lambda/walthamforest_etl/docker/main.tf | 0 .../walthamforest_etl/docker/provider.tf | 15 ++++ deployment/lambda/walthamforest_etl/main.tf | 0 .../lambda/walthamforest_etl/provider.tf | 15 ++++ deployment/lambda/walthamforest_etl/vars.tf | 5 ++ .../walthamforest_etl_lambda.tf | 80 +++++++++++++++++++ 10 files changed, 225 insertions(+) create mode 100644 deployment/lambda/walthamforest_etl/docker/.dockerignore create mode 100644 deployment/lambda/walthamforest_etl/docker/Dockerfile create mode 100644 deployment/lambda/walthamforest_etl/docker/app.py create mode 100644 deployment/lambda/walthamforest_etl/docker/ecr.tf create mode 100644 deployment/lambda/walthamforest_etl/docker/main.tf create mode 100644 deployment/lambda/walthamforest_etl/docker/provider.tf create mode 100644 deployment/lambda/walthamforest_etl/main.tf create mode 100644 deployment/lambda/walthamforest_etl/provider.tf create mode 100644 deployment/lambda/walthamforest_etl/vars.tf create mode 100644 deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf diff --git a/deployment/lambda/walthamforest_etl/docker/.dockerignore b/deployment/lambda/walthamforest_etl/docker/.dockerignore new file mode 100644 index 0000000..d587d34 --- /dev/null +++ b/deployment/lambda/walthamforest_etl/docker/.dockerignore @@ -0,0 +1,21 @@ +# Ignore junk and large files +*.pdf +*.csv +*.xml +*.parquet +*.ipynb +*.mp4 +*.mov +*.jpg +*.png +*.zip +*.tar.gz +__pycache__/ +*.pyc +*.pyo +*.pyd +build/ +dist/ +.etl_cache/ +tests/ +docs/ diff --git a/deployment/lambda/walthamforest_etl/docker/Dockerfile b/deployment/lambda/walthamforest_etl/docker/Dockerfile new file mode 100644 index 0000000..cdd1f8a --- /dev/null +++ b/deployment/lambda/walthamforest_etl/docker/Dockerfile @@ -0,0 +1,25 @@ +FROM public.ecr.aws/lambda/python:3.12 + +# Install Poetry (you could pin a version if you like) +RUN curl -sSL https://install.python-poetry.org | python3 - + +# Add Poetry to PATH +ENV PATH="/root/.local/bin:$PATH" + +# Set working directory +WORKDIR /var/task + +# Copy Poetry files first to leverage Docker layer caching +COPY pyproject.toml poetry.lock README.md ./ +COPY etl/ etl/ + + +# Install dependencies into /var/task +RUN poetry config virtualenvs.create false \ + && poetry install --only main --no-interaction --no-ansi + +# Copy app code +COPY deployment/lambda/extractor_and_loader/docker/app.py ./ + +# Set Lambda handler +CMD ["app.handler"] \ No newline at end of file diff --git a/deployment/lambda/walthamforest_etl/docker/app.py b/deployment/lambda/walthamforest_etl/docker/app.py new file mode 100644 index 0000000..3a4a625 --- /dev/null +++ b/deployment/lambda/walthamforest_etl/docker/app.py @@ -0,0 +1,2 @@ +def handler(event, context): + print("hello world") \ No newline at end of file diff --git a/deployment/lambda/walthamforest_etl/docker/ecr.tf b/deployment/lambda/walthamforest_etl/docker/ecr.tf new file mode 100644 index 0000000..36bec88 --- /dev/null +++ b/deployment/lambda/walthamforest_etl/docker/ecr.tf @@ -0,0 +1,62 @@ +# ECR repo +resource "aws_ecr_repository" "walthamforest_etl" { + name = "walthamforest_etl" +} + +# ECR policy to allow Lambda access +resource "aws_ecr_repository_policy" "walthamforest_etl_ecr_access" { + repository = aws_ecr_repository.walthamforest_etl.name + + policy = jsonencode({ + Version = "2008-10-17", + Statement = [{ + Sid = "AllowLambdaPull", + Effect = "Allow", + Principal = { + Service = "lambda.amazonaws.com" + }, + Action = [ + "ecr:GetDownloadUrlForLayer", + "ecr:BatchGetImage", + "ecr:BatchCheckLayerAvailability" + ] + }] + }) +} + + +# ECR lifecycle policy to delete tagged images older than 14 days +resource "aws_ecr_lifecycle_policy" "walthamforest_etl_loader_lifecycle" { + repository = aws_ecr_repository.walthamforest_etl.name + + policy = jsonencode({ + "rules": [ + { + "rulePriority": 2, + "description": "Expire images older than 14 days", + "selection": { + "tagStatus": "untagged", + "countType": "sinceImagePushed", + "countUnit": "days", + "countNumber": 1 + }, + "action": { + "type": "expire" + } + }, + { + "rulePriority": 1, + "description": "Keep last 5 images", + "selection": { + "tagStatus": "tagged", + "tagPrefixList": ["feature"], + "countType": "imageCountMoreThan", + "countNumber": 5 + }, + "action": { + "type": "expire" + } + } + ] + }) +} \ No newline at end of file diff --git a/deployment/lambda/walthamforest_etl/docker/main.tf b/deployment/lambda/walthamforest_etl/docker/main.tf new file mode 100644 index 0000000..e69de29 diff --git a/deployment/lambda/walthamforest_etl/docker/provider.tf b/deployment/lambda/walthamforest_etl/docker/provider.tf new file mode 100644 index 0000000..e41dcbf --- /dev/null +++ b/deployment/lambda/walthamforest_etl/docker/provider.tf @@ -0,0 +1,15 @@ +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 6.3.0" + } + } + backend "s3" { + bucket = "survey-extractor-tf-state" + region = "eu-west-2" + key = "env:/dev/lambda/ecr/walthamforest_etl.tfstate" + } + + required_version = ">= 1.2.0" +} diff --git a/deployment/lambda/walthamforest_etl/main.tf b/deployment/lambda/walthamforest_etl/main.tf new file mode 100644 index 0000000..e69de29 diff --git a/deployment/lambda/walthamforest_etl/provider.tf b/deployment/lambda/walthamforest_etl/provider.tf new file mode 100644 index 0000000..51eca0c --- /dev/null +++ b/deployment/lambda/walthamforest_etl/provider.tf @@ -0,0 +1,15 @@ +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 6.3.0" + } + } + backend "s3" { + bucket = "survey-extractor-tf-state" + region = "eu-west-2" + key = "env:/dev/lambda/eachlambda/extractor_and_loader_lambda.tfstate" + } + + required_version = ">= 1.2.0" +} diff --git a/deployment/lambda/walthamforest_etl/vars.tf b/deployment/lambda/walthamforest_etl/vars.tf new file mode 100644 index 0000000..ecdf359 --- /dev/null +++ b/deployment/lambda/walthamforest_etl/vars.tf @@ -0,0 +1,5 @@ +variable "lambda_image_tag" { + description = "Docker image tag (e.g. GitHub SHA)" + type = string + default = "local-dev-latest" +} \ No newline at end of file diff --git a/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf b/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf new file mode 100644 index 0000000..2ac9b38 --- /dev/null +++ b/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf @@ -0,0 +1,80 @@ +# Reference existing IAM role +data "aws_iam_role" "lambda_exec_role" { + name = "lambda-exec-role" +} + +# Reference existing ECR repository +data "aws_ecr_repository" "walthamforest_etl_ecr" { + name = "walthamforest_etl_ecr" +} + +# SQS queue for extractor_and_loader +resource "aws_sqs_queue" "walthamforest_etl_queue" { + name = "walthamforest_etl-queue" +} + + +# IAM policy specific to this Lambda +resource "aws_iam_policy" "walthamforest_etl_policy" { + name = "walthamforest_etl-loader-policy" + + policy = jsonencode({ + Version = "2012-10-17", + Statement = [ + { + Effect = "Allow", + Action = [ + "sqs:ReceiveMessage", + "sqs:DeleteMessage", + "sqs:GetQueueAttributes" + ], + Resource = aws_sqs_queue.walthamforest_etl_queue.arn + }, + { + Effect = "Allow", + Action = [ + "ecr:GetDownloadUrlForLayer", + "ecr:BatchGetImage", + "ecr:BatchCheckLayerAvailability" + ], + Resource = data.aws_ecr_repository.walthamforest_etl_ecr.arn + }, + { + Effect = "Allow", + Action = ["ecr:GetAuthorizationToken"], + Resource = "*" + } + ] + }) +} + +resource "aws_iam_role_policy_attachment" "walthamforest_etl_policy_attach" { + role = data.aws_iam_role.lambda_exec_role.name + policy_arn = aws_iam_policy.walthamforest_etl-loader-policy.arn +} + +# Lambda function +resource "aws_lambda_function" "waltham_forest_etl" { + function_name = "walthamforest_etl" + role = data.aws_iam_role.lambda_exec_role.arn + package_type = "Image" + image_uri = "${data.aws_ecr_repository.walthamforest_etl_ecr.repository_url}:${var.lambda_image_tag}" + # Increase timeout (max 900 sec / 15 min) + timeout = 300 # e.g. 5 minutes + + # Increase memory (default 128 MB) + memory_size = 2048 # try 1024 or 2048 MB to start + + # environment { + # variables = { + # DATABASE_URL = "postgresql://postgres:makingwarmhomes@terraform-20250331175522503500000002.cdgzupxvdyp0.eu-west-2.rds.amazonaws.com:5432/surveyDB" + # } + # } +} + +# SQS trigger +resource "aws_lambda_event_source_mapping" "extractor_and_loader_trigger" { + event_source_arn = aws_sqs_queue.walthamforest_etl_queue.arn + function_name = aws_lambda_function.walthamforest_etl.arn + batch_size = 1 +} From 6652553c04afde6c130ddc7c80b14aa14ddd7097 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 9 Sep 2025 15:17:56 +0000 Subject: [PATCH 02/29] pandas to get json information --- .../lambda/walthamforest_etl/docker/app.py | 48 ++++++++++++++++++- .../walthamforest_etl_lambda.tf | 2 +- 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/deployment/lambda/walthamforest_etl/docker/app.py b/deployment/lambda/walthamforest_etl/docker/app.py index 3a4a625..2f82c66 100644 --- a/deployment/lambda/walthamforest_etl/docker/app.py +++ b/deployment/lambda/walthamforest_etl/docker/app.py @@ -1,2 +1,48 @@ +import pandas as pd +import json +from pprint import pprint + def handler(event, context): - print("hello world") \ No newline at end of file + # read data for houses only + df = pd.read_excel("../../home/Downloads/data.xlsx", sheet_name="Houses Asset Data") + + element_cols = [ + "ELEMENT GROUP", "ELEMENT CODE", "ELEMENT CODE DESCRIPTION", + "ATTRIBUTE CODE", "ATTRIBUTE CODE DESCRIPTION", + "ELEMENT DATE VALUE", "ELEMENT NUMERIC VALUE", + "ELEMENT TEXT VALUE", "QUANTITY", + "INSTALL DATE", "REMAINING LIFE", "ELEMENT COMMENTS" + ] + + property_cols = [ + "PROP REF", "Domna", "ADDRESS", "OWNERSHIP", + "PROP STATUS", "PROP TYPE", "PROP SUB TYPE" + ] + + # Group by ADDRESS (and other identifiers if needed) + result = ( + df.groupby(["ADDRESS"]) + .apply(lambda g: { + "property_info": g[property_cols].drop_duplicates().iloc[0].to_dict(), + "elements_info": [ + { + "ELEMENT GROUP": eg_name, + "elements": eg_df.drop(columns=["ELEMENT GROUP"]).to_dict(orient="records") + } + for eg_name, eg_df in g[element_cols].groupby("ELEMENT GROUP") + ] + }) + .reset_index() + .rename(columns={0: "data"}) + ) + + # Convert to list of dicts + records = [] + for _, row in result.iterrows(): + records.append({ + "ADDRESS": row["ADDRESS"], + **row["data"] + }) + + json_output = json.dumps(records, ensure_ascii=False, default=str) + pprint(json_output) diff --git a/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf b/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf index 2ac9b38..2780add 100644 --- a/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf +++ b/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf @@ -73,7 +73,7 @@ resource "aws_lambda_function" "waltham_forest_etl" { } # SQS trigger -resource "aws_lambda_event_source_mapping" "extractor_and_loader_trigger" { +resource "aws_lambda_event_source_mapping" "walthamforest_etl_trigger" { event_source_arn = aws_sqs_queue.walthamforest_etl_queue.arn function_name = aws_lambda_function.walthamforest_etl.arn batch_size = 1 From 006eccf8ecf1e2e5a227da9abbc187168299e458 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 10 Sep 2025 13:42:43 +0000 Subject: [PATCH 03/29] check if lambda is set up correctly --- .github/workflows/lambda_main.yml | 25 ++++++++++++++++++- .../lambda/walthamforest_etl/docker/app.py | 2 ++ .../lambda/walthamforest_etl/docker/ecr.tf | 2 +- .../walthamforest_etl_lambda.tf | 4 +-- 4 files changed, 29 insertions(+), 4 deletions(-) diff --git a/.github/workflows/lambda_main.yml b/.github/workflows/lambda_main.yml index 6330833..c83d0f0 100644 --- a/.github/workflows/lambda_main.yml +++ b/.github/workflows/lambda_main.yml @@ -2,7 +2,7 @@ name: Lambda Main Workflow on: push: - branches: [main, feautre/additional_features_in_condition_report_extraction] + branches: [main, feautre/walthamforest_etl] env: AWS_REGION: eu-west-2 @@ -67,3 +67,26 @@ jobs: git-sha: ${{ github.sha }} git-ref: ${{ github.ref_name }} + + walthamforest-etl: + runs-on: ubuntu-latest + needs: shared-lambda-terraform + permissions: + id-token: write + contents: read + + steps: + - name: Checkout repo + uses: actions/checkout@v4 + - name: Build and deploy WalthamForest ETL + uses: ./.github/workflows/actions/lambda-deploy + with: + lambda_name: walthamforest_etl + dockerfile_path: ./deployment/lambda/walthamforest_etl/docker/Dockerfile + ecr_tf_dir: ./deployment/lambda/walthamforest_etl/docker/ + lambda_tf_dir: ./deployment/lambda/walthamforest_etl/ + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + aws-region: ${{ secrets.AWS_REGION }} + git-sha: ${{ github.sha }} + git-ref: ${{ github.ref_name }} \ No newline at end of file diff --git a/deployment/lambda/walthamforest_etl/docker/app.py b/deployment/lambda/walthamforest_etl/docker/app.py index 2f82c66..1cb261f 100644 --- a/deployment/lambda/walthamforest_etl/docker/app.py +++ b/deployment/lambda/walthamforest_etl/docker/app.py @@ -4,6 +4,8 @@ from pprint import pprint def handler(event, context): # read data for houses only + print("waltham forest set up correctly") + return None df = pd.read_excel("../../home/Downloads/data.xlsx", sheet_name="Houses Asset Data") element_cols = [ diff --git a/deployment/lambda/walthamforest_etl/docker/ecr.tf b/deployment/lambda/walthamforest_etl/docker/ecr.tf index 36bec88..5fceebc 100644 --- a/deployment/lambda/walthamforest_etl/docker/ecr.tf +++ b/deployment/lambda/walthamforest_etl/docker/ecr.tf @@ -1,6 +1,6 @@ # ECR repo resource "aws_ecr_repository" "walthamforest_etl" { - name = "walthamforest_etl" + name = "walthamforest_etl_ecr" } # ECR policy to allow Lambda access diff --git a/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf b/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf index 2780add..c8b776e 100644 --- a/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf +++ b/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf @@ -8,7 +8,7 @@ data "aws_ecr_repository" "walthamforest_etl_ecr" { name = "walthamforest_etl_ecr" } -# SQS queue for extractor_and_loader +# SQS queue resource "aws_sqs_queue" "walthamforest_etl_queue" { name = "walthamforest_etl-queue" } @@ -54,7 +54,7 @@ resource "aws_iam_role_policy_attachment" "walthamforest_etl_policy_attach" { } # Lambda function -resource "aws_lambda_function" "waltham_forest_etl" { +resource "aws_lambda_function" "walthamforest_etl" { function_name = "walthamforest_etl" role = data.aws_iam_role.lambda_exec_role.arn package_type = "Image" From 2a8adde8e7bf1ab451aa18d52aef4f3b5839d5f7 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 10 Sep 2025 13:44:49 +0000 Subject: [PATCH 04/29] get rid of unused automation scripts --- .../hubspot_surveyed_needs_sign_off.yml | 50 ++++++------- .github/workflows/scis_invoice_calculator.yml | 70 +++++++++---------- 2 files changed, 60 insertions(+), 60 deletions(-) diff --git a/.github/workflows/hubspot_surveyed_needs_sign_off.yml b/.github/workflows/hubspot_surveyed_needs_sign_off.yml index cd4db7e..ac747d8 100644 --- a/.github/workflows/hubspot_surveyed_needs_sign_off.yml +++ b/.github/workflows/hubspot_surveyed_needs_sign_off.yml @@ -1,29 +1,29 @@ -name: Surveyed Needs Sign Off Script -on: - schedule: - # - cron: '0 17 * * 1-5' - workflow_dispatch: +# name: Surveyed Needs Sign Off Script +# on: +# schedule: +# # - cron: '0 17 * * 1-5' +# workflow_dispatch: -jobs: - surveyed-needs-sign-off: - runs-on: ubuntu-22.04 - steps: - - uses: actions/checkout@v3 +# jobs: +# surveyed-needs-sign-off: +# runs-on: ubuntu-22.04 +# steps: +# - uses: actions/checkout@v3 - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: '3.12' +# - name: Set up Python +# uses: actions/setup-python@v4 +# with: +# python-version: '3.12' - - name: Install dependencies - run: | - pip install poetry - poetry install --no-root +# - name: Install dependencies +# run: | +# pip install poetry +# poetry install --no-root - - name: run script - run: | - pwd - ls -la - poetry run python etl/hubspot_surveyed_needs_sign_off.py - env: - PYTHONPATH: ${{ github.workspace }} \ No newline at end of file +# - name: run script +# run: | +# pwd +# ls -la +# poetry run python etl/hubspot_surveyed_needs_sign_off.py +# env: +# PYTHONPATH: ${{ github.workspace }} \ No newline at end of file diff --git a/.github/workflows/scis_invoice_calculator.yml b/.github/workflows/scis_invoice_calculator.yml index 66a5461..d0739f7 100644 --- a/.github/workflows/scis_invoice_calculator.yml +++ b/.github/workflows/scis_invoice_calculator.yml @@ -1,39 +1,39 @@ -name: SCIS Invoice Calculator -on: - schedule: - - cron: '0 6 * * *' - workflow_dispatch: +# name: SCIS Invoice Calculator +# on: +# schedule: +# - cron: '0 6 * * *' +# workflow_dispatch: -jobs: - scis_invoice_calculator: - runs-on: ubuntu-22.04 - steps: - - uses: actions/checkout@v3 +# jobs: +# scis_invoice_calculator: +# runs-on: ubuntu-22.04 +# steps: +# - uses: actions/checkout@v3 - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: '3.12' +# - name: Set up Python +# uses: actions/setup-python@v4 +# with: +# python-version: '3.12' - - name: Install dependencies - run: | - pip install poetry - poetry install --no-root +# - name: Install dependencies +# run: | +# pip install poetry +# poetry install --no-root - - name: run script - run: | - bash scis_invoice.sh - env: - PYTHONPATH: ${{ github.workspace }} - SOUTH_COAST_INSULATION_SERVICE_SHAREPOINT_ID: ${{ secrets.SOUTH_COAST_INSULATION_SERVICE_SHAREPOINT_ID }} - JJC_SERVICE_SHAREPOINT_ID: ${{ secrets.JJC_SERVICE_SHAREPOINT_ID }} - BAXTER_KELLY_SERVICE_SHAREPOINT_ID: ${{ secrets.BAXTER_KELLY_SERVICE_SHAREPOINT_ID }} - SGEC_SERVICE_SHAREPOINT_ID: ${{ secrets.SGEC_SERVICE_SHAREPOINT_ID }} - SHAREPOINT_CLIENT_ID: ${{ secrets.SHAREPOINT_CLIENT_ID }} - SHAREPOINT_CLIENT_SECRET: ${{ secrets.SHAREPOINT_CLIENT_SECRET }} - SHAREPOINT_TENANT_ID: ${{ secrets.SHAREPOINT_TENANT_ID }} - - name: Upload Excel file - uses: actions/upload-artifact@v4 - with: - name: my-excel-file - path: survey_data.xlsx \ No newline at end of file +# - name: run script +# run: | +# bash scis_invoice.sh +# env: +# PYTHONPATH: ${{ github.workspace }} +# SOUTH_COAST_INSULATION_SERVICE_SHAREPOINT_ID: ${{ secrets.SOUTH_COAST_INSULATION_SERVICE_SHAREPOINT_ID }} +# JJC_SERVICE_SHAREPOINT_ID: ${{ secrets.JJC_SERVICE_SHAREPOINT_ID }} +# BAXTER_KELLY_SERVICE_SHAREPOINT_ID: ${{ secrets.BAXTER_KELLY_SERVICE_SHAREPOINT_ID }} +# SGEC_SERVICE_SHAREPOINT_ID: ${{ secrets.SGEC_SERVICE_SHAREPOINT_ID }} +# SHAREPOINT_CLIENT_ID: ${{ secrets.SHAREPOINT_CLIENT_ID }} +# SHAREPOINT_CLIENT_SECRET: ${{ secrets.SHAREPOINT_CLIENT_SECRET }} +# SHAREPOINT_TENANT_ID: ${{ secrets.SHAREPOINT_TENANT_ID }} +# - name: Upload Excel file +# uses: actions/upload-artifact@v4 +# with: +# name: my-excel-file +# path: survey_data.xlsx \ No newline at end of file From d1aa4e63b2313d7fd961dbad4fbf883308818e99 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 10 Sep 2025 13:45:18 +0000 Subject: [PATCH 05/29] get rid of unused automation scripts --- .github/workflows/pytest.yml | 64 ++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 7109d8f..5d93010 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -1,42 +1,42 @@ -name: Run Pytest +# name: Run Pytest -on: - push: - branches: - - '**' # Run on all branches - pull_request: - branches: - - main +# on: +# push: +# branches: +# - '**' # Run on all branches +# pull_request: +# branches: +# - main -jobs: - etl-unit-tests: - runs-on: ubuntu-22.04 +# jobs: +# etl-unit-tests: +# runs-on: ubuntu-22.04 - steps: - - name: Checkout Repository - uses: actions/checkout@v4 +# steps: +# - name: Checkout Repository +# uses: actions/checkout@v4 - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.12' +# - name: Set up Python +# uses: actions/setup-python@v5 +# with: +# python-version: '3.12' - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: '3.12' +# - name: Set up Python +# uses: actions/setup-python@v4 +# with: +# python-version: '3.12' - - name: Install dependencies - run: | - pip install poetry - poetry install --no-root +# - name: Install dependencies +# run: | +# pip install poetry +# poetry install --no-root - - name: Run Tests - run: | - poetry run pytest -W ignore::DeprecationWarning - env: - PYTHONPATH: ${{ github.workspace }} +# - name: Run Tests +# run: | +# poetry run pytest -W ignore::DeprecationWarning +# env: +# PYTHONPATH: ${{ github.workspace }} - continue-on-error: ${{ github.event_name == 'push' && github.ref != 'refs/heads/main' }} +# continue-on-error: ${{ github.event_name == 'push' && github.ref != 'refs/heads/main' }} From b61834ffb775a14e978a695e35086225b24a9f96 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 10 Sep 2025 16:37:55 +0100 Subject: [PATCH 06/29] name incorrect --- deployment/lambda/walthamforest_etl/docker/ecr.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deployment/lambda/walthamforest_etl/docker/ecr.tf b/deployment/lambda/walthamforest_etl/docker/ecr.tf index 5fceebc..60d1c6c 100644 --- a/deployment/lambda/walthamforest_etl/docker/ecr.tf +++ b/deployment/lambda/walthamforest_etl/docker/ecr.tf @@ -1,5 +1,5 @@ # ECR repo -resource "aws_ecr_repository" "walthamforest_etl" { +resource "aws_ecr_repository" "walthamforest_etl_ecr" { name = "walthamforest_etl_ecr" } From 0395ca041fc9e9ff9c122ac0b85d5853c1d5df28 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 10 Sep 2025 16:46:38 +0100 Subject: [PATCH 07/29] name incorrect --- deployment/lambda/walthamforest_etl/docker/ecr.tf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deployment/lambda/walthamforest_etl/docker/ecr.tf b/deployment/lambda/walthamforest_etl/docker/ecr.tf index 60d1c6c..343d628 100644 --- a/deployment/lambda/walthamforest_etl/docker/ecr.tf +++ b/deployment/lambda/walthamforest_etl/docker/ecr.tf @@ -5,7 +5,7 @@ resource "aws_ecr_repository" "walthamforest_etl_ecr" { # ECR policy to allow Lambda access resource "aws_ecr_repository_policy" "walthamforest_etl_ecr_access" { - repository = aws_ecr_repository.walthamforest_etl.name + repository = aws_ecr_repository.walthamforest_etl_ecr.name policy = jsonencode({ Version = "2008-10-17", @@ -27,7 +27,7 @@ resource "aws_ecr_repository_policy" "walthamforest_etl_ecr_access" { # ECR lifecycle policy to delete tagged images older than 14 days resource "aws_ecr_lifecycle_policy" "walthamforest_etl_loader_lifecycle" { - repository = aws_ecr_repository.walthamforest_etl.name + repository = aws_ecr_repository.walthamforest_etl_ecr.name policy = jsonencode({ "rules": [ From 0519a70aa8abbd7f4828e9d254a683bd00a78998 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 10 Sep 2025 16:49:02 +0100 Subject: [PATCH 08/29] name incorrect --- deployment/lambda/walthamforest_etl/docker/ecr.tf | 1 + 1 file changed, 1 insertion(+) diff --git a/deployment/lambda/walthamforest_etl/docker/ecr.tf b/deployment/lambda/walthamforest_etl/docker/ecr.tf index 343d628..b9bc58e 100644 --- a/deployment/lambda/walthamforest_etl/docker/ecr.tf +++ b/deployment/lambda/walthamforest_etl/docker/ecr.tf @@ -25,6 +25,7 @@ resource "aws_ecr_repository_policy" "walthamforest_etl_ecr_access" { } + # ECR lifecycle policy to delete tagged images older than 14 days resource "aws_ecr_lifecycle_policy" "walthamforest_etl_loader_lifecycle" { repository = aws_ecr_repository.walthamforest_etl_ecr.name From 08e2e1939285575e9b346826bc7d1d3cdb232463 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 10 Sep 2025 16:54:41 +0100 Subject: [PATCH 09/29] name incorrect --- .github/workflows/actions/lambda-deploy/action.yml | 2 +- .github/workflows/lambda_main.yml | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/actions/lambda-deploy/action.yml b/.github/workflows/actions/lambda-deploy/action.yml index ba19c67..c7bd0b6 100644 --- a/.github/workflows/actions/lambda-deploy/action.yml +++ b/.github/workflows/actions/lambda-deploy/action.yml @@ -2,7 +2,7 @@ name: "Build and Push Lambda Image to ECR" description: "Reusable action for building and pushing lambda Docker image to ECR" inputs: - lambda_name: + ecr_name: description: "Lambda name / ECR repo name" required: true dockerfile_path: diff --git a/.github/workflows/lambda_main.yml b/.github/workflows/lambda_main.yml index c83d0f0..d526d81 100644 --- a/.github/workflows/lambda_main.yml +++ b/.github/workflows/lambda_main.yml @@ -34,7 +34,7 @@ jobs: - name: Build and deploy Lambda example uses: ./.github/workflows/actions/lambda-deploy with: - lambda_name: lambda_example + ecr_name: lambda_example dockerfile_path: ./deployment/lambda/lambda_example/docker/Dockerfile ecr_tf_dir: ./deployment/lambda/lambda_example/docker/ lambda_tf_dir: ./deployment/lambda/lambda_example/ @@ -57,7 +57,7 @@ jobs: - name: Build and deploy Extractor & Loader Lambda uses: ./.github/workflows/actions/lambda-deploy with: - lambda_name: extractor_and_loader + ecr_name: extractor_and_loader dockerfile_path: ./deployment/lambda/extractor_and_loader/docker/Dockerfile ecr_tf_dir: ./deployment/lambda/extractor_and_loader/docker/ lambda_tf_dir: ./deployment/lambda/extractor_and_loader/ @@ -81,7 +81,7 @@ jobs: - name: Build and deploy WalthamForest ETL uses: ./.github/workflows/actions/lambda-deploy with: - lambda_name: walthamforest_etl + ecr_name: walthamforest_etl_ecr dockerfile_path: ./deployment/lambda/walthamforest_etl/docker/Dockerfile ecr_tf_dir: ./deployment/lambda/walthamforest_etl/docker/ lambda_tf_dir: ./deployment/lambda/walthamforest_etl/ From 0571b2836d2cba832429ff64d0a3e9114bf5f290 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 10 Sep 2025 16:57:19 +0100 Subject: [PATCH 10/29] name incorrect --- .github/workflows/actions/lambda-deploy/action.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/actions/lambda-deploy/action.yml b/.github/workflows/actions/lambda-deploy/action.yml index c7bd0b6..3ca0fc8 100644 --- a/.github/workflows/actions/lambda-deploy/action.yml +++ b/.github/workflows/actions/lambda-deploy/action.yml @@ -66,8 +66,8 @@ runs: - name: Build and push Docker image shell: bash run: | - IMAGE_URI=${{ steps.login-ecr.outputs.registry }}/${{ inputs.lambda_name }}:${{ steps.set_tag.outputs.tag }} - echo "Building Docker image for ${{ inputs.lambda_name }}..." + IMAGE_URI=${{ steps.login-ecr.outputs.registry }}/${{ inputs.ecr_name }}:${{ steps.set_tag.outputs.tag }} + echo "Building Docker image for ${{ inputs.ecr_name }}..." docker build -t $IMAGE_URI -f ${{ inputs.dockerfile_path }} . echo "Pushing to ECR..." From 5b9f0f72193c86d937f09b673c122cd3cf0b0a24 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 10 Sep 2025 17:01:23 +0100 Subject: [PATCH 11/29] wrong name --- deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf b/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf index c8b776e..e8e98be 100644 --- a/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf +++ b/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf @@ -50,7 +50,7 @@ resource "aws_iam_policy" "walthamforest_etl_policy" { resource "aws_iam_role_policy_attachment" "walthamforest_etl_policy_attach" { role = data.aws_iam_role.lambda_exec_role.name - policy_arn = aws_iam_policy.walthamforest_etl-loader-policy.arn + policy_arn = aws_iam_policy.walthamforest_etl_policy.arn } # Lambda function From 152e690358810f830b9f9ecd4a9f70a65a5ee4b1 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 10 Sep 2025 17:09:36 +0100 Subject: [PATCH 12/29] wrong name --- deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf | 2 ++ 1 file changed, 2 insertions(+) diff --git a/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf b/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf index e8e98be..22f03aa 100644 --- a/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf +++ b/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf @@ -11,6 +11,8 @@ data "aws_ecr_repository" "walthamforest_etl_ecr" { # SQS queue resource "aws_sqs_queue" "walthamforest_etl_queue" { name = "walthamforest_etl-queue" + visibility_timeout_seconds = 1800 # 30 minutes (>= 300s; ~6x rule of thumb) + receive_wait_time_seconds = 20 # optional: long polling } From 00fe4fea2bec794ff930bbfdb0f4a75e189e650f Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 10 Sep 2025 17:19:50 +0100 Subject: [PATCH 13/29] change visibility' --- .../lambda/extractor_and_loader/extractor_and_loader_lambda.tf | 2 ++ 1 file changed, 2 insertions(+) diff --git a/deployment/lambda/extractor_and_loader/extractor_and_loader_lambda.tf b/deployment/lambda/extractor_and_loader/extractor_and_loader_lambda.tf index ef1c07c..9965b73 100644 --- a/deployment/lambda/extractor_and_loader/extractor_and_loader_lambda.tf +++ b/deployment/lambda/extractor_and_loader/extractor_and_loader_lambda.tf @@ -11,6 +11,8 @@ data "aws_ecr_repository" "extractor_and_loader" { # SQS queue for extractor_and_loader resource "aws_sqs_queue" "extractor_and_loader_queue" { name = "extractor-loader-queue" + visibility_timeout_seconds = 1800 # 30 minutes (>= 300s; ~6x rule of thumb) + receive_wait_time_seconds = 20 # optional: long polling } From 7b02e53f1d48a7971adadff6ca2a85c896b45b30 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 10 Sep 2025 17:31:55 +0100 Subject: [PATCH 14/29] policy exists --- .../walthamforest_etl_lambda.tf | 66 ++++++++++--------- 1 file changed, 35 insertions(+), 31 deletions(-) diff --git a/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf b/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf index 22f03aa..12b9aab 100644 --- a/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf +++ b/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf @@ -16,38 +16,42 @@ resource "aws_sqs_queue" "walthamforest_etl_queue" { } -# IAM policy specific to this Lambda -resource "aws_iam_policy" "walthamforest_etl_policy" { - name = "walthamforest_etl-loader-policy" +# # IAM policy specific to this Lambda +# resource "aws_iam_policy" "walthamforest_etl_policy" { +# name = "walthamforest_etl-loader-policy" - policy = jsonencode({ - Version = "2012-10-17", - Statement = [ - { - Effect = "Allow", - Action = [ - "sqs:ReceiveMessage", - "sqs:DeleteMessage", - "sqs:GetQueueAttributes" - ], - Resource = aws_sqs_queue.walthamforest_etl_queue.arn - }, - { - Effect = "Allow", - Action = [ - "ecr:GetDownloadUrlForLayer", - "ecr:BatchGetImage", - "ecr:BatchCheckLayerAvailability" - ], - Resource = data.aws_ecr_repository.walthamforest_etl_ecr.arn - }, - { - Effect = "Allow", - Action = ["ecr:GetAuthorizationToken"], - Resource = "*" - } - ] - }) +# policy = jsonencode({ +# Version = "2012-10-17", +# Statement = [ +# { +# Effect = "Allow", +# Action = [ +# "sqs:ReceiveMessage", +# "sqs:DeleteMessage", +# "sqs:GetQueueAttributes" +# ], +# Resource = aws_sqs_queue.walthamforest_etl_queue.arn +# }, +# { +# Effect = "Allow", +# Action = [ +# "ecr:GetDownloadUrlForLayer", +# "ecr:BatchGetImage", +# "ecr:BatchCheckLayerAvailability" +# ], +# Resource = data.aws_ecr_repository.walthamforest_etl_ecr.arn +# }, +# { +# Effect = "Allow", +# Action = ["ecr:GetAuthorizationToken"], +# Resource = "*" +# } +# ] +# }) +# } + +data "aws_iam_policy" "walthamforest_etl" { + name = "walthamforest_etl-loader-policy" } resource "aws_iam_role_policy_attachment" "walthamforest_etl_policy_attach" { From 05bf0c132278e055c9fff5d2e6c90c480bc8a4ae Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 10 Sep 2025 17:35:04 +0100 Subject: [PATCH 15/29] terrform includes policy --- .../walthamforest_etl_lambda.tf | 66 +++++++++---------- 1 file changed, 31 insertions(+), 35 deletions(-) diff --git a/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf b/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf index 12b9aab..22f03aa 100644 --- a/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf +++ b/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf @@ -16,42 +16,38 @@ resource "aws_sqs_queue" "walthamforest_etl_queue" { } -# # IAM policy specific to this Lambda -# resource "aws_iam_policy" "walthamforest_etl_policy" { -# name = "walthamforest_etl-loader-policy" - -# policy = jsonencode({ -# Version = "2012-10-17", -# Statement = [ -# { -# Effect = "Allow", -# Action = [ -# "sqs:ReceiveMessage", -# "sqs:DeleteMessage", -# "sqs:GetQueueAttributes" -# ], -# Resource = aws_sqs_queue.walthamforest_etl_queue.arn -# }, -# { -# Effect = "Allow", -# Action = [ -# "ecr:GetDownloadUrlForLayer", -# "ecr:BatchGetImage", -# "ecr:BatchCheckLayerAvailability" -# ], -# Resource = data.aws_ecr_repository.walthamforest_etl_ecr.arn -# }, -# { -# Effect = "Allow", -# Action = ["ecr:GetAuthorizationToken"], -# Resource = "*" -# } -# ] -# }) -# } - -data "aws_iam_policy" "walthamforest_etl" { +# IAM policy specific to this Lambda +resource "aws_iam_policy" "walthamforest_etl_policy" { name = "walthamforest_etl-loader-policy" + + policy = jsonencode({ + Version = "2012-10-17", + Statement = [ + { + Effect = "Allow", + Action = [ + "sqs:ReceiveMessage", + "sqs:DeleteMessage", + "sqs:GetQueueAttributes" + ], + Resource = aws_sqs_queue.walthamforest_etl_queue.arn + }, + { + Effect = "Allow", + Action = [ + "ecr:GetDownloadUrlForLayer", + "ecr:BatchGetImage", + "ecr:BatchCheckLayerAvailability" + ], + Resource = data.aws_ecr_repository.walthamforest_etl_ecr.arn + }, + { + Effect = "Allow", + Action = ["ecr:GetAuthorizationToken"], + Resource = "*" + } + ] + }) } resource "aws_iam_role_policy_attachment" "walthamforest_etl_policy_attach" { From cd34ec2a3b860435d70c5c8bf8445a9650a625be Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 11 Sep 2025 09:00:15 +0000 Subject: [PATCH 16/29] run tf destory first --- .../extractor_and_loader/extractor_and_loader_lambda.tf | 2 -- .../lambda/walthamforest_etl/walthamforest_etl_lambda.tf | 4 +--- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/deployment/lambda/extractor_and_loader/extractor_and_loader_lambda.tf b/deployment/lambda/extractor_and_loader/extractor_and_loader_lambda.tf index 9965b73..ef1c07c 100644 --- a/deployment/lambda/extractor_and_loader/extractor_and_loader_lambda.tf +++ b/deployment/lambda/extractor_and_loader/extractor_and_loader_lambda.tf @@ -11,8 +11,6 @@ data "aws_ecr_repository" "extractor_and_loader" { # SQS queue for extractor_and_loader resource "aws_sqs_queue" "extractor_and_loader_queue" { name = "extractor-loader-queue" - visibility_timeout_seconds = 1800 # 30 minutes (>= 300s; ~6x rule of thumb) - receive_wait_time_seconds = 20 # optional: long polling } diff --git a/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf b/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf index 22f03aa..c538ac3 100644 --- a/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf +++ b/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf @@ -11,8 +11,6 @@ data "aws_ecr_repository" "walthamforest_etl_ecr" { # SQS queue resource "aws_sqs_queue" "walthamforest_etl_queue" { name = "walthamforest_etl-queue" - visibility_timeout_seconds = 1800 # 30 minutes (>= 300s; ~6x rule of thumb) - receive_wait_time_seconds = 20 # optional: long polling } @@ -62,7 +60,7 @@ resource "aws_lambda_function" "walthamforest_etl" { package_type = "Image" image_uri = "${data.aws_ecr_repository.walthamforest_etl_ecr.repository_url}:${var.lambda_image_tag}" # Increase timeout (max 900 sec / 15 min) - timeout = 300 # e.g. 5 minutes + # timeout = 300 # e.g. 5 minutes # Increase memory (default 128 MB) memory_size = 2048 # try 1024 or 2048 MB to start From 93ad138ac1122448424a637cb0975f3298ea9db1 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 11 Sep 2025 09:18:46 +0000 Subject: [PATCH 17/29] modified --- .../extractor_and_loader/extractor_and_loader_lambda.tf | 2 ++ .../lambda/lambda_example/lambda_example_and_config.tf | 4 +++- .../lambda/walthamforest_etl/walthamforest_etl_lambda.tf | 5 ++++- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/deployment/lambda/extractor_and_loader/extractor_and_loader_lambda.tf b/deployment/lambda/extractor_and_loader/extractor_and_loader_lambda.tf index ef1c07c..dd37492 100644 --- a/deployment/lambda/extractor_and_loader/extractor_and_loader_lambda.tf +++ b/deployment/lambda/extractor_and_loader/extractor_and_loader_lambda.tf @@ -6,11 +6,13 @@ data "aws_iam_role" "lambda_exec_role" { # Reference existing ECR repository data "aws_ecr_repository" "extractor_and_loader" { name = "extractor_and_loader" + } # SQS queue for extractor_and_loader resource "aws_sqs_queue" "extractor_and_loader_queue" { name = "extractor-loader-queue" + visibility_timeout_seconds = 1800 # 30 minutes (>= 300s and ~6x Lambda timeout) } diff --git a/deployment/lambda/lambda_example/lambda_example_and_config.tf b/deployment/lambda/lambda_example/lambda_example_and_config.tf index 4f87771..ae71b0c 100644 --- a/deployment/lambda/lambda_example/lambda_example_and_config.tf +++ b/deployment/lambda/lambda_example/lambda_example_and_config.tf @@ -25,7 +25,9 @@ resource "aws_iam_policy" "lambda_example_policy" { Action = [ "sqs:ReceiveMessage", "sqs:DeleteMessage", - "sqs:GetQueueAttributes" + "sqs:GetQueueAttributes", + "sqs:GetQueueUrl", + "sqs:ChangeMessageVisibility" ], Resource = aws_sqs_queue.lambda_example_queue.arn }, diff --git a/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf b/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf index c538ac3..9da8452 100644 --- a/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf +++ b/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf @@ -11,6 +11,7 @@ data "aws_ecr_repository" "walthamforest_etl_ecr" { # SQS queue resource "aws_sqs_queue" "walthamforest_etl_queue" { name = "walthamforest_etl-queue" + visibility_timeout_seconds = 1800 # 30 minutes (>= 300s and ~6x Lambda timeout) } @@ -26,7 +27,9 @@ resource "aws_iam_policy" "walthamforest_etl_policy" { Action = [ "sqs:ReceiveMessage", "sqs:DeleteMessage", - "sqs:GetQueueAttributes" + "sqs:GetQueueAttributes", + "sqs:GetQueueUrl", + "sqs:ChangeMessageVisibility" ], Resource = aws_sqs_queue.walthamforest_etl_queue.arn }, From ae9b9fef8575e4d871aa602c9d74efa39030f089 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 11 Sep 2025 09:19:36 +0000 Subject: [PATCH 18/29] modified --- .../extractor_and_loader/extractor_and_loader_lambda.tf | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/deployment/lambda/extractor_and_loader/extractor_and_loader_lambda.tf b/deployment/lambda/extractor_and_loader/extractor_and_loader_lambda.tf index dd37492..f7acdc3 100644 --- a/deployment/lambda/extractor_and_loader/extractor_and_loader_lambda.tf +++ b/deployment/lambda/extractor_and_loader/extractor_and_loader_lambda.tf @@ -28,7 +28,9 @@ resource "aws_iam_policy" "extractor_loader_policy" { Action = [ "sqs:ReceiveMessage", "sqs:DeleteMessage", - "sqs:GetQueueAttributes" + "sqs:GetQueueAttributes", + "sqs:GetQueueUrl", + "sqs:ChangeMessageVisibility" ], Resource = aws_sqs_queue.extractor_and_loader_queue.arn }, From 0503e857fdabef09fbc458dc56feaabe33d6f4a1 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 11 Sep 2025 14:15:35 +0000 Subject: [PATCH 19/29] change walthamforest name --- .../lambda/walthamforest_etl/docker/ecr.tf | 12 +++---- .../walthamforest_etl_lambda.tf | 32 +++++++++---------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/deployment/lambda/walthamforest_etl/docker/ecr.tf b/deployment/lambda/walthamforest_etl/docker/ecr.tf index b9bc58e..503bb20 100644 --- a/deployment/lambda/walthamforest_etl/docker/ecr.tf +++ b/deployment/lambda/walthamforest_etl/docker/ecr.tf @@ -1,11 +1,11 @@ # ECR repo -resource "aws_ecr_repository" "walthamforest_etl_ecr" { - name = "walthamforest_etl_ecr" +resource "aws_ecr_repository" "walthamforest_etl_adhoc_ecr" { + name = "walthamforest_etl_adhoc_ecr" } # ECR policy to allow Lambda access -resource "aws_ecr_repository_policy" "walthamforest_etl_ecr_access" { - repository = aws_ecr_repository.walthamforest_etl_ecr.name +resource "aws_ecr_repository_policy" "walthamforest_etl_adhoc_ecr_access" { + repository = aws_ecr_repository.walthamforest_etl_adhoc_ecr.name policy = jsonencode({ Version = "2008-10-17", @@ -27,8 +27,8 @@ resource "aws_ecr_repository_policy" "walthamforest_etl_ecr_access" { # ECR lifecycle policy to delete tagged images older than 14 days -resource "aws_ecr_lifecycle_policy" "walthamforest_etl_loader_lifecycle" { - repository = aws_ecr_repository.walthamforest_etl_ecr.name +resource "aws_ecr_lifecycle_policy" "walthamforest_etl_adhoc_loader_lifecycle" { + repository = aws_ecr_repository.walthamforest_etl_adhoc_ecr.name policy = jsonencode({ "rules": [ diff --git a/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf b/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf index 9da8452..8c0ff47 100644 --- a/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf +++ b/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf @@ -4,20 +4,20 @@ data "aws_iam_role" "lambda_exec_role" { } # Reference existing ECR repository -data "aws_ecr_repository" "walthamforest_etl_ecr" { - name = "walthamforest_etl_ecr" +data "aws_ecr_repository" "walthamforest_etl_adhoc_adhoc_adhoc_adhoc_adhoc_ecr" { + name = "walthamforest_etl_adhoc_ecr" } # SQS queue -resource "aws_sqs_queue" "walthamforest_etl_queue" { - name = "walthamforest_etl-queue" +resource "aws_sqs_queue" "walthamforest_etl_adhoc_queue" { + name = "walthamforest_etl_adhoc-queue" visibility_timeout_seconds = 1800 # 30 minutes (>= 300s and ~6x Lambda timeout) } # IAM policy specific to this Lambda -resource "aws_iam_policy" "walthamforest_etl_policy" { - name = "walthamforest_etl-loader-policy" +resource "aws_iam_policy" "walthamforest_etl_adhoc_policy" { + name = "walthamforest_etl_adhoc-loader-policy" policy = jsonencode({ Version = "2012-10-17", @@ -31,7 +31,7 @@ resource "aws_iam_policy" "walthamforest_etl_policy" { "sqs:GetQueueUrl", "sqs:ChangeMessageVisibility" ], - Resource = aws_sqs_queue.walthamforest_etl_queue.arn + Resource = aws_sqs_queue.walthamforest_etl_adhoc_queue.arn }, { Effect = "Allow", @@ -40,7 +40,7 @@ resource "aws_iam_policy" "walthamforest_etl_policy" { "ecr:BatchGetImage", "ecr:BatchCheckLayerAvailability" ], - Resource = data.aws_ecr_repository.walthamforest_etl_ecr.arn + Resource = data.aws_ecr_repository.walthamforest_etl_adhoc_ecr.arn }, { Effect = "Allow", @@ -51,17 +51,17 @@ resource "aws_iam_policy" "walthamforest_etl_policy" { }) } -resource "aws_iam_role_policy_attachment" "walthamforest_etl_policy_attach" { +resource "aws_iam_role_policy_attachment" "walthamforest_etl_adhoc_policy_attach" { role = data.aws_iam_role.lambda_exec_role.name - policy_arn = aws_iam_policy.walthamforest_etl_policy.arn + policy_arn = aws_iam_policy.walthamforest_etl_adhoc_policy.arn } # Lambda function -resource "aws_lambda_function" "walthamforest_etl" { - function_name = "walthamforest_etl" +resource "aws_lambda_function" "walthamforest_etl_adhoc" { + function_name = "walthamforest_etl_adhoc" role = data.aws_iam_role.lambda_exec_role.arn package_type = "Image" - image_uri = "${data.aws_ecr_repository.walthamforest_etl_ecr.repository_url}:${var.lambda_image_tag}" + image_uri = "${data.aws_ecr_repository.walthamforest_etl_adhoc_ecr.repository_url}:${var.lambda_image_tag}" # Increase timeout (max 900 sec / 15 min) # timeout = 300 # e.g. 5 minutes @@ -76,8 +76,8 @@ resource "aws_lambda_function" "walthamforest_etl" { } # SQS trigger -resource "aws_lambda_event_source_mapping" "walthamforest_etl_trigger" { - event_source_arn = aws_sqs_queue.walthamforest_etl_queue.arn - function_name = aws_lambda_function.walthamforest_etl.arn +resource "aws_lambda_event_source_mapping" "walthamforest_etl_adhoc_trigger" { + event_source_arn = aws_sqs_queue.walthamforest_etl_adhoc_queue.arn + function_name = aws_lambda_function.walthamforest_etl_adhoc.arn batch_size = 1 } From f3a3f4765adb215e73969d44bcef934583a389c9 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 11 Sep 2025 15:23:41 +0100 Subject: [PATCH 20/29] re run --- .github/workflows/actions/terraform-deploy/action.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/actions/terraform-deploy/action.yml b/.github/workflows/actions/terraform-deploy/action.yml index 685a0ac..5613329 100644 --- a/.github/workflows/actions/terraform-deploy/action.yml +++ b/.github/workflows/actions/terraform-deploy/action.yml @@ -52,3 +52,4 @@ runs: working-directory: ${{ inputs.working_directory }} shell: bash run: terraform apply -auto-approve tfplan + From eab363432210fda683fed6b06c5396dd929a0f1f Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 11 Sep 2025 15:26:42 +0100 Subject: [PATCH 21/29] redo --- .github/workflows/lambda_main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lambda_main.yml b/.github/workflows/lambda_main.yml index d526d81..15f5991 100644 --- a/.github/workflows/lambda_main.yml +++ b/.github/workflows/lambda_main.yml @@ -81,7 +81,7 @@ jobs: - name: Build and deploy WalthamForest ETL uses: ./.github/workflows/actions/lambda-deploy with: - ecr_name: walthamforest_etl_ecr + ecr_name: walthamforest_etl_adhoc_ecr dockerfile_path: ./deployment/lambda/walthamforest_etl/docker/Dockerfile ecr_tf_dir: ./deployment/lambda/walthamforest_etl/docker/ lambda_tf_dir: ./deployment/lambda/walthamforest_etl/ From 6dd3ee1643e472ed39173c6b181d7b0adaee1e2f Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 11 Sep 2025 15:57:41 +0100 Subject: [PATCH 22/29] waltham forest --- .../extractor_and_loader_lambda.tf | 68 +++++++++-------- .../lambda_example_and_config.tf | 71 +++++++++--------- .../walthamforest_etl_lambda.tf | 73 ++++++++++--------- 3 files changed, 113 insertions(+), 99 deletions(-) diff --git a/deployment/lambda/extractor_and_loader/extractor_and_loader_lambda.tf b/deployment/lambda/extractor_and_loader/extractor_and_loader_lambda.tf index f7acdc3..193ae27 100644 --- a/deployment/lambda/extractor_and_loader/extractor_and_loader_lambda.tf +++ b/deployment/lambda/extractor_and_loader/extractor_and_loader_lambda.tf @@ -17,39 +17,43 @@ resource "aws_sqs_queue" "extractor_and_loader_queue" { # IAM policy specific to this Lambda -resource "aws_iam_policy" "extractor_loader_policy" { - name = "extractor-loader-policy" +# resource "aws_iam_policy" "extractor_loader_policy" { +# name = "extractor-loader-policy" - policy = jsonencode({ - Version = "2012-10-17", - Statement = [ - { - Effect = "Allow", - Action = [ - "sqs:ReceiveMessage", - "sqs:DeleteMessage", - "sqs:GetQueueAttributes", - "sqs:GetQueueUrl", - "sqs:ChangeMessageVisibility" - ], - Resource = aws_sqs_queue.extractor_and_loader_queue.arn - }, - { - Effect = "Allow", - Action = [ - "ecr:GetDownloadUrlForLayer", - "ecr:BatchGetImage", - "ecr:BatchCheckLayerAvailability" - ], - Resource = data.aws_ecr_repository.extractor_and_loader.arn - }, - { - Effect = "Allow", - Action = ["ecr:GetAuthorizationToken"], - Resource = "*" - } - ] - }) +# policy = jsonencode({ +# Version = "2012-10-17", +# Statement = [ +# { +# Effect = "Allow", +# Action = [ +# "sqs:ReceiveMessage", +# "sqs:DeleteMessage", +# "sqs:GetQueueAttributes", +# "sqs:GetQueueUrl", +# "sqs:ChangeMessageVisibility" +# ], +# Resource = aws_sqs_queue.extractor_and_loader_queue.arn +# }, +# { +# Effect = "Allow", +# Action = [ +# "ecr:GetDownloadUrlForLayer", +# "ecr:BatchGetImage", +# "ecr:BatchCheckLayerAvailability" +# ], +# Resource = data.aws_ecr_repository.extractor_and_loader.arn +# }, +# { +# Effect = "Allow", +# Action = ["ecr:GetAuthorizationToken"], +# Resource = "*" +# } +# ] +# }) +# } +data "aws_iam_policy" "extractor_loader_policy" { + # Existing customer-managed policy ARN: + arn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:policy/extractor-loader-policy" } resource "aws_iam_role_policy_attachment" "extractor_loader_policy_attach" { diff --git a/deployment/lambda/lambda_example/lambda_example_and_config.tf b/deployment/lambda/lambda_example/lambda_example_and_config.tf index ae71b0c..94259a9 100644 --- a/deployment/lambda/lambda_example/lambda_example_and_config.tf +++ b/deployment/lambda/lambda_example/lambda_example_and_config.tf @@ -13,40 +13,45 @@ resource "aws_sqs_queue" "lambda_example_queue" { name = "lambda-example-queue" } -# Custom IAM policy specific to lambda_example -resource "aws_iam_policy" "lambda_example_policy" { - name = "lambda-example-policy" +# # Custom IAM policy specific to lambda_example +# resource "aws_iam_policy" "lambda_example_policy" { +# name = "lambda-example-policy" - policy = jsonencode({ - Version = "2012-10-17", - Statement = [ - { - Effect = "Allow", - Action = [ - "sqs:ReceiveMessage", - "sqs:DeleteMessage", - "sqs:GetQueueAttributes", - "sqs:GetQueueUrl", - "sqs:ChangeMessageVisibility" - ], - Resource = aws_sqs_queue.lambda_example_queue.arn - }, - { - Effect = "Allow", - Action = [ - "ecr:GetDownloadUrlForLayer", - "ecr:BatchGetImage", - "ecr:BatchCheckLayerAvailability" - ], - Resource = data.aws_ecr_repository.lambda_example.arn - }, - { - Effect = "Allow", - Action = ["ecr:GetAuthorizationToken"], - Resource = "*" - } - ] - }) +# policy = jsonencode({ +# Version = "2012-10-17", +# Statement = [ +# { +# Effect = "Allow", +# Action = [ +# "sqs:ReceiveMessage", +# "sqs:DeleteMessage", +# "sqs:GetQueueAttributes", +# "sqs:GetQueueUrl", +# "sqs:ChangeMessageVisibility" +# ], +# Resource = aws_sqs_queue.lambda_example_queue.arn +# }, +# { +# Effect = "Allow", +# Action = [ +# "ecr:GetDownloadUrlForLayer", +# "ecr:BatchGetImage", +# "ecr:BatchCheckLayerAvailability" +# ], +# Resource = data.aws_ecr_repository.lambda_example.arn +# }, +# { +# Effect = "Allow", +# Action = ["ecr:GetAuthorizationToken"], +# Resource = "*" +# } +# ] +# }) +# } + +data "aws_iam_policy" "lambda_example_policy" { + # Existing customer-managed policy ARN: + arn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:policy/lambda_example_policy" } resource "aws_iam_role_policy_attachment" "lambda_example_policy_attach" { diff --git a/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf b/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf index 8c0ff47..e188c96 100644 --- a/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf +++ b/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf @@ -4,7 +4,7 @@ data "aws_iam_role" "lambda_exec_role" { } # Reference existing ECR repository -data "aws_ecr_repository" "walthamforest_etl_adhoc_adhoc_adhoc_adhoc_adhoc_ecr" { +data "aws_ecr_repository" "walthamforest_etl_adhoc_ecr" { name = "walthamforest_etl_adhoc_ecr" } @@ -15,42 +15,47 @@ resource "aws_sqs_queue" "walthamforest_etl_adhoc_queue" { } -# IAM policy specific to this Lambda -resource "aws_iam_policy" "walthamforest_etl_adhoc_policy" { - name = "walthamforest_etl_adhoc-loader-policy" +# # IAM policy specific to this Lambda +# resource "aws_iam_policy" "walthamforest_etl_adhoc_policy" { +# name = "walthamforest_etl_adhoc-loader-policy" - policy = jsonencode({ - Version = "2012-10-17", - Statement = [ - { - Effect = "Allow", - Action = [ - "sqs:ReceiveMessage", - "sqs:DeleteMessage", - "sqs:GetQueueAttributes", - "sqs:GetQueueUrl", - "sqs:ChangeMessageVisibility" - ], - Resource = aws_sqs_queue.walthamforest_etl_adhoc_queue.arn - }, - { - Effect = "Allow", - Action = [ - "ecr:GetDownloadUrlForLayer", - "ecr:BatchGetImage", - "ecr:BatchCheckLayerAvailability" - ], - Resource = data.aws_ecr_repository.walthamforest_etl_adhoc_ecr.arn - }, - { - Effect = "Allow", - Action = ["ecr:GetAuthorizationToken"], - Resource = "*" - } - ] - }) +# policy = jsonencode({ +# Version = "2012-10-17", +# Statement = [ +# { +# Effect = "Allow", +# Action = [ +# "sqs:ReceiveMessage", +# "sqs:DeleteMessage", +# "sqs:GetQueueAttributes", +# "sqs:GetQueueUrl", +# "sqs:ChangeMessageVisibility" +# ], +# Resource = aws_sqs_queue.walthamforest_etl_adhoc_queue.arn +# }, +# { +# Effect = "Allow", +# Action = [ +# "ecr:GetDownloadUrlForLayer", +# "ecr:BatchGetImage", +# "ecr:BatchCheckLayerAvailability" +# ], +# Resource = data.aws_ecr_repository.walthamforest_etl_adhoc_ecr.arn +# }, +# { +# Effect = "Allow", +# Action = ["ecr:GetAuthorizationToken"], +# Resource = "*" +# } +# ] +# }) +# } +data "aws_iam_policy" "walthamforest_etl_adhoc_policy" { + # Existing customer-managed policy ARN: + arn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:policy/walthamforest_etl_adhoc_policy" } + resource "aws_iam_role_policy_attachment" "walthamforest_etl_adhoc_policy_attach" { role = data.aws_iam_role.lambda_exec_role.name policy_arn = aws_iam_policy.walthamforest_etl_adhoc_policy.arn From 04f0708b15b87b64655c7977b818e9fcd38d2e5d Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 11 Sep 2025 15:01:47 +0000 Subject: [PATCH 23/29] walthamforest --- .../extractor_and_loader_lambda.tf | 70 +++++++++--------- .../lambda_example_and_config.tf | 73 +++++++++---------- .../walthamforest_etl_lambda.tf | 71 +++++++++--------- 3 files changed, 101 insertions(+), 113 deletions(-) diff --git a/deployment/lambda/extractor_and_loader/extractor_and_loader_lambda.tf b/deployment/lambda/extractor_and_loader/extractor_and_loader_lambda.tf index 193ae27..c5996fe 100644 --- a/deployment/lambda/extractor_and_loader/extractor_and_loader_lambda.tf +++ b/deployment/lambda/extractor_and_loader/extractor_and_loader_lambda.tf @@ -16,44 +16,40 @@ resource "aws_sqs_queue" "extractor_and_loader_queue" { } -# IAM policy specific to this Lambda -# resource "aws_iam_policy" "extractor_loader_policy" { -# name = "extractor-loader-policy" +# Custom IAM policy specific to lambda_example +resource "aws_iam_policy" "extractor_loader_policy" { + name = "extractor_loader_policy" -# policy = jsonencode({ -# Version = "2012-10-17", -# Statement = [ -# { -# Effect = "Allow", -# Action = [ -# "sqs:ReceiveMessage", -# "sqs:DeleteMessage", -# "sqs:GetQueueAttributes", -# "sqs:GetQueueUrl", -# "sqs:ChangeMessageVisibility" -# ], -# Resource = aws_sqs_queue.extractor_and_loader_queue.arn -# }, -# { -# Effect = "Allow", -# Action = [ -# "ecr:GetDownloadUrlForLayer", -# "ecr:BatchGetImage", -# "ecr:BatchCheckLayerAvailability" -# ], -# Resource = data.aws_ecr_repository.extractor_and_loader.arn -# }, -# { -# Effect = "Allow", -# Action = ["ecr:GetAuthorizationToken"], -# Resource = "*" -# } -# ] -# }) -# } -data "aws_iam_policy" "extractor_loader_policy" { - # Existing customer-managed policy ARN: - arn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:policy/extractor-loader-policy" + policy = jsonencode({ + Version = "2012-10-17", + Statement = [ + { + Effect = "Allow", + Action = [ + "sqs:ReceiveMessage", + "sqs:DeleteMessage", + "sqs:GetQueueAttributes", + "sqs:GetQueueUrl", + "sqs:ChangeMessageVisibility" + ], + Resource = aws_sqs_queue.lambda_example_queue.arn + }, + { + Effect = "Allow", + Action = [ + "ecr:GetDownloadUrlForLayer", + "ecr:BatchGetImage", + "ecr:BatchCheckLayerAvailability" + ], + Resource = data.aws_ecr_repository.lambda_example.arn + }, + { + Effect = "Allow", + Action = ["ecr:GetAuthorizationToken"], + Resource = "*" + } + ] + }) } resource "aws_iam_role_policy_attachment" "extractor_loader_policy_attach" { diff --git a/deployment/lambda/lambda_example/lambda_example_and_config.tf b/deployment/lambda/lambda_example/lambda_example_and_config.tf index 94259a9..5e52a05 100644 --- a/deployment/lambda/lambda_example/lambda_example_and_config.tf +++ b/deployment/lambda/lambda_example/lambda_example_and_config.tf @@ -13,47 +13,44 @@ resource "aws_sqs_queue" "lambda_example_queue" { name = "lambda-example-queue" } -# # Custom IAM policy specific to lambda_example -# resource "aws_iam_policy" "lambda_example_policy" { -# name = "lambda-example-policy" +# Custom IAM policy specific to lambda_example +resource "aws_iam_policy" "lambda_example_policy" { + name = "lambda-example-policy" -# policy = jsonencode({ -# Version = "2012-10-17", -# Statement = [ -# { -# Effect = "Allow", -# Action = [ -# "sqs:ReceiveMessage", -# "sqs:DeleteMessage", -# "sqs:GetQueueAttributes", -# "sqs:GetQueueUrl", -# "sqs:ChangeMessageVisibility" -# ], -# Resource = aws_sqs_queue.lambda_example_queue.arn -# }, -# { -# Effect = "Allow", -# Action = [ -# "ecr:GetDownloadUrlForLayer", -# "ecr:BatchGetImage", -# "ecr:BatchCheckLayerAvailability" -# ], -# Resource = data.aws_ecr_repository.lambda_example.arn -# }, -# { -# Effect = "Allow", -# Action = ["ecr:GetAuthorizationToken"], -# Resource = "*" -# } -# ] -# }) -# } - -data "aws_iam_policy" "lambda_example_policy" { - # Existing customer-managed policy ARN: - arn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:policy/lambda_example_policy" + policy = jsonencode({ + Version = "2012-10-17", + Statement = [ + { + Effect = "Allow", + Action = [ + "sqs:ReceiveMessage", + "sqs:DeleteMessage", + "sqs:GetQueueAttributes", + "sqs:GetQueueUrl", + "sqs:ChangeMessageVisibility" + ], + Resource = aws_sqs_queue.lambda_example_queue.arn + }, + { + Effect = "Allow", + Action = [ + "ecr:GetDownloadUrlForLayer", + "ecr:BatchGetImage", + "ecr:BatchCheckLayerAvailability" + ], + Resource = data.aws_ecr_repository.lambda_example.arn + }, + { + Effect = "Allow", + Action = ["ecr:GetAuthorizationToken"], + Resource = "*" + } + ] + }) } + + resource "aws_iam_role_policy_attachment" "lambda_example_policy_attach" { role = data.aws_iam_role.lambda_exec_role.name policy_arn = aws_iam_policy.lambda_example_policy.arn diff --git a/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf b/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf index e188c96..2e8ffd3 100644 --- a/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf +++ b/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf @@ -15,47 +15,42 @@ resource "aws_sqs_queue" "walthamforest_etl_adhoc_queue" { } -# # IAM policy specific to this Lambda -# resource "aws_iam_policy" "walthamforest_etl_adhoc_policy" { -# name = "walthamforest_etl_adhoc-loader-policy" +# Custom IAM policy specific to lambda_example +resource "aws_iam_policy" "walthamforest_etl_adhoc_policy" { + name = "lambda-example-policy" -# policy = jsonencode({ -# Version = "2012-10-17", -# Statement = [ -# { -# Effect = "Allow", -# Action = [ -# "sqs:ReceiveMessage", -# "sqs:DeleteMessage", -# "sqs:GetQueueAttributes", -# "sqs:GetQueueUrl", -# "sqs:ChangeMessageVisibility" -# ], -# Resource = aws_sqs_queue.walthamforest_etl_adhoc_queue.arn -# }, -# { -# Effect = "Allow", -# Action = [ -# "ecr:GetDownloadUrlForLayer", -# "ecr:BatchGetImage", -# "ecr:BatchCheckLayerAvailability" -# ], -# Resource = data.aws_ecr_repository.walthamforest_etl_adhoc_ecr.arn -# }, -# { -# Effect = "Allow", -# Action = ["ecr:GetAuthorizationToken"], -# Resource = "*" -# } -# ] -# }) -# } -data "aws_iam_policy" "walthamforest_etl_adhoc_policy" { - # Existing customer-managed policy ARN: - arn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:policy/walthamforest_etl_adhoc_policy" + policy = jsonencode({ + Version = "2012-10-17", + Statement = [ + { + Effect = "Allow", + Action = [ + "sqs:ReceiveMessage", + "sqs:DeleteMessage", + "sqs:GetQueueAttributes", + "sqs:GetQueueUrl", + "sqs:ChangeMessageVisibility" + ], + Resource = aws_sqs_queue.lambda_example_queue.arn + }, + { + Effect = "Allow", + Action = [ + "ecr:GetDownloadUrlForLayer", + "ecr:BatchGetImage", + "ecr:BatchCheckLayerAvailability" + ], + Resource = data.aws_ecr_repository.lambda_example.arn + }, + { + Effect = "Allow", + Action = ["ecr:GetAuthorizationToken"], + Resource = "*" + } + ] + }) } - resource "aws_iam_role_policy_attachment" "walthamforest_etl_adhoc_policy_attach" { role = data.aws_iam_role.lambda_exec_role.name policy_arn = aws_iam_policy.walthamforest_etl_adhoc_policy.arn From 4fb1d9c24a16bedfce53ccb968f5f118fb18dbb4 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 11 Sep 2025 15:07:14 +0000 Subject: [PATCH 24/29] walthamforest --- .../extractor_and_loader/extractor_and_loader_lambda.tf | 4 ++-- .../lambda/walthamforest_etl/walthamforest_etl_lambda.tf | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/deployment/lambda/extractor_and_loader/extractor_and_loader_lambda.tf b/deployment/lambda/extractor_and_loader/extractor_and_loader_lambda.tf index c5996fe..73f6da9 100644 --- a/deployment/lambda/extractor_and_loader/extractor_and_loader_lambda.tf +++ b/deployment/lambda/extractor_and_loader/extractor_and_loader_lambda.tf @@ -32,7 +32,7 @@ resource "aws_iam_policy" "extractor_loader_policy" { "sqs:GetQueueUrl", "sqs:ChangeMessageVisibility" ], - Resource = aws_sqs_queue.lambda_example_queue.arn + Resource = aws_sqs_queue.extractor_and_loader_queue.arn }, { Effect = "Allow", @@ -41,7 +41,7 @@ resource "aws_iam_policy" "extractor_loader_policy" { "ecr:BatchGetImage", "ecr:BatchCheckLayerAvailability" ], - Resource = data.aws_ecr_repository.lambda_example.arn + Resource = data.aws_ecr_repository.extractor_and_loader.arn }, { Effect = "Allow", diff --git a/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf b/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf index 2e8ffd3..88a9198 100644 --- a/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf +++ b/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf @@ -31,7 +31,7 @@ resource "aws_iam_policy" "walthamforest_etl_adhoc_policy" { "sqs:GetQueueUrl", "sqs:ChangeMessageVisibility" ], - Resource = aws_sqs_queue.lambda_example_queue.arn + Resource = aws_sqs_queue.walthamforest_etl_adhoc_queue.arn }, { Effect = "Allow", @@ -40,7 +40,7 @@ resource "aws_iam_policy" "walthamforest_etl_adhoc_policy" { "ecr:BatchGetImage", "ecr:BatchCheckLayerAvailability" ], - Resource = data.aws_ecr_repository.lambda_example.arn + Resource = data.aws_ecr_repository.walthamforest_etl_adhoc_ecr.arn }, { Effect = "Allow", From cf6213c57f65e94168611d4e870e30387ba1460a Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 11 Sep 2025 15:10:29 +0000 Subject: [PATCH 25/29] test --- .devcontainer/docker-compose.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.devcontainer/docker-compose.yml b/.devcontainer/docker-compose.yml index 24893e8..a0d477b 100644 --- a/.devcontainer/docker-compose.yml +++ b/.devcontainer/docker-compose.yml @@ -47,3 +47,4 @@ networks: volumes: postgres-data: + From d7d8bd7f57f8d341d44ac033b1b15cab6e072477 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 11 Sep 2025 15:12:56 +0000 Subject: [PATCH 26/29] ssh agent work --- .devcontainer/devcontainer.json | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index dac0087..87033e9 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -5,9 +5,14 @@ "remoteUser": "vscode", "workspaceFolder": "/workspaces/survey-extractor", "postStartCommand": "bash .devcontainer/post-install.sh", + "remoteEnv": { + "SSH_AUTH_SOCK": "/ssh-agent" + }, "mounts": [ // Optional, just makes getting from Downloads (local env) easier - "source=${localEnv:HOME},target=/workspaces/home,type=bind" + "source=${localEnv:HOME},target=/workspaces/home,type=bind", + "source=${localEnv:SSH_AUTH_SOCK},target=/ssh-agent,type=bind" + ], "customizations": { "vscode": { From 3a8c8d979d567fc8df244b1f397ee3a309d49ba0 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 11 Sep 2025 16:42:12 +0100 Subject: [PATCH 27/29] used the same state! idiot --- .devcontainer/devcontainer.json | 15 ++++++++------- .../extractor_and_loader_lambda.tf | 2 +- deployment/lambda/walthamforest_etl/provider.tf | 2 +- .../walthamforest_etl/walthamforest_etl_lambda.tf | 2 +- 4 files changed, 11 insertions(+), 10 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 87033e9..af1f24d 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -5,15 +5,16 @@ "remoteUser": "vscode", "workspaceFolder": "/workspaces/survey-extractor", "postStartCommand": "bash .devcontainer/post-install.sh", - "remoteEnv": { - "SSH_AUTH_SOCK": "/ssh-agent" - }, - "mounts": [ - // Optional, just makes getting from Downloads (local env) easier - "source=${localEnv:HOME},target=/workspaces/home,type=bind", - "source=${localEnv:SSH_AUTH_SOCK},target=/ssh-agent,type=bind" + "features": { + "ghcr.io/devcontainers/features/ssh-agent:1": {} + }, + + "mounts": [ + // Optional convenience mount + "source=${localEnv:HOME},target=/workspaces/home,type=bind" ], + "customizations": { "vscode": { "settings": { diff --git a/deployment/lambda/extractor_and_loader/extractor_and_loader_lambda.tf b/deployment/lambda/extractor_and_loader/extractor_and_loader_lambda.tf index 73f6da9..6e3ecbf 100644 --- a/deployment/lambda/extractor_and_loader/extractor_and_loader_lambda.tf +++ b/deployment/lambda/extractor_and_loader/extractor_and_loader_lambda.tf @@ -59,7 +59,7 @@ resource "aws_iam_role_policy_attachment" "extractor_loader_policy_attach" { # Lambda function resource "aws_lambda_function" "extractor_and_loader" { - function_name = "extractor-and-loader" + function_name = "extractor-and-loader-lambda" role = data.aws_iam_role.lambda_exec_role.arn package_type = "Image" image_uri = "${data.aws_ecr_repository.extractor_and_loader.repository_url}:${var.lambda_image_tag}" diff --git a/deployment/lambda/walthamforest_etl/provider.tf b/deployment/lambda/walthamforest_etl/provider.tf index 51eca0c..7100c0d 100644 --- a/deployment/lambda/walthamforest_etl/provider.tf +++ b/deployment/lambda/walthamforest_etl/provider.tf @@ -8,7 +8,7 @@ terraform { backend "s3" { bucket = "survey-extractor-tf-state" region = "eu-west-2" - key = "env:/dev/lambda/eachlambda/extractor_and_loader_lambda.tfstate" + key = "env:/dev/lambda/eachlambda/walthamforest_etl_lambda.tfstate" } required_version = ">= 1.2.0" diff --git a/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf b/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf index 88a9198..a4eedc8 100644 --- a/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf +++ b/deployment/lambda/walthamforest_etl/walthamforest_etl_lambda.tf @@ -17,7 +17,7 @@ resource "aws_sqs_queue" "walthamforest_etl_adhoc_queue" { # Custom IAM policy specific to lambda_example resource "aws_iam_policy" "walthamforest_etl_adhoc_policy" { - name = "lambda-example-policy" + name = "walthamforest_adhoc_policy_lambda" policy = jsonencode({ Version = "2012-10-17", From c112091e0250de5485383d466bcd3ba025009275 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Fri, 12 Sep 2025 10:18:26 +0000 Subject: [PATCH 28/29] dev container update --- .devcontainer/devcontainer.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index af1f24d..7354027 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -7,7 +7,7 @@ "postStartCommand": "bash .devcontainer/post-install.sh", "features": { - "ghcr.io/devcontainers/features/ssh-agent:1": {} + // "ghcr.io/devcontainers/features/ssh-agent:1": {} }, "mounts": [ @@ -34,3 +34,4 @@ } } } + From 8171360881f957a0c1fe7e43bcd06f09d9bd7c75 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 16 Sep 2025 16:15:28 +0000 Subject: [PATCH 29/29] added month end change --- .../lambda/walthamforest_etl/docker/app.py | 132 ++++++++++++++---- etl/month_end_automation_wave_3_layout.py | 6 +- ...onth_end_automation_wave_accent_housing.py | 49 ++++++- 3 files changed, 152 insertions(+), 35 deletions(-) diff --git a/deployment/lambda/walthamforest_etl/docker/app.py b/deployment/lambda/walthamforest_etl/docker/app.py index 1cb261f..535ddd0 100644 --- a/deployment/lambda/walthamforest_etl/docker/app.py +++ b/deployment/lambda/walthamforest_etl/docker/app.py @@ -1,12 +1,13 @@ import pandas as pd import json from pprint import pprint +import os +import copy +from collections import defaultdict +from typing import List, Dict, Any, Union, Optional -def handler(event, context): - # read data for houses only - print("waltham forest set up correctly") - return None - df = pd.read_excel("../../home/Downloads/data.xlsx", sheet_name="Houses Asset Data") +def process_complex(sheet_name, group_key="ADDRESS"): + df = pd.read_excel("../../../../../home/Downloads/data.xlsx", sheet_name=sheet_name) element_cols = [ "ELEMENT GROUP", "ELEMENT CODE", "ELEMENT CODE DESCRIPTION", @@ -17,34 +18,107 @@ def handler(event, context): ] property_cols = [ - "PROP REF", "Domna", "ADDRESS", "OWNERSHIP", + "PROP REF", "ADDRESS", "OWNERSHIP", "PROP STATUS", "PROP TYPE", "PROP SUB TYPE" ] - # Group by ADDRESS (and other identifiers if needed) - result = ( - df.groupby(["ADDRESS"]) - .apply(lambda g: { - "property_info": g[property_cols].drop_duplicates().iloc[0].to_dict(), - "elements_info": [ - { - "ELEMENT GROUP": eg_name, - "elements": eg_df.drop(columns=["ELEMENT GROUP"]).to_dict(orient="records") - } - for eg_name, eg_df in g[element_cols].groupby("ELEMENT GROUP") - ] - }) - .reset_index() - .rename(columns={0: "data"}) - ) - - # Convert to list of dicts + # Prepare output records = [] - for _, row in result.iterrows(): + + # Loop through unique values in group_key (ADDRESS or BLOCK_CODE) + for val in df[group_key].unique(): + g = df[df[group_key] == val] # subset + + property_info = g[property_cols].drop_duplicates().iloc[0].to_dict() + + # build elements dict keyed by ELEMENT CODE DESCRIPTION + elements_dict = {} + for _, row in g[element_cols].drop_duplicates().iterrows(): + key = row["ELEMENT CODE DESCRIPTION"] # could also use "ELEMENT CODE" + elements_dict[key] = row.to_dict() + records.append({ - "ADDRESS": row["ADDRESS"], - **row["data"] + group_key: val, + "property_info": property_info, + "elements": elements_dict }) - json_output = json.dumps(records, ensure_ascii=False, default=str) - pprint(json_output) + return records + +def process_simple(sheet_name): + df = pd.read_excel("../../../../../home/Downloads/data.xlsx", sheet_name=sheet_name) + + records = [] + + for address in df["Address"].unique(): + g = df[df["Address"] == address].drop_duplicates() # subset for that address + row = g.iloc[0] # take first row if multiple + + # build dict of all columns except Address + elements_dict = row.drop(labels=["Address"]).to_dict() + + records.append({ + "ADDRESS": address, + "to_add": elements_dict + }) + + return records + + +def combine_records_by_address( + asset_records: List[Dict[str, Any]], + simple_records: List[Dict[str, Any]], + dest_key: str = "to_add", + unique_identifier="Address" +) -> List[Dict[str, Any]]: + """ + Merge process_house_asset_data() and process_simple() results by ADDRESS. + All columns from simple_records['to_add'] will be merged under dest_key. + """ + # Index inputs by ADDRESS + asset_by_addr = {r["ADDRESS"]: r for r in asset_records} + simple_by_addr = {r["ADDRESS"]: r for r in simple_records} + + merged: List[Dict[str, Any]] = [] + + # Use union of addresses from both sources + all_addresses = set(asset_by_addr) | set(simple_by_addr) + + for addr in sorted(all_addresses): + base = copy.deepcopy(asset_by_addr.get(addr, {"ADDRESS": addr})) + simple = simple_by_addr.get(addr) + + if simple: + base[dest_key] = simple.get("to_add", {}) + + merged.append(base) + + return merged + +def combine_records_for_flats(assets: dict, simple: list) -> dict: + """Attach BLOCK_INFO (from simple[0]) to each asset in assets.""" + if not simple or not isinstance(simple[0], dict): + return assets # nothing to add + + block_info = simple[0] + + for record in assets: + # Make sure record is a dict + record.update({"BLOCK_INFO": block_info}) + + return assets + +def handler(event, context): + # read data for houses only + assets = process_complex("Houses Asset Data") + simple = process_simple("Houses") + houses = combine_records_by_address(assets, simple, dest_key="EPC_DATA") + + # read data for flats + assets = process_complex("Chingford Rd 236-256 Properties") + simple = process_complex("CHINGFORD ROAD 236-254 Asset Bl", "BLOCK_CODE") + flats = combine_records_for_flats(assets, simple) + + + + diff --git a/etl/month_end_automation_wave_3_layout.py b/etl/month_end_automation_wave_3_layout.py index 8b18355..74038ad 100644 --- a/etl/month_end_automation_wave_3_layout.py +++ b/etl/month_end_automation_wave_3_layout.py @@ -256,17 +256,17 @@ for board, all_records in board_to_record.items(): filtered_dfs.append(design2) # Design repetitive simple - design3 = get_df(design, "design invoice type", ["archetype (simple)"], "Design Archetype repetitive") + design3 = get_df(design, "design invoice type", ["repetitive (simple)"], "Design repetitive simple") if not design1.empty: filtered_dfs.append(design3) # Design repetitive complex - design4 = get_df(design, "design invoice type", ["archetype (complex)"], "Design Archetype complex") + design4 = get_df(design, "design invoice type", ["repetitive (complex)"], "Design Repetitive complex") if not design1.empty: filtered_dfs.append(design4) # Design not specified - all_filtered = pd.concat([design1, design2, design3, design4], ignore_index=True) + all_filtered = pd.concat([df for df in (design1, design2, design3, design4) if not df.empty]) design_remaining = design.loc[~design.index.isin(all_filtered.index)] if not design_remaining.empty: design_remaining["job_type"] = "design type not specified" diff --git a/etl/month_end_automation_wave_accent_housing.py b/etl/month_end_automation_wave_accent_housing.py index b13d155..8e04c38 100644 --- a/etl/month_end_automation_wave_accent_housing.py +++ b/etl/month_end_automation_wave_accent_housing.py @@ -15,15 +15,21 @@ board_ids = [ ] empty = "Rate card info missing" - +junte = "ask junte to update" rate_card_data_2502_accent_housing = { "job_type": [ - "First half of MTP", "Second half of MTP", "Full MTP" + "First half of MTP", "Second half of MTP", "Full MTP", "Design Archetype Complex", + "Design Archetype Simple", "Design Repetitive Complex", "Design Repetitive Simple", + "Design Revision", "design type not specified", + ], "rate": [ - 150, 130, 280 + 150, 130, 280, junte, junte, junte, junte, junte, "please ask andreas" ] } +# ToDO +# Design Revision +# Design Check with Andreas rate_card_df = pd.DataFrame(rate_card_data_2502_accent_housing) @@ -91,6 +97,43 @@ full_cost = get_df(df, "mtp invoicing status", ["(v1) full cost mtp to invoice ( if not full_cost.empty: filtered_dfs.append(full_cost) +# Design archetype complex +design = get_df(df, "design invoicing status", ["to invoice"]) +design1 = get_df(design, "design invoice type", ["archetype (complex)"], "Design Archetype Complex") +if not design1.empty : + filtered_dfs.append(design1) + +# Design archetype simple +design2 = get_df(design, "design invoice type", ["archetype (simple)"], "Design Archetype Simple") +if not design1.empty: + filtered_dfs.append(design2) + +# Design repetitive simple +design3 = get_df(design, "design invoice type", ["repetitive (simple)"], "Design repetitive simple") +if not design1.empty: + filtered_dfs.append(design3) + +# Design repetitive complex +design4 = get_df(design, "design invoice type", ["repetitive (complex)"], "Design repetitive complex") +if not design1.empty: + filtered_dfs.append(design4) + +# Design not specified +all_filtered = pd.concat([df for df in (design1, design2, design3, design4) if not df.empty]) +design_remaining = design.loc[~design.index.isin(all_filtered.index)] + +if not design_remaining.empty: + design_remaining["job_type"] = "design type not specified" + filtered_dfs.append(design_remaining) + +# Design Revision +revision_letter = ['a', 'b', 'c', 'd'] +for letter in revision_letter: + design = get_df(df, "design revision invoice", [f"rev. {letter} to invoice"], "Design Revision") + if not design.empty: + filtered_dfs.append(design) + + final_df = pd.concat(filtered_dfs).reset_index(drop=True) final_df["job_type"] = final_df["job_type"].str.lower()