mirror of
https://github.com/Hestia-Homes/survey-extraction.git
synced 2026-06-08 11:17:29 +00:00
Merge pull request #85 from Hestia-Homes/feautre/walthamforest_etl
Feautre/walthamforest etl
This commit is contained in:
commit
a52f3dfb5f
22 changed files with 544 additions and 110 deletions
|
|
@ -5,10 +5,16 @@
|
|||
"remoteUser": "vscode",
|
||||
"workspaceFolder": "/workspaces/survey-extractor",
|
||||
"postStartCommand": "bash .devcontainer/post-install.sh",
|
||||
|
||||
"features": {
|
||||
// "ghcr.io/devcontainers/features/ssh-agent:1": {}
|
||||
},
|
||||
|
||||
"mounts": [
|
||||
// Optional, just makes getting from Downloads (local env) easier
|
||||
// Optional convenience mount
|
||||
"source=${localEnv:HOME},target=/workspaces/home,type=bind"
|
||||
],
|
||||
|
||||
"customizations": {
|
||||
"vscode": {
|
||||
"settings": {
|
||||
|
|
@ -28,3 +34,4 @@
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -47,3 +47,4 @@ networks:
|
|||
|
||||
volumes:
|
||||
postgres-data:
|
||||
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ name: "Build and Push Lambda Image to ECR"
|
|||
description: "Reusable action for building and pushing lambda Docker image to ECR"
|
||||
|
||||
inputs:
|
||||
lambda_name:
|
||||
ecr_name:
|
||||
description: "Lambda name / ECR repo name"
|
||||
required: true
|
||||
dockerfile_path:
|
||||
|
|
@ -66,8 +66,8 @@ runs:
|
|||
- name: Build and push Docker image
|
||||
shell: bash
|
||||
run: |
|
||||
IMAGE_URI=${{ steps.login-ecr.outputs.registry }}/${{ inputs.lambda_name }}:${{ steps.set_tag.outputs.tag }}
|
||||
echo "Building Docker image for ${{ inputs.lambda_name }}..."
|
||||
IMAGE_URI=${{ steps.login-ecr.outputs.registry }}/${{ inputs.ecr_name }}:${{ steps.set_tag.outputs.tag }}
|
||||
echo "Building Docker image for ${{ inputs.ecr_name }}..."
|
||||
docker build -t $IMAGE_URI -f ${{ inputs.dockerfile_path }} .
|
||||
|
||||
echo "Pushing to ECR..."
|
||||
|
|
|
|||
|
|
@ -52,3 +52,4 @@ runs:
|
|||
working-directory: ${{ inputs.working_directory }}
|
||||
shell: bash
|
||||
run: terraform apply -auto-approve tfplan
|
||||
|
||||
|
|
|
|||
|
|
@ -1,29 +1,29 @@
|
|||
name: Surveyed Needs Sign Off Script
|
||||
on:
|
||||
schedule:
|
||||
# - cron: '0 17 * * 1-5'
|
||||
workflow_dispatch:
|
||||
# name: Surveyed Needs Sign Off Script
|
||||
# on:
|
||||
# schedule:
|
||||
# # - cron: '0 17 * * 1-5'
|
||||
# workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
surveyed-needs-sign-off:
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
# jobs:
|
||||
# surveyed-needs-sign-off:
|
||||
# runs-on: ubuntu-22.04
|
||||
# steps:
|
||||
# - uses: actions/checkout@v3
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.12'
|
||||
# - name: Set up Python
|
||||
# uses: actions/setup-python@v4
|
||||
# with:
|
||||
# python-version: '3.12'
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
pip install poetry
|
||||
poetry install --no-root
|
||||
# - name: Install dependencies
|
||||
# run: |
|
||||
# pip install poetry
|
||||
# poetry install --no-root
|
||||
|
||||
- name: run script
|
||||
run: |
|
||||
pwd
|
||||
ls -la
|
||||
poetry run python etl/hubspot_surveyed_needs_sign_off.py
|
||||
env:
|
||||
PYTHONPATH: ${{ github.workspace }}
|
||||
# - name: run script
|
||||
# run: |
|
||||
# pwd
|
||||
# ls -la
|
||||
# poetry run python etl/hubspot_surveyed_needs_sign_off.py
|
||||
# env:
|
||||
# PYTHONPATH: ${{ github.workspace }}
|
||||
29
.github/workflows/lambda_main.yml
vendored
29
.github/workflows/lambda_main.yml
vendored
|
|
@ -2,7 +2,7 @@ name: Lambda Main Workflow
|
|||
|
||||
on:
|
||||
push:
|
||||
branches: [main, feautre/additional_features_in_condition_report_extraction]
|
||||
branches: [main, feautre/walthamforest_etl]
|
||||
|
||||
env:
|
||||
AWS_REGION: eu-west-2
|
||||
|
|
@ -34,7 +34,7 @@ jobs:
|
|||
- name: Build and deploy Lambda example
|
||||
uses: ./.github/workflows/actions/lambda-deploy
|
||||
with:
|
||||
lambda_name: lambda_example
|
||||
ecr_name: lambda_example
|
||||
dockerfile_path: ./deployment/lambda/lambda_example/docker/Dockerfile
|
||||
ecr_tf_dir: ./deployment/lambda/lambda_example/docker/
|
||||
lambda_tf_dir: ./deployment/lambda/lambda_example/
|
||||
|
|
@ -57,7 +57,7 @@ jobs:
|
|||
- name: Build and deploy Extractor & Loader Lambda
|
||||
uses: ./.github/workflows/actions/lambda-deploy
|
||||
with:
|
||||
lambda_name: extractor_and_loader
|
||||
ecr_name: extractor_and_loader
|
||||
dockerfile_path: ./deployment/lambda/extractor_and_loader/docker/Dockerfile
|
||||
ecr_tf_dir: ./deployment/lambda/extractor_and_loader/docker/
|
||||
lambda_tf_dir: ./deployment/lambda/extractor_and_loader/
|
||||
|
|
@ -67,3 +67,26 @@ jobs:
|
|||
git-sha: ${{ github.sha }}
|
||||
git-ref: ${{ github.ref_name }}
|
||||
|
||||
|
||||
walthamforest-etl:
|
||||
runs-on: ubuntu-latest
|
||||
needs: shared-lambda-terraform
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
|
||||
steps:
|
||||
- name: Checkout repo
|
||||
uses: actions/checkout@v4
|
||||
- name: Build and deploy WalthamForest ETL
|
||||
uses: ./.github/workflows/actions/lambda-deploy
|
||||
with:
|
||||
ecr_name: walthamforest_etl_adhoc_ecr
|
||||
dockerfile_path: ./deployment/lambda/walthamforest_etl/docker/Dockerfile
|
||||
ecr_tf_dir: ./deployment/lambda/walthamforest_etl/docker/
|
||||
lambda_tf_dir: ./deployment/lambda/walthamforest_etl/
|
||||
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
||||
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
||||
aws-region: ${{ secrets.AWS_REGION }}
|
||||
git-sha: ${{ github.sha }}
|
||||
git-ref: ${{ github.ref_name }}
|
||||
64
.github/workflows/pytest.yml
vendored
64
.github/workflows/pytest.yml
vendored
|
|
@ -1,42 +1,42 @@
|
|||
name: Run Pytest
|
||||
# name: Run Pytest
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- '**' # Run on all branches
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
# on:
|
||||
# push:
|
||||
# branches:
|
||||
# - '**' # Run on all branches
|
||||
# pull_request:
|
||||
# branches:
|
||||
# - main
|
||||
|
||||
jobs:
|
||||
etl-unit-tests:
|
||||
runs-on: ubuntu-22.04
|
||||
# jobs:
|
||||
# etl-unit-tests:
|
||||
# runs-on: ubuntu-22.04
|
||||
|
||||
steps:
|
||||
- name: Checkout Repository
|
||||
uses: actions/checkout@v4
|
||||
# steps:
|
||||
# - name: Checkout Repository
|
||||
# uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.12'
|
||||
# - name: Set up Python
|
||||
# uses: actions/setup-python@v5
|
||||
# with:
|
||||
# python-version: '3.12'
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.12'
|
||||
# - name: Set up Python
|
||||
# uses: actions/setup-python@v4
|
||||
# with:
|
||||
# python-version: '3.12'
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
pip install poetry
|
||||
poetry install --no-root
|
||||
# - name: Install dependencies
|
||||
# run: |
|
||||
# pip install poetry
|
||||
# poetry install --no-root
|
||||
|
||||
- name: Run Tests
|
||||
run: |
|
||||
poetry run pytest -W ignore::DeprecationWarning
|
||||
env:
|
||||
PYTHONPATH: ${{ github.workspace }}
|
||||
# - name: Run Tests
|
||||
# run: |
|
||||
# poetry run pytest -W ignore::DeprecationWarning
|
||||
# env:
|
||||
# PYTHONPATH: ${{ github.workspace }}
|
||||
|
||||
|
||||
|
||||
continue-on-error: ${{ github.event_name == 'push' && github.ref != 'refs/heads/main' }}
|
||||
# continue-on-error: ${{ github.event_name == 'push' && github.ref != 'refs/heads/main' }}
|
||||
|
|
|
|||
70
.github/workflows/scis_invoice_calculator.yml
vendored
70
.github/workflows/scis_invoice_calculator.yml
vendored
|
|
@ -1,39 +1,39 @@
|
|||
name: SCIS Invoice Calculator
|
||||
on:
|
||||
schedule:
|
||||
- cron: '0 6 * * *'
|
||||
workflow_dispatch:
|
||||
# name: SCIS Invoice Calculator
|
||||
# on:
|
||||
# schedule:
|
||||
# - cron: '0 6 * * *'
|
||||
# workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
scis_invoice_calculator:
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
# jobs:
|
||||
# scis_invoice_calculator:
|
||||
# runs-on: ubuntu-22.04
|
||||
# steps:
|
||||
# - uses: actions/checkout@v3
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.12'
|
||||
# - name: Set up Python
|
||||
# uses: actions/setup-python@v4
|
||||
# with:
|
||||
# python-version: '3.12'
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
pip install poetry
|
||||
poetry install --no-root
|
||||
# - name: Install dependencies
|
||||
# run: |
|
||||
# pip install poetry
|
||||
# poetry install --no-root
|
||||
|
||||
- name: run script
|
||||
run: |
|
||||
bash scis_invoice.sh
|
||||
env:
|
||||
PYTHONPATH: ${{ github.workspace }}
|
||||
SOUTH_COAST_INSULATION_SERVICE_SHAREPOINT_ID: ${{ secrets.SOUTH_COAST_INSULATION_SERVICE_SHAREPOINT_ID }}
|
||||
JJC_SERVICE_SHAREPOINT_ID: ${{ secrets.JJC_SERVICE_SHAREPOINT_ID }}
|
||||
BAXTER_KELLY_SERVICE_SHAREPOINT_ID: ${{ secrets.BAXTER_KELLY_SERVICE_SHAREPOINT_ID }}
|
||||
SGEC_SERVICE_SHAREPOINT_ID: ${{ secrets.SGEC_SERVICE_SHAREPOINT_ID }}
|
||||
SHAREPOINT_CLIENT_ID: ${{ secrets.SHAREPOINT_CLIENT_ID }}
|
||||
SHAREPOINT_CLIENT_SECRET: ${{ secrets.SHAREPOINT_CLIENT_SECRET }}
|
||||
SHAREPOINT_TENANT_ID: ${{ secrets.SHAREPOINT_TENANT_ID }}
|
||||
- name: Upload Excel file
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: my-excel-file
|
||||
path: survey_data.xlsx
|
||||
# - name: run script
|
||||
# run: |
|
||||
# bash scis_invoice.sh
|
||||
# env:
|
||||
# PYTHONPATH: ${{ github.workspace }}
|
||||
# SOUTH_COAST_INSULATION_SERVICE_SHAREPOINT_ID: ${{ secrets.SOUTH_COAST_INSULATION_SERVICE_SHAREPOINT_ID }}
|
||||
# JJC_SERVICE_SHAREPOINT_ID: ${{ secrets.JJC_SERVICE_SHAREPOINT_ID }}
|
||||
# BAXTER_KELLY_SERVICE_SHAREPOINT_ID: ${{ secrets.BAXTER_KELLY_SERVICE_SHAREPOINT_ID }}
|
||||
# SGEC_SERVICE_SHAREPOINT_ID: ${{ secrets.SGEC_SERVICE_SHAREPOINT_ID }}
|
||||
# SHAREPOINT_CLIENT_ID: ${{ secrets.SHAREPOINT_CLIENT_ID }}
|
||||
# SHAREPOINT_CLIENT_SECRET: ${{ secrets.SHAREPOINT_CLIENT_SECRET }}
|
||||
# SHAREPOINT_TENANT_ID: ${{ secrets.SHAREPOINT_TENANT_ID }}
|
||||
# - name: Upload Excel file
|
||||
# uses: actions/upload-artifact@v4
|
||||
# with:
|
||||
# name: my-excel-file
|
||||
# path: survey_data.xlsx
|
||||
|
|
@ -6,17 +6,19 @@ data "aws_iam_role" "lambda_exec_role" {
|
|||
# Reference existing ECR repository
|
||||
data "aws_ecr_repository" "extractor_and_loader" {
|
||||
name = "extractor_and_loader"
|
||||
|
||||
}
|
||||
|
||||
# SQS queue for extractor_and_loader
|
||||
resource "aws_sqs_queue" "extractor_and_loader_queue" {
|
||||
name = "extractor-loader-queue"
|
||||
visibility_timeout_seconds = 1800 # 30 minutes (>= 300s and ~6x Lambda timeout)
|
||||
}
|
||||
|
||||
|
||||
# IAM policy specific to this Lambda
|
||||
# Custom IAM policy specific to extractor_and_loader
|
||||
resource "aws_iam_policy" "extractor_loader_policy" {
|
||||
name = "extractor-loader-policy"
|
||||
name = "extractor_loader_policy"
|
||||
|
||||
policy = jsonencode({
|
||||
Version = "2012-10-17",
|
||||
|
|
@ -26,7 +28,9 @@ resource "aws_iam_policy" "extractor_loader_policy" {
|
|||
Action = [
|
||||
"sqs:ReceiveMessage",
|
||||
"sqs:DeleteMessage",
|
||||
"sqs:GetQueueAttributes"
|
||||
"sqs:GetQueueAttributes",
|
||||
"sqs:GetQueueUrl",
|
||||
"sqs:ChangeMessageVisibility"
|
||||
],
|
||||
Resource = aws_sqs_queue.extractor_and_loader_queue.arn
|
||||
},
|
||||
|
|
@ -55,7 +59,7 @@ resource "aws_iam_role_policy_attachment" "extractor_loader_policy_attach" {
|
|||
|
||||
# Lambda function
|
||||
resource "aws_lambda_function" "extractor_and_loader" {
|
||||
function_name = "extractor-and-loader"
|
||||
function_name = "extractor-and-loader-lambda"
|
||||
role = data.aws_iam_role.lambda_exec_role.arn
|
||||
package_type = "Image"
|
||||
image_uri = "${data.aws_ecr_repository.extractor_and_loader.repository_url}:${var.lambda_image_tag}"
|
||||
|
|
|
|||
|
|
@ -25,7 +25,9 @@ resource "aws_iam_policy" "lambda_example_policy" {
|
|||
Action = [
|
||||
"sqs:ReceiveMessage",
|
||||
"sqs:DeleteMessage",
|
||||
"sqs:GetQueueAttributes"
|
||||
"sqs:GetQueueAttributes",
|
||||
"sqs:GetQueueUrl",
|
||||
"sqs:ChangeMessageVisibility"
|
||||
],
|
||||
Resource = aws_sqs_queue.lambda_example_queue.arn
|
||||
},
|
||||
|
|
@ -47,6 +49,8 @@ resource "aws_iam_policy" "lambda_example_policy" {
|
|||
})
|
||||
}
|
||||
|
||||
|
||||
|
||||
resource "aws_iam_role_policy_attachment" "lambda_example_policy_attach" {
|
||||
role = data.aws_iam_role.lambda_exec_role.name
|
||||
policy_arn = aws_iam_policy.lambda_example_policy.arn
|
||||
|
|
|
|||
21
deployment/lambda/walthamforest_etl/docker/.dockerignore
Normal file
21
deployment/lambda/walthamforest_etl/docker/.dockerignore
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
# Ignore junk and large files
|
||||
*.pdf
|
||||
*.csv
|
||||
*.xml
|
||||
*.parquet
|
||||
*.ipynb
|
||||
*.mp4
|
||||
*.mov
|
||||
*.jpg
|
||||
*.png
|
||||
*.zip
|
||||
*.tar.gz
|
||||
__pycache__/
|
||||
*.pyc
|
||||
*.pyo
|
||||
*.pyd
|
||||
build/
|
||||
dist/
|
||||
.etl_cache/
|
||||
tests/
|
||||
docs/
|
||||
25
deployment/lambda/walthamforest_etl/docker/Dockerfile
Normal file
25
deployment/lambda/walthamforest_etl/docker/Dockerfile
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
FROM public.ecr.aws/lambda/python:3.12

# Install Poetry (you could pin a version if you like)
RUN curl -sSL https://install.python-poetry.org | python3 -

# Add Poetry to PATH
ENV PATH="/root/.local/bin:$PATH"

# Set working directory
WORKDIR /var/task

# Copy Poetry files first to leverage Docker layer caching.
# NOTE(review): etl/ and README.md are copied here as well, presumably because
# `poetry install` below also installs the root project - confirm.
COPY pyproject.toml poetry.lock README.md ./
COPY etl/ etl/

# Install dependencies into the image's system interpreter (no virtualenv)
RUN poetry config virtualenvs.create false \
    && poetry install --only main --no-interaction --no-ansi

# Copy this Lambda's own handler module. The path was previously pointing at
# extractor_and_loader/docker/app.py, which shipped the wrong handler.
COPY deployment/lambda/walthamforest_etl/docker/app.py ./

# Set Lambda handler
CMD ["app.handler"]
|
||||
124
deployment/lambda/walthamforest_etl/docker/app.py
Normal file
124
deployment/lambda/walthamforest_etl/docker/app.py
Normal file
|
|
@ -0,0 +1,124 @@
|
|||
import pandas as pd
|
||||
import json
|
||||
from pprint import pprint
|
||||
import os
|
||||
import copy
|
||||
from collections import defaultdict
|
||||
from typing import List, Dict, Any, Union, Optional
|
||||
|
||||
def process_complex(sheet_name, group_key="ADDRESS",
                    excel_path="../../../../../home/Downloads/data.xlsx"):
    """Turn one asset sheet into a list of per-group records.

    Rows of the sheet are grouped by ``group_key`` and each group becomes one
    record holding the group's property columns and its element rows.

    Args:
        sheet_name: Name of the worksheet to read from the workbook.
        group_key: Column to group rows by ("ADDRESS" or "BLOCK_CODE").
        excel_path: Location of the workbook. The default is the original
            hard-coded relative path. NOTE(review): it appears to resolve to
            the devcontainer's home mount when run from this file's
            directory; it will not exist inside the Lambda image.

    Returns:
        A list of dicts ``{group_key: value, "property_info": {...},
        "elements": {...}}`` in order of first appearance of each group.
        ``elements`` is keyed by "ELEMENT CODE DESCRIPTION"; if a group has
        several distinct rows with the same description, the last one wins.

    Raises:
        KeyError: If ``group_key`` or any expected column is missing.
    """
    df = pd.read_excel(excel_path, sheet_name=sheet_name)

    element_cols = [
        "ELEMENT GROUP", "ELEMENT CODE", "ELEMENT CODE DESCRIPTION",
        "ATTRIBUTE CODE", "ATTRIBUTE CODE DESCRIPTION",
        "ELEMENT DATE VALUE", "ELEMENT NUMERIC VALUE",
        "ELEMENT TEXT VALUE", "QUANTITY",
        "INSTALL DATE", "REMAINING LIFE", "ELEMENT COMMENTS",
    ]

    property_cols = [
        "PROP REF", "ADDRESS", "OWNERSHIP",
        "PROP STATUS", "PROP TYPE", "PROP SUB TYPE",
    ]

    records = []

    # groupby(sort=False) keeps first-appearance order, like iterating
    # unique() did, but skips rows whose key is NaN. The previous
    # `df[df[key] == val]` filter produced an empty frame for a NaN key and
    # then crashed on `.iloc[0]`.
    for val, group in df.groupby(group_key, sort=False):
        # Property columns are expected to be constant within a group; take
        # the first distinct combination.
        property_info = group[property_cols].drop_duplicates().iloc[0].to_dict()

        # Build the elements dict keyed by ELEMENT CODE DESCRIPTION
        # (could also use "ELEMENT CODE").
        elements_dict = {
            row["ELEMENT CODE DESCRIPTION"]: row.to_dict()
            for _, row in group[element_cols].drop_duplicates().iterrows()
        }

        records.append({
            group_key: val,
            "property_info": property_info,
            "elements": elements_dict,
        })

    return records
|
||||
|
||||
def process_simple(sheet_name, excel_path="../../../../../home/Downloads/data.xlsx"):
    """Turn a one-row-per-address sheet into a list of per-address records.

    Args:
        sheet_name: Name of the worksheet to read from the workbook.
        excel_path: Location of the workbook. The default is the original
            hard-coded relative path. NOTE(review): it will not exist inside
            the Lambda image - confirm how the workbook is meant to be
            supplied there.

    Returns:
        A list of dicts ``{"ADDRESS": address, "to_add": {...}}`` in order of
        first appearance, where ``to_add`` holds every other column of the
        first row found for that address.

    Raises:
        KeyError: If the sheet has no "Address" column.
    """
    df = pd.read_excel(excel_path, sheet_name=sheet_name)

    records = []

    # groupby(sort=False) preserves first-appearance order and skips rows
    # with a missing Address. The previous equality filter yielded an empty
    # frame for NaN and crashed on `.iloc[0]`.
    for address, group in df.groupby("Address", sort=False):
        first_row = group.iloc[0]  # take first row if multiple

        records.append({
            "ADDRESS": address,
            # all columns except Address
            "to_add": first_row.drop(labels=["Address"]).to_dict(),
        })

    return records
|
||||
|
||||
|
||||
def combine_records_by_address(
    asset_records: List[Dict[str, Any]],
    simple_records: List[Dict[str, Any]],
    dest_key: str = "to_add",
    unique_identifier="Address"
) -> List[Dict[str, Any]]:
    """Merge process_complex() and process_simple() results by ADDRESS.

    Args:
        asset_records: Records carrying an "ADDRESS" key (as produced by
            process_complex with the default group key).
        simple_records: Records carrying "ADDRESS" and a "to_add" payload
            (as produced by process_simple).
        dest_key: Key under which the "to_add" payload is stored on the
            merged record.
        unique_identifier: Currently unused; both inputs are always matched
            on their "ADDRESS" key. Kept for interface compatibility.

    Returns:
        One record per address found in either input, sorted by address.
        Addresses present only in ``simple_records`` get a minimal
        ``{"ADDRESS": addr}`` base. The output shares no mutable state with
        the inputs.

    Raises:
        KeyError: If any input record lacks "ADDRESS".
    """
    # Index inputs by ADDRESS (a later duplicate address overrides an earlier one)
    asset_by_addr = {record["ADDRESS"]: record for record in asset_records}
    simple_by_addr = {record["ADDRESS"]: record for record in simple_records}

    merged: List[Dict[str, Any]] = []

    # Use union of addresses from both sources
    for addr in sorted(set(asset_by_addr) | set(simple_by_addr)):
        base = copy.deepcopy(asset_by_addr.get(addr, {"ADDRESS": addr}))
        simple = simple_by_addr.get(addr)

        if simple is not None:
            # Deep-copy the payload too: the asset side is already copied, and
            # aliasing here let edits to the merged output leak back into
            # simple_records.
            base[dest_key] = copy.deepcopy(simple.get("to_add", {}))

        merged.append(base)

    return merged
|
||||
|
||||
def combine_records_for_flats(
    assets: List[Dict[str, Any]],
    simple: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
    """Attach BLOCK_INFO (from simple[0]) to each asset record in assets.

    Args:
        assets: Per-flat records; each dict is updated in place.
        simple: Block-level records; only the first entry is used.

    Returns:
        The same ``assets`` list that was passed in. When ``simple`` is empty
        or its first entry is not a dict, ``assets`` is returned untouched.
        Every updated record references the same ``BLOCK_INFO`` dict object.
    """
    if not simple or not isinstance(simple[0], dict):
        return assets  # nothing to add

    block_info = simple[0]

    for record in assets:
        # Make sure record is a dict; anything else is left as it is rather
        # than failing on a missing .update().
        if isinstance(record, dict):
            record["BLOCK_INFO"] = block_info

    return assets
|
||||
|
||||
def handler(event, context):
    """Lambda entry point: build the merged house and flat records.

    Args:
        event: Lambda invocation payload (not read).
        context: Lambda runtime context (not read).

    Returns:
        A small JSON-serialisable summary with the number of house and flat
        records built. Previously both results were computed and then
        discarded, so an invocation produced no observable output.
        NOTE(review): the records themselves are not returned or persisted
        here - confirm where they are meant to go.
    """
    # read data for houses only
    house_assets = process_complex("Houses Asset Data")
    house_epc = process_simple("Houses")
    houses = combine_records_by_address(house_assets, house_epc, dest_key="EPC_DATA")

    # read data for flats
    # NOTE(review): the two sheet names disagree on the range (236-256 vs
    # 236-254) - confirm both match the workbook.
    flat_assets = process_complex("Chingford Rd 236-256 Properties")
    block_assets = process_complex("CHINGFORD ROAD 236-254 Asset Bl", "BLOCK_CODE")
    flats = combine_records_for_flats(flat_assets, block_assets)

    return {"houses": len(houses), "flats": len(flats)}
|
||||
|
||||
|
||||
|
||||
|
||||
63
deployment/lambda/walthamforest_etl/docker/ecr.tf
Normal file
63
deployment/lambda/walthamforest_etl/docker/ecr.tf
Normal file
|
|
@ -0,0 +1,63 @@
|
|||
# ECR repo
|
||||
resource "aws_ecr_repository" "walthamforest_etl_adhoc_ecr" {
|
||||
name = "walthamforest_etl_adhoc_ecr"
|
||||
}
|
||||
|
||||
# ECR policy to allow Lambda access
|
||||
resource "aws_ecr_repository_policy" "walthamforest_etl_adhoc_ecr_access" {
|
||||
repository = aws_ecr_repository.walthamforest_etl_adhoc_ecr.name
|
||||
|
||||
policy = jsonencode({
|
||||
Version = "2008-10-17",
|
||||
Statement = [{
|
||||
Sid = "AllowLambdaPull",
|
||||
Effect = "Allow",
|
||||
Principal = {
|
||||
Service = "lambda.amazonaws.com"
|
||||
},
|
||||
Action = [
|
||||
"ecr:GetDownloadUrlForLayer",
|
||||
"ecr:BatchGetImage",
|
||||
"ecr:BatchCheckLayerAvailability"
|
||||
]
|
||||
}]
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
|
||||
# ECR lifecycle policy: expire untagged images 1 day after push; keep only the 5 newest "feature"-prefixed tagged images
|
||||
resource "aws_ecr_lifecycle_policy" "walthamforest_etl_adhoc_loader_lifecycle" {
|
||||
repository = aws_ecr_repository.walthamforest_etl_adhoc_ecr.name
|
||||
|
||||
policy = jsonencode({
|
||||
"rules": [
|
||||
{
|
||||
"rulePriority": 2,
|
||||
"description": "Expire images older than 14 days",
|
||||
"selection": {
|
||||
"tagStatus": "untagged",
|
||||
"countType": "sinceImagePushed",
|
||||
"countUnit": "days",
|
||||
"countNumber": 1
|
||||
},
|
||||
"action": {
|
||||
"type": "expire"
|
||||
}
|
||||
},
|
||||
{
|
||||
"rulePriority": 1,
|
||||
"description": "Keep last 5 images",
|
||||
"selection": {
|
||||
"tagStatus": "tagged",
|
||||
"tagPrefixList": ["feature"],
|
||||
"countType": "imageCountMoreThan",
|
||||
"countNumber": 5
|
||||
},
|
||||
"action": {
|
||||
"type": "expire"
|
||||
}
|
||||
}
|
||||
]
|
||||
})
|
||||
}
|
||||
0
deployment/lambda/walthamforest_etl/docker/main.tf
Normal file
0
deployment/lambda/walthamforest_etl/docker/main.tf
Normal file
15
deployment/lambda/walthamforest_etl/docker/provider.tf
Normal file
15
deployment/lambda/walthamforest_etl/docker/provider.tf
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
terraform {
|
||||
required_providers {
|
||||
aws = {
|
||||
source = "hashicorp/aws"
|
||||
version = "~> 6.3.0"
|
||||
}
|
||||
}
|
||||
backend "s3" {
|
||||
bucket = "survey-extractor-tf-state"
|
||||
region = "eu-west-2"
|
||||
key = "env:/dev/lambda/ecr/walthamforest_etl.tfstate"
|
||||
}
|
||||
|
||||
required_version = ">= 1.2.0"
|
||||
}
|
||||
0
deployment/lambda/walthamforest_etl/main.tf
Normal file
0
deployment/lambda/walthamforest_etl/main.tf
Normal file
15
deployment/lambda/walthamforest_etl/provider.tf
Normal file
15
deployment/lambda/walthamforest_etl/provider.tf
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
terraform {
|
||||
required_providers {
|
||||
aws = {
|
||||
source = "hashicorp/aws"
|
||||
version = "~> 6.3.0"
|
||||
}
|
||||
}
|
||||
backend "s3" {
|
||||
bucket = "survey-extractor-tf-state"
|
||||
region = "eu-west-2"
|
||||
key = "env:/dev/lambda/eachlambda/walthamforest_etl_lambda.tfstate"
|
||||
}
|
||||
|
||||
required_version = ">= 1.2.0"
|
||||
}
|
||||
5
deployment/lambda/walthamforest_etl/vars.tf
Normal file
5
deployment/lambda/walthamforest_etl/vars.tf
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
variable "lambda_image_tag" {
|
||||
description = "Docker image tag (e.g. GitHub SHA)"
|
||||
type = string
|
||||
default = "local-dev-latest"
|
||||
}
|
||||
|
|
@ -0,0 +1,83 @@
|
|||
# Reference existing IAM role
|
||||
data "aws_iam_role" "lambda_exec_role" {
|
||||
name = "lambda-exec-role"
|
||||
}
|
||||
|
||||
# Reference existing ECR repository
|
||||
data "aws_ecr_repository" "walthamforest_etl_adhoc_ecr" {
|
||||
name = "walthamforest_etl_adhoc_ecr"
|
||||
}
|
||||
|
||||
# SQS queue
|
||||
resource "aws_sqs_queue" "walthamforest_etl_adhoc_queue" {
|
||||
name = "walthamforest_etl_adhoc-queue"
|
||||
visibility_timeout_seconds = 1800 # 30 minutes (>= 300s and ~6x Lambda timeout)
|
||||
}
|
||||
|
||||
|
||||
# Custom IAM policy specific to walthamforest_etl_adhoc
|
||||
resource "aws_iam_policy" "walthamforest_etl_adhoc_policy" {
|
||||
name = "walthamforest_adhoc_policy_lambda"
|
||||
|
||||
policy = jsonencode({
|
||||
Version = "2012-10-17",
|
||||
Statement = [
|
||||
{
|
||||
Effect = "Allow",
|
||||
Action = [
|
||||
"sqs:ReceiveMessage",
|
||||
"sqs:DeleteMessage",
|
||||
"sqs:GetQueueAttributes",
|
||||
"sqs:GetQueueUrl",
|
||||
"sqs:ChangeMessageVisibility"
|
||||
],
|
||||
Resource = aws_sqs_queue.walthamforest_etl_adhoc_queue.arn
|
||||
},
|
||||
{
|
||||
Effect = "Allow",
|
||||
Action = [
|
||||
"ecr:GetDownloadUrlForLayer",
|
||||
"ecr:BatchGetImage",
|
||||
"ecr:BatchCheckLayerAvailability"
|
||||
],
|
||||
Resource = data.aws_ecr_repository.walthamforest_etl_adhoc_ecr.arn
|
||||
},
|
||||
{
|
||||
Effect = "Allow",
|
||||
Action = ["ecr:GetAuthorizationToken"],
|
||||
Resource = "*"
|
||||
}
|
||||
]
|
||||
})
|
||||
}
|
||||
|
||||
resource "aws_iam_role_policy_attachment" "walthamforest_etl_adhoc_policy_attach" {
|
||||
role = data.aws_iam_role.lambda_exec_role.name
|
||||
policy_arn = aws_iam_policy.walthamforest_etl_adhoc_policy.arn
|
||||
}
|
||||
|
||||
# Lambda function
|
||||
resource "aws_lambda_function" "walthamforest_etl_adhoc" {
|
||||
function_name = "walthamforest_etl_adhoc"
|
||||
role = data.aws_iam_role.lambda_exec_role.arn
|
||||
package_type = "Image"
|
||||
image_uri = "${data.aws_ecr_repository.walthamforest_etl_adhoc_ecr.repository_url}:${var.lambda_image_tag}"
|
||||
# Increase timeout (max 900 sec / 15 min)
|
||||
# timeout = 300 # e.g. 5 minutes
|
||||
|
||||
# Increase memory (default 128 MB)
|
||||
memory_size = 2048 # try 1024 or 2048 MB to start
|
||||
|
||||
# environment {
|
||||
# variables = {
|
||||
# DATABASE_URL = "<redacted - inject from a secret store; never commit credentials; rotate the previously committed password>"
|
||||
# }
|
||||
# }
|
||||
}
|
||||
|
||||
# SQS trigger
|
||||
resource "aws_lambda_event_source_mapping" "walthamforest_etl_adhoc_trigger" {
|
||||
event_source_arn = aws_sqs_queue.walthamforest_etl_adhoc_queue.arn
|
||||
function_name = aws_lambda_function.walthamforest_etl_adhoc.arn
|
||||
batch_size = 1
|
||||
}
|
||||
|
|
@ -256,17 +256,17 @@ for board, all_records in board_to_record.items():
|
|||
filtered_dfs.append(design2)
|
||||
|
||||
# Design repetitive simple
|
||||
design3 = get_df(design, "design invoice type", ["archetype (simple)"], "Design Archetype repetitive")
|
||||
design3 = get_df(design, "design invoice type", ["repetitive (simple)"], "Design repetitive simple")
|
||||
if not design1.empty:
|
||||
filtered_dfs.append(design3)
|
||||
|
||||
# Design repetitive complex
|
||||
design4 = get_df(design, "design invoice type", ["archetype (complex)"], "Design Archetype complex")
|
||||
design4 = get_df(design, "design invoice type", ["repetitive (complex)"], "Design Repetitive complex")
|
||||
if not design1.empty:
|
||||
filtered_dfs.append(design4)
|
||||
|
||||
# Design not specified
|
||||
all_filtered = pd.concat([design1, design2, design3, design4], ignore_index=True)
|
||||
all_filtered = pd.concat([df for df in (design1, design2, design3, design4) if not df.empty])
|
||||
design_remaining = design.loc[~design.index.isin(all_filtered.index)]
|
||||
if not design_remaining.empty:
|
||||
design_remaining["job_type"] = "design type not specified"
|
||||
|
|
|
|||
|
|
@ -15,15 +15,21 @@ board_ids = [
|
|||
]
|
||||
|
||||
empty = "Rate card info missing"
|
||||
|
||||
junte = "ask junte to update"
|
||||
rate_card_data_2502_accent_housing = {
|
||||
"job_type": [
|
||||
"First half of MTP", "Second half of MTP", "Full MTP"
|
||||
"First half of MTP", "Second half of MTP", "Full MTP", "Design Archetype Complex",
|
||||
"Design Archetype Simple", "Design Repetitive Complex", "Design Repetitive Simple",
|
||||
"Design Revision", "design type not specified",
|
||||
|
||||
],
|
||||
"rate": [
|
||||
150, 130, 280
|
||||
150, 130, 280, junte, junte, junte, junte, junte, "please ask andreas"
|
||||
]
|
||||
}
|
||||
# ToDO
|
||||
# Design Revision
|
||||
# Design Check with Andreas
|
||||
|
||||
rate_card_df = pd.DataFrame(rate_card_data_2502_accent_housing)
|
||||
|
||||
|
|
@ -91,6 +97,43 @@ full_cost = get_df(df, "mtp invoicing status", ["(v1) full cost mtp to invoice (
|
|||
if not full_cost.empty:
|
||||
filtered_dfs.append(full_cost)
|
||||
|
||||
# Design jobs whose invoicing status is "to invoice"; each category below is
# selected from this subset and appended only when it actually has rows.
design = get_df(df, "design invoicing status", ["to invoice"])

# Design archetype complex
design1 = get_df(design, "design invoice type", ["archetype (complex)"], "Design Archetype Complex")
if not design1.empty:
    filtered_dfs.append(design1)

# Design archetype simple
design2 = get_df(design, "design invoice type", ["archetype (simple)"], "Design Archetype Simple")
# Each category is gated on its own frame. These checks used to test design1,
# which dropped design2-4 whenever there were no complex archetype rows.
if not design2.empty:
    filtered_dfs.append(design2)

# Design repetitive simple
design3 = get_df(design, "design invoice type", ["repetitive (simple)"], "Design repetitive simple")
if not design3.empty:
    filtered_dfs.append(design3)

# Design repetitive complex
design4 = get_df(design, "design invoice type", ["repetitive (complex)"], "Design repetitive complex")
if not design4.empty:
    filtered_dfs.append(design4)

# Design not specified: whatever is left of `design` after removing the rows
# picked up by the four categories above (matched on index labels).
classified = [frame for frame in (design1, design2, design3, design4) if not frame.empty]
# pd.concat raises ValueError on an empty list, so fall back to an empty
# slice of `design` when no category matched.
all_filtered = pd.concat(classified) if classified else design.iloc[0:0]
# .copy() so the job_type assignment below writes to an independent frame
# rather than a slice of `design`.
design_remaining = design.loc[~design.index.isin(all_filtered.index)].copy()

if not design_remaining.empty:
    design_remaining["job_type"] = "design type not specified"
    filtered_dfs.append(design_remaining)

# Design Revision
revision_letter = ['a', 'b', 'c', 'd']
for letter in revision_letter:
    design = get_df(df, "design revision invoice", [f"rev. {letter} to invoice"], "Design Revision")
    if not design.empty:
        filtered_dfs.append(design)
|
||||
|
||||
|
||||
final_df = pd.concat(filtered_dfs).reset_index(drop=True)
|
||||
|
||||
final_df["job_type"] = final_df["job_type"].str.lower()
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue