mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-30 13:10:47 +00:00
Merge branch 'main' of https://github.com/Hestia-Homes/Model into feature/ara-rebaselining
# Conflicts: # asset_list/app.py
This commit is contained in:
commit
6e73d807a9
74 changed files with 2191 additions and 431 deletions
|
|
@ -21,7 +21,7 @@ RUN git clone --depth 1 https://github.com/openvenues/libpostal /tmp/libpostal \
|
||||||
&& rm -rf /tmp/libpostal
|
&& rm -rf /tmp/libpostal
|
||||||
|
|
||||||
# 3) Create the user and grant sudo privileges
|
# 3) Create the user and grant sudo privileges
|
||||||
RUN useradd -m -s /usr/bin/bash ${USER} \
|
RUN useradd -m -s /bin/bash ${USER} \
|
||||||
&& echo "${USER} ALL=(ALL) NOPASSWD: ALL" >/etc/sudoers.d/${USER} \
|
&& echo "${USER} ALL=(ALL) NOPASSWD: ALL" >/etc/sudoers.d/${USER} \
|
||||||
&& chmod 0440 /etc/sudoers.d/${USER}
|
&& chmod 0440 /etc/sudoers.d/${USER}
|
||||||
|
|
||||||
|
|
@ -32,6 +32,11 @@ ADD asset_list/requirements.txt requirements1.txt
|
||||||
RUN cat requirements1.txt requirements2.txt >> requirements.txt
|
RUN cat requirements1.txt requirements2.txt >> requirements.txt
|
||||||
|
|
||||||
RUN pip install -r requirements.txt
|
RUN pip install -r requirements.txt
|
||||||
|
|
||||||
|
# Install code server
|
||||||
|
RUN curl -fsSL https://code-server.dev/install.sh | sh
|
||||||
|
|
||||||
|
|
||||||
# 5) Workdir
|
# 5) Workdir
|
||||||
WORKDIR /workspaces/model
|
WORKDIR /workspaces/model
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2,13 +2,14 @@
|
||||||
"name": "SAL ENV",
|
"name": "SAL ENV",
|
||||||
"dockerComposeFile": "docker-compose.yml",
|
"dockerComposeFile": "docker-compose.yml",
|
||||||
"service": "model-sal",
|
"service": "model-sal",
|
||||||
"remoteUser": "vscode",
|
// "remoteUser": "vscode",
|
||||||
"workspaceFolder": "/workspaces/model",
|
"workspaceFolder": "/workspaces/model",
|
||||||
"postStartCommand": "bash .devcontainer/post-install.sh",
|
"postStartCommand": "bash .devcontainer/asset_list/post-install.sh",
|
||||||
"mounts": [
|
"mounts": [
|
||||||
// Optional, just makes getting from Downloads (local env) easier
|
// Optional, just makes getting from Downloads (local env) easier
|
||||||
"source=${localEnv:HOME},target=/workspaces/home,type=bind"
|
"source=${localEnv:HOME},target=/home/vscode,type=bind"
|
||||||
],
|
],
|
||||||
|
"forwardPorts": [8081],
|
||||||
"customizations": {
|
"customizations": {
|
||||||
"vscode": {
|
"vscode": {
|
||||||
"extensions": [
|
"extensions": [
|
||||||
|
|
@ -24,7 +25,8 @@
|
||||||
"ms-python.vscode-python-envs",
|
"ms-python.vscode-python-envs",
|
||||||
"ms-python.black-formatter",
|
"ms-python.black-formatter",
|
||||||
"GrapeCity.gc-excelviewer",
|
"GrapeCity.gc-excelviewer",
|
||||||
"jakobhoeg.vscode-pokemon"
|
"jakobhoeg.vscode-pokemon",
|
||||||
|
"eamodio.gitlens"
|
||||||
],
|
],
|
||||||
"settings": {
|
"settings": {
|
||||||
"files.defaultWorkspace": "/workspaces/model",
|
"files.defaultWorkspace": "/workspaces/model",
|
||||||
|
|
|
||||||
|
|
@ -2,15 +2,17 @@ version: '3.8'
|
||||||
|
|
||||||
services:
|
services:
|
||||||
model-sal:
|
model-sal:
|
||||||
user: "${UID}:${GID}"
|
|
||||||
build:
|
build:
|
||||||
context: ../..
|
context: ../..
|
||||||
dockerfile: .devcontainer/asset_list/Dockerfile
|
dockerfile: .devcontainer/asset_list/Dockerfile
|
||||||
command: sleep infinity
|
command: code-server --bind-addr 0.0.0.0:8080
|
||||||
|
user: vscode
|
||||||
volumes:
|
volumes:
|
||||||
- ../../:/workspaces/model
|
- ../../:/workspaces/model
|
||||||
networks:
|
networks:
|
||||||
- model-net
|
- model-net
|
||||||
|
ports:
|
||||||
|
- "8081:8080"
|
||||||
|
|
||||||
networks:
|
networks:
|
||||||
model-net:
|
model-net:
|
||||||
|
|
|
||||||
|
|
@ -27,7 +27,9 @@
|
||||||
"GrapeCity.gc-excelviewer",
|
"GrapeCity.gc-excelviewer",
|
||||||
"jakobhoeg.vscode-pokemon",
|
"jakobhoeg.vscode-pokemon",
|
||||||
"github.vscode-github-actions",
|
"github.vscode-github-actions",
|
||||||
"me-dutour-mathieu.vscode-github-actions"
|
"me-dutour-mathieu.vscode-github-actions",
|
||||||
|
"anthropic.claude-code",
|
||||||
|
"eamodio.gitlens"
|
||||||
],
|
],
|
||||||
"settings": {
|
"settings": {
|
||||||
"files.defaultWorkspace": "/workspaces/model",
|
"files.defaultWorkspace": "/workspaces/model",
|
||||||
|
|
|
||||||
|
|
@ -1,14 +1,14 @@
|
||||||
mkdir -p ~/.ipython/profile_default/startup
|
# mkdir -p ~/.ipython/profile_default/startup
|
||||||
|
|
||||||
cat << 'EOF' > ~/.ipython/profile_default/startup/00-load-env.py
|
# cat << 'EOF' > ~/.ipython/profile_default/startup/00-load-env.py
|
||||||
from dotenv import load_dotenv
|
# from dotenv import load_dotenv
|
||||||
import os
|
# import os
|
||||||
|
|
||||||
# Adjust path as needed
|
# # Adjust path as needed
|
||||||
env_path = "/workspaces/model/backend/.env"
|
# env_path = "/workspaces/model/backend/.env"
|
||||||
if os.path.exists(env_path):
|
# if os.path.exists(env_path):
|
||||||
load_dotenv(env_path)
|
# load_dotenv(env_path)
|
||||||
print("✔ Loaded .env into Jupyter kernel")
|
# print("✔ Loaded .env into Jupyter kernel")
|
||||||
else:
|
# else:
|
||||||
print("⚠ No .env file found to load")
|
# print("⚠ No .env file found to load")
|
||||||
EOF
|
# EOF
|
||||||
|
|
|
||||||
32
.github/workflows/_deploy_lambda.yml
vendored
32
.github/workflows/_deploy_lambda.yml
vendored
|
|
@ -16,12 +16,14 @@ on:
|
||||||
type: string
|
type: string
|
||||||
|
|
||||||
ecr_repo:
|
ecr_repo:
|
||||||
required: true
|
required: false
|
||||||
type: string
|
type: string
|
||||||
|
default: ''
|
||||||
|
|
||||||
image_digest:
|
image_digest:
|
||||||
required: true
|
required: false
|
||||||
type: string
|
type: string
|
||||||
|
default: ''
|
||||||
|
|
||||||
terraform_apply:
|
terraform_apply:
|
||||||
required: false
|
required: false
|
||||||
|
|
@ -58,6 +60,8 @@ on:
|
||||||
required: false
|
required: false
|
||||||
TF_VAR_google_solar_api_key:
|
TF_VAR_google_solar_api_key:
|
||||||
required: false
|
required: false
|
||||||
|
TF_VAR_ordnance_survey_api_key:
|
||||||
|
required: false
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
deploy:
|
deploy:
|
||||||
|
|
@ -115,12 +119,23 @@ jobs:
|
||||||
TF_VAR_domain_name: ${{ secrets.TF_VAR_domain_name }}
|
TF_VAR_domain_name: ${{ secrets.TF_VAR_domain_name }}
|
||||||
TF_VAR_epc_auth_token: ${{ secrets.TF_VAR_epc_auth_token }}
|
TF_VAR_epc_auth_token: ${{ secrets.TF_VAR_epc_auth_token }}
|
||||||
TF_VAR_google_solar_api_key: ${{ secrets.TF_VAR_google_solar_api_key }}
|
TF_VAR_google_solar_api_key: ${{ secrets.TF_VAR_google_solar_api_key }}
|
||||||
|
TF_VAR_ordnance_survey_api_key: ${{ secrets.TF_VAR_ordnance_survey_api_key }}
|
||||||
run: |
|
run: |
|
||||||
|
ECR_REPO_URL_VAR=""
|
||||||
|
if [[ -n "${{ inputs.ecr_repo }}" ]]; then
|
||||||
|
ECR_REPO_URL_VAR="-var=ecr_repo_url=${{ steps.repo.outputs.ecr_repo_url }}"
|
||||||
|
fi
|
||||||
|
|
||||||
|
IMAGE_DIGEST_VAR=""
|
||||||
|
if [[ -n "${{ inputs.ecr_repo }}" ]]; then
|
||||||
|
IMAGE_DIGEST_VAR="-var=image_digest=${{ inputs.image_digest }}"
|
||||||
|
fi
|
||||||
|
|
||||||
terraform plan \
|
terraform plan \
|
||||||
-var="stage=${{ inputs.stage }}" \
|
-var="stage=${{ inputs.stage }}" \
|
||||||
-var="lambda_name=${{ inputs.lambda_name }}" \
|
-var="lambda_name=${{ inputs.lambda_name }}" \
|
||||||
-var="ecr_repo_url=${{ steps.repo.outputs.ecr_repo_url }}" \
|
$ECR_REPO_URL_VAR \
|
||||||
-var="image_digest=${{ inputs.image_digest }}" \
|
$IMAGE_DIGEST_VAR \
|
||||||
-out=lambdaplan
|
-out=lambdaplan
|
||||||
|
|
||||||
- name: Terraform Apply
|
- name: Terraform Apply
|
||||||
|
|
@ -140,9 +155,14 @@ jobs:
|
||||||
TF_VAR_domain_name: ${{ secrets.TF_VAR_domain_name }}
|
TF_VAR_domain_name: ${{ secrets.TF_VAR_domain_name }}
|
||||||
TF_VAR_epc_auth_token: ${{ secrets.TF_VAR_epc_auth_token }}
|
TF_VAR_epc_auth_token: ${{ secrets.TF_VAR_epc_auth_token }}
|
||||||
TF_VAR_google_solar_api_key: ${{ secrets.TF_VAR_google_solar_api_key }}
|
TF_VAR_google_solar_api_key: ${{ secrets.TF_VAR_google_solar_api_key }}
|
||||||
|
TF_VAR_ordnance_survey_api_key: ${{ secrets.TF_VAR_ordnance_survey_api_key }}
|
||||||
run: |
|
run: |
|
||||||
|
EXTRA_VARS=""
|
||||||
|
if [[ -n "${{ inputs.ecr_repo }}" ]]; then
|
||||||
|
EXTRA_VARS="-var=ecr_repo_url=${{ steps.repo.outputs.ecr_repo_url }} -var=image_digest=${{ inputs.image_digest }}"
|
||||||
|
fi
|
||||||
|
|
||||||
terraform destroy -auto-approve \
|
terraform destroy -auto-approve \
|
||||||
-var="stage=${{ inputs.stage }}" \
|
-var="stage=${{ inputs.stage }}" \
|
||||||
-var="lambda_name=${{ inputs.lambda_name }}" \
|
-var="lambda_name=${{ inputs.lambda_name }}" \
|
||||||
-var="ecr_repo_url=${{ steps.repo.outputs.ecr_repo_url }}" \
|
$EXTRA_VARS
|
||||||
-var="image_digest=${{ inputs.image_digest }}"
|
|
||||||
|
|
|
||||||
193
.github/workflows/deploy_terraform.yml
vendored
193
.github/workflows/deploy_terraform.yml
vendored
|
|
@ -41,7 +41,7 @@ jobs:
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# ============================================================
|
# ============================================================
|
||||||
# 1️⃣ Shared Terraform (infra)
|
# Shared Terraform (infra)
|
||||||
# ============================================================
|
# ============================================================
|
||||||
shared_terraform:
|
shared_terraform:
|
||||||
needs: determine_stage
|
needs: determine_stage
|
||||||
|
|
@ -77,9 +77,49 @@ jobs:
|
||||||
if: env.TERRAFORM_APPLY == 'true'
|
if: env.TERRAFORM_APPLY == 'true'
|
||||||
working-directory: infrastructure/terraform/shared
|
working-directory: infrastructure/terraform/shared
|
||||||
run: terraform apply -auto-approve tfplan
|
run: terraform apply -auto-approve tfplan
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# Ara Engine image and Push
|
||||||
|
# ============================================================
|
||||||
|
ara_engine_image:
|
||||||
|
needs: [determine_stage, shared_terraform]
|
||||||
|
uses: ./.github/workflows/_build_image.yml
|
||||||
|
with:
|
||||||
|
ecr_repo: engine-${{ needs.determine_stage.outputs.stage }}
|
||||||
|
dockerfile_path: backend/docker/engine.Dockerfile
|
||||||
|
build_context: .
|
||||||
|
secrets:
|
||||||
|
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
|
||||||
|
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
|
||||||
|
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# Deploy Ara Engine Lambda
|
||||||
|
# ============================================================
|
||||||
|
ara_engine_lambda:
|
||||||
|
needs: [ara_engine_image, determine_stage]
|
||||||
|
uses: ./.github/workflows/_deploy_lambda.yml
|
||||||
|
with:
|
||||||
|
lambda_name: ara_engine
|
||||||
|
lambda_path: infrastructure/terraform/lambda/engine
|
||||||
|
stage: ${{ needs.determine_stage.outputs.stage }}
|
||||||
|
ecr_repo: engine-${{ needs.determine_stage.outputs.stage }}
|
||||||
|
image_digest: ${{ needs.ara_engine_image.outputs.image_digest }}
|
||||||
|
terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }}
|
||||||
|
secrets:
|
||||||
|
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
|
||||||
|
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
|
||||||
|
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
|
||||||
|
TF_VAR_db_host: ${{ secrets.DEV_DB_HOST }}
|
||||||
|
TF_VAR_db_name: ${{ secrets.DEV_DB_NAME }}
|
||||||
|
TF_VAR_db_port: ${{ secrets.DEV_DB_PORT }}
|
||||||
|
TF_VAR_api_key: ${{ secrets.DEV_API_KEY }}
|
||||||
|
TF_VAR_secret_key: ${{ secrets.DEV_SECRET_KEY }}
|
||||||
|
TF_VAR_epc_auth_token: ${{ secrets.DEV_EPC_AUTH_TOKEN }}
|
||||||
|
TF_VAR_google_solar_api_key: ${{ secrets.DEV_GOOGLE_SOLAR_API_KEY }}
|
||||||
|
|
||||||
# ============================================================
|
# ============================================================
|
||||||
# 2️⃣ Build Address 2 UPRN image and Push
|
# Build Address 2 UPRN image and Push
|
||||||
# ============================================================
|
# ============================================================
|
||||||
address2uprn_image:
|
address2uprn_image:
|
||||||
needs: [determine_stage, shared_terraform]
|
needs: [determine_stage, shared_terraform]
|
||||||
|
|
@ -103,7 +143,7 @@ jobs:
|
||||||
EPC_AUTH_TOKEN: ${{ secrets.DEV_EPC_AUTH_TOKEN }}
|
EPC_AUTH_TOKEN: ${{ secrets.DEV_EPC_AUTH_TOKEN }}
|
||||||
|
|
||||||
# ============================================================
|
# ============================================================
|
||||||
# 3️⃣ Deploy Address 2 UPRN Lambda
|
# Deploy Address 2 UPRN Lambda
|
||||||
# ============================================================
|
# ============================================================
|
||||||
address2uprn_lambda:
|
address2uprn_lambda:
|
||||||
needs: [address2uprn_image, determine_stage]
|
needs: [address2uprn_image, determine_stage]
|
||||||
|
|
@ -122,7 +162,7 @@ jobs:
|
||||||
|
|
||||||
|
|
||||||
# ============================================================
|
# ============================================================
|
||||||
# 2️⃣ Build Postcode Splitter image and Push
|
# Build Postcode Splitter image and Push
|
||||||
# ============================================================
|
# ============================================================
|
||||||
postcodeSplitter_image:
|
postcodeSplitter_image:
|
||||||
needs: [determine_stage, shared_terraform]
|
needs: [determine_stage, shared_terraform]
|
||||||
|
|
@ -144,7 +184,7 @@ jobs:
|
||||||
DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }}
|
DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }}
|
||||||
|
|
||||||
# ============================================================
|
# ============================================================
|
||||||
# 3️⃣ Deploy Postcode Splitter Lambda
|
# Deploy Postcode Splitter Lambda
|
||||||
# ============================================================
|
# ============================================================
|
||||||
postcodeSplitter_lambda:
|
postcodeSplitter_lambda:
|
||||||
needs: [postcodeSplitter_image, determine_stage, address2uprn_lambda]
|
needs: [postcodeSplitter_image, determine_stage, address2uprn_lambda]
|
||||||
|
|
@ -242,32 +282,56 @@ jobs:
|
||||||
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
|
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
|
||||||
|
|
||||||
# ============================================================
|
# ============================================================
|
||||||
# Ara Engine image and Push
|
# Build OrdanceSurvey image and Push
|
||||||
# ============================================================
|
# ============================================================
|
||||||
ara_engine_image:
|
ordnanceSurvey_image:
|
||||||
needs: [determine_stage, shared_terraform]
|
needs: [determine_stage, shared_terraform]
|
||||||
uses: ./.github/workflows/_build_image.yml
|
uses: ./.github/workflows/_build_image.yml
|
||||||
with:
|
with:
|
||||||
ecr_repo: engine-${{ needs.determine_stage.outputs.stage }}
|
ecr_repo: ordnance-${{ needs.determine_stage.outputs.stage }}
|
||||||
dockerfile_path: backend/docker/engine.Dockerfile
|
dockerfile_path: backend/ordnanceSurvey/handler/Dockerfile
|
||||||
build_context: .
|
build_context: .
|
||||||
|
build_args: |
|
||||||
|
DEV_DB_HOST=$DEV_DB_HOST
|
||||||
|
DEV_DB_PORT=$DEV_DB_PORT
|
||||||
|
DEV_DB_NAME=$DEV_DB_NAME
|
||||||
secrets:
|
secrets:
|
||||||
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
|
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
|
||||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
|
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
|
||||||
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
|
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
|
||||||
|
DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }}
|
||||||
|
DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }}
|
||||||
|
DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }}
|
||||||
|
|
||||||
# ============================================================
|
# ============================================================
|
||||||
# Deploy Categorisation Lambda
|
# Deploy OrdanceSurvey Lambda
|
||||||
# ============================================================
|
# ============================================================
|
||||||
ara_engine_lambda:
|
ordnanceSurvey_lambda:
|
||||||
needs: [ara_engine_image, determine_stage]
|
needs: [ordnanceSurvey_image, determine_stage]
|
||||||
uses: ./.github/workflows/_deploy_lambda.yml
|
uses: ./.github/workflows/_deploy_lambda.yml
|
||||||
with:
|
with:
|
||||||
lambda_name: ara_engine
|
lambda_name: ordnanceSurvey
|
||||||
lambda_path: infrastructure/terraform/lambda/engine
|
lambda_path: infrastructure/terraform/lambda/ordnanceSurvey
|
||||||
|
stage: ${{ needs.determine_stage.outputs.stage }}
|
||||||
|
ecr_repo: ordnance-${{ needs.determine_stage.outputs.stage }}
|
||||||
|
image_digest: ${{ needs.ordnanceSurvey_image.outputs.image_digest }}
|
||||||
|
terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }}
|
||||||
|
secrets:
|
||||||
|
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
|
||||||
|
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
|
||||||
|
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
|
||||||
|
TF_VAR_ORDNANCE_SURVEY_API_KEY: ${{ secrets.ORDNANCE_SURVEY_API_KEY }}
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# Deploy FastAPI Lambda
|
||||||
|
# ============================================================
|
||||||
|
fast_api_lambda:
|
||||||
|
needs: [determine_stage, ara_engine_lambda, categorisation_lambda]
|
||||||
|
uses: ./.github/workflows/_deploy_lambda.yml
|
||||||
|
with:
|
||||||
|
lambda_name: ara_fast_api
|
||||||
|
lambda_path: infrastructure/terraform/lambda/fast-api
|
||||||
stage: ${{ needs.determine_stage.outputs.stage }}
|
stage: ${{ needs.determine_stage.outputs.stage }}
|
||||||
ecr_repo: engine-${{ needs.determine_stage.outputs.stage }}
|
|
||||||
image_digest: ${{ needs.ara_engine_image.outputs.image_digest }}
|
|
||||||
terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }}
|
terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }}
|
||||||
secrets:
|
secrets:
|
||||||
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
|
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
|
||||||
|
|
@ -276,8 +340,97 @@ jobs:
|
||||||
TF_VAR_db_host: ${{ secrets.DEV_DB_HOST }}
|
TF_VAR_db_host: ${{ secrets.DEV_DB_HOST }}
|
||||||
TF_VAR_db_name: ${{ secrets.DEV_DB_NAME }}
|
TF_VAR_db_name: ${{ secrets.DEV_DB_NAME }}
|
||||||
TF_VAR_db_port: ${{ secrets.DEV_DB_PORT }}
|
TF_VAR_db_port: ${{ secrets.DEV_DB_PORT }}
|
||||||
TF_VAR_api_key: ${{ secrets.DEV_API_KEY }}
|
TF_VAR_api_key: ${{ secrets.FASTAPI_API_KEY }}
|
||||||
TF_VAR_secret_key: ${{ secrets.DEV_SECRET_KEY }}
|
TF_VAR_secret_key: ${{ secrets.NEXTAUTH_SECRET }}
|
||||||
TF_VAR_domain_name: ${{ secrets.DEV_DOMAIN_NAME }}
|
TF_VAR_domain_name: ${{ secrets.ARA_DEV_DOMAIN_NAME }}
|
||||||
TF_VAR_epc_auth_token: ${{ secrets.DEV_EPC_AUTH_TOKEN }}
|
TF_VAR_epc_auth_token: ${{ secrets.DEV_EPC_AUTH_TOKEN }}
|
||||||
TF_VAR_google_solar_api_key: ${{ secrets.DEV_GOOGLE_SOLAR_API_KEY }}
|
TF_VAR_google_solar_api_key: ${{ secrets.DEV_GOOGLE_SOLAR_API_KEY }}
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# Deploy ACM Certificate for Cloudfront
|
||||||
|
# ============================================================
|
||||||
|
cloudfront_acm:
|
||||||
|
needs: [determine_stage, shared_terraform, fast_api_lambda]
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
|
env:
|
||||||
|
STAGE: ${{ needs.determine_stage.outputs.stage }}
|
||||||
|
TERRAFORM_APPLY: ${{ needs.determine_stage.outputs.terraform_apply }}
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- uses: aws-actions/configure-aws-credentials@v4
|
||||||
|
with:
|
||||||
|
aws-access-key-id: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
|
||||||
|
aws-secret-access-key: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
|
||||||
|
aws-region: ${{ secrets.DEV_AWS_REGION }}
|
||||||
|
|
||||||
|
- uses: hashicorp/setup-terraform@v3
|
||||||
|
|
||||||
|
- name: Terraform Init
|
||||||
|
working-directory: infrastructure/terraform/cdn_certificate
|
||||||
|
run: terraform init -reconfigure
|
||||||
|
|
||||||
|
- name: Terraform Workspace
|
||||||
|
working-directory: infrastructure/terraform/cdn_certificate
|
||||||
|
run: |
|
||||||
|
terraform workspace select $STAGE \
|
||||||
|
|| terraform workspace new $STAGE
|
||||||
|
|
||||||
|
- name: Terraform Plan
|
||||||
|
working-directory: infrastructure/terraform/cdn_certificate
|
||||||
|
run: |
|
||||||
|
terraform plan \
|
||||||
|
-var="stage=${STAGE}" \
|
||||||
|
-out=tfplan
|
||||||
|
|
||||||
|
- name: Terraform Apply
|
||||||
|
if: env.TERRAFORM_APPLY == 'true'
|
||||||
|
working-directory: infrastructure/terraform/cdn_certificate
|
||||||
|
run: terraform apply -auto-approve tfplan
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# Deploy Cloudfront CDN
|
||||||
|
# ============================================================
|
||||||
|
cloudfront_cdn:
|
||||||
|
needs: [determine_stage, cloudfront_acm]
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
|
env:
|
||||||
|
STAGE: ${{ needs.determine_stage.outputs.stage }}
|
||||||
|
TERRAFORM_APPLY: ${{ needs.determine_stage.outputs.terraform_apply }}
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- uses: aws-actions/configure-aws-credentials@v4
|
||||||
|
with:
|
||||||
|
aws-access-key-id: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
|
||||||
|
aws-secret-access-key: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
|
||||||
|
aws-region: ${{ secrets.DEV_AWS_REGION }}
|
||||||
|
|
||||||
|
- uses: hashicorp/setup-terraform@v3
|
||||||
|
|
||||||
|
- name: Terraform Init
|
||||||
|
working-directory: infrastructure/terraform/cdn
|
||||||
|
run: terraform init -reconfigure
|
||||||
|
|
||||||
|
- name: Terraform Workspace
|
||||||
|
working-directory: infrastructure/terraform/cdn
|
||||||
|
run: |
|
||||||
|
terraform workspace select $STAGE \
|
||||||
|
|| terraform workspace new $STAGE
|
||||||
|
|
||||||
|
- name: Terraform Plan
|
||||||
|
working-directory: infrastructure/terraform/cdn
|
||||||
|
run: |
|
||||||
|
terraform plan \
|
||||||
|
-var="stage=${STAGE}" \
|
||||||
|
-out=tfplan
|
||||||
|
|
||||||
|
- name: Terraform Apply
|
||||||
|
if: env.TERRAFORM_APPLY == 'true'
|
||||||
|
working-directory: infrastructure/terraform/cdn
|
||||||
|
run: terraform apply -auto-approve tfplan
|
||||||
2
.gitignore
vendored
2
.gitignore
vendored
|
|
@ -246,7 +246,7 @@ etl/epc/local_data/*
|
||||||
/backend/condition/sample_data/peabody/*
|
/backend/condition/sample_data/peabody/*
|
||||||
|
|
||||||
*.DS_Store
|
*.DS_Store
|
||||||
infrastructure/terraform/.terraform*
|
**/.terraform*
|
||||||
|
|
||||||
# Don't commit packages up serverless packages
|
# Don't commit packages up serverless packages
|
||||||
.serverless
|
.serverless
|
||||||
|
|
|
||||||
8
.vscode/settings.json
vendored
8
.vscode/settings.json
vendored
|
|
@ -16,7 +16,13 @@
|
||||||
"python.languageServer": "Pylance",
|
"python.languageServer": "Pylance",
|
||||||
"python.analysis.typeCheckingMode": "strict",
|
"python.analysis.typeCheckingMode": "strict",
|
||||||
"python.analysis.autoSearchPaths": true,
|
"python.analysis.autoSearchPaths": true,
|
||||||
"python.analysis.extraPaths": ["./src"]
|
"python.analysis.extraPaths": ["./src"],
|
||||||
|
|
||||||
|
"vim.useCtrlKeys": true,
|
||||||
|
"vim.handleKeys": {
|
||||||
|
"<C-c>": false,
|
||||||
|
"<C-v>": false
|
||||||
|
}
|
||||||
|
|
||||||
// Hot reload setting that needs to be in user settings
|
// Hot reload setting that needs to be in user settings
|
||||||
// "jupyter.runStartupCommands": [
|
// "jupyter.runStartupCommands": [
|
||||||
|
|
|
||||||
|
|
@ -73,24 +73,25 @@ def app():
|
||||||
Property UPRN
|
Property UPRN
|
||||||
"""
|
"""
|
||||||
|
|
||||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/March 2026 SAL"
|
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lifespace Rentals/Missed"
|
||||||
data_filename = "Domna System Review - Livewest.xlsx"
|
# data_filename = "For Modelling - Final - reviewed.xlsx"
|
||||||
|
data_filename = "Missed Properties - with address.xlsx"
|
||||||
sheet_name = "Sheet1"
|
sheet_name = "Sheet1"
|
||||||
postcode_column = "Postcode"
|
postcode_column = "Postcode"
|
||||||
address1_column = None
|
address1_column = "address1"
|
||||||
address1_method = "house_number_extraction"
|
address1_method = None
|
||||||
fulladdress_column = "Address"
|
fulladdress_column = "address1"
|
||||||
address_cols_to_concat = []
|
address_cols_to_concat = []
|
||||||
missing_postcodes_method = None
|
missing_postcodes_method = None
|
||||||
landlord_year_built = None
|
landlord_year_built = None
|
||||||
landlord_os_uprn = "gov UPRN"
|
landlord_os_uprn = "UPRN"
|
||||||
landlord_property_type = "AssetType"
|
landlord_property_type = "Type"
|
||||||
landlord_built_form = "AssetType"
|
landlord_built_form = None
|
||||||
landlord_wall_construction = None
|
landlord_wall_construction = None
|
||||||
landlord_roof_construction = None
|
landlord_roof_construction = None
|
||||||
landlord_heating_system = None
|
landlord_heating_system = None
|
||||||
landlord_existing_pv = None
|
landlord_existing_pv = None
|
||||||
landlord_property_id = "landlord_uprn"
|
landlord_property_id = "Reference"
|
||||||
landlord_sap = None
|
landlord_sap = None
|
||||||
outcomes_filename = None
|
outcomes_filename = None
|
||||||
outcomes_sheetname = None
|
outcomes_sheetname = None
|
||||||
|
|
|
||||||
|
|
@ -364,5 +364,5 @@ Here's what you should do:
|
||||||
function.
|
function.
|
||||||
|
|
||||||
By following these steps, you should have your custom domain properly configured and pointing to your AWS Lambda
|
By following these steps, you should have your custom domain properly configured and pointing to your AWS Lambda
|
||||||
function via the CloudFront distribution.
|
function via the CloudFront distribution
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,20 +1,58 @@
|
||||||
We have list of address as input.
|
So you want to fetch UPRN for an address list?
|
||||||
|
|
||||||
It'll come in batches of the same post code and from then we want to somehow convert that into UPRN
|
|
||||||
|
|
||||||
if this lambda/function can do that we'll be speeding ahead
|
|
||||||
|
|
||||||
|
|
||||||
Energy Performance Information: https://epc.opendatacommunities.org/
|
Before you run:
|
||||||
|
|
||||||
guidance page: https://epc.opendatacommunities.org/docs/guidance#field_domestic_LMK_KEY
|
Step 1) Get the list and ensure the following columns exists
|
||||||
|
|
||||||
Example of past khalims code that he wrote some tests for: https://github.com/Hestia-Homes/Model/blob/941be42b83a590e838fd3ee475bfd1ff31438789/backend/tests/test_search_epc.py#L11
|
* Address 1
|
||||||
|
* Address 2
|
||||||
|
* Address 3
|
||||||
|
* postcode
|
||||||
|
|
||||||
|
And save it as a .csv file
|
||||||
|
|
||||||
|
|
||||||
Example of EPC search: https://github.com/Hestia-Homes/Model/blob/941be42b83a590e838fd3ee475bfd1ff31438789/backend/SearchEpc.py#L118
|
Step 2)
|
||||||
|
|
||||||
|
Before we run this, we need to upload it into S3 as well as put initiate a subtask + task
|
||||||
|
|
||||||
|
* S3 upload I'll recommend somewhere in retrofit-data-dev and get the s3_uri
|
||||||
|
|
||||||
|
For this example I'll be using "s3://retrofit-data-dev/ara_raw_inputs/calico/Calico Homes Full list EPC Properties(Sheet2) (1) (1).csv"
|
||||||
|
|
||||||
|
Go to Ara DB and make a new task_id with a randomly generated uuid as the primarily key
|
||||||
|
|
||||||
|
task_id = a7b70a02-4df4-45b5-a50b-196e095910bb
|
||||||
|
sub_task_id = 567cf73b-1210-4909-9ecc-36ae7e23420e
|
||||||
|
|
||||||
|
Step 3) Alright, now lets make the input for postcode-splitter sqs to get the ball rolling
|
||||||
|
postcode-splitter-sqs => https://eu-west-2.console.aws.amazon.com/sqs/v3/home?region=eu-west-2#/queues/https%3A%2F%2Fsqs.eu-west-2.amazonaws.com%2F337213553626%2Fpostcode-splitter-queue-dev
|
||||||
|
|
||||||
|
{
|
||||||
|
"task_id": "a7b70a02-4df4-45b5-a50b-196e095910bb",
|
||||||
|
"sub_task_id": "567cf73b-1210-4909-9ecc-36ae7e23420e",
|
||||||
|
"s3_uri": "s3://retrofit-data-dev/ara_raw_inputs/calico/Calico Homes Full list EPC Properties(Sheet2) (1) (1).csv"
|
||||||
|
}
|
||||||
|
Each batch of csv should be saved in retrofit-data-dev/ara_postcode_splitter_batches/<task-id>/<sub-task-id>/<timestamp:uuid4>.csv
|
||||||
|
|
||||||
|
outputs of address2uprn ( which is automatically triggered on postcodesplitter) will be saved on retrofit-data-dev/ara_raw_outputs/<task-id>/<subtask-id>/<timestamp:uuid4>.csv
|
||||||
|
|
||||||
|
|
||||||
|
Run the script in backend/scripts/combine_address2uprn_outputs.py with <task-id>.
|
||||||
|
This will combine all the outputs of the files for each address2uprn into one big file
|
||||||
|
|
||||||
Khalim has made a python package to help scrape data: https://github.com/KhalimCK/epc-api-python
|
Find out which ones have missing uprn and save that as a seperate sheet and save it somewhere in s3://retrofit-data-dev
|
||||||
|
|
||||||
|
I uploaded the missing uprn here: s3://retrofit-data-dev/ara_raw_inputs/calico/missinguprn.csv
|
||||||
|
|
||||||
|
ordnance_survey sqs is => https://eu-west-2.console.aws.amazon.com/sqs/v3/home?region=eu-west-2#/queues/https%3A%2F%2Fsqs.eu-west-2.amazonaws.com%2F337213553626%2FordnanceSurvey-queue-dev
|
||||||
|
|
||||||
|
{
|
||||||
|
"s3_uri": "s3://retrofit-data-dev/ara_raw_inputs/calico/missinguprn.csv",
|
||||||
|
"task_id": "a7b70a02-4df4-45b5-a50b-196e095910bb",
|
||||||
|
"sub_task_id": "567cf73b-1210-4909-9ecc-36ae7e23420e"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
outputs are at s3://retrofit-data-dev/ara_ordnance_survey_outputs/<task-id>/<sub-task-id>
|
||||||
|
|
@ -1,13 +1,11 @@
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
from epc_api.client import EpcClient
|
from epc_api.client import EpcClient
|
||||||
import os
|
import os
|
||||||
from urllib.parse import urlencode
|
from urllib.parse import urlencode
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from difflib import SequenceMatcher
|
|
||||||
from utils.logger import setup_logger
|
from utils.logger import setup_logger
|
||||||
import re
|
|
||||||
from typing import Set
|
|
||||||
import json
|
import json
|
||||||
import requests
|
|
||||||
from uuid import UUID
|
from uuid import UUID
|
||||||
import uuid
|
import uuid
|
||||||
from backend.app.db.functions.tasks.Tasks import SubTaskInterface
|
from backend.app.db.functions.tasks.Tasks import SubTaskInterface
|
||||||
|
|
@ -18,6 +16,8 @@ from utils.s3 import (
|
||||||
)
|
)
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
|
from backend.utils.addressMatch import AddressMatch
|
||||||
|
|
||||||
logger = setup_logger()
|
logger = setup_logger()
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -29,191 +29,6 @@ if EPC_AUTH_TOKEN is None:
|
||||||
raise RuntimeError("EPC_AUTH_TOKEN not defined in env")
|
raise RuntimeError("EPC_AUTH_TOKEN not defined in env")
|
||||||
|
|
||||||
|
|
||||||
def is_valid_postcode(postcode_clean: str) -> bool:
|
|
||||||
"""
|
|
||||||
Validate postcode using postcodes.io.
|
|
||||||
|
|
||||||
Expects a sanitised postcode (e.g. E84SQ).
|
|
||||||
Returns True if valid, False otherwise.
|
|
||||||
"""
|
|
||||||
POSTCODES_IO_VALIDATE_URL = "https://api.postcodes.io/postcodes/{postcode}/validate"
|
|
||||||
if not postcode_clean:
|
|
||||||
return False
|
|
||||||
|
|
||||||
try:
|
|
||||||
resp = requests.get(
|
|
||||||
POSTCODES_IO_VALIDATE_URL.format(postcode=postcode_clean),
|
|
||||||
timeout=5,
|
|
||||||
)
|
|
||||||
resp.raise_for_status()
|
|
||||||
return resp.json().get("result", False)
|
|
||||||
except requests.RequestException:
|
|
||||||
# Network issues, rate limits, etc.
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
def levenshtein(a: str, b: str) -> float:
|
|
||||||
"""
|
|
||||||
Address similarity score in [0, 1].
|
|
||||||
|
|
||||||
Strategy:
|
|
||||||
- Normalise
|
|
||||||
- Strongly penalise mismatched house/flat numbers
|
|
||||||
- Combine token overlap + character similarity
|
|
||||||
"""
|
|
||||||
|
|
||||||
def extract_number_sequence(s: str) -> list[str]:
|
|
||||||
return re.findall(r"\d+[a-z]?", s)
|
|
||||||
|
|
||||||
def extract_numbers(s: str) -> Set[str]:
|
|
||||||
return set(extract_number_sequence(s))
|
|
||||||
|
|
||||||
def tokenise(s: str) -> Set[str]:
|
|
||||||
return set(s.split())
|
|
||||||
|
|
||||||
def extract_building_number(s: str) -> str | None:
|
|
||||||
"""
|
|
||||||
Extract the main building number (NOT flat/unit).
|
|
||||||
Assumes formats like:
|
|
||||||
- '42 moreton road'
|
|
||||||
- 'flat 3 42 moreton road'
|
|
||||||
"""
|
|
||||||
tokens = s.split()
|
|
||||||
|
|
||||||
# remove flat/unit context
|
|
||||||
cleaned = []
|
|
||||||
skip_next = False
|
|
||||||
for t in tokens:
|
|
||||||
if t in ("flat", "apt", "apartment", "unit"):
|
|
||||||
skip_next = True
|
|
||||||
continue
|
|
||||||
if skip_next:
|
|
||||||
skip_next = False
|
|
||||||
continue
|
|
||||||
cleaned.append(t)
|
|
||||||
|
|
||||||
# first remaining number is building number
|
|
||||||
for t in cleaned:
|
|
||||||
if re.fullmatch(r"\d+[a-z]?", t):
|
|
||||||
return t
|
|
||||||
|
|
||||||
return None
|
|
||||||
|
|
||||||
a_norm = normalise_address(a)
|
|
||||||
b_norm = normalise_address(b)
|
|
||||||
|
|
||||||
# --- hard signal: numbers ---
|
|
||||||
nums_a = extract_numbers(a_norm)
|
|
||||||
nums_b = extract_numbers(b_norm)
|
|
||||||
|
|
||||||
if nums_a and not nums_b:
|
|
||||||
return 0.0
|
|
||||||
|
|
||||||
# No shared numbers at all → impossible match
|
|
||||||
if nums_a and nums_b and nums_a.isdisjoint(nums_b):
|
|
||||||
return 0.0
|
|
||||||
|
|
||||||
# 🔒 HARD GUARD: building number must match
|
|
||||||
bld_a = extract_building_number(a_norm)
|
|
||||||
bld_b = extract_building_number(b_norm)
|
|
||||||
|
|
||||||
if bld_a and bld_b and bld_a != bld_b:
|
|
||||||
return 0.0
|
|
||||||
|
|
||||||
# --- order-sensitive flat/building guard ---
|
|
||||||
seq_a = extract_number_sequence(a_norm)
|
|
||||||
seq_b = extract_number_sequence(b_norm)
|
|
||||||
|
|
||||||
has_flat_token_user = any(
|
|
||||||
tok in a_norm for tok in ("flat", "apt", "apartment", "unit")
|
|
||||||
)
|
|
||||||
has_flat_token_epc = "flat" in b_norm
|
|
||||||
|
|
||||||
if (
|
|
||||||
len(seq_a) == 2
|
|
||||||
and len(seq_b) >= 2
|
|
||||||
and has_flat_token_epc
|
|
||||||
and not has_flat_token_user
|
|
||||||
and seq_a != seq_b[:2]
|
|
||||||
):
|
|
||||||
return 0.0
|
|
||||||
|
|
||||||
# --- token similarity (order-independent) ---
|
|
||||||
toks_a = tokenise(a_norm)
|
|
||||||
toks_b = tokenise(b_norm)
|
|
||||||
|
|
||||||
if not toks_a or not toks_b:
|
|
||||||
token_score = 0.0
|
|
||||||
else:
|
|
||||||
token_score = len(toks_a & toks_b) / len(toks_a | toks_b)
|
|
||||||
|
|
||||||
# --- character similarity (soft signal) ---
|
|
||||||
char_score = SequenceMatcher(None, a_norm, b_norm).ratio()
|
|
||||||
|
|
||||||
# --- weighted blend ---
|
|
||||||
return round(
|
|
||||||
0.65 * token_score + 0.35 * char_score,
|
|
||||||
4,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def normalise_address(s: str) -> str:
|
|
||||||
"""
|
|
||||||
Canonical UK-focused address normalisation.
|
|
||||||
|
|
||||||
- Lowercases
|
|
||||||
- Removes punctuation (keeps / for flats)
|
|
||||||
- Normalises whitespace
|
|
||||||
- Applies synonym compression at token level
|
|
||||||
"""
|
|
||||||
|
|
||||||
if not s:
|
|
||||||
return ""
|
|
||||||
|
|
||||||
ADDRESS_SYNONYMS = {
|
|
||||||
# street types
|
|
||||||
"rd": "road",
|
|
||||||
"rd.": "road",
|
|
||||||
"st": "street",
|
|
||||||
"st.": "street",
|
|
||||||
"ave": "avenue",
|
|
||||||
"ave.": "avenue",
|
|
||||||
"ln": "lane",
|
|
||||||
"ln.": "lane",
|
|
||||||
"cres": "crescent",
|
|
||||||
"ct": "court",
|
|
||||||
"dr": "drive",
|
|
||||||
# flats / units
|
|
||||||
"apt": "flat",
|
|
||||||
"apartment": "flat",
|
|
||||||
"unit": "flat",
|
|
||||||
"ste": "suite",
|
|
||||||
# numbering noise
|
|
||||||
"no": "",
|
|
||||||
"no.": "",
|
|
||||||
}
|
|
||||||
# 1. lowercase
|
|
||||||
s = s.lower()
|
|
||||||
|
|
||||||
# 1.5 split digit-letter suffixes
|
|
||||||
s = re.sub(r"(\d+)([a-z])\b", r"\1 \2", s)
|
|
||||||
|
|
||||||
# 2. remove punctuation except /
|
|
||||||
s = re.sub(r"[^\w\s/]", " ", s)
|
|
||||||
|
|
||||||
# 3. normalise whitespace
|
|
||||||
s = re.sub(r"\s+", " ", s).strip()
|
|
||||||
|
|
||||||
# 4. tokenise + synonym normalisation
|
|
||||||
tokens = []
|
|
||||||
for tok in s.split():
|
|
||||||
replacement = ADDRESS_SYNONYMS.get(tok, tok)
|
|
||||||
if replacement:
|
|
||||||
tokens.append(replacement)
|
|
||||||
|
|
||||||
return " ".join(tokens)
|
|
||||||
|
|
||||||
|
|
||||||
def score_addresses(
|
def score_addresses(
|
||||||
df: pd.DataFrame,
|
df: pd.DataFrame,
|
||||||
user_address: str,
|
user_address: str,
|
||||||
|
|
@ -222,7 +37,7 @@ def score_addresses(
|
||||||
if column not in df.columns:
|
if column not in df.columns:
|
||||||
raise ValueError(f"Missing column: {column}")
|
raise ValueError(f"Missing column: {column}")
|
||||||
|
|
||||||
return df[column].apply(lambda x: levenshtein(user_address, x))
|
return df[column].apply(lambda x: AddressMatch.score(user_address, x))
|
||||||
|
|
||||||
|
|
||||||
def get_epc_data_with_postcode(postcode, size=500, attempt=1, max_attempts=3):
|
def get_epc_data_with_postcode(postcode, size=500, attempt=1, max_attempts=3):
|
||||||
|
|
@ -314,9 +129,11 @@ def get_uprn_candidates(
|
||||||
|
|
||||||
out = df.copy()
|
out = df.copy()
|
||||||
|
|
||||||
user_norm = normalise_address(user_address)
|
user_norm = AddressMatch.normalise_address(user_address)
|
||||||
|
|
||||||
out["lexiscore"] = out[address_column].apply(lambda x: levenshtein(user_norm, x))
|
out["lexiscore"] = out[address_column].apply(
|
||||||
|
lambda x: AddressMatch.levenshtein(user_norm, x)
|
||||||
|
)
|
||||||
|
|
||||||
# Normalise UPRN to string
|
# Normalise UPRN to string
|
||||||
out[uprn_column] = out[uprn_column].astype(str).str.replace(r"\.0$", "", regex=True)
|
out[uprn_column] = out[uprn_column].astype(str).str.replace(r"\.0$", "", regex=True)
|
||||||
|
|
@ -480,7 +297,10 @@ def resolve_uprns_for_postcode_group(
|
||||||
|
|
||||||
|
|
||||||
def save_results_to_s3(
|
def save_results_to_s3(
|
||||||
results_df: pd.DataFrame, task_id: str, sub_task_id: str, bucket_name: str = None
|
results_df: pd.DataFrame,
|
||||||
|
task_id: str,
|
||||||
|
sub_task_id: str,
|
||||||
|
bucket_name: Optional[str] = None,
|
||||||
) -> bool:
|
) -> bool:
|
||||||
"""
|
"""
|
||||||
Save results DataFrame to S3 as CSV.
|
Save results DataFrame to S3 as CSV.
|
||||||
|
|
@ -533,7 +353,7 @@ def handler(event, context, local=False):
|
||||||
{
|
{
|
||||||
"task_id": "e31f2f21-175b-4a91-a3ec-a6baa325e917",
|
"task_id": "e31f2f21-175b-4a91-a3ec-a6baa325e917",
|
||||||
"sub_task_id": "6a427b6e-1ece-4983-b1e5-9bffccc53d1d",
|
"sub_task_id": "6a427b6e-1ece-4983-b1e5-9bffccc53d1d",
|
||||||
"s3_uri": "s3://retrofit-data-dev/ara_postcode_splitter_batches/e31f2f21-175b-4a91-a3ec-a6baa325e917/8673913b-1a88-42d7-8578-0449123d94b0/2026-02-16T12:00:20.257856_7b520c0e.csv",
|
"s3_uri": "s3://retrofit-data-dev/ara_postcode_splitter_batches/e31f2f21-175b-4a91-a3ec-a6baa325e917/8673913b-1a88-42d7-8578-0449123d94b0/2026-02-18T11:47:00.822579_f95467f5.csv",
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
@ -621,19 +441,6 @@ def handler(event, context, local=False):
|
||||||
# Process the rows
|
# Process the rows
|
||||||
logger.info(f"Processing {len(df)} rows for task {task_id}")
|
logger.info(f"Processing {len(df)} rows for task {task_id}")
|
||||||
|
|
||||||
# Create user_input column by concatenating Address columns if not already present
|
|
||||||
if "user_input" not in df.columns:
|
|
||||||
df["user_input"] = (
|
|
||||||
df["Address 1"].fillna("")
|
|
||||||
+ " "
|
|
||||||
+ df["Address 2"].fillna("")
|
|
||||||
+ " "
|
|
||||||
+ df["Address 3"].fillna("")
|
|
||||||
).str.strip()
|
|
||||||
logger.info(f"Created user_input column from Address 1 and Address 2")
|
|
||||||
else:
|
|
||||||
logger.info(f"user_input column already present in data")
|
|
||||||
|
|
||||||
clean_df = df.dropna(subset=["postcode_clean"])
|
clean_df = df.dropna(subset=["postcode_clean"])
|
||||||
|
|
||||||
postcode_to_addresses = {
|
postcode_to_addresses = {
|
||||||
|
|
@ -653,7 +460,7 @@ def handler(event, context, local=False):
|
||||||
)
|
)
|
||||||
|
|
||||||
# Validate postcode before processing
|
# Validate postcode before processing
|
||||||
if not is_valid_postcode(postcode):
|
if not AddressMatch.is_valid_postcode(postcode):
|
||||||
logger.warning(f"Postcode {postcode} is invalid, skipping")
|
logger.warning(f"Postcode {postcode} is invalid, skipping")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
@ -672,57 +479,67 @@ def handler(event, context, local=False):
|
||||||
# Process each address in this postcode with the same EPC data
|
# Process each address in this postcode with the same EPC data
|
||||||
for row in postcode_rows:
|
for row in postcode_rows:
|
||||||
try:
|
try:
|
||||||
user_input = row.get("user_input", "")
|
# Concatenate Address columns directly
|
||||||
if not user_input:
|
address2uprn_user_input = (
|
||||||
|
str(row.get("Address 1", "")).strip()
|
||||||
|
+ " "
|
||||||
|
+ str(row.get("Address 2", "")).strip()
|
||||||
|
+ " "
|
||||||
|
+ str(row.get("Address 3", "")).strip()
|
||||||
|
).strip()
|
||||||
|
|
||||||
|
if not address2uprn_user_input:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"Skipping row with missing user_input for postcode {postcode}"
|
f"Skipping row with missing address components for postcode {postcode}"
|
||||||
)
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Get UPRN using the pre-fetched EPC data with all return options
|
# Get UPRN using the pre-fetched EPC data with all return options
|
||||||
result = get_uprn_with_epc_df(
|
result = get_uprn_with_epc_df(
|
||||||
user_inputed_address=user_input, epc_df=epc_df, verbose=True
|
user_inputed_address=address2uprn_user_input,
|
||||||
|
epc_df=epc_df,
|
||||||
|
verbose=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Parse result tuple if successful
|
# Parse result tuple if successful
|
||||||
if result:
|
if result:
|
||||||
uprn, found_address, score = result
|
uprn, found_address, score = result
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Found UPRN for {user_input} in {postcode}: {uprn} (score: {score})"
|
f"Found UPRN for {address2uprn_user_input} in {postcode}: {uprn} (score: {score})"
|
||||||
)
|
)
|
||||||
|
|
||||||
results_data.append(
|
results_data.append(
|
||||||
{
|
{
|
||||||
**row, # Include all original data
|
**row, # Include all original data
|
||||||
"uprn": uprn,
|
"address2uprn_uprn": uprn,
|
||||||
"domna_found_address": found_address,
|
"address2uprn_address": found_address,
|
||||||
"domna_lexiscore": score,
|
"address2uprn_lexiscore": score,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"No UPRN found for {user_input} in {postcode}"
|
f"No UPRN found for {address2uprn_user_input} in {postcode}"
|
||||||
)
|
)
|
||||||
results_data.append(
|
results_data.append(
|
||||||
{
|
{
|
||||||
**row, # Include all original data
|
**row, # Include all original data
|
||||||
"uprn": None,
|
"address2uprn_uprn": None,
|
||||||
"domna_found_address": None,
|
"address2uprn_address": None,
|
||||||
"domna_lexiscore": None,
|
"address2uprn_lexiscore": None,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(
|
logger.error(
|
||||||
f"Error processing address {row.get('user_input', 'unknown')}: {e}"
|
f"Error processing address {row.get('address2uprn_user_input', 'unknown')}: {e}"
|
||||||
)
|
)
|
||||||
# Still add the row with error markers
|
# Still add the row with error markers
|
||||||
results_data.append(
|
results_data.append(
|
||||||
{
|
{
|
||||||
**row,
|
**row,
|
||||||
"uprn": None,
|
"address2uprn_uprn": None,
|
||||||
"domna_found_address": None,
|
"address2uprn_address": None,
|
||||||
"domna_lexiscore": None,
|
"address2uprn_lexiscore": None,
|
||||||
"error": str(e),
|
"error": str(e),
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -35,7 +35,7 @@ class Settings(BaseSettings):
|
||||||
SECRET_KEY: str = "changeme"
|
SECRET_KEY: str = "changeme"
|
||||||
ENVIRONMENT: str = "changeme"
|
ENVIRONMENT: str = "changeme"
|
||||||
DATA_BUCKET: str = "changeme"
|
DATA_BUCKET: str = "changeme"
|
||||||
PLAN_TRIGGER_BUCKET: str
|
PLAN_TRIGGER_BUCKET: str = "changeme"
|
||||||
ENGINE_SQS_URL: str = "changeme"
|
ENGINE_SQS_URL: str = "changeme"
|
||||||
CATEGORISATION_SQS_URL: str = "changeme"
|
CATEGORISATION_SQS_URL: str = "changeme"
|
||||||
|
|
||||||
|
|
@ -63,6 +63,8 @@ class Settings(BaseSettings):
|
||||||
# Other S3 buckts
|
# Other S3 buckts
|
||||||
ENERGY_ASSESSMENTS_BUCKET: str = "changeme"
|
ENERGY_ASSESSMENTS_BUCKET: str = "changeme"
|
||||||
|
|
||||||
|
ORDNANCE_SURVEY_API_KEY: str = "changeme"
|
||||||
|
|
||||||
# Optional AWS creds (only required in local)
|
# Optional AWS creds (only required in local)
|
||||||
AWS_ACCESS_KEY_ID: Optional[str] = None
|
AWS_ACCESS_KEY_ID: Optional[str] = None
|
||||||
AWS_SECRET_KEY_ID: Optional[str] = None
|
AWS_SECRET_KEY_ID: Optional[str] = None
|
||||||
|
|
|
||||||
24
backend/app/db/models/postcode_search.py
Normal file
24
backend/app/db/models/postcode_search.py
Normal file
|
|
@ -0,0 +1,24 @@
|
||||||
|
import pytz
|
||||||
|
import datetime
|
||||||
|
from sqlalchemy import (
|
||||||
|
Column,
|
||||||
|
BigInteger,
|
||||||
|
Text,
|
||||||
|
DateTime,
|
||||||
|
)
|
||||||
|
from sqlalchemy.dialects.postgresql import JSONB
|
||||||
|
from sqlalchemy.ext.declarative import declarative_base
|
||||||
|
|
||||||
|
Base = declarative_base()
|
||||||
|
|
||||||
|
|
||||||
|
class PostcodeSearchModel(Base):
|
||||||
|
__tablename__ = "postcode_search"
|
||||||
|
|
||||||
|
id = Column(BigInteger, primary_key=True, autoincrement=True)
|
||||||
|
postcode = Column(Text, nullable=False)
|
||||||
|
result_data = Column(JSONB, nullable=True)
|
||||||
|
|
||||||
|
created_at = Column(
|
||||||
|
DateTime(timezone=True), nullable=False, default=datetime.datetime.now(pytz.utc)
|
||||||
|
)
|
||||||
|
|
@ -43,7 +43,7 @@ def generate_api_key():
|
||||||
# Define the characters that will be used to generate the api key
|
# Define the characters that will be used to generate the api key
|
||||||
characters = string.ascii_letters + string.digits
|
characters = string.ascii_letters + string.digits
|
||||||
# Generate a 40 character long api key
|
# Generate a 40 character long api key
|
||||||
api_key = ''.join(secrets.choice(characters) for _ in range(40))
|
api_key = "".join(secrets.choice(characters) for _ in range(40))
|
||||||
return api_key
|
return api_key
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -113,7 +113,7 @@ def save_dataframe_to_s3_parquet(df, bucket_name, file_key):
|
||||||
df.to_parquet(parquet_buffer)
|
df.to_parquet(parquet_buffer)
|
||||||
|
|
||||||
# Create the boto3 client
|
# Create the boto3 client
|
||||||
s3 = boto3.resource('s3')
|
s3 = boto3.resource("s3")
|
||||||
|
|
||||||
# Upload the Parquet file to S3
|
# Upload the Parquet file to S3
|
||||||
s3.Object(bucket_name, file_key).put(Body=parquet_buffer.getvalue())
|
s3.Object(bucket_name, file_key).put(Body=parquet_buffer.getvalue())
|
||||||
|
|
|
||||||
0
backend/ordnanceSurvey/__init__.py
Normal file
0
backend/ordnanceSurvey/__init__.py
Normal file
25
backend/ordnanceSurvey/handler/Dockerfile
Normal file
25
backend/ordnanceSurvey/handler/Dockerfile
Normal file
|
|
@ -0,0 +1,25 @@
|
||||||
|
FROM public.ecr.aws/lambda/python:3.11
|
||||||
|
|
||||||
|
ARG DEV_DB_HOST
|
||||||
|
ARG DEV_DB_PORT
|
||||||
|
ARG DEV_DB_NAME
|
||||||
|
|
||||||
|
ENV DB_HOST=${DEV_DB_HOST}
|
||||||
|
ENV DB_PORT=${DEV_DB_PORT}
|
||||||
|
ENV DB_NAME=${DEV_DB_NAME}
|
||||||
|
|
||||||
|
# Set working directory (Lambda task root)
|
||||||
|
WORKDIR /var/task
|
||||||
|
|
||||||
|
COPY backend/ordnanceSurvey/handler/requirements.txt .
|
||||||
|
|
||||||
|
RUN pip install --no-cache-dir -r requirements.txt
|
||||||
|
|
||||||
|
# Copy necessary files for database and utility imports
|
||||||
|
COPY utils/ utils/
|
||||||
|
COPY backend/ backend/
|
||||||
|
COPY datatypes/ datatypes/
|
||||||
|
|
||||||
|
# Lambda handler
|
||||||
|
CMD ["backend/ordnanceSurvey/main.handler"]
|
||||||
|
|
||||||
11
backend/ordnanceSurvey/handler/requirements.txt
Normal file
11
backend/ordnanceSurvey/handler/requirements.txt
Normal file
|
|
@ -0,0 +1,11 @@
|
||||||
|
pandas==2.2.2
|
||||||
|
numpy<2.0
|
||||||
|
requests
|
||||||
|
tqdm
|
||||||
|
openpyxl
|
||||||
|
epc-api-python==1.0.2
|
||||||
|
boto3==1.35.44
|
||||||
|
sqlmodel
|
||||||
|
sqlalchemy==2.0.36
|
||||||
|
psycopg2-binary==2.9.10
|
||||||
|
pydantic-settings==2.6.0
|
||||||
47
backend/ordnanceSurvey/helpers.py
Normal file
47
backend/ordnanceSurvey/helpers.py
Normal file
|
|
@ -0,0 +1,47 @@
|
||||||
|
import urllib.parse
|
||||||
|
from pydantic import ValidationError
|
||||||
|
import requests
|
||||||
|
import pandas as pd
|
||||||
|
from utils.logger import setup_logger
|
||||||
|
|
||||||
|
logger = setup_logger()
|
||||||
|
|
||||||
|
|
||||||
|
def os_places_results_to_dataframe(data: dict) -> pd.DataFrame:
|
||||||
|
"""
|
||||||
|
Flatten the OS Places API response results into a DataFrame.
|
||||||
|
Each result contains either a DPA or LPI record.
|
||||||
|
"""
|
||||||
|
results = data.get("results", [])
|
||||||
|
rows = []
|
||||||
|
for r in results:
|
||||||
|
if "DPA" in r:
|
||||||
|
rows.append(r["DPA"])
|
||||||
|
elif "LPI" in r:
|
||||||
|
rows.append(r["LPI"])
|
||||||
|
return pd.DataFrame(rows)
|
||||||
|
|
||||||
|
|
||||||
|
def lookup_os_places(postcode: str, api_key: str) -> dict:
|
||||||
|
"""
|
||||||
|
Lookup a postcode using the OS Places API.
|
||||||
|
Returns the full API response data or an error dict.
|
||||||
|
"""
|
||||||
|
if not api_key:
|
||||||
|
return {"error": "Ordnance Survey API key not specified", "status": 400}
|
||||||
|
|
||||||
|
encoded_postcode = urllib.parse.quote(postcode)
|
||||||
|
url = (
|
||||||
|
f"https://api.os.uk/search/places/v1/postcode?postcode={encoded_postcode}"
|
||||||
|
f"&dataset=DPA,LPI&key={api_key}"
|
||||||
|
)
|
||||||
|
|
||||||
|
response = requests.get(url)
|
||||||
|
if response.status_code != 200:
|
||||||
|
logger.error(
|
||||||
|
f"OS Places API error for postcode {postcode}: {response.status_code}"
|
||||||
|
)
|
||||||
|
return {"error": "Failed to fetch address data", "status": response.status_code}
|
||||||
|
|
||||||
|
data = response.json()
|
||||||
|
return {"data": data, "status": 200}
|
||||||
11
backend/ordnanceSurvey/local_handler/docker-compose.yml
Normal file
11
backend/ordnanceSurvey/local_handler/docker-compose.yml
Normal file
|
|
@ -0,0 +1,11 @@
|
||||||
|
version: "3.9"
|
||||||
|
|
||||||
|
services:
|
||||||
|
ordnance-survey-lambda:
|
||||||
|
build:
|
||||||
|
context: ../../../
|
||||||
|
dockerfile: backend/ordnanceSurvey/handler/Dockerfile
|
||||||
|
ports:
|
||||||
|
- "9000:8080"
|
||||||
|
env_file:
|
||||||
|
- ../../../.env
|
||||||
28
backend/ordnanceSurvey/local_handler/invoke_local_lambda.py
Normal file
28
backend/ordnanceSurvey/local_handler/invoke_local_lambda.py
Normal file
|
|
@ -0,0 +1,28 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
import json
|
||||||
|
import requests
|
||||||
|
|
||||||
|
HOST = "localhost"
|
||||||
|
PORT = "9000"
|
||||||
|
|
||||||
|
LAMBDA_URL = f"http://{HOST}:{PORT}/2015-03-31/functions/function/invocations"
|
||||||
|
|
||||||
|
payload = {
|
||||||
|
"Records": [
|
||||||
|
{
|
||||||
|
"body": json.dumps(
|
||||||
|
{
|
||||||
|
"task_id": "e31f2f21-175b-4a91-a3ec-a6baa325e917",
|
||||||
|
"sub_task_id": "8673913b-1a88-42d7-8578-0449123d94b0",
|
||||||
|
"s3_uri": "s3://retrofit-data-dev/ara_raw_inputs/calico/missinguprn.csv",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
response = requests.post(LAMBDA_URL, json=payload)
|
||||||
|
|
||||||
|
print("Status code:", response.status_code)
|
||||||
|
print("Response:")
|
||||||
|
print(response.text)
|
||||||
223
backend/ordnanceSurvey/main.py
Normal file
223
backend/ordnanceSurvey/main.py
Normal file
|
|
@ -0,0 +1,223 @@
|
||||||
|
from typing import Any, Optional
|
||||||
|
import json
|
||||||
|
from utils.logger import setup_logger
|
||||||
|
import logging
|
||||||
|
from backend.utils.subtasks import subtask_handler
|
||||||
|
from utils.s3 import (
|
||||||
|
save_csv_to_s3,
|
||||||
|
read_csv_from_s3 as read_csv_from_s3_dict,
|
||||||
|
parse_s3_uri,
|
||||||
|
)
|
||||||
|
from backend.utils.addressMatch import AddressMatch
|
||||||
|
from backend.app.db.connection import get_db_session
|
||||||
|
from backend.app.db.models.postcode_search import PostcodeSearchModel
|
||||||
|
from backend.ordnanceSurvey.helpers import (
|
||||||
|
lookup_os_places,
|
||||||
|
os_places_results_to_dataframe,
|
||||||
|
)
|
||||||
|
from backend.app.config import get_settings
|
||||||
|
from sqlalchemy import select
|
||||||
|
from datetime import datetime
|
||||||
|
import uuid
|
||||||
|
import os
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
logger: logging.Logger = setup_logger()
|
||||||
|
|
||||||
|
|
||||||
|
def check_if_post_code_exists_in_db_cache(postcode):
|
||||||
|
|
||||||
|
with get_db_session() as session:
|
||||||
|
result = (
|
||||||
|
session.execute(
|
||||||
|
select(PostcodeSearchModel).where(
|
||||||
|
PostcodeSearchModel.postcode == postcode
|
||||||
|
)
|
||||||
|
)
|
||||||
|
.scalars()
|
||||||
|
.first()
|
||||||
|
)
|
||||||
|
if result:
|
||||||
|
return os_places_results_to_dataframe(result.result_data)
|
||||||
|
|
||||||
|
# Cache miss — fetch from OS Places API
|
||||||
|
api_key = get_settings().ORDNANCE_SURVEY_API_KEY
|
||||||
|
response = lookup_os_places(postcode, api_key)
|
||||||
|
|
||||||
|
if response.get("status") != 200 or "data" not in response:
|
||||||
|
logger.error(f"OS Places API failed for {postcode}: {response}")
|
||||||
|
return pd.DataFrame()
|
||||||
|
|
||||||
|
# Save to cache
|
||||||
|
new_record = PostcodeSearchModel(
|
||||||
|
postcode=postcode,
|
||||||
|
result_data=response["data"],
|
||||||
|
)
|
||||||
|
session.add(new_record)
|
||||||
|
session.commit()
|
||||||
|
|
||||||
|
return os_places_results_to_dataframe(response["data"])
|
||||||
|
|
||||||
|
|
||||||
|
def save_results_to_s3(
|
||||||
|
results_df: pd.DataFrame,
|
||||||
|
task_id: str,
|
||||||
|
sub_task_id: str,
|
||||||
|
bucket_name: Optional[str] = None,
|
||||||
|
) -> bool:
|
||||||
|
"""
|
||||||
|
Save results DataFrame to S3 as CSV in a parent folder structure.
|
||||||
|
|
||||||
|
:param results_df: The DataFrame containing results
|
||||||
|
:param task_id: The task ID (used for file naming)
|
||||||
|
:param sub_task_id: The subtask ID (used for file naming)
|
||||||
|
:param bucket_name: The S3 bucket name (defaults to env variable)
|
||||||
|
:return: True if successful, False otherwise
|
||||||
|
"""
|
||||||
|
if bucket_name is None:
|
||||||
|
bucket_name = os.getenv("S3_BUCKET_NAME")
|
||||||
|
|
||||||
|
if not bucket_name:
|
||||||
|
logger.error(
|
||||||
|
"S3 bucket name not provided and S3_BUCKET_NAME environment variable not set"
|
||||||
|
)
|
||||||
|
return False
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Create a filename with timestamp and UUID
|
||||||
|
file_name = f"{datetime.now().isoformat()}_{str(uuid.uuid4())[:8]}"
|
||||||
|
file_key = f"ara_ordnance_survey_outputs/{task_id}/{sub_task_id}/ordnanceSurvey/{file_name}.csv"
|
||||||
|
|
||||||
|
# Save to S3
|
||||||
|
success = save_csv_to_s3(results_df, bucket_name, file_key)
|
||||||
|
|
||||||
|
if success:
|
||||||
|
logger.info(f"Successfully saved results to s3://{bucket_name}/{file_key}")
|
||||||
|
return True
|
||||||
|
else:
|
||||||
|
logger.error(f"Failed to save results to S3")
|
||||||
|
return False
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error saving results to S3: {str(e)}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
@subtask_handler() # This assumes task_id and subtask_id is defined in event.Records.body
|
||||||
|
def handler(body: dict[str, Any], context: Any, local: bool = False) -> None:
|
||||||
|
|
||||||
|
# delete this line after test
|
||||||
|
# local = True
|
||||||
|
# Example SQS message for testing (copy and paste into SQS):
|
||||||
|
if local is True:
|
||||||
|
body = {
|
||||||
|
"task_id": "e31f2f21-175b-4a91-a3ec-a6baa325e917",
|
||||||
|
"sub_task_id": "8673913b-1a88-42d7-8578-0449123d94b0",
|
||||||
|
"s3_uri": "s3://retrofit-data-dev/ara_raw_outputs/e31f2f21-175b-4a91-a3ec-a6baa325e917/6a427b6e-1ece-4983-b1e5-9bffccc53d1d/2026-03-04T16:48:22.339995_634c88fc.csv",
|
||||||
|
"lexiscore_column": "address2uprn_lexiscore",
|
||||||
|
}
|
||||||
|
|
||||||
|
s3_uri: str = body.get("s3_uri", "")
|
||||||
|
lexiscore_threshold: float = body.get("lexiscore_threshold", 0.5)
|
||||||
|
lexiscore_column: Optional[str] = body.get("lexiscore_column", None)
|
||||||
|
task_id: str = body.get("task_id", "")
|
||||||
|
sub_task_id: str = body.get("sub_task_id", "")
|
||||||
|
|
||||||
|
if s3_uri == "":
|
||||||
|
raise RuntimeError("Missing s3_uri in message body")
|
||||||
|
|
||||||
|
bucket, key = parse_s3_uri(s3_uri)
|
||||||
|
|
||||||
|
# Assumption designing with address2uprn was ran first
|
||||||
|
csv_data = read_csv_from_s3_dict(bucket, key)
|
||||||
|
df = pd.DataFrame(csv_data)
|
||||||
|
# df = df.head(5)
|
||||||
|
|
||||||
|
# If lexiscore_column is specified, use it; otherwise process all rows
|
||||||
|
if lexiscore_column and lexiscore_column in df.columns:
|
||||||
|
df[lexiscore_column] = pd.to_numeric(df[lexiscore_column], errors="coerce")
|
||||||
|
needs_processing = df[
|
||||||
|
df[lexiscore_column].isna() | (df[lexiscore_column] < lexiscore_threshold)
|
||||||
|
]
|
||||||
|
else:
|
||||||
|
# Default: process all rows
|
||||||
|
needs_processing = df
|
||||||
|
|
||||||
|
grouped = needs_processing.groupby("postcode_clean")
|
||||||
|
|
||||||
|
# Initialise new columns
|
||||||
|
df["ordnance_survey_address"] = None
|
||||||
|
df["ordnance_survey_uprn"] = None
|
||||||
|
df["ordnance_survey_lexiscore"] = None
|
||||||
|
|
||||||
|
# Process each postcode group at a time
|
||||||
|
for postcode, group in grouped:
|
||||||
|
print(f"Processing postcode: {postcode} ({len(group)} rows)")
|
||||||
|
valid_group = AddressMatch.is_valid_postcode(postcode)
|
||||||
|
if not valid_group:
|
||||||
|
logger.warning(f"Postcode {postcode} is invalid, skipping")
|
||||||
|
for idx in group.index:
|
||||||
|
df.at[idx, "ordnance_survey_address"] = (
|
||||||
|
"postcode not found in ordnance survey"
|
||||||
|
)
|
||||||
|
df.at[idx, "ordnance_survey_uprn"] = (
|
||||||
|
"postcode not found in ordnance survey"
|
||||||
|
)
|
||||||
|
df.at[idx, "ordnance_survey_lexiscore"] = (
|
||||||
|
"postcode not found in ordnance survey"
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
|
||||||
|
postcode_cache = check_if_post_code_exists_in_db_cache(postcode)
|
||||||
|
if postcode_cache.empty:
|
||||||
|
logger.warning(f"No OS Places data for {postcode}")
|
||||||
|
for idx in group.index:
|
||||||
|
df.at[idx, "ordnance_survey_address"] = (
|
||||||
|
"postcode not found in ordnance survey"
|
||||||
|
)
|
||||||
|
df.at[idx, "ordnance_survey_uprn"] = (
|
||||||
|
"postcode not found in ordnance survey"
|
||||||
|
)
|
||||||
|
df.at[idx, "ordnance_survey_lexiscore"] = (
|
||||||
|
"postcode not found in ordnance survey"
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
|
||||||
|
for idx, row in group.iterrows():
|
||||||
|
# Concatenate Address columns directly
|
||||||
|
ordnancy_survey_user_input = (
|
||||||
|
str(row.get("Address 1", "")).strip()
|
||||||
|
+ " "
|
||||||
|
+ str(row.get("Address 2", "")).strip()
|
||||||
|
+ " "
|
||||||
|
+ str(row.get("Address 3", "")).strip()
|
||||||
|
).strip()
|
||||||
|
|
||||||
|
if not ordnancy_survey_user_input:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Score against OS Places addresses
|
||||||
|
scores = postcode_cache["ADDRESS"].apply(
|
||||||
|
lambda addr: AddressMatch.score(ordnancy_survey_user_input, addr)
|
||||||
|
)
|
||||||
|
best_idx = scores.idxmax()
|
||||||
|
best_score = scores[best_idx]
|
||||||
|
|
||||||
|
if best_score <= 0:
|
||||||
|
continue
|
||||||
|
|
||||||
|
df.at[idx, "ordnance_survey_address"] = postcode_cache.at[
|
||||||
|
best_idx, "ADDRESS"
|
||||||
|
]
|
||||||
|
df.at[idx, "ordnance_survey_uprn"] = postcode_cache.at[best_idx, "UPRN"]
|
||||||
|
df.at[idx, "ordnance_survey_lexiscore"] = best_score
|
||||||
|
|
||||||
|
# Save results locally
|
||||||
|
if local:
|
||||||
|
df.to_csv("ordnance_survey_results.csv", index=False)
|
||||||
|
print(f"Results saved to ordnance_survey_results.csv ({len(df)} rows)")
|
||||||
|
|
||||||
|
# Save results to S3
|
||||||
|
if task_id and sub_task_id:
|
||||||
|
save_results_to_s3(df, task_id, sub_task_id)
|
||||||
65
backend/scripts/combine_address2uprn_outputs.py
Normal file
65
backend/scripts/combine_address2uprn_outputs.py
Normal file
|
|
@ -0,0 +1,65 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import boto3
|
||||||
|
import pandas as pd
|
||||||
|
from io import BytesIO
|
||||||
|
|
||||||
|
BUCKET = "retrofit-data-dev"
|
||||||
|
|
||||||
|
|
||||||
|
def list_csv_files(task_id):
|
||||||
|
s3 = boto3.client("s3")
|
||||||
|
paginator = s3.get_paginator("list_objects_v2")
|
||||||
|
|
||||||
|
prefix = f"ara_raw_outputs/{task_id}"
|
||||||
|
csv_files = []
|
||||||
|
|
||||||
|
for page in paginator.paginate(Bucket=BUCKET, Prefix=prefix):
|
||||||
|
for obj in page.get("Contents", []):
|
||||||
|
key = obj["Key"]
|
||||||
|
if key.endswith(".csv"):
|
||||||
|
csv_files.append(key)
|
||||||
|
|
||||||
|
return csv_files
|
||||||
|
|
||||||
|
|
||||||
|
def download_csv(key):
|
||||||
|
s3 = boto3.client("s3")
|
||||||
|
obj = s3.get_object(Bucket=BUCKET, Key=key)
|
||||||
|
return pd.read_csv(BytesIO(obj["Body"].read()))
|
||||||
|
|
||||||
|
|
||||||
|
def main(task_id, output):
|
||||||
|
print(f"Scanning task: {task_id}")
|
||||||
|
|
||||||
|
csv_files = list_csv_files(task_id)
|
||||||
|
|
||||||
|
if not csv_files:
|
||||||
|
print("No CSV files found")
|
||||||
|
return
|
||||||
|
|
||||||
|
print(f"Found {len(csv_files)} CSV files")
|
||||||
|
|
||||||
|
dfs = []
|
||||||
|
for key in csv_files:
|
||||||
|
print(f"Downloading {key}")
|
||||||
|
df = download_csv(key)
|
||||||
|
dfs.append(df)
|
||||||
|
|
||||||
|
combined = pd.concat(dfs, ignore_index=True)
|
||||||
|
|
||||||
|
combined.to_csv(output, index=False)
|
||||||
|
|
||||||
|
print(f"Combined CSV saved to {output}")
|
||||||
|
print(f"Total rows: {len(combined)}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
parser = argparse.ArgumentParser()
|
||||||
|
parser.add_argument("task_id", help="Task ID folder in S3")
|
||||||
|
parser.add_argument("--output", default="combined.csv")
|
||||||
|
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
main(args.task_id, args.output)
|
||||||
0
backend/utils/__init__.py
Normal file
0
backend/utils/__init__.py
Normal file
201
backend/utils/addressMatch.py
Normal file
201
backend/utils/addressMatch.py
Normal file
|
|
@ -0,0 +1,201 @@
|
||||||
|
import re
|
||||||
|
from typing import Any, Optional
|
||||||
|
from difflib import SequenceMatcher
|
||||||
|
import requests
|
||||||
|
|
||||||
|
|
||||||
|
class AddressMatch:
|
||||||
|
def __init__(self):
|
||||||
|
return None
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def score(a: str, b: str) -> float:
|
||||||
|
score: float = AddressMatch.levenshtein(a, b)
|
||||||
|
|
||||||
|
return score
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def is_valid_postcode(postcode_clean: str) -> bool:
|
||||||
|
"""
|
||||||
|
Validate postcode using postcodes.io.
|
||||||
|
|
||||||
|
Expects a sanitised postcode (e.g. E84SQ).
|
||||||
|
Returns True if valid, False otherwise.
|
||||||
|
"""
|
||||||
|
POSTCODES_IO_VALIDATE_URL = (
|
||||||
|
"https://api.postcodes.io/postcodes/{postcode}/validate"
|
||||||
|
)
|
||||||
|
if not postcode_clean:
|
||||||
|
return False
|
||||||
|
|
||||||
|
try:
|
||||||
|
resp = requests.get(
|
||||||
|
POSTCODES_IO_VALIDATE_URL.format(postcode=postcode_clean),
|
||||||
|
timeout=5,
|
||||||
|
)
|
||||||
|
resp.raise_for_status()
|
||||||
|
return resp.json().get("result", False)
|
||||||
|
except requests.RequestException:
|
||||||
|
# Network issues, rate limits, etc.
|
||||||
|
return False
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def normalise_address(s: str) -> str:
|
||||||
|
"""
|
||||||
|
Canonical UK-focused address normalisation.
|
||||||
|
|
||||||
|
- Lowercases
|
||||||
|
- Removes punctuation (keeps / for flats)
|
||||||
|
- Normalises whitespace
|
||||||
|
- Applies synonym compression at token level
|
||||||
|
"""
|
||||||
|
|
||||||
|
if not s:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
ADDRESS_SYNONYMS = {
|
||||||
|
# street types
|
||||||
|
"rd": "road",
|
||||||
|
"rd.": "road",
|
||||||
|
"st": "street",
|
||||||
|
"st.": "street",
|
||||||
|
"ave": "avenue",
|
||||||
|
"ave.": "avenue",
|
||||||
|
"ln": "lane",
|
||||||
|
"ln.": "lane",
|
||||||
|
"cres": "crescent",
|
||||||
|
"ct": "court",
|
||||||
|
"dr": "drive",
|
||||||
|
# flats / units
|
||||||
|
"apt": "flat",
|
||||||
|
"apartment": "flat",
|
||||||
|
"unit": "flat",
|
||||||
|
"ste": "suite",
|
||||||
|
# numbering noise
|
||||||
|
"no": "",
|
||||||
|
"no.": "",
|
||||||
|
}
|
||||||
|
# 1. lowercase
|
||||||
|
s = s.lower()
|
||||||
|
|
||||||
|
# 1.5 split digit-letter suffixes
|
||||||
|
s = re.sub(r"(\d+)([a-z])\b", r"\1 \2", s)
|
||||||
|
|
||||||
|
# 2. remove punctuation except /
|
||||||
|
s = re.sub(r"[^\w\s/]", " ", s)
|
||||||
|
|
||||||
|
# 3. normalise whitespace
|
||||||
|
s = re.sub(r"\s+", " ", s).strip()
|
||||||
|
|
||||||
|
# 4. tokenise + synonym normalisation
|
||||||
|
tokens: list[str] = []
|
||||||
|
for tok in s.split():
|
||||||
|
replacement = ADDRESS_SYNONYMS.get(tok, tok)
|
||||||
|
if replacement:
|
||||||
|
tokens.append(replacement)
|
||||||
|
return " ".join(tokens)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def levenshtein(a: str, b: str) -> float:
|
||||||
|
"""
|
||||||
|
Address similarity score in [0, 1].
|
||||||
|
|
||||||
|
Strategy:
|
||||||
|
- Normalise
|
||||||
|
- Strongly penalise mismatched house/flat numbers
|
||||||
|
- Combine token overlap + character similarity
|
||||||
|
"""
|
||||||
|
|
||||||
|
def extract_number_sequence(s: str) -> list[str]:
|
||||||
|
return re.findall(r"\d+[a-z]?", s)
|
||||||
|
|
||||||
|
def extract_numbers(s: str) -> set[str]:
|
||||||
|
return set(extract_number_sequence(s))
|
||||||
|
|
||||||
|
def tokenise(s: str) -> set[str]:
|
||||||
|
return set(s.split())
|
||||||
|
|
||||||
|
def extract_building_number(s: str) -> Optional[str]:
|
||||||
|
"""
|
||||||
|
Extract the main building number (NOT flat/unit).
|
||||||
|
Assumes formats like:
|
||||||
|
- '42 moreton road'
|
||||||
|
- 'flat 3 42 moreton road'
|
||||||
|
"""
|
||||||
|
tokens = s.split()
|
||||||
|
|
||||||
|
# remove flat/unit context
|
||||||
|
cleaned: list[Any] = []
|
||||||
|
skip_next = False
|
||||||
|
for t in tokens:
|
||||||
|
if t in ("flat", "apt", "apartment", "unit"):
|
||||||
|
skip_next = True
|
||||||
|
continue
|
||||||
|
if skip_next:
|
||||||
|
skip_next = False
|
||||||
|
continue
|
||||||
|
cleaned.append(t)
|
||||||
|
|
||||||
|
# first remaining number is building number
|
||||||
|
for t in cleaned:
|
||||||
|
if re.fullmatch(r"\d+[a-z]?", t):
|
||||||
|
return t
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
a_norm = AddressMatch.normalise_address(a)
|
||||||
|
b_norm = AddressMatch.normalise_address(b)
|
||||||
|
|
||||||
|
# --- hard signal: numbers ---
|
||||||
|
nums_a = extract_numbers(a_norm)
|
||||||
|
nums_b = extract_numbers(b_norm)
|
||||||
|
|
||||||
|
if nums_a and not nums_b:
|
||||||
|
return 0.0
|
||||||
|
|
||||||
|
# No shared numbers at all → impossible match
|
||||||
|
if nums_a and nums_b and nums_a.isdisjoint(nums_b):
|
||||||
|
return 0.0
|
||||||
|
|
||||||
|
# 🔒 HARD GUARD: building number must match
|
||||||
|
bld_a = extract_building_number(a_norm)
|
||||||
|
bld_b = extract_building_number(b_norm)
|
||||||
|
|
||||||
|
if bld_a and bld_b and bld_a != bld_b:
|
||||||
|
return 0.0
|
||||||
|
|
||||||
|
# --- order-sensitive flat/building guard ---
|
||||||
|
seq_a = extract_number_sequence(a_norm)
|
||||||
|
seq_b = extract_number_sequence(b_norm)
|
||||||
|
|
||||||
|
has_flat_token_user = any(
|
||||||
|
tok in a_norm for tok in ("flat", "apt", "apartment", "unit")
|
||||||
|
)
|
||||||
|
has_flat_token_epc = "flat" in b_norm
|
||||||
|
|
||||||
|
if (
|
||||||
|
len(seq_a) == 2
|
||||||
|
and len(seq_b) >= 2
|
||||||
|
and has_flat_token_epc
|
||||||
|
and not has_flat_token_user
|
||||||
|
and seq_a != seq_b[:2]
|
||||||
|
):
|
||||||
|
return 0.0
|
||||||
|
|
||||||
|
# --- token similarity (order-independent) ---
|
||||||
|
toks_a: set[str] = tokenise(a_norm)
|
||||||
|
toks_b: set[str] = tokenise(b_norm)
|
||||||
|
|
||||||
|
if not toks_a or not toks_b:
|
||||||
|
token_score = 0.0
|
||||||
|
else:
|
||||||
|
token_score = len(toks_a & toks_b) / len(toks_a | toks_b)
|
||||||
|
|
||||||
|
# --- character similarity (soft signal) ---
|
||||||
|
char_score: float = SequenceMatcher(None, a_norm, b_norm).ratio()
|
||||||
|
|
||||||
|
# --- weighted blend ---
|
||||||
|
return round(
|
||||||
|
0.65 * token_score + 0.35 * char_score,
|
||||||
|
4,
|
||||||
|
)
|
||||||
95
backend/utils/subtasks.py
Normal file
95
backend/utils/subtasks.py
Normal file
|
|
@ -0,0 +1,95 @@
|
||||||
|
# decorators/subtask_handler.py
|
||||||
|
|
||||||
|
from functools import wraps
|
||||||
|
from typing import Callable, Any
|
||||||
|
from uuid import UUID
|
||||||
|
import json
|
||||||
|
|
||||||
|
from backend.app.db.functions.tasks.Tasks import SubTaskInterface
|
||||||
|
|
||||||
|
|
||||||
|
def subtask_handler():
|
||||||
|
"""
|
||||||
|
Decorator that wraps your existing handler and automatically:
|
||||||
|
|
||||||
|
- Extracts task_id + sub_task_id from event
|
||||||
|
- Marks subtask as in progress
|
||||||
|
- Executes handler logic
|
||||||
|
- Marks subtask complete on success
|
||||||
|
- Marks failed on exception
|
||||||
|
"""
|
||||||
|
|
||||||
|
def decorator(func: Callable[..., Any]):
|
||||||
|
|
||||||
|
@wraps(func)
|
||||||
|
def wrapper(event: dict[str, Any], context: Any, *args, **kwargs):
|
||||||
|
|
||||||
|
records = event.get("Records", [event])
|
||||||
|
|
||||||
|
interface = SubTaskInterface()
|
||||||
|
|
||||||
|
for record in records:
|
||||||
|
|
||||||
|
# -------------------------------
|
||||||
|
# Parse body safely
|
||||||
|
# -------------------------------
|
||||||
|
body = {}
|
||||||
|
|
||||||
|
if isinstance(record.get("body"), str):
|
||||||
|
try:
|
||||||
|
body = json.loads(record["body"])
|
||||||
|
except Exception:
|
||||||
|
body = {}
|
||||||
|
else:
|
||||||
|
body = record.get("body", {}) or {}
|
||||||
|
|
||||||
|
task_id_raw = body.get("task_id")
|
||||||
|
subtask_id_raw = body.get("sub_task_id")
|
||||||
|
|
||||||
|
task_id = UUID(task_id_raw) if isinstance(task_id_raw, str) else None
|
||||||
|
subtask_id = (
|
||||||
|
UUID(subtask_id_raw) if isinstance(subtask_id_raw, str) else None
|
||||||
|
)
|
||||||
|
|
||||||
|
if not task_id or not subtask_id:
|
||||||
|
raise RuntimeError("task_id or sub_task_id missing")
|
||||||
|
|
||||||
|
# -------------------------------
|
||||||
|
# Mark in progress
|
||||||
|
# -------------------------------
|
||||||
|
interface.update_subtask_status(
|
||||||
|
subtask_id=subtask_id,
|
||||||
|
status="in progress",
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Pass the parsed body into your function
|
||||||
|
result = func(body, context, *args, **kwargs)
|
||||||
|
|
||||||
|
# -------------------------------
|
||||||
|
# Success → mark complete
|
||||||
|
# -------------------------------
|
||||||
|
interface.update_subtask_status(
|
||||||
|
subtask_id=subtask_id,
|
||||||
|
status="complete",
|
||||||
|
outputs={"result": result} if result else None,
|
||||||
|
)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
|
||||||
|
# -------------------------------
|
||||||
|
# Failure → mark failed
|
||||||
|
# -------------------------------
|
||||||
|
interface.update_subtask_status(
|
||||||
|
subtask_id=subtask_id,
|
||||||
|
status="failed",
|
||||||
|
outputs={"error": str(e)},
|
||||||
|
)
|
||||||
|
|
||||||
|
raise
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
return wrapper
|
||||||
|
|
||||||
|
return decorator
|
||||||
64
infrastructure/terraform/cdn/main.tf
Normal file
64
infrastructure/terraform/cdn/main.tf
Normal file
|
|
@ -0,0 +1,64 @@
|
||||||
|
############################################
|
||||||
|
# Load Shared Terraform State
|
||||||
|
############################################
|
||||||
|
data "terraform_remote_state" "shared" {
|
||||||
|
backend = "s3"
|
||||||
|
config = {
|
||||||
|
bucket = "assessment-model-terraform-state"
|
||||||
|
key = "env:/${var.stage}/terraform.tfstate"
|
||||||
|
region = "eu-west-2"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
############################################
|
||||||
|
# Load FastAPI Terraform State
|
||||||
|
############################################
|
||||||
|
data "terraform_remote_state" "fast_api" {
|
||||||
|
backend = "s3"
|
||||||
|
config = {
|
||||||
|
bucket = "ara-fast-api-terraform-state"
|
||||||
|
key = "env:/${var.stage}/terraform.tfstate"
|
||||||
|
region = "eu-west-2"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
############################################
|
||||||
|
# Load CDN Certificate Terraform State
|
||||||
|
############################################
|
||||||
|
data "terraform_remote_state" "cdn_certificate" {
|
||||||
|
backend = "s3"
|
||||||
|
config = {
|
||||||
|
bucket = "cdn-certificate-terraform-state"
|
||||||
|
key = "env:/${var.stage}/terraform.tfstate"
|
||||||
|
region = "eu-west-2"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
############################################
|
||||||
|
# CloudFront for API
|
||||||
|
############################################
|
||||||
|
module "cdn" {
|
||||||
|
source = "../modules/cloudfront"
|
||||||
|
|
||||||
|
aliases = [data.terraform_remote_state.fast_api.outputs.domain_name]
|
||||||
|
|
||||||
|
acm_certificate_arn = data.terraform_remote_state.cdn_certificate.outputs.certificate_arn
|
||||||
|
|
||||||
|
origins = [
|
||||||
|
# ---- S3 ----
|
||||||
|
{
|
||||||
|
origin_type = "s3"
|
||||||
|
origin_domain_name = data.terraform_remote_state.shared.outputs.retrofit_datalake_bucket_domain_name
|
||||||
|
origin_id = "s3-origin"
|
||||||
|
bucket_id = data.terraform_remote_state.shared.outputs.retrofit_datalake_bucket_id
|
||||||
|
bucket_arn = data.terraform_remote_state.shared.outputs.retrofit_datalake_bucket_arn
|
||||||
|
},
|
||||||
|
|
||||||
|
# ---- API Gateway ----
|
||||||
|
{
|
||||||
|
origin_type = "api"
|
||||||
|
origin_domain_name = replace(data.terraform_remote_state.fast_api.outputs.invoke_url, "/^https?://([^/]*).*/", "$1")
|
||||||
|
origin_id = "api-origin"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
14
infrastructure/terraform/cdn/provider.tf
Normal file
14
infrastructure/terraform/cdn/provider.tf
Normal file
|
|
@ -0,0 +1,14 @@
|
||||||
|
terraform {
|
||||||
|
required_providers {
|
||||||
|
aws = {
|
||||||
|
source = "hashicorp/aws"
|
||||||
|
version = ">= 5.0"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
backend "s3" {
|
||||||
|
bucket = "ara-cdn-terraform-state"
|
||||||
|
key = "terraform.tfstate"
|
||||||
|
region = "eu-west-2"
|
||||||
|
}
|
||||||
|
}
|
||||||
3
infrastructure/terraform/cdn/variables.tf
Normal file
3
infrastructure/terraform/cdn/variables.tf
Normal file
|
|
@ -0,0 +1,3 @@
|
||||||
|
variable "stage" {
|
||||||
|
type = string
|
||||||
|
}
|
||||||
28
infrastructure/terraform/cdn_certificate/main.tf
Normal file
28
infrastructure/terraform/cdn_certificate/main.tf
Normal file
|
|
@ -0,0 +1,28 @@
|
||||||
|
############################################
|
||||||
|
# Load FastAPI Terraform State
|
||||||
|
############################################
|
||||||
|
data "terraform_remote_state" "fast_api" {
|
||||||
|
backend = "s3"
|
||||||
|
config = {
|
||||||
|
bucket = "ara-fast-api-terraform-state"
|
||||||
|
key = "env:/${var.stage}/terraform.tfstate"
|
||||||
|
region = "eu-west-2"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
############################################
|
||||||
|
# Define Certificate
|
||||||
|
############################################
|
||||||
|
module "cdn_certificate" {
|
||||||
|
source = "../modules/acm_certificate"
|
||||||
|
|
||||||
|
providers = {
|
||||||
|
aws = aws.us_east_1
|
||||||
|
}
|
||||||
|
|
||||||
|
domain_name = data.terraform_remote_state.fast_api.outputs.domain_name
|
||||||
|
|
||||||
|
tags = {
|
||||||
|
Environment = var.stage
|
||||||
|
}
|
||||||
|
}
|
||||||
3
infrastructure/terraform/cdn_certificate/outputs.tf
Normal file
3
infrastructure/terraform/cdn_certificate/outputs.tf
Normal file
|
|
@ -0,0 +1,3 @@
|
||||||
|
output "certificate_arn" {
|
||||||
|
value = module.cdn_certificate.certificate_arn
|
||||||
|
}
|
||||||
23
infrastructure/terraform/cdn_certificate/provider.tf
Normal file
23
infrastructure/terraform/cdn_certificate/provider.tf
Normal file
|
|
@ -0,0 +1,23 @@
|
||||||
|
terraform {
|
||||||
|
required_providers {
|
||||||
|
aws = {
|
||||||
|
source = "hashicorp/aws"
|
||||||
|
version = ">= 5.0"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
backend "s3" {
|
||||||
|
bucket = "cdn-certificate-terraform-state"
|
||||||
|
key = "terraform.tfstate"
|
||||||
|
region = "eu-west-2"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
provider "aws" {
|
||||||
|
region = var.region
|
||||||
|
}
|
||||||
|
|
||||||
|
provider "aws" {
|
||||||
|
alias = "us_east_1"
|
||||||
|
region = "us-east-1"
|
||||||
|
}
|
||||||
3
infrastructure/terraform/cdn_certificate/variables.tf
Normal file
3
infrastructure/terraform/cdn_certificate/variables.tf
Normal file
|
|
@ -0,0 +1,3 @@
|
||||||
|
variable "stage" {
|
||||||
|
type = string
|
||||||
|
}
|
||||||
|
|
@ -26,7 +26,7 @@ data "terraform_remote_state" "shared" {
|
||||||
}
|
}
|
||||||
|
|
||||||
module "lambda" {
|
module "lambda" {
|
||||||
source = "../modules/lambda_with_sqs"
|
source = "../../modules/lambda_with_sqs"
|
||||||
|
|
||||||
name = REPLACE ME #"address2uprn" for example
|
name = REPLACE ME #"address2uprn" for example
|
||||||
stage = var.stage
|
stage = var.stage
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@ terraform {
|
||||||
required_providers {
|
required_providers {
|
||||||
aws = {
|
aws = {
|
||||||
source = "hashicorp/aws"
|
source = "hashicorp/aws"
|
||||||
version = "~> 4.16"
|
version = ">= 5.0"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -15,7 +15,7 @@ locals {
|
||||||
}
|
}
|
||||||
|
|
||||||
module "address2uprn" {
|
module "address2uprn" {
|
||||||
source = "../modules/lambda_with_sqs"
|
source = "../../modules/lambda_with_sqs"
|
||||||
|
|
||||||
name = "address2uprn"
|
name = "address2uprn"
|
||||||
stage = var.stage
|
stage = var.stage
|
||||||
|
|
@ -33,19 +33,6 @@ module "address2uprn" {
|
||||||
LOG_LEVEL = "info"
|
LOG_LEVEL = "info"
|
||||||
DB_USERNAME = local.db_credentials.db_assessment_model_username
|
DB_USERNAME = local.db_credentials.db_assessment_model_username
|
||||||
DB_PASSWORD = local.db_credentials.db_assessment_model_password
|
DB_PASSWORD = local.db_credentials.db_assessment_model_password
|
||||||
GOOGLE_SOLAR_API_KEY = "test"
|
|
||||||
SAP_PREDICTIONS_BUCKET = "test"
|
|
||||||
CARBON_PREDICTIONS_BUCKET = "test"
|
|
||||||
HEAT_PREDICTIONS_BUCKET = "test"
|
|
||||||
HEATING_KWH_PREDICTIONS_BUCKET = "test"
|
|
||||||
HOTWATER_KWH_PREDICTIONS_BUCKET = "test"
|
|
||||||
API_KEY = "test"
|
|
||||||
ENVIRONMENT = "test"
|
|
||||||
SECRET_KEY = "test"
|
|
||||||
PLAN_TRIGGER_BUCKET = "test"
|
|
||||||
DATA_BUCKET = "test"
|
|
||||||
ENGINE_SQS_URL = "test"
|
|
||||||
ENERGY_ASSESSMENTS_BUCKET = "test"
|
|
||||||
S3_BUCKET_NAME = data.terraform_remote_state.shared.outputs.retrofit_sap_data_bucket_name
|
S3_BUCKET_NAME = data.terraform_remote_state.shared.outputs.retrofit_sap_data_bucket_name
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@ terraform {
|
||||||
required_providers {
|
required_providers {
|
||||||
aws = {
|
aws = {
|
||||||
source = "hashicorp/aws"
|
source = "hashicorp/aws"
|
||||||
version = "~> 4.16"
|
version = ">= 5.0"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -16,7 +16,7 @@ locals {
|
||||||
}
|
}
|
||||||
|
|
||||||
module "lambda" {
|
module "lambda" {
|
||||||
source = "../modules/lambda_with_sqs"
|
source = "../../modules/lambda_with_sqs"
|
||||||
|
|
||||||
name = "categorisation"
|
name = "categorisation"
|
||||||
stage = var.stage
|
stage = var.stage
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,9 @@
|
||||||
|
output "categorisation_queue_url" {
|
||||||
|
value = module.lambda.queue_url
|
||||||
|
description = "URL of the Categorisation SQS queue"
|
||||||
|
}
|
||||||
|
|
||||||
|
output "categorisation_queue_arn" {
|
||||||
|
value = module.lambda.queue_arn
|
||||||
|
description = "ARN of the Categorisation SQS queue"
|
||||||
|
}
|
||||||
|
|
@ -2,7 +2,7 @@ terraform {
|
||||||
required_providers {
|
required_providers {
|
||||||
aws = {
|
aws = {
|
||||||
source = "hashicorp/aws"
|
source = "hashicorp/aws"
|
||||||
version = "~> 4.16"
|
version = ">= 5.0"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -17,7 +17,7 @@ locals {
|
||||||
|
|
||||||
|
|
||||||
module "lambda" {
|
module "lambda" {
|
||||||
source = "../modules/lambda_with_sqs"
|
source = "../../modules/lambda_with_sqs"
|
||||||
|
|
||||||
name = "condition-etl"
|
name = "condition-etl"
|
||||||
stage = var.stage
|
stage = var.stage
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@ terraform {
|
||||||
required_providers {
|
required_providers {
|
||||||
aws = {
|
aws = {
|
||||||
source = "hashicorp/aws"
|
source = "hashicorp/aws"
|
||||||
version = "~> 4.16"
|
version = ">= 5.0"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -17,15 +17,17 @@ locals {
|
||||||
|
|
||||||
|
|
||||||
module "lambda" {
|
module "lambda" {
|
||||||
source = "../modules/lambda_with_sqs"
|
source = "../../modules/lambda_with_sqs"
|
||||||
|
|
||||||
name = "engine"
|
name = "engine"
|
||||||
stage = var.stage
|
stage = var.stage
|
||||||
|
|
||||||
image_uri = local.image_uri
|
image_uri = local.image_uri
|
||||||
|
|
||||||
# Optional: Set maximum_concurrency to limit concurrent SQS-triggered invocations (2-1000)
|
|
||||||
maximum_concurrency = var.maximum_concurrency
|
maximum_concurrency = var.maximum_concurrency
|
||||||
|
batch_size = var.batch_size
|
||||||
|
timeout = var.timeout
|
||||||
|
memory_size = var.memory_size
|
||||||
|
|
||||||
environment = merge(
|
environment = merge(
|
||||||
{
|
{
|
||||||
|
|
@ -42,7 +44,6 @@ module "lambda" {
|
||||||
DB_PORT = var.db_port
|
DB_PORT = var.db_port
|
||||||
API_KEY = var.api_key
|
API_KEY = var.api_key
|
||||||
SECRET_KEY = var.secret_key
|
SECRET_KEY = var.secret_key
|
||||||
DOMAIN_NAME = var.domain_name
|
|
||||||
EPC_AUTH_TOKEN = var.epc_auth_token
|
EPC_AUTH_TOKEN = var.epc_auth_token
|
||||||
GOOGLE_SOLAR_API_KEY = var.google_solar_api_key
|
GOOGLE_SOLAR_API_KEY = var.google_solar_api_key
|
||||||
|
|
||||||
|
|
|
||||||
9
infrastructure/terraform/lambda/engine/outputs.tf
Normal file
9
infrastructure/terraform/lambda/engine/outputs.tf
Normal file
|
|
@ -0,0 +1,9 @@
|
||||||
|
output "ara_engine_queue_url" {
|
||||||
|
value = module.lambda.queue_url
|
||||||
|
description = "URL of the Engine SQS queue"
|
||||||
|
}
|
||||||
|
|
||||||
|
output "ara_engine_queue_arn" {
|
||||||
|
value = module.lambda.queue_arn
|
||||||
|
description = "ARN of the Engine SQS queue"
|
||||||
|
}
|
||||||
|
|
@ -2,7 +2,7 @@ terraform {
|
||||||
required_providers {
|
required_providers {
|
||||||
aws = {
|
aws = {
|
||||||
source = "hashicorp/aws"
|
source = "hashicorp/aws"
|
||||||
version = "~> 4.16"
|
version = ">= 5.0"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -23,6 +23,23 @@ variable "maximum_concurrency" {
|
||||||
description = "Maximum number of concurrent Lambda invocations from SQS (2-1000). null = no limit."
|
description = "Maximum number of concurrent Lambda invocations from SQS (2-1000). null = no limit."
|
||||||
}
|
}
|
||||||
|
|
||||||
|
variable "batch_size" {
|
||||||
|
type = number
|
||||||
|
default = 1
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "timeout" {
|
||||||
|
type = number
|
||||||
|
default = 900
|
||||||
|
description = "Lambda timeout in seconds"
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "memory_size" {
|
||||||
|
type = number
|
||||||
|
default = 3008
|
||||||
|
description = "Lambda memory size in MB"
|
||||||
|
}
|
||||||
|
|
||||||
variable "db_host" {
|
variable "db_host" {
|
||||||
type = string
|
type = string
|
||||||
sensitive = true
|
sensitive = true
|
||||||
|
|
@ -48,10 +65,6 @@ variable "secret_key" {
|
||||||
sensitive = true
|
sensitive = true
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "domain_name" {
|
|
||||||
type = string
|
|
||||||
}
|
|
||||||
|
|
||||||
variable "epc_auth_token" {
|
variable "epc_auth_token" {
|
||||||
type = string
|
type = string
|
||||||
sensitive = true
|
sensitive = true
|
||||||
|
|
|
||||||
124
infrastructure/terraform/lambda/fast-api/main.tf
Normal file
124
infrastructure/terraform/lambda/fast-api/main.tf
Normal file
|
|
@ -0,0 +1,124 @@
|
||||||
|
############################################
|
||||||
|
# Load Terraform State
|
||||||
|
############################################
|
||||||
|
data "terraform_remote_state" "shared" {
|
||||||
|
backend = "s3"
|
||||||
|
config = {
|
||||||
|
bucket = "assessment-model-terraform-state"
|
||||||
|
key = "env:/${var.stage}/terraform.tfstate"
|
||||||
|
region = "eu-west-2"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
data "terraform_remote_state" "engine" {
|
||||||
|
backend = "s3"
|
||||||
|
config = {
|
||||||
|
bucket = "ara-engine-terraform-state",
|
||||||
|
key = "env:/${var.stage}/terraform.tfstate"
|
||||||
|
region = "eu-west-2"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
data "terraform_remote_state" "categorisation" {
|
||||||
|
backend = "s3"
|
||||||
|
config = {
|
||||||
|
bucket = "categorisation-terraform-state",
|
||||||
|
key = "env:/${var.stage}/terraform.tfstate"
|
||||||
|
region = "eu-west-2"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
############################################
|
||||||
|
# Load Credentials
|
||||||
|
############################################
|
||||||
|
data "aws_secretsmanager_secret_version" "db_credentials" {
|
||||||
|
secret_id = "${var.stage}/assessment_model/db_credentials"
|
||||||
|
}
|
||||||
|
|
||||||
|
locals {
|
||||||
|
db_credentials = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string)
|
||||||
|
}
|
||||||
|
|
||||||
|
############################################
|
||||||
|
# FastAPI Lambda + API Gateway
|
||||||
|
############################################
|
||||||
|
module "fastapi" {
|
||||||
|
source = "../../modules/lambda_with_api_gateway"
|
||||||
|
|
||||||
|
name = "fastapi"
|
||||||
|
stage = var.stage
|
||||||
|
source_dir = "${path.root}/../../../../"
|
||||||
|
handler = "backend.app.main.handler"
|
||||||
|
runtime = "python3.11"
|
||||||
|
timeout = 600
|
||||||
|
memory_size = 512
|
||||||
|
artifact_bucket = data.terraform_remote_state.shared.outputs.ara_fast_api_state_bucket
|
||||||
|
requirements_file = "${path.root}/../../../../backend/app/requirements/requirements.txt"
|
||||||
|
|
||||||
|
domain_name = "api.${var.domain_name}"
|
||||||
|
|
||||||
|
environment = {
|
||||||
|
ENVIRONMENT = var.stage
|
||||||
|
API_KEY = var.api_key
|
||||||
|
SECRET_KEY = var.secret_key
|
||||||
|
# DOMAIN_NAME = var.domain_name
|
||||||
|
EPC_AUTH_TOKEN = var.epc_auth_token
|
||||||
|
GOOGLE_SOLAR_API_KEY = var.google_solar_api_key
|
||||||
|
|
||||||
|
DB_HOST = var.db_host
|
||||||
|
DB_NAME = var.db_name
|
||||||
|
DB_PORT = var.db_port
|
||||||
|
DB_USERNAME = local.db_credentials.db_assessment_model_username
|
||||||
|
DB_PASSWORD = local.db_credentials.db_assessment_model_password
|
||||||
|
|
||||||
|
PLAN_TRIGGER_BUCKET = data.terraform_remote_state.shared.outputs.retrofit_plan_trigger_bucket_name
|
||||||
|
DATA_BUCKET = data.terraform_remote_state.shared.outputs.retrofit_sap_data_bucket_name
|
||||||
|
SAP_PREDICTIONS_BUCKET = data.terraform_remote_state.shared.outputs.retrofit_sap_predictions_bucket_name
|
||||||
|
CARBON_PREDICTIONS_BUCKET = data.terraform_remote_state.shared.outputs.retrofit_carbon_predictions_bucket_name
|
||||||
|
HEAT_PREDICTIONS_BUCKET = data.terraform_remote_state.shared.outputs.retrofit_heat_predictions_bucket_name
|
||||||
|
HEATING_KWH_PREDICTIONS_BUCKET = data.terraform_remote_state.shared.outputs.retrofit_heating_kwh_predictions_bucket_name
|
||||||
|
HOTWATER_KWH_PREDICTIONS_BUCKET = data.terraform_remote_state.shared.outputs.retrofit_hotwater_kwh_predictions_bucket_name
|
||||||
|
ENERGY_ASSESSMENTS_BUCKET = data.terraform_remote_state.shared.outputs.retrofit_energy_assessments_bucket_name
|
||||||
|
|
||||||
|
ENGINE_SQS_URL = data.terraform_remote_state.engine.outputs.ara_engine_queue_url
|
||||||
|
CATEGORISATION_SQS_URL = data.terraform_remote_state.categorisation.outputs.categorisation_queue_url
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
############################################
|
||||||
|
# IAM policy attachments
|
||||||
|
############################################
|
||||||
|
# SQS
|
||||||
|
module "fastapi_sqs_policy" {
|
||||||
|
source = "../../modules/general_iam_policy"
|
||||||
|
|
||||||
|
policy_name = "fastapi-sqs-send-${var.stage}"
|
||||||
|
policy_description = "Allow FastAPI to send messages to engine & categorisation queues"
|
||||||
|
|
||||||
|
actions = [
|
||||||
|
"sqs:SendMessage"
|
||||||
|
]
|
||||||
|
|
||||||
|
resources = [
|
||||||
|
data.terraform_remote_state.engine.outputs.ara_engine_queue_arn,
|
||||||
|
data.terraform_remote_state.categorisation.outputs.categorisation_queue_arn
|
||||||
|
]
|
||||||
|
|
||||||
|
conditions = null
|
||||||
|
|
||||||
|
tags = {
|
||||||
|
Service = "fastapi"
|
||||||
|
Stage = var.stage
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
resource "aws_iam_role_policy_attachment" "fastapi_sqs_send" {
|
||||||
|
role = module.fastapi.role_name
|
||||||
|
policy_arn = module.fastapi_sqs_policy.policy_arn
|
||||||
|
}
|
||||||
|
|
||||||
|
# S3
|
||||||
|
resource "aws_iam_role_policy_attachment" "fastapi_s3_read_and_write" {
|
||||||
|
role = module.fastapi.role_name
|
||||||
|
policy_arn = data.terraform_remote_state.shared.outputs.fast_api_s3_read_and_write_arn
|
||||||
|
}
|
||||||
7
infrastructure/terraform/lambda/fast-api/outputs.tf
Normal file
7
infrastructure/terraform/lambda/fast-api/outputs.tf
Normal file
|
|
@ -0,0 +1,7 @@
|
||||||
|
output "domain_name" {
|
||||||
|
value = module.fastapi.domain_name
|
||||||
|
}
|
||||||
|
|
||||||
|
output "invoke_url" {
|
||||||
|
value = module.fastapi.invoke_url
|
||||||
|
}
|
||||||
16
infrastructure/terraform/lambda/fast-api/provider.tf
Normal file
16
infrastructure/terraform/lambda/fast-api/provider.tf
Normal file
|
|
@ -0,0 +1,16 @@
|
||||||
|
terraform {
|
||||||
|
required_providers {
|
||||||
|
aws = {
|
||||||
|
source = "hashicorp/aws"
|
||||||
|
version = ">= 5.0"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
backend "s3" {
|
||||||
|
bucket = "ara-fast-api-terraform-state"
|
||||||
|
key = "terraform.tfstate"
|
||||||
|
region = "eu-west-2"
|
||||||
|
}
|
||||||
|
|
||||||
|
required_version = ">= 1.2.0"
|
||||||
|
}
|
||||||
44
infrastructure/terraform/lambda/fast-api/variables.tf
Normal file
44
infrastructure/terraform/lambda/fast-api/variables.tf
Normal file
|
|
@ -0,0 +1,44 @@
|
||||||
|
variable "lambda_name" {
|
||||||
|
type = string
|
||||||
|
description = "Logical name of the lambda (e.g. address2uprn)"
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "stage" {
|
||||||
|
type = string
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "db_host" {
|
||||||
|
type = string
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "db_name" {
|
||||||
|
type = string
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "db_port" {
|
||||||
|
type = string
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "api_key" {
|
||||||
|
type = string
|
||||||
|
sensitive = true
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "secret_key" {
|
||||||
|
type = string
|
||||||
|
sensitive = true
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "domain_name" {
|
||||||
|
type = string
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "epc_auth_token" {
|
||||||
|
type = string
|
||||||
|
sensitive = true
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "google_solar_api_key" {
|
||||||
|
type = string
|
||||||
|
sensitive = true
|
||||||
|
}
|
||||||
45
infrastructure/terraform/lambda/ordnanceSurvey/main.tf
Normal file
45
infrastructure/terraform/lambda/ordnanceSurvey/main.tf
Normal file
|
|
@ -0,0 +1,45 @@
|
||||||
|
data "terraform_remote_state" "shared" {
|
||||||
|
backend = "s3"
|
||||||
|
config = {
|
||||||
|
bucket = "assessment-model-terraform-state"
|
||||||
|
key = "env:/${var.stage}/terraform.tfstate"
|
||||||
|
region = "eu-west-2"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
data "aws_secretsmanager_secret_version" "db_credentials" {
|
||||||
|
secret_id = "${var.stage}/assessment_model/db_credentials"
|
||||||
|
}
|
||||||
|
locals {
|
||||||
|
db_credentials = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string)
|
||||||
|
}
|
||||||
|
|
||||||
|
module "ordnance" {
|
||||||
|
source = "../../modules/lambda_with_sqs"
|
||||||
|
|
||||||
|
name = "ordnanceSurvey" #"address2uprn" for example
|
||||||
|
stage = var.stage
|
||||||
|
|
||||||
|
image_uri = local.image_uri
|
||||||
|
|
||||||
|
timeout = 900
|
||||||
|
|
||||||
|
# Optional: Set maximum_concurrency to limit concurrent SQS-triggered invocations (2-1000)
|
||||||
|
maximum_concurrency = var.maximum_concurrency
|
||||||
|
|
||||||
|
environment = merge(
|
||||||
|
{
|
||||||
|
STAGE = var.stage
|
||||||
|
LOG_LEVEL = "info"
|
||||||
|
DB_USERNAME = local.db_credentials.db_assessment_model_username
|
||||||
|
DB_PASSWORD = local.db_credentials.db_assessment_model_password
|
||||||
|
S3_BUCKET_NAME = data.terraform_remote_state.shared.outputs.retrofit_sap_data_bucket_name
|
||||||
|
ORDNANCE_SURVEY_API_KEY = var.ordnance_survey_api_key
|
||||||
|
},
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Attach S3 read policy to the Lambda execution role
|
||||||
|
resource "aws_iam_role_policy_attachment" "ordanceSurvey_read_and_write" {
|
||||||
|
role = module.ordnance.role_name
|
||||||
|
policy_arn = data.terraform_remote_state.shared.outputs.ordnance_s3_read_and_write_arn
|
||||||
|
}
|
||||||
16
infrastructure/terraform/lambda/ordnanceSurvey/provider.tf
Normal file
16
infrastructure/terraform/lambda/ordnanceSurvey/provider.tf
Normal file
|
|
@ -0,0 +1,16 @@
|
||||||
|
terraform {
|
||||||
|
required_providers {
|
||||||
|
aws = {
|
||||||
|
source = "hashicorp/aws"
|
||||||
|
version = ">= 5.0"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
backend "s3" {
|
||||||
|
bucket = "ordnance-terraform-state"
|
||||||
|
key = "terraform.tfstate"
|
||||||
|
region = "eu-west-2"
|
||||||
|
}
|
||||||
|
|
||||||
|
required_version = ">= 1.2.0"
|
||||||
|
}
|
||||||
43
infrastructure/terraform/lambda/ordnanceSurvey/variables.tf
Normal file
43
infrastructure/terraform/lambda/ordnanceSurvey/variables.tf
Normal file
|
|
@ -0,0 +1,43 @@
|
||||||
|
variable "lambda_name" {
|
||||||
|
type = string
|
||||||
|
description = "Logical name of the lambda (e.g. address2uprn)"
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "stage" {
|
||||||
|
description = "Deployment stage (e.g. dev, prod)"
|
||||||
|
type = string
|
||||||
|
}
|
||||||
|
variable "ecr_repo_url" {
|
||||||
|
type = string
|
||||||
|
description = "ECR repository URL (no tag, no digest)"
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "image_digest" {
|
||||||
|
type = string
|
||||||
|
description = "Image digest (sha256:...)"
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "maximum_concurrency" {
|
||||||
|
type = number
|
||||||
|
default = null
|
||||||
|
description = "Maximum number of concurrent Lambda invocations from SQS (2-1000). null = no limit."
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "batch_size" {
|
||||||
|
type = number
|
||||||
|
default = 1
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "ordnance_survey_api_key" {
|
||||||
|
type = string
|
||||||
|
sensitive = true
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
locals {
|
||||||
|
image_uri = "${var.ecr_repo_url}@${var.image_digest}"
|
||||||
|
}
|
||||||
|
|
||||||
|
output "resolved_image_uri" {
|
||||||
|
value = local.image_uri
|
||||||
|
}
|
||||||
|
|
@ -26,7 +26,7 @@ data "terraform_remote_state" "address2uprn" {
|
||||||
}
|
}
|
||||||
|
|
||||||
module "lambda" {
|
module "lambda" {
|
||||||
source = "../modules/lambda_with_sqs"
|
source = "../../modules/lambda_with_sqs"
|
||||||
|
|
||||||
name = "postcode-splitter"
|
name = "postcode-splitter"
|
||||||
stage = var.stage
|
stage = var.stage
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@ terraform {
|
||||||
required_providers {
|
required_providers {
|
||||||
aws = {
|
aws = {
|
||||||
source = "hashicorp/aws"
|
source = "hashicorp/aws"
|
||||||
version = "~> 4.16"
|
version = ">= 5.0"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
11
infrastructure/terraform/modules/acm_certificate/main.tf
Normal file
11
infrastructure/terraform/modules/acm_certificate/main.tf
Normal file
|
|
@ -0,0 +1,11 @@
|
||||||
|
resource "aws_acm_certificate" "this" {
|
||||||
|
domain_name = var.domain_name
|
||||||
|
subject_alternative_names = var.subject_alternative_names
|
||||||
|
validation_method = "DNS"
|
||||||
|
|
||||||
|
lifecycle {
|
||||||
|
create_before_destroy = false
|
||||||
|
}
|
||||||
|
|
||||||
|
tags = var.tags
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,7 @@
|
||||||
|
output "certificate_arn" {
|
||||||
|
value = aws_acm_certificate.this.arn
|
||||||
|
}
|
||||||
|
|
||||||
|
output "domain_validation_options" {
|
||||||
|
value = aws_acm_certificate.this.domain_validation_options
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,16 @@
|
||||||
|
variable "domain_name" {
|
||||||
|
description = "Primary domain name for the certificate"
|
||||||
|
type = string
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "subject_alternative_names" {
|
||||||
|
description = "Additional domains for the certificate"
|
||||||
|
type = list(string)
|
||||||
|
default = []
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "tags" {
|
||||||
|
description = "Tags to apply to the certificate"
|
||||||
|
type = map(string)
|
||||||
|
default = {}
|
||||||
|
}
|
||||||
|
|
@ -1,65 +1,165 @@
|
||||||
resource "aws_cloudfront_distribution" "s3_distribution" {
|
#############################################
|
||||||
origin {
|
# Use Managed Caching and Forwarding Policies
|
||||||
domain_name = var.bucket_domain_name
|
#############################################
|
||||||
origin_id = "S3-${var.bucket_name}"
|
data "aws_cloudfront_cache_policy" "caching_disabled" {
|
||||||
|
name = "Managed-CachingDisabled"
|
||||||
|
}
|
||||||
|
|
||||||
s3_origin_config {
|
data "aws_cloudfront_origin_request_policy" "all_viewer_except_host_header" {
|
||||||
origin_access_identity = aws_cloudfront_origin_access_identity.oai.cloudfront_access_identity_path
|
name = "Managed-AllViewerExceptHostHeader"
|
||||||
|
}
|
||||||
|
|
||||||
|
############################################
|
||||||
|
# CloudFront Distribution
|
||||||
|
############################################
|
||||||
|
|
||||||
|
resource "aws_cloudfront_distribution" "this" {
|
||||||
|
|
||||||
|
##########################################
|
||||||
|
# Origins
|
||||||
|
##########################################
|
||||||
|
|
||||||
|
dynamic "origin" {
|
||||||
|
for_each = { for o in var.origins : o.origin_id => o }
|
||||||
|
|
||||||
|
content {
|
||||||
|
domain_name = origin.value.origin_domain_name
|
||||||
|
origin_id = origin.value.origin_id
|
||||||
|
|
||||||
|
######################################
|
||||||
|
# S3 Origin
|
||||||
|
######################################
|
||||||
|
dynamic "s3_origin_config" {
|
||||||
|
for_each = origin.value.origin_type == "s3" ? [1] : []
|
||||||
|
|
||||||
|
content {
|
||||||
|
origin_access_identity = aws_cloudfront_origin_access_identity.oai[origin.key].cloudfront_access_identity_path
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
######################################
|
||||||
|
# API Gateway Origin
|
||||||
|
######################################
|
||||||
|
dynamic "custom_origin_config" {
|
||||||
|
for_each = origin.value.origin_type == "api" ? [1] : []
|
||||||
|
|
||||||
|
content {
|
||||||
|
http_port = 80
|
||||||
|
https_port = 443
|
||||||
|
origin_protocol_policy = "https-only"
|
||||||
|
origin_ssl_protocols = ["TLSv1.2"]
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
enabled = true
|
enabled = true
|
||||||
|
aliases = var.aliases
|
||||||
|
|
||||||
|
##########################################
|
||||||
|
# Default Cache Behavior (S3)
|
||||||
|
##########################################
|
||||||
|
|
||||||
default_cache_behavior {
|
default_cache_behavior {
|
||||||
allowed_methods = ["GET", "HEAD"]
|
target_origin_id = "s3-origin"
|
||||||
cached_methods = ["GET", "HEAD"]
|
|
||||||
target_origin_id = "S3-${var.bucket_name}"
|
|
||||||
viewer_protocol_policy = "redirect-to-https"
|
viewer_protocol_policy = "redirect-to-https"
|
||||||
compress = true
|
|
||||||
|
allowed_methods = ["GET", "HEAD"]
|
||||||
|
cached_methods = ["GET", "HEAD"]
|
||||||
|
|
||||||
forwarded_values {
|
forwarded_values {
|
||||||
query_string = false
|
query_string = false
|
||||||
|
|
||||||
cookies {
|
cookies {
|
||||||
forward = "none"
|
forward = "none"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
min_ttl = 0
|
compress = true
|
||||||
default_ttl = 86400
|
min_ttl = 0
|
||||||
max_ttl = 31536000
|
default_ttl = 3600
|
||||||
|
max_ttl = 86400
|
||||||
|
}
|
||||||
|
|
||||||
|
##########################################
|
||||||
|
# API Behavior
|
||||||
|
##########################################
|
||||||
|
|
||||||
|
ordered_cache_behavior {
|
||||||
|
path_pattern = "/v1/*"
|
||||||
|
target_origin_id = "api-origin"
|
||||||
|
|
||||||
|
viewer_protocol_policy = "redirect-to-https"
|
||||||
|
|
||||||
|
allowed_methods = ["GET","HEAD","OPTIONS","PUT","POST","PATCH","DELETE"]
|
||||||
|
cached_methods = ["GET","HEAD"]
|
||||||
|
|
||||||
|
cache_policy_id = data.aws_cloudfront_cache_policy.caching_disabled.id
|
||||||
|
origin_request_policy_id = data.aws_cloudfront_origin_request_policy.all_viewer_except_host_header.id
|
||||||
}
|
}
|
||||||
|
|
||||||
price_class = "PriceClass_All"
|
price_class = "PriceClass_All"
|
||||||
|
|
||||||
|
##########################################
|
||||||
|
# Geo Restrictions
|
||||||
|
##########################################
|
||||||
|
|
||||||
restrictions {
|
restrictions {
|
||||||
geo_restriction {
|
geo_restriction {
|
||||||
restriction_type = "none"
|
restriction_type = "none"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
##########################################
|
||||||
|
# SSL Certificate
|
||||||
|
##########################################
|
||||||
|
|
||||||
viewer_certificate {
|
viewer_certificate {
|
||||||
cloudfront_default_certificate = true
|
acm_certificate_arn = var.acm_certificate_arn
|
||||||
|
ssl_support_method = var.acm_certificate_arn != null ? "sni-only" : null
|
||||||
|
minimum_protocol_version = var.acm_certificate_arn != null ? "TLSv1.2_2021" : null
|
||||||
|
|
||||||
|
cloudfront_default_certificate = var.acm_certificate_arn == null
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
############################################
|
||||||
|
# Origin Access Identities (S3 only)
|
||||||
|
############################################
|
||||||
|
|
||||||
resource "aws_cloudfront_origin_access_identity" "oai" {
|
resource "aws_cloudfront_origin_access_identity" "oai" {
|
||||||
comment = "OAI for ${var.bucket_name}"
|
for_each = {
|
||||||
|
for o in var.origins : o.origin_id => o
|
||||||
|
if o.origin_type == "s3"
|
||||||
|
}
|
||||||
|
|
||||||
|
comment = "OAI for ${each.key}"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
############################################
|
||||||
|
# S3 Bucket Policy (S3 only)
|
||||||
|
############################################
|
||||||
|
|
||||||
resource "aws_s3_bucket_policy" "bucket_policy" {
|
resource "aws_s3_bucket_policy" "bucket_policy" {
|
||||||
bucket = var.bucket_id
|
for_each = {
|
||||||
|
for o in var.origins : o.origin_id => o
|
||||||
|
if o.origin_type == "s3"
|
||||||
|
}
|
||||||
|
|
||||||
|
bucket = each.value.bucket_id
|
||||||
|
|
||||||
policy = jsonencode({
|
policy = jsonencode({
|
||||||
Version = "2012-10-17"
|
Version = "2012-10-17"
|
||||||
Statement = [
|
Statement = [
|
||||||
{
|
{
|
||||||
Effect = "Allow"
|
Effect = "Allow"
|
||||||
Principal = {
|
Principal = {
|
||||||
AWS = "arn:aws:iam::cloudfront:user/CloudFront Origin Access Identity ${aws_cloudfront_origin_access_identity.oai.id}"
|
AWS = aws_cloudfront_origin_access_identity.oai[each.key].iam_arn
|
||||||
}
|
}
|
||||||
Action = "s3:GetObject"
|
Action = "s3:GetObject"
|
||||||
Resource = "${var.bucket_arn}/*"
|
Resource = "${each.value.bucket_arn}/*"
|
||||||
},
|
}
|
||||||
]
|
]
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
@ -1,24 +1,20 @@
|
||||||
variable "bucket_name" {
|
variable "origins" {
|
||||||
description = "The name of the bucket"
|
type = list(object({
|
||||||
type = string
|
origin_type = string # "s3" or "api"
|
||||||
|
origin_domain_name = string
|
||||||
|
origin_id = string
|
||||||
|
|
||||||
|
bucket_id = optional(string)
|
||||||
|
bucket_arn = optional(string)
|
||||||
|
}))
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "stage" {
|
variable "aliases" {
|
||||||
description = "The deployment stage"
|
type = list(string)
|
||||||
type = string
|
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "bucket_id" {
|
variable "acm_certificate_arn" {
|
||||||
description = "The ID of the S3 bucket"
|
description = "ACM certificate ARN for custom aliases"
|
||||||
type = string
|
|
||||||
}
|
|
||||||
|
|
||||||
variable "bucket_arn" {
|
|
||||||
description = "The ARN of the S3 bucket"
|
|
||||||
type = string
|
|
||||||
}
|
|
||||||
|
|
||||||
variable "bucket_domain_name" {
|
|
||||||
description = "The regional domain name of the S3 bucket"
|
|
||||||
type = string
|
type = string
|
||||||
|
default = null
|
||||||
}
|
}
|
||||||
27
infrastructure/terraform/modules/lambda_service_zip/main.tf
Normal file
27
infrastructure/terraform/modules/lambda_service_zip/main.tf
Normal file
|
|
@ -0,0 +1,27 @@
|
||||||
|
resource "aws_lambda_function" "this" {
|
||||||
|
function_name = var.name
|
||||||
|
role = var.role_arn
|
||||||
|
package_type = "Zip"
|
||||||
|
# filename = var.filename
|
||||||
|
source_code_hash = var.source_code_hash
|
||||||
|
handler = var.handler
|
||||||
|
runtime = var.runtime
|
||||||
|
timeout = var.timeout
|
||||||
|
memory_size = var.memory_size
|
||||||
|
publish = true
|
||||||
|
|
||||||
|
s3_bucket = var.s3_bucket
|
||||||
|
s3_key = var.s3_key
|
||||||
|
|
||||||
|
environment {
|
||||||
|
variables = var.environment
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
output "lambda_arn" {
|
||||||
|
value = aws_lambda_function.this.arn
|
||||||
|
}
|
||||||
|
|
||||||
|
output "function_name" {
|
||||||
|
value = aws_lambda_function.this.function_name
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,20 @@
|
||||||
|
variable "name" { type = string }
|
||||||
|
variable "role_arn" { type = string }
|
||||||
|
# variable "filename" { type = string }
|
||||||
|
variable "source_code_hash" { type = string }
|
||||||
|
variable "handler" { type = string }
|
||||||
|
variable "runtime" { type = string }
|
||||||
|
variable "timeout" {
|
||||||
|
type = number
|
||||||
|
default = 30
|
||||||
|
}
|
||||||
|
variable "memory_size" {
|
||||||
|
type = number
|
||||||
|
default = 128
|
||||||
|
}
|
||||||
|
variable "environment" {
|
||||||
|
type = map(string)
|
||||||
|
default = {}
|
||||||
|
}
|
||||||
|
variable "s3_bucket" { type = string }
|
||||||
|
variable "s3_key" { type = string }
|
||||||
117
infrastructure/terraform/modules/lambda_with_api_gateway/main.tf
Normal file
117
infrastructure/terraform/modules/lambda_with_api_gateway/main.tf
Normal file
|
|
@ -0,0 +1,117 @@
|
||||||
|
############################################
|
||||||
|
# IAM role
|
||||||
|
############################################
|
||||||
|
module "role" {
|
||||||
|
source = "../lambda_execution_role"
|
||||||
|
name = "${var.name}-lambda-${var.stage}"
|
||||||
|
}
|
||||||
|
|
||||||
|
############################################
|
||||||
|
# Cloudwatch log group
|
||||||
|
############################################
|
||||||
|
resource "aws_cloudwatch_log_group" "api_logs" {
|
||||||
|
name = "/aws/apigateway/${var.name}-${var.stage}"
|
||||||
|
retention_in_days = 14
|
||||||
|
}
|
||||||
|
|
||||||
|
############################################
|
||||||
|
# Install python packages
|
||||||
|
############################################
|
||||||
|
resource "null_resource" "pip_install" {
|
||||||
|
count = var.requirements_file != null ? 1 : 0
|
||||||
|
|
||||||
|
triggers = {
|
||||||
|
always_run = timestamp()
|
||||||
|
}
|
||||||
|
|
||||||
|
provisioner "local-exec" {
|
||||||
|
command = "pip install -r ${var.requirements_file} -t ${var.source_dir} --platform manylinux2014_x86_64 --implementation cp --python-version 3.11 --only-binary=:all: --upgrade"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
############################################
|
||||||
|
# Zip the source code
|
||||||
|
############################################
|
||||||
|
data "archive_file" "this" {
|
||||||
|
depends_on = [null_resource.pip_install]
|
||||||
|
type = "zip"
|
||||||
|
source_dir = var.source_dir
|
||||||
|
output_path = "${path.module}/lambda_package.zip"
|
||||||
|
excludes = var.zip_excludes
|
||||||
|
}
|
||||||
|
|
||||||
|
############################################
|
||||||
|
# Upload zip to S3
|
||||||
|
############################################
|
||||||
|
resource "aws_s3_object" "lambda_zip" {
|
||||||
|
bucket = var.artifact_bucket
|
||||||
|
key = "env:/${var.stage}/${var.name}.zip"
|
||||||
|
source = data.archive_file.this.output_path
|
||||||
|
etag = data.archive_file.this.output_md5
|
||||||
|
}
|
||||||
|
|
||||||
|
############################################
|
||||||
|
# Lambda
|
||||||
|
############################################
|
||||||
|
module "lambda" {
|
||||||
|
source = "../lambda_service_zip"
|
||||||
|
|
||||||
|
name = "${var.name}-${var.stage}"
|
||||||
|
role_arn = module.role.role_arn
|
||||||
|
s3_bucket = var.artifact_bucket
|
||||||
|
s3_key = aws_s3_object.lambda_zip.key
|
||||||
|
source_code_hash = data.archive_file.this.output_base64sha256
|
||||||
|
handler = var.handler
|
||||||
|
runtime = var.runtime
|
||||||
|
timeout = var.timeout
|
||||||
|
memory_size = var.memory_size
|
||||||
|
environment = var.environment
|
||||||
|
}
|
||||||
|
|
||||||
|
############################################
|
||||||
|
# API Gateway
|
||||||
|
############################################
|
||||||
|
resource "aws_apigatewayv2_api" "this" {
|
||||||
|
name = "${var.name}-api-${var.stage}"
|
||||||
|
protocol_type = "HTTP"
|
||||||
|
}
|
||||||
|
|
||||||
|
resource "aws_apigatewayv2_stage" "this" {
|
||||||
|
api_id = aws_apigatewayv2_api.this.id
|
||||||
|
name = "$default"
|
||||||
|
auto_deploy = true
|
||||||
|
|
||||||
|
access_log_settings {
|
||||||
|
destination_arn = aws_cloudwatch_log_group.api_logs.arn
|
||||||
|
|
||||||
|
format = jsonencode({
|
||||||
|
requestId = "$context.requestId"
|
||||||
|
domainName = "$context.domainName"
|
||||||
|
path = "$context.path"
|
||||||
|
status = "$context.status"
|
||||||
|
sourceIp = "$context.identity.sourceIp"
|
||||||
|
userAgent = "$context.identity.userAgent"
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
resource "aws_apigatewayv2_integration" "this" {
|
||||||
|
api_id = aws_apigatewayv2_api.this.id
|
||||||
|
integration_type = "AWS_PROXY"
|
||||||
|
integration_uri = module.lambda.lambda_arn
|
||||||
|
payload_format_version = "2.0"
|
||||||
|
}
|
||||||
|
|
||||||
|
resource "aws_apigatewayv2_route" "catch_all" {
|
||||||
|
api_id = aws_apigatewayv2_api.this.id
|
||||||
|
route_key = "$default"
|
||||||
|
target = "integrations/${aws_apigatewayv2_integration.this.id}"
|
||||||
|
}
|
||||||
|
|
||||||
|
resource "aws_lambda_permission" "apigw_invoke" {
|
||||||
|
statement_id = "AllowAPIGatewayInvoke"
|
||||||
|
action = "lambda:InvokeFunction"
|
||||||
|
function_name = module.lambda.lambda_arn
|
||||||
|
principal = "apigateway.amazonaws.com"
|
||||||
|
source_arn = "${aws_apigatewayv2_api.this.execution_arn}/*/*"
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,11 @@
|
||||||
|
output "role_name" {
|
||||||
|
value = module.role.role_name
|
||||||
|
}
|
||||||
|
|
||||||
|
output "domain_name" {
|
||||||
|
value = var.domain_name
|
||||||
|
}
|
||||||
|
|
||||||
|
output "invoke_url" {
|
||||||
|
value = aws_apigatewayv2_stage.this.invoke_url
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,48 @@
|
||||||
|
variable "name" { type = string }
|
||||||
|
variable "stage" { type = string }
|
||||||
|
variable "source_dir" { type = string }
|
||||||
|
variable "handler" { type = string }
|
||||||
|
variable "runtime" { type = string }
|
||||||
|
|
||||||
|
variable "zip_excludes" {
|
||||||
|
type = list(string)
|
||||||
|
default = [
|
||||||
|
"**/__pycache__/**",
|
||||||
|
"**/*.pyc",
|
||||||
|
"**/.pytest_cache/**",
|
||||||
|
"**/tests/**",
|
||||||
|
"**/infrastructure/**"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "timeout" {
|
||||||
|
type = number
|
||||||
|
default = 600
|
||||||
|
}
|
||||||
|
variable "memory_size" {
|
||||||
|
type = number
|
||||||
|
default = 512
|
||||||
|
}
|
||||||
|
variable "environment" {
|
||||||
|
type = map(string)
|
||||||
|
default = {}
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "domain_name" {
|
||||||
|
type = string
|
||||||
|
default = null
|
||||||
|
}
|
||||||
|
variable "certificate_arn" {
|
||||||
|
type = string
|
||||||
|
default = null
|
||||||
|
}
|
||||||
|
variable "route53_zone_id" {
|
||||||
|
type = string
|
||||||
|
default = null
|
||||||
|
}
|
||||||
|
variable "artifact_bucket" { type = string }
|
||||||
|
|
||||||
|
variable "requirements_file" {
|
||||||
|
type = string
|
||||||
|
default = null
|
||||||
|
}
|
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
# IAM role
|
# IAM role
|
||||||
############################################
|
############################################
|
||||||
module "role" {
|
module "role" {
|
||||||
source = "../../../modules/lambda_execution_role"
|
source = "../lambda_execution_role"
|
||||||
name = "${var.name}-lambda-${var.stage}"
|
name = "${var.name}-lambda-${var.stage}"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -14,7 +14,7 @@ output "role_name" {
|
||||||
# SQS queue + DLQ
|
# SQS queue + DLQ
|
||||||
############################################
|
############################################
|
||||||
module "queue" {
|
module "queue" {
|
||||||
source = "../../../modules/sqs_queue"
|
source = "../sqs_queue"
|
||||||
name = "${var.name}-queue-${var.stage}"
|
name = "${var.name}-queue-${var.stage}"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -22,7 +22,7 @@ module "queue" {
|
||||||
# Lambda
|
# Lambda
|
||||||
############################################
|
############################################
|
||||||
module "lambda" {
|
module "lambda" {
|
||||||
source = "../../../modules/lambda_service"
|
source = "../lambda_service"
|
||||||
|
|
||||||
name = "${var.name}-${var.stage}"
|
name = "${var.name}-${var.stage}"
|
||||||
role_arn = module.role.role_arn
|
role_arn = module.role.role_arn
|
||||||
|
|
@ -38,7 +38,7 @@ module "lambda" {
|
||||||
# SQS → Lambda trigger
|
# SQS → Lambda trigger
|
||||||
############################################
|
############################################
|
||||||
module "sqs_trigger" {
|
module "sqs_trigger" {
|
||||||
source = "../../../modules/lambda_sqs_trigger"
|
source = "../lambda_sqs_trigger"
|
||||||
|
|
||||||
lambda_arn = module.lambda.lambda_arn
|
lambda_arn = module.lambda.lambda_arn
|
||||||
lambda_role_name = module.role.role_name
|
lambda_role_name = module.role.role_name
|
||||||
|
|
@ -2,9 +2,10 @@
|
||||||
locals {
|
locals {
|
||||||
# Generate full resource ARNs by combining bucket ARNs with resource paths
|
# Generate full resource ARNs by combining bucket ARNs with resource paths
|
||||||
resources = flatten([
|
resources = flatten([
|
||||||
for bucket_arn in var.bucket_arns : [
|
for bucket_arn in var.bucket_arns : concat(
|
||||||
for path in var.resource_paths : "${bucket_arn}${path}"
|
[bucket_arn], # bare ARN for bucket-level actions like ListBucket
|
||||||
]
|
[for path in var.resource_paths : "${bucket_arn}${path}"]
|
||||||
|
)
|
||||||
])
|
])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2,20 +2,20 @@ terraform {
|
||||||
required_providers {
|
required_providers {
|
||||||
aws = {
|
aws = {
|
||||||
source = "hashicorp/aws"
|
source = "hashicorp/aws"
|
||||||
version = "~> 4.16"
|
version = ">= 5.0"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
backend "s3" {
|
backend "s3" {
|
||||||
bucket = "assessment-model-terraform-state"
|
bucket = "assessment-model-terraform-state"
|
||||||
region = "eu-west-2"
|
region = "eu-west-2"
|
||||||
key = "terraform.tfstate"
|
key = "terraform.tfstate"
|
||||||
}
|
}
|
||||||
|
|
||||||
required_version = ">= 1.2.0"
|
required_version = ">= 1.2.0"
|
||||||
}
|
}
|
||||||
|
|
||||||
provider "aws" {
|
provider "aws" {
|
||||||
region = var.region
|
region = var.region
|
||||||
}
|
}
|
||||||
|
|
||||||
# Additional provider for resources that need to be in us-east-1, specifically the SSL certificate
|
# Additional provider for resources that need to be in us-east-1, specifically the SSL certificate
|
||||||
|
|
@ -47,30 +47,30 @@ resource "aws_security_group" "allow_db" {
|
||||||
|
|
||||||
ingress {
|
ingress {
|
||||||
# TLS (change to whatever ports you need)
|
# TLS (change to whatever ports you need)
|
||||||
from_port = 5432
|
from_port = 5432
|
||||||
to_port = 5432
|
to_port = 5432
|
||||||
protocol = "tcp"
|
protocol = "tcp"
|
||||||
cidr_blocks = ["0.0.0.0/0"]
|
cidr_blocks = ["0.0.0.0/0"]
|
||||||
}
|
}
|
||||||
|
|
||||||
egress {
|
egress {
|
||||||
from_port = 0
|
from_port = 0
|
||||||
to_port = 0
|
to_port = 0
|
||||||
protocol = "-1"
|
protocol = "-1"
|
||||||
cidr_blocks = ["0.0.0.0/0"]
|
cidr_blocks = ["0.0.0.0/0"]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
resource "aws_db_instance" "default" {
|
resource "aws_db_instance" "default" {
|
||||||
allocated_storage = var.allocated_storage
|
allocated_storage = var.allocated_storage
|
||||||
engine = "postgres"
|
engine = "postgres"
|
||||||
engine_version = "14.17"
|
engine_version = "14.17"
|
||||||
instance_class = var.instance_class
|
instance_class = var.instance_class
|
||||||
db_name = var.database_name
|
db_name = var.database_name
|
||||||
username = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string)["db_assessment_model_username"]
|
username = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string)["db_assessment_model_username"]
|
||||||
password = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string)["db_assessment_model_password"]
|
password = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string)["db_assessment_model_password"]
|
||||||
parameter_group_name = "default.postgres14"
|
parameter_group_name = "default.postgres14"
|
||||||
skip_final_snapshot = true
|
skip_final_snapshot = true
|
||||||
vpc_security_group_ids = [aws_security_group.allow_db.id]
|
vpc_security_group_ids = [aws_security_group.allow_db.id]
|
||||||
lifecycle {
|
lifecycle {
|
||||||
prevent_destroy = true
|
prevent_destroy = true
|
||||||
|
|
@ -87,7 +87,7 @@ resource "aws_db_instance" "default" {
|
||||||
storage_type = "gp3"
|
storage_type = "gp3"
|
||||||
|
|
||||||
# Automated backups configuration
|
# Automated backups configuration
|
||||||
backup_retention_period = 14
|
backup_retention_period = 14
|
||||||
backup_window = "03:00-04:00"
|
backup_window = "03:00-04:00"
|
||||||
maintenance_window = "Sun:02:00-Sun:02:30"
|
maintenance_window = "Sun:02:00-Sun:02:30"
|
||||||
copy_tags_to_snapshot = true
|
copy_tags_to_snapshot = true
|
||||||
|
|
@ -103,7 +103,7 @@ module "s3_presignable_bucket" {
|
||||||
}
|
}
|
||||||
|
|
||||||
output "retrofit_plan_trigger_bucket_name" {
|
output "retrofit_plan_trigger_bucket_name" {
|
||||||
value = module.s3_presignable_bucket.bucket_name
|
value = module.s3_presignable_bucket.bucket_name
|
||||||
description = "Name of the retrofit plan trigger bucket"
|
description = "Name of the retrofit plan trigger bucket"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -127,6 +127,22 @@ module "s3" {
|
||||||
allowed_origins = var.allowed_origins
|
allowed_origins = var.allowed_origins
|
||||||
}
|
}
|
||||||
|
|
||||||
|
output "retrofit_datalake_bucket_id" {
|
||||||
|
value = module.s3.bucket_id
|
||||||
|
}
|
||||||
|
|
||||||
|
output "retrofit_datalake_bucket_arn" {
|
||||||
|
value = module.s3.bucket_arn
|
||||||
|
}
|
||||||
|
|
||||||
|
output "retrofit_datalake_bucket_domain_name" {
|
||||||
|
value = module.s3.bucket_domain_name
|
||||||
|
}
|
||||||
|
|
||||||
|
output "retrofit_datalake_bucket_name" {
|
||||||
|
value = module.s3.bucket_name
|
||||||
|
}
|
||||||
|
|
||||||
module "model_directory" {
|
module "model_directory" {
|
||||||
source = "../modules/s3"
|
source = "../modules/s3"
|
||||||
bucketname = "retrofit-model-directory-${var.stage}"
|
bucketname = "retrofit-model-directory-${var.stage}"
|
||||||
|
|
@ -140,7 +156,7 @@ module "retrofit_sap_predictions" {
|
||||||
}
|
}
|
||||||
|
|
||||||
output "retrofit_sap_predictions_bucket_name" {
|
output "retrofit_sap_predictions_bucket_name" {
|
||||||
value = module.retrofit_sap_predictions.bucket_name
|
value = module.retrofit_sap_predictions.bucket_name
|
||||||
description = "Name of the retrofit SAP predictions bucket"
|
description = "Name of the retrofit SAP predictions bucket"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -151,7 +167,7 @@ module "retrofit_sap_data" {
|
||||||
}
|
}
|
||||||
|
|
||||||
output "retrofit_sap_data_bucket_name" {
|
output "retrofit_sap_data_bucket_name" {
|
||||||
value = module.retrofit_sap_data.bucket_name
|
value = module.retrofit_sap_data.bucket_name
|
||||||
description = "Name of the retrofit SAP data bucket"
|
description = "Name of the retrofit SAP data bucket"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -162,7 +178,7 @@ module "retrofit_carbon_predictions" {
|
||||||
}
|
}
|
||||||
|
|
||||||
output "retrofit_carbon_predictions_bucket_name" {
|
output "retrofit_carbon_predictions_bucket_name" {
|
||||||
value = module.retrofit_carbon_predictions.bucket_name
|
value = module.retrofit_carbon_predictions.bucket_name
|
||||||
description = "Name of the retrofit carbon predictions bucket"
|
description = "Name of the retrofit carbon predictions bucket"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -173,7 +189,7 @@ module "retrofit_heat_predictions" {
|
||||||
}
|
}
|
||||||
|
|
||||||
output "retrofit_heat_predictions_bucket_name" {
|
output "retrofit_heat_predictions_bucket_name" {
|
||||||
value = module.retrofit_heat_predictions.bucket_name
|
value = module.retrofit_heat_predictions.bucket_name
|
||||||
description = "Name of the retrofit heat predictions bucket"
|
description = "Name of the retrofit heat predictions bucket"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -202,7 +218,7 @@ module "retrofit_heating_kwh_predictions" {
|
||||||
}
|
}
|
||||||
|
|
||||||
output "retrofit_heating_kwh_predictions_bucket_name" {
|
output "retrofit_heating_kwh_predictions_bucket_name" {
|
||||||
value = module.retrofit_heating_kwh_predictions.bucket_name
|
value = module.retrofit_heating_kwh_predictions.bucket_name
|
||||||
description = "Name of the retrofit heating kWh predictions bucket"
|
description = "Name of the retrofit heating kWh predictions bucket"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -213,7 +229,7 @@ module "retrofit_hotwater_kwh_predictions" {
|
||||||
}
|
}
|
||||||
|
|
||||||
output "retrofit_hotwater_kwh_predictions_bucket_name" {
|
output "retrofit_hotwater_kwh_predictions_bucket_name" {
|
||||||
value = module.retrofit_hotwater_kwh_predictions.bucket_name
|
value = module.retrofit_hotwater_kwh_predictions.bucket_name
|
||||||
description = "Name of the retrofit hotwater kWh predictions bucket"
|
description = "Name of the retrofit hotwater kWh predictions bucket"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -232,7 +248,7 @@ module "retrofit_energy_assessments" {
|
||||||
}
|
}
|
||||||
|
|
||||||
output "retrofit_energy_assessments_bucket_name" {
|
output "retrofit_energy_assessments_bucket_name" {
|
||||||
value = module.retrofit_energy_assessments.bucket_name
|
value = module.retrofit_energy_assessments.bucket_name
|
||||||
description = "Name of the retrofit energy assessments bucket"
|
description = "Name of the retrofit energy assessments bucket"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -311,16 +327,14 @@ module "sap_baseline_ecr" {
|
||||||
source = "../modules/ecr"
|
source = "../modules/ecr"
|
||||||
}
|
}
|
||||||
|
|
||||||
##############################################
|
module "heat_baseline_ecr" {
|
||||||
# CDN - Cloudfront
|
ecr_name = "heat-baseline-prediction-${var.stage}"
|
||||||
##############################################
|
source = "../modules/ecr"
|
||||||
module "cloudfront_distribution" {
|
}
|
||||||
source = "../modules/cloudfront"
|
|
||||||
bucket_name = module.s3.bucket_name
|
module "carbon_baseline_ecr" {
|
||||||
bucket_id = module.s3.bucket_id
|
ecr_name = "carbon-baseline-prediction-${var.stage}"
|
||||||
bucket_arn = module.s3.bucket_arn
|
source = "../modules/ecr"
|
||||||
bucket_domain_name = module.s3.bucket_domain_name
|
|
||||||
stage = var.stage
|
|
||||||
}
|
}
|
||||||
|
|
||||||
################################################
|
################################################
|
||||||
|
|
@ -348,7 +362,7 @@ module "address2uprn_state_bucket" {
|
||||||
module "address2uprn_registry" {
|
module "address2uprn_registry" {
|
||||||
source = "../modules/container_registry"
|
source = "../modules/container_registry"
|
||||||
name = "address2uprn"
|
name = "address2uprn"
|
||||||
stage = var.stage
|
stage = var.stage
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -379,14 +393,14 @@ module "condition_etl_state_bucket" {
|
||||||
module "condition_etl_registry" {
|
module "condition_etl_registry" {
|
||||||
source = "../modules/container_registry"
|
source = "../modules/container_registry"
|
||||||
name = "condition-etl"
|
name = "condition-etl"
|
||||||
stage = var.stage
|
stage = var.stage
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Condition Data S3 Bucket to store initial data
|
# Condition Data S3 Bucket to store initial data
|
||||||
module "condition_data_bucket" {
|
module "condition_data_bucket" {
|
||||||
source = "../modules/s3"
|
source = "../modules/s3"
|
||||||
bucketname = "condition-data-${var.stage}"
|
bucketname = "condition-data-${var.stage}"
|
||||||
allowed_origins = var.allowed_origins
|
allowed_origins = var.allowed_origins
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -417,7 +431,7 @@ module "postcode_splitter_state_bucket" {
|
||||||
module "postcode_splitter_registry" {
|
module "postcode_splitter_registry" {
|
||||||
source = "../modules/container_registry"
|
source = "../modules/container_registry"
|
||||||
name = "postcode_splitter"
|
name = "postcode_splitter"
|
||||||
stage = var.stage
|
stage = var.stage
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -448,7 +462,39 @@ module "categorisation_state_bucket" {
|
||||||
module "categorisation_registry" {
|
module "categorisation_registry" {
|
||||||
source = "../modules/container_registry"
|
source = "../modules/container_registry"
|
||||||
name = "categorisation"
|
name = "categorisation"
|
||||||
stage = var.stage
|
stage = var.stage
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
################################################
|
||||||
|
# OrdnanceSurveyAPI – Lambda
|
||||||
|
################################################
|
||||||
|
module "ordnance_state_bucket" {
|
||||||
|
source = "../modules/tf_state_bucket"
|
||||||
|
bucket_name = "ordnance-terraform-state"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
module "ordnance_registry" {
|
||||||
|
source = "../modules/container_registry"
|
||||||
|
name = "ordnance"
|
||||||
|
stage = var.stage
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
# S3 policy for postcode splitter to read from retrofit data bucket
|
||||||
|
module "ordnance_s3_read_and_write" {
|
||||||
|
source = "../modules/s3_iam_policy"
|
||||||
|
|
||||||
|
policy_name = "OrdnanceSurveyReadandWriteS3"
|
||||||
|
policy_description = "Allow ordnance Lambda to read and write from retrofit-data bucket"
|
||||||
|
bucket_arns = ["arn:aws:s3:::retrofit-data-${var.stage}"]
|
||||||
|
actions = ["s3:GetObject", "s3:ListBucket", "s3:PutObject"]
|
||||||
|
resource_paths = ["/*"]
|
||||||
|
}
|
||||||
|
|
||||||
|
output "ordnance_s3_read_and_write_arn" {
|
||||||
|
value = module.ordnance_s3_read_and_write.policy_arn
|
||||||
}
|
}
|
||||||
|
|
||||||
################################################
|
################################################
|
||||||
|
|
@ -463,7 +509,7 @@ module "engine_state_bucket" {
|
||||||
module "engine_registry" {
|
module "engine_registry" {
|
||||||
source = "../modules/container_registry"
|
source = "../modules/container_registry"
|
||||||
name = "engine"
|
name = "engine"
|
||||||
stage = var.stage
|
stage = var.stage
|
||||||
}
|
}
|
||||||
|
|
||||||
# S3 policy for Engine to read and write from various S3 buckets
|
# S3 policy for Engine to read and write from various S3 buckets
|
||||||
|
|
@ -472,7 +518,7 @@ module "engine_s3_read_and_write" {
|
||||||
|
|
||||||
policy_name = "EngineReadandWriteS3"
|
policy_name = "EngineReadandWriteS3"
|
||||||
policy_description = "Allow Engine Lambda to read from and write to various S3 buckets"
|
policy_description = "Allow Engine Lambda to read from and write to various S3 buckets"
|
||||||
bucket_arns = [
|
bucket_arns = [
|
||||||
"arn:aws:s3:::${module.s3_presignable_bucket.bucket_name}",
|
"arn:aws:s3:::${module.s3_presignable_bucket.bucket_name}",
|
||||||
"arn:aws:s3:::${module.retrofit_sap_data.bucket_name}",
|
"arn:aws:s3:::${module.retrofit_sap_data.bucket_name}",
|
||||||
"arn:aws:s3:::${module.retrofit_sap_predictions.bucket_name}",
|
"arn:aws:s3:::${module.retrofit_sap_predictions.bucket_name}",
|
||||||
|
|
@ -482,10 +528,70 @@ module "engine_s3_read_and_write" {
|
||||||
"arn:aws:s3:::${module.retrofit_hotwater_kwh_predictions.bucket_name}",
|
"arn:aws:s3:::${module.retrofit_hotwater_kwh_predictions.bucket_name}",
|
||||||
"arn:aws:s3:::${module.retrofit_energy_assessments.bucket_name}"
|
"arn:aws:s3:::${module.retrofit_energy_assessments.bucket_name}"
|
||||||
]
|
]
|
||||||
actions = ["s3:*"]
|
actions = ["s3:*"]
|
||||||
resource_paths = ["/*"]
|
resource_paths = ["/*"]
|
||||||
}
|
}
|
||||||
|
|
||||||
output "engine_s3_read_and_write_arn" {
|
output "engine_s3_read_and_write_arn" {
|
||||||
value = module.engine_s3_read_and_write.policy_arn
|
value = module.engine_s3_read_and_write.policy_arn
|
||||||
}
|
}
|
||||||
|
|
||||||
|
################################################
|
||||||
|
# FastAPI – Lambda
|
||||||
|
################################################
|
||||||
|
module "ara_fast_api_state_bucket" {
|
||||||
|
source = "../modules/tf_state_bucket"
|
||||||
|
bucket_name = "ara-fast-api-terraform-state"
|
||||||
|
}
|
||||||
|
|
||||||
|
output "ara_fast_api_state_bucket" {
|
||||||
|
value = module.ara_fast_api_state_bucket.bucket_name
|
||||||
|
}
|
||||||
|
|
||||||
|
# S3 policy for FastAPI app to read and write from various S3 buckets
|
||||||
|
module "fast_api_s3_read_and_write" {
|
||||||
|
source = "../modules/s3_iam_policy"
|
||||||
|
|
||||||
|
policy_name = "FastAPIReadandWriteS3"
|
||||||
|
policy_description = "Allow FastAPI Lambda to read from and write to various S3 buckets"
|
||||||
|
bucket_arns = [
|
||||||
|
"arn:aws:s3:::${module.s3_presignable_bucket.bucket_name}",
|
||||||
|
"arn:aws:s3:::${module.retrofit_sap_data.bucket_name}",
|
||||||
|
"arn:aws:s3:::${module.retrofit_sap_predictions.bucket_name}",
|
||||||
|
"arn:aws:s3:::${module.retrofit_carbon_predictions.bucket_name}",
|
||||||
|
"arn:aws:s3:::${module.retrofit_heat_predictions.bucket_name}",
|
||||||
|
"arn:aws:s3:::${module.retrofit_heating_kwh_predictions.bucket_name}",
|
||||||
|
"arn:aws:s3:::${module.retrofit_hotwater_kwh_predictions.bucket_name}",
|
||||||
|
"arn:aws:s3:::${module.retrofit_energy_assessments.bucket_name}"
|
||||||
|
]
|
||||||
|
actions = ["s3:GetObject", "s3:ListBucket"]
|
||||||
|
resource_paths = ["/*"]
|
||||||
|
}
|
||||||
|
|
||||||
|
output "fast_api_s3_read_and_write_arn" {
|
||||||
|
value = module.fast_api_s3_read_and_write.policy_arn
|
||||||
|
}
|
||||||
|
|
||||||
|
################################################
|
||||||
|
# CDN Certificate
|
||||||
|
################################################
|
||||||
|
module "cdn_certificate_state_bucket" {
|
||||||
|
source = "../modules/tf_state_bucket"
|
||||||
|
bucket_name = "cdn-certificate-terraform-state"
|
||||||
|
}
|
||||||
|
|
||||||
|
output "cdn_certificate_state_bucket" {
|
||||||
|
value = module.cdn_certificate_state_bucket.bucket_name
|
||||||
|
}
|
||||||
|
|
||||||
|
################################################
|
||||||
|
# CDN
|
||||||
|
################################################
|
||||||
|
module "cdn_state_bucket" {
|
||||||
|
source = "../modules/tf_state_bucket"
|
||||||
|
bucket_name = "ara-cdn-terraform-state"
|
||||||
|
}
|
||||||
|
|
||||||
|
output "cdn_state_bucket" {
|
||||||
|
value = module.cdn_state_bucket.bucket_name
|
||||||
|
}
|
||||||
|
|
|
||||||
73
scripts/download_cotality_evidence.py
Normal file
73
scripts/download_cotality_evidence.py
Normal file
|
|
@ -0,0 +1,73 @@
|
||||||
|
import requests
|
||||||
|
import json
|
||||||
|
|
||||||
|
TOKEN = "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCIsImtpZCI6Ik1EUTRNRU5GUTBVNU9FUXpOelk1TVRFME0wUkdOMFpFUkRoR1JVVkJNVGMxT1RFNFJERXlPQSJ9.eyJodHRwOi8vZW1haWwiOiJzZWJhc3RpYW5Ab3Ntb3Npcy1hY2QuY29tIiwiaHR0cDovL2NsdWsudG9rZW4vbGFzdFBhc3N3b3JkQ2hhbmdlIjoiMjAyNS0wOC0yNlQwOTo1NDoyNi4zMjZaIiwiaHR0cDovL2NsdWsudG9rZW4vY29ubmVjdGlvbiI6ImVUZWNoSUQiLCJodHRwOi8vY2x1ay50b2tlbi9zdHJhdGVneSI6ImF1dGgwIiwiaHR0cDovL2NsdWsudG9rZW4vc3RyYXRlZ3lUeXBlIjoiZGF0YWJhc2UiLCJpc3MiOiJodHRwczovL2V0ZWNoaWQuZXUuYXV0aDAuY29tLyIsInN1YiI6ImF1dGgwfDY4YWQ4NDUyZDI2YzI1ZmMyMzkwZmYxYSIsImF1ZCI6WyJodHRwczovL3Bhc2h1Yi5hcGkuZXRlY2gubmV0IiwiaHR0cHM6Ly9ldGVjaGlkLmV1LmF1dGgwLmNvbS91c2VyaW5mbyJdLCJpYXQiOjE3NzMyMzc4MjQsImV4cCI6MTc3MzI0NTAyNCwic2NvcGUiOiJvcGVuaWQiLCJhenAiOiJEaVp6d3VVaTVkVmozOXR3NG00bWZ6emZvRm5MdmVLZyJ9.mkkxeZiD_ByHY4TJKpLQ-trmeGs15s0ekL6u1n-ek9j-EzNyf6qalEHCyHf8gzdNhU_vay96bIOMRHp4vXFaLqSANwKZayIS3EoA_b9-u2FAZpooxEvReAMNJGoZ6WLD01AQXWv-l7ww1ZqAnQzw0moL_Oma6hVmA5oa-RJKJ3MerS7e0Wei97Db48E140-EAbQf2iPcKYYtCNRA4il6n8DFiqGeoUMGo99jkR1ceZAvMpOAj8RhKX-4qSiDfX6yXUS2G96U5m7S_GWI-DEj5TazkN10Af3TyOY3EVjmZoJcRpiAR4cFmlfcTydjrShU03DWmPZm1QItf2McxfCpNA"
|
||||||
|
|
||||||
|
base = "https://pashub.net/api"
|
||||||
|
|
||||||
|
headers = {"Authorization": f"Bearer {TOKEN}", "Accept": "application/json"}
|
||||||
|
|
||||||
|
company_id = "cb5249e2-8f31-4ef4-aefd-08ddaccb1fa2"
|
||||||
|
|
||||||
|
# 1️⃣ get jobs
|
||||||
|
params = {
|
||||||
|
"pageIndex": 0,
|
||||||
|
"pageSize": 20,
|
||||||
|
"orderBy": "createdUtc",
|
||||||
|
"orderDesc": "true",
|
||||||
|
"addressUprn": "100061885568",
|
||||||
|
"companyId": company_id,
|
||||||
|
}
|
||||||
|
|
||||||
|
r = requests.get(f"{base}/jobs", headers=headers, params=params)
|
||||||
|
|
||||||
|
payload = r.json()
|
||||||
|
|
||||||
|
property_id = payload["results"][0]["id"]
|
||||||
|
|
||||||
|
print("JOB:", property_id)
|
||||||
|
|
||||||
|
# 2️⃣ get evidence list
|
||||||
|
r = requests.get(f"{base}/jobs/{property_id}/evidence", headers=headers)
|
||||||
|
|
||||||
|
print(r.status_code)
|
||||||
|
|
||||||
|
evidence = r.json()
|
||||||
|
|
||||||
|
print(evidence)
|
||||||
|
|
||||||
|
|
||||||
|
# 3️⃣ get evidence metadata
|
||||||
|
|
||||||
|
if evidence:
|
||||||
|
evidence_id = evidence["results"][0]["fileId"]
|
||||||
|
|
||||||
|
meta_url = f"https://pashub.net/api/jobs/{property_id}/evidenceMetadata"
|
||||||
|
|
||||||
|
meta_params = {"evidenceIds": evidence_id}
|
||||||
|
|
||||||
|
r = requests.get(meta_url, headers=headers, params=meta_params)
|
||||||
|
r.raise_for_status()
|
||||||
|
|
||||||
|
meta = r.json()
|
||||||
|
|
||||||
|
container = meta["containerName"]
|
||||||
|
blob_uri = meta["blobUri"]
|
||||||
|
|
||||||
|
file = meta["files"][0]
|
||||||
|
file_id = file["fileId"]
|
||||||
|
file_name = file["fileName"]
|
||||||
|
|
||||||
|
base, sas = blob_uri.split("?", 1)
|
||||||
|
|
||||||
|
download_url = f"{base}{container}/{file_id}?{sas}"
|
||||||
|
|
||||||
|
print("Download URL:", download_url)
|
||||||
|
|
||||||
|
pdf = requests.get(download_url)
|
||||||
|
pdf.raise_for_status()
|
||||||
|
|
||||||
|
with open(file_name, "wb") as f:
|
||||||
|
f.write(pdf.content)
|
||||||
|
|
||||||
|
print("Saved:", file_name)
|
||||||
|
|
@ -29,9 +29,7 @@ from sqlalchemy import func
|
||||||
# PORTFOLIO_ID = 206
|
# PORTFOLIO_ID = 206
|
||||||
# SCENARIOS = [389]
|
# SCENARIOS = [389]
|
||||||
PORTFOLIO_ID = 581
|
PORTFOLIO_ID = 581
|
||||||
SCENARIOS = [
|
SCENARIOS = [1124]
|
||||||
1124
|
|
||||||
]
|
|
||||||
scenario_names = {
|
scenario_names = {
|
||||||
1124: "EPC C - Solar Focused",
|
1124: "EPC C - Solar Focused",
|
||||||
}
|
}
|
||||||
|
|
@ -234,7 +232,7 @@ for scenario_id in SCENARIOS:
|
||||||
# Get recs for this scenario
|
# Get recs for this scenario
|
||||||
recommended_measures_df = recommendations_df[
|
recommended_measures_df = recommendations_df[
|
||||||
recommendations_df["scenario_id"] == scenario_id
|
recommendations_df["scenario_id"] == scenario_id
|
||||||
][["property_id", "measure_type", "estimated_cost", "default"]]
|
][["property_id", "measure_type", "estimated_cost", "default"]]
|
||||||
recommended_measures_df = recommended_measures_df[
|
recommended_measures_df = recommended_measures_df[
|
||||||
recommended_measures_df["default"]
|
recommended_measures_df["default"]
|
||||||
]
|
]
|
||||||
|
|
@ -242,7 +240,7 @@ for scenario_id in SCENARIOS:
|
||||||
|
|
||||||
post_install_sap = recommendations_df[
|
post_install_sap = recommendations_df[
|
||||||
recommendations_df["scenario_id"] == scenario_id
|
recommendations_df["scenario_id"] == scenario_id
|
||||||
][["property_id", "default", "sap_points"]]
|
][["property_id", "default", "sap_points"]]
|
||||||
post_install_sap = post_install_sap[post_install_sap["default"]]
|
post_install_sap = post_install_sap[post_install_sap["default"]]
|
||||||
# Sum up the sap points by property id
|
# Sum up the sap points by property id
|
||||||
post_install_sap = (
|
post_install_sap = (
|
||||||
|
|
@ -320,7 +318,7 @@ for scenario_id in SCENARIOS:
|
||||||
z = df2[
|
z = df2[
|
||||||
(df2["predicted_post_works_epc"] != "D")
|
(df2["predicted_post_works_epc"] != "D")
|
||||||
& (df2["post_epc_rating"].astype(str) == "Epc.D")
|
& (df2["post_epc_rating"].astype(str) == "Epc.D")
|
||||||
]
|
]
|
||||||
|
|
||||||
df2["predicted_post_works_epc"].value_counts()
|
df2["predicted_post_works_epc"].value_counts()
|
||||||
df2["post_epc_rating"].astype(str).value_counts()
|
df2["post_epc_rating"].astype(str).value_counts()
|
||||||
|
|
@ -330,8 +328,6 @@ for scenario_id in SCENARIOS:
|
||||||
getting_works = df[df["total_retrofit_cost"] > 0]
|
getting_works = df[df["total_retrofit_cost"] > 0]
|
||||||
getting_works["predicted_post_works_epc"].value_counts()
|
getting_works["predicted_post_works_epc"].value_counts()
|
||||||
|
|
||||||
32565 / getting_works.shape[0]
|
|
||||||
|
|
||||||
df[df["predicted_post_works_sap"] == ""]
|
df[df["predicted_post_works_sap"] == ""]
|
||||||
|
|
||||||
# Expected columns list
|
# Expected columns list
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@ from io import BytesIO, StringIO
|
||||||
from urllib.parse import unquote
|
from urllib.parse import unquote
|
||||||
from utils.logger import setup_logger
|
from utils.logger import setup_logger
|
||||||
from botocore.exceptions import NoCredentialsError, PartialCredentialsError
|
from botocore.exceptions import NoCredentialsError, PartialCredentialsError
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
logger = setup_logger()
|
logger = setup_logger()
|
||||||
|
|
||||||
|
|
@ -316,7 +317,7 @@ def save_excel_to_s3(df, bucket_name, file_key):
|
||||||
logger.info(f"Excel file saved to S3 bucket '{bucket_name}' with key '{file_key}'")
|
logger.info(f"Excel file saved to S3 bucket '{bucket_name}' with key '{file_key}'")
|
||||||
|
|
||||||
|
|
||||||
def read_csv_from_s3(bucket_name, filepath):
|
def read_csv_from_s3(bucket_name: str, filepath: str) -> list[dict[str, str]]:
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Reading CSV file from S3 bucket '{bucket_name}' with key '{filepath}'"
|
f"Reading CSV file from S3 bucket '{bucket_name}' with key '{filepath}'"
|
||||||
)
|
)
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue