mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Merge pull request #1107 from Hestia-Homes/feature/rewrite_task_handler
Feature/rewrite task handler and postcode splitter
This commit is contained in:
commit
9ad4f3359f
201 changed files with 3429 additions and 120 deletions
|
|
@ -6,7 +6,7 @@ backend/.idea/*
|
|||
backend/.env
|
||||
recommendations/tests/*
|
||||
model_data/tests/*
|
||||
infrastructure/*
|
||||
deployment/*
|
||||
data_collection/*
|
||||
node_modules/*
|
||||
conservation_areas/*
|
||||
|
|
|
|||
46
.github/workflows/deploy_terraform.yml
vendored
46
.github/workflows/deploy_terraform.yml
vendored
|
|
@ -62,20 +62,20 @@ jobs:
|
|||
- uses: hashicorp/setup-terraform@v3
|
||||
|
||||
- name: Terraform Init
|
||||
working-directory: infrastructure/terraform/shared
|
||||
working-directory: deployment/terraform/shared
|
||||
run: terraform init -reconfigure
|
||||
|
||||
- name: Terraform Workspace
|
||||
working-directory: infrastructure/terraform/shared
|
||||
working-directory: deployment/terraform/shared
|
||||
run: terraform workspace select ${STAGE} || terraform workspace new ${STAGE}
|
||||
|
||||
- name: Terraform Plan
|
||||
working-directory: infrastructure/terraform/shared
|
||||
working-directory: deployment/terraform/shared
|
||||
run: terraform plan -var-file=${STAGE}.tfvars -out=tfplan
|
||||
|
||||
- name: Terraform Apply
|
||||
if: env.TERRAFORM_APPLY == 'true'
|
||||
working-directory: infrastructure/terraform/shared
|
||||
working-directory: deployment/terraform/shared
|
||||
run: terraform apply -auto-approve tfplan
|
||||
|
||||
# ============================================================
|
||||
|
|
@ -101,7 +101,7 @@ jobs:
|
|||
uses: ./.github/workflows/_deploy_lambda.yml
|
||||
with:
|
||||
lambda_name: ara_engine
|
||||
lambda_path: infrastructure/terraform/lambda/engine
|
||||
lambda_path: deployment/terraform/lambda/engine
|
||||
stage: ${{ needs.determine_stage.outputs.stage }}
|
||||
ecr_repo: engine-${{ needs.determine_stage.outputs.stage }}
|
||||
image_digest: ${{ needs.ara_engine_image.outputs.image_digest }}
|
||||
|
|
@ -150,7 +150,7 @@ jobs:
|
|||
uses: ./.github/workflows/_deploy_lambda.yml
|
||||
with:
|
||||
lambda_name: address2uprn
|
||||
lambda_path: infrastructure/terraform/lambda/address2UPRN
|
||||
lambda_path: deployment/terraform/lambda/address2UPRN
|
||||
stage: ${{ needs.determine_stage.outputs.stage }}
|
||||
ecr_repo: address2uprn-${{ needs.determine_stage.outputs.stage }}
|
||||
image_digest: ${{ needs.address2uprn_image.outputs.image_digest }}
|
||||
|
|
@ -191,7 +191,7 @@ jobs:
|
|||
uses: ./.github/workflows/_deploy_lambda.yml
|
||||
with:
|
||||
lambda_name: postcodeSplitter
|
||||
lambda_path: infrastructure/terraform/lambda/postcodeSplitter
|
||||
lambda_path: deployment/terraform/lambda/postcodeSplitter
|
||||
stage: ${{ needs.determine_stage.outputs.stage }}
|
||||
ecr_repo: postcode_splitter-${{ needs.determine_stage.outputs.stage }}
|
||||
image_digest: ${{ needs.postcodeSplitter_image.outputs.image_digest }}
|
||||
|
|
@ -231,7 +231,7 @@ jobs:
|
|||
uses: ./.github/workflows/_deploy_lambda.yml
|
||||
with:
|
||||
lambda_name: bulk_address2uprn_combiner
|
||||
lambda_path: infrastructure/terraform/lambda/bulk_address2uprn_combiner
|
||||
lambda_path: deployment/terraform/lambda/bulk_address2uprn_combiner
|
||||
stage: ${{ needs.determine_stage.outputs.stage }}
|
||||
ecr_repo: bulk_address2uprn_combiner-${{ needs.determine_stage.outputs.stage }}
|
||||
image_digest: ${{ needs.bulk_address2uprn_combiner_image.outputs.image_digest }}
|
||||
|
|
@ -271,7 +271,7 @@ jobs:
|
|||
uses: ./.github/workflows/_deploy_lambda.yml
|
||||
with:
|
||||
lambda_name: condition-etl
|
||||
lambda_path: infrastructure/terraform/lambda/condition-etl
|
||||
lambda_path: deployment/terraform/lambda/condition-etl
|
||||
stage: ${{ needs.determine_stage.outputs.stage }}
|
||||
ecr_repo: condition-etl-${{ needs.determine_stage.outputs.stage }}
|
||||
image_digest: ${{ needs.condition_etl_image.outputs.image_digest }}
|
||||
|
|
@ -311,7 +311,7 @@ jobs:
|
|||
uses: ./.github/workflows/_deploy_lambda.yml
|
||||
with:
|
||||
lambda_name: categorisation
|
||||
lambda_path: infrastructure/terraform/lambda/categorisation
|
||||
lambda_path: deployment/terraform/lambda/categorisation
|
||||
stage: ${{ needs.determine_stage.outputs.stage }}
|
||||
ecr_repo: categorisation-${{ needs.determine_stage.outputs.stage }}
|
||||
image_digest: ${{ needs.categorisation_image.outputs.image_digest }}
|
||||
|
|
@ -351,7 +351,7 @@ jobs:
|
|||
uses: ./.github/workflows/_deploy_lambda.yml
|
||||
with:
|
||||
lambda_name: ordnanceSurvey
|
||||
lambda_path: infrastructure/terraform/lambda/ordnanceSurvey
|
||||
lambda_path: deployment/terraform/lambda/ordnanceSurvey
|
||||
stage: ${{ needs.determine_stage.outputs.stage }}
|
||||
ecr_repo: ordnance-${{ needs.determine_stage.outputs.stage }}
|
||||
image_digest: ${{ needs.ordnanceSurvey_image.outputs.image_digest }}
|
||||
|
|
@ -386,7 +386,7 @@ jobs:
|
|||
uses: ./.github/workflows/_deploy_lambda.yml
|
||||
with:
|
||||
lambda_name: pashub_to_ara
|
||||
lambda_path: infrastructure/terraform/lambda/pashub_to_ara
|
||||
lambda_path: deployment/terraform/lambda/pashub_to_ara
|
||||
stage: ${{ needs.determine_stage.outputs.stage }}
|
||||
ecr_repo: pashub_to_ara-${{ needs.determine_stage.outputs.stage }}
|
||||
image_digest: ${{ needs.pashub_to_ara_image.outputs.image_digest }}
|
||||
|
|
@ -419,7 +419,7 @@ jobs:
|
|||
uses: ./.github/workflows/_deploy_lambda.yml
|
||||
with:
|
||||
lambda_name: ara_fast_api
|
||||
lambda_path: infrastructure/terraform/lambda/fast-api
|
||||
lambda_path: deployment/terraform/lambda/fast-api
|
||||
stage: ${{ needs.determine_stage.outputs.stage }}
|
||||
terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }}
|
||||
secrets:
|
||||
|
|
@ -458,17 +458,17 @@ jobs:
|
|||
- uses: hashicorp/setup-terraform@v3
|
||||
|
||||
- name: Terraform Init
|
||||
working-directory: infrastructure/terraform/cdn_certificate
|
||||
working-directory: deployment/terraform/cdn_certificate
|
||||
run: terraform init -reconfigure
|
||||
|
||||
- name: Terraform Workspace
|
||||
working-directory: infrastructure/terraform/cdn_certificate
|
||||
working-directory: deployment/terraform/cdn_certificate
|
||||
run: |
|
||||
terraform workspace select $STAGE \
|
||||
|| terraform workspace new $STAGE
|
||||
|
||||
- name: Terraform Plan
|
||||
working-directory: infrastructure/terraform/cdn_certificate
|
||||
working-directory: deployment/terraform/cdn_certificate
|
||||
run: |
|
||||
terraform plan \
|
||||
-var="stage=${STAGE}" \
|
||||
|
|
@ -476,7 +476,7 @@ jobs:
|
|||
|
||||
- name: Terraform Apply
|
||||
if: env.TERRAFORM_APPLY == 'true'
|
||||
working-directory: infrastructure/terraform/cdn_certificate
|
||||
working-directory: deployment/terraform/cdn_certificate
|
||||
run: terraform apply -auto-approve tfplan
|
||||
|
||||
|
||||
|
|
@ -503,17 +503,17 @@ jobs:
|
|||
- uses: hashicorp/setup-terraform@v3
|
||||
|
||||
- name: Terraform Init
|
||||
working-directory: infrastructure/terraform/cdn
|
||||
working-directory: deployment/terraform/cdn
|
||||
run: terraform init -reconfigure
|
||||
|
||||
- name: Terraform Workspace
|
||||
working-directory: infrastructure/terraform/cdn
|
||||
working-directory: deployment/terraform/cdn
|
||||
run: |
|
||||
terraform workspace select $STAGE \
|
||||
|| terraform workspace new $STAGE
|
||||
|
||||
- name: Terraform Plan
|
||||
working-directory: infrastructure/terraform/cdn
|
||||
working-directory: deployment/terraform/cdn
|
||||
run: |
|
||||
terraform plan \
|
||||
-var="stage=${STAGE}" \
|
||||
|
|
@ -521,7 +521,7 @@ jobs:
|
|||
|
||||
- name: Terraform Apply
|
||||
if: env.TERRAFORM_APPLY == 'true'
|
||||
working-directory: infrastructure/terraform/cdn
|
||||
working-directory: deployment/terraform/cdn
|
||||
run: terraform apply -auto-approve tfplan
|
||||
|
||||
# ============================================================
|
||||
|
|
@ -562,7 +562,7 @@ jobs:
|
|||
uses: ./.github/workflows/_deploy_lambda.yml
|
||||
with:
|
||||
lambda_name: magic_plan
|
||||
lambda_path: infrastructure/terraform/lambda/magic_plan
|
||||
lambda_path: deployment/terraform/lambda/magic_plan
|
||||
stage: ${{ needs.determine_stage.outputs.stage }}
|
||||
ecr_repo: magic-plan-${{ needs.determine_stage.outputs.stage }}
|
||||
image_digest: ${{ needs.magic_plan_image.outputs.image_digest }}
|
||||
|
|
@ -585,7 +585,7 @@ jobs:
|
|||
uses: ./.github/workflows/_deploy_lambda.yml
|
||||
with:
|
||||
lambda_name: hubspot-etl-to-ara
|
||||
lambda_path: infrastructure/terraform/lambda/hubspot_deal_etl
|
||||
lambda_path: deployment/terraform/lambda/hubspot_deal_etl
|
||||
stage: ${{ needs.determine_stage.outputs.stage }}
|
||||
ecr_repo: hubspot-etl-${{ needs.determine_stage.outputs.stage }}
|
||||
image_digest: ${{ needs.hubspot_etl_image.outputs.image_digest }}
|
||||
|
|
|
|||
12
.github/workflows/unit_tests.yml
vendored
12
.github/workflows/unit_tests.yml
vendored
|
|
@ -60,3 +60,15 @@ jobs:
|
|||
-e DB_PASSWORD=test \
|
||||
-e DB_PORT=5432 \
|
||||
model-test pytest -vv -m 'not integration'
|
||||
|
||||
# The DDD rewrite (tests/) defines SQLModel table classes that map to the
|
||||
# same physical tables as the legacy backend models. Both sets share the
|
||||
# one global SQLModel.metadata, so they cannot be imported into the same
|
||||
# pytest process. It runs as a separate invocation until the legacy
|
||||
# models are retired. Its DB is spawned in-process by pytest-postgresql,
|
||||
# so no DB service or env is required.
|
||||
- name: Run DDD tests
|
||||
run: |
|
||||
docker run --rm \
|
||||
--network host \
|
||||
model-test pytest -vv tests/
|
||||
|
|
|
|||
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -121,6 +121,7 @@ celerybeat.pid
|
|||
|
||||
# Environments
|
||||
.env
|
||||
.env.local
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
|
|
|
|||
29
AGENTS.md
29
AGENTS.md
|
|
@ -1,29 +0,0 @@
|
|||
|
||||
<!-- BACKLOG.MD MCP GUIDELINES START -->
|
||||
|
||||
<CRITICAL_INSTRUCTION>
|
||||
|
||||
## BACKLOG WORKFLOW INSTRUCTIONS
|
||||
|
||||
This project uses Backlog.md MCP for all task and project management activities.
|
||||
|
||||
**CRITICAL GUIDANCE**
|
||||
|
||||
- If your client supports MCP resources, read `backlog://workflow/overview` to understand when and how to use Backlog for this project.
|
||||
- If your client only supports tools or the above request fails, call `backlog.get_backlog_instructions()` to load the tool-oriented overview. Use the `instruction` selector when you need `task-creation`, `task-execution`, or `task-finalization`.
|
||||
|
||||
- **First time working here?** Read the overview resource IMMEDIATELY to learn the workflow
|
||||
- **Already familiar?** You should have the overview cached ("## Backlog.md Overview (MCP)")
|
||||
- **When to read it**: BEFORE creating tasks, or when you're unsure whether to track work
|
||||
|
||||
These guides cover:
|
||||
- Decision framework for when to create tasks
|
||||
- Search-first workflow to avoid duplicates
|
||||
- Links to detailed guides for task creation, execution, and finalization
|
||||
- MCP tools reference
|
||||
|
||||
You MUST read the overview resource to understand the complete workflow. The information is NOT summarized here.
|
||||
|
||||
</CRITICAL_INSTRUCTION>
|
||||
|
||||
<!-- BACKLOG.MD MCP GUIDELINES END -->
|
||||
29
CLAUDE.md
29
CLAUDE.md
|
|
@ -1,33 +1,4 @@
|
|||
|
||||
<!-- BACKLOG.MD MCP GUIDELINES START -->
|
||||
|
||||
<CRITICAL_INSTRUCTION>
|
||||
|
||||
## BACKLOG WORKFLOW INSTRUCTIONS
|
||||
|
||||
This project uses Backlog.md MCP for all task and project management activities.
|
||||
|
||||
**CRITICAL GUIDANCE**
|
||||
|
||||
- If your client supports MCP resources, read `backlog://workflow/overview` to understand when and how to use Backlog for this project.
|
||||
- If your client only supports tools or the above request fails, call `backlog.get_backlog_instructions()` to load the tool-oriented overview. Use the `instruction` selector when you need `task-creation`, `task-execution`, or `task-finalization`.
|
||||
|
||||
- **First time working here?** Read the overview resource IMMEDIATELY to learn the workflow
|
||||
- **Already familiar?** You should have the overview cached ("## Backlog.md Overview (MCP)")
|
||||
- **When to read it**: BEFORE creating tasks, or when you're unsure whether to track work
|
||||
|
||||
These guides cover:
|
||||
- Decision framework for when to create tasks
|
||||
- Search-first workflow to avoid duplicates
|
||||
- Links to detailed guides for task creation, execution, and finalization
|
||||
- MCP tools reference
|
||||
|
||||
You MUST read the overview resource to understand the complete workflow. The information is NOT summarized here.
|
||||
|
||||
</CRITICAL_INSTRUCTION>
|
||||
|
||||
<!-- BACKLOG.MD MCP GUIDELINES END -->
|
||||
|
||||
## Available Skills
|
||||
|
||||
Five Claude Code skills are installed in this repo's dev container. Each maps to a phase of the feature lifecycle.
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ model_data/local_data/
|
|||
backend/node_modules/
|
||||
backend/.idea/
|
||||
backend/.env
|
||||
infrastructure/
|
||||
deployment/
|
||||
data_collection/
|
||||
node_modules/
|
||||
conservation_areas/
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ Invoke `/ubiquitous-language` in any session to extract new terms from the conve
|
|||
|------|------------|------------------|
|
||||
| **UPRN** | Unique Property Reference Number — the government-issued permanent identifier for a physical address in the UK. | "property ID", "address ID", "code" |
|
||||
| **Postcode** | A UK postal code used to group nearby addresses; the primary search key for finding EPC records. | "zip code", "postal code" |
|
||||
| **User Address** | A free-text address string provided by a user or imported from a customer dataset, before any normalisation or matching. | "user input", "raw address", "user_inputed_address" |
|
||||
| **User Address** | A structured dataclass (`domain.addresses.user_address.UserAddress`) capturing a customer-supplied address: a free-text `user_address` line, a canonical `postcode` (sanitised on construction), and an optional `internal_reference`. The bare string sense -- the raw free-text address line as it arrives from upstream ingestion, before being wrapped -- remains valid when discussing CSV columns, API payloads, or other upstream contexts; in domain code, prefer the dataclass. | "user input", "raw address", "user_inputed_address" |
|
||||
| **Dwelling** | A single residential unit that can hold an EPC — a house, flat, or maisonette. | "property", "unit", "home" |
|
||||
|
||||
## Address Matching
|
||||
|
|
@ -72,7 +72,7 @@ Invoke `/ubiquitous-language` in any session to extract new terms from the conve
|
|||
|
||||
## Flagged ambiguities
|
||||
|
||||
- **"address"** appears as both the raw **User Address** (free-text from customer data) and a structured field on an **EPC Search Result** (normalised address lines). Always qualify: "user address" vs "EPC address" or "address line 1".
|
||||
- **"address"** appears as both the raw **User Address** (free-text from customer data, or the structured `UserAddress` dataclass that wraps it) and a structured field on an **EPC Search Result** (normalised address lines). Always qualify: "user address" vs "EPC address" or "address line 1". Within `domain/`, **User Address** specifically means the `UserAddress` dataclass; in upstream ingestion contexts (CSV columns, SQS payloads) it can still mean the raw string sense.
|
||||
- **"score"** is used for the `AddressMatch.score()` function output, the `lexiscore` DataFrame column, and informally in conversation. Prefer **Lexiscore** in domain discussions; reserve "score" for method-level code comments.
|
||||
- **"user_inputed_address"** in `backend/address2UPRN/main.py` is a misspelling and a synonym for **User Address** — the canonical term. New code should use `user_address`.
|
||||
- **"EPC"** is overloaded as both the document (an Energy Performance Certificate) and the rating band letter. Use **EPC** for the document and **EPC Band** for the letter.
|
||||
|
|
|
|||
0
applications/__init__.py
Normal file
0
applications/__init__.py
Normal file
34
applications/postcode_splitter/Dockerfile
Normal file
34
applications/postcode_splitter/Dockerfile
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
FROM public.ecr.aws/lambda/python:3.11
|
||||
|
||||
# Postgres host/port/database are baked into the image at build time from
|
||||
# the deploy workflow's --build-arg values (GitHub Actions DEV_DB_* secrets),
|
||||
# mirroring backend/postcode_splitter/handler/Dockerfile. They map onto the
|
||||
# POSTGRES_* names PostgresConfig.from_env reads. Username/password are NOT
|
||||
# baked in -- Terraform injects those as Lambda env vars from Secrets Manager.
|
||||
ARG DEV_DB_HOST
|
||||
ARG DEV_DB_PORT
|
||||
ARG DEV_DB_NAME
|
||||
|
||||
ENV POSTGRES_HOST=${DEV_DB_HOST}
|
||||
ENV POSTGRES_PORT=${DEV_DB_PORT}
|
||||
ENV POSTGRES_DATABASE=${DEV_DB_NAME}
|
||||
|
||||
WORKDIR /var/task
|
||||
|
||||
COPY applications/postcode_splitter/requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# Copy the layered source the handler imports from. The new splitter pulls
|
||||
# only DDD-shaped packages — no pandas, no legacy backend/.
|
||||
COPY domain/ domain/
|
||||
COPY infrastructure/ infrastructure/
|
||||
COPY orchestration/ orchestration/
|
||||
COPY repositories/ repositories/
|
||||
COPY utilities/ utilities/
|
||||
COPY applications/ applications/
|
||||
|
||||
# Place the handler at the Lambda task root so the runtime can resolve
|
||||
# ``main.handler`` without an extra package prefix.
|
||||
COPY applications/postcode_splitter/handler.py /var/task/main.py
|
||||
|
||||
CMD ["main.handler"]
|
||||
0
applications/postcode_splitter/__init__.py
Normal file
0
applications/postcode_splitter/__init__.py
Normal file
52
applications/postcode_splitter/handler.py
Normal file
52
applications/postcode_splitter/handler.py
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from typing import Any
|
||||
|
||||
import boto3
|
||||
|
||||
from applications.postcode_splitter.postcode_splitter_trigger_body import (
|
||||
PostcodeSplitterTriggerBody,
|
||||
)
|
||||
from infrastructure.address2uprn_queue_client import Address2UprnQueueClient
|
||||
from infrastructure.csv_s3_client import CsvS3Client
|
||||
from orchestration.postcode_splitter_orchestrator import PostcodeSplitterOrchestrator
|
||||
from orchestration.task_orchestrator import TaskOrchestrator
|
||||
from repositories.user_address.user_address_csv_s3_repository import (
|
||||
UserAddressCsvS3Repository,
|
||||
)
|
||||
from utilities.aws_lambda.subtask_handler import subtask_handler
|
||||
|
||||
|
||||
@subtask_handler()
|
||||
def handler(
|
||||
body: dict[str, Any], context: Any, task_orchestrator: TaskOrchestrator
|
||||
) -> dict[str, list[str]]:
|
||||
trigger = PostcodeSplitterTriggerBody.model_validate(body)
|
||||
|
||||
bucket = os.environ["S3_BUCKET_NAME"]
|
||||
queue_url = os.environ["ADDRESS2UPRN_QUEUE_URL"]
|
||||
|
||||
# boto3.client is overloaded per-service in the installed stubs; cast
|
||||
# to Any so the strict-mode checker treats it as opaque.
|
||||
boto3_client: Any = boto3.client # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
|
||||
boto_s3: Any = boto3_client("s3")
|
||||
boto_sqs: Any = boto3_client("sqs")
|
||||
|
||||
csv_client = CsvS3Client(boto_s3, bucket)
|
||||
user_address_repo = UserAddressCsvS3Repository(csv_client, bucket)
|
||||
queue_client = Address2UprnQueueClient(boto_sqs, queue_url)
|
||||
|
||||
splitter = PostcodeSplitterOrchestrator(
|
||||
task_orchestrator=task_orchestrator,
|
||||
user_address_repo=user_address_repo,
|
||||
queue_client=queue_client,
|
||||
)
|
||||
|
||||
child_ids = splitter.split_and_dispatch(
|
||||
parent_task_id=trigger.task_id,
|
||||
parent_subtask_id=trigger.sub_task_id,
|
||||
input_s3_uri=trigger.s3_uri,
|
||||
)
|
||||
|
||||
return {"child_subtask_ids": [str(cid) for cid in child_ids]}
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
# Local-test environment for the postcode_splitter Lambda.
|
||||
#
|
||||
# cp .env.local.example .env.local then fill in the values below.
|
||||
#
|
||||
# .env.local is gitignored. The container hits REAL AWS and a REAL Postgres,
|
||||
# so every value here points at infrastructure that actually exists.
|
||||
#
|
||||
# NOTE: the new DDD code uses different env var names than the repo root
|
||||
# .env. The mapping (root .env name -> var here) is given per section.
|
||||
# Keep comments on their own lines — docker-compose's env_file parser folds a
|
||||
# trailing "# ..." into the value.
|
||||
|
||||
# --- Postgres (orchestration/default_orchestrator -> PostgresConfig.from_env) ---
|
||||
# POSTGRES_HOST <- DB_HOST, PORT <- DB_PORT, USERNAME <- DB_USERNAME,
|
||||
# PASSWORD <- DB_PASSWORD, DATABASE <- DB_NAME.
|
||||
POSTGRES_HOST=
|
||||
POSTGRES_PORT=5432
|
||||
POSTGRES_USERNAME=
|
||||
POSTGRES_PASSWORD=
|
||||
POSTGRES_DATABASE=
|
||||
# POSTGRES_DRIVER=psycopg2 (optional; defaults to psycopg2)
|
||||
|
||||
# --- Handler config (applications/postcode_splitter/handler.py) ---
|
||||
# S3_BUCKET_NAME: bucket holding the input address CSV (root .env: DATA_BUCKET).
|
||||
# ADDRESS2UPRN_QUEUE_URL: SQS queue the splitter fans batches out to; not in
|
||||
# the root .env (Terraform sets it in prod).
|
||||
S3_BUCKET_NAME=
|
||||
ADDRESS2UPRN_QUEUE_URL=
|
||||
|
||||
# --- AWS credentials for boto3 (S3 + SQS clients) ---
|
||||
AWS_ACCESS_KEY_ID=
|
||||
AWS_SECRET_ACCESS_KEY=
|
||||
AWS_DEFAULT_REGION=eu-west-2
|
||||
# AWS_SESSION_TOKEN= (only if using temporary/SSO credentials)
|
||||
|
|
@ -0,0 +1,9 @@
|
|||
services:
|
||||
postcode-splitter:
|
||||
build:
|
||||
context: ../../../
|
||||
dockerfile: applications/postcode_splitter/Dockerfile
|
||||
ports:
|
||||
- "9001:8080"
|
||||
env_file:
|
||||
- .env.local
|
||||
28
applications/postcode_splitter/local_handler/invoke_local_lambda.py
Executable file
28
applications/postcode_splitter/local_handler/invoke_local_lambda.py
Executable file
|
|
@ -0,0 +1,28 @@
|
|||
#!/usr/bin/env python3
|
||||
import json
|
||||
import requests
|
||||
|
||||
HOST = "localhost"
|
||||
PORT = "9001"
|
||||
|
||||
LAMBDA_URL = f"http://{HOST}:{PORT}/2015-03-31/functions/function/invocations"
|
||||
|
||||
payload = {
|
||||
"Records": [
|
||||
{
|
||||
"body": json.dumps(
|
||||
{
|
||||
"task_id": "f4b3332f-c0cc-481f-96a5-d39860a647cf",
|
||||
"sub_task_id": "14c042de-40c4-473b-8cd8-72c983a94a8d",
|
||||
"s3_uri": "s3://retrofit-data-dev/ara_raw_inputs/calico/Calico Homes Full list EPC Properties(Sheet2) (1) (1).csv",
|
||||
}
|
||||
)
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
response = requests.post(LAMBDA_URL, json=payload)
|
||||
|
||||
print("Status code:", response.status_code)
|
||||
print("Response:")
|
||||
print(response.text)
|
||||
12
applications/postcode_splitter/local_handler/run_local.sh
Executable file
12
applications/postcode_splitter/local_handler/run_local.sh
Executable file
|
|
@ -0,0 +1,12 @@
|
|||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
cd "$(dirname "$0")"
|
||||
|
||||
if [ ! -f .env.local ]; then
|
||||
cp .env.local.example .env.local
|
||||
echo "Created .env.local from the template — fill it in, then re-run." >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
docker compose build --no-cache
|
||||
docker compose up --force-recreate
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
from uuid import UUID
|
||||
|
||||
from pydantic import BaseModel, ConfigDict
|
||||
|
||||
|
||||
class PostcodeSplitterTriggerBody(BaseModel):
|
||||
model_config = ConfigDict(extra="allow")
|
||||
|
||||
task_id: UUID
|
||||
sub_task_id: UUID
|
||||
s3_uri: str
|
||||
4
applications/postcode_splitter/requirements.txt
Normal file
4
applications/postcode_splitter/requirements.txt
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
boto3
|
||||
pydantic
|
||||
sqlmodel
|
||||
psycopg2-binary
|
||||
|
|
@ -79,23 +79,23 @@ def app():
|
|||
"""
|
||||
|
||||
data_folder = "/workspaces/model/asset_list"
|
||||
data_filename = "input.xlsx"
|
||||
sheet_name = "Handovers"
|
||||
postcode_column = "POSTCODE"
|
||||
address1_column = "Full Addres"
|
||||
data_filename = "lincs_address_list.xlsx"
|
||||
sheet_name = "Sheet1"
|
||||
postcode_column = "Postcode"
|
||||
address1_column = "Deal Name"
|
||||
address1_method = None
|
||||
fulladdress_column = "Full Addres"
|
||||
fulladdress_column = "Deal Name"
|
||||
address_cols_to_concat = []
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = None
|
||||
landlord_os_uprn = "domna_found_uprn"
|
||||
landlord_property_type = "PROPERTY TYPE" # Good to include if landlord gave
|
||||
landlord_built_form = "Type Description" # Good to include if landlord gave
|
||||
landlord_os_uprn = None
|
||||
landlord_property_type = None # Good to include if landlord gave
|
||||
landlord_built_form = None # Good to include if landlord gave
|
||||
landlord_wall_construction = None
|
||||
landlord_roof_construction = None
|
||||
landlord_heating_system = None
|
||||
landlord_existing_pv = None
|
||||
landlord_property_id = "PROP REF"
|
||||
landlord_property_id = "landlord_id"
|
||||
landlord_sap = None
|
||||
outcomes_filename = None
|
||||
outcomes_sheetname = None
|
||||
|
|
@ -468,9 +468,3 @@ def app():
|
|||
asset_list.duplicated_addresses.to_excel(
|
||||
writer, sheet_name="Duplicate Properties", index=False
|
||||
)
|
||||
|
||||
|
||||
|
||||
|
||||
for key,value in dict.items():
|
||||
lsakjfldsa
|
||||
|
|
@ -8,4 +8,5 @@ boto3==1.35.44
|
|||
sqlmodel
|
||||
sqlalchemy==2.0.36
|
||||
psycopg2-binary==2.9.10
|
||||
pydantic-settings==2.6.0
|
||||
pydantic-settings==2.6.0
|
||||
httpx
|
||||
|
|
@ -10,7 +10,7 @@
|
|||
### 2. Add infrastructure prerequisites (shared stack)
|
||||
- Add a new ECR repository in:
|
||||
|
||||
infrastructure/terraform/shared/main.tf
|
||||
deployment/terraform/shared/main.tf
|
||||
|
||||
- Create a PR to deploy this to main then dev in order to deploy the shared stack
|
||||
|
||||
|
|
@ -40,20 +40,6 @@ module "lambda" {
|
|||
LOG_LEVEL = "info"
|
||||
DB_USERNAME = local.db_credentials.db_assessment_model_username
|
||||
DB_PASSWORD = local.db_credentials.db_assessment_model_password
|
||||
GOOGLE_SOLAR_API_KEY = "test"
|
||||
SAP_PREDICTIONS_BUCKET = "test"
|
||||
CARBON_PREDICTIONS_BUCKET = "test"
|
||||
HEAT_PREDICTIONS_BUCKET = "test"
|
||||
HEATING_KWH_PREDICTIONS_BUCKET = "test"
|
||||
HOTWATER_KWH_PREDICTIONS_BUCKET = "test"
|
||||
API_KEY = "test"
|
||||
ENVIRONMENT = "test"
|
||||
SECRET_KEY = "test"
|
||||
PLAN_TRIGGER_BUCKET = "test"
|
||||
DATA_BUCKET = "test"
|
||||
EPC_AUTH_TOKEN = "test"
|
||||
ENGINE_SQS_URL = "test"
|
||||
ENERGY_ASSESSMENTS_BUCKET = "test"
|
||||
ADDRESS2UPRN_QUEUE_URL = data.terraform_remote_state.address2uprn.outputs.address2uprn_queue_url
|
||||
S3_BUCKET_NAME = data.terraform_remote_state.shared.outputs.retrofit_sap_data_bucket_name
|
||||
},
|
||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Reference in a new issue