fixed merge conflicts from main

This commit is contained in:
Khalim Conn-Kowlessar 2026-05-26 11:21:09 +00:00
commit 87b6045c97
233 changed files with 4750 additions and 191 deletions

View file

@ -5,7 +5,7 @@
"remoteUser": "vscode",
"workspaceFolder": "/workspaces/model",
"initializeCommand": "docker network create shared-dev 2>/dev/null || true; test -d \"$HOME/.config/gh\" || test -n \"$GITHUB_TOKEN\" || { echo >&2 'error: no GitHub auth found. Run `gh auth login && gh auth setup-git` on the host, or export GITHUB_TOKEN, then retry.'; exit 1; }",
"postCreateCommand": "gh repo clone Hestia-Homes/agentic-toolkit /tmp/agentic-toolkit -- --branch 0.0.5 --depth 1 && bash /tmp/agentic-toolkit/setup.sh",
"postCreateCommand": "gh repo clone Hestia-Homes/agentic-toolkit /tmp/agentic-toolkit -- --branch 0.0.7 --depth 1 && bash /tmp/agentic-toolkit/setup.sh",
"postStartCommand": "bash .devcontainer/backend/post-install.sh",
"mounts": [
"source=${localEnv:HOME},target=/workspaces/home,type=bind",

View file

@ -6,7 +6,7 @@ backend/.idea/*
backend/.env
recommendations/tests/*
model_data/tests/*
infrastructure/*
deployment/*
data_collection/*
node_modules/*
conservation_areas/*

View file

@ -40,6 +40,8 @@ on:
required: false
EPC_AUTH_TOKEN:
required: false
OPEN_EPC_API_TOKEN:
required: false
jobs:
build:
@ -50,6 +52,7 @@ jobs:
DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }}
DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }}
EPC_AUTH_TOKEN: ${{ secrets.EPC_AUTH_TOKEN }}
OPEN_EPC_API_TOKEN: ${{ secrets.OPEN_EPC_API_TOKEN }}
outputs:
image_digest: ${{ steps.digest.outputs.image_digest }}

View file

@ -80,6 +80,10 @@ on:
required: false
TF_VAR_pashub_password:
required: false
TF_VAR_pashub_coordination_email:
required: false
TF_VAR_pashub_coordination_password:
required: false
TF_VAR_hubspot_api_key:
required: false
@ -154,6 +158,8 @@ jobs:
TF_VAR_social_housing_wave_3_sharepoint_id: ${{ secrets.TF_VAR_social_housing_wave_3_sharepoint_id }}
TF_VAR_pashub_email: ${{ secrets.TF_VAR_pashub_email }}
TF_VAR_pashub_password: ${{ secrets.TF_VAR_pashub_password }}
TF_VAR_pashub_coordination_email: ${{ secrets.TF_VAR_pashub_coordination_email }}
TF_VAR_pashub_coordination_password: ${{ secrets.TF_VAR_pashub_coordination_password }}
TF_VAR_hubspot_api_key: ${{ secrets.TF_VAR_hubspot_api_key }}
TF_VAR_magicplan_customer_id: ${{ secrets.TF_VAR_magicplan_customer_id }}
TF_VAR_magicplan_api_key: ${{ secrets.TF_VAR_magicplan_api_key }}
@ -202,6 +208,8 @@ jobs:
TF_VAR_social_housing_wave_3_sharepoint_id: ${{ secrets.TF_VAR_social_housing_wave_3_sharepoint_id }}
TF_VAR_pashub_email: ${{ secrets.TF_VAR_pashub_email }}
TF_VAR_pashub_password: ${{ secrets.TF_VAR_pashub_password }}
TF_VAR_pashub_coordination_email: ${{ secrets.TF_VAR_pashub_coordination_email }}
TF_VAR_pashub_coordination_password: ${{ secrets.TF_VAR_pashub_coordination_password }}
TF_VAR_hubspot_api_key: ${{ secrets.TF_VAR_hubspot_api_key }}
TF_VAR_magicplan_customer_id: ${{ secrets.TF_VAR_magicplan_customer_id }}
TF_VAR_magicplan_api_key: ${{ secrets.TF_VAR_magicplan_api_key }}

View file

@ -0,0 +1,85 @@
# Reusable workflow: builds a Lambda container image, starts it behind the
# AWS Lambda Runtime Interface Emulator (RIE), invokes it once, and fails if
# the response reports an import error.
name: Lambda smoke test

on:
  workflow_call:
    inputs:
      dockerfile_path:
        required: true
        type: string
      build_context:
        required: false
        default: "."
        type: string
      service_name:
        required: true
        type: string

jobs:
  smoke-test:
    runs-on: ubuntu-latest
    # Caller-supplied inputs reach the shell only through environment
    # variables. Expanding `${{ inputs.* }}` directly inside a `run:` script
    # pastes the raw text into the shell source, so a value containing spaces
    # or shell metacharacters would be re-parsed as code.
    env:
      DOCKERFILE_PATH: ${{ inputs.dockerfile_path }}
      BUILD_CONTEXT: ${{ inputs.build_context }}
      IMAGE: ${{ inputs.service_name }}-smoke-test:latest
      CONTAINER: ${{ inputs.service_name }}-smoke-test
    steps:
      - uses: actions/checkout@v4

      - name: Download AWS Lambda RIE
        run: |
          mkdir -p ~/.aws-lambda-rie
          curl -fsSL -o ~/.aws-lambda-rie/aws-lambda-rie \
            https://github.com/aws/aws-lambda-runtime-interface-emulator/releases/latest/download/aws-lambda-rie
          chmod +x ~/.aws-lambda-rie/aws-lambda-rie

      - name: Build Lambda image
        run: |
          docker build \
            --platform linux/amd64 \
            -f "$DOCKERFILE_PATH" \
            -t "$IMAGE" \
            "$BUILD_CONTEXT"

      - name: Start Lambda container
        run: |
          # Flatten the image's ENTRYPOINT and CMD arrays into space-separated
          # strings so they can be inspected and re-used below.
          ENTRY=$(docker inspect --format='{{range .Config.Entrypoint}}{{.}} {{end}}' "$IMAGE")
          CMD_ARGS=$(docker inspect --format='{{range .Config.Cmd}}{{.}} {{end}}' "$IMAGE")

          if echo "$ENTRY" | grep -q "lambda-entrypoint.sh"; then
            # AWS base image — RIE is bundled
            docker run -d --name "$CONTAINER" \
              -p 9000:8080 \
              "$IMAGE"
          else
            # Custom base — mount RIE from runner and re-wire entrypoint.
            # $ENTRY and $CMD_ARGS are left unquoted on purpose: each word
            # must become a separate argument to the emulator.
            docker run -d --name "$CONTAINER" \
              -v "$HOME/.aws-lambda-rie:/aws-lambda-rie" \
              -p 9000:8080 \
              --entrypoint /aws-lambda-rie/aws-lambda-rie \
              "$IMAGE" \
              $ENTRY $CMD_ARGS
          fi

      - name: Invoke Lambda and check for import errors
        run: |
          # Retry while the container is still starting and refusing
          # connections (up to 15 attempts, 1 second apart).
          response=$(curl -s --retry-connrefused --retry 15 --retry-delay 1 \
            -X POST \
            http://localhost:9000/2015-03-31/functions/function/invocations \
            -H "Content-Type: application/json" \
            -d '{"Records":[{"body":"{}"}]}')
          echo "Response: $response"

          if [ -z "$response" ]; then
            echo "No response from Lambda RIE"
            exit 1
          fi

          # Only import failures fail the job; any other response passes.
          if echo "$response" | grep -qE 'ImportModuleError|ModuleNotFoundError|ImportError'; then
            echo "Import error detected in handler"
            exit 1
          fi

      - name: Dump container logs
        if: always()
        run: docker logs "$CONTAINER"

      - name: Tear down container
        if: always()
        run: docker rm -f "$CONTAINER"

View file

@ -62,20 +62,20 @@ jobs:
- uses: hashicorp/setup-terraform@v3
- name: Terraform Init
working-directory: infrastructure/terraform/shared
working-directory: deployment/terraform/shared
run: terraform init -reconfigure
- name: Terraform Workspace
working-directory: infrastructure/terraform/shared
working-directory: deployment/terraform/shared
run: terraform workspace select ${STAGE} || terraform workspace new ${STAGE}
- name: Terraform Plan
working-directory: infrastructure/terraform/shared
working-directory: deployment/terraform/shared
run: terraform plan -var-file=${STAGE}.tfvars -out=tfplan
- name: Terraform Apply
if: env.TERRAFORM_APPLY == 'true'
working-directory: infrastructure/terraform/shared
working-directory: deployment/terraform/shared
run: terraform apply -auto-approve tfplan
# ============================================================
@ -101,7 +101,7 @@ jobs:
uses: ./.github/workflows/_deploy_lambda.yml
with:
lambda_name: ara_engine
lambda_path: infrastructure/terraform/lambda/engine
lambda_path: deployment/terraform/lambda/engine
stage: ${{ needs.determine_stage.outputs.stage }}
ecr_repo: engine-${{ needs.determine_stage.outputs.stage }}
image_digest: ${{ needs.ara_engine_image.outputs.image_digest }}
@ -133,6 +133,7 @@ jobs:
DEV_DB_PORT=$DEV_DB_PORT
DEV_DB_NAME=$DEV_DB_NAME
EPC_AUTH_TOKEN=$EPC_AUTH_TOKEN
OPEN_EPC_API_TOKEN=$OPEN_EPC_API_TOKEN
secrets:
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
@ -141,6 +142,7 @@ jobs:
DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }}
DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }}
EPC_AUTH_TOKEN: ${{ secrets.DEV_EPC_AUTH_TOKEN }}
OPEN_EPC_API_TOKEN: ${{ secrets.DEV_OPEN_EPC_API_TOKEN }}
# ============================================================
# Deploy Address 2 UPRN Lambda
@ -150,7 +152,7 @@ jobs:
uses: ./.github/workflows/_deploy_lambda.yml
with:
lambda_name: address2uprn
lambda_path: infrastructure/terraform/lambda/address2UPRN
lambda_path: deployment/terraform/lambda/address2UPRN
stage: ${{ needs.determine_stage.outputs.stage }}
ecr_repo: address2uprn-${{ needs.determine_stage.outputs.stage }}
image_digest: ${{ needs.address2uprn_image.outputs.image_digest }}
@ -169,7 +171,7 @@ jobs:
uses: ./.github/workflows/_build_image.yml
with:
ecr_repo: postcode_splitter-${{ needs.determine_stage.outputs.stage }}
dockerfile_path: backend/postcode_splitter/handler/Dockerfile
dockerfile_path: applications/postcode_splitter/Dockerfile
build_context: .
build_args: |
DEV_DB_HOST=$DEV_DB_HOST
@ -191,7 +193,7 @@ jobs:
uses: ./.github/workflows/_deploy_lambda.yml
with:
lambda_name: postcodeSplitter
lambda_path: infrastructure/terraform/lambda/postcodeSplitter
lambda_path: deployment/terraform/lambda/postcodeSplitter
stage: ${{ needs.determine_stage.outputs.stage }}
ecr_repo: postcode_splitter-${{ needs.determine_stage.outputs.stage }}
image_digest: ${{ needs.postcodeSplitter_image.outputs.image_digest }}
@ -231,7 +233,7 @@ jobs:
uses: ./.github/workflows/_deploy_lambda.yml
with:
lambda_name: bulk_address2uprn_combiner
lambda_path: infrastructure/terraform/lambda/bulk_address2uprn_combiner
lambda_path: deployment/terraform/lambda/bulk_address2uprn_combiner
stage: ${{ needs.determine_stage.outputs.stage }}
ecr_repo: bulk_address2uprn_combiner-${{ needs.determine_stage.outputs.stage }}
image_digest: ${{ needs.bulk_address2uprn_combiner_image.outputs.image_digest }}
@ -271,7 +273,7 @@ jobs:
uses: ./.github/workflows/_deploy_lambda.yml
with:
lambda_name: condition-etl
lambda_path: infrastructure/terraform/lambda/condition-etl
lambda_path: deployment/terraform/lambda/condition-etl
stage: ${{ needs.determine_stage.outputs.stage }}
ecr_repo: condition-etl-${{ needs.determine_stage.outputs.stage }}
image_digest: ${{ needs.condition_etl_image.outputs.image_digest }}
@ -311,7 +313,7 @@ jobs:
uses: ./.github/workflows/_deploy_lambda.yml
with:
lambda_name: categorisation
lambda_path: infrastructure/terraform/lambda/categorisation
lambda_path: deployment/terraform/lambda/categorisation
stage: ${{ needs.determine_stage.outputs.stage }}
ecr_repo: categorisation-${{ needs.determine_stage.outputs.stage }}
image_digest: ${{ needs.categorisation_image.outputs.image_digest }}
@ -351,7 +353,7 @@ jobs:
uses: ./.github/workflows/_deploy_lambda.yml
with:
lambda_name: ordnanceSurvey
lambda_path: infrastructure/terraform/lambda/ordnanceSurvey
lambda_path: deployment/terraform/lambda/ordnanceSurvey
stage: ${{ needs.determine_stage.outputs.stage }}
ecr_repo: ordnance-${{ needs.determine_stage.outputs.stage }}
image_digest: ${{ needs.ordnanceSurvey_image.outputs.image_digest }}
@ -386,7 +388,7 @@ jobs:
uses: ./.github/workflows/_deploy_lambda.yml
with:
lambda_name: pashub_to_ara
lambda_path: infrastructure/terraform/lambda/pashub_to_ara
lambda_path: deployment/terraform/lambda/pashub_to_ara
stage: ${{ needs.determine_stage.outputs.stage }}
ecr_repo: pashub_to_ara-${{ needs.determine_stage.outputs.stage }}
image_digest: ${{ needs.pashub_to_ara_image.outputs.image_digest }}
@ -407,6 +409,8 @@ jobs:
TF_VAR_social_housing_wave_3_sharepoint_id: ${{ secrets.SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID }}
TF_VAR_pashub_email: ${{ secrets.PASHUB_EMAIL }}
TF_VAR_pashub_password: ${{ secrets.PASHUB_PASSWORD }}
TF_VAR_pashub_coordination_email: ${{ secrets.PASHUB_COORDINATION_EMAIL }}
TF_VAR_pashub_coordination_password: ${{ secrets.PASHUB_COORDINATION_PASSWORD }}
# ============================================================
@ -417,7 +421,7 @@ jobs:
uses: ./.github/workflows/_deploy_lambda.yml
with:
lambda_name: ara_fast_api
lambda_path: infrastructure/terraform/lambda/fast-api
lambda_path: deployment/terraform/lambda/fast-api
stage: ${{ needs.determine_stage.outputs.stage }}
terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }}
secrets:
@ -456,17 +460,17 @@ jobs:
- uses: hashicorp/setup-terraform@v3
- name: Terraform Init
working-directory: infrastructure/terraform/cdn_certificate
working-directory: deployment/terraform/cdn_certificate
run: terraform init -reconfigure
- name: Terraform Workspace
working-directory: infrastructure/terraform/cdn_certificate
working-directory: deployment/terraform/cdn_certificate
run: |
terraform workspace select $STAGE \
|| terraform workspace new $STAGE
- name: Terraform Plan
working-directory: infrastructure/terraform/cdn_certificate
working-directory: deployment/terraform/cdn_certificate
run: |
terraform plan \
-var="stage=${STAGE}" \
@ -474,7 +478,7 @@ jobs:
- name: Terraform Apply
if: env.TERRAFORM_APPLY == 'true'
working-directory: infrastructure/terraform/cdn_certificate
working-directory: deployment/terraform/cdn_certificate
run: terraform apply -auto-approve tfplan
@ -501,17 +505,17 @@ jobs:
- uses: hashicorp/setup-terraform@v3
- name: Terraform Init
working-directory: infrastructure/terraform/cdn
working-directory: deployment/terraform/cdn
run: terraform init -reconfigure
- name: Terraform Workspace
working-directory: infrastructure/terraform/cdn
working-directory: deployment/terraform/cdn
run: |
terraform workspace select $STAGE \
|| terraform workspace new $STAGE
- name: Terraform Plan
working-directory: infrastructure/terraform/cdn
working-directory: deployment/terraform/cdn
run: |
terraform plan \
-var="stage=${STAGE}" \
@ -519,7 +523,7 @@ jobs:
- name: Terraform Apply
if: env.TERRAFORM_APPLY == 'true'
working-directory: infrastructure/terraform/cdn
working-directory: deployment/terraform/cdn
run: terraform apply -auto-approve tfplan
# ============================================================
@ -560,7 +564,7 @@ jobs:
uses: ./.github/workflows/_deploy_lambda.yml
with:
lambda_name: magic_plan
lambda_path: infrastructure/terraform/lambda/magic_plan
lambda_path: deployment/terraform/lambda/magic_plan
stage: ${{ needs.determine_stage.outputs.stage }}
ecr_repo: magic-plan-${{ needs.determine_stage.outputs.stage }}
image_digest: ${{ needs.magic_plan_image.outputs.image_digest }}
@ -583,7 +587,7 @@ jobs:
uses: ./.github/workflows/_deploy_lambda.yml
with:
lambda_name: hubspot-etl-to-ara
lambda_path: infrastructure/terraform/lambda/hubspot_deal_etl
lambda_path: deployment/terraform/lambda/hubspot_deal_etl
stage: ${{ needs.determine_stage.outputs.stage }}
ecr_repo: hubspot-etl-${{ needs.determine_stage.outputs.stage }}
image_digest: ${{ needs.hubspot_etl_image.outputs.image_digest }}

114
.github/workflows/lambda_smoke_tests.yml vendored Normal file
View file

@ -0,0 +1,114 @@
# Runs the reusable Lambda smoke test (_smoke_test_lambda.yml) once per
# Lambda Dockerfile on every pull request that targets main.
name: Lambda Smoke Tests

on:
  pull_request:
    branches:
      - main

jobs:
  # --- Ara Engine ---
  ara_engine_smoke_test:
    uses: ./.github/workflows/_smoke_test_lambda.yml
    with:
      dockerfile_path: backend/docker/engine.Dockerfile
      build_context: .
      service_name: ara-engine

  # --- Address 2 UPRN ---
  address2uprn_smoke_test:
    uses: ./.github/workflows/_smoke_test_lambda.yml
    with:
      dockerfile_path: backend/address2UPRN/handler/Dockerfile
      build_context: .
      service_name: address2uprn

  # --- Postcode Splitter (backend/ Dockerfile) ---
  postcode_splitter_smoke_test:
    uses: ./.github/workflows/_smoke_test_lambda.yml
    with:
      dockerfile_path: backend/postcode_splitter/handler/Dockerfile
      build_context: .
      service_name: postcode-splitter

  # --- Postcode Splitter (applications/ Dockerfile) ---
  postcode_splitter_ddd_smoke_test:
    uses: ./.github/workflows/_smoke_test_lambda.yml
    with:
      dockerfile_path: applications/postcode_splitter/Dockerfile
      build_context: .
      service_name: postcode-splitter-ddd

  # --- Bulk Address2UPRN Combiner ---
  bulk_address2uprn_combiner_smoke_test:
    uses: ./.github/workflows/_smoke_test_lambda.yml
    with:
      dockerfile_path: backend/bulk_address2uprn_combiner/handler/Dockerfile
      build_context: .
      service_name: bulk-address2uprn-combiner

  # --- Condition ETL ---
  condition_etl_smoke_test:
    uses: ./.github/workflows/_smoke_test_lambda.yml
    with:
      dockerfile_path: backend/condition/handler/Dockerfile
      build_context: .
      service_name: condition-etl

  # --- Categorisation ---
  categorisation_smoke_test:
    uses: ./.github/workflows/_smoke_test_lambda.yml
    with:
      dockerfile_path: backend/categorisation/handler/Dockerfile
      build_context: .
      service_name: categorisation

  # --- Ordnance Survey ---
  ordnance_survey_smoke_test:
    uses: ./.github/workflows/_smoke_test_lambda.yml
    with:
      dockerfile_path: backend/ordnanceSurvey/handler/Dockerfile
      build_context: .
      service_name: ordnance-survey

  # --- Pas Hub Fetcher ---
  pashub_smoke_test:
    uses: ./.github/workflows/_smoke_test_lambda.yml
    with:
      dockerfile_path: backend/pashub_fetcher/handler/Dockerfile
      build_context: .
      service_name: pashub

  # --- MagicPlan ---
  magic_plan_smoke_test:
    uses: ./.github/workflows/_smoke_test_lambda.yml
    with:
      dockerfile_path: backend/magic_plan/handler/Dockerfile
      build_context: .
      service_name: magic-plan

  # --- HubSpot Scraper ---
  hubspot_scraper_smoke_test:
    uses: ./.github/workflows/_smoke_test_lambda.yml
    with:
      dockerfile_path: etl/hubspot/scripts/scraper/handler/Dockerfile
      build_context: .
      service_name: hubspot-scraper

View file

@ -60,3 +60,15 @@ jobs:
-e DB_PASSWORD=test \
-e DB_PORT=5432 \
model-test pytest -vv -m 'not integration'
# The DDD rewrite (tests/) defines SQLModel table classes that map to the
# same physical tables as the legacy backend models. Both sets share the
# one global SQLModel.metadata, so they cannot be imported into the same
# pytest process. It runs as a separate invocation until the legacy
# models are retired. Its DB is spawned in-process by pytest-postgresql,
# so no DB service or env is required.
- name: Run DDD tests
run: |
docker run --rm \
--network host \
model-test pytest -vv tests/

1
.gitignore vendored
View file

@ -121,6 +121,7 @@ celerybeat.pid
# Environments
.env
.env.local
.venv
env/
venv/

View file

@ -1,29 +0,0 @@
<!-- BACKLOG.MD MCP GUIDELINES START -->
<CRITICAL_INSTRUCTION>
## BACKLOG WORKFLOW INSTRUCTIONS
This project uses Backlog.md MCP for all task and project management activities.
**CRITICAL GUIDANCE**
- If your client supports MCP resources, read `backlog://workflow/overview` to understand when and how to use Backlog for this project.
- If your client only supports tools or the above request fails, call `backlog.get_backlog_instructions()` to load the tool-oriented overview. Use the `instruction` selector when you need `task-creation`, `task-execution`, or `task-finalization`.
- **First time working here?** Read the overview resource IMMEDIATELY to learn the workflow
- **Already familiar?** You should have the overview cached ("## Backlog.md Overview (MCP)")
- **When to read it**: BEFORE creating tasks, or when you're unsure whether to track work
These guides cover:
- Decision framework for when to create tasks
- Search-first workflow to avoid duplicates
- Links to detailed guides for task creation, execution, and finalization
- MCP tools reference
You MUST read the overview resource to understand the complete workflow. The information is NOT summarized here.
</CRITICAL_INSTRUCTION>
<!-- BACKLOG.MD MCP GUIDELINES END -->

View file

@ -1,33 +1,4 @@
<!-- BACKLOG.MD MCP GUIDELINES START -->
<CRITICAL_INSTRUCTION>
## BACKLOG WORKFLOW INSTRUCTIONS
This project uses Backlog.md MCP for all task and project management activities.
**CRITICAL GUIDANCE**
- If your client supports MCP resources, read `backlog://workflow/overview` to understand when and how to use Backlog for this project.
- If your client only supports tools or the above request fails, call `backlog.get_backlog_instructions()` to load the tool-oriented overview. Use the `instruction` selector when you need `task-creation`, `task-execution`, or `task-finalization`.
- **First time working here?** Read the overview resource IMMEDIATELY to learn the workflow
- **Already familiar?** You should have the overview cached ("## Backlog.md Overview (MCP)")
- **When to read it**: BEFORE creating tasks, or when you're unsure whether to track work
These guides cover:
- Decision framework for when to create tasks
- Search-first workflow to avoid duplicates
- Links to detailed guides for task creation, execution, and finalization
- MCP tools reference
You MUST read the overview resource to understand the complete workflow. The information is NOT summarized here.
</CRITICAL_INSTRUCTION>
<!-- BACKLOG.MD MCP GUIDELINES END -->
## Available Skills
Five Claude Code skills are installed in this repo's dev container. Each maps to a phase of the feature lifecycle.

View file

@ -58,7 +58,7 @@ A UK postal code used to group nearby addresses; the primary search key for find
_Avoid_: zip code, postal code
**User Address**:
A free-text address string provided by a user or imported from a customer dataset, before any normalisation or matching.
A structured dataclass (`domain.addresses.user_address.UserAddress`) capturing a customer-supplied address: a free-text `user_address` line, a canonical `postcode` (sanitised on construction), and an optional `internal_reference`. The bare string sense — the raw free-text address line as it arrives from upstream ingestion, before being wrapped — remains valid when discussing CSV columns, API payloads, or other upstream contexts; in domain code, prefer the dataclass.
_Avoid_: user input, raw address, user_inputed_address
**Comparable Properties**:
@ -297,7 +297,7 @@ _Avoid_: API key, auth token, secret
- **"energy assessment"** in the existing codebase (`energy_assessment_functions`, `energy_assessments_by_uprn`) refers to what is now canonically called **Site Notes**. New code uses **Site Notes**.
- **"patch"** / `patch_epc` in the existing codebase has been merged into **Landlord Overrides**; the original concept is deprecated.
- **"already_installed measures"** in the existing codebase is likely subsumed by **Landlord Overrides** ("we have a heat pump now" → override the heating fields). Final call deferred to implementation.
- **"address"** appears as both the raw **User Address** (free-text) and a structured field on an **EPC Search Result** (normalised lines). Always qualify: "user address" vs "EPC address" or "address line 1".
- **"address"** appears as both the raw **User Address** (free-text from customer data, or the structured `UserAddress` dataclass that wraps it) and a structured field on an **EPC Search Result** (normalised lines). Always qualify: "user address" vs "EPC address" or "address line 1". Within `domain/`, **User Address** specifically means the `UserAddress` dataclass; in upstream ingestion contexts (CSV columns, SQS payloads) it can still mean the raw string sense.
- **"score"** is used for `AddressMatch.score()` output, the `lexiscore` column, and informally. Prefer **Lexiscore** in domain discussions; reserve "score" for method-level code comments.
- **"user_inputed_address"** in `backend/address2UPRN/main.py` is a misspelling and a synonym for **User Address** — the canonical term. New code should use `user_address`.
- **"EPC"** is overloaded as both the document and the rating band letter. Use **EPC** for the document, **EPC Band** for the letter.

View file

@ -4,7 +4,7 @@ model_data/local_data/
backend/node_modules/
backend/.idea/
backend/.env
infrastructure/
deployment/
data_collection/
node_modules/
conservation_areas/

0
applications/__init__.py Normal file
View file

View file

@ -0,0 +1,34 @@
FROM public.ecr.aws/lambda/python:3.11

# Database host, port and name arrive as build args (the deploy workflow
# passes DEV_DB_* via --build-arg) and are frozen into the image under the
# POSTGRES_* names that PostgresConfig.from_env reads. This mirrors
# backend/postcode_splitter/handler/Dockerfile. Username and password are
# deliberately NOT baked in -- Terraform injects those as Lambda env vars
# from Secrets Manager.
ARG DEV_DB_HOST
ARG DEV_DB_PORT
ARG DEV_DB_NAME
ENV POSTGRES_HOST=${DEV_DB_HOST} \
    POSTGRES_PORT=${DEV_DB_PORT} \
    POSTGRES_DATABASE=${DEV_DB_NAME}

WORKDIR /var/task

# Install dependencies before copying source so this layer is reused when
# only the source changes.
COPY applications/postcode_splitter/requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Layered source packages the handler imports from. Only the DDD-shaped
# packages are copied -- no pandas, no legacy backend/.
COPY domain/ domain/
COPY infrastructure/ infrastructure/
COPY orchestration/ orchestration/
COPY repositories/ repositories/
COPY utilities/ utilities/
COPY applications/ applications/

# The handler module is also copied to the task root as main.py so the
# runtime can resolve ``main.handler`` without a package prefix.
COPY applications/postcode_splitter/handler.py /var/task/main.py

CMD ["main.handler"]

View file

@ -0,0 +1,52 @@
from __future__ import annotations
import os
from typing import Any
import boto3
from applications.postcode_splitter.postcode_splitter_trigger_body import (
PostcodeSplitterTriggerBody,
)
from infrastructure.address2uprn_queue_client import Address2UprnQueueClient
from infrastructure.csv_s3_client import CsvS3Client
from orchestration.postcode_splitter_orchestrator import PostcodeSplitterOrchestrator
from orchestration.task_orchestrator import TaskOrchestrator
from repositories.user_address.user_address_csv_s3_repository import (
UserAddressCsvS3Repository,
)
from utilities.aws_lambda.subtask_handler import subtask_handler
@subtask_handler()
def handler(
    body: dict[str, Any], context: Any, task_orchestrator: TaskOrchestrator
) -> dict[str, list[str]]:
    """Lambda entry point: split an input CSV and dispatch child subtasks.

    Args:
        body: Raw trigger payload; validated against
            ``PostcodeSplitterTriggerBody``.
        context: Lambda context object; not used by this function.
        task_orchestrator: Passed through to ``PostcodeSplitterOrchestrator``.
            NOTE(review): presumably supplied by the ``subtask_handler``
            decorator -- confirm against its implementation.

    Returns:
        ``{"child_subtask_ids": [...]}`` with each ID returned by
        ``split_and_dispatch`` rendered as a string.

    Raises:
        KeyError: If ``S3_BUCKET_NAME`` or ``ADDRESS2UPRN_QUEUE_URL`` is not
            set in the environment.
    """
    request = PostcodeSplitterTriggerBody.model_validate(body)

    # Required runtime configuration, read after the payload has validated.
    s3_bucket = os.environ["S3_BUCKET_NAME"]
    sqs_queue_url = os.environ["ADDRESS2UPRN_QUEUE_URL"]

    # boto3.client is overloaded per-service in the installed stubs; binding
    # it to an Any-typed name keeps the strict-mode checker from complaining.
    make_client: Any = boto3.client  # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
    s3: Any = make_client("s3")
    sqs: Any = make_client("sqs")

    # Wire the collaborators: CSV reader -> address repository, SQS queue
    # client, then the orchestrator that uses both.
    orchestrator = PostcodeSplitterOrchestrator(
        task_orchestrator=task_orchestrator,
        user_address_repo=UserAddressCsvS3Repository(
            CsvS3Client(s3, s3_bucket), s3_bucket
        ),
        queue_client=Address2UprnQueueClient(sqs, sqs_queue_url),
    )

    dispatched = orchestrator.split_and_dispatch(
        parent_task_id=request.task_id,
        parent_subtask_id=request.sub_task_id,
        input_s3_uri=request.s3_uri,
    )
    return {"child_subtask_ids": list(map(str, dispatched))}

View file

@ -0,0 +1,34 @@
# Local-test environment for the postcode_splitter Lambda.
#
# cp .env.local.example .env.local then fill in the values below.
#
# .env.local is gitignored. The container hits REAL AWS and a REAL Postgres,
# so every value here points at infrastructure that actually exists.
#
# NOTE: the new DDD code uses different env var names than the repo root
# .env. The mapping (root .env name -> var here) is given per section.
# Keep comments on their own lines — docker-compose's env_file parser folds a
# trailing "# ..." into the value.
# --- Postgres (orchestration/default_orchestrator -> PostgresConfig.from_env) ---
# POSTGRES_HOST <- DB_HOST, POSTGRES_PORT <- DB_PORT,
# POSTGRES_USERNAME <- DB_USERNAME, POSTGRES_PASSWORD <- DB_PASSWORD,
# POSTGRES_DATABASE <- DB_NAME.
POSTGRES_HOST=
POSTGRES_PORT=5432
POSTGRES_USERNAME=
POSTGRES_PASSWORD=
POSTGRES_DATABASE=
# POSTGRES_DRIVER=psycopg2 (optional; defaults to psycopg2)
# --- Handler config (applications/postcode_splitter/handler.py) ---
# S3_BUCKET_NAME: bucket holding the input address CSV (root .env: DATA_BUCKET).
# ADDRESS2UPRN_QUEUE_URL: SQS queue the splitter fans batches out to; not in
# the root .env (Terraform sets it in prod).
S3_BUCKET_NAME=
ADDRESS2UPRN_QUEUE_URL=
# --- AWS credentials for boto3 (S3 + SQS clients) ---
AWS_ACCESS_KEY_ID=
AWS_SECRET_ACCESS_KEY=
AWS_DEFAULT_REGION=eu-west-2
# AWS_SESSION_TOKEN= (only if using temporary/SSO credentials)

View file

@ -0,0 +1,9 @@
# Local test harness for the postcode_splitter Lambda image.
services:
  postcode-splitter:
    build:
      # The Dockerfile path below is resolved relative to this context.
      context: ../../../
      dockerfile: applications/postcode_splitter/Dockerfile
    ports:
      # host port 9001 -> container port 8080
      - "9001:8080"
    # Values come from the gitignored .env.local next to this file.
    env_file: .env.local

View file

@ -0,0 +1,28 @@
#!/usr/bin/env python3
import json
import requests
# Address of the locally running Lambda container's invocation endpoint.
HOST = "localhost"
PORT = "9001"
LAMBDA_URL = f"http://{HOST}:{PORT}/2015-03-31/functions/function/invocations"

# (connect, read) timeouts in seconds. Without a timeout, requests waits
# indefinitely if the container is hung or never answers. The read timeout
# is 900 s, the maximum AWS Lambda execution time, so a legitimately slow
# invocation is not cut short.
REQUEST_TIMEOUT = (5, 900)

# SQS-shaped event: a single record whose body is the JSON-encoded trigger
# payload (task_id, sub_task_id, s3_uri).
payload = {
    "Records": [
        {
            "body": json.dumps(
                {
                    "task_id": "e295d89b-a7c5-4a9a-8b4e-b405fab1f298",
                    "sub_task_id": "f4a9944f-41f0-4a33-8669-5016ec574068",
                    "s3_uri": "s3://retrofit-data-dev/bulk_onboarding_inputs/hyde2 (1).csv",
                }
            )
        }
    ]
}

response = requests.post(LAMBDA_URL, json=payload, timeout=REQUEST_TIMEOUT)
print("Status code:", response.status_code)
print("Response:")
print(response.text)

View file

@ -0,0 +1,12 @@
#!/usr/bin/env bash
# Build and start the local postcode-splitter container from scratch.
set -euo pipefail

# Run from this script's directory so relative paths resolve here.
cd "$(dirname "$0")"

# First run: seed .env.local from the template and stop so it can be filled in.
[[ -f .env.local ]] || {
  cp .env.local.example .env.local
  echo "Created .env.local from the template — fill it in, then re-run." >&2
  exit 1
}

# Rebuild without the layer cache, then recreate the container.
docker compose build --no-cache
docker compose up --force-recreate

View file

@ -0,0 +1,11 @@
from uuid import UUID
from pydantic import BaseModel, ConfigDict
class PostcodeSplitterTriggerBody(BaseModel):
    """Validated shape of the payload that triggers the postcode splitter.

    The handler passes ``task_id`` and ``sub_task_id`` on as the parent task
    and parent subtask IDs, and ``s3_uri`` as the input location.
    """

    # extra="allow": keys beyond the declared fields are accepted, not rejected.
    model_config = ConfigDict(extra="allow")
    # Identifier passed on as the parent task ID.
    task_id: UUID
    # Identifier passed on as the parent subtask ID.
    sub_task_id: UUID
    # Location of the input, passed on as ``input_s3_uri``.
    s3_uri: str

View file

@ -0,0 +1,4 @@
boto3
pydantic
sqlmodel
psycopg2-binary

View file

@ -79,23 +79,23 @@ def app():
"""
data_folder = "/workspaces/model/asset_list"
data_filename = "input.xlsx"
sheet_name = "Handovers"
postcode_column = "POSTCODE"
address1_column = "Full Addres"
data_filename = "hyde.xlsx"
sheet_name = "AddressProfilingResults"
postcode_column = "Postcode"
address1_column = "Address"
address1_method = None
fulladdress_column = "Full Addres"
fulladdress_column = "Postcode"
address_cols_to_concat = []
missing_postcodes_method = None
landlord_year_built = None
landlord_os_uprn = "domna_found_uprn"
landlord_property_type = "PROPERTY TYPE" # Good to include if landlord gave
landlord_built_form = "Type Description" # Good to include if landlord gave
landlord_os_uprn = None
landlord_property_type = "Property Type" # Good to include if landlord gave
landlord_built_form = None # Good to include if landlord gave
landlord_wall_construction = None
landlord_roof_construction = None
landlord_heating_system = None
landlord_existing_pv = None
landlord_property_id = "PROP REF"
landlord_property_id = "Organisation Reference"
landlord_sap = None
outcomes_filename = None
outcomes_sheetname = None
@ -469,8 +469,3 @@ def app():
writer, sheet_name="Duplicate Properties", index=False
)
for key,value in dict.items():
lsakjfldsa

View file

@ -6,11 +6,13 @@ ARG DEV_DB_HOST
ARG DEV_DB_PORT
ARG DEV_DB_NAME
ARG EPC_AUTH_TOKEN
ARG OPEN_EPC_API_TOKEN
ENV DB_HOST=${DEV_DB_HOST}
ENV DB_PORT=${DEV_DB_PORT}
ENV DB_NAME=${DEV_DB_NAME}
ENV EPC_AUTH_TOKEN=${EPC_AUTH_TOKEN}
ENV OPEN_EPC_API_TOKEN=${OPEN_EPC_API_TOKEN}
# Set working directory (Lambda task root)

View file

@ -8,4 +8,5 @@ boto3==1.35.44
sqlmodel
sqlalchemy==2.0.36
psycopg2-binary==2.9.10
pydantic-settings==2.6.0
pydantic-settings==2.6.0
httpx

View file

@ -12,12 +12,21 @@ FIXTURE_PATH = Path(__file__).parent / "test_data.csv"
# Each parametrized case fires at least one EPC request; without throttling,
# GitHub-hosted runners burst fast enough to hit 429s.
EPC_THROTTLE_SECONDS = 1.0
EPC_LONG_PAUSE_EVERY = 100
EPC_LONG_PAUSE_SECONDS = 5.0
_epc_request_count = 0
@pytest.fixture(autouse=True)
def _throttle_epc_requests():
global _epc_request_count
yield
time.sleep(EPC_THROTTLE_SECONDS)
_epc_request_count += 1
if _epc_request_count % EPC_LONG_PAUSE_EVERY == 0:
time.sleep(EPC_LONG_PAUSE_SECONDS)
else:
time.sleep(EPC_THROTTLE_SECONDS)
def load_test_cases():

View file

@ -364,4 +364,7 @@ FLAT B 158 LEAHURST ROAD,SE13 5NL,100021976974
164a Victoria Square,M4 5FA,77211315
165a Victoria Square,M4 5FA,77211316
166a Victoria Square,M4 5FA,None
"FLAT 3; 42 MORETON ROAD, SOUTH CROYDON, SURREY",CR2 7DL,None
"FLAT 3; 42 MORETON ROAD, SOUTH CROYDON, SURREY",CR2 7DL,None
71A Stoneleigh Avenue,NE12 8NP,None
71B Stoneleigh Avenue,NE12 8NP,None
71 Stoneleigh Avenue,NE12 8NP,47086009
1 User Input Postcode Manual UPRN Code
364 164a Victoria Square M4 5FA 77211315
365 165a Victoria Square M4 5FA 77211316
366 166a Victoria Square M4 5FA None
367 FLAT 3; 42 MORETON ROAD, SOUTH CROYDON, SURREY CR2 7DL None
368 71A Stoneleigh Avenue NE12 8NP None
369 71B Stoneleigh Avenue NE12 8NP None
370 71 Stoneleigh Avenue NE12 8NP 47086009

View file

@ -86,6 +86,8 @@ class Settings(BaseSettings):
# Pas Hub
PASHUB_EMAIL: Optional[str] = None
PASHUB_PASSWORD: Optional[str] = None
PASHUB_COORDINATION_EMAIL: Optional[str] = None
PASHUB_COORDINATION_PASSWORD: Optional[str] = None
# Optional AWS creds (only required in local)
AWS_ACCESS_KEY_ID: Optional[str] = None

View file

@ -14,15 +14,15 @@ from backend.app.db.models.magic_plan import (
)
def save_plan(session: Session, plan: Plan) -> None:
plan_id: int = _upsert_plan(session, plan)
def save_plan(session: Session, plan: Plan, uploaded_file_id: int) -> None:
plan_id: int = _upsert_plan(session, plan, uploaded_file_id)
_delete_children(session, plan_id)
floor_ids: list[int] = _insert_floors(session, plan.floors, plan_id)
room_ids: list[int] = _insert_rooms(session, plan.floors, floor_ids)
_insert_windows_and_doors(session, plan.floors, room_ids)
def _upsert_plan(session: Session, plan: Plan) -> int:
def _upsert_plan(session: Session, plan: Plan, uploaded_file_id: int) -> int:
stmt = (
pg_insert(MagicPlanPlanModel)
.values(
@ -30,6 +30,7 @@ def _upsert_plan(session: Session, plan: Plan) -> int:
name=plan.name,
address=plan.address,
postcode=plan.postcode,
uploaded_file_id=uploaded_file_id,
)
.on_conflict_do_update(
index_elements=["magic_plan_uid"],
@ -37,6 +38,7 @@ def _upsert_plan(session: Session, plan: Plan) -> int:
"name": plan.name,
"address": plan.address,
"postcode": plan.postcode,
"uploaded_file_id": uploaded_file_id,
},
)
.returning(col(MagicPlanPlanModel.id))

View file

@ -36,7 +36,7 @@ def _count(session: Session, model: type[SQLModel]) -> int:
def test_plan_row_present_after_save(db_session: Session, domain_plan: Plan) -> None:
# Act
save_plan(db_session, domain_plan)
save_plan(db_session, domain_plan, 1)
# Assert
assert _count(db_session, MagicPlanPlanModel) == 1
@ -45,7 +45,7 @@ def test_floor_count_matches_domain(db_session: Session, domain_plan: Plan) -> N
# Arrange
expected = len(domain_plan.floors)
# Act
save_plan(db_session, domain_plan)
save_plan(db_session, domain_plan, 1)
# Assert
assert _count(db_session, MagicPlanFloorModel) == expected
@ -54,7 +54,7 @@ def test_room_count_matches_domain(db_session: Session, domain_plan: Plan) -> No
# Arrange
expected = sum(len(f.rooms) for f in domain_plan.floors)
# Act
save_plan(db_session, domain_plan)
save_plan(db_session, domain_plan, 1)
# Assert
assert _count(db_session, MagicPlanRoomModel) == expected
@ -63,7 +63,7 @@ def test_window_count_matches_domain(db_session: Session, domain_plan: Plan) ->
# Arrange
expected = sum(len(r.windows) for f in domain_plan.floors for r in f.rooms)
# Act
save_plan(db_session, domain_plan)
save_plan(db_session, domain_plan, 1)
# Assert
assert _count(db_session, MagicPlanWindowModel) == expected
@ -72,15 +72,15 @@ def test_door_count_matches_domain(db_session: Session, domain_plan: Plan) -> No
# Arrange
expected = sum(len(r.doors) for f in domain_plan.floors for r in f.rooms)
# Act
save_plan(db_session, domain_plan)
save_plan(db_session, domain_plan, 1)
# Assert
assert _count(db_session, MagicPlanDoorModel) == expected
def test_save_plan_idempotent(db_session: Session, domain_plan: Plan) -> None:
# Act — call twice within the same session
save_plan(db_session, domain_plan)
save_plan(db_session, domain_plan)
save_plan(db_session, domain_plan, 1)
save_plan(db_session, domain_plan, 1)
# Assert — same row counts as a single call
assert _count(db_session, MagicPlanPlanModel) == 1
assert _count(db_session, MagicPlanFloorModel) == len(domain_plan.floors)
@ -93,3 +93,23 @@ def test_save_plan_idempotent(db_session: Session, domain_plan: Plan) -> None:
assert _count(db_session, MagicPlanDoorModel) == sum(
len(r.doors) for f in domain_plan.floors for r in f.rooms
)
def test_uploaded_file_id_stored_after_save(db_session: Session, domain_plan: Plan) -> None:
# Act
save_plan(db_session, domain_plan, 1)
# Assert
row = db_session.execute(select(MagicPlanPlanModel)).scalar_one()
assert row.uploaded_file_id == 1
def test_save_plan_updates_uploaded_file_id_on_reingest(
db_session: Session, domain_plan: Plan
) -> None:
# Arrange
save_plan(db_session, domain_plan, 1)
# Act
save_plan(db_session, domain_plan, 2)
# Assert
row = db_session.execute(select(MagicPlanPlanModel)).scalar_one()
assert row.uploaded_file_id == 2

View file

@ -11,6 +11,7 @@ class MagicPlanPlanModel(SQLModel, table=True):
name: Optional[str] = None
address: Optional[str] = None
postcode: Optional[str] = None
uploaded_file_id: Optional[int] = Field(default=None)
class MagicPlanFloorModel(SQLModel, table=True):

View file

@ -18,10 +18,14 @@ class FileTypeEnum(enum.Enum):
ECMK_RD_SAP_SITE_NOTE = "ecmk_rd_sap_site_note"
ECMK_SURVEY_XML = "ecmk_survey_xml"
MAGIC_PLAN_JSON = "magic_plan_json"
IMPROVEMENT_OPTION_EVALUATION = "improvement_option_evaluation"
MEDIUM_TERM_IMPROVEMENT_PLAN = "medium_term_improvement_plan"
RETROFIT_DESIGN_DOC = "retrofit_design_doc"
class FileSourceEnum(enum.Enum):
PAS_HUB = "pas hub"
COORDINATION_HUB = "coordination_hub"
SHAREPOINT = "sharepoint"
HUBSPOT = "hubspot"
ECMK = "ecmk"

View file

@ -32,6 +32,7 @@ COPY utils/ utils/
COPY backend/condition/ backend/condition/
COPY backend/app/db/models/condition.py backend/app/db/models/condition.py
COPY backend/app/db/base.py backend/app/db/base.py
COPY backend/app/db/connection.py backend/app/db/connection.py
COPY backend/app/config.py backend/app/config.py

View file

@ -47,8 +47,14 @@ class EpcClientService:
latest = max(results, key=lambda r: r.registration_date)
return self.get_by_certificate_number(latest.certificate_number)
@staticmethod
def _normalise_postcode(postcode: str) -> str:
"""Return the postcode with all spaces removed and uppercased."""
return postcode.replace(" ", "").upper()
def search_by_postcode(self, postcode: str) -> list[EpcSearchResult]:
return call_with_retry(lambda: self._search(postcode=postcode))
normalised = self._normalise_postcode(postcode)
return call_with_retry(lambda: self._search(postcode=normalised))
# ------------------------------------------------------------------
# Private helperEpcRateLimpolarss

View file

@ -1,7 +1,7 @@
import gzip
import json
from datetime import datetime, timezone
from typing import Optional
from typing import Optional, cast
from datatypes.magicplan.api.response import MagicPlanPlan, PlanSummary
from datatypes.magicplan.domain.mapper import map_plan
@ -55,8 +55,9 @@ class MagicPlanService:
)
with db_session() as session:
save_plan(session, plan)
session.add(uploaded_file)
session.flush()
save_plan(session, plan, cast(int, uploaded_file.id))
return plan

View file

@ -271,3 +271,38 @@ def test_run_creates_uploaded_file_record(
assert uploaded_file.s3_upload_timestamp is not None
assert uploaded_file.uprn == 100023336956
assert uploaded_file.hubspot_deal_id == "deal-789"
def test_run_passes_flushed_uploaded_file_id_to_save_plan(
mock_client: MagicMock,
plan_summary: PlanSummary,
) -> None:
# Arrange
mock_client.get_plans.return_value = [plan_summary]
service = _make_service(mock_client)
mock_session = MagicMock()
added_objects: list = []
mock_session.add.side_effect = added_objects.append
def simulate_flush() -> None:
for obj in added_objects:
if isinstance(obj, UploadedFile):
obj.id = 42
mock_session.flush.side_effect = simulate_flush
with patch(
"backend.magic_plan.magic_plan_service.find_matching_plan",
return_value=plan_summary,
), patch("backend.magic_plan.magic_plan_service.save_plan") as mock_save, patch(
"backend.magic_plan.magic_plan_service.db_session"
) as mock_db, patch(
"backend.magic_plan.magic_plan_service.save_data_to_s3"
):
mock_db.return_value.__enter__.return_value = mock_session
# Act
service.run(_make_request())
# Assert
assert mock_save.call_args[0][2] == 42

View file

@ -14,9 +14,12 @@ class CoreFiles(Enum):
PAR_PHOTOPACK = "PAR Photo Pack"
PAS2023_PROPERTY = "PAS 2023 Property Assessment Report"
PAS2023_OCCUPANCY = "PAS 2023 Occupancy Assessment Report"
IMPROVEMENT_OPTION_EVALUATION = "Improvement Option Evaluation"
MEDIUM_TERM_IMPROVEMENT_PLAN = "Medium Term Improvement Plan"
RETROFIT_DESIGN_DOC = "Retrofit Design Doc"
CORE_TO_FILETYPE_MAP = {
_CORE_FILE_TO_FILE_TYPE: dict[CoreFiles, str] = {
CoreFiles.PHOTOPACK: FileTypeEnum.PHOTO_PACK.value,
CoreFiles.SITENOTE: FileTypeEnum.SITE_NOTE.value,
CoreFiles.RDSAP_SITENOTE: FileTypeEnum.RD_SAP_SITE_NOTE.value,
@ -26,11 +29,49 @@ CORE_TO_FILETYPE_MAP = {
CoreFiles.PAR_PHOTOPACK: FileTypeEnum.PAR_PHOTO_PACK.value,
CoreFiles.PAS2023_PROPERTY: FileTypeEnum.PAS_2023_PROPERTY.value,
CoreFiles.PAS2023_OCCUPANCY: FileTypeEnum.PAS_2023_OCCUPANCY.value,
CoreFiles.IMPROVEMENT_OPTION_EVALUATION: FileTypeEnum.IMPROVEMENT_OPTION_EVALUATION.value,
CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN: FileTypeEnum.MEDIUM_TERM_IMPROVEMENT_PLAN.value,
CoreFiles.RETROFIT_DESIGN_DOC: FileTypeEnum.RETROFIT_DESIGN_DOC.value,
}
def infer_file_type(filename: str) -> Optional[str]:
for core_file, file_type in CORE_TO_FILETYPE_MAP.items():
def get_core_file_type(
filename: str, evidence_category: Optional[str] = None
) -> Optional[CoreFiles]:
# Identify retrofit design doc using evidence category as the name is possibly unreliable.
# We might change to always use evidence category, but needs more investigation
if evidence_category is not None and evidence_category.lower() == "retrofit design":
return CoreFiles.RETROFIT_DESIGN_DOC
if CoreFiles.IMPROVEMENT_OPTION_EVALUATION.value in filename:
return CoreFiles.IMPROVEMENT_OPTION_EVALUATION
if CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN.value in filename:
return CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN
if evidence_category is None and "-OSM-" in filename and "DR-N-A" in filename:
return CoreFiles.RETROFIT_DESIGN_DOC
_prefix_skip = {
CoreFiles.RETROFIT_DESIGN_DOC,
CoreFiles.IMPROVEMENT_OPTION_EVALUATION,
CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN,
}
for core_file in CoreFiles:
if core_file in _prefix_skip:
continue
if filename.startswith(core_file.value):
return file_type
return core_file
return None
def get_file_type_string(filename: str) -> Optional[str]:
core_file: Optional[CoreFiles] = get_core_file_type(filename)
if core_file is None:
return None
return _CORE_FILE_TO_FILE_TYPE[core_file]

View file

@ -1,9 +1,11 @@
from typing import Any, Dict, List
from typing import Any, Callable, Dict, List, Optional
from backend.app.config import get_settings
from backend.pashub_fetcher.pashub_client import PashubClient, UnauthorizedError
from backend.pashub_fetcher.pashub_client import PashubClient
from backend.pashub_fetcher.pashub_service import PashubService
from backend.pashub_fetcher.pashub_to_ara_trigger_request import PashubToAraTriggerRequest
from backend.pashub_fetcher.pashub_to_ara_trigger_request import (
PashubToAraTriggerRequest,
)
from backend.pashub_fetcher.token_getter import get_token_from_local_storage
from backend.app.db.models.tasks import SourceEnum
from backend.utils.subtasks import task_handler
@ -28,38 +30,41 @@ def handler(body: Dict[str, Any], context: Any) -> List[str]:
settings = get_settings()
pas_hub_email = settings.PASHUB_EMAIL
pas_hub_password = settings.PASHUB_PASSWORD
pashub_email = settings.PASHUB_EMAIL
pashub_password = settings.PASHUB_PASSWORD
if (not pas_hub_email) or (not pas_hub_password):
coordination_hub_email = settings.PASHUB_COORDINATION_EMAIL
coordination_hub_password = settings.PASHUB_COORDINATION_PASSWORD
coordination_client_factory: Optional[Callable[[], PashubClient]] = None
if (not pashub_email) or (not pashub_password):
raise ValueError("Pas Hub credentials not provided")
sharepoint_client = DomnaSharepointClient(
sharepoint_location=DomnaSites.SOCIAL_HOUSING_WAVE_3
)
if coordination_hub_email and coordination_hub_password:
_coord_email, _coord_password = (
coordination_hub_email,
coordination_hub_password,
)
coordination_client_factory = lambda: get_pashub_client(
_coord_email, _coord_password
)
logger.debug("Validating request body")
payload = PashubToAraTriggerRequest.model_validate(body)
logger.debug("Successfully validated request body")
service = PashubService(
pashub_client=get_pashub_client(pas_hub_email, pas_hub_password),
pashub_client=get_pashub_client(pashub_email, pashub_password),
sharepoint_client=sharepoint_client,
s3_bucket=S3_BUCKET,
coordination_client_factory=coordination_client_factory,
)
try:
files: List[str] = service.run(payload)
except UnauthorizedError:
logger.warning("Token expired - refreshing")
service = PashubService(
pashub_client=get_pashub_client(pas_hub_email, pas_hub_password),
sharepoint_client=sharepoint_client,
s3_bucket=S3_BUCKET,
)
files = service.run(payload)
files: List[str] = service.run(payload)
logger.info(f"Saved {len(files)} files")

View file

@ -5,12 +5,11 @@ from datetime import datetime
import requests
from backend.pashub_fetcher.core_files import CoreFiles
from backend.pashub_fetcher.core_files import CoreFiles, get_core_file_type
from backend.pashub_fetcher.evidence_file_data import EvidenceFileData
from backend.pashub_fetcher.evidence_metadata import EvidenceMetadata
from utils.logger import setup_logger
logger = setup_logger()
@ -75,6 +74,10 @@ class PashubClient:
logger.info(f"Getting UPRN for job ID {job_id}")
url = f"{self.base}/jobs/{job_id}"
logger.debug(
f"About to make API request with session headers: {self.session.headers}"
)
r = self.session.get(url)
if r.status_code == 401:
raise UnauthorizedError("Token expired or invalid")
@ -83,15 +86,12 @@ class PashubClient:
try:
return r.json()["uprn"]
except Exception:
except Exception as e:
logger.warning(
f"Failed to get UPRN for Job ID {job_id} with exception: {e}"
)
return None
def _get_core_file_type(self, file: EvidenceFileData) -> Optional[CoreFiles]:
for core_file in CoreFiles:
if file.file_name.startswith(core_file.value):
return core_file
return None
def _select_latest_core_files(
self,
files: List[EvidenceFileData],
@ -99,7 +99,9 @@ class PashubClient:
grouped: Dict[CoreFiles, List[EvidenceFileData]] = defaultdict(list)
for file in files:
core_type = self._get_core_file_type(file)
core_type: Optional[CoreFiles] = get_core_file_type(
file.file_name, file.evidence_category
)
if not core_type:
continue
grouped[core_type].append(file)
@ -107,6 +109,9 @@ class PashubClient:
latest_files: Dict[CoreFiles, EvidenceFileData] = {}
for core_type, group in grouped.items():
if core_type == CoreFiles.RETROFIT_DESIGN_DOC and len(group) > 1:
osm_candidates = [f for f in group if "-OSM-" in f.file_name]
group = osm_candidates if osm_candidates else group
latest = max(group, key=lambda f: datetime.fromisoformat(f.created_utc))
latest_files[core_type] = latest

View file

@ -1,6 +1,6 @@
import os
from datetime import datetime, timezone
from typing import List, NamedTuple, Optional, cast
from typing import Callable, List, NamedTuple, Optional, cast
from backend.app.db.connection import db_session
from backend.app.db.models.uploaded_file import (
@ -10,8 +10,8 @@ from backend.app.db.models.uploaded_file import (
)
from backend.documents_parser.db_writer import save_epc_property_data
from backend.documents_parser.parser import parse_site_notes_pdf
from backend.pashub_fetcher.core_files import infer_file_type
from backend.pashub_fetcher.pashub_client import PashubClient
from backend.pashub_fetcher.core_files import get_file_type_string
from backend.pashub_fetcher.pashub_client import PashubClient, UnauthorizedError
from backend.pashub_fetcher.pashub_to_ara_trigger_request import (
PashubToAraTriggerRequest,
)
@ -36,17 +36,37 @@ class PashubService:
pashub_client: PashubClient,
sharepoint_client: DomnaSharepointClient,
s3_bucket: str,
coordination_client_factory: Optional[Callable[[], PashubClient]] = None,
) -> None:
self._pashub_client = pashub_client
self._sharepoint_client = sharepoint_client
self._s3_bucket = s3_bucket
self._coordination_client_factory = coordination_client_factory
self._coordination_client: Optional[PashubClient] = None
def _get_coordination_client(self) -> PashubClient:
if self._coordination_client_factory is None:
raise UnauthorizedError("No coordination client factory configured")
if self._coordination_client is None:
self._coordination_client = self._coordination_client_factory()
return self._coordination_client
def run(self, request: PashubToAraTriggerRequest) -> List[str]:
job_id = request.pashub_job_id
active_client = self._pashub_client
if request.uprn:
uprn: Optional[str] = request.uprn
else:
try:
uprn = active_client.get_uprn_by_job_id(job_id)
except UnauthorizedError:
logger.info(
f"PasHub credentials unauthorized for job {job_id}; retrying with CoordinationHub credentials"
)
active_client = self._get_coordination_client()
uprn = active_client.get_uprn_by_job_id(job_id)
uprn: Optional[str] = request.uprn or self._pashub_client.get_uprn_by_job_id(
job_id
)
hubspot_deal_id: Optional[str] = request.hubspot_deal_id
if uprn:
@ -54,14 +74,25 @@ class PashubService:
else:
logger.info(f"No UPRN found for job {job_id}")
job_files: List[str] = self._pashub_client.get_core_evidence_files_by_job_id(
job_id
)
try:
job_files: List[str] = active_client.get_core_evidence_files_by_job_id(
job_id
)
except UnauthorizedError:
if active_client is not self._pashub_client:
raise
active_client = self._get_coordination_client()
job_files = active_client.get_core_evidence_files_by_job_id(job_id)
if uprn or hubspot_deal_id:
logger.info("Uploading files to s3")
file_source = (
FileSourceEnum.PAS_HUB
if active_client is self._pashub_client
else FileSourceEnum.COORDINATION_HUB
)
upload_records = self._upload_to_s3_and_update_db(
job_files, uprn, hubspot_deal_id
job_files, uprn, hubspot_deal_id, file_source
)
self._save_site_notes(upload_records)
@ -83,6 +114,7 @@ class PashubService:
job_files: List[str],
uprn: Optional[str],
hubspot_deal_id: Optional[str],
file_source: FileSourceEnum,
) -> List[_FileUploadRecord]:
if not uprn and not hubspot_deal_id:
return []
@ -108,8 +140,8 @@ class PashubService:
s3_upload_timestamp=datetime.now(timezone.utc),
uprn=int(uprn) if uprn else None,
hubspot_deal_id=hubspot_deal_id,
file_source=FileSourceEnum.PAS_HUB.value,
file_type=infer_file_type(filename),
file_source=file_source.value,
file_type=get_file_type_string(filename),
)
file_paths.append(file_path)
uploaded_files.append(uploaded_file)

View file

@ -1,11 +1,10 @@
import re
from typing import Optional
from pydantic import BaseModel
class PashubToAraTriggerRequest(BaseModel):
pashub_link: (
str # e.g. https://pashub.net/jobs/12345-abcd-1234-abcd-12345abcde/details
)
pashub_link: str # e.g. https://pashub.net/jobs/{id}/details, /jobs/{id}/evidence/view, /jobs/{id}
address: Optional[str] = None
sharepoint_link: Optional[str] = None
@ -17,4 +16,7 @@ class PashubToAraTriggerRequest(BaseModel):
@property
def pashub_job_id(self) -> str:
return self.pashub_link.split("/")[-2]
match = re.search(r"/jobs/([^/]+)", self.pashub_link)
if not match:
raise ValueError(f"No job ID found in PasHub link: {self.pashub_link}")
return match.group(1)

View file

@ -0,0 +1,185 @@
from backend.pashub_fetcher.core_files import (
CoreFiles,
get_core_file_type,
get_file_type_string,
)
def test_file_type_for_photopack():
assert get_file_type_string("Photopack_123456_V1.pdf") == "photo_pack"
def test_file_type_for_sitenote():
assert get_file_type_string("SiteNote_123456_V1.pdf") == "site_note"
def test_file_type_for_rdsap_sitenote():
assert (
get_file_type_string("RdSAP_SiteNote_9510890_V1_Assessmet.pdf")
== "rd_sap_site_note"
)
def test_file_type_for_pas2023_ventilation():
assert (
get_file_type_string("PAS 2023 Ventilation Assessment Report_123456.pdf")
== "pas_2023_ventilation"
)
def test_file_type_for_pas2023_condition():
assert (
get_file_type_string("PAS 2023 Condition Report_123456.pdf")
== "pas_2023_condition"
)
def test_file_type_for_pas_significance():
assert get_file_type_string("PAS Significance_123456.pdf") == "pas_significance"
def test_file_type_for_par_photopack():
assert (
get_file_type_string("PAR Photo Pack_95101890_V2_Assessment.pdf")
== "par_photo_pack"
)
def test_file_type_for_pas2023_property():
assert (
get_file_type_string("PAS 2023 Property Assessment Report_123456.pdf")
== "pas_2023_property"
)
def test_file_type_for_pas2023_occupancy():
assert (
get_file_type_string("PAS 2023 Occupancy Assessment Report_123456.pdf")
== "pas_2023_occupancy"
)
def test_file_type_for_improvement_option_evaluation():
# filename: "{job_id} - {postcode} - Improvement Option Evaluation.pdf"
assert (
get_file_type_string("6000802 - NG4 4HD - Improvement Option Evaluation.pdf")
== "improvement_option_evaluation"
)
def test_file_type_for_medium_term_improvement_plan():
# filename: "{job_id} - {postcode} - Medium Term Improvement Plan IOE.pdf"
assert (
get_file_type_string(
"60800802 - NG4 4HD - Medium Term Improvement Plan IOE.pdf"
)
== "medium_term_improvement_plan"
)
def test_file_type_for_retrofit_design_doc():
assert (
get_file_type_string("2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf")
== "retrofit_design_doc"
)
assert (
get_file_type_string("2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf")
== "retrofit_design_doc"
)
# ---------------------------------------------------------------------------
# core_file_for
# ---------------------------------------------------------------------------
def test_core_file_for_evidence_category_match_is_case_insensitive() -> None:
# Arrange
filename = "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf"
# Act
result = get_core_file_type(filename, evidence_category="Retrofit Design")
# Assert
assert result == CoreFiles.RETROFIT_DESIGN_DOC
def test_core_file_for_evidence_category_returns_retrofit_design_doc() -> None:
# Arrange
filename = "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf"
# Act
result = get_core_file_type(filename, evidence_category="retrofit design")
# Assert
assert result == CoreFiles.RETROFIT_DESIGN_DOC
def test_core_file_for_ioe_substring_returns_improvement_option_evaluation() -> None:
# Arrange
filename = "6000802 - NG4 4HD - Improvement Option Evaluation.pdf"
# Act
result = get_core_file_type(filename)
# Assert
assert result == CoreFiles.IMPROVEMENT_OPTION_EVALUATION
def test_core_file_for_mtip_substring_returns_medium_term_improvement_plan() -> None:
# Arrange
filename = "60800802 - NG4 4HD - Medium Term Improvement Plan IOE.pdf"
# Act
result = get_core_file_type(filename)
# Assert
assert result == CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN
def test_core_file_for_osm_pattern_returns_retrofit_design_doc_without_evidence_category() -> (
None
):
# Arrange
filename = "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf"
# Act
result = get_core_file_type(filename)
# Assert
assert result == CoreFiles.RETROFIT_DESIGN_DOC
def test_core_file_for_prefix_returns_photopack() -> None:
# Arrange
filename = "Photopack_123456_V1.pdf"
# Act
result = get_core_file_type(filename)
# Assert
assert result == CoreFiles.PHOTOPACK
def test_core_file_for_unknown_filename_returns_none() -> None:
# Arrange
filename = "unknown_document_123.pdf"
# Act
result = get_core_file_type(filename)
# Assert
assert result is None
def test_core_file_for_osm_fallback_does_not_fire_when_evidence_category_present() -> (
None
):
# Arrange — OSM+DR-N-A filename but evidence_category is something other than retrofit design
filename = "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf"
# Act
result = get_core_file_type(filename, evidence_category="some other category")
# Assert
assert result is None

View file

@ -0,0 +1,117 @@
# pyright: reportPrivateUsage=false
from typing import Optional
from backend.pashub_fetcher.core_files import CoreFiles
from backend.pashub_fetcher.evidence_file_data import EvidenceFileData
from backend.pashub_fetcher.pashub_client import PashubClient
def make_client() -> PashubClient:
return PashubClient(token="test-token")
def make_file(
file_name: str = "unknown.pdf",
evidence_category: Optional[str] = None,
created_utc: str = "2024-01-01T00:00:00",
) -> EvidenceFileData:
return EvidenceFileData(
file_id="id-1",
file_name=file_name,
created_utc=created_utc,
file_size=1024,
file_extension="pdf",
evidence_category=evidence_category,
)
# ---------------------------------------------------------------------------
# _select_latest_core_files
# ---------------------------------------------------------------------------
def test_select_latest_core_files_returns_single_retrofit_design_doc() -> None:
# Arrange
client = make_client()
files = [
make_file(
file_name="2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf",
evidence_category="retrofit design",
created_utc="2024-06-01T00:00:00",
)
]
# Act
result = client._select_latest_core_files(files)
# Assert
assert result[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf"
def test_select_latest_core_files_osm_candidate_wins_over_non_osm() -> None:
# Arrange - the non-OSM file is newer but should lose to the OSM file
client = make_client()
files = [
make_file(
file_name="2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf",
evidence_category="retrofit design",
created_utc="2024-01-01T00:00:00",
),
make_file(
file_name="Retrofit Design Doc non-osm variant.pdf",
evidence_category="retrofit design",
created_utc="2024-06-01T00:00:00",
),
]
# Act
result = client._select_latest_core_files(files)
# Assert
assert result[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf"
def test_select_latest_core_files_picks_latest_when_both_candidates_have_osm() -> None:
# Arrange
client = make_client()
files = [
make_file(
file_name="2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf",
evidence_category="retrofit design",
created_utc="2024-01-01T00:00:00",
),
make_file(
file_name="2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf",
evidence_category="retrofit design",
created_utc="2024-06-01T00:00:00",
),
]
# Act
result = client._select_latest_core_files(files)
# Assert
assert result[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf"
def test_select_latest_core_files_falls_back_to_latest_when_no_osm_candidates() -> None:
# Arrange
client = make_client()
files = [
make_file(
file_name="retrofit_design_v1.pdf",
evidence_category="retrofit design",
created_utc="2024-01-01T00:00:00",
),
make_file(
file_name="retrofit_design_v2.pdf",
evidence_category="retrofit design",
created_utc="2024-06-01T00:00:00",
),
]
# Act
result = client._select_latest_core_files(files)
# Assert
assert result[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "retrofit_design_v2.pdf"

View file

@ -1,8 +1,10 @@
from typing import Optional
import pytest
from typing import Any, Callable, Optional
from unittest.mock import MagicMock, call, patch
from backend.pashub_fetcher.pashub_client import PashubClient
from backend.app.db.models.uploaded_file import FileSourceEnum
from backend.pashub_fetcher.pashub_client import PashubClient, UnauthorizedError
from backend.pashub_fetcher.pashub_service import PashubService
from backend.pashub_fetcher.pashub_to_ara_trigger_request import (
PashubToAraTriggerRequest,
@ -31,11 +33,13 @@ def make_service(
pashub_client: Optional[PashubClient] = None,
sharepoint_client: Optional[DomnaSharepointClient] = None,
s3_bucket: str = "test-bucket",
coordination_client_factory: Optional[Callable[[], PashubClient]] = None,
) -> PashubService:
return PashubService(
pashub_client=pashub_client or MagicMock(spec=PashubClient),
sharepoint_client=sharepoint_client or MagicMock(spec=DomnaSharepointClient),
s3_bucket=s3_bucket,
coordination_client_factory=coordination_client_factory,
)
@ -144,10 +148,11 @@ def test_run_persists_uploaded_file_records_to_db() -> None:
service.run(make_request(uprn="12345"))
fake_session.add_all.assert_called_once()
added: list = fake_session.add_all.call_args[0][0]
added: list[Any] = fake_session.add_all.call_args[0][0]
assert len(added) == 1
assert added[0].s3_file_bucket == "test-bucket"
assert added[0].uprn == 12345
assert added[0].file_source == FileSourceEnum.PAS_HUB.value
# ---------------------------------------------------------------------------
@ -225,6 +230,135 @@ def test_run_parses_and_saves_site_notes_for_rd_sap_site_note_file() -> None:
# ---------------------------------------------------------------------------
# ---------------------------------------------------------------------------
# run(): coordination fallback
# ---------------------------------------------------------------------------
def test_run_uses_coordination_client_when_pas_401_on_uprn_lookup() -> None:
pas_client = MagicMock(spec=PashubClient)
pas_client.get_uprn_by_job_id.side_effect = UnauthorizedError()
coord_client = MagicMock(spec=PashubClient)
coord_client.get_uprn_by_job_id.return_value = "99999"
coord_client.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"]
factory = MagicMock(return_value=coord_client)
service = make_service(pashub_client=pas_client, coordination_client_factory=factory)
with (
patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"),
patch("backend.pashub_fetcher.pashub_service.db_session"),
patch("backend.pashub_fetcher.pashub_service.os.remove"),
):
result = service.run(make_request())
assert result == ["/tmp/a.pdf"]
coord_client.get_uprn_by_job_id.assert_called_once()
coord_client.get_core_evidence_files_by_job_id.assert_called_once()
assert factory.call_count == 1
def test_run_uses_coordination_client_when_pas_401_on_file_listing() -> None:
pas_client = MagicMock(spec=PashubClient)
pas_client.get_core_evidence_files_by_job_id.side_effect = UnauthorizedError()
coord_client = MagicMock(spec=PashubClient)
coord_client.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"]
factory = MagicMock(return_value=coord_client)
service = make_service(pashub_client=pas_client, coordination_client_factory=factory)
with (
patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"),
patch("backend.pashub_fetcher.pashub_service.db_session"),
patch("backend.pashub_fetcher.pashub_service.os.remove"),
):
result = service.run(make_request(uprn="12345"))
assert result == ["/tmp/a.pdf"]
coord_client.get_core_evidence_files_by_job_id.assert_called_once()
pas_client.get_uprn_by_job_id.assert_not_called()
def test_run_raises_unauthorized_when_pas_401_and_no_factory() -> None:
pas_client = MagicMock(spec=PashubClient)
pas_client.get_uprn_by_job_id.side_effect = UnauthorizedError()
service = make_service(pashub_client=pas_client)
with pytest.raises(UnauthorizedError):
service.run(make_request())
def test_run_raises_unauthorized_when_both_clients_401() -> None:
pas_client = MagicMock(spec=PashubClient)
pas_client.get_uprn_by_job_id.side_effect = UnauthorizedError()
coord_client = MagicMock(spec=PashubClient)
coord_client.get_uprn_by_job_id.side_effect = UnauthorizedError()
factory = MagicMock(return_value=coord_client)
service = make_service(pashub_client=pas_client, coordination_client_factory=factory)
with pytest.raises(UnauthorizedError):
service.run(make_request())
def test_run_persists_coordination_hub_file_source_when_pas_401_on_uprn_lookup() -> None:
    """Rows persisted after a UPRN-lookup fallback are tagged with the coordination hub source.

    The PAS client raises 401 on the UPRN lookup; the coordination client then
    supplies both the UPRN and the file list. The first object handed to
    ``session.add_all`` must carry ``FileSourceEnum.COORDINATION_HUB``.
    """
    # Arrange
    primary = MagicMock(spec=PashubClient)
    primary.get_uprn_by_job_id.side_effect = UnauthorizedError()

    fallback = MagicMock(spec=PashubClient)
    fallback.get_uprn_by_job_id.return_value = "99999"
    fallback.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"]

    session = MagicMock()
    service = make_service(
        pashub_client=primary,
        coordination_client_factory=MagicMock(return_value=fallback),
    )

    # Act
    with (
        patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"),
        patch("backend.pashub_fetcher.pashub_service.db_session") as db_session_mock,
        patch("backend.pashub_fetcher.pashub_service.os.remove"),
    ):
        # db_session is used as a context manager; make it yield our fake session.
        db_session_mock.return_value.__enter__.return_value = session
        service.run(make_request())

    # Assert
    session.add_all.assert_called_once()
    persisted: list[Any] = session.add_all.call_args[0][0]
    first_row = persisted[0]
    assert first_row.file_source == FileSourceEnum.COORDINATION_HUB.value
def test_run_persists_coordination_hub_file_source_when_pas_401_on_file_listing() -> None:
    """Rows persisted after a file-listing fallback are tagged with the coordination hub source.

    The request is built with a UPRN and the PAS client raises 401 when listing
    files, so the file list comes from the coordination client. The first object
    handed to ``session.add_all`` must carry ``FileSourceEnum.COORDINATION_HUB``.
    """
    # Arrange
    primary = MagicMock(spec=PashubClient)
    primary.get_core_evidence_files_by_job_id.side_effect = UnauthorizedError()

    fallback = MagicMock(spec=PashubClient)
    fallback.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"]

    session = MagicMock()
    service = make_service(
        pashub_client=primary,
        coordination_client_factory=MagicMock(return_value=fallback),
    )

    # Act
    with (
        patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"),
        patch("backend.pashub_fetcher.pashub_service.db_session") as db_session_mock,
        patch("backend.pashub_fetcher.pashub_service.os.remove"),
    ):
        # db_session is used as a context manager; make it yield our fake session.
        db_session_mock.return_value.__enter__.return_value = session
        service.run(make_request(uprn="12345"))

    # Assert
    session.add_all.assert_called_once()
    persisted: list[Any] = session.add_all.call_args[0][0]
    first_row = persisted[0]
    assert first_row.file_source == FileSourceEnum.COORDINATION_HUB.value
def test_run_warns_and_continues_when_site_notes_parsing_fails() -> None:
mock_client = MagicMock(spec=PashubClient)
mock_client.get_uprn_by_job_id.return_value = None

View file

@ -0,0 +1,51 @@
import pytest
from backend.pashub_fetcher.pashub_to_ara_trigger_request import (
PashubToAraTriggerRequest,
)
def make_request(pashub_link: str) -> PashubToAraTriggerRequest:
    """Build a trigger request that carries only the given PasHub link."""
    request = PashubToAraTriggerRequest(pashub_link=pashub_link)
    return request
def test_pashub_job_id_extracts_id_from_details_link() -> None:
    """A ``/jobs/<id>/details`` link yields ``<id>`` as the job id."""
    # Arrange
    link = "https://pashub.net/jobs/job-id-123/details"
    trigger = make_request(link)

    # Act
    job_id = trigger.pashub_job_id

    # Assert
    assert job_id == "job-id-123"
def test_pashub_job_id_raises_for_invalid_link() -> None:
    """Reading the job id from a link with no ``/jobs/<id>`` segment raises ``ValueError``."""
    # Arrange
    link = "https://pashub.net/rcs-dashboard"
    trigger = make_request(link)

    # Act / Assert
    with pytest.raises(ValueError):
        # Reading the attribute is what triggers the error; the value is discarded.
        _ = trigger.pashub_job_id
def test_pashub_job_id_extracts_id_from_bare_job_link() -> None:
    """A ``/jobs/<id>`` link with no trailing path yields ``<id>`` as the job id."""
    # Arrange
    link = "https://pashub.net/jobs/job-id-123"
    trigger = make_request(link)

    # Act
    job_id = trigger.pashub_job_id

    # Assert
    assert job_id == "job-id-123"
def test_pashub_job_id_extracts_id_from_evidence_view_link() -> None:
    """A ``/jobs/<id>/evidence/view`` link yields ``<id>`` as the job id."""
    # Arrange
    link = "https://pashub.net/jobs/job-id-123/evidence/view"
    trigger = make_request(link)

    # Act
    job_id = trigger.pashub_job_id

    # Assert
    assert job_id == "job-id-123"

View file

@ -0,0 +1,137 @@
import json
import logging
import os
from typing import Any, Optional, cast
import boto3
from openpyxl import load_workbook
from backend.app.config import get_settings
from backend.pashub_fetcher.pashub_to_ara_trigger_request import (
PashubToAraTriggerRequest,
)
# Log at INFO with a message-only format (no timestamp, level or logger name).
logging.basicConfig(level=logging.INFO, format="%(message)s")
logger: logging.Logger = logging.getLogger(__name__)
# When True, main() logs each request it would send but skips the SQS send_message call.
DRY_RUN: bool = False
# Deal IDs (as strings) to restrict the run to. main() keeps only requests whose
# hubspot_deal_id is in this set; an empty set is falsy and disables the filter.
DEAL_ID_FILTER: frozenset[str] = frozenset(
{
"379452094688",
"379466504437",
"379660170452",
"380016925932",
"379848065216",
"379466504434",
"379452094690",
"379965924567",
"380016925923",
"379792072898",
"379654754502",
"379560262861",
"379969670369",
"379248717001",
"379971468493",
"379999888607",
"379606372580",
"379969603797",
"379967743213",
"379263155434",
"379855267025",
"379889899719",
"379071064307",
"379867925741",
}
)
# Path of the export workbook, resolved relative to this script's own directory.
EXCEL_PATH: str = os.path.join(
os.path.dirname(__file__),
"united-infrastructure-exports-all-deals-2026-05-14.xlsx",
)
def _build_requests(excel_path: str) -> list[PashubToAraTriggerRequest]:
    """Read the export workbook and build one trigger request per row with a PasHub link.

    The first worksheet is used. Row 1 is treated as the header row, and the
    columns "PasHub link", "Record ID", "Deal Name" and "Deal Stage" are
    located by header text (surrounding whitespace ignored).

    Args:
        excel_path: Path to the ``.xlsx`` export.

    Returns:
        Requests in sheet order. Rows whose PasHub link cell is empty or
        contains only whitespace are skipped.

    Raises:
        KeyError: If any of the required header columns is missing.
    """
    wb = load_workbook(excel_path, data_only=True)
    ws = wb.worksheets[0]

    # Map header text -> 1-based column index.
    headers: dict[str, int] = {}
    for col in range(1, ws.max_column + 1):
        header_val = ws.cell(row=1, column=col).value
        if header_val is not None:
            headers[str(header_val).strip()] = col

    # Fail with a message naming every missing column and the file, rather
    # than a bare KeyError on whichever lookup happens to run first.
    required_columns = ("PasHub link", "Record ID", "Deal Name", "Deal Stage")
    missing = [name for name in required_columns if name not in headers]
    if missing:
        raise KeyError(
            f"Missing required column(s) {missing} in first sheet of {excel_path}"
        )

    pashub_col: int = headers["PasHub link"]
    record_id_col: int = headers["Record ID"]
    deal_name_col: int = headers["Deal Name"]
    deal_stage_col: int = headers["Deal Stage"]

    requests: list[PashubToAraTriggerRequest] = []
    for row in range(2, ws.max_row + 1):
        pashub_link_raw = ws.cell(row=row, column=pashub_col).value
        if pashub_link_raw is None:
            continue
        # Strip BEFORE the emptiness check so a whitespace-only cell is
        # skipped instead of producing a request with an empty link.
        pashub_link: str = str(pashub_link_raw).strip()
        if not pashub_link:
            continue

        deal_name_raw = ws.cell(row=row, column=deal_name_col).value
        deal_stage_raw = ws.cell(row=row, column=deal_stage_col).value

        hubspot_deal_id: Optional[str] = _record_id_to_str(
            ws.cell(row=row, column=record_id_col).value
        )
        address: Optional[str] = (
            str(deal_name_raw).strip() if deal_name_raw is not None else None
        )
        deal_stage: Optional[str] = (
            str(deal_stage_raw).strip() if deal_stage_raw is not None else None
        )

        requests.append(
            PashubToAraTriggerRequest(
                pashub_link=pashub_link,
                hubspot_deal_id=hubspot_deal_id,
                address=address,
                deal_stage=deal_stage,
            )
        )
    return requests


def _record_id_to_str(value: Any) -> Optional[str]:
    """Convert a "Record ID" cell value to the string form used for deal IDs.

    A numeric cell may be read as a float (e.g. ``379452094688.0``); plain
    ``str()`` would keep the ``.0`` suffix, and such an ID can never match an
    all-digit string ID. Integer-valued floats are therefore rendered without
    the fractional part, and surrounding whitespace is removed.
    """
    if value is None:
        return None
    if isinstance(value, float) and value.is_integer():
        return str(int(value))
    return str(value).strip()
def main() -> None:
    """Queue one SQS message per selected row of the export workbook.

    Requests are built from ``EXCEL_PATH`` and, when ``DEAL_ID_FILTER`` is
    non-empty, narrowed to the deals listed there. Each request is logged; when
    ``DRY_RUN`` is False it is also sent to the queue configured as
    ``PASHUB_TO_ARA_SQS_URL``, with the JSON-encoded request as message body.
    A final count of processed requests is printed to stdout.
    """
    trigger_requests: list[PashubToAraTriggerRequest] = _build_requests(EXCEL_PATH)
    if DEAL_ID_FILTER:
        trigger_requests = [
            r for r in trigger_requests if r.hubspot_deal_id in DEAL_ID_FILTER
        ]

    # Only touch AWS and application settings when messages will really be
    # sent, so a dry run works without a configured region or queue URL.
    sqs: Any = None
    queue_url: str = ""
    if not DRY_RUN:
        sqs = cast(Any, boto3.client("sqs"))  # type: ignore[reportUnknownMemberType]
        queue_url = get_settings().PASHUB_TO_ARA_SQS_URL

    # The label does not change between iterations, so compute it once.
    action: str = "DRY RUN" if DRY_RUN else "SENDING"
    count: int = 0
    for request in trigger_requests:
        logger.info(
            f"[{action}] deal_id={request.hubspot_deal_id} pashub_link={request.pashub_link}"
        )
        if not DRY_RUN:
            response: dict[str, Any] = sqs.send_message(
                QueueUrl=queue_url,
                MessageBody=json.dumps(request.model_dump()),
            )
            message_id: str = response["MessageId"]
            logger.info(f" MessageId: {message_id}")
        # Counted in both modes: the summary reports "would send" for a dry run.
        count += 1

    label: str = "would send" if DRY_RUN else "sent"
    print(f"{count} messages {label}")


if __name__ == "__main__":
    main()

View file

@ -10,7 +10,7 @@
### 2. Add infrastructure prerequisites (shared stack)
- Add a new ECR repository in:
infrastructure/terraform/shared/main.tf
deployment/terraform/shared/main.tf
- Create a PR to deploy this to main then dev in order to deploy the shared stack

View file

@ -49,6 +49,8 @@ module "lambda" {
SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID = var.social_housing_wave_3_sharepoint_id
PASHUB_EMAIL = var.pashub_email
PASHUB_PASSWORD = var.pashub_password
PASHUB_COORDINATION_EMAIL = var.pashub_coordination_email
PASHUB_COORDINATION_PASSWORD = var.pashub_coordination_password
}
}

View file

@ -100,4 +100,16 @@ variable "pashub_email" {
# PasHub account password. No default is declared, so a value must be supplied.
# Marked sensitive so Terraform redacts it in plan/apply output.
variable "pashub_password" {
type = string
sensitive = true
}
# Email for the PasHub coordination account.
# Optional: defaults to null when no value is supplied.
# Marked sensitive so Terraform redacts it in plan/apply output.
# NOTE(review): presumably the login for the coordination fallback client - confirm.
variable "pashub_coordination_email" {
type = string
sensitive = true
default = null
}
# Password for the PasHub coordination account.
# Optional: defaults to null when no value is supplied.
# Marked sensitive so Terraform redacts it in plan/apply output.
variable "pashub_coordination_password" {
type = string
sensitive = true
default = null
}

View file

@ -38,22 +38,8 @@ module "lambda" {
{
STAGE = var.stage
LOG_LEVEL = "info"
DB_USERNAME = local.db_credentials.db_assessment_model_username
DB_PASSWORD = local.db_credentials.db_assessment_model_password
GOOGLE_SOLAR_API_KEY = "test"
SAP_PREDICTIONS_BUCKET = "test"
CARBON_PREDICTIONS_BUCKET = "test"
HEAT_PREDICTIONS_BUCKET = "test"
HEATING_KWH_PREDICTIONS_BUCKET = "test"
HOTWATER_KWH_PREDICTIONS_BUCKET = "test"
API_KEY = "test"
ENVIRONMENT = "test"
SECRET_KEY = "test"
PLAN_TRIGGER_BUCKET = "test"
DATA_BUCKET = "test"
EPC_AUTH_TOKEN = "test"
ENGINE_SQS_URL = "test"
ENERGY_ASSESSMENTS_BUCKET = "test"
POSTGRES_USERNAME = local.db_credentials.db_assessment_model_username
POSTGRES_PASSWORD = local.db_credentials.db_assessment_model_password
ADDRESS2UPRN_QUEUE_URL = data.terraform_remote_state.address2uprn.outputs.address2uprn_queue_url
S3_BUCKET_NAME = data.terraform_remote_state.shared.outputs.retrofit_sap_data_bucket_name
},

Some files were not shown because too many files have changed in this diff Show more